Python Workflow Examples: usage examples for mdstudio_workflow.Workflow

Exemple #1

0

Afficher le fichier

    def setUp(self):
        """
        Create a workflow rooted in tmp_project_dir holding two tasks,
        'test1' and 'test2', connected in that order.
        """

        runner = "module.dummy_task_runners.task_runner"

        workflow = Workflow(project_dir=tmp_project_dir)
        self.wf = workflow

        # First task
        first = workflow.add_task('test1', custom_func=runner)
        first.set_input(add_number=10, dummy=2)

        # Second task, passes output_to_disk=True as part of its input
        second = workflow.add_task('test2', custom_func=runner)
        second.set_input(add_number=8, output_to_disk=True)

        # Link first -> second using the task node identifiers
        workflow.connect_task(first.nid, second.nid)

Exemple #2

0

Afficher le fichier

    def setUpClass(cls):
        """
        Create the class-level workflow and load the finished linear
        workflow spec file ('../files/test-linear-finished.jgf', resolved
        relative to currpath).
        """

        cls.wf = Workflow()

        spec_file = os.path.join(
            currpath, '../files/test-linear-finished.jgf')
        cls.wf.load(os.path.abspath(spec_file))

Exemple #3

0

Afficher le fichier

    def setUp(self):
        """
        Create a workflow with two tasks, 'test1' and 'test2', that are
        not yet connected. Both are added with store_output=False.
        """

        self.wf = Workflow()

        # Options shared by both tasks
        task_options = {
            'custom_func': "module.dummy_task_runners.task_runner",
            'store_output': False,
        }

        self.tid1 = self.wf.add_task('test1', **task_options)
        self.tid1.set_input(add_number=10, dummy=2, return_more=True)

        self.tid2 = self.wf.add_task('test2', **task_options)
        self.tid2.set_input(add_number=8, return_more=True)

Exemple #4

0

Afficher le fichier

    def setUpClass(cls):
        """
        Create the class-level workflow and load the unfinished linear
        workflow spec file ('../files/test-linear-unfinished.jgf', resolved
        relative to currpath).

        Per the original notes for this fixture: tasks test1 and test2 are
        already finished and no output is stored locally.
        """

        cls.wf = Workflow()

        spec_file = os.path.join(
            currpath, '../files/test-linear-unfinished.jgf')
        cls.wf.load(os.path.abspath(spec_file))

Exemple #5

0

Afficher le fichier

    def setUpClass(cls):
        """
        Load the workflow spec file at workflow_file_path into a class-level
        workflow, skipping the tests when that file does not exist.
        """

        spec_available = os.path.exists(workflow_file_path)
        if not spec_available:
            # Nothing to load: the spec file was not produced beforehand
            raise unittest.SkipTest(
                'TestBuildLinearWorkflow failed to build workflow')

        cls.wf = workflow = Workflow()
        workflow.load(workflow_file_path)

Exemple #6

0

Afficher le fichier

class TestLocalWorkdir(UnittestPythonCompatibility):
    """
    Run a two task workflow that uses tmp_project_dir as project directory
    and check what ends up on disk.

    setUp builds a fresh workflow for every test and tearDown removes the
    project directory again.
    """

    def setUp(self):
        """
        Build two task workflow
        """

        self.wf = Workflow(project_dir=tmp_project_dir)

        tid1 = self.wf.add_task(
            'test1', custom_func="module.dummy_task_runners.task_runner")
        tid1.set_input(add_number=10, dummy=2)

        tid2 = self.wf.add_task(
            'test2', custom_func="module.dummy_task_runners.task_runner")
        tid2.set_input(add_number=8, output_to_disk=True)
        self.wf.connect_task(tid1.nid, tid2.nid)

    def tearDown(self):
        """
        tearDown method called after each unittest to cleanup
        the project directory
        """

        if os.path.exists(tmp_project_dir):
            shutil.rmtree(tmp_project_dir)

    def _count_project_subdirs(self):
        """
        Return the number of directories directly inside tmp_project_dir.

        os.listdir returns bare entry names, so every name is joined with
        the project directory before it is tested. Testing the bare name
        would resolve it against the current working directory and only
        give the right answer when that happens to be the project directory.
        """

        return len([
            name for name in os.listdir(tmp_project_dir)
            if os.path.isdir(os.path.join(tmp_project_dir, name))
        ])

    def test_store_output_all(self):
        """
        Test run workflow storing output of all tasks (default)
        """

        # Run the workflow
        self.wf.run()

        # Blocking: wait until workflow is no longer running
        while self.wf.is_running:
            time.sleep(2)

        # Check existence of project dir, tasks dirs and workflow graph file.
        self.assertTrue(os.path.exists(tmp_project_dir))
        self.assertEqual(self._count_project_subdirs(), 2)
        self.assertTrue(
            os.path.isfile(os.path.join(tmp_project_dir, 'workflow.jgf')))

        # Check output
        expected = {'test1': 12, 'test2': 20}
        for task in self.wf.get_tasks():
            self.assertEqual(task.get_output().get('dummy'),
                             expected[task.key])

    def test_store_output_partial(self):
        """
        Test run workflow storing output only for last task
        """

        # Switch off output storage for the first task only
        task = self.wf.get_task(key='test1')
        task.task_metadata.store_output.value = False

        # Run the workflow
        self.wf.run()

        # Blocking: wait until workflow is no longer running
        while self.wf.is_running:
            time.sleep(2)

        # Check existence of project dir, tasks dirs and workflow graph file.
        self.assertTrue(os.path.exists(tmp_project_dir))
        self.assertEqual(self._count_project_subdirs(), 1)
        self.assertTrue(
            os.path.isfile(os.path.join(tmp_project_dir, 'workflow.jgf')))

        # Check output
        expected = {'test1': 12, 'test2': 20}
        for task in self.wf.get_tasks():
            self.assertEqual(task.get_output().get('dummy'),
                             expected[task.key])

Exemple #7

0

Afficher le fichier

    def on_run(self):
        """
        Build, save and run the example docking workflow.

        According to the original notes this is the first method called
        once the microservice has registered with the broker. The workflow
        specification is assembled task by task, saved as
        'workflow_spec.jgf' and then reloaded and run once for every ligand
        SMILES string, each run using its own './ligand-<n>' project
        directory.
        """

        # Constants that end up in the saved workflow specification
        smiles_format = 'smi'
        target_ph = 7.4
        receptor_file = os.path.abspath('protein.mol2')
        pocket_center = [4.9264, 19.0796, 21.9892]

        # Rotation input for the rotate task (task 5)
        rotation_set = [[1, 0, 0, 90], [1, 0, 0, -90], [0, 1, 0, 90],
                        [0, 1, 0, -90], [0, 0, 1, 90], [0, 0, 1, -90]]

        # Ligands the saved specification is run for
        ligands = [
            'O1[C@@H](CCC1=O)CCC',
            'C[C@]12CC[C@H]3[C@@H](CC=C4CCCC[C@]34CO)[C@@H]1CCC2=O',
            'CC12CCC3C(CC=C4C=CCCC34C)C1CCC2=O',
        ]

        workflow = Workflow(description='MDStudio WAMP workflow')

        # Task 1: SMILES -> mol2 conversion.
        # Every task gets a title and a task type. 'WampTask' is used for
        # calls to a microservice endpoint identified by its uri.
        convert = workflow.add_task(
            'Format_conversion',
            task_type='WampTask',
            uri='mdgroup.mdstudio_structures.endpoint.convert')

        # Input set while building the specification acts as a task
        # constant. Ligand specific input is supplied later, per run.
        convert.set_input(output_format='mol2')

        # Task 2: mol2 -> 3D mol2. The original notes say this conversion
        # fails now and then, hence the retry_count of 3.
        make_3d = workflow.add_task(
            'Make_3D',
            task_type='WampTask',
            uri='mdgroup.mdstudio_structures.endpoint.make3d',
            retry_count=3)
        make_3d.set_input(output_format='mol2')

        # Tasks are linked by node identifier (nid). Extra positional
        # arguments select output parameters to pass on, keyword arguments
        # additionally rename a parameter for the receiving task.
        workflow.connect_task(convert.nid, make_3d.nid, 'mol')

        # Task 3: protonate the ligand for the requested pH
        protonate = workflow.add_task(
            'Add hydrogens',
            task_type='WampTask',
            uri='mdgroup.mdstudio_structures.endpoint.addh')
        protonate.set_input(output_format='mol2',
                            correctForPH=True,
                            pH=target_ph)
        workflow.connect_task(make_3d.nid, protonate.nid, 'mol')

        # Task 4: formal charge of the protonated ligand (for ACPYPE or
        # ATB). Added with store_output=False; per the original notes the
        # output is then kept in memory and in the saved *.jgf file.
        charge = workflow.add_task(
            'Get charge',
            task_type='WampTask',
            uri='mdgroup.mdstudio_structures.endpoint.info',
            store_output=False)
        charge.set_input(input_format='mol2')
        workflow.connect_task(protonate.nid, charge.nid, 'mol')

        # Task 5: rotated copies of the ligand
        rotate = workflow.add_task(
            'Create 3D rotations',
            task_type='WampTask',
            uri='mdgroup.mdstudio_structures.endpoint.rotate')
        rotate.set_input(rotations=rotation_set)
        workflow.connect_task(protonate.nid, rotate.nid, 'mol')

        # Task 6: PLANTS docking of ligand and protein.
        # base_work_dir is described in the original notes as a tmp
        # directory shared between the service docker image and the host.
        docking = workflow.add_task(
            'Plants docking',
            task_type='WampTask',
            uri='mdgroup.mdstudio_smartcyp.endpoint.docking')
        docking.set_input(cluster_structures=100,
                          bindingsite_center=pocket_center,
                          bindingsite_radius=12,
                          protein_file=receptor_file,
                          threshold=3.0,
                          base_work_dir='/tmp/mdstudio/mdstudio_smartcyp')

        # Only 'mol' is forwarded from task 5 and it is handed to task 6
        # under the name 'ligand_file'
        workflow.connect_task(rotate.nid, docking.nid, 'mol',
                              mol='ligand_file')

        # Task 7: cluster medians via a custom Python function.
        # A 'PythonTask' refers to its function through 'custom_func' using
        # Python import syntax. The module has to be importable.
        medians = workflow.add_task(
            'Get cluster medians',
            task_type='PythonTask',
            custom_func='workflow_helpers.get_docking_medians')
        workflow.connect_task(docking.nid, medians.nid, 'result')

        # Task 8: fetch the median structures
        structures = workflow.add_task(
            'Retrieve median structures',
            task_type='WampTask',
            uri='mdgroup.mdstudio_smartcyp.endpoint.docking_structures')
        structures.set_input(create_ensemble=False)
        workflow.connect_task(medians.nid, structures.nid, medians='paths')

        # Persist the specification
        workflow.save('workflow_spec.jgf')

        # This microservice instance acts as task runner, it performs the
        # endpoint calls for tasks of type WampTask.
        workflow.task_runner = self

        start_dir = os.getcwd()
        for index, smiles in enumerate(ligands, start=1):
            # Start every ligand from the saved specification
            workflow.load('workflow_spec.jgf')

            ligand_input = {
                'content': smiles,
                'path': None,
                'extension': smiles_format
            }
            workflow.input(convert.nid, mol=ligand_input)

            workflow.run(project_dir='./ligand-{0}'.format(index))
            while workflow.is_running:
                yield sleep(1)

            # Return to the starting directory before the next ligand
            os.chdir(start_dir)

Exemple #8

0

Afficher le fichier

    def on_run(self):
        """
        Build and run the LIE binding-affinity prediction workflow (stage 5
        only) for a single ligand.

        Model parameters are read from '1A2_model/params.pkl' and the energy
        trajectory files are taken from the current working directory. The
        workflow runs in './lie_prediction' with this instance as task
        runner.
        """

        # Ligand to make prediction for
        ligand = 'O1[C@@H](CCC1=O)CCC'
        ligand_format = 'smi'
        liemodel = os.path.join(os.getcwd(), '1A2_model')

        # CYP1A2 pre-calibrated model.
        # Opened in binary mode inside a context manager: pickle needs a
        # binary file object and the handle is closed once loading is done.
        # NOTE: pickle can execute arbitrary code while loading, only use
        # model files from a trusted source.
        modelpicklefile = os.path.join(liemodel, 'params.pkl')
        with open(modelpicklefile, 'rb') as model_handle:
            modelfile = pickle.load(model_handle)

        unbound_trajectory = os.path.join(os.getcwd(),
                                          "unbound_trajectory.ene")
        bound_trajectory = [os.path.join(os.getcwd(), "bound_trajectory.ene")]
        decompose_files = [
            os.path.join(os.getcwd(), "decompose_dataframe.ene")
        ]

        # Build Workflow
        wf = Workflow(project_dir='./lie_prediction')
        wf.task_runner = self

        # STAGE 5. PYLIE FILTERING, AD ANALYSIS AND BINDING-AFFINITY PREDICTION
        # Collect Gromacs bound and unbound MD energy trajectories in a dataframe
        t18 = wf.add_task(
            'Create mdframe',
            task_type='WampTask',
            uri='mdgroup.lie_pylie.endpoint.collect_energy_trajectories')
        t18.set_input(unbound_trajectory=unbound_trajectory,
                      bound_trajectory=bound_trajectory,
                      lie_vdw_header="Ligand-Ligenv-vdw",
                      lie_ele_header="Ligand-Ligenv-ele")

        # Determine stable regions in MDFrame and filter
        t19 = wf.add_task(
            'Detect stable regions',
            task_type='WampTask',
            uri='mdgroup.lie_pylie.endpoint.filter_stable_trajectory')
        t19.set_input(do_plot=True,
                      minlength=45,
                      workdir='/tmp/mdstudio/lie_pylie')
        wf.connect_task(t18.nid, t19.nid, 'mdframe')

        # Extract average LIE energy values from the trajectory
        t20 = wf.add_task(
            'LIE averages',
            task_type='WampTask',
            uri='mdgroup.lie_pylie.endpoint.calculate_lie_average')
        wf.connect_task(t19.nid, t20.nid, filtered_mdframe='mdframe')

        # Calculate dG using pre-calibrated model parameters
        t21 = wf.add_task('Calc dG',
                          task_type='WampTask',
                          uri='mdgroup.lie_pylie.endpoint.liedeltag')
        t21.set_input(alpha_beta_gamma=modelfile['LIE']['params'])
        wf.connect_task(t20.nid, t21.nid, 'averaged', averaged='dataframe')

        # Applicability domain: 1. Tanimoto similarity with training set
        t22 = wf.add_task(
            'AD1 tanimoto simmilarity',
            task_type='WampTask',
            uri='mdgroup.mdstudio_structures.endpoint.chemical_similarity')
        t22.set_input(test_set=[ligand],
                      mol_format=ligand_format,
                      reference_set=modelfile['AD']['Tanimoto']['smi'],
                      ci_cutoff=modelfile['AD']['Tanimoto']['Furthest'])
        wf.connect_task(t18.nid, t22.nid)

        # Applicability domain: 2. residue decomposition
        t23 = wf.add_task('AD2 residue decomposition',
                          task_type='WampTask',
                          uri='mdgroup.lie_pylie.endpoint.adan_residue_decomp',
                          inline_files=False)
        t23.set_input(model_pkl=modelpicklefile,
                      decompose_files=decompose_files)
        wf.connect_task(t18.nid, t23.nid)

        # Applicability domain: 3. deltaG energy range
        t24 = wf.add_task('AD3 dene yrange',
                          task_type='WampTask',
                          uri='mdgroup.lie_pylie.endpoint.adan_dene_yrange')
        t24.set_input(ymin=modelfile['AD']['Yrange']['min'],
                      ymax=modelfile['AD']['Yrange']['max'])
        wf.connect_task(t21.nid,
                        t24.nid,
                        'liedeltag_file',
                        liedeltag_file='dataframe')

        # Applicability domain: 4. deltaG energy distribution
        t25 = wf.add_task('AD4 dene distribution',
                          task_type='WampTask',
                          uri='mdgroup.lie_pylie.endpoint.adan_dene')
        t25.set_input(model_pkl=modelpicklefile,
                      center=list(modelfile['AD']['Dene']['Xmean']),
                      ci_cutoff=modelfile['AD']['Dene']['Maxdist'])
        wf.connect_task(t21.nid,
                        t25.nid,
                        'liedeltag_file',
                        liedeltag_file='dataframe')

        # Start the workflow and yield until it is no longer running
        wf.run()
        while wf.is_running:
            yield sleep(1)

Exemple #9

0

Afficher le fichier

    def on_run(self):
        """
        Build, save and run the complete binding-affinity prediction
        workflow for a single ligand.

        Stages: ligand pre-processing, topology creation, PLANTS docking,
        Gromacs MD, and pyLIE filtering / applicability domain analysis.
        Model settings are read from '1A2_model/model.dat' (JSON) and
        '1A2_model/params.pkl' (pickle). The workflow runs in
        './allies_run' with this instance as task runner and its
        specification is saved as 'workflow_spec.jgf' before running.
        """

        # Ligand to make prediction for
        ligand = 'O1[C@@H](CCC1=O)CCC'
        ligand_format = 'smi'
        liemodel = os.path.join(os.getcwd(), '1A2_model')

        # CYP1A2 Model data
        with open(os.path.join(liemodel, 'model.dat'), 'r') as mdf:
            model = json.load(mdf)

        # CYP1A2 pre-calibrated model.
        # Opened in binary mode inside a context manager: pickle needs a
        # binary file object and the handle is closed once loading is done.
        # NOTE: pickle can execute arbitrary code while loading, only use
        # model files from a trusted source.
        modelpicklefile = os.path.join(liemodel, 'params.pkl')
        with open(modelpicklefile, 'rb') as model_handle:
            modelfile = pickle.load(model_handle)

        # Build Workflow
        wf = Workflow(project_dir='./allies_run')
        wf.task_runner = self

        # STAGE 1: LIGAND PRE-PROCESSING
        # Convert ligand to mol2 irrespective of input format.
        t1 = wf.add_task('Format_conversion',
                         task_type='WampTask',
                         uri='mdgroup.mdstudio_structures.endpoint.convert')
        t1.set_input(mol={
            'content': ligand,
            'path': None,
            'extension': ligand_format
        })

        # Convert mol2 to 3D mol2 irrespective if input is 1D/2D or 3D mol2
        t2 = wf.add_task('Make_3D',
                         task_type='WampTask',
                         uri='mdgroup.mdstudio_structures.endpoint.make3d')
        t2.set_input(output_format='mol2')
        wf.connect_task(t1.nid, t2.nid, 'mol')

        # Adjust ligand protonation state to a given pH if applicable
        t3 = wf.add_task('Add hydrogens',
                         task_type='WampTask',
                         uri='mdgroup.mdstudio_structures.endpoint.addh')
        t3.set_input(output_format='mol2',
                     correctForPH=model['pHCorr'],
                     pH=model['pH'])
        wf.connect_task(t2.nid, t3.nid, 'mol')

        # Get the formal charge for the protonated mol2 to use as input for ACPYPE or ATB
        t4 = wf.add_task('Get charge',
                         task_type='WampTask',
                         uri='mdgroup.mdstudio_structures.endpoint.info')
        t4.set_input(input_format='mol2')
        wf.connect_task(t3.nid, t4.nid, 'mol')

        # STAGE 2. CREATE TOPOLOGY FOR LIGAND
        # Run acpype on ligands
        t5 = wf.add_task('ACPYPE',
                         task_type='WampTask',
                         uri='mdgroup.mdstudio_amber.endpoint.acpype',
                         retry_count=3)
        wf.connect_task(t3.nid, t5.nid, mol='structure')
        wf.connect_task(t4.nid, t5.nid, charge='net_charge')

        # STAGE 3. PLANTS DOCKING
        # Create rotations of the molecule for better sampling
        t6 = wf.add_task('Create 3D rotations',
                         task_type='WampTask',
                         uri='mdgroup.mdstudio_structures.endpoint.rotate')
        t6.set_input(rotations=[[1, 0, 0, 90], [1, 0, 0, -90], [0, 1, 0, 90],
                                [0, 1, 0, -90], [0, 0, 1, 90], [0, 0, 1, -90]])
        wf.connect_task(t3.nid, t6.nid, 'mol')

        # Run PLANTS on ligand and protein
        t7 = wf.add_task('Plants docking',
                         task_type='WampTask',
                         uri='mdgroup.mdstudio_smartcyp.endpoint.docking')
        t7.set_input(cluster_structures=100,
                     bindingsite_center=model['proteinParams'][0]['pocket'],
                     bindingsite_radius=model['proteinParams'][0]['radius'],
                     protein_file=os.path.join(
                         liemodel, model['proteinParams'][0]['proteinDock']),
                     threshold=3.0,
                     base_work_dir='/tmp/mdstudio/mdstudio_smartcyp')
        wf.connect_task(t6.nid, t7.nid, mol='ligand_file')

        # Get cluster median structures from docking
        t8 = wf.add_task(
            'Get cluster medians',
            custom_func='allies_workflow_helpers.get_docking_medians')
        wf.connect_task(t7.nid, t8.nid, 'output')

        # STAGE 4. GROMACS MD
        # Ligand in solution
        t14 = wf.add_task(
            'MD ligand in water',
            task_type='WampTask',
            uri='mdgroup.mdstudio_gromacs.endpoint.gromacs_ligand',
            store_output=True)
        t14.set_input(
            # NOTE(review): hard-coded value, model['timeSim'] was used
            # here before according to the commented-out original.
            sim_time=0.001,
            include=[
                os.path.join(liemodel, model['proteinTopPos']),
                os.path.join(liemodel, 'attype.itp')
            ],
            residues=model['resSite'],
            protein_file=None,
            protein_top=os.path.join(liemodel, model['proteinTop']),
            cerise_file=os.path.join(os.getcwd(), 'cerise_config_gt.json'))
        wf.connect_task(t5.nid,
                        t14.nid,
                        new_pdb='ligand_file',
                        gmx_itp='topology_file')

        # Convert PLANTS mol2 to pdb
        t15 = wf.add_task('Ligand mol2 to PDB',
                          task_type='WampTask',
                          uri='mdgroup.mdstudio_structures.endpoint.convert')
        t15.set_input(output_format='pdb')
        wf.connect_task(t8.nid, t15.nid, medians='mol')

        # Run MD for protein + ligand
        t16 = wf.add_task(
            'MD protein-ligand',
            task_type='WampTask',
            uri='mdgroup.mdstudio_gromacs.endpoint.gromacs_protein')
        t16.set_input(sim_time=0.001,
                      include=[
                          os.path.join(liemodel, model['proteinTopPos']),
                          os.path.join(liemodel, 'attype.itp')
                      ],
                      residues=model['resSite'],
                      charge=model['charge'],
                      cerise_file=os.path.join(os.getcwd(),
                                               'cerise_config_gt.json'),
                      protein_file=os.path.join(
                          liemodel, model['proteinParams'][0]['proteinCoor']),
                      protein_top=os.path.join(liemodel, model['proteinTop']))
        wf.connect_task(t15.nid, t16.nid, mol='ligand_file')
        wf.connect_task(t5.nid, t16.nid, gmx_itp='topology_file')

        # Collect results
        t17 = wf.add_task(
            'Collect MD results',
            custom_func='allies_workflow_helpers.collect_md_enefiles')
        t17.set_input(model_dir=liemodel)
        wf.connect_task(t14.nid, t17.nid, output='unbound')
        wf.connect_task(t16.nid, t17.nid, output='bound')

        # STAGE 5. PYLIE FILTERING, AD ANALYSIS AND BINDING-AFFINITY PREDICTION
        # Collect Gromacs bound and unbound MD energy trajectories in a dataframe
        t18 = wf.add_task(
            'Create mdframe',
            task_type='WampTask',
            uri='mdgroup.lie_pylie.endpoint.collect_energy_trajectories')
        t18.set_input(lie_vdw_header="Ligand-Ligenv-vdw",
                      lie_ele_header="Ligand-Ligenv-ele")
        wf.connect_task(t17.nid, t18.nid, 'bound_trajectory',
                        'unbound_trajectory')

        # Determine stable regions in MDFrame and filter
        t19 = wf.add_task(
            'Detect stable regions',
            task_type='WampTask',
            uri='mdgroup.lie_pylie.endpoint.filter_stable_trajectory')
        t19.set_input(do_plot=True, FilterSplines={'minlength': 45})
        wf.connect_task(t18.nid, t19.nid, 'mdframe')

        # Extract average LIE energy values from the trajectory
        t20 = wf.add_task(
            'LIE averages',
            task_type='WampTask',
            uri='mdgroup.lie_pylie.endpoint.calculate_lie_average')
        wf.connect_task(t19.nid, t20.nid, filtered_mdframe='mdframe')

        # Calculate dG using pre-calibrated model parameters
        t21 = wf.add_task('Calc dG',
                          task_type='WampTask',
                          uri='mdgroup.lie_pylie.endpoint.liedeltag')
        t21.set_input(alpha=modelfile['LIE']['params'][0],
                      beta=modelfile['LIE']['params'][1],
                      gamma=modelfile['LIE']['params'][2])
        wf.connect_task(t20.nid, t21.nid, averaged='dataframe')

        # Applicability domain: 1. Tanimoto similarity with training set
        # NOTE(review): this uri uses 'lie_structures' while every other
        # structures endpoint in this method uses 'mdstudio_structures' -
        # confirm which service name is the current one.
        t22 = wf.add_task(
            'AD1 tanimoto simmilarity',
            task_type='WampTask',
            uri='mdgroup.lie_structures.endpoint.chemical_similarity')
        t22.set_input(test_set=[ligand],
                      mol_format=ligand_format,
                      reference_set=modelfile['AD']['Tanimoto']['smi'],
                      ci_cutoff=modelfile['AD']['Tanimoto']['Furthest'])
        wf.connect_task(t18.nid, t22.nid)

        # Applicability domain: 2. residue decomposition
        t23 = wf.add_task('AD2 residue decomposition',
                          task_type='WampTask',
                          uri='mdgroup.lie_pylie.endpoint.adan_residue_decomp')
        t23.set_input(model_pkl=modelpicklefile)
        wf.connect_task(t17.nid, t23.nid, 'decomp_files')

        # Applicability domain: 3. deltaG energy range
        t24 = wf.add_task('AD3 dene yrange',
                          task_type='WampTask',
                          uri='mdgroup.lie_pylie.endpoint.adan_dene_yrange')
        t24.set_input(ymin=modelfile['AD']['Yrange']['min'],
                      ymax=modelfile['AD']['Yrange']['max'])
        wf.connect_task(t21.nid, t24.nid, liedeltag_file='dataframe')

        # Applicability domain: 4. deltaG energy distribution
        t25 = wf.add_task('AD4 dene distribution',
                          task_type='WampTask',
                          uri='mdgroup.lie_pylie.endpoint.adan_dene')
        t25.set_input(model_pkl=modelpicklefile,
                      center=list(modelfile['AD']['Dene']['Xmean']),
                      ci_cutoff=modelfile['AD']['Dene']['Maxdist'])
        wf.connect_task(t21.nid, t25.nid, liedeltag_file='dataframe')

        # Save the workflow specification
        wf.save('workflow_spec.jgf')

        # Start the workflow and yield until it is no longer running
        wf.run()
        while wf.is_running:
            yield sleep(1)

Exemple #10

0

Afficher le fichier

    def on_run(self):
        """
        Build and run an example workflow that uses a LoopTask to process
        a list of SMILES strings.

        The workflow runs in './loop_workflow' with this instance as task
        runner.
        """

        workflow = Workflow(project_dir='./loop_workflow')
        workflow.task_runner = self

        # SMILES strings handed to the array provider task further down
        smiles_strings = [
            'O1[C@@H](CCC1=O)CCC',
            'C[C@]12CC[C@H]3[C@@H](CC=C4CCCC[C@]34CO)[C@@H]1CCC2=O',
            'CC12CCC3C(CC=C4C=CCCC34C)C1CCC2=O'
        ]

        # Task 1: supplies the array of values that is iterated over.
        #         Per the original notes, output_format=mol2 is set here to
        #         show that parameters passed to the LoopTask are forwarded
        #         to the mapped workflows it creates, and steps=100 shows
        #         that a task inside the loop can take input from a task
        #         outside of it.
        provider = workflow.add_task('Array provider')
        provider.set_input(output_format='mol2', steps=100)

        # Task 2: the loop itself. 'mapper_arg' names the input parameter
        #         holding the iterable, 'loop_end_task' names the task that
        #         closes the loop and collects the results (required
        #         according to the original notes).
        loop = workflow.add_task('Loop',
                                 task_type='LoopTask',
                                 mapper_arg='smiles',
                                 loop_end_task='Collector')
        workflow.connect_task(provider.nid, loop.nid)

        # Task 3: convert the ligand to mol2. The loop's 'smiles' value is
        #         passed on as 'mol'.
        convert = workflow.add_task(
            'Ligand conversion',
            task_type='WampTask',
            uri='mdgroup.mdstudio_structures.endpoint.convert')
        workflow.connect_task(loop.nid, convert.nid, smiles='mol')

        # Task 4: mol2 -> 3D mol2. Per the original notes an unspecified
        #         'output_format' is deduced from the input (mol2 here) and
        #         the 3D conversion can fail, hence up to 3 retries. Takes
        #         'mol' from task 3 and 'steps' from task 1.
        make_3d = workflow.add_task(
            'Make_3D',
            task_type='WampTask',
            uri='mdgroup.mdstudio_structures.endpoint.make3d',
            retry_count=3)
        workflow.connect_task(convert.nid, make_3d.nid, 'mol')
        workflow.connect_task(provider.nid, make_3d.nid, 'steps')

        # Task 5: empty task that serves as collector for the results of
        #         all iterations.
        collector = workflow.add_task('Collector')
        workflow.connect_task(make_3d.nid, collector.nid, 'mol')

        # Hand the SMILES array to task 1
        workflow.input(provider.nid, smiles=smiles_strings)

        # Start the workflow and yield until it is no longer running
        workflow.run()
        while workflow.is_running:
            yield sleep(1)

Exemple #11

0

Afficher le fichier

class TestInputOutputMapping(UnittestPythonCompatibility):
    """
    Check how the arguments of connect_task affect the 'dummy' output of
    two connected tasks.
    """

    def setUp(self):
        """
        Create a workflow with two tasks, 'test1' and 'test2', that are
        not yet connected. Both are added with store_output=False.
        """

        self.wf = Workflow()

        # Options shared by both tasks
        task_options = {
            'custom_func': "module.dummy_task_runners.task_runner",
            'store_output': False,
        }

        self.tid1 = self.wf.add_task('test1', **task_options)
        self.tid1.set_input(add_number=10, dummy=2, return_more=True)

        self.tid2 = self.wf.add_task('test2', **task_options)
        self.tid2.set_input(add_number=8, return_more=True)

    def _run_and_check_dummy(self, expected):
        """
        Run the workflow, block until it is no longer running and compare
        the 'dummy' output of every task with expected[task.key].
        """

        self.wf.run()

        # Poll until the workflow reports it has stopped running
        while self.wf.is_running:
            time.sleep(2)

        for task in self.wf.get_tasks():
            dummy_value = task.get_output().get('dummy')
            self.assertEqual(dummy_value, expected[task.key])

    def test_run_default(self):
        """
        Connect the tasks without parameter selection or mapping.
        Expected 'dummy' output: 12 for test1, 20 for test2.
        """

        self.wf.connect_task(self.tid1.nid, self.tid2.nid)

        self._run_and_check_dummy({'test1': 12, 'test2': 20})

    def test_run_keyword_selection(self):
        """
        Connect the tasks selecting only the 'dummy' output parameter.
        Expected 'dummy' output: 12 for test1, 20 for test2.
        """

        self.wf.connect_task(self.tid1.nid, self.tid2.nid, 'dummy')

        self._run_and_check_dummy({'test1': 12, 'test2': 20})

    def test_run_keyword_mapping(self):
        """
        Connect the tasks mapping the 'param3' output parameter to 'dummy'.
        Expected 'dummy' output: 12 for test1, 13 for test2.
        """

        self.wf.connect_task(self.tid1.nid, self.tid2.nid, param3='dummy')

        self._run_and_check_dummy({'test1': 12, 'test2': 13})

    def test_run_keyword_selection_mapping(self):
        """
        Connect the tasks selecting 'dummy' and also mapping 'param3' to
        'dummy', so the mapped value replaces the selected one.
        Expected 'dummy' output: 12 for test1, 13 for test2.
        """

        self.wf.connect_task(self.tid1.nid,
                             self.tid2.nid,
                             'dummy',
                             param3='dummy')

        self._run_and_check_dummy({'test1': 12, 'test2': 13})