def on_run(self):
    """
    Build and run an example workflow that iterates over an array of SMILES.

    A 'LoopTask' maps the sub-workflow between the loop task and its
    'loop_end_task' over every value found in the input parameter named by
    'mapper_arg'. The current instance (self) is set as task runner of the
    workflow.

    This method is a generator: it yields 'sleep(1)' repeatedly for as long
    as the workflow reports that it is running.
    """

    smiles_array = [
        'O1[C@@H](CCC1=O)CCC',
        'C[C@]12CC[C@H]3[C@@H](CC=C4CCCC[C@]34CO)[C@@H]1CCC2=O',
        'CC12CCC3C(CC=C4C=CCCC34C)C1CCC2=O'
    ]

    # Build the workflow
    workflow = Workflow(project_dir='./loop_workflow')
    workflow.task_runner = self

    # Task 1: provides the array of values that will be iterated over.
    # 'output_format' is set on this task to demonstrate that parameters
    # passed to the LoopTask are forwarded to the mapped workflows it
    # creates. Likewise 'steps' illustrates that input inside the loop can
    # be obtained from a task outside of the loop.
    provider = workflow.add_task('Array provider')
    provider.set_input(output_format='mol2', steps=100)

    # Task 2: the loop task. 'mapper_arg' names the input parameter that
    # holds the iterable of values to iterate over. 'loop_end_task' is
    # required and names the task that 'closes' the loop and collects all
    # results.
    loop = workflow.add_task(
        'Loop',
        task_type='LoopTask',
        mapper_arg='smiles',
        loop_end_task='Collector')
    workflow.connect_task(provider.nid, loop.nid)

    # Task 3: convert the ligand to mol2 format irrespective of the input
    # format. The 'smiles' parameter of the loop is passed on as 'mol'.
    converter = workflow.add_task(
        'Ligand conversion',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.convert')
    workflow.connect_task(loop.nid, converter.nid, smiles='mol')

    # Task 4: convert mol2 to 3D mol2 irrespective of the input being
    # 1D/2D or 3D mol2. If 'output_format' is not specified it is deduced
    # from the input, which is mol2 in this case. Conversion to 3D fails
    # under some circumstances, therefore retry up to 3 times.
    make_3d = workflow.add_task(
        'Make_3D',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.make3d',
        retry_count=3)
    workflow.connect_task(converter.nid, make_3d.nid, 'mol')
    workflow.connect_task(provider.nid, make_3d.nid, 'steps')

    # Task 5: empty task that serves as collector for all results obtained
    # during the iteration.
    collector = workflow.add_task('Collector')
    workflow.connect_task(make_3d.nid, collector.nid, 'mol')

    # Set the array of input SMILES strings on task 1 and start the run
    workflow.input(provider.nid, smiles=smiles_array)
    workflow.run()

    # Poll once per second until the workflow is no longer running
    while workflow.is_running:
        yield sleep(1)
def on_run(self):
    """
    When the microservice has successfully registered with the broker the
    on_run method is the first method to be called.
    We are using this method now to run our example workflow.

    The method first builds a workflow specification and saves it as
    'workflow_spec.jgf'. The saved specification is then loaded and run
    once for every ligand SMILES string, each run using its own
    './ligand-<n>' project directory.

    This method is a generator: it yields 'sleep(1)' repeatedly for as long
    as a workflow run reports that it is running.
    """

    # Workflow constants, these will be saved as part of the workflow
    # specification
    ligand_format = 'smi'
    pH = 7.4
    protein_file = os.path.abspath('protein.mol2')
    protein_binding_center = [4.9264, 19.0796, 21.9892]

    # Build Workflow
    wf = Workflow(description='MDStudio WAMP workflow')

    # Task 1: convert the SMILES string to mol2 format (2D).
    # Add a task using the 'add_task' method always defining:
    # an administrative title of the task and the task type, here a WampTask
    # because we are calling a microservice endpoint defined by uri.
    # 'store_output' is True by default and stores the task input/output to
    # disk.
    t1 = wf.add_task(
        'Format_conversion',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.convert')

    # Use 'set_input' to define the input to a task. As we are now building
    # a workflow specification these will be task constants but the same
    # method will be used later on to define specific input when using the
    # workflow specification for a ligand.
    t1.set_input(output_format='mol2')

    # Task 2: Convert mol2 to 3D mol2 irrespective of the input being
    # 1D/2D or 3D mol2. This particular 3D conversion routine is known to
    # fail sometimes but by setting retry_count to 3 the workflow manager
    # will retry 3 times before failing.
    t2 = wf.add_task(
        'Make_3D',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.make3d',
        retry_count=3)
    t2.set_input(output_format='mol2')

    # Use 'connect_task' to connect t1 to t2 using their unique identifiers
    # (nid). In addition we can specify the parameters of task 1 we wish to
    # use as input to task 2 as additional arguments or keyword arguments
    # to the function. A keyword argument defines a parameter name mapping
    # between the two tasks.
    wf.connect_task(t1.nid, t2.nid, 'mol')

    # Task 3: Adjust ligand protonation state to a given pH if applicable
    t3 = wf.add_task(
        'Add hydrogens',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.addh')
    t3.set_input(output_format='mol2', correctForPH=True, pH=pH)
    wf.connect_task(t2.nid, t3.nid, 'mol')

    # Task 4: Get the formal charge for the protonated mol2 to use as input
    # for ACPYPE or ATB.
    # Here store_output equals False which will keep all output in memory
    # and finally as part of the stored workflow file (*.jgf)
    t4 = wf.add_task(
        'Get charge',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.info',
        store_output=False)
    t4.set_input(input_format='mol2')
    wf.connect_task(t3.nid, t4.nid, 'mol')

    # Task 5: Create rotations of the molecule for better sampling
    t5 = wf.add_task(
        'Create 3D rotations',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.rotate')
    t5.set_input(rotations=[[1, 0, 0, 90], [1, 0, 0, -90],
                            [0, 1, 0, 90], [0, 1, 0, -90],
                            [0, 0, 1, 90], [0, 0, 1, -90]])
    wf.connect_task(t3.nid, t5.nid, 'mol')

    # Task 6: Run PLANTS on ligand and protein
    # The 'base_work_dir' argument points to a tmp directory that is shared
    # between the microservice docker image and the host system to store
    # results.
    t6 = wf.add_task(
        'Plants docking',
        task_type='WampTask',
        uri='mdgroup.mdstudio_smartcyp.endpoint.docking')
    t6.set_input(cluster_structures=100,
                 bindingsite_center=protein_binding_center,
                 bindingsite_radius=12,
                 protein_file=protein_file,
                 threshold=3.0,
                 base_work_dir='/tmp/mdstudio/mdstudio_smartcyp')

    # Here we pass only the 'mol' parameter from task 5 to task 6 where it
    # is named 'ligand_file'
    wf.connect_task(t5.nid, t6.nid, 'mol', mol='ligand_file')

    # Task 7: Extract cluster medians from output using a custom function.
    # A task of type 'PythonTask' allows to add custom python functions
    # or classes to the workflow. They are defined using the 'custom_func'
    # parameter according to the Python import syntax. The package or file
    # containing the function should be available as part of the PYTHONPATH.
    t7 = wf.add_task(
        'Get cluster medians',
        task_type='PythonTask',
        custom_func='workflow_helpers.get_docking_medians')
    wf.connect_task(t6.nid, t7.nid, 'result')

    # Task 8: retrieve median structures
    t8 = wf.add_task(
        'Retrieve median structures',
        task_type='WampTask',
        uri='mdgroup.mdstudio_smartcyp.endpoint.docking_structures')
    t8.set_input(create_ensemble=False)
    wf.connect_task(t7.nid, t8.nid, medians='paths')

    # Save the workflow specification
    wf.save('workflow_spec.jgf')

    # Lets run the workflow specification for a number of ligand SMILES.
    # The current microservice instance (self) is passed as task_runner to
    # the workflow, it will be used to make calls to other microservice
    # endpoints when task_type equals WampTask.
    wf.task_runner = self

    ligands = [
        'O1[C@@H](CCC1=O)CCC',
        'C[C@]12CC[C@H]3[C@@H](CC=C4CCCC[C@]34CO)[C@@H]1CCC2=O',
        'CC12CCC3C(CC=C4C=CCCC34C)C1CCC2=O'
    ]

    currdir = os.getcwd()
    for i, ligand in enumerate(ligands, start=1):
        try:
            wf.load('workflow_spec.jgf')
            wf.input(t1.nid, mol={'content': ligand,
                                  'path': None,
                                  'extension': ligand_format})
            wf.run(project_dir='./ligand-{0}'.format(i))

            # Poll once per second until this run is no longer running
            while wf.is_running:
                yield sleep(1)
        finally:
            # Always return to the start directory, also when a run fails,
            # so the relative 'workflow_spec.jgf' and './ligand-<n>' paths
            # keep resolving against the same location.
            # NOTE(review): presumably running the workflow changes the
            # working directory (the original code restored it as well);
            # confirm against the Workflow implementation.
            os.chdir(currdir)