def on_run(self):
    """
    Build and run the example loop workflow.

    A provider task hands a list of SMILES strings to a LoopTask, which maps
    the tasks between itself and the 'Collector' task over every entry of the
    'smiles' input. Inside the loop each SMILES string is converted to mol2
    and then to a 3D mol2 structure.

    This is a generator: after starting the workflow it yields `sleep(1)`
    repeatedly for as long as `is_running` on the workflow is true.
    """

    # SMILES strings that will be iterated over by the loop task.
    smiles_batch = [
        'O1[C@@H](CCC1=O)CCC',
        'C[C@]12CC[C@H]3[C@@H](CC=C4CCCC[C@]34CO)[C@@H]1CCC2=O',
        'CC12CCC3C(CC=C4C=CCCC34C)C1CCC2=O',
    ]

    # Build the workflow; this instance is used as the task runner.
    workflow = Workflow(project_dir='./loop_workflow')
    workflow.task_runner = self
    add_task = workflow.add_task
    connect = workflow.connect_task

    # Task 1: provides the array of values that will be iterated over.
    #         'output_format' is set here to demonstrate that parameters
    #         passed to the LoopTask are forwarded to the mapped workflows
    #         it creates. Likewise 'steps' illustrates that input for a task
    #         inside the loop can be obtained from a task outside of it.
    provider = add_task('Array provider')
    provider.set_input(output_format='mol2', steps=100)

    # Task 2: the loop task. 'mapper_arg' names the input parameter that
    #         holds the iterable to loop over. 'loop_end_task' is required
    #         and names the task that closes the loop and collects all
    #         results.
    loop = add_task(
        'Loop',
        task_type='LoopTask',
        mapper_arg='smiles',
        loop_end_task='Collector',
    )
    connect(provider.nid, loop.nid)

    # Task 3: convert the ligand to mol2 format irrespective of the input
    #         format. The loop's 'smiles' item is passed on as 'mol'.
    converter = add_task(
        'Ligand conversion',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.convert',
    )
    connect(loop.nid, converter.nid, smiles='mol')

    # Task 4: convert mol2 to 3D mol2 irrespective of whether the input is
    #         1D/2D or 3D mol2. If 'output_format' is not specified it is
    #         deduced from the input, which is mol2 in this case. Conversion
    #         to 3D can fail in some circumstances, so retry up to 3 times.
    make_3d = add_task(
        'Make_3D',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.make3d',
        retry_count=3,
    )
    connect(converter.nid, make_3d.nid, 'mol')
    connect(provider.nid, make_3d.nid, 'steps')

    # Task 5: empty task that serves as the collector for all results
    #         obtained during iteration.
    collector = add_task('Collector')
    connect(make_3d.nid, collector.nid, 'mol')

    # Hand the array of input SMILES strings to task 1 and start running.
    workflow.input(provider.nid, smiles=smiles_batch)
    workflow.run()

    # Poll once per second until the workflow reports it has stopped.
    while workflow.is_running:
        yield sleep(1)
def on_run(self):
    """
    Build a workflow specification, save it, and run it for several ligands.

    When the microservice has successfully registered with the broker the
    on_run method is the first method to be called. We are using this method
    now to run our example workflow.

    The workflow is first built and saved as a specification file. That
    specification is then reloaded and executed once per ligand SMILES
    string, each run using its own project directory ('./ligand-<n>').

    This is a generator: while a run is in progress it yields `sleep(1)`
    repeatedly until `wf.is_running` is false.
    """

    # Workflow constants, these will be saved as part of the workflow
    # specification
    ligand_format = 'smi'
    pH = 7.4
    protein_file = os.path.abspath('protein.mol2')
    protein_binding_center = [4.9264, 19.0796, 21.9892]

    # Resolve the specification path once so that saving and every later
    # reload refer to the same file regardless of the working directory.
    spec_file = os.path.abspath('workflow_spec.jgf')

    # Build Workflow
    wf = Workflow(description='MDStudio WAMP workflow')

    # Task 1: convert the SMILES string to mol2 format (2D).
    #         Add a task using the 'add_task' method, always defining an
    #         administrative title of the task and the task type; here a
    #         WampTask because we are calling a microservice endpoint
    #         defined by uri.
    #         'store_output' is True by default and stores the task
    #         input/output to disk.
    t1 = wf.add_task('Format_conversion',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_structures.endpoint.convert')

    # Use 'set_input' to define the input to a task. As we are now building
    # a workflow specification these will be task constants, but the same
    # method is used later on to define specific input when using the
    # workflow specification for a ligand.
    t1.set_input(output_format='mol2')

    # Task 2: Convert mol2 to 3D mol2 irrespective of whether the input is
    #         1D/2D or 3D mol2. This particular 3D conversion routine is
    #         known to fail sometimes, but by setting retry_count to 3 the
    #         workflow manager will retry 3 times before failing.
    t2 = wf.add_task('Make_3D',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_structures.endpoint.make3d',
                     retry_count=3)
    t2.set_input(output_format='mol2')

    # Use 'connect_task' to connect t1 to t2 using their unique identifiers
    # (nid). In addition we can specify the parameters of task 1 we wish to
    # use as input to task 2 as additional arguments or keyword arguments.
    # A keyword argument defines a parameter name mapping between the two
    # tasks.
    wf.connect_task(t1.nid, t2.nid, 'mol')

    # Task 3: Adjust ligand protonation state to a given pH if applicable
    t3 = wf.add_task('Add hydrogens',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_structures.endpoint.addh')
    t3.set_input(output_format='mol2', correctForPH=True, pH=pH)
    wf.connect_task(t2.nid, t3.nid, 'mol')

    # Task 4: Get the formal charge for the protonated mol2 to use as input
    #         for ACPYPE or ATB.
    #         Here store_output equals False which will keep all output in
    #         memory and finally as part of the stored workflow file (*.jgf)
    t4 = wf.add_task('Get charge',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_structures.endpoint.info',
                     store_output=False)
    t4.set_input(input_format='mol2')
    wf.connect_task(t3.nid, t4.nid, 'mol')

    # Task 5: Create rotations of the molecule for better sampling
    t5 = wf.add_task('Create 3D rotations',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_structures.endpoint.rotate')
    t5.set_input(rotations=[[1, 0, 0, 90], [1, 0, 0, -90],
                            [0, 1, 0, 90], [0, 1, 0, -90],
                            [0, 0, 1, 90], [0, 0, 1, -90]])
    wf.connect_task(t3.nid, t5.nid, 'mol')

    # Task 6: Run PLANTS on ligand and protein.
    #         The work directory points to a tmp directory that is shared
    #         between the microservice docker image and the host system to
    #         store results.
    t6 = wf.add_task('Plants docking',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_smartcyp.endpoint.docking')
    t6.set_input(cluster_structures=100,
                 bindingsite_center=protein_binding_center,
                 bindingsite_radius=12,
                 protein_file=protein_file,
                 threshold=3.0,
                 base_work_dir='/tmp/mdstudio/mdstudio_smartcyp')

    # Here we pass only the 'mol' parameter from task 5 to task 6 where it
    # is named 'ligand_file'
    wf.connect_task(t5.nid, t6.nid, 'mol', mol='ligand_file')

    # Task 7: Extract cluster medians from output using a custom function.
    #         A task of type 'PythonTask' allows adding custom python
    #         functions or classes to the workflow. They are defined using
    #         the 'custom_func' parameter according to the Python import
    #         syntax. The package or file containing the function should be
    #         available as part of the PYTHONPATH.
    t7 = wf.add_task('Get cluster medians',
                     task_type='PythonTask',
                     custom_func='workflow_helpers.get_docking_medians')
    wf.connect_task(t6.nid, t7.nid, 'result')

    # Task 8: retrieve median structures
    t8 = wf.add_task(
        'Retrieve median structures',
        task_type='WampTask',
        uri='mdgroup.mdstudio_smartcyp.endpoint.docking_structures')
    t8.set_input(create_ensemble=False)
    wf.connect_task(t7.nid, t8.nid, medians='paths')

    # Save the workflow specification
    wf.save(spec_file)

    # Let's run the workflow specification for a number of ligand SMILES.
    # The current microservice instance (self) is passed as task_runner to
    # the workflow; it will be used to make calls to other microservice
    # endpoints when task_type equals WampTask.
    wf.task_runner = self

    ligands = [
        'O1[C@@H](CCC1=O)CCC',
        'C[C@]12CC[C@H]3[C@@H](CC=C4CCCC[C@]34CO)[C@@H]1CCC2=O',
        'CC12CCC3C(CC=C4C=CCCC34C)C1CCC2=O',
    ]

    currdir = os.getcwd()
    for i, ligand in enumerate(ligands, start=1):
        try:
            # Start every ligand from the pristine saved specification.
            wf.load(spec_file)
            wf.input(t1.nid, mol={
                'content': ligand,
                'path': None,
                'extension': ligand_format
            })
            wf.run(project_dir='./ligand-{0}'.format(i))

            # Poll once per second until this ligand's run has stopped.
            while wf.is_running:
                yield sleep(1)
        finally:
            # Return to the starting directory after every ligand, also when
            # the run fails, so the next relative project_dir is resolved
            # from the same place.
            # NOTE(review): presumably wf.run() changes the working
            # directory to the project directory - confirm.
            os.chdir(currdir)
def on_run(self):
    """
    Build and run the LIE binding-affinity prediction workflow (stage 5).

    Starting from existing bound/unbound energy trajectory files in the
    current working directory, the workflow collects the trajectories in a
    dataframe, filters for stable regions, averages the LIE energies,
    calculates dG with the pre-calibrated CYP1A2 model parameters and runs
    four applicability-domain (AD) analyses.

    This is a generator: after starting the workflow it yields `sleep(1)`
    repeatedly until `wf.is_running` is false.
    """

    # All input files are looked up relative to the directory the script
    # was started from.
    workdir = os.getcwd()

    # Ligand to make prediction for
    ligand = 'O1[C@@H](CCC1=O)CCC'
    ligand_format = 'smi'
    liemodel = os.path.join(workdir, '1A2_model')

    # CYP1A2 pre-calibrated model.
    # Open in binary mode (required by pickle on Python 3) and close the
    # file deterministically. Pickle files must come from a trusted source:
    # unpickling can execute arbitrary code.
    modelpicklefile = os.path.join(liemodel, 'params.pkl')
    with open(modelpicklefile, 'rb') as model_fh:
        modelfile = pickle.load(model_fh)

    unbound_trajectory = os.path.join(workdir, "unbound_trajectory.ene")
    bound_trajectory = [os.path.join(workdir, "bound_trajectory.ene")]
    decompose_files = [os.path.join(workdir, "decompose_dataframe.ene")]

    # Build Workflow
    wf = Workflow(project_dir='./lie_prediction')
    wf.task_runner = self

    # STAGE 5. PYLIE FILTERING, AD ANALYSIS AND BINDING-AFFINITY PREDICTION
    # Collect Gromacs bound and unbound MD energy trajectories in a dataframe
    t18 = wf.add_task(
        'Create mdframe',
        task_type='WampTask',
        uri='mdgroup.lie_pylie.endpoint.collect_energy_trajectories')
    t18.set_input(unbound_trajectory=unbound_trajectory,
                  bound_trajectory=bound_trajectory,
                  lie_vdw_header="Ligand-Ligenv-vdw",
                  lie_ele_header="Ligand-Ligenv-ele")

    # Determine stable regions in MDFrame and filter
    t19 = wf.add_task(
        'Detect stable regions',
        task_type='WampTask',
        uri='mdgroup.lie_pylie.endpoint.filter_stable_trajectory')
    t19.set_input(do_plot=True,
                  minlength=45,
                  workdir='/tmp/mdstudio/lie_pylie')
    wf.connect_task(t18.nid, t19.nid, 'mdframe')

    # Extract average LIE energy values from the trajectory
    t20 = wf.add_task(
        'LIE averages',
        task_type='WampTask',
        uri='mdgroup.lie_pylie.endpoint.calculate_lie_average')
    wf.connect_task(t19.nid, t20.nid, filtered_mdframe='mdframe')

    # Calculate dG using pre-calibrated model parameters
    t21 = wf.add_task('Calc dG',
                      task_type='WampTask',
                      uri='mdgroup.lie_pylie.endpoint.liedeltag')
    t21.set_input(alpha_beta_gamma=modelfile['LIE']['params'])
    wf.connect_task(t20.nid, t21.nid, 'averaged', averaged='dataframe')

    # Applicability domain: 1. Tanimoto similarity with training set
    t22 = wf.add_task(
        'AD1 tanimoto simmilarity',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.chemical_similarity')
    t22.set_input(test_set=[ligand],
                  mol_format=ligand_format,
                  reference_set=modelfile['AD']['Tanimoto']['smi'],
                  ci_cutoff=modelfile['AD']['Tanimoto']['Furthest'])
    wf.connect_task(t18.nid, t22.nid)

    # Applicability domain: 2. residue decomposition
    t23 = wf.add_task('AD2 residue decomposition',
                      task_type='WampTask',
                      uri='mdgroup.lie_pylie.endpoint.adan_residue_decomp',
                      inline_files=False)
    t23.set_input(model_pkl=modelpicklefile,
                  decompose_files=decompose_files)
    wf.connect_task(t18.nid, t23.nid)

    # Applicability domain: 3. deltaG energy range
    t24 = wf.add_task('AD3 dene yrange',
                      task_type='WampTask',
                      uri='mdgroup.lie_pylie.endpoint.adan_dene_yrange')
    t24.set_input(ymin=modelfile['AD']['Yrange']['min'],
                  ymax=modelfile['AD']['Yrange']['max'])
    wf.connect_task(t21.nid, t24.nid,
                    'liedeltag_file', liedeltag_file='dataframe')

    # Applicability domain: 4. deltaG energy distribution
    t25 = wf.add_task('AD4 dene distribution',
                      task_type='WampTask',
                      uri='mdgroup.lie_pylie.endpoint.adan_dene')
    t25.set_input(model_pkl=modelpicklefile,
                  center=list(modelfile['AD']['Dene']['Xmean']),
                  ci_cutoff=modelfile['AD']['Dene']['Maxdist'])
    wf.connect_task(t21.nid, t25.nid,
                    'liedeltag_file', liedeltag_file='dataframe')

    wf.run()

    # Poll once per second until the workflow reports it has stopped.
    while wf.is_running:
        yield sleep(1)
def on_run(self):
    """
    Build, save and run the complete LIE binding-affinity workflow.

    The workflow is assembled in five stages for a single ligand SMILES:
    ligand pre-processing, ligand topology creation (ACPYPE), PLANTS
    docking, Gromacs MD (ligand in water and protein-ligand), and finally
    pyLIE filtering, applicability-domain (AD) analysis and binding-affinity
    prediction. Model settings are read from 'model.dat' (JSON) and
    'params.pkl' (pickle) in the '1A2_model' directory below the current
    working directory.

    This is a generator: after saving and starting the workflow it yields
    `sleep(1)` repeatedly until `wf.is_running` is false.
    """

    # Ligand to make prediction for
    ligand = 'O1[C@@H](CCC1=O)CCC'
    ligand_format = 'smi'
    liemodel = os.path.join(os.getcwd(), '1A2_model')

    # CYP1A2 Model data
    with open(os.path.join(liemodel, 'model.dat'), 'r') as mdf:
        model = json.load(mdf)

    # CYP1A2 pre-calibrated model.
    # Open in binary mode (required by pickle on Python 3) and close the
    # file deterministically. Pickle files must come from a trusted source:
    # unpickling can execute arbitrary code.
    modelpicklefile = os.path.join(liemodel, 'params.pkl')
    with open(modelpicklefile, 'rb') as model_fh:
        modelfile = pickle.load(model_fh)

    # Build Workflow
    wf = Workflow(project_dir='./allies_run')
    wf.task_runner = self

    # STAGE 1: LIGAND PRE-PROCESSING
    # Convert ligand to mol2 irrespective of input format.
    t1 = wf.add_task('Format_conversion',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_structures.endpoint.convert')
    t1.set_input(mol={
        'content': ligand,
        'path': None,
        'extension': ligand_format
    })

    # Convert mol2 to 3D mol2 irrespective of whether the input is 1D/2D or
    # 3D mol2
    t2 = wf.add_task('Make_3D',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_structures.endpoint.make3d')
    t2.set_input(output_format='mol2')
    wf.connect_task(t1.nid, t2.nid, 'mol')

    # Adjust ligand protonation state to a given pH if applicable
    t3 = wf.add_task('Add hydrogens',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_structures.endpoint.addh')
    t3.set_input(output_format='mol2',
                 correctForPH=model['pHCorr'],
                 pH=model['pH'])
    wf.connect_task(t2.nid, t3.nid, 'mol')

    # Get the formal charge for the protonated mol2 to use as input for
    # ACPYPE or ATB
    t4 = wf.add_task('Get charge',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_structures.endpoint.info')
    t4.set_input(input_format='mol2')
    wf.connect_task(t3.nid, t4.nid, 'mol')

    # STAGE 2. CREATE TOPOLOGY FOR LIGAND
    # Run acpype on ligands
    t5 = wf.add_task('ACPYPE',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_amber.endpoint.acpype',
                     retry_count=3)
    wf.connect_task(t3.nid, t5.nid, mol='structure')
    wf.connect_task(t4.nid, t5.nid, charge='net_charge')

    # STAGE 3. PLANTS DOCKING
    # Create rotations of the molecule for better sampling
    t6 = wf.add_task('Create 3D rotations',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_structures.endpoint.rotate')
    t6.set_input(rotations=[[1, 0, 0, 90], [1, 0, 0, -90],
                            [0, 1, 0, 90], [0, 1, 0, -90],
                            [0, 0, 1, 90], [0, 0, 1, -90]])
    wf.connect_task(t3.nid, t6.nid, 'mol')

    # Run PLANTS on ligand and protein
    t7 = wf.add_task('Plants docking',
                     task_type='WampTask',
                     uri='mdgroup.mdstudio_smartcyp.endpoint.docking')
    t7.set_input(cluster_structures=100,
                 bindingsite_center=model['proteinParams'][0]['pocket'],
                 bindingsite_radius=model['proteinParams'][0]['radius'],
                 protein_file=os.path.join(
                     liemodel, model['proteinParams'][0]['proteinDock']),
                 threshold=3.0,
                 base_work_dir='/tmp/mdstudio/mdstudio_smartcyp')
    wf.connect_task(t6.nid, t7.nid, mol='ligand_file')

    # Get cluster median structures from docking
    t8 = wf.add_task(
        'Get cluster medians',
        custom_func='allies_workflow_helpers.get_docking_medians')
    wf.connect_task(t7.nid, t8.nid, 'output')

    # STAGE 4. GROMACS MD
    # Ligand in solution
    t14 = wf.add_task(
        'MD ligand in water',
        task_type='WampTask',
        uri='mdgroup.mdstudio_gromacs.endpoint.gromacs_ligand',
        store_output=True)
    t14.set_input(
        # NOTE(review): sim_time is hard-coded here; the model data also
        # carries a 'timeSim' entry - confirm which value is intended.
        sim_time=0.001,
        include=[
            os.path.join(liemodel, model['proteinTopPos']),
            os.path.join(liemodel, 'attype.itp')
        ],
        residues=model['resSite'],
        protein_file=None,
        protein_top=os.path.join(liemodel, model['proteinTop']),
        cerise_file=os.path.join(os.getcwd(), 'cerise_config_gt.json'))
    wf.connect_task(t5.nid, t14.nid,
                    new_pdb='ligand_file', gmx_itp='topology_file')

    # Convert PLANTS mol2 to pdb
    t15 = wf.add_task('Ligand mol2 to PDB',
                      task_type='WampTask',
                      uri='mdgroup.mdstudio_structures.endpoint.convert')
    t15.set_input(output_format='pdb')
    wf.connect_task(t8.nid, t15.nid, medians='mol')

    # Run MD for protein + ligand
    t16 = wf.add_task(
        'MD protein-ligand',
        task_type='WampTask',
        uri='mdgroup.mdstudio_gromacs.endpoint.gromacs_protein')
    t16.set_input(sim_time=0.001,
                  include=[
                      os.path.join(liemodel, model['proteinTopPos']),
                      os.path.join(liemodel, 'attype.itp')
                  ],
                  residues=model['resSite'],
                  charge=model['charge'],
                  cerise_file=os.path.join(os.getcwd(),
                                           'cerise_config_gt.json'),
                  protein_file=os.path.join(
                      liemodel, model['proteinParams'][0]['proteinCoor']),
                  protein_top=os.path.join(liemodel, model['proteinTop']))
    wf.connect_task(t15.nid, t16.nid, mol='ligand_file')
    wf.connect_task(t5.nid, t16.nid, gmx_itp='topology_file')

    # Collect results
    t17 = wf.add_task(
        'Collect MD results',
        custom_func='allies_workflow_helpers.collect_md_enefiles')
    t17.set_input(model_dir=liemodel)
    wf.connect_task(t14.nid, t17.nid, output='unbound')
    wf.connect_task(t16.nid, t17.nid, output='bound')

    # STAGE 5. PYLIE FILTERING, AD ANALYSIS AND BINDING-AFFINITY PREDICTION
    # Collect Gromacs bound and unbound MD energy trajectories in a dataframe
    t18 = wf.add_task(
        'Create mdframe',
        task_type='WampTask',
        uri='mdgroup.lie_pylie.endpoint.collect_energy_trajectories')
    t18.set_input(lie_vdw_header="Ligand-Ligenv-vdw",
                  lie_ele_header="Ligand-Ligenv-ele")
    wf.connect_task(t17.nid, t18.nid,
                    'bound_trajectory', 'unbound_trajectory')

    # Determine stable regions in MDFrame and filter
    t19 = wf.add_task(
        'Detect stable regions',
        task_type='WampTask',
        uri='mdgroup.lie_pylie.endpoint.filter_stable_trajectory')
    t19.set_input(do_plot=True, FilterSplines={'minlength': 45})
    wf.connect_task(t18.nid, t19.nid, 'mdframe')

    # Extract average LIE energy values from the trajectory
    t20 = wf.add_task(
        'LIE averages',
        task_type='WampTask',
        uri='mdgroup.lie_pylie.endpoint.calculate_lie_average')
    wf.connect_task(t19.nid, t20.nid, filtered_mdframe='mdframe')

    # Calculate dG using pre-calibrated model parameters
    t21 = wf.add_task('Calc dG',
                      task_type='WampTask',
                      uri='mdgroup.lie_pylie.endpoint.liedeltag')
    t21.set_input(alpha=modelfile['LIE']['params'][0],
                  beta=modelfile['LIE']['params'][1],
                  gamma=modelfile['LIE']['params'][2])
    wf.connect_task(t20.nid, t21.nid, averaged='dataframe')

    # Applicability domain: 1. Tanimoto similarity with training set.
    # Uses the same 'mdstudio_structures' endpoint namespace as every other
    # structures task in this workflow.
    t22 = wf.add_task(
        'AD1 tanimoto simmilarity',
        task_type='WampTask',
        uri='mdgroup.mdstudio_structures.endpoint.chemical_similarity')
    t22.set_input(test_set=[ligand],
                  mol_format=ligand_format,
                  reference_set=modelfile['AD']['Tanimoto']['smi'],
                  ci_cutoff=modelfile['AD']['Tanimoto']['Furthest'])
    wf.connect_task(t18.nid, t22.nid)

    # Applicability domain: 2. residue decomposition
    t23 = wf.add_task('AD2 residue decomposition',
                      task_type='WampTask',
                      uri='mdgroup.lie_pylie.endpoint.adan_residue_decomp')
    t23.set_input(model_pkl=modelpicklefile)
    wf.connect_task(t17.nid, t23.nid, 'decomp_files')

    # Applicability domain: 3. deltaG energy range
    t24 = wf.add_task('AD3 dene yrange',
                      task_type='WampTask',
                      uri='mdgroup.lie_pylie.endpoint.adan_dene_yrange')
    t24.set_input(ymin=modelfile['AD']['Yrange']['min'],
                  ymax=modelfile['AD']['Yrange']['max'])
    wf.connect_task(t21.nid, t24.nid, liedeltag_file='dataframe')

    # Applicability domain: 4. deltaG energy distribution
    t25 = wf.add_task('AD4 dene distribution',
                      task_type='WampTask',
                      uri='mdgroup.lie_pylie.endpoint.adan_dene')
    t25.set_input(model_pkl=modelpicklefile,
                  center=list(modelfile['AD']['Dene']['Xmean']),
                  ci_cutoff=modelfile['AD']['Dene']['Maxdist'])
    wf.connect_task(t21.nid, t25.nid, liedeltag_file='dataframe')

    # Save the workflow specification
    wf.save('workflow_spec.jgf')

    wf.run()

    # Poll once per second until the workflow reports it has stopped.
    while wf.is_running:
        yield sleep(1)