class IndexInputCube(SourceCube):
    """
    An input cube that reads an index log and returns the baitsets.

    Every item produced by this cube is a ``(set_id, baitset)`` tuple, where
    ``baitset`` is a list whose purely numeric tokens have been converted
    to ``int`` (other tokens are kept as strings).
    """
    classification = [["Input"]]

    success = ObjectOutputPort('success')

    limit = parameter.IntegerParameter(
        'limit',
        required=False,
        description='Read up to N items from this cube')

    data_in = parameter.DataSetInputParameter(
        'data_in',
        required=True,
        title='Index log',
        description='The index log to read from')

    def begin(self):
        """Open the index log as ``self.stream``.

        When ``config_from_env()`` returns a configuration the log is
        obtained through ``stream_file``; otherwise ``data_in`` is opened as
        a local file in binary mode.
        """
        self.in_orion = config_from_env() is not None
        if self.in_orion:
            self.stream = stream_file(self.args.data_in)
        else:
            # NOTE(review): nothing in this cube closes the local file handle.
            self.stream = open(str(self.args.data_in), 'rb')

    def __iter__(self):
        """Yield ``(set_id, baitset)`` tuples parsed from the index log.

        Iteration stops as soon as ``limit`` items have been yielded (when
        ``limit`` is set), even if more chunks remain in the stream.
        """
        max_idx = self.args.limit
        if max_idx is not None:
            max_idx = int(max_idx)

        count = 0
        for chunk in self.stream:
            index_log = chunk.decode('utf-8')
            # Everything before the first "set " marker is discarded.
            for entry in index_log.split("set ")[1:]:
                fields = entry.split(" ")
                # NOTE(review): only the second-to-last character of the
                # first token is used, so a multi-digit set id would be
                # truncated -- confirm against the index log format.
                set_id = int(fields[0][-2:-1])
                # The first token (the id) and the last token are dropped.
                baitset = [int(token) if token.isdigit() else token
                           for token in fields[1:-1]]

                yield (set_id, baitset)

                # Count only after a successful yield so that exactly
                # ``limit`` items are produced, and leave the generator
                # entirely so the outer chunk loop stops as well.
                count += 1
                if max_idx is not None and count == max_idx:
                    return
class OEMolTriggeredIStreamCube(ComputeCube):
    """
    A compute cube that uses oechem to read molecules from ``data_in`` and
    emit them on ``success`` when data arrives on the ``fp_input`` port.
    """
    classification = [["Input"]]

    success = MoleculeOutputPort('success')

    title = "Dataset Reader"

    limit = parameter.IntegerParameter(
        'limit',
        required=False,
        description='Read up to N items from this cube')

    fp_input = ObjectInputPort('fp_input')

    data_in = parameter.DataSetInputParameter(
        'data_in',
        required=True,
        title='Dataset to read from',
        description='The dataset to read from')

    download_format = parameter.StringParameter(
        'download_format',
        choices=('.oeb.gz', '.oeb', '.smi', '.pdb', '.mol2'),
        required=False,
        description='The stream format to be used for retrieving molecules from Orion',
        default=".oeb.gz")

    # Set to True once something has been received on ``fp_input``.
    received_act = False

    def process(self, data, port):
        """Read ``data_in`` and emit its molecules when triggered.

        :param data: the object received on the port; it is not inspected.
        :param port: name of the port the object arrived on. Only
            ``'fp_input'`` triggers a read; anything else is ignored.
        """
        # Port names must be compared by value: an identity test (``is``)
        # against a string literal only succeeds when both strings happen
        # to be the same interned object.
        if port != 'fp_input':
            return

        self.received_act = True

        max_idx = self.args.limit
        if max_idx is not None:
            max_idx = int(max_idx)

        count = 0
        with oechem.oemolistream(str(self.args.data_in)) as ifs:
            for mol in ifs.GetOEMols():
                self.success.emit(mol)
                count += 1
                if max_idx is not None and count == max_idx:
                    break
class Test(SourceCube):
    """Source cube that yields a single hard-coded item for testing.

    The item is the tuple ``(act_list, baitset, ranking, dataset_infos)``
    built from two molecules parsed from fixed SMILES strings.
    """

    success = ObjectOutputPort('success')

    data_in = parameter.DataSetInputParameter(
        'data_in',
        required=True,
        title='Index log',
        description='The index log to read from')

    def begin(self):
        """Nothing to set up."""
        pass

    def __iter__(self):
        """Yield the one fixed test tuple."""
        actives = []
        for smiles in ('c1cccc1 A', 'c1cccc1 B'):
            molecule = oechem.OEGraphMol()
            oechem.OESmilesToMol(molecule, smiles)
            actives.append(molecule)

        yield (actives, (0, [0]), [], (0, {"A": 0, "B": 1}))

    def end(self):
        """Nothing to tear down."""
        pass
class ForceFieldPrep(ParallelOEMolComputeCube):
    """Parametrize each incoming complex with the selected force fields.

    The parametrized complex is emitted on ``success``; any exception routes
    the input molecule to ``failure`` with the message stored under the
    ``'error'`` data tag.
    """
    title = "Force Field Preparation Cube"
    version = "0.0.0"
    classification = [["Force Field Preparation", "OEChem", "Force Field preparation"]]
    tags = ['OEChem', 'OEBio', 'OpenMM']
    description = """
    Each complex is parametrized by using the selected force fields

    Input:
    -------
    oechem.OEMCMol - Streamed-in of complexes

    Output:
    -------
    oechem.OEMCMol - Emits force field parametrized complexes
    """

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 3600},  # Default 1 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    protein_forcefield = parameter.DataSetInputParameter(
        'protein_forcefield',
        default='amber99sbildn.xml',
        help_text='Force field parameters for protein')

    solvent_forcefield = parameter.DataSetInputParameter(
        'solvent_forcefield',
        default='tip3p.xml',
        help_text='Force field parameters for solvent')

    ligand_forcefield = parameter.StringParameter(
        'ligand_forcefield',
        required=True,
        default='GAFF2',
        choices=['GAFF', 'GAFF2', 'SMIRNOFF'],
        help_text='Force field to parametrize the ligand')

    other_forcefield = parameter.StringParameter(
        'other_forcefield',
        required=True,
        default='GAFF2',
        choices=['GAFF', 'GAFF2', 'SMIRNOFF'],
        help_text='Force field used to parametrize other molecules not recognized by the protein force field')

    def begin(self):
        """Expose the cube arguments as the ``self.opt`` dict and add the logger."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log

    def process(self, mol, port):
        """Parametrize one complex and emit it packed with its Parmed structure.

        :param mol: the complex to parametrize.
        :param port: name of the input port (unused).
        """
        try:
            # The force fields are applied component by component.
            protein, ligand, water, excipients = utils.split(mol)

            # The molecule title is the prefix of the parametrization files.
            self.opt['prefix_name'] = mol.GetTitle()

            prot_struct = utils.applyffProtein(protein, self.opt)
            wat_struct = utils.applyffWater(water, self.opt)

            if excipients.NumAtoms() > 0:
                exc_struct = utils.applyffExcipients(excipients, self.opt)
                # Rebuild the excipients from the parametrized structure so
                # their atom order matches the Parmed one.
                excipients = oeommutils.openmmTop_to_oemol(
                    exc_struct.topology, exc_struct.positions, verbose=False)

            lig_struct = utils.applyffLigand(ligand, self.opt)

            # Assemble the Parmed structure: protein, ligand, excipients, water.
            if excipients.NumAtoms() > 0:
                full_struct = prot_struct + lig_struct + exc_struct + wat_struct
            else:
                full_struct = prot_struct + lig_struct + wat_struct

            oe_atom_total = (protein.NumAtoms() + ligand.NumAtoms() +
                             excipients.NumAtoms() + water.NumAtoms())
            if oe_atom_total != full_struct.topology.getNumAtoms():
                oechem.OEThrow.Fatal("Parmed and OE topologies mismatch atom number error")

            # Assemble the OEMol complex in the same component order as the
            # Parmed structure above.
            assembled = protein.CreateCopy()
            for component in (ligand, excipients, water):
                oechem.OEAddMols(assembled, component)
            assembled.SetTitle(mol.GetTitle())

            # Copy the box vectors stored on the assembled molecule onto the
            # Parmed structure.
            encoded_box = pack_utils.PackageOEMol.getData(assembled, tag='box_vectors')
            full_struct.box_vectors = pack_utils.PackageOEMol.decodePyObj(encoded_box)

            # Attach the Parmed structure and the reference positions.
            packed = pack_utils.PackageOEMol.pack(assembled, full_struct)
            encoded_positions = pack_utils.PackageOEMol.encodePyObj(full_struct.positions)
            packed.SetData(oechem.OEGetTag('OEMDDataRefPositions'), encoded_positions)

            # Serial number = atom index; 'UNL' residues are renamed 'LIG'
            # and flagged as HETATM.
            for atom in packed.GetAtoms():
                residue = oechem.OEAtomGetResidue(atom)
                residue.SetSerialNumber(atom.GetIdx())
                if residue.GetName() == 'UNL':
                    residue.SetName("LIG")
                    residue.SetHetAtom(True)
                oechem.OEAtomSetResidue(atom, residue)

            if packed.GetMaxAtomIdx() != full_struct.topology.getNumAtoms():
                raise ValueError("OEMol complex and Parmed structure mismatch atom numbers")

            # Building the OpenMM System is only a sanity check; the result
            # is discarded.
            full_struct.createSystem(nonbondedMethod=app.CutoffPeriodic,
                                     nonbondedCutoff=10.0 * unit.angstroms,
                                     constraints=app.HBonds,
                                     removeCMMotion=False)

            self.success.emit(packed)
        except Exception as e:
            # Log the traceback, attach the message and return the failed mol.
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            self.failure.emit(mol)
class ProteinReader(SourceCube):
    """Read protein molecules and title each one with ``protein_prefix``."""
    title = "Protein Reader Cube"
    version = "0.0.0"
    classification = [["Protein Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    A Protein Reader Cube
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of the protein system
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the protein system
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter(
        "data_in",
        help_text="Protein to read in",
        required=True,
        description="The Protein to read in")

    limit = parameter.IntegerParameter(
        "limit",
        required=False)

    download_format = parameter.StringParameter(
        "download_format",
        choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
        required=False,
        default=".oeb.gz")

    protein_prefix = parameter.StringParameter(
        'protein_prefix',
        default='PRT',
        help_text='The protein prefix name used to identify the protein')

    def begin(self):
        """Expose the cube arguments as the ``self.opt`` dict."""
        self.opt = vars(self.args)

    def _titled(self, molecules, limit):
        """Yield each molecule retitled with the prefix, at most ``limit`` of them."""
        for seen, molecule in enumerate(molecules, start=1):
            molecule.SetTitle(self.opt['protein_prefix'])
            yield molecule
            if limit is not None and seen == limit:
                break

    def __iter__(self):
        """Yield the proteins from a local file or, in Orion, a streamed dataset."""
        limit = self.args.limit
        if limit is not None:
            limit = int(limit)

        self.config = config_from_env()

        if self.config is None:
            # No Orion configuration: data_in is read as a local file.
            with oechem.oemolistream(str(self.args.data_in)) as ifs:
                yield from self._titled(ifs.GetOEMols(), limit)
        else:
            dataset = StreamingDataset(self.args.data_in,
                                       input_format=self.args.download_format)
            yield from self._titled(dataset, limit)
class YankBindingCube(ParallelOEMolComputeCube):
    """Compute a binding free energy with YANK for each incoming molecule.

    The estimates are attached to the molecule as the SD data fields
    ``DeltaG_yank_binding`` and ``dDeltaG_yank_binding`` before it is emitted
    on ``success``. Any exception routes the molecule to ``failure`` with
    the message stored under the ``'error'`` data tag.
    """
    title = "YankBindingCube"
    description = """
    Compute thebinding free energy of a small molecule with YANK.

    This cube uses the YANK alchemical free energy code to compute the binding
    free energy of one or more small molecules using harmonic restraints.

    See http://getyank.org for more information about YANK.
    """
    # A list of lists, like the other cubes in this file; with a flat list
    # of strings the ``tags`` comprehension below would iterate over the
    # individual characters of the string.
    classification = [["Alchemical free energy calculations"]]
    tags = [tag for lists in classification for tag in lists]

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 3600},  # Default 1 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    # Define Custom Ports to handle oeb.gz files
    intake = CustomMoleculeInputPort('intake')
    success = CustomMoleculeOutputPort('success')
    failure = CustomMoleculeOutputPort('failure')

    # Receptor specification
    receptor = parameter.DataSetInputParameter(
        'receptor',
        required=True,
        help_text='Receptor structure file')

    # These can override YAML parameters
    nsteps_per_iteration = parameter.IntegerParameter(
        'nsteps_per_iteration',
        default=500,
        help_text="Number of steps per iteration")

    timestep = parameter.DecimalParameter(
        'timestep',
        default=2.0,
        help_text="Timestep (fs)")

    simulation_time = parameter.DecimalParameter(
        'simulation_time',
        default=0.100,
        help_text="Simulation time (ns/replica)")

    temperature = parameter.DecimalParameter(
        'temperature',
        default=300.0,
        help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter(
        'pressure',
        default=1.0,
        help_text="Pressure (atm)")

    solvent = parameter.StringParameter(
        'solvent',
        default='gbsa',
        choices=['gbsa', 'pme', 'rf'],
        help_text="Solvent choice ['gbsa', 'pme', 'rf']")

    minimize = parameter.BooleanParameter(
        'minimize',
        default=True,
        help_text="Minimize initial structures for stability")

    randomize_ligand = parameter.BooleanParameter(
        'randomize_ligand',
        default=False,
        help_text="Randomize initial ligand position (implicit only)")

    verbose = parameter.BooleanParameter(
        'verbose',
        default=False,
        help_text="Print verbose YANK logging output")

    def construct_yaml(self, **kwargs):
        """Return the YANK YAML text with the cube parameters substituted in.

        :param kwargs: extra substitutions; they are merged last and so
            override the values derived from the cube parameters.
        :return: ``binding_yaml_template`` formatted with the options.
        """
        # Make substitutions to YAML here.
        # TODO: Can we override YAML parameters without having to do string substitutions?
        options = {
            'timestep': self.args.timestep,
            'nsteps_per_iteration': self.args.nsteps_per_iteration,
            # Total simulation time divided by the time covered by one
            # iteration, rounded up.
            'number_of_iterations': int(
                np.ceil(self.args.simulation_time * unit.nanoseconds /
                        (self.args.nsteps_per_iteration *
                         self.args.timestep * unit.femtoseconds))),
            'temperature': self.args.temperature,
            'pressure': self.args.pressure,
            'solvent': self.args.solvent,
            'minimize': 'yes' if self.args.minimize else 'no',
            'verbose': 'yes' if self.args.verbose else 'no',
            'randomize_ligand': 'yes' if self.args.randomize_ligand else 'no',
        }
        options.update(kwargs)

        return binding_yaml_template % options

    def begin(self):
        """Validate the solvent choice, compute kT and load the receptor.

        :raises Exception: if ``solvent`` is not one of the supported choices.
        :raises RuntimeError: if the receptor cannot be read.
        """
        # TODO: Is there another idiom to use to check valid input?
        if self.args.solvent not in ['gbsa', 'pme', 'rf']:
            raise Exception("solvent must be one of ['gbsa', 'pme', 'rf']")

        # Compute kT
        kB = unit.BOLTZMANN_CONSTANT_kB * unit.AVOGADRO_CONSTANT_NA  # Boltzmann constant
        self.kT = kB * (self.args.temperature * unit.kelvin)

        # Load receptor
        self.receptor = oechem.OEMol()
        receptor_filename = download_dataset_to_file(self.args.receptor)
        with oechem.oemolistream(receptor_filename) as ifs:
            if not oechem.OEReadMolecule(ifs, self.receptor):
                raise RuntimeError("Error reading receptor")

    def process(self, mol, port):
        """Run YANK on one molecule and attach the binding free energy.

        :param mol: the molecule to process.
        :param port: name of the input port (unused).
        """
        kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

        # Retrieve data about which molecule we are processing
        title = mol.GetTitle()

        with TemporaryDirectory() as output_directory:
            try:
                # Print out which molecule we are processing
                self.log.info('Processing {} in {}.'.format(
                    title, output_directory))

                # Check that molecule is charged.
                if not molecule_is_charged(mol):
                    raise Exception(
                        'Molecule %s has no charges; input molecules must be charged.'
                        % mol.GetTitle())

                # Write the receptor.
                pdbfilename = os.path.join(output_directory, 'receptor.pdb')
                with oechem.oemolostream(pdbfilename) as ofs:
                    res = oechem.OEWriteConstMolecule(ofs, self.receptor)
                    if res != oechem.OEWriteMolReturnCode_Success:
                        raise RuntimeError(
                            "Error writing receptor: {}".format(res))

                # Write the specified molecule out to a mol2 file without
                # changing its name. The stream is closed before the file is
                # post-processed below, so its contents are complete on disk.
                mol2_filename = os.path.join(output_directory, 'input.mol2')
                with oechem.oemolostream(mol2_filename) as ofs:
                    oechem.OEWriteMol2File(ofs, mol)

                # Undo the `<0>` substructure naming oechem writes to mol2 files.
                from YankCubes.utils import unfuck_oechem_mol2_file
                unfuck_oechem_mol2_file(mol2_filename)

                # Run YANK on the specified molecule.
                from yank.yamlbuild import YamlBuilder
                yaml = self.construct_yaml(output_directory=output_directory)
                yaml_builder = YamlBuilder(yaml)
                yaml_builder.build_experiments()
                self.log.info(
                    'Ran Yank experiments for molecule {}.'.format(title))

                # Analyze the binding free energy
                # TODO: Use yank.analyze API for this
                from YankCubes.analysis import analyze
                store_directory = os.path.join(output_directory, 'experiments')
                [DeltaG_binding, dDeltaG_binding] = analyze(store_directory)

                # Extract trajectory (DEBUG, disabled):
                # from yank.analyze import extract_trajectory
                # trajectory_filename = 'trajectory.pdb'
                # store_filename = os.path.join(store_directory, 'complex.pdb')
                # extract_trajectory(trajectory_filename, store_filename,
                #                    state_index=0, keep_solvent=False,
                #                    discard_equilibration=True,
                #                    image_molecules=True)
                # ifs = oechem.oemolistream(trajectory_filename)
                # ifs.SetConfTest(oechem.OEAbsCanonicalConfTest())  # load multi-conformer molecule
                # mol = oechem.OEMol()
                # for mol in ifs.GetOEMols():
                #     print (mol.GetTitle(), "has", mol.NumConfs(), "conformers")
                # ifs.close()
                # os.remove(trajectory_filename)

                # Attach binding free energy estimates to molecule; the
                # values returned by ``analyze`` are scaled by kT expressed
                # in kcal/mol.
                oechem.OESetSDData(mol, 'DeltaG_yank_binding',
                                   str(DeltaG_binding * kT_in_kcal_per_mole))
                oechem.OESetSDData(mol, 'dDeltaG_yank_binding',
                                   str(dDeltaG_binding * kT_in_kcal_per_mole))
                self.log.info(
                    'Analyzed and stored binding free energy for molecule {}.'.
                    format(title))

                # Emit molecule to success port.
                self.success.emit(mol)

            except Exception as e:
                self.log.info(
                    'Exception encountered when processing molecule {}.'.
                    format(title))
                # Attach error message to the molecule that failed
                # TODO: If there is an error in the leap setup log,
                # we should capture that and attach it to the failed molecule.
                self.log.error(traceback.format_exc())
                mol.SetData('error', str(e))
                # Return failed molecule
                self.failure.emit(mol)
class LigandReader(SourceCube):
    """Read ligands and tag each one with prefix, suffix, residue name and IDTag."""
    title = "LigandReader Cube"
    version = "0.0.0"
    classification = [["Ligand Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    Ligand Reader Cube
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of Ligands
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the Ligands
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter(
        "data_in",
        help_text="Ligand to read in",
        required=True,
        description="The Ligand to read in")

    limit = parameter.IntegerParameter(
        "limit",
        required=False)

    download_format = parameter.StringParameter(
        "download_format",
        choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
        required=False,
        default=".oeb.gz")

    prefix = parameter.StringParameter(
        'prefix',
        default='',
        help_text='An SD tag used as prefix string')

    suffix = parameter.StringParameter(
        'suffix',
        default='',
        help_text='An SD tag used as suffix string')

    type = parameter.StringParameter(
        'type',
        default='LIG',
        required=True,
        help_text='The ligand reside name')

    IDTag = parameter.BooleanParameter(
        'IDTag',
        default=True,
        required=False,
        help_text='If True/Checked ligands are enumerated by sequentially integers.'
                  'A SD tag containing part of the ligand name and an integer is used '
                  'to create a unique IDTag which is attached to the ligand')

    def begin(self):
        """Expose the cube arguments as the ``self.opt`` dict."""
        self.opt = vars(self.args)

    def _tagged(self, molecules, limit):
        """Yield each molecule after tagging it, at most ``limit`` of them.

        ``index`` is the zero-based position of the molecule in the input
        and is the integer used in its IDTag.
        """
        for index, ligand in enumerate(molecules):
            ligand.SetData(oechem.OEGetTag('prefix'), self.opt['prefix'])
            ligand.SetData(oechem.OEGetTag('suffix'), self.opt['suffix'])

            # Every atom's residue gets the configured residue name.
            for atom in ligand.GetAtoms():
                res = oechem.OEAtomGetResidue(atom)
                res.SetName(self.opt['type'])
                oechem.OEAtomSetResidue(atom, res)

            if self.opt['IDTag']:
                # 'l' + first 12 characters of the title + '_' + index
                id_value = 'l' + ligand.GetTitle()[0:12] + '_' + str(index)
                ligand.SetData(oechem.OEGetTag('IDTag'), id_value)

            yield ligand

            if limit is not None and index + 1 == limit:
                break

    def __iter__(self):
        """Yield the ligands from a local file or, in Orion, a streamed dataset."""
        limit = self.args.limit
        if limit is not None:
            limit = int(limit)

        self.config = config_from_env()

        if self.config is None:
            # No Orion configuration: data_in is read as a local file.
            with oechem.oemolistream(str(self.args.data_in)) as ifs:
                yield from self._tagged(ifs.GetOEMols(), limit)
        else:
            dataset = StreamingDataset(self.args.data_in,
                                       input_format=self.args.download_format)
            yield from self._tagged(dataset, limit)
class FREDDocking(OEMolComputeCube):
    """Dock each incoming molecule against the receptor and emit the pose.

    A molecule that cannot be docked, or that raises during processing, is
    emitted on ``failure`` with the message stored under the ``'error'``
    data tag.
    """
    title = "FRED Docking"
    version = "0.0.1"
    classification = [["Ligand Preparation", "OEDock", "FRED"],
                      ["Ligand Preparation", "OEDock", "ChemGauss4"]]
    tags = ['OEDock', 'FRED']
    description = """
    Dock molecules using the FRED docking engine against a prepared receptor file.
    Return the top scoring pose.

    Input:
    -------
    receptor - Requires a prepared receptor (oeb.gz) file of the protein to dock molecules against.
    oechem.OEMCMol - Expects a charged multi-conformer molecule on input port.

    Output:
    -------
    oechem.OEMol - Emits the top scoring pose of the molecule with attachments:
        - SDData Tags: { ChemGauss4 : pose score }
    """

    receptor = parameter.DataSetInputParameter(
        'receptor',
        required=True,
        help_text='Receptor OEB File')

    def begin(self):
        """Read the receptor and initialize the docking engine.

        The Hybrid method is used unless the receptor has no bound ligand,
        in which case Chemgauss4 is selected. ``self.args.receptor`` is
        replaced by the path of the downloaded receptor file.

        :raises Exception: if the receptor cannot be read or the docking
            engine cannot be initialized with it.
        """
        receptor = oechem.OEGraphMol()
        self.args.receptor = utils.download_dataset_to_file(self.args.receptor)
        if not oedocking.OEReadReceptorFile(receptor, str(self.args.receptor)):
            raise Exception("Unable to read receptor from {0}".format(
                self.args.receptor))

        # Initialize Docking
        dock_method = oedocking.OEDockMethod_Hybrid
        if not oedocking.OEReceptorHasBoundLigand(receptor):
            oechem.OEThrow.Warning(
                "No bound ligand, switching OEDockMethod to ChemGauss4.")
            dock_method = oedocking.OEDockMethod_Chemgauss4
        dock_resolution = oedocking.OESearchResolution_Default

        # The docking method name doubles as the SD tag holding the score.
        self.sdtag = oedocking.OEDockMethodGetName(dock_method)
        self.dock = oedocking.OEDock(dock_method, dock_resolution)
        if not self.dock.Initialize(receptor):
            raise Exception("Unable to initialize Docking with {0}".format(
                self.args.receptor))

    def clean(self, mol):
        """Delete the 'CLASH' and 'CLASHTYPE' data from ``mol`` and its active conformer."""
        mol.DeleteData('CLASH')
        mol.DeleteData('CLASHTYPE')
        mol.GetActive().DeleteData('CLASH')
        mol.GetActive().DeleteData('CLASHTYPE')

    def process(self, mcmol, port):
        """Dock one molecule and emit the scored pose.

        :param mcmol: the molecule to dock.
        :param port: name of the input port (unused).
        """
        try:
            dockedMol = oechem.OEMol()
            res = self.dock.DockMultiConformerMolecule(dockedMol, mcmol)
            if res != oedocking.OEDockingReturnCode_Success:
                # Report the failure instead of handling it silently: the
                # handler below attaches the message and routes the input
                # molecule to the failure port.
                raise RuntimeError(
                    "Docking failed for {} (return code {})".format(
                        mcmol.GetTitle(), res))

            oedocking.OESetSDScore(dockedMol, self.dock, self.sdtag)
            self.dock.AnnotatePose(dockedMol)
            score = self.dock.ScoreLigand(dockedMol)
            self.log.info("{} {} score = {:.4f}".format(
                self.sdtag, dockedMol.GetTitle(), score))
            oechem.OESetSDData(dockedMol, self.sdtag, "{}".format(score))
            self.clean(dockedMol)
            self.success.emit(dockedMol)

        except Exception as e:
            # Attach error message to the molecule that failed
            self.log.error(traceback.format_exc())
            mcmol.SetData('error', str(e))
            # Return failed molecule
            self.failure.emit(mcmol)

    def end(self):
        """Nothing to tear down."""
        pass
class ProteinReader(SourceCube):
    """Read protein molecules and title each one with ``protein_prefix``."""
    title = "Protein Reader Cube"
    version = "0.0.0"
    classification = [["Protein Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    A Protein Reader Cube
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of the protein system
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the protein system
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter(
        "data_in",
        help_text="Protein to read in",
        required=True,
        description="The Protein to read in")

    limit = parameter.IntegerParameter(
        "limit",
        required=False)

    download_format = parameter.StringParameter(
        "download_format",
        choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
        required=False,
        default=".oeb.gz")

    protein_prefix = parameter.StringParameter(
        'protein_prefix',
        default='PRT',
        help_text='The protein prefix name used to identify the protein')

    def begin(self):
        """Expose the cube arguments as the ``self.opt`` dict."""
        self.opt = vars(self.args)

    def _titled(self, molecules, limit):
        """Yield each molecule retitled with the prefix, at most ``limit`` of them."""
        for seen, molecule in enumerate(molecules, start=1):
            molecule.SetTitle(self.opt['protein_prefix'])
            yield molecule
            if limit is not None and seen == limit:
                break

    def __iter__(self):
        """Yield the proteins from a local file or, in Orion, a streamed dataset."""
        limit = self.args.limit
        if limit is not None:
            limit = int(limit)

        self.config = config_from_env()

        if self.config is None:
            # No Orion configuration: data_in is read as a local file.
            with oechem.oemolistream(str(self.args.data_in)) as ifs:
                yield from self._titled(ifs.GetOEMols(), limit)
        else:
            dataset = StreamingDataset(self.args.data_in,
                                       input_format=self.args.download_format)
            yield from self._titled(dataset, limit)


# class SimOutputCube(OEMolOStreamCube):
#     """
#     A sink cube that writes molecules to a file
#     """
#     classification = [["Output"]]
#     title = "Output Writer"
#
#     intake = BinaryMoleculeInputPort('intake')
#     data_out = DataSetOutputParameter('data_out',
#                                       required=True,
#                                       title='Name of Dataset to create',
#                                       description='The dataset to output')
#     backend = DataSetOutputParameter(
#         'backend',
#         default="auto",
#         choices=["db", "s3", "auto"],
#         description="The Orion storage backend to use")
#
#     def begin(self):
#         self.in_orion = config_from_env() is not None
#         self.decoder = MoleculeSerializerMixin()
#         self.need_decode = not self.args.data_out.endswith(".oeb.gz")
#         if self.in_orion:
#             self.ofs = MultipartDatasetUploader(self.args.data_out,
#                                                 tags=[self.name],
#                                                 backend=self.args.backend)
#         elif self.need_decode:
#             self.ofs = oechem.oemolostream(str(self.args.data_out))
#         else:
#             self.ofs = open(str(self.args.data_out), 'wb')
#
#     def write(self, mol, port):
#         if self.in_orion or not self.need_decode:
#             self.ofs.write(mol)
#         else:
#             oechem.OEWriteMolecule(self.ofs, self.decoder.decode(mol))
#
#     def end(self):
#         if self.in_orion:
#             self.ofs.complete()
#         else:
#             self.ofs.close()