class PlatformTestCube(OEMolComputeCube):
    """
    Runs a copy of OpenMM's simtk.installation script.

    For each incoming molecule the installation test is run against the PDB
    file named by ``pdbFileName``; the resulting report is logged, saved to
    ``openmmPlatformCheck.txt`` in the working directory, and the molecule
    is then passed on unchanged.
    """

    title = "OpenMM Platform Check"
    description = (
        " *OpenMM Platform Check* Checks to see which OpenMM Platforms are "
        "available amongst CPU, OpenCL, and CUDA "
    )
    classification = [
        ["OpenMM", "PlatformCheck"],
    ]
    # Flat list of every tag named in ``classification``.
    tags = sum(classification, [])

    # The parameter name (first argument) must match the attribute name.
    # For now the PDB file has to reside in the same directory as this file.
    pdbFileName = parameter.StringParameter(
        "pdbFileName",
        default='test.pdb',
        title="pdb File Name",
        description="name of pdb file to use in OpenMM Platform Check",
    )

    def process(self, mol, port):
        """Run the platform check, persist its report and forward ``mol``."""
        report = testInstallation.run_tests(self.args.pdbFileName)
        self.log.info(report)
        with open('openmmPlatformCheck.txt', 'w') as report_file:
            report_file.write(report)
        self.emit(mol)
class AccumulateRankings(ComputeCube):
    """
    A compute Cube that receives rankings and assembles them in a list.

    Every item received on ``intake`` is indexed as a sequence:
    ``data[0]`` and ``data[1][1]`` are sized to derive ``nb_ka``,
    ``data[2]`` is the ranking that gets accumulated, ``data[3]`` is stored
    as ``method`` and, for 5-element items, ``data[4]`` is stored as
    ``dataset_id``. When the stream ends, a single
    ``(ranking_list, nb_ka, method)`` tuple is emitted on ``success``.
    """

    classification = [["Compute", "Accumulator"]]

    url = parameter.StringParameter(
        'url',
        default="http://10.0.1.22:4242",
        help_text="Url of the Restful FastROCS Server for the request")

    intake = ObjectInputPort('intake')
    success = ObjectOutputPort('success')

    def begin(self):
        """Reset the accumulator state before any item is processed."""
        self.ranking_list = []
        self.dataset_id = None
        # Initialised here so that end() never hits an unset attribute when
        # the cube receives no input at all.
        self.nb_ka = None
        self.method = None

    def process(self, data, port):
        """Store the ranking carried by ``data`` and remember its metadata."""
        self.ranking_list.append(data[2])
        self.nb_ka = len(data[0]) - len(data[1][1])
        # NOTE(review): ParallelInsertKARestfulROCS emits a 5-tuple whose
        # 4th element is a dataset identifier and whose 5th is the method
        # name, which looks swapped relative to how it is read here --
        # confirm against the actual floe wiring.
        self.method = data[3]
        if len(data) == 5:
            self.dataset_id = data[4]

    def end(self):
        """Emit the accumulated rankings once the input stream is done."""
        if not self.ranking_list:
            # Nothing was received: there is no meaningful nb_ka/method to
            # report, so skip the emit instead of failing.
            self.log.info("No ranking received; nothing to emit")
            return
        # NOTE: server-side removal of the dataset (``self.dataset_id``) is
        # currently disabled; the identifier is only recorded.
        self.success.emit((self.ranking_list, self.nb_ka, self.method))
class TextRankingOutputCube(SinkCube):
    """
    A cube that writes rankings to a UTF-8 text file.

    The output file is named ``<name>ranking_<ext>.txt`` where ``<ext>`` is
    ``FP_<fingerprint type>`` for the ``Fingerprint`` method and ``FR`` for
    the ``FastROCS`` method. When an Orion configuration is found in the
    environment the text is buffered in a temporary file and uploaded in
    ``end``; otherwise it is written directly to the local path.
    """

    method = parameter.StringParameter(
        'method',
        default='Fingerprint',
        help_text='Method used for the ranking')
    fptype = parameter.IntegerParameter(
        'fptype',
        default=105,
        help_text="Fingerprint type to use for the ranking")
    intake = ObjectInputPort('intake')
    name = FileOutputParameter(
        'name',
        required=True,
        description='The name of the output file')
    title = "Ranking Writer"
    classification = [["Output"]]

    def begin(self):
        """Resolve the file-name extension and open the output stream.

        Raises:
            ValueError: if ``method`` is neither ``Fingerprint`` nor
                ``FastROCS`` (previously this surfaced later as an
                AttributeError on ``name_ext``).
            KeyError: if ``fptype`` is not one of 102, 104 or 105.
        """
        if self.args.method == 'Fingerprint':
            fptypes = {102: 'path', 104: 'circular', 105: 'tree'}
            self.FPType = fptypes[self.args.fptype]
            self.name_ext = 'FP_' + self.FPType
        elif self.args.method == 'FastROCS':
            self.name_ext = 'FR'
        else:
            raise ValueError(
                "Unsupported ranking method: {!r}".format(self.args.method))

        self.in_orion = config_from_env() is not None
        if self.in_orion:
            self.stream = tempfile.NamedTemporaryFile()
        else:
            path = self.args.name + "ranking_" + self.name_ext + ".txt"
            self.stream = open(path, 'wb')

    def write(self, data, port):
        """Write every ranking in ``data[0]`` as a headed block of lines.

        Each ranking entry is indexed as ``(?, title, score, set number,
        flag)``; one line ``<set> <title> <score> <flag>`` is written per
        entry, preceded by a ``Set n°<set>`` header per ranking.
        """
        self.ranking_list = data[0]
        for ranking in self.ranking_list:
            if not ranking:
                # An empty ranking has no first entry to take the set
                # number from; skip it rather than raise IndexError.
                continue
            header = "\n" + "Set n°" + str(ranking[0][3]) + "\n"
            self.stream.write(header.encode("utf-8"))
            for mol in ranking:
                mol_data = (str(mol[3]) + " " + mol[1] + " " + str(mol[2]) +
                            " " + str(mol[4]) + "\n")
                self.stream.write(mol_data.encode("utf-8"))

    def end(self):
        """Close the stream, uploading the buffered file first in Orion."""
        if not self.in_orion:
            self.stream.close()
            return
        try:
            # Flush so the uploader sees the complete file on disk.
            self.stream.flush()
            name = self.args.name + "ranking_" + self.name_ext + ".txt"
            resp = upload_file(name, self.stream.name)
            self.log.info("Created result file {} with ID {}".format(
                self.args.name, resp['id']))
        finally:
            # Closing also deletes the temporary file.
            self.stream.close()
class BenchmarkCube(SourceCube):
    """Source cube that emits the OpenMM benchmark report line by line.

    ``run_platform_benchmarks`` writes its report into an in-memory text
    buffer; every line of that buffer is logged and then yielded as UTF-8
    encoded bytes.
    """

    title = "OpenMM BenchmarkCube"
    description = (
        " Cube that performs a benchmark of OpenMM on all of the different "
        "platforms that are available and outputs the byte string resulting "
        "from the benchmarks line by line to its success port "
    )
    tags = [["OpenMM", "Benchmarking"]]

    success = BinaryOutputPort("success")

    cutoff = parameter.DecimalParameter("cutoff", default=0.9)
    seconds = parameter.IntegerParameter("seconds", default=60)
    polarization = parameter.StringParameter(
        "polarization",
        choices=["direct", "extrapolated", "mutual"],
        default="mutual")
    amoeba_target_epsilon = parameter.DecimalParameter(
        "amoeba_target_epsilon",
        title="Amoeba Mutual Induced Target Epsilon",
        default=1e-5)
    use_heavy_hydrogens = parameter.BooleanParameter(
        "use_heavy_hydrogens",
        title="Use Heavy Hydrogens",
        default=False)
    precision = parameter.StringParameter(
        "precision",
        choices=["single", "mixed", "double"],
        default="single")

    def __iter__(self):
        """Run the benchmarks and yield each report line as bytes."""
        report = StringIO()
        report.write("Benchmarking Results:\n")
        run_platform_benchmarks(self.args, stream=report)
        report.flush()
        # Rewind so the report can be read back from its first line.
        report.seek(0)
        for line in report:
            self.log.info(line)
            yield line.encode("utf-8")
class OEMolTriggeredIStreamCube(ComputeCube):
    """
    A compute cube that uses oechem to read molecules when triggered.

    Each item arriving on ``fp_input`` causes the dataset named by
    ``data_in`` to be read and its molecules (at most ``limit`` of them, if
    a limit is given) to be emitted on ``success``. The content of the
    triggering item itself is not used.
    """

    classification = [["Input"]]
    success = MoleculeOutputPort('success')
    title = "Dataset Reader"

    limit = parameter.IntegerParameter(
        'limit',
        required=False,
        description='Read up to N items from this cube')
    fp_input = ObjectInputPort('fp_input')
    data_in = parameter.DataSetInputParameter(
        'data_in',
        required=True,
        title='Dataset to read from',
        description='The dataset to read from')
    download_format = parameter.StringParameter(
        'download_format',
        choices=('.oeb.gz', '.oeb', '.smi', '.pdb', '.mol2'),
        required=False,
        description=
        'The stream format to be used for retrieving molecules from Orion',
        default=".oeb.gz")

    # Becomes True once an item has been received on ``fp_input``.
    received_act = False

    def process(self, data, port):
        """Read ``data_in`` and emit its molecules on ``success``.

        Args:
            data: the triggering item; it is not inspected.
            port: name of the port the item arrived on.
        """
        # Compare by value: identity (`is`) against a string literal depends
        # on interning and is flagged with a SyntaxWarning by Python >= 3.8.
        if port == 'fp_input':
            self.received_act = True

        max_idx = self.args.limit
        if max_idx is not None:
            max_idx = int(max_idx)

        count = 0
        with oechem.oemolistream(str(self.args.data_in)) as ifs:
            for mol in ifs.GetOEMols():
                self.success.emit(mol)
                count += 1
                if max_idx is not None and count == max_idx:
                    break
class SolvationCube(ParallelOEMolComputeCube):
    """Solvate each incoming molecular system by calling ``oesolvate``.

    The cube parameters are passed to ``oesolvate`` as keyword options. The
    SD tags ``solvents``, ``molar_fractions`` and ``density`` found on a
    molecule override the corresponding cube parameters for that molecule
    only. Failures are reported on the ``failure`` port with the error
    message attached to the molecule.
    """

    title = "Solvation Cube Packmol"
    version = "0.0.0"
    classification = [["Preparation", "OEChem"]]
    tags = ['OEChem', 'PackMol']
    description = (
        " This cube solvate the molecular system "
        "Input: ------- oechem.OEMCMol - Streamed-in of the molecular system "
        "Output: ------- oechem.OEMCMol - Emits the solvated system "
    )

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 3600},  # Default 1 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    density = parameter.DecimalParameter(
        'density',
        default=1.0,
        help_text="Solution density in g/ml")

    padding_distance = parameter.DecimalParameter(
        'padding_distance',
        default=10.0,
        help_text=
        "The padding distance between the solute and the box edge in A")

    distance_between_atoms = parameter.DecimalParameter(
        'distance_between_atoms',
        default=2.0,
        help_text="The minimum distance between atoms in A")

    # The adjacent string fragments below are joined by the parser, so each
    # fragment must carry its own separating space.
    solvents = parameter.StringParameter(
        'solvents',
        required=True,
        default='[H]O[H]',
        help_text=
        'Select solvents. The solvents are specified as comma separated smiles strings '
        'e.g. [H]O[H], C(Cl)(Cl)Cl, CS(=O)C')

    molar_fractions = parameter.StringParameter(
        'molar_fractions',
        default='1.0',
        help_text=
        "Molar fractions of each solvent components. The molar fractions are specified "
        "as comma separated molar fractions strings e.g. 0.5,0.2,0.3")

    geometry = parameter.StringParameter(
        'geometry',
        default='box',
        choices=['box', 'sphere'],
        help_text=
        "Geometry selection: box or sphere. Sphere cannot be used as periodic system "
        "along with MD simulation")

    close_solvent = parameter.BooleanParameter(
        'close_solvent',
        default=False,
        help_text=
        "If Checked/True solvent molecules will be placed very close to the solute"
    )

    salt = parameter.StringParameter(
        'salt',
        default='[Na+], [Cl-]',
        help_text='Salt type. The salt is specified as list of smiles strings. '
        'Each smiles string is the salt component dissociated in the '
        'solution e.g. Na+, Cl-')

    salt_concentration = parameter.DecimalParameter(
        'salt_concentration',
        default=0.0,
        help_text="Salt concentration in millimolar")

    neutralize_solute = parameter.BooleanParameter(
        'neutralize_solute',
        default=True,
        help_text=
        'Neutralize the solute by adding Na+ and Cl- counter-ions based on '
        'the solute formal charge')

    def begin(self):
        """Snapshot the cube arguments as the option dictionary."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log

    def process(self, solute, port):
        """Solvate ``solute`` and emit the solvated system.

        On any error the traceback is logged, the message is stored on the
        molecule under the ``error`` data tag and the molecule is sent to
        the ``failure`` port.
        """
        try:
            # Work on a per-molecule copy so overrides do not leak into the
            # options used for other molecules.
            opt = dict(self.opt)

            # Update cube simulation parameters with the eventually
            # molecule SD tags
            new_args = {
                dp.GetTag(): dp.GetValue()
                for dp in oechem.OEGetSDDataPairs(solute)
                if dp.GetTag() in ["solvents", "molar_fractions", "density"]
            }

            if new_args:
                for k in new_args:
                    # molar_fractions stays a comma separated string.
                    if k == 'molar_fractions':
                        continue
                    try:
                        new_args[k] = float(new_args[k])
                    except (TypeError, ValueError):
                        # Non-numeric values (e.g. solvent SMILES) are kept
                        # as strings.
                        pass
                self.log.info(
                    "Updating parameters for molecule: {}\n{}".format(
                        solute.GetTitle(), new_args))
                opt.update(new_args)

            # Solvate the system
            sol_system = oesolvate(solute, **opt)
            self.log.info("Solvated System atom number {}".format(
                sol_system.NumAtoms()))
            sol_system.SetTitle(solute.GetTitle())
            self.success.emit(sol_system)
        except Exception as e:
            # Attach error message to the molecule that failed
            self.log.error(traceback.format_exc())
            solute.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(solute)
class ParallelInsertKARestfulROCS(ParallelComputeCube):
    """
    Insert the remaining known actives into an existing FastROCS ranking.

    For every incoming ``(act_list, baitset, ranking, dataset_infos)`` item
    the baitset molecules are posted as queries to the RESTful FastROCS
    server, the hits are downloaded, and each hit whose index (looked up
    through ``dataset_infos[1]``) is not part of the baitset is inserted
    into ``ranking`` with its best TanimotoCombo score. The updated ranking
    is emitted on ``success``.
    """

    classification = [["ParallelCompute"]]

    url = parameter.StringParameter(
        'url',
        default="http://10.0.1.22:4242",
        help_text="Url of the Restful FastROCS Server for the request")

    topn = parameter.IntegerParameter(
        'topn',
        default=100,
        help_text="Number of top molecules returned in the ranking")

    data_input = ObjectInputPort('data_input')
    success = ObjectOutputPort('success')

    def process(self, data, port):
        """Score the hits for one baitset and emit the updated ranking."""
        self.act_list = data[0]
        self.baitset = data[1]
        self.ranking = data[2]
        self.dataset_infos = data[3]
        self.log.info("processing KA for baitset : " + str(self.baitset[0]))

        self.dataset_identifier = self.dataset_infos[0]
        self.add_queries()
        self.get_results()

        for tanimoto, mol in self.cur_scores.values():
            self.update_ranking(mol, tanimoto, True)

        self.success.emit((self.act_list, self.baitset, self.ranking,
                           self.dataset_infos[0], 'FastROCS'))

    def add_queries(self):
        """Post one query per baitset molecule and record the query ids."""
        url = self.args.url + "/queries/"
        self.query_id_list = []
        for idx in self.baitset[1]:
            # Only the unique path is needed: close the handle right away so
            # the descriptor is not leaked (the file is kept, delete=False).
            self.query = tempfile.NamedTemporaryFile(suffix='.oeb',
                                                     mode='wb',
                                                     delete=False)
            self.query.close()
            try:
                with oechem.oemolostream(self.query.name) as ofs:
                    oechem.OEWriteMolecule(ofs, self.act_list[idx])

                parameters = {
                    "num_hits": self.args.topn,
                    "dataset_identifier": self.dataset_identifier,
                }
                with open(self.query.name, "rb") as query_file:
                    response = requests.post(url,
                                             files={"query": query_file},
                                             data=parameters)
            finally:
                # Remove the query file even when writing or posting fails.
                os.remove(self.query.name)

            data = response.json()
            self.query_id_list.append(data["id"])

    def get_results(self):
        """Download the hits of every query into ``self.cur_scores``.

        ``self.cur_scores`` maps a molecule title to
        ``(best TanimotoCombo, molecule copy)``; hits that belong to the
        baitset are skipped.
        """
        self.cur_scores = {}
        for query_id in self.query_id_list:
            url = self.args.url + "/queries/{}/".format(query_id)

            # Poll with a linearly growing delay until the job is done.
            # NOTE(review): there is no timeout and no handling of a failed
            # job status, so this loops forever if the job never completes.
            tries = 0
            while True:
                tries += 1
                time.sleep(tries)
                data = requests.get(url).json()
                if data["status"]["job"] == "COMPLETED":
                    break

            results_url = data["results"]
            results_data = requests.get(self.args.url + results_url)

            with tempfile.NamedTemporaryFile(suffix='.oeb',
                                             mode='wb',
                                             delete=False) as temp:
                temp.write(results_data.content)

            try:
                with oechem.oemolistream(temp.name) as results:
                    for mol in results.GetOEGraphMols():
                        title = mol.GetTitle()
                        if self.dataset_infos[1][title] in self.baitset[1]:
                            continue
                        tanimoto_combo = float(
                            oechem.OEGetSDData(mol, "TanimotoCombo"))
                        best = self.cur_scores.get(title)
                        # Keep only the highest score seen for a title.
                        if best is None or best[0] < tanimoto_combo:
                            self.cur_scores[title] = (tanimoto_combo,
                                                      mol.CreateCopy())
            finally:
                os.remove(temp.name)

    def update_ranking(self, mol, max_tanimoto, ka_tag):
        """Insert ``mol`` into the score-sorted ranking and trim it.

        The ranking is kept in descending score order and is cut after
        ``topn`` entries, except that entries tied with the ``topn``-th
        score are kept.
        """
        topn = self.args.topn
        # A full ranking whose last score beats the new one is unaffected.
        if len(self.ranking) >= topn and max_tanimoto < self.ranking[-1][2]:
            return

        # Insert after the leading run of strictly better entries.
        # enumerate gives the position directly; list.index() would rescan
        # the list and return the wrong slot for duplicate tuples.
        index = 0
        for position, top_mol in enumerate(self.ranking):
            if max_tanimoto < top_mol[2]:
                index = position + 1
            else:
                break

        new_entry = (oechem.OEMolToSmiles(mol), mol.GetTitle(), max_tanimoto,
                     self.baitset[0], ka_tag)
        self.ranking = (self.ranking[:index] + [new_entry] +
                        self.ranking[index:])

        # Cut at the first score change at or after position topn - 1.
        i = topn - 1
        while i < len(self.ranking) - 1:
            if self.ranking[i][2] != self.ranking[i + 1][2]:
                self.ranking = self.ranking[:i + 1]
                break
            i += 1

    def end(self):
        """Nothing to finalise."""
        pass
class ParallelFastROCSRanking(ComputeCube):
    """
    A compute Cube that receives a Molecule a baitset of indices and a
    FastROCSServer address and returns the ranking of the Server Molecules
    against the query.

    One query is posted per baitset molecule; the per-query rankings are
    merged into a single ranking sorted by descending TanimotoCombo,
    limited to ``topn`` entries plus ties, without repeated titles.
    """

    classification = [["Compute", "FastROCS", "Similarity"]]

    url = parameter.StringParameter(
        'url',
        default="http://10.0.61.25:4711",
        help_text="Url of the FastROCS Server for the request")

    dataset_name = parameter.StringParameter(
        'dataset_name',
        default="screening_database",
        help_text="Name of the screening database")

    topn = parameter.IntegerParameter(
        'topn',
        default=100,
        help_text="Number of top molecules returned in the ranking")

    data_input = ObjectInputPort('data_input')
    success = ObjectOutputPort('success')

    def begin(self):
        """Nothing to initialise."""
        pass

    def process(self, data, port):
        """Rank the server dataset against one baitset and emit the result."""
        self.act_list = data[0]
        self.baitset = data[1]
        self.ranking = data[2]
        self.dataset_infos = data[3]
        self.log.info("start ranking baitset number {}".format(
            self.baitset[0]))

        url = self.args.url + "/datasets/?name={}".format(
            self.args.dataset_name)
        dataset_info = requests.get(url).json()
        self.dataset_identifier = int(dataset_info["id"])

        self.add_queries()
        for count, query_id in enumerate(self.query_id_list, 1):
            cur_rank = self.get_result(query_id)
            if len(self.ranking) == 0:
                self.ranking = cur_rank
            else:
                self.merge_ranking(cur_rank)
            self.log.info("Baitset " + str(self.baitset[0]) + " : " +
                          str(count) + " requests processed")
            sys.stdout.flush()

        self.log.info("Emitting ranking baitset " + str(self.baitset[0]))
        self.success.emit((self.act_list, self.baitset, self.ranking,
                           self.dataset_infos, 'FastROCS'))

    def add_queries(self):
        """Post one query per baitset molecule and record the query ids."""
        url = self.args.url + "/queries/"
        self.query_id_list = []
        for idx in self.baitset[1]:
            # Only the unique path is needed: close the handle right away so
            # the descriptor is not leaked (the file is kept, delete=False).
            self.query = tempfile.NamedTemporaryFile(suffix='.oeb',
                                                     mode='wb',
                                                     delete=False)
            self.query.close()
            try:
                with oechem.oemolostream(self.query.name) as ofs:
                    oechem.OEWriteMolecule(ofs, self.act_list[idx])

                parameters = {
                    "num_hits": self.args.topn,
                    "dataset_identifier": self.dataset_identifier,
                }
                with open(self.query.name, "rb") as query_file:
                    response = requests.post(url,
                                             files={"query": query_file},
                                             data=parameters)
            finally:
                # Remove the query file even when writing or posting fails.
                os.remove(self.query.name)

            data = response.json()
            self.query_id_list.append(data["id"])

    def get_result(self, query_id):
        """Wait for ``query_id`` to complete and return its ranking.

        Returns:
            A list of ``(smiles, title, TanimotoCombo, baitset number,
            False)`` tuples in the order delivered by the server.
        """
        url = self.args.url + "/queries/{}/".format(query_id)

        # First poll is immediate, then the delay grows by a minute each
        # time. NOTE(review): there is no timeout and no handling of a
        # failed job status, so this loops forever if the job never
        # completes.
        tries = 0
        while True:
            time.sleep(60 * tries)
            tries += 1
            data = requests.get(url).json()
            if data["status"]["job"] == "COMPLETED":
                break

        results_url = data["results"]
        results_data = requests.get(self.args.url + results_url)

        with tempfile.NamedTemporaryFile(suffix='.oeb',
                                         mode='wb',
                                         delete=False) as temp:
            temp.write(results_data.content)

        cur_rank = []
        try:
            with oechem.oemolistream(temp.name) as results:
                for mol in results.GetOEGraphMols():
                    cur_rank.append(
                        (oechem.OEMolToSmiles(mol), mol.GetTitle(),
                         float(oechem.OEGetSDData(mol, 'TanimotoCombo')),
                         self.baitset[0], False))
        finally:
            os.remove(temp.name)
        return cur_rank

    def merge_ranking(self, ranking):
        """Merge ``ranking`` into ``self.ranking``.

        Both lists are expected in descending score order. The result holds
        at most ``topn`` entries, plus any entries tied with the last
        accepted score, and each title appears only once (first occurrence
        wins).
        """
        merged_list = []
        seen_titles = set()
        topn = self.args.topn

        def admit(entry):
            """Append ``entry`` unless the cut-off is reached; report it."""
            if len(merged_list) < topn or entry[2] == merged_list[-1][2]:
                merged_list.append(entry)
                seen_titles.add(entry[1])
                return True
            return False

        current = self.ranking
        i = 0
        j = 0
        while i < len(current):
            # Take every strictly better entry of the new ranking first.
            while j < len(ranking) and ranking[j][2] > current[i][2]:
                if ranking[j][1] in seen_titles:
                    j += 1
                    continue
                if not admit(ranking[j]):
                    break
                j += 1

            if current[i][1] in seen_titles:
                i += 1
                continue
            if not admit(current[i]):
                break
            i += 1

        # Whatever is left of the new ranking goes last.
        while j < len(ranking):
            if ranking[j][1] in seen_titles:
                j += 1
                continue
            if not admit(ranking[j]):
                break
            j += 1

        self.ranking = merged_list
class OpenMMminimizeCube(ParallelOEMolComputeCube):
    """Minimize each incoming system by calling ``simtools.simulation``.

    The cube parameters are forwarded as keyword options with ``SimType``
    set to ``'min'``. A ``temperature`` SD tag on a molecule overrides the
    cube parameter for that molecule only. Failures are reported on the
    ``failure`` port with the error message attached to the molecule.
    """

    title = 'Minimization Cube'
    version = "0.0.0"
    classification = [["Simulation", "OpenMM", "Minimization"]]
    tags = ['OpenMM', 'Parallel Cube']
    description = (
        " Minimize the protein:ligand complex. This cube will take in the "
        "streamed complex.oeb.gz file containing the solvated protein:ligand "
        "complex and minimize it. Input parameters: steps (integer): the "
        "number of steps of minimization to apply. If 0 the minimization "
        "will proceed until convergence is reached "
    )

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 43200},  # Default 12 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    steps = parameter.IntegerParameter(
        'steps',
        default=0,
        help_text="Number of minimization steps. "
        "If 0 the minimization will continue until convergence")

    restraints = parameter.StringParameter(
        'restraints',
        default='',
        help_text="Mask selection to apply restraints. Possible keywords "
        "are: ligand, protein, water, ions, ca_protein, cofactors. "
        "The selection can be refined by using logical tokens: "
        "not, noh, and, or, diff, around")

    restraintWt = parameter.DecimalParameter(
        'restraintWt',
        default=5.0,
        help_text="Restraint weight for xyz atom restraints in kcal/(mol A^2)")

    freeze = parameter.StringParameter(
        'freeze',
        default='',
        help_text="Mask selection to freeze atoms along the MD simulation. "
        "Possible keywords are: ligand, protein, water, ions, ca_protein, "
        "cofactors. The selection can be refined by using logical tokens: "
        "not, noh, and, or, diff, around")

    temperature = parameter.DecimalParameter(
        'temperature',
        default=300,
        help_text="Temperature (Kelvin)")

    nonbondedMethod = parameter.StringParameter(
        'nonbondedMethod',
        default='PME',
        choices=[
            'NoCutoff', 'CutoffNonPeriodic', 'CutoffPeriodic', 'PME', 'Ewald'
        ],
        help_text="NoCutoff, CutoffNonPeriodic, CutoffPeriodic, PME, or Ewald")

    nonbondedCutoff = parameter.DecimalParameter(
        'nonbondedCutoff',
        default=10,
        help_text="The non-bonded cutoff in angstroms. "
        "This is ignored if the non-bonded method is NoCutoff.")

    constraints = parameter.StringParameter(
        'constraints',
        default='HBonds',
        choices=['None', 'HBonds', 'HAngles', 'AllBonds'],
        help_text="None, HBonds, HAngles, or AllBonds "
        "Which type of constraints to add to the system (e.g., SHAKE). "
        "None means no bonds are constrained. "
        "HBonds means bonds with hydrogen are constrained")

    outfname = parameter.StringParameter(
        'outfname',
        default='min',
        help_text='Filename suffix for output simulation files')

    center = parameter.BooleanParameter(
        'center',
        default=False,
        description='Center the system to the OpenMM unit cell')

    verbose = parameter.BooleanParameter(
        'verbose',
        default=True,
        description='Increase log file verbosity')

    platform = parameter.StringParameter(
        'platform',
        default='Auto',
        choices=['Auto', 'Reference', 'CPU', 'CUDA', 'OpenCL'],
        help_text='Select which platform to use to run the simulation')

    cuda_opencl_precision = parameter.StringParameter(
        'cuda_opencl_precision',
        default='single',
        choices=['single', 'mixed', 'double'],
        help_text='Select the CUDA or OpenCL precision')

    def begin(self):
        """Snapshot the cube arguments as the simulation options."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log
        self.opt['SimType'] = 'min'

    def process(self, mol, port):
        """Minimize ``mol`` and emit the packed result.

        On any error the traceback is logged, the message is stored on the
        molecule under the ``error`` data tag and the molecule is sent to
        the ``failure`` port.
        """
        try:
            # The copy of the dictionary option as local variable
            # is necessary to avoid filename collisions due to
            # the parallel cube processes
            opt = dict(self.opt)

            # Update cube simulation parameters with the eventually
            # molecule SD tags
            new_args = {
                dp.GetTag(): dp.GetValue()
                for dp in oechem.OEGetSDDataPairs(mol)
                if dp.GetTag() in ["temperature"]
            }
            if new_args:
                for k in new_args:
                    try:
                        new_args[k] = float(new_args[k])
                    except (TypeError, ValueError):
                        # Keep the raw tag value when it is not numeric.
                        pass
                self.log.info(
                    "Updating parameters for molecule: {}\n{}".format(
                        mol.GetTitle(), new_args))
                opt.update(new_args)

            # NOTE(review): ``gd`` is only bound when checkTags() is truthy;
            # otherwise the log call below raises NameError and the molecule
            # goes to the failure port. Presumably checkTags() raises on
            # missing tags -- confirm.
            if utils.PackageOEMol.checkTags(mol, ['Structure']):
                gd = utils.PackageOEMol.unpack(mol)
                opt['outfname'] = '{}-{}'.format(gd['IDTag'],
                                                 self.opt['outfname'])

            mdData = utils.MDData(mol)
            opt['molecule'] = mol

            self.log.info('MINIMIZING System: %s' % gd['IDTag'])
            simtools.simulation(mdData, **opt)

            packedmol = mdData.packMDData(mol)
            self.success.emit(packedmol)
        except Exception as e:
            # Attach error message to the molecule that failed
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(mol)
class YankHydrationCube(ParallelOEMolComputeCube):
    """Compute hydration free energies with YANK, one molecule at a time.

    Each molecule is written to a mol2 file in a temporary directory, the
    YANK experiments described by ``hydration_yaml_template`` are run, and
    the resulting free energy and its uncertainty (in kcal/mol) are stored
    on the molecule as the SD tags ``DeltaG_yank_hydration`` and
    ``dDeltaG_yank_hydration``. Failures are reported on the ``failure``
    port with the error message attached to the molecule.
    """

    title = "YankHydrationCube"
    description = (
        " Compute the hydration free energy of a small molecule with YANK. "
        "This cube uses the YANK alchemical free energy code to compute the "
        "transfer free energy of one or more small molecules from gas phase "
        "to TIP3P solvent. See http://getyank.org for more information "
        "about YANK. "
    )
    # Must be a list of lists: flattening a list of plain strings would
    # turn ``tags`` into the individual characters of the string.
    classification = [["Alchemical free energy calculations"]]
    tags = [tag for lists in classification for tag in lists]

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 3600},  # Default 1 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    # Define Custom Ports to handle oeb.gz files
    intake = CustomMoleculeInputPort('intake')
    success = CustomMoleculeOutputPort('success')
    failure = CustomMoleculeOutputPort('failure')

    # These can override YAML parameters
    nsteps_per_iteration = parameter.IntegerParameter(
        'nsteps_per_iteration',
        default=500,
        help_text="Number of steps per iteration")

    timestep = parameter.DecimalParameter(
        'timestep',
        default=2.0,
        help_text="Timestep (fs)")

    simulation_time = parameter.DecimalParameter(
        'simulation_time',
        default=0.100,
        help_text="Simulation time (ns/replica)")

    temperature = parameter.DecimalParameter(
        'temperature',
        default=300.0,
        help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter(
        'pressure',
        default=1.0,
        help_text="Pressure (atm)")

    solvent = parameter.StringParameter(
        'solvent',
        default='gbsa',
        choices=['gbsa', 'tip3p'],
        help_text="Solvent choice: one of ['gbsa', 'tip3p']")

    verbose = parameter.BooleanParameter(
        'verbose',
        default=False,
        help_text="Print verbose YANK logging output")

    def construct_yaml(self, **kwargs):
        """Fill ``hydration_yaml_template`` with the cube parameters.

        Args:
            **kwargs: extra or overriding template values (for example
                ``output_directory``).

        Returns:
            The YAML text with all substitutions applied.
        """
        # Make substitutions to YAML here.
        # TODO: Can we override YAML parameters without having to do string substitutions?
        options = {
            'timestep': self.args.timestep,
            'nsteps_per_iteration': self.args.nsteps_per_iteration,
            # Total simulation time divided by the time covered by one
            # iteration, rounded up.
            'number_of_iterations': int(
                np.ceil(self.args.simulation_time * unit.nanoseconds /
                        (self.args.nsteps_per_iteration * self.args.timestep *
                         unit.femtoseconds))),
            'temperature': self.args.temperature,
            'pressure': self.args.pressure,
            'solvent': self.args.solvent,
            'verbose': 'yes' if self.args.verbose else 'no',
        }
        options.update(kwargs)
        return hydration_yaml_template % options

    def begin(self):
        """Validate the solvent choice and precompute the thermal energy."""
        # TODO: Is there another idiom to use to check valid input?
        if self.args.solvent not in ['gbsa', 'tip3p']:
            raise ValueError("solvent must be one of ['gbsa', 'tip3p']")

        # Compute kT. The Boltzmann constant is multiplied by Avogadro's
        # number so that kT is expressed per mole.
        kB = unit.BOLTZMANN_CONSTANT_kB * unit.AVOGADRO_CONSTANT_NA
        self.kT = kB * (self.args.temperature * unit.kelvin)

    @staticmethod
    def _estimate_phase_free_energies(nc_filename):
        """Return ``estimate_free_energies`` for one NetCDF store, closing it."""
        from yank.analyze import estimate_free_energies
        ncfile = netcdf.Dataset(nc_filename, 'r')
        try:
            return estimate_free_energies(ncfile)
        finally:
            ncfile.close()

    def process(self, mol, port):
        """Run YANK on ``mol`` and attach the hydration free energy to it."""
        kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

        # Retrieve data about which molecule we are processing
        title = mol.GetTitle()

        with TemporaryDirectory() as output_directory:
            try:
                # Print out which molecule we are processing
                self.log.info('Processing {} in directory {}.'.format(
                    title, output_directory))

                # Check that molecule is charged.
                if not molecule_is_charged(mol):
                    raise Exception(
                        'Molecule %s has no charges; input molecules must be charged.'
                        % mol.GetTitle())

                # Write the specified molecule out to a mol2 file without
                # changing its name. The stream is closed before the file is
                # post-processed so that its content is fully on disk.
                mol2_filename = os.path.join(output_directory, 'input.mol2')
                with oechem.oemolostream(mol2_filename) as ofs:
                    oechem.OEWriteMol2File(ofs, mol)

                # Undo the substructure renaming oechem applies to mol2 files
                from YankCubes.utils import unfuck_oechem_mol2_file
                unfuck_oechem_mol2_file(mol2_filename)

                # Run YANK on the specified molecule.
                from yank.yamlbuild import YamlBuilder
                yaml = self.construct_yaml(output_directory=output_directory)
                yaml_builder = YamlBuilder(yaml)
                yaml_builder.build_experiments()
                self.log.info(
                    'Ran Yank experiments for molecule {}.'.format(title))

                # Analyze the hydration free energy.
                (Deltaf_ij_solvent,
                 dDeltaf_ij_solvent) = self._estimate_phase_free_energies(
                     output_directory + '/experiments/solvent1.nc')
                (Deltaf_ij_vacuum,
                 dDeltaf_ij_vacuum) = self._estimate_phase_free_energies(
                     output_directory + '/experiments/solvent2.nc')
                DeltaG_hydration = (Deltaf_ij_vacuum[0, -1] -
                                    Deltaf_ij_solvent[0, -1])
                # Combine the two phase *uncertainties* in quadrature.
                dDeltaG_hydration = np.sqrt(dDeltaf_ij_vacuum[0, -1]**2 +
                                            dDeltaf_ij_solvent[0, -1]**2)

                # Add result to original molecule
                oechem.OESetSDData(mol, 'DeltaG_yank_hydration',
                                   str(DeltaG_hydration * kT_in_kcal_per_mole))
                oechem.OESetSDData(
                    mol, 'dDeltaG_yank_hydration',
                    str(dDeltaG_hydration * kT_in_kcal_per_mole))
                self.log.info(
                    'Analyzed and stored hydration free energy for molecule {}.'
                    .format(title))

                # Emit molecule to success port.
                self.success.emit(mol)

            except Exception as e:
                self.log.info(
                    'Exception encountered when processing molecule {}.'.
                    format(title))
                # Attach error message to the molecule that failed
                # TODO: If there is an error in the leap setup log,
                # we should capture that and attach it to the failed molecule.
                self.log.error(traceback.format_exc())
                mol.SetData('error', str(e))
                # Return failed molecule
                self.failure.emit(mol)
class YankBindingCube(ParallelOEMolComputeCube):
    """Compute binding free energies with YANK, one ligand at a time.

    The receptor named by the ``receptor`` parameter is loaded once in
    ``begin``. For each ligand the receptor and the ligand are written to a
    temporary directory, the YANK experiments described by
    ``binding_yaml_template`` are run, and the resulting free energy and
    its uncertainty (in kcal/mol) are stored on the ligand as the SD tags
    ``DeltaG_yank_binding`` and ``dDeltaG_yank_binding``. Failures are
    reported on the ``failure`` port with the error message attached.
    """

    title = "YankBindingCube"
    description = (
        " Compute the binding free energy of a small molecule with YANK. "
        "This cube uses the YANK alchemical free energy code to compute the "
        "binding free energy of one or more small molecules using harmonic "
        "restraints. See http://getyank.org for more information about "
        "YANK. "
    )
    # Must be a list of lists: flattening a list of plain strings would
    # turn ``tags`` into the individual characters of the string.
    classification = [["Alchemical free energy calculations"]]
    tags = [tag for lists in classification for tag in lists]

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 3600},  # Default 1 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    # Define Custom Ports to handle oeb.gz files
    intake = CustomMoleculeInputPort('intake')
    success = CustomMoleculeOutputPort('success')
    failure = CustomMoleculeOutputPort('failure')

    # Receptor specification
    receptor = parameter.DataSetInputParameter(
        'receptor',
        required=True,
        help_text='Receptor structure file')

    # These can override YAML parameters
    nsteps_per_iteration = parameter.IntegerParameter(
        'nsteps_per_iteration',
        default=500,
        help_text="Number of steps per iteration")

    timestep = parameter.DecimalParameter(
        'timestep',
        default=2.0,
        help_text="Timestep (fs)")

    simulation_time = parameter.DecimalParameter(
        'simulation_time',
        default=0.100,
        help_text="Simulation time (ns/replica)")

    temperature = parameter.DecimalParameter(
        'temperature',
        default=300.0,
        help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter(
        'pressure',
        default=1.0,
        help_text="Pressure (atm)")

    solvent = parameter.StringParameter(
        'solvent',
        default='gbsa',
        choices=['gbsa', 'pme', 'rf'],
        help_text="Solvent choice ['gbsa', 'pme', 'rf']")

    minimize = parameter.BooleanParameter(
        'minimize',
        default=True,
        help_text="Minimize initial structures for stability")

    randomize_ligand = parameter.BooleanParameter(
        'randomize_ligand',
        default=False,
        help_text="Randomize initial ligand position (implicit only)")

    verbose = parameter.BooleanParameter(
        'verbose',
        default=False,
        help_text="Print verbose YANK logging output")

    def construct_yaml(self, **kwargs):
        """Fill ``binding_yaml_template`` with the cube parameters.

        Args:
            **kwargs: extra or overriding template values (for example
                ``output_directory``).

        Returns:
            The YAML text with all substitutions applied.
        """
        # Make substitutions to YAML here.
        # TODO: Can we override YAML parameters without having to do string substitutions?
        options = {
            'timestep': self.args.timestep,
            'nsteps_per_iteration': self.args.nsteps_per_iteration,
            # Total simulation time divided by the time covered by one
            # iteration, rounded up.
            'number_of_iterations': int(
                np.ceil(self.args.simulation_time * unit.nanoseconds /
                        (self.args.nsteps_per_iteration * self.args.timestep *
                         unit.femtoseconds))),
            'temperature': self.args.temperature,
            'pressure': self.args.pressure,
            'solvent': self.args.solvent,
            'minimize': 'yes' if self.args.minimize else 'no',
            'verbose': 'yes' if self.args.verbose else 'no',
            'randomize_ligand': 'yes' if self.args.randomize_ligand else 'no',
        }
        options.update(kwargs)
        return binding_yaml_template % options

    def begin(self):
        """Validate the solvent, precompute kT and load the receptor.

        Raises:
            ValueError: if ``solvent`` is not one of the supported choices.
            RuntimeError: if the receptor cannot be read.
        """
        # TODO: Is there another idiom to use to check valid input?
        if self.args.solvent not in ['gbsa', 'pme', 'rf']:
            raise ValueError("solvent must be one of ['gbsa', 'pme', 'rf']")

        # Compute kT. The Boltzmann constant is multiplied by Avogadro's
        # number so that kT is expressed per mole.
        kB = unit.BOLTZMANN_CONSTANT_kB * unit.AVOGADRO_CONSTANT_NA
        self.kT = kB * (self.args.temperature * unit.kelvin)

        # Load receptor
        self.receptor = oechem.OEMol()
        receptor_filename = download_dataset_to_file(self.args.receptor)
        with oechem.oemolistream(receptor_filename) as ifs:
            if not oechem.OEReadMolecule(ifs, self.receptor):
                raise RuntimeError("Error reading receptor")

    def process(self, mol, port):
        """Run YANK on ``mol`` and attach the binding free energy to it."""
        kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

        # Retrieve data about which molecule we are processing
        title = mol.GetTitle()

        with TemporaryDirectory() as output_directory:
            try:
                # Print out which molecule we are processing
                self.log.info('Processing {} in {}.'.format(
                    title, output_directory))

                # Check that molecule is charged.
                if not molecule_is_charged(mol):
                    raise Exception(
                        'Molecule %s has no charges; input molecules must be charged.'
                        % mol.GetTitle())

                # Write the receptor.
                pdbfilename = os.path.join(output_directory, 'receptor.pdb')
                with oechem.oemolostream(pdbfilename) as ofs:
                    res = oechem.OEWriteConstMolecule(ofs, self.receptor)
                    if res != oechem.OEWriteMolReturnCode_Success:
                        raise RuntimeError(
                            "Error writing receptor: {}".format(res))

                # Write the specified molecule out to a mol2 file without
                # changing its name. The stream is closed before the file is
                # post-processed so that its content is fully on disk.
                mol2_filename = os.path.join(output_directory, 'input.mol2')
                with oechem.oemolostream(mol2_filename) as ofs:
                    oechem.OEWriteMol2File(ofs, mol)

                # Undo the substructure renaming oechem applies to mol2 files
                from YankCubes.utils import unfuck_oechem_mol2_file
                unfuck_oechem_mol2_file(mol2_filename)

                # Run YANK on the specified molecule.
                from yank.yamlbuild import YamlBuilder
                yaml = self.construct_yaml(output_directory=output_directory)
                yaml_builder = YamlBuilder(yaml)
                yaml_builder.build_experiments()
                self.log.info(
                    'Ran Yank experiments for molecule {}.'.format(title))

                # Analyze the binding free energy
                # TODO: Use yank.analyze API for this
                from YankCubes.analysis import analyze
                store_directory = os.path.join(output_directory,
                                               'experiments')
                [DeltaG_binding, dDeltaG_binding] = analyze(store_directory)

                # Attach binding free energy estimates to molecule
                oechem.OESetSDData(mol, 'DeltaG_yank_binding',
                                   str(DeltaG_binding * kT_in_kcal_per_mole))
                oechem.OESetSDData(mol, 'dDeltaG_yank_binding',
                                   str(dDeltaG_binding * kT_in_kcal_per_mole))
                self.log.info(
                    'Analyzed and stored binding free energy for molecule {}.'.
                    format(title))

                # Emit molecule to success port.
                self.success.emit(mol)

            except Exception as e:
                self.log.info(
                    'Exception encountered when processing molecule {}.'.
                    format(title))
                # Attach error message to the molecule that failed
                # TODO: If there is an error in the leap setup log,
                # we should capture that and attach it to the failed molecule.
                self.log.error(traceback.format_exc())
                mol.SetData('error', str(e))
                # Return failed molecule
                self.failure.emit(mol)
class LigandReader(SourceCube):
    """Source cube that reads ligands and emits them tagged for downstream cubes.

    Ligands are read from the local file named by ``data_in`` when
    ``config_from_env()`` returns ``None``; otherwise they are streamed from a
    ``StreamingDataset``. Every emitted molecule receives the ``prefix`` and
    ``suffix`` data tags, has the residue name of all its atoms set to the
    ``type`` parameter and, when ``IDTag`` is enabled, gets an ``IDTag`` data
    entry built from its title and its position in the stream.
    """
    title = "LigandReader Cube"
    version = "0.0.0"
    classification = [["Ligand Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    Ligand Reader Cube
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of Ligands
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the Ligands
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter(
        "data_in",
        help_text="Ligand to read in",
        required=True,
        description="The Ligand to read in")

    # Optional cap on the number of emitted ligands (None means no cap)
    limit = parameter.IntegerParameter(
        "limit",
        required=False)

    download_format = parameter.StringParameter(
        "download_format",
        choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
        required=False,
        default=".oeb.gz")

    prefix = parameter.StringParameter(
        'prefix',
        default='',
        help_text='An SD tag used as prefix string')

    suffix = parameter.StringParameter(
        'suffix',
        default='',
        help_text='An SD tag used as suffix string')

    type = parameter.StringParameter(
        'type',
        default='LIG',
        required=True,
        help_text='The ligand reside name')

    IDTag = parameter.BooleanParameter(
        'IDTag',
        default=True,
        required=False,
        help_text='If True/Checked ligands are enumerated by sequentially integers.'
                  'A SD tag containing part of the ligand name and an integer is used '
                  'to create a unique IDTag which is attached to the ligand')

    def begin(self):
        """Cache the cube arguments as a plain dictionary."""
        self.opt = vars(self.args)

    def _annotate(self, mol, count):
        """Attach the prefix/suffix/IDTag data and the residue name to ``mol``.

        ``count`` is the zero-based position of the molecule in the stream and
        is used as the numeric part of the IDTag. Returns the same molecule.
        """
        mol.SetData(oechem.OEGetTag('prefix'), self.opt['prefix'])
        mol.SetData(oechem.OEGetTag('suffix'), self.opt['suffix'])

        for at in mol.GetAtoms():
            residue = oechem.OEAtomGetResidue(at)
            residue.SetName(self.opt['type'])
            oechem.OEAtomSetResidue(at, residue)

        if self.opt['IDTag']:
            # 'l' + first 12 characters of the title + '_' + stream position
            mol.SetData(oechem.OEGetTag('IDTag'),
                        'l' + mol.GetTitle()[0:12] + '_' + str(count))
        return mol

    def _emit(self, mols, max_idx):
        """Yield annotated molecules from ``mols``, stopping after ``max_idx``.

        The limit is checked after each yield, exactly as in the two loops
        this helper replaces, so a limit of ``None`` (or one that the counter
        never equals) lets the whole stream through.
        """
        count = 0
        for mol in mols:
            yield self._annotate(mol, count)
            count += 1
            if max_idx is not None and count == max_idx:
                break

    def __iter__(self):
        """Yield the ligands from the local file or from the Orion dataset."""
        max_idx = self.args.limit
        if max_idx is not None:
            max_idx = int(max_idx)

        self.config = config_from_env()
        in_orion = self.config is not None

        if not in_orion:
            with oechem.oemolistream(str(self.args.data_in)) as ifs:
                yield from self._emit(ifs.GetOEMols(), max_idx)
        else:
            stream = StreamingDataset(self.args.data_in,
                                      input_format=self.args.download_format)
            yield from self._emit(stream, max_idx)
class YankBindingFECube(ParallelOEMolComputeCube):
    """Parallel cube that runs a YANK binding free energy experiment.

    Each work item is a batch whose element 0 is the solvated ligand and whose
    element 1 is the solvated complex. Both are converted to OpenMM systems,
    written to a temporary directory together with their PDB files, and handed
    to a YANK ``ExperimentBuilder``. The analysed binding free energy and its
    uncertainty are attached as SD data to the ligand extracted from the
    solvated ligand system, which is emitted on the success port. On any
    exception the solvated complex is emitted on the failure port with the
    error message attached.
    """
    version = "0.0.0"
    # NOTE(review): the title and description talk about solvation/hydration
    # free energies although this cube runs the binding template; they look
    # copied from a solvation cube. Left untouched because the title may be
    # referenced elsewhere - confirm and update.
    title = "YankSolvationFECube"
    description = """
    Compute the hydration free energy of a small molecule with YANK.

    This cube uses the YANK alchemical free energy code to compute the
    transfer free energy of one or more small molecules from gas phase
    to the selected solvent.

    See http://getyank.org for more information about YANK.
    """
    # Must be a list of lists: the comprehension below flattens one level, so
    # a flat list of strings would produce one tag per *character*.
    classification = [["Alchemical free energy calculations"]]
    tags = [tag for lists in classification for tag in lists]

    # The intake port is re-defined as batch port
    intake = BatchMoleculeInputPort("intake")

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 43200},  # Default 12 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    temperature = parameter.DecimalParameter(
        'temperature',
        default=300.0,
        help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter(
        'pressure',
        default=1.0,
        help_text="Pressure (atm)")

    minimize = parameter.BooleanParameter(
        'minimize',
        default=False,
        help_text="Minimize input system")

    iterations = parameter.IntegerParameter(
        'iterations',
        default=1000,
        help_text="Number of iterations")

    nsteps_per_iteration = parameter.IntegerParameter(
        'nsteps_per_iteration',
        default=500,
        help_text="Number of steps per iteration")

    timestep = parameter.DecimalParameter(
        'timestep',
        default=2.0,
        help_text="Timestep (fs)")

    nonbondedCutoff = parameter.DecimalParameter(
        'nonbondedCutoff',
        default=10.0,
        help_text="The non-bonded cutoff in angstroms")

    restraints = parameter.StringParameter(
        'restraints',
        default='Harmonic',
        choices=['FlatBottom', 'Harmonic', 'Boresch'],
        help_text='Select the restraint types')

    ligand_resname = parameter.StringParameter(
        'ligand_resname',
        default='LIG',
        help_text='The decoupling ligand residue name')

    verbose = parameter.BooleanParameter(
        'verbose',
        default=True,
        help_text="Print verbose YANK logging output")

    def begin(self):
        """Cache the cube arguments as a dictionary and add the cube logger."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log

    def process(self, solvated_system, port):
        """Run YANK on one (solvated ligand, solvated complex) batch.

        Emits the extracted ligand with ``DG_yank_binding`` and
        ``dG_yank_binding`` SD data on success, or the solvated complex with
        an ``error`` data entry on failure.
        """
        try:
            # Work on a per-item copy so SD-tag overrides do not leak
            # into the options used for the next item
            opt = dict(self.opt)

            # Extract the solvated ligand and the solvated complex
            solvated_ligand = solvated_system[0]
            solvated_complex = solvated_system[1]

            # Update cube simulation parameters with the eventually molecule SD tags
            new_args = {dp.GetTag(): dp.GetValue()
                        for dp in oechem.OEGetSDDataPairs(solvated_ligand)
                        if dp.GetTag() in ["temperature", "pressure"]}
            if new_args:
                for k in new_args:
                    try:
                        new_args[k] = float(new_args[k])
                    except (TypeError, ValueError):
                        # Best effort: a non-numeric tag value is passed on as is
                        pass
                self.log.info("Updating parameters for molecule: {}\n{}".format(
                    solvated_ligand.GetTitle(), new_args))
                opt.update(new_args)

            # Extract the MD data
            mdData_ligand = data_utils.MDData(solvated_ligand)
            solvated_ligand_structure = mdData_ligand.structure

            mdData_complex = data_utils.MDData(solvated_complex)
            solvated_complex_structure = mdData_complex.structure

            # Create the solvated OpenMM systems
            solvated_complex_omm_sys = solvated_complex_structure.createSystem(
                nonbondedMethod=app.PME,
                nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                constraints=app.HBonds,
                removeCMMotion=False)

            solvated_ligand_omm_sys = solvated_ligand_structure.createSystem(
                nonbondedMethod=app.PME,
                nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                constraints=app.HBonds,
                removeCMMotion=False)

            # Write out all the required files and set-run the Yank experiment
            with TemporaryDirectory() as output_directory:

                opt['Logger'].info("Output Directory {}".format(output_directory))

                solvated_complex_structure_fn = os.path.join(output_directory, "complex.pdb")
                solvated_complex_structure.save(solvated_complex_structure_fn, overwrite=True)

                solvated_ligand_structure_fn = os.path.join(output_directory, "solvent.pdb")
                solvated_ligand_structure.save(solvated_ligand_structure_fn, overwrite=True)

                solvated_complex_omm_serialized = XmlSerializer.serialize(solvated_complex_omm_sys)
                solvated_complex_omm_serialized_fn = os.path.join(output_directory, "complex.xml")
                with open(solvated_complex_omm_serialized_fn, 'w') as solvated_complex_f:
                    solvated_complex_f.write(solvated_complex_omm_serialized)

                solvated_ligand_omm_serialized = XmlSerializer.serialize(solvated_ligand_omm_sys)
                solvated_ligand_omm_serialized_fn = os.path.join(output_directory, "solvent.xml")
                with open(solvated_ligand_omm_serialized_fn, 'w') as solvated_ligand_f:
                    solvated_ligand_f.write(solvated_ligand_omm_serialized)

                # Build the Yank Experiment
                yaml_builder = ExperimentBuilder(yank_binding_template.format(
                    verbose='yes' if opt['verbose'] else 'no',
                    minimize='yes' if opt['minimize'] else 'no',
                    output_directory=output_directory,
                    timestep=opt['timestep'],
                    nsteps_per_iteration=opt['nsteps_per_iteration'],
                    number_iterations=opt['iterations'],
                    temperature=opt['temperature'],
                    pressure=opt['pressure'],
                    complex_pdb_fn=solvated_complex_structure_fn,
                    complex_xml_fn=solvated_complex_omm_serialized_fn,
                    solvent_pdb_fn=solvated_ligand_structure_fn,
                    solvent_xml_fn=solvated_ligand_omm_serialized_fn,
                    restraints=opt['restraints'],
                    ligand_resname=opt['ligand_resname']))

                # Run Yank
                yaml_builder.run_experiments()

                # The results must be read while the temporary directory exists
                exp_dir = os.path.join(output_directory, "experiments")

                DeltaG_binding, dDeltaG_binding, DeltaH, dDeltaH = \
                    yankutils.analyze_directory(exp_dir)

                protein, ligand, water, excipients = oeommutils.split(
                    solvated_ligand, ligand_res_name=opt['ligand_resname'])

                # Add result to the extracted ligand in kcal/mol
                oechem.OESetSDData(ligand, 'DG_yank_binding', str(DeltaG_binding))
                oechem.OESetSDData(ligand, 'dG_yank_binding', str(dDeltaG_binding))

                self.success.emit(ligand)

        except Exception as e:
            # Attach an error message to the molecule that failed
            self.log.error(traceback.format_exc())
            solvated_system[1].SetData('error', str(e))
            # Return failed mol
            self.failure.emit(solvated_system[1])
class OpenMMnptCube(ParallelOEMolComputeCube):
    """Parallel cube that runs an NPT simulation on a packed complex.

    The incoming molecule must carry the packed ``Structure`` data. The cube
    options are copied per molecule, the output file name is prefixed with the
    molecule ``IDTag``, and ``simtools.simulation`` is invoked on the unpacked
    MD data. The molecule with the updated MD data packed back is emitted on
    the success port; on any exception the input molecule is emitted on the
    failure port with the error message attached.
    """
    title = 'NPT Cube'
    version = "0.0.0"
    classification = [["Simulation", "OpenMM", "NPT"]]
    tags = ['OpenMM', 'Parallel Cube']

    description = """NPT simulation of the protein:ligand complex.

    This cube will take in the streamed complex.oeb.gz file containing
    the solvated protein:ligand complex and will perform a MD simulation at
    constant temperature and pressure.

    Input parameters:
    ----------------
      time (decimal): Number of picoseconds to perform the complex simulation.
      temperature (decimal): target temperature
      pressure (decimal): target pressure
    """

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 43200},  # Default 12 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    temperature = parameter.DecimalParameter(
        'temperature',
        default=300.0,
        help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter(
        'pressure',
        default=1.0,
        help_text="Pressure (atm)")

    time = parameter.DecimalParameter(
        'time',
        default=10.0,
        help_text="NPT simulation time in picoseconds")

    restraints = parameter.StringParameter(
        'restraints',
        default='',
        help_text="""Mask selection to apply restraints. Possible keywords are: ligand, protein, water, ions,
        ca_protein, cofactors. Operational tokens are: and, not, noh""")

    restraintWt = parameter.DecimalParameter(
        'restraintWt',
        default=2.0,
        help_text="Restraint weight for xyz atom restraints in kcal/(mol ang^2)")

    nonbondedMethod = parameter.StringParameter(
        'nonbondedMethod',
        default='PME',
        choices=['NoCutoff', 'CutoffNonPeriodic', 'CutoffPeriodic', 'PME', 'Ewald'],
        help_text="NoCutoff, CutoffNonPeriodic, CutoffPeriodic, PME, or Ewald.")

    nonbondedCutoff = parameter.DecimalParameter(
        'nonbondedCutoff',
        default=10,
        help_text="""The non-bonded cutoff in angstroms.
        This is ignored if non-bonded method is NoCutoff""")

    constraints = parameter.StringParameter(
        'constraints',
        default='HBonds',
        choices=['None', 'HBonds', 'HAngles', 'AllBonds'],
        help_text="""None, HBonds, HAngles, or AllBonds
        Which type of constraints to add to the system (e.g., SHAKE).
        None means no bonds are constrained.
        HBonds means bonds with hydrogen are constrained""")

    trajectory_filetype = parameter.StringParameter(
        'trajectory_filetype',
        default='DCD',
        choices=['DCD', 'NetCDF', 'HDF5'],
        help_text="NetCDF, DCD, HDF5. File type to write trajectory files")

    # The adjacent literals are concatenated, hence the explicit trailing space
    trajectory_interval = parameter.IntegerParameter(
        'trajectory_interval',
        default=0,
        help_text="Step interval for trajectory snapshots. If 0 the trajectory "
                  "file will not be generated")

    reporter_interval = parameter.IntegerParameter(
        'reporter_interval',
        default=0,
        help_text="Step interval for reporting data. If 0 the reporter file "
                  "will not be generated")

    outfname = parameter.StringParameter(
        'outfname',
        default='npt',
        help_text='Filename suffix for output simulation files. Formatted: <title>-<outfname>')

    tarxz = parameter.BooleanParameter(
        'tarxz',
        default=False,
        description='Create a tar.xz file of the attached data')

    center = parameter.BooleanParameter(
        'center',
        default=True,
        description='Center the system to the OpenMM unit cell')

    verbose = parameter.BooleanParameter(
        'verbose',
        default=True,
        description='Increase log file verbosity.')

    platform = parameter.StringParameter(
        'platform',
        default='Auto',
        choices=['Auto', 'Reference', 'CPU', 'CUDA', 'OpenCL'],
        help_text='Select which platform to use to run the simulation')

    cuda_opencl_precision = parameter.StringParameter(
        'cuda_opencl_precision',
        default='single',
        choices=['single', 'mixed', 'double'],
        help_text='Select the CUDA or OpenCL precision')

    def begin(self):
        """Cache the cube arguments and add the logger and simulation type."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log
        self.opt['SimType'] = 'npt'

    def process(self, mol, port):
        """Run the NPT simulation on ``mol`` and emit the re-packed molecule.

        A molecule without packed ``Structure`` data, or any error raised
        while simulating, sends the input molecule to the failure port with
        an ``error`` data entry.
        """
        try:
            # The copy of the dictionary option as local variable
            # is necessary to avoid filename collisions due to
            # the parallel cube processes
            opt = dict(self.opt)

            # The unpacked data (gd) is needed below for the IDTag, so a
            # molecule lacking the Structure tag is reported explicitly
            # instead of failing later on an unbound name.
            # NOTE(review): checkTags may itself raise on missing tags -
            # either way the molecule ends up on the failure port.
            if not utils.PackageOEMol.checkTags(mol, ['Structure']):
                raise ValueError(
                    "Molecule {} has no packed 'Structure' data; "
                    "the NPT simulation cannot be run".format(mol.GetTitle()))

            gd = utils.PackageOEMol.unpack(mol)
            opt['outfname'] = '{}-{}'.format(gd['IDTag'], self.opt['outfname'])

            mdData = utils.MDData(mol)

            opt['molecule'] = mol

            self.log.info('START NPT SIMULATION %s' % gd['IDTag'])
            simtools.simulation(mdData, **opt)

            packedmol = mdData.packMDData(mol)
            self.success.emit(packedmol)

        except Exception as e:
            # Attach error message to the molecule that failed
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(mol)
class ProteinReader(SourceCube):
    """Source cube that reads protein systems and emits them with a fixed title.

    Molecules are read from the local file named by ``data_in`` when
    ``config_from_env()`` returns ``None``; otherwise they are streamed from a
    ``StreamingDataset``. The title of every emitted molecule is replaced by
    the ``protein_prefix`` parameter.
    """
    title = "Protein Reader Cube"
    version = "0.0.0"
    classification = [["Protein Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    A Protein Reader Cube
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of the protein system
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the protein system
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter(
        "data_in",
        help_text="Protein to read in",
        required=True,
        description="The Protein to read in")

    # Optional cap on the number of emitted molecules (None means no cap)
    limit = parameter.IntegerParameter(
        "limit",
        required=False)

    download_format = parameter.StringParameter(
        "download_format",
        choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
        required=False,
        default=".oeb.gz")

    protein_prefix = parameter.StringParameter(
        'protein_prefix',
        default='PRT',
        help_text='The protein prefix name used to identify the protein')

    def begin(self):
        """Cache the cube arguments as a plain dictionary."""
        self.opt = vars(self.args)

    def _emit(self, mols, max_idx):
        """Yield re-titled molecules from ``mols``, stopping after ``max_idx``.

        The limit is checked after each yield, exactly as in the two loops
        this helper replaces, so a limit of ``None`` (or one that the counter
        never equals) lets the whole stream through.
        """
        count = 0
        for mol in mols:
            mol.SetTitle(self.opt['protein_prefix'])
            yield mol
            count += 1
            if max_idx is not None and count == max_idx:
                break

    def __iter__(self):
        """Yield the proteins from the local file or from the Orion dataset."""
        max_idx = self.args.limit
        if max_idx is not None:
            max_idx = int(max_idx)

        self.config = config_from_env()
        in_orion = self.config is not None

        if not in_orion:
            with oechem.oemolistream(str(self.args.data_in)) as ifs:
                yield from self._emit(ifs.GetOEMols(), max_idx)
        else:
            stream = StreamingDataset(self.args.data_in,
                                      input_format=self.args.download_format)
            yield from self._emit(stream, max_idx)
class ForceFieldPrep(ParallelOEMolComputeCube):
    """Parallel cube that parametrizes a complex with the selected force fields.

    The incoming complex is split into protein, ligand, water and excipients.
    Each component is parametrized through the ``utils.applyff*`` helpers, the
    resulting Parmed structures are summed into one complex structure, and a
    new OEMol complex is assembled in the same component order. The Parmed
    structure and its reference positions are attached to that complex, which
    is emitted on the success port. On any exception the input molecule is
    emitted on the failure port with the error message attached.
    """
    title = "Force Field Preparation Cube"
    version = "0.0.0"
    classification = [["Force Field Preparation", "OEChem", "Force Field preparation"]]
    tags = ['OEChem', 'OEBio', 'OpenMM']
    description = """
    Each complex is parametrized by using the selected force fields

    Input:
    -------
    oechem.OEMCMol - Streamed-in of complexes

    Output:
    -------
    oechem.OEMCMol - Emits force field parametrized complexes
    """

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 3600},  # Default 1 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    protein_forcefield = parameter.DataSetInputParameter(
        'protein_forcefield',
        default='amber99sbildn.xml',
        help_text='Force field parameters for protein')

    solvent_forcefield = parameter.DataSetInputParameter(
        'solvent_forcefield',
        default='tip3p.xml',
        help_text='Force field parameters for solvent')

    ligand_forcefield = parameter.StringParameter(
        'ligand_forcefield',
        required=True,
        default='GAFF2',
        choices=['GAFF', 'GAFF2', 'SMIRNOFF'],
        help_text='Force field to parametrize the ligand')

    other_forcefield = parameter.StringParameter(
        'other_forcefield',
        required=True,
        default='GAFF2',
        choices=['GAFF', 'GAFF2', 'SMIRNOFF'],
        help_text='Force field used to parametrize other molecules not recognized by the protein force field')

    def begin(self):
        """Cache the cube arguments as a dictionary and add the cube logger."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log

    def process(self, mol, port):
        """Parametrize ``mol`` and emit the complex with its Parmed structure.

        Raises inside the ``try`` (atom-count mismatches, force field or
        OpenMM system creation errors) are logged and turn into an emission
        of the input molecule on the failure port.
        """
        try:
            # Per-molecule copy of the options: the prefix name is specific
            # to this molecule and must not be written into the shared dict
            opt = dict(self.opt)

            # Split the complex in components in order to apply the FF
            protein, ligand, water, excipients = utils.split(mol)

            # Unique prefix name used to output parametrization files
            opt['prefix_name'] = mol.GetTitle()

            # Apply FF to the Protein
            protein_structure = utils.applyffProtein(protein, opt)

            # Apply FF to water molecules
            water_structure = utils.applyffWater(water, opt)

            # Apply FF to the excipients
            if excipients.NumAtoms() > 0:
                excipient_structure = utils.applyffExcipients(excipients, opt)

                # The excipient order is set equal to the order in related
                # parmed structure to avoid possible atom index mismatching
                excipients = oeommutils.openmmTop_to_oemol(excipient_structure.topology,
                                                           excipient_structure.positions,
                                                           verbose=False)

            # Apply FF to the ligand
            ligand_structure = utils.applyffLigand(ligand, opt)

            # Build the Parmed structure
            if excipients.NumAtoms() > 0:
                complex_structure = protein_structure + ligand_structure + \
                                    excipient_structure + water_structure
            else:
                complex_structure = protein_structure + ligand_structure + water_structure

            num_atom_system = protein.NumAtoms() + ligand.NumAtoms() + \
                excipients.NumAtoms() + water.NumAtoms()

            if num_atom_system != complex_structure.topology.getNumAtoms():
                oechem.OEThrow.Fatal("Parmed and OE topologies mismatch atom number error")

            # Assemble a new OEMol complex in a specific order
            # to match the defined Parmed structure complex
            complx = protein.CreateCopy()
            oechem.OEAddMols(complx, ligand)
            oechem.OEAddMols(complx, excipients)
            oechem.OEAddMols(complx, water)

            complx.SetTitle(mol.GetTitle())

            # Set Parmed structure box_vectors
            vec_data = pack_utils.PackageOEMol.getData(complx, tag='box_vectors')
            vec = pack_utils.PackageOEMol.decodePyObj(vec_data)
            complex_structure.box_vectors = vec

            # Attach the Parmed structure to the complex
            packed_complex = pack_utils.PackageOEMol.pack(complx, complex_structure)

            # Attach the reference positions to the complex
            ref_positions = complex_structure.positions
            packedpos = pack_utils.PackageOEMol.encodePyObj(ref_positions)
            packed_complex.SetData(oechem.OEGetTag('OEMDDataRefPositions'), packedpos)

            # Set atom serial numbers, Ligand name and HETATM flag
            # oechem.OEPerceiveResidues(packed_complex, oechem.OEPreserveResInfo_SerialNumber)
            for at in packed_complex.GetAtoms():
                thisRes = oechem.OEAtomGetResidue(at)
                thisRes.SetSerialNumber(at.GetIdx())
                if thisRes.GetName() == 'UNL':
                    thisRes.SetName("LIG")
                    thisRes.SetHetAtom(True)
                oechem.OEAtomSetResidue(at, thisRes)

            if packed_complex.GetMaxAtomIdx() != complex_structure.topology.getNumAtoms():
                raise ValueError("OEMol complex and Parmed structure mismatch atom numbers")

            # Check if it is possible to create the OpenMM System; the system
            # itself is not needed, only the fact that creation does not raise
            complex_structure.createSystem(nonbondedMethod=app.CutoffPeriodic,
                                           nonbondedCutoff=10.0 * unit.angstroms,
                                           constraints=app.HBonds,
                                           removeCMMotion=False)

            self.success.emit(packed_complex)

        except Exception as e:
            # Attach error message to the molecule that failed
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(mol)
class PrepareRanking(ComputeCube):
    """Compute cube that pairs the active molecule list with incoming baitsets.

    The list of actives arrives on ``act_input`` and baitsets arrive on
    ``baitset_input``. Baitsets are buffered until the actives are known;
    then one tuple per buffered baitset is emitted on ``success``. With the
    ``FastROCS`` method the actives are first uploaded to the server as a
    dataset and the resulting dataset information is appended to each tuple.
    """

    url = parameter.StringParameter(
        'url',
        default="http://10.0.1.22:4242",
        help_text="Url of the Restful FastROCS Server for the request")

    method = parameter.StringParameter(
        'method',
        default='Fingerprint',
        help_text='Method used for the ranking')

    act_input = ObjectInputPort('act_input')
    baitset_input = ObjectInputPort('baitset_input')
    success = ObjectOutputPort('success')

    def begin(self):
        """Initialise the baitset buffer, the active list and the ranking."""
        self.baitsets = list()
        self.act_list = list()
        self.ranking = list()

    def process(self, data, port):
        """Store ``data`` according to ``port`` and emit every ready baitset.

        Nothing is emitted until a non-empty active list has been received.
        """
        # Port names are compared by value: identity comparison of strings
        # only works by accident of interning
        if port == 'act_input':
            self.act_list = data
            if self.args.method == 'FastROCS':
                self.dataset_infos = self.add_dataset()

        if port == 'baitset_input':
            self.baitsets.append(data)

        if len(self.act_list) > 0:
            while len(self.baitsets) > 0:
                if self.args.method == 'FastROCS':
                    self.success.emit((self.act_list, self.baitsets.pop(),
                                       self.ranking, self.dataset_infos))
                else:
                    self.success.emit(
                        (self.act_list, self.baitsets.pop(), self.ranking))

    def add_dataset(self):
        """Upload the active molecules to the FastROCS server as a dataset.

        The actives are written to a temporary ``.oeb`` file that is posted
        to ``<url>/datasets/`` and removed afterwards, also when the upload
        fails.

        Returns a tuple ``(dataset id from the JSON response, dict mapping
        each active's title to its index in the active list)``.
        """
        url = self.args.url + "/datasets/"
        act_mol_idx = {}

        self.dataset = tempfile.NamedTemporaryFile(suffix='.oeb', mode='wb', delete=False)
        try:
            with oechem.oemolostream(self.dataset.name) as ofs:
                for idx, mol in enumerate(self.act_list):
                    act_mol_idx[mol.GetTitle()] = idx
                    oechem.OEWriteMolecule(ofs, mol)
            self.dataset.flush()

            with open(self.dataset.name, 'rb') as dataset:
                parameters = {
                    "dataset": (self.dataset.name, dataset, 'application/octet-stream'),
                    "name": 'dataset of active molecules',
                }
                multipart_data = MultipartEncoder(fields=parameters)

                response = requests.post(
                    url,
                    data=multipart_data,
                    headers={"content-type": multipart_data.content_type})
        finally:
            # delete=False above means the file must be cleaned up by hand
            self.dataset.close()
            os.remove(self.dataset.name)

        data = response.json()
        dataset_infos = (data["id"], act_mol_idx)
        return dataset_infos
class ParallelFastFPRanking(ParallelComputeCube):
    """
    A compute Cube that receives the active molecules, a baitset of indices
    and the current ranking, queries the FastFingerPrint server for the hit
    list of every baitset molecule and merges the hit lists into the ranking

    Ranking entries are tuples; position 1 is used as the molecule identifier
    and position 2 as the similarity score, the ranking being kept in
    descending score order.
    """

    classification = [["Compute", "Fingerprint", "Similarity"]]

    url = parameter.StringParameter(
        'url',
        default="http://10.0.62.124:8081",
        help_text="Url of the FastFingerPrint Server for the request")

    fptype = parameter.IntegerParameter(
        'fptype',
        default=105,
        help_text="Fingerprint type to use for the ranking")

    topn = parameter.IntegerParameter(
        'topn',
        default=100,
        help_text="Number of top molecules returned in the ranking")

    data_input = ObjectInputPort('data_input')
    success = ObjectOutputPort('success')

    def begin(self):
        # self.max_tanimoto = 0
        # self.fp = None
        # self.fp_list = None
        # self.baitset = None
        pass

    def process(self, data, port):
        """Rank the database against every baitset molecule and emit the result.

        ``data`` is ``(act_list, baitset, ranking)``; ``baitset[1]`` holds the
        indices into ``act_list`` to query and ``baitset[0]`` is stored in
        each ranking entry. Emits ``(act_list, baitset, ranking)``.
        """
        self.act_list = data[0]
        self.baitset = data[1]
        self.ranking = data[2]

        fptypes = {102: 'path', 104: 'circular', 105: 'tree'}
        database = fptypes[self.args.fptype] + "_db"

        for idx in self.baitset[1]:
            smiles = oechem.OEMolToSmiles(self.act_list[idx])
            safe_smiles = parse.quote(smiles)
            url = "%s/%s/hitlist?smiles=%s&oformat=csv&maxhits=%d" % (
                self.args.url, database, safe_smiles, self.args.topn)
            response = requests.get(url)

            # Drop the first and the last element of the split response
            # (presumably the CSV header and the empty string after the final
            # newline - confirm against the server output); slicing also
            # copes with a response too short to contain both
            hitlist = response.content.decode().split('\n')[1:-1]

            cur_rank = list()
            for mol in hitlist:
                cur_mol = mol.split(',')
                cur_rank.append((cur_mol[0], cur_mol[1], float(cur_mol[4]),
                                 self.baitset[0], False))

            if len(self.ranking) == 0:
                self.ranking = cur_rank
            else:
                self.merge_ranking(cur_rank)

        #if self.fp_list is not None and self.baitset is not None:
            #with oechem.oemolistream(str(self.args.data_in)) as ifs:
            #    for mol in ifs.GetOEMols():
            #        max_tanimoto = 0
            #        fp = oegraphsim.OEFingerPrint()
            #        oegraphsim.OEMakeFP(fp, mol, self.args.fptype)
            #        for idx in self.baitset[1]:
            #            act_fp = self.fp_list[idx]
            #            tanimoto = oegraphsim.OETanimoto(fp, self.fp_list[idx])
            #            if tanimoto > max_tanimoto:
            #                max_tanimoto = tanimoto
            #        self.update_ranking(mol, max_tanimoto, False)

        self.success.emit((self.act_list, self.baitset, self.ranking))

    def merge_ranking(self, ranking):
        """Merge ``ranking`` into ``self.ranking``, keeping the best ``topn``.

        Both lists are walked in descending score order. An entry whose
        identifier was already merged is skipped. Once ``topn`` entries have
        been merged only entries tied with the last merged score are still
        accepted. The result replaces ``self.ranking``.
        """
        topn = self.args.topn
        merged_list = list()
        id_set = set()

        def admit(entry):
            """Append ``entry`` if there is room or it ties the last score."""
            if len(merged_list) < topn or (
                    merged_list and entry[2] == merged_list[-1][2]):
                merged_list.append(entry)
                id_set.add(entry[1])
                return True
            return False

        i = 0
        j = 0
        while i < len(self.ranking):
            # First take every new entry scoring strictly higher than the
            # current entry of the existing ranking
            while j < len(ranking) and ranking[j][2] > self.ranking[i][2]:
                if ranking[j][1] in id_set:
                    j += 1
                elif admit(ranking[j]):
                    j += 1
                else:
                    break

            if self.ranking[i][1] in id_set:
                i += 1
            elif admit(self.ranking[i]):
                i += 1
            else:
                break

        # Whatever is left of the new ranking
        while j < len(ranking):
            if ranking[j][1] in id_set:
                j += 1
            elif admit(ranking[j]):
                j += 1
            else:
                break

        self.ranking = merged_list

    def update_ranking(self, mol, max_tanimoto, ka_tag):
        """Insert ``mol`` with score ``max_tanimoto`` into ``self.ranking``.

        Nothing happens when the ranking already holds ``topn`` entries and
        the score is lower than the last one. Otherwise the molecule is
        inserted after the leading entries that score strictly higher, and the
        ranking is cut after position ``topn`` at the first change of score,
        so that ties with the ``topn``-th entry are kept.
        """
        if len(self.ranking) >= self.args.topn and \
                max_tanimoto < self.ranking[-1][2]:
            return

        # Position after the leading run of strictly higher scores;
        # enumerate gives the real position even with duplicated entries
        index = 0
        for position, top_mol in enumerate(self.ranking):
            if max_tanimoto < top_mol[2]:
                index = position + 1
            else:
                break

        upper = self.ranking[:index]
        lower = self.ranking[index:]
        self.ranking = upper + [(oechem.OEMolToSmiles(mol), mol.GetTitle(),
                                 max_tanimoto, self.baitset[0], ka_tag)] + lower

        i = self.args.topn - 1
        while i < len(self.ranking) - 1:
            if self.ranking[i][2] != self.ranking[i + 1][2]:
                self.ranking = self.ranking[:i + 1]
                break
            else:
                i += 1
class ProteinReader(SourceCube):
    """Reader cube emitting protein systems titled with ``protein_prefix``.

    The source is an Orion ``StreamingDataset`` when ``config_from_env()``
    returns a configuration, and the local file named by ``data_in``
    otherwise.
    """
    title = "Protein Reader Cube"
    version = "0.0.0"
    classification = [["Protein Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    A Protein Reader Cube
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of the protein system
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the protein system
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter(
        "data_in",
        help_text="Protein to read in",
        required=True,
        description="The Protein to read in")

    limit = parameter.IntegerParameter("limit", required=False)

    download_format = parameter.StringParameter(
        "download_format",
        choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
        required=False,
        default=".oeb.gz")

    protein_prefix = parameter.StringParameter(
        'protein_prefix',
        default='PRT',
        help_text='The protein prefix name used to identify the protein')

    def begin(self):
        """Keep the cube arguments available as a dictionary."""
        self.opt = vars(self.args)

    def __iter__(self):
        """Yield each protein after overwriting its title with the prefix.

        Iteration stops once the number of yielded molecules equals the
        ``limit`` parameter, when one is given.
        """
        cap = self.args.limit
        if cap is not None:
            cap = int(cap)

        emitted = 0
        self.config = config_from_env()

        # Orion run: stream the dataset
        if self.config is not None:
            dataset = StreamingDataset(self.args.data_in,
                                       input_format=self.args.download_format)
            for protein in dataset:
                protein.SetTitle(self.opt['protein_prefix'])
                yield protein
                emitted += 1
                if cap is not None and emitted == cap:
                    break
            return

        # Local run: read the file
        with oechem.oemolistream(str(self.args.data_in)) as ifs:
            for protein in ifs.GetOEMols():
                protein.SetTitle(self.opt['protein_prefix'])
                yield protein
                emitted += 1
                if cap is not None and emitted == cap:
                    break


# class SimOutputCube(OEMolOStreamCube):
#     """
#     A sink cube that writes molecules to a file
#     """
#     classification = [["Output"]]
#     title = "Output Writer"
#
#     intake = BinaryMoleculeInputPort('intake')
#     data_out = DataSetOutputParameter('data_out',
#                                       required=True,
#                                       title='Name of Dataset to create',
#                                       description='The dataset to output')
#     backend = DataSetOutputParameter(
#         'backend',
#         default="auto",
#         choices=["db", "s3", "auto"],
#         description="The Orion storage backend to use")
#
#     def begin(self):
#         self.in_orion = config_from_env() is not None
#         self.decoder = MoleculeSerializerMixin()
#         self.need_decode = not self.args.data_out.endswith(".oeb.gz")
#         if self.in_orion:
#             self.ofs = MultipartDatasetUploader(self.args.data_out,
#                                                 tags=[self.name],
#                                                 backend=self.args.backend)
#         elif self.need_decode:
#             self.ofs = oechem.oemolostream(str(self.args.data_out))
#         else:
#             self.ofs = open(str(self.args.data_out), 'wb')
#
#     def write(self, mol, port):
#         if self.in_orion or not self.need_decode:
#             self.ofs.write(mol)
#         else:
#             oechem.OEWriteMolecule(self.ofs, self.decoder.decode(mol))
#
#     def end(self):
#         if self.in_orion:
#             self.ofs.complete()
#         else:
#             self.ofs.close()