def dump_sd_data(mol):
    """Print every SD tag/value pair attached to ``mol``.

    Molecule-level pairs are printed first.  When ``mol`` is an
    ``oechem.OEMol`` (or a subclass of it), the pairs attached to each
    conformer returned by ``mol.GetConfs()`` are printed as well.  A blank
    line is printed at the end.

    Parameters
    ----------
    mol : OpenEye molecule
        Molecule whose SD data is written to standard output.
    """
    print("Data Attached at the molecule level:")
    for dp in oechem.OEGetSDDataPairs(mol):
        print(dp.GetTag(), ":", dp.GetValue())

    # isinstance instead of an exact type comparison, so subclasses of OEMol
    # also get their conformer data listed.
    if isinstance(mol, oechem.OEMol):
        print("\n\n" + 10 * "-" + "Data Attached to Conformers:")
        for conf_id, conf in enumerate(mol.GetConfs()):
            print("Data attached to conformer {}:".format(conf_id))
            for dp in oechem.OEGetSDDataPairs(conf):
                print(dp.GetTag(), ":", dp.GetValue())
    print()
def get_sd_list(mol, taglabel):
    """
    Collect one SD value per conformer for the tag matching ``taglabel``.

    Parameters
    ----------
    mol : OEMol with N conformers
    taglabel : string
        text searched for (case-insensitively, as a substring) in the SD tag
        names of each conformer

    Returns
    -------
    sdlist : list
        value of the first matching SD tag of each conformer, in conformer
        order; a conformer without a matching tag contributes no entry, so
        the list can be shorter than N
    """
    values = []
    for conf in mol.GetConfs():
        matching_values = (
            pair.GetValue()
            for pair in oechem.OEGetSDDataPairs(conf)
            if taglabel.lower() in pair.GetTag().lower()
        )
        # Only the first hit per conformer is recorded.
        for first_hit in matching_values:
            values.append(first_hit)
            break
    return values
def SDF2CSV(ifs, csv):
    """Write the SD data of every molecule in ``ifs`` to ``csv`` as CSV text.

    The input is read twice: a first pass collects the unique SD tags in
    order of first appearance, then the stream is rewound and one row per
    molecule is written.  The first column is the molecule title; a tag that
    a molecule does not carry yields an empty field.

    Fields are emitted through the standard ``csv`` writer, so titles, tags
    or values containing commas, quotes or newlines are quoted instead of
    silently corrupting the column layout.  Fields without such characters
    are written exactly as before.

    Parameters
    ----------
    ifs : molecule input stream
        Must provide ``GetOEGraphMols()`` and ``rewind()``.
    csv : output stream
        Any object with a ``write(str)`` method.
    """
    # The ``csv`` parameter shadows the standard-library module, hence the
    # aliased local import.
    import csv as csv_module

    # First pass: unique tags, keeping first-seen order.  The set makes the
    # membership test O(1) instead of scanning the list each time.
    taglist = []
    seen_tags = set()
    for mol in ifs.GetOEGraphMols():
        for dp in oechem.OEGetSDDataPairs(mol):
            tag = dp.GetTag()
            if tag not in seen_tags:
                seen_tags.add(tag)
                taglist.append(tag)
    ifs.rewind()

    writer = csv_module.writer(csv, lineterminator="\n")

    # Column labels.
    writer.writerow(["Title"] + taglist)

    # Second pass: one row per molecule.
    for mol in ifs.GetOEGraphMols():
        row = [mol.GetTitle()]
        for tag in taglist:
            if oechem.OEHasSDData(mol, tag):
                row.append(oechem.OEGetSDData(mol, tag))
            else:
                row.append("")
        writer.writerow(row)
def items(self):
    """Yield a ``(tag, value)`` tuple for each SD data pair of ``self._mol``.

    NOTE(review): probably not thread-safe -- it is unclear what happens if
    a data pair is added or deleted while the iteration is in progress.
    """
    for pair in oechem.OEGetSDDataPairs(self._mol):
        tag = pair.GetTag()
        value = pair.GetValue()
        yield tag, value
def make_psi_input(mol, label, method, basisset, SPE=False, mem=None):
    """
    Build the text of a Psi4 input file for one conformer.

    Parameters
    ----------
    mol: single OEChem conformer with coordinates
    label: string - name of the molecule. Can be an empty string.
    method: string - specification of method (see Psi4 website for options)
    basisset: string - specification of basis set
    SPE: boolean - False (default) for geom opt. True for single point E calcns
    mem: string - specify Psi4 job memory. E.g. "2 Gb" "2000 Mb" "2000000 Kb"

    Returns
    -------
    inputstring: string - containing contents of whole input file for this conf

    Notes
    -----
    If the molecule carries an SD tag containing "atoms to freeze", its value
    is read as a bracketed, comma-separated list (e.g. "[1, 2, 3, 4]") and the
    first four entries are frozen in the optimization.
    """
    parts = []

    # specify memory requirements, if defined
    if mem is not None:
        parts.append("memory %s\n" % mem)

    parts.append('molecule %s {\n' % label)

    # charge and multiplicity; multiplicity hardwired to singlet (usually is)
    netCharge = oechem.OENetCharge(mol)
    parts.append(' %s 1' % netCharge)

    # get coordinates of each atom
    xyz = oechem.OEFloatArray(3)
    for atom in mol.GetAtoms():
        mol.GetCoords(atom, xyz)
        parts.append('\n %s %10.4f %10.4f %10.4f'
                     % (oechem.OEGetAtomicSymbol(atom.GetAtomicNum()),
                        xyz[0], xyz[1], xyz[2]))
    parts.append('\n units angstrom\n}')

    # check if mol has a "freeze" tag
    for x in oechem.OEGetSDDataPairs(mol):
        if "atoms to freeze" in x.GetTag():
            # Split the list text into entries instead of picking characters
            # at fixed positions, which only worked for single-digit indices.
            raw = x.GetValue()
            cleaned = raw.replace("[", "").replace("]", "").replace(" ", "")
            freeze_list = cleaned.split(",")
            parts.append(
                "\n\nfreeze_list = \"\"\"\n {} xyz\n {} xyz\n {} xyz\n {} xyz\n\"\"\"".format(
                    freeze_list[0], freeze_list[1], freeze_list[2], freeze_list[3]))
            parts.append("\nset optking frozen_cartesian $freeze_list")
            parts.append(
                "\nset optking dynamic_level = 1\nset optking consecutive_backsteps = 2"
                "\nset optking intrafrag_step_limit = 0.1\nset optking interfrag_step_limit = 0.1")

    # explicitly specify MP2 RI-auxiliary basis for Ahlrichs basis set
    # http://www.psicode.org/psi4manual/master/basissets_byfamily.html
    parts.append('\n\nset basis %s' % (basisset))
    if method.lower() == 'mp2' and 'def' in basisset and basisset.lower() != 'def2-qzvpd':
        parts.append('\nset df_basis_mp2 %s-ri' % (basisset))
    parts.append('\nset freeze_core True')

    # specify command for type of calculation; the identity test is kept so
    # that only a literal False selects the geometry optimization
    if SPE is False:
        parts.append('\noptimize(\'%s\')' % (method))
    else:
        parts.append('\nenergy(\'%s\')' % (method))

    return "".join(parts)
def sanitize_fragment(mol):
    """Delete every SD tag of ``mol`` that is not in a fixed approved set.

    Only ``TORSION_ATOMPROP``, ``TORSION_ATOMS_FRAGMENT``,
    ``TORSION_ATOMS_ParentMol`` and ``COUNT`` are kept.  The molecule is
    modified in place; nothing is returned.
    """
    approved_tags = {
        "TORSION_ATOMPROP",
        "TORSION_ATOMS_FRAGMENT",
        "TORSION_ATOMS_ParentMol",
        "COUNT",
    }

    # Collect first, delete afterwards: removing SD data while the pair
    # iterator is still being consumed risks skipping or invalidating entries.
    unwanted_tags = [
        dp.GetTag()
        for dp in oechem.OEGetSDDataPairs(mol)
        if dp.GetTag() not in approved_tags
    ]
    for tag in unwanted_tags:
        oechem.OEDeleteSDData(mol, tag)
def reorder_sd_props(mol: oechem.OEGraphMol):
    """Rewrite the SD data of ``mol`` with ``TOTAL_STRAIN_TAG`` written first.

    The current value of ``TOTAL_STRAIN_TAG`` is read, all SD data is cleared,
    and the pairs are set again starting with the strain tag followed by the
    remaining pairs in their existing order.  Afterwards the resulting pairs
    are read back, cleared and set once more.

    NOTE(review): the second clear/re-set pass looks redundant; it is kept
    because its purpose cannot be confirmed from this function alone.
    """
    ordered = [(TOTAL_STRAIN_TAG, oechem.OEGetSDData(mol, TOTAL_STRAIN_TAG))]
    ordered.extend(
        (dp.GetTag(), dp.GetValue())
        for dp in oechem.OEGetSDDataPairs(mol)
        if dp.GetTag() != TOTAL_STRAIN_TAG
    )
    oechem.OEClearSDData(mol)
    for tag, value in ordered:
        oechem.OESetSDData(mol, tag, value)

    # Second pass: snapshot what is now stored and write it back unchanged.
    snapshot = [(dp.GetTag(), dp.GetValue()) for dp in oechem.OEGetSDDataPairs(mol)]
    oechem.OEClearSDData(mol)
    for tag, value in snapshot:
        oechem.OESetSDData(mol, tag, value)
def process(self, mol, port):
    """Run the NPT simulation for ``mol`` and emit the result.

    Options are copied from ``self.opt``; "temperature" and "pressure" SD
    tags on the molecule override the corresponding options (converted to
    float when possible).  On success the packed molecule is emitted on
    ``self.success``; on any exception the traceback is logged, the error
    text is stored on the molecule under 'error' and the molecule is emitted
    on ``self.failure``.

    Parameters
    ----------
    mol : molecule to simulate
    port : input port (not used in this method)
    """
    try:
        # The copy of the dictionary option as local variable
        # is necessary to avoid filename collisions due to
        # the parallel cube processes
        opt = dict(self.opt)

        # Update cube simulation parameters with the molecule's SD tags,
        # if present
        new_args = {
            dp.GetTag(): dp.GetValue()
            for dp in oechem.OEGetSDDataPairs(mol)
            if dp.GetTag() in ["temperature", "pressure"]
        }
        if new_args:
            for k in new_args:
                try:
                    new_args[k] = float(new_args[k])
                except (TypeError, ValueError):
                    # Not numeric: keep the raw SD value.  Only conversion
                    # errors are swallowed, not e.g. KeyboardInterrupt.
                    pass
            self.log.info(
                "Updating parameters for molecule: {}\n{}".format(
                    mol.GetTitle(), new_args))
            opt.update(new_args)

        if utils.PackageOEMol.checkTags(mol, ['Structure']):
            gd = utils.PackageOEMol.unpack(mol)
            opt['outfname'] = '{}-{}'.format(gd['IDTag'], self.opt['outfname'])

        mdData = utils.MDData(mol)
        opt['molecule'] = mol

        # NOTE(review): ``gd`` is only bound when the 'Structure' tag check
        # above succeeds; otherwise this line raises and the molecule goes to
        # the failure port -- confirm that this is intended.
        self.log.info('START NPT SIMULATION %s' % gd['IDTag'])
        simtools.simulation(mdData, **opt)

        packedmol = mdData.packMDData(mol)
        self.success.emit(packedmol)

    except Exception as e:
        # Attach error message to the molecule that failed
        self.log.error(traceback.format_exc())
        mol.SetData('error', str(e))
        # Return failed mol
        self.failure.emit(mol)

    return
def get_sd_list(mol, datum, package='Psi4', method=None, basisset=None, taglabel=None):
    """
    Collect the value of one SD tag from every conformer of mol.

    Parameters
    ----------
    mol: OEChem molecule with all of its conformers
    datum: string description of property of interest
        options implemented: "QM opt energy" "MM opt energy"
    package: software package used for QM calculation. Psi4 or Turbomole.
    method: string, for specific properties. e.g. 'mp2'
    basisset: string, for specific properties. e.g. '6-31+G(d)'
    taglabel : string
        tag text to search for; when None it is obtained from
        define_tag(datum, package, method, basisset)

    Returns
    -------
    sdlist: list with, per conformer, the value of the first SD tag whose
        name contains taglabel (case-insensitive), or 'nan' when a
        "note on opt." tag reporting "did not finish" is met first.
        Conformers matching neither add no entry.

    """
    if taglabel is None:
        taglabel = define_tag(datum, package, method, basisset)

    collected = []
    for conf in mol.GetConfs():
        for pair in oechem.OEGetSDDataPairs(conf):
            tag_lower = pair.GetTag().lower()

            # Case: opt did not finish --> append nan
            unfinished = ("note on opt." in tag_lower
                          and "did not finish" in pair.GetValue().lower())
            if unfinished:
                collected.append('nan')
                break

            # Case: want energy value OR want original index number
            if taglabel.lower() in tag_lower:
                collected.append(pair.GetValue())
                break
    return collected
def FilterMolData(self, mol):
    """Reduce the SD data of ``mol`` to the tags listed in ``self.fields``.

    Returns 0 when the molecule has no SD data, -1 when ``self.fields`` is
    None, and 0 (after clearing all SD data) when ``self.fields`` is empty.
    Otherwise a tag is kept when it is listed in ``self.fields`` and, if
    ``self.asFloating`` is set, its value converts to ``float``; a warning is
    issued for values that do not convert.  If no tag is kept all SD data is
    cleared, else only the rejected tags are deleted.  The number of kept
    pairs is returned.
    """
    if not oechem.OEHasSDData(mol):
        return 0
    if self.fields is None:
        return -1
    if len(self.fields) == 0:
        oechem.OEClearSDData(mol)
        return 0

    kept = 0
    rejected_tags = []
    for pair in oechem.OEGetSDDataPairs(mol):
        name = pair.GetTag()
        wanted = name in self.fields
        if wanted and self.asFloating:
            text = oechem.OEGetSDData(mol, name)
            try:
                float(text)
            except ValueError:
                oechem.OEThrow.Warning("Failed to convert %s to numeric value (%s) in %s" %
                                       (name, text, mol.GetTitle()))
                wanted = False
        if wanted:
            kept += 1
        else:
            rejected_tags.append(name)

    # Deletion happens only after the iteration over the pairs has finished.
    if kept:
        for name in rejected_tags:
            oechem.OEDeleteSDData(mol, name)
    else:
        oechem.OEClearSDData(mol)
    return kept
def process(self, solute, port):
    """Solvate ``solute`` and emit the solvated system.

    Options are copied from ``self.opt``; "solvents", "molar_fractions" and
    "density" SD tags on the solute override the corresponding options.
    Values other than "molar_fractions" are converted to float when possible.
    On success the solvated system (titled like the solute) is emitted on
    ``self.success``; on any exception the traceback is logged, the error
    text is stored on the solute under 'error' and the solute is emitted on
    ``self.failure``.

    Parameters
    ----------
    solute : molecule to solvate
    port : input port (not used in this method)
    """
    try:
        opt = dict(self.opt)

        # Update cube simulation parameters with the molecule's SD tags,
        # if present
        new_args = {
            dp.GetTag(): dp.GetValue()
            for dp in oechem.OEGetSDDataPairs(solute)
            if dp.GetTag() in ["solvents", "molar_fractions", "density"]
        }
        if new_args:
            for k in new_args:
                if k == 'molar_fractions':
                    continue
                try:
                    new_args[k] = float(new_args[k])
                except (TypeError, ValueError):
                    # Not numeric (e.g. the solvent list): keep the raw SD
                    # value.  Only conversion errors are swallowed.
                    pass
            self.log.info(
                "Updating parameters for molecule: {}\n{}".format(
                    solute.GetTitle(), new_args))
            opt.update(new_args)

        # Solvate the system
        sol_system = oesolvate(solute, **opt)
        self.log.info("Solvated System atom number {}".format(
            sol_system.NumAtoms()))
        sol_system.SetTitle(solute.GetTitle())
        self.success.emit(sol_system)

    except Exception as e:
        # Attach error message to the molecule that failed
        self.log.error(traceback.format_exc())
        solute.SetData('error', str(e))
        # Return failed mol
        self.failure.emit(solute)

    return
docked_molecule = oechem.OEGraphMol() oechem.OEReadMolecule(ifs, docked_molecule) if docked_molecule is None: print('No docking poses available') import sys sys.exit(0) import os from openeye import oechem, oedocking # Write molecule as CSV with cleared SD tags output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - docked.csv') if not os.path.exists(output_filename): docked_molecule_clean = docked_molecule.CreateCopy() for sdpair in oechem.OEGetSDDataPairs(docked_molecule_clean): if sdpair.GetTag() not in ['Hybrid2', 'fragments', 'site', 'docked_fragment']: oechem.OEDeleteSDData(docked_molecule_clean, sdpair.GetTag()) with oechem.oemolostream(output_filename) as ofs: oechem.OEWriteMolecule(ofs, docked_molecule_clean) # Write molecule as SDF output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - ligand.sdf') if not os.path.exists(output_filename): with oechem.oemolostream(output_filename) as ofs: oechem.OEWriteMolecule(ofs, docked_molecule) # Write molecule as mol2 output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - ligand.mol2') if not os.path.exists(output_filename): with oechem.oemolostream(output_filename) as ofs:
def keys(self):
    """Yield the tag of each SD data pair of ``self._mol``.

    NOTE(review): probably not thread-safe -- it is unclear what happens if
    a data pair is added or deleted while the iteration is in progress.
    """
    pairs = oechem.OEGetSDDataPairs(self._mol)
    for pair in pairs:
        tag = pair.GetTag()
        yield tag
def construct_dihedral_energy_profile(torsion_conformers, num_points=24):
    """Merge torsion-scan conformers into one molecule with an energy profile.

    Parameters
    ----------
    torsion_conformers : sequence of molecules
        One entry per scan point; falsy entries are skipped.  Each valid entry
        must carry the SD tags "CONFORMER_LABEL" and "PSI4_ENERGY" on its
        active conformer and "TORSION_ATOMS_ParentMol",
        "TORSION_ATOMS_FRAGMENT" and "TORSION_ANGLE" on the molecule.  The
        last two characters of "CONFORMER_LABEL" are parsed as the index of
        the scan point.
    num_points : int
        Number of evenly spaced angles (over 360 degrees) in the profile.

    Returns
    -------
    output_conformers : oechem.OEMol
        Multi-conformer molecule built from the valid input entries.
    torsional_strain : numpy.ndarray
        ``(num_points, 2)`` array of angle and energy relative to the minimum;
        scan points without an energy are set to -1.
    torsion_inchi_list : list of str
        One key per heavy-atom torsion found around the central bond.

    Raises
    ------
    ValueError
        If ``torsion_conformers`` contains no valid molecule.
    """
    angle_list = np.array([360 * i / num_points for i in range(num_points)])
    profile = np.full(num_points, np.nan)

    fragment_label = None
    reference_mol = None  # last valid molecule, used for the torsion atoms
    for mol in torsion_conformers:
        if not mol:
            continue

        reference_mol = mol
        conf = mol.GetActive()
        conf_title = get_sd_data(conf, "CONFORMER_LABEL")
        tor_atoms = get_sd_data(mol, "TORSION_ATOMS_ParentMol").split()
        parent_name = conf_title[:-3]
        dih_label = "_".join(str(x) for x in tor_atoms)
        fragment_label = parent_name + "_" + dih_label
        angle_idx = int(conf_title[-2:])

        # Built-in float: the np.float alias was removed from NumPy.
        energy = float(get_sd_data(conf, "PSI4_ENERGY"))
        profile[angle_idx] = energy
        logging.debug("angle_idx: %d", angle_idx)
        logging.debug("Psi4 Energy: %f", energy)

    if reference_mol is None:
        raise ValueError("torsion_conformers contains no valid molecule")

    # check for angles where no energies are available; the element-wise
    # mask selects exactly the angles whose profile entry is still NaN
    for angle in angle_list[np.isnan(profile)]:
        logging.warning(
            "Warning: No energies found for angle {:.1f} for fragment: {}".
            format(angle, fragment_label))

    # calculate relative energies
    min_energy = np.nanmin(profile)
    profile -= min_energy
    profile[np.isnan(profile)] = -1  # set nans to -1
    torsional_strain = np.column_stack((angle_list, profile))

    # combine conformers
    output_conformers = oechem.OEMol(torsion_conformers[0])
    output_conformers.DeleteConfs()
    title = fragment_label
    output_conformers.SetTitle(title)

    # setup normalization
    torsion_tag = "TORSION_ATOMS_FRAGMENT"
    torsion_atoms_in_fragment = get_sd_data(reference_mol, torsion_tag).split()
    print(torsion_atoms_in_fragment)
    # the tag stores 1-based atom numbers
    dihedral_atom_indices = [int(x) - 1 for x in torsion_atoms_in_fragment]
    dih, _ = get_dihedral(output_conformers, dihedral_atom_indices)

    for old_conf in torsion_conformers:
        if not old_conf:
            continue
        new_conf = output_conformers.NewConf(old_conf)
        normalize_coordinates(new_conf, dih)

        # copy the SD data of the source conformer, except its title tags
        oechem.OEClearSDData(new_conf)
        for dp in oechem.OEGetSDDataPairs(old_conf.GetActive()):
            if dp.GetTag() not in ["OEConfTitle", "CONFORMER_LABEL"]:
                oechem.OESetSDData(new_conf, dp.GetTag(), dp.GetValue())

        torsion_angle = get_sd_data(old_conf, "TORSION_ANGLE")
        title = fragment_label + ": Angle " + torsion_angle
        new_conf.SetTitle(title)

    write_energy_profile_to_sddata(output_conformers, torsional_strain.copy())

    # Calculate all possible torsion inchi keys for this fragment
    torsion_inchi_list = []
    inchi_key = oechem.OECreateInChIKey(output_conformers)
    _, b, c, _ = get_torsion_oeatom_list(output_conformers)
    for a in b.GetAtoms(oechem.OEIsHeavy()):
        for d in c.GetAtoms(oechem.OEIsHeavy()):
            # skip the central-bond atoms themselves
            if a.GetIdx() == c.GetIdx() or d.GetIdx() == b.GetIdx():
                continue
            torsion_inchi = inchi_key + get_modified_inchi_key(
                output_conformers, [a, b, c, d])
            torsion_inchi_list.append(torsion_inchi)

    return output_conformers, torsional_strain, torsion_inchi_list
def IdentifyMinima(mol, tag, ThresholdE, ThresholdRMSD):
    """
    For a molecule's set of conformers computed with some level of theory,
    whittle down unique conformers based on energy and RMSD.

    Conformers without an energy under the requested tag are removed.  Of two
    conformers whose energies differ by at most ThresholdE and whose RMSD is
    below ThresholdRMSD, the later one is removed.  Progress is reported with
    print statements.

    Parameters
    ----------
    mol : OEChem molecule with all of its conformers
    tag : string
        text searched for (case-insensitively, as a substring) in the SD tag
        names to locate the energy tag
    ThresholdE : float
        value for abs(E1-E2), below which 2 confs are "same".
        Units are hartrees (default output units of Psi4).  Energies from a
        tag whose name contains "mm" are divided by 627.5095 before the
        comparison.
    ThresholdRMSD : float
        value for RMSD, below which 2 confs are "same".
        Units are in Angstrom (Psi4 default)

    Returns
    -------
    boolean
        True if successful filter + delete.
        False if there's only one conf without the tag, or if every conformer
        lacks an energy.

    """
    # Parameters for OpenEye RMSD calculation
    automorph = True
    heavyOnly = False
    overlay = True

    # declare variables for conformers to delete
    confsToDel = set()
    delCount = 0

    wanted = tag.lower()

    # check if SD tag exists for the case of single conformer
    if mol.NumConfs() == 1:
        # Look at every pair rather than deciding on the first one only.
        # NOTE(review): as before, the pairs are read from ``mol`` itself;
        # the original fetched the single conformer but never used it --
        # confirm which object is meant to carry the tag.
        return any(wanted in x.GetTag().lower()
                   for x in oechem.OEGetSDDataPairs(mol))

    # Loop over conformers twice (NxN diagonal comparison of RMSDs)
    for confRef in mol.GetConfs():
        print(" ~ Reference: %s conformer %d" % (mol.GetTitle(), confRef.GetIdx() + 1))

        # get real tag (correct for capitalization); reset per conformer so a
        # conformer without the tag neither fails on an unbound name nor
        # silently reuses the previous conformer's tag
        taglabel = None
        for x in oechem.OEGetSDDataPairs(confRef):
            if wanted in x.GetTag().lower():
                taglabel = x.GetTag()

        # delete cases that don't have energy (opt not converged; or other)
        if taglabel is None or not oechem.OEHasSDData(confRef, taglabel):
            confsToDel.add(confRef.GetIdx())
            delCount += 1
            continue
        refE = float(oechem.OEGetSDData(confRef, taglabel))

        for confTest in mol.GetConfs():
            # upper right triangle comparison
            if confTest.GetIdx() <= confRef.GetIdx():
                continue
            # skip cases already set for removal
            if confTest.GetIdx() in confsToDel:
                continue
            # delete cases that don't have energy
            if not oechem.OEHasSDData(confTest, taglabel):
                confsToDel.add(confTest.GetIdx())
                continue

            testE = float(oechem.OEGetSDData(confTest, taglabel))
            # if MM (not Psi4) energies, convert absERel to Hartrees
            if 'mm' in taglabel.lower():
                absERel = abs(refE - testE) / 627.5095
            else:
                absERel = abs(refE - testE)

            # if energies are diff enough --> confs are diff --> keep & skip ahead
            if absERel > ThresholdE:
                continue

            # if energies are similar, see if they are diff by RMSD
            rmsd = oechem.OERMSD(confRef, confTest, automorph, heavyOnly, overlay)

            # if measured_RMSD < threshold_RMSD --> confs are same --> delete
            if rmsd < ThresholdRMSD:
                confsToDel.add(confTest.GetIdx())

    # for the same molecule, delete tagged conformers
    print("%s original number of conformers: %d" % (mol.GetTitle(), mol.NumConfs()))
    if delCount == mol.NumConfs():
        # all conformers in this mol have been tagged for deletion
        return False
    for conf in mol.GetConfs():
        if conf.GetIdx() in confsToDel:
            print('Removing %s conformer index %d' % (mol.GetTitle(), conf.GetIdx()))
            if not mol.DeleteConf(conf):
                oechem.OEThrow.Fatal("Unable to delete %s GetIdx() %d"
                                     % (mol.GetTitle(), conf.GetIdx()))
    return True
files_missing = False for phase in ['complex', 'ligand']: for ext in ['gro', 'top']: filename = os.path.join( gromacs_basedir, f'{molecule.GetTitle()} - {phase}.{ext}') if not os.path.exists(filename): files_missing = True if files_missing: continue # Add RUN number oechem.OESetSDData(molecule, 'run', f'RUN{run_index}') if args.clean: for sdpair in oechem.OEGetSDDataPairs(molecule): if sdpair.GetTag() not in [ 'Hybrid2', 'docked_fragment', 'fragments', 'site', 'run' ]: oechem.OEDeleteSDData(molecule, sdpair.GetTag()) # Copy files run_dir = os.path.join(args.docked_basedir, 'fah-gromacs', f'RUN{run_index}') os.makedirs(run_dir, exist_ok=True) import shutil for phase in ['complex', 'ligand']: for ext in ['gro', 'top']: src = os.path.join( gromacs_basedir,
def checkSDData(molecule):
    """
    Build a dictionary mapping each SD tag of the molecule to its value.

    When a tag occurs more than once, the value of its last pair is kept.
    """
    return {
        pair.GetTag(): pair.GetValue()
        for pair in oechem.OEGetSDDataPairs(molecule)
    }
def process(self, solvated_system, port):
    """Run a Yank solvation free energy calculation for ``solvated_system``.

    The system is split into components, OpenMM systems are created for the
    solvated structure and for the solute alone, the inputs are written to a
    temporary directory and the Yank experiment is run there.  The resulting
    free energy and its error are stored on the solute as the SD tags
    'DG_yank_solv' and 'dG_yank_solv', and the solute is emitted on
    ``self.success``.  On any exception the traceback is logged, the error
    text is stored on ``solvated_system`` under 'error' and it is emitted on
    ``self.failure``.

    "temperature" and "pressure" SD tags on the solute override the
    corresponding options (converted to float when possible).

    Parameters
    ----------
    solvated_system : solvated molecule carrying the MD data
    port : input port (not used in this method)
    """
    try:
        # The copy of the dictionary option as local variable
        # is necessary to avoid filename collisions due to
        # the parallel cube processes
        opt = dict(self.opt)

        # Split the complex in components
        protein, solute, water, excipients = oeommutils.split(solvated_system, ligand_res_name='LIG')

        # Update cube simulation parameters with the molecule's SD tags,
        # if present
        new_args = {dp.GetTag(): dp.GetValue() for dp in oechem.OEGetSDDataPairs(solute)
                    if dp.GetTag() in ["temperature", "pressure"]}
        if new_args:
            for k in new_args:
                try:
                    new_args[k] = float(new_args[k])
                except (TypeError, ValueError):
                    # Not numeric: keep the raw SD value.  Only conversion
                    # errors are swallowed.
                    pass
            self.log.info("Updating parameters for molecule: {}\n{}".format(solute.GetTitle(), new_args))
            opt.update(new_args)

        # Extract the MD data
        mdData = data_utils.MDData(solvated_system)
        solvated_structure = mdData.structure

        # Extract the ligand parmed structure
        solute_structure = solvated_structure.split()[0][0]
        solute_structure.box = None

        # Set the ligand title
        solute.SetTitle(solvated_system.GetTitle())

        # Create the solvated and vacuum system
        solvated_omm_sys = solvated_structure.createSystem(nonbondedMethod=app.PME,
                                                           nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                                                           constraints=app.HBonds,
                                                           removeCMMotion=False)

        solute_omm_sys = solute_structure.createSystem(nonbondedMethod=app.NoCutoff,
                                                       constraints=app.HBonds,
                                                       removeCMMotion=False)

        # This is a note from:
        # https://github.com/MobleyLab/SMIRNOFF_paper_code/blob/e5012c8fdc4570ca0ec750f7ab81dd7102e813b9/scripts/create_input_files.py#L114
        # Fix switching function.
        for force in solvated_omm_sys.getForces():
            if isinstance(force, openmm.NonbondedForce):
                force.setUseSwitchingFunction(True)
                force.setSwitchingDistance((opt['nonbondedCutoff'] - 1.0) * unit.angstrom)

        # Write out all the required files and set-run the Yank experiment
        with TemporaryDirectory() as output_directory:

            opt['Logger'].info("Output Directory {}".format(output_directory))

            solvated_structure_fn = os.path.join(output_directory, "solvated.pdb")
            solvated_structure.save(solvated_structure_fn, overwrite=True)

            solute_structure_fn = os.path.join(output_directory, "solute.pdb")
            solute_structure.save(solute_structure_fn, overwrite=True)

            # Context managers close the files even if a write fails.
            solvated_omm_sys_serialized = XmlSerializer.serialize(solvated_omm_sys)
            solvated_omm_sys_serialized_fn = os.path.join(output_directory, "solvated.xml")
            with open(solvated_omm_sys_serialized_fn, 'w') as solvated_f:
                solvated_f.write(solvated_omm_sys_serialized)

            solute_omm_sys_serialized = XmlSerializer.serialize(solute_omm_sys)
            solute_omm_sys_serialized_fn = os.path.join(output_directory, "solute.xml")
            with open(solute_omm_sys_serialized_fn, 'w') as solute_f:
                solute_f.write(solute_omm_sys_serialized)

            # Build the Yank Experiment
            yaml_builder = ExperimentBuilder(yank_solvation_template.format(
                verbose='yes' if opt['verbose'] else 'no',
                minimize='yes' if opt['minimize'] else 'no',
                output_directory=output_directory,
                timestep=opt['timestep'],
                nsteps_per_iteration=opt['nsteps_per_iteration'],
                number_iterations=opt['iterations'],
                temperature=opt['temperature'],
                pressure=opt['pressure'],
                solvated_pdb_fn=solvated_structure_fn,
                solvated_xml_fn=solvated_omm_sys_serialized_fn,
                solute_pdb_fn=solute_structure_fn,
                solute_xml_fn=solute_omm_sys_serialized_fn))

            # Run Yank
            yaml_builder.run_experiments()

            exp_dir = os.path.join(output_directory, "experiments")

            # Calculate solvation free energy, solvation Enthalpy and their errors
            DeltaG_solvation, dDeltaG_solvation, DeltaH, dDeltaH = yankutils.analyze_directory(exp_dir)

            # Add result to the original molecule in kcal/mol
            oechem.OESetSDData(solute, 'DG_yank_solv', str(DeltaG_solvation))
            oechem.OESetSDData(solute, 'dG_yank_solv', str(dDeltaG_solvation))

        # Emit the ligand
        self.success.emit(solute)

    except Exception as e:
        # Attach an error message to the molecule that failed
        self.log.error(traceback.format_exc())
        solvated_system.SetData('error', str(e))
        # Return failed mol
        self.failure.emit(solvated_system)

    return
def DumpSDData(mol):
    """Log the title and every SD tag/value pair of ``mol`` at INFO level.

    One record is logged for the title, one per SD data pair, and an empty
    record at the end as a separator.
    """
    logging.info("SD data of %s", mol.GetTitle())
    # loop over SD data
    for dp in oechem.OEGetSDDataPairs(mol):
        logging.info("%s : %s", dp.GetTag(), dp.GetValue())
    # logging.info() requires a message argument; an empty string gives the
    # blank separator line.
    logging.info("")
def make_psi_input(mol, label, method, basisset, calctype='opt', mem=None):
    """
    Get coordinates from input mol, and generate/format
    input text for Psi4 calculation.

    Parameters
    ----------
    mol : OpenEye OEMol
        OEMol with coordinates
    label : string
        Name of molecule with integer identifier (for conformers).
    method: string
        Name of the method as understood by Psi4. Example: "mp2"
    basisset : string
        Name of the basis set as understood by Psi4. Example: "def2-sv(p)"
    calctype : string
        What kind of Psi4 calculation to run. Supported inputs are:
        'opt' for geometry optimization,
        'spe' for single point energy calculation, and
        'hess' for Hessian calculation.
    mem : string
        How much memory each Psi4 job should take. If not specified, the
        default in Psi4 is 500 Mb. Examples: "2000 MB" "1.5 GB"
        http://www.psicode.org/psi4manual/master/psithoninput.html

    Returns
    -------
    inputstring : string
        Contents of Psi4 input file to be written out

    Notes
    -----
    An unsupported calctype terminates the program through sys.exit.
    For calctype 'opt', an SD tag containing "atoms to freeze" is read as a
    bracketed, comma-separated list and its first four entries are frozen.

    """
    # check that specified calctype is valid
    if calctype not in {'opt', 'spe', 'hess'}:
        sys.exit("Specify a valid calculation type.")

    parts = []

    # specify memory requirements, if defined
    if mem is not None:
        parts.append("memory %s\n" % mem)

    parts.append('molecule %s {\n' % label)

    # charge and multiplicity; multiplicity hardwired to singlet (usually is)
    netCharge = oechem.OENetCharge(mol)
    parts.append(' %s 1' % netCharge)

    # get atomic symbol and coordinates of each atom
    xyz = oechem.OEFloatArray(3)
    for atom in mol.GetAtoms():
        mol.GetCoords(atom, xyz)
        parts.append('\n %s %10.4f %10.4f %10.4f'
                     % (oechem.OEGetAtomicSymbol(atom.GetAtomicNum()),
                        xyz[0], xyz[1], xyz[2]))
    parts.append('\n units angstrom\n}')

    # check if mol has a "freeze" tag; only relevant for optimizations, so the
    # SD data is not scanned at all for the other calculation types
    if calctype == "opt":
        for x in oechem.OEGetSDDataPairs(mol):
            if "atoms to freeze" not in x.GetTag():
                continue
            cleaned = x.GetValue().replace("[", "").replace("]", "").replace(" ", "")
            freeze_list = cleaned.split(",")
            parts.append(
                "\n\nfreeze_list = \"\"\"\n {} xyz\n {} xyz\n {} "
                "xyz\n {} xyz\n\"\"\"".format(freeze_list[0], freeze_list[1],
                                              freeze_list[2], freeze_list[3]))
            parts.append("\nset optking frozen_cartesian $freeze_list")
            parts.append(
                "\nset optking dynamic_level = 1\nset optking "
                "consecutive_backsteps = 2\nset optking intrafrag_step_limit = "
                "0.1\nset optking interfrag_step_limit = 0.1\n")

    # best practices for scf calculations
    # http://www.psicode.org/psi4manual/master/scf.html#recommendations
    # http://www.psicode.org/psi4manual/master/dft.html#recommendations
    parts.append('\n\nset scf_type df')
    parts.append('\nset guess sad')

    # explicitly specify MP2 RI-auxiliary basis for [Ahlrichs] basis set
    # http://www.psicode.org/psi4manual/master/basissets_byfamily.html
    # DFMP2 *should* get MP2 aux sets fine for [Pople and Dunning] sets
    # http://www.psicode.org/psi4manual/master/dfmp2.html
    # The family test is case-insensitive, like the comparisons below it.
    basis_lower = basisset.lower()
    if method.lower() == 'mp2' and 'def2' in basis_lower:
        if basis_lower == 'def2-sv(p)':
            parts.append('\nset df_basis_mp2 def2-sv_p_-ri')
        elif basis_lower != 'def2-qzvpd':  # no aux set for qzvpd 10-6-18
            parts.append('\nset df_basis_mp2 %s-ri' % (basisset))

    parts.append('\n\nset basis %s' % (basisset))
    parts.append('\nset freeze_core True')

    # specify command for type of calculation
    if calctype == 'opt':
        parts.append('\noptimize(\'%s\')\n\n' % (method))
    elif calctype == 'spe':
        parts.append('\nenergy(\'%s\')\n\n' % (method))
    elif calctype == 'hess':
        parts.append(
            '\nH, wfn = hessian(\'%s\', return_wfn=True)\nwfn.hessian().print_out()\n\n'
            % (method))

    return "".join(parts)
def process(self, solvated_system, port):
    """Run a Yank binding free energy calculation.

    ``solvated_system`` is indexed as ``[0]`` (solvated ligand) and ``[1]``
    (solvated complex).  OpenMM systems are created for both, the inputs are
    written to a temporary directory and the Yank experiment is run there.
    The resulting free energy and its error are stored as the SD tags
    'DG_yank_binding' and 'dG_yank_binding' on the ligand split out of the
    solvated ligand, which is emitted on ``self.success``.  On any exception
    the traceback is logged, the error text is stored on the solvated complex
    under 'error' and the complex is emitted on ``self.failure``.

    "temperature" and "pressure" SD tags on the solvated ligand override the
    corresponding options (converted to float when possible).

    Parameters
    ----------
    solvated_system : indexable pair (solvated ligand, solvated complex)
    port : input port (not used in this method)
    """
    try:
        opt = dict(self.opt)

        # Extract the solvated ligand and the solvated complex
        solvated_ligand = solvated_system[0]
        solvated_complex = solvated_system[1]

        # Update cube simulation parameters with the molecule's SD tags,
        # if present
        new_args = {dp.GetTag(): dp.GetValue() for dp in oechem.OEGetSDDataPairs(solvated_ligand)
                    if dp.GetTag() in ["temperature", "pressure"]}
        if new_args:
            for k in new_args:
                try:
                    new_args[k] = float(new_args[k])
                except (TypeError, ValueError):
                    # Not numeric: keep the raw SD value.  Only conversion
                    # errors are swallowed.
                    pass
            self.log.info("Updating parameters for molecule: {}\n{}".format(solvated_ligand.GetTitle(), new_args))
            opt.update(new_args)

        # Extract the MD data
        mdData_ligand = data_utils.MDData(solvated_ligand)
        solvated_ligand_structure = mdData_ligand.structure

        mdData_complex = data_utils.MDData(solvated_complex)
        solvated_complex_structure = mdData_complex.structure

        # Create the solvated OpenMM systems
        solvated_complex_omm_sys = solvated_complex_structure.createSystem(nonbondedMethod=app.PME,
                                                                           nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                                                                           constraints=app.HBonds,
                                                                           removeCMMotion=False)

        solvated_ligand_omm_sys = solvated_ligand_structure.createSystem(nonbondedMethod=app.PME,
                                                                         nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                                                                         constraints=app.HBonds,
                                                                         removeCMMotion=False)

        # Write out all the required files and set-run the Yank experiment
        with TemporaryDirectory() as output_directory:

            opt['Logger'].info("Output Directory {}".format(output_directory))

            solvated_complex_structure_fn = os.path.join(output_directory, "complex.pdb")
            solvated_complex_structure.save(solvated_complex_structure_fn, overwrite=True)

            solvated_ligand_structure_fn = os.path.join(output_directory, "solvent.pdb")
            solvated_ligand_structure.save(solvated_ligand_structure_fn, overwrite=True)

            # Context managers close the files even if a write fails.
            solvated_complex_omm_serialized = XmlSerializer.serialize(solvated_complex_omm_sys)
            solvated_complex_omm_serialized_fn = os.path.join(output_directory, "complex.xml")
            with open(solvated_complex_omm_serialized_fn, 'w') as solvated_complex_f:
                solvated_complex_f.write(solvated_complex_omm_serialized)

            solvated_ligand_omm_serialized = XmlSerializer.serialize(solvated_ligand_omm_sys)
            solvated_ligand_omm_serialized_fn = os.path.join(output_directory, "solvent.xml")
            with open(solvated_ligand_omm_serialized_fn, 'w') as solvated_ligand_f:
                solvated_ligand_f.write(solvated_ligand_omm_serialized)

            # Build the Yank Experiment
            yaml_builder = ExperimentBuilder(yank_binding_template.format(
                verbose='yes' if opt['verbose'] else 'no',
                minimize='yes' if opt['minimize'] else 'no',
                output_directory=output_directory,
                timestep=opt['timestep'],
                nsteps_per_iteration=opt['nsteps_per_iteration'],
                number_iterations=opt['iterations'],
                temperature=opt['temperature'],
                pressure=opt['pressure'],
                complex_pdb_fn=solvated_complex_structure_fn,
                complex_xml_fn=solvated_complex_omm_serialized_fn,
                solvent_pdb_fn=solvated_ligand_structure_fn,
                solvent_xml_fn=solvated_ligand_omm_serialized_fn,
                restraints=opt['restraints'],
                ligand_resname=opt['ligand_resname']))

            # Run Yank
            yaml_builder.run_experiments()

            exp_dir = os.path.join(output_directory, "experiments")

            DeltaG_binding, dDeltaG_binding, DeltaH, dDeltaH = yankutils.analyze_directory(exp_dir)

            protein, ligand, water, excipients = oeommutils.split(solvated_ligand,
                                                                  ligand_res_name=opt['ligand_resname'])
            # Add result to the extracted ligand in kcal/mol
            oechem.OESetSDData(ligand, 'DG_yank_binding', str(DeltaG_binding))
            oechem.OESetSDData(ligand, 'dG_yank_binding', str(dDeltaG_binding))

        self.success.emit(ligand)

    except Exception as e:
        # Attach an error message to the molecule that failed
        self.log.error(traceback.format_exc())
        solvated_system[1].SetData('error', str(e))
        # Return failed mol
        self.failure.emit(solvated_system[1])

    return
def has_dist(mol):
    """Return True if any SD tag name of ``mol`` contains the text 'dist'."""
    return any(
        'dist' in sdpair.GetTag()
        for sdpair in oechem.OEGetSDDataPairs(mol)
    )
def generate_fragalysis(
    series: CompoundSeriesAnalysis,
    fragalysis_config: FragalysisConfig,
    results_path: str,
) -> None:
    """
    Generate input and upload to fragalysis from fragalysis_config

    Fragalysis spec:https://discuss.postera.ai/t/providing-computed-poses-for-others-to-look-at/1155/8?u=johnchodera

    The ligand SDF named in the config is copied into a
    ``fragalysis_upload`` directory under ``results_path``, each ligand is
    stripped of hydrogens and of all SD tags except the free energy
    estimates, the tags required by Fragalysis are added, a header molecule
    describing the fields is put first, and the resulting SDF is uploaded.

    Parameters
    ----------
    series : CompoundSeriesAnalysis
        Analysis results (not read in this function)
    fragalysis_config : FragalysisConfig
        Fragalysis input parameters
    results_path : str
        The path to the results
    """
    import os
    from openeye import oechem
    from rich.progress import track

    # make a directory to store fragalysis upload data
    fa_path = os.path.join(results_path, "fragalysis_upload")
    os.makedirs(fa_path, exist_ok=True)

    # set paths
    ligands_path = os.path.join(results_path, fragalysis_config.ligands_filename)
    fa_ligands_path = os.path.join(fa_path, fragalysis_config.fragalysis_sdf_filename)

    # copy sprint generated sdf to new name for fragalysis input
    from shutil import copyfile

    copyfile(ligands_path, fa_ligands_path)

    # Read ligand poses
    molecules = []
    with oechem.oemolistream(ligands_path) as ifs:
        oemol = oechem.OEGraphMol()
        while oechem.OEReadMolecule(ifs, oemol):
            molecules.append(oemol.CreateCopy())
    print(f"{len(molecules)} ligands read")

    # Get zipped PDB if specified
    if fragalysis_config.ref_pdb == "references.zip":
        consolidate_protein_snapshots_into_pdb(
            oemols=molecules,
            results_path=results_path,
            pdb_filename="references.pdb",
            fragalysis_input=True,
            fragalysis_path=fa_path,
        )

    descriptions = {
        "DDG (kcal/mol)": "Relative computed free energy difference",
        "dDDG (kcal/mol)": "Uncertainty in computed relative free energy difference",
        "ref_mols": "a comma separated list of the fragments that inspired the design of the new molecule (codes as they appear in fragalysis - e.g. x0104_0,x0692_0)",
        "ref_pdb": "The name of the fragment (and corresponding Mpro fragment structure) with the best scoring hybrid docking pose",
        "original SMILES": "the original SMILES of the compound before any computation was carried out",
    }

    # Preprocess molecules
    tags_to_retain = {"DDG (kcal/mol)", "dDDG (kcal/mol)"}
    index = 0
    for oemol in track(molecules, "Preprocessing molecules for Fragalysis..."):
        # Remove hydrogens
        oechem.OESuppressHydrogens(oemol, True)
        # Get original SMILES
        original_smiles = oechem.OEGetSDData(oemol, "SMILES")
        # Remove irrelevant SD tags.  The tags are collected before deleting:
        # removing SD data while the pair iterator is still being consumed
        # risks skipping or invalidating entries.
        irrelevant_tags = [
            sdpair.GetTag()
            for sdpair in oechem.OEGetSDDataPairs(oemol)
            if sdpair.GetTag() not in tags_to_retain
        ]
        for tag in irrelevant_tags:
            oechem.OEDeleteSDData(oemol, tag)
        # Add required SD tags
        oechem.OESetSDData(oemol, "ref_mols", fragalysis_config.ref_mols)

        # If ref_pdb is zip file, point each molecule at its own snapshot
        if fragalysis_config.ref_pdb == "references.zip":
            oechem.OESetSDData(oemol, "ref_pdb", f"references/references_{index}.pdb")
            index += 1
        else:
            oechem.OESetSDData(oemol, "ref_pdb", fragalysis_config.ref_pdb)

        oechem.OESetSDData(oemol, "original SMILES", original_smiles)

    # Add initial header molecule whose SD fields hold the descriptions
    from datetime import datetime

    # The header is a copy of the first ligand
    oemol = molecules[0].CreateCopy()
    # Replace the value of each SD field with its description; the tags are
    # listed up front so the SD data is not modified while being iterated
    header_tags = [sdpair.GetTag() for sdpair in oechem.OEGetSDDataPairs(oemol)]
    for tag in header_tags:
        oechem.OESetSDData(oemol, tag, descriptions[tag])

    # Add other fields
    oemol.SetTitle("ver_1.2")
    oechem.OESetSDData(oemol, "ref_url", fragalysis_config.ref_url)
    oechem.OESetSDData(oemol, "submitter_name", fragalysis_config.submitter_name)
    oechem.OESetSDData(oemol, "submitter_email", fragalysis_config.submitter_email)
    oechem.OESetSDData(oemol, "submitter_institution", fragalysis_config.submitter_institution)
    oechem.OESetSDData(oemol, "generation_date", datetime.today().strftime("%Y-%m-%d"))
    oechem.OESetSDData(oemol, "method", fragalysis_config.method)
    molecules.insert(0, oemol)  # make it first molecule

    # Write molecules, header first
    with oechem.oemolostream(fa_ligands_path) as ofs:
        for oemol in track(molecules, description="Writing Fragalysis SDF file..."):
            oechem.OEWriteMolecule(ofs, oemol)

    # TODO add check SDF step here?

    # Upload to fragalysis
    print("Uploading to Fragalysis...")
    print(f"--> Target: {fragalysis_config.target_name}")

    from fragalysis_api.xcextracter.computed_set_update import update_cset, REQ_URL

    if fragalysis_config.new_upload:
        update_set = "None"  # new upload
        print(f"--> Uploading a new set")
    else:
        # set name: submitter name and method, whitespace removed, joined by "-"
        update_set = ("".join(fragalysis_config.submitter_name.split()) + "-" +
                      "".join(fragalysis_config.method.split()))
        print(f"--> Updating set: {update_set}")

    if fragalysis_config.ref_pdb == "references.zip":
        pdb_zip_path = os.path.join(fa_path, "references.zip")
    else:
        pdb_zip_path = None

    taskurl = update_cset(
        REQ_URL,
        target_name=fragalysis_config.target_name,
        sdf_path=fa_ligands_path,
        pdb_zip_path=pdb_zip_path,
        update_set=update_set,
        upload_key=fragalysis_config.upload_key,
        submit_choice=1,
        add=False,
    )

    print(f"Upload complete, check upload status: {taskurl}")
def get_sd_list(mol, datum, Package='Psi4', Method=None, Basisset=None):
    """
    Get list of specified SD tag for all confs in mol.

    Parameters
    ----------
    mol: OEChem molecule with all of its conformers
    datum: string description of property of interest
        options implemented:
            "QM opt energy", "QM opt energy scs", "QM opt energy initial",
            "QM spe", "QM spe scs", "MM opt energy", "original index",
            "opt runtime", "spe runtime", "opt step"
    Package: software package named in the QM tag labels. Psi4 or Turbomole.
    Method: string, for specific properties. e.g. 'mp2'
    Basisset: string, for specific properties. e.g. '6-31+G(d)'

    Returns
    -------
    sdlist: list with the values found on the conformers, in conformer order.
        For each conformer the SD data pairs are scanned in order and the
        first pair that decides the outcome wins:
          * a "note on opt." tag whose value contains "did not finish"
            contributes the string 'nan';
          * a tag containing the requested label (case-insensitive substring
            match) contributes that pair's value.
        A conformer with neither kind of pair contributes nothing, so the
        list can be shorter than the number of conformers.

    Raises
    ------
    SystemExit (through sys.exit) when `datum` is not an implemented option.

    """
    # Labels that embed the package, method and basis set.
    qm_templates = {
        "QM opt energy": "QM %s Final Opt. Energy (Har) %s/%s",
        "QM opt energy scs": "QM %s Final Opt. Energy (Har) SCS-%s/%s",
        "QM opt energy initial": "QM %s Initial Opt. Energy (Har) %s/%s",
        "QM spe": "QM %s Single Pt. Energy (Har) %s/%s",
        "QM spe scs": "QM %s Single Pt. Energy (Har) SCS-%s/%s",
        "opt runtime": "QM %s Opt. Runtime (sec) %s/%s",
        "spe runtime": "QM %s Single Pt. Runtime (sec) %s/%s",
        "opt step": "QM %s Opt. Steps %s/%s",
    }
    # Labels that are independent of the QM settings.
    fixed_labels = {
        "MM opt energy": "MM Szybki Newton Energy",
        "original index": "Original omega conformer number",
    }

    if datum in qm_templates:
        taglabel = qm_templates[datum] % (Package, Method, Basisset)
    elif datum in fixed_labels:
        taglabel = fixed_labels[datum]
    else:
        sys.exit("Error in input tag of extracting SD data.")

    # The comparison is case-insensitive; lower-case the label only once.
    wanted = taglabel.lower()

    sd_values = []
    for conf in mol.GetConfs():
        for pair in oechem.OEGetSDDataPairs(conf):
            tag = pair.GetTag().lower()

            # Case: opt did not finish --> append nan
            if "note on opt." in tag and "did not finish" in pair.GetValue().lower():
                sd_values.append('nan')
                break

            # Case: want energy value OR want original index number
            if wanted in tag:
                sd_values.append(pair.GetValue())
                break

    return sd_values
from openeye import oegraphsim

# @ <SNIPPET-SDF2FP>
# Read the SDF file named on the command line, rebuild a fingerprint from
# every SD data pair whose tag is a valid fingerprint type string, and report
# how many molecules and fingerprints were encountered.

if len(sys.argv) != 2:
    oechem.OEThrow.Usage("%s <infile>" % sys.argv[0])

ifs = oechem.oemolistream()
if not ifs.open(sys.argv[1]):
    oechem.OEThrow.Fatal("Unable to open %s for reading" % sys.argv[1])

# Only SDF input is accepted.
if ifs.GetFormat() != oechem.OEFormat_SDF:
    oechem.OEThrow.Fatal("%s input file has to be an SDF file" % sys.argv[1])

molcounter = 0
fpcounter = 0

for mol in ifs.GetOEGraphMols():
    molcounter += 1

    for dp in oechem.OEGetSDDataPairs(mol):
        tag = dp.GetTag()

        # Skip SD fields whose tag is not a fingerprint type string.
        if not oegraphsim.OEIsValidFPTypeString(tag):
            continue

        fpcounter += 1

        # The tag names the fingerprint type; the value is its hex encoding.
        hexdata = dp.GetValue()
        fp = oegraphsim.OEFingerPrint()
        fp.SetFPTypeBase(oegraphsim.OEGetFPType(tag))
        fp.FromHexString(hexdata)

print("Number of molecules = %d" % molcounter)
print("Number of fingerprints = %d" % fpcounter)
# @ </SNIPPET-SDF2FP>
def DumpSDData(mol):
    """
    Print the title of `mol` followed by each of its SD data pairs.

    Every pair is written on its own line as "<tag> : <value>", and a blank
    line is printed after the last pair.

    Parameters
    ----------
    mol : molecule object providing GetTitle() and readable by
        oechem.OEGetSDDataPairs
    """
    title = mol.GetTitle()
    print("SD data of", title)

    # One output line per SD field.
    for pair in oechem.OEGetSDDataPairs(mol):
        tag, value = pair.GetTag(), pair.GetValue()
        print(tag, ':', value)

    print()
def KeepProps(proplist, ifs, ofs):
    """
    Copy molecules from `ifs` to `ofs`, keeping only the listed SD tags.

    For every molecule read from `ifs`, each SD data tag that is not in
    `proplist` is deleted, then the molecule is written to `ofs`.

    Parameters
    ----------
    proplist : container of SD tag names to keep (tested with `in`)
    ifs : input molecule stream providing GetOEGraphMols()
    ofs : output molecule stream passed to oechem.OEWriteMolecule
    """
    for mol in ifs.GetOEGraphMols():
        # Snapshot the tags first so the molecule's SD data is not modified
        # while it is still being iterated.
        tags = [dp.GetTag() for dp in oechem.OEGetSDDataPairs(mol)]

        for tag in tags:
            if tag not in proplist:
                oechem.OEDeleteSDData(mol, tag)

        oechem.OEWriteMolecule(ofs, mol)