def fastrocs_query(self, qmol, numHits, host):
    """Submit a shape query to a FastROCS XML-RPC server and return the hits.

    The query molecule is serialized to OEB in memory, submitted to
    ``http://<host>``, and the query status is polled until the server
    reports that ``total <= current``. Each returned molecule is copied and
    stripped of SD data and cached shape/color information.

    Args:
        qmol: Query molecule, written with ``oechem.OEWriteMolecule``.
        numHits: Number of hits requested from the server; must not be None.
        host: Server address appended to ``"http://"`` (no scheme).

    Returns:
        A list of ``oechem.OEMol`` hits, or the integer ``1`` if the server
        raises a ``Fault`` while the status is being polled (existing
        contract, kept for callers).
    """
    with self.logger("fastrocs_query") as logger:
        # Serialize the query to an in-memory OEB buffer.
        ofs = oechem.oemolostream()
        ofs.SetFormat(oechem.OEFormat_OEB)
        ofs.openstring()
        oechem.OEWriteMolecule(ofs, qmol)
        query_payload = ofs.GetString()

        server = ServerProxy("http://" + host)
        data = Binary(query_payload)
        dargs = {
            'altStarts': 'random',
            'tversky': False,
            'shapeOnly': False,
        }
        # Kept as an assert so the exception type seen by callers is unchanged.
        assert numHits is not None
        idx = server.SubmitQuery(data, numHits, 'oeb', 'oeb', dargs)

        # Poll until the server reports completion.
        # NOTE(review): the ``True`` argument presumably requests a blocking
        # status call, which is why no sleep is used here - confirm against
        # the FastROCS server documentation.
        while True:
            try:
                current, total = server.QueryStatus(idx, True)
            except Fault as e:
                logger.error(str(e))
                return 1
            # total == 0 means nothing to compare against yet; keep polling.
            if total != 0 and total <= current:
                break

        results = server.QueryResults(idx)
        ifs = oechem.oemolistream()
        ifs.openstring(results.data)
        ifs.SetFormat(oechem.OEFormat_OEB)

        mols = []
        for mol in ifs.GetOEMols():
            # Copy first: the stream reuses the molecule it yields.
            good_mol = oechem.OEMol(mol)
            oechem.OEAddExplicitHydrogens(good_mol)
            oechem.OEClearSDData(good_mol)
            oeshape.OEDeleteCompressedColorAtoms(good_mol)
            oeshape.OEClearCachedSelfColor(good_mol)
            oeshape.OEClearCachedSelfShape(good_mol)
            oeshape.OERemoveColorAtoms(good_mol)
            mols.append(good_mol)
        return mols
def from_oemol(self, from_oemol):
    """Overlay conformers of a molecule onto ``self.refmol`` and return the first hit.

    Tautomers of the input are enumerated, each tautomer's stereoisomers are
    enumerated with ``OEFlipper``, and conformers are built with Omega.
    Every successfully built molecule is added to a ROCS overlay, which is
    then run against ``self.refmol``.

    Args:
        from_oemol: Input molecule to enumerate and overlay.

    Returns:
        A cleaned ``oechem.OEMol`` copy of the first overlay result (explicit
        hydrogens added; SD data and shape/color caches removed), or ``None``
        if the overlay yields no results.
    """
    with self.logger("from_oemol") as logger:
        tautomer_options = oequacpac.OETautomerOptions()
        tautomer_options.SetMaxTautomersGenerated(4096)
        tautomer_options.SetMaxTautomersToReturn(16)
        tautomer_options.SetCarbonHybridization(True)
        tautomer_options.SetMaxZoneSize(50)
        tautomer_options.SetApplyWarts(True)
        pKa_norm = True

        omegaOpts = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Pose)
        omegaOpts.SetStrictAtomTypes(False)
        omegaOpts.SetSampleHydrogens(True)
        omegaOpts.SetMaxSearchTime(30)
        omegaOpts.SetFixDeleteH(True)
        omega = oeomega.OEOmega(omegaOpts)

        options = oeshape.OEROCSOptions()
        overlayoptions = oeshape.OEOverlayOptions()
        overlayoptions.SetOverlapFunc(
            oeshape.OEOverlapFunc(oeshape.OEAnalyticShapeFunc()))
        options.SetOverlayOptions(overlayoptions)
        options.SetConfsPerHit(200)
        rocs = oeshape.OEROCS(options)

        for tautomer in oequacpac.OEGetReasonableTautomers(
                from_oemol, tautomer_options, pKa_norm):
            # The two progress messages were previously swapped between loops.
            logger.log("got tautomer")
            for enantiomer in oeomega.OEFlipper(tautomer, 4, False):
                logger.log("got enantiomer")
                enantiomer_ = oechem.OEMol(enantiomer)
                ret_code = omega.Build(enantiomer_)
                if ret_code != oeomega.OEOmegaReturnCode_Success:
                    logger.error("got oemeg_failed",
                                 oeomega.OEGetOmegaError(ret_code))
                else:
                    rocs.AddMolecule(oechem.OEMol(enantiomer_))

        for res in rocs.Overlay(self.refmol):
            # One copy is enough to detach the result from the ROCS object.
            good_mol = oechem.OEMol(res.GetOverlayConfs())
            oechem.OEAddExplicitHydrogens(good_mol)
            oechem.OEClearSDData(good_mol)
            oeshape.OEDeleteCompressedColorAtoms(good_mol)
            oeshape.OEClearCachedSelfColor(good_mol)
            oeshape.OEClearCachedSelfShape(good_mol)
            oeshape.OERemoveColorAtoms(good_mol)
            # Only the first overlay result is returned.
            return good_mol

        logger.error("Returning None.")
        return None
def reorder_sd_props(mol: oechem.OEGraphMol) -> None:
    """Rewrite the SD data of *mol* in place so TOTAL_STRAIN_TAG comes first.

    The value stored under ``TOTAL_STRAIN_TAG`` is read with
    ``oechem.OEGetSDData`` and written as the first entry; every other
    tag/value pair follows in its existing order. The molecule's SD data is
    cleared and re-set, so the function mutates *mol* and returns nothing.

    Args:
        mol: Molecule whose SD data is reordered in place.
    """
    strain_value = oechem.OEGetSDData(mol, TOTAL_STRAIN_TAG)
    ordered_pairs = [(TOTAL_STRAIN_TAG, strain_value)]
    # Collect the remaining pairs before clearing; extend() drains the
    # generator immediately, so nothing is read after OEClearSDData.
    ordered_pairs.extend(
        (dp.GetTag(), dp.GetValue())
        for dp in oechem.OEGetSDDataPairs(mol)
        if dp.GetTag() != TOTAL_STRAIN_TAG
    )

    oechem.OEClearSDData(mol)
    for tag, value in ordered_pairs:
        oechem.OESetSDData(mol, tag, value)
    # A second read/clear/re-set pass used to follow here; it rewrote the
    # same pairs in the same order and has been removed as redundant.
def FilterMolData(self, mol):
    """Strip SD data from *mol* that is not listed in ``self.fields``.

    When ``self.asFloating`` is truthy, a listed tag is also rejected (with
    an OEThrow warning) if its value cannot be converted by ``float``.

    Returns:
        ``0`` if the molecule has no SD data or ``self.fields`` is empty
        (all SD data is cleared in the latter case), ``-1`` if
        ``self.fields`` is None, otherwise the number of tags kept. If no
        tag is kept, all SD data is cleared.
    """
    if not oechem.OEHasSDData(mol):
        return 0

    wanted = self.fields
    if wanted is None:
        return -1
    if len(wanted) == 0:
        oechem.OEClearSDData(mol)
        return 0

    rejected_tags = []
    kept_count = 0
    for pair in oechem.OEGetSDDataPairs(mol):
        tag = pair.GetTag()
        keep = tag in wanted
        if keep and self.asFloating:
            value = oechem.OEGetSDData(mol, tag)
            try:
                float(value)
            except ValueError:
                oechem.OEThrow.Warning("Failed to convert %s to numeric value (%s) in %s" %
                                       (tag, value, mol.GetTitle()))
                keep = False
        if keep:
            kept_count += 1
        else:
            rejected_tags.append(tag)

    if kept_count:
        # Deletion is deferred so the SD data is not mutated while iterating.
        for tag in rejected_tags:
            oechem.OEDeleteSDData(mol, tag)
    else:
        oechem.OEClearSDData(mol)
    return kept_count
# Read all submitted designs: Compounds with the key substructure will be retained print('Reading submitted designs...') # Drop columns that cause trouble for OpenEye import pandas as pd drop_columns = ['Submission Rationale', 'Submission Notes'] df = pd.read_csv(submissions_csv_filename, dtype=str) df.drop(columns=drop_columns, inplace=True) import tempfile with tempfile.NamedTemporaryFile(suffix='.csv') as csv_file: df.to_csv(csv_file.name, header=True, index=False) # Read file with oechem.oemolistream(csv_file.name) as ifs: mol = oechem.OEGraphMol() while oechem.OEReadMolecule(ifs, mol): # Clear SD tags oechem.OEClearSDData(mol) # Store the molecule mols.append(mol.CreateCopy()) print(f'{len(mols)} molecules read') # Aggregate all compound designs source_filenames = [ # Filtered synthetic designs 'filtered/transformations-final-ligands.csv', ] for source_filename in source_filenames: with oechem.oemolistream(source_filename) as ifs: mol = oechem.OEGraphMol() while oechem.OEReadMolecule(ifs, mol): # Clear SD tags oechem.OEClearSDData(mol)
def construct_dihedral_energy_profile(torsion_conformers, num_points=24):
    """Combine scanned torsion conformers into one molecule plus an energy profile.

    Each truthy entry of *torsion_conformers* contributes one energy, read
    from the ``PSI4_ENERGY`` SD tag of its active conformer and stored at
    the index given by the last two characters of its ``CONFORMER_LABEL``
    tag. Energies are made relative to the minimum; angles without an energy
    are reported with a warning and set to ``-1``.

    Args:
        torsion_conformers: Sequence of conformer molecules; falsy entries
            are skipped. The first entry seeds the output molecule.
        num_points: Number of evenly spaced angles covering 0-360 degrees.

    Returns:
        A tuple ``(output_conformers, torsional_strain, torsion_inchi_list)``:
        the combined multi-conformer molecule, a ``(num_points, 2)`` array of
        ``(angle, relative energy)`` rows, and the list of torsion InChI keys
        built from the heavy-atom neighbours of the central bond.

    Raises:
        ValueError: If *torsion_conformers* contains no usable conformer.
    """
    angle_list = np.array([360 * i / num_points for i in range(num_points)])
    profile = np.full(num_points, np.nan)

    fragment_label = None
    tagged_mol = None  # last usable conformer; source of the fragment torsion tag
    for mol in torsion_conformers:
        if not mol:
            continue
        tagged_mol = mol
        conf = mol.GetActive()
        conf_title = get_sd_data(conf, "CONFORMER_LABEL")
        tor_atoms = get_sd_data(mol, "TORSION_ATOMS_ParentMol").split()
        # The label ends in "_NN": strip it for the parent name, parse NN as the index.
        parent_name = conf_title[:-3]
        dih_label = "_".join(str(x) for x in tor_atoms)
        fragment_label = parent_name + "_" + dih_label
        angle_idx = int(conf_title[-2:])
        # Builtin float(): the np.float alias was removed in NumPy 1.24.
        energy = float(get_sd_data(conf, "PSI4_ENERGY"))
        profile[angle_idx] = energy
        logging.debug("angle_idx: %d", angle_idx)
        logging.debug("Psi4 Energy: %f", energy)

    if tagged_mol is None:
        raise ValueError("No valid torsion conformers were provided.")

    # check for angles where no energies are available
    # (element-wise NaN mask; a scalar np.all(...) never selected single angles)
    for angle in angle_list[np.isnan(profile)]:
        logging.warning(
            "Warning: No energies found for angle %.1f for fragment: %s",
            angle, fragment_label)

    # calculate relative energies
    min_energy = np.nanmin(profile)
    profile -= min_energy
    profile[np.isnan(profile)] = -1  # set nans to -1
    torsional_strain = np.column_stack((angle_list, profile))

    # combine conformers
    output_conformers = oechem.OEMol(torsion_conformers[0])
    output_conformers.DeleteConfs()
    output_conformers.SetTitle(fragment_label)

    # setup normalization
    torsion_tag = "TORSION_ATOMS_FRAGMENT"
    torsion_atoms_in_fragment = get_sd_data(tagged_mol, torsion_tag).split()
    print(torsion_atoms_in_fragment)
    dihedral_atom_indices = [int(x) - 1 for x in torsion_atoms_in_fragment]
    dih, _ = get_dihedral(output_conformers, dihedral_atom_indices)

    for old_conf in torsion_conformers:
        if not old_conf:
            continue
        new_conf = output_conformers.NewConf(old_conf)
        normalize_coordinates(new_conf, dih)
        oechem.OEClearSDData(new_conf)
        # Copy all SD data except the per-conformer title/label tags.
        for dp in oechem.OEGetSDDataPairs(old_conf.GetActive()):
            if dp.GetTag() not in ["OEConfTitle", "CONFORMER_LABEL"]:
                oechem.OESetSDData(new_conf, dp.GetTag(), dp.GetValue())
        torsion_angle = get_sd_data(old_conf, "TORSION_ANGLE")
        new_conf.SetTitle(fragment_label + ": Angle " + torsion_angle)

    write_energy_profile_to_sddata(output_conformers, torsional_strain.copy())

    # Calculate all possible torsion inchi keys for this fragment
    torsion_inchi_list = []
    inchi_key = oechem.OECreateInChIKey(output_conformers)
    _, b, c, _ = get_torsion_oeatom_list(output_conformers)
    for a in b.GetAtoms(oechem.OEIsHeavy()):
        for d in c.GetAtoms(oechem.OEIsHeavy()):
            # Skip neighbours that are the central-bond atoms themselves.
            if a.GetIdx() == c.GetIdx() or d.GetIdx() == b.GetIdx():
                continue
            torsion_inchi = inchi_key + get_modified_inchi_key(
                output_conformers, [a, b, c, d])
            torsion_inchi_list.append(torsion_inchi)

    return output_conformers, torsional_strain, torsion_inchi_list
def gen_starting_confs(
    mol,
    torsion_library,
    max_one_bond_away=True,
    num_conformers=MAX_CONFS,
    rms_cutoff=0.0,
    energy_window=25,
):
    """Generate starting conformers for a fragment while tracking its torsion atoms.

    The four torsion atoms are read from the ``TORSION_ATOMS_FRAGMENT`` SD
    tag (stored values are decremented by one to obtain atom indices),
    tagged on a multi-conformer copy of *mol*, and looked up again after
    conformer generation so the SD tags written to the result use the
    copy's current atom indices.

    Args:
        mol: Input molecule carrying the ``TORSION_ATOMS_FRAGMENT`` and
            ``TORSION_ATOMS_ParentMol`` SD tags.
        torsion_library: Passed through to ``configure_omega``.
        max_one_bond_away: Currently ignored; kept for backward
            compatibility (the code path that used it was disabled).
        num_conformers: Conformers are generated only when this is > 1.
        rms_cutoff: Passed through to ``configure_omega``.
        energy_window: Passed through to ``configure_omega``.

    Returns:
        The multi-conformer ``oechem.OEMol`` with refreshed torsion SD tags
        and a ``CONFORMER_LABEL`` / title on every conformer.

    Raises:
        ValueError: If the torsion tag is missing or conformer generation
            fails.
    """
    # Identify the atoms in the dihedral
    TAGNAME = "TORSION_ATOMS_FRAGMENT"
    if not has_sd_data(mol, TAGNAME):
        raise ValueError(
            "Molecule does not have the SD Data Tag '{}'.".format(TAGNAME))

    dihedralAtomIndices = [
        int(x) - 1 for x in get_sd_data(mol, TAGNAME).split()
    ]
    inDih = oechem.OEOrAtom(
        oechem.OEOrAtom(
            oechem.OEHasAtomIdx(dihedralAtomIndices[0]),
            oechem.OEHasAtomIdx(dihedralAtomIndices[1]),
        ),
        oechem.OEOrAtom(
            oechem.OEHasAtomIdx(dihedralAtomIndices[2]),
            oechem.OEHasAtomIdx(dihedralAtomIndices[3]),
        ),
    )

    mol1 = mol.CreateCopy()
    mc_mol = oechem.OEMol(mol1)

    # Tag torsion atoms with their dihedral index (0..3). If an atom index
    # appears more than once, the later position wins.
    dih_position = {
        atom_idx: position
        for position, atom_idx in enumerate(dihedralAtomIndices[:4])
    }
    for atom in mc_mol.GetAtoms():
        position = dih_position.get(atom.GetIdx())
        if position is not None:
            atom.SetData("dihidx", position)

    if num_conformers > 1:
        # Rotatable-bond criterion: general rotors plus the project-specific
        # `isRotatableBond` predicate. Two earlier variants (rotors limited
        # to one bond away from the central bond, and special bonds only)
        # were hard-disabled and have been removed as unreachable code.
        rotor_predicate = oechem.OEOrBond(
            oechem.OEIsRotor(), oechem.PyBondPredicate(isRotatableBond))

        # Initialize conformer generator and multi-conformer library
        conf_generator = configure_omega(torsion_library, rotor_predicate,
                                         rms_cutoff, energy_window,
                                         num_conformers)

        # Generate conformers
        if not conf_generator(mc_mol, inDih):
            raise ValueError("Conformers cannot be generated.")
        logging.debug(
            "Generated a total of %d conformers for %s.",
            mc_mol.NumConfs(),
            mol.GetTitle(),
        )

    # Reassign: recover the torsion atoms' current indices from the tags.
    new_didx = [-1, -1, -1, -1]
    for atom in mc_mol.GetAtoms():
        if atom.HasData("dihidx"):
            new_didx[atom.GetData("dihidx")] = atom.GetIdx()

    # `mol` is never modified, so its tag and title can be read once.
    parent_torsion = get_sd_data(mol, "TORSION_ATOMS_ParentMol")

    oechem.OEClearSDData(mc_mol)
    oechem.OESetSDData(mc_mol, TAGNAME,
                       " ".join(str(x + 1) for x in new_didx))
    oechem.OESetSDData(mc_mol, "TORSION_ATOMS_ParentMol", parent_torsion)
    oechem.OESetSDData(
        mc_mol,
        "TORSION_ATOMPROP",
        f"cs1:0:1;1%{new_didx[0]+1}:1%{new_didx[1]+1}:1%{new_didx[2]+1}:1%{new_didx[3]+1}",
    )

    label_prefix = mol.GetTitle() + "_" + "_".join(parent_torsion.split())
    for conf_no, conf in enumerate(mc_mol.GetConfs()):
        conformer_label = label_prefix + "_{:02d}".format(conf_no)
        oechem.OESetSDData(conf, "CONFORMER_LABEL", conformer_label)
        conf.SetTitle(conformer_label)

    return mc_mol
def ClearProps(ifs, ofs):
    """Copy every molecule from *ifs* to *ofs* with its SD data removed.

    Args:
        ifs: Input molecule stream providing ``GetOEGraphMols()``.
        ofs: Output molecule stream passed to ``oechem.OEWriteMolecule``.
    """
    molecules = ifs.GetOEGraphMols()
    for current in molecules:
        # Strip the SD data first so the written record carries none.
        oechem.OEClearSDData(current)
        oechem.OEWriteMolecule(ofs, current)