def sanitize_fragment(mol):
    """Strip every SD data entry from ``mol`` except a fixed set of tags.

    The tags that survive are ``TORSION_ATOMPROP``,
    ``TORSION_ATOMS_FRAGMENT``, ``TORSION_ATOMS_ParentMol`` and ``COUNT``.
    ``mol`` is modified in place and nothing is returned.
    """
    keep = (
        "TORSION_ATOMPROP",
        "TORSION_ATOMS_FRAGMENT",
        "TORSION_ATOMS_ParentMol",
        "COUNT",
    )
    for pair in oechem.OEGetSDDataPairs(mol):
        tag = pair.GetTag()
        if tag not in keep:
            oechem.OEDeleteSDData(mol, tag)
def delete_tag(mol, tag):
    """
    Remove the SD data stored under ``tag`` from each conformer of ``mol``.

    Parameters
    ----------
    mol : OEChem molecule
        must expose ``GetConfs()``; every conformer it yields is processed
    tag : str
        exact label of the SD data to delete
    """
    for conformer in mol.GetConfs():
        oechem.OEDeleteSDData(conformer, tag)
def delete_tag(mol, tag):
    """
    Drop one SD tag from every conformer of a multi-conformer molecule.

    Note: ``mol`` has to be a multi-conformer molecule. Passing an
    ``OEGraphMol`` fails with
    AttributeError: 'OEGraphMol' object has no attribute 'GetConfs'.

    Parameters
    ----------
    mol : multi-conformer OEChem molecule
    tag : string
        exact label of the data to delete
    """
    conformers = mol.GetConfs()
    for conf in conformers:
        oechem.OEDeleteSDData(conf, tag)
def FilterMolData(self, mol):
    """Prune the SD data of ``mol`` down to the tags listed in ``self.fields``.

    A tag is kept when it appears in ``self.fields`` and, if
    ``self.asFloating`` is set, its value can be converted with ``float``.
    A warning is emitted through ``oechem.OEThrow`` for every value that
    fails the conversion.

    Returns
    -------
    int
        ``0`` if ``mol`` carries no SD data, or if ``self.fields`` is empty
        (all SD data is cleared in that case); ``-1`` if ``self.fields`` is
        ``None``; otherwise the number of SD entries that were kept. When
        no entry qualifies, all SD data on ``mol`` is cleared.
    """
    if not oechem.OEHasSDData(mol):
        return 0
    if self.fields is None:
        return -1
    if not len(self.fields):
        oechem.OEClearSDData(mol)
        return 0

    kept = 0
    rejected = []
    for pair in oechem.OEGetSDDataPairs(mol):
        tag = pair.GetTag()
        if tag in self.fields:
            value = oechem.OEGetSDData(mol, tag)
            if not self.asFloating:
                kept += 1
                continue
            try:
                float(value)
            except ValueError:
                oechem.OEThrow.Warning("Failed to convert %s to numeric value (%s) in %s" %
                                       (tag, value, mol.GetTitle()))
            else:
                kept += 1
                continue
        # Reached for tags outside self.fields and for non-numeric values.
        rejected.append(tag)

    if kept:
        for tag in rejected:
            oechem.OEDeleteSDData(mol, tag)
    else:
        oechem.OEClearSDData(mol)
    return kept
def delete_sd_data(mol, tag, locator_tag):
    """Delete SD data ``tag`` from whichever object carries ``locator_tag``.

    ``mol`` itself is checked first, then ``mol.GetActive()``. The deletion
    is performed on the first of the two that has SD data under
    ``locator_tag``.

    Returns
    -------
    The result of ``oechem.OEDeleteSDData`` for the object that was
    selected, or ``False`` when neither carries ``locator_tag``.
    """
    if oechem.OEHasSDData(mol, locator_tag):
        holder = mol
    else:
        holder = mol.GetActive()
        if not oechem.OEHasSDData(holder, locator_tag):
            return False
    return oechem.OEDeleteSDData(holder, tag)
f'{molecule.GetTitle()} - {phase}.{ext}') if not os.path.exists(filename): files_missing = True if files_missing: continue # Add RUN number oechem.OESetSDData(molecule, 'run', f'RUN{run_index}') if args.clean: for sdpair in oechem.OEGetSDDataPairs(molecule): if sdpair.GetTag() not in [ 'Hybrid2', 'docked_fragment', 'fragments', 'site', 'run' ]: oechem.OEDeleteSDData(molecule, sdpair.GetTag()) # Copy files run_dir = os.path.join(args.docked_basedir, 'fah-gromacs', f'RUN{run_index}') os.makedirs(run_dir, exist_ok=True) import shutil for phase in ['complex', 'ligand']: for ext in ['gro', 'top']: src = os.path.join( gromacs_basedir, f'{molecule.GetTitle()} - {phase}.{ext}') dst = os.path.join(run_dir, f'{phase}.{ext}') shutil.copyfile(src, dst) oechem.OEWriteMolecule(ofs, molecule)
def DumpSDData(mol):
    """Print the title of ``mol`` followed by one ``tag : value`` line per SD pair."""
    print(f"SD data of {mol.GetTitle()}")
    # walk every SD tag/value pair stored on the molecule
    for pair in oechem.OEGetSDDataPairs(mol):
        print(f"{pair.GetTag()} : {pair.GetValue()}")
    print()


mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")
mol.SetTitle("benzene")

# attach two tagged values: one as tag/value strings, one as an OESDDataPair
oechem.OESetSDData(mol, "color", "brown")
oechem.OESetSDData(mol, oechem.OESDDataPair("size", "small"))
DumpSDData(mol)

# delete the "size" entry, but only if it is actually present
if oechem.OEHasSDData(mol, "size"):
    oechem.OEDeleteSDData(mol, "size")
DumpSDData(mol)

# append a second "color" value via OEAddSDData
oechem.OEAddSDData(mol, "color", "black")
DumpSDData(mol)

# wipe all SD data from the molecule
oechem.OEClearSDData(mol)
DumpSDData(mol)
# @ </SNIPPET>
def save_profile_as_sd(mol: oechem.OEGraphMol):
    """Copy the per-torsion data stored on the bonds of ``mol`` into SD tags.

    For every torsion returned by ``get_canonical_torsions(mol)`` whose
    central bond carries ``ENERGY_PROFILE_TAG`` data, the following SD
    entries are (re)written, where ``<n>`` is the 1-based position of the
    torsion in that list:

    * ``TORSION_<n>_ATOMS`` - the four torsion atom indices, 1-based,
      colon separated
    * ``TORSION_<n>_TORSIONNET_<ENERGY_PROFILE_TAG>`` - the energy profile
      string, prefixed with ``'LOW CONFIDENCE - '`` for low-confidence
      torsions
    * ``TORSION_<n>_<STRAIN_TAG>`` - the bond strain, one decimal
    * ``TORSION_<n>_ANGLE`` - the current torsion angle in degrees
    * ``TORSION_<n>_TORSIONNET_PRED_CONFIDENCE`` - high/low confidence value
    * ``TORSION_<n>_TORSIONNET_PROFILE_OFFSET`` - the profile offset

    Three summary entries are written as well: ``TOTAL_STRAIN_TAG`` (sum of
    the strains of the high-confidence torsions),
    ``NUM_TORSION_PROFILES_TAG`` and ``NUM_LOW_CONFIDENCE_TORSIONS_TAG``.
    Finally ``reorder_sd_props(mol)`` is called.

    Parameters
    ----------
    mol : oechem.OEGraphMol
        molecule whose bonds carry the torsion data; modified in place

    Returns
    -------
    The same ``mol`` object that was passed in.
    """
    # Delete-then-set the summary tags up front with empty placeholders;
    # the real values are filled in once all torsions have been processed.
    for summary_tag in (TOTAL_STRAIN_TAG,
                        NUM_TORSION_PROFILES_TAG,
                        NUM_LOW_CONFIDENCE_TORSIONS_TAG):
        oechem.OEDeleteSDData(mol, summary_tag)
        oechem.OESetSDData(mol, summary_tag, '')

    high_confidence_strain = np.zeros(1)
    num_torsion_profiles = 0
    num_low_confidence_torsions = 0

    for num, can_torsion in enumerate(get_canonical_torsions(mol)):
        bond = mol.GetBond(can_torsion.b, can_torsion.c)
        if bond is None or not bond.HasData(ENERGY_PROFILE_TAG):
            continue

        torsion_number = num + 1
        num_torsion_profiles += 1
        bond_strains = bond.GetData(STRAIN_TAG)
        offset = bond.GetData(PROFILE_OFFSET_TAG)
        skip_torsion = bond.HasData(SKIP_TORSION_TAG)

        # NOTE(review): the two tests below are not exact complements. An
        # offset exactly equal to OFFSET_THRESHOLD is labelled high
        # confidence yet is left out of the total strain. Kept as in the
        # original; confirm which side of the boundary is intended.
        if offset < OFFSET_THRESHOLD and not skip_torsion:
            high_confidence_strain += np.array(bond_strains)

        profile_str = bond.GetData(ENERGY_PROFILE_TAG)
        pred_confidence_value = HIGH_PREDICTION_CONFIDENCE_TAG
        if offset > OFFSET_THRESHOLD or skip_torsion:
            profile_str = 'LOW CONFIDENCE - ' + profile_str
            pred_confidence_value = LOW_PREDICTION_CONFIDENCE_TAG
            num_low_confidence_torsions += 1

        # The bond stores the four torsion atom indices as a
        # whitespace-separated string; the SD entry reports them 1-based.
        tor_atoms_str = bond.GetData(TORSION_ATOMS_FRAGMENT_TAG)
        ca, cb, cc, cd = map(int, tor_atoms_str.split())
        atoms_str = "{}:{}:{}:{}".format(ca + 1, cb + 1, cc + 1, cd + 1)

        torsion_atoms = [mol.GetAtom(oechem.OEHasAtomIdx(idx))
                         for idx in (ca, cb, cc, cd)]
        angle_float = oechem.OEGetTorsion(mol, *torsion_atoms) * oechem.Rad2Deg

        atoms_tag = 'TORSION_%s_ATOMS' % torsion_number
        profile_tag = 'TORSION_%d_TORSIONNET_%s' % (torsion_number, ENERGY_PROFILE_TAG)
        confidence_tag = 'TORSION_%d_TORSIONNET_PRED_CONFIDENCE' % torsion_number
        offset_tag = 'TORSION_%d_TORSIONNET_PROFILE_OFFSET' % torsion_number
        strain_tag = 'TORSION_%d_%s' % (torsion_number, STRAIN_TAG)
        angle_tag = 'TORSION_%d_ANGLE' % torsion_number

        # The delete/set sequence below is kept in its original order
        # because it determines the order in which entries are appended.
        oechem.OEDeleteSDData(mol, atoms_tag)
        oechem.OEDeleteSDData(mol, profile_tag)
        oechem.OEDeleteSDData(mol, confidence_tag)
        oechem.OEDeleteSDData(mol, offset_tag)

        oechem.OESetSDData(mol, atoms_tag, atoms_str)
        oechem.OESetSDData(mol, profile_tag, profile_str)

        oechem.OEDeleteSDData(mol, strain_tag)
        oechem.OESetSDData(mol, strain_tag, '%.1f' % bond_strains)

        oechem.OEDeleteSDData(mol, angle_tag)
        oechem.OESetSDData(mol, angle_tag, '%.1f' % angle_float)

        oechem.OESetSDData(mol, confidence_tag, pred_confidence_value)
        oechem.OESetSDData(mol, offset_tag, '%.2f' % offset)

    oechem.OESetSDData(mol, TOTAL_STRAIN_TAG, '%.1f' % high_confidence_strain[0])
    oechem.OESetSDData(mol, NUM_TORSION_PROFILES_TAG, str(num_torsion_profiles))
    oechem.OESetSDData(mol, NUM_LOW_CONFIDENCE_TORSIONS_TAG,
                       str(num_low_confidence_torsions))

    reorder_sd_props(mol)

    return mol
# Nothing to write when docking produced no pose: report it and exit with
# status 0.
if docked_molecule is None:
    print('No docking poses available')
    import sys
    sys.exit(0)

import os
from openeye import oechem, oedocking

# Write molecule as CSV with cleared SD tags.
# The tags are removed from a copy so that `docked_molecule` itself keeps all
# of its SD data for the SDF/mol2 outputs below. An existing file is never
# overwritten.
output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - docked.csv')
if not os.path.exists(output_filename):
    docked_molecule_clean = docked_molecule.CreateCopy()
    for sdpair in oechem.OEGetSDDataPairs(docked_molecule_clean):
        # Only these four tags survive on the CSV copy.
        if sdpair.GetTag() not in ['Hybrid2', 'fragments', 'site', 'docked_fragment']:
            oechem.OEDeleteSDData(docked_molecule_clean, sdpair.GetTag())
    with oechem.oemolostream(output_filename) as ofs:
        oechem.OEWriteMolecule(ofs, docked_molecule_clean)

# Write molecule as SDF (skipped if the file already exists)
output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - ligand.sdf')
if not os.path.exists(output_filename):
    with oechem.oemolostream(output_filename) as ofs:
        oechem.OEWriteMolecule(ofs, docked_molecule)

# Write molecule as mol2 (skipped if the file already exists)
output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - ligand.mol2')
if not os.path.exists(output_filename):
    with oechem.oemolostream(output_filename) as ofs:
        oechem.OEWriteMolecule(ofs, docked_molecule)
import drconvert for fragment in tqdm(all_fragments): filename = f'../docking/{prefix} - docked to {fragment}.oeb' if os.path.exists(filename): datarecords = drconvert.RecordConvertToMols(filename) for molecule in datarecords: # Annotate which fragment this was docked to oechem.OESetSDData(molecule, 'fragments', fragment) if molecule.NumAtoms() > 1000: # Store protein molecule.SetTitle(fragment) receptors[fragment] = oechem.OEMol(molecule) else: # Store ligands in best docked poses oechem.OEDeleteSDData(molecule, 'Number of Confs') CID = molecule.GetTitle() if CID not in docked_molecules: docked_molecules[CID] = oechem.OEMol(molecule) else: if score(molecule) < score(docked_molecules[CID]): docked_molecules[CID] = oechem.OEMol(molecule) # Sort compounds docked_molecules = [docked_molecules[CID] for CID in docked_molecules] docked_molecules.sort(key=score) # Write all molecules in order of increasing score print('Writing molecules to CSV...') filename = f'{prefix} - top docked.csv' with oechem.oemolostream(filename) as ofs:
def generate_fragalysis(
    series: CompoundSeriesAnalysis,
    fragalysis_config: FragalysisConfig,
    results_path: str,
) -> None:
    """
    Generate input and upload to fragalysis from fragalysis_config

    Fragalysis spec: https://discuss.postera.ai/t/providing-computed-poses-for-others-to-look-at/1155/8?u=johnchodera

    The ligand SDF found under ``results_path`` is read, each ligand is
    stripped of hydrogens and of all SD tags except the free-energy tags,
    the ``ref_mols`` / ``ref_pdb`` / ``original SMILES`` tags are added, a
    header entry describing the SD fields is prepended, and the result is
    written to ``<results_path>/fragalysis_upload`` and uploaded with
    ``update_cset``.

    Parameters
    ----------
    series : CompoundSeriesAnalysis
        Analysis results (not read by this function)
    fragalysis_config : FragalysisConfig
        Fragalysis input parameters
    results_path : str
        The path to the results
    """
    import os
    from datetime import datetime
    from shutil import copyfile

    from openeye import oechem
    from rich.progress import track

    # make a directory to store fragalysis upload data
    fa_path = os.path.join(results_path, "fragalysis_upload")
    os.makedirs(fa_path, exist_ok=True)

    # "references.zip" selects one reference PDB per ligand instead of a
    # single shared reference structure.
    use_zipped_references = fragalysis_config.ref_pdb == "references.zip"

    # set paths
    ligands_path = os.path.join(results_path, fragalysis_config.ligands_filename)
    fa_ligands_path = os.path.join(fa_path, fragalysis_config.fragalysis_sdf_filename)

    # copy sprint generated sdf to new name for fragalysis input
    copyfile(ligands_path, fa_ligands_path)

    # Read ligand poses
    molecules = []
    with oechem.oemolistream(ligands_path) as ifs:
        oemol = oechem.OEGraphMol()
        while oechem.OEReadMolecule(ifs, oemol):
            molecules.append(oemol.CreateCopy())
    print(f"{len(molecules)} ligands read")

    # Get zipped PDB if specified
    if use_zipped_references:
        consolidate_protein_snapshots_into_pdb(
            oemols=molecules,
            results_path=results_path,
            pdb_filename="references.pdb",
            fragalysis_input=True,
            fragalysis_path=fa_path,
        )

    # Human-readable description of every SD field present on the ligands;
    # these become the field values of the header entry.
    descriptions = {
        "DDG (kcal/mol)": "Relative computed free energy difference",
        "dDDG (kcal/mol)": "Uncertainty in computed relative free energy difference",
        "ref_mols": (
            "a comma separated list of the fragments that inspired the design "
            "of the new molecule (codes as they appear in fragalysis - "
            "e.g. x0104_0,x0692_0)"
        ),
        "ref_pdb": (
            "The name of the fragment (and corresponding Mpro fragment structure) "
            "with the best scoring hybrid docking pose"
        ),
        "original SMILES": (
            "the original SMILES of the compound before any "
            "computation was carried out"
        ),
    }

    # Preprocess molecules
    tags_to_retain = {"DDG (kcal/mol)", "dDDG (kcal/mol)"}
    for index, oemol in enumerate(
        track(molecules, "Preprocessing molecules for Fragalysis...")
    ):
        # Remove hydrogens
        oechem.OESuppressHydrogens(oemol, True)
        # Get original SMILES before the tag is stripped below
        original_smiles = oechem.OEGetSDData(oemol, "SMILES")
        # Remove irrelevant SD tags
        for sdpair in oechem.OEGetSDDataPairs(oemol):
            tag = sdpair.GetTag()
            if tag not in tags_to_retain:
                oechem.OEDeleteSDData(oemol, tag)
        # Add required SD tags
        oechem.OESetSDData(oemol, "ref_mols", fragalysis_config.ref_mols)
        if use_zipped_references:
            # Reference PDBs inside the zip are numbered by ligand position
            oechem.OESetSDData(oemol, "ref_pdb", f"references/references_{index}.pdb")
        else:
            oechem.OESetSDData(oemol, "ref_pdb", fragalysis_config.ref_pdb)
        oechem.OESetSDData(oemol, "original SMILES", original_smiles)

    # Build the header entry from a copy of the first ligand: every SD value
    # is replaced by the description of its field.
    oemol = molecules[0].CreateCopy()
    for sdpair in oechem.OEGetSDDataPairs(oemol):
        tag = sdpair.GetTag()
        oechem.OESetSDData(oemol, tag, descriptions[tag])
    # Add other fields
    oemol.SetTitle("ver_1.2")
    oechem.OESetSDData(oemol, "ref_url", fragalysis_config.ref_url)
    oechem.OESetSDData(oemol, "submitter_name", fragalysis_config.submitter_name)
    oechem.OESetSDData(oemol, "submitter_email", fragalysis_config.submitter_email)
    oechem.OESetSDData(oemol, "submitter_institution", fragalysis_config.submitter_institution)
    oechem.OESetSDData(oemol, "generation_date", datetime.today().strftime("%Y-%m-%d"))
    oechem.OESetSDData(oemol, "method", fragalysis_config.method)
    molecules.insert(0, oemol)  # make it first molecule

    # Write header entry followed by the ligands (overwrites the copy made above)
    with oechem.oemolostream(fa_ligands_path) as ofs:
        for oemol in track(molecules, description="Writing Fragalysis SDF file..."):
            oechem.OEWriteMolecule(ofs, oemol)

    # TODO add check SDF step here?

    # Upload to fragalysis
    print("Uploading to Fragalysis...")
    print(f"--> Target: {fragalysis_config.target_name}")

    # Imported late so the upload client is only needed once the SDF is ready
    from fragalysis_api.xcextracter.computed_set_update import update_cset, REQ_URL

    if fragalysis_config.new_upload:
        update_set = "None"  # new upload
        print("--> Uploading a new set")
    else:
        # Set name is "<submitter_name>-<method>" with all whitespace removed
        update_set = ("".join(fragalysis_config.submitter_name.split()) + "-" +
                      "".join(fragalysis_config.method.split()))
        print(f"--> Updating set: {update_set}")

    if use_zipped_references:
        pdb_zip_path = os.path.join(fa_path, "references.zip")
    else:
        pdb_zip_path = None

    taskurl = update_cset(
        REQ_URL,
        target_name=fragalysis_config.target_name,
        sdf_path=fa_ligands_path,
        pdb_zip_path=pdb_zip_path,
        update_set=update_set,
        upload_key=fragalysis_config.upload_key,
        submit_choice=1,
        add=False,
    )

    print(f"Upload complete, check upload status: {taskurl}")
def RemoveProps(proplist, ifs, ofs):
    """Copy molecules from ``ifs`` to ``ofs`` without the SD tags in ``proplist``.

    Every molecule yielded by ``ifs.GetOEGraphMols()`` has each tag named in
    ``proplist`` deleted and is then written to ``ofs``.
    """
    for molecule in ifs.GetOEGraphMols():
        for unwanted_tag in proplist:
            oechem.OEDeleteSDData(molecule, unwanted_tag)
        oechem.OEWriteMolecule(ofs, molecule)
def KeepProps(proplist, ifs, ofs):
    """Copy molecules from ``ifs`` to ``ofs`` keeping only the SD tags in ``proplist``.

    Every molecule yielded by ``ifs.GetOEGraphMols()`` has each SD entry
    whose tag is not in ``proplist`` deleted and is then written to ``ofs``.
    """
    for molecule in ifs.GetOEGraphMols():
        for pair in oechem.OEGetSDDataPairs(molecule):
            tag = pair.GetTag()
            if tag in proplist:
                continue
            oechem.OEDeleteSDData(molecule, tag)
        oechem.OEWriteMolecule(ofs, molecule)