def main(argv=sys.argv):
    """Convert ``<prefix>.oeb`` into two MOL2 files.

    Every molecule read from ``<prefix>.oeb`` is written twice: first to
    ``<prefix>_ff.mol2`` after ``OETriposAtomNames`` has been applied, then to
    ``<prefix>_tripos.mol2`` after ``OETriposAtomTypeNames`` has additionally
    been applied to the same molecule object.  Both output streams are
    configured with the ``OEOFlavor_MOL2_Forcefield`` flavor.

    Parameters
    ----------
    argv : list of str
        Command-line style arguments: ``[program_name, oeb_file_prefix]``.

    Returns
    -------
    int
        Always 0 when the conversion loop completes.
    """
    if len(argv) != 2:
        oechem.OEThrow.Usage("%s <infile (oeb file prefix)>" % argv[0])

    prefix = argv[1]
    # Build each path once so the error messages report the full filename.
    # The previous `"... %s ..." % prefix + '.ext'` form applied `%` before
    # `+`, which appended the extension to the end of the message instead.
    input_path = prefix + '.oeb'
    ff_path = prefix + '_ff.mol2'
    tripos_path = prefix + '_tripos.mol2'

    ifs = oechem.oemolistream()
    if not ifs.open(input_path):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % input_path)

    ofsff = oechem.oemolostream()
    ofsff.SetFlavor(oechem.OEFormat_MOL2, oechem.OEOFlavor_MOL2_Forcefield)
    if not ofsff.open(ff_path):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % ff_path)

    ofsTri = oechem.oemolostream()
    ofsTri.SetFlavor(oechem.OEFormat_MOL2, oechem.OEOFlavor_MOL2_Forcefield)
    if not ofsTri.open(tripos_path):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % tripos_path)

    try:
        for mol in ifs.GetOEMols():
            # Order matters: the type names are assigned only after the
            # first file has been written, on the same molecule object.
            oechem.OETriposAtomNames(mol)
            oechem.OEWriteConstMolecule(ofsff, mol)
            oechem.OETriposAtomTypeNames(mol)
            oechem.OEWriteConstMolecule(ofsTri, mol)
    finally:
        # Close the streams even if reading or writing raises.
        ifs.close()
        ofsff.close()
        ofsTri.close()

    return 0
def genConfs(c_mol, ofsff, ofsTri, index):
    """Generate one Omega conformer of ``c_mol`` and write it to two streams.

    A copy of ``c_mol`` is passed to a freshly configured ``OEOmega``
    instance.  When Omega reports success the copy is retitled
    ``DrugBank_<index>`` and written twice: once to ``ofsff`` after
    ``OETriposAtomNames`` and once to ``ofsTri`` after
    ``OETriposAtomTypeNames``, each time from its own copy so that the two
    outputs do not affect one another.  Nothing is written on failure.

    Parameters
    ----------
    c_mol : molecule accepted by ``oechem.OEMol(...)``
        Input molecule; it is copied, not modified.
    ofsff : output molecule stream
        Receives the copy processed with ``OETriposAtomNames``.
    ofsTri : output molecule stream
        Receives the copy processed with ``OETriposAtomTypeNames``.
    index : object
        Interpolated into the molecule title.

    Returns
    -------
    The value returned by calling the Omega object on the molecule
    (truthy on success).
    """
    # Omega configuration: single conformer, input excluded, strict checks.
    conf_generator = oeomega.OEOmega()
    conf_generator.SetMaxConfs(1)
    conf_generator.SetIncludeInput(False)
    conf_generator.SetEnergyWindow(15.0)
    conf_generator.SetStrictStereo(True)
    conf_generator.SetSampleHydrogens(True)
    conf_generator.SetStrictAtomTypes(True)

    working_mol = oechem.OEMol(c_mol)
    status = conf_generator(working_mol)
    if not status:
        return status

    working_mol.SetTitle(f'DrugBank_{index}')

    # Each output gets an independent copy so the relabelling applied for one
    # file never leaks into the other.
    outputs = (
        (ofsff, oechem.OETriposAtomNames),
        (ofsTri, oechem.OETriposAtomTypeNames),
    )
    for stream, relabel in outputs:
        out_mol = oechem.OEMol(working_mol)
        relabel(out_mol)
        oechem.OEWriteConstMolecule(stream, out_mol)

    return status
def enumerate_conformations(name, smiles=None, pdbname=None):
    """Run Epik to get protonation states using PDB residue templates for naming.

    The input molecule is taken from the RCSB Ligand Expo (when ``pdbname`` is
    given) or built from ``smiles``.  It is written to mol2, passed through
    Epik, converted to SDF/mol2, and every resulting state is charged.  All
    files are written under ``<output_dir>/<name>/`` with the prefix
    ``<name>``:

    * ``-input.pdb`` / ``-input.sdf`` (``pdbname`` input only)
    * ``-input.mol2``, ``-epik.mae``, ``-epik.sdf``, ``-epik.mol2``
    * ``-state-penalties.out`` (one Epik state penalty per charged state)
    * ``-epik-charged.pdb`` and ``-epik-charged.mol2``

    States whose charging raises are reported on stdout and skipped.

    Parameters
    ----------
    name : str
        Common name of molecule (used to create subdirectory)
    smiles : str
        Isomeric SMILES string; used only when ``pdbname`` is not given.
    pdbname : str
        Three-letter PDB code (e.g. 'DB8').  If it contains spaces, only the
        first space-separated entry is used.

    Returns
    -------
    None
        Also returns ``None`` early, before running Epik, when charging the
        SMILES-derived molecule raises ``RuntimeError``.

    Raises
    ------
    ValueError
        If neither ``smiles`` nor ``pdbname`` is provided.
    """
    # Create output subfolder.  makedirs(exist_ok=True) avoids the
    # check-then-create race and also creates a missing parent directory.
    output_subdir = os.path.join(output_dir, name)
    os.makedirs(output_subdir, exist_ok=True)
    output_basepath = os.path.join(output_subdir, name)

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            print("Splitting '%s' into first entry only: '%s'" % (pdbname, pdbnames[0]))
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (pdbname[0], pdbname, pdbname)
        pdb_filename = output_basepath + '-input.pdb'
        retrieve_url(url, pdb_filename)
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (pdbname[0], pdbname, pdbname)
        sdf_filename = output_basepath + '-input.sdf'
        retrieve_url(url, sdf_filename)
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF.
        # NOTE(review): zip() pairs atoms positionally and stops at the
        # shorter molecule; this assumes both files list atoms in the same
        # order -- confirm.
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())

        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)
        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    elif smiles:
        # Generate molecule geometry with OpenEye
        print("Generating molecule {}".format(name))
        oe_molecule = openeye.smiles_to_oemol(smiles)

        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(oe_molecule)
        oechem.OETriposBondTypeNames(oe_molecule)
        try:
            oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
        except RuntimeError:
            traceback.print_exc()
            print("Skipping molecule " + name)
            return

        # Residue name: first three letters of the upper-cased name.
        residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    else:
        raise ValueError('Must provide SMILES string or pdbname')

    # Save mol2 file, preserving atom names
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.  The SDF and mol2 streams are read in lockstep: the
    # mol2 record supplies the structure, the SDF record supplies the tags.
    charged_molecules = list()
    index = 0
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)
            index += 1
            print("Charging molecule %d" % (index))
            try:
                # Charge molecule.
                charged_molecule = openeye.get_charges(mol2_molecule, max_confs=800,
                                                       strictStereo=False, normalize=True,
                                                       keep_confs=None)
                # Assign Tripos types
                oechem.OETriposAtomTypeNames(charged_molecule)
                oechem.OETriposBondTypeNames(charged_molecule)
                # Store tags.
                oechem.OECopySDData(charged_molecule, sdf_molecule)
                # Store molecule
                charged_molecules.append(charged_molecule)
            except Exception as e:
                # Deliberate best-effort: a state that cannot be charged is
                # reported and dropped, the remaining states still run.
                print(e)
                print("Skipping protomer/tautomer because of failed charging.")
    finally:
        # Clean up, even if reading raised unexpectedly.
        ifs_sdf.close()
        ifs_mol2.close()

    # Write state penalties.
    epik_float_tags = (
        "r_epik_Ionization_Penalty",
        "r_epik_Ionization_Penalty_Charging",
        "r_epik_Ionization_Penalty_Neutral",
        "r_epik_State_Penalty",
    )
    with open(output_basepath + '-state-penalties.out', 'w') as outfile:
        for charged_molecule in charged_molecules:
            # Every Epik tag is still parsed although only the state penalty
            # is written, so a non-numeric value raises here exactly as it
            # did before.
            epik_data = {tag: float(oechem.OEGetSDData(charged_molecule, tag))
                         for tag in epik_float_tags}
            int(oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))
            outfile.write('%16.8f\n' % epik_data["r_epik_State_Penalty"])

    # Write as PDB
    charged_pdb_filename = output_basepath + '-epik-charged.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    try:
        flavor = (oechem.OEOFlavor_PDB_CurrentResidues
                  | oechem.OEOFlavor_PDB_ELEMENT
                  | oechem.OEOFlavor_PDB_BONDS
                  | oechem.OEOFlavor_PDB_HETBONDS
                  | oechem.OEOFlavor_PDB_BOTH)
        ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
        for charged_molecule in charged_molecules:
            # Fix residue names
            for atom in charged_molecule.GetAtoms():
                residue = oechem.OEAtomGetResidue(atom)
                residue.SetName(residue_name)
                oechem.OEAtomSetResidue(atom, residue)
            oechem.OEWriteMolecule(ofs, charged_molecule)
    finally:
        ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)
def enumerate_conformations(name, pdbfile=None, smiles=None, pdbname=None, pH=7.4):
    """Run Epik to get protonation states using PDB residue templates for naming.

    The ligand is downloaded from the RCSB Ligand Expo, written to mol2,
    passed through Epik at the requested pH, and every resulting state is
    charged.  All files are written relative to the current working
    directory, prefixed with ``name``:

    * ``-rcsb_download.pdb`` / ``-rcsb_download.sdf``
    * ``-before_epik.mol2``, ``-epik.mae``, ``-after_epik.sdf``,
      ``-after_epik.mol2``
    * ``-conformers-failed-state-<i>-.mol2`` for states that failed after
      conformers had been generated
    * ``-state-penalties.out``, ``-charged_output.pdb``,
      ``-charged_output.mol2``

    The accumulated log is also written to ``log.txt`` (overwritten on each
    call).  As a side effect, the OpenEye ``OEThrow`` output stream is
    redirected to an in-memory stream so its messages can be captured.

    Parameters
    ----------
    name : str
        Common name of molecule (used as filename prefix)
    pdbfile : str
        Currently unused; this input mode is disabled.
    smiles : str
        Currently unused; this input mode is disabled.
    pdbname : str
        Three-letter PDB code (e.g. 'DB8').  If it contains spaces, only the
        first space-separated entry is used.
    pH : float
        pH passed to Epik.

    Returns
    -------
    log : str
        Human-readable record of the run.
    success_status : bool
        False if any state raised during charging, True otherwise.

    Raises
    ------
    ValueError
        If ``pdbname`` is not provided.
    """
    oehandler = openeye.oechem.OEThrow
    # Capture OpenEye messages in a string stream.
    # NOTE(review): the handler's output stream is never restored afterwards;
    # confirm that leaving OEThrow pointed at this local stream is intended.
    oss = oechem.oeosstream()
    oehandler.SetOutputStream(oss)

    log = "New run:\nPDB code: {pdbname}; Molecule: {name}; pH {pH}\n".format(
        pdbname=pdbname, name=name, pH=pH)
    success_status = True

    # Only the pdbname input mode is active; the SMILES and pdbfile modes
    # are disabled for the moment.
    if not pdbname:
        raise ValueError('Must provide SMILES string or pdbname, or pdbfile')

    # Make sure to only use one entry if there are multiple
    if ' ' in pdbname:
        pdbnames = pdbname.split(' ')
        # Trailing newline added so this entry no longer runs into the next.
        log += "Splitting '%s' into first entry only: '%s'\n" % (pdbname, pdbnames[0])
        pdbname = pdbnames[0]

    # Retrieve PDB (for atom names)
    url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (
        pdbname[0], pdbname, pdbname)
    pdb_filename = name + '-rcsb_download.pdb'
    log += "Retrieving PDB structure from RCSB ligand expo: {}.\n".format(
        pdb_filename)
    retrieve_url(url, pdb_filename)
    log += "Parsing PDB file.\n"
    pdb_molecule = read_molecule(pdb_filename)

    # Retrieve SDF (for everything else)
    url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (
        pdbname[0], pdbname, pdbname)
    sdf_filename = name + '-rcsb_download.sdf'
    log += "Retrieving SDF structure from RCSB ligand expo: {}.\n".format(
        sdf_filename)
    retrieve_url(url, sdf_filename)
    log += "Parsing SDF file.\n"
    sdf_molecule = read_molecule(sdf_filename)

    # Replace atom names in SDF.
    # NOTE(review): zip() pairs atoms positionally and stops at the shorter
    # molecule; this assumes both files list atoms in the same order.
    log += "Canonicalizing atom names.\n"
    for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
        sdf_atom.SetName(pdb_atom.GetName())

    # Assign Tripos atom types
    log += "Assign atom type names.\n"
    oechem.OETriposAtomTypeNames(sdf_molecule)
    oechem.OETriposBondTypeNames(sdf_molecule)
    oe_molecule = sdf_molecule

    # We already know the residue name
    residue_name = pdbname

    # Save mol2 file, preserving atom names
    log += "Running Epik.\n"
    mol2_file_path = name + '-before_epik.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = name + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=50,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=pH)
    log += "Epik run completed.\n"

    # Convert maestro file to sdf and mol2
    output_sdf_filename = name + '-after_epik.sdf'
    output_mol2_filename = name + '-after_epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.
    # reset count of error handler
    oehandler.Clear()
    log += "Assigning charges to protonation states.\n"
    charged_molecules = list()
    index = 0
    failed_states = set()
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)
            index += 1
            log += "State {0:d}\n".format(index)

            # Reset per state.  Without this, a failure inside get_charges
            # left the name either unbound or still pointing at the previous
            # state's conformers, which were then written to the failure
            # file of the current state.
            charged_molecule_conformers = None
            try:
                # Charge molecule.
                charged_molecule_conformers = omtoe.get_charges(
                    mol2_molecule, max_confs=800, strictStereo=False,
                    normalize=True, keep_confs=-1)

                log += "Charging stage output:\n"
                # NOTE(review): presumably str(oss) yields the captured
                # OpenEye messages -- confirm against the oeosstream API.
                oe_output = str(oss)
                log += oe_output
                log += "\nCharging state completed.\n"

                # Restore coordinates to original
                charged_molecule = select_conformers(
                    charged_molecule_conformers, mol2_molecule, keep_confs=None)

                # Assign Tripos types
                oechem.OETriposAtomTypeNames(charged_molecule)
                oechem.OETriposBondTypeNames(charged_molecule)
                # Store tags.
                oechem.OECopySDData(charged_molecule, sdf_molecule)
                # Store molecule
                charged_molecules.append(charged_molecule)

                # Check for failure in the log.
                # NOTE(review): the molecule has already been stored above,
                # so a state rejected here is flagged as failed but still
                # ends up in the outputs -- confirm this is intended.
                openeye_charge_log_parser(oe_output, True)
                oehandler.Clear()
            except Exception as e:
                failed_states.add(index)
                logging.info(e)
                log += "State failed charging.\n"
                log += str(e)
                log += "\n"
                filename_failure = name + '-conformers-failed-state-{}-.mol2'.format(
                    index)
                if charged_molecule_conformers is None:
                    # Nothing was produced for this state.
                    log += "Could not store result, most likely failed during Omega step!\n"
                else:
                    try:
                        write_mol2_preserving_atomnames(
                            filename_failure, charged_molecule_conformers,
                            residue_name)
                    except Exception:
                        log += "Could not store result, most likely failed during Omega step!\n"
                success_status = False
                oehandler.Clear()
    finally:
        # Clean up, even if reading raised unexpectedly.
        ifs_sdf.close()
        ifs_mol2.close()

    # Write state penalties.
    epik_float_tags = (
        "r_epik_Ionization_Penalty",
        "r_epik_Ionization_Penalty_Charging",
        "r_epik_Ionization_Penalty_Neutral",
        "r_epik_State_Penalty",
    )
    with open(name + '-state-penalties.out', 'w') as outfile:
        for (state_number, charged_molecule) in enumerate(charged_molecules, start=1):
            # Get Epik data.
            log += "Writing Epik data for state {:d}\n".format(state_number)
            # Every Epik tag is still parsed although only the state penalty
            # is written, so a non-numeric value raises here as before.
            epik_data = {tag: float(oechem.OEGetSDData(charged_molecule, tag))
                         for tag in epik_float_tags}
            int(oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))
            outfile.write('%16.8f\n' % epik_data["r_epik_State_Penalty"])

    # Write as PDB
    charged_pdb_filename = name + '-charged_output.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    try:
        flavor = (oechem.OEOFlavor_PDB_CurrentResidues
                  | oechem.OEOFlavor_PDB_ELEMENT
                  | oechem.OEOFlavor_PDB_BONDS
                  | oechem.OEOFlavor_PDB_HETBONDS
                  | oechem.OEOFlavor_PDB_BOTH)
        ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
        for charged_molecule in charged_molecules:
            # Fix residue names
            for atom in charged_molecule.GetAtoms():
                residue = oechem.OEAtomGetResidue(atom)
                residue.SetName(residue_name)
                oechem.OEAtomSetResidue(atom, residue)
            oechem.OEWriteMolecule(ofs, charged_molecule)
    finally:
        ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = name + '-charged_output.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)

    log += "Run completed.\n"
    if success_status:
        log += "Status: Success\n"
    else:
        log += "Status: Failure\n"
        log += "Failed states: {}\n".format(
            " ".join(str(state) for state in sorted(failed_states)))

    with open("log.txt", 'w') as logfile:
        logfile.write(log)

    return log, success_status