def process(self, mol, port):
    """
    Run a YANK hydration free energy calculation for a single charged molecule.

    The molecule is written to a temporary directory as ``input.mol2``, the
    YANK experiments described by ``self.construct_yaml`` are built, and the
    resulting free energy difference and its uncertainty are attached to the
    molecule as the SD data tags ``DeltaG_yank_hydration`` and
    ``dDeltaG_yank_hydration``.  On success the molecule is emitted on
    ``self.success``; on any exception the error message is attached under the
    ``error`` data key and the molecule is emitted on ``self.failure``.

    Parameters
    ----------
    mol : OpenEye molecule
        The molecule to process.  It must already carry partial charges.
    port : object
        Not used by this method.

    """
    kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

    # Retrieve data about which molecule we are processing
    title = mol.GetTitle()

    with TemporaryDirectory() as output_directory:
        try:
            # Print out which molecule we are processing
            self.log.info('Processing {} in directory {}.'.format(title, output_directory))

            # Check that molecule is charged.
            if not molecule_is_charged(mol):
                raise Exception('Molecule %s has no charges; input molecules must be charged.' % mol.GetTitle())

            # Write the specified molecule out to a mol2 file without changing its name.
            # The stream is closed explicitly so the file is complete on disk
            # before it is post-processed below.
            mol2_filename = os.path.join(output_directory, 'input.mol2')
            ofs = oechem.oemolostream(mol2_filename)
            try:
                oechem.OEWriteMol2File(ofs, mol)
            finally:
                ofs.close()

            # Undo oechem fuckery with naming mol2 substructures `<0>`
            from YankCubes.utils import unfuck_oechem_mol2_file
            unfuck_oechem_mol2_file(mol2_filename)

            # Run YANK on the specified molecule.
            from yank.yamlbuild import YamlBuilder
            yaml = self.construct_yaml(output_directory=output_directory)
            yaml_builder = YamlBuilder(yaml)
            yaml_builder.build_experiments()
            self.log.info('Ran Yank experiments for molecule {}.'.format(title))

            # Analyze the hydration free energy.
            # NOTE(review): solvent1.nc is treated as the solvated phase and
            # solvent2.nc as the vacuum phase; this presumably matches the YAML
            # produced by construct_yaml -- confirm.
            from yank.analyze import estimate_free_energies
            experiments_directory = os.path.join(output_directory, 'experiments')
            (Deltaf_ij_solvent, dDeltaf_ij_solvent) = estimate_free_energies(
                netcdf.Dataset(os.path.join(experiments_directory, 'solvent1.nc'), 'r'))
            (Deltaf_ij_vacuum, dDeltaf_ij_vacuum) = estimate_free_energies(
                netcdf.Dataset(os.path.join(experiments_directory, 'solvent2.nc'), 'r'))
            DeltaG_hydration = Deltaf_ij_vacuum[0, -1] - Deltaf_ij_solvent[0, -1]
            # Combine the per-phase *uncertainties* in quadrature (the free
            # energies themselves must not enter the error estimate).
            dDeltaG_hydration = np.sqrt(dDeltaf_ij_vacuum[0, -1]**2 + dDeltaf_ij_solvent[0, -1]**2)

            # Add result to original molecule
            # (values are scaled by kT expressed in kcal/mol before storing).
            oechem.OESetSDData(mol, 'DeltaG_yank_hydration', str(DeltaG_hydration * kT_in_kcal_per_mole))
            oechem.OESetSDData(mol, 'dDeltaG_yank_hydration', str(dDeltaG_hydration * kT_in_kcal_per_mole))
            self.log.info('Analyzed and stored hydration free energy for molecule {}.'.format(title))

            # Emit molecule to success port.
            self.success.emit(mol)

        except Exception as e:
            self.log.info('Exception encountered when processing molecule {}.'.format(title))
            # Attach error message to the molecule that failed
            # TODO: If there is an error in the leap setup log,
            # we should capture that and attach it to the failed molecule.
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            # Return failed molecule
            self.failure.emit(mol)
def test_ncmc_engine_molecule():
    """
    Check alchemical null elimination for several small molecules with 0, 1, and 50 NCMC switching steps.

    For each of pentane, biphenyl and imatinib, a TopologyProposal is built in
    which the old and new topology/system are identical and only carbon and
    nitrogen atoms are mapped.  One test callable is yielded per
    (molecule, number of NCMC steps) combination.

    """
    # These imports do not depend on the molecule, so do them once up front.
    from perses.tests.utils import createSystemFromIUPAC
    from openeye import oechem
    from perses.rjmc.topology_proposal import TopologyProposal

    molecule_names = ['pentane', 'biphenyl', 'imatinib']
    #if os.environ.get("TRAVIS", None) == 'true':
    #    molecule_names = ['pentane']

    mapped_elements = ('carbon', 'nitrogen')
    ncmc_step_counts = [0, 1, 50]

    for molecule_name in molecule_names:
        [molecule, system, positions, topology] = createSystemFromIUPAC(molecule_name)

        # DEBUG
        print(molecule_name)
        ofs = oechem.oemolostream('%s.mol2' % molecule_name)
        oechem.OEWriteMol2File(ofs, molecule)
        ofs.close()

        # Eliminate half of the molecule
        # TODO: Use a more rigorous scheme to make sure we are really cutting the molecule in half and not just eliminating hydrogens or something.
        new_to_old_atom_map = {
            atom.index: atom.index
            for atom in topology.atoms()
            if str(atom.element.name) in mapped_elements
        }

        # DEBUG
        print(new_to_old_atom_map)

        topology_proposal = TopologyProposal(
            new_topology=topology, new_system=system,
            old_topology=topology, old_system=system,
            old_chemical_state_key='', new_chemical_state_key='',
            logp_proposal=0.0,
            new_to_old_atom_map=new_to_old_atom_map,
            metadata={'test': 0.0})

        for ncmc_nsteps in ncmc_step_counts:
            f = partial(check_alchemical_null_elimination, topology_proposal, positions, ncmc_nsteps=ncmc_nsteps)
            f.description = "Testing alchemical null elimination for '%s' with %d NCMC steps" % (
                molecule_name, ncmc_nsteps)
            yield f
def generateOEMolFromTopologyResidue(residue, geometry=False, tripos_atom_names=False):
    """
    Generate an OpenEye OEMol molecule from an OpenMM Topology Residue.

    Parameters
    ----------
    residue : simtk.openmm.app.topology.Residue
        The topology Residue from which an OEMol is to be created.
        An Exception will be thrown if this residue has external bonds.
    geometry : bool, optional, default=False
        If True, will generate a single configuration with OEOmega.
        Note that stereochemistry will be *random*.
    tripos_atom_names : bool, optional, default=False
        If True, will generate and assign Tripos atom names.

    Returns
    -------
    molecule : openeye.oechem.OEMol
        The OEMol molecule corresponding to the topology.
        Atom order will be preserved and bond orders assigned.

    Raises
    ------
    Exception
        If the residue has external bonds.
    IOError
        If the ``antechamber`` run does not produce its output file; the
        message contains the exit status and captured output of the command.

    The ``antechamber`` program is used to assign bond types, and these are
    converted back into OEMol bond type assignments.

    Note that there is no way to preserve stereochemistry since `Residue` does
    not note stereochemistry in any way.

    """
    # Raise an Exception if this residue has external bonds.
    if list(residue.external_bonds()):
        raise Exception(
            "Cannot generate an OEMol from residue '%s' because it has external bonds." % residue.name)

    import shutil
    import tempfile
    from openeye import oechem

    # Create OEMol where all atoms have bond order 1.
    molecule = oechem.OEMol()
    molecule.SetTitle(residue.name)  # name molecule after first residue
    for atom in residue.atoms():
        oeatom = molecule.NewAtom(atom.element.atomic_number)
        oeatom.SetName(atom.name)
        oeatom.AddData("topology_index", atom.index)
    oeatoms = {oeatom.GetName(): oeatom for oeatom in molecule.GetAtoms()}
    for (atom1, atom2) in residue.bonds():
        order = 1
        molecule.NewBond(oeatoms[atom1.name], oeatoms[atom2.name], order)

    # All intermediate files live in a scratch directory that is removed even
    # if bond perception fails part-way through.
    tmpdir = tempfile.mkdtemp()
    try:
        mol2_input_filename = os.path.join(tmpdir, 'molecule-before-bond-perception.mol2')
        ac_output_filename = os.path.join(tmpdir, 'molecule-after-bond-perception.ac')

        # Write out a mol2 file without altering molecule.
        ofs = oechem.oemolostream(mol2_input_filename)
        m2h = True
        substruct = False
        oechem.OEWriteMol2File(ofs, molecule, m2h, substruct)
        ofs.close()

        # Run Antechamber bondtype
        #command = 'bondtype -i %s -o %s -f mol2 -j full' % (mol2_input_filename, ac_output_filename)
        command = 'antechamber -i %s -fi mol2 -o %s -fo ac -j 2' % (
            mol2_input_filename, ac_output_filename)
        [status, output] = getstatusoutput(command)
        if not os.path.exists(ac_output_filename):
            # Surface what antechamber reported rather than failing later on
            # a bare "file not found".
            raise IOError(
                "antechamber did not produce '%s' for residue '%s' (exit status %s):\n%s"
                % (ac_output_filename, residue.name, status, output))

        # Read bonds.
        with open(ac_output_filename) as infile:
            lines = infile.readlines()
    finally:
        # Clean up.
        shutil.rmtree(tmpdir, ignore_errors=True)

    # Define mapping from GAFF bond orders to OpenEye bond orders.
    order_map = {1: 1, 2: 2, 3: 3, 7: 1, 8: 2, 9: 5, 10: 5}

    antechamber_bond_types = list()
    for line in lines:
        elements = line.split()
        # Skip blank lines; only BOND records carry a bond type (fifth field).
        if elements and elements[0] == 'BOND':
            antechamber_bond_types.append(int(elements[4]))

    # Bonds are matched to BOND records by position.
    oechem.OEClearAromaticFlags(molecule)
    for (bond, antechamber_bond_type) in zip(molecule.GetBonds(), antechamber_bond_types):
        #bond.SetOrder(order_map[antechamber_bond_type])
        bond.SetIntType(order_map[antechamber_bond_type])
    oechem.OEFindRingAtomsAndBonds(molecule)
    oechem.OEKekulize(molecule)
    oechem.OEAssignFormalCharges(molecule)
    oechem.OEAssignAromaticFlags(molecule, oechem.OEAroModelOpenEye)

    # Generate Tripos atom names if requested.
    if tripos_atom_names:
        oechem.OETriposAtomNames(molecule)

    # Assign geometry
    if geometry:
        from openeye import oeomega
        omega = oeomega.OEOmega()
        omega.SetMaxConfs(1)
        omega.SetIncludeInput(False)
        omega.SetStrictStereo(False)
        omega(molecule)

    return molecule
def process(self, mol, port):
    """
    Run a YANK binding free energy calculation for a single charged ligand.

    The receptor (``self.receptor``) is written to ``receptor.pdb`` and the
    ligand to ``input.mol2`` in a temporary directory, the YANK experiments
    described by ``self.construct_yaml`` are built, and the result of
    ``YankCubes.analysis.analyze`` is attached to the molecule as the SD data
    tags ``DeltaG_yank_binding`` and ``dDeltaG_yank_binding``.  On success the
    molecule is emitted on ``self.success``; on any exception the error
    message is attached under the ``error`` data key and the molecule is
    emitted on ``self.failure``.

    Parameters
    ----------
    mol : OpenEye molecule
        The ligand to process.  It must already carry partial charges.
    port : object
        Not used by this method.

    """
    kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

    # Retrieve data about which molecule we are processing
    title = mol.GetTitle()

    with TemporaryDirectory() as output_directory:
        try:
            # Print out which molecule we are processing
            self.log.info('Processing {} in {}.'.format(
                title, output_directory))

            # Check that molecule is charged.
            if not molecule_is_charged(mol):
                raise Exception(
                    'Molecule %s has no charges; input molecules must be charged.'
                    % mol.GetTitle())

            # Write the receptor.
            pdbfilename = os.path.join(output_directory, 'receptor.pdb')
            with oechem.oemolostream(pdbfilename) as ofs:
                res = oechem.OEWriteConstMolecule(ofs, self.receptor)
                if res != oechem.OEWriteMolReturnCode_Success:
                    raise RuntimeError(
                        "Error writing receptor: {}".format(res))

            # Write the specified molecule out to a mol2 file without changing its name.
            # Use the same context-manager pattern as for the receptor so the
            # stream is closed before the file is post-processed below.
            mol2_filename = os.path.join(output_directory, 'input.mol2')
            with oechem.oemolostream(mol2_filename) as ofs:
                oechem.OEWriteMol2File(ofs, mol)

            # Undo oechem fuckery with naming mol2 substructures `<0>`
            from YankCubes.utils import unfuck_oechem_mol2_file
            unfuck_oechem_mol2_file(mol2_filename)

            # Run YANK on the specified molecule.
            from yank.yamlbuild import YamlBuilder
            yaml = self.construct_yaml(output_directory=output_directory)
            yaml_builder = YamlBuilder(yaml)
            yaml_builder.build_experiments()
            self.log.info(
                'Ran Yank experiments for molecule {}.'.format(title))

            # Analyze the binding free energy
            # TODO: Use yank.analyze API for this
            from YankCubes.analysis import analyze
            store_directory = os.path.join(output_directory, 'experiments')
            [DeltaG_binding, dDeltaG_binding] = analyze(store_directory)

            # Disabled debugging aid, kept for reference:
            #
            # # Extract trajectory (DEBUG)
            # from yank.analyze import extract_trajectory
            # trajectory_filename = 'trajectory.pdb'
            # store_filename = os.path.join(store_directory, 'complex.pdb')
            # extract_trajectory(trajectory_filename, store_filename, state_index=0, keep_solvent=False,
            #                    discard_equilibration=True, image_molecules=True)
            # ifs = oechem.oemolistream(trajectory_filename)
            # ifs.SetConfTest(oechem.OEAbsCanonicalConfTest())  # load multi-conformer molecule
            # mol = oechem.OEMol()
            # for mol in ifs.GetOEMols():
            #     print (mol.GetTitle(), "has", mol.NumConfs(), "conformers")
            # ifs.close()
            # os.remove(trajectory_filename)

            # Attach binding free energy estimates to molecule
            # (values are scaled by kT expressed in kcal/mol before storing).
            oechem.OESetSDData(mol, 'DeltaG_yank_binding',
                               str(DeltaG_binding * kT_in_kcal_per_mole))
            oechem.OESetSDData(mol, 'dDeltaG_yank_binding',
                               str(dDeltaG_binding * kT_in_kcal_per_mole))
            self.log.info(
                'Analyzed and stored binding free energy for molecule {}.'.
                format(title))

            # Emit molecule to success port.
            self.success.emit(mol)

        except Exception as e:
            self.log.info(
                'Exception encountered when processing molecule {}.'.
                format(title))
            # Attach error message to the molecule that failed
            # TODO: If there is an error in the leap setup log,
            # we should capture that and attach it to the failed molecule.
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            # Return failed molecule
            self.failure.emit(mol)
def mk_conformers_epik(options, molecule, maxconf=99, verbose=True, pH=7):
    """
    Enumerate the list of conformers and associated properties for each protonation and tautomeric state using epik from the Schrodinger Suite.

    Several intermediate files (``epik-input.*``, ``epik-output.*`` and one
    ``<name>.mol2`` per state) are written to the current working directory.

    Parameters
    ----------
    options : options struct
        Options structure; ``options.ligand`` is used as the prefix of the
        name given to each generated state.
    molecule : openeye.oechem.OEMol
        The molecule read from the PDB whose protomer and tautomer states are to be enumerated.
    maxconf : int, optional, default=99
        Maximum number of protomers/tautomers to generate.
        Currently unused: Epik is always invoked with ``-ms 100``.
    verbose : bool, optional, default=True
        If True, print progress information.
    pH : float, optional, default=7
        pH passed to Epik and used for the effective pKa estimate.

    Returns
    -------
    conformers : list of Conformer
        The list of protomers/tautomers generated.  States for which charge
        assignment raises an exception are skipped.

    """
    from schrodinger import structure # Requires Schrodinger Suite
    import numpy as np

    def write_copy(filename):
        # Write a fresh copy of the input molecule, format chosen by file extension.
        outmol = oechem.OEMol(molecule)
        ofs = oechem.oemolostream()
        ofs.open(filename)
        oechem.OEWriteMolecule(ofs, outmol)
        ofs.close()
        return outmol

    def convert_structure_file(input_filename, output_filename):
        # Copy every structure from one file to another via the Schrodinger reader/writer.
        reader = structure.StructureReader(input_filename)
        writer = structure.StructureWriter(output_filename)
        for st in reader:
            writer.append(st)
        reader.close()
        writer.close()

    # Write mol2 file.
    if verbose: print("Writing input file as mol2...")
    outmol = write_copy('epik-input.mol2')
    # Use low level writer to get atom names correct (overwrites the file just written).
    ofs = oechem.oemolostream()
    ofs.open('epik-input.mol2')
    for (dest_atom, src_atom) in zip(outmol.GetAtoms(), molecule.GetAtoms()):
        dest_atom.SetName(src_atom.GetName())
    oechem.OEWriteMol2File(ofs, outmol, True)
    ofs.close()

    # Write sdf file.
    if verbose: print("Writing input file as sdf...")
    write_copy('epik-input.sdf')

    # Write pdb file.
    if verbose: print("Writing input file as pdb...")
    write_copy('epik-input.pdb')

    # Write input for epik.
    if verbose: print("Converting input file to Maestro format...")
    convert_structure_file("epik-input.mol2", "epik-input.mae")

    # Run epik to enumerate protomers/tautomers and get associated state penalties.
    if verbose: print("Running Epik...")
    cmd = '%s/epik -imae epik-input.mae -omae epik-output.mae -pht 10.0 -ms 100 -nt -pKa_atom -ph %f -WAIT' % (os.environ['SCHRODINGER'], pH)
    output = commands.getoutput(cmd)
    if verbose: print(output)

    # Convert output from epik from .mae to .sdf.
    if verbose: print("Converting output file to SDF...")
    convert_structure_file("epik-output.mae", "epik-output.sdf")

    # Also convert to .mol2.
    if verbose: print("Converting output file to MOL2...")
    convert_structure_file("epik-output.mae", "epik-output.mol2")

    # Find minimum charge.
    ifs = oechem.oemolistream()
    ifs.open('epik-output.mol2')
    scan_molecule = oechem.OEGraphMol()
    min_formal_charge = 1000
    while oechem.OEReadMolecule(ifs, scan_molecule):
        # Check aromaticity.
        oechem.OEAssignAromaticFlags(scan_molecule)

        # Assign formal charge
        oechem.OEAssignFormalCharges(scan_molecule)
        formal_charge = sum(atom.GetFormalCharge() for atom in scan_molecule.GetAtoms())
        # Keep most negative formal charge
        min_formal_charge = min(min_formal_charge, formal_charge)
    ifs.close()
    if verbose: print("Minimum formal charge = %d" % min_formal_charge)

    kT = 298 * 6.022e23 * 1.381e-23 / 4184 # kcal/mol for 298 K

    # Read conformers from SDF and mol2 (converted from Epik).
    # The SDF copy carries the Epik properties; the mol2 copy is the one used
    # to build the returned molecule.  Both are read in lockstep.
    if verbose: print("Reading conformers from SDF...")
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open('epik-output.sdf')
    sdf_molecule = oechem.OEGraphMol()

    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open('epik-output.mol2')
    mol2_molecule = oechem.OEGraphMol()

    conformer_index = 1
    conformers = list()
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            if verbose: print("Conformer %d" % conformer_index)

            # Read corresponding mol2 molecule.
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)
            oechem.OEAssignAromaticFlags(mol2_molecule) # check aromaticity

            # Make a copy of the mol2 molecule.
            state_molecule = oechem.OEMol(mol2_molecule)

            # Set name
            name = options.ligand+'%02d' % conformer_index
            state_molecule.SetTitle(name)

            # Assign formal charge
            oechem.OEAssignFormalCharges(state_molecule)
            formal_charge = sum(atom.GetFormalCharge() for atom in state_molecule.GetAtoms())
            if verbose: print("formal charge: %d" % formal_charge)

            # DEBUG: Write mol2 file before assigning charges.
            if verbose: print("Writing %s to mol2..." % name)
            outmol = oechem.OEMol(state_molecule)
            ofs = oechem.oemolostream()
            ofs.open(name + '.mol2')
            oechem.OEWriteMolecule(ofs, outmol)
            ofs.close()

            # Assign canonical AM1BCC charges.
            # A failure skips this state; the index is not advanced, so the
            # next state reuses the same name.
            try:
                if verbose: print("Assigning AM1-BCC charges...")
                #assign_canonical_am1bcc_charges(state_molecule)
                assign_simple_am1bcc_charges(state_molecule)
            except Exception as e:
                print(str(e))
                continue

            # Get Epik data.
            epik_Ionization_Penalty = float(oechem.OEGetSDData(sdf_molecule, "r_epik_Ionization_Penalty"))
            epik_State_Penalty = float(oechem.OEGetSDData(sdf_molecule, "r_epik_State_Penalty"))
            epik_Tot_Q = int(oechem.OEGetSDData(sdf_molecule, "i_epik_Tot_Q"))

            # Compute number of protons.
            nprotons = epik_Tot_Q - min_formal_charge + 1

            # Compute effective pKa, referenced to the pH Epik was actually run at.
            pKa = pH - epik_State_Penalty / (nprotons * kT * np.log(10))
            print("effective pKa = %8.3f" % pKa)

            # DEBUG
            print("%24s : pKa penalty %8.3f kcal/mol | tautomer penalty %8.3f kcal/mol | total state penalty %8.3f\n" % (name, epik_Ionization_Penalty, epik_State_Penalty - epik_Ionization_Penalty, epik_State_Penalty))

            # Create a conformer and append it to the list.
            conformer = Conformer(name, epik_Tot_Q, state_molecule, state_penalty=epik_State_Penalty)
            conformers.append(conformer)
            print(epik_Tot_Q) # DEBUG

            # Increment counter.
            conformer_index += 1
    finally:
        # Release both input streams even if parsing a state fails.
        ifs_sdf.close()
        ifs_mol2.close()

    if verbose: print("%d protomer/tautomer states were enumerated" % len(conformers))

    return conformers
def create_openeye_molecule(pdb, options, verbose=True):
    """
    Create OpenEye molecule from PDB representation.

    The molecule will have hydrogens added and be normalized, but the overall geometry will not be altered.

    As a side effect, ``<options.ligand>.mol2`` and ``<options.ligand>.pdb``
    are written to the current working directory.

    Parameters
    ----------
    pdb : Pdb
        The PDB-extracted entries for the ligand; ``pdb.pdb_extract`` must hold
        the PDB text to parse.
    options : options struct
        Options structure; ``options.ligand`` is used as the molecule title and
        as the base name of the files written.
    verbose : bool, optional, default=True
        If True, print progress information.

    Returns
    -------
    molecule : openeye.oechem.OEGraphMol
        Molecule representation.

    """

    # Create a molecule container.
    molecule = oechem.OEGraphMol()

    # Open a PDB file reader from the stored PDB string representation of HETATM and CONECT records.
    print(pdb.pdb_extract)
    ifs = oechem.oemolistream()
    ifs.openstring(pdb.pdb_extract)
    flavor = oechem.OEIFlavor_Generic_Default | oechem.OEIFlavor_PDB_Default | oechem.OEIFlavor_PDB_ALL
    ifs.SetFlavor(oechem.OEFormat_PDB, flavor)
    oechem.OEReadPDBFile(ifs, molecule)

    # Perceive connectivity, rings, aromaticity and bond orders.
    oechem.OEDetermineConnectivity(molecule)
    oechem.OEFindRingAtomsAndBonds(molecule)
    oechem.OEAssignAromaticFlags(molecule) # check aromaticity
    oechem.OEPerceiveBondOrders(molecule)

    # We must assign implicit hydrogens first so that the valence model will be correct.
    oechem.OEAssignImplicitHydrogens(molecule)
    oechem.OEAssignFormalCharges(molecule)

    # Now add explicit hydrogens.
    polarOnly = False
    set3D = True
    oechem.OEAddExplicitHydrogens(molecule, polarOnly, set3D)

    # TODO: Sequentially number hydrogen atoms.

    # Perceive stereochemistry.
    oechem.OEPerceiveChiral(molecule)

    # Set title.
    molecule.SetTitle(options.ligand)

    # Write out PDB form of this molecule.
    # TODO: Fix atom numbering.
    #if verbose: print "Writing input molecule as PDB..."
    #outmol = oechem.OEMol(molecule)
    #ofs = oechem.oemolostream()
    #flavor = oechem.OEOFlavor_Generic_Default | oechem.OEOFlavor_PDB_Default
    #ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
    #ofs.open(options.ligand + '.pdb')
    #oechem.OEWriteMolecule(ofs, outmol)
    #ofs.close()

    mol2_filename = options.ligand + '.mol2'
    pdb_filename = options.ligand + '.pdb'

    # Write mol2 file for this molecule.
    if verbose: print("Writing input molecule as mol2...")
    outmol = oechem.OEMol(molecule)
    ofs = oechem.oemolostream()
    ofs.open(mol2_filename)
    oechem.OEWriteMolecule(ofs, outmol)
    ofs.close()
    # Use low level writer to get atom names correct (overwrites the file just written).
    ofs = oechem.oemolostream()
    ofs.open(mol2_filename)
    for (dest_atom, src_atom) in zip(outmol.GetAtoms(), molecule.GetAtoms()):
        dest_atom.SetName(src_atom.GetName())
    oechem.OEWriteMol2File(ofs, outmol, True)
    ofs.close()

    # Read and write in PDB format.
    # Each stream is closed once opened so the PDB file is complete on disk
    # when this function returns.
    if verbose: print("Converting mol2 to PDB...")
    ifs = oechem.oemolistream()
    ofs = oechem.oemolostream()
    if ifs.open(mol2_filename):
        try:
            if ofs.open(pdb_filename):
                try:
                    for mol in ifs.GetOEGraphMols():
                        oechem.OEWriteMolecule(ofs, mol)
                finally:
                    ofs.close()
        finally:
            ifs.close()

    return molecule
def run_epik(molecule, maxconf=99, verbose=False, outfile=None):
    """
    Predict pKas for a molecule by running epik (Schrodinger Suite) in scan mode.

    Several intermediate files (``epik-input.*``, ``epik-output.*``) are
    written to the current working directory, and the Epik log is copied to
    ``epik-output.<molecule title>.log``.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule read from the PDB whose pKas are to be predicted.
    maxconf : int, optional, default=99
        Currently unused.
    verbose : bool, optional, default=False
        If True, outputs more information.
    outfile : file, optional, default=None
        If specified, record compiled pKas to this output file as one line:
        the molecule title, the number of pKas, then each pKa value.

    Returns
    -------
    None

    """
    from schrodinger import structure # Requires Schrodinger Suite
    import shutil

    def write_copy(filename):
        # Write a fresh copy of the input molecule, format chosen by file extension.
        outmol = oechem.OEMol(molecule)
        ofs = oechem.oemolostream()
        ofs.open(filename)
        oechem.OEWriteMolecule(ofs, outmol)
        ofs.close()
        return outmol

    # Write mol2 file.
    if verbose: print("Writing input file as mol2...")
    outmol = write_copy('epik-input.mol2')
    # Use low level writer to get atom names correct (overwrites the file just written).
    ofs = oechem.oemolostream()
    ofs.open('epik-input.mol2')
    for (dest_atom, src_atom) in zip(outmol.GetAtoms(), molecule.GetAtoms()):
        dest_atom.SetName(src_atom.GetName())
    oechem.OEWriteMol2File(ofs, outmol, True)
    ofs.close()

    # Write sdf file.
    if verbose: print("Writing input file as sdf...")
    write_copy('epik-input.sdf')

    # Write pdb file.
    if verbose: print("Writing input file as pdb...")
    write_copy('epik-input.pdb')

    # Write input for epik.
    if verbose: print("Converting input file to Maestro format...")
    reader = structure.StructureReader("epik-input.mol2")
    writer = structure.StructureWriter("epik-input.mae")
    for st in reader:
        writer.append(st)
    reader.close()
    writer.close()

    # Run epik to enumerate predicted pKas.
    if verbose: print("Running Epik...")
    cmd = '%s/epik -imae epik-input.mae -omae epik-output.mae -scan -lowest_pka 1.0 -highest_pka 12.0 -WAIT' % (os.environ['SCHRODINGER'])
    output = commands.getoutput(cmd)
    if verbose: print(output)

    # Extract pKas from Epik output: every log line mentioning "conjugate".
    # Reading the log directly means a log without such lines yields an empty
    # list instead of a parse failure on an empty string.
    with open('epik-output.log') as logfile:
        conjugate_lines = [line.rstrip('\n') for line in logfile if 'conjugate' in line]

    pKas = list()
    for line in conjugate_lines:
        print("> %s" % line)

        # Extract information from the fixed-width columns of the log line.
        pKa = float(line[0:7].strip())
        pKa_type = line[9:13]
        atom_index = int(line[24:28].strip())
        notes = line[31:]

        state = {'pKa': pKa, 'type': pKa_type, 'atom_index': atom_index, 'notes': notes}
        pKas.append(state)

        print("pKa %8.3f %s atom %d type %s" % (pKa, pKa_type, atom_index, notes))

    # Store output.
    # Copy without a shell so titles containing spaces or shell
    # metacharacters are handled correctly.
    shutil.copy('epik-output.log', 'epik-output.%s.log' % molecule.GetTitle())

    # Write pKas only to file, if desired.
    if outfile:
        outfile.write('%s %d' % (molecule.GetTitle(), len(pKas)))
        for pKa in pKas:
            outfile.write(' %8.2f' % (pKa['pKa']))
        outfile.write('\n')

    return