Example #1
0
def SmilesToFragments(smiles, fgroup_smarts, bondOrderThreshold=1.2, chargesMol=True):
    """
    Fragment molecule at bonds below Bond Order Threshold

    Parameters
    ----------
    smiles: str
        smiles string of molecule to fragment
    fgroup_smarts:
        Functional group SMARTS definitions, forwarded unchanged to
        ``_tag_fgroups`` as its ``fgroups_smarts`` argument.
    bondOrderThreshold: float, optional, default=1.2
        Threshold forwarded to ``FragGraph``.
    chargesMol: bool, optional, default=True
        If True, the charged molecule is returned together with the fragments.

    Returns
    -------
    frags: list of OE AtomBondSets
        One entry per subgraph produced by ``FragGraph``.
    charged:
        The molecule returned by ``openeye.get_charges``. Only returned
        (as the second element of a tuple) when ``chargesMol`` is True.

    """
    # Build the molecule from SMILES and charge it, keeping one conformer.
    oemol = openeye.smiles_to_oemol(smiles)
    charged = openeye.get_charges(oemol, keep_confs=1)

    # Tag functional groups on the charged molecule.
    _tag_fgroups(charged, fgroups_smarts=fgroup_smarts)

    # Generate fragments: convert to a graph, split it, then map every
    # subgraph back onto the charged molecule.
    G = OeMolToGraph(charged)
    subgraphs = FragGraph(G, bondOrderThreshold=bondOrderThreshold)
    frags = [subgraphToAtomBondSet(G, subgraph, charged) for subgraph in subgraphs]

    if chargesMol:
        return frags, charged
    return frags
Example #2
0
def generate_fragments(mol):
    """
    Charge a molecule and build one fragment for each rotatable bond.

    Parameters
    ----------
    mol: OEMol
        Molecule to charge and fragment.

    Returns
    -------
    charged: charged OEMol
    frags: dict
        AtomBondSet for every rotatable bond, keyed by the index of the bond
        the fragment was built up from.
    """
    charged = openeye.get_charges(mol, keep_confs=1)
    tagged_rings, tagged_fgroups = tag_molecule(charged)

    frags = {}
    for rotor in charged.GetBonds():
        # Only rotatable bonds seed a fragment.
        if not rotor.IsRotor():
            continue
        frag_atoms, frag_bonds = _build_frag(bond=rotor,
                                             mol=charged,
                                             tagged_fgroups=tagged_fgroups,
                                             tagged_rings=tagged_rings)
        frags[rotor.GetIdx()] = _to_AtomBondSet(charged, frag_atoms, frag_bonds)

    return charged, frags
def enumerate_conformations(name, smiles):
    """Generate geometry and run epik.

    Parameters
    ----------
    name : str
        Molecule name; used for the output subfolder, the file prefix and
        (letters only, upper-cased, first three) the mol2 residue name.
    smiles : str
        SMILES string of the molecule.

    Notes
    -----
    Output is written below the module-level ``output_dir`` as
    ``<name>/<name>-input.mol2``, ``-epik.mae``, ``-epik.sdf`` and
    ``-epik.mol2``. If charging raises ``RuntimeError`` the traceback is
    printed and the molecule is skipped (the function returns ``None``).
    """
    # Generate molecule geometry with OpenEye
    print("Generating molecule {}".format(name))
    oe_molecule = openeye.smiles_to_oemol(smiles)
    try:
        oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
    except RuntimeError:
        traceback.print_exc()
        print("Skipping molecule " + name)
        return

    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    # From here on the base path is "<output_dir>/<name>/<name>".
    output_basepath = os.path.join(output_basepath, name)

    # Save mol2 file with residue name = first three uppercase letters
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    openeye.molecule_to_mol2(oe_molecule, mol2_file_path, residue_name=residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=True,
                         max_structures=32, ph_tolerance=10.0)

    # Convert maestro file to sdf and mol2
    schrodinger.run_structconvert(mae_file_path, output_basepath + '-epik.sdf')
    schrodinger.run_structconvert(mae_file_path, output_basepath + '-epik.mol2')
Example #4
0
def generateSMIRNOFFStructure(molecule):
    """
    Build a ParmEd structure for an OpenEye molecule (oechem.OEMol) using the
    SMIRNOFF forcefield parameters.

    The smirnoff99Frosst force field file is loaded, charges are assigned with
    openmoltools if ``checkCharges`` reports that the molecule has none, an
    OpenMM System is created from the molecule, and the resulting topology,
    system and positions are loaded into ParmEd.
    """
    from openforcefield.typing.engines.smirnoff import ForceField
    from openforcefield.typing.engines.smirnoff.forcefield_utils import create_system_from_molecule

    ffxml_path = get_data_filename('forcefield/smirnoff99Frosst.ffxml')
    with open(ffxml_path) as ffxml_file:
        forcefield = ForceField(ffxml_file)

    if checkCharges(molecule):
        charged_molecule = molecule
    else:
        # Imported lazily: only needed when the molecule arrives uncharged.
        from openmoltools.openeye import get_charges
        print("Assigning charges to molecule.")
        charged_molecule = get_charges(molecule)

    topology, system, positions = create_system_from_molecule(
        forcefield, charged_molecule)
    return parmed.openmm.load_topology(topology, system, xyz=positions)
Example #5
0
def create_molecule(iupac_name):
    """Create a molecule from an IUPAC name and charge it with ``max_confs=1``."""
    uncharged = openeye.iupac_to_oemol(iupac_name)
    return openeye.get_charges(uncharged, max_confs=1)
Example #6
0
def _generate_fragments(mol, strict_stereo=True):
    """
    Charge a molecule and build one fragment for each rotatable bond.

    Parameters
    ----------
    mol: OEMol
        Molecule to charge and fragment.
    strict_stereo: bool, optional, default=True
        Forwarded to ``openeye.get_charges`` as ``strictStereo``.

    Returns
    -------
    charged: charged OEMol
    frags: dict of AtomBondSet mapped to rotatable bond index the fragment was built up from.

    If the first bond of the charged molecule carries no 'WibergBondOrder'
    data, a warning is logged and ``False`` is returned instead of the tuple.
    """
    charged = openeye.get_charges(mol,
                                  keep_confs=1,
                                  strictStereo=strict_stereo)

    # Check if WBO were calculated; probing the first bond is enough.
    for probe in charged.GetBonds():
        try:
            probe.GetData('WibergBondOrder')
        except ValueError:
            logger().warn(
                "WBO were not calculate. Cannot fragment molecule {}".format(
                    charged.GetTitle()))
            return False
        break

    tagged_rings, tagged_fgroups = tag_molecule(charged)

    frags = {}
    for rotor in charged.GetBonds():
        # Only rotatable bonds seed a fragment.
        if not rotor.IsRotor():
            continue
        frag_atoms, frag_bonds = _build_frag(bond=rotor,
                                             mol=charged,
                                             tagged_fgroups=tagged_fgroups,
                                             tagged_rings=tagged_rings)
        frags[rotor.GetIdx()] = _to_AtomBondSet(charged, frag_atoms, frag_bonds)

    return charged, frags
def oemol_to_antechamber(m, gaff_mol2_filename, frcmod_filename, residue_name="MOL", strictStereo=False):
    """
        Charge a molecule with OpenEye and run antechamber on it, writing
        GAFF mol2 and frcmod files.

        Created by hacking openmoltools/openeye.py

        Parameters
        ----------
        m : oechem molecule object
        Molecule to charge and parameterize
        gaff_mol2_filename : str
        Destination of the mol2 file produced by antechamber
        frcmod_filename : str
        Destination of the frcmod file produced by antechamber
        residue_name : str, optional, default="MOL"
        Residue name passed to openeye.molecule_to_mol2 when the temporary
        input mol2 file is written
        strictStereo : bool, optional, default=False
        Forwarded to openeye.get_charges.
        See https://docs.eyesopen.com/omega/usage.html
    """
    # Resolve the destinations first: the work below runs inside a temporary
    # directory, where relative paths would point somewhere else.
    gaff_mol2_filename = os.path.abspath(gaff_mol2_filename)
    frcmod_filename = os.path.abspath(frcmod_filename)

    m = openeye.get_charges(m, strictStereo=strictStereo, keep_confs=1)

    # Keep antechamber's intermediate files out of the local directory.
    with enter_temp_directory():
        tmp_input = "./tmp.mol2"
        openeye.molecule_to_mol2(m, tmp_input, residue_name=residue_name)
        # charge_method=None: the charges already on the molecule are used.
        tmp_gaff_mol2_filename, tmp_frcmod_filename = amber.run_antechamber(
            "tmp", tmp_input, charge_method=None, net_charge=oechem.OENetCharge(m))
        shutil.copy(tmp_gaff_mol2_filename, gaff_mol2_filename)
        shutil.copy(tmp_frcmod_filename, frcmod_filename)
Example #8
0
    def __init__(self, cas_or_aa, min_atoms=6):
        """
        Initialize using cas numbers OR amino acid name
        Requires openmoltools.openeye and cirpy

        Arguments
            cas_or_aa (list of strings) either cas number or name of amino acid

        Optional Arguments
            min_atoms (int) - a minimum number of atoms for substructure match (default: 6)

        Raises
            ValueError if an entry of cas_or_aa cannot be resolved to a SMILES string

        Creates class variables:
            self.cas_or_aa (list of strings)
              input representing molecules to be combined
            self.smiles_strings (list of strings)
              smiles representation of molecules to be combined
            self.ligands (list of OEMol)
              openeye molecule representation of molecules to be combined
            self.title (string)
              used as an identifier for input group of molecules
            self.min_atoms (int)
              minimum number of common atoms to constitute a substructure match (default: 6)

        The remaining attributes (common_substructure, dual_topology,
        each_molecule_N, mapping_dictionaries, pdb_filename, ffxml_filename)
        are initialised to None or empty lists here.

        """

        self.cas_or_aa = cas_or_aa
        self.smiles_strings = []
        self.ligands = []
        for cas in cas_or_aa:
            smiles = cirpy.resolve(cas, 'smiles')
            if smiles is None:
                # cirpy.resolve yields None for identifiers it cannot resolve;
                # fail here with a clear message instead of inside the toolkit.
                raise ValueError("Could not resolve '%s' to a SMILES string" % cas)
            self.smiles_strings.append(smiles)
            ligand = openeye.smiles_to_oemol(smiles)
            ligand = openeye.get_charges(ligand, strictStereo=False)
            self.ligands.append(ligand)
        self.title = self.cas_or_aa[0] + "_and_analogs"
        self.min_atoms = min_atoms
        self.common_substructure = None
        self.dual_topology = None
        self.each_molecule_N = []
        self.mapping_dictionaries = []
        self.pdb_filename = None
        self.ffxml_filename = None
Example #9
0
    def __init__(self, cas_or_aa, min_atoms=6):
        """
        Initialize using cas numbers OR amino acid name
        Requires openmoltools.openeye and cirpy

        Arguments
            cas_or_aa (list of strings) either cas number or name of amino acid

        Optional Arguments
            min_atoms (int) - a minimum number of atoms for substructure match (default: 6)

        Raises
            ValueError when cirpy cannot resolve one of the identifiers to SMILES

        Creates class variables:
            self.cas_or_aa (list of strings)
              input representing molecules to be combined
            self.smiles_strings (list of strings)
              smiles representation of molecules to be combined
            self.ligands (list of OEMol)
              openeye molecule representation of molecules to be combined
            self.title (string)
              used as an identifier for input group of molecules
            self.min_atoms (int)
              minimum number of common atoms to constitute a substructure match (default: 6)

        Also sets common_substructure, dual_topology, pdb_filename and
        ffxml_filename to None, and each_molecule_N and mapping_dictionaries
        to empty lists.

        """

        self.cas_or_aa = cas_or_aa
        self.smiles_strings = []
        self.ligands = []
        for identifier in cas_or_aa:
            smiles = cirpy.resolve(identifier, 'smiles')
            if smiles is None:
                # An unresolvable identifier comes back as None; report it by
                # name rather than handing None to the OpenEye helpers.
                raise ValueError("Could not resolve '%s' to a SMILES string" % identifier)
            self.smiles_strings.append(smiles)
            uncharged = openeye.smiles_to_oemol(smiles)
            self.ligands.append(openeye.get_charges(uncharged, strictStereo=False))
        self.title = self.cas_or_aa[0]+"_and_analogs"
        self.min_atoms = min_atoms
        self.common_substructure = None
        self.dual_topology = None
        self.each_molecule_N = []
        self.mapping_dictionaries = []
        self.pdb_filename = None
        self.ffxml_filename = None
Example #10
0
def SmilesToFragments(smiles,
                      fgroup_smarts,
                      bondOrderThreshold=1.2,
                      chargesMol=True):
    """
    Fragment molecule at bonds below Bond Order Threshold

    Parameters
    ----------
    smiles: str
        smiles string of molecule to fragment
    fgroup_smarts:
        Functional group SMARTS definitions, passed through to
        ``_tag_fgroups`` as ``fgroups_smarts``.
    bondOrderThreshold: float, optional, default=1.2
        Threshold passed through to ``FragGraph``.
    chargesMol: bool, optional, default=True
        When True the charged molecule is returned as well.

    Returns
    -------
    frags: list of OE AtomBondSets
        One per subgraph returned by ``FragGraph``.
    charged:
        Result of ``openeye.get_charges``; second tuple element, present only
        when ``chargesMol`` is True.

    """
    # Charge molecule (a single conformer is kept).
    charged = openeye.get_charges(openeye.smiles_to_oemol(smiles), keep_confs=1)

    # Tag functional groups
    _tag_fgroups(charged, fgroups_smarts=fgroup_smarts)

    # Generate fragments
    G = OeMolToGraph(charged)
    frags = [
        subgraphToAtomBondSet(G, subgraph, charged)
        for subgraph in FragGraph(G, bondOrderThreshold=bondOrderThreshold)
    ]

    return (frags, charged) if chargesMol else frags
Example #11
0
def _generate_fragments(mol, strict_stereo=True):
    """
    Generate fragments from a molecule, one per rotatable bond.

    Parameters
    ----------
    mol: OEMol
        Molecule to charge and fragment.
    strict_stereo: bool, optional, default=True
        Passed to ``openeye.get_charges`` as ``strictStereo``.

    Returns
    -------
    charged: charged OEMol
    frags: dict of AtomBondSet mapped to rotatable bond index the fragment was built up from.

    ``False`` is returned instead (after logging a warning) when the first
    bond of the charged molecule has no 'WibergBondOrder' data.
    """
    charged = openeye.get_charges(mol, keep_confs=1, strictStereo=strict_stereo)

    # Check if WBO were calculated by looking at the first bond, if any.
    all_bonds = [b for b in charged.GetBonds()]
    if all_bonds:
        try:
            all_bonds[0].GetData('WibergBondOrder')
        except ValueError:
            logger().warn("WBO were not calculate. Cannot fragment molecule {}".format(charged.GetTitle()))
            return False

    tagged_rings, tagged_fgroups = tag_molecule(charged)

    # Build a fragment around every rotatable bond.
    frags = {}
    for candidate in charged.GetBonds():
        if candidate.IsRotor():
            members = _build_frag(bond=candidate, mol=charged, tagged_fgroups=tagged_fgroups, tagged_rings=tagged_rings)
            atoms, bonds = members
            frags[candidate.GetIdx()] = _to_AtomBondSet(charged, atoms, bonds)

    return charged, frags
Example #12
0
def run_epik(name, filename, residue_name, perceive_bonds=False):
    """Generate conformer with OpenEye omega, protonation states with Schrodinger Epik, and charges with OpenEye AM1-BCC.

    Parameters
    ----------
    name : str
       The name of the output directory to generate.
    filename : str
       The mol2, PDB, or SDF file to read in.
    residue_name : str
       Three uppercase letters to name residue.
       NOTE(review): currently not applied -- the input mol2 file is written
       with ``OEWriteMol2File`` and no residue name is passed; confirm intent.
    perceive_bonds : bool, optional, default=False
       If True, will use geometry to perceive connectivity.
       This is necessary for PDB files.

    Notes
    -----
    Files are written below the module-level ``output_dir`` with the prefix
    ``<name>/<name>``: ``-input.mol2``, ``-epik.mae``, ``-epik.sdf``,
    ``-epik.mol2``, ``-epik-charged.mol2`` and ``-state-penalties.out``.
    Protomers/tautomers whose charging fails are reported and skipped.

    """
    # This import binds `oechem` as a local name for the whole function, so it
    # has to run before the first use below (perceive_bonds=True used to raise
    # UnboundLocalError because the import came later).
    from openeye import oechem

    # Generate molecule geometry with OpenEye
    print("Generating molecule %s from %s" % (name, filename))
    oe_molecule = read_molecules(filename)
    if perceive_bonds:
        oechem.OEDetermineConnectivity(oe_molecule)

    # Assign geometry and charges with Omega
    oe_molecule = openeye.get_charges(oe_molecule, max_confs=1, strictStereo=False, normalize=True, keep_confs=1)

    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    # From here on the base path is "<output_dir>/<name>/<name>".
    output_basepath = os.path.join(output_basepath, name)

    # Save the charged molecule as the mol2 input for epik
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    ofs = oechem.oemolostream(mol2_file_path)
    oechem.OEWriteMol2File(ofs, oe_molecule, True, False)
    ofs.close()

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100, min_probability=np.exp(-6), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Load the SDF once up front; only its length is used, for progress output.
    uncharged_molecules = read_molecules(output_sdf_filename)

    # Open the SDF file for streaming.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEMol()

    # Open the MOL2 file for streaming.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.
    charged_molecules = list()
    index = 0
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            # NOTE(review): the mol2 record is read in lockstep with the SDF
            # record but is not used afterwards.
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)
            index += 1
            print("Charging molecule %d / %d" % (index, len(uncharged_molecules)))
            try:
                # Charge molecule.
                charged_molecule = openeye.get_charges(sdf_molecule, max_confs=800, strictStereo=False, normalize=True, keep_confs=None)

                # Store tags: copy the SD data of the SDF record onto the charged molecule.
                oechem.OECopySDData(charged_molecule, sdf_molecule)

                charged_molecules.append(charged_molecule)
            except Exception as e:
                # Deliberate best effort: one failing state must not abort the run.
                print(e)
                print("Skipping protomer/tautomer because of failed charging.")
    finally:
        # Clean up, also when reading fails unexpectedly.
        ifs_sdf.close()
        ifs_mol2.close()

    # Write molecules.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    ofs = oechem.oemolostream(charged_mol2_filename)
    for charged_molecule in charged_molecules:
        oechem.OEWriteMolecule(ofs, charged_molecule)
    ofs.close()

    # Write state penalties, one per line; only the Epik state penalty is recorded.
    with open(output_basepath + '-state-penalties.out', 'w') as outfile:
        for charged_molecule in charged_molecules:
            epik_State_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            outfile.write('%16.8f\n' % epik_State_Penalty)
def generateResidueTemplate(molecule, residue_atoms=None):
    """
    Generate an residue template for simtk.openmm.app.ForceField using GAFF/AM1-BCC.

    This requires the OpenEye toolkit.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule to be parameterized.
        The molecule must have explicit hydrogens.
        Charge will be inferred from the net formal charge.
    residue_atoms : collection of OEAtom, optional, default=None
        If not None, only these atoms have their partial charges adjusted and
        count as internal to the residue when bonds are classified.
        If None, all atoms of the parameterized molecule are used.

    Returns
    -------
    template : simtk.openmm.app.forcefield._TemplateData
        Residue template for ForceField using atom types and parameters from `gaff.xml`.
        Named after the molecule title; every atom of the parameterized
        molecule is added to it.
    additional_parameters_ffxml : str
        Contents of ForceField `ffxml` file defining additional parameters from parmchk(2).

    Notes
    -----
    Charges are generated with ``get_charges(..., strictStereo=False, keep_confs=1)``.
    Intermediate antechamber files live in a temporary directory that is
    removed before returning.

    """
    import shutil
    import tempfile
    from openeye import oechem

    # The template is named after the molecule title.
    # TODO: Can we come up with a more intelligent name?
    template_name = molecule.GetTitle()

    # Compute net formal charge.
    oechem.OEAssignFormalCharges(molecule)
    charges = [atom.GetFormalCharge() for atom in molecule.GetAtoms()]
    net_charge = np.array(charges).sum()

    # Generate canonical AM1-BCC charges and a reference conformation.
    molecule = get_charges(molecule, strictStereo=False, keep_confs=1)

    # Create temporary directory for running antechamber.
    tmpdir = tempfile.mkdtemp()
    try:
        input_mol2_filename = os.path.join(tmpdir, template_name + '.tripos.mol2')
        gaff_mol2_filename = os.path.join(tmpdir, template_name + '.gaff.mol2')
        frcmod_filename = os.path.join(tmpdir, template_name + '.frcmod')

        # Write Tripos mol2 file as antechamber input.
        ofs = oechem.oemolostream(input_mol2_filename)
        oechem.OEWriteMolecule(ofs, molecule)
        ofs.close()

        # Parameterize the molecule with antechamber.
        # charge_method=None keeps the charges already written to the mol2 file.
        run_antechamber(template_name, input_mol2_filename, charge_method=None, net_charge=net_charge, gaff_mol2_filename=gaff_mol2_filename, frcmod_filename=frcmod_filename)

        # Read the resulting GAFF mol2 file back into the molecule.
        ifs = oechem.oemolistream(gaff_mol2_filename)
        ifs.SetFlavor(oechem.OEFormat_MOL2, oechem.OEIFlavor_MOL2_DEFAULT | oechem.OEIFlavor_MOL2_M2H | oechem.OEIFlavor_MOL2_Forcefield)
        oechem.OEReadMolecule(ifs, molecule)
        ifs.close()

        # If residue_atoms is None, add all atoms to the residue.
        if residue_atoms is None:
            residue_atoms = [atom for atom in molecule.GetAtoms()]

        # Modify partial charges so that charge on residue atoms is integral.
        residue_charge = 0.0
        sum_of_absolute_charge = 0.0
        for atom in residue_atoms:
            charge = atom.GetPartialCharge()
            residue_charge += charge
            sum_of_absolute_charge += abs(charge)
        excess_charge = residue_charge - net_charge
        if sum_of_absolute_charge == 0.0:
            # All charges are zero: avoid dividing by zero (every weight is 0 anyway).
            sum_of_absolute_charge = 1.0
        for atom in residue_atoms:
            charge = atom.GetPartialCharge()
            # Remove each atom's share of the excess, weighted by |charge|, so
            # the residue total becomes net_charge. (Adding the excess, as the
            # code did before, doubles the deviation instead of cancelling it.)
            atom.SetPartialCharge(charge - excess_charge * (abs(charge) / sum_of_absolute_charge))

        # Create residue template.
        template = ForceField._TemplateData(template_name)
        for atom in molecule.GetAtoms():
            atomname = atom.GetName()
            typename = atom.GetType()
            element = Element.getByAtomicNumber(atom.GetAtomicNum())
            charge = atom.GetPartialCharge()
            parameters = {'charge': charge}
            atom_template = ForceField._TemplateAtomData(atomname, typename, element, parameters)
            template.atoms.append(atom_template)
        for bond in molecule.GetBonds():
            # Bonds with both ends in the residue are internal; bonds with one
            # end inside become external bonds on the inside atom.
            if (bond.GetBgn() in residue_atoms) and (bond.GetEnd() in residue_atoms):
                template.addBondByName(bond.GetBgn().GetName(), bond.GetEnd().GetName())
            elif (bond.GetBgn() in residue_atoms) and (bond.GetEnd() not in residue_atoms):
                template.addExternalBondByName(bond.GetBgn().GetName())
            elif (bond.GetBgn() not in residue_atoms) and (bond.GetEnd() in residue_atoms):
                template.addExternalBondByName(bond.GetEnd().GetName())

        # Generate ffxml file contents for parmchk-generated frcmod output.
        leaprc = StringIO("parm = loadamberparams %s" % frcmod_filename)
        params = parmed.amber.AmberParameterSet.from_leaprc(leaprc)
        params = parmed.openmm.OpenMMParameterSet.from_parameterset(params)
        ffxml = StringIO()
        params.write(ffxml)

        return template, ffxml.getvalue()
    finally:
        # Everything needed has been read into memory; drop the scratch files.
        shutil.rmtree(tmpdir, ignore_errors=True)
def generateResidueTemplate(molecule, residue_atoms=None, normalize=True, gaff_version='gaff'):
    """
    Generate an residue template for simtk.openmm.app.ForceField using GAFF/AM1-BCC.

    This requires the OpenEye toolkit.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule to be parameterized.
        The molecule must have explicit hydrogens.
        Net charge will be inferred from the net formal charge on each molecule.
        Partial charges will be determined automatically using oequacpac and canonical AM1-BCC charging rules.
    residue_atoms : collection of OEAtom, optional, default=None
        If not None, only these atoms have their partial charges adjusted and
        count as internal to the residue when bonds are classified.
        If None, all atoms of the parameterized molecule are used.
    normalize : bool, optional, default=True
        If True, normalize the molecule by checking aromaticity, adding
        explicit hydrogens, and renaming by IUPAC name.
    gaff_version : str, default = 'gaff'
        One of ['gaff', 'gaff2']; selects which atom types to use.


    Returns
    -------
    template : simtk.openmm.app.forcefield._TemplateData
        Residue template for ForceField using atom types and parameters from `gaff.xml` or `gaff2.xml`.
    additional_parameters_ffxml : str
        Contents of ForceField `ffxml` file defining additional parameters from parmchk(2).

    Notes
    -----
    The residue template is named after the molecule title plus a UUID suffix.
    Charges are generated with ``get_charges(..., strictStereo=False, keep_confs=1)``.
    Atom names in molecules will be assigned Tripos atom names if any are blank or not unique.
    Intermediate antechamber files live in a temporary directory that is
    removed before returning.

    """
    import shutil
    import tempfile
    from uuid import uuid4
    from openeye import oechem

    # Set the template name based on the molecule title plus a globally unique UUID.
    template_name = molecule.GetTitle() + '-' + str(uuid4())

    # Make sure atom names are usable (see Notes).
    _ensureUniqueAtomNames(molecule)

    # Compute net formal charge.
    net_charge = _computeNetCharge(molecule)

    # Generate canonical AM1-BCC charges and a reference conformation.
    molecule = get_charges(molecule, strictStereo=False, keep_confs=1, normalize=normalize)

    # DEBUG: This may be necessary.
    molecule.SetTitle('MOL')

    # Create temporary directory for running antechamber.
    tmpdir = tempfile.mkdtemp()
    try:
        prefix = 'molecule'
        input_mol2_filename = os.path.join(tmpdir, prefix + '.tripos.mol2')
        gaff_mol2_filename = os.path.join(tmpdir, prefix + '.gaff.mol2')
        frcmod_filename = os.path.join(tmpdir, prefix + '.frcmod')

        # Write Tripos mol2 file as antechamber input.
        _writeMolecule(molecule, input_mol2_filename, standardize=normalize)

        # Parameterize the molecule with antechamber.
        # charge_method=None keeps the charges already written to the mol2 file.
        run_antechamber(template_name, input_mol2_filename, charge_method=None, net_charge=net_charge, gaff_mol2_filename=gaff_mol2_filename, frcmod_filename=frcmod_filename, gaff_version=gaff_version)

        # Read the resulting GAFF mol2 file back into the molecule.
        ifs = oechem.oemolistream(gaff_mol2_filename)
        ifs.SetFlavor(oechem.OEFormat_MOL2, oechem.OEIFlavor_MOL2_DEFAULT | oechem.OEIFlavor_MOL2_M2H | oechem.OEIFlavor_MOL2_Forcefield)
        oechem.OEReadMolecule(ifs, molecule)
        ifs.close()

        # If residue_atoms is None, add all atoms to the residue.
        if residue_atoms is None:
            residue_atoms = [atom for atom in molecule.GetAtoms()]

        # Modify partial charges so that charge on residue atoms is integral.
        residue_charge = 0.0
        sum_of_absolute_charge = 0.0
        for atom in residue_atoms:
            charge = atom.GetPartialCharge()
            residue_charge += charge
            sum_of_absolute_charge += abs(charge)
        excess_charge = residue_charge - net_charge
        if sum_of_absolute_charge == 0.0:
            # All charges are zero: avoid dividing by zero (every weight is 0 anyway).
            sum_of_absolute_charge = 1.0
        for atom in residue_atoms:
            charge = atom.GetPartialCharge()
            # Remove each atom's share of the excess, weighted by |charge|, so
            # the residue total becomes net_charge. (Adding the excess, as the
            # code did before, doubles the deviation instead of cancelling it.)
            atom.SetPartialCharge(charge - excess_charge * (abs(charge) / sum_of_absolute_charge))

        # Create residue template.
        template = ForceField._TemplateData(template_name)
        for atom in molecule.GetAtoms():
            atomname = atom.GetName()
            typename = atom.GetType()
            element = Element.getByAtomicNumber(atom.GetAtomicNum())
            charge = atom.GetPartialCharge()
            parameters = {'charge': charge}
            atom_template = ForceField._TemplateAtomData(atomname, typename, element, parameters)
            template.atoms.append(atom_template)
        for bond in molecule.GetBonds():
            # Bonds with both ends in the residue are internal; bonds with one
            # end inside become external bonds on the inside atom.
            if (bond.GetBgn() in residue_atoms) and (bond.GetEnd() in residue_atoms):
                template.addBondByName(bond.GetBgn().GetName(), bond.GetEnd().GetName())
            elif (bond.GetBgn() in residue_atoms) and (bond.GetEnd() not in residue_atoms):
                template.addExternalBondByName(bond.GetBgn().GetName())
            elif (bond.GetBgn() not in residue_atoms) and (bond.GetEnd() in residue_atoms):
                template.addExternalBondByName(bond.GetEnd().GetName())

        # Generate ffxml file contents for parmchk-generated frcmod output.
        leaprc = StringIO('parm = loadamberparams %s' % frcmod_filename)
        params = parmed.amber.AmberParameterSet.from_leaprc(leaprc)
        params = parmed.openmm.OpenMMParameterSet.from_parameterset(params)
        ffxml = StringIO()
        params.write(ffxml)

        return template, ffxml.getvalue()
    finally:
        # Everything needed has been read into memory; drop the scratch files.
        shutil.rmtree(tmpdir, ignore_errors=True)
def generateForceFieldFromMolecules(molecules):
    """
    Generate ffxml file containing additional parameters and residue templates for simtk.openmm.app.ForceField using GAFF/AM1-BCC.

    This requires the OpenEye toolkit.

    Parameters
    ----------
    molecules : list of openeye.oechem.OEMol
        The molecules to be parameterized.
        All molecules must have explicit hydrogens.
        Net charge will be inferred from the net formal charge on each molecule.
        Partial charges will be determined automatically using oequacpac and canonical AM1-BCC charging rules.

    Returns
    -------
    ffxml : str
        Contents of ForceField `ffxml` file defining additional parameters from parmchk(2) and residue templates.

    Raises
    ------
    Exception
        If a molecule title is '<0>' or two molecules share the same title.

    Notes
    -----
    Charges are generated with ``get_charges(..., strictStereo=False, keep_confs=1)``.
    Atom names in molecules will be assigned Tripos atom names if any are blank or not unique.
    Antechamber runs inside a temporary working directory; the previous
    working directory is restored and the temporary directory removed before
    returning, also when an error occurs.

    """
    import shutil
    import tempfile

    # Check template names are unique.
    template_names = set()
    for molecule in molecules:
        template_name = molecule.GetTitle()
        if template_name == '<0>':
            raise Exception("Molecule '%s' has invalid name" % template_name)
        if template_name in template_names:
            raise Exception("Molecule '%s' has template name collision." % template_name)
        template_names.add(template_name)

    # Process molecules from inside a scratch directory, since the
    # per-molecule files below are addressed by relative names.
    tmpdir = tempfile.mkdtemp()
    olddir = os.getcwd()
    os.chdir(tmpdir)
    try:
        leaprc_lines = []
        for (molecule_index, molecule) in enumerate(molecules):
            # Make sure atom names are usable (see Notes).
            _ensureUniqueAtomNames(molecule)

            # Compute net formal charge.
            net_charge = _computeNetCharge(molecule)

            # Generate canonical AM1-BCC charges and a reference conformation.
            molecule = get_charges(molecule, strictStereo=False, keep_confs=1)

            # Create a unique prefix.
            prefix = 'molecule%010d' % molecule_index

            # File names are relative to the scratch directory.
            input_mol2_filename = prefix + '.tripos.mol2'
            gaff_mol2_filename = prefix + '.gaff.mol2'
            frcmod_filename = prefix + '.frcmod'

            # Write Tripos mol2 file as antechamber input.
            _writeMolecule(molecule, input_mol2_filename)

            # Parameterize the molecule with antechamber.
            # charge_method=None keeps the charges already written to the mol2 file.
            run_antechamber(prefix, input_mol2_filename, charge_method=None, net_charge=net_charge, gaff_mol2_filename=gaff_mol2_filename, frcmod_filename=frcmod_filename)

            # Append to leaprc input for parmed.
            leaprc_lines.append('%s = loadmol2 %s\n' % (prefix, gaff_mol2_filename))
            leaprc_lines.append('loadamberparams %s\n' % frcmod_filename)

        # Generate ffxml file contents for parmchk-generated frcmod output.
        leaprc = StringIO(''.join(leaprc_lines))
        params = parmed.amber.AmberParameterSet.from_leaprc(leaprc)
        params = parmed.openmm.OpenMMParameterSet.from_parameterset(params)
        ffxml = StringIO()
        params.write(ffxml)
    finally:
        # Leave the scratch directory before deleting it, and never leave the
        # caller stranded in it when something above fails.
        os.chdir(olddir)
        shutil.rmtree(tmpdir, ignore_errors=True)

    return ffxml.getvalue()
Example #16
0
def enumerate_conformations(name,
                            pdbfile=None,
                            smiles=None,
                            pdbname=None,
                            pH=7.4):
    """Run Epik to get protonation states using PDB residue templates for naming.

    The ligand is retrieved from RCSB Ligand Expo (PDB file for atom names,
    SDF file for everything else), protonation states are enumerated with
    Epik, and every state is charged.  Output file names are built by
    appending suffixes to ``name``; the run log is also written to
    ``log.txt``.

    Parameters
    ----------
    name : str
       Common name of molecule (used as prefix of all output file names)
    pdbfile : str, optional
       Currently unused: input from a PDB file is disabled.
    smiles : str, optional
       Isomeric SMILES string. Currently unused: SMILES input is disabled.
    pdbname : str
       Three-letter PDB code (e.g. 'DB8'). If it contains several
       space-separated entries, only the first one is used.
    pH : float, optional, default=7.4
       pH value passed to Epik.

    Returns
    -------
    log : str
        Text log of the run (the same text that is written to ``log.txt``).
    success_status : bool
        False if charging failed for at least one state, True otherwise.

    Raises
    ------
    Exception
        If ``pdbname`` is not provided.
    """
    # Create output subfolder
    # output_basepath = os.path.join(output_dir, name)
    # if not os.path.isdir(output_basepath):
    #     os.mkdir(output_basepath)
    # output_basepath = os.path.join(output_basepath, name)

    oehandler = openeye.oechem.OEThrow
    # Redirect OpenEye messages to a string stream so they can be added to the log.
    oss = oechem.oeosstream()
    oehandler.SetOutputStream(oss)
    log = "New run:\nPDB code: {pdbname}; Molecule: {name}; pH {pH}\n".format(
        pdbname=pdbname, name=name, pH=pH)
    success_status = True

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            log += "Splitting '%s' into first entry only: '%s'" % (pdbname,
                                                                   pdbnames[0])
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (
            pdbname[0], pdbname, pdbname)
        pdb_filename = name + '-rcsb_download.pdb'
        log += "Retrieving PDB structure from RCSB ligand expo: {}.\n".format(
            pdb_filename)
        retrieve_url(url, pdb_filename)
        log += "Parsing PDB file.\n"
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (
            pdbname[0], pdbname, pdbname)
        sdf_filename = name + '-rcsb_download.sdf'
        log += "Retrieving SDF structure from RCSB ligand expo: {}.\n".format(
            sdf_filename)
        retrieve_url(url, sdf_filename)
        log += "Parsing SDF file.\n"
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF with the names from the PDB file
        # (atoms are paired in iteration order).
        log += "Canonicalizing atom names.\n"
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(),
                                        pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        log += "Assign atom type names.\n"
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname

    # For the moment, disabling these two types of input
    # elif smiles:
    #     # Generate molecule geometry with OpenEye
    #     logging.info(("Generating molecule {}".format(name)))
    #     oe_molecule = openeye.smiles_to_oemol(smiles)
    #     # Assign Tripos atom types
    #     oechem.OETriposAtomTypeNames(oe_molecule)
    #     oechem.OETriposBondTypeNames(oe_molecule)
    #     try:
    #         logging.info("Charging initial")
    #         write_mol2_preserving_atomnames(name + '-debug.mol2', oe_molecule, 'debug')
    #         oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
    #     except RuntimeError as e:
    #         traceback.print_exc()
    #         logging.info(("Skipping molecule " + name))
    #         return
    #     residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    #     logging.info("resname = %s", residue_name)
    #     oe_molecule.SetTitle(residue_name) # fix iupac name issue with mol2convert
    # elif pdbfile:
    #     residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    #     logging.info("Loading molecule molecule {0} from {1}".format(name, pdbfile))
    #     oe_molecule = read_molecule(pdbfile)
    #     # Assign Tripos atom types
    #     oechem.OETriposAtomTypeNames(oe_molecule)
    #     oechem.OETriposBondTypeNames(oe_molecule)
    #     try:
    #         logging.info("Charging initial")
    #         write_mol2_preserving_atomnames(name + '-debug.mol2', oe_molecule, 'debug')
    #         oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
    #     except RuntimeError as e:
    #         traceback.print_exc()
    #         logging.info(("Skipping molecule " + name))
    #         return
    else:
        raise Exception('Must provide SMILES string or pdbname, or pdbfile')

    # Save mol2 file, preserving atom names
    log += "Running Epik.\n"
    mol2_file_path = name + '-before_epik.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = name + '-epik.mae'
    schrodinger.run_epik(mol2_file_path,
                         mae_file_path,
                         tautomerize=False,
                         max_structures=50,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY),
                         ph=pH)

    log += "Epik run completed.\n"
    # Convert maestro file to sdf and mol2
    output_sdf_filename = name + '-after_epik.sdf'
    output_mol2_filename = name + '-after_epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.

    # reset count of error handler
    oehandler.Clear()
    log += "Assigning charges to protonation states.\n"
    charged_molecules = list()
    index = 0
    failed_states = set()
    store_failure_message = "Could not store result, most likely failed during Omega step!\n"
    # The SDF and MOL2 streams are read in lockstep: one record of each per state.
    while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
        oechem.OEReadMolecule(ifs_mol2, mol2_molecule)

        index += 1
        log += "State {0:d}\n".format(index)
        # Reset per state so that the failure handler below can never pick up
        # the conformers that were generated for a previous state.
        charged_molecule_conformers = None
        try:
            # Charge molecule.
            charged_molecule_conformers = omtoe.get_charges(mol2_molecule,
                                                            max_confs=800,
                                                            strictStereo=False,
                                                            normalize=True,
                                                            keep_confs=-1)

            log += "Charging stage output:\n"
            OEOutput = str(oss)
            log += OEOutput
            log += "\nCharging state completed.\n"

            # Restore coordinates to original
            charged_molecule = select_conformers(charged_molecule_conformers,
                                                 mol2_molecule,
                                                 keep_confs=None)

            # Assign Tripos types
            oechem.OETriposAtomTypeNames(charged_molecule)
            oechem.OETriposBondTypeNames(charged_molecule)
            # Store tags.
            oechem.OECopySDData(charged_molecule, sdf_molecule)
            # Store molecule
            charged_molecules.append(charged_molecule)

            # Check for failure in the log
            openeye_charge_log_parser(OEOutput, True)

            oehandler.Clear()

        except Exception as e:
            failed_states.add(index)
            logging.info(e)
            log += "State failed charging.\n"
            log += str(e)
            log += "\n"

            filename_failure = name + '-conformers-failed-state-{}-.mol2'.format(
                index)
            if charged_molecule_conformers is None:
                # Charging raised before any conformers were returned.
                log += store_failure_message
            else:
                # Best effort: keep the conformers of the failed state for inspection.
                try:
                    write_mol2_preserving_atomnames(filename_failure,
                                                    charged_molecule_conformers,
                                                    residue_name)
                except Exception:
                    log += store_failure_message

            success_status = False
            oehandler.Clear()

    # Clean up
    ifs_sdf.close()
    ifs_mol2.close()

    # Write state penalties.
    with open(name + '-state-penalties.out', 'w') as outfile:
        for (index, charged_molecule) in enumerate(charged_molecules):
            # Get Epik data. Only the state penalty is written out; the other
            # fields are parsed but not used further.
            log += "Writing Epik data for state {:d}\n".format(index + 1)
            epik_Ionization_Penalty = float(
                oechem.OEGetSDData(charged_molecule,
                                   "r_epik_Ionization_Penalty"))
            epik_Ionization_Penalty_Charging = float(
                oechem.OEGetSDData(charged_molecule,
                                   "r_epik_Ionization_Penalty_Charging"))
            epik_Ionization_Penalty_Neutral = float(
                oechem.OEGetSDData(charged_molecule,
                                   "r_epik_Ionization_Penalty_Neutral"))
            epik_State_Penalty = float(
                oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            epik_Tot_Q = int(
                oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))

            outfile.write('%16.8f\n' % epik_State_Penalty)

    # Write as PDB
    charged_pdb_filename = name + '-charged_output.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    flavor = (oechem.OEOFlavor_PDB_CurrentResidues
              | oechem.OEOFlavor_PDB_ELEMENT
              | oechem.OEOFlavor_PDB_BONDS
              | oechem.OEOFlavor_PDB_HETBONDS
              | oechem.OEOFlavor_PDB_BOTH)
    ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
    for charged_molecule in charged_molecules:
        # Fix residue names
        for atom in charged_molecule.GetAtoms():
            residue = oechem.OEAtomGetResidue(atom)
            residue.SetName(residue_name)
            oechem.OEAtomSetResidue(atom, residue)
        oechem.OEWriteMolecule(ofs, charged_molecule)
    ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = name + '-charged_output.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules,
                                    residue_name)
    log += "Run completed.\n"
    if success_status:
        log += "Status: Success\n"
    else:
        log += "Status: Failure\n"
        log += "Failed states: {}\n".format(" ".join(
            [str(state) for state in sorted(failed_states)]))

    with open("log.txt", 'w') as logfile:
        logfile.write(log)

    return log, success_status
Example #17
0
                                       return_molecules=True)
    # Generate figure
    atom_indices = utils.tag_conjugated_bond(molecule, tautomers=tautomers)
    utils.depict_conjugation(molecule,
                             height=700,
                             width=1000,
                             fname='images/{}_oe_conj.png'.format(
                                 molecule.GetTitle()),
                             label=None)

# In[9]:

# Add OpenEye WBO to depiction
for molecule in mollist:
    # Generate charges
    charged = openeye.get_charges(molecule)
    charged.SetTitle(molecule.GetName())
    atom_indices = utils.tag_conjugated_bond(charged,
                                             tag='WibergBondOrder',
                                             threshold=1.05)
    utils.depict_conjugation(charged,
                             height=700,
                             width=1000,
                             fname='images/{}_oe_labeled_1.05.png'.format(
                                 molecule.GetTitle()),
                             label='WibergBondOrder')
    atom_indices = utils.tag_conjugated_bond(charged,
                                             tag='WibergBondOrder',
                                             threshold=1.2)
    utils.depict_conjugation(charged,
                             height=700,
Example #18
0
# Make sure the output directory for the mol2 files exists.
if not os.path.exists(mol2_directory_path):
    os.makedirs(mol2_directory_path)
    print("{} directory created.".format(mol2_directory_path))

print("Generating charged OEMol molecules...")

# Dictionary to keep track of failed molecules
failed_molecules_dict = {}

# Generate charges for an OpenEye OEMol molecule. It will return  molecule with OpenEye's recommended AM1BCC
# charge selection scheme.

for key, value in eMolID_oemol_dict.items():
    print("Generating conformer for ", key, "...")
    try:
        oe_molecule = omtoe.get_charges(value, keep_confs=1)
    except RuntimeError:
        print("Conformation generation failed for {}.".format(key))
        # Save failed molecule to failed_molecules_dict
        failed_molecules_dict[key] = value
        # Nothing to write for this key: without skipping, the mol2 file would
        # be written from the previous iteration's molecule (or fail with a
        # NameError if the very first molecule is the one that failed).
        continue

    mol2_filename = mol2_directory_path + "/" + str(key) + ".mol2"
    omtoe.molecule_to_mol2(oe_molecule, tripos_mol2_filename=mol2_filename)
    print("Mol2 file {} generated.".format(mol2_filename))

print("")
print("Conformer generation for {} molecules failed.".format(
    len(failed_molecules_dict)))

# Remove failed molecules from oMolID_oemol_dict dictionary
for key, value in failed_molecules_dict.items():
Example #19
0
def generateResidueTemplate(molecule,
                            residue_atoms=None,
                            normalize=True,
                            gaff_version='gaff'):
    """
    Generate an residue template for simtk.openmm.app.ForceField using GAFF/AM1-BCC.

    This requires the OpenEye toolkit.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule to be parameterized.
        The molecule must have explicit hydrogens.
        Net charge will be inferred from the net formal charge on each molecule.
        Partial charges will be determined automatically using oequacpac and canonical AM1-BCC charging rules.
    residue_atoms : collection of OEAtom, optional, default=None
        If not None, only the atoms in this collection are treated as residue
        atoms: their partial charges are adjusted to sum to the net charge, and
        bonds leaving the collection are registered as external bonds.
        If None, all atoms of the molecule are used.
    normalize : bool, optional, default=True
        If True, normalize the molecule by checking aromaticity, adding
        explicit hydrogens, and renaming by IUPAC name.
    gaff_version : str, default = 'gaff'
        One of ['gaff', 'gaff2']; selects which atom types to use.


    Returns
    -------
    template : simtk.openmm.app.forcefield._TemplateData
        Residue template for ForceField using atom types and parameters from `gaff.xml` or `gaff2.xml`.
    additional_parameters_ffxml : str
        Contents of ForceField `ffxml` file defining additional parameters from parmchk(2).

    Notes
    -----
    The residue template is named after the molecule title followed by a UUID.
    This method preserves stereochemistry during AM1-BCC charge parameterization.
    Atom names in molecules will be assigned Tripos atom names if any are blank or not unique.
    The temporary directory used for running antechamber is removed before returning.

    """
    # Set the template name based on the molecule title plus a globally unique UUID.
    from uuid import uuid4
    template_name = molecule.GetTitle() + '-' + str(uuid4())

    # Make sure atom names are unique before the molecule is written out.
    _ensureUniqueAtomNames(molecule)

    # Compute net formal charge.
    net_charge = _computeNetCharge(molecule)

    # Generate canonical AM1-BCC charges and a reference conformation.
    molecule = get_charges(molecule,
                           strictStereo=False,
                           keep_confs=1,
                           normalize=normalize)

    # DEBUG: This may be necessary.
    molecule.SetTitle('MOL')

    # Create temporary directory for running antechamber; everything that
    # needs the files in it happens inside the try block so that the directory
    # is removed even if a step fails.
    import shutil
    import tempfile
    tmpdir = tempfile.mkdtemp()
    try:
        prefix = 'molecule'
        input_mol2_filename = os.path.join(tmpdir, prefix + '.tripos.mol2')
        gaff_mol2_filename = os.path.join(tmpdir, prefix + '.gaff.mol2')
        frcmod_filename = os.path.join(tmpdir, prefix + '.frcmod')

        # Write Tripos mol2 file as antechamber input.
        _writeMolecule(molecule, input_mol2_filename, standardize=normalize)

        # Parameterize the molecule with antechamber.
        run_antechamber(template_name,
                        input_mol2_filename,
                        charge_method=None,
                        net_charge=net_charge,
                        gaff_mol2_filename=gaff_mol2_filename,
                        frcmod_filename=frcmod_filename,
                        gaff_version=gaff_version)

        # Read the resulting GAFF mol2 file back into `molecule`.
        from openeye import oechem
        ifs = oechem.oemolistream(gaff_mol2_filename)
        ifs.SetFlavor(
            oechem.OEFormat_MOL2, oechem.OEIFlavor_MOL2_DEFAULT
            | oechem.OEIFlavor_MOL2_M2H | oechem.OEIFlavor_MOL2_Forcefield)
        oechem.OEReadMolecule(ifs, molecule)
        ifs.close()

        # Generate ffxml file contents for parmchk-generated frcmod output.
        leaprc = StringIO('parm = loadamberparams %s' % frcmod_filename)
        params = parmed.amber.AmberParameterSet.from_leaprc(leaprc)
        params = parmed.openmm.OpenMMParameterSet.from_parameterset(params)
        ffxml = StringIO()
        params.write(ffxml)
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)

    # If residue_atoms is None, add all atoms to the residue.
    if residue_atoms is None:
        residue_atoms = list(molecule.GetAtoms())

    # Modify partial charges so that charge on residue atoms is integral.
    # The excess charge is distributed over the residue atoms proportionally
    # to the magnitude of each atom's partial charge.
    residue_charge = 0.0
    sum_of_absolute_charge = 0.0
    for atom in residue_atoms:
        charge = atom.GetPartialCharge()
        residue_charge += charge
        sum_of_absolute_charge += abs(charge)
    excess_charge = residue_charge - net_charge
    if sum_of_absolute_charge == 0.0:
        # Avoid division by zero when all partial charges are zero.
        sum_of_absolute_charge = 1.0
    for atom in residue_atoms:
        charge = atom.GetPartialCharge()
        atom.SetPartialCharge(charge + excess_charge *
                              (abs(charge) / sum_of_absolute_charge))

    # Create residue template.
    template = ForceField._TemplateData(template_name)
    for atom in molecule.GetAtoms():
        atomname = atom.GetName()
        typename = atom.GetType()
        element = Element.getByAtomicNumber(atom.GetAtomicNum())
        parameters = {'charge': atom.GetPartialCharge()}
        atom_template = ForceField._TemplateAtomData(atomname, typename,
                                                     element, parameters)
        template.atoms.append(atom_template)
    for bond in molecule.GetBonds():
        bgn = bond.GetBgn()
        end = bond.GetEnd()
        bgn_in_residue = bgn in residue_atoms
        end_in_residue = end in residue_atoms
        if bgn_in_residue and end_in_residue:
            # Both ends inside the residue: regular bond.
            template.addBondByName(bgn.GetName(), end.GetName())
        elif bgn_in_residue:
            # Only one end inside the residue: external bond on that atom.
            template.addExternalBondByName(bgn.GetName())
        elif end_in_residue:
            template.addExternalBondByName(end.GetName())

    return template, ffxml.getvalue()
def generateForceFieldFromMolecules(molecules, ignoreFailures=False, generateUniqueNames=False, normalize=True, gaff_version='gaff'):
    """
    Generate ffxml file containing additional parameters and residue templates for simtk.openmm.app.ForceField using GAFF/AM1-BCC.

    This requires the OpenEye toolkit.

    Parameters
    ----------
    molecules : list of openeye.oechem.OEMol
        The molecules to be parameterized.
        All molecules must have explicit hydrogens.
        Net charge will be inferred from the net formal charge on each molecule.
        Partial charges will be determined automatically using oequacpac and canonical AM1-BCC charging rules.
    ignoreFailures: bool, optional, default=False
        Determines whether to add a molecule that fails charging to the list of
        failed molecules and skip it (True), or raise an Exception (False).
    generateUniqueNames : bool, optional, default=False
        If False, molecule titles are checked up front to be valid and unique.
        If True, this check is skipped.
        NOTE(review): the per-molecule template name that used to be computed
        in the loop was never used, so this flag has no other effect here.
    normalize : bool, optional, default=True
        If True, normalize the molecule by checking aromaticity, adding
        explicit hydrogens, and renaming by IUPAC name.
    gaff_version : str, default = 'gaff'
        One of ['gaff', 'gaff2']; selects which atom types to use.

    Returns
    -------
    ffxml : str
        Contents of ForceField `ffxml` file defining additional parameters from parmchk(2) and residue templates.
    failed_molecule_list : list of openeye.oechem.OEMol
        List of the oemols that could not be parameterized. Only returned if ignoreFailures=True

    Raises
    ------
    Exception
        If generateUniqueNames is False and a molecule title is '<0>' or
        occurs more than once.

    Notes
    -----
    This method preserves stereochemistry during AM1-BCC charge parameterization.
    Residue template names will be set from molecule names.
    Atom names in molecules will be assigned Tripos atom names if any are blank or not unique.
    The working directory is changed to a temporary directory while the
    molecules are processed; it is always restored and the temporary directory
    removed, also when an error occurs.

    """
    if not generateUniqueNames:
        # Check template names are unique.
        template_names = set()
        for molecule in molecules:
            template_name = molecule.GetTitle()
            if template_name == '<0>':
                raise Exception("Molecule '%s' has invalid name" % template_name)
            if template_name in template_names:
                raise Exception("Molecule '%s' has template name collision." % template_name)
            template_names.add(template_name)

    # Process molecules inside a temporary working directory.
    import shutil
    import tempfile
    tmpdir = tempfile.mkdtemp()
    olddir = os.getcwd()
    leaprc = ""
    failed_molecule_list = []
    os.chdir(tmpdir)
    try:
        for (molecule_index, molecule) in enumerate(molecules):
            # Make sure atom names are unique before the molecule is written out.
            _ensureUniqueAtomNames(molecule)

            # Compute net formal charge.
            net_charge = _computeNetCharge(molecule)

            # Generate canonical AM1-BCC charges and a reference conformation.
            try:
                molecule = get_charges(molecule, strictStereo=False, keep_confs=1, normalize=normalize)
            except Exception:
                if not ignoreFailures:
                    raise
                # Record the failure and skip the molecule instead of running
                # antechamber on a molecule that could not be charged.
                failed_molecule_list.append(molecule)
                continue

            # Create a unique prefix.
            prefix = 'molecule%010d' % molecule_index

            # File names are relative to the temporary working directory.
            input_mol2_filename = prefix + '.tripos.mol2'
            gaff_mol2_filename  = prefix + '.gaff.mol2'
            frcmod_filename     = prefix + '.frcmod'

            # Write Tripos mol2 file as antechamber input.
            _writeMolecule(molecule, input_mol2_filename, standardize=normalize)

            # Parameterize the molecule with antechamber.
            run_antechamber(prefix, input_mol2_filename, charge_method=None, net_charge=net_charge, gaff_mol2_filename=gaff_mol2_filename, frcmod_filename=frcmod_filename, gaff_version=gaff_version)

            # Append to leaprc input for parmed.
            leaprc += '%s = loadmol2 %s\n' % (prefix, gaff_mol2_filename)
            leaprc += 'loadamberparams %s\n' % frcmod_filename

        # Generate ffxml file contents for parmchk-generated frcmod output.
        # This must run while still inside tmpdir: the leaprc refers to the
        # generated files by relative name.
        leaprc = StringIO(leaprc)
        params = parmed.amber.AmberParameterSet.from_leaprc(leaprc)
        params = parmed.openmm.OpenMMParameterSet.from_parameterset(params)
        ffxml = StringIO()
        params.write(ffxml)
    finally:
        # Always restore the caller's working directory and clean up.
        os.chdir(olddir)
        shutil.rmtree(tmpdir, ignore_errors=True)

    if ignoreFailures:
        return ffxml.getvalue(), failed_molecule_list
    else:
        return ffxml.getvalue()
Example #21
0
""" Test fragmentation """

__author__ = 'Chaya D. Stern'

from torsionfit.tests.utils import get_fn, has_openeye, FileIOTestCase
import unittest

# TODO should I move this to SetUp?
if has_openeye:
    # Module-level fixture: the OpenEye-dependent imports and the charged test
    # molecule are only set up when OpenEye is available. The molecule is built
    # from SMILES and charged once, at import time, and shared by the tests
    # below through the module-level name `charged`.
    from openmoltools.openeye import get_charges, smiles_to_oemol
    import openeye.oechem as oechem
    from torsionfit.qmscan import fragment
    mol = smiles_to_oemol(
        'CN(C)C/C=C/C(=O)NC1=C(C=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl)O[C@H]4CCOC4'
    )
    charged = get_charges(mol, keep_confs=1)


class TestFragments(FileIOTestCase):
    @unittest.skipUnless(has_openeye, "Cannot test without OpenEye")
    def test_tag_funcgroup(self):
        """ Test tag functional groups """
        tagged_funcgroups = fragment._tag_fgroups(charged)
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12+.
        self.assertEqual(len(tagged_funcgroups), 3)
        # Each tagged atom must carry the name of its functional group as
        # 'fgroup' data; check this for one atom of the 'amide_0' group.
        atom_idx = tagged_funcgroups['amide_0'][0].pop()
        atom = charged.GetAtom(oechem.OEHasAtomIdx(atom_idx))
        fgroup = atom.GetData('fgroup')
        self.assertEqual('amide_0', fgroup)

    @unittest.skipUnless(has_openeye, "Cannot test without OpenEye")
    def test_tag_rings(self):
Example #22
0
def generateForceFieldFromMolecules(molecules,
                                    ignoreFailures=False,
                                    generateUniqueNames=False,
                                    normalize=True,
                                    gaff_version='gaff'):
    """
    Generate ffxml file containing additional parameters and residue templates for simtk.openmm.app.ForceField using GAFF/AM1-BCC.

    This requires the OpenEye toolkit.

    Parameters
    ----------
    molecules : list of openeye.oechem.OEMol
        The molecules to be parameterized.
        All molecules must have explicit hydrogens.
        Net charge will be inferred from the net formal charge on each molecule.
        Partial charges will be determined automatically using oequacpac and canonical AM1-BCC charging rules.
    ignoreFailures: bool, optional, default=False
        Determines whether to add a molecule that fails charging to the list of
        failed molecules and skip it (True), or raise an Exception (False).
    generateUniqueNames : bool, optional, default=False
        If False, molecule titles are checked up front to be valid and unique.
        If True, this check is skipped.
        NOTE(review): the per-molecule template name that used to be computed
        in the loop was never used, so this flag has no other effect here.
    normalize : bool, optional, default=True
        If True, normalize the molecule by checking aromaticity, adding
        explicit hydrogens, and renaming by IUPAC name.
    gaff_version : str, default = 'gaff'
        One of ['gaff', 'gaff2']; selects which atom types to use.

    Returns
    -------
    ffxml : str
        Contents of ForceField `ffxml` file defining additional parameters from parmchk(2) and residue templates.
    failed_molecule_list : list of openeye.oechem.OEMol
        List of the oemols that could not be parameterized. Only returned if ignoreFailures=True

    Raises
    ------
    Exception
        If generateUniqueNames is False and a molecule title is '<0>' or
        occurs more than once.

    Notes
    -----
    This method preserves stereochemistry during AM1-BCC charge parameterization.
    Residue template names will be set from molecule names.
    Atom names in molecules will be assigned Tripos atom names if any are blank or not unique.
    The working directory is changed to a temporary directory while the
    molecules are processed; it is always restored and the temporary directory
    removed, also when an error occurs.

    """
    if not generateUniqueNames:
        # Check template names are unique.
        template_names = set()
        for molecule in molecules:
            template_name = molecule.GetTitle()
            if template_name == '<0>':
                raise Exception("Molecule '%s' has invalid name" %
                                template_name)
            if template_name in template_names:
                raise Exception("Molecule '%s' has template name collision." %
                                template_name)
            template_names.add(template_name)

    # Process molecules inside a temporary working directory.
    import shutil
    import tempfile
    tmpdir = tempfile.mkdtemp()
    olddir = os.getcwd()
    leaprc = ""
    failed_molecule_list = []
    os.chdir(tmpdir)
    try:
        for (molecule_index, molecule) in enumerate(molecules):
            # Make sure atom names are unique before the molecule is written out.
            _ensureUniqueAtomNames(molecule)

            # Compute net formal charge.
            net_charge = _computeNetCharge(molecule)

            # Generate canonical AM1-BCC charges and a reference conformation.
            try:
                molecule = get_charges(molecule,
                                       strictStereo=False,
                                       keep_confs=1,
                                       normalize=normalize)
            except Exception:
                if not ignoreFailures:
                    raise
                # Record the failure and skip the molecule instead of running
                # antechamber on a molecule that could not be charged.
                failed_molecule_list.append(molecule)
                continue

            # Create a unique prefix.
            prefix = 'molecule%010d' % molecule_index

            # File names are relative to the temporary working directory.
            input_mol2_filename = prefix + '.tripos.mol2'
            gaff_mol2_filename = prefix + '.gaff.mol2'
            frcmod_filename = prefix + '.frcmod'

            # Write Tripos mol2 file as antechamber input.
            _writeMolecule(molecule,
                           input_mol2_filename,
                           standardize=normalize)

            # Parameterize the molecule with antechamber.
            run_antechamber(prefix,
                            input_mol2_filename,
                            charge_method=None,
                            net_charge=net_charge,
                            gaff_mol2_filename=gaff_mol2_filename,
                            frcmod_filename=frcmod_filename,
                            gaff_version=gaff_version)

            # Append to leaprc input for parmed.
            leaprc += '%s = loadmol2 %s\n' % (prefix, gaff_mol2_filename)
            leaprc += 'loadamberparams %s\n' % frcmod_filename

        # Generate ffxml file contents for parmchk-generated frcmod output.
        # This must run while still inside tmpdir: the leaprc refers to the
        # generated files by relative name.
        leaprc = StringIO(leaprc)
        params = parmed.amber.AmberParameterSet.from_leaprc(leaprc)
        params = parmed.openmm.OpenMMParameterSet.from_parameterset(params)
        ffxml = StringIO()
        params.write(ffxml)
    finally:
        # Always restore the caller's working directory and clean up.
        os.chdir(olddir)
        shutil.rmtree(tmpdir, ignore_errors=True)

    if ignoreFailures:
        return ffxml.getvalue(), failed_molecule_list
    else:
        return ffxml.getvalue()
Example #23
0
""" Test fragmentation """

__author__ = 'Chaya D. Stern'

from torsionfit.tests.utils import get_fun, has_openeye
import unittest
if has_openeye:
    # Module-level fixture: the OpenEye-dependent imports and the charged test
    # molecule are only set up when OpenEye is available. The molecule is built
    # from SMILES and charged once, at import time, and shared by the tests
    # below through the module-level name `charged`.
    from openmoltools.openeye import get_charges, smiles_to_oemol
    import openeye.oechem as oechem
    from torsionfit.qmscan import fragment
    mol = smiles_to_oemol('CN(C)C/C=C/C(=O)NC1=C(C=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl)O[C@H]4CCOC4')
    charged = get_charges(mol, keep_confs=1)


class TestFragments(unittest.TestCase):
    """Tests for the tagging helpers of ``torsionfit.qmscan.fragment``.

    All tests use the module-level charged molecule and are skipped when
    OpenEye is not available.
    """

    @unittest.skipUnless(has_openeye, "Cannot test without OpenEye")
    def test_tag_funcgroup(self):
        """ Test tag functional groups """
        tagged_funcgroups = fragment._tag_fgroups(charged)
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12+.
        self.assertEqual(len(tagged_funcgroups), 3)
        # Each tagged atom must carry the name of its functional group as
        # 'fgroup' data; check this for one atom of the 'amide_0' group.
        atom_idx = tagged_funcgroups['amide_0'][0].pop()
        atom = charged.GetAtom(oechem.OEHasAtomIdx(atom_idx))
        fgroup = atom.GetData('fgroup')
        self.assertEqual('amide_0', fgroup)

    @unittest.skipUnless(has_openeye, "Cannot test without OpenEye")
    def test_tag_rings(self):
        """ Test tag rings"""
        tagged_rings = fragment._tag_rings(charged)
        self.assertEqual(len(tagged_rings), 3)
def enumerate_conformations(name, smiles=None, pdbname=None):
    """Run Epik to get protonation states using PDB residue templates for naming.

    The input molecule is downloaded from Ligand Expo when `pdbname` is given
    (it takes precedence over `smiles`); otherwise it is built from `smiles`.
    All files are written under ``<output_dir>/<name>/`` with the prefix
    ``<name>``: ``-input.mol2``, ``-epik.mae``, ``-epik.sdf``, ``-epik.mol2``,
    ``-state-penalties.out``, ``-epik-charged.pdb`` and ``-epik-charged.mol2``
    (plus ``-input.pdb`` / ``-input.sdf`` when `pdbname` is used).

    Parameters
    ----------
    name : str
       Common name of molecule (used to create subdirectory)
    smiles : str, optional
       Isomeric SMILES string
    pdbname : str, optional
       Three-letter PDB code (e.g. 'DB8'). If several space-separated codes
       are given, only the first one is used.

    Returns
    -------
    None
        Results are written to disk. The function also returns early, without
        running Epik, if charging the SMILES-derived molecule raises
        ``RuntimeError``.

    Raises
    ------
    ValueError
        If neither `smiles` nor `pdbname` is provided.
    """
    # Validate the arguments first so that an invalid call does not leave an
    # empty output directory behind.
    if not pdbname and not smiles:
        raise ValueError('Must provide SMILES string or pdbname')

    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    output_basepath = os.path.join(output_basepath, name)

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            print("Splitting '%s' into first entry only: '%s'" % (pdbname, pdbnames[0]))
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (pdbname[0], pdbname, pdbname)
        pdb_filename = output_basepath + '-input.pdb'
        retrieve_url(url, pdb_filename)
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (pdbname[0], pdbname, pdbname)
        sdf_filename = output_basepath + '-input.sdf'
        retrieve_url(url, sdf_filename)
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF.
        # NOTE(review): this pairs atoms purely by iteration order and assumes
        # both files list the same atoms in the same order -- confirm.
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    else:
        # Generate molecule geometry with OpenEye
        print("Generating molecule {}".format(name))
        oe_molecule = openeye.smiles_to_oemol(smiles)
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(oe_molecule)
        oechem.OETriposBondTypeNames(oe_molecule)
        try:
            oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
        except RuntimeError:
            traceback.print_exc()
            print("Skipping molecule " + name)
            return
        # Residue name: the first three letters of the upper-cased name,
        # ignoring any non-letter characters.
        residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]

    # Save mol2 file, preserving atom names
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100, min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges. The SDF and MOL2 files are read in lock-step: the MOL2
    # record is the one that gets charged, the SDF record supplies the SD tags.
    charged_molecules = list()
    index = 0
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            index += 1
            # Without a matching MOL2 record there is nothing valid to charge;
            # stop instead of pairing the SD tags with a stale or empty molecule.
            if not oechem.OEReadMolecule(ifs_mol2, mol2_molecule):
                print("Could not read molecule %d from '%s'; stopping." % (index, output_mol2_filename))
                break

            print("Charging molecule %d" % (index))
            try:
                # Charge molecule.
                charged_molecule = openeye.get_charges(mol2_molecule, max_confs=800, strictStereo=False, normalize=True, keep_confs=None)
                # Assign Tripos types
                oechem.OETriposAtomTypeNames(charged_molecule)
                oechem.OETriposBondTypeNames(charged_molecule)
                # Store tags.
                oechem.OECopySDData(charged_molecule, sdf_molecule)
                # Store molecule
                charged_molecules.append(charged_molecule)
            except Exception as e:
                # Best effort: a protomer/tautomer that cannot be charged is
                # reported and skipped rather than aborting the whole run.
                print(e)
                print("Skipping protomer/tautomer because of failed charging.")
    finally:
        # Clean up
        ifs_sdf.close()
        ifs_mol2.close()

    # Write state penalties, one per successfully charged molecule. The file is
    # closed even if an SD tag is missing or cannot be parsed as a float.
    with open(output_basepath + '-state-penalties.out', 'w') as outfile:
        for charged_molecule in charged_molecules:
            epik_State_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            outfile.write('%16.8f\n' % epik_State_Penalty)

    # Write as PDB
    charged_pdb_filename = output_basepath + '-epik-charged.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    try:
        flavor = oechem.OEOFlavor_PDB_CurrentResidues | oechem.OEOFlavor_PDB_ELEMENT | oechem.OEOFlavor_PDB_BONDS | oechem.OEOFlavor_PDB_HETBONDS | oechem.OEOFlavor_PDB_BOTH
        ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
        for charged_molecule in charged_molecules:
            # Fix residue names
            for atom in charged_molecule.GetAtoms():
                residue = oechem.OEAtomGetResidue(atom)
                residue.SetName(residue_name)
                oechem.OEAtomSetResidue(atom, residue)

            oechem.OEWriteMolecule(ofs, charged_molecule)
    finally:
        ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)
Example #25
0
def generateForceFieldFromMolecules(molecules):
    """
    Generate ffxml file containing additional parameters and residue templates for simtk.openmm.app.ForceField using GAFF/AM1-BCC.

    This requires the OpenEye toolkit.

    Parameters
    ----------
    molecules : list of openeye.oechem.OEMol
        The molecules to be parameterized.
        All molecules must have explicit hydrogens.
        Net charge will be inferred from the net formal charge on each molecule.
        Partial charges will be determined automatically using oequacpac and canonical AM1-BCC charging rules.
        The collection is iterated twice, so it must not be a one-shot iterator.

    Returns
    -------
    ffxml : str
        Contents of ForceField `ffxml` file defining additional parameters from parmchk(2) and residue templates.

    Raises
    ------
    Exception
        If a molecule title is '<0>' or if two molecules share the same title.

    Notes
    -----
    This method preserves stereochemistry during AM1-BCC charge parameterization.
    Residue template names will be set from molecule names.
    Atom names in molecules will be assigned Tripos atom names if any are blank or not unique.

    All intermediate files are written to a temporary directory. The working
    directory is switched there for the duration of the call; it is restored
    and the temporary directory removed even when parameterization fails.

    """
    # Check template names are unique.
    template_names = set()
    for molecule in molecules:
        template_name = molecule.GetTitle()
        if template_name == '<0>':
            raise Exception("Molecule '%s' has invalid name" % template_name)
        if template_name in template_names:
            raise Exception("Molecule '%s' has template name collision." %
                            template_name)
        template_names.add(template_name)

    # Process molecules inside a scratch directory.
    import shutil
    import tempfile
    tmpdir = tempfile.mkdtemp()
    olddir = os.getcwd()
    os.chdir(tmpdir)
    try:
        leaprc_lines = []
        for (molecule_index, molecule) in enumerate(molecules):
            # Make sure every atom has a unique name.
            _ensureUniqueAtomNames(molecule)

            # Compute net formal charge.
            net_charge = _computeNetCharge(molecule)

            # Generate canonical AM1-BCC charges and a reference conformation.
            molecule = get_charges(molecule, strictStereo=False, keep_confs=1)

            # Create a unique prefix.
            prefix = 'molecule%010d' % molecule_index

            # File names (relative to the scratch directory) used by antechamber.
            input_mol2_filename = prefix + '.tripos.mol2'
            gaff_mol2_filename = prefix + '.gaff.mol2'
            frcmod_filename = prefix + '.frcmod'

            # Write Tripos mol2 file as antechamber input.
            _writeMolecule(molecule, input_mol2_filename)

            # Parameterize the molecule with antechamber.
            # charge_method=None because charges were already assigned above.
            run_antechamber(prefix,
                            input_mol2_filename,
                            charge_method=None,
                            net_charge=net_charge,
                            gaff_mol2_filename=gaff_mol2_filename,
                            frcmod_filename=frcmod_filename)

            # Append to leaprc input for parmed.
            leaprc_lines.append('%s = loadmol2 %s\n' % (prefix, gaff_mol2_filename))
            leaprc_lines.append('loadamberparams %s\n' % frcmod_filename)

        # Generate ffxml file contents for parmchk-generated frcmod output.
        # This must run while still inside the scratch directory because the
        # leaprc refers to the mol2/frcmod files by relative name.
        leaprc = StringIO(''.join(leaprc_lines))
        params = parmed.amber.AmberParameterSet.from_leaprc(leaprc)
        params = parmed.openmm.OpenMMParameterSet.from_parameterset(params)
        ffxml = StringIO()
        params.write(ffxml)
    finally:
        # Always restore the caller's working directory, then discard the
        # scratch files; the ffxml text is already held in memory.
        os.chdir(olddir)
        shutil.rmtree(tmpdir, ignore_errors=True)

    return ffxml.getvalue()