Example #1
0
    def test_atom_map(self):
        """Test get atom map"""
        from openeye import oechem
        tagged_smiles = '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]'
        mol_1 = openeye.smiles_to_oemol('CC[N+]#C')
        inf = get_fn('ethylmethylidyneamonium.mol2')
        ifs = oechem.oemolistream(inf)
        mol_2 = oechem.OEMol()
        oechem.OEReadMolecule(ifs, mol_2)

        atom_map = utils.get_atom_map(tagged_smiles, mol_1)

        for i, mapping in enumerate(atom_map):
            atom_1 = mol_1.GetAtom(oechem.OEHasAtomIdx(atom_map[mapping]))
            atom_1.SetAtomicNum(i+1)
            atom_2 = mol_2.GetAtom(oechem.OEHasAtomIdx(mapping-1))
            atom_2.SetAtomicNum(i+1)
            self.assertEqual(oechem.OECreateCanSmiString(mol_1), oechem.OECreateCanSmiString(mol_2))

        # Test aromatic molecule
        tagged_smiles = '[H:10][c:4]1[c:3]([c:2]([c:1]([c:6]([c:5]1[H:11])[H:12])[C:7]([H:13])([H:14])[H:15])[H:8])[H:9]'
        mol_1 = openeye.smiles_to_oemol('Cc1ccccc1')
        inf = get_fn('toluene.mol2')
        ifs = oechem.oemolistream(inf)
        mol_2 = oechem.OEMol()
        oechem.OEReadMolecule(ifs, mol_2)

        atom_map = utils.get_atom_map(tagged_smiles, mol_1)
        for i, mapping in enumerate(atom_map):
            atom_1 = mol_1.GetAtom(oechem.OEHasAtomIdx(atom_map[mapping]))
            atom_1.SetAtomicNum(i+1)
            atom_2 = mol_2.GetAtom(oechem.OEHasAtomIdx(mapping-1))
            atom_2.SetAtomicNum(i+1)
            self.assertEqual(oechem.OECreateCanSmiString(mol_1), oechem.OECreateCanSmiString(mol_2))
Example #2
0
def SmilesToFragments(smiles, fgroup_smarts, bondOrderThreshold=1.2, chargesMol=True):
    """
    Fragment molecule at bonds below Bond Order Threshold

    Parameters
    ----------
    smiles: str
        smiles string of molecule to fragment

    Returns
    -------
    frags: list of OE AtomBondSets

    """
    # Charge molecule
    mol = oechem.OEGraphMol()
    oemol = openeye.smiles_to_oemol(smiles)
    charged = openeye.get_charges(oemol, keep_confs=1)

    # Tag functional groups
    _tag_fgroups(charged, fgroups_smarts=fgroup_smarts)

    # Generate fragments
    G = OeMolToGraph(charged)
    subraphs = FragGraph(G, bondOrderThreshold=bondOrderThreshold)

    frags = []
    for subraph in subraphs:
        frags.append(subgraphToAtomBondSet(G, subraph, charged))

    if chargesMol:
        return frags, charged
    else:
        return frags
def enumerate_conformations(name, smiles):
    """Generate geometry and run epik."""
    # Generate molecule geometry with OpenEye
    print "Generating molecule {}".format(name)
    oe_molecule = openeye.smiles_to_oemol(smiles)
    try:
        oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
    except RuntimeError as e:
        traceback.print_exc()
        print "Skipping molecule " + name
        return

    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    output_basepath = os.path.join(output_basepath, name)

    # Save mol2 file with residue name = first three uppercase letters
    print "Running epik on molecule {}".format(name)
    mol2_file_path = output_basepath + '-input.mol2'
    residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    openeye.molecule_to_mol2(oe_molecule, mol2_file_path, residue_name=residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=True,
                         max_structures=32, ph_tolerance=10.0)

    # Convert maestro file to sdf and mol2
    schrodinger.run_structconvert(mae_file_path, output_basepath + '-epik.sdf')
    schrodinger.run_structconvert(mae_file_path, output_basepath + '-epik.mol2')
Example #4
0
    def test_atom_map_order(self):
        """Test atom map"""
        from openeye import oechem
        tagged_smiles = '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]'
        mol_from_tagged_smiles = openeye.smiles_to_oemol(tagged_smiles)
        atom_map = utils.get_atom_map(tagged_smiles, mol_from_tagged_smiles)

        # Compare atom map to tag
        for i in range(1, len(atom_map) +1):
            atom_1 = mol_from_tagged_smiles.GetAtom(oechem.OEHasAtomIdx(atom_map[i]))
            self.assertEqual(i, atom_1.GetMapIdx())
Example #5
0
    def test_atom_map_order(self):
        """Test atom map"""
        from openeye import oechem
        tagged_smiles = '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]'
        mol_from_tagged_smiles = openeye.smiles_to_oemol(tagged_smiles)
        atom_map = utils.get_atom_map(tagged_smiles, mol_from_tagged_smiles)

        # Compare atom map to tag
        for i in range(1, len(atom_map) + 1):
            atom_1 = mol_from_tagged_smiles.GetAtom(
                oechem.OEHasAtomIdx(atom_map[i]))
            self.assertEqual(i, atom_1.GetMapIdx())
Example #6
0
    def test_atom_map(self):
        """Test get atom map"""
        from openeye import oechem
        tagged_smiles = '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]'
        mol_1 = openeye.smiles_to_oemol('CC[N+]#C')
        inf = get_fn('ethylmethylidyneamonium.mol2')
        ifs = oechem.oemolistream(inf)
        mol_2 = oechem.OEMol()
        oechem.OEReadMolecule(ifs, mol_2)

        atom_map = utils.get_atom_map(tagged_smiles, mol_1)

        for i, mapping in enumerate(atom_map):
            atom_1 = mol_1.GetAtom(oechem.OEHasAtomIdx(atom_map[mapping]))
            atom_1.SetAtomicNum(i + 1)
            atom_2 = mol_2.GetAtom(oechem.OEHasAtomIdx(mapping - 1))
            atom_2.SetAtomicNum(i + 1)
            self.assertEqual(oechem.OECreateCanSmiString(mol_1),
                             oechem.OECreateCanSmiString(mol_2))

        # Test aromatic molecule
        tagged_smiles = '[H:10][c:4]1[c:3]([c:2]([c:1]([c:6]([c:5]1[H:11])[H:12])[C:7]([H:13])([H:14])[H:15])[H:8])[H:9]'
        mol_1 = openeye.smiles_to_oemol('Cc1ccccc1')
        inf = get_fn('toluene.mol2')
        ifs = oechem.oemolistream(inf)
        mol_2 = oechem.OEMol()
        oechem.OEReadMolecule(ifs, mol_2)

        atom_map = utils.get_atom_map(tagged_smiles, mol_1)
        for i, mapping in enumerate(atom_map):
            atom_1 = mol_1.GetAtom(oechem.OEHasAtomIdx(atom_map[mapping]))
            atom_1.SetAtomicNum(i + 1)
            atom_2 = mol_2.GetAtom(oechem.OEHasAtomIdx(mapping - 1))
            atom_2.SetAtomicNum(i + 1)
            self.assertEqual(oechem.OECreateCanSmiString(mol_1),
                             oechem.OECreateCanSmiString(mol_2))
Example #7
0
def get_atom_map(tagged_smiles, molecule=None):
    """
    Returns a dictionary that maps tag on SMILES to atom index in molecule.
    Parameters
    ----------
    tagged_smiles: str
        index-tagged explicit hydrogen SMILES string
    molecule: OEMol
        molecule to generate map for. If None, a new OEMol will be generated from the tagged SMILES, the map will map to
        this molecule and it will be returned.

    Returns
    -------
    atom_map: dict
        a dictionary that maps tag to atom index {tag:idx}
    molecule: OEMol
        If a molecule was not provided, the generated molecule will be returned.
    """
    if molecule is None:
        molecule = openeye.smiles_to_oemol(tagged_smiles)

    ss = oechem.OESubSearch(tagged_smiles)
    oechem.OEPrepareSearch(molecule, ss)
    ss.SetMaxMatches(1)

    atom_map = {}
    t1 = time.time()
    matches = [m for m in ss.Match(molecule)]
    t2 = time.time()
    seconds = t2 - t1
    logger().info("Substructure search took {} seconds".format(seconds))
    if not matches:
        logger().info("MCSS failed for {}, smiles: {}".format(
            molecule.GetTitle(), tagged_smiles))
        return False
    for match in matches:
        for ma in match.GetAtoms():
            atom_map[ma.pattern.GetMapIdx()] = ma.target.GetIdx()

    # sanity check
    mol = oechem.OEGraphMol()
    oechem.OESubsetMol(mol, match, True)
    logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(mol)))
    if molecule is None:
        return molecule, atom_map

    return atom_map
Example #8
0
def get_atom_map(tagged_smiles, molecule=None):
    """
    Returns a dictionary that maps tag on SMILES to atom index in molecule.
    Parameters
    ----------
    tagged_smiles: str
        index-tagged explicit hydrogen SMILES string
    molecule: OEMol
        molecule to generate map for. If None, a new OEMol will be generated from the tagged SMILES, the map will map to
        this molecule and it will be returned.

    Returns
    -------
    atom_map: dict
        a dictionary that maps tag to atom index {tag:idx}
    molecule: OEMol
        If a molecule was not provided, the generated molecule will be returned.
    """
    if molecule is None:
        molecule = openeye.smiles_to_oemol(tagged_smiles)

    ss = oechem.OESubSearch(tagged_smiles)
    oechem.OEPrepareSearch(molecule, ss)
    ss.SetMaxMatches(1)

    atom_map = {}
    t1 = time.time()
    matches = [m for m in ss.Match(molecule)]
    t2 = time.time()
    seconds = t2-t1
    logger().info("Substructure search took {} seconds".format(seconds))
    if not matches:
        logger().info("MCSS failed for {}, smiles: {}".format(molecule.GetTitle(), tagged_smiles))
        return False
    for match in matches:
        for ma in match.GetAtoms():
            atom_map[ma.pattern.GetMapIdx()] = ma.target.GetIdx()

    # sanity check
    mol = oechem.OEGraphMol()
    oechem.OESubsetMol(mol, match, True)
    logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(mol)))
    if molecule is None:
        return molecule, atom_map

    return atom_map
Example #9
0
    def __init__(self, cas_or_aa, min_atoms=6):
        """
        Initialize using cas numbers OR amino acid name
        Requires openmoltools.openeye and cirpy

        Arguments
            cas_or_aa (list of strings) either cas number or name of amino acid

        Optional Arguments
            min_atoms (int) - a minimum number of atoms for substructure match (default: 6)

        Creates class variables:
            self.cas_or_aa (list of strings) 
              input representing molecules to be combined 
            self.smiles_strings (list of strings) 
              smiles representation of molecules to be combined
            self.ligands (list of OEMol) 
              openeye molecule representation of molecules to be combined
            self.title (string) 
              used as an identifier for input group of molecules
            self.min_atoms (int) 
              minimum number of common atoms to constitute a substructure match (default: 6)

        """

        self.cas_or_aa = cas_or_aa
        self.smiles_strings = []
        self.ligands = []
        for cas in cas_or_aa:
            smiles = cirpy.resolve(cas, 'smiles')
            self.smiles_strings.append(smiles)
            ligand = openeye.smiles_to_oemol(smiles)
            ligand = openeye.get_charges(ligand, strictStereo=False)
            self.ligands.append(ligand)
        self.title = self.cas_or_aa[0] + "_and_analogs"
        self.min_atoms = min_atoms
        self.common_substructure = None
        self.dual_topology = None
        self.each_molecule_N = []
        self.mapping_dictionaries = []
        self.pdb_filename = None
        self.ffxml_filename = None
Example #10
0
    def __init__(self, cas_or_aa, min_atoms=6):
        """
        Initialize using cas numbers OR amino acid name
        Requires openmoltools.openeye and cirpy

        Arguments
            cas_or_aa (list of strings) either cas number or name of amino acid

        Optional Arguments
            min_atoms (int) - a minimum number of atoms for substructure match (default: 6)

        Creates class variables:
            self.cas_or_aa (list of strings) 
              input representing molecules to be combined 
            self.smiles_strings (list of strings) 
              smiles representation of molecules to be combined
            self.ligands (list of OEMol) 
              openeye molecule representation of molecules to be combined
            self.title (string) 
              used as an identifier for input group of molecules
            self.min_atoms (int) 
              minimum number of common atoms to constitute a substructure match (default: 6)

        """

        self.cas_or_aa = cas_or_aa
        self.smiles_strings = []
        self.ligands = []
        for cas in cas_or_aa:
            smiles = cirpy.resolve(cas,'smiles')
            self.smiles_strings.append(smiles)
            ligand = openeye.smiles_to_oemol(smiles)
            ligand = openeye.get_charges(ligand, strictStereo=False) 
            self.ligands.append(ligand)
        self.title = self.cas_or_aa[0]+"_and_analogs"
        self.min_atoms = min_atoms
        self.common_substructure = None
        self.dual_topology = None
        self.each_molecule_N = []
        self.mapping_dictionaries = []
        self.pdb_filename = None
        self.ffxml_filename = None
Example #11
0
    def test_mapped_xyz(self):
        """Test writing out mapped xyz"""
        from openeye import oechem, oeomega
        tagged_smiles = '[H:10][c:4]1[c:3]([c:2]([c:1]([c:6]([c:5]1[H:11])[H:12])[C:7]([H:13])([H:14])[H:15])[H:8])[H:9]'
        mol_1 = openeye.smiles_to_oemol('Cc1ccccc1')
        inf = get_fn('toluene.mol2')
        ifs = oechem.oemolistream(inf)
        mol_2 = oechem.OEMol()
        oechem.OEReadMolecule(ifs, mol_2)

        atom_map = utils.get_atom_map(tagged_smiles, mol_1)
        for i, mapping in enumerate(atom_map):
            atom_1 = mol_1.GetAtom(oechem.OEHasAtomIdx(atom_map[mapping]))
            atom_1.SetAtomicNum(i + 1)
            atom_2 = mol_2.GetAtom(oechem.OEHasAtomIdx(mapping - 1))
            atom_2.SetAtomicNum(i + 1)

        xyz_1 = utils.to_mapped_xyz(mol_1, atom_map)
        # molecule generated from mol2 should be in the right order.
        atom_map_mol2 = {
            1: 0,
            2: 1,
            3: 2,
            4: 3,
            5: 4,
            6: 5,
            7: 6,
            8: 7,
            9: 8,
            10: 9,
            11: 10,
            12: 11,
            13: 12,
            14: 13,
            15: 14
        }
        xyz_2 = utils.to_mapped_xyz(mol_2, atom_map_mol2)

        for ele1, ele2 in zip(xyz_1.split('\n')[:-1], xyz_2.split('\n')[:-1]):
            self.assertEqual(ele1.split(' ')[2], ele2.split(' ')[2])
Example #12
0
def SmilesToFragments(smiles,
                      fgroup_smarts,
                      bondOrderThreshold=1.2,
                      chargesMol=True):
    """
    Fragment molecule at bonds below Bond Order Threshold

    Parameters
    ----------
    smiles: str
        smiles string of molecule to fragment

    Returns
    -------
    frags: list of OE AtomBondSets

    """
    # Charge molecule
    mol = oechem.OEGraphMol()
    oemol = openeye.smiles_to_oemol(smiles)
    charged = openeye.get_charges(oemol, keep_confs=1)

    # Tag functional groups
    _tag_fgroups(charged, fgroups_smarts=fgroup_smarts)

    # Generate fragments
    G = OeMolToGraph(charged)
    subraphs = FragGraph(G, bondOrderThreshold=bondOrderThreshold)

    frags = []
    for subraph in subraphs:
        frags.append(subgraphToAtomBondSet(G, subraph, charged))

    if chargesMol:
        return frags, charged
    else:
        return frags
Example #13
0
    def test_mapped_xyz(self):
        """Test writing out mapped xyz"""
        from openeye import oechem, oeomega
        tagged_smiles = '[H:10][c:4]1[c:3]([c:2]([c:1]([c:6]([c:5]1[H:11])[H:12])[C:7]([H:13])([H:14])[H:15])[H:8])[H:9]'
        mol_1 = openeye.smiles_to_oemol('Cc1ccccc1')
        inf = get_fn('toluene.mol2')
        ifs = oechem.oemolistream(inf)
        mol_2 = oechem.OEMol()
        oechem.OEReadMolecule(ifs, mol_2)

        atom_map = utils.get_atom_map(tagged_smiles, mol_1)
        for i, mapping in enumerate(atom_map):
            atom_1 = mol_1.GetAtom(oechem.OEHasAtomIdx(atom_map[mapping]))
            atom_1.SetAtomicNum(i+1)
            atom_2 = mol_2.GetAtom(oechem.OEHasAtomIdx(mapping-1))
            atom_2.SetAtomicNum(i+1)

        xyz_1 = utils.to_mapped_xyz(mol_1, atom_map)
        # molecule generated from mol2 should be in the right order.
        atom_map_mol2 = {1:0, 2:1, 3:2, 4:3, 5:4, 6:5, 7:6, 8:7, 9:8, 10:9, 11:10, 12:11, 13:12, 14:13, 15:14}
        xyz_2 = utils.to_mapped_xyz(mol_2, atom_map_mol2)

        for ele1, ele2 in zip(xyz_1.split('\n')[:-1], xyz_2.split('\n')[:-1]):
            self.assertEqual(ele1.split(' ')[2], ele2.split(' ')[2])
def enumerate_conformations(name, smiles=None, pdbname=None):
    """Run Epik to get protonation states using PDB residue templates for naming.

    Parameters
    ----------
    name : str
       Common name of molecule (used to create subdirectory)
    smiles : str
       Isomeric SMILES string
    pdbname : str
       Three-letter PDB code (e.g. 'DB8')
    """
    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    output_basepath = os.path.join(output_basepath, name)

    if pdbname:
        # Make sure to only use one entry if there are mutliple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            print("Splitting '%s' into first entry only: '%s'" % (pdbname, pdbnames[0]))
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (pdbname[0], pdbname, pdbname)
        pdb_filename = output_basepath + '-input.pdb'
        retrieve_url(url, pdb_filename)
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (pdbname[0], pdbname, pdbname)
        sdf_filename = output_basepath + '-input.sdf'
        retrieve_url(url, sdf_filename)
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    elif smiles:
        # Generate molecule geometry with OpenEye
        print("Generating molecule {}".format(name))
        oe_molecule = openeye.smiles_to_oemol(smiles)
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(oe_molecule)
        oechem.OETriposBondTypeNames(oe_molecule)
        try:
            oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
        except RuntimeError as e:
            traceback.print_exc()
            print("Skipping molecule " + name)
            return
        residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    else:
        raise Exception('Must provide SMILES string or pdbname')

    # Save mol2 file, preserving atom names
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100, min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.
    charged_molecules = list()
    index = 0
    while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
        oechem.OEReadMolecule(ifs_mol2, mol2_molecule)

        index += 1
        print("Charging molecule %d" % (index))
        try:
            # Charge molecule.
            charged_molecule = openeye.get_charges(mol2_molecule, max_confs=800, strictStereo=False, normalize=True, keep_confs=None)
            # Assign Tripos types
            oechem.OETriposAtomTypeNames(charged_molecule)
            oechem.OETriposBondTypeNames(charged_molecule)
            # Store tags.
            oechem.OECopySDData(charged_molecule, sdf_molecule)
            # Store molecule
            charged_molecules.append(charged_molecule)
        except Exception as e:
            print(e)
            print("Skipping protomer/tautomer because of failed charging.")

    # Clean up
    ifs_sdf.close()
    ifs_mol2.close()

    # Write state penalites.
    outfile = open(output_basepath + '-state-penalties.out', 'w')
    for (index, charged_molecule) in enumerate(charged_molecules):
        # Get Epik data.
        epik_Ionization_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty"))
        epik_Ionization_Penalty_Charging = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty_Charging"))
        epik_Ionization_Penalty_Neutral = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty_Neutral"))
        epik_State_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
        epik_Tot_Q = int(oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))

        outfile.write('%16.8f\n' % epik_State_Penalty)
    outfile.close()

    # Write as PDB
    charged_pdb_filename = output_basepath + '-epik-charged.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    flavor = oechem.OEOFlavor_PDB_CurrentResidues | oechem.OEOFlavor_PDB_ELEMENT | oechem.OEOFlavor_PDB_BONDS | oechem.OEOFlavor_PDB_HETBONDS | oechem.OEOFlavor_PDB_BOTH
    ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
    for (index, charged_molecule) in enumerate(charged_molecules):
        # Fix residue names
        for atom in charged_molecule.GetAtoms():
            residue = oechem.OEAtomGetResidue(atom)
            residue.SetName(residue_name)
            oechem.OEAtomSetResidue(atom, residue)

        #oechem.OEWritePDBFile(ofs, charged_molecule, flavor)
        oechem.OEWriteMolecule(ofs, charged_molecule)
    ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)
Example #15
0
eMolID_can_iso_smiles_dict = eMolID_smiles_dict
pickle.dump(eMolID_can_iso_smiles_dict,
            open("eMolID_can_iso_smiles_dict.pickle", "wb"))

print("Finished converting eMolecules SMILES to canonical isomeric SMILES.")
print("\n")

##### CONVERT SMILES TO OEMOL #####

print("Converting SMILES to OEMol...")

eMolID_oemol_dict = {}

for key, value in eMolID_smiles_dict.items():
    # Create a OEMolBuilder from a smiles string.
    oemol_molecule = omtoe.smiles_to_oemol(smiles=value)
    eMolID_oemol_dict[key] = oemol_molecule

# print(oMolID_oemol_dict)

print("\n")

##### GENERATE CHARGED CONFORMERS AND SAVE AS MOL2 FILE #####

mol2_directory_path = "./mol2_files"
if not os.path.exists(mol2_directory_path):
    os.makedirs(mol2_directory_path)
    print("{} directory created.".format(mol2_directory_path))

print("Generating charged OEMol molecules...")
Example #16
0
""" Test fragmentation """

__author__ = 'Chaya D. Stern'

from torsionfit.tests.utils import get_fn, has_openeye, FileIOTestCase
import unittest

# TODO should I move this to SetUp?
if has_openeye:
    from openmoltools.openeye import get_charges, smiles_to_oemol
    import openeye.oechem as oechem
    from torsionfit.qmscan import fragment
    mol = smiles_to_oemol(
        'CN(C)C/C=C/C(=O)NC1=C(C=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl)O[C@H]4CCOC4'
    )
    charged = get_charges(mol, keep_confs=1)


class TestFragments(FileIOTestCase):
    @unittest.skipUnless(has_openeye, "Cannot test without OpenEye")
    def test_tag_funcgroup(self):
        """ Test tag functional groups """
        tagged_funcgroups = fragment._tag_fgroups(charged)
        self.assertEquals(len(tagged_funcgroups), 3)
        atom_idx = tagged_funcgroups['amide_0'][0].pop()
        atom = charged.GetAtom(oechem.OEHasAtomIdx(atom_idx))
        fgroup = atom.GetData('fgroup')
        self.assertEquals('amide_0', fgroup)

    @unittest.skipUnless(has_openeye, "Cannot test without OpenEye")
    def test_tag_rings(self):
    # Create output directory
    if not os.path.exists(LIGANDS_DIR_PATH):
        os.makedirs(LIGANDS_DIR_PATH)

    # Parse SMILES file and generate ligand
    smiles_file = open(SMILES_FILE_PATH, 'r')
    for line in smiles_file:

        # Get SMILES representation from CVS file
        ligand_name, smiles_str = line.strip().split(',')

        print "Generating inhibitor", ligand_name

        # Convert to OEMol molecule
        mol_oemol = smiles_to_oemol(smiles_str)
        mol_oemol.SetTitle(ligand_name)

        # Use OpenEye Omega toolkit to generate lowest energy structure
        omega = oeomega.OEOmega()
        omega.SetCanonOrder(False)
        omega.SetMaxConfs(OMEGA_MAX_CONFS)
        omega(mol_oemol)

        # Generate protonation/tautomeric states with epik and charge molecule
        valid_structure = False
        with working_directory(temp_dir):
            for i in range(mol_oemol.GetMaxConfIdx()):
                try:
                    confomer = mol_oemol.GetConf(oechem.OEHasConfIdx(i))
                    mk_single_conformer_epik(ligand_name, confomer, pH=PH)
Example #18
0
    # Create output directory
    if not os.path.exists(LIGANDS_DIR_PATH):
        os.makedirs(LIGANDS_DIR_PATH)

    # Parse SMILES file and generate ligand
    smiles_file = open(SMILES_FILE_PATH, 'r')
    for line in smiles_file:

        # Get SMILES representation from CVS file
        ligand_name, smiles_str = line.strip().split(',')

        print "Generating inhibitor", ligand_name

        # Convert to OEMol molecule
        mol_oemol = smiles_to_oemol(smiles_str)
        mol_oemol.SetTitle(ligand_name)

        # Use OpenEye Omega toolkit to generate lowest energy structure
        omega = oeomega.OEOmega()
        omega.SetCanonOrder(False)
        omega.SetMaxConfs(OMEGA_MAX_CONFS)
        omega(mol_oemol)

        # Generate protonation/tautomeric states with epik and charge molecule
        valid_structure = False
        with working_directory(temp_dir):
            for i in range(mol_oemol.GetMaxConfIdx()):
                try:
                    confomer = mol_oemol.GetConf(oechem.OEHasConfIdx(i))
                    mk_single_conformer_epik(ligand_name, confomer, pH=PH)
Example #19
0
    def __init__(self, molecules: List[str], output_filename: str, ncmc_switching_times: Dict[str, int], equilibrium_steps: Dict[str, int], timestep: unit.Quantity, initial_molecule: str=None, geometry_options: Dict=None):
        self._molecules = [SmallMoleculeSetProposalEngine.canonicalize_smiles(molecule) for molecule in molecules]
        environments = ['explicit', 'vacuum']
        temperature = 298.15 * unit.kelvin
        pressure = 1.0 * unit.atmospheres
        constraints = app.HBonds
        self._storage = NetCDFStorage(output_filename)
        self._ncmc_switching_times = ncmc_switching_times
        self._n_equilibrium_steps = equilibrium_steps
        self._geometry_options = geometry_options

        # Create a system generator for our desired forcefields.
        from perses.rjmc.topology_proposal import SystemGenerator
        system_generators = dict()
        from pkg_resources import resource_filename
        gaff_xml_filename = resource_filename('perses', 'data/gaff.xml')
        barostat = openmm.MonteCarloBarostat(pressure, temperature)
        system_generators['explicit'] = SystemGenerator([gaff_xml_filename, 'tip3p.xml'],
                                                        forcefield_kwargs={'nonbondedCutoff': 9.0 * unit.angstrom,
                                                                           'implicitSolvent': None,
                                                                           'constraints': constraints,
                                                                           'ewaldErrorTolerance': 1e-5,
                                                                           'hydrogenMass': 3.0*unit.amu}, periodic_forcefield_kwargs = {'nonbondedMethod': app.PME}
                                                        barostat=barostat)
        system_generators['vacuum'] = SystemGenerator([gaff_xml_filename],
                                                      forcefield_kwargs={'implicitSolvent': None,
                                                                         'constraints': constraints,
                                                                         'hydrogenMass': 3.0*unit.amu}, nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff})

        #
        # Create topologies and positions
        #
        topologies = dict()
        positions = dict()

        from openmoltools import forcefield_generators
        forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml')
        forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)

        # Create molecule in vacuum.
        from perses.utils.openeye import extractPositionsFromOEMol
        from openmoltools.openeye import smiles_to_oemol, generate_conformers
        if initial_molecule:
            smiles = initial_molecule
        else:
            smiles = np.random.choice(molecules)
        molecule = smiles_to_oemol(smiles)
        molecule = generate_conformers(molecule, max_confs=1)
        topologies['vacuum'] = forcefield_generators.generateTopologyFromOEMol(molecule)
        positions['vacuum'] = extractPositionsFromOEMol(molecule)

        # Create molecule in solvent.
        modeller = app.Modeller(topologies['vacuum'], positions['vacuum'])
        modeller.addSolvent(forcefield, model='tip3p', padding=9.0 * unit.angstrom)
        topologies['explicit'] = modeller.getTopology()
        positions['explicit'] = modeller.getPositions()

        # Set up the proposal engines.
        proposal_metadata = {}
        proposal_engines = dict()

        for environment in environments:
            proposal_engines[environment] = SmallMoleculeSetProposalEngine(self._molecules,
                                                                               system_generators[environment])

        # Generate systems
        systems = dict()
        for environment in environments:
            systems[environment] = system_generators[environment].build_system(topologies[environment])

        # Define thermodynamic state of interest.

        thermodynamic_states = dict()
        thermodynamic_states['explicit'] = states.ThermodynamicState(system=systems['explicit'],
                                                                     temperature=temperature, pressure=pressure)
        thermodynamic_states['vacuum'] = states.ThermodynamicState(system=systems['vacuum'], temperature=temperature)

        # Create SAMS samplers
        from perses.samplers.samplers import ExpandedEnsembleSampler, SAMSSampler
        mcmc_samplers = dict()
        exen_samplers = dict()
        sams_samplers = dict()
        for environment in environments:
            storage = NetCDFStorageView(self._storage, envname=environment)

            if self._geometry_options:
                n_torsion_divisions = self._geometry_options['n_torsion_divsions'][environment]
                use_sterics = self._geometry_options['use_sterics'][environment]

            else:
                n_torsion_divisions = 180
                use_sterics = False

            geometry_engine = geometry.FFAllAngleGeometryEngine(storage=storage, n_torsion_divisions=n_torsion_divisions, use_sterics=use_sterics)
            move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep, splitting="V R O R V",
                                                       n_restart_attempts=10)
            chemical_state_key = proposal_engines[environment].compute_state_key(topologies[environment])
            if environment == 'explicit':
                sampler_state = states.SamplerState(positions=positions[environment],
                                                    box_vectors=systems[environment].getDefaultPeriodicBoxVectors())
            else:
                sampler_state = states.SamplerState(positions=positions[environment])
            mcmc_samplers[environment] = mcmc.MCMCSampler(thermodynamic_states[environment], sampler_state, move)


            exen_samplers[environment] = ExpandedEnsembleSampler(mcmc_samplers[environment], topologies[environment],
                                                                 chemical_state_key, proposal_engines[environment],
                                                                 geometry_engine,
                                                                 options={'nsteps': self._ncmc_switching_times[environment]}, storage=storage, ncmc_write_interval=self._ncmc_switching_times[environment])
            exen_samplers[environment].verbose = True
            sams_samplers[environment] = SAMSSampler(exen_samplers[environment], storage=storage)
            sams_samplers[environment].verbose = True

        # Create test MultiTargetDesign sampler.
        from perses.samplers.samplers import MultiTargetDesign
        target_samplers = {sams_samplers['explicit']: 1.0, sams_samplers['vacuum']: -1.0}
        designer = MultiTargetDesign(target_samplers, storage=self._storage)

        # Store things.
        self.molecules = molecules
        self.environments = environments
        self.topologies = topologies
        self.positions = positions
        self.system_generators = system_generators
        self.proposal_engines = proposal_engines
        self.thermodynamic_states = thermodynamic_states
        self.mcmc_samplers = mcmc_samplers
        self.exen_samplers = exen_samplers
        self.sams_samplers = sams_samplers
        self.designer = designer
Example #20
0
def generate_vacuum_hostguest_proposal(current_mol_name="B2",
                                       proposed_mol_name="MOL"):
    """
    Generate a test vacuum topology proposal, current positions, and new positions triplet
    from two IUPAC molecule names.

    Parameters
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system
    """
    from openmoltools import forcefield_generators
    from openmmtools import testsystems

    from perses.utils.openeye import smiles_to_oemol
    from perses.utils.data import get_data_filename

    host_guest = testsystems.HostGuestVacuum()
    unsolv_old_system, old_positions, top_old = host_guest.system, host_guest.positions, host_guest.topology

    ligand_topology = [res for res in top_old.residues()]
    current_mol = forcefield_generators.generateOEMolFromTopologyResidue(
        ligand_topology[1])  # guest is second residue in topology
    proposed_mol = smiles_to_oemol('C1CC2(CCC1(CC2)C)C')

    initial_smiles = oechem.OEMolToSmiles(current_mol)
    final_smiles = oechem.OEMolToSmiles(proposed_mol)

    gaff_xml_filename = get_data_filename("data/gaff.xml")
    forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml')
    forcefield.registerTemplateGenerator(
        forcefield_generators.gaffTemplateGenerator)

    solvated_system = forcefield.createSystem(top_old, removeCMMotion=False)

    gaff_filename = get_data_filename('data/gaff.xml')
    system_generator = SystemGenerator(
        [gaff_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': app.NoCutoff
        })
    geometry_engine = geometry.FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        [initial_smiles, final_smiles],
        system_generator,
        residue_name=current_mol_name)

    #generate topology proposal
    topology_proposal = proposal_engine.propose(solvated_system,
                                                top_old,
                                                current_mol=current_mol,
                                                proposed_mol=proposed_mol)

    #generate new positions with geometry engine
    new_positions, _ = geometry_engine.propose(topology_proposal,
                                               old_positions, beta)

    return topology_proposal, old_positions, new_positions
Example #21
0
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene",
                                           proposed_mol_name="benzene",
                                           current_mol_smiles=None,
                                           proposed_mol_smiles=None,
                                           vacuum=False,
                                           render_atom_mapping=False):
    """
    This function will generate a topology proposal, old positions, and new positions with a geometry proposal (either vacuum or solvated) given a set of input iupacs or smiles.
    The function will (by default) read the iupac names first.  If they are set to None, then it will attempt to read a set of current and new smiles.
    An atom mapping pdf will be generated if specified.
    Arguments
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule
    current_mol_smiles : str (default None)
        current mol smiles
    proposed_mol_smiles : str (default None)
        proposed mol smiles
    vacuum: bool (default False)
        whether to render a vacuum or solvated topology_proposal
    render_atom_mapping : bool (default False)
        whether to render the atom map of the current_mol_name and proposed_mol_name

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators

    from openeye import oechem
    from openmoltools.openeye import iupac_to_oemol, generate_conformers, smiles_to_oemol
    from openmoltools import forcefield_generators
    import perses.utils.openeye as openeye
    from perses.utils.data import get_data_filename
    from perses.rjmc.topology_proposal import TopologyProposal, SystemGenerator, SmallMoleculeSetProposalEngine
    import simtk.unit as unit
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    if current_mol_name != None and proposed_mol_name != None:
        try:
            old_oemol, new_oemol = iupac_to_oemol(
                current_mol_name), iupac_to_oemol(proposed_mol_name)
            old_smiles = oechem.OECreateSmiString(
                old_oemol,
                oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens)
            new_smiles = oechem.OECreateSmiString(
                new_oemol,
                oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens)
        except:
            raise Exception(
                f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!"
            )
    elif current_mol_smiles != None and proposed_mol_smiles != None:
        try:
            old_oemol, new_oemol = smiles_to_oemol(
                current_mol_smiles), smiles_to_oemol(proposed_mol_smiles)
            old_smiles = oechem.OECreateSmiString(
                old_oemol,
                oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens)
            new_smiles = oechem.OECreateSmiString(
                new_oemol,
                oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens)
        except:
            raise Exception(f"the variables are not compatible")
    else:
        raise Exception(
            f"either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings."
        )

    old_oemol, old_system, old_positions, old_topology = openeye.createSystemFromSMILES(
        old_smiles, title="MOL")

    #correct the old positions
    old_positions = openeye.extractPositionsFromOEMol(old_oemol)
    old_positions = old_positions.in_units_of(unit.nanometers)

    new_oemol, new_system, new_positions, new_topology = openeye.createSystemFromSMILES(
        new_smiles, title="NEW")

    ffxml = forcefield_generators.generateForceFieldFromMolecules(
        [old_oemol, new_oemol])

    old_oemol.SetTitle('MOL')
    new_oemol.SetTitle('MOL')

    old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol)
    new_topology = forcefield_generators.generateTopologyFromOEMol(new_oemol)

    if not vacuum:
        nonbonded_method = app.PME
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                             300.0 * unit.kelvin, 50)
    else:
        nonbonded_method = app.NoCutoff
        barostat = None

    gaff_xml_filename = get_data_filename("data/gaff.xml")
    system_generator = SystemGenerator(
        [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': nonbonded_method,
            'constraints': app.HBonds,
            'hydrogenMass': 4.0 * unit.amu
        })
    system_generator._forcefield.loadFile(StringIO(ffxml))

    proposal_engine = SmallMoleculeSetProposalEngine([old_smiles, new_smiles],
                                                     system_generator,
                                                     residue_name='MOL')
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=1000,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False)

    if not vacuum:
        #now to solvate
        modeller = app.Modeller(old_topology, old_positions)
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol in ['H']
            and atom.residue.name not in ['MOL', 'OLD', 'NEW']
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=system_generator._forcefield)
        modeller.addSolvent(system_generator._forcefield,
                            model='tip3p',
                            padding=9.0 * unit.angstroms)
        solvated_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()
        solvated_positions = unit.quantity.Quantity(value=np.array([
            list(atom_pos) for atom_pos in
            solvated_positions.value_in_unit_system(unit.md_unit_system)
        ]),
                                                    unit=unit.nanometers)
        solvated_system = system_generator.build_system(solvated_topology)

        #now to create proposal
        top_proposal = proposal_engine.propose(
            current_system=solvated_system,
            current_topology=solvated_topology,
            current_mol=old_oemol,
            proposed_mol=new_oemol)
        new_positions, _ = geometry_engine.propose(top_proposal,
                                                   solvated_positions, beta)

        if render_atom_mapping:
            from perses.utils.smallmolecules import render_atom_mapping
            print(
                f"new_to_old: {proposal_engine.non_offset_new_to_old_atom_map}"
            )
            render_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol,
                                new_oemol,
                                proposal_engine.non_offset_new_to_old_atom_map)

        return top_proposal, solvated_positions, new_positions

    else:
        vacuum_system = system_generator.build_system(old_topology)
        top_proposal = proposal_engine.propose(current_system=vacuum_system,
                                               current_topology=old_topology,
                                               current_mol=old_oemol,
                                               proposed_mol=new_oemol)
        new_positions, _ = geometry_engine.propose(top_proposal, old_positions,
                                                   beta)
        if render_atom_mapping:
            from perses.utils.smallmolecules import render_atom_mapping
            print(f"new_to_old: {top_proposal._new_to_old_atom_map}")
            render_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol,
                                new_oemol, top_proposal._new_to_old_atom_map)
        return top_proposal, old_positions, new_positions
Example #22
0
""" Test fragmentation """

__author__ = 'Chaya D. Stern'

from torsionfit.tests.utils import get_fun, has_openeye
import unittest
if has_openeye:
    from openmoltools.openeye import get_charges, smiles_to_oemol
    import openeye.oechem as oechem
    from torsionfit.qmscan import fragment
    mol = smiles_to_oemol('CN(C)C/C=C/C(=O)NC1=C(C=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl)O[C@H]4CCOC4')
    charged = get_charges(mol, keep_confs=1)


class TestFragments(unittest.TestCase):

    @unittest.skipUnless(has_openeye, "Cannot test without OpenEye")
    def test_tag_funcgroup(self):
        """ Test tag functional groups """
        tagged_funcgroups = fragment._tag_fgroups(charged)
        self.assertEquals(len(tagged_funcgroups), 3)
        atom_idx = tagged_funcgroups['amide_0'][0].pop()
        atom = charged.GetAtom(oechem.OEHasAtomIdx(atom_idx))
        fgroup = atom.GetData('fgroup')
        self.assertEquals('amide_0', fgroup)

    @unittest.skipUnless(has_openeye, "Cannot test without OpenEye")
    def test_tag_rings(self):
        """ Test tag rings"""
        tagged_rings = fragment._tag_rings(charged)
        self.assertEquals(len(tagged_rings), 3)