def keep_molecule(mol, remove_smirks = list()):
    """
    Decide whether a molecule passes the storage filters.

    Parameters
    ----------
    mol - OEMol
    remove_smirks - list of SMIRKS strings that must not occur in the molecule;
                    patterns that fail to parse are ignored

    Returns
    -------
    False when any of the following holds
            - the molecule contains at least one metal atom
            - the molecule has more than 200 heavy atoms
            - a parsable pattern in remove_smirks matches the molecule
    otherwise the result of check_valence(mol)
    """
    # Reject anything containing a metal
    metal_count = oechem.OECount(mol, oechem.OEIsMetal())
    if metal_count > 0:
        return False

    # Reject molecules above the heavy atom limit
    heavy_count = oechem.OECount(mol, oechem.OEIsHeavy())
    if heavy_count > 200:
        return False

    # Reject molecules containing any unwanted pattern
    for pattern in remove_smirks:
        query = oechem.OEQMol()
        parsed = oechem.OEParseSmarts(query, pattern)
        if not parsed:
            continue
        searcher = oechem.OESubSearch(query)
        hits = list(searcher.Match(mol, False))
        if hits:
            return False

    # The valence check has the final word
    return check_valence(mol)
def GetFragmentScore(mol):
    """Return a size score for ``mol``: 2.0 per ring atom plus 1.0 per non-ring atom."""
    ring_atoms = oechem.OECount(mol, oechem.OEAtomIsInRing())
    chain_atoms = oechem.OECount(mol, oechem.OENotAtom(oechem.OEAtomIsInRing()))
    return 2.0 * ring_atoms + 1.0 * chain_atoms
def keep_molecule(mol, max_heavy_atoms = 100,
        remove_smirks = list(), max_metals = 0, elements = [], check_type = None):
    """
    Decide whether a molecule passes the configurable storage filters.

    Parameters
    ----------
    mol - OEMol
    max_heavy_atoms - largest allowed number of heavy atoms
    remove_smirks - SMIRKS strings that must not occur in the molecule;
                    patterns that fail to parse are ignored
    max_metals - largest allowed number of metal atoms
    elements - passed to read_Elements(); the element check is skipped only
               when this is None (the default empty list is still checked)
    check_type - comma separated string handed to check_atomtype() after
                 splitting; skipped when None

    Returns
    -------
    False when any filter rejects the molecule, otherwise the result of
    check_valence(mol).

    Notes
    -----
    The list defaults are only read here, never mutated by this function.
    """
    if oechem.OECount(mol, oechem.OEIsMetal()) > max_metals:
        return False

    # Count heavy atoms once; both size limits use the same value.
    heavy_atoms = oechem.OECount(mol, oechem.OEIsHeavy())
    if heavy_atoms > max_heavy_atoms:
        return False
    # Remove very small molecules that are not interesting
    if heavy_atoms < 5:
        return False

    for smirks in remove_smirks:
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, smirks):
            continue
        ss = oechem.OESubSearch(qmol)
        # A single hit is enough to reject, so stop at the first match
        # instead of collecting every match into a list.
        for _ in ss.Match(mol, False):
            return False

    if elements is not None:
        elements_list = read_Elements(elements)
        if not check_element(mol, elements_list):
            return False

    if check_type is not None:
        types = check_type.split(",")
        if not check_atomtype(mol, types):
            return False

    return check_valence(mol)
Esempio n. 4
0
def DumpGroups(mol):
    """Print group statistics for ``mol`` and then dump every group via DumpGroup."""
    print("groups of", mol.GetTitle())

    atom_groups = oechem.OECount(mol, IsAtomGroup())
    print("number of atom groups", atom_groups)

    bond_groups = oechem.OECount(mol, IsBondGroup())
    print("number of bond groups", bond_groups)

    aromatic_atom_pred = oechem.OEHasGroupType(oechem.OEGetTag("aromatic atoms"))
    print("number of aromatic atoms groups", oechem.OECount(mol, aromatic_atom_pred))

    aromatic_bond_pred = oechem.OEHasGroupType(oechem.OEGetTag("aromatic bonds"))
    print("number of aromatic bonds groups", oechem.OECount(mol, aromatic_bond_pred))

    # walk over every group attached to the molecule
    for group in mol.GetGroups():
        DumpGroup(group)
    print()
Esempio n. 5
0
def is_undesirable_molecule(mol):
    """Return True when ``mol`` should be excluded from processing.

    A molecule is undesirable when ``has_undesirable_elements(mol)`` is truthy
    or when it has no rotatable bonds.
    """
    if has_undesirable_elements(mol):
        return True
    # OEChem's rotatable-bond predicate is OEIsRotor (as used by the other
    # rotor counts in this file); `oechem.IsRotor` is not an OEChem name.
    return oechem.OECount(mol, oechem.OEIsRotor()) == 0
Esempio n. 6
0
    def GetFuncGroups(mol):
        '''
        Collect the small, acyclic, heteroatom-containing functional groups of a molecule.

        :param mol: molecule handed to oemedchem.OEGetFuncGroupFragments
        :return: list with one oechem.OEAtomBondSet per retained fragment
        '''
        def _is_wanted(fragment):
            # at most five heavy atoms
            if oechem.OECount(fragment, oechem.OEIsHeavy()) > 5:
                return False
            # at least one hetero atom
            if oechem.OECount(fragment, oechem.OEIsHetero()) == 0:
                return False
            # no ring atoms
            return not oechem.OECount(fragment, oechem.OEAtomIsInRing()) > 0

        return [
            oechem.OEAtomBondSet(fragment)
            for fragment in oemedchem.OEGetFuncGroupFragments(mol)
            if _is_wanted(fragment)
        ]
Esempio n. 7
0
def main(argv=[__name__]):
    """Print a one-line property summary for every molecule in ``argv[1]``.

    The columns are title, molecular weight, atom count, heavy atom count,
    ring atom count, rotor count and conformer count.
    """
    if len(argv) != 2:
        oechem.OEThrow.Usage("%s <infile>" % argv[0])

    ifs = oechem.oemolistream()
    infile = argv[1]
    if not ifs.open(infile):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % infile)

    print("Title MolWt NumAtoms NumHeavyAtoms NumRingAtoms NumRotors NumConfs")

    for mol in ifs.GetOEMols():
        # fall back to a placeholder when the molecule has no title
        title = mol.GetTitle() or "Untitled"
        row = (
            title,
            oechem.OECalculateMolecularWeight(mol),
            mol.NumAtoms(),
            oechem.OECount(mol, oechem.OEIsHeavy()),
            oechem.OECount(mol, oechem.OEAtomIsInRing()),
            oechem.OECount(mol, oechem.OEIsRotor()),
            mol.NumConfs(),
        )
        print("%s %.3f %d %d %d %d %d" % row)
def main(argv=[__name__]):
    """Split a molecule database into per-server files and index each of them.

    ``argv`` must be ``[program, database, prefix, n_servers]``. Every molecule
    read from ``database`` is passed through ``oefastrocs.OEPrepareFastROCSMol``
    and written to the output stream selected by its heavy atom count modulo
    ``n_servers``. Output files are named ``<base>_<i>.<ext>`` for ``i`` in
    ``1..n_servers``, where ``<ext>`` is the extension of ``prefix`` or, when
    the prefix has none, the extension of ``database``. After writing, each
    file is closed and indexed with ``oechem.OECreateMolDatabaseIdx``.

    Returns 0 on completion; usage and I/O problems are reported through
    ``oechem.OEThrow``.
    """
    if len(argv) != 4:
        oechem.OEThrow.Usage("%s <database> <prefix> <n_servers>" % argv[0])

    # input - preserve rotor-offset-compression
    ifs = oechem.oemolistream()
    oechem.OEPreserveRotCompress(ifs)

    ifname = argv[1]
    if not ifs.open(ifname):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % ifname)

    # output: strip the extension (and its dot) from the prefix when present,
    # otherwise borrow the extension of the input file
    prefix = argv[2]
    ext = oechem.OEGetFileExtension(prefix)
    if ext:
        base = prefix[:-(len(ext) + 1)]
    else:
        base = prefix
        ext = oechem.OEGetFileExtension(ifname)
    fmt = base + "_%i." + ext

    nservers = int(argv[3])
    if nservers < 1:
        # Without this guard the modulo below fails with ZeroDivisionError
        # (0) or an IndexError on the empty stream list (negative values).
        oechem.OEThrow.Fatal("n_servers must be a positive integer, got %d" % nservers)

    outstrms = []
    for i in range(1, nservers + 1):
        ofname = fmt % i
        ofs = oechem.oemolostream()
        if not ofs.open(ofname):
            # Report the file that actually failed, not the bare prefix.
            oechem.OEThrow.Fatal("Unable to open %s for writing" % ofname)

        outstrms.append(ofs)

    dots = oechem.OEDots(10000, 200, "molecules")
    for mol in ifs.GetOEMols():
        oefastrocs.OEPrepareFastROCSMol(mol)

        # molecules are assigned to a server by heavy atom count
        nhvyatoms = oechem.OECount(mol, oechem.OEIsHeavy())

        ofs = outstrms[nhvyatoms % nservers]
        oechem.OEWriteMolecule(ofs, mol)

        dots.Update()

    dots.Total()

    # close every shard before indexing it
    for strm in outstrms:
        fname = strm.GetFileName()
        strm.close()
        oechem.OEThrow.Info("Indexing %s" % fname)
        if not oechem.OECreateMolDatabaseIdx(fname):
            oechem.OEThrow.Fatal("Failed to index %s" % fname)

    return 0
    def heavy_atom_count(self):
        """ Report how many heavy atoms ``self.mol`` contains

        Parameters
        ----------


        Returns
        -------
        int, number of atoms in ``self.mol`` accepted by oechem.OEIsHeavy()
        """
        heavy_pred = oechem.OEIsHeavy()
        n_heavy = oechem.OECount(self.mol, heavy_pred)
        return n_heavy
Esempio n. 10
0
def CountRotors(ifs):
    """Print a histogram of rotor counts over all molecules read from ``ifs``.

    The histogram index is the number of rotors and the value is the number
    of molecules with that many rotors.
    """
    histogram = []
    for mol in ifs.GetOEMols():
        nrots = oechem.OECount(mol, oechem.OEIsRotor())
        # grow the histogram so that index ``nrots`` exists
        missing = nrots + 1 - len(histogram)
        if missing > 0:
            histogram.extend([0] * missing)
        histogram[nrots] += 1

    print("Max rotors:", len(histogram) - 1)
    print("Rotorcount distribution:")

    for rotor_count, n_molecules in enumerate(histogram):
        print("\t%d:\t%d" % (rotor_count, n_molecules))
Esempio n. 11
0
    def _check_one_molecule(mol2file: str) -> (oechem.OEMol, bool):
        """Read a molecule from a MOL2 file and test its invertible-nitrogen count.

        Returns:
            The molecule that was read, together with a bool that is True
            when exactly one atom satisfies ``OEIsInvertibleNitrogen``.
        """
        mol = oechem.OEMol()

        istream = oechem.oemolistream(mol2file)
        istream.SetFormat(oechem.OEFormat_MOL2)
        oechem.OEReadMolecule(istream, mol)

        n_invertible = oechem.OECount(mol, oechem.OEIsInvertibleNitrogen())
        return mol, n_invertible == 1
def check_frag_complexity(frag_smi,
                          filter_type=2,
                          check_n_rings=True,
                          filter_ortho=True):
    """Decide whether a fragment SMILES is too complex and say why.

    The ring count is computed as bonds - atoms + connected components.

    filter_type 1 flags the fragment when it has any rotor; filter_type 2
    allows a single rotor but, when ``check_n_rings`` is set, additionally
    flags a one-ring fragment that has one rotor. Both types flag more than
    one ring (when ``check_n_rings`` is set) and ortho substituents reported
    by ``find_ortho_substituents`` (when ``filter_ortho`` is set).

    Returns a ``(flagged, reason)`` tuple, where reason is ``'pass'`` for an
    unflagged fragment, or None for any other ``filter_type``.
    """
    from openeye import oechem
    oemol = oechem.OEGraphMol()
    oechem.OESmilesToMol(oemol, frag_smi)
    nrots = oechem.OECount(oemol, oechem.OEIsRotor())
    num_components, _ = oechem.OEDetermineComponents(oemol)
    num_rings = oemol.NumBonds() - oemol.NumAtoms() + num_components

    if filter_type == 1:
        max_rotors = 0
    elif filter_type == 2:
        max_rotors = 1
    else:
        return None

    if nrots > max_rotors:
        return True, f' nrot: {nrots}'

    if check_n_rings:
        if num_rings > 1:
            return True, f' nrings: {num_rings}'
        # type 2 only: a single ring carrying the one permitted rotor
        if filter_type == 2 and num_rings == 1 and nrots == 1:
            return True, f' nrings: {num_rings}, nrots: {nrots}'

    if filter_ortho and find_ortho_substituents(frag_smi):
        return True, 'ortho substituent exists.'

    return False, 'pass'
Esempio n. 13
0
from __future__ import print_function
from openeye import oechem


class PredHasDoubleBondO(oechem.OEUnaryAtomPred):
    """Atom predicate: true when the atom has an order-2 bond to an oxygen."""

    def __call__(self, atom):
        return any(
            bond.GetOrder() == 2 and bond.GetNbr(atom).IsOxygen()
            for bond in atom.GetBonds())


class PredAmideBond(oechem.OEUnaryBondPred):
    """Bond predicate: true for a single C-N bond whose carbon is double bonded to oxygen."""

    def __call__(self, bond):
        if bond.GetOrder() != 1:
            return False

        begin, end = bond.GetBgn(), bond.GetEnd()
        has_carbonyl = PredHasDoubleBondO()

        # try both orientations of the bond: (carbon, nitrogen)
        for carbon, nitrogen in ((begin, end), (end, begin)):
            if carbon.IsCarbon() and nitrogen.IsNitrogen() and has_carbonyl(carbon):
                return True
        return False


# Build the example molecule from SMILES and print how many of its bonds
# the PredAmideBond predicate accepts.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "CC(=O)Nc1c[nH]cc1")
print("Number of amide bonds =", oechem.OECount(mol, PredAmideBond()))
# @ </SNIPPET>
Esempio n. 14
0
def generate_torsion_profile(mol_list):
    """Annotate rotatable bonds of each molecule with torsion fragment data.

    For every molecule in ``mol_list`` that has at least one rotor and at
    least one torsional fragment, the central bond of each torsion found by
    ``extract_molecule_torsion_data`` is tagged with the torsion atom indices
    and the fragment's specific dihedral InChI key. Molecules are modified in
    place via ``SetData``.

    Returns:
        A tuple ``(mol_list, sf_map)`` where ``sf_map`` maps each specific
        InChI key to the value returned by ``get_profile_sf`` for the first
        fragment seen with that key.
    """
    sf_map = {}
    for graph_mol in mol_list:
        # Nothing to profile without a rotatable bond.
        if oechem.OECount(graph_mol, oechem.OEIsRotor()) == 0:
            logging.warning(
                'WARNING: Skipping molecule %s... rotor count is zero',
                graph_mol.GetTitle())
            continue

        frag_mols = get_molecule_torsion_fragments(graph_mol)
        if len(frag_mols) == 0:
            logging.warning(
                'WARNING: Skipping molecule %s... cannot identify torsional fragments',
                graph_mol.GetTitle())
            continue

        # Only the second element of the returned pair is used here.
        _, torsion_data = extract_molecule_torsion_data(graph_mol, frag_mols)

        for frag_mol in frag_mols:
            # Phosphorus is rejected here in addition to whatever
            # has_undesirable_elements() rejects.
            if has_undesirable_elements(frag_mol) or oechem.OECount(
                    frag_mol, oechem.OEIsPhosphorus()) > 0:
                logging.warning(
                    'WARNING: Skipping a fragment in molecule %s... fragment has undesirable elements',
                    graph_mol.GetTitle())
                continue

            # skip fragments with one or more formal charge
            # NOTE(review): only OEHasFormalCharge(1) and (2) are tested;
            # presumably negative charges are not covered - confirm intent.
            skip_torsion = False
            if oechem.OECount(frag_mol, oechem.OEHasFormalCharge(1)) > 0 \
                or oechem.OECount(frag_mol, oechem.OEHasFormalCharge(2)) > 0:
                skip_torsion = True

            specific_inchi = get_specific_dihedral_inchi_key(frag_mol)

            # Compute the profile value once per distinct InChI key.
            if specific_inchi not in sf_map:
                sf_list = get_profile_sf(frag_mol)
                sf_map[specific_inchi] = sf_list

            # NOTE(review): assumes torsion_data has an entry for every
            # fragment key; a missing key would raise here - confirm.
            torsion_data_items = torsion_data[specific_inchi]
            for torsion_data_item in torsion_data_items:
                # Each item holds four atom indices plus one unused value.
                a_idx, b_idx, c_idx, d_idx, _ = torsion_data_item
                b = graph_mol.GetAtom(oechem.OEHasAtomIdx(b_idx))
                c = graph_mol.GetAtom(oechem.OEHasAtomIdx(c_idx))

                # The bond between the two middle atoms carries the tags.
                bond = graph_mol.GetBond(b, c)
                if skip_torsion:
                    bond.SetData(SKIP_TORSION_TAG, True)

                tor_atoms_str = ' '.join(
                    list(map(str, [a_idx, b_idx, c_idx, d_idx])))
                if not bond.HasData(TORSION_ATOMS_FRAGMENT_TAG):
                    bond.SetData(TORSION_ATOMS_FRAGMENT_TAG, tor_atoms_str)
                    bond.SetData(SPECIFIC_INCHI_TAG, specific_inchi)
                else:
                    # Further torsions on the same bond are appended,
                    # separated by ':'; the InChI tag keeps its first value.
                    tmp_data = bond.GetData(TORSION_ATOMS_FRAGMENT_TAG)
                    tmp_data = tmp_data + ':' + tor_atoms_str
                    bond.SetData(TORSION_ATOMS_FRAGMENT_TAG, tmp_data)

        # Flag the molecule True as soon as one rotor bond carries torsion data.
        graph_mol.SetData(HAS_PROFILES_TAG, False)
        for bond in graph_mol.GetBonds(oechem.OEIsRotor()):
            if bond.HasData(TORSION_ATOMS_FRAGMENT_TAG):
                graph_mol.SetData(HAS_PROFILES_TAG, True)
                break

    return mol_list, sf_map
Esempio n. 15
0
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# Parse a Kekule-form six-membered ring; a parse failure is only reported,
# the script keeps going with whatever ``mol`` holds.
mol = oechem.OEGraphMol()
if not oechem.OEParseSmiles(mol, "C1=CC=CC=C1"):
    print("SMILES string was invalid!")

# Count aromatic atoms before and after OEAssignAromaticFlags so the two
# printed values can be compared.
print("Number of aromatic atoms =",
      oechem.OECount(mol, oechem.OEIsAromaticAtom()))
oechem.OEAssignAromaticFlags(mol)
print("Number of aromatic atoms =",
      oechem.OECount(mol, oechem.OEIsAromaticAtom()))
# @ </SNIPPET>
Esempio n. 16
0
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem


class PredAliphaticNitrogen(oechem.OEUnaryAtomPred):
    """Atom predicate: true for nitrogen atoms that are not aromatic."""

    def __call__(self, atom):
        if not atom.IsNitrogen():
            return False
        return not atom.IsAromatic()


# Build the example molecule and print how many atoms the user-defined
# PredAliphaticNitrogen predicate accepts (label and value on one line).
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1cc[nH]c1CC2COCNC2")
print("Number of aliphatic N atoms =", end=" ")
print(oechem.OECount(mol, PredAliphaticNitrogen()))
# @ </SNIPPET>
Esempio n. 17
0
#!/usr/bin/env python
# (C) 2017 OpenEye Scientific Software Inc. All rights reserved.
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem
# @ <SNIPPET>
# Build a molecule from a 20-letter FASTA sequence string and print the
# number of atoms accepted by the OEIsCAlpha predicate.
mol = oechem.OEGraphMol()

oechem.OEFastaToMol(mol, "AVILMPTWSTNQCGPRHKDE")
print(oechem.OECount(mol, oechem.OEIsCAlpha()))
# @ </SNIPPET>
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# Example molecule used by all the counts below.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1cnc(O)cc1CCCBr")

# Negation: atoms for which OEAtomIsInRing is false.
print("Number of chain atoms =", end=" ")
print(oechem.OECount(mol, oechem.OENotAtom(oechem.OEAtomIsInRing())))

# Conjunction: atoms that are both nitrogen and aromatic.
print("Number of aromatic nitrogens =", end=" ")
print(
    oechem.OECount(
        mol, oechem.OEAndAtom(oechem.OEIsNitrogen(),
                              oechem.OEIsAromaticAtom())))

# Negation of an atomic-number test: everything that is not carbon.
print("Number of non-carbons =", end=" ")
print(
    oechem.OECount(mol,
                   oechem.OENotAtom(oechem.OEHasAtomicNum(oechem.OEElemNo_C))))

print("Number of nitrogen and oxygen atoms =", end=" ")
print(
    oechem.OECount(
    # create subdirectory for this set
    if not os.path.exists(fileprefix):
        os.makedirs(fileprefix)
    os.chdir(fileprefix)

    # copy temporary files
    copyfile('../../frcmod.Frosst_AlkEthOH', './frcmod.Frosst_AlkEthOH')
    copyfile('../../leaprc.Frosst_AlkEthOH', './leaprc.Frosst_AlkEthOH')
    copyfile('../../' + fileprefix + '.oeb', './' + fileprefix + '.oeb')

    ifs = oechem.oemolistream(fileprefix + '.oeb')
    mol = oechem.OEMol()
    for mol in ifs.GetOEMols():
        # add atom names c0 (methane) and c1302 (water)
        if (oechem.OECount(mol, oechem.OEIsHeavy()) == 1):
            oechem.OETriposAtomNames(mol)
        # generate input files
        if hasAmberParams(mol, cmd_string):
            print('%s successful writing amber .mol2, .top, and .crd file' %
                  mol.GetTitle())

        # treat water with diff pre-existing tleap input file
        elif mol.GetTitle().split("_")[1] == 'c1302':
            copyfile('../../files_for_c1302/frcmod.tip3p', './frcmod.tip3p')
            copyfile('../../files_for_c1302/AlkEthOH_c1302_edited.leap_in',
                     './AlkEthOH_c1302_edited.leap_in')
            os.system(
                'tleap -f leaprc.Frosst_AlkEthOH -f AlkEthOH_c1302_edited.leap_in >| leap_lig.stdout'
            )
            print('%s successful writing amber .mol2, .top, and .crd file' %
def main(argv=[__name__]):
    """Parse ``argv`` with InterfaceData and print the heavy atom count of the ``-mol`` molecule."""
    itf = oechem.OEInterface(InterfaceData, argv)

    mol = itf.GetOEGraphMol("-mol")
    heavy_count = oechem.OECount(mol, oechem.OEIsHeavy())
    print("Number of heavy atoms in molecule = %d" % heavy_count)
Esempio n. 21
0
#!/usr/bin/env python
# (C) 2017 OpenEye Scientific Software Inc. All rights reserved.
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# Build the example molecule from SMILES.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1cc[nH]c1CC2COCNC2")

# Count atoms with two built-in predicates: heavy atoms and ring atoms.
print("Number of heavy atoms =", oechem.OECount(mol, oechem.OEIsHeavy()))
print("Number of ring atoms  =", oechem.OECount(mol, oechem.OEAtomIsInRing()))
# @ </SNIPPET>
Esempio n. 22
0
def _read_first_graph_mol(path):
    """Read the first molecule from ``path`` and always close the stream."""
    stream = oechem.oemolistream(path)
    try:
        mol = oechem.OEGraphMol()
        oechem.OEReadMolecule(stream, mol)
        return mol
    finally:
        stream.close()


def _merge_score_column(all_ff_df, scores, column_name):
    """Merge a ``{molname: value}`` mapping into ``all_ff_df`` as ``column_name``."""
    tempdf = pd.DataFrame.from_dict(scores, 'index')
    tempdf = tempdf.rename({0: column_name}, axis='columns')
    tempdf['MolNames'] = tempdf.index
    return all_ff_df.merge(tempdf, on='MolNames')


def all_info_df(ffdirectorylist, all_ff_df):
    """
    This is the all_info_df function. It takes in the list of forcefields,
    as well as the dataframe of all molecule names, and runs TFD and Tanimoto
    Combo on all molecules. Its output is a dataframe of all this data.

    Molecule files are read from ``<directory>/<forcefield>/<molname>.mol2``,
    where ``directory`` is a name defined outside this function. If anything
    fails while processing a molecule, its heavy atom count and all of its
    TFD/TanimotoCombo scores are recorded as -1.

    Args:
        ffdirectorylist (list) list of ff to compare
        all_ff_df (dataframe) dataframe created by make_molname_df func above.

    Returns:
        all_ff_df (dataframe) same dataframe merged with one 'TFD <ff1> <ff2>'
        and one 'TANI <ff1> <ff2>' column per forcefield pair, plus a
        'HeavyAtomCount' column.
    """
    # Every unordered forcefield pair, computed once and reused below.
    ff_pairs = list(itertools.combinations(ffdirectorylist, 2))
    pair_keys = ['%s %s' % pair for pair in ff_pairs]

    # Score dictionaries: pair key -> {molname: score}
    heavyatomdict = {}
    TFDdict = {key: {} for key in pair_keys}
    TANIdict = {key: {} for key in pair_keys}

    # Generates all the data
    for molname in all_ff_df['MolNames']:
        print(molname)
        mol_file = '%s' % molname + '.mol2'
        try:
            # Heavy atom count comes from the first forcefield's structure.
            refmolhev = _read_first_graph_mol(
                '%s/%s/%s' % (directory, ffdirectorylist[0], mol_file))
            heavyatomdict[molname] = oechem.OECount(refmolhev,
                                                    oechem.OEIsHeavy())
            # Gets TanimotoCombo and TFD values
            for (i, j), key in zip(ff_pairs, pair_keys):
                refmol = _read_first_graph_mol('%s/%s/%s' %
                                               (directory, i, mol_file))
                qmol = _read_first_graph_mol('%s/%s/%s' %
                                             (directory, j, mol_file))
                # Both dictionaries are keyed by ``molname`` itself so the
                # success and failure paths always agree on the key.
                TFDdict[key][molname] = TFD_for_oemols(refmol, qmol)
                TANIdict[key][molname] = tanimotocombo(refmol, qmol)
        except Exception:
            # Best effort: mark the whole molecule as failed. Streams are
            # already closed by the reader helper, so nothing here can raise
            # on an unbound stream variable, and KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            heavyatomdict[molname] = -1
            for key in pair_keys:
                TANIdict[key][molname] = -1
                TFDdict[key][molname] = -1

    # Loads data into dataframe
    for key in TFDdict:
        all_ff_df = _merge_score_column(all_ff_df, TFDdict[key],
                                        'TFD %s' % key)
    for key in TANIdict:
        all_ff_df = _merge_score_column(all_ff_df, TANIdict[key],
                                        'TANI %s' % key)
    all_ff_df = _merge_score_column(all_ff_df, heavyatomdict,
                                    'HeavyAtomCount')
    return all_ff_df
Esempio n. 23
0
#!/usr/bin/env python
# (C) 2017 OpenEye Scientific Software Inc. All rights reserved.
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# Build the example molecule from SMILES.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1cc[nH]c1CC2COCNC2")
# @ <SNIPPET-PRED-ATOMIC>
# Count oxygen atoms twice: once by atomic number, once with OEIsOxygen.
print("Number of oxygen atoms =",
      oechem.OECount(mol, oechem.OEHasAtomicNum(oechem.OEElemNo_O)))
print("Number of oxygen atoms =", oechem.OECount(mol, oechem.OEIsOxygen()))
# @ </SNIPPET-PRED-ATOMIC>
# @ </SNIPPET>
Esempio n. 24
0
def IsMoleculeInHeavyAtomCountRange(min, max, mol):
    """Return ``IsBetween(min, max, n)`` where ``n`` is the heavy atom count of ``mol``."""
    return IsBetween(min, max, oechem.OECount(mol, oechem.OEIsHeavy()))
Esempio n. 25
0
# liable for any damages or liability in connection with the Sample Code
# or its use.

import sys
from openeye import oechem

# Read the first molecule from the file named on the command line.
# NOTE(review): the results of ifs.open() and OEReadMolecule() are not
# checked, so a bad path is not reported before the counts run.
ifs = oechem.oemolistream()
ifs.open(sys.argv[1])
mol = oechem.OEGraphMol()
oechem.OEReadMolecule(ifs, mol)

# @ <SNIPPET-OEAtomMatchResidue>
# Match on residue name only.
resAla = oechem.OEAtomMatchResidueID()
resAla.SetName("ALA")
predAla = oechem.OEAtomMatchResidue(resAla)
print("Number of atoms matching residue name ALA = ", oechem.OECount(mol, predAla))

# Match on chain identifier only.
resChainA = oechem.OEAtomMatchResidueID()
resChainA.SetChainID("A")
predChainA = oechem.OEAtomMatchResidue(resChainA)
print("Number of atoms matching chain A = ", oechem.OECount(mol, predChainA))

# Match on name, chain and residue number together.
resHis = oechem.OEAtomMatchResidueID()
resHis.SetName("HIS")
resHis.SetChainID("A")
resHis.SetResidueNumber("88")
predHis = oechem.OEAtomMatchResidue(resHis)
print("Number of atoms matching residue (HIS A 88) = ", oechem.OECount(mol, predHis))

# alternative way to initialize as regex
predHis2 = oechem.OEAtomMatchResidue("HIS:88:.*:A:.*:.*")
Esempio n. 26
0
def do(controller):
    """
    Write per-atom buried surface areas for the ligands of each assembly.

    For every PDB entry returned by ``get_assembly_sets`` a file
    ``binding_site_atom_surface_areas.credo`` is written below the CREDO data
    directory. For each ligand with at least 7 heavy atoms, the per-atom
    areas of the ligand alone, its binding site alone and the two combined
    are computed, and the atoms whose area differs between the unbound and
    bound state are passed to ``write_atoms``.

    ``controller`` supplies the command (``controller.command``) and the
    parsed command line arguments (``controller.pargs``).
    """
    # get the controller command
    # (not referenced again in this function)
    cmd = controller.command

    # get the command line arguments and options
    args = controller.pargs

    # predicate to remove non-polymer atoms from structure
    nonpolymers = oechem.OEOrAtom(
        OEAtomHasIntData(('entity_type_bm', 0)),
        OEAtomBinaryAndIntData(('entity_type_bm', 3)))

    assemblysets = get_assembly_sets(args)

    # directory containing all the biological assemblies in OEB format
    OEB_ASSEMBLIES_DIR = app.config.get('directories', 'quat_oeb')

    # directory where surface areas will be written
    CREDO_DATA_DIR = app.config.get('directories', 'credo_data')

    # a single input stream is reused for every assembly file
    ifs = oechem.oemolistream()
    ifs.SetFormat(oechem.OEFormat_OEB)

    # initialize progressbar
    if args.progressbar:
        bar = ProgressBar(widgets=[
            'PDB entries: ',
            SimpleProgress(), ' ',
            Percentage(),
            Bar()
        ],
                          maxval=len(assemblysets)).start()

    # iterate through assembly sets
    for counter, (pdb, assemblyset) in enumerate(assemblysets, 1):
        if args.progressbar: bar.update(counter)

        # create a data directory for this structure to which all data will be written
        struct_data_dir = os.path.join(CREDO_DATA_DIR, pdb[1:3].lower(),
                                       pdb.lower())

        # make necessary directories recursively if they do not exist yet
        if not exists(struct_data_dir):
            os.makedirs(struct_data_dir)

        # path to the file where the atom surface areas of all atoms will be written
        surface_areas_path = os.path.join(
            struct_data_dir, 'binding_site_atom_surface_areas.credo')

        # do not recalculate atom surface area contributions if incremental
        if args.incremental and exists(
                surface_areas_path) and getsize(surface_areas_path) > 0:
            continue
        # args.update is a number of days: skip non-empty output modified
        # within that window
        elif (args.update and exists(surface_areas_path)
              and getmtime(surface_areas_path) >= time() -
              (args.update * 60 * 60 * 24) and getsize(surface_areas_path)):
            app.log.info("Output for PDB entry {0} exists and is more recent than {1} days. Skipped."\
                         .format(pdb, args.update))
            continue

        # output file stream and CSV writer
        # NOTE(review): the 'tabs' dialect is presumably registered elsewhere
        # in the project - confirm.
        atomfs = open(surface_areas_path, 'w')
        atomwriter = csv.writer(atomfs, dialect='tabs')

        # deal with each found assembly separately
        # some pdb entries consist of more than one
        for assembly in assemblyset:
            if args.quat:
                path = os.path.join(OEB_ASSEMBLIES_DIR, pdb[1:3].lower(),
                                    pdb.lower(), assembly)

            else:
                app.log.error("the calculation of buried ligand surface areas "
                              "is only supported for quaternary structures.")
                sys.exit(1)

            # NOTE(review): only a warning is logged for a missing file;
            # execution still continues to ifs.open() below.
            if not os.path.isfile(path):
                app.log.warn("cannot calculate buried surface areas: "
                             "file {} does not exist!".format(path))

            # get the quaternary structure
            ifs.open(str(path))

            # NOTE(review): ``.next()`` is the Python 2 iterator method; the
            # loop variable ``assembly`` (a file name) is rebound to the
            # molecule here.
            try:
                assembly = ifs.GetOEGraphMols().next()
            except StopIteration:
                assembly = None

            if not assembly:
                app.log.warn(
                    "cannot calculate buried surface areas: "
                    "file {} does not contain a valid molecule!".format(path))
                continue

            if not assembly.GetListData('ligands'):
                continue

            # identifier of the assembly
            assembly_serial = assembly.GetIntData('assembly_serial')

            # remove all non-polymers from assembly
            for atom in assembly.GetAtoms(nonpolymers):
                assembly.DeleteAtom(atom)

            # ignore bizarre assemblies
            if not assembly.NumAtoms():
                app.log.warn(
                    "cannot calculate buried surface areas: "
                    "file {} contains assembly with no atoms!".format(path))
                continue

            # keep only the location state with the largest average occupancy
            assembly_hi_occ = oechem.OEGraphMol()
            altlocfactory = oechem.OEAltLocationFactory(assembly)
            altlocfactory.MakeCurrentAltMol(assembly_hi_occ)

            # get the ligands
            ligands = assembly_hi_occ.GetListData('ligands')

            # iterate through all ligands of the biomolecule and calculate the buried
            # surface area atom contributions for all involved atoms
            for ligand in ligands:

                # ignore small ligands
                if oechem.OECount(ligand, oechem.OEIsHeavy()) < 7: continue

                entity_serial = ligand.GetIntData('entity_serial')

                # keep only the location state with the largest average occupancy
                altlig = oechem.OEGraphMol()
                altlocfactory = oechem.OEAltLocationFactory(ligand)
                altlocfactory.MakeCurrentAltMol(altlig)

                cmplx_srf = oespicoli.OESurface()
                ligand_srf = oespicoli.OESurface()

                # make solvent-accessible surface of ligand
                oespicoli.OEMakeAccessibleSurface(ligand_srf, altlig, 0.5, 1.4)

                # get the atom contributions of the ligand surface
                ligand_atom_areas = get_atom_surface_areas(altlig, ligand_srf)

                # extract the binding site of the assembly to speed up surface
                # area calculation
                binding_site = get_binding_site(assembly_hi_occ, altlig)

                # make solvent-accessible surface of binding site
                binding_site_srf = oespicoli.OESurface()
                oespicoli.OEMakeAccessibleSurface(binding_site_srf,
                                                  binding_site, 0.5, 1.4)

                # get the atom contributions of the binding site surface
                binding_site_atom_areas = get_atom_surface_areas(
                    binding_site, binding_site_srf)

                # create complex: binding site atoms first, ligand atoms after
                cmplx = oechem.OEGraphMol()
                oechem.OEAddMols(cmplx, binding_site)
                oechem.OEAddMols(cmplx, altlig)

                # make solvent-accessible surface of the complex
                oespicoli.OEMakeAccessibleSurface(cmplx_srf, cmplx, 0.5, 1.4)

                # surface area atom contributions of the whole complex
                cmplx_atom_areas = get_atom_surface_areas(cmplx, cmplx_srf)

                ## extract the atom surface areas in the bound state through slices
                # (relies on the complex keeping the order in which the two
                # molecules were added above)
                binding_site_atom_areas_bound = cmplx_atom_areas[:binding_site.
                                                                 NumAtoms()]
                ligand_atom_areas_bound = cmplx_atom_areas[binding_site.
                                                           NumAtoms():]

                # difference between apo and bound state per polymer atom
                # NOTE(review): element-wise subtraction presumably relies on
                # get_atom_surface_areas returning arrays - confirm.
                binding_site_delta = binding_site_atom_areas - binding_site_atom_areas_bound
                ligand_delta = ligand_atom_areas - ligand_atom_areas_bound

                # boolean map indicating for which atom the surface area has changed
                binding_site_atom_map = binding_site_delta != 0
                ligand_atom_map = ligand_delta != 0

                if args.dry_run: continue

                # only record the atoms where the solvent-accessible surface
                # area has actually changed
                write_atoms(atomwriter, binding_site, binding_site_atom_map,
                            pdb, assembly_serial, entity_serial,
                            binding_site_atom_areas,
                            binding_site_atom_areas_bound)

                # only record the atoms where the solvent-accessible surface area
                # has actually changed
                write_atoms(atomwriter, altlig, ligand_atom_map, pdb,
                            assembly_serial, entity_serial, ligand_atom_areas,
                            ligand_atom_areas_bound)

                app.log.debug("wrote buried surface areas for all ligands in "
                              "biomolecule {} to {}.".format(
                                  pdb, surface_areas_path))

            # flush after each assembly that was processed to the end
            atomfs.flush()
        atomfs.close()

    if args.progressbar:
        bar.finish()
Esempio n. 27
0
# Example molecule used for the amide bond count printed further down.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "CC(=O)Nc1c[nH]cc1")


class PredHasDoubleBondO(oechem.OEUnaryAtomPred):
    """Atom predicate that holds for atoms double-bonded to an oxygen."""

    def __call__(self, atom):
        """Return True if ``atom`` has an order-2 bond whose other end is oxygen."""
        return any(
            bond.GetOrder() == 2 and bond.GetNbr(atom).IsOxygen()
            for bond in atom.GetBonds()
        )


# @ <SNIPPET-PRED-PYBOND>
def AmideBond(bond):
    """Tell whether ``bond`` is a single C-N bond whose carbon carries a C=O.

    The carbon may be at either end of the bond. Bonds with an order other
    than 1 are rejected before anything else is inspected.
    """
    if bond.GetOrder() != 1:
        return False

    begin, end = bond.GetBgn(), bond.GetEnd()
    has_carbonyl = PredHasDoubleBondO()

    # First alternative: carbon at the begin atom; second: carbon at the end.
    return bool(
        (begin.IsCarbon() and end.IsNitrogen() and has_carbonyl(begin))
        or (begin.IsNitrogen() and end.IsCarbon() and has_carbonyl(end))
    )


# Count the bonds of mol accepted by AmideBond (wrapped in oechem.PyBondPredicate).
print("Number of amide bonds =", oechem.OECount(mol, oechem.PyBondPredicate(AmideBond)))
# @ </SNIPPET-PRED-PYBOND>
# @ </SNIPPET>
Esempio n. 28
0
        if not frag in frags:
            print('{} not in {}'.format(frag, bond))
            failures[ser_bond] = frag
            continue

        idx = frags.index(frag)
        sqrt_mmd = np.sqrt(np.asarray(mmd_scores))
        norm = plt.Normalize(min(sqrt_mmd), max(sqrt_mmd))
        normed_scores = norm(sqrt_mmd)
        score = sqrt_mmd[idx]
        normed_score = normed_scores[idx]
        print(f.fragments)
        if tuple(bond) not in f.fragments:
            bond = tuple(reversed(bond))
        mol = f.fragments[tuple(bond)]
        size = oechem.OECount(mol, oechem.OEIsHeavy())
        score_size[ser_bond] = [frag, score, normed_score, size]

        if ser_bond not in frag_scores_2:
            continue
        frags_2 = frag_scores_2[ser_bond]['frags']
        mmd_scores_2 = frag_scores_2[ser_bond]['mmd_scores']
        sqrt_mmd_2 = np.sqrt(np.asarray(mmd_scores_2))
        idx_2 = frags_2.index(frag)

        score_2 = sqrt_mmd_2[idx_2]
        norm_2 = plt.Normalize(min(sqrt_mmd_2), max(sqrt_mmd_2))
        normed_scores_2 = norm_2(sqrt_mmd_2)

        normed_score_2 = normed_scores_2[idx_2]
        if tuple(bond) not in f.fragments:
Esempio n. 29
0
#!/usr/bin/env python
# (C) 2017 OpenEye Scientific Software Inc. All rights reserved.
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# Parse the example SMILES string into a freshly created graph molecule.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "CC(=O)Nc1c[nH]cc1")

# Report how many bonds satisfy the ring-bond and the rotor predicates.
print("Number of ring bonds  =", oechem.OECount(mol, oechem.OEBondIsInRing()))
print("Number of rotor bonds =", oechem.OECount(mol, oechem.OEIsRotor()))
# @ </SNIPPET>
Esempio n. 30
0
    def process(self, record, port):
        """Run the psi4 torsion calculation for the molecule held by ``record``.

        The molecule is read from ``self.args.in_mol_field``. Its labelled
        torsion atoms are taken from the ``TORSION_ATOMS_FRAGMENT`` SD tag,
        the basis sets are switched to their diffuse variants when needed,
        and ``calculate_energy`` is invoked. Start and end time stamps are
        written as SD data on the selected conformer (when
        ``self.args.only_selected_conformer`` is set) or on the molecule.

        Parameters
        ----------
        record - record expected to hold the molecule
        port - unused by this method

        Emits
        -----
        self.success - a new OEMolRecord holding the processed molecule
        self.failure - the input record, when the molecule or the torsion
                       label is missing, the selected conformer cannot be
                       found, or the calculation raises
        """
        if not record.has_value(self.args.in_mol_field):
            self.log.error("Could not find molecules in OEMolRecord")
            self.failure.emit(record)
            return
        mol = record.get_value(self.args.in_mol_field)

        parent_torsion_tag = "TORSION_ATOMS_ParentMol"
        torsion_atoms_in_parent = get_sd_data(mol, parent_torsion_tag).split()
        dih_name = mol.GetTitle() + "_" + "_".join(torsion_atoms_in_parent)

        torsion_tag = "TORSION_ATOMS_FRAGMENT"
        torsion_atoms_in_fragment = get_sd_data(mol, torsion_tag).split()
        # NOTE(review): the "- 1" suggests the tag stores 1-based atom
        # numbers that are converted to 0-based indices here -- confirm.
        dihedral_atom_indices = [int(x) - 1 for x in torsion_atoms_in_fragment]
        # A list comprehension is never None; an empty list is what signals
        # that no torsion label was found.
        if not dihedral_atom_indices:
            self.log.warn("Unable to find labelled torsion in %s" % dih_name)
            self.failure.emit(record)
            return

        opt_basis = self.args.opt_basis
        spe_basis = self.args.spe_basis

        # If the fragment contains S or a negatively charged heavy atom
        #     use 6-31+G* instead of 6-31G*
        #     use 6-31+G** instead of 6-31G**
        contains_sulfur = oechem.OECount(mol, oechem.OEIsSulfur()) > 0
        has_anion = any(atom.GetFormalCharge() < 0
                        for atom in mol.GetAtoms(oechem.OEIsHeavy()))

        if contains_sulfur or has_anion:
            # Keep the historical wording for sulfur; name the real trigger
            # when only a negative formal charge caused the switch.
            reason = "S" if contains_sulfur else "a negatively charged atom"
            message = "Using %s instead of %s as %s basis because fragment contains %s."
            for plain, diffuse in (("6-31G*", "6-31+G*"),
                                   ("6-31G**", "6-31+G**")):
                if opt_basis == plain:
                    self.log.warn(message % (diffuse, plain, "opt", reason))
                    opt_basis = diffuse
                if spe_basis == plain:
                    self.log.warn(message % (diffuse, plain, "spe", reason))
                    spe_basis = diffuse

        def find_target():
            """Return the object that carries the label and timing SD data."""
            if not self.args.only_selected_conformer:
                return mol
            key_conf_id = mol.GetIntData("SELECTED_CONFORMER")
            for conf in mol.GetConfs():
                if conf.GetIdx() == key_conf_id:
                    return conf
            raise ValueError("Selected conformer %s not found in %s" %
                             (key_conf_id, dih_name))

        def stamp(verb, event):
            """Log the event and record its time stamp as SD data."""
            # The target is looked up on every call because the calculation
            # runs in between and may have modified the molecule.
            target = find_target()
            conf_name = get_sd_data(target, "CONFORMER_LABEL")
            time_stamp = "{:%Y-%m-%d %H:%M:%S}".format(datetime.datetime.now())
            hostname = socket.gethostname()
            self.log.info("%s psi4 calculation for %s on %s at %s" %
                          (verb, conf_name, hostname, time_stamp))
            oechem.OESetSDData(target, "%s %s time" % (self.name, event),
                               time_stamp)

        try:
            stamp("Starting", "start")

            dih, _ = get_dihedral(mol, dihedral_atom_indices)
            calculate_energy(
                mol,
                dih,
                spe_method=self.args.spe_method,
                spe_basis=spe_basis,
                geom_opt_technique=self.args.geom_opt_technique,
                opt_method=self.args.opt_method,
                opt_basis=opt_basis,
                geom_maxiter=self.args.geom_maxiter,
                only_selected_conf=self.args.only_selected_conformer,
                molden_output=self.args.molden_output,
                **self.psi4opts)

            stamp("Completed", "end")

            optimized_mol_record = OEMolRecord()
            optimized_mol_record.set_mol(mol)
            self.success.emit(optimized_mol_record)
        except Exception as e:
            # Any failure in the calculation is reported and the original
            # record is routed to the failure port instead of propagating.
            print(e)
            self.log.error("Error with {} {}".format(mol.GetTitle(), e))
            self.failure.emit(record)