Esempio n. 1
0
    def test_success(self):
        """
        Run a single molecule through YankBindingCube and check the result.

        The molecule must be emitted exactly once on the success port, never
        on the failure port, keep its atom count, and carry both binding
        free-energy SD tags.
        """
        print('Testing cube:', self.cube.name)

        # Load the input molecule from the packaged test data
        mol = oechem.OEMol()
        ifs = oechem.oemolistream(get_data_filename('p-xylene.mol2'))
        was_read = oechem.OEReadMolecule(ifs, mol)
        if not was_read:
            raise Exception('Cannot read molecule')
        ifs.close()

        # Feed the molecule to the cube through its intake port
        self.cube.process(mol, self.cube.intake.name)

        # Exactly one emission on 'success', none on 'failure'
        self.assertEqual(self.runner.outputs['success'].qsize(), 1)
        self.assertEqual(self.runner.outputs['failure'].qsize(), 0)

        outmol = self.runner.outputs["success"].get()
        # The cube must not add or drop atoms
        self.assertEqual(outmol.NumAtoms(), mol.NumAtoms())
        # The estimate and its uncertainty must both be attached as SD data
        for sd_tag in ('DeltaG_yank_binding', 'dDeltaG_yank_binding'):
            self.assertTrue(oechem.OEHasSDData(outmol, sd_tag))
def get_sd_data(mol, tag):
    """
    Return the SD data value stored under ``tag``, or ``""`` if there is none.

    The tag is looked up on ``mol`` itself first and then on
    ``mol.GetActive()``.

    Parameters
    ----------
    mol : molecule object accepted by ``oechem.OEHasSDData``
    tag : str
        Name of the SD tag to read.

    Returns
    -------
    str
        The tag's value. An empty string is returned when neither ``mol`` nor
        its active conformer carries the tag, or when ``mol`` has no
        ``GetActive`` method (the AttributeError is printed, not raised).
    """
    try:
        if oechem.OEHasSDData(mol, tag):
            return oechem.OEGetSDData(mol, tag)
        active = mol.GetActive()
        if oechem.OEHasSDData(active, tag):
            return oechem.OEGetSDData(active, tag)
    except AttributeError as e:
        print(e)
    # Always hand back a string so callers can chain str methods such as
    # .split() without first checking for None.
    return ""
Esempio n. 3
0
 def GetScoreToCmp(mol):
     """
     Return the shape score of ``mol`` as a float, for use as a sort key.

     Molecules tagged with "ShapeTanimoto" are scored with the Tanimoto
     family, all others with the Tversky family. Within a family the combo
     score is preferred over the plain shape score when it is present.
     """
     if oechem.OEHasSDData(mol, "ShapeTanimoto"):
         combo_tag, shape_tag = "TanimotoCombo", "ShapeTanimoto"
     else:
         combo_tag, shape_tag = "TverskyCombo", "ShapeTversky"

     if oechem.OEHasSDData(mol, combo_tag):
         chosen_tag = combo_tag
     else:
         chosen_tag = shape_tag
     return float(oechem.OEGetSDData(mol, chosen_tag))
def has_ic50(mol):
    """
    Return True if this molecule has fluorescence IC50 data.

    The molecule qualifies when it carries an 'f_avg_pIC50' SD tag whose
    value can be parsed as a float; otherwise False is returned.
    """
    from openeye import oechem
    if not oechem.OEHasSDData(mol, 'f_avg_pIC50'):
        return False

    try:
        # Only parseability matters; the numeric value itself is not used.
        float(oechem.OEGetSDData(mol, 'f_avg_pIC50'))
    except (TypeError, ValueError):
        return False
    return True
Esempio n. 5
0
def RenderData(image, mol, tags):
    """
    Draw a two-column table of SD data for ``mol`` onto ``image``.

    One row is drawn per entry of ``tags``: the tag name followed by a colon
    in the first column, and the tag's value (or "N/A" when the molecule does
    not carry the tag) in the second.
    """
    from openeye import oechem
    from openeye import oedepict

    # Collect (tag, value) pairs, substituting a placeholder for missing tags
    data = [
        (tag, oechem.OEGetSDData(mol, tag)
         if oechem.OEHasSDData(mol, tag) else "N/A")
        for tag in tags
    ]

    tableopts = oedepict.OEImageTableOptions(
        len(data), 2, oedepict.OEImageTableStyle_LightBlue)
    tableopts.SetColumnWidths([10, 20])
    tableopts.SetMargins(2.0)
    tableopts.SetHeader(False)
    tableopts.SetStubColumn(True)
    table = oedepict.OEImageTable(image, tableopts)

    # Table cells are addressed from 1, hence start=1
    for rowidx, (tag, value) in enumerate(data, start=1):
        table.DrawText(table.GetCell(rowidx, 1), tag + ":")
        table.DrawText(table.GetBodyCell(rowidx, 1), value)
Esempio n. 6
0
def SDF2CSV(ifs, csv):
    """
    Write the SD data of every molecule in ``ifs`` to ``csv`` as CSV text.

    The input stream is read twice: once to collect the SD tags in order of
    first appearance, and once (after rewinding) to write one row per
    molecule. The first column is the molecule title; a tag the molecule
    lacks yields an empty field. Values are written as-is, without quoting.
    """
    # Pass 1: gather the unique tags in first-seen order
    taglist = []
    for mol in ifs.GetOEGraphMols():
        for dp in oechem.OEGetSDDataPairs(mol):
            name = dp.GetTag()
            if name not in taglist:
                taglist.append(name)

    ifs.rewind()

    # Column labels
    csv.write(",".join(["Title"] + taglist) + '\n')

    # Pass 2: one row per molecule
    for mol in ifs.GetOEGraphMols():
        fields = [mol.GetTitle()]
        fields.extend(
            oechem.OEGetSDData(mol, tag)
            if oechem.OEHasSDData(mol, tag) else ''
            for tag in taglist)
        csv.write(','.join(fields))
        csv.write('\n')
Esempio n. 7
0
def main(argv=[__name__]):
    """
    Copy molecules whose numeric SD tag lies within [-min, -max].

    Command-line options are parsed from ``argv`` with ``InterfaceData``.
    At least one of ``-min`` / ``-max`` must be given; a missing bound is
    treated as unbounded. Molecules lacking the ``-tag`` SD field, or whose
    value cannot be converted to a float, are skipped with a warning.
    """
    # NOTE: the list default is never mutated here, so sharing it between
    # calls is harmless.
    itf = oechem.OEInterface(InterfaceData, argv)
    if not (itf.HasDouble("-min") or itf.HasDouble("-max")):
        oechem.OEThrow.Fatal("Please set a filter value with -min or -max")

    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-i")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" %
                             itf.GetString("-i"))

    if not oechem.OEIsSDDataFormat(ifs.GetFormat()):
        oechem.OEThrow.Fatal(
            "Only works for input file formats that support SD data (sdf,oeb,csv)"
        )

    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-o")):
        # Report the output path that failed to open, not the input path.
        oechem.OEThrow.Fatal("Unable to open %s for writing" %
                             itf.GetString("-o"))

    if not oechem.OEIsSDDataFormat(ofs.GetFormat()):
        # Written as one literal so no continuation-line whitespace ends up
        # inside the message.
        oechem.OEThrow.Fatal(
            "Only works for output file formats that support SD data (sdf,oeb,csv)"
        )

    tag = itf.GetString("-tag")

    minval = float("-inf")
    if itf.HasDouble("-min"):
        minval = itf.GetDouble("-min")

    maxval = float("inf")
    if itf.HasDouble("-max"):
        maxval = itf.GetDouble("-max")

    for mol in ifs.GetOEGraphMols():
        if not oechem.OEHasSDData(mol, tag):
            oechem.OEThrow.Warning("Unable to find %s tag on %s" %
                                   (tag, mol.GetTitle()))
            continue

        value = oechem.OEGetSDData(mol, tag)
        try:
            tagvalue = float(value)
        except ValueError:
            oechem.OEThrow.Warning("Failed to convert (%s) to a number in %s" %
                                   (value, mol.GetTitle()))
            continue

        # Two separate comparisons (rather than a chained range test) so a
        # NaN value is still written, as before.
        if tagvalue < minval:
            continue

        if tagvalue > maxval:
            continue

        oechem.OEWriteMolecule(ofs, mol)
Esempio n. 8
0
def Rename(ifs, ofs, fieldname):
    """
    Copy molecules from ``ifs`` to ``ofs``, retitling each from an SD field.

    A molecule carrying ``fieldname`` gets that field's value as its title.
    A molecule without it keeps its title and a warning is issued. Every
    molecule is written out either way.
    """
    for mol in ifs.GetOEGraphMols():
        if not oechem.OEHasSDData(mol, fieldname):
            old_title = mol.GetTitle()
            oechem.OEThrow.Warning(
                "Renaming of molecule %s failed; no field %s" %
                (old_title, fieldname))
        else:
            new_title = oechem.OEGetSDData(mol, fieldname)
            mol.SetTitle(new_title)
        oechem.OEWriteMolecule(ofs, mol)
Esempio n. 9
0
def test_read_mols_slice():
    """Check that ``read_mols`` honours the ``mol_slice`` argument."""
    data_dir = os.path.join(mydir, 'data_tests')

    # This slice should select only the first molecule of the file
    mlist = read_mols(os.path.join(data_dir, 'two_alkanes_prefilt.sdf'),
                      mol_slice=[0, 1, 1])
    assert len(mlist) == 1
    assert mlist[0].GetTitle() == 'AlkEthOH_c312'
    assert mlist[0].NumConfs() == 9
    conf = list(mlist[0].GetConfs())[0]
    assert oechem.OEHasSDData(conf, "MM Szybki SD Energy")

    # File order:
    # AlkEthOH_c1178, Div_2, Div_6, Div_9, Div_3b, Div_7b, Div_8b, AlkEthOH_r187
    mlist = read_mols(os.path.join(data_dir, 'eight_mols.sdf'),
                      mol_slice=[2, 8, 2])
    assert len(mlist) == 3
    assert mlist[0].GetTitle() == 'Div_6'
    assert mlist[1].GetTitle() == 'Div_3b'
    assert mlist[2].GetTitle() == 'Div_8b'
def dock_molecule(molecule, ofs, default_receptor='x0387'):
    """
    Dock the specified molecule, writing out to the specified stream.

    The receptors to dock against are taken from the comma-separated
    "fragments" SD tag of the molecule, keeping only those fragments for
    which a receptor file exists on disk.

    Parameters
    ----------
    molecule : OEMol
        The molecule to dock
    ofs : oechem.oemolostream
        The stream to write docked molecules to
    default_receptor : str, optional, default='x0387'
        The receptor to dock to when none of the listed fragments has a
        receptor file
    """
    # Imports sit at the very top of the function: a function-local import
    # makes the name local for the whole body, so using it before the import
    # statement raises UnboundLocalError.
    import os

    from openeye import oechem
    from openeye import oequacpac

    def receptor_path(fragment):
        return f'../receptors/Mpro-{fragment}-receptor.oeb.gz'

    # Make a copy of the molecule
    molecule = oechem.OEMol(molecule)

    print(f'\n{molecule.GetTitle()}')
    if not oechem.OEHasSDData(molecule, "fragments"):
        # NOTE(review): a molecule without a "fragments" tag is not docked at
        # all; default_receptor only applies when the tag is present but no
        # listed receptor exists. Confirm this is intended.
        return

    # Extract list of corresponding receptor(s)
    fragments = oechem.OEGetSDData(molecule, "fragments").split(',')
    print(f'fragments before filter: {fragments}')
    fragments = [
        fragment for fragment in fragments
        if os.path.exists(receptor_path(fragment))
    ]
    print(f'fragments after filter: {fragments}')
    if not fragments:
        fragments = [default_receptor]

    for fragment in fragments:
        molecule_to_dock = oechem.OEMol(molecule)
        receptor_filename = receptor_path(fragment)
        # Record which single fragment this copy was docked against
        oechem.OESetSDData(molecule_to_dock, "fragments", fragment)

        # Enumerate reasonable protomers/tautomers
        protomers = [
            oechem.OEMol(protomer) for protomer in
            oequacpac.OEGetReasonableProtomers(molecule_to_dock)
        ]
        dock_molecules_to_receptor(receptor_filename, protomers, ofs)
Esempio n. 11
0
    def test_success(self):
        """
        Run a single molecule through the cube and check that it is scored.

        The molecule must be emitted exactly once on the success port, never
        on the failure port, and carry a 'Chemgauss4' SD tag.
        """
        print('Testing cube:', self.cube.name)

        # Load the input molecule from the example data
        mol = oechem.OEMol()
        ifs = oechem.oemolistream(utils.get_data_filename('examples', 'data/TOL-smnf.oeb.gz'))
        was_read = oechem.OEReadMolecule(ifs, mol)
        if not was_read:
            raise Exception('Cannot read molecule')
        ifs.close()

        # Feed the molecule to the cube through its intake port
        self.cube.process(mol, self.cube.intake.name)

        # Exactly one emission on 'success', none on 'failure'
        self.assertEqual(self.runner.outputs['success'].qsize(), 1)
        self.assertEqual(self.runner.outputs['failure'].qsize(), 0)

        # The emitted molecule must carry the score tag
        outmol = self.runner.outputs["success"].get()
        has_score = oechem.OEHasSDData(outmol, 'Chemgauss4')
        self.assertTrue(has_score)
Esempio n. 12
0
    def FilterMolData(self, mol):
        """
        Strip SD data from ``mol`` that is not listed in ``self.fields``.

        Returns
        -------
        int
            0 when the molecule has no SD data, or when ``self.fields`` is
            empty (all SD data is then cleared); -1 when ``self.fields`` is
            None (the molecule is left untouched); otherwise the number of SD
            entries kept. When ``self.asFloating`` is set, an entry whose
            value does not parse as a float is dropped with a warning. If
            nothing is kept, all SD data is cleared.
        """
        if not oechem.OEHasSDData(mol):
            return 0

        if self.fields is None:
            return -1

        if len(self.fields) == 0:
            oechem.OEClearSDData(mol)
            return 0

        kept = 0
        rejected = []
        for dp in oechem.OEGetSDDataPairs(mol):
            tag = dp.GetTag()
            if tag not in self.fields:
                rejected.append(tag)
                continue

            value = oechem.OEGetSDData(mol, tag)
            is_usable = True
            if self.asFloating:
                try:
                    float(value)
                except ValueError:
                    oechem.OEThrow.Warning("Failed to convert %s to numeric value (%s) in %s" %
                                           (tag, value, mol.GetTitle()))
                    is_usable = False

            if is_usable:
                kept += 1
            else:
                rejected.append(tag)

        if kept == 0:
            oechem.OEClearSDData(mol)
        else:
            for tag in rejected:
                oechem.OEDeleteSDData(mol, tag)

        return kept
Esempio n. 13
0
def delete_sd_data(mol, tag, locator_tag):
    """
    Delete SD tag ``tag`` from whichever object carries ``locator_tag``.

    ``mol`` is checked first, then ``mol.GetActive()``. The result of
    ``oechem.OEDeleteSDData`` on the first object that has ``locator_tag`` is
    returned; False is returned when neither has it.
    """
    if oechem.OEHasSDData(mol, locator_tag):
        holder = mol
    else:
        holder = mol.GetActive()
        if not oechem.OEHasSDData(holder, locator_tag):
            return False
    return oechem.OEDeleteSDData(holder, tag)
Esempio n. 14
0
 def __contains__(self, kee:str) -> bool:
     """Support ``kee in obj``: True when the wrapped molecule has SD tag ``kee``."""
     present = oechem.OEHasSDData(self._mol, kee)
     return present
Esempio n. 15
0
def IdentifyMinima(mol, tag, ThresholdE, ThresholdRMSD):
    """
    For a molecule's set of conformers computed with some level of theory,
        whittle down unique conformers based on energy and RMSD.

    Conformers without an energy tag are removed. Of two conformers whose
    energies differ by no more than ThresholdE and whose RMSD is below
    ThresholdRMSD, the later one is removed.

    Parameters
    ----------
    mol            OEChem molecule with all of its conformers
    tag            string name of the SD tag in this molecule; matched
        case-insensitively as a substring of the actual tag names
    ThresholdE     float value for abs(E1-E2), below which 2 confs are "same"
        Units are hartrees (default output units of Psi4)
    ThresholdRMSD  float value for RMSD, below which 2 confs are "same"
        Units are in Angstrom (Psi4 default)

    Returns
    -------
    boolean True if successful filter + delete. False if there's only
        one conf and it has no matching tag, or if every conformer lacks
        the energy tag.

    """
    # Parameters for OpenEye RMSD calculation
    automorph = True
    heavyOnly = False
    overlay = True

    # declare variables for conformers to delete
    confsToDel = set()
    delCount = 0  # number of conformers that have no energy tag at all
    tag_lower = tag.lower()

    # check if SD tag exists for the case of single conformer
    if mol.NumConfs() == 1:
        # Inspect every SD pair rather than deciding on the first one only.
        # NOTE(review): this reads the SD data exposed by `mol` itself,
        # presumably that of its only conformer -- confirm.
        return any(tag_lower in x.GetTag().lower()
                   for x in oechem.OEGetSDDataPairs(mol))

    # Loop over conformers twice (NxN diagonal comparison of RMSDs)
    for confRef in mol.GetConfs():
        print(" ~ Reference: %s conformer %d" %
              (mol.GetTitle(), confRef.GetIdx() + 1))

        # get real tag (correct for capitalization); reset for every
        # reference conformer so a missing tag is detected instead of
        # reusing a stale label or hitting an unbound name
        taglabel = None
        for x in oechem.OEGetSDDataPairs(confRef):
            if tag_lower in x.GetTag().lower():
                taglabel = x.GetTag()

        # delete cases that don't have energy (opt not converged; or other)
        if taglabel is None:
            confsToDel.add(confRef.GetIdx())
            delCount += 1
            continue
        refE = float(oechem.OEGetSDData(confRef, taglabel))

        for confTest in mol.GetConfs():
            # upper right triangle comparison
            if confTest.GetIdx() <= confRef.GetIdx():
                continue
            # skip cases already set for removal
            if confTest.GetIdx() in confsToDel:
                continue
            # delete cases that don't have energy
            if not oechem.OEHasSDData(confTest, taglabel):
                confsToDel.add(confTest.GetIdx())
                continue

            testE = float(oechem.OEGetSDData(confTest, taglabel))
            # if MM (not Psi4) energies, convert absERel to Hartrees
            if 'mm' in taglabel.lower():
                absERel = abs(refE - testE) / 627.5095
            else:
                absERel = abs(refE - testE)
            # if energies are diff enough --> confs are diff --> keep & skip ahead
            if absERel > ThresholdE:
                continue
            # if energies are similar, see if they are diff by RMSD
            rmsd = oechem.OERMSD(confRef, confTest, automorph, heavyOnly,
                                 overlay)
            # if measured_RMSD < threshold_RMSD --> confs are same --> delete
            if rmsd < ThresholdRMSD:
                confsToDel.add(confTest.GetIdx())

    # for the same molecule, delete tagged conformers
    print("%s original number of conformers: %d" %
          (mol.GetTitle(), mol.NumConfs()))
    if delCount == mol.NumConfs():
        # all conformers in this mol have been tagged for deletion
        return False
    for conf in mol.GetConfs():
        if conf.GetIdx() in confsToDel:
            print('Removing %s conformer index %d' %
                  (mol.GetTitle(), conf.GetIdx()))
            if not mol.DeleteConf(conf):
                oechem.OEThrow.Fatal("Unable to delete %s GetIdx() %d" \
                                  % (mol.GetTitle(), conf.GetIdx()))
    return True
import argparse
from openeye import oechem
from tqdm import tqdm


def getargs():
    """Parse the command line; both ``-i`` and ``-o`` are required strings."""
    parser = argparse.ArgumentParser()
    for flag in ("-i", "-o"):
        parser.add_argument(flag, required=True, type=str)
    return parser.parse_args()


if __name__ == '__main__':
    # Copy molecules from -i to -o, retitling each one from its "Catalog ID"
    # SD field when that field is present.
    args = getargs()

    # Open explicitly so a bad path is reported instead of silently yielding
    # an empty read loop or discarded writes.
    ifs = oechem.oemolistream()
    if not ifs.open(args.i):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % args.i)

    ofs = oechem.oemolostream()
    if not ofs.open(args.o):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % args.o)

    lig = oechem.OEGraphMol()

    pbar = tqdm()
    try:
        while oechem.OEReadMolecule(ifs, lig):
            if oechem.OEHasSDData(lig, "Catalog ID"):
                lig.SetTitle(oechem.OEGetSDData(lig, "Catalog ID"))

            oechem.OEWriteMolecule(ofs, lig)
            pbar.update(1)
    finally:
        # Release the progress bar and both streams even if a read or write
        # raises part-way through.
        pbar.close()
        ifs.close()
        ofs.close()
Esempio n. 17
0
            mols.append(mol.CreateCopy())
# Report how many molecules were collected into `mols` by the preceding code
print(f'{len(mols)} molecules read')

# Annotate molecules with SMARTS labels
print('Annotating SMARTS labels...')
import csv
labels_filename = 'annotations/benzopyran_annotations.csv'  # list of labels for various SMARTS patterns
# Maps SMARTS pattern -> label. Column 0 of each CSV row is the pattern and
# column 1 the label; a repeated pattern keeps the label from its last row.
smarts_labels = dict()
with open(labels_filename, 'r') as csvfile:
    csvreader = csv.reader(csvfile, delimiter=',')
    for row in csvreader:
        smarts = row[0]
        label = row[1]
        smarts_labels[smarts] = label
# Label the molecules
# The tag is written with OESetSDData for each matching pattern in turn, so
# a molecule matching several patterns presumably ends up with the label of
# the last one that matched -- TODO confirm.
for smarts, label in smarts_labels.items():
    ss = oechem.OESubSearch(smarts)
    for mol in track(mols, description=label):
        oechem.OEPrepareSearch(mol, ss)
        if ss.SingleMatch(mol):
            oechem.OESetSDData(mol, 'intermediate', label)
# Discard molecules without labels
mols = [mol for mol in mols if oechem.OEHasSDData(mol, 'intermediate')]
print(f'{len(mols)} molecules remain after discarding unlabeled molecules')

# Write molecules
output_filename = 'sorted/sprint-5-dimer.csv'
with oechem.oemolostream(output_filename) as ofs:
    for mol in track(mols, description='Writing molecules...'):
        oechem.OEWriteMolecule(ofs, mol)
def get_series(mol):
    """
    Return the name of the first chemical series ``mol`` belongs to.

    The molecule is converted to SMILES, re-parsed with RDKit, and tested
    against each series' SMARTS pattern in dictionary order. A series matches
    when the pattern is found in the hydrogen-added molecule and the
    molecular weight, heavy-atom count and ring count are all within that
    series' cutoffs.

    Parameters
    ----------
    mol : OEChem molecule

    Returns
    -------
    str or None
        The matching series name. None when the molecule is flagged as
        covalent (an 'acrylamide' or 'chloroacetamide' SD tag equal to
        'True'), when RDKit cannot parse its SMILES, or when no series
        matches.
    """
    from rdkit import Chem
    from rdkit.Chem import Descriptors
    from rdkit.Chem import rdMolDescriptors
    series_SMARTS_dict = {
        #"3-aminopyridine": "[R1][C,N;R0;!$(NC(=O)CN)]C(=O)[C,N;R0;!$(NC(=O)CN)][c]1cnccc1",
        "3-aminopyridine-like": "[R1]!@[C,N]C(=O)[C,N]!@[R1]",
        "3-aminopyridine-strict": "c1ccncc1NC(=O)!@[R1]",
        "Ugi":
        "[c,C:1][C](=[O])[N]([c,C,#1:2])[C]([c,C,#1:3])([c,C,#1:4])[C](=[O])[NH1][c,C:5]",
        "quinolones": "NC(=O)c1cc(=O)[nH]c2ccccc12",
        "piperazine-chloroacetamide": "O=C(CCl)N1CCNCC1",
        #'benzotriazoles': 'c1ccc(NC(=O)[C,N]n2nnc3ccccc32)cc1',
        #'benzotriazoles': 'a1aaa([C,N]C(=O)[C,N]a2aaa3aaaaa32)aa1',
        'benzotriazoles': 'a2aaa3aaaaa32',
    }
    # Series-specific size limits, keyed by the exact name used in
    # series_SMARTS_dict so the override cannot drift out of sync with it.
    # Series not listed here use the defaults of the check function.
    series_cutoffs = {
        "3-aminopyridine-like": dict(
            MW_cutoff=410,
            num_rings_cutoff=3,
            num_atoms_cutoff=28,
        ),
    }

    smi = oechem.OECreateSmiString(mol)

    # Filter out covalent
    try:
        covalent_warheads = ['acrylamide', 'chloroacetamide']
        for warhead in covalent_warheads:
            if oechem.OEHasSDData(mol, warhead) and oechem.OEGetSDData(
                    mol, warhead) == 'True':
                return None
    except Exception as e:
        logging.warning(e)

    # Parse once and reuse for every series; an unparsable SMILES cannot
    # belong to any series.
    rdmol = Chem.MolFromSmiles(smi)
    if rdmol is None:
        return None
    MW = Descriptors.MolWt(rdmol)
    num_heavy_atoms = rdmol.GetNumHeavyAtoms()
    num_rings = rdMolDescriptors.CalcNumRings(rdmol)
    # Explicit hydrogens are added before matching because some patterns
    # contain hydrogen atoms (#1).
    rdmol_with_hs = Chem.AddHs(rdmol)

    def check_if_mol_in_series(SMARTS,
                               MW_cutoff=550,
                               num_atoms_cutoff=70,
                               num_rings_cutoff=10):
        patt = Chem.MolFromSmarts(SMARTS)
        return (rdmol_with_hs.HasSubstructMatch(patt)
                and MW <= MW_cutoff
                and num_heavy_atoms <= num_atoms_cutoff
                and num_rings <= num_rings_cutoff)

    for series, series_SMARTS in series_SMARTS_dict.items():
        cutoffs = series_cutoffs.get(series, {})
        if check_if_mol_in_series(series_SMARTS, **cutoffs):
            return series
    return None
# Compounds
from fah_xchem.schema import Compound, CompoundMetadata
from openeye import oechem

# oechem is imported above so that it is bound before the flags are read
smiles_flag = oechem.OESMILESFlag_Canonical | oechem.OESMILESFlag_ISOMERIC

print('Processing compounds...')
# Maps compound ID (the molecule title) -> Compound; a repeated title
# replaces the earlier entry
compounds = dict()
with oechem.oemolistream(compounds_sdf_filename) as ifs:
    for oemol in ifs.GetOEGraphMols():
        # Set ID and SMILES
        compound_id = oemol.GetTitle()
        smiles = oechem.OECreateSmiString(oemol, smiles_flag)
        # Extract experimental data, if present
        experimental_data = dict()
        if oechem.OEHasSDData(oemol, 'f_avg_pIC50'):
            pIC50 = oechem.OEGetSDData(oemol, 'f_avg_pIC50')
            # An empty value means no measurement was recorded
            if pIC50 != '':
                pIC50 = float(pIC50)
                experimental_data['pIC50'] = pIC50
        # Extract information about the compound
        compound_metadata = CompoundMetadata(
            compound_id=compound_id,
            smiles=smiles,  # reuse the SMILES computed above
            experimental_data=experimental_data,
        )
        # Create new compound
        compound = Compound(metadata=compound_metadata, microstates=list())
        # Store compound
        compounds[compound_id] = compound
def DumpSDData(mol):
    """Print the title of ``mol``, each of its SD tag/value pairs, and a blank line."""
    title = mol.GetTitle()
    print("SD data of", title)
    # One "tag : value" line per SD data pair
    for pair in oechem.OEGetSDDataPairs(mol):
        tag, value = pair.GetTag(), pair.GetValue()
        print(tag, ':', value)
    print()


# Demo: build a titled benzene molecule, then set, delete, add and clear SD
# data on it, dumping the SD data after each step.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")
mol.SetTitle("benzene")

# set some tagged data
# (two call forms: separate tag/value strings, or a single OESDDataPair)
oechem.OESetSDData(mol, "color", "brown")
oechem.OESetSDData(mol, oechem.OESDDataPair("size", "small"))
DumpSDData(mol)

# check for existence of data, then delete it
if oechem.OEHasSDData(mol, "size"):
    oechem.OEDeleteSDData(mol, "size")
DumpSDData(mol)

# add additional color data
# NOTE(review): OEAddSDData presumably appends a second "color" entry rather
# than replacing "brown" -- confirm against the OEChem documentation
oechem.OEAddSDData(mol, "color", "black")
DumpSDData(mol)

# remove all SD data
oechem.OEClearSDData(mol)
DumpSDData(mol)
# @ </SNIPPET>
# Demo: read one SDF record from an in-memory string, read its "weight" SD
# field as a float, and attach the atom count as a new SD field.
# The record below is a two-atom molecule titled "ethene" with one SD field.
molstring = '''\
ethene
  -OEChem-04060917472D

  2  1  0     0  0  0  0  0  0999 V2000
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  1  0  0  0  0
M  END
> <weight>
30.069040000000

$$$$
'''
# The format is set explicitly because there is no file name to infer it from
ims = oechem.oemolistream()
ims.SetFormat(oechem.OEFormat_SDF)
ims.openstring(molstring)

mol = oechem.OEGraphMol()
# NOTE(review): the return value of OEReadMolecule is not checked here
oechem.OEReadMolecule(ims, mol)
# @ <SNIPPET-GET-SD>
if oechem.OEHasSDData(mol, "weight"):
    weight = float(oechem.OEGetSDData(mol, "weight"))
    print("weight=", weight)
# @ </SNIPPET-GET-SD>

# @ <SNIPPET-SET-SD>
# the atom count is converted with str() before being stored
oechem.OESetSDData(mol, "number of atoms", str(mol.NumAtoms()))
# @ </SNIPPET-SET-SD>
# @ </SNIPPET>
        head, tail = os.path.split(args.molecules_filename)
        prefix, ext = os.path.splitext(tail)
        molecule.SetTitle(f'{prefix}-{args.molecule_index}')

    # Dock if the molecule has not already been docked
    from openeye import oechem
    sdf_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - ligand.sdf')
    if not os.path.exists(sdf_filename):
        # Determine what fragments to dock to
        if is_covalent:
            fragments_to_dock_to = covalent_active_site_fragments
        else:
            fragments_to_dock_to = all_fragments

        if args.userfrags:
            if oechem.OEHasSDData(molecule, 'fragments'):
                fragments_to_dock_to = oechem.OEGetSDData(molecule, 'fragments').split(',')
        # Dock the molecule
        docked_molecule = ensemble_dock(molecule, fragments_to_dock_to, covalent=is_covalent)
    else:
        # Read the molecule
        print(f'Docked molecule exists, so reading from {sdf_filename}')
        with oechem.oemolistream(sdf_filename) as ifs:
            docked_molecule = oechem.OEGraphMol()
            oechem.OEReadMolecule(ifs, docked_molecule)

    if docked_molecule is None:
        print('No docking poses available')
        import sys
        sys.exit(0)
Esempio n. 23
0
def set_sd_tags(Conf, Props, calctype):
    """
    Record the results of one QM calculation as SD tags on a conformer.

    Which tags are written depends on ``calctype``:
      * all types: runtime
      * 'hess': nothing further
      * 'spe' and 'opt': final energy (plus the SCS energy when the method
        is MP2), or a "JOB DID NOT FINISH" note if 'finalEnergy' is missing
      * 'opt' only: outlying-charge-corrected energy (if 'ocEnergy' is
        present), original conformer number, step count, initial energy

    Warning
    -------
    Most tags are written with OEAddSDData, so calling this twice with the
    same level of theory leaves duplicate tags, possibly holding different
    data (NOT recommended). Readers that fetch by tag name will then
    presumably see only one of them.

    TODO: maybe add some kind of checking to prevent duplicate tags added

    Parameters
    ----------
    Conf:       Single conformer from OEChem molecule
    Props:      Dictionary output from ProcessOutput function.
                Always read: method, basis, package, time.
                Read when applicable: finalEnergy, finalSCSEnergy, ocEnergy,
                numSteps, initEnergy.
    calctype: string; one of 'opt','spe','hess' for geometry optimization,
        single point energy calculation, or Hessian calculation

    """

    # level of theory and package, used in every tag name
    method = Props['method']
    basisset = Props['basis']
    pkg = Props['package']

    full_method = "{}/{}".format(method, basisset)
    cdict = {'spe': 'Single Pt.', 'opt': 'Opt.', 'hess': 'Hessian'}
    calc_name = cdict[calctype]

    # runtime applies to every calculation type
    runtime_tag = "QM {} {} Runtime (sec) {}".format(pkg, calc_name,
                                                     full_method)
    oechem.OEAddSDData(Conf, runtime_tag, str(Props['time']))

    # a Hessian calculation contributes nothing else
    if calctype == 'hess':
        return

    # no final energy means the job probably did not finish: leave a note
    # on the conformer and stop
    if 'finalEnergy' not in Props:
        note_tag = "Note on {} {}".format(calc_name, full_method)
        oechem.OEAddSDData(Conf, note_tag, "JOB DID NOT FINISH")
        return

    # final energy
    final_tag = "QM {} Final {} Energy (Har) {}".format(pkg, calc_name,
                                                        full_method)
    oechem.OEAddSDData(Conf, final_tag, str(Props['finalEnergy']))

    # final SCS-MP2 energy, only for MP2
    if method.lower() == 'mp2':
        scs_tag = "QM {} Final {} Energy (Har) SCS-{}".format(
            pkg, calc_name, full_method)
        oechem.OEAddSDData(Conf, scs_tag, str(Props['finalSCSEnergy']))

    # COSMO energy with outlying charge correction. Turbomole only!
    if 'ocEnergy' in Props:
        if calctype == 'spe':
            print(
                "Extraction of COSMO OC energy from Turbomole not yet supported for SPE calcns"
            )
        elif calctype == 'opt':
            oc_tag = "QM {} Final {} Energy with OC correction (Har) {}".format(
                pkg, calc_name, full_method)
            oechem.OEAddSDData(Conf, oc_tag, str(Props['ocEnergy']))

    # a single point calculation contributes nothing else
    if calctype == 'spe':
        return

    # Original conformer number: created if absent, otherwise the current
    # index is appended after the stored value.
    # !! Opt2 files should ALREADY have this !! Opt2 index is NOT orig index !!
    orig_tag = "Original omega conformer number"
    already_numbered = oechem.OEHasSDData(Conf, orig_tag)
    try:
        if already_numbered:
            oldid = oechem.OEGetSDData(Conf, orig_tag)
            newid = str(Conf.GetIdx() + 1)
            totid = "{}, {}".format(oldid, newid)
            oechem.OESetSDData(Conf, orig_tag, totid)
        else:
            oechem.OEAddSDData(Conf, orig_tag, str(Conf.GetIdx() + 1))
    except AttributeError:
        # if not working with confs, there is no GetIdx; skip the tag
        pass

    # number of geometry optimization steps
    steps_tag = "QM {} {} Steps {}".format(pkg, calc_name, full_method)
    oechem.OEAddSDData(Conf, steps_tag, str(Props['numSteps']))

    # initial energy
    init_tag = "QM {} Initial {} Energy (Har) {}".format(
        pkg, calc_name, full_method)
    oechem.OEAddSDData(Conf, init_tag, str(Props['initEnergy']))
Esempio n. 24
0
def has_sd_data(mol, tag):
    """
    Return True when SD tag ``tag`` is present on ``mol`` or on
    ``mol.GetActive()``, and False otherwise.
    """
    for holder_of in (lambda: mol, mol.GetActive):
        # The active conformer is only fetched if `mol` itself lacks the tag
        if oechem.OEHasSDData(holder_of(), tag):
            return True
    return False
Esempio n. 25
0
 def __getitem__(self, kee:str ):
     """
     Return the value of SD tag ``kee`` on the wrapped molecule.

     Raises KeyError when the molecule does not carry the tag.
     """
     if not oechem.OEHasSDData(self._mol, kee):
         owner = self.__class__.__name__
         raise KeyError("{} has no key {!r}".
                        format(owner, kee))
     return oechem.OEGetSDData(self._mol, kee)
Esempio n. 26
0
def ChEMBLSolubilityUsage(itf):
    """
    Apply the ChEMBL solubility transforms to every input molecule and
    export the products that show a net solubility increase.

    For each molecule read from ``-input`` only the largest component is
    kept. Each transformed molecule is annotated with the average, standard
    deviation and count of its matched-pair solubility deltas and written to
    ``-output``; products with no usable deltas or a negative average delta
    are skipped.

    Parameters
    ----------
    itf : oechem.OEInterface
        Parsed command-line interface providing ``-input`` and ``-output``.

    Returns
    -------
    bool
        Always True; failure to open either file is reported with
        ``OEThrow.Fatal``.
    """
    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-input")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" %
                             itf.GetString("-input"))

    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-output")):
        # The path comes from the interface object, not from the stream.
        oechem.OEThrow.Fatal("Unable to open %s for writing" %
                             itf.GetString("-output"))

    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # @ <SNIPPET-OEAPPLYCHEMBLSOLUBILITY-EXAMPLE>
    # number of bonds of chemistry context at site of change
    #  for the applied transforms
    totalmols = 0
    xformctxt = oemedchem.OEMatchedPairContext_Bond2
    for molidx, mol in enumerate(ifs.GetOEGraphMols(), start=1):
        # consider only the largest input fragment
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        smolcnt = 0
        # only consider solubility transforms having at least 5 matched pairs
        for solMol in oemedchem.OEApplyChEMBL24SolubilityTransforms(
                mol, xformctxt, 5):
            # compute net change in solubility from MMP data
            deltasol = []
            if oechem.OEHasSDData(solMol, "OEMMP_normalized_value (uM)"):
                for sditem in oechem.OEGetSDData(
                        solMol, "OEMMP_normalized_value (uM)").split('\n'):
                    # fromIndex,toIndex,fromValue,toValue
                    sdvalues = sditem.split(',')
                    # skip rows that are too short or lack either value
                    if len(sdvalues) < 4:
                        continue
                    if not sdvalues[2] or not sdvalues[3]:
                        continue
                    deltasol.append(float(sdvalues[3]) - float(sdvalues[2]))
            if not deltasol:
                continue

            avgsol = deltasol[0]
            if len(deltasol) > 1:
                avgsol = average(deltasol)

            # reject examples with net decrease in solubility
            if avgsol < 0.0:
                continue
            sdev = stddev(deltasol)

            # annotate with average,stddev,num
            oechem.OEAddSDData(
                solMol, "OEMMP_average_delta_normalized_value",
                "{0:.1F},{1:.2F},{2}".format(avgsol, sdev, len(deltasol)))

            # export solubility transformed molecule with SDData annotations
            if oechem.OEWriteMolecule(
                    ofs, solMol) == oechem.OEWriteMolReturnCode_Success:
                smolcnt += 1

        oechem.OEThrow.Info("{0}: Exported molecule count, {1}".format(
            molidx, smolcnt))
        totalmols += smolcnt
    # @ </SNIPPET-OEAPPLYCHEMBLSOLUBILITY-EXAMPLE>

    print("Exported molecule count = {0}".format(totalmols))

    return True
Esempio n. 27
0
    def process(self, mol, port):
        """
            Advance a torsion scan by one step.

            The input to this cube will be an OEMol with one or more conformers
            with "CONFORMER_LABEL" SD Data of the form 'XY-1234567_1_2_3_4_00_00'

            Depending on the state of ``mol`` it is emitted on one of three
            ports:
              * ``success`` when it already holds ``self.args.num_points``
                conformers;
              * ``to_energy_calc`` when it is a fresh single conformer, or
                after a new conformer rotated to the next torsion angle has
                been added;
              * ``failure`` when anything raises while adding that conformer.

            ``port`` is not used in this method.
        """
        num_confs = mol.NumConfs()

        last_conf = mol.GetActive()
        last_conf_name = oechem.OEGetSDData(last_conf, "CONFORMER_LABEL")
        # NOTE(review): os.environ["HOSTNAME"] raises KeyError when the
        # variable is unset, and this first use is outside the try block
        self.log.info(
            "Processing conformer {} on {} at {:%Y-%m-%d %H:%M:%S}".format(
                last_conf_name, os.environ["HOSTNAME"],
                datetime.datetime.now()))

        # Every scan point is present: the scan is complete
        if num_confs == self.args.num_points:
            self.success.emit(mol)
            self.log.info(
                "Completed scan for {} on {} at {:%Y-%m-%d %H:%M:%S}".format(
                    mol.GetTitle(), os.environ["HOSTNAME"],
                    datetime.datetime.now()))
            return

        # A single conformer without the selection tag is a fresh start: it
        # is recorded as the zero-angle point and sent for its energy
        if num_confs == 1 and not mol.HasData(self.conf_selection_tag):
            self.log.info(
                "Conformer {} is a fresh starting conformer on {} at {:%Y-%m-%d %H:%M:%S}"
                .format(mol.GetTitle(), os.environ["HOSTNAME"],
                        datetime.datetime.now()))
            mol.SetIntData(self.conf_selection_tag, last_conf.GetIdx())
            last_conf.SetDoubleData("TORSION_ANGLE", 0.0)
            oechem.OESetSDData(last_conf, "TORSION_ANGLE", "0.0")
            self.log.info(
                "Sending conformer {} to energy calculation from {} at {:%Y-%m-%d %H:%M:%S}"
                .format(last_conf_name, os.environ["HOSTNAME"],
                        datetime.datetime.now()))
            self.to_energy_calc.emit(mol)
            return

        try:
            torsion_tag = "TORSION_ATOMS_FRAGMENT"
            torsion_atoms_in_fragment = get_sd_data(mol, torsion_tag).split()
            # presumably the tag stores 1-based atom indices, shifted here
            # to 0-based -- TODO confirm
            dihedral_atom_indices = [
                int(x) - 1 for x in torsion_atoms_in_fragment
            ]

            dih, _ = get_dihedral(mol, dihedral_atom_indices)
            dih_atoms = [x for x in dih.GetAtoms()]

            # if the last energy calculation failed
            # (no energy tag): pop the active conformer and use the new
            # active one as the template for the next scan point
            if not oechem.OEHasSDData(last_conf, "PSI4_ENERGY"):
                self.log.info(
                    "Conformer {} found to have NO ENERGY on {} at {:%Y-%m-%d %H:%M:%S}"
                    .format(last_conf_name, os.environ["HOSTNAME"],
                            datetime.datetime.now()))
                mol.PopActive()
                last_conf = mol.GetActive()

            # Copy the template into a new conformer and make it active
            new_conf = mol.NewConf(last_conf)
            mol.PushActive(new_conf)
            # num_confs was read before the new conformer was added
            conf_no = num_confs
            # Replace the last three characters of the label with the new
            # two-digit conformer number
            conformer_label = last_conf_name[:-3] + "_{:02d}".format(conf_no)
            oechem.OESetSDData(new_conf, "CONFORMER_LABEL", conformer_label)

            # Scan points are evenly spaced over a full turn; the angle is
            # stored in degrees but applied in radians
            angle = num_confs * 2 * oechem.Pi / self.args.num_points
            angle_deg = oechem.Rad2Deg * angle
            new_conf.SetDoubleData("TORSION_ANGLE", angle_deg)
            oechem.OESetSDData(new_conf, "TORSION_ANGLE",
                               "{:.1f}".format(angle_deg))

            # A failed rotation is only logged; the molecule is still sent on
            if not oechem.OESetTorsion(new_conf, dih_atoms[0], dih_atoms[1],
                                       dih_atoms[2], dih_atoms[3], angle):
                self.log.error(
                    "Could not rotate conformer {} by {:.1f} on {} at {:%Y-%m-%d %H:%M:%S}"
                    .format(
                        last_conf_name,
                        angle_deg,
                        os.environ["HOSTNAME"],
                        datetime.datetime.now(),
                    ))

            mol.SetIntData(self.conf_selection_tag, new_conf.GetIdx())
            self.log.info(
                "Sending conformer {} to energy calculation from {} at {:%Y-%m-%d %H:%M:%S}"
                .format(conformer_label, os.environ["HOSTNAME"],
                        datetime.datetime.now()))
            self.to_energy_calc.emit(mol)

        except Exception as e:
            # Any error while building the next scan point routes the
            # molecule to the failure port
            self.log.error(
                "COuld not drive torsion in  conformer {} on {} at {:%Y-%m-%d %H:%M:%S}: {}"
                .format(last_conf_name, os.environ["HOSTNAME"],
                        datetime.datetime.now(), e))
            self.failure.emit(mol)
Esempio n. 28
0
def SetOptSDTags(Conf, Props, spe=False):
    """
    For one particular conformer, set all available SD tags based on data
    in Props dictionary.

    Warning
    -------
    Most tags are written with OEAddSDData, so if the exact tag already
    exists there will be duplicate tags with maybe different data
    (NOT recommended). Code that later reads the SD data back may then only
    see one of the duplicates (presumably the first matching tag -- verify
    against the reader being used).

    TODO: maybe add some kind of checking to prevent duplicate tags added

    Parameters
    ----------
    Conf:       Single conformer from OEChem molecule
    Props:      Dictionary output from ProcessOutput function.
                Always read:  'method', 'basis', 'package'.
                Optional:     'finalEnergy' (if absent, only a
                              "JOB DID NOT FINISH" note is written),
                              'ocEnergy' (only written for optimizations).
                Read when 'finalEnergy' is present: 'time', and for
                optimizations (spe=False) also 'numSteps', 'initEnergy'.
    spe:        Boolean - are the results of a single point energy calcn?

    Raises
    ------
    KeyError if one of the required keys listed above is missing in Props.

    """

    # get level of theory for setting SD tags
    method = Props['method']
    basisset = Props['basis']
    pkg = Props['package']

    # check that finalEnergy is there. if not, opt probably did not finish
    # make a note of that in SD tag
    if 'finalEnergy' not in Props:
        if not spe:
            taglabel = "Note on opt. %s/%s" % (method, basisset)
        else:
            taglabel = "Note on SPE %s/%s" % (method, basisset)
        oechem.OEAddSDData(Conf, taglabel, "JOB DID NOT FINISH")
        return

    # Set new SD tag for conformer's final energy
    if not spe:
        taglabel = "QM %s Final Opt. Energy (Har) %s/%s" % (pkg, method,
                                                            basisset)
    else:
        taglabel = "QM %s Single Pt. Energy (Har) %s/%s" % (pkg, method,
                                                            basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['finalEnergy']))

    # Set new SD tag for wall-clock time
    if not spe:
        taglabel = "QM %s Opt. Runtime (sec) %s/%s" % (pkg, method, basisset)
    else:
        taglabel = "QM %s Single Pt. Runtime (sec) %s/%s" % (pkg, method,
                                                             basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['time']))

    # Add COSMO energy with outlying charge correction. Turbomole only!
    if 'ocEnergy' in Props:
        if not spe:
            taglabel = "QM %s Final Opt. Energy with OC correction (Har) %s/%s" % (
                pkg, method, basisset)
            oechem.OEAddSDData(Conf, taglabel, str(Props['ocEnergy']))
        else:
            # Unsupported for SPE: only report it. Writing the value here
            # would reuse the runtime tag label from the step above and store
            # the OC energy under a duplicate, mislabeled tag.
            print(
                "Extraction of COSMO OC energy from Turbomole not yet supported for SPE calcns"
            )

    if spe:
        return  # stop here if SPE; remaining tags only apply to optimizations

    # Set new SD tag for original conformer number
    # !! Opt2 files should ALREADY have this !! Opt2 index is NOT orig index !!
    taglabel = "Original omega conformer number"
    if not oechem.OEHasSDData(Conf, taglabel):
        # add new tag if not existing
        try:
            oechem.OEAddSDData(Conf, taglabel, str(Conf.GetIdx() + 1))
        except AttributeError:
            # deliberate best effort: if not working with confs, the object
            # has no GetIdx, so the tag is simply left unset
            pass
    else:
        # if tag exists, append new conformer ID after the old one
        try:
            oldid = oechem.OEGetSDData(Conf, taglabel)
            newid = str(Conf.GetIdx() + 1)
            oechem.OESetSDData(Conf, taglabel, "{}, {}".format(oldid, newid))
        except AttributeError:
            # deliberate best effort: no GetIdx, keep the existing tag as is
            pass

    # Set new SD tag for numSteps of geom. opt.
    taglabel = "QM %s Opt. Steps %s/%s" % (pkg, method, basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['numSteps']))

    # Set new SD tag for conformer's initial energy
    taglabel = "QM %s Initial Opt. Energy (Har) %s/%s" % (pkg, method,
                                                          basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['initEnergy']))