Exemple #1
0
def parse_input_file(in_filename: str) -> oechem.OEGraphMol:
    """Scan a calculation output file for its final energy and geometry.

    Every line is stripped and inspected.  A line beginning with
    ``FINAL_ENERGY_LINE`` supplies the energy (its last whitespace-separated
    token, converted to float).  A line beginning with ``FINAL_GEOMETRY_LINE``
    causes the still-open file to be handed to ``parse_molecule``, whose
    return value becomes the molecule.  The last energy seen (0.0 if none) is
    attached to the molecule as SD data under ``FINAL_ENERGY_FIELD_NAME``.

    Args:
        in_filename: path of the file to parse

    Returns:
        molecule: the molecule parsed from the input file
    """
    parsed_mol = None
    energy = 0.0
    last_seen = ""  # most recent processed line, handed to check_last_line

    with open(in_filename, "r") as handle:
        # readline() is used instead of iterating the file object because
        # parse_molecule() reads from the same handle; "" signals EOF.
        for raw in iter(handle.readline, ""):
            stripped = raw.strip()
            if stripped.startswith(FINAL_ENERGY_LINE):
                energy = float(stripped.split()[-1])
            if stripped.startswith(FINAL_GEOMETRY_LINE):
                parsed_mol = parse_molecule(handle)
                # the geometry header is not remembered as a previous line
                stripped = ""
            last_seen = stripped
        check_last_line(last_seen, in_filename)

    oechem.OEAddSDData(parsed_mol, FINAL_ENERGY_FIELD_NAME, str(energy))
    return parsed_mol
def main(argv=[__name__]):
    """Annotate input molecules with SMILES of their Bemis-Murcko regions.

    Each input record is reduced to its first largest component, the
    Bemis-Murcko regions are perceived, and for every region the fragment
    SMILES is stored on the input molecule as SD data, one entry per role
    name of the region.  Annotated molecules are written to the output file.

    Parameters
    ----------
    argv : list of str
        Command line passed to ``oechem.OEInterface`` (reads ``-i``, ``-o``
        and ``-uncolor``).  The default list is never mutated here.

    Returns
    -------
    int
        Always 0; failures are reported through ``oechem.OEThrow``.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    # flag on command line indicates uncoloring option or not
    bUncolor = itf.GetBool("-uncolor")

    # input structure(s) to transform
    ifsmols = oechem.oemolistream()
    if not ifsmols.open(itf.GetString("-i")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" %
                             itf.GetString("-i"))

    # save output structure(s) to this file
    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-o")):
        oechem.OEThrow.Fatal("Unable to open %s for writing" %
                             itf.GetString("-o"))
    # the annotations are SD data, so the output format must be able to
    # carry them; report that (not an "unable to open" error) on failure
    if not oechem.OEIsSDDataFormat(ofs.GetFormat()):
        oechem.OEThrow.Fatal("Output file %s does not support SD data" %
                             itf.GetString("-o"))

    irec = 0    # number of input records read
    ototal = 0  # number of annotated molecules written
    frag = oechem.OEGraphMol()  # reused as the destination of each subset
    for mol in ifsmols.GetOEGraphMols():
        irec += 1
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)
        regions = oemedchem.OEGetBemisMurcko(mol)
        if not regions.IsValid():
            # fall back to the record number when the molecule is untitled
            name = mol.GetTitle() or 'Record ' + str(irec)
            oechem.OEThrow.Warning("%s: no perceived regions" % name)
            continue
        for bmregion in regions:
            # create a fragment from the perceived region
            oechem.OESubsetMol(frag, mol, bmregion, True)
            if bUncolor:
                # ignore 3D stereo parities
                if frag.GetDimension() == 3:
                    frag.SetDimension(0)
                # uncolor the fragment
                oechem.OEUncolorMol(frag)
            smi = oechem.OEMolToSmiles(frag)
            # annotate the input molecule with the role information
            for role in bmregion.GetRoles():
                oechem.OEAddSDData(mol, role.GetName(), smi)
        ototal += 1
        oechem.OEWriteMolecule(ofs, mol)

    if not irec:
        oechem.OEThrow.Fatal('No records in input structure file to perceive')

    if not ototal:
        oechem.OEThrow.Warning('No annotated structures generated')

    print(
        "Input molecules={0:d}, output annotated {1:s}molecules={2:d}".format(
            irec, ("(uncolored) " if bUncolor else ""), ototal))

    return 0
def DumpSDData(mol):
    """Print the title of ``mol`` followed by each of its SD tag/value pairs."""
    print("SD data of", mol.GetTitle())
    # one "tag : value" line per SD data pair
    for pair in oechem.OEGetSDDataPairs(mol):
        tag, value = pair.GetTag(), pair.GetValue()
        print(tag, ':', value)
    # blank line separates successive dumps
    print()


# Demonstration script: build a titled benzene molecule, then set, test,
# delete, add and clear SD data on it, dumping the SD data after each step.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")
mol.SetTitle("benzene")

# set some tagged data: once from a tag/value string pair and once from an
# OESDDataPair object
oechem.OESetSDData(mol, "color", "brown")
oechem.OESetSDData(mol, oechem.OESDDataPair("size", "small"))
DumpSDData(mol)

# check for existence of data, then delete it
if oechem.OEHasSDData(mol, "size"):
    oechem.OEDeleteSDData(mol, "size")
DumpSDData(mol)

# add additional color data under the already-used "color" tag
oechem.OEAddSDData(mol, "color", "black")
DumpSDData(mol)

# remove all SD data
oechem.OEClearSDData(mol)
DumpSDData(mol)
# @ </SNIPPET>
Exemple #4
0
def set_sd_tags(Conf, Props, calctype):
    """
    Write the results of one QM calculation onto a conformer as SD tags.

    Which tags are written depends on calctype: the runtime is always
    stored; 'hess' stops there; 'spe' additionally stores the final energy
    (plus the SCS-MP2 energy when the method is MP2); 'opt' further stores
    the OC-corrected energy if available, the original conformer number,
    the number of optimization steps and the initial energy.

    Warning
    -------
    Tags are added, not replaced.  If the exact tag already exists there
    will be duplicate tags with possibly different data (NOT recommended),
    and a later lookup of that tag will likely return only one of them.

    TODO: maybe add some kind of checking to prevent duplicate tags added

    Parameters
    ----------
    Conf:       Single conformer from OEChem molecule
    Props:      Dictionary output from ProcessOutput function.
                Keys read here: method, basis, package, time, and
                (depending on calctype) finalEnergy, finalSCSEnergy,
                ocEnergy, numSteps, initEnergy
    calctype: string; one of 'opt','spe','hess' for geometry optimization,
        single point energy calculation, or Hessian calculation

    """

    # level of theory and package name appear in every tag label
    method = Props['method']
    basisset = Props['basis']
    pkg = Props['package']

    full_method = "{}/{}".format(method, basisset)
    cdict = {'spe': 'Single Pt.', 'opt': 'Opt.', 'hess': 'Hessian'}
    kind = cdict[calctype]

    def add_tag(label, value):
        # every value is stored in its string form
        oechem.OEAddSDData(Conf, label, str(value))

    # runtime is available for every calculation type
    add_tag("QM {} {} Runtime (sec) {}".format(pkg, kind, full_method),
            Props['time'])

    # a Hessian job contributes nothing else
    if calctype == 'hess':
        return

    # without a final energy the job probably did not finish: leave a note
    # on the conformer and stop
    if 'finalEnergy' not in Props:
        add_tag("Note on {} {}".format(kind, full_method),
                "JOB DID NOT FINISH")
        return

    # final energy of the conformer
    add_tag("QM {} Final {} Energy (Har) {}".format(pkg, kind, full_method),
            Props['finalEnergy'])

    # MP2 jobs also carry a final SCS-MP2 energy
    if method.lower() == 'mp2':
        add_tag("QM {} Final {} Energy (Har) SCS-{}".format(
            pkg, kind, full_method), Props['finalSCSEnergy'])

    # COSMO energy with outlying charge correction. Turbomole only!
    if 'ocEnergy' in Props:
        if calctype == 'opt':
            add_tag("QM {} Final {} Energy with OC correction (Har) {}".format(
                pkg, kind, full_method), Props['ocEnergy'])
        elif calctype == 'spe':
            print(
                "Extraction of COSMO OC energy from Turbomole not yet supported for SPE calcns"
            )

    # a single point job contributes nothing else
    if calctype == 'spe':
        return

    # Original conformer number: create the tag if missing, otherwise append
    # the current conformer ID after the stored one.
    # !! Opt2 files should ALREADY have this !! Opt2 index is NOT orig index !!
    conf_tag = "Original omega conformer number"
    already_tagged = oechem.OEHasSDData(Conf, conf_tag)
    try:
        if already_tagged:
            previous = oechem.OEGetSDData(Conf, conf_tag)
            current = str(Conf.GetIdx() + 1)
            oechem.OESetSDData(Conf, conf_tag,
                               "{}, {}".format(previous, current))
        else:
            oechem.OEAddSDData(Conf, conf_tag, str(Conf.GetIdx() + 1))
    except AttributeError:
        # if not working with confs, the object will have no GetIdx
        pass

    # number of geometry optimization steps
    add_tag("QM {} {} Steps {}".format(pkg, kind, full_method),
            Props['numSteps'])

    # initial energy of the conformer
    add_tag("QM {} Initial {} Energy (Har) {}".format(
        pkg, kind, full_method), Props['initEnergy'])
    def GetBestOverlays(self, querymolstr, options, iformat, oformat):
        """ Search the shape database with a query and return the overlaid
        hits as a single string written in the format 'oformat'.

        querymolstr - a string containing a molecule to use as the query
        options - an instance of OEShapeDatabaseOptions
        iformat - a string representing the file extension to parse the
                  querymolstr as.
                  Note: old clients could be passing .sq files, so when
                  iformat == '.oeb' and no molecule can be read, the string
                  is interpreted as a .sq file instead.
        oformat - file format to write the results as

        Raises ValueError if the query string cannot be opened or read, or
        if the output string stream cannot be opened.
        """
        stopwatch = oechem.OEWallTimer()

        # block until the database load has finished
        loaded = self.IsLoaded(True)
        assert loaded

        if iformat.startswith(".sq"):
            query = ReadShapeQuery(querymolstr)
        else:
            # parse the query string as a molecule of the requested format
            qfs = SetupStream(oechem.oemolistream(), iformat)
            if not qfs.openstring(querymolstr):
                raise ValueError("Unable to open input molecule string")

            query = oechem.OEGraphMol()
            if not oechem.OEReadMolecule(qfs, query):
                if iformat != ".oeb":
                    raise ValueError(
                        "Unable to read a molecule from the string of format '%s'"
                        % iformat)
                # could be an old client trying to send a .sq file.
                query = ReadShapeQuery(querymolstr)

        ofs = SetupStream(oechem.oemolostream(), oformat)
        if not ofs.openstring():
            raise ValueError("Unable to openstring for output")

        # this is a "Write" lock to be paranoid and not overload the GPU
        self.rwlock.AcquireWriteLock()
        try:
            scores = self.shapedb.GetSortedScores(query, options)
            sys.stderr.write("%f seconds to do search\n" %
                             stopwatch.Elapsed())
        finally:
            self.rwlock.ReleaseWriteLock()

        # restart the timer to measure the write phase separately
        stopwatch.Start()
        for score in scores:
            mcmol = oechem.OEMol()
            if not self.moldb.GetMolecule(mcmol, score.GetMolIdx()):
                oechem.OEThrow.Warning(
                    "Can't retrieve molecule %i from the OEMolDatabase, "
                    "skipping..." % score.GetMolIdx())
                continue
            # remove hydrogens to make output smaller, this also
            # ensures OEPrepareFastROCSMol will have the same output
            oechem.OESuppressHydrogens(mcmol)

            # single-conformer copy of the hit conformer, with the SD data
            # of the multi-conformer molecule carried over
            hit_conf = mcmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx()))
            mol = oechem.OEGraphMol(hit_conf)
            oechem.OECopySDData(mol, mcmol)

            # choose the score tags matching the similarity function
            if options.GetSimFunc() == oefastrocs.OEShapeSimFuncType_Tanimoto:
                tagged_getters = (
                    ("ShapeTanimoto", score.GetShapeTanimoto),
                    ("ColorTanimoto", score.GetColorTanimoto),
                    ("TanimotoCombo", score.GetTanimotoCombo),
                )
            else:
                tagged_getters = (
                    ("ShapeTversky", score.GetShapeTversky),
                    ("ColorTversky", score.GetColorTversky),
                    ("TverskyCombo", score.GetTverskyCombo),
                )
            for tag, getter in tagged_getters:
                oechem.OESetSDData(mol, tag, "%.4f" % getter())

            # record the starting orientation when it is not the inertial one
            orientation = options.GetInitialOrientation()
            if orientation != oefastrocs.OEFastROCSOrientation_Inertial:
                oechem.OEAddSDData(
                    mol, "Opt. Starting Pos.",
                    GetAltStartsString(options.GetInitialOrientation()))

            score.Transform(mol)

            oechem.OEWriteMolecule(ofs, mol)

        output = ofs.GetString()
        sys.stderr.write("%f seconds to write hitlist\n" %
                         stopwatch.Elapsed())
        sys.stderr.flush()
        ofs.close()

        return output
Exemple #6
0
def compare_ffs(in_dict, conf_id_tag, out_prefix, mol_slice=None):
    """
    For 2+ SDF files that are analogous in terms of molecules and their
    conformers, assess them by RMSD, TFD, and relative energy differences.

    For every non-reference file a new SDF file named
    ``<out_prefix>_<basename of query file>`` is written, holding the query
    conformers with RMSD and TFD SD tags added.  Each output stream is
    closed as soon as its query file has been processed, also on error.

    Parameters
    ----------
    in_dict : OrderedDict
        dictionary from input file, where key is method and value is dictionary
        first entry should be reference method
        in sub-dictionary, keys are 'sdfile' and 'sdtag'
    conf_id_tag : string
        label of the SD tag that should be the same for matching conformers
        in different files
    out_prefix : string
        prefix prepended to sdf file name to write out new SDF file
        with RMSD and TFD info added as SD tags
    mol_slice : numpy slice object
        Passed through to reader.read_mols to select molecules.
        The resulting integers are numerically sorted and duplicates removed.
        e.g., slices = np.s_[0, 3:5, 6::3] would be parsed to return
        [0, 3, 4, 6, 9, 12, 15, 18, ...]
        Can also parse from end: [-3:] gets the last 3 molecules, and
        [-2:-1] is the same as [-2] to get just next to last molecule.

    Returns
    -------
    enes_full : 3D list
        enes_full[i][j][k] = ddE of ith method, jth mol, kth conformer.
        ddE = (dE of query method) - (dE of ref method),
        where the dE is computed as conformer M - conformer N,
        and conformer N is chosen from the lowest energy of the ref confs.
        the reference method is not present; i.e., self-comparison is skipped,
        so the max i value represents total number of files minus one.
    rmsds_full : 3D list
        same format as that of enes_full but with conformer RMSDs
    tfds_full : 3D list
        same format as that of enes_full but with conformer TFDs
    smiles_full : 3D list
        same format as that of enes_full but holding, for each conformer,
        the value of its conf_id_tag SD tag

    Raises
    ------
    ValueError
        if molecules (title / number of conformers) or conformers
        (conf_id_tag value) of reference and query do not line up
    """
    # set RMSD calculation parameters
    automorph = True  # take into acct symmetry related transformations
    heavyOnly = False  # do consider hydrogen atoms for automorphisms
    overlay = True  # find the lowest possible RMSD

    # initiate final data lists
    enes_full = []
    rmsds_full = []
    tfds_full = []
    smiles_full = []

    # the first entry represents the reference geometries
    ref_entry = list(in_dict.values())[0]
    sdf_ref = ref_entry['sdfile']
    tag_ref = ref_entry['sdtag']

    # assess each file against reference
    for ff_label, ff_dict in in_dict.items():

        # get details of queried file
        sdf_que = ff_dict['sdfile']
        tag_que = ff_dict['sdtag']

        # skip self-comparison of the reference
        if sdf_que == sdf_ref:
            continue

        # initiate new sublists
        enes_method = []
        rmsds_method = []
        tfds_method = []
        smiles_method = []

        # open an output file to store query molecules with new SD tags
        out_file = f'{out_prefix}_{os.path.basename(sdf_que)}'
        ofs = oechem.oemolostream()
        if not ofs.open(out_file):
            oechem.OEThrow.Fatal(f"Unable to open {out_file} for writing")

        # the stream belongs to this query file only: close it here rather
        # than once after the loop, which leaked all but the last stream and
        # failed when no query file was processed at all
        try:
            # load molecules from open reference and query files
            print(f"\n\nOpening reference file {sdf_ref}")
            mols_ref = reader.read_mols(sdf_ref, mol_slice)

            print(f"Opening query file {sdf_que} for [ {ff_label} ] energies")
            mols_que = reader.read_mols(sdf_que, mol_slice)

            # loop over each molecule in reference and query files
            for rmol, qmol in zip(mols_ref, mols_que):

                # initial check that they have same title and number of confs
                rmol_name = rmol.GetTitle()
                rmol_nconfs = rmol.NumConfs()
                if (rmol_name != qmol.GetTitle()) or (rmol_nconfs !=
                                                      qmol.NumConfs()):
                    raise ValueError(
                        "ERROR: Molecules not aligned in iteration. "
                        "Offending molecules and number of conformers:\n"
                        f"\'{rmol_name}\': {rmol_nconfs} nconfs\n"
                        f"\'{qmol.GetTitle()}\': {qmol.NumConfs()} nconfs")

                # initialize lists to store per-conformer data
                enes_ref = []
                enes_que = []
                rmsds_mol = []
                tfds_mol = []
                smiles_mol = []

                # loop over each conformer of this mol
                for ref_conf, que_conf in zip(rmol.GetConfs(),
                                              qmol.GetConfs()):

                    # check conformer match from the specified tag
                    ref_id = oechem.OEGetSDData(ref_conf, conf_id_tag)
                    que_id = oechem.OEGetSDData(que_conf, conf_id_tag)
                    if ref_id != que_id:
                        raise ValueError(
                            "ERROR: Conformers not aligned in iteration"
                            f" for mol: '{rmol_name}'. The conformer "
                            f"IDs ({conf_id_tag}) for ref and query are:"
                            f"\n{ref_id}\n{que_id}.")

                    # note the conformer id
                    smiles_mol.append(ref_id)

                    # get energies
                    enes_ref.append(
                        float(oechem.OEGetSDData(ref_conf, tag_ref)))
                    enes_que.append(
                        float(oechem.OEGetSDData(que_conf, tag_que)))

                    # compute RMSD between reference and query conformers
                    rmsd = oechem.OERMSD(ref_conf, que_conf, automorph,
                                         heavyOnly, overlay)
                    rmsds_mol.append(rmsd)

                    # compute TFD between reference and query conformers
                    tfd = calc_tfd(ref_conf, que_conf, conf_id_tag)
                    tfds_mol.append(tfd)

                    # store data in SD tags for query conf, write conf to file
                    oechem.OEAddSDData(que_conf, f'RMSD to {sdf_ref}',
                                       str(rmsd))
                    oechem.OEAddSDData(que_conf, f'TFD to {sdf_ref}',
                                       str(tfd))
                    oechem.OEWriteConstMolecule(ofs, que_conf)

                # relative energies against the lowest E reference conformer;
                # the query energies use that same conformer index as zero
                lowest_ref_idx = enes_ref.index(min(enes_ref))
                rel_enes_ref = np.array(enes_ref) - enes_ref[lowest_ref_idx]
                rel_enes_que = np.array(enes_que) - enes_que[lowest_ref_idx]

                # ddE = dE (query method) - dE (ref method)
                enes_mol = rel_enes_que - rel_enes_ref

                # store then move on
                enes_method.append(enes_mol)
                rmsds_method.append(np.array(rmsds_mol))
                tfds_method.append(np.array(tfds_mol))
                smiles_method.append(smiles_mol)
        finally:
            ofs.close()

        enes_full.append(enes_method)
        rmsds_full.append(rmsds_method)
        tfds_full.append(tfds_method)
        smiles_full.append(smiles_method)

    return enes_full, rmsds_full, tfds_full, smiles_full
Exemple #7
0
def SetOptSDTags(Conf, Props, spe=False):
    """
    For one particular conformer, set all available SD tags based on data
    in Props dictionary.

    Warning
    -------
    If the exact tag already exists, and you want to add a new one then there
    will be duplicate tags with maybe different data. (NOT recommended).
    Then the function to get SDList will only get one or the other;
    I think it just gets the first matching tag.

    TODO: maybe add some kind of checking to prevent duplicate tags added

    Parameters
    ----------
    Conf:       Single conformer from OEChem molecule
    Props:      Dictionary output from ProcessOutput function.
                Keys read here: method, basis, package, finalEnergy, time,
                and for optimizations ocEnergy (optional), numSteps,
                initEnergy
    spe:        Boolean - are the results of a single point energy calcn?

    """

    # get level of theory for setting SD tags
    method = Props['method']
    basisset = Props['basis']
    pkg = Props['package']

    # check that finalEnergy is there. if not, opt probably did not finish
    # make a note of that in SD tag
    if 'finalEnergy' not in Props:
        if not spe:
            taglabel = "Note on opt. %s/%s" % (method, basisset)
        else:
            taglabel = "Note on SPE %s/%s" % (method, basisset)
        oechem.OEAddSDData(Conf, taglabel, "JOB DID NOT FINISH")
        return

    # Set new SD tag for conformer's final energy
    if not spe:
        taglabel = "QM %s Final Opt. Energy (Har) %s/%s" % (pkg, method,
                                                            basisset)
    else:
        taglabel = "QM %s Single Pt. Energy (Har) %s/%s" % (pkg, method,
                                                            basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['finalEnergy']))

    # Set new SD tag for wall-clock time
    if not spe:
        taglabel = "QM %s Opt. Runtime (sec) %s/%s" % (pkg, method, basisset)
    else:
        taglabel = "QM %s Single Pt. Runtime (sec) %s/%s" % (pkg, method,
                                                             basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['time']))

    # Add COSMO energy with outlying charge correction. Turbomole only!
    # The tag is written for optimizations only: for SPE there is no label
    # for it, and writing it anyway would file the OC energy under the
    # runtime label set just above.
    if 'ocEnergy' in Props:
        if not spe:
            taglabel = "QM %s Final Opt. Energy with OC correction (Har) %s/%s" % (
                pkg, method, basisset)
            oechem.OEAddSDData(Conf, taglabel, str(Props['ocEnergy']))
        else:
            print(
                "Extraction of COSMO OC energy from Turbomole not yet supported for SPE calcns"
            )

    # stop here if SPE
    if spe:
        return

    # Set new SD tag for original conformer number
    # !! Opt2 files should ALREADY have this !! Opt2 index is NOT orig index !!
    taglabel = "Original omega conformer number"
    # add new tag if not existing
    if not oechem.OEHasSDData(Conf, taglabel):
        # if not working with confs, will have no GetIdx
        try:
            oechem.OEAddSDData(Conf, taglabel, str(Conf.GetIdx() + 1))
        except AttributeError:
            pass
    # if tag exists, append new conformer ID after the old one
    else:
        # if not working with confs, will have no GetIdx
        try:
            oldid = oechem.OEGetSDData(Conf, taglabel)
            newid = str(Conf.GetIdx() + 1)
            totid = "{}, {}".format(oldid, newid)
            oechem.OESetSDData(Conf, taglabel, totid)
        except AttributeError:
            pass

    # Set new SD tag for numSteps of geom. opt.
    taglabel = "QM %s Opt. Steps %s/%s" % (pkg, method, basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['numSteps']))

    # Set new SD tag for conformer's initial energy
    taglabel = "QM %s Initial Opt. Energy (Har) %s/%s" % (pkg, method,
                                                          basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['initEnergy']))
def main(args):
    """Optimize ligands in a protein and annotate them with energy terms.

    Each ligand is optimized with a Szybki object set up for exact Coulomb
    protein electrostatics, then a single point calculation is run with a
    Szybki object set up for the solvent PB model, and selected
    protein-ligand energy terms are stored on every conformer as SD data
    before the molecule is written out.

    Parameters
    ----------
    args : list of str
        [program name, ligand_file, protein_file, output_file]; the output
        format must support SD data (SDF or OEB).

    Returns
    -------
    int
        Always 0; failures are reported through ``oechem.OEThrow``.
    """
    if len(args) != 4:
        oechem.OEThrow.Usage(
            "%s ligand_file protein_file output_file (SDF or OEB)" % args[0])

    lfs = oechem.oemolistream()
    if not lfs.open(args[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % args[1])

    pfs = oechem.oemolistream()
    if not pfs.open(args[2]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % args[2])

    ofs = oechem.oemolostream()
    if not ofs.open(args[3]):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % args[3])

    if not oechem.OEIsSDDataFormat(ofs.GetFormat()):
        oechem.OEThrow.Fatal(
            "Output file does not support SD data used by this example")

    # Szybki options for VdW-Coulomb calculations
    optsC = oeszybki.OESzybkiOptions()
    optsC.GetProteinOptions().SetProteinElectrostaticModel(
        oeszybki.OEProteinElectrostatics_ExactCoulomb)
    optsC.SetRunType(oeszybki.OERunType_CartesiansOpt)

    # Szybki options for PB calculations
    optsPB = oeszybki.OESzybkiOptions()
    optsPB.GetProteinOptions().SetProteinElectrostaticModel(
        oeszybki.OEProteinElectrostatics_SolventPBForces)
    optsPB.SetRunType(oeszybki.OERunType_SinglePoint)

    # Szybki objects
    szC = oeszybki.OESzybki(optsC)
    szPB = oeszybki.OESzybki(optsPB)

    # read and setup protein; stop early instead of running every ligand
    # against a protein that could not be read
    protein = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(pfs, protein):
        oechem.OEThrow.Fatal("Unable to read protein from %s" % args[2])
    szC.SetProtein(protein)
    szPB.SetProtein(protein)

    # energy terms copied from the single point results into SD data
    terms = {
        oeszybki.OEPotentialTerms_ProteinLigandInteraction,
        oeszybki.OEPotentialTerms_VdWProteinLigand,
        oeszybki.OEPotentialTerms_CoulombProteinLigand,
        oeszybki.OEPotentialTerms_ProteinDesolvation,
        oeszybki.OEPotentialTerms_LigandDesolvation,
        oeszybki.OEPotentialTerms_SolventScreening,
    }

    # process molecules
    for mol in lfs.GetOEMols():

        # optimize mol; an empty result sequence means nothing was computed
        if not list(szC(mol)):
            oechem.OEThrow.Warning("No results processing molecule: %s" %
                                   mol.GetTitle())
            continue

        # do single point with better electrostatics
        for conf, results in zip(mol.GetConfs(), szPB(mol)):
            for term in terms:
                strEnergy = "%9.4f" % results.GetEnergyTerm(term)
                oechem.OEAddSDData(conf, oeszybki.OEGetEnergyTermName(term),
                                   strEnergy)

        oechem.OEWriteMolecule(ofs, mol)

    return 0
Exemple #9
0
def ChEMBLSolubilityUsage(itf):
    """Apply ChEMBL solubility transforms and export the improving ones.

    For every input molecule (largest component only) the ChEMBL24
    solubility transforms are applied.  For each transformed molecule the
    per-pair solubility changes are read from its
    "OEMMP_normalized_value (uM)" SD data; molecules with no usable values
    or with a negative average change are dropped, the rest are annotated
    with "average,stddev,count" and written to the output file.

    Parameters
    ----------
    itf : interface object providing ``GetString`` for the ``-input`` and
        ``-output`` file names

    Returns
    -------
    bool
        Always True; failures are reported through ``oechem.OEThrow``.
    """
    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-input")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" %
                             itf.GetString("-input"))

    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-output")):
        # the file name comes from the interface, not from the stream
        oechem.OEThrow.Fatal("Unable to open %s for writing" %
                             itf.GetString("-output"))

    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # @ <SNIPPET-OEAPPLYCHEMBLSOLUBILITY-EXAMPLE>
    # number of bonds of chemistry context at site of change
    #  for the applied transforms
    totalmols = 0
    xformctxt = oemedchem.OEMatchedPairContext_Bond2
    for molidx, mol in enumerate(ifs.GetOEGraphMols(), start=1):
        # consider only the largest input fragment
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        smolcnt = 0
        # only consider solubility transforms having at least 5 matched pairs
        for solMol in oemedchem.OEApplyChEMBL24SolubilityTransforms(
                mol, xformctxt, 5):
            # compute net change in solubility from MMP data
            deltasol = []
            if oechem.OEHasSDData(solMol, "OEMMP_normalized_value (uM)"):
                for sditem in oechem.OEGetSDData(
                        solMol, "OEMMP_normalized_value (uM)").split('\n'):
                    # fromIndex,toIndex,fromValue,toValue
                    sdvalues = sditem.split(',')
                    # skip blank/short lines and pairs with a missing value
                    if (len(sdvalues) < 4 or not sdvalues[2]
                            or not sdvalues[3]):
                        continue
                    deltasol.append(float(sdvalues[3]) - float(sdvalues[2]))
            if not deltasol:
                continue

            avgsol = deltasol[0]
            if len(deltasol) > 1:
                avgsol = average(deltasol)

            # reject examples with net decrease in solubility
            if avgsol < 0.0:
                continue
            sdev = stddev(deltasol)

            # annotate with average,stddev,num
            oechem.OEAddSDData(
                solMol, "OEMMP_average_delta_normalized_value",
                "{0:.1F},{1:.2F},{2}".format(avgsol, sdev, len(deltasol)))

            # export solubility transformed molecule with SDData annotations
            if oechem.OEWriteMolecule(
                    ofs, solMol) == oechem.OEWriteMolReturnCode_Success:
                smolcnt += 1

        oechem.OEThrow.Info("{0}: Exported molecule count, {1}".format(
            molidx, smolcnt))
        totalmols += smolcnt
    # @ </SNIPPET-OEAPPLYCHEMBLSOLUBILITY-EXAMPLE>

    print("Exported molecule count = {0}".format(totalmols))

    return True
Exemple #10
0
def FixSDFTitle(tag, mol):
    """Truncate an over-long molecule title, keeping the full text as SD data.

    When the title of ``mol`` is longer than 80 characters, the complete
    title is stored as SD data under ``tag`` and the title is cut to its
    first 80 characters.

    Returns True if the title was truncated, False if it was left alone.
    """
    title = mol.GetTitle()
    if len(title) <= 80:
        return False
    # keep the full title before shortening it
    oechem.OEAddSDData(mol, tag, title)
    mol.SetTitle(title[:80])
    return True