Exemple #1
0
def main(argv=[__name__]):
    """Copy molecules from an input file to an output file, keeping only
    the first largest component of each molecule.

    Args:
        argv: argument list; ``argv[0]`` is used in the usage message,
            ``argv[1]`` is the input file and ``argv[2]`` the output file.
    """
    # exactly two file arguments are expected after the program name
    arg_count = len(argv)
    if arg_count != 3:
        oechem.OEThrow.Usage("%s <infile> <outfile>" % argv[0])

    # source of molecules
    reader = oechem.oemolistream()
    in_path = argv[1]
    opened_in = reader.open(in_path)
    if not opened_in:
        oechem.OEThrow.Fatal("Unable to open %s for reading" % in_path)

    # destination for the stripped molecules
    writer = oechem.oemolostream()
    out_path = argv[2]
    opened_out = writer.open(out_path)
    if not opened_out:
        oechem.OEThrow.Fatal("Unable to open %s for writing" % out_path)

    for molecule in reader.GetOEMols():
        # reduce the molecule in place before writing it out
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(molecule)
        oechem.OEWriteMolecule(writer, molecule)
def main(argv=[__name__]):
    """Annotate molecules with the SMILES of their Bemis-Murcko regions.

    The command line is parsed with ``oechem.OEInterface`` using the
    ``InterfaceData`` definition. Each molecule read from ``-i`` is reduced
    to its first largest component, its regions are perceived with
    ``oemedchem.OEGetBemisMurcko``, and for every region the fragment SMILES
    is attached to the molecule as SD data under each of the region's role
    names. Annotated molecules are written to ``-o``; molecules without any
    perceived region are reported with a warning and not written.

    Args:
        argv: command-line argument list handed to ``oechem.OEInterface``.

    Returns:
        0 after the summary line has been printed.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    # flag on command line indicates uncoloring option or not
    bUncolor = itf.GetBool("-uncolor")

    # input structure(s) to transform
    ifsmols = oechem.oemolistream()
    if not ifsmols.open(itf.GetString("-i")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" %
                             itf.GetString("-i"))

    # save output structure(s) to this file
    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-o")):
        oechem.OEThrow.Fatal("Unable to open %s for writing" %
                             itf.GetString("-o"))
    # the annotations are stored as SD data, so the output format has to be
    # able to carry it; report that specifically rather than as an open error
    if not oechem.OEIsSDDataFormat(ofs.GetFormat()):
        oechem.OEThrow.Fatal("Output format of %s does not support SD data" %
                             itf.GetString("-o"))

    irec = 0
    ototal = 0
    # scratch molecule reused for every perceived region
    frag = oechem.OEGraphMol()
    for irec, mol in enumerate(ifsmols.GetOEGraphMols(), start=1):
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)
        regions = oemedchem.OEGetBemisMurcko(mol)
        if not regions.IsValid():
            # fall back to the record number when the molecule has no title
            name = mol.GetTitle() or ('Record ' + str(irec))
            oechem.OEThrow.Warning("%s: no perceived regions" % name)
            continue
        for bmregion in regions:
            # create a fragment from the perceived region
            oechem.OESubsetMol(frag, mol, bmregion, True)
            if bUncolor:
                # ignore 3D stereo parities
                if frag.GetDimension() == 3:
                    frag.SetDimension(0)
                # uncolor the fragment
                oechem.OEUncolorMol(frag)
            smi = oechem.OEMolToSmiles(frag)
            # annotate the input molecule with the role information
            for role in bmregion.GetRoles():
                oechem.OEAddSDData(mol, role.GetName(), smi)
        ototal += 1
        oechem.OEWriteMolecule(ofs, mol)

    if not irec:
        oechem.OEThrow.Fatal('No records in input structure file to perceive')

    if not ototal:
        oechem.OEThrow.Warning('No annotated structures generated')

    print(
        "Input molecules={0:d}, output annotated {1:s}molecules={2:d}".format(
            irec, ("(uncolored) " if bUncolor else ""), ototal))

    return 0
Exemple #3
0
def MMPIndex(itf):
    """Index the input structures for matched pair analysis and write the index.

    Reads molecules from ``-input``, optionally filters their SD data,
    strips salts and/or stereo, adds each molecule to an
    ``oemedchem.OEMatchedPairAnalyzer`` and serializes the analyzer to the
    ``-output`` file.

    SD data handling precedence: ``-keepSD`` overrides ``-allSD`` and
    ``-clearSD``; ``-clearSD`` overrides ``-allSD``; with none of them given
    ``-allSD`` is assumed.

    Args:
        itf: parsed command-line interface object; queried through
            ``GetString``, ``GetBool``, ``GetInt``, ``HasString`` and
            ``GetStringList``.

    Returns:
        0 after the index has been written and the summary reported.
    """
    # output index file
    mmpindexfile = itf.GetString("-output")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpindexfile):
        # adjacent literals rather than a backslash continuation inside the
        # string, which would embed the source indentation in the message
        oechem.OEThrow.Fatal(
            ("Output file is not a matched pair index type - "
             "needs .mmpidx extension: {}").format(mmpindexfile))

    # create options class with defaults
    mmpopts = oemedchem.OEMatchedPairAnalyzerOptions()
    # set up options from command line
    if not oemedchem.OESetupMatchedPairIndexOptions(mmpopts, itf):
        oechem.OEThrow.Fatal("Error setting matched pair indexing options!")

    # input structures to index
    ifsindex = oechem.oemolistream()
    if not ifsindex.open(itf.GetString("-input")):
        oechem.OEThrow.Fatal("Unable to open {} for reading"
                             .format(itf.GetString("-input")))

    # get requested verbosity setting; -vverbose implies -verbose
    verbose = itf.GetBool("-verbose")
    vverbose = itf.GetBool("-vverbose")
    if vverbose:
        verbose = vverbose

    # negative record limits are treated as "no limit"
    maxrec = max(itf.GetInt("-maxrec"), 0)
    statusrec = itf.GetInt("-status")

    exportcompress = itf.GetBool("-exportcompress")
    if exportcompress:
        if not mmpopts.SetOptions(mmpopts.GetOptions() |
                                  oemedchem.OEMatchedPairOptions_ExportCompression):
            oechem.OEThrow.Warning("Error enabling export compression!")

    stripstereo = itf.GetBool("-stripstereo")
    stripsalts = itf.GetBool("-stripsalts")

    keepFields = []
    if itf.HasString("-keepSD"):
        keepFields = list(itf.GetStringList("-keepSD"))
        if verbose:
            oechem.OEThrow.Info('Retaining SD data fields: {}'.format(' '.join(keepFields)))

    alldata = itf.GetBool("-allSD")
    cleardata = itf.GetBool("-clearSD")

    # resolve conflicting SD data options into one effective setting
    if keepFields:
        if verbose and (alldata or cleardata):
            oechem.OEThrow.Info("Option -keepSD overriding -allSD, -clearSD")
        alldata = False
        cleardata = False
    elif cleardata:
        alldata = False
        if verbose:
            oechem.OEThrow.Info("Forced clearing of all input SD data")
    elif alldata:
        if verbose:
            oechem.OEThrow.Info("Retaining all input SD data")
        cleardata = False
    elif verbose:
        oechem.OEThrow.Info("No SD data handling option specified, -allSD assumed")

    # sentinel entries handed to FilterSDData in place of real field names
    if cleardata:
        keepFields = ['-CLEARSD']
    elif alldata or not keepFields:
        keepFields = ['-ALLSD']

    if verbose:
        if not mmpopts.HasIndexableFragmentHeavyAtomRange():
            oechem.OEThrow.Info("Indexing all fragments")
        else:
            oechem.OEThrow.Info("Limiting fragment cores to {0:.2f}-{1:.2f}% of input molecules"
                                .format(mmpopts.GetIndexableFragmentRangeMin(),
                                        mmpopts.GetIndexableFragmentRangeMax()))
        if statusrec:
            oechem.OEThrow.Info("Status output after every {0} records".format(statusrec))
        if maxrec:
            oechem.OEThrow.Info("Indexing a maximum of {0} records".format(maxrec))

        if exportcompress:
            oechem.OEThrow.Info("Removing singleton index nodes from index")

        if stripstereo:
            oechem.OEThrow.Info("Stripping stereo")

        if stripsalts:
            oechem.OEThrow.Info("Stripping salts")

        # report the effective setting, not the raw -clearSD flag, so that a
        # -keepSD override is not announced as clearing everything
        if cleardata:
            oechem.OEThrow.Info("Clearing all input SD data")
        elif alldata:
            oechem.OEThrow.Info("Retaining all input SD data")
        elif keepFields:
            oechem.OEThrow.Info('Retaining floating point SD data fields: {}'
                                .format(' '.join(keepFields)))

    # create indexing engine
    mmp = oemedchem.OEMatchedPairAnalyzer(mmpopts)

    # interpret SD fields as floating point data
    validdata = FilterSDData(keepFields, True)

    # add molecules to be indexed
    record = 0
    unindexed = 0
    for mol in ifsindex.GetOEGraphMols():
        if not alldata:
            # filter the input molecule SD data based on allowed fields
            validdata.FilterMolData(mol)

        if stripsalts:
            oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        if stripstereo:
            oechem.OEUncolorMol(mol,
                                (oechem.OEUncolorStrategy_RemoveAtomStereo |
                                 oechem.OEUncolorStrategy_RemoveBondStereo |
                                 oechem.OEUncolorStrategy_RemoveGroupStereo))

        # a return value different from the requested record index is
        # treated as "not indexed"
        status = mmp.AddMol(mol, record)
        if status != record:
            unindexed += 1
            if vverbose:
                oechem.OEThrow.Info('Input structure not added to index, record=%d status=%s' %
                                    (record, oemedchem.OEMatchedPairIndexStatusName(status)))
        record += 1
        if maxrec and record >= maxrec:
            break
        if statusrec and (record % statusrec) == 0:
            oechem.OEThrow.Info("Records: {} Indexed: {} Unindexed: {}"
                                .format(record, (record - unindexed), unindexed))

    if not mmp.NumMols():
        oechem.OEThrow.Fatal('No records in index structure file')

    if not mmp.NumMatchedPairs():
        oechem.OEThrow.Fatal('No matched pairs found from indexing, ' +
                             'use -fragGe,-fragLe options to extend indexing range')

    if not oemedchem.OEWriteMatchedPairAnalyzer(mmpindexfile, mmp):
        oechem.OEThrow.Fatal('Error serializing MMP index: {}'
                             .format(mmpindexfile))

    # return some status information
    oechem.OEThrow.Info("Records: {}, Indexed: {}, matched pairs: {:,d}"
                        .format(record,
                                mmp.NumMols(),
                                mmp.NumMatchedPairs()))
    return 0
Exemple #4
0
def FindSimpleMatchedPairs(itf):
    """Index the input structures and print the extracted matched pair transforms.

    Each molecule read from ``-input`` is reduced to its first largest
    component, has atom and bond stereo removed, and is added to an
    ``oemedchem.OEMatchedPairAnalyzer`` under a 1-based record index. The
    transforms found are then printed, each followed by the record indices of
    its matched pairs.

    Args:
        itf: parsed command-line interface object providing ``-input``
            (``GetString``) and ``-maxrec`` (``GetInt``).

    Returns:
        True once all transforms have been printed.
    """
    ims = oechem.oemolistream()
    if not ims.open(itf.GetString("-input")):
        # substitute the file name into the message; concatenating it left a
        # literal "%s" in the output
        oechem.OEThrow.Fatal("Unable to open %s for reading" %
                             itf.GetString("-input"))

    maxrecs = itf.GetInt("-maxrec")

    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # @ <SNIPPET-FINDSIMPLEMATCHEDPAIRS-EXAMPLE>
    # create options class with defaults
    mmpOpts = oemedchem.OEMatchedPairAnalyzerOptions()

    # for 'simple' pairs, alter default indexing options
    # - single cuts only, heavy atom substituents only (HMember indexing off)
    # NOTE(review): the ComboCuts flag is also set below, which does not
    #   obviously match "single cuts only" - confirm the intended option set
    mmpOpts.SetOptions(oemedchem.OEMatchedPairOptions_SingleCuts
                       | oemedchem.OEMatchedPairOptions_ComboCuts
                       | oemedchem.OEMatchedPairOptions_UniquesOnly)
    # - limit substituent size to no more than 20% of input structure
    mmpOpts.SetIndexableFragmentRange(80., 100.)

    # create analyzer class with nondefault options
    mmpAnalyzer = oemedchem.OEMatchedPairAnalyzer(mmpOpts)

    # ignore common index status returns
    sIgnoreStatus = 'FragmentRangeFilter,DuplicateStructure,'
    sIgnoreStatus += 'FragmentationLimitFilter,HeavyAtomFilter'

    # index the input structures
    for recindex, mol in enumerate(ims.GetOEGraphMols(), start=1):
        # consider only the largest input fragment
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)
        # ignore stereochemistry
        oechem.OEUncolorMol(mol, (oechem.OEUncolorStrategy_RemoveAtomStereo
                                  | oechem.OEUncolorStrategy_RemoveBondStereo))

        # explicitly provide a 1-based index to refer to indexed structures
        #   - to allow references back to external data elsewhere
        status = mmpAnalyzer.AddMol(mol, recindex)
        if status != recindex:
            # look the status name up once and warn unless it is one of the
            # expected, ignorable statuses
            statusname = oemedchem.OEMatchedPairIndexStatusName(status)
            if statusname not in sIgnoreStatus:
                oechem.OEThrow.Warning(
                    "{0}: molecule indexing error, status={1}".format(
                        recindex, statusname))
        # if limiting input, quit after limit
        if maxrecs and recindex >= maxrecs:
            break

    print("Index complete, matched pairs = {0}".format(
        mmpAnalyzer.NumMatchedPairs()))

    # specify how transforms are extracted (direction and allowed properties)
    extractMode = (
        oemedchem.OEMatchedPairTransformExtractMode_Sorted
        | oemedchem.OEMatchedPairTransformExtractMode_NoSMARTS
        | oemedchem.OEMatchedPairTransformExtractMode_AddMCSCorrespondence)

    extractOptions = oemedchem.OEMatchedPairTransformExtractOptions()
    # specify amount of chemical context at the site of the substituent change
    #   in the transform
    extractOptions.SetContext(oemedchem.OEMatchedPairContext_Bond0)
    extractOptions.SetOptions(extractMode)

    # walk the transforms and print the matched pairs
    transforms = oemedchem.OEMatchedPairGetTransforms(mmpAnalyzer,
                                                      extractOptions)
    for xfmidx, mmpxform in enumerate(transforms, start=1):
        print("{0:2} {1}".format(xfmidx, mmpxform.GetTransform()))
        # dump matched molecular pairs and index identifiers
        #   (recindex from indexing loop above)
        for mmppair in mmpxform.GetMatchedPairs():
            print("\tmatched pair molecule indices=({0},{1})".format(
                mmppair.FromIndex(), mmppair.ToIndex()))
    # @ </SNIPPET-FINDSIMPLEMATCHEDPAIRS-EXAMPLE>

    return True
Exemple #5
0
def ChEMBLSolubilityUsage(itf):
    """Apply the ChEMBL solubility transforms and export the improved analogs.

    For every molecule read from ``-input`` (reduced to its first largest
    component) the results of
    ``oemedchem.OEApplyChEMBL24SolubilityTransforms`` are examined. The
    per-pair solubility deltas are parsed from the
    "OEMMP_normalized_value (uM)" SD data; results with no usable data or a
    negative average delta are skipped. The remaining molecules are annotated
    with "average,stddev,count" and written to ``-output``.

    Args:
        itf: parsed command-line interface object providing ``-input`` and
            ``-output`` through ``GetString``.

    Returns:
        True after the total exported molecule count has been printed.
    """
    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-input")):
        # substitute the file name; concatenation left a literal "%s"
        oechem.OEThrow.Fatal("Unable to open %s for reading" %
                             itf.GetString("-input"))

    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-output")):
        # the file name comes from the interface, not from the output stream
        oechem.OEThrow.Fatal("Unable to open %s for writing" %
                             itf.GetString("-output"))

    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # @ <SNIPPET-OEAPPLYCHEMBLSOLUBILITY-EXAMPLE>
    # number of bonds of chemistry context at site of change
    #  for the applied transforms
    totalmols = 0
    xformctxt = oemedchem.OEMatchedPairContext_Bond2
    sdtag = "OEMMP_normalized_value (uM)"
    for molidx, mol in enumerate(ifs.GetOEGraphMols(), start=1):
        # consider only the largest input fragment
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        smolcnt = 0
        # only consider solubility transforms having at least 5 matched pairs
        for solMol in oemedchem.OEApplyChEMBL24SolubilityTransforms(
                mol, xformctxt, 5):
            # compute net change in solubility from MMP data
            deltasol = []
            if oechem.OEHasSDData(solMol, sdtag):
                for sditem in oechem.OEGetSDData(solMol, sdtag).split('\n'):
                    # fromIndex,toIndex,fromValue,toValue
                    sdvalues = sditem.split(',')
                    # skip blank/short lines as well as missing values
                    if len(sdvalues) < 4:
                        continue
                    if not sdvalues[2] or not sdvalues[3]:
                        continue
                    deltasol.append(float(sdvalues[3]) - float(sdvalues[2]))
            if not deltasol:
                continue

            # a single value is its own average
            avgsol = average(deltasol) if len(deltasol) > 1 else deltasol[0]

            # reject examples with net decrease in solubility
            if avgsol < 0.0:
                continue
            sdev = stddev(deltasol)

            # annotate with average,stddev,num
            oechem.OEAddSDData(
                solMol, "OEMMP_average_delta_normalized_value",
                "{0:.1F},{1:.2F},{2}".format(avgsol, sdev, len(deltasol)))

            # export solubility transformed molecule with SDData annotations
            if oechem.OEWriteMolecule(
                    ofs, solMol) == oechem.OEWriteMolReturnCode_Success:
                smolcnt += 1

        oechem.OEThrow.Info("{0}: Exported molecule count, {1}".format(
            molidx, smolcnt))
        totalmols += smolcnt
    # @ </SNIPPET-OEAPPLYCHEMBLSOLUBILITY-EXAMPLE>

    print("Exported molecule count = {0}".format(totalmols))

    return True