def main(argv=[__name__]):
    """Copy molecules between files, reducing each to a single component.

    ``argv`` must hold exactly three entries: the program name, the input
    file name and the output file name; otherwise a usage message is raised
    through ``oechem.OEThrow.Usage``.  Every molecule read from the input is
    passed through ``OEDeleteEverythingExceptTheFirstLargestComponent`` and
    then written to the output stream.
    """
    arg_count = len(argv)
    if arg_count != 3:
        oechem.OEThrow.Usage("%s <infile> <outfile>" % argv[0])

    # input side
    reader = oechem.oemolistream()
    opened_for_read = reader.open(argv[1])
    if not opened_for_read:
        oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    # output side
    writer = oechem.oemolostream()
    opened_for_write = writer.open(argv[2])
    if not opened_for_write:
        oechem.OEThrow.Fatal("Unable to open %s for writing" % argv[2])

    # strip each record in place, then emit it
    for entry in reader.GetOEMols():
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(entry)
        oechem.OEWriteMolecule(writer, entry)
def main(argv=[__name__]):
    """Annotate input molecules with their perceived Bemis-Murcko regions.

    Command-line options are parsed from ``argv`` via ``InterfaceData``:
    ``-i`` (input structures), ``-o`` (output structures, must be a format
    that can carry SD data) and ``-uncolor`` (uncolor each fragment before
    generating its SMILES).

    For every input molecule the largest component is kept, the regions
    returned by ``oemedchem.OEGetBemisMurcko`` are converted to SMILES, and
    each SMILES is attached to the molecule as SD data under the name of
    every role of that region.  Molecules without perceived regions are
    skipped with a warning.

    Returns:
        0 after printing a summary of input/output record counts.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    # flag on command line indicates uncoloring option or not
    bUncolor = itf.GetBool("-uncolor")

    # input structure(s) to transform
    ifsmols = oechem.oemolistream()
    if not ifsmols.open(itf.GetString("-i")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-i"))

    # save output structure(s) to this file
    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-o")):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % itf.GetString("-o"))
    # the annotations are written as SD data, so the output format must
    # be able to store them; report this distinctly from an open failure
    if not oechem.OEIsSDDataFormat(ofs.GetFormat()):
        oechem.OEThrow.Fatal("Output file format of %s does not support SD data"
                             % itf.GetString("-o"))

    irec = 0
    ototal = 0
    frag = oechem.OEGraphMol()
    for mol in ifsmols.GetOEGraphMols():
        irec += 1
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        regions = oemedchem.OEGetBemisMurcko(mol)
        if not regions.IsValid():
            # fall back to the record number when the molecule is untitled
            name = mol.GetTitle() or 'Record ' + str(irec)
            oechem.OEThrow.Warning("%s: no perceived regions" % name)
            continue

        for bmregion in regions:
            # create a fragment from the perceived region
            oechem.OESubsetMol(frag, mol, bmregion, True)
            if bUncolor:
                # ignore 3D stereo parities
                if frag.GetDimension() == 3:
                    frag.SetDimension(0)
                # uncolor the fragment
                oechem.OEUncolorMol(frag)
            smi = oechem.OEMolToSmiles(frag)
            # annotate the input molecule with the role information
            for role in bmregion.GetRoles():
                oechem.OEAddSDData(mol, role.GetName(), smi)

        # one annotated molecule written per input record with regions
        ototal += 1
        oechem.OEWriteMolecule(ofs, mol)

    if not irec:
        oechem.OEThrow.Fatal('No records in input structure file to perceive')

    if not ototal:
        oechem.OEThrow.Warning('No annotated structures generated')

    print(
        "Input molecules={0:d}, output annotated {1:s}molecules={2:d}".format(
            irec, ("(uncolored) " if bUncolor else ""), ototal))

    return 0
def MMPIndex(itf):
    """Build a matched-pair index from input structures and write it to disk.

    Args:
        itf: parsed command-line interface.  The options read here are
            ``-output``, ``-input``, ``-verbose``, ``-vverbose``,
            ``-maxrec``, ``-status``, ``-exportcompress``, ``-stripstereo``,
            ``-stripsalts``, ``-keepSD``, ``-allSD`` and ``-clearSD``;
            ``oemedchem.OESetupMatchedPairIndexOptions`` reads further
            indexing options from the same interface.

    Returns:
        0 after the index has been written and a summary reported.

    A fatal error is raised through ``oechem.OEThrow.Fatal`` when the output
    name is not a matched-pair index file type, the options or input cannot
    be set up, nothing was indexed, no matched pairs were found, or the
    index cannot be serialized.
    """
    # output index file
    mmpindexfile = itf.GetString("-output")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpindexfile):
        # adjacent literals instead of a backslash continuation inside the
        # string, which leaked stray characters into the message
        oechem.OEThrow.Fatal("Output file is not a matched pair index type - "
                             "needs .mmpidx extension: {}"
                             .format(mmpindexfile))

    # create options class with defaults
    mmpopts = oemedchem.OEMatchedPairAnalyzerOptions()
    # set up options from command line
    if not oemedchem.OESetupMatchedPairIndexOptions(mmpopts, itf):
        oechem.OEThrow.Fatal("Error setting matched pair indexing options!")

    # input structures to index
    ifsindex = oechem.oemolistream()
    if not ifsindex.open(itf.GetString("-input")):
        oechem.OEThrow.Fatal("Unable to open {} for reading"
                             .format(itf.GetString("-input")))

    # get requested verbosity setting; -vverbose implies -verbose
    verbose = itf.GetBool("-verbose")
    vverbose = itf.GetBool("-vverbose")
    if vverbose:
        verbose = vverbose

    # negative limits are treated as "no limit"
    maxrec = max(itf.GetInt("-maxrec"), 0)
    statusrec = itf.GetInt("-status")

    if itf.GetBool("-exportcompress"):
        if not mmpopts.SetOptions(mmpopts.GetOptions() |
                                  oemedchem.OEMatchedPairOptions_ExportCompression):
            oechem.OEThrow.Warning("Error enabling export compression!")

    stripstereo = itf.GetBool("-stripstereo")
    stripsalts = itf.GetBool("-stripsalts")

    keepFields = []
    if itf.HasString("-keepSD"):
        for field in itf.GetStringList("-keepSD"):
            keepFields.append(field)
        if verbose:
            oechem.OEThrow.Info('Retaining SD data fields: {}'.format(' '.join(keepFields)))

    alldata = itf.GetBool("-allSD")
    cleardata = itf.GetBool("-clearSD")

    # resolve conflicting SD options: -keepSD wins, then -clearSD, then -allSD
    if keepFields:
        if verbose and (alldata or cleardata):
            oechem.OEThrow.Info("Option -keepSD overriding -allSD, -clearSD")
        alldata = False
        cleardata = False
    elif cleardata:
        alldata = False
        if verbose:
            oechem.OEThrow.Info("Forced clearing of all input SD data")
    elif alldata:
        if verbose:
            oechem.OEThrow.Info("Retaining all input SD data")
        cleardata = False
    elif verbose:
        oechem.OEThrow.Info("No SD data handling option specified, -allSD assumed")

    # sentinel field lists handed to FilterSDData below
    if cleardata:
        keepFields = ['-CLEARSD']
    elif alldata or not keepFields:
        keepFields = ['-ALLSD']

    if verbose:
        if not mmpopts.HasIndexableFragmentHeavyAtomRange():
            oechem.OEThrow.Info("Indexing all fragments")
        else:
            oechem.OEThrow.Info("Limiting fragment cores to {0:.2f}-{1:.2f}% of input molecules"
                                .format(mmpopts.GetIndexableFragmentRangeMin(),
                                        mmpopts.GetIndexableFragmentRangeMax()))
        if statusrec:
            oechem.OEThrow.Info("Status output after every {0} records".format(statusrec))
        if maxrec:
            oechem.OEThrow.Info("Indexing a maximum of {0} records".format(maxrec))

        if itf.GetBool("-exportcompress"):
            oechem.OEThrow.Info("Removing singleton index nodes from index")

        if stripstereo:
            oechem.OEThrow.Info("Stripping stereo")

        if stripsalts:
            oechem.OEThrow.Info("Stripping salts")

        # report the *resolved* SD handling (the raw -clearSD flag may have
        # been overridden by -keepSD above)
        if cleardata:
            oechem.OEThrow.Info("Clearing all input SD data")
        elif alldata:
            oechem.OEThrow.Info("Retaining all input SD data")
        elif keepFields:
            # space-separate the names so they stay readable
            oechem.OEThrow.Info('Retaining floating point SD data fields: {}'
                                .format(' '.join(keepFields)))

    # create indexing engine
    mmp = oemedchem.OEMatchedPairAnalyzer(mmpopts)

    # interpret SD fields as floating point data
    validdata = FilterSDData(keepFields, True)

    # add molecules to be indexed
    record = 0
    unindexed = 0
    for mol in ifsindex.GetOEGraphMols():
        if not alldata:
            # filter the input molecule SD data based on allowed fields
            validdata.FilterMolData(mol)

        if stripsalts:
            oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        if stripstereo:
            oechem.OEUncolorMol(mol,
                                (oechem.OEUncolorStrategy_RemoveAtomStereo |
                                 oechem.OEUncolorStrategy_RemoveBondStereo |
                                 oechem.OEUncolorStrategy_RemoveGroupStereo))

        # any return other than the record index passed in is counted as
        # "not indexed"
        status = mmp.AddMol(mol, record)
        if status != record:
            unindexed += 1
            if vverbose:
                oechem.OEThrow.Info('Input structure not added to index, record=%d status=%s' %
                                    (record, oemedchem.OEMatchedPairIndexStatusName(status)))
        record += 1
        if maxrec and record >= maxrec:
            break
        if statusrec and (record % statusrec) == 0:
            oechem.OEThrow.Info("Records: {} Indexed: {} Unindexed: {}"
                                .format(record, (record - unindexed), unindexed))

    if not mmp.NumMols():
        oechem.OEThrow.Fatal('No records in index structure file')

    if not mmp.NumMatchedPairs():
        oechem.OEThrow.Fatal('No matched pairs found from indexing, ' +
                             'use -fragGe,-fragLe options to extend indexing range')

    if not oemedchem.OEWriteMatchedPairAnalyzer(mmpindexfile, mmp):
        oechem.OEThrow.Fatal('Error serializing MMP index: {}'
                             .format(mmpindexfile))

    # return some status information
    oechem.OEThrow.Info("Records: {}, Indexed: {}, matched pairs: {:,d}"
                        .format(record, mmp.NumMols(), mmp.NumMatchedPairs()))
    return 0
def FindSimpleMatchedPairs(itf):
    """Index the input structures and print the matched-pair transforms found.

    Args:
        itf: parsed command-line interface providing ``-input`` (structure
            file to index) and ``-maxrec`` (stop after this many records;
            0 means no limit).

    Returns:
        True once all transforms and their matched pairs have been printed.

    The error level is lowered to Warning for the duration of the run, and
    a fatal error is raised when the input file cannot be opened.
    """
    ims = oechem.oemolistream()
    if not ims.open(itf.GetString("-input")):
        # fill the placeholder instead of appending to an unformatted "%s"
        oechem.OEThrow.Fatal("Unable to open %s for reading"
                             % itf.GetString("-input"))

    maxrecs = itf.GetInt("-maxrec")

    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # @ <SNIPPET-FINDSIMPLEMATCHEDPAIRS-EXAMPLE>
    # create options class with defaults
    mmpOpts = oemedchem.OEMatchedPairAnalyzerOptions()
    # for 'simple' pairs, alter default indexing options
    # - single cuts only, heavy atom substituents only (HMember indexing off)
    # NOTE(review): the ComboCuts flag is also set below, which does not
    # obviously match "single cuts only" - confirm the intended option set
    mmpOpts.SetOptions(oemedchem.OEMatchedPairOptions_SingleCuts |
                       oemedchem.OEMatchedPairOptions_ComboCuts |
                       oemedchem.OEMatchedPairOptions_UniquesOnly)
    # - limit substituent size to no more than 20% of input structure
    mmpOpts.SetIndexableFragmentRange(80., 100.)

    # create analyzer class with nondefault options
    mmpAnalyzer = oemedchem.OEMatchedPairAnalyzer(mmpOpts)

    # ignore common index status returns
    sIgnoreStatus = 'FragmentRangeFilter,DuplicateStructure,'
    sIgnoreStatus += 'FragmentationLimitFilter,HeavyAtomFilter'

    # index the input structures
    for recindex, mol in enumerate(ims.GetOEGraphMols(), start=1):
        # consider only the largest input fragment
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        # ignore stereochemistry
        oechem.OEUncolorMol(mol,
                            (oechem.OEUncolorStrategy_RemoveAtomStereo |
                             oechem.OEUncolorStrategy_RemoveBondStereo))

        # explicitly provide a 1-based index to refer to indexed structures
        # - to allow references back to external data elsewhere
        status = mmpAnalyzer.AddMol(mol, recindex)
        if status != recindex:
            statusname = oemedchem.OEMatchedPairIndexStatusName(status)
            if statusname not in sIgnoreStatus:
                oechem.OEThrow.Warning(
                    "{0}: molecule indexing error, status={1}".format(
                        recindex, statusname))

        # if limiting input, quit after limit
        if maxrecs and recindex >= maxrecs:
            break

    print("Index complete, matched pairs = {0}".format(
        mmpAnalyzer.NumMatchedPairs()))

    # specify how transforms are extracted (direction and allowed properties)
    extractMode = (
        oemedchem.OEMatchedPairTransformExtractMode_Sorted |
        oemedchem.OEMatchedPairTransformExtractMode_NoSMARTS |
        oemedchem.OEMatchedPairTransformExtractMode_AddMCSCorrespondence)

    extractOptions = oemedchem.OEMatchedPairTransformExtractOptions()
    # specify amount of chemical context at the site of the substituent change
    # in the transform
    extractOptions.SetContext(oemedchem.OEMatchedPairContext_Bond0)
    extractOptions.SetOptions(extractMode)

    # walk the transforms and print the matched pairs
    transforms = oemedchem.OEMatchedPairGetTransforms(mmpAnalyzer,
                                                      extractOptions)
    for xfmidx, mmpxform in enumerate(transforms, start=1):
        print("{0:2} {1}".format(xfmidx, mmpxform.GetTransform()))
        # dump matched molecular pairs and index identifiers
        # (recindex from indexing loop above)
        for mmppair in mmpxform.GetMatchedPairs():
            print("\tmatched pair molecule indices=({0},{1})".format(
                mmppair.FromIndex(), mmppair.ToIndex()))
    # @ </SNIPPET-FINDSIMPLEMATCHEDPAIRS-EXAMPLE>

    return True
def ChEMBLSolubilityUsage(itf):
    """Apply ChEMBL24 solubility transforms and export the improved molecules.

    Args:
        itf: parsed command-line interface providing ``-input`` (structures
            to transform) and ``-output`` (file receiving the transformed
            structures).

    Returns:
        True after printing the total number of exported molecules.

    For each input molecule (largest component only) every molecule produced
    by ``oemedchem.OEApplyChEMBL24SolubilityTransforms`` is examined: the
    per-pair deltas (toValue - fromValue) are read from the
    ``OEMMP_normalized_value (uM)`` SD data, products whose average delta is
    negative or that carry no usable values are skipped, and the remainder
    are annotated with ``average,stddev,count`` and written out.
    """
    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-input")):
        # fill the placeholder instead of appending to an unformatted "%s"
        oechem.OEThrow.Fatal("Unable to open %s for reading"
                             % itf.GetString("-input"))

    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-output")):
        # the file name lives on the interface, not on the output stream
        oechem.OEThrow.Fatal("Unable to open %s for writing"
                             % itf.GetString("-output"))

    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # @ <SNIPPET-OEAPPLYCHEMBLSOLUBILITY-EXAMPLE>
    # number of bonds of chemistry context at site of change
    # for the applied transforms
    totalmols = 0
    xformctxt = oemedchem.OEMatchedPairContext_Bond2
    for molidx, mol in enumerate(ifs.GetOEGraphMols(), start=1):
        # consider only the largest input fragment
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)
        smolcnt = 0
        # only consider solubility transforms having at least 5 matched pairs
        for solMol in oemedchem.OEApplyChEMBL24SolubilityTransforms(
                mol, xformctxt, 5):
            # compute net change in solubility from MMP data
            deltasol = []
            if oechem.OEHasSDData(solMol, "OEMMP_normalized_value (uM)"):
                for sditem in oechem.OEGetSDData(
                        solMol, "OEMMP_normalized_value (uM)").split('\n'):
                    # fromIndex,toIndex,fromValue,toValue
                    sdvalues = sditem.split(',')
                    # skip blank/short lines and entries missing a value
                    if len(sdvalues) < 4:
                        continue
                    if not sdvalues[2] or not sdvalues[3]:
                        continue
                    deltasol.append(float(sdvalues[3]) - float(sdvalues[2]))

            if not deltasol:
                continue

            avgsol = deltasol[0]
            if len(deltasol) > 1:
                avgsol = average(deltasol)
            # reject examples with net decrease in solubility
            if avgsol < 0.0:
                continue

            sdev = stddev(deltasol)

            # annotate with average,stddev,num
            oechem.OEAddSDData(
                solMol,
                "OEMMP_average_delta_normalized_value",
                "{0:.1F},{1:.2F},{2}".format(avgsol, sdev, len(deltasol)))

            # export solubility transformed molecule with SDData annotations
            if oechem.OEWriteMolecule(
                    ofs, solMol) == oechem.OEWriteMolReturnCode_Success:
                smolcnt += 1

        oechem.OEThrow.Info("{0}: Exported molecule count, {1}".format(
            molidx, smolcnt))
        totalmols += smolcnt
    # @ </SNIPPET-OEAPPLYCHEMBLSOLUBILITY-EXAMPLE>

    print("Exported molecule count = {0}".format(totalmols))

    return True