def initFromLines(self, lines):
    """Parse feature definitions from text lines and initialize the features.

    Everything after a ``#`` on a line is dropped, and lines that are empty
    after that are skipped.  Every remaining line must contain at least five
    space/tab separated fields::

        <feature name> <x> <y> <z> <radius>

    Fields beyond the fifth are ignored.  For each line a
    ``ChemicalFeatures.FreeChemicalFeature`` is built (the name is used for
    both of its first two arguments) at ``Geometry.Point3D(x, y, z)``.

    If a line has too few fields, or one of the four numbers cannot be
    converted to ``float``, an error is logged and the method returns
    without calling ``self._initializeFeats``.

    **Arguments**

      - lines: an iterable of strings, one entry per input line
    """
    import re

    # Raw string: the previous literal "[\ \t]+" depended on the invalid
    # escape sequence "\ ", which current Python versions warn about.  The
    # characters matched (space and tab) are unchanged.
    spaces = re.compile(r"[ \t]+")
    feats = []
    rads = []
    # NOTE: lineNum is zero-based, so the numbers in the messages below are
    # offsets into `lines`, not one-based line numbers.
    for lineNum, line in enumerate(lines):
        txt = line.split("#")[0].strip()
        if not txt:
            continue
        splitL = spaces.split(txt)
        if len(splitL) < 5:
            logger.error(
                "Input line %d only contains %d fields, 5 are required. Read failed."
                % (lineNum, len(splitL))
            )
            return
        fName = splitL[0]
        try:
            xP = float(splitL[1])
            yP = float(splitL[2])
            zP = float(splitL[3])
            rad = float(splitL[4])
        except ValueError:
            logger.error("Error parsing a number of line %d. Read failed." % (lineNum))
            return
        feats.append(
            ChemicalFeatures.FreeChemicalFeature(fName, fName, Geometry.Point3D(xP, yP, zP))
        )
        rads.append(rad)
    self._initializeFeats(feats, rads)
def initFromLines(self, lines):
    """Parse feature definitions from text lines and initialize the features.

    Everything after a ``#`` on a line is dropped, and lines that are empty
    after that are skipped.  Every remaining line must contain at least five
    space/tab separated fields::

        <feature name> <x> <y> <z> <radius>

    Fields beyond the fifth are ignored.  For each line a
    ``ChemicalFeatures.FreeChemicalFeature`` is built (the name is used for
    both of its first two arguments) at ``Geometry.Point3D(x, y, z)``.

    If a line has too few fields, or one of the four numbers cannot be
    converted to ``float``, an error is logged and the method returns
    without calling ``self._initializeFeats``.

    **Arguments**

      - lines: an iterable of strings, one entry per input line
    """
    import re

    # Raw string: the previous literal '[\ \t]+' depended on the invalid
    # escape sequence '\ ', which current Python versions warn about.  The
    # characters matched (space and tab) are unchanged.
    spaces = re.compile(r'[ \t]+')
    feats = []
    rads = []
    # NOTE: lineNum is zero-based, so the numbers in the messages below are
    # offsets into `lines`, not one-based line numbers.
    for lineNum, line in enumerate(lines):
        txt = line.split('#')[0].strip()
        if not txt:
            continue
        splitL = spaces.split(txt)
        if len(splitL) < 5:
            logger.error(
                'Input line %d only contains %d fields, 5 are required. Read failed.'
                % (lineNum, len(splitL)))
            return
        fName = splitL[0]
        try:
            xP = float(splitL[1])
            yP = float(splitL[2])
            zP = float(splitL[3])
            rad = float(splitL[4])
        except ValueError:
            logger.error(
                'Error parsing a number of line %d. Read failed.' % (lineNum))
            return
        feats.append(
            ChemicalFeatures.FreeChemicalFeature(
                fName, fName, Geometry.Point3D(xP, yP, zP)))
        rads.append(rad)
    self._initializeFeats(feats, rads)
def checkConstraints(mol, recConf, feat, filt):
    """Check whether a matched ligand feature satisfies every constraint of a filter.

    Arguments:
      - mol: the ligand molecule; coordinates come from ``mol.GetConformer()``
      - recConf: conformer that provides the receptor atom positions
      - feat: the matched ligand feature; only ``feat.GetAtomIds()[0]`` is used
      - filt: the filter description.  ``filt[1][0]`` is the receptor atom
        index and ``filt[1][1]`` the indices of that atom's neighbours.
        From index 2 onwards the entries are read in groups of three:
        constraint name (``"Distance"`` or ``"Angle"``), lower bound,
        upper bound.

    Returns:
      True when all constraints are satisfied (they are "anded"), False as
      soon as one fails.  An unknown constraint name is logged and also
      yields False.
    """
    mC = mol.GetConformer()
    matchId = feat.GetAtomIds()[0]
    # all values in the filter are "anded", so loop over every triple
    for i in range(2, len(filt), 3):
        if filt[i] == "Distance":
            dist = mC.GetAtomPosition(matchId).Distance(recConf.GetAtomPosition(filt[1][0]))
            # Accepted range is lower < dist <= upper.
            # NOTE(review): SamudBe1 (04AUG2014) flagged these comparisons as
            # a potential error and suggested the accepted range should be
            # lower <= dist < upper.  Behaviour is left unchanged here;
            # confirm the intended bounds before altering them.
            if (dist <= filt[i + 1]) or (dist > filt[i + 2]):
                return False
        elif filt[i] == "Angle":
            # The receptor atom and the ligand atom may each have several
            # neighbours.  The constraint holds when at least one receptor
            # neighbour AND at least one ligand neighbour give an angle
            # strictly inside (lower, upper).
            lower = filt[i + 1]
            upper = filt[i + 2]
            ligPos = mC.GetAtomPosition(matchId)
            recPos = recConf.GetAtomPosition(filt[1][0])

            # receptor side first: angle at the receptor atom between the
            # ligand atom and each receptor neighbour; any() stops at the
            # first neighbour that fits
            toLigand = ligPos - recPos
            fitsRecAngle = any(
                lower < math.degrees(toLigand.AngleTo(recConf.GetAtomPosition(neighIdx) - recPos)) < upper
                for neighIdx in filt[1][1])
            if not fitsRecAngle:
                return False

            # ligand side: angle at the ligand atom between each of its
            # neighbours (taken directly from the molecule graph) and the
            # receptor atom
            toReceptor = recPos - ligPos
            fitsLigAngle = any(
                lower < math.degrees((mC.GetAtomPosition(nbr.GetIdx()) - ligPos).AngleTo(toReceptor)) < upper
                for nbr in mol.GetAtomWithIdx(matchId).GetNeighbors())
            if not fitsLigAngle:
                return False
        else:
            # report the constraint that was actually requested; the previous
            # code always printed filt[2], i.e. the first constraint name
            logger.error("Requesting a constraint that is not defined %s" % filt[i])
            return False
    # we only reach this position if the ligand matches all queries
    return True
def SaveState(self, fileName):
    """ Writes this calculator off to a file so that it can be easily loaded later

    If the file cannot be opened an error is logged and nothing is written.

    **Arguments**

      - fileName: the name of the file to be written

    """
    try:
        f = open(fileName, 'wb+')
    except Exception:
        logger.error('cannot open output file %s for writing' % (fileName))
        return
    # the context manager closes the file even when pickling raises, which
    # the previous explicit f.close() did not
    with f:
        pickle.dump(self, f)
def SaveState(self, fileName):
    """ Writes this calculator off to a file so that it can be easily loaded later

    If the file cannot be opened an error is logged and nothing is written.

    **Arguments**

      - fileName: the name of the file to be written

    """
    try:
        f = open(fileName, 'wb+')
    except Exception:
        logger.error('cannot open output file %s for writing' % (fileName))
        return
    # the context manager closes the file even when pickling raises, which
    # the previous explicit f.close() did not
    with f:
        cPickle.dump(self, f)
if options.outF!='-': options.outF = file(options.outF,'w+') else: options.outF = sys.stdout # get the ph4 Scoring parameters try: options.ph4DescOutFile = open(options.ph4DescOutFile,'w') except: parser.error('Error opening You need to have a ph4DescOutFile file') try: f = open(options.ph4desc,'r') except IOError, err: logger.error(err) featureOrder=[] splitL = [] Ph4_Init_value={} Ph4_Descriptors={} Ph4_location={} Ph4_Type={} Ph4_radius={} Ph4_FeatureType={} for line in f: line=line.rstrip("\n") #(Name,Init_value,Type,radius,x,y,z,Feature_Type)=line.split("\t") splitL = line.split(" ")
morganRows = [] fpConn.Commit() if not options.silent: logger.info('Finished.') if __name__=='__main__': options,args = parser.parse_args() if options.loadMols: if len(args)!=1: parser.error('please provide a filename argument') dataFilename = args[0] try: dataFile = open(dataFilename,'r') except IOError: logger.error('input file %s does not exist'%(dataFilename)) sys.exit(0) dataFile=None if not options.outDir: prefix = os.path.splitext(dataFilename)[0] options.outDir=prefix if not os.path.exists(options.outDir): try: os.mkdir(options.outDir) except: logger.error('could not create output directory %s'%options.outDir) sys.exit(1) if 1:
def RunSearch(options, queryFilename):
    """Run a substructure/property and/or similarity search and write the hits.

    The function works in four stages:

      1. choose the fingerprint builder, similarity metric and fingerprint
         database/table from ``options.similarityType`` (the metric can be
         overridden by ``options.similarityMetric``);
      2. if ``queryFilename`` is given, read the probe molecules from it and
         fingerprint them;
      3. if a SMILES/SMARTS query or a property query is present, collect the
         ids of the matching molecules from the ``molecules`` table;
      4. if there are probes, rank the (optionally pre-filtered) database
         fingerprints against them with ``GetNeighborLists``; then write the
         results to ``options.outF`` and, when requested, the molecules to
         ``options.sdfOut`` / ``options.smilesOut`` ('-' means stdout).

    Arguments:
      - options: the parsed command-line options object
      - queryFilename: name of a file with probe molecules, or a false value
        when no similarity search is wanted

    Side effects: sets ``options.queryMol`` when a SMILES/SMARTS query is
    given, may set ``options.fpColName`` and ``FingerprintUtils.sigFactory``,
    and calls ``sys.exit`` when the query molecule or query file is unusable.
    """
    global sigFactory
    # ---- stage 1: fingerprint type, default metric, database locations ----
    if options.similarityType == 'AtomPairs':
        fpBuilder = FingerprintUtils.BuildAtomPairFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.pairDbName)
        fpTableName = options.pairTableName
        fpColName = options.pairColName
    elif options.similarityType == 'TopologicalTorsions':
        fpBuilder = FingerprintUtils.BuildTorsionsFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.torsionsDbName)
        fpTableName = options.torsionsTableName
        fpColName = options.torsionsColName
    elif options.similarityType == 'RDK':
        fpBuilder = FingerprintUtils.BuildRDKitFP
        simMetric = DataStructs.FingerprintSimilarity
        dbName = os.path.join(options.dbDir, options.fpDbName)
        fpTableName = options.fpTableName
        if not options.fpColName:
            options.fpColName = 'rdkfp'
        fpColName = options.fpColName
    elif options.similarityType == 'Pharm2D':
        fpBuilder = FingerprintUtils.BuildPharm2DFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.fpDbName)
        fpTableName = options.pharm2DTableName
        if not options.fpColName:
            options.fpColName = 'pharm2dfp'
        fpColName = options.fpColName
        FingerprintUtils.sigFactory = BuildSigFactory(options)
    elif options.similarityType == 'Gobbi2D':
        from rdkit.Chem.Pharm2D import Gobbi_Pharm2D
        fpBuilder = FingerprintUtils.BuildPharm2DFP
        simMetric = DataStructs.TanimotoSimilarity
        dbName = os.path.join(options.dbDir, options.fpDbName)
        fpTableName = options.gobbi2DTableName
        if not options.fpColName:
            options.fpColName = 'gobbi2dfp'
        fpColName = options.fpColName
        FingerprintUtils.sigFactory = Gobbi_Pharm2D.factory
    elif options.similarityType == 'Morgan':
        fpBuilder = FingerprintUtils.BuildMorganFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.morganFpDbName)
        fpTableName = options.morganFpTableName
        fpColName = options.morganFpColName

    # an explicit metric overrides the per-fingerprint default
    extraArgs = {}
    if options.similarityMetric == 'tanimoto':
        simMetric = DataStructs.TanimotoSimilarity
    elif options.similarityMetric == 'dice':
        simMetric = DataStructs.DiceSimilarity
    elif options.similarityMetric == 'tversky':
        simMetric = DataStructs.TverskySimilarity
        extraArgs['tverskyA'] = options.tverskyA
        extraArgs['tverskyB'] = options.tverskyB

    # substructure query molecule (SMILES takes precedence over SMARTS)
    if options.smilesQuery:
        mol = Chem.MolFromSmiles(options.smilesQuery)
        if not mol:
            logger.error('could not build query molecule from smiles "%s"' % options.smilesQuery)
            sys.exit(-1)
        options.queryMol = mol
    elif options.smartsQuery:
        mol = Chem.MolFromSmarts(options.smartsQuery)
        if not mol:
            logger.error('could not build query molecule from smarts "%s"' % options.smartsQuery)
            sys.exit(-1)
        options.queryMol = mol

    # output destinations: '-' is stdout, an empty outF disables text output
    if options.outF == '-':
        outF = sys.stdout
    elif options.outF == '':
        outF = None
    else:
        outF = open(options.outF, 'w+')

    molsOut = False
    if options.sdfOut:
        molsOut = True
        if options.sdfOut == '-':
            sdfOut = sys.stdout
        else:
            sdfOut = open(options.sdfOut, 'w+')
    else:
        sdfOut = None
    if options.smilesOut:
        molsOut = True
        if options.smilesOut == '-':
            smilesOut = sys.stdout
        else:
            smilesOut = open(options.smilesOut, 'w+')
    else:
        smilesOut = None

    # ---- stage 2: read and fingerprint the probe molecules ----
    if queryFilename:
        try:
            # The file is opened only to check that it is readable; the
            # readers below take the filename.  Closing it right away fixes
            # the handle leak of the previous version.
            with open(queryFilename, 'r'):
                pass
        except IOError:
            logger.error('could not open query file %s' % queryFilename)
            sys.exit(1)

        if options.molFormat == 'smiles':
            func = GetMolsFromSmilesFile
        elif options.molFormat == 'sdf':
            func = GetMolsFromSDFile

        if not options.silent:
            msg = 'Reading query molecules'
            if fpBuilder:
                msg += ' and generating fingerprints'
            logger.info(msg)
        probes = []
        i = 0
        nms = []
        for nm, smi, mol in func(queryFilename, None, options.nameProp):
            i += 1
            nms.append(nm)
            if not mol:
                logger.error('query molecule %d could not be built' % (i))
                # keep a placeholder so probes and nms stay aligned
                probes.append((None, None))
                continue
            if fpBuilder:
                probes.append((mol, fpBuilder(mol)))
            else:
                probes.append((mol, None))
            if not options.silent and not i % 1000:
                logger.info(" done %d" % i)
    else:
        probes = None

    # ---- stage 3: substructure / property pre-filter ----
    # NOTE: several SQL strings below are filled in with "% locals()", so the
    # local names idCol, idTyp, where, join, propQuery, molDbName, molIdName,
    # fpColName, fpTableName and cnText must keep exactly these names.
    ids = None
    molDbName = os.path.join(options.dbDir, options.molDbName)
    molIdName = options.molIdName
    mConn = DbConnect(molDbName)
    cns = [(x.lower(), y) for x, y in mConn.GetColumnNamesAndTypes('molecules')]
    idCol, idTyp = cns[0]
    if options.propQuery or options.queryMol:
        conn = DbConnect(molDbName)
        curs = conn.GetCursor()
        if options.queryMol:
            if not options.silent:
                logger.info('Doing substructure query')
            # NOTE(review): propQuery is pasted into the SQL text unescaped;
            # it comes from the command line -- confirm that is acceptable.
            if options.propQuery:
                where = 'where %s' % options.propQuery
            else:
                where = ''
            if not options.silent:
                curs.execute('select count(*) from molecules %(where)s' % locals())
                nToDo = curs.fetchone()[0]

            join = ''
            doSubstructFPs = False
            fpDbName = os.path.join(options.dbDir, options.fpDbName)
            if os.path.exists(fpDbName) and not options.negateQuery:
                curs.execute("attach database '%s' as fpdb" % (fpDbName))
                try:
                    curs.execute('select * from fpdb.%s limit 1' % options.layeredTableName)
                except Exception:
                    # deliberate best effort: if the layered-fingerprint
                    # table cannot be queried, fall back to testing every
                    # molecule
                    pass
                else:
                    doSubstructFPs = True
                    join = 'join fpdb.%s using (%s)' % (options.layeredTableName, idCol)
                    query = LayeredOptions.GetQueryText(options.queryMol)
                    if query:
                        if not where:
                            where = 'where'
                        else:
                            where += ' and'
                        where += ' ' + query

            cmd = 'select %(idCol)s,molpkl from molecules %(join)s %(where)s' % locals()
            curs.execute(cmd)
            row = curs.fetchone()
            nDone = 0
            ids = []
            while row:
                molId, molpkl = row
                if not options.zipMols:
                    m = _molFromPkl(molpkl)
                else:
                    m = Chem.Mol(zlib.decompress(molpkl))
                matched = m.HasSubstructMatch(options.queryMol)
                if options.negateQuery:
                    matched = not matched
                if matched:
                    ids.append(molId)
                nDone += 1
                if not options.silent and not nDone % 500:
                    if not doSubstructFPs:
                        logger.info(' searched %d (of %d) molecules; %d hits so far' %
                                    (nDone, nToDo, len(ids)))
                    else:
                        logger.info(' searched through %d molecules; %d hits so far' %
                                    (nDone, len(ids)))
                row = curs.fetchone()
            if not options.silent and doSubstructFPs and nToDo:
                nFiltered = nToDo - nDone
                logger.info(' Fingerprint screenout rate: %d of %d (%%%.2f)' %
                            (nFiltered, nToDo, 100. * nFiltered / nToDo))
        elif options.propQuery:
            if not options.silent:
                logger.info('Doing property query')
            # only the text before the first ';' is used
            propQuery = options.propQuery.split(';')[0]
            curs.execute('select %(idCol)s from molecules where %(propQuery)s' % locals())
            ids = [x[0] for x in curs.fetchall()]
        if not options.silent:
            logger.info('Found %d molecules matching the query' % (len(ids)))

    # ---- stage 4: similarity search and text output ----
    t1 = time.time()
    if probes:
        if not options.silent:
            logger.info('Finding Neighbors')
        conn = DbConnect(dbName)
        cns = conn.GetColumnNames(fpTableName)
        curs = conn.GetCursor()

        if ids:
            # restrict the fingerprint pool to the pre-filtered molecules
            ids = [(x, ) for x in ids]
            curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' % locals())
            curs.executemany('insert into _tmpTbl values (?)', ids)
            join = 'join _tmpTbl using (%(idCol)s)' % locals()
        else:
            join = ''

        if cns[0].lower() != idCol.lower():
            # backwards compatibility to the days when mol tables had a guid
            # and the fps tables did not:
            curs.execute("attach database '%(molDbName)s' as mols" % locals())
            curs.execute((' select %(idCol)s,%(fpColName)s from %(fpTableName)s join'
                          ' (select %(idCol)s,%(molIdName)s from mols.molecules %(join)s)'
                          ' using (%(molIdName)s) ') % (locals()))
        else:
            curs.execute('select %(idCol)s,%(fpColName)s from %(fpTableName)s %(join)s' % locals())

        def poolFromCurs(curs, similarityMethod):
            # lazily yield (id, fingerprint) pairs from the open cursor
            row = curs.fetchone()
            while row:
                fpId, pkl = row
                fp = DepickleFP(pkl, similarityMethod)
                yield (fpId, fp)
                row = curs.fetchone()

        topNLists = GetNeighborLists(probes, options.topN,
                                     poolFromCurs(curs, options.similarityType),
                                     simMetric=simMetric, simThresh=options.simThresh,
                                     **extraArgs)
        uniqIds = set()
        nbrLists = {}
        for i, nm in enumerate(nms):
            topNLists[i].reverse()
            scores = topNLists[i].GetPts()
            nbrNames = topNLists[i].GetExtras()
            nbrs = []
            for j, nbrGuid in enumerate(nbrNames):
                # a None entry ends the list of neighbours for this probe
                if nbrGuid is None:
                    break
                uniqIds.add(nbrGuid)
                nbrs.append((nbrGuid, scores[j]))
            nbrLists[(i, nm)] = nbrs
        t2 = time.time()
        if not options.silent:
            logger.info('The search took %.1f seconds' % (t2 - t1))

        if not options.silent:
            logger.info('Creating output')

        # translate the neighbour ids into molecule names
        curs = mConn.GetCursor()
        ids = [(x, ) for x in uniqIds]
        curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' % locals())
        curs.executemany('insert into _tmpTbl values (?)', ids)
        curs.execute(
            'select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)' % locals())
        nmDict = {}
        for guid, molName in curs.fetchall():
            nmDict[guid] = str(molName)

        ks = sorted(nbrLists.keys())
        if not options.transpose:
            # one line per probe: name, then (neighbour, score) pairs
            for i, nm in ks:
                nbrs = nbrLists[(i, nm)]
                nbrTxt = options.outputDelim.join(
                    [nm] + ['%s%s%.3f' % (nmDict[nbrId], options.outputDelim, score)
                            for nbrId, score in nbrs])
                if outF:
                    print(nbrTxt, file=outF)
        else:
            # one column pair per probe, one row per neighbour rank
            # NOTE(review): this indexes every probe's list up to topN, so a
            # probe with fewer neighbours raises IndexError -- confirm the
            # intended padding before changing it.
            labels = ['%s%sSimilarity' % (x[1], options.outputDelim) for x in ks]
            if outF:
                print(options.outputDelim.join(labels), file=outF)
            for i in range(options.topN):
                outL = []
                for idx, nm in ks:
                    nbr = nbrLists[(idx, nm)][i]
                    outL.append(nmDict[nbr[0]])
                    outL.append('%.3f' % nbr[1])
                if outF:
                    print(options.outputDelim.join(outL), file=outF)
    else:
        if not options.silent:
            logger.info('Creating output')
        curs = mConn.GetCursor()
        # ids is still None when neither probes nor a query were supplied;
        # treat that as "no hits" instead of failing in set(None)
        ids = [(x, ) for x in set(ids or ())]
        curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' % locals())
        curs.executemany('insert into _tmpTbl values (?)', ids)
        molIdName = options.molIdName
        curs.execute(
            'select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)' % locals())
        nmDict = {}
        for guid, molName in curs.fetchall():
            nmDict[guid] = str(molName)
        if outF:
            print('\n'.join(nmDict.values()), file=outF)

    # ---- optional molecule output (SD file and/or SMILES) ----
    if molsOut and ids:
        molDbName = os.path.join(options.dbDir, options.molDbName)
        cns = [x.lower() for x in mConn.GetColumnNames('molecules')]
        # make sure the pickle column is the last one selected
        if cns[-1] != 'molpkl':
            cns.remove('molpkl')
            cns.append('molpkl')

        # _tmpTbl was filled on this connection by the output stage above
        curs = mConn.GetCursor()
        cnText = ','.join(cns)
        curs.execute('select %(cnText)s from molecules join _tmpTbl using (%(idCol)s)' % locals())

        row = curs.fetchone()
        while row:
            row = list(row)
            m = _molFromPkl(row[-1])
            guid = row[0]
            nm = nmDict[guid]
            if sdfOut:
                m.SetProp('_Name', nm)
                print(Chem.MolToMolBlock(m), file=sdfOut)
                # every column between the id and the pickle becomes an SD
                # data field
                for i in range(1, len(cns) - 1):
                    pn = cns[i]
                    pv = str(row[i])
                    # was the Python 2 statement "print >> sdfOut, ...",
                    # which raises TypeError with the print function
                    print('> <%s>\n%s\n' % (pn, pv), file=sdfOut)
                print('$$$$', file=sdfOut)
            if smilesOut:
                smi = Chem.MolToSmiles(m, options.chiralSmiles)
                print('%s %s' % (smi, str(row[1])), file=smilesOut)
            row = curs.fetchone()

    # close the output files this function opened (never stdout)
    for outHandle in (outF, sdfOut, smilesOut):
        if outHandle is not None and outHandle is not sys.stdout:
            outHandle.close()

    if not options.silent:
        logger.info('Done!')
def RunSearch(options,queryFilename):
    """Run a substructure/property and/or similarity search and write the hits.

    The function works in four stages:

      1. choose the fingerprint builder, similarity metric and fingerprint
         database/table from ``options.similarityType`` (the metric can be
         overridden by ``options.similarityMetric``);
      2. if ``queryFilename`` is given, read the probe molecules from it and
         fingerprint them;
      3. if a SMILES/SMARTS query or a property query is present, collect the
         ids of the matching molecules from the ``molecules`` table;
      4. if there are probes, rank the (optionally pre-filtered) database
         fingerprints against them with ``GetNeighborLists``; then write the
         results to ``options.outF`` and, when requested, the molecules to
         ``options.sdfOut`` / ``options.smilesOut`` ('-' means stdout).

    All output is written with ``write()`` so the code does not depend on
    whether ``print`` is a statement or a function.

    Arguments:
      - options: the parsed command-line options object
      - queryFilename: name of a file with probe molecules, or a false value
        when no similarity search is wanted

    Side effects: sets ``options.queryMol`` when a SMILES/SMARTS query is
    given, may set ``options.fpColName`` and ``FingerprintUtils.sigFactory``,
    and calls ``sys.exit`` when the query molecule or query file is unusable.
    """
    global sigFactory
    # ---- stage 1: fingerprint type, default metric, database locations ----
    if options.similarityType=='AtomPairs':
        fpBuilder=FingerprintUtils.BuildAtomPairFP
        simMetric=DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir,options.pairDbName)
        fpTableName = options.pairTableName
        fpColName = options.pairColName
    elif options.similarityType=='TopologicalTorsions':
        fpBuilder=FingerprintUtils.BuildTorsionsFP
        simMetric=DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir,options.torsionsDbName)
        fpTableName = options.torsionsTableName
        fpColName = options.torsionsColName
    elif options.similarityType=='RDK':
        fpBuilder=FingerprintUtils.BuildRDKitFP
        simMetric=DataStructs.FingerprintSimilarity
        dbName = os.path.join(options.dbDir,options.fpDbName)
        fpTableName = options.fpTableName
        if not options.fpColName:
            options.fpColName='rdkfp'
        fpColName = options.fpColName
    elif options.similarityType=='Pharm2D':
        fpBuilder=FingerprintUtils.BuildPharm2DFP
        simMetric=DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir,options.fpDbName)
        fpTableName = options.pharm2DTableName
        if not options.fpColName:
            options.fpColName='pharm2dfp'
        fpColName = options.fpColName
        FingerprintUtils.sigFactory = BuildSigFactory(options)
    elif options.similarityType=='Gobbi2D':
        from rdkit.Chem.Pharm2D import Gobbi_Pharm2D
        fpBuilder=FingerprintUtils.BuildPharm2DFP
        simMetric=DataStructs.TanimotoSimilarity
        dbName = os.path.join(options.dbDir,options.fpDbName)
        fpTableName = options.gobbi2DTableName
        if not options.fpColName:
            options.fpColName='gobbi2dfp'
        fpColName = options.fpColName
        FingerprintUtils.sigFactory = Gobbi_Pharm2D.factory
    elif options.similarityType=='Morgan':
        fpBuilder=FingerprintUtils.BuildMorganFP
        simMetric=DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir,options.morganFpDbName)
        fpTableName = options.morganFpTableName
        fpColName = options.morganFpColName

    # an explicit metric overrides the per-fingerprint default
    extraArgs={}
    if options.similarityMetric=='tanimoto':
        simMetric = DataStructs.TanimotoSimilarity
    elif options.similarityMetric=='dice':
        simMetric = DataStructs.DiceSimilarity
    elif options.similarityMetric=='tversky':
        simMetric = DataStructs.TverskySimilarity
        extraArgs['tverskyA']=options.tverskyA
        extraArgs['tverskyB']=options.tverskyB

    # substructure query molecule (SMILES takes precedence over SMARTS)
    if options.smilesQuery:
        mol=Chem.MolFromSmiles(options.smilesQuery)
        if not mol:
            logger.error('could not build query molecule from smiles "%s"'%options.smilesQuery)
            sys.exit(-1)
        options.queryMol = mol
    elif options.smartsQuery:
        mol=Chem.MolFromSmarts(options.smartsQuery)
        if not mol:
            logger.error('could not build query molecule from smarts "%s"'%options.smartsQuery)
            sys.exit(-1)
        options.queryMol = mol

    # output destinations: '-' is stdout, an empty outF disables text output.
    # open() replaces the legacy file() constructor.
    if options.outF=='-':
        outF=sys.stdout
    elif options.outF=='':
        outF=None
    else:
        outF = open(options.outF,'w+')

    molsOut=False
    if options.sdfOut:
        molsOut=True
        if options.sdfOut=='-':
            sdfOut=sys.stdout
        else:
            sdfOut = open(options.sdfOut,'w+')
    else:
        sdfOut=None
    if options.smilesOut:
        molsOut=True
        if options.smilesOut=='-':
            smilesOut=sys.stdout
        else:
            smilesOut = open(options.smilesOut,'w+')
    else:
        smilesOut=None

    # ---- stage 2: read and fingerprint the probe molecules ----
    if queryFilename:
        try:
            # The file is opened only to check that it is readable; the
            # readers below take the filename.  Closing it right away fixes
            # the handle leak of the previous version.
            with open(queryFilename,'r'):
                pass
        except IOError:
            logger.error('could not open query file %s'%queryFilename)
            sys.exit(1)

        if options.molFormat=='smiles':
            func=GetMolsFromSmilesFile
        elif options.molFormat=='sdf':
            func=GetMolsFromSDFile

        if not options.silent:
            msg='Reading query molecules'
            if fpBuilder:
                msg+=' and generating fingerprints'
            logger.info(msg)
        probes=[]
        i=0
        nms=[]
        for nm,smi,mol in func(queryFilename,None,options.nameProp):
            i+=1
            nms.append(nm)
            if not mol:
                logger.error('query molecule %d could not be built'%(i))
                # keep a placeholder so probes and nms stay aligned
                probes.append((None,None))
                continue
            if fpBuilder:
                probes.append((mol,fpBuilder(mol)))
            else:
                probes.append((mol,None))
            if not options.silent and not i%1000:
                logger.info(" done %d"%i)
    else:
        probes=None

    # ---- stage 3: substructure / property pre-filter ----
    # NOTE: several SQL strings below are filled in with "%locals()", so the
    # local names idCol, idTyp, where, join, propQuery, molDbName, molIdName,
    # fpColName, fpTableName and cnText must keep exactly these names.
    ids=None
    molDbName = os.path.join(options.dbDir,options.molDbName)
    molIdName = options.molIdName
    mConn = DbConnect(molDbName)
    cns = [(x.lower(),y) for x,y in mConn.GetColumnNamesAndTypes('molecules')]
    idCol,idTyp=cns[0]
    if options.propQuery or options.queryMol:
        conn = DbConnect(molDbName)
        curs = conn.GetCursor()
        if options.queryMol:
            if not options.silent:
                logger.info('Doing substructure query')
            # NOTE(review): propQuery is pasted into the SQL text unescaped;
            # it comes from the command line -- confirm that is acceptable.
            if options.propQuery:
                where='where %s'%options.propQuery
            else:
                where=''
            if not options.silent:
                curs.execute('select count(*) from molecules %(where)s'%locals())
                nToDo = curs.fetchone()[0]

            join=''
            doSubstructFPs=False
            fpDbName = os.path.join(options.dbDir,options.fpDbName)
            if os.path.exists(fpDbName) and not options.negateQuery:
                curs.execute("attach database '%s' as fpdb"%(fpDbName))
                try:
                    curs.execute('select * from fpdb.%s limit 1'%options.layeredTableName)
                except Exception:
                    # deliberate best effort: if the layered-fingerprint
                    # table cannot be queried, fall back to testing every
                    # molecule
                    pass
                else:
                    doSubstructFPs=True
                    join = 'join fpdb.%s using (%s)'%(options.layeredTableName,idCol)
                    query = LayeredOptions.GetQueryText(options.queryMol)
                    if query:
                        if not where:
                            where='where'
                        else:
                            where += ' and'
                        where += ' '+query

            cmd = 'select %(idCol)s,molpkl from molecules %(join)s %(where)s'%locals()
            curs.execute(cmd)
            row=curs.fetchone()
            nDone=0
            ids=[]
            while row:
                molId,molpkl = row
                # str() converts the database value to the form Chem.Mol
                # and zlib are given here; kept exactly as before
                if not options.zipMols:
                    m = Chem.Mol(str(molpkl))
                else:
                    m = Chem.Mol(zlib.decompress(str(molpkl)))
                matched=m.HasSubstructMatch(options.queryMol)
                if options.negateQuery:
                    matched = not matched
                if matched:
                    ids.append(molId)
                nDone+=1
                if not options.silent and not nDone%500:
                    if not doSubstructFPs:
                        logger.info(' searched %d (of %d) molecules; %d hits so far'%(nDone,nToDo,len(ids)))
                    else:
                        logger.info(' searched through %d molecules; %d hits so far'%(nDone,len(ids)))
                row=curs.fetchone()
            if not options.silent and doSubstructFPs and nToDo:
                nFiltered = nToDo-nDone
                logger.info(' Fingerprint screenout rate: %d of %d (%%%.2f)'%(nFiltered,nToDo,100.*nFiltered/nToDo))
        elif options.propQuery:
            if not options.silent:
                logger.info('Doing property query')
            # only the text before the first ';' is used
            propQuery=options.propQuery.split(';')[0]
            curs.execute('select %(idCol)s from molecules where %(propQuery)s'%locals())
            ids = [x[0] for x in curs.fetchall()]
        if not options.silent:
            logger.info('Found %d molecules matching the query'%(len(ids)))

    # ---- stage 4: similarity search and text output ----
    t1=time.time()
    if probes:
        if not options.silent:
            logger.info('Finding Neighbors')
        conn = DbConnect(dbName)
        cns = conn.GetColumnNames(fpTableName)
        curs = conn.GetCursor()

        if ids:
            # restrict the fingerprint pool to the pre-filtered molecules
            ids = [(x,) for x in ids]
            curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)'%locals())
            curs.executemany('insert into _tmpTbl values (?)',ids)
            join='join _tmpTbl using (%(idCol)s)'%locals()
        else:
            join=''

        if cns[0].lower() != idCol.lower():
            # backwards compatibility to the days when mol tables had a guid
            # and the fps tables did not:
            curs.execute("attach database '%(molDbName)s' as mols"%locals())
            curs.execute((' select %(idCol)s,%(fpColName)s from %(fpTableName)s join'
                          ' (select %(idCol)s,%(molIdName)s from mols.molecules %(join)s)'
                          ' using (%(molIdName)s) ')%(locals()))
        else:
            curs.execute('select %(idCol)s,%(fpColName)s from %(fpTableName)s %(join)s'%locals())

        def poolFromCurs(curs,similarityMethod):
            # lazily yield (id, fingerprint) pairs from the open cursor
            row = curs.fetchone()
            while row:
                fpId,pkl = row
                fp = DepickleFP(str(pkl),similarityMethod)
                yield (fpId,fp)
                row = curs.fetchone()

        topNLists = GetNeighborLists(probes,options.topN,poolFromCurs(curs,options.similarityType),
                                     simMetric=simMetric,simThresh=options.simThresh,**extraArgs)
        uniqIds=set()
        nbrLists = {}
        for i,nm in enumerate(nms):
            topNLists[i].reverse()
            scores=topNLists[i].GetPts()
            nbrNames = topNLists[i].GetExtras()
            nbrs = []
            for j,nbrGuid in enumerate(nbrNames):
                # a None entry ends the list of neighbours for this probe
                if nbrGuid is None:
                    break
                uniqIds.add(nbrGuid)
                nbrs.append((nbrGuid,scores[j]))
            nbrLists[(i,nm)] = nbrs
        t2=time.time()
        if not options.silent:
            logger.info('The search took %.1f seconds'%(t2-t1))

        if not options.silent:
            logger.info('Creating output')

        # translate the neighbour ids into molecule names
        curs = mConn.GetCursor()
        ids = [(x,) for x in uniqIds]
        curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)'%locals())
        curs.executemany('insert into _tmpTbl values (?)',ids)
        curs.execute('select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)'%locals())
        nmDict={}
        for guid,molName in curs.fetchall():
            nmDict[guid]=str(molName)

        # sorted() works whether keys() returns a list or a view
        ks = sorted(nbrLists.keys())
        if not options.transpose:
            # one line per probe: name, then (neighbour, score) pairs
            for i,nm in ks:
                nbrs= nbrLists[(i,nm)]
                nbrTxt=options.outputDelim.join(
                    [nm]+['%s%s%.3f'%(nmDict[nbrId],options.outputDelim,score) for nbrId,score in nbrs])
                if outF:
                    outF.write(nbrTxt+'\n')
        else:
            # one column pair per probe, one row per neighbour rank
            # NOTE(review): this indexes every probe's list up to topN, so a
            # probe with fewer neighbours raises IndexError -- confirm the
            # intended padding before changing it.
            labels = ['%s%sSimilarity'%(x[1],options.outputDelim) for x in ks]
            if outF:
                outF.write(options.outputDelim.join(labels)+'\n')
            for i in range(options.topN):
                outL = []
                for idx,nm in ks:
                    nbr = nbrLists[(idx,nm)][i]
                    outL.append(nmDict[nbr[0]])
                    outL.append('%.3f'%nbr[1])
                if outF:
                    outF.write(options.outputDelim.join(outL)+'\n')
    else:
        if not options.silent:
            logger.info('Creating output')
        curs = mConn.GetCursor()
        # ids is still None when neither probes nor a query were supplied;
        # treat that as "no hits" instead of failing in set(None)
        ids = [(x,) for x in set(ids or ())]
        curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)'%locals())
        curs.executemany('insert into _tmpTbl values (?)',ids)
        molIdName = options.molIdName
        curs.execute('select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)'%locals())
        nmDict={}
        for guid,molName in curs.fetchall():
            nmDict[guid]=str(molName)
        if outF:
            outF.write('\n'.join(nmDict.values())+'\n')

    # ---- optional molecule output (SD file and/or SMILES) ----
    if molsOut and ids:
        molDbName = os.path.join(options.dbDir,options.molDbName)
        cns = [x.lower() for x in mConn.GetColumnNames('molecules')]
        # make sure the pickle column is the last one selected
        if cns[-1]!='molpkl':
            cns.remove('molpkl')
            cns.append('molpkl')

        # _tmpTbl was filled on this connection by the output stage above
        curs = mConn.GetCursor()
        cnText=','.join(cns)
        curs.execute('select %(cnText)s from molecules join _tmpTbl using (%(idCol)s)'%locals())

        row=curs.fetchone()
        while row:
            row = list(row)
            pkl = row[-1]
            m = Chem.Mol(str(pkl))
            guid = row[0]
            nm = nmDict[guid]
            if sdfOut:
                m.SetProp('_Name',nm)
                sdfOut.write(Chem.MolToMolBlock(m)+'\n')
                # every column between the id and the pickle becomes an SD
                # data field
                for i in range(1,len(cns)-1):
                    pn = cns[i]
                    pv = str(row[i])
                    sdfOut.write('> <%s>\n%s\n'%(pn,pv)+'\n')
                sdfOut.write('$$$$'+'\n')
            if smilesOut:
                smi=Chem.MolToSmiles(m,options.chiralSmiles)
                smilesOut.write('%s %s'%(smi,str(row[1]))+'\n')
            row=curs.fetchone()

    # close the output files this function opened (never stdout)
    for outHandle in (outF,sdfOut,smilesOut):
        if outHandle is not None and outHandle is not sys.stdout:
            outHandle.close()

    if not options.silent:
        logger.info('Done!')
# get the input data # the receptor try: rec = Chem.MolFromMol2File(options.recF,removeHs=False) # setup the conformer for the receptor recConf = rec.GetConformer() except: print ("Problem reading receptor from %s" % options.recF) print "Error:", sys.exc_info()[0] quit() try: f = open(options.ph4F,'r') except IOError, err: logger.error(err) ph4Filters=[] for line in f: line=line.rstrip("\n") splitL=line.split(" ") # modify the types from str to the relevant types splitL[1]=int(splitL[1]) # I am not going to worry about "Represntation Errors"! for i in range(2,len(splitL),3): splitL[i+1]=float(splitL[i+1]) splitL[i+2]=float(splitL[i+2]) ph4Filters.append(splitL) if options.scoreFilter: try: f = open(options.scoreF,'r')
logger.info(f'testing {nm} queries') t1 = time.time() nPossible = 0 nTested = 0 nFound = 0 nErrors = 0 for i, fragfp in enumerate(qfps): for j, mfp in enumerate(mfps): nPossible += 1 if args.validateResults: matched = mols[j].HasSubstructMatch(qs[i]) fpMatch = DataStructs.AllProbeBitsMatch(fragfp, mfp) if fpMatch: nTested += 1 if matched: nFound += 1 if not fpMatch: nErrors += 1 logger.error(f"ERROR: mol {j} query {i}") else: if DataStructs.AllProbeBitsMatch(fragfp, mfp): nTested += 1 if mols[j].HasSubstructMatch(qs[i]): nFound += 1 t2 = time.time() ts.append(t2 - t1) logger.info( f'Results{len(ts)}: {t2-t1 : .2f} seconds. {nTested} tested ({nTested/nPossible :.4f} of total), {nFound} found, {nFound/nTested : .2f} accuracy. {nErrors} errors.' ) print(f"| {rdkit.__version__} | {' | '.join(['%.1f' % x for x in ts])} |")