def test1(self):
    # known-answer check: a size-4 container fed 0..9 must end up holding 6..9
    container = TopNContainer(4)
    for value in range(10):
        container.Insert(value, str(value))
    expectedPts = [6, 7, 8, 9]
    assert container.GetPts() == expectedPts
    # every extra is the string form of the point it was inserted with
    assert container.GetExtras() == [str(pt) for pt in expectedPts]
def test_keepAll(self):
    # known-answer check for a container built with size -1, where every entry is kept
    container = TopNContainer(-1)
    # feed the values in descending order (9, 8, ..., 0); the length must grow by one each time
    for nInserted, value in enumerate(range(9, -1, -1), start=1):
        container.Insert(value, str(value))
        self.assertEqual(len(container), nInserted)
    ascending = list(range(10))
    assert container.GetPts() == ascending
    assert container.GetExtras() == [str(value) for value in ascending]
def test2(self):
    # larger scale random test: the retained points must come back in non-decreasing order
    container = TopNContainer(50)
    for _ in range(1000):
        container.Insert(random.random())
    pts = container.GetPts()
    previous = pts[0]
    for current in pts[1:]:
        assert current >= previous
        previous = current
def GetNeighbors(self, example):
    """ Collects the nearest training examples to `example`

    Every training example is scored with the model's distance function
    (called as `dfunc(trainingExample, example, attrs)`).  When a radius is
    set, only examples whose distance is strictly below it are considered.

    **Arguments**

      - example: the point to find neighbors for

    **Returns**

      a list with the contents of the TopNContainer (built with size
      `self._k`) after it has been reversed; each candidate was inserted
      with the negated distance as its value and the training example as
      its extra.

    """
    closest = TopNContainer(self._k)
    for candidate in self._trainingExamples:
        dist = self._dfunc(candidate, example, self._attrs)
        withinRadius = self._radius is None or dist < self._radius
        if withinRadius:
            # the distance is negated on insertion, so a smaller distance is a larger value
            closest.Insert(-dist, candidate)
    closest.reverse()
    return list(closest)
def test3(self):
    # random test with extras: each extra is value + 1, so the extras must be ordered as well
    container = TopNContainer(10)
    for _ in range(100):
        value = random.random()
        container.Insert(value, value + 1)
    extras = container.GetExtras()
    previous = extras[0]
    for current in extras[1:]:
        assert current >= previous
        previous = current
def test4(self):
    # random test with extras and getitem: indexing yields (value, extra) pairs in ascending order
    container = TopNContainer(10)
    for _ in range(100):
        value = random.random()
        container.Insert(value, value + 1)
    prevValue, prevExtra = container[0]
    # index explicitly (rather than iterate) because __getitem__ is what is under test
    for idx in range(1, len(container)):
        value, extra = container[idx]
        assert value >= prevValue
        assert extra >= prevExtra
        prevValue, prevExtra = value, extra
def test5(self):
    """ random test with extras and getitem on a reversed container """
    container = TopNContainer(10)
    for _ in range(100):
        value = random.random()
        container.Insert(value, value + 1)
    container.reverse()
    prevValue, prevExtra = container[0]
    # after reverse() both the values and their extras must be non-increasing
    for idx in range(1, len(container)):
        value, extra = container[idx]
        assert value <= prevValue
        assert extra <= prevExtra
        prevValue, prevExtra = value, extra
def test5(self):
    # random test with extras and getitem, checked after reversing the container
    container = TopNContainer(10)
    for _ in range(100):
        value = random.random()
        container.Insert(value, value + 1)
    container.reverse()
    # pull every (value, extra) pair out through __getitem__, front to back
    entries = [container[idx] for idx in range(len(container))]
    previous = entries[0]
    for current in entries[1:]:
        # compare values and extras separately: both must be non-increasing
        assert current[0] <= previous[0]
        assert current[1] <= previous[1]
        previous = current
def MakePicks(self, force=False):
    """ Scores every data fingerprint against the probes and stores the picks

    Each fingerprint in `self.data` is scored as its best similarity (using
    `self.simMetric`) to any fingerprint in `self.probes`; -1.0 is used when
    there are no probes.  The scored fingerprints go through a TopNContainer
    of size `self.numToPick`, and the result is stored in `self._picks` as
    (fingerprint, score) tuples, in the reverse of the container's order.

    **Arguments**

      - force: if false and picks have already been made, nothing is done

    """
    alreadyPicked = self._picks is not None
    if alreadyPicked and not force:
        return

    best = TopNContainer(self.numToPick)
    for candidate in self.data:
        topScore = -1.0
        for probe in self.probes:
            sim = DataStructs.FingerprintSimilarity(candidate, probe, self.simMetric)
            topScore = max(sim, topScore)
        best.Insert(topScore, candidate)

    # the container yields (score, fingerprint); callers get (fingerprint, score)
    self._picks = [(pt, score) for score, pt in best]
    self._picks.reverse()
def ScreenFingerprints(details, data, mol=None, probeFp=None):
    """ Screens the points in `data` against a probe and returns a list of results

    **Arguments**

      - details: an object carrying the screening parameters.  The attributes
        read here are `doThreshold`, `topN`, `noPickle`, `metric`,
        `screenThresh` (only when `doThreshold` is set) and, optionally,
        `stopAfter`.  Its `__dict__` is also passed as keyword arguments to
        `FingerprintMols.FingerprintMol` when the probe has to be built.

      - data: an iterable of points.  If `details.noPickle` is false, a point
        is either an (id, fingerprint) tuple/list or an object that is itself
        the fingerprint and carries its id in `_fieldsFromDb[0]`.  If
        `details.noPickle` is true, a point is an (id, pickle) pair and
        `details.metric` is called directly with the probe and `str(pickle)`.

      - mol: the molecule to fingerprint when `probeFp` is not provided

      - probeFp: the probe fingerprint; built from `mol` when None

    **Returns**

      a list of (id, score) tuples.  An empty list is returned if the probe
      cannot be built or is empty.

    **Notes**

      - when `details.doThreshold` is false and `details.topN` is positive,
        the hits are routed through a TopNContainer of that size and added to
        the result at the end; otherwise hits are appended as they are found
        (filtered by `details.screenThresh` when thresholding).

      - if `details.stopAfter` exists, screening stops once that many points
        have been examined.

    """
    if probeFp is None:
        try:
            # plain call syntax instead of apply(), which is not available in Python 3
            probeFp = FingerprintMols.FingerprintMol(mol, **details.__dict__)
        except Exception:
            # best-effort: report the problem and return no hits.  Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit through.
            import traceback
            FingerprintMols.error('Error: problems fingerprinting molecule.\n')
            traceback.print_exc()
            return []
    if not probeFp:
        return []

    if not details.doThreshold and details.topN > 0:
        topN = TopNContainer(details.topN)
    else:
        # an empty list keeps both the truth test and the final loop below uniform
        topN = []
    res = []
    count = 0
    for pt in data:
        if not details.noPickle:
            # isinstance() replaces the Python-2-only types.TupleType/ListType check
            if isinstance(pt, (tuple, list)):
                ID, fp = pt
            else:
                fp = pt
                ID = pt._fieldsFromDb[0]
            score = DataStructs.FingerprintSimilarity(probeFp, fp, details.metric)
        else:
            ID, pkl = pt
            score = details.metric(probeFp, str(pkl))
        if topN:
            topN.Insert(score, ID)
        elif not details.doThreshold or score >= details.screenThresh:
            res.append((ID, score))
        count += 1
        if hasattr(details, 'stopAfter') and count >= details.stopAfter:
            break
    # the container yields (score, id); results are reported as (id, score)
    for score, ID in topN:
        res.append((ID, score))
    return res
def GetNeighborLists(probes, topN, pool, simMetric=DataStructs.DiceSimilarity, simThresh=-1.,
                     silent=False, **kwargs):
    """ Builds one neighbor list per probe by scoring every pool entry against the probes

    **Arguments**

      - probes: a sequence whose items carry the probe fingerprint at index 1;
        the fingerprint may be None, in which case that probe is never scored

      - topN: the size used for each neighbor list when no positive threshold
        is given

      - pool: an iterable of (name, fingerprint) pairs to search

      - simMetric: the similarity function.  Dice, Tanimoto and Tversky are
        dispatched to the corresponding bulk functions in DataStructs; any
        other callable is invoked as `simMetric(probeFp, poolFp)` for each
        valid probe.

      - simThresh: only scores strictly greater than this are inserted.  If
        it is positive the neighbor lists are created with size -1 instead
        of `topN`.

      - silent: if false, progress is logged every 1000 pool entries

      - tverskyA, tverskyB (keyword, default 0.5): weights handed to the bulk
        Tversky calculation

    **Returns**

      a list of TopNContainers, one per entry in `probes` and in the same
      order; each hit is inserted with the score as value and the pool name
      as extra.  The container for a probe without a fingerprint gets no
      insertions.

    """
    from rdkit.DataStructs.TopNContainer import TopNContainer

    probeFps = [x[1] for x in probes]
    # positions (into probes/nbrLists) of the probes that actually have a fingerprint
    validProbes = [idx for idx, pfp in enumerate(probeFps) if pfp is not None]
    validFps = [probeFps[idx] for idx in validProbes]

    listSize = topN if simThresh <= 0 else -1
    nbrLists = [TopNContainer(listSize) for _ in probeFps]

    nDone = 0
    for nm, fp in pool:
        nDone += 1
        if not silent and not nDone % 1000:
            logger.info(' searched %d rows' % nDone)

        if simMetric == DataStructs.DiceSimilarity:
            scores = DataStructs.BulkDiceSimilarity(fp, validFps)
        elif simMetric == DataStructs.TanimotoSimilarity:
            scores = DataStructs.BulkTanimotoSimilarity(fp, validFps)
        elif simMetric == DataStructs.TverskySimilarity:
            av = float(kwargs.get('tverskyA', 0.5))
            bv = float(kwargs.get('tverskyB', 0.5))
            scores = DataStructs.BulkTverskySimilarity(fp, validFps, av, bv)
        else:
            # no bulk form for an arbitrary metric: score the valid probes one at a time
            scores = [simMetric(pfp, fp) for pfp in validFps]

        # scores line up with validFps, so pair each one with the probe
        # position it belongs to.  Subscripting validProbes with a probe
        # position would pick the wrong list, or run off the end, as soon as
        # any probe fingerprint is None.
        for probeIdx, score in zip(validProbes, scores):
            if score > simThresh:
                nbrLists[probeIdx].Insert(score, nm)
    return nbrLists
def ScreenFingerprints(details, data, mol=None, probeFp=None):
    """ Scores each point in `data` against a probe fingerprint

    **Arguments**

      - details: the screening parameters; `doThreshold`, `topN`, `noPickle`,
        `metric`, `screenThresh` and (optionally) `stopAfter` are read from
        it, and its `__dict__` supplies the keyword arguments used to
        fingerprint `mol`

      - data: an iterable of points: (ID, fingerprint) tuples/lists or
        fingerprint objects with the ID in `_fieldsFromDb[0]`, or, when
        `details.noPickle` is set, (ID, pickle) pairs

      - mol: the molecule to fingerprint if no probe is provided

      - probeFp: the probe fingerprint; built from `mol` when None

    **Returns**

      a list of (ID, score) tuples; empty if the probe could not be built
      or is empty

    """
    if probeFp is None:
        try:
            probeFp = FingerprintMols.FingerprintMol(mol, **details.__dict__)
        except Exception:
            import traceback
            FingerprintMols.error('Error: problems fingerprinting molecule.\n')
            traceback.print_exc()
            return []
    if not probeFp:
        return []

    # top-N mode applies only when no threshold is requested
    keepTopN = not details.doThreshold and details.topN > 0
    best = TopNContainer(details.topN) if keepTopN else []

    hits = []
    for nSeen, pt in enumerate(data, start=1):
        if details.noPickle:
            # pickled form: the metric is handed the pickle string directly
            ident, pkl = pt
            score = details.metric(probeFp, str(pkl))
        else:
            if isinstance(pt, (tuple, list)):
                ident, fp = pt
            else:
                ident, fp = pt._fieldsFromDb[0], pt
            score = DataStructs.FingerprintSimilarity(probeFp, fp, details.metric)

        if best:
            best.Insert(score, ident)
        elif not details.doThreshold or score >= details.screenThresh:
            hits.append((ident, score))

        if hasattr(details, 'stopAfter') and nSeen >= details.stopAfter:
            break

    # the container yields (score, ID); flip each pair to match the other hits
    hits.extend((ident, score) for score, ident in best)
    return hits