예제 #1
0
 def test1(self):
     # known-answer check: a container of size 4 fed 0..9 must report 6..9,
     # with the matching string extras
     container = TopNContainer(4)
     for value in range(10):
         container.Insert(value, str(value))
     expectedPts = list(range(6, 10))
     assert container.GetPts() == expectedPts
     assert container.GetExtras() == [str(value) for value in expectedPts]
예제 #2
0
 def test_keepAll(self):
     # known-answer check for a container built with size -1, which is
     # expected to keep every inserted value: the length must grow by one per
     # insert, and points/extras must come back in ascending order even
     # though they are inserted in descending order
     cont = TopNContainer(-1)
     for i in range(10):
         val = 9 - i
         cont.Insert(val, str(val))
         self.assertEqual(len(cont), i + 1)
     # use the unittest assertions throughout (as the loop above already does)
     # so the checks survive `python -O` and failures show both operands
     self.assertEqual(cont.GetPts(), list(range(10)))
     self.assertEqual(cont.GetExtras(), [str(x) for x in range(10)])
예제 #3
0
 def test2(self):
     # larger scale random test: feed 1000 random values into a size-50
     # container and verify the reported points never decrease
     container = TopNContainer(50)
     for _ in range(1000):
         container.Insert(random.random())
     pts = container.GetPts()
     previous = pts.pop(0)
     while pts:
         current = pts.pop(0)
         assert current >= previous
         previous = current
예제 #4
0
파일: KNNModel.py 프로젝트: abradle/rdkit
  def GetNeighbors(self, example):
    """ Returns the k nearest neighbors of the example

    Every training example is scored against the query with the model's
    distance function; when a radius is set, only examples whose distance is
    below it are considered.  Distances are inserted negated into a
    TopNContainer of size k, the container is reversed, and its entries are
    returned as a list.

    """
    closest = TopNContainer(self._k)
    for candidate in self._trainingExamples:
      dist = self._dfunc(candidate, example, self._attrs)
      # skip candidates that are not strictly inside the radius (if any)
      if self._radius is not None and not dist < self._radius:
        continue
      closest.Insert(-dist, candidate)
    closest.reverse()
    return [entry for entry in closest]
예제 #5
0
 def test3(self):
     # random test with extras: each extra is value + 1, so the extras must
     # come back in non-decreasing order as well
     container = TopNContainer(10)
     for _ in range(100):
         value = random.random()
         container.Insert(value, value + 1)
     extras = container.GetExtras()
     previous = extras.pop(0)
     while extras:
         current = extras.pop(0)
         assert current >= previous
         previous = current
예제 #6
0
    def GetNeighbors(self, example):
        """ Returns the k nearest neighbors of the example

        Every training example is scored against the query with the model's
        distance function; when a radius is set, only examples whose distance
        is below it are considered.  Distances are inserted negated into a
        TopNContainer of size k, the container is reversed, and its entries
        are returned as a list.

        """
        closest = TopNContainer(self._k)
        for candidate in self._trainingExamples:
            dist = self._dfunc(candidate, example, self._attrs)
            # skip candidates that are not strictly inside the radius (if any)
            if self._radius is not None and not dist < self._radius:
                continue
            closest.Insert(-dist, candidate)
        closest.reverse()
        return [entry for entry in closest]
예제 #7
0
 def test4(self):
     # random test with extras, read back through indexing: consecutive
     # (value, extra) pairs must be non-decreasing in both fields
     container = TopNContainer(10)
     for _ in range(100):
         value = random.random()
         container.Insert(value, value + 1)
     prevVal, prevExtra = container[0]
     for idx in range(1, len(container)):
         curVal, curExtra = container[idx]
         assert curVal >= prevVal
         assert curExtra >= prevExtra
         prevVal, prevExtra = curVal, curExtra
예제 #8
0
 def test5(self):
   """ random test with extras and getitem after reversing the container:
   consecutive (value, extra) pairs must be non-increasing in both fields """
   container = TopNContainer(10)
   for _ in range(100):
     value = random.random()
     container.Insert(value, value + 1)
   container.reverse()
   prevVal, prevExtra = container[0]
   for idx in range(1, len(container)):
     curVal, curExtra = container[idx]
     assert curVal <= prevVal
     assert curExtra <= prevExtra
     prevVal, prevExtra = curVal, curExtra
예제 #9
0
 def test5(self):
     # random test with extras and getitem after reversing the container:
     # consecutive (value, extra) pairs must be non-increasing in both fields
     container = TopNContainer(10)
     for _ in range(100):
         value = random.random()
         container.Insert(value, value + 1)
     container.reverse()
     prevVal, prevExtra = container[0]
     for idx in range(1, len(container)):
         curVal, curExtra = container[idx]
         assert curVal <= prevVal
         assert curExtra <= prevExtra
         prevVal, prevExtra = curVal, curExtra
예제 #10
0
 def MakePicks(self, force=False):
   # Picks are cached on the instance: recompute only when nothing has been
   # stored yet or the caller passes force.
   havePicks = self._picks is not None
   if havePicks and not force:
     return
   best = TopNContainer(self.numToPick)
   for candidate in self.data:
     # score each data fingerprint by its best similarity to any probe;
     # -1.0 is what gets inserted when there are no probes at all
     topScore = -1.0
     for probe in self.probes:
       sim = DataStructs.FingerprintSimilarity(candidate, probe, self.simMetric)
       topScore = max(sim, topScore)
     best.Insert(topScore, candidate)
   # store as (fingerprint, score) pairs, in the reverse of the container's order
   self._picks = []
   self._picks.extend((pt, score) for score, pt in best)
   self._picks.reverse()
예제 #11
0
def ScreenFingerprints(details, data, mol=None, probeFp=None):
    """ Screens a set of fingerprints against a probe and returns the results

    Arguments:
      - details: settings object.  The attributes read here are doThreshold,
        topN, noPickle, metric, screenThresh (only when doThreshold is set)
        and, if present, stopAfter.  When the probe fingerprint has to be
        generated, details.__dict__ is forwarded as keyword arguments to
        FingerprintMols.FingerprintMol.
      - data: iterable of candidates.  With details.noPickle set each item
        must unpack as (id, pkl) and is scored with
        details.metric(probeFp, str(pkl)).  Otherwise an item is either an
        (id, fp) tuple/list or a fingerprint whose id is taken from
        item._fieldsFromDb[0], and it is scored with
        DataStructs.FingerprintSimilarity.
      - mol: molecule used to build the probe fingerprint when probeFp is None
      - probeFp: optional precomputed probe fingerprint

    Returns:
      a list of (id, score) tuples.  Without thresholding and with
      details.topN > 0 the entries come from a TopNContainer of that size;
      otherwise every candidate is reported, filtered by
      score >= details.screenThresh when details.doThreshold is set.  An
      empty list is returned if the probe fingerprint cannot be generated or
      is empty.

    """
    if probeFp is None:
        try:
            # keyword expansion replaces the Python-2-only apply() builtin
            probeFp = FingerprintMols.FingerprintMol(mol, **details.__dict__)
        except Exception:
            # best effort: report the problem and return no results, but let
            # KeyboardInterrupt/SystemExit propagate
            import traceback
            FingerprintMols.error('Error: problems fingerprinting molecule.\n')
            traceback.print_exc()
            return []
    if not probeFp:
        return []

    if not details.doThreshold and details.topN > 0:
        topN = TopNContainer(details.topN)
    else:
        # an empty list is falsy, which routes every hit straight into res
        topN = []
    res = []
    count = 0
    for pt in data:
        if not details.noPickle:
            # isinstance replaces the removed types.TupleType/types.ListType
            if isinstance(pt, (tuple, list)):
                ptId, fp = pt
            else:
                fp = pt
                ptId = pt._fieldsFromDb[0]
            score = DataStructs.FingerprintSimilarity(probeFp, fp, details.metric)
        else:
            ptId, pkl = pt
            score = details.metric(probeFp, str(pkl))
        if topN:
            topN.Insert(score, ptId)
        elif not details.doThreshold or score >= details.screenThresh:
            res.append((ptId, score))
        count += 1
        if hasattr(details, 'stopAfter') and count >= details.stopAfter:
            break
    # the container yields (score, id) pairs; results are reported as (id, score)
    for score, ptId in topN:
        res.append((ptId, score))

    return res
예제 #12
0
def GetNeighborLists(probes,
                     topN,
                     pool,
                     simMetric=DataStructs.DiceSimilarity,
                     simThresh=-1.,
                     silent=False,
                     **kwargs):
    """ Builds one neighbor container per probe from a pool of fingerprints

    Arguments:
      - probes: sequence whose items carry the probe fingerprint at index 1;
        a fingerprint of None marks a probe that is skipped
      - topN: container size used when no similarity threshold is active
      - pool: iterable of (name, fingerprint) pairs to search
      - simMetric: similarity function.  Dice, Tanimoto and Tversky are
        dispatched to their DataStructs.Bulk* counterparts; anything else is
        called as simMetric(probeFp, fp) for each valid probe.
      - simThresh: only scores strictly greater than this are recorded.  When
        it is > 0 the containers are created with size -1 instead of topN.
      - silent: suppresses the progress message logged every 1000 pool rows
      - kwargs: tverskyA / tverskyB (default 0.5 each) are used for the
        Tversky metric

    Returns:
      a list of TopNContainers, one per entry of probes (including the ones
      without a fingerprint), holding (score, name) entries.

    """
    from rdkit.DataStructs.TopNContainer import TopNContainer

    probeFps = [x[1] for x in probes]
    # indices (into probes/nbrLists) of the probes that have a fingerprint
    validProbes = [i for i, pfp in enumerate(probeFps) if pfp is not None]
    validFps = [probeFps[i] for i in validProbes]

    containerSize = topN if simThresh <= 0 else -1
    nbrLists = [TopNContainer(containerSize) for _ in probeFps]

    # pick the bulk scorer once; None means "fall back to calling simMetric"
    if simMetric == DataStructs.DiceSimilarity:
        bulkScorer = DataStructs.BulkDiceSimilarity
    elif simMetric == DataStructs.TanimotoSimilarity:
        bulkScorer = DataStructs.BulkTanimotoSimilarity
    elif simMetric == DataStructs.TverskySimilarity:
        av = float(kwargs.get('tverskyA', 0.5))
        bv = float(kwargs.get('tverskyB', 0.5))

        def bulkScorer(fp, fps):
            return DataStructs.BulkTverskySimilarity(fp, fps, av, bv)
    else:
        bulkScorer = None

    nDone = 0
    for nm, fp in pool:
        nDone += 1
        if not silent and not nDone % 1000:
            logger.info('  searched %d rows' % nDone)
        if bulkScorer is not None:
            # bulk scores are aligned with validFps, so map each position back
            # to the original probe index through validProbes
            scores = bulkScorer(fp, validFps)
            for i, score in enumerate(scores):
                if score > simThresh:
                    nbrLists[validProbes[i]].Insert(score, nm)
        else:
            # here i already is the original probe index, so it addresses
            # nbrLists directly (going through validProbes[i] picked the wrong
            # list, or raised IndexError, whenever a probe had no fingerprint)
            for i in validProbes:
                score = simMetric(probeFps[i], fp)
                if score > simThresh:
                    nbrLists[i].Insert(score, nm)
    return nbrLists
예제 #13
0
def ScreenFingerprints(details, data, mol=None, probeFp=None):
  """ Screens a set of fingerprints against a probe and returns the results

  The probe fingerprint is generated from mol (using details.__dict__ as
  keyword arguments) when probeFp is not supplied.  Each item of data is
  scored against the probe: with details.noPickle set, items unpack as
  (id, pkl) and are scored with details.metric; otherwise they are either
  (id, fp) tuples/lists or fingerprints whose id is _fieldsFromDb[0], scored
  with DataStructs.FingerprintSimilarity.

  Returns a list of (id, score) tuples: the contents of a TopNContainer of
  size details.topN when no threshold is requested and topN > 0, otherwise
  every candidate (filtered by score >= details.screenThresh when
  details.doThreshold is set).  An empty list is returned if the probe
  fingerprint cannot be generated or is empty.

  """
  if probeFp is None:
    try:
      probeFp = FingerprintMols.FingerprintMol(mol, **details.__dict__)
    except Exception:
      import traceback
      FingerprintMols.error('Error: problems fingerprinting molecule.\n')
      traceback.print_exc()
      return []
  if not probeFp:
    return []

  keepBest = not details.doThreshold and details.topN > 0
  # an empty list is falsy, which routes every hit straight into hits below
  topN = TopNContainer(details.topN) if keepBest else []
  hits = []
  nSeen = 0
  for entry in data:
    if details.noPickle:
      ident, pkl = entry
      score = details.metric(probeFp, str(pkl))
    else:
      if isinstance(entry, (tuple, list)):
        ident, candidate = entry
      else:
        candidate = entry
        ident = entry._fieldsFromDb[0]
      score = DataStructs.FingerprintSimilarity(probeFp, candidate, details.metric)
    if topN:
      topN.Insert(score, ident)
    elif not details.doThreshold or score >= details.screenThresh:
      hits.append((ident, score))
    nSeen += 1
    if hasattr(details, 'stopAfter') and nSeen >= details.stopAfter:
      break

  # the container yields (score, id) pairs; results are reported as (id, score)
  hits.extend((ident, score) for score, ident in topN)
  return hits