Ejemplo n.º 1
0
    def Init(self):
        """ Initializes internal parameters.  This **must** be called after
        making any changes to the signature parameters.

        The following attributes are (re)computed:

          - _nFeats: number of feature families that are not in skipFeats

          - _starts: maps a point count to the index of its first bit

          - _scaffolds: scaffold lists, stored at the position given by the
            number of distances of the corresponding point count

          - _sigSize: total number of bits in the signature

          - sigKlass: the vector class to use for signatures

        """
        total = 0
        maxDists = len(Utils.nPointDistDict[self.maxPointCount + 1])
        self._scaffolds = [0] * maxDists
        self._starts = {}

        # only feature families that are not explicitly skipped are counted
        families = self.featFactory.GetFeatureFamilies()
        if self.skipFeats:
            self._nFeats = sum(1 for fam in families
                               if fam not in self.skipFeats)
        else:
            self._nFeats = len(families)

        for nPts in range(self.minPointCount, self.maxPointCount + 1):
            # bits for nPts-point pharmacophores start at the running total
            self._starts[nPts] = total
            nDists = len(Utils.nPointDistDict[nPts])
            scaffolds = Utils.GetPossibleScaffolds(
                nPts, self._bins, useTriangleInequality=self.trianglePruneBins)
            nScaffolds = len(scaffolds)
            self._scaffolds[nDists] = scaffolds
            # one bit per (feature combination, scaffold) pair
            total += Utils.NumCombinations(self._nFeats, nPts) * nScaffolds
        self._sigSize = total

        if self.useCounts:
            # count vectors: the index type depends on the signature size
            self.sigKlass = (IntSparseIntVect
                             if self._sigSize < 2**31 else LongSparseIntVect)
        else:
            self.sigKlass = SparseBitVect
Ejemplo n.º 2
0
  def testUniquifyCombinations(self):
    """ Equivalent combinations must collapse into a single tuple. """
    # Last equivalent combination is returned
    mirrored = [[1, 2, 3], [3, 2, 1]]
    uniq = Utils.UniquifyCombinations(mirrored)
    self.assertEqual(uniq, [(3, 2, 1)])

    # Exact duplicates collapse as well; the comparison is order-independent
    uniq = Utils.UniquifyCombinations([[1], [1], [2]])
    self.assertEqual(sorted(uniq), sorted([(1,), (2,)]))
Ejemplo n.º 3
0
  def testOrderTriangle(self):
    """ Additional OrderTriangle checks that complement the doctests. """
    # Inputs that do not hold exactly three entries are rejected
    with self.assertRaises(ValueError):
      Utils.OrderTriangle([0, 2], [1, 2, 3])
    with self.assertRaises(ValueError):
      Utils.OrderTriangle([0, 2, 4], [1, 2])

    # Both distance orderings are expected to give the same result.
    # Fresh literals are passed on every call on purpose.
    for dists in ([2, 3, 4], [4, 3, 2]):
      ordered = Utils.OrderTriangle([1, 3, 1], dists)
      self.assertEqual(ordered, ([1, 3, 1], [4, 3, 2]))

    # If all the features are the same, we want the distances in reverse order
    for dist in itertools.permutations([1, 2, 3]):
      ordered = Utils.OrderTriangle([1, 1, 1], dist)
      self.assertEqual(ordered, ([1, 1, 1], [3, 2, 1]))
Ejemplo n.º 4
0
 def testGetPossiblePharmacophores(self):
   """ Checks how many scaffolds are enumerated over three distance bins. """
   bins = [(1, 2), (2, 3), (5, 6)]
   # each probe is (number of points, expected number of scaffolds)
   for tpl in ((2, 3), (3, 24)):
     tgt = tpl[1]
     cnt = len(Utils.GetPossibleScaffolds(tpl[0], bins))
     assert cnt == tgt, f'bad pharmacophore count {cnt} for probe {str(tpl)}'
   # a single point is expected to give 0 instead of a sequence
   singlePoint = Utils.GetPossibleScaffolds(1, bins)
   self.assertEqual(singlePoint, 0)
Ejemplo n.º 5
0
 def testCounts(self):
   """ Checks CountUpTo(3, 3, combo) against known values. """
   # combination -> expected count
   expected = {
     (0, 1, 2): 4,
     (0, 0, 0): 0,
     (2, 2, 2): 9,
     (1, 1, 2): 7,
     (1, 2, 2): 8,
   }
   for combo, tgt in expected.items():
     res = Utils.CountUpTo(3, 3, combo)
     assert res == tgt, f'Bad res ({res}) for combo {str((combo, tgt))}'
Ejemplo n.º 6
0
 def testGetTriangles(self):
   """ Checks the triangles reported for 2- to 5-point pharmacophores. """
   # each probe is (number of points, number of triangles, triangles)
   probes = (
     (2, 0, []),
     (3, 1, ((0, 1, 2), )),
     (4, 2, ((0, 1, 3), (1, 2, 4))),
     (5, 3, ((0, 1, 4), (1, 2, 5), (2, 3, 6))),
   )
   for tpl in probes:
     cnt, tris = tpl[1], tpl[2]
     r = Utils.GetTriangles(tpl[0])
     assert len(r) == cnt, f'bad triangle length {len(r)} for probe {str(tpl)}'
     assert r == tris, f'bad triangle list {str(r)} for probe {str(tpl)}'
Ejemplo n.º 7
0
 def testGetPossiblePharmacophores(self):
     """ Checks how many scaffolds are enumerated over three distance bins. """
     bins = [(1, 2), (2, 3), (5, 6)]
     # each probe is (number of points, expected number of scaffolds)
     for tpl in ((2, 3), (3, 24)):
         tgt = tpl[1]
         cnt = len(Utils.GetPossibleScaffolds(tpl[0], bins))
         message = 'bad pharmacophore count %d for probe %s' % (cnt, str(tpl))
         assert cnt == tgt, message
Ejemplo n.º 8
0
 def testLimitPharmacophores(self):
     """ Checks ScaffoldPasses for several distance-bin index triples. """
     bins = [(1, 2), (2, 3), (5, 6)]
     # distance-bin indices -> expected result
     expected = {
         (0, 0, 0): 1,
         (0, 0, 1): 1,
         (0, 0, 2): 0,
         (0, 1, 2): 1,
         (1, 1, 2): 1,
     }
     for tpl in expected.items():
         ds, tgt = tpl
         r = Utils.ScaffoldPasses(ds, bins)
         assert r == tgt, 'bad result %d for probe %s' % (r, str(tpl))
Ejemplo n.º 9
0
 def testGetTriangles(self):
     """ Checks the triangles reported for 2- to 5-point pharmacophores. """
     # each probe is (number of points, number of triangles, triangles)
     probes = (
         (2, 0, []),
         (3, 1, ((0, 1, 2), )),
         (4, 2, ((0, 1, 3), (1, 2, 4))),
         (5, 3, ((0, 1, 4), (1, 2, 5), (2, 3, 6))),
     )
     for tpl in probes:
         cnt, tris = tpl[1], tpl[2]
         r = Utils.GetTriangles(tpl[0])
         lengthMsg = 'bad triangle length %d for probe %s' % (len(r), str(tpl))
         assert len(r) == cnt, lengthMsg
         listMsg = 'bad triangle list %s for probe %s' % (str(r), str(tpl))
         assert r == tris, listMsg
Ejemplo n.º 10
0
    def GetBitInfo(self, idx):
        """ returns information about the given bit

        **Arguments**

          - idx: the bit index to be considered

        **Returns**

          a 3-tuple:

            1) the number of points in the pharmacophore

            2) the proto-pharmacophore (tuple of pattern indices)

            3) the scaffold (tuple of distance indices)

        **Raises**

          IndexError if idx is negative or not smaller than the signature size

        """
        # Negative indices have to be rejected as well: the floor division,
        # the modulo and the sequence indexing below would silently wrap them
        # around and describe an unrelated bit.
        if idx < 0 or idx >= self._sigSize:
            raise IndexError('bad index (%d) queried. %d is the max' %
                             (idx, self._sigSize))
        # first figure out how many points are in the p'cophore
        nPts = self.minPointCount
        while nPts < self.maxPointCount and self._starts[nPts + 1] <= idx:
            nPts += 1

        # how far are we in from the start point?
        offsetFromStart = idx - self._starts[nPts]
        if _verbose:
            print('\t %d Points, %d offset' % (nPts, offsetFromStart))

        # lookup the scaffolds; they are stored by number of distances
        nDists = len(Utils.nPointDistDict[nPts])
        scaffolds = self._scaffolds[nDists]

        nScaffolds = len(scaffolds)

        # figure out to which proto-pharmacophore we belong:
        #  each proto-pharmacophore owns nScaffolds consecutive bits
        protoIdx = offsetFromStart // nScaffolds
        indexCombos = Utils.GetIndexCombinations(self._nFeats, nPts)
        combo = tuple(indexCombos[protoIdx])
        if _verbose:
            print('\t combo: %s' % (str(combo)))

        # and which scaffold:
        scaffoldIdx = offsetFromStart % nScaffolds
        scaffold = scaffolds[scaffoldIdx]
        if _verbose:
            print('\t scaffold: %s' % (str(scaffold)))
        return nPts, combo, scaffold
Ejemplo n.º 11
0
 def testDistTriangleInequality(self):
   """ Checks BinsTriangleInequality for several triples of distance bins. """
   bins = [(1, 2), (2, 3), (5, 6)]
   # distance-bin indices -> expected result
   expected = {
     (0, 0, 0): 1,
     (0, 0, 1): 1,
     (0, 0, 2): 0,
     (0, 1, 2): 1,
     (1, 1, 2): 1,
   }
   for tpl in expected.items():
     ds, tgt = tpl
     # translate the three bin indices into the bins themselves
     first, second, third = (bins[x] for x in ds)
     r = Utils.BinsTriangleInequality(first, second, third)
     assert r == tgt, f'bad result {r} for probe {str(tpl)}'
Ejemplo n.º 12
0
def GetAtomsMatchingBit(sigFactory, bitIdx, mol, dMat=None, justOne=0, matchingAtoms=None):
  """ Returns a list of lists of atom indices for a bit

    **Arguments**

      - sigFactory: a SigFactory

      - bitIdx: the bit to be queried

      - mol: the molecule to be examined

      - dMat: (optional) the distance matrix of the molecule; it is
        computed from the molecule when not provided

      - justOne: (optional) if this is nonzero, only the first match
        will be returned.

      - matchingAtoms: (optional) if this is not None, it should
        contain a sequence of sequences with the indices of atoms in
        the molecule which match each of the patterns used by the
        signature; it is obtained from the factory when not provided

    **Returns**

      a list of tuples with the matching atoms (empty if one of the
      features of the bit has no match in the molecule)

    **Notes**

      - only implemented for shortest-path signatures (enforced with
        an assertion)

  """
  assert sigFactory.shortestPathsOnly, 'not implemented for non-shortest path signatures'
  nPts, featCombo, scaffold = sigFactory.GetBitInfo(bitIdx)
  if _verbose:
    print('info:', nPts)
    print('\t', featCombo)
    print('\t', scaffold)

  if matchingAtoms is None:
    matchingAtoms = sigFactory.GetMolFeats(mol)

  # find the atoms that match each features
  choices = []
  for featIdx in featCombo:
    tmp = matchingAtoms[featIdx]
    if not tmp:
      # one of the patterns didn't find a match, we
      #  can return now
      if _verbose:
        print('no match found for feature:', featIdx)
      return []
    choices.append(tmp)

  if _verbose:
    print('choices:')
    print(choices)

  if dMat is None:
    dMat = Chem.GetDistanceMatrix(mol, sigFactory.includeBondOrder)

  distsToCheck = Utils.nPointDistDict[nPts]
  # the bins do not depend on the candidate, so fetch them only once
  bins = sigFactory.GetBins()

  protoPharmacophores = Utils.GetAllCombinations(choices, noDups=1)

  res = []
  for protoPharm in protoPharmacophores:
    if _verbose:
      print('protoPharm:', protoPharm)
    for i, (a1, a2) in enumerate(distsToCheck):
      dLow, dHigh = bins[scaffold[i]]
      #
      # FIX: this is making all kinds of assumptions about
      #  things being single-atom matches (or at least that
      #  only the first atom matters
      #
      idx1, idx2 = protoPharm[a1][0], protoPharm[a2][0]
      dist = dMat[idx1, idx2]
      if _verbose:
        print('\t dist: %d->%d = %d (%d,%d)' % (idx1, idx2, dist, dLow, dHigh))
      # every distance has to fall into the half-open bin [dLow, dHigh)
      if dist < dLow or dist >= dHigh:
        break
    else:
      if _verbose:
        print('Found one')
      # we found it; sorting makes equivalent matches compare equal
      found = tuple(sorted(protoPharm))
      if found not in res:
        res.append(found)
        if justOne:
          break
  return res
Ejemplo n.º 13
0
def Gen2DFingerprint(mol, sigFactory, perms=None, dMat=None):
    """ generates a 2D fingerprint for a molecule using the
    parameters in _sigFactory_

    **Arguments**

      - mol: the molecule for which the signature should be generated

      - sigFactory : the SigFactory object with signature parameters
        NOTE: no preprocessing is carried out for _sigFactory_.
              It *must* be pre-initialized.

      - perms: (optional) a sequence of permutation indices limiting which
        pharmacophore combinations are allowed; when not provided, all
        index combinations for the supported point counts are used

      - dMat: (optional) the distance matrix to be used; it is computed
        from the molecule when not provided

    **Returns**

      the signature obtained from _sigFactory_, updated with the matches
      found

    **Notes**

      - raises a ValueError if _sigFactory_ is not a SigFactory

      - point counts above 3 are not supported: the maximum is clipped
        to 3 and a warning is logged

      - matches are only processed when the factory has
        shortestPathsOnly set

    """
    if not isinstance(sigFactory, SigFactory.SigFactory):
        raise ValueError('bad factory')
    families = sigFactory.GetFeatFamilies()
    if _verbose:
        print('* feat famillies:', families)
    numFamilies = len(families)
    lowCount = sigFactory.minPointCount
    highCount = sigFactory.maxPointCount
    if highCount > 3:
        logger.warning(
            ' Pharmacophores with more than 3 points are not currently supported.\nSetting maxCount to 3.'
        )
        highCount = 3

    # generate the molecule's distance matrix, if required
    if dMat is None:
        from rdkit import Chem
        dMat = Chem.GetDistanceMatrix(mol, sigFactory.includeBondOrder)

    # generate the permutations, if required
    if perms is None:
        perms = []
        for nPts in range(lowCount, highCount + 1):
            perms.extend(Utils.GetIndexCombinations(numFamilies, nPts))

    # generate the matches:
    featMatches = sigFactory.GetMolFeats(mol)
    if _verbose:
        print('  featMatches:', featMatches)

    sig = sigFactory.GetSignature()
    for perm in perms:
        # the permutation is a combination of feature indices
        #   defining the feature set for a proto-pharmacophore.
        # Consecutive positions holding the same feature index share a
        #   class number; the number grows by one at every change.
        featClasses = [0]
        for pos in range(1, len(perm)):
            step = 0 if perm[pos] == perm[pos - 1] else 1
            featClasses.append(featClasses[-1] + step)

        # Get a set of matches at each index of
        #  the proto-pharmacophore.
        matchPerms = [featMatches[featIdx] for featIdx in perm]
        if _verbose:
            print('\n->Perm: %s' % (str(perm)))
            print('    matchPerms: %s' % (str(matchPerms)))

        # Get all unique combinations of those possible matches and keep
        #  only the second item of every pair in a combination:
        combos = Utils.GetUniqueCombinations(matchPerms, featClasses)
        matchesToMap = [[pair[1] for pair in combo] for combo in combos]
        if _verbose:
            print('    mtM:', matchesToMap)

        for match in matchesToMap:
            if sigFactory.shortestPathsOnly:
                _ShortestPathsMatch(match, perm, sig, dMat, sigFactory)
    return sig
Ejemplo n.º 14
0
    def GetBitIdx(self, featIndices, dists, sortIndices=True):
        """ returns the index for a pharmacophore described using a set of
        feature indices and distances

        **Arguments**

          - featIndices: a sequence of feature indices

          - dists: a sequence of distance between the features, only the
            unique distances should be included, and they should be in the
            order defined in Utils.

          - sortIndices : sort the indices

        **Returns**

          the integer bit index

        **Raises**

          - NotImplementedError for more than 3 points

          - IndexError if the number of points is outside the range
            supported by the factory, if any feature index is out of
            range or if no bin matches the distances

        """
        nPoints = len(featIndices)
        if nPoints > 3:
            raise NotImplementedError('>3 points not supported')
        # an empty pharmacophore is never valid; rejecting it here also
        # keeps the min()/max() calls below well defined
        if (nPoints == 0 or nPoints < self.minPointCount
                or nPoints > self.maxPointCount):
            raise IndexError('bad number of points')

        # this is the start of the nPoint-point pharmacophores
        startIdx = self._starts[nPoints]

        #
        # now we need to map the pattern indices to an offset from startIdx
        #
        if sortIndices:
            featIndices = sorted(featIndices)

        # check every index, not only the first one: without sorting, the
        # smallest index is not necessarily at the front
        if min(featIndices) < 0 or max(featIndices) >= self._nFeats:
            raise IndexError('bad feature index')

        if nPoints == 3:
            featIndices, dists = Utils.OrderTriangle(featIndices, dists)

        offset = Utils.CountUpTo(self._nFeats, nPoints, featIndices)
        if _verbose:
            print('offset for feature %s: %d' % (str(featIndices), offset))
        # scaffolds are stored by number of distances
        scaffolds = self._scaffolds[len(dists)]
        offset *= len(scaffolds)

        if _verbose:
            print('>>>>>>>>>>>>>>>>>>>>>>>')
            print('\tScaffolds:', repr(scaffolds), type(scaffolds))
            print('\tDists:', repr(dists), type(dists))
            print('\tbins:', repr(self._bins), type(self._bins))
        try:
            binIdx = self._findBinIdx(dists, self._bins, scaffolds)
        except ValueError:
            # report the problem in terms of feature family names
            fams = self.GetFeatFamilies()
            fams = [fams[x] for x in featIndices]
            raise IndexError(
                'distance bin not found: feats: %s; dists=%s; bins=%s; scaffolds: %s'
                % (fams, dists, self._bins, self._scaffolds))

        return startIdx + offset + binIdx