Beispiel #1
0
 def test1HierarchPick(self):
     """Cluster and pick from the 2D points stored in ``points.csv``.

     Each CSV row is read as: column 1 and column 2 -> point coordinates
     (floats), column 3 -> integer label. A Euclidean distance matrix is
     built from the points, then a WARD hierarchical picker is asked for
     two clusters. Every member of a cluster must carry the same label,
     and picking two points must return ``(1, 30)``.
     """
     fname = os.path.join(RDConfig.RDBaseDir, 'Code', 'SimDivPickers',
                          'Wrap', 'test_data', 'points.csv')
     with open(fname) as infil:
         lines = infil.readlines()
     nPts = len(lines)
     self.dataPts = numpy.zeros((nPts, 2), 'd')
     labels = []
     for row, line in enumerate(lines):
         tlst = line.strip().split(',')
         self.dataPts[row, 0] = float(tlst[1])
         self.dataPts[row, 1] = float(tlst[2])
         labels.append(int(tlst[3]))
     self.dMat = rdmmc.GetEuclideanDistMat(self.dataPts)
     pkr = rdSimDivPickers.HierarchicalClusterPicker(
         rdSimDivPickers.ClusterMethod.WARD)
     clusters = pkr.Cluster(self.dMat, nPts, 2)
     # check that each of the clusters have the same label; use unittest
     # assertions so the check survives ``python -O`` and reports values
     for cl in clusters:
         clbl = labels[cl[0]]
         for idx in cl:
             self.assertEqual(clbl, labels[idx])
     hierarch = pkr.Pick(self.dMat, nPts, 2)
     self.assertEqual(tuple(hierarch), (1, 30))
Beispiel #2
0
    def ClusterBits(self, corrMat):
        """Cluster the bits described by *corrMat* and store the result.

        The element-wise reciprocal of *corrMat* is handed to a
        hierarchical cluster picker built from ``self._type``, which is
        asked for ``self._nClusters`` clusters over ``len(self._bidList)``
        items. The resulting index clusters are translated through
        ``self._bidList`` and stored in ``self._clusters`` as a list of
        lists.
        """
        # the clustering code works on distances, so invert each entry
        inverted = 1 / corrMat
        picker = rdsimdiv.HierarchicalClusterPicker(self._type)
        nItems = len(self._bidList)
        indexClusters = picker.Cluster(inverted, nItems, self._nClusters)

        # translate positional indices into the actual bit ids
        self._clusters = [[self._bidList[pos] for pos in members]
                          for members in indexClusters]
Beispiel #3
0
    def testNonUniqueCrash(self):
        """Exercise the pickers on a pool that contains duplicate points.

        Every randomly generated bit vector is appended to the pool twice,
        so the pool holds non-unique entries. The MaxMin picker is run via
        both ``LazyPick`` and ``LazyBitVectorPick``; each must return ``N``
        picks and the two results must be identical. Finally a WARD
        hierarchical picker is run on the condensed distance list.
        """
        from rdkit import DataStructs
        sz = 10
        nbits = 20
        nBitsToSet = int(nbits * .3)
        N = 12
        vs = []
        for i in range(sz):
            bv = DataStructs.ExplicitBitVect(nbits)
            for j in range(nBitsToSet):
                val = int(nbits * random.random())
                bv.SetBit(val)
            # the same vector goes in twice on purpose: duplicates are the
            # condition under test
            vs.append(bv)
            vs.append(bv)

        def taniFunc(i, j, bvs=vs):
            d = 1 - DataStructs.FingerprintSimilarity(bvs[i], bvs[j])
            return d

        picker = rdSimDivPickers.MaxMinPicker()
        try:
            mm1 = picker.LazyPick(taniFunc, len(vs), N)
        except Exception as exc:
            # report the real cause instead of hiding it behind a flag
            self.fail("LazyPick raised %r" % (exc,))
        self.assertEqual(len(mm1), N)
        picker = None

        picker = rdSimDivPickers.MaxMinPicker()
        try:
            mm2 = picker.LazyBitVectorPick(vs, len(vs), N)
        except Exception as exc:
            self.fail("LazyBitVectorPick raised %r" % (exc,))
        self.assertEqual(len(mm2), N)
        self.assertEqual(tuple(mm2), tuple(mm1))
        picker = None

        # distances for every pair (i, j) with i < j, in row-major order
        nvs = len(vs)
        ds = [taniFunc(i, j) for i in range(nvs) for j in range(i + 1, nvs)]
        m = numpy.array(ds)
        picker = rdSimDivPickers.HierarchicalClusterPicker(
            rdSimDivPickers.ClusterMethod.WARD)
        # NOTE(review): no assertion follows in the visible code; presumably
        # completing the pick without crashing is the check - confirm
        list(picker.Pick(m, nvs, N))
Beispiel #4
0
 def testIssue208(self):
   """Picking twice from the same random distances gives the same set.

   A list of ``sz * (sz - 1) / 2`` random floats is used as the distance
   data for a WARD hierarchical picker; two successive picks of ``N``
   items must match once sorted.
   """
   sz = 10
   N = 3
   # one random value per (row, col) pair with row < col
   dists = [random.random() for row in range(sz) for _ in range(row + 1, sz)]
   m = numpy.array(dists)
   picker = rdSimDivPickers.HierarchicalClusterPicker(rdSimDivPickers.ClusterMethod.WARD)
   firstPick = sorted(picker.Pick(m, sz, N))
   secondPick = sorted(picker.Pick(m, sz, N))
   self.assertEqual(firstPick, secondPick)
Beispiel #5
0
 def testInts(self):
   """Check that integer distance values are accepted by the picker.

   The distance data is a list of ``sz * (sz - 1) / 2`` random integers
   (``int(100 * random.random())``). Two successive WARD hierarchical
   picks of ``N`` items must match once sorted.
   """
   sz = 10
   N = 3
   # one random integer per (row, col) pair with row < col
   dists = [int(100 * random.random()) for row in range(sz) for _ in range(row + 1, sz)]
   m = numpy.array(dists)
   picker = rdSimDivPickers.HierarchicalClusterPicker(rdSimDivPickers.ClusterMethod.WARD)
   firstPick = sorted(picker.Pick(m, sz, N))
   secondPick = sorted(picker.Pick(m, sz, N))
   self.assertEqual(firstPick, secondPick)
Beispiel #6
0
    def testNonUniqueCrash(self):
        """Exercise the pickers on a pool that contains duplicate points.

        Every randomly generated bit vector is appended to the pool twice,
        so the pool holds non-unique entries. The MaxMin picker is run once
        via ``LazyPick`` and twice via ``LazyBitVectorPick``; each run must
        return ``N`` picks, and at least one of the bit-vector runs must
        differ from the ``LazyPick`` result. Finally a WARD hierarchical
        picker is run on the condensed distance list.
        """
        from rdkit import DataStructs
        sz = 300
        nbits = 40
        nBitsToSet = int(nbits * .3)
        N = 8
        vs = []
        for i in range(sz):
            bv = DataStructs.ExplicitBitVect(nbits)
            for j in range(nBitsToSet):
                val = int(nbits * random.random())
                bv.SetBit(val)
            # the same vector goes in twice on purpose: duplicates are the
            # condition under test
            vs.append(bv)
            vs.append(bv)

        def taniFunc(i, j, bvs=vs):
            d = 1 - DataStructs.FingerprintSimilarity(bvs[i], bvs[j])
            return d

        picker = rdSimDivPickers.MaxMinPicker()
        mm1 = picker.LazyPick(taniFunc, len(vs), N)
        self.assertEqual(len(mm1), N)
        picker = None

        picker = rdSimDivPickers.MaxMinPicker()
        mm2 = picker.LazyBitVectorPick(vs, len(vs), N)
        self.assertEqual(len(mm2), N)

        picker = rdSimDivPickers.MaxMinPicker()
        mm3 = picker.LazyBitVectorPick(vs, len(vs), N)
        self.assertEqual(len(mm3), N)

        # we get the occasional dupe randomly,
        # make sure we don't get three dupes in a row.
        # The whole disjunction must be inside assertTrue(); otherwise only
        # the first comparison is asserted and a single dupe fails the test.
        self.assertTrue((tuple(mm2) != tuple(mm1)) or (tuple(mm3) != tuple(mm1)))
        picker = None

        # distances for every pair (i, j) with i < j, in row-major order
        nvs = len(vs)
        ds = [taniFunc(i, j) for i in range(nvs) for j in range(i + 1, nvs)]
        m = numpy.array(ds)
        picker = rdSimDivPickers.HierarchicalClusterPicker(
            rdSimDivPickers.ClusterMethod.WARD)
        # NOTE(review): no assertion follows in the visible code; presumably
        # completing the pick without crashing is the check - confirm
        list(picker.Pick(m, nvs, N))
Beispiel #7
0
 def test1HierarchPick(self):
     """Cluster and pick from the 2D points stored in ``points.csv``.

     The file is read from ``test_data/points.csv`` relative to the
     current working directory. Each CSV row is read as: column 1 and
     column 2 -> point coordinates (floats), column 3 -> integer label.
     A Euclidean distance matrix is built from the points, then a WARD
     hierarchical picker is asked for two clusters. Every member of a
     cluster must carry the same label, and picking two points must
     return ``(1, 30)``.
     """
     # context manager guarantees the handle is closed even if reading fails
     with open("test_data/points.csv", 'r') as infil:
         lines = infil.readlines()
     nPts = len(lines)
     self.dataPts = numpy.zeros((nPts, 2), 'd')
     labels = []
     for row, line in enumerate(lines):
         tlst = line.strip().split(',')
         self.dataPts[row, 0] = float(tlst[1])
         self.dataPts[row, 1] = float(tlst[2])
         labels.append(int(tlst[3]))
     self.dMat = rdmmc.GetEuclideanDistMat(self.dataPts)
     pkr = rdSimDivPickers.HierarchicalClusterPicker(
         rdSimDivPickers.ClusterMethod.WARD)
     clusters = pkr.Cluster(self.dMat, nPts, 2)
     # check that each of the clusters have the same label; use unittest
     # assertions so the check survives ``python -O`` and reports values
     for cl in clusters:
         clbl = labels[cl[0]]
         for idx in cl:
             self.assertEqual(clbl, labels[idx])
     hierarch = pkr.Pick(self.dMat, nPts, 2)
     self.assertEqual(tuple(hierarch), (1, 30))