예제 #1
0
    def testMurtaghUPGMA(self):
        """Compare UPGMA trees built from raw points and from pairwise distances.

        Random points are clustered three ways: directly (default arguments),
        from a flat array of squared Euclidean distances (``isDistData=1``),
        and directly again with ``isDistData=0`` spelled out.  The latter two
        trees must have the same length as the first and ``Compare`` against
        it must return a falsy value (presumably "no difference" -- confirm
        against the Cluster class).

        The test is silently skipped when the ``Murtagh`` module is ``None``.
        """
        if Murtagh is None:
            return

        nPts = 5
        sz = 5
        points = numpy.random.random((nPts, sz))
        reference = Murtagh.ClusterData(points, nPts, Murtagh.UPGMA)[0]

        # One squared distance per pair (i, j) with j < i, rows in order.
        deltas = (points[i] - points[j] for i in range(nPts) for j in range(i))
        sqDists = numpy.array([sum(delta * delta) for delta in deltas])

        fromDists = Murtagh.ClusterData(
            sqDists, nPts, Murtagh.UPGMA, isDistData=1)[0]
        assert len(reference) == len(fromDists), 'length mismatch2'
        assert not reference.Compare(fromDists, ignoreExtras=0), 'equality failed3'

        fromPoints = Murtagh.ClusterData(
            points, nPts, Murtagh.UPGMA, isDistData=0)[0]
        assert len(reference) == len(fromPoints), 'length mismatch2'
        assert not reference.Compare(fromPoints, ignoreExtras=0), 'equality failed3'
예제 #2
0
파일: ClusterMols.py 프로젝트: kozo2/rdkit
def ClusterPoints(data,
                  metric,
                  algorithmId,
                  haveLabels=False,
                  haveActs=True,
                  returnDistances=False):
    """Cluster ``data`` with Murtagh and annotate the resulting tree.

    A distance matrix is built with ``GetDistanceMatrix(data, metric)`` and
    handed to ``Murtagh.ClusterData``; the first element of its result is the
    tree that gets annotated and returned.

    Arguments:
      - data: indexable sequence of rows; ``row[0]`` is used as the label and,
        when activities are in use, ``row[2]`` is converted with ``int``
      - metric: forwarded to ``GetDistanceMatrix``
      - algorithmId: forwarded to ``Murtagh.ClusterData``
      - haveLabels: if true ``row[0]`` is used verbatim as the point name,
        otherwise the name is ``'Mol: <row[0]>'``
      - haveActs: if true and the first row has more than two entries,
        activities are read from ``row[2]`` and stored on the points
      - returnDistances: if true the distance matrix is returned as well

    Returns:
      the cluster tree, or a ``(tree, distance matrix)`` 2-tuple when
      ``returnDistances`` is true.
    """
    message('Generating distance matrix.\n')
    distances = GetDistanceMatrix(data, metric)
    message('Clustering\n')
    tree = Murtagh.ClusterData(distances, len(data), algorithmId,
                               isDistData=1)[0]

    # Activities are only read when requested and the rows are long enough.
    useActs = haveActs and len(data[0]) > 2
    activities = [int(row[2]) for row in data] if useActs else []

    if haveLabels:
        names = [row[0] for row in data]
    else:
        names = ['Mol: %s' % str(row[0]) for row in data]

    tree._ptLabels = names
    if activities:
        tree._ptValues = activities

    for point in tree.GetPoints():
        # GetIndex() is shifted down by one to address the per-row lists.
        pos = point.GetIndex() - 1
        point.SetName(names[pos])
        if not activities:
            continue
        try:
            point.SetData(int(activities[pos]))
        except Exception:
            # Best effort: a point whose activity cannot be set is left as is.
            pass

    if returnDistances:
        return tree, distances
    return tree
예제 #3
0
def WardsClustering(dists, nfps):
    """Run Murtagh Ward's clustering on a distance matrix and print the wall time.

    :param dists: distance data, passed to ``Murtagh.ClusterData`` with
        ``isDistData=True``
    :param nfps: number of points, passed through as the second argument
    :return: whatever ``Murtagh.ClusterData`` returns, unmodified
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3,
    # unlike the print statement, which is a SyntaxError on Python 3.
    print("-------------------------------------------------")
    print("starting Wards clustering")
    start_time = time.time()
    c_tree = Murtagh.ClusterData(dists, nfps, Murtagh.WARDS, isDistData=True)
    # The doubled space reproduces the separator the old
    # `print "time taken: ", value` statement inserted between its items.
    print("time taken:  %s" % (time.time() - start_time))
    return c_tree
예제 #4
0
def gen_coarseclusters(dists, nfps):
    """
    Generate coarse grained clusters (Murtagh, Ward's method) from a Tanimoto distance matrix.

    :param dists: Tanimoto distance matrix, passed to ``Murtagh.ClusterData`` with ``isDistData=1``
    :param nfps: number of fingerprints
    :return: cs (clusters) -- the value returned by ``Murtagh.ClusterData``, unmodified
    """
    # Function-scope import: rdkit is only required when this is called.
    from rdkit.ML.Cluster import Murtagh

    # now cluster the data:
    cs = Murtagh.ClusterData(dists, nfps, Murtagh.WARDS, isDistData=1)
    return cs
예제 #5
0
파일: cluster.py 프로젝트: mivicms/clusfps
    def ClusterFps_Murtagh(self, dists, nfps, method, ncluster):
        """Cluster the fingerprints with a Murtagh algorithm and record membership.

        :param dists: distance data, passed to ``Murtagh.ClusterData`` with
            ``isDistData=1``
        :param nfps: unused; the point count is taken from ``len(self.fplist)``
        :param method: one of ``'Wards'``, ``'SLINK'``, ``'CLINK'``, ``'UPGMA'``
        :param ncluster: number of clusters requested from
            ``ClusterUtils.SplitIntoNClusters``
        :raises ValueError: if ``method`` is not one of the supported names

        Side effects: ``self.cdict`` is reset and refilled so that each point's
        data maps to ``[cluster number, flag]``; ``self.clustdict`` is updated
        (not reset) so that each 1-based cluster number maps to the list of its
        points' data.
        """
        self.cdict = {}

        # Method name -> attribute name on the Murtagh module.  Validating
        # here gives a clear error instead of a TypeError on ``None[0]`` later.
        algorithm_names = {
            'Wards': 'WARDS',
            'SLINK': 'SLINK',
            'CLINK': 'CLINK',
            'UPGMA': 'UPGMA',
        }
        if method not in algorithm_names:
            raise ValueError(
                "unknown clustering method %r; expected one of %s"
                % (method, ', '.join(sorted(algorithm_names))))

        cs = Murtagh.ClusterData(dists,
                                 len(self.fplist),
                                 getattr(Murtagh, algorithm_names[method]),
                                 isDistData=1)

        splitClusts = ClusterUtils.SplitIntoNClusters(cs[0], ncluster)
        for index, cluster in enumerate(splitClusts, start=1):
            pts = [x.GetData() for x in cluster.GetPoints()]
            self.clustdict[index] = pts
            for pt in pts:
                # The flag is "true" for entries equal to the first point of
                # the cluster.
                # NOTE(review): "flase" looks like a typo for "false"; it is
                # kept because callers may compare against this exact string.
                flag = "true" if pt == pts[0] else "flase"
                self.cdict[pt] = [index, flag]
예제 #6
0
    [[10.0, 5.0], [20.0, 20.0], [30.0, 10.0], [30.0, 15.0], [5.0, 10.0]],
    numpy.float)
# print(...) with a single argument produces the same output on Python 2 and 3.
print('2')

# Flat list of squared Euclidean distances between the rows of ``d`` (the
# point array built just above): one entry per pair (i, j) with j < i.
dists = []
for i in range(len(d)):
    for j in range(i):
        dist = sum((d[i] - d[j])**2)
        dists.append(dist)
dists = numpy.array(dists)

# Run every algorithm on the same distances and print the first element of
# each result.
for label, algorithm in (
        ('Wards:', Murtagh.WARDS),
        ('SLINK:', Murtagh.SLINK),
        ('CLINK:', Murtagh.CLINK),
        ('UPGMA:', Murtagh.UPGMA),
):
    print(label)
    clusters = Murtagh.ClusterData(dists, len(d), algorithm, isDistData=1)
    clusters[0].Print()