Beispiel #1
0
 def __hieclu(self):
     """Cluster ``self.data_matrix`` into ``self.k`` groups with ``Ward``.

     Prints a progress message and returns the result of
     ``Ward.fit_predict`` on ``self.data_matrix``.
     """
     # Single-argument print(...) emits the same text on Python 2 and 3.
     print('using hierarchical clustering......')
     # fit_predict() fits the estimator itself; a separate fit() call
     # beforehand only ran the whole clustering a second time.
     return Ward(n_clusters=self.k).fit_predict(self.data_matrix)
Beispiel #2
0
def hieclu(data_matrix, k):
    """Cluster ``data_matrix`` into ``k`` groups with ``Ward``.

    Args:
        data_matrix: Data handed unchanged to ``Ward.fit_predict``.
        k: Number of clusters requested from ``Ward``.

    Returns:
        The result of ``Ward.fit_predict`` on ``data_matrix``.
    """
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('using hierarchical clustering......')
    # fit_predict() fits the estimator itself; a separate fit() call
    # beforehand only ran the whole clustering a second time.
    return Ward(n_clusters=k).fit_predict(data_matrix)
def hierarchicalClustering(x,k):
    """Cluster ``x`` into ``k`` groups and compute each group's mean vector.

    Args:
        x: Indexable collection of numeric rows; ``len(x[0])`` fixes the
            number of dimensions that are averaged.
        k: Number of clusters requested from ``Ward``.

    Returns:
        Tuple ``(centroids, labels)``. ``centroids`` is a list of ``k`` lists
        holding the per-dimension mean of the rows assigned to each cluster;
        ``labels`` is the result of ``Ward.fit_predict``.
    """
    labels = Ward(n_clusters=k).fit_predict(np.asarray(x))
    n_dims = len(x[0])

    # One running-sum row per cluster, every entry starting at zero.
    centroids = [[0] * n_dims for _ in range(k)]

    # Number of rows assigned to each cluster.
    ctrs = np.zeros(k)

    # Add every row onto the running sum of the cluster it was assigned to.
    for row_idx in range(len(x)):
        cluster = labels[row_idx]
        sums = centroids[cluster]
        for dim in range(len(sums)):
            sums[dim] += x[row_idx][dim]
        ctrs[cluster] += 1

    # Divide the sums by the member counts to obtain the means.
    for cluster, sums in enumerate(centroids):
        for dim in range(len(sums)):
            sums[dim] = sums[dim] / ctrs[cluster]

    return (centroids, labels)
	def __hieclu(self):
		"""Cluster ``self.data_matrix`` into ``self.k`` groups with ``Ward``.

		Prints a progress message and returns the result of
		``Ward.fit_predict`` on ``self.data_matrix``.
		"""
		# Single-argument print(...) emits the same text on Python 2 and 3.
		print('using hierarchical clustering......')
		# fit_predict() fits the estimator itself; a separate fit() call
		# beforehand only ran the whole clustering a second time.
		return Ward(n_clusters=self.k).fit_predict(self.data_matrix)
def hieclu(data_matrix, k):
	"""Cluster ``data_matrix`` into ``k`` groups with ``Ward``.

	Args:
		data_matrix: Data handed unchanged to ``Ward.fit_predict``.
		k: Number of clusters requested from ``Ward``.

	Returns:
		The result of ``Ward.fit_predict`` on ``data_matrix``.
	"""
	# Single-argument print(...) emits the same text on Python 2 and 3.
	print('using hierarchical clustering......')
	# fit_predict() fits the estimator itself; a separate fit() call
	# beforehand only ran the whole clustering a second time.
	return Ward(n_clusters=k).fit_predict(data_matrix)
Beispiel #6
0
    def agglomerate(self, nodes, edges, clusters, n_clusters=6):
        """Merge existing communities into coarser groups with ``Ward``.

        Builds a community-by-community matrix whose entry ``[a, b]`` counts
        how many neighbors of community ``a``'s members belong to community
        ``b``, then clusters that matrix.

        Args:
            nodes: Sequence of node identifiers.
            edges: Iterable of pairs; ``edge[0]`` is the node that owns the
                neighbor ``edge[1]``.
            clusters: Community id of each node, parallel to ``nodes``. The
                ids are used directly as matrix indices, so they must be
                valid indices into an ``N x N`` matrix where ``N`` is the
                number of distinct ids.
            n_clusters: Number of groups requested from ``Ward``. Defaults to
                6, the value that used to be hard-coded.

        Returns:
            List with the merged-group label of every node, in ``nodes``
            order.

        Raises:
            KeyError: If some neighbor in ``edges`` is not listed in
                ``nodes`` (it has no community to be counted under).
        """
        if len(nodes) != len(clusters):
            print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(clusters)))

        # Adjacency list: node -> neighbors reachable through its edges.
        neighbors = {}
        for edge in edges:
            neighbors.setdefault(edge[0], []).append(edge[1])

        node_clusters = {}  # node: its cluster id
        communities = {}  # cluster id: all neighbors for its members
        for i in range(len(nodes)):
            node, cluster = nodes[i], clusters[i]
            # A node that owns no edge simply contributes no neighbors
            # (previously a KeyError). Extending a fresh list also keeps the
            # adjacency lists in `neighbors` from being mutated by aliasing.
            communities.setdefault(cluster, []).extend(neighbors.get(node, ()))
            node_clusters[node] = cluster

        N = len(communities)
        affinity_matrix = sp.zeros([N, N])
        for comm, targets in communities.items():
            # Count how many of this community's neighbors fall in each
            # community.
            degree = Counter(node_clusters[target] for target in targets)
            for key, count in degree.items():
                affinity_matrix[comm, key] = count

        ward = Ward(n_clusters=n_clusters)
        predicts = ward.fit_predict(affinity_matrix)

        return [predicts[node_clusters[node]] for node in nodes]
Beispiel #7
0
    def constraint(self, nodes, edges, lables):
        """Run connectivity-constrained ``Ward`` clustering on ``self.A``.

        Groups the nodes by label, prints the size of every group, marks all
        pairs of nodes inside the group labelled 36 in an ``N x N`` matrix,
        and passes that matrix as ``connectivity`` to ``Ward``. The predicted
        labels are printed; nothing is returned.

        Args:
            nodes: List of node identifiers (``list.index`` is used on it).
            edges: Not used by this method.
            lables: Label of each node, parallel to ``nodes``.

        Raises:
            KeyError: If no node carries the label 36.
        """
        if len(nodes) != len(lables):
            print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(lables)))

        N = len(nodes)
        guidance_matrix = sp.zeros([N, N])

        # label -> nodes carrying that label
        circles = {}
        for i in range(len(nodes)):
            circles.setdefault(lables[i], []).append(nodes[i])

        # Iterating the dict directly yields its keys on Python 2 and 3
        # alike; iterkeys() exists only on Python 2.
        for key in circles:
            print(key, len(circles[key]))

        # NOTE(review): the circle used as guidance is hard-coded.
        c = 36
        # Resolve every member's position once, instead of repeating the
        # linear nodes.index() search for each (i, j) pair.
        member_rows = [nodes.index(member) for member in circles[c]]
        for i in member_rows:
            for j in member_rows:
                guidance_matrix[i, j] = 1.0

        guidance_matrix = sparse.lil_matrix(guidance_matrix)

        print(guidance_matrix)
        ward = Ward(n_clusters=6, n_components=2, connectivity=guidance_matrix)
        predicts = ward.fit_predict(self.A)

        print(predicts)
Beispiel #8
0
def hierarchicalClustering(x, k):
    """Cluster ``x`` into ``k`` groups and compute each group's mean vector.

    Args:
        x: Indexable collection of numeric rows; ``len(x[0])`` fixes the
            number of dimensions that are averaged.
        k: Number of clusters requested from ``Ward``.

    Returns:
        Tuple ``(centroids, labels)``. ``centroids`` is a list of ``k`` lists
        holding the per-dimension mean of the rows assigned to each cluster;
        ``labels`` is the result of ``Ward.fit_predict``.
    """
    labels = Ward(n_clusters=k).fit_predict(np.asarray(x))
    n_dims = len(x[0])

    # One running-sum row per cluster, every entry starting at zero.
    centroids = [[0] * n_dims for _ in range(k)]

    # Number of rows assigned to each cluster.
    ctrs = np.zeros(k)

    # Add every row onto the running sum of the cluster it was assigned to.
    for row_idx in range(len(x)):
        cluster = labels[row_idx]
        sums = centroids[cluster]
        for dim in range(len(sums)):
            sums[dim] += x[row_idx][dim]
        ctrs[cluster] += 1

    # Divide the sums by the member counts to obtain the means.
    for cluster, sums in enumerate(centroids):
        for dim in range(len(sums)):
            sums[dim] = sums[dim] / ctrs[cluster]

    return (centroids, labels)
Beispiel #9
0
    def constraint(self, nodes, edges, lables):
        """Run connectivity-constrained ``Ward`` clustering on ``self.A``.

        Groups the nodes by label, prints the size of every group, marks all
        pairs of nodes inside the group labelled 36 in an ``N x N`` matrix,
        and passes that matrix as ``connectivity`` to ``Ward``. The predicted
        labels are printed; nothing is returned.

        Args:
            nodes: List of node identifiers (``list.index`` is used on it).
            edges: Not used by this method.
            lables: Label of each node, parallel to ``nodes``.

        Raises:
            KeyError: If no node carries the label 36.
        """
        if len(nodes) != len(lables):
            print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(lables)))

        N = len(nodes)
        guidance_matrix = sp.zeros([N, N])

        # label -> nodes carrying that label
        circles = {}
        for i in range(len(nodes)):
            circles.setdefault(lables[i], []).append(nodes[i])

        # Iterating the dict directly yields its keys on Python 2 and 3
        # alike; iterkeys() exists only on Python 2.
        for key in circles:
            print(key, len(circles[key]))

        # NOTE(review): the circle used as guidance is hard-coded.
        c = 36
        # Resolve every member's position once, instead of repeating the
        # linear nodes.index() search for each (i, j) pair.
        member_rows = [nodes.index(member) for member in circles[c]]
        for i in member_rows:
            for j in member_rows:
                guidance_matrix[i, j] = 1.0

        guidance_matrix = sparse.lil_matrix(guidance_matrix)

        print(guidance_matrix)
        ward = Ward(n_clusters=6, n_components=2, connectivity=guidance_matrix)
        predicts = ward.fit_predict(self.A)

        print(predicts)
Beispiel #10
0
    def agglomerate(self, nodes, edges, clusters, n_clusters=6):
        """Merge existing communities into coarser groups with ``Ward``.

        Builds a community-by-community matrix whose entry ``[a, b]`` counts
        how many neighbors of community ``a``'s members belong to community
        ``b``, then clusters that matrix.

        Args:
            nodes: Sequence of node identifiers.
            edges: Iterable of pairs; ``edge[0]`` is the node that owns the
                neighbor ``edge[1]``.
            clusters: Community id of each node, parallel to ``nodes``. The
                ids are used directly as matrix indices, so they must be
                valid indices into an ``N x N`` matrix where ``N`` is the
                number of distinct ids.
            n_clusters: Number of groups requested from ``Ward``. Defaults to
                6, the value that used to be hard-coded.

        Returns:
            List with the merged-group label of every node, in ``nodes``
            order.

        Raises:
            KeyError: If some neighbor in ``edges`` is not listed in
                ``nodes`` (it has no community to be counted under).
        """
        if len(nodes) != len(clusters):
            print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(clusters)))

        # Adjacency list: node -> neighbors reachable through its edges.
        neighbors = {}
        for edge in edges:
            neighbors.setdefault(edge[0], []).append(edge[1])

        node_clusters = {}  # node: its cluster id
        communities = {}    # cluster id: all neighbors for its members
        for i in range(len(nodes)):
            node, cluster = nodes[i], clusters[i]
            # A node that owns no edge simply contributes no neighbors
            # (previously a KeyError). Extending a fresh list also keeps the
            # adjacency lists in `neighbors` from being mutated by aliasing.
            communities.setdefault(cluster, []).extend(neighbors.get(node, ()))
            node_clusters[node] = cluster

        N = len(communities)
        affinity_matrix = sp.zeros([N, N])
        for comm, targets in communities.items():
            # Count how many of this community's neighbors fall in each
            # community.
            degree = Counter(node_clusters[target] for target in targets)
            for key, count in degree.items():
                affinity_matrix[comm, key] = count

        ward = Ward(n_clusters=n_clusters)
        predicts = ward.fit_predict(affinity_matrix)

        return [predicts[node_clusters[node]] for node in nodes]
Beispiel #11
0
def cluster_ward(classif_data, vect_data):
	"""Cluster the training vectors into 10 ``Ward`` clusters.

	Prints the heading "Ward" followed by the Euclidean silhouette score of
	the clustering.

	Args:
		classif_data: No longer read; kept so existing callers keep working.
			The topic labels it holds were converted to an array that was
			never used.
		vect_data: Mapping whose ``"train_vect"`` entry holds the vectors to
			cluster. The ``"test_vect"`` entry is no longer read because its
			array was never used either.

	Returns:
		The labels produced by ``Ward.fit_predict`` for the training vectors.
	"""
	ward = Ward(n_clusters=10)
	np_arr_train = np.array(vect_data["train_vect"])

	labels = ward.fit_predict(np_arr_train)
	# Single-argument print(...) emits the same text on Python 2 and 3.
	print("Ward")
	sil_score = metrics.silhouette_score(np_arr_train, labels, metric='euclidean')
	print(sil_score)

	return labels
def get_km_segments(x, image, sps, n_segments=25):
    """Group superpixels with ``Ward`` clustering constrained to their graph.

    Args:
        x: Sequence ``(feats, edges)`` or ``(feats, edges, extra)``; a third
            item is ignored. ``len(feats)`` gives the number of graph nodes
            and ``edges.T`` supplies the (row, column) indices of the graph.
        image: Forwarded to ``get_colors``.
        sps: Forwarded to ``get_colors`` and ``get_centers``.
        n_segments: Number of clusters requested from ``Ward``.

    Returns:
        The result of ``Ward.fit_predict`` on the colors stacked next to the
        half-weighted centers.
    """
    if len(x) != 2:
        feats, edges, _ = x
    else:
        feats, edges = x

    colors_ = get_colors(image, sps)
    centers = get_centers(sps)

    n_spixel = len(feats)
    # One unit-weight entry per edge.
    edge_weights = np.ones(edges.shape[0])
    graph = sparse.coo_matrix((edge_weights, edges.T),
                              shape=(n_spixel, n_spixel))
    # Adding the transpose makes every edge present in both directions.
    connectivity = graph + graph.T
    ward = Ward(n_clusters=n_segments, connectivity=connectivity)

    color_feats = np.hstack([colors_, centers * 0.5])
    return ward.fit_predict(color_feats)
Beispiel #13
0
def get_km_segments(x, image, sps, n_segments=25):
    """Group superpixels with ``Ward`` clustering constrained to their graph.

    Args:
        x: Sequence ``(feats, edges)`` or ``(feats, edges, extra)``; a third
            item is ignored. ``len(feats)`` gives the number of graph nodes
            and ``edges.T`` supplies the (row, column) indices of the graph.
        image: Forwarded to ``get_colors``.
        sps: Forwarded to ``get_colors`` and ``get_centers``.
        n_segments: Number of clusters requested from ``Ward``.

    Returns:
        The result of ``Ward.fit_predict`` on the colors stacked next to the
        half-weighted centers.
    """
    if len(x) != 2:
        feats, edges, _ = x
    else:
        feats, edges = x

    colors_ = get_colors(image, sps)
    centers = get_centers(sps)

    n_spixel = len(feats)
    # One unit-weight entry per edge.
    edge_weights = np.ones(edges.shape[0])
    graph = sparse.coo_matrix((edge_weights, edges.T),
                              shape=(n_spixel, n_spixel))
    # Adding the transpose makes every edge present in both directions.
    connectivity = graph + graph.T
    ward = Ward(n_clusters=n_segments, connectivity=connectivity)

    color_feats = np.hstack([colors_, centers * .5])
    return ward.fit_predict(color_feats)
Beispiel #14
0
def hac_derived_ordering(
    bags_file,
    num_clusters_multiplier=0.4
):
    """Derive hierarchy sets from ``Ward`` clusters and evaluate them.

    Loads a document-term matrix via ``doc_term_mat_from_bags``, clusters it
    into ``int(num_clusters_multiplier * len(transcripts))`` groups, and for
    every transcript builds the set holding its own 1-based position plus the
    1-based positions of all earlier transcripts in the same cluster.

    Args:
        bags_file: Forwarded to ``doc_term_mat_from_bags``.
        num_clusters_multiplier: Fraction of the transcript count used as the
            number of clusters.

    Returns:
        Whatever ``compare_hierarchies`` returns for the list of sets.
    """
    # Formatting into one string prints the same text on Python 2 and 3
    # (a two-argument print(...) would print a tuple on Python 2).
    print('*HAC DERIVED ORDERING* %s' % (num_clusters_multiplier,))
    print('Starting Hierarchical Agglomerative Clustering analysis...')
    data, words, transcripts = doc_term_mat_from_bags(bags_file)
    n_clusters = int(num_clusters_multiplier * len(transcripts))
    # fit_predict() fits the model itself; the extra fit() that used to
    # precede it only ran the clustering twice.
    clust = Ward(n_clusters=n_clusters).fit_predict(data)

    hier_sets = []
    # cluster label -> 1-based positions of the transcripts seen so far.
    # Remembering them replaces the pairwise scan over all earlier
    # transcripts.
    earlier_members = {}
    for i in range(len(transcripts)):
        same_cluster = earlier_members.setdefault(clust[i], [])
        hier_sets.append(set([i + 1] + same_cluster))
        same_cluster.append(i + 1)
    return compare_hierarchies(hier_sets)
Beispiel #15
0
 def hierarchical(self, n_clusters):
     """Cluster ``self.A`` into ``n_clusters`` groups with ``Ward``.

     ``self.A`` is converted with ``sp.array`` before being handed to
     ``Ward.fit_predict``, whose result is returned.
     """
     model = Ward(n_clusters=n_clusters)
     samples = sp.array(self.A)
     return model.fit_predict(samples)
Beispiel #16
0
 def hierarchical(self, n_clusters):
     """Cluster ``self.A`` into ``n_clusters`` groups with ``Ward``.

     ``self.A`` is converted with ``sp.array`` before being handed to
     ``Ward.fit_predict``, whose result is returned.
     """
     model = Ward(n_clusters=n_clusters)
     samples = sp.array(self.A)
     return model.fit_predict(samples)