def __hieclu(self):
    """Cluster self.data_matrix into self.k groups with Ward hierarchical clustering.

    Returns:
        Array of per-row cluster labels produced by fit_predict.
    """
    # print() call replaces the Python 2 print statement (identical output
    # on both Python 2 and 3 for a single string argument).
    print('using hierarchical clustering......')
    ac = Ward(n_clusters=self.k)
    # fit_predict both fits the model and returns labels; the original
    # called fit() first, fitting the same data twice for no benefit.
    return ac.fit_predict(self.data_matrix)
def hieclu(data_matrix, k):
    """Cluster data_matrix into k groups with Ward hierarchical clustering.

    Args:
        data_matrix: samples-by-features matrix to cluster.
        k: number of clusters.
    Returns:
        Array of per-row cluster labels produced by fit_predict.
    """
    # print() call replaces the Python 2 print statement.
    print('using hierarchical clustering......')
    ac = Ward(n_clusters=k)
    # fit_predict both fits the model and returns labels; the original
    # called fit() first, fitting the same data twice for no benefit.
    return ac.fit_predict(data_matrix)
def hierarchicalClustering(x, k):
    """Ward hierarchical clustering plus per-cluster mean centroids.

    Args:
        x: sequence of equal-length feature vectors (n_samples x n_features).
        k: number of clusters.
    Returns:
        (centroids, labels) where centroids[c] is the mean vector of the
        samples assigned to cluster c and labels is the per-sample cluster id.
    """
    data = np.asarray(x)
    labels = Ward(n_clusters=k).fit_predict(data)
    # Accumulate the per-cluster vector sums and sample counts, then divide
    # to get the means.  Replaces the original hand-rolled triple-nested
    # loops with numpy row accumulation.
    sums = np.zeros((k, len(x[0])))
    counts = np.zeros(k)
    for row, label in zip(data, labels):
        sums[label] += row
        counts[label] += 1
    # Keep the original list-of-lists return shape for centroids.
    centroids = [list(sums[c] / counts[c]) for c in range(k)]
    return (centroids, labels)
def __hieclu(self):
    """Cluster self.data_matrix into self.k groups with Ward hierarchical clustering.

    Returns:
        Array of per-row cluster labels produced by fit_predict.
    """
    # print() call replaces the Python 2 print statement.
    print('using hierarchical clustering......')
    ac = Ward(n_clusters=self.k)
    # fit_predict both fits the model and returns labels; the original
    # called fit() first, fitting the same data twice for no benefit.
    return ac.fit_predict(self.data_matrix)
def hieclu(data_matrix, k):
    """Cluster data_matrix into k groups with Ward hierarchical clustering.

    Args:
        data_matrix: samples-by-features matrix to cluster.
        k: number of clusters.
    Returns:
        Array of per-row cluster labels produced by fit_predict.
    """
    # print() call replaces the Python 2 print statement.
    print('using hierarchical clustering......')
    ac = Ward(n_clusters=k)
    # fit_predict both fits the model and returns labels; the original
    # called fit() first, fitting the same data twice for no benefit.
    return ac.fit_predict(data_matrix)
def agglomerate(self, nodes, edges, clusters):
    """Merge an initial clustering into 6 communities via Ward clustering.

    Args:
        nodes: list of node ids.
        edges: iterable of (src, dst) pairs; only src's neighbor list grows.
        clusters: per-node initial cluster id, aligned with nodes.
    Returns:
        List giving, for each node, its merged community label.
    NOTE(review): affinity_matrix[comm, key] indexing assumes cluster ids
    are integers in [0, len(communities)) -- confirm with the caller.
    """
    if len(nodes) != len(clusters):
        print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(clusters)))
    # Adjacency list: src node -> list of neighbor nodes.
    neighbors = {}
    for edge in edges:
        neighbors.setdefault(edge[0], []).append(edge[1])
    node_clusters = {}  # node -> its cluster id
    communities = {}    # cluster id -> all neighbors of its member nodes
    for node, cluster in zip(nodes, clusters):
        # extend() onto a fresh per-cluster list.  The original stored
        # neighbors[node] by reference on first assignment, so later
        # extend() calls silently mutated the `neighbors` dict as well.
        # .get(node, []) also guards nodes with no outgoing edges, which
        # previously raised KeyError.
        communities.setdefault(cluster, []).extend(neighbors.get(node, []))
        node_clusters[node] = cluster
    N = len(communities)
    affinity_matrix = sp.zeros([N, N])
    for comm, members in communities.items():
        # Count, for this community, how many of its members' neighbors
        # fall into each cluster.
        degree = Counter(node_clusters[node] for node in members)
        for key, count in degree.items():
            affinity_matrix[comm, key] = count
    ward = Ward(n_clusters=6)
    predicts = ward.fit_predict(affinity_matrix)
    return [predicts[node_clusters[node]] for node in nodes]
def constraint(self, nodes, edges, lables, target_circle=36):
    """Ward-cluster self.A constrained by one ground-truth circle.

    Builds a connectivity "guidance" matrix that links every pair of nodes
    belonging to the chosen circle, then runs Ward with that connectivity.

    Args:
        nodes: list of node ids.
        edges: unused here; kept for interface compatibility.
        lables: per-node circle label, aligned with nodes.
        target_circle: label of the circle used to build the guidance
            matrix (default 36, the value previously hard-coded).
    Side effects:
        Prints circle sizes, the guidance matrix, and the predictions.
    """
    if len(nodes) != len(lables):
        print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(lables)))
    N = len(nodes)
    circles = {}  # circle label -> member nodes
    guidance_matrix = sp.zeros([N, N])
    for node, label in zip(nodes, lables):
        circles.setdefault(label, []).append(node)
    # Plain dict iteration replaces iterkeys(), which does not exist on
    # Python 3 dicts (identical behavior on Python 2).
    for key in circles:
        print(key, len(circles[key]))
    # Hoist the node -> index lookup: nodes.index() inside the nested loop
    # was O(n) per call, O(m^2 * n) overall for a circle of size m.
    index_of = {node: i for i, node in enumerate(nodes)}
    members = circles[target_circle]
    for ni in members:
        i = index_of[ni]
        for nj in members:
            guidance_matrix[i, index_of[nj]] = 1.0
    guidance_matrix = sparse.lil_matrix(guidance_matrix)
    print(guidance_matrix)
    ward = Ward(n_clusters=6, n_components=2, connectivity=guidance_matrix)
    predicts = ward.fit_predict(self.A)
    print(predicts)
def hierarchicalClustering(x, k):
    """Ward hierarchical clustering plus per-cluster mean centroids.

    Args:
        x: sequence of equal-length feature vectors (n_samples x n_features).
        k: number of clusters.
    Returns:
        (centroids, labels) where centroids[c] is the mean vector of the
        samples assigned to cluster c and labels is the per-sample cluster id.
    """
    data = np.asarray(x)
    labels = Ward(n_clusters=k).fit_predict(data)
    # Accumulate per-cluster vector sums and sample counts, then divide to
    # get means -- replaces the original hand-rolled triple-nested loops.
    sums = np.zeros((k, len(x[0])))
    counts = np.zeros(k)
    for row, label in zip(data, labels):
        sums[label] += row
        counts[label] += 1
    # Keep the original list-of-lists return shape for centroids.
    centroids = [list(sums[c] / counts[c]) for c in range(k)]
    return (centroids, labels)
def constraint(self, nodes, edges, lables, target_circle=36):
    """Ward-cluster self.A constrained by one ground-truth circle.

    Builds a connectivity "guidance" matrix linking every pair of nodes in
    the chosen circle, then runs Ward with that connectivity.

    Args:
        nodes: list of node ids.
        edges: unused here; kept for interface compatibility.
        lables: per-node circle label, aligned with nodes.
        target_circle: label of the circle used to build the guidance
            matrix (default 36, the value previously hard-coded).
    Side effects:
        Prints circle sizes, the guidance matrix, and the predictions.
    """
    if len(nodes) != len(lables):
        print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(lables)))
    N = len(nodes)
    circles = {}  # circle label -> member nodes
    guidance_matrix = sp.zeros([N, N])
    for node, label in zip(nodes, lables):
        circles.setdefault(label, []).append(node)
    # Plain dict iteration replaces iterkeys(), which does not exist on
    # Python 3 dicts (identical behavior on Python 2).
    for key in circles:
        print(key, len(circles[key]))
    # Hoist the node -> index lookup: nodes.index() inside the nested loop
    # was O(n) per call.
    index_of = {node: i for i, node in enumerate(nodes)}
    members = circles[target_circle]
    for ni in members:
        i = index_of[ni]
        for nj in members:
            guidance_matrix[i, index_of[nj]] = 1.0
    guidance_matrix = sparse.lil_matrix(guidance_matrix)
    print(guidance_matrix)
    ward = Ward(n_clusters=6, n_components=2, connectivity=guidance_matrix)
    predicts = ward.fit_predict(self.A)
    print(predicts)
def agglomerate(self, nodes, edges, clusters):
    """Merge an initial clustering into 6 communities via Ward clustering.

    Args:
        nodes: list of node ids.
        edges: iterable of (src, dst) pairs; only src's neighbor list grows.
        clusters: per-node initial cluster id, aligned with nodes.
    Returns:
        List giving, for each node, its merged community label.
    NOTE(review): affinity_matrix[comm, key] indexing assumes cluster ids
    are integers in [0, len(communities)) -- confirm with the caller.
    """
    if len(nodes) != len(clusters):
        print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(clusters)))
    # Adjacency list: src node -> list of neighbor nodes.
    neighbors = {}
    for edge in edges:
        neighbors.setdefault(edge[0], []).append(edge[1])
    node_clusters = {}  # node -> its cluster id
    communities = {}    # cluster id -> all neighbors of its member nodes
    for node, cluster in zip(nodes, clusters):
        # extend() onto a fresh per-cluster list.  The original stored
        # neighbors[node] by reference on first assignment, so later
        # extend() calls silently mutated the `neighbors` dict as well.
        # .get(node, []) also guards nodes with no outgoing edges, which
        # previously raised KeyError.
        communities.setdefault(cluster, []).extend(neighbors.get(node, []))
        node_clusters[node] = cluster
    N = len(communities)
    affinity_matrix = sp.zeros([N, N])
    for comm, members in communities.items():
        # Count, for this community, how many of its members' neighbors
        # fall into each cluster.
        degree = Counter(node_clusters[node] for node in members)
        for key, count in degree.items():
            affinity_matrix[comm, key] = count
    ward = Ward(n_clusters=6)
    predicts = ward.fit_predict(affinity_matrix)
    return [predicts[node_clusters[node]] for node in nodes]
def cluster_ward(classif_data, vect_data):
    """Ward-cluster the training vectors into 10 groups and print silhouette.

    Args:
        classif_data: mapping; its "topics" entry was read but never used,
            so the access has been dropped.
        vect_data: mapping with a "train_vect" entry of feature vectors.
    Returns:
        Per-sample cluster labels.
    """
    ward = Ward(n_clusters=10)
    np_arr_train = np.array(vect_data["train_vect"])
    # Removed unused locals np_arr_label / np_arr_test: they were built
    # from classif_data["topics"] and vect_data["test_vect"] but never read.
    labels = ward.fit_predict(np_arr_train)
    # print() calls replace the Python 2 print statements.
    print("Ward")
    sil_score = metrics.silhouette_score(np_arr_train, labels, metric='euclidean')
    print(sil_score)
    return labels
def get_km_segments(x, image, sps, n_segments=25):
    """Group superpixels into n_segments via connectivity-constrained Ward.

    Args:
        x: (feats, edges) or (feats, edges, extra) tuple describing the
            superpixel graph.
        image: source image the colors are sampled from.
        sps: superpixel map passed to get_colors / get_centers.
        n_segments: number of segments to produce (default 25).
    Returns:
        Per-superpixel segment labels.
    """
    if len(x) == 2:
        feats, edges = x
    else:
        feats, edges, _ = x
    superpixel_colors = get_colors(image, sps)
    superpixel_centers = get_centers(sps)
    num_superpixels = len(feats)
    edge_weights = np.ones(edges.shape[0])
    adjacency = sparse.coo_matrix((edge_weights, edges.T),
                                  shape=(num_superpixels, num_superpixels))
    # Symmetrize the graph before handing it to Ward as connectivity.
    clusterer = Ward(n_clusters=n_segments, connectivity=adjacency + adjacency.T)
    # Features: color plus down-weighted (x 0.5) spatial position.
    color_position_feats = np.hstack([superpixel_colors, superpixel_centers * 0.5])
    return clusterer.fit_predict(color_position_feats)
def get_km_segments(x, image, sps, n_segments=25):
    """Group superpixels into n_segments via connectivity-constrained Ward.

    Args:
        x: (feats, edges) or (feats, edges, extra) tuple describing the
            superpixel graph.
        image: source image the colors are sampled from.
        sps: superpixel map passed to get_colors / get_centers.
        n_segments: number of segments to produce (default 25).
    Returns:
        Per-superpixel segment labels.
    """
    feats, edges = x[0], x[1]
    colors_ = get_colors(image, sps)
    centers = get_centers(sps)
    n_spixel = len(feats)
    adjacency = sparse.coo_matrix(
        (np.ones(edges.shape[0]), edges.T), shape=(n_spixel, n_spixel))
    # Use the symmetrized adjacency graph as the Ward connectivity.
    symmetric_graph = adjacency + adjacency.T
    model = Ward(n_clusters=n_segments, connectivity=symmetric_graph)
    # Features: color plus down-weighted (x 0.5) spatial position.
    features = np.hstack([colors_, centers * 0.5])
    return model.fit_predict(features)
def hac_derived_ordering(bags_file, num_clusters_multiplier=0.4):
    """HAC-derived hierarchy evaluated against the ground truth.

    Clusters the document-term matrix from bags_file with Ward, groups each
    transcript with the earlier transcripts sharing its cluster, and scores
    the resulting hierarchy sets with compare_hierarchies.

    Args:
        bags_file: path to the bag-of-words file.
        num_clusters_multiplier: fraction of the transcript count used as
            the number of clusters (default 0.4).
    Returns:
        Result of compare_hierarchies on the derived hierarchy sets.
    """
    # print() calls replace the Python 2 print statements.
    print('*HAC DERIVED ORDERING*', num_clusters_multiplier)
    print('Starting Hierarchical Agglomerative Clustering analysis...')
    data, words, transcripts = doc_term_mat_from_bags(bags_file)
    n_clusters = int(num_clusters_multiplier * len(transcripts))
    # fit_predict fits and labels in one pass; the original called .fit()
    # and then .fit_predict(), clustering the data twice.
    clust = Ward(n_clusters=n_clusters).fit_predict(data)
    hier_sets = []
    for i in range(len(transcripts)):
        # Group transcript i (1-based id) with every earlier transcript in
        # the same cluster.
        s = [i + 1] + [j + 1 for j in range(i) if clust[i] == clust[j]]
        hier_sets.append(set(s))
    return compare_hierarchies(hier_sets)
def hierarchical(self, n_clusters):
    """Run Ward hierarchical clustering on self.A.

    Args:
        n_clusters: number of clusters to form.
    Returns:
        Per-row cluster labels for self.A.
    """
    model = Ward(n_clusters=n_clusters)
    data = sp.array(self.A)
    return model.fit_predict(data)
def hierarchical(self, n_clusters):
    """Run Ward hierarchical clustering on self.A.

    Args:
        n_clusters: number of clusters to form.
    Returns:
        Per-row cluster labels for self.A.
    """
    # Single-expression form: build the estimator and predict in one pass.
    return Ward(n_clusters=n_clusters).fit_predict(sp.array(self.A))