def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.

    Also exercises the cached code path (``memory=``), early stopping of the
    tree construction (``compute_full_tree = False``), the unstructured case
    (``connectivity = None``) and the errors expected for dense or
    mis-shaped connectivity matrices.
    """
    import shutil

    rnd = np.random.RandomState(0)
    # Use the builtin ``bool``: the ``np.bool`` alias has been removed from
    # recent NumPy releases.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)

    # test caching; the cache directory is removed again so the test does
    # not leave temporary files behind
    cache_dir = mkdtemp()
    try:
        clustering = Ward(n_clusters=10, connectivity=connectivity,
                          memory=cache_dir)
        clustering.fit(X)
        labels = clustering.labels_
        assert_true(np.size(np.unique(labels)) == 10)
    finally:
        # ignore_errors: a failed cleanup must not mask a test failure
        shutil.rmtree(cache_dir, ignore_errors=True)

    # Turn caching off now
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    # Check that we obtain the same solution with early-stopping of the
    # tree building
    clustering.compute_full_tree = False
    clustering.fit(X)
    np.testing.assert_array_equal(clustering.labels_, labels)
    clustering.connectivity = None
    clustering.fit(X)
    assert_true(np.size(np.unique(clustering.labels_)) == 10)

    # Check that we raise a TypeError on dense matrices
    clustering = Ward(n_clusters=10, connectivity=connectivity.todense())
    assert_raises(TypeError, clustering.fit, X)

    # A connectivity matrix whose shape does not match the number of
    # samples must be rejected with a ValueError
    clustering = Ward(n_clusters=10,
                      connectivity=sparse.lil_matrix(
                          connectivity.todense()[:10, :10]))
    assert_raises(ValueError, clustering.fit, X)
def hierarchicalClustering(x, k):
    """Cluster the rows of ``x`` with Ward and compute per-cluster means.

    Parameters
    ----------
    x : indexable sequence of equally long numeric rows
    k : int, number of clusters requested from Ward

    Returns
    -------
    (centroids, labels) : ``centroids`` is a list of ``k`` lists holding the
        mean vector of each cluster; ``labels`` is the result of
        ``Ward.fit_predict`` for the rows of ``x``.
    """
    labels = Ward(n_clusters=k).fit_predict(np.asarray(x))
    n_dims = len(x[0])

    # Running per-cluster sums (list of lists) and per-cluster sample counts
    sums = [[0] * n_dims for _ in range(k)]
    ctrs = np.zeros(k)

    # Accumulate every sample into the sum vector of its cluster
    for sample_idx, centDex in enumerate(labels):
        sample = x[sample_idx]
        target = sums[centDex]
        for d in range(n_dims):
            target[d] += sample[d]
        ctrs[centDex] += 1

    # Divide each sum vector by its cluster size to obtain the centroid
    centroids = [[total / ctrs[c] for total in vector]
                 for c, vector in enumerate(sums)]
    return (centroids, labels)
def cluster_evaluation(D, y_true, n_clusters, eps=0.8, min_samples=10):
    """Cluster with Ward and print clustering quality scores.

    Parameters
    ----------
    D : distance matrix; it is passed to ``metrics.silhouette_score`` with
        ``metric='precomputed'`` and turned into ``S = 1 - D`` for clustering
        (presumably distances are normalised to [0, 1] -- confirm with caller).
    y_true : ground-truth labels used by the supervised scores.
    n_clusters : number of clusters requested from Ward.
    eps, min_samples : unused; they are not read by this function and are
        kept only so existing callers keep working.

    Returns
    -------
    None. All results are printed.
    """
    labels_true = y_true

    # Transform the distance matrix into a similarity matrix
    S = 1 - D

    # Ward is fitted on S, i.e. each row of S is used as a feature vector
    labels = Ward(n_clusters=n_clusters).fit(S).labels_

    # Number of clusters in labels, ignoring noise (-1) if present
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    # Single-argument print(...) calls behave the same on Python 2 and 3
    print('Number of clusters: %d' % n_clusters_)
    print('Homogeneity: %0.3f' % metrics.homogeneity_score(labels_true, labels))
    print('Completeness: %0.3f' % metrics.completeness_score(
        labels_true, labels))
    print('V-meassure: %0.3f' % metrics.v_measure_score(labels_true, labels))
    print('Adjusted Rand Index: %0.3f' % metrics.adjusted_rand_score(
        labels_true, labels))
    print('Adjusted Mutual Information: %0.3f'
          % metrics.adjusted_mutual_info_score(labels_true, labels))
    print('Silhouette Coefficient: %0.3f' % metrics.silhouette_score(
        D, labels, metric='precomputed'))
def spect_clust_segmentation(lena, regions=20):
    """Segment a 2-D image with connectivity-constrained Ward clustering.

    Every pixel value is one sample; the pixel-grid graph from
    ``grid_to_graph`` is used as connectivity. Timing and cluster statistics
    are printed, then the image is shown with one contour per cluster.

    Parameters
    ----------
    lena : 2-D array (its ``shape`` is unpacked into ``grid_to_graph``)
    regions : int, number of clusters to extract
    """
    n_clusters = regions
    pixels = np.reshape(lena, (-1, 1))
    grid_graph = grid_to_graph(*lena.shape)

    print("Compute structured hierarchical clustering...")
    st = time.time()
    model = Ward(n_clusters=n_clusters, connectivity=grid_graph)
    model.fit(pixels)
    # Fold the flat label vector back into the image layout
    label = np.reshape(model.labels_, lena.shape)
    print("Elapsed time: ", time.time() - st)
    print("Number of pixels: ", label.size)
    print("Number of clusters: ", np.unique(label).size)

    plt.imshow(lena, cmap=plt.cm.gray)
    for l in range(n_clusters):
        # One colour per cluster, spread over the colormap
        colour = plt.cm.spectral(l / float(n_clusters))
        plt.contour(label == l, contours=1, colors=[colour, ])
    plt.show()
def test_linkage_misc():
    """Miscellaneous checks on linkage handling and ``copy`` warnings."""
    X = np.ones((5, 5))

    # Unknown linkage names and a connectivity of the wrong shape are
    # rejected with a ValueError
    bad_estimator = AgglomerativeClustering(linkage='foobar')
    assert_raises(ValueError, bad_estimator.fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foobar')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # Ward(copy=True): expect 2 warnings, one because Ward is deprecated
    # and one because the copy argument is used
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", UserWarning)
        Ward(copy=True).fit(X)
    assert_equal(len(caught), 2)

    # ward_tree(copy=True): expect 1 warning, for the copy argument
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", UserWarning)
        ward_tree(X, copy=True)
    assert_equal(len(caught), 1)

    # Hierarchical clustering on a precomputed distance matrix must give
    # the same tree as computing the cosine affinity internally
    dis = cosine_distances(X)
    from_precomputed = linkage_tree(dis, affinity="precomputed")
    from_cosine = linkage_tree(X, affinity="cosine")
    assert_array_equal(from_precomputed[0], from_cosine[0])
def compute_clusters(dataset, features_vector):
    """Apply the clustering method selected in ``cmd_options.clust_method``.

    The number of clusters is the number of distinct values in
    ``dataset.target``.

    Parameters
    ----------
    dataset : object exposing a ``target`` array of reference labels
    features_vector : feature matrix; ``toarray()`` is called on it for
        ``"hclust"`` (so a sparse matrix is presumably expected -- confirm
        with caller), and it is passed unchanged to KMeans for ``"kmeans"``.

    Returns
    -------
    The fitted ``Ward`` (``"hclust"``) or ``KMeans`` (``"kmeans"``) estimator,
    or ``None`` when ``cmd_options.clust_method`` is neither of the two.
    """
    labels = dataset.target
    true_k = np.unique(labels).shape[0]
    method = cmd_options.clust_method

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    # The two spaces reproduce the original output (trailing space of the
    # message plus the separator of the print statement).
    print("Performing clustering with method  %s" % (method.upper(),))
    print("")

    if method == "hclust":
        # Ward is given a dense array
        result = features_vector.toarray()
        ward = Ward(n_clusters=true_k)
        ward.fit(result)
        return ward

    if method == "kmeans":
        km = KMeans(n_clusters=true_k,
                    init='k-means++',
                    max_iter=1000,
                    verbose=1)
        km.fit(features_vector)
        return km

    # Unknown method: keep the historical behaviour of returning None
    return None
def cluster_tiestrength_kmeans(self, vertices=None, nclusters=2, cluster_prop='tsk'):
    """Cluster vertices on their Dice similarity and store the label on each.

    NOTE(review): despite the name, the clustering is done with ``Ward``,
    not k-means.

    Parameters
    ----------
    vertices : vertices to cluster; defaults to ``self.gs`` when None
    nclusters : passed positionally as the first argument of ``Ward``
    cluster_prop : key under which each vertex receives its cluster label
    """
    if vertices is None:
        vertices = self.gs
    # similarity_dice returns a list of lists of similarity values (floats)
    similarity = self.similarity_dice(vertices)
    model = Ward(nclusters).fit(similarity)
    for position, vertex in enumerate(vertices):
        vertex[cluster_prop] = model.labels_[position]
def buildFromImageCollectionWard(self,
                                 pathTxtFile,
                                 pathDirImages,
                                 fileImageExtension,
                                 vocabularySize,
                                 maxNumImages=sys.maxsize):
    """Build a Ward clustering over SURF descriptors of an image collection.

    The fitted estimator is stored in ``self.ward``; nothing is returned.

    Parameters
    ----------
    pathTxtFile : text file listing the image IDs
        (read through ``self.readImageIdsFromTxtFile``)
    pathDirImages : directory containing the images
    fileImageExtension : file extension of the images
    vocabularySize : number of clusters (could be 4096)
    maxNumImages : upper bound on the number of images used; when more IDs
        are listed, a random sample of this size is drawn. The default is
        ``sys.maxsize`` (``sys.maxint`` does not exist on Python 3), i.e.
        effectively "no limit".
    """
    # Read the image IDs
    imageIds = self.readImageIdsFromTxtFile(pathTxtFile)

    # If there are more images than the considered ones, subsample
    if len(imageIds) > maxNumImages:
        imageIds = random.sample(imageIds, maxNumImages)

    # Extract the SURF descriptors from the collection of images
    surfExtractor = SurfExtractor(True)
    surfExtractor.processCollectionFilesImage(imageIds, pathDirImages,
                                              fileImageExtension)

    # Stack the descriptors into one numpy array
    descriptors = surfExtractor.getDescriptors()
    arr_descriptor = np.vstack(tuple(descriptors))

    self.ward = Ward(n_clusters=vocabularySize)
    self.ward.fit(arr_descriptor)
def _run_interface(self, runtime):
    """Cluster the input matrix and save the labels as a NIfTI image.

    Reads ``self.inputs.in_File``, clusters the squeezed data with the
    method named by ``self.inputs.cluster_type`` (``'spectral'``,
    ``'hiercluster'``, ``'kmeans'`` or ``'dbscan'``) and writes
    ``<base>_<n_clusters>_<cluster_type>_<hemi>.nii`` into the current
    working directory.

    Returns
    -------
    runtime : the ``runtime`` argument, unchanged.

    Raises
    ------
    ValueError
        If ``cluster_type`` is not one of the supported values (previously
        this surfaced as an UnboundLocalError on ``labels``).
    """
    # load data
    data = nb.load(self.inputs.in_File).get_data()
    corrmatrix = np.squeeze(data)

    cluster_type = self.inputs.cluster_type
    if cluster_type == 'spectral':
        # threshold at 0 (spectral uses non-negative values)
        positivecorrs = np.where(corrmatrix > 0, corrmatrix, 0)
        # spectral expects dtype=double values
        newmatrix = np.asarray(positivecorrs, dtype=np.double)
        labels = spectral(newmatrix,
                          n_clusters=self.inputs.n_clusters,
                          eigen_solver='arpack',
                          assign_labels='discretize')
    elif cluster_type == 'hiercluster':
        labels = Ward(
            n_clusters=self.inputs.n_clusters).fit_predict(corrmatrix)
    elif cluster_type == 'kmeans':
        labels = km(
            n_clusters=self.inputs.n_clusters).fit_predict(corrmatrix)
    elif cluster_type == 'dbscan':
        labels = DBSCAN(eps=self.inputs.epsilon).fit_predict(corrmatrix)
    else:
        raise ValueError("Unknown cluster_type: %r" % (cluster_type,))

    # +1 because cluster labels start at 0
    new_img = nb.Nifti1Image(labels + 1, None)
    _, base, _ = split_filename(self.inputs.in_File)
    out_name = (base + '_' + str(self.inputs.n_clusters) + '_' +
                self.inputs.cluster_type + '_' + self.inputs.hemi + '.nii')
    nb.save(new_img, os.path.abspath(out_name))
    return runtime
def test_connectivity_popagation():
    """
    Check that connectivity in the ward tree is propagated correctly during
    merging.
    """
    from sklearn.neighbors import NearestNeighbors

    # 15 points, most of them duplicates of one another
    points = np.array(
        [(.014, .120), (.014, .099), (.014, .097)]
        + [(.017, .153)] * 2
        + [(.018, .153)] * 7
        + [(.018, .152), (.018, .149), (.018, .144)]
    )
    neighbours = NearestNeighbors(n_neighbors=10, warn_on_equidistant=False)
    neighbours.fit(points)
    knn_graph = neighbours.kneighbors_graph(points)
    # If changes are not propagated correctly, fit crashes with an
    # IndexError
    Ward(n_clusters=4, connectivity=knn_graph).fit(points)
def __hieclu(self):
    """Cluster ``self.data_matrix`` into ``self.k`` groups with Ward.

    Returns
    -------
    The cluster labels returned by ``Ward.fit_predict``.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3
    print('using hierarchical clustering......')
    # fit_predict already fits the model; a separate fit() call beforehand
    # only repeated the whole computation
    return Ward(n_clusters=self.k).fit_predict(self.data_matrix)
def test_linkage_misc():
    """Miscellaneous checks on linkage handling and the Ward deprecation."""
    X = np.random.RandomState(42).normal(size=(5, 5))

    # Unknown linkage names and a connectivity of the wrong shape are
    # rejected with a ValueError
    bad_estimator = AgglomerativeClustering(linkage='foo')
    assert_raises(ValueError, bad_estimator.fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # Deprecation of Ward class: exactly one warning is expected
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", DeprecationWarning)
        Ward().fit(X)
    assert_equal(len(caught), 1)

    # A precomputed cosine distance matrix must give the same tree as
    # letting linkage_tree compute the cosine affinity itself
    dis = cosine_distances(X)
    from_precomputed = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(from_precomputed[0],
                       linkage_tree(X, affinity="cosine")[0])

    # A callable affinity must give the same tree as the equivalent
    # affinity name
    from_callable = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(from_callable[0],
                       linkage_tree(X, affinity="manhattan")[0])
def constraint(self, nodes, edges, lables):
    """Run Ward on ``self.A`` constrained by one circle's membership graph.

    Nodes are grouped into "circles" by label; the members of the
    hard-coded circle 36 are fully connected to each other in a sparse
    guidance matrix that is handed to Ward as ``connectivity``. Circle
    sizes, the guidance matrix and the predictions are printed.

    Parameters
    ----------
    nodes : list of node identifiers (``nodes.index`` is used on it)
    edges : unused
    lables : one label per node, parallel to ``nodes``; a length mismatch
        is only reported, not rejected

    Returns
    -------
    None.

    Raises
    ------
    KeyError
        If no node carries label 36.
    """
    if len(nodes) != len(lables):
        print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(lables)))
    N = len(nodes)

    # Group nodes by label
    circles = {}
    for i in range(N):
        circles.setdefault(lables[i], []).append(nodes[i])

    # Iterating the dict directly works on Python 2 and 3
    # (dict.iterkeys() exists only on Python 2)
    for key in circles:
        print(key, len(circles[key]))

    c = 36  # NOTE(review): hard-coded circle label
    # Resolve every member to its row once instead of calling nodes.index
    # inside the nested loop
    member_rows = [nodes.index(member) for member in circles[c]]

    # Build the matrix sparse from the start; the previous dense N x N
    # array was only an intermediate step before conversion to LIL
    guidance_matrix = sparse.lil_matrix((N, N))
    for i in member_rows:
        for j in member_rows:
            guidance_matrix[i, j] = 1.0
    print(guidance_matrix)

    ward = Ward(n_clusters=6, n_components=2, connectivity=guidance_matrix)
    predicts = ward.fit_predict(self.A)
    print(predicts)
def agglomerate(self, nodes, edges, clusters):
    """Merge existing clusters into 6 groups with Ward.

    Builds a cluster-by-cluster affinity matrix in which entry
    ``[a, b]`` counts how many neighbours of cluster ``a``'s members belong
    to cluster ``b``, clusters its rows with ``Ward(n_clusters=6)`` and maps
    the result back to the nodes.

    Parameters
    ----------
    nodes : sequence of hashable node identifiers
    edges : iterable of pairs; ``edge[1]`` is recorded as a neighbour of
        ``edge[0]`` only (the reverse direction is not added here)
    clusters : cluster id of each node, parallel to ``nodes``. The ids are
        used as matrix indices, so they are presumably integers in
        ``0..n_clusters-1`` -- confirm with caller.

    Returns
    -------
    list with the merged cluster label of every node, in ``nodes`` order.
    """
    if len(nodes) != len(clusters):
        print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(clusters)))

    # node -> list of its neighbours
    neighbors = {}
    for edge in edges:
        neighbors.setdefault(edge[0], []).append(edge[1])

    node_clusters = {}  # node: its cluster id
    communities = {}  # cluster id: all neighbors for its members
    for i, node in enumerate(nodes):
        cluster = clusters[i]
        # Each community owns a fresh list: storing the neighbour list
        # itself made later extend() calls modify ``neighbors`` as well.
        # Nodes without any outgoing edge contribute no neighbours instead
        # of raising KeyError.
        communities.setdefault(cluster, []).extend(neighbors.get(node, ()))
        node_clusters[node] = cluster

    N = len(communities)
    affinity_matrix = sp.zeros([N, N])
    for comm, comm_neighbors in communities.items():
        degree = Counter(node_clusters[node] for node in comm_neighbors)
        for key, count in degree.items():
            affinity_matrix[comm, key] = count

    ward = Ward(n_clusters=6)
    predicts = ward.fit_predict(affinity_matrix)
    return [predicts[node_clusters[node]] for node in nodes]
def hieclu(data_matrix, k):
    """Cluster ``data_matrix`` into ``k`` groups with Ward.

    Returns
    -------
    The cluster labels returned by ``Ward.fit_predict``.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3
    print('using hierarchical clustering......')
    # fit_predict already fits the model; a separate fit() call beforehand
    # only repeated the whole computation
    return Ward(n_clusters=k).fit_predict(data_matrix)
def test_connectivity_popagation():
    """
    Check that connectivity in the ward tree is propagated correctly during
    merging.
    """
    from sklearn.neighbors import kneighbors_graph

    # 15 points, most of them duplicates of one another
    points = np.array(
        [(.014, .120), (.014, .099), (.014, .097)]
        + [(.017, .153)] * 2
        + [(.018, .153)] * 7
        + [(.018, .152), (.018, .149), (.018, .144)]
    )
    knn_graph = kneighbors_graph(points, 10)
    # If changes are not propagated correctly, fit crashes with an
    # IndexError
    Ward(n_clusters=4, connectivity=knn_graph).fit(points)
def doCoClustering(self,
                   leftClustCount,
                   rightClustCount,
                   clustPropName='coclust'):
    """Co-cluster a bipartite graph: left side first, then the right side.

    1. The left vertices are clustered with Ward on their Dice similarity.
    2. For every right vertex, the edges into each left cluster are counted
       and divided by the degree of that right vertex.
    3. The right vertices are clustered with Ward on the pairwise cosine
       distances (``cdist``) between those profiles.

    The cluster label of each right vertex is stored under
    ``clustPropName``; nothing is returned.

    Parameters
    ----------
    leftClustCount : number of clusters for the left vertices
    rightClustCount : number of clusters for the right vertices
    clustPropName : vertex attribute that receives the right-side label
    """
    vsleft = self.left()
    simleft = np.matrix(self.similarity_dice(vsleft))
    clustleft = Ward(n_clusters=leftClustCount).fit(simleft).labels_

    vsright = self.right()
    # Per vertex of the full graph: (isOnRightSide, index in left/right list)
    full2bipart = [(None, -1)] * self.vcount()
    for i, v in enumerate(vsleft):
        full2bipart[v.index] = (False, i)
    for i, v in enumerate(vsright):
        full2bipart[v.index] = (True, i)

    sizeright = len(vsright)
    # m_rclust[r, c]: number of edges between right vertex r and left
    # cluster c
    m_rclust = np.zeros(shape=(sizeright, leftClustCount))
    for e in self.es:
        (srcOnRight, src) = full2bipart[e.source]
        (_, dst) = full2bipart[e.target]
        if srcOnRight:
            vright = src
            clust = clustleft[dst]
        else:
            vright = dst
            clust = clustleft[src]
        m_rclust[vright, clust] += 1

    # Normalise every non-zero count by the degree of its right vertex.
    # The cell value is carried along explicitly: previously ``val`` was
    # read outside the comprehension that bound it, which is a NameError on
    # Python 3 and a stale (last-cell) value on Python 2.
    nonzero_cells = [(cell, val)
                     for cell, val in np.ndenumerate(m_rclust) if val]
    for (row, col), val in nonzero_cells:
        m_rclust[row, col] = float(val) / vsright[row].degree()

    simRight = cdist(m_rclust, m_rclust, 'cosine')
    clustright = Ward(n_clusters=rightClustCount).fit(simRight).labels_
    for i, c in enumerate(clustright):
        vsright[i][clustPropName] = c
def max_diff_dist_idx(dist_mat, min_dist, max_dist):
    """Pick an exemplar row and the cluster of rows that belongs to it.

    Each row's off-diagonal distances are split into two groups with
    ``Ward(n_clusters=2)``. The row whose two group maxima differ most
    becomes the exemplar; its cluster is the group with the smaller
    maximum plus the exemplar itself.

    If some row has all of its off-diagonal distances above ``max_dist``,
    the search stops at the first such row and that row index is returned
    for both values.

    Parameters
    ----------
    dist_mat : square 2-D numpy array of pairwise distances
    min_dist : unused
    max_dist : threshold for the early exit described above

    Returns
    -------
    (exemplar_idx, max_cluster_idx) : ``max_cluster_idx`` is a list of row
        indices in the normal case and the bare row index on the early
        exit.
    """
    num_nodes = dist_mat.shape[0]
    max_diff = -1
    max_diff_row = 0
    max_diff_label = []
    for i, dist_vals in enumerate(dist_mat):
        # every column except the row's own distance
        idx_set = np.r_[0:i, i + 1:num_nodes]
        temp = dist_vals[idx_set]
        if np.min(temp) > max_dist:
            return i, i

        # Hierarchical binary clustering of this row's distances
        label = Ward(n_clusters=2).fit(temp[:, None]).labels_

        # Each group is represented by its largest distance
        centroid = np.zeros(2)
        for group in (0, 1):
            centroid[group] = np.max(temp[label == group])
        cent_diff = centroid[0] - centroid[1]
        temp_1 = abs(centroid[0] - centroid[1])

        if max_diff < temp_1:
            max_idx_set = idx_set
            max_diff_row = i
            max_diff = temp_1
            max_diff_label = label
            max_cent_diff = cent_diff

    # Keep the group with the smaller maximum
    near_group = 1 if max_cent_diff > 0 else 0
    cur_cent_idx = set([]) | set(np.nonzero(max_diff_label == near_group)[0])
    max_cluster_idx = list(
        set(max_idx_set[list(cur_cent_idx)]) | set([max_diff_row]))
    return max_diff_row, max_cluster_idx
def cluster_w_else(network, similarity_matrix, number_of_communities=20):
    """Group the keys of ``network`` into communities with Ward.

    Parameters
    ----------
    network : mapping whose keys identify the nodes; key ``i`` (in
        ``network.keys()`` order) is assigned the label computed for row
        ``i`` of ``similarity_matrix``
    similarity_matrix : matrix passed to ``Ward.fit``
    number_of_communities : number of clusters requested

    Returns
    -------
    OrderedDict mapping every community index in
    ``range(number_of_communities)`` to the list of its node keys.
    """
    raw_communities = Ward(
        n_clusters=number_of_communities).fit(similarity_matrix).labels_

    communities = OrderedDict((x, []) for x in range(number_of_communities))

    # Materialise the keys once: ``network.keys()[i]`` fails on Python 3
    # and rebuilt the whole key list on every iteration on Python 2
    node_keys = list(network.keys())
    for i, node_key in enumerate(node_keys):
        community_idx = raw_communities[i]
        # -1 would mark noise, as produced by some other clusterers
        if community_idx != -1:
            communities[community_idx].append(node_key)
    return communities
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.
    """
    np.random.seed(0)
    # Use the builtin ``bool``: the ``np.bool`` alias has been removed from
    # recent NumPy releases.
    mask = np.ones([10, 10], dtype=bool)
    X = np.random.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    assert_true(np.size(np.unique(clustering.labels_)) == 10)
def cluster_hierarchically(self, raw_data, num_clusters, cmtrx=None):
    """Cluster ``raw_data`` with connectivity-constrained Ward.

    Parameters
    ----------
    raw_data : data passed to ``Ward.fit``
    num_clusters : number of clusters requested
    cmtrx : connectivity matrix; when None it is generated with
        ``self.generate_connectivity_matrix(raw_data.shape[0])``

    Returns
    -------
    The ``labels_`` of the fitted model, or ``None`` (after printing a
    warning) when the name ``Ward`` is not defined, e.g. because its import
    failed.
    """
    if cmtrx is None:
        cmtrx = self.generate_connectivity_matrix(raw_data.shape[0])

    # Only the lookup of the name is guarded, so a NameError raised inside
    # fit() is no longer misreported as "Ward disabled"
    try:
        ward_class = Ward
    except NameError:
        # Single-argument print(...) behaves the same on Python 2 and 3
        print('WARNING: sklearn Ward clustering disabled.')
        return None

    ward_clusters = ward_class(n_clusters=num_clusters,
                               connectivity=cmtrx).fit(raw_data)
    return ward_clusters.labels_
def test_connectivity_fixing_non_lil():
    """
    Check non regression of a bug if a non item assignable connectivity is
    provided with more than one component.
    """
    # dummy data: two samples
    samples = np.array([[0, 0], [1, 1]])
    # a mask with several components, to force connectivity fixing
    diagonal_mask = np.array([[True, False], [False, True]])
    graph = grid_to_graph(n_x=2, n_y=2, mask=diagonal_mask)
    estimator = Ward(connectivity=graph)
    # fitting must emit a UserWarning
    assert_warns(UserWarning, estimator.fit, samples)
def identify_communities(number_of_communities, similarity_matrix, node_ids):
    """Group ``node_ids`` into communities found by Ward.

    Parameters
    ----------
    number_of_communities : number of clusters requested from Ward
    similarity_matrix : matrix passed to ``Ward.fit``
    node_ids : indexable sequence; ``node_ids[i]`` gets the label computed
        for row ``i`` of ``similarity_matrix``

    Returns
    -------
    OrderedDict mapping each community index (``0 .. n-1``, where ``n`` is
    the number of distinct labels excluding -1) to the list of its node
    ids.
    """
    model = Ward(n_clusters=number_of_communities)
    raw_communities = model.fit(similarity_matrix).labels_

    # Count distinct labels, not counting the noise label -1 if present
    noise_present = -1 in raw_communities
    num_communities = len(set(raw_communities)) - (1 if noise_present else 0)

    communities = OrderedDict()
    for community in range(num_communities):
        communities[community] = []

    for position in range(len(node_ids)):
        community_idx = raw_communities[position]
        if community_idx == -1:
            continue
        communities[community_idx].append(node_ids[position])
    return communities
def main():
    """Run the clustering tutorial.

    Generates ``args.n`` data points, evaluates the selected quality
    criterion (``'squared'``, ``'diameter'`` or ``'silhouette'``) for Ward
    clusterings with 1 .. ``args.k`` clusters, then shows the data
    coloured by a final Ward clustering next to the criterion curve.

    Raises
    ------
    ValueError
        If ``args.criterion`` is not one of the three supported names (the
        check runs inside the loop, so only when ``args.k`` > 0).
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3
    print("## Welcome to the clustering tutorial ##")
    args = parse_args()
    x, tc = generate_data(args.n)
    ks = numpy.arange(1, args.k + 1)
    crs = numpy.zeros(args.k)
    col = 'k'

    print("Computing %s clustering quality criterion" % (args.criterion,))
    # range() instead of the Python-2-only xrange()
    for j in range(args.k):
        ward = Ward(n_clusters=ks[j]).fit(x)
        labels = ward.labels_
        if args.criterion == 'squared':
            crs[j] = squared_criterion(x, labels)
            col = 'r'
        elif args.criterion == 'diameter':
            crs[j] = diameter_criterion(x, labels)
            col = 'g'
        elif args.criterion == 'silhouette':
            crs[j] = silhouette_criterion(x, labels)
            col = 'b'
        else:
            raise ValueError("Wrong criterion" + args.criterion)

    pylab.figure(figsize=(12, 6))
    # NOTE(review): the final clustering uses args.n (the value passed to
    # generate_data) as the number of clusters -- confirm this is intended
    ward = Ward(n_clusters=args.n).fit(x)
    labels = ward.labels_
    pylab.subplot(1, 2, 1)
    plot_data(x, labels)
    pylab.subplot(1, 2, 2)
    plot_criterion(ks, crs, col)
    pylab.show()
def get_km_segments(x, image, sps, n_segments=25):
    """Merge superpixels into segments with graph-constrained Ward.

    NOTE(review): despite the name, the clustering is done with ``Ward``,
    not k-means.

    Parameters
    ----------
    x : ``(feats, edges)`` or ``(feats, edges, extra)``; only the length of
        ``feats`` is used, ``edges`` holds one superpixel index pair per row
    image, sps : passed to ``get_colors`` / ``get_centers``
    n_segments : number of clusters requested

    Returns
    -------
    The labels from ``Ward.fit_predict`` on the colour and (half-weighted)
    centre features.
    """
    if len(x) != 2:
        feats, edges, _ = x
    else:
        feats, edges = x

    colors_ = get_colors(image, sps)
    centers = get_centers(sps)

    # Adjacency of the superpixel graph, one unit entry per edge
    n_spixel = len(feats)
    unit_weights = np.ones(edges.shape[0])
    adjacency = sparse.coo_matrix((unit_weights, edges.T),
                                  shape=(n_spixel, n_spixel))
    # Symmetrise so that each edge counts in both directions
    symmetric = adjacency + adjacency.T

    color_feats = np.hstack([colors_, centers * .5])
    model = Ward(n_clusters=n_segments, connectivity=symmetric)
    return model.fit_predict(color_feats)
def test_linkage_misc():
    """Miscellaneous checks on linkage handling and ``copy`` warnings."""
    X = np.ones((5, 5))

    # Unknown linkage names and a connectivity of the wrong shape are
    # rejected with a ValueError
    bad_estimator = AgglomerativeClustering(linkage='foobar')
    assert_raises(ValueError, bad_estimator.fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foobar')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # Ward(copy=True): expect 2 warnings, one because Ward is deprecated
    # and one because the copy argument is used
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", UserWarning)
        Ward(copy=True).fit(X)
    assert_equal(len(caught), 2)
def do_experiments(dataset):
    """Compare ITM, Ward and KMeans on one dataset and print the scores.

    For each clusterer the ARI, AMI, NMI, the ``tree_information``
    objective and the elapsed time are printed; finally the objective of
    the ground-truth labelling is printed.

    Parameters
    ----------
    dataset : object with ``data``, ``target`` and ``DESCR`` attributes;
        the first line of ``DESCR`` is used as the dataset name.
    """
    X, y = dataset.data, dataset.target
    dataset_name = dataset.DESCR.split('\n')[0]
    if dataset_name.startswith("Iris"):
        # iris has duplicate data points. That messes up our
        # MeanNN implementation.
        from scipy.spatial.distance import pdist, squareform
        dist = squareform(pdist(X))
        # Rows that have a zero distance to an earlier row: the strict
        # lower triangle of (dist - 1) equals -1 exactly there
        doubles = np.unique(np.where(np.tril(dist - 1, -1) == -1)[0])
        # Use the builtin ``bool``: the ``np.bool`` alias has been removed
        # from recent NumPy releases.
        mask = np.ones(X.shape[0], dtype=bool)
        mask[doubles] = False
        X = X[mask]
        y = y[mask]

    n_clusters = len(np.unique(y))
    print("\n\nDataset %s samples: %d, features: %d, clusters: %d" %
          (dataset_name, X.shape[0], X.shape[1], n_clusters))
    print("=" * 70)

    classes = [
        ITM(n_clusters=n_clusters),
        ITM(n_clusters=n_clusters, infer_dimensionality=True),
        Ward(n_clusters=n_clusters),
        KMeans(n_clusters=n_clusters)
    ]
    names = ["ITM", "ITM ID", "Ward", "KMeans"]
    for clusterer, method in zip(classes, names):
        start = time()
        clusterer.fit(X)
        y_pred = clusterer.labels_

        ari = adjusted_rand_score(y, y_pred)
        ami = adjusted_mutual_info_score(y, y_pred)
        nmi = normalized_mutual_info_score(y, y_pred)
        objective = tree_information(X, y_pred)

        # The reported time covers fitting and the score computations
        runtime = time() - start
        print("%-15s ARI: %.3f, AMI: %.3f, NMI: %.3f objective: %.3f time:"
              "%.2f" % (method, ari, ami, nmi, objective, runtime))

    i_gt = tree_information(X, y)
    print("GT objective: %.3f" % i_gt)
def hac_derived_ordering(bags_file, num_clusters_multiplier=0.4):
    """Derive hierarchies from Ward clusters and compare with ground truth.

    The number of clusters is
    ``int(num_clusters_multiplier * number of transcripts)``. Transcript
    ``i`` (numbered from 1) gets the set made of itself and every earlier
    transcript that falls in the same cluster.

    Parameters
    ----------
    bags_file : passed to ``doc_term_mat_from_bags``
    num_clusters_multiplier : fraction of the transcript count used as the
        number of clusters

    Returns
    -------
    The result of ``compare_hierarchies`` on the list of sets.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3
    print('*HAC DERIVED ORDERING* %s' % (num_clusters_multiplier,))
    print('Starting Hierarchical Agglomerative Clustering analysis...')

    data, words, transcripts = doc_term_mat_from_bags(bags_file)
    n_clusters = int(num_clusters_multiplier * len(transcripts))
    # fit_predict already fits the model; the extra fit() beforehand only
    # repeated the whole computation
    clust = Ward(n_clusters=n_clusters).fit_predict(data)

    # Remember the 1-based ids seen so far per cluster instead of
    # re-scanning all earlier transcripts for every transcript
    seen_by_cluster = {}
    hier_sets = []
    for i in range(len(transcripts)):
        members = seen_by_cluster.setdefault(clust[i], [])
        members.append(i + 1)
        hier_sets.append(set(members))
    return compare_hierarchies(hier_sets)
def cluster(dump_path, file_name, n_clusters=200):
    """Cluster the samples of a text file with Ward and dump the labels.

    Writes ``<dump_path>ward_labels.list`` (one integer label per line) and
    ``<dump_path>_num_clusters_ward.info`` (the requested cluster count).
    ``dump_path`` is used as a plain string prefix.

    Parameters
    ----------
    dump_path : prefix of the two output files
    file_name : file readable by ``np.loadtxt``; after the
        ``unpack=True`` / transpose round trip each row of the file is one
        sample
    n_clusters : number of clusters requested

    Returns
    -------
    float : always ``0.0``; no quality score is computed here.
    """
    # Obtain data from file.
    data = np.loadtxt(file_name, unpack=True)
    X = scale(np.transpose(data))

    # Compute clustering
    print("Compute unstructured hierarchical clustering...")
    st = time.time()
    ward = Ward(n_clusters=n_clusters).fit(X)
    label = ward.labels_
    print("Elapsed time: ", time.time() - st)
    print("Number of points: ", label.size)

    # ``with`` closes the files even if writing fails
    label_file = dump_path + "ward_labels.list"
    with open(label_file, 'w') as fp:
        fp.writelines("%d\n" % i for i in label)

    num_cluster_file = dump_path + "_num_clusters_ward.info"
    with open(num_cluster_file, 'w') as fp:
        fp.write("%d" % n_clusters)

    # The previous ``ward.cluster_centers_`` lookup was dropped: the
    # hierarchical estimator does not expose that attribute, so the call
    # failed after the files were written, and the value was never used.
    score = 0.0
    return score
def clusterRT_ward(values):
    """Split one-dimensional values into two groups with Ward.

    The values are sorted, clustered with ``Ward(n_clusters=2)`` and
    returned as runs of consecutive sorted values that share a label.

    Parameters
    ----------
    values : sequence of numbers

    Returns
    -------
    list of lists of values in ascending order. Empty input gives ``[]``;
    a single value gives one cluster containing it (two clusters cannot be
    formed from one sample).
    """
    if len(values) == 0:
        return []

    ordered = sorted(values)
    if len(ordered) < 2:
        return [ordered]

    # One feature per sample
    samples = np.asarray([[val] for val in ordered])
    labels = Ward(n_clusters=2).fit(samples).labels_

    # The labels refer to the *sorted* values, so the clusters must be
    # filled from ``ordered``; indexing the unsorted input put values into
    # the wrong cluster whenever the input was not already sorted.
    cl_output = []
    previous_label = None
    for val, label in zip(ordered, labels):
        if previous_label is None or label != previous_label:
            cl_output.append([])
            previous_label = label
        cl_output[-1].append(val)
    return cl_output