def map_clusters(n_list, n_clusters):
    # x = np.array([[28.596596, 77.344098], [28.574783, 77.333393]])
    # x = np.append(x, [[28.596596, 77.344098], [28.574783, 77.333393], [28.582515, 77.246735],
    #                   [28.582915, 77.215735], [28.635639, 77.201197], [28.464873, 76.995451]], axis=0)
    x = np.array([[28.596596, 0], [28.574783, 0], [28.996596, 0], [28.674783, 0],
                  [28.582515, 0], [28.582915, 0], [28.635639, 0], [28.464873, 0]])
    # x = np.append(x, n_list, axis=0)
    # define the model
    model = Birch(threshold=0.01, n_clusters=n_clusters)
    # fit the model
    model.fit(n_list)
    # assign a cluster to each example
    yhat = model.predict(n_list)
    # retrieve unique clusters
    clusters = unique(yhat)
    dic = {}
    # map each cluster label to the row indexes of its samples
    for cluster in clusters:
        # get row indexes for samples with this cluster
        row_ix = where(yhat == cluster)
        dic[cluster] = row_ix[0]
        # pyplot.scatter(x[row_ix, 0], x[row_ix, 1])
    # print(dic)
    # pyplot.show()
    return dic
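# A quick, hypothetical usage sketch for map_clusters above. It assumes
# `unique` and `where` are numpy's functions imported at module level, and
# the sample data is made up.
import numpy as np
from numpy import unique, where
from sklearn.cluster import Birch

pts = np.random.default_rng(0).normal(size=(30, 2))
cluster_to_rows = map_clusters(pts, n_clusters=3)
print(cluster_to_rows)  # {0: array([...]), 1: array([...]), 2: array([...])}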
def _runAlgorithm(self):
    birch = Birch(branching_factor=50, n_clusters=self.params['birch'], threshold=0.5)
    birch.fit(self.m_data)
    self.m_resultLabels = birch.labels_
def birch_clustering(principal_components, principal_df, number_of_clusters):
    final_df = pd.concat([principal_df], axis=1)
    model = Birch(threshold=0.01, n_clusters=number_of_clusters)
    # fit the model
    model.fit(principal_components)
    # assign a cluster to each example
    yhat = model.predict(principal_components)
    # retrieve unique clusters
    clusters = unique(yhat)
    final_df['Segment'] = model.labels_
    # create scatter plot for samples from each cluster
    for cluster in clusters:
        # get row indexes for samples with this cluster
        row_ix = where(yhat == cluster)
        # create scatter of these samples
        plt.scatter(principal_components[row_ix, 0],
                    principal_components[row_ix, 1], s=75)
    final_df.rename({0: 'PC1', 1: 'PC2', 2: 'PC3', 'y': 'Race'},
                    axis=1, inplace=True)
    plt.title("BIRCH Clustering")
    add_race_labels(final_df)
    calc_silhouette(data=principal_components, prediction=yhat,
                    n_clusters=len(clusters))
    return final_df
def birch_enrich(self, input_clustering, numclusters=10, threshold=1.7):
    """Enrich the training set with the BIRCH clustering algorithm.

    BIRCH (balanced iterative reducing and clustering using hierarchies) is
    an unsupervised data mining algorithm used to perform hierarchical
    clustering over particularly large data sets. An advantage of BIRCH is
    its ability to incrementally and dynamically cluster incoming,
    multi-dimensional metric data points in an attempt to produce the best
    quality clustering for a given set of resources (memory and time
    constraints). In most cases, BIRCH requires only a single scan of the
    database.

    :param numclusters: Number of clusters.
    :type numclusters: int
    :param threshold: The radius of the subcluster obtained by merging a new
        sample and the closest subcluster should be less than the threshold.
    :type threshold: float
    """
    self.X = self.X.astype(float)
    birch = Birch(threshold=threshold, n_clusters=numclusters)
    birch.fit(input_clustering)
    labels = birch.labels_
    cluster_centers = birch.subcluster_centers_
    n_features = len(self.vocabulary)
    total_nonzero = 0
    for x in range(len(self.X)):
        total_nonzero += np.count_nonzero(self.X[x])
    mean_n_features_in_docs = total_nonzero / len(self.X)
    for x in range(len(self.X)):
        # gamma scales the enrichment by document length: shorter documents
        # receive a stronger pull toward their cluster center
        gamma = mean_n_features_in_docs / np.count_nonzero(self.X[x])
        x_label = labels[x]
        center_vector = cluster_centers[x_label]
        for i in range(n_features):
            self.X[x][i] = self.X[x][i] + gamma * center_vector[i]
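# A minimal, standalone sketch of the enrichment idea in birch_enrich. All
# names here (docs, enriched, mean_nnz) are illustrative, not from the code
# above. Per-cluster means are computed from the final labels directly rather
# than by indexing subcluster_centers_ with a label, since Birch subclusters
# need not align one-to-one with the final cluster labels.
import numpy as np
from sklearn.cluster import Birch

rng = np.random.default_rng(0)
docs = rng.integers(0, 3, size=(20, 8)).astype(float)  # toy document-term matrix

birch = Birch(threshold=0.5, n_clusters=5)
labels = birch.fit_predict(docs)
centers = np.vstack([docs[labels == l].mean(axis=0) for l in np.unique(labels)])

mean_nnz = np.count_nonzero(docs, axis=1).mean()
enriched = docs.copy()
for i, row in enumerate(docs):
    gamma = mean_nnz / max(1, np.count_nonzero(row))  # shorter docs get a larger boost
    enriched[i] = row + gamma * centers[labels[i]]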
def clusteringReminMost(window):
    brc = Birch(branching_factor=50, n_clusters=3, threshold=0.5, compute_labels=True)
    brc.fit(window)
    Class = brc.predict(window)
    # Count the members of each cluster, find the largest cluster, and keep
    # its rows, thereby reinforcing the historical data.
    num0 = 0
    num1 = 0
    num2 = 0
    for i in Class:
        if i == 0:
            num0 += 1
        elif i == 1:
            num1 += 1
        else:
            num2 += 1
    label = chooseMax(num0, num1, num2)
    newwindow = window[0:0]  # empty frame with the same columns
    for i in range(len(Class)):
        if Class[i] == label:  # row belongs to the target cluster, so append it
            newwindow = newwindow.append(window[i:i + 1])  # all values are pandas objects
    return newwindow
def birchclustering(datalist):
    brc = Birch(branching_factor=50, n_clusters=None, threshold=0.17, compute_labels=True)
    brc.fit(datalist)
    return brc
def BirchModel(data, actualLabels):
    pca = PCA(n_components=2).fit(data)
    pca_2d = pca.transform(data)
    birch_model = Birch(threshold=0.1, n_clusters=10)
    t0 = time()
    birch_model.fit(pca_2d)
    labels = birch_model.labels_
    centroids = birch_model.subcluster_centers_
    n_clusters = np.unique(labels).size
    print('%11s  time   homo   compl  v-meas  ARI    AMI    silhouette' % 'init')
    print('%11s  %.2fs  %.3f  %.3f  %.3f  %.3f  %.3f  %.3f'
          % ('Birch Model', (time() - t0),
             metrics.homogeneity_score(actualLabels, birch_model.labels_),
             metrics.completeness_score(actualLabels, birch_model.labels_),
             metrics.v_measure_score(actualLabels, birch_model.labels_),
             metrics.adjusted_rand_score(actualLabels, birch_model.labels_),
             metrics.adjusted_mutual_info_score(actualLabels, birch_model.labels_),
             metrics.silhouette_score(data, birch_model.labels_,
                                      metric='euclidean', sample_size=10000)))
    scatter = plt.scatter(pca_2d[:, 0], pca_2d[:, 1], c=labels, marker='*')
    plt.plot(centroids[:, 0], centroids[:, 1], 'X', markeredgecolor='k', markersize=3)
    plt.colorbar(scatter)
    plt.title('Birch Model Clustering')
    plt.show()
def scan_callback(self, msg):
    pose = self.pose.copy()
    bearings = self.bearings.copy()
    ranges = np.array(msg.ranges)
    inf_flag = (-1 * np.isinf(ranges).astype(int) + 1)
    ranges = np.nan_to_num(ranges) * inf_flag
    euc_coord_x = pose[0] + np.cos(bearings - pose[2]) * ranges
    euc_coord_y = pose[1] + np.sin(bearings - pose[2]) * ranges
    dist_flag = np.where((euc_coord_x - pose[0])**2 +
                         (euc_coord_y - pose[1])**2 != 0.0)[0]
    points = np.array([euc_coord_x, euc_coord_y]).T
    points = points[dist_flag]
    self.obsv = []
    if len(points) > 0:
        brc = Birch(n_clusters=None, threshold=0.05)
        brc.fit(points)
        labels = brc.predict(points)
        u_labels = np.unique(labels)
        for l in u_labels:
            seg_idx = np.where(labels == l)
            seg = points[seg_idx]
            if seg.shape[0] <= 1:
                fit_cov = 10
            else:
                fit_cov = np.trace(np.cov(seg.T))
            if fit_cov < 0.001 and seg.shape[0] >= 3:
                self.obsv.append(seg.mean(axis=0))
    print(self.obsv)
def birch_ad_with_smoothing(latency_df, threshold):
    # anomaly detection on response time of service invocation
    # input: response times of service invocations, threshold for birch clustering
    # output: anomalous service invocations
    anomalies = []
    for svc, latency in latency_df.items():
        # No anomaly detection in db
        if svc != 'timestamp' and 'Unnamed' not in svc and 'rabbitmq' not in svc and 'db' not in svc:
            latency = latency.rolling(window=smoothing_window, min_periods=1).mean()
            x = np.array(latency)
            x = np.where(np.isnan(x), 0, x)
            normalized_x = preprocessing.normalize([x])
            X = normalized_x.reshape(-1, 1)
            # threshold = 0.05
            brc = Birch(branching_factor=50, n_clusters=None,
                        threshold=threshold, compute_labels=True)
            brc.fit(X)
            brc.predict(X)
            labels = brc.labels_
            # centroids = brc.subcluster_centers_
            n_clusters = np.unique(labels).size
            if n_clusters > 1:
                anomalies.append(svc)
    return anomalies
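# A self-contained sketch of the same detection idea: a service is flagged as
# anomalous when Birch (n_clusters=None) finds more than one cluster in its
# normalized latency series. The data, window size, and threshold below are
# illustrative assumptions, not values from the function above.
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.cluster import Birch

smoothing_window = 12
latency_df = pd.DataFrame({
    'svc_a': np.r_[np.full(50, 20.0), np.full(50, 80.0)],  # latency jump -> anomalous
    'svc_b': np.full(100, 20.0),                           # flat -> normal
})

for svc, latency in latency_df.items():
    smoothed = latency.rolling(window=smoothing_window, min_periods=1).mean()
    X = preprocessing.normalize([smoothed.to_numpy()]).reshape(-1, 1)
    brc = Birch(branching_factor=50, n_clusters=None, threshold=0.05)
    labels = brc.fit_predict(X)
    print(svc, 'anomalous' if np.unique(labels).size > 1 else 'normal')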
class BirchColorExtractor:
    def __init__(self, n_colors=None, threshold=0.5, branching_factor=50,
                 compute_labels=True, copy=True):
        self.birch = Birch(n_clusters=n_colors, threshold=threshold,
                           branching_factor=branching_factor,
                           compute_labels=compute_labels, copy=copy)

    def extract(self, img):
        # Load image and transform to a 2D numpy array of RGB rows in [0, 1].
        img_array = np.array(img, dtype=np.float64) / 255
        w, h, d = tuple(img_array.shape)
        assert d == 3
        image_array = np.reshape(img_array, (w * h, d))
        print("Fitting model on the image pixels")
        self.birch.fit(image_array)
        # Get labels for all points
        print("Predicting color indices on the full image (birch)")
        labels = self.birch.labels_
        main_color_array = 255 * self.birch.subcluster_centers_
        return [
            dict(color=dict(r=color[0], g=color[1], b=color[2]),
                 count=labels[labels == i].shape[0])
            for i, color in enumerate(main_color_array)
        ]
def birch_clusters(textdata, trained_doc2vec, n_clusters, start_alpha=0.025,
                   infer_epoch=100, branching_factor=10, threshold=0.01,
                   compute_labels=True, metric='cosine', **kwargs):
    infer_list = []
    for doc in textdata:
        infer_list.append(trained_doc2vec.infer_vector(doc, alpha=start_alpha,
                                                       steps=infer_epoch, **kwargs))
    brc = Birch(branching_factor=branching_factor, n_clusters=int(n_clusters),
                threshold=threshold, compute_labels=compute_labels)
    brc.fit(infer_list)
    clusters = brc.predict(infer_list)
    birch_labels = brc.labels_
    silhouette_score = metrics.silhouette_score(infer_list, birch_labels, metric=metric)
    return silhouette_score, clusters
def cluster_latlon(n_clusters, data):
    # split the data between "around NYC" and "other locations",
    # basically our first two clusters
    in_nyc = ((data.longitude > -74.05) & (data.longitude < -73.75) &
              (data.latitude > 40.4) & (data.latitude < 40.9))
    data_c = data[in_nyc]
    data_e = data[~in_nyc]
    # put it in matrix form
    coords = data_c[['latitude', 'longitude']].to_numpy()
    brc = Birch(branching_factor=100, n_clusters=n_clusters, threshold=0.01,
                compute_labels=True)
    brc.fit(coords)
    clusters = brc.predict(coords)
    data_c["cluster_" + str(n_clusters)] = clusters
    data_e["cluster_" + str(n_clusters)] = -1  # cluster label -1 for the non-NYC listings
    data = pd.concat([data_c, data_e])
    plt.scatter(data_c["longitude"], data_c["latitude"],
                c=data_c["cluster_" + str(n_clusters)], s=10, linewidth=0.1)
    plt.title(str(n_clusters) + " Neighbourhoods from clustering")
    plt.show()
    return data
def birch_skm_part1_helper(data, m, k, delta):
    """Receive data, calculate k centers using sklearn's Birch, and compute
    their quantile radii.

    :param data: numpy array
    :param m: Size of the data.
    :param k: Number of centers.
    :param delta: int
    :return: Tuple of two numpy arrays: (k_medoids, k_distances).
    """
    birch_instance = Birch(n_clusters=k, threshold=0.1)
    birch_instance.fit(data)
    labels = birch_instance.predict(data)  # cluster number for each point
    # Birch does not return centers, so calculate a medoid for each cluster.
    l_medoids = []
    for label in range(np.unique(labels).size):
        cluster = data[labels == label]
        kmedoids_instance_for_birch = kmedoids(cluster.tolist(), init_centers(cluster, 1))
        kmedoids_instance_for_birch.process()
        l_medoids.append(cluster[kmedoids_instance_for_birch.get_medoids()][0])
    l_medoids = np.array(l_medoids)
    q = calc_q(m, delta)  # calculate q
    # calculate the distance to the quantile points around each center
    l_distances = calc_quantile_radius_around_centers(data, l_medoids, q, k)
    return l_medoids, l_distances
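# A minimal sketch of the same "centers from Birch" idea without the
# pyclustering helpers used above: for each Birch cluster, take the member
# closest to the cluster mean as its medoid. Purely illustrative.
import numpy as np
from sklearn.cluster import Birch

data = np.random.default_rng(1).normal(size=(200, 2))
labels = Birch(n_clusters=3, threshold=0.1).fit_predict(data)

medoids = []
for label in np.unique(labels):
    cluster = data[labels == label]
    mean = cluster.mean(axis=0)
    medoids.append(cluster[np.argmin(np.linalg.norm(cluster - mean, axis=1))])
medoids = np.array(medoids)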
def birch(self, x, threshold=0.01):
    """Fit a Birch model on x with the configured maximum number of clusters."""
    model = Birch(threshold=threshold, n_clusters=self.max_clusters)
    model.fit(x)
    return model
def train(feature, weights, cluster_num, feature_path=None, down=0.006, up=0.0085, bf_index=2):
    if feature_path is not None:
        feature = pd.read_csv(feature_path)
    X = []
    print("Training...\n")
    for i in range(len(feature[feature.columns[0]])):
        f = np.array(feature.iloc[i][1:])
        key = f[bf_index]
        if key > up:
            f_w = combine(feature.iloc[i][1:], weights)
            X.append(f_w)
    clf = Birch(n_clusters=cluster_num)
    clf = KMeans(n_clusters=cluster_num)  # note: this overwrites the Birch model, so KMeans is what actually trains
    clf.fit(X)
    pred = []
    for i in range(len(feature[feature.columns[0]])):
        f = np.array(feature.iloc[i][1:])
        key = f[bf_index]
        if key > up:
            p = clf.predict([combine(f, weights)])
            pred.append(p[0])
        if key < down:
            pred.append(cluster_num)
        if down < key < up:
            pred.append(cluster_num + 1)
    joblib.dump(clf, 'curve_model_Birch.pkl')
    print(pred)
    return pred
def add_cluster_column(train_df, test_df, n_clusters):
    train_df['source'] = 'train'
    test_df['source'] = 'test'
    total_rows = train_df.shape[0] + test_df.shape[0]
    data = pd.concat([train_df, test_df])
    # split the data between "around NYC" and "other locations"
    in_nyc = ((data.longitude > -74.05) & (data.longitude < -73.75) &
              (data.latitude > 40.4) & (data.latitude < 40.9))
    data_c = data[in_nyc]
    data_e = data[~in_nyc]
    # put it in matrix form
    coords = data_c[['latitude', 'longitude']].to_numpy()
    brc = Birch(branching_factor=100, n_clusters=n_clusters, threshold=0.01,
                compute_labels=True)
    brc.fit(coords)
    clusters = brc.predict(coords)
    data_c["num_cluster_" + str(n_clusters)] = clusters
    data_e["num_cluster_" + str(n_clusters)] = -1  # cluster label -1 for the non-NYC listings
    data = pd.concat([data_c, data_e])
    print('lost: {}'.format(total_rows
                            - data[data['source'] == 'train'].shape[0]
                            - data[data['source'] == 'test'].shape[0]))
    return data[data['source'] == 'train'], data[data['source'] == 'test']
def get_clustered_data(data_matrix, clustering_algorithm=model_constants.KMEANS,
                       distance_metric='euclidean', num_clusters=3):
    if clustering_algorithm.lower() == model_constants.AFFINITY_PROP:
        aff_prop = AffinityPropagation(affinity=distance_metric)
        aff_prop.fit(data_matrix)
        return aff_prop.labels_, aff_prop
    elif clustering_algorithm.lower() == model_constants.DBSCAN:
        dbscan = DBSCAN(metric=distance_metric)
        dbscan.fit(data_matrix)
        return dbscan.labels_, dbscan
    elif clustering_algorithm.lower() == model_constants.OPTICS:
        optics = OPTICS(metric=distance_metric)
        optics.fit(data_matrix)
        return optics.labels_, optics
    elif clustering_algorithm.lower() == model_constants.MEANSHIFT:
        mean_shift = MeanShift()
        mean_shift.fit(data_matrix)
        return mean_shift.labels_, mean_shift
    elif clustering_algorithm.lower() == model_constants.BIRCH:
        birch = Birch(n_clusters=num_clusters)
        birch.fit(data_matrix)
        return birch.labels_, birch
    elif clustering_algorithm.lower() == model_constants.AGGLOMERATIVE:
        agglomerative = AgglomerativeClustering(n_clusters=num_clusters,
                                                affinity=distance_metric)
        agglomerative.fit(data_matrix)
        return agglomerative.labels_, agglomerative
    else:
        kmeans = KMeans(n_clusters=num_clusters, random_state=42)
        kmeans.fit(data_matrix)
        return kmeans.labels_, kmeans
def birchcluster(X):
    brc = Birch()
    brc.fit(X)
    labels = brc.labels_
    centroids = brc.subcluster_centers_
    n_clusters = np.unique(labels).size
    print("n_clusters : %d" % n_clusters)
    return labels
def birch_algo(X, threshold=1.7, clustering=None):
    birch = Birch(threshold=threshold, n_clusters=clustering)
    t = time()
    birch.fit(X)
    time_ = time() - t
    labels = birch.labels_
    centroids = birch.subcluster_centers_
    n_clusters = np.unique(labels).size
    print(" The number of clusters is : %d" % n_clusters)
def birch_algo(X, threshold=1.7, clustering=None):
    birch = Birch(threshold=threshold, n_clusters=clustering)
    birch.fit(X)
    labels = birch.labels_
    centroids = birch.subcluster_centers_
    labels_unique = np.unique(labels)
    n_clusters = labels_unique.size
    print(" The number of clusters is : %d" % n_clusters)
    return labels, centroids, n_clusters
def birch(data, threshold, branching_factor):
    # bir = Birch(threshold=args['threshold'], branching_factor=int(args['branching_factor']))
    db = Birch(threshold=threshold, branching_factor=branching_factor)
    pred = db.fit_predict(data)  # fits the model and returns the labels in one step
    score = sil_score(data, pred)
    print(score)
    return db, pred, score
def cluster_birch(self):
    print("Starting Birch clustering")
    brc = Birch(branching_factor=10, n_clusters=40,
                threshold=self.cluster_distance, compute_labels=False)
    brc.fit(self.all_frames_xy)
    clusters = brc.predict(self.all_frames_xy)
    return clusters
def test_feature_names_out():
    """Check `get_feature_names_out` for `Birch`."""
    X, _ = make_blobs(n_samples=80, n_features=4, random_state=0)
    brc = Birch(n_clusters=4)
    brc.fit(X)
    n_clusters = brc.subcluster_centers_.shape[0]
    names_out = brc.get_feature_names_out()
    assert_array_equal([f"birch{i}" for i in range(n_clusters)], names_out)
def test_n_samples_leaves_roots():
    # Sanity check for the number of samples in leaves and roots
    X, y = make_blobs(n_samples=10)
    brc = Birch()
    brc.fit(X)
    n_samples_root = sum([sc.n_samples_ for sc in brc.root_.subclusters_])
    n_samples_leaves = sum([sc.n_samples_ for leaf in brc._get_leaves()
                            for sc in leaf.subclusters_])
    assert n_samples_leaves == X.shape[0]
    assert n_samples_root == X.shape[0]
def test_threshold():
    # Test that the leaf subclusters have a radius no greater than the threshold
    X, y = make_blobs(n_samples=80, centers=4)
    brc = Birch(threshold=0.5, n_clusters=None)
    brc.fit(X)
    check_threshold(brc, 0.5)

    brc = Birch(threshold=5.0, n_clusters=None)
    brc.fit(X)
    check_threshold(brc, 5.0)
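# For reference, a sketch of what the check_threshold helper used above can
# look like: walk the CF-tree's leaf linked list and assert that every leaf
# subcluster's radius stays within the threshold. Treat this as illustrative
# of the idea, not as the canonical helper.
def check_threshold(birch_instance, threshold):
    """Use the leaf linked list for traversal."""
    current_leaf = birch_instance.dummy_leaf_.next_leaf_
    while current_leaf:
        for sc in current_leaf.subclusters_:
            assert sc.radius <= threshold
        current_leaf = current_leaf.next_leaf_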
def update_k_clusters(attrname, old, new):
    k_cluster = int(k_slider.value)
    brc = Birch(branching_factor=50, n_clusters=k_cluster, threshold=0.5,
                compute_labels=True)
    brc.fit(tweet_vecs)
    predictions = brc.predict(tweet_vecs)
    colors = get_colors(predictions)
    brc_data.data = dict(colors=colors, x=tsne_vecs[:, 0], y=tsne_vecs[:, 1])
def BIRCH2_duplicate_removal(dataframe, threshold=0.8):
    # Note: this method now takes a dataframe as input
    if len(dataframe) < 2:
        # nothing to do
        return dataframe
    Crater_data = dataframe
    # extract axes
    x = Crater_data[0].values.tolist()
    y = Crater_data[1].values.tolist()
    r = Crater_data[2].values.tolist()
    p = Crater_data[3].values.tolist()
    Points = []
    X = np.column_stack((x, y))
    brc = Birch(branching_factor=50, n_clusters=int(threshold * len(x)),
                threshold=0.5, compute_labels=True)
    brc.fit(X)
    groups_pred = brc.predict(X)
    for c in set(groups_pred):
        idx = [i for i, e in enumerate(groups_pred) if e == c]
        Group_x = []
        Group_y = []
        Group_r = []
        Group_p = []
        index = []
        for i in idx:
            if i in range(0, len(x)):
                Group_x.append(x[i])
                Group_y.append(y[i])
                Group_r.append(r[i])
                Group_p.append(p[i])
                index.append(i)
        # after the group is defined, extract its elements from the lists
        Points.append([Group_x, Group_y, Group_r, Group_p])
    # now reduce each group to a single detection
    center_size = []
    for i, (Xs, Ys, Rr, Ps) in enumerate(Points):
        # we take the point with the best prediction confidence
        best_index = np.argmax(Ps)
        x_center = Xs[best_index]
        y_center = Ys[best_index]
        radius = Rr[best_index]
        prob = Ps[best_index]
        center_size += [[x_center, y_center, radius, prob]]
    return pd.DataFrame(center_size)
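# A small, self-contained illustration of the same deduplication idea:
# cluster near-identical (x, y) detections with Birch and keep the row with
# the highest confidence in each cluster. The data below is made up.
import numpy as np
import pandas as pd
from sklearn.cluster import Birch

detections = pd.DataFrame([[10.0, 10.1, 3.0, 0.7],
                           [10.1, 10.0, 3.2, 0.9],   # near-duplicate of the row above
                           [50.0, 40.0, 5.0, 0.8]])  # columns: x, y, r, confidence
labels = Birch(n_clusters=None, threshold=0.5).fit_predict(detections[[0, 1]].to_numpy())
deduplicated = detections.loc[detections.groupby(labels)[3].idxmax()]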
def compute_clusters(data: List) -> np.ndarray:
    print("--->Computing clusters")
    birch = Birch(branching_factor=50, n_clusters=5, threshold=0.3,
                  copy=True, compute_labels=True)
    birch.fit(data)
    predictions = np.array(birch.predict(data))
    return predictions
class BirchSklearn(AbstractClusteringAlgorithm):
    def __init__(self, **kwargs):
        from sklearn.cluster import Birch
        self.model = Birch(**kwargs)

    def fit(self, x: np.ndarray):
        self.model.fit(x)

    @property
    def labels_(self):
        return self.model.labels_
def skitleanBirch():
    data = pd.read_csv("soy_rock.csv", header=None)
    X = data.values.tolist()
    randomm = randint(5, 20)
    brc = Birch(branching_factor=randomm, n_clusters=4, threshold=0.1,
                compute_labels=True)
    brc.fit(X)
    pred = brc.predict(X)
    return pred
def birch(x, n_clusters=None, threshold=0.5, branching_factor=5):
    birch_model = Birch(threshold=threshold, n_clusters=n_clusters,
                        branching_factor=branching_factor)
    birch_model.fit(x)
    centroids = birch_model.subcluster_centers_
    c = birch_model.labels_
    k = len(centroids)
    return birch_model, (centroids, c, k)
def main():
    # remove sub folders
    removeSubFolders(path + algorithm + '\\')
    for file in os.listdir(path):
        if file.endswith("-d.txt"):
            text_file = open(path + file, 'r')
            ar = text_file.readline().split(' ')
            ar.remove('\n')
            if len(ar) > 0:
                # print(list(map(int, ar)))
                row = list(map(int, ar))
                data.append(row)
                fileNames.append(file)
                # print(row)
    # create np array
    npData = np.array(data)
    n_samples, n_features = npData.shape
    brc = Birch(branching_factor=50, n_clusters=n_digits, threshold=0.5, compute_labels=True)
    # kmeans = KMeans(init='random', n_clusters=n_digits, n_init=500)
    brc.fit(npData)
    list1 = brc.labels_
    list2 = fileNames
    print(brc.labels_)
    print(fileNames)
    list1, list2 = zip(*sorted(zip(list1, list2)))
    print(list1)
    print(list2)
    '''
    k = 0
    lim = len(list1) - 1
    for i in range(0, n_digits):
        while list1[k] == i:
            # want to copy these into folders
            copychar(list1[k], list2[k])
            print(list1[k], list2[k])
            k += 1
            if k == lim:
                break
    '''
    for i in range(0, len(list1)):
        print(list1[i], list2[i])
        copychar(list1[i], list2[i])
def runBrich(K_cluster, cluster_input):
    # clustering by topic-probability vector of each category
    t0 = time()
    bri = Birch(n_clusters=K_cluster)
    bri.fit(cluster_input)
    print("done in %0.3fs" % (time() - t0))
    with open('result/brich_cluster_' + str(K_cluster) + '.txt', 'w') as f:
        f.write("cluster_centers\n")
        f.write(str(bri.subcluster_centers_))
        f.write("\n==========\n")
        f.write("labels (sequence of cluster # which each input belongs to)\n")
        f.write(str(bri.labels_))
        f.write("\n==========\n")
        f.write("subcluster_labels\n")
        f.write(str(bri.subcluster_labels_))
        f.write("\n==========\n")
    return bri.labels_
def split_birch(self, branching_factor, threshold):
    # Extract dataset from files
    dataset = [f.dataset for f in self.files]
    # Initialize classifier
    classifier = Birch(branching_factor=branching_factor, n_clusters=None,
                       threshold=threshold)
    classifier.fit(dataset)
    # Get cluster index for each file
    index = classifier.predict(dataset)
    count = max(index) + 1
    # Create new clusters
    clusters = [Cluster(self.directory, self.name + '-' + str(i))
                for i in range(count)]
    for i in range(0, len(self.files), 1):
        clusters[index[i]].add_file(self.files[i])
    return clusters
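# Illustrative sketch of the same split pattern with plain lists instead of
# the project-specific Cluster/file objects above.
import numpy as np
from sklearn.cluster import Birch

vectors = np.random.default_rng(2).normal(size=(12, 4))
index = Birch(n_clusters=None, threshold=2.0).fit_predict(vectors)
groups = [[] for _ in range(index.max() + 1)]
for i, label in enumerate(index):
    groups[label].append(vectors[i])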
def build_model(df, cluster_type="kmeans", seed=1):
    if cluster_type == "birch":
        model = Birch(n_clusters=N_CLUSTERS)
        res = model.fit_predict(df)
    elif cluster_type == "minibatch":
        model = MiniBatchKMeans(n_clusters=N_CLUSTERS, random_state=seed)
        res = model.fit_predict(df)
    elif cluster_type == "em":
        model = mixture.GMM(n_components=N_CLUSTERS)
        model.fit(df)
        res = model.predict(df)
    elif cluster_type == 'lda':
        model = lda.LDA(n_topics=N_CLUSTERS, n_iter=1500, random_state=seed)
        data_to_cluster = np.array(df).astype(int)
        lda_res = model.fit_transform(data_to_cluster)
        res = []
        for i in lda_res:
            # for now - do hard clustering, take the highest probability
            res.append(i.argmax())
    else:
        model = KMeans(n_clusters=N_CLUSTERS, random_state=seed)
        res = model.fit_predict(df)
    df_array = np.array(df)
    # map each cluster label to its center; this assumes a KMeans-style model
    # that exposes cluster_centers_ and labels_
    dis_dict = {}
    for i in range(N_CLUSTERS):
        dis_dict[i] = model.cluster_centers_[i]
    all_dist = []
    for line_idx in range(len(df_array)):
        label = model.labels_[line_idx]
        dist = calc_distance(df_array[line_idx], dis_dict[label])
        all_dist.append(dist)
    df["distance_from_cluster"] = all_dist
    # clusters = model.labels_.tolist()
    # print("clusters are:", clusters)
    print(""">>>> model is: %s, # of clusters:%s, and %s"""
          % (cluster_type, N_CLUSTERS, Counter(res)))
    res = [str(i) for i in res]
    docs_clusteres = zip(df.index, res)
    return docs_clusteres
def test_birch_with_depot_calculation():
    points = points_from_file('tsps/berlin52.txt')
    matrix = load_matrix(points)
    X = [[p[1], p[2]] for p in points]
    est = Birch(n_clusters=3)
    est.fit(X)
    labels = est.labels_
    hl_matrix, clusters, G = load_matrices_from_labels(points, labels)
    depots, C = compute_depots(clusters, matrix, G, per_cluster=True)
    depots_actual, _ = compute_depots(clusters, matrix, G)
    cluster_optimal_cost, R, hl_route = clustered_tsp_solve(points, 3,
                                                            labels=labels,
                                                            depots=depots)
    cluster_optimal_cost += C
    print(depots_actual)
    print(R, C)
    for depot in depots_actual:
        for r in R:
            if r[1][0] == depot:
                for point in r[1]:
                    print(matrix.points[point])
                print('')
def obtainCodebook(self, sampled_x, x):
    print('Obtaining codebook using Birch from sklearn...')
    scaled_x_sampled = StandardScaler().fit_transform(sampled_x)
    scaled_x = StandardScaler().fit_transform(x)
    brc = BIRCH(branching_factor=self.branching_factor, n_clusters=self.nclusters,
                threshold=self.threshold, compute_labels=True)
    # obtain the codebook and the projections of the images on the codebook
    # (clusters of words)
    codebook = brc.fit(scaled_x_sampled)
    clusters = brc.predict(scaled_x)
    print('Clusters obtained.')
    return codebook, clusters
def obtainClusters(self, hist):
    print('Obtaining clusters using Birch from sklearn...')
    hist = np.array(hist).astype(float)
    scaled_vec = StandardScaler().fit_transform(hist)
    brc = BIRCH(branching_factor=self.branching_factor, n_clusters=self.nclusters,
                threshold=self.threshold, compute_labels=True)
    # obtain the projections of the histograms on the codebook (clusters of words)
    codebook = brc.fit(scaled_vec)
    clusters = brc.predict(scaled_vec)
    print('Clusters obtained.')
    return clusters
import numpy as np
from sklearn.cluster import Birch
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from itertools import cycle

# Generate random vectors to cluster
n_samples = 50
centers = [[0, 1], [4, -2], [-2, 2], [0, -1]]
X, _ = make_blobs(n_samples=n_samples, centers=centers, cluster_std=0.2)

# Create the Birch classifier and give it the vectors
brc = Birch(branching_factor=50, n_clusters=None, threshold=0.8, compute_labels=True)
brc.fit(X)

labels = brc.labels_
cluster_centers = brc.subcluster_centers_
labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)

# Plot the generated points, one color per cluster
plt.figure(1)
plt.clf()
colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for k, col in zip(range(n_clusters_), colors):
    my_members = labels == k
    cluster_center = cluster_centers[k]
    plt.plot(X[my_members, 0], X[my_members, 1], col + '.')
plt.axis([-4, 12, -4, 12])
plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()
def cluster_junctions(juncs):
    birch_model = Birch(threshold=3, n_clusters=None)
    X = np.array(juncs)
    birch_model.fit(X)
    return birch_model.labels_
station_array = np.array(station_list)
dsp_array = np.array(dsp_list)

# extract the unique station names
stations = np.unique(station_array)
print(stations)

for sta in stations:
    events = event_array[station_array == sta, :]
    dsp_shortlist = dsp_array[station_array == sta]
    print(sta, events.shape, dsp_shortlist.shape)
    # cluster on events so as to compare dispersion curves for nearby events
    brc = Birch(branching_factor=50, n_clusters=None, threshold=dist,
                compute_labels=True)
    brc.fit(events)
    labels = brc.predict(events)
    print(np.max(labels))
    for lab in np.unique(labels):
        dsp_this_label_list = dsp_shortlist[labels == lab]
        cluster_name = os.path.join(dirname, "cluster_%s_%03d" % (sta, lab))
        plot_all_dsp(dsp_this_label_list, legend=False,
                     fname="%s_gvel.png" % cluster_name)
        plot_all_map(dsp_this_label_list, fname="%s_map.png" % cluster_name,
                     legend=False)
        f = open("%s_info.txt" % cluster_name, "w")
        for (dsp, dsp_dict) in dsp_this_label_list:
            f.write("%s %s %d %03d %02d %02d %.3f %.3f\n" % (
                dsp_dict["STA"], dsp_dict["COMP"], dsp_dict["YEAR"],