def cluster(train, val, type, number_of_clusters, plot_folder, classes):
    # TODO: this should be a class
    if type == "spectral_clustering":
        clustering_model = SpectralClustering(
            n_clusters=number_of_clusters, assign_labels="discretize",
            random_state=0).fit(train["data"])
    elif type == "kmeans":
        clustering_model = KMeans(n_clusters=number_of_clusters,
                                  random_state=0).fit(train["data"])
    else:
        raise NotImplementedError

    # compute metrics
    accuracies = {}
    # random labels used as a baseline for the homogeneity score
    random_array = np.random.randint(9, size=train["labels"].shape)
    centroids = find_centroids(number_of_clusters, train, clustering_model.labels_)
    test_classifications = cluster_test(val, centroids)
    visualize_clustering(train, clustering_model.labels_, type + "_training",
                         plot_folder, number_of_clusters, centroids)
    visualize_clustering(val, np.asarray(test_classifications), type + "_validation",
                         plot_folder, number_of_clusters, centroids)

    accuracies["random_score"] = homogeneity_score(train["labels"], random_array)
    accuracies["v_measure_score"] = v_measure_score(train["labels"], clustering_model.labels_)
    accuracies["homogeneity_score"] = homogeneity_score(train["labels"], clustering_model.labels_)
    accuracies["completeness_score"] = completeness_score(train["labels"], clustering_model.labels_)
    accuracies["silhouette_score"] = silhouette_score(train["data"], clustering_model.labels_)
    accuracies["purity_score"], accuracies["contingency_matrix"] = purity_score(
        train["labels"], clustering_model.labels_)

    accuracies["v_measure_score_test"] = v_measure_score(val["labels"], test_classifications)
    accuracies["homogeneity_score_test"] = homogeneity_score(val["labels"], test_classifications)
    accuracies["completeness_score_test"] = completeness_score(val["labels"], test_classifications)
    accuracies["silhouette_score_test"] = silhouette_score(val["data"], test_classifications)
    accuracies["purity_score_test"], accuracies["contingency_matrix_test"] = purity_score(
        val["labels"], test_classifications)
    return accuracies

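# The purity_score helper used above is not a scikit-learn function; a minimal
# sketch (an assumption about its interface, returning both the purity value and
# the contingency matrix as the caller above expects) could look like this:
import numpy as np
from sklearn.metrics.cluster import contingency_matrix

def purity_score(y_true, y_pred):
    # rows are true classes, columns are predicted clusters
    cm = contingency_matrix(y_true, y_pred)
    # purity: fraction of samples that fall in the majority class of their cluster
    return np.sum(np.amax(cm, axis=0)) / np.sum(cm), cm
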
def get_clustering_metrics(train_data, cluster_labels, ground_truth_labels=None):
    clustering_metric_dict = {}
    clustering_metric_dict['silhouette_score'] = silhouette_score(
        train_data, cluster_labels, random_state=42)
    clustering_metric_dict['calinski_harabasz_score'] = calinski_harabasz_score(
        train_data, cluster_labels)
    clustering_metric_dict['davies_bouldin_score'] = davies_bouldin_score(
        train_data, cluster_labels)

    if ground_truth_labels is not None:
        clustering_metric_dict['v_measure_score'] = v_measure_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['fowlkes_mallows_score'] = fowlkes_mallows_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['homogeneity_score'] = homogeneity_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['normalized_mutual_info_score'] = normalized_mutual_info_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['adjusted_rand_score'] = adjusted_rand_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['completeness_score'] = completeness_score(
            ground_truth_labels, cluster_labels)
    return clustering_metric_dict

def show_result(self, prediction, msg):
    new_line(50)
    print(msg)
    new_line(50)
    real = self.train_labels
    print("Confusion Matrix: ")
    print(str(confusion_matrix(real, prediction)))
    homo_score = homogeneity_score(real, prediction)
    complete_score = completeness_score(real, prediction)
    v_score = v_measure_score(real, prediction)
    rand_score = adjusted_rand_score(real, prediction)
    mutual_info = adjusted_mutual_info_score(real, prediction)
    print("Homogeneity Score: %0.3f" % homo_score)
    print("Completeness Score: %0.3f" % complete_score)
    print("V-measure: %0.3f" % v_score)
    print("Adjusted Rand Score: %0.3f" % rand_score)
    print("Adjusted Mutual Info Score: %0.3f\n" % mutual_info)
    return {
        'Homogeneity': homo_score,
        'Completeness': complete_score,
        'V-measure': v_score,
        'RAND': rand_score,
        'Mutual': mutual_info
    }

def compute_V_measure(clusters, classes):
    class_list, cluster_list = [], []
    clustered_but_unaligned = 0
    for read in clusters:
        if read in classes:
            class_list.append(classes[read])
            cluster_list.append(clusters[read])
        else:
            # print("Read was clustered but unaligned:", read)
            clustered_but_unaligned += 1

    # add the unprocessed reads to the measure: each aligned-but-unclustered
    # read is placed in its own singleton cluster
    not_clustered = set(classes.keys()) - set(clusters.keys())
    highest_cluster_id = max(clusters.values()) + 1
    for read in not_clustered:
        class_list.append(classes[read])
        cluster_list.append(highest_cluster_id)
        highest_cluster_id += 1

    v_score = v_measure_score(class_list, cluster_list)
    compl_score = completeness_score(class_list, cluster_list)
    homog_score = homogeneity_score(class_list, cluster_list)
    ari = adjusted_rand_score(class_list, cluster_list)

    print("Not included in clustering but aligned:", len(not_clustered))
    print("V:", v_score, "Completeness:", compl_score, "Homogeneity:", homog_score)
    print("Nr reads clustered but unaligned (i.e., no class and excluded from V-measure):",
          clustered_but_unaligned)
    return v_score, compl_score, homog_score, clustered_but_unaligned, ari

def calc_homogenity_comp_vmeas_training(df, y_train):
    truelabels = df['class']
    predictlabels = y_train
    homogenity_scr = homogeneity_score(truelabels, predictlabels)
    vmeasure_scr = v_measure_score(truelabels, predictlabels)
    completness_scr = completeness_score(truelabels, predictlabels)
    print("truelabels:", truelabels)
    print("predictlabels:", predictlabels)
    print("homogenity_scr={}, vmeasure_scr={}, completness_scr={}".format(
        homogenity_scr, vmeasure_scr, completness_scr))

def evaluate_bins(self):
    self.labels = []
    newcolors = []
    for bin in self.bins:
        for b in bin:
            self.labels.append(self.bins.index(bin))
            if self.colors is not None:
                newcolors.append(self.colors[b])
    self.colors = newcolors
    labels = numpy.array(self.labels)
    colors = numpy.array(self.colors)

    points = []
    for bin in self.bins:
        for b in bin:
            start_lat = self.data[b]['trip_start_location'][1]
            start_lon = self.data[b]['trip_start_location'][0]
            end_lat = self.data[b]['trip_end_location'][1]
            end_lon = self.data[b]['trip_end_location'][0]
            path = [start_lat, start_lon, end_lat, end_lon]
            points.append(path)

    if self.colors is not None:
        a = metrics.silhouette_score(numpy.array(points), labels)
        b = homogeneity_score(colors, labels)
        c = completeness_score(colors, labels)
        print('number of bins is ' + str(len(self.bins)))
        print('silhouette score is ' + str(a))
        print('homogeneity is ' + str(b))
        print('completeness is ' + str(c))
        # silhouette lies in [-1, 1], so rescale it to [0, 1] before averaging
        # it with homogeneity and completeness
        print('accuracy is ' + str(((a + 1) / 2.0 + b + c) / 3.0))

def compareAB(A, B, X):
    # measures the similarity of the two assignments, ignoring permutations
    # and with chance normalization
    ars = metrics.adjusted_rand_score(A, B)
    ars_str = '%17.3f' % ars
    # each cluster contains only members of a single class
    hs = homogeneity_score(A, B)
    hs_str = '%17.3f' % hs
    # all members of a given class are assigned to the same cluster
    cs = completeness_score(A, B)
    cs_str = '%17.3f' % cs
    vms = metrics.v_measure_score(A, B)
    vms_str = '%17.3f' % vms
    # geometric mean of the pairwise precision and recall
    fowlkes_mallows_score = metrics.fowlkes_mallows_score(A, B)
    fms_str = '%17.3f' % fowlkes_mallows_score
    sc = metrics.silhouette_score(X, B, metric='euclidean')
    sc_str = '%17.3f' % sc
    # assemble a LaTeX-style table row
    my_str = ars_str + "&" + hs_str + "&" + cs_str + "&" + vms_str + "&" + fms_str + "&" + sc_str
    return my_str

def main():
    m = 2000  # number of points
    n = 50    # number of dimensions
    k = 30    # number of latent clusters
    np.random.seed(3)
    X, z_true = draw_points(m, n, k=k)
    show_points(X, z_true, title="True")

    S = fully_connected_similarity(X)

    # Unnormalized spectral clustering
    # A = spectral_clustering(S, k)
    # Normalized spectral clustering according to Shi and Malik (2000)
    # A = spectral_clustering(S, k, normalization=LaplacianNorm.symmetric, generalized_eigenproblem=True)
    # Normalized spectral clustering according to Ng, Jordan, and Weiss (2002)
    A = spectral_clustering(S, k, normalization=LaplacianNorm.symmetric, norm_rows=True)
    show_points(X, A, title="Spectral Clustering")

    complete_score = completeness_score(z_true, A)
    homog_score = homogeneity_score(z_true, A)
    print("Completeness score: %s" % complete_score)
    print("Homogeneity score: %s" % homog_score)

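# The fully_connected_similarity helper above is not shown; a common choice
# (assumed here, not necessarily what the original used) is a Gaussian/RBF
# kernel over pairwise Euclidean distances:
import numpy as np
from scipy.spatial.distance import cdist

def fully_connected_similarity(X, sigma=1.0):
    # S_ij = exp(-||x_i - x_j||^2 / (2 * sigma^2))
    D = cdist(X, X, metric="euclidean")
    return np.exp(-(D ** 2) / (2.0 * sigma ** 2))
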
def print_scores(labels, predicted, svd):
    # compute each metric once instead of recomputing it for the print and the list
    homogeneity = homogeneity_score(labels, predicted)
    completeness = completeness_score(labels, predicted)
    v_measure = v_measure_score(labels, predicted)
    rand = adjusted_rand_score(labels, predicted)
    mutual_info = adjusted_mutual_info_score(labels, predicted)
    print("Homogeneity: " + str(homogeneity))
    print("completeness: " + str(completeness))
    print("V-measure: " + str(v_measure))
    print("RAND score: " + str(rand))
    print("Mutual Info: " + str(mutual_info))
    ret = [homogeneity, completeness, v_measure, rand, mutual_info]
    if svd:
        svd_all.append(ret)
    else:
        nmf_all.append(ret)
    return homogeneity

def calculate_scores(self):
    x, c, labels = self.x, self.c, self.labels
    self.v_measure = v_measure_score(c, labels)
    self.complete = completeness_score(c, labels)
    self.adjusted_mutual = adjusted_mutual_info_score(c, labels)
    self.adjusted_rand = adjusted_rand_score(c, labels)
    # silhouette is computed on the cluster assignment, not on the class labels
    self.silhouette = silhouette_score(x, labels)
    self.purity, self.partial_purity = self.__purity__()

def five_measure_scores(label_true, label_pred):
    print("Homogeneity_score = %f" % homogeneity_score(label_true, label_pred))
    print("Completeness_score = %f" % completeness_score(label_true, label_pred))
    print("Adjusted_rand_score = %f" % adjusted_rand_score(label_true, label_pred))
    print("V_measure_score = %f" % v_measure_score(label_true, label_pred))
    print("Adjusted_mutual_info_score = %f" % adjusted_mutual_info_score(label_true, label_pred))

def v_measure(cluster_labels, true_labels):
    h_score = homogeneity_score(true_labels, cluster_labels)
    c_score = completeness_score(true_labels, cluster_labels)
    v_score = v_measure_score(true_labels, cluster_labels)
    print("Homogeneity Score: %.6f" % h_score)
    print("Completeness Score: %.6f" % c_score)
    print("V Measure Score: %.6f" % v_score)
    return h_score, c_score, v_score

def print_scores(labels, predicted):
    print("Contingency: ")
    print(str(confusion_matrix(labels, predicted)))
    # compute each metric once instead of recomputing it for the print and the list
    homogeneity = homogeneity_score(labels, predicted)
    completeness = completeness_score(labels, predicted)
    v_measure = v_measure_score(labels, predicted)
    rand = adjusted_rand_score(labels, predicted)
    mutual_info = adjusted_mutual_info_score(labels, predicted)
    ret = [homogeneity, completeness, v_measure, rand, mutual_info]
    print("Homogeneity: " + str(homogeneity))
    print("completeness: " + str(completeness))
    print("V-measure: " + str(v_measure))
    print("RAND score: " + str(rand))
    print("Mutual Info: " + str(mutual_info))
    return ret

def get_landmarking(dataset_name, df):
    start = time.time()
    record = {'dataset': dataset_name.split('.')[0]}
    results = []
    n_samples = int(len(df) * 0.1) if len(df) > 400 else min(df.shape[0], 40)
    data = df.sample(n=n_samples, replace=False)
    labels = get_dbscan(data)
    k = len(np.unique(labels))
    labels2 = get_Kmeans(data, k, 40)

    full_tree = DecisionTreeClassifier()
    full_tree.fit(data, labels)
    worst_attr = np.argmin(full_tree.feature_importances_)

    X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3)
    best_stump = DecisionTreeClassifier(max_depth=1)
    random_stump = DecisionTreeClassifier(splitter="random", max_depth=1)
    worst_stump = DecisionTreeClassifier(max_depth=1)
    elite_knn = KNeighborsClassifier(n_neighbors=1)
    one_knn = KNeighborsClassifier(n_neighbors=1, algorithm="auto", weights="uniform",
                                   p=2, metric="minkowski")
    nb = GaussianNB()
    lda = LinearDiscriminantAnalysis()

    best_stump.fit(X_train, y_train)
    random_stump.fit(X_train, y_train)
    worst_stump.fit(X_train.iloc[:, worst_attr].values.reshape(-1, 1), y_train)
    elite_knn.fit(X_train, y_train)
    one_knn.fit(X_train, y_train)
    # lda.fit(X_train, y_train)
    nb.fit(X_train, y_train)

    record['LM1'] = np.log2(df.shape[0])
    record['LM2'] = np.log2(df.shape[1])
    record['LM3'] = accuracy_score(best_stump.predict(X_test), y_test)
    # record['LM4'] = f1_score(best_stump.predict(X_test), y_test, average='weighted')
    record['LM5'] = accuracy_score(random_stump.predict(X_test), y_test)
    # record['LM6'] = f1_score(random_stump.predict(X_test), y_test, average='weighted')
    # record['LM7'] = model.inertia_
    record['LM8'] = accuracy_score(elite_knn.predict(X_test), y_test)
    # record['LM9'] = f1_score(elite_knn.predict(X_test), y_test, average='weighted')
    # record['LM10'] = accuracy_score(lda.predict(X_test), y_test)
    # record['LM11'] = f1_score(lda.predict(X_test), y_test, average='weighted')
    record['LM12'] = accuracy_score(nb.predict(X_test), y_test)
    # record['LM13'] = f1_score(nb.predict(X_test), y_test, average='weighted')
    record['LM14'] = accuracy_score(one_knn.predict(X_test), y_test)
    # record['LM15'] = f1_score(one_knn.predict(X_test), y_test, average='weighted')
    record['LM16'] = accuracy_score(worst_stump.predict(X_test.iloc[:, worst_attr].values.reshape(-1, 1)), y_test)
    # record['LM17'] = f1_score(worst_stump.predict(X_test.iloc[:, worst_attr].values.reshape(-1, 1)), y_test, average='weighted')
    record['LM18'] = adjusted_rand_score(labels, labels2)
    record['LM19'] = adjusted_mutual_info_score(labels, labels2)
    record['LM20'] = completeness_score(labels, labels2)
    record['LM21'] = fowlkes_mallows_score(labels, labels2)
    end = time.time()
    return record, (df.shape[0], df.shape[1], end - start)

def calc_performance_score(self, algo_type: str, predicted, y_train):
    homo_score = homogeneity_score(y_train, predicted)
    complete_score = completeness_score(y_train, predicted)
    adjusted_mut_info_score = adjusted_mutual_info_score(y_train, predicted)
    print(algo_type + ' homo_score ' + "{:.2f}".format(homo_score))
    print(algo_type + ' complete_score ' + "{:.2f}".format(complete_score))
    print(algo_type + ' adjusted_mut_info_score ' + "{:.2f}".format(adjusted_mut_info_score))

def get_homogeneity_and_completeness(self, clusters, category):
    labels = getattr(self.scb.nodes, 'to_' + category)(range(len(self.scb.nodes)))
    # re-index the raw category labels to consecutive integer ids
    keys = dict()
    for i, label in enumerate(labels):
        if label not in keys:
            keys[label] = len(keys)
        labels[i] = keys[label]
    hs = homogeneity_score(labels, clusters)
    cs = completeness_score(labels, clusters)
    return {'homogeneity': hs, 'completeness': cs}

def km(clusters, dats, Y):
    NMI = defaultdict(dict)
    INL = defaultdict(dict)
    CMS = defaultdict(dict)
    SIL = defaultdict(dict)
    for i, dat in enumerate(dats):
        for cluster in clusters:
            km = KMeans(n_clusters=cluster, random_state=0).fit(dat)
            cluster_labels = km.labels_
            NMI[i][cluster] = normalized_mutual_info_score(Y, cluster_labels)
            INL[i][cluster] = km.inertia_
            CMS[i][cluster] = completeness_score(Y, cluster_labels)
            SIL[i][cluster] = silhouette_score(dat, cluster_labels)

    # one figure per metric, one curve per dataset representation
    dataset_names = ['PCA', 'ICA', 'RP', 'RF', 'Original']
    plots = [
        (NMI, 'Normalized Mutual Information on K-Means'),
        (INL, 'Elbow Method on K-Means'),
        (CMS, 'Completeness on K-Means'),
        (SIL, 'Silhouette on K-Means'),
    ]
    for scores, title in plots:
        for i in range(len(dataset_names)):
            plt.plot(clusters, list(scores[i].values()), 'bx-', color='C%d' % i)
        plt.legend(dataset_names)
        plt.xlabel('k')
        plt.title(title)
        plt.show()

def clusterEvaluation(trueY, fittedY):
    result = dict()
    ## NMI denotes normalized mutual information
    ## ARS denotes adjusted rand score
    ## HS stands for homogeneity_score, 1 means perfect
    ## CS stands for completeness_score
    ## VM represents v_measure_score ranging [0, 1], 1.0 is perfectly complete labeling
    result['NMI'] = normalized_mutual_info_score(trueY, fittedY)
    result['ARS'] = adjusted_rand_score(trueY, fittedY)
    result['HS'] = homogeneity_score(trueY, fittedY)
    result['CS'] = completeness_score(trueY, fittedY)
    result['VM'] = v_measure_score(trueY, fittedY)
    return result

def print_five_measures(target, predicted):
    print('homogeneity score:')
    print(homogeneity_score(target, predicted))
    print('completeness score:')
    print(completeness_score(target, predicted))
    print('V-measure:')
    print(v_measure_score(target, predicted))
    print('adjusted rand score:')
    print(adjusted_rand_score(target, predicted))
    print('adjusted mutual info score:')
    print(adjusted_mutual_info_score(target, predicted))

def main():
    start = timeit.default_timer()
    # Convert the image data to bitmaps and scale them
    X, Y = load(scale=50 * 100)
    # Run Birch and assign labels
    pred_label = process(X, labels_num=32)
    # Compute accuracy against the true labels
    accuracy(pred_label, Y, labels_num=32)
    print("Completeness :", completeness_score(Y, pred_label) * 100, "%")
    print("Homogeneity :", homogeneity_score(Y, pred_label) * 100, "%")
    stop = timeit.default_timer()
    print("Time :", stop - start, "s")

def evaluate(clusters, typedict):
    """Given the predicted clusters and type dictionary, this function
    calculates homogeneity, completeness, and V-measure, assuming the gold
    tags are the most frequent tags for each type in the type dict.

    input:
        clusters (dict of int:Cluster): Clusters by id
        typedict (dict of str:Word): Word by wordform
    return:
        (float): homogeneity score
        (float): completeness score
        (float): V-measure"""
    # The instructor completed this function in 7 lines, including the return.
    golds = []
    preds = []
    # Your code here
    return homogeneity_score(golds, preds), completeness_score(
        golds, preds), v_measure_score(golds, preds, beta=2.0)

def __computeKmeansMetrics(self, data, predictedLabels, gsLabels, title, basePath, phase4Results):
    metrics = dict()
    metrics["davies_bouldin_score"] = clusteringMetrics.davies_bouldin_score(data, predictedLabels)
    metrics["adjusted_rand_score"] = clusteringMetrics.adjusted_rand_score(gsLabels, predictedLabels)
    metrics["completeness_score"] = clusteringMetrics.completeness_score(gsLabels, predictedLabels)
    metrics["purity_score"] = purity_score(gsLabels, predictedLabels)
    confusionMatrixMapped = clusteringMappingMetric(predictedLabels, gsLabels)
    confusionMatrix = confusion_matrix(gsLabels, predictedLabels)
    kdf = pd.DataFrame.from_dict(metrics, orient='index', columns=[title])
    phase4Results = phase4Results.join(kdf)
    np.savetxt(basePath / f"{title}_kmeans_confusionMapping.csv", confusionMatrixMapped,
               delimiter=",", fmt='%i')
    np.savetxt(basePath / f"{title}_kmeans_confusion.csv", confusionMatrix,
               delimiter=",", fmt='%i')
    return phase4Results

def k_means_clustering(training_data, target_labels, title='Contingency Matrix',
                       n_clusters=20, random_state=0, max_iter=1000, n_init=30):
    start = time.time()
    km = KMeans(n_clusters=n_clusters, random_state=random_state,
                max_iter=max_iter, n_init=n_init)
    km.fit(training_data)
    print("Finished clustering in %f seconds" % (time.time() - start))

    cm = contingency_matrix(target_labels, km.labels_)
    # reorder the columns to maximize along the diagonal (Hungarian assignment)
    rows, cols = linear_sum_assignment(cm, maximize=True)
    new_cm = cm[rows[:, np.newaxis], cols]
    print("Show Contingency Matrix:")
    plot_contingency_table_20(new_cm, title=title)

    print("Report 5 Measures for K-Means Clustering")
    homogeneity = homogeneity_score(target_labels, km.labels_)
    completeness = completeness_score(target_labels, km.labels_)
    v_measure = v_measure_score(target_labels, km.labels_)
    adjusted_rand_index = adjusted_rand_score(target_labels, km.labels_)
    adjusted_mutual_info = adjusted_mutual_info_score(target_labels, km.labels_)
    print("Homogeneity Score: %f" % homogeneity)
    print("Completeness Score: %f" % completeness)
    print("V-Measure Score: %f" % v_measure)
    print("Adjusted Rand Index: %f" % adjusted_rand_index)
    print("Adjusted Mutual Information: %f" % adjusted_mutual_info)
    results = {
        "homogeneity": homogeneity,
        "completeness": completeness,
        "v_measure": v_measure,
        "adjusted_rand_index": adjusted_rand_index,
        "adjusted_mutual_info": adjusted_mutual_info
    }
    return results, km

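# A related, hedged add-on (not part of the function above): the same Hungarian
# matching can be turned into a clustering "accuracy", i.e. the fraction of
# samples that lie on the maximized diagonal of the reordered contingency matrix.
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics.cluster import contingency_matrix

def clustering_accuracy(target_labels, cluster_labels):
    cm = contingency_matrix(target_labels, cluster_labels)
    rows, cols = linear_sum_assignment(cm, maximize=True)
    # sum of the optimally matched entries divided by the total number of samples
    return cm[rows, cols].sum() / cm.sum()
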
def test():
    model.eval()
    z = model.encode(data.x, data.train_pos_edge_index)

    # Cluster embedded values using k-means.
    kmeans_input = z.cpu().numpy()
    kmeans = KMeans(n_clusters=7, random_state=0).fit(kmeans_input)
    pred = kmeans.predict(kmeans_input)

    labels = data.y.cpu().numpy()
    completeness = completeness_score(labels, pred)
    hm = homogeneity_score(labels, pred)
    nmi = v_measure_score(labels, pred)

    auc, ap = model.test(z, data.test_pos_edge_index, data.test_neg_edge_index)
    return auc, ap, completeness, hm, nmi

def evaluate(self):
    eval_result_dict = {}
    eval_result_dict['ami'] = adjusted_mutual_info_score(self.data['true_y'], self.data['pred_y'])
    eval_result_dict['rand'] = adjusted_rand_score(self.data['true_y'], self.data['pred_y'])
    eval_result_dict['comp'] = completeness_score(self.data['true_y'], self.data['pred_y'])
    eval_result_dict['fow'] = fowlkes_mallows_score(self.data['true_y'], self.data['pred_y'])
    eval_result_dict['hom'] = homogeneity_score(self.data['true_y'], self.data['pred_y'])
    eval_result_dict['nmi'] = normalized_mutual_info_score(self.data['true_y'], self.data['pred_y'])
    eval_result_dict['v_score'] = v_measure_score(self.data['true_y'], self.data['pred_y'])
    return eval_result_dict

def compute_result(self, loss, preds, targets, stage):
    # Cluster embedded values using k-means.
    kmeans_input = preds.cpu().numpy()
    kmeans = KMeans(n_clusters=7, random_state=0).fit(kmeans_input)
    pred = kmeans.predict(kmeans_input)

    labels = targets.cpu().numpy()
    completeness = torch.Tensor([completeness_score(labels, pred)])
    hm = torch.Tensor([homogeneity_score(labels, pred)])
    nmi = torch.Tensor([v_measure_score(labels, pred)])
    # auc, ap = model.test(z, data.test_pos_edge_index, data.test_neg_edge_index)

    result = pl.EvalResult(loss)
    result.log(f"{stage}_completeness", completeness, prog_bar=True)
    result.log(f"{stage}_hm", hm, prog_bar=True)
    result.log(f"{stage}_nmi", nmi, prog_bar=True)
    return result

def compareAB(A, B):
    # measures the similarity of the two assignments, ignoring permutations
    # and with chance normalization
    ars = metrics.adjusted_rand_score(A, B)
    print("adjusted_rand_score " + str(ars))

    # measures the agreement of the two assignments, ignoring permutations
    # amis = metrics.adjusted_mutual_info_score(A, B)
    # print("adjusted_mutual_info_score " + str(amis))

    # each cluster contains only members of a single class
    hs = homogeneity_score(A, B)
    print("homogeneity_score " + str(hs))

    # all members of a given class are assigned to the same cluster
    cs = completeness_score(A, B)
    print("completeness_score " + str(cs))

    vms = metrics.v_measure_score(A, B)
    print("v_measure_score " + str(vms))

    # geometric mean of the pairwise precision and recall
    fowlkes_mallows_score = metrics.fowlkes_mallows_score(A, B)
    print("fowlkes_mallows_score " + str(fowlkes_mallows_score))

def main():
    start = timeit.default_timer()
    # Convert the image data to bitmaps and scale them
    Test_X, Test_Y, Train_X, Train_Y = load(scale=50 * 100)
    print(len(Test_X))
    print(len(Test_Y))
    print(len(Train_X))
    print(len(Train_Y))
    # Run KMeans and assign labels
    pred_label = process(Train_X, Train_Y, Test_X)
    # Compute accuracy against the true labels
    accuracy(pred_label, Test_Y, labels_num=32)
    print("Completeness :", completeness_score(Test_Y, pred_label) * 100, "%")
    print("Homogeneity :", homogeneity_score(Test_Y, pred_label) * 100, "%")
    stop = timeit.default_timer()
    print("Time :", stop - start, "s")

def computeMetrics(self, data, trueLabels, predictedLabels):
    confusionMatrixes = dict()
    metrics = dict()
    for algorithmName, labels in predictedLabels.items():
        metrics[algorithmName] = dict()
        metrics[algorithmName]["davies_bouldin_score"] = clusteringMetrics.davies_bouldin_score(
            data, labels)
        metrics[algorithmName]["adjusted_rand_score"] = clusteringMetrics.adjusted_rand_score(
            trueLabels, labels)
        metrics[algorithmName]["completeness_score"] = clusteringMetrics.completeness_score(
            trueLabels, labels)
        metrics[algorithmName]["purity_score"] = purity_score(trueLabels, labels)
        confusionMatrixes[algorithmName] = clusteringMappingMetric(labels, trueLabels)
    return metrics, confusionMatrixes

def dbscanLibrarySklearn():
    X_train, X_test, labels_true, y_test = readData()

    # Compute DBSCAN
    db = DBSCAN(eps=0.6, min_samples=4).fit(X_train)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    n_noise_ = list(labels).count(-1)
    print('Estimated number of clusters: %d' % n_clusters_)
    print('Estimated number of noise points: %d' % n_noise_)
    print("Homogeneity: %0.3f" % homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % completeness_score(labels_true, labels))

    # Plot result
    import matplotlib.pyplot as plt

    # Black removed and is used for noise instead.
    unique_labels = set(labels)
    colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black used for noise.
            col = [0, 0, 0, 1]
        class_member_mask = (labels == k)
        xy = X_train[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=5)
        xy = X_train[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=5)
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()

def plot_kmeans_em_v_meansrue(n_clusters):
    row_idx = n_clusters - 5
    v_measure_array = []
    xvalues = all_em_clustering_np_matrix.keys()
    for key in xvalues:
        kmeans_label = all_kmeans_clustering_np_matrix[key][row_idx]
        em_label = all_em_clustering_np_matrix[key][row_idx]
        c = completeness_score(kmeans_label, em_label)
        h = homogeneity_score(kmeans_label, em_label)
        # V-measure is the harmonic mean of completeness and homogeneity
        v_measure = 2 * c * h / (c + h)
        v_measure_array.append(v_measure)

    plt.figure(figsize=(8, 8))
    plt.plot(range(len(v_measure_array)), v_measure_array, "o-")
    plt.xticks(np.arange(len(v_measure_array)), xvalues)
    plt.yticks(np.linspace(0.6, 1.0, 9))
    plt.grid(True)
    plt.xlabel("\nDatasets", fontsize=14)
    plt.ylabel("V-measure score", fontsize=14)
    plt.title("Comparison of clustering consistency\nbetween EM and KMeans on the same dataset",
              fontsize=14)
    plt.show()

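# Sanity check (illustrative, on toy labelings rather than the data above):
# sklearn's v_measure_score with its default beta=1.0 equals the harmonic mean
# of completeness and homogeneity computed by hand as in the function above.
from sklearn.metrics import completeness_score, homogeneity_score, v_measure_score

toy_a = [0, 0, 1, 1, 2, 2]
toy_b = [0, 0, 1, 2, 2, 2]
c = completeness_score(toy_a, toy_b)
h = homogeneity_score(toy_a, toy_b)
assert abs(2 * c * h / (c + h) - v_measure_score(toy_a, toy_b)) < 1e-12
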
from sklearn.metrics.cluster import homogeneity_score
print(homogeneity_score([0, 0, 1, 1], [1, 1, 0, 0]))
print(homogeneity_score([0, 0, 0, 1, 1, 1], [3, 2, 2, 2, 3, 3]))

from sklearn.metrics.cluster import completeness_score
print(completeness_score([0, 0, 1, 1], [1, 1, 0, 0]))
print(completeness_score([0, 0, 0, 1, 1, 1], [3, 2, 2, 2, 3, 3]))

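# A hedged illustration on the same toy labelings (not in the original snippet):
# homogeneity and completeness are mirror images of each other, so swapping the
# argument order turns one score into the other.
a = [0, 0, 0, 1, 1, 1]
b = [3, 2, 2, 2, 3, 3]
assert abs(homogeneity_score(a, b) - completeness_score(b, a)) < 1e-12
assert abs(completeness_score(a, b) - homogeneity_score(b, a)) < 1e-12
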
# X_iso = manifold.Isomap(n_neighbors=5, n_components=2).fit_transform(X_train)
# end = int(round(time.time() * 1000))
# print("--Isomap finished in ", (end - start), "ms--------------")
# print("Done.")

# spectral clustering, fitting and predictions
spectral = cluster.SpectralClustering(n_clusters=10, eigen_solver='arpack',
                                      affinity="nearest_neighbors")
# X = spectral.fit(X_iso)
X = spectral.fit(X_spec)
# y_pred = spectral.fit_predict(X_iso)
y_pred = spectral.fit_predict(X_spec)

# clustering evaluation metrics
print(confusion_matrix(y_train, y_pred))
print(completeness_score(y_train, y_pred))

with plt.style.context('fivethirtyeight'):
    plt.title("Spectral embedding & spectral clustering on MNIST")
    plt.scatter(X_spec[:, 0], X_spec[:, 1], c=y_pred, s=50,
                cmap=plt.cm.get_cmap("jet", 10))
    plt.colorbar(ticks=range(10))
    plt.clim(-0.5, 9.5)
    plt.show()

def check_clusters(self):
    print(self.colors)
    print('number of clusters is ' + str(self.clusters))
    print('silhouette score is ' + str(self.sil))
    print('homogeneity is ' + str(homogeneity_score(self.colors, self.labels)))
    print('completeness is ' + str(completeness_score(self.colors, self.labels)))

def evaluate(colors, labels):
    b = homogeneity_score(colors, labels)
    c = completeness_score(colors, labels)
    # use a float format specifier: %d would truncate the scores to 0 or 1
    logging.debug('homogeneity is %f' % b)
    logging.debug('completeness is %f' % c)

def evaluate(colors, labels):
    b = homogeneity_score(colors, labels)
    c = completeness_score(colors, labels)
    print('homogeneity is ' + str(b))
    print('completeness is ' + str(c))