def evaluation_clustering(features=fea, ground_truth=gnd_raw, ncenters=10): from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation from shogun.Features import MulticlassLabels from shogun.Mathematics import Math # reproducable results Math.init_random(1) centroids = run_clustering(features, ncenters) gnd_hat = assign_labels(features, centroids, ncenters) gnd = MulticlassLabels(ground_truth) AccuracyEval = ClusteringAccuracy() AccuracyEval.best_map(gnd_hat, gnd) accuracy = AccuracyEval.evaluate(gnd_hat, gnd) #print(('Clustering accuracy = %.4f' % accuracy)) MIEval = ClusteringMutualInformation() mutual_info = MIEval.evaluate(gnd_hat, gnd) #print(('Clustering mutual information = %.4f' % mutual_info)) # TODO mutual information does not work with serialization #return gnd, gnd_hat, accuracy, MIEval, mutual_info return gnd, gnd_hat, accuracy
def evaluation_clustering (features=fea, ground_truth=gnd_raw, ncenters=10): from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation from shogun.Features import MulticlassLabels from shogun.Mathematics import Math # reproducable results Math.init_random(1) centroids = run_clustering(features, ncenters) gnd_hat = assign_labels(features, centroids, ncenters) gnd = MulticlassLabels(ground_truth) AccuracyEval = ClusteringAccuracy() AccuracyEval.best_map(gnd_hat, gnd) accuracy = AccuracyEval.evaluate(gnd_hat, gnd) #print(('Clustering accuracy = %.4f' % accuracy)) MIEval = ClusteringMutualInformation() mutual_info = MIEval.evaluate(gnd_hat, gnd) #print(('Clustering mutual information = %.4f' % mutual_info)) # TODO mutual information does not work with serialization #return gnd, gnd_hat, accuracy, MIEval, mutual_info return gnd, gnd_hat, accuracy
def evaluation_clustering_simple(n_data=100, sqrt_num_blobs=4, distance=5): from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation from shogun.Features import MulticlassLabels, GaussianBlobsDataGenerator from shogun.Mathematics import Math # reproducable results Math.init_random(1) # produce sone Gaussian blobs to cluster ncenters = sqrt_num_blobs**2 stretch = 1 angle = 1 gen = GaussianBlobsDataGenerator(sqrt_num_blobs, distance, stretch, angle) features = gen.get_streamed_features(n_data) X = features.get_feature_matrix() # compute approximate "ground truth" labels via taking the closest blob mean coords = array(range(0, sqrt_num_blobs * distance, distance)) idx_0 = [abs(coords - x).argmin() for x in X[0]] idx_1 = [abs(coords - x).argmin() for x in X[1]] ground_truth = array( [idx_0[i] * sqrt_num_blobs + idx_1[i] for i in range(n_data)], dtype="float64") #for label in unique(ground_truth): # indices=ground_truth==label # plot(X[0][indices], X[1][indices], 'o') #show() centroids = run_clustering(features, ncenters) gnd_hat = assign_labels(features, centroids, ncenters) gnd = MulticlassLabels(ground_truth) AccuracyEval = ClusteringAccuracy() AccuracyEval.best_map(gnd_hat, gnd) accuracy = AccuracyEval.evaluate(gnd_hat, gnd) # in this case we know that the clustering has to be very good #print(('Clustering accuracy = %.4f' % accuracy)) assert (accuracy > 0.8) MIEval = ClusteringMutualInformation() mutual_info = MIEval.evaluate(gnd_hat, gnd) #print(('Clustering mutual information = %.4f' % mutual_info)) # TODO add multiclass labels and MI once the serialization works #return gnd, accuracy, mutual_info return accuracy
def evaluation_clustering_simple (n_data=100, sqrt_num_blobs=4, distance=5): from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation from shogun.Features import MulticlassLabels, GaussianBlobsDataGenerator from shogun.Mathematics import Math # reproducable results Math.init_random(1) # produce sone Gaussian blobs to cluster ncenters=sqrt_num_blobs**2 stretch=1 angle=1 gen=GaussianBlobsDataGenerator(sqrt_num_blobs, distance, stretch, angle) features=gen.get_streamed_features(n_data) X=features.get_feature_matrix() # compute approximate "ground truth" labels via taking the closest blob mean coords=array(range(0,sqrt_num_blobs*distance,distance)) idx_0=[abs(coords -x).argmin() for x in X[0]] idx_1=[abs(coords -x).argmin() for x in X[1]] ground_truth=array([idx_0[i]*sqrt_num_blobs + idx_1[i] for i in range(n_data)], dtype="float64") #for label in unique(ground_truth): # indices=ground_truth==label # plot(X[0][indices], X[1][indices], 'o') #show() centroids = run_clustering(features, ncenters) gnd_hat = assign_labels(features, centroids, ncenters) gnd = MulticlassLabels(ground_truth) AccuracyEval = ClusteringAccuracy() AccuracyEval.best_map(gnd_hat, gnd) accuracy = AccuracyEval.evaluate(gnd_hat, gnd) # in this case we know that the clustering has to be very good #print(('Clustering accuracy = %.4f' % accuracy)) assert(accuracy>0.8) MIEval = ClusteringMutualInformation() mutual_info = MIEval.evaluate(gnd_hat, gnd) #print(('Clustering mutual information = %.4f' % mutual_info)) # TODO add multiclass labels and MI once the serialization works #return gnd, accuracy, mutual_info return accuracy
def evaluation_clustering (features=fea, ground_truth=gnd_raw, ncenters=10): from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation from shogun.Features import MulticlassLabels centroids = run_clustering(features, ncenters) gnd_hat = assign_labels(features, centroids, ncenters) gnd = MulticlassLabels(ground_truth) AccuracyEval = ClusteringAccuracy() AccuracyEval.best_map(gnd_hat, gnd) accuracy = AccuracyEval.evaluate(gnd_hat, gnd) #print(('Clustering accuracy = %.4f' % accuracy)) MIEval = ClusteringMutualInformation() mutual_info = MIEval.evaluate(gnd_hat, gnd) #print(('Clustering mutual information = %.4f' % mutual_info)) return gnd, accuracy, mutual_info
def evaluation_clustering(features=fea, ground_truth=gnd_raw, ncenters=10): from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation from shogun.Features import MulticlassLabels centroids = run_clustering(features, ncenters) gnd_hat = assign_labels(features, centroids, ncenters) gnd = MulticlassLabels(ground_truth) AccuracyEval = ClusteringAccuracy() AccuracyEval.best_map(gnd_hat, gnd) accuracy = AccuracyEval.evaluate(gnd_hat, gnd) #print(('Clustering accuracy = %.4f' % accuracy)) MIEval = ClusteringMutualInformation() mutual_info = MIEval.evaluate(gnd_hat, gnd) #print(('Clustering mutual information = %.4f' % mutual_info)) return gnd, accuracy, mutual_info
return kmeans.get_cluster_centers() def assign_labels(data, centroids): from shogun.Classifier import KNN from numpy import arange labels = Labels(arange(1.,11.)) fea = RealFeatures(data) fea_centroids = RealFeatures(centroids) distance = EuclidianDistance(fea_centroids, fea_centroids) knn = KNN(1, distance, labels) knn.train() return knn.apply(fea) if __name__ == '__main__': (fea, gnd_raw) = prepare_data() centroids = run_clustering(fea, 10) gnd_hat = assign_labels(fea, centroids) gnd = Labels(gnd_raw) AccuracyEval = ClusteringAccuracy() AccuracyEval.best_map(gnd_hat, gnd) accuracy = AccuracyEval.evaluate(gnd_hat, gnd) print(('Clustering accuracy = %.4f' % accuracy)) MIEval = ClusteringMutualInformation() mutual_info = MIEval.evaluate(gnd_hat, gnd) print(('Clustering mutual information = %.4f' % mutual_info))
def assign_labels(data, centroids): from shogun.Classifier import KNN from numpy import arange labels = Labels(arange(1., 11.)) fea = RealFeatures(data) fea_centroids = RealFeatures(centroids) distance = EuclidianDistance(fea_centroids, fea_centroids) knn = KNN(1, distance, labels) knn.train() return knn.apply(fea) if __name__ == '__main__': (fea, gnd_raw) = prepare_data() centroids = run_clustering(fea, 10) gnd_hat = assign_labels(fea, centroids) gnd = Labels(gnd_raw) AccuracyEval = ClusteringAccuracy() AccuracyEval.best_map(gnd_hat, gnd) accuracy = AccuracyEval.evaluate(gnd_hat, gnd) print(('Clustering accuracy = %.4f' % accuracy)) MIEval = ClusteringMutualInformation() mutual_info = MIEval.evaluate(gnd_hat, gnd) print(('Clustering mutual information = %.4f' % mutual_info))