Esempio n. 1
0
def evaluation_clustering(features=fea, ground_truth=gnd_raw, ncenters=10):
    """Cluster `features` and score the result against `ground_truth`.

    Cluster centers come from `run_clustering`, predicted labels from
    `assign_labels`, and the prediction is scored with Shogun's
    `ClusteringAccuracy`. `ClusteringMutualInformation` is evaluated as well,
    but its value is not part of the return value.

    Args:
        features: Data forwarded unchanged to `run_clustering` and
            `assign_labels`.
        ground_truth: Raw reference labels; wrapped in `MulticlassLabels`.
        ncenters: Center count forwarded to `run_clustering` and
            `assign_labels`.

    Returns:
        Tuple `(reference_labels, predicted_labels, accuracy)`.
    """
    from shogun.Evaluation import ClusteringAccuracy
    from shogun.Evaluation import ClusteringMutualInformation
    from shogun.Features import MulticlassLabels
    from shogun.Mathematics import Math

    # Fixed seed so that repeated runs give reproducible results.
    Math.init_random(1)

    centers = run_clustering(features, ncenters)
    predicted = assign_labels(features, centers, ncenters)
    reference = MulticlassLabels(ground_truth)

    accuracy_metric = ClusteringAccuracy()
    # NOTE(review): best_map presumably aligns the predicted cluster ids with
    # the reference ids before scoring -- confirm against the Shogun docs.
    accuracy_metric.best_map(predicted, reference)
    accuracy = accuracy_metric.evaluate(predicted, reference)

    # The mutual information is still evaluated, but deliberately not returned.
    # TODO mutual information does not work with serialization
    mi_metric = ClusteringMutualInformation()
    mi_metric.evaluate(predicted, reference)

    return reference, predicted, accuracy
Esempio n. 2
0
def evaluation_clustering(features=fea, ground_truth=gnd_raw, ncenters=10):
	"""Cluster `features` and score the result against `ground_truth`.

	Cluster centers come from `run_clustering`, predicted labels from
	`assign_labels`, and the prediction is scored with Shogun's
	`ClusteringAccuracy`. `ClusteringMutualInformation` is evaluated as well,
	but its value is not part of the return value.

	Args:
		features: Data forwarded unchanged to `run_clustering` and
			`assign_labels`.
		ground_truth: Raw reference labels; wrapped in `MulticlassLabels`.
		ncenters: Center count forwarded to `run_clustering` and
			`assign_labels`.

	Returns:
		Tuple `(reference_labels, predicted_labels, accuracy)`.
	"""
	from shogun.Evaluation import ClusteringAccuracy
	from shogun.Evaluation import ClusteringMutualInformation
	from shogun.Features import MulticlassLabels
	from shogun.Mathematics import Math

	# Fixed seed so that repeated runs give reproducible results.
	Math.init_random(1)

	centers = run_clustering(features, ncenters)
	predicted = assign_labels(features, centers, ncenters)
	reference = MulticlassLabels(ground_truth)

	accuracy_metric = ClusteringAccuracy()
	# NOTE(review): best_map presumably aligns the predicted cluster ids with
	# the reference ids before scoring -- confirm against the Shogun docs.
	accuracy_metric.best_map(predicted, reference)
	accuracy = accuracy_metric.evaluate(predicted, reference)

	# The mutual information is still evaluated, but deliberately not returned.
	# TODO mutual information does not work with serialization
	mi_metric = ClusteringMutualInformation()
	mi_metric.evaluate(predicted, reference)

	return reference, predicted, accuracy
def evaluation_clustering_simple(n_data=100, sqrt_num_blobs=4, distance=5):
    """Cluster generated Gaussian blobs and check that the accuracy is high.

    Data is drawn from a `GaussianBlobsDataGenerator`, approximate reference
    labels are derived by snapping each sample to the closest blob coordinate,
    and the clustering produced by `run_clustering` / `assign_labels` is
    scored with `ClusteringAccuracy`.

    Args:
        n_data: Number of samples requested from the generator.
        sqrt_num_blobs: First generator argument; its square is used as the
            number of cluster centers.
        distance: Second generator argument; also the spacing of the
            coordinate grid used to build the reference labels.

    Returns:
        The clustering accuracy.

    Raises:
        AssertionError: If the accuracy is not greater than 0.8.
    """
    from shogun.Evaluation import ClusteringAccuracy
    from shogun.Evaluation import ClusteringMutualInformation
    from shogun.Features import MulticlassLabels
    from shogun.Features import GaussianBlobsDataGenerator
    from shogun.Mathematics import Math

    # Fixed seed so that repeated runs give reproducible results.
    Math.init_random(1)

    # Produce some Gaussian blobs to cluster (stretch=1, angle=1).
    ncenters = sqrt_num_blobs ** 2
    generator = GaussianBlobsDataGenerator(sqrt_num_blobs, distance, 1, 1)
    features = generator.get_streamed_features(n_data)
    samples = features.get_feature_matrix()

    # Approximate "ground truth": for each of the two coordinate rows, pick
    # the index of the closest grid coordinate, then combine both indices
    # into a single label.
    grid = array(range(0, sqrt_num_blobs * distance, distance))

    def closest_grid_index(value):
        return abs(grid - value).argmin()

    first_axis = [closest_grid_index(value) for value in samples[0]]
    second_axis = [closest_grid_index(value) for value in samples[1]]
    combined = [
        first_axis[i] * sqrt_num_blobs + second_axis[i]
        for i in range(n_data)
    ]
    ground_truth = array(combined, dtype="float64")

    # Debugging aid for visual inspection of the reference labels:
    #for label in unique(ground_truth):
    #    indices = ground_truth == label
    #    plot(samples[0][indices], samples[1][indices], 'o')
    #show()

    centers = run_clustering(features, ncenters)
    predicted = assign_labels(features, centers, ncenters)
    reference = MulticlassLabels(ground_truth)

    accuracy_metric = ClusteringAccuracy()
    # NOTE(review): best_map presumably aligns the predicted cluster ids with
    # the reference ids before scoring -- confirm against the Shogun docs.
    accuracy_metric.best_map(predicted, reference)
    accuracy = accuracy_metric.evaluate(predicted, reference)

    # In this case we know that the clustering has to be very good.
    assert accuracy > 0.8

    # The mutual information is still evaluated, but deliberately not returned.
    mi_metric = ClusteringMutualInformation()
    mi_metric.evaluate(predicted, reference)

    # TODO add multiclass labels and MI once the serialization works
    return accuracy
def evaluation_clustering_simple(n_data=100, sqrt_num_blobs=4, distance=5):
	"""Cluster generated Gaussian blobs and check that the accuracy is high.

	Data is drawn from a `GaussianBlobsDataGenerator`, approximate reference
	labels are derived by snapping each sample to the closest blob coordinate,
	and the clustering produced by `run_clustering` / `assign_labels` is
	scored with `ClusteringAccuracy`.

	Args:
		n_data: Number of samples requested from the generator.
		sqrt_num_blobs: First generator argument; its square is used as the
			number of cluster centers.
		distance: Second generator argument; also the spacing of the
			coordinate grid used to build the reference labels.

	Returns:
		The clustering accuracy.

	Raises:
		AssertionError: If the accuracy is not greater than 0.8.
	"""
	from shogun.Evaluation import ClusteringAccuracy
	from shogun.Evaluation import ClusteringMutualInformation
	from shogun.Features import MulticlassLabels
	from shogun.Features import GaussianBlobsDataGenerator
	from shogun.Mathematics import Math

	# Fixed seed so that repeated runs give reproducible results.
	Math.init_random(1)

	# Produce some Gaussian blobs to cluster (stretch=1, angle=1).
	ncenters = sqrt_num_blobs ** 2
	generator = GaussianBlobsDataGenerator(sqrt_num_blobs, distance, 1, 1)
	features = generator.get_streamed_features(n_data)
	samples = features.get_feature_matrix()

	# Approximate "ground truth": for each of the two coordinate rows, pick
	# the index of the closest grid coordinate, then combine both indices
	# into a single label.
	grid = array(range(0, sqrt_num_blobs * distance, distance))

	def closest_grid_index(value):
		return abs(grid - value).argmin()

	first_axis = [closest_grid_index(value) for value in samples[0]]
	second_axis = [closest_grid_index(value) for value in samples[1]]
	combined = [
		first_axis[i] * sqrt_num_blobs + second_axis[i]
		for i in range(n_data)
	]
	ground_truth = array(combined, dtype="float64")

	# Debugging aid for visual inspection of the reference labels:
	#for label in unique(ground_truth):
	#	indices = ground_truth == label
	#	plot(samples[0][indices], samples[1][indices], 'o')
	#show()

	centers = run_clustering(features, ncenters)
	predicted = assign_labels(features, centers, ncenters)
	reference = MulticlassLabels(ground_truth)

	accuracy_metric = ClusteringAccuracy()
	# NOTE(review): best_map presumably aligns the predicted cluster ids with
	# the reference ids before scoring -- confirm against the Shogun docs.
	accuracy_metric.best_map(predicted, reference)
	accuracy = accuracy_metric.evaluate(predicted, reference)

	# In this case we know that the clustering has to be very good.
	assert accuracy > 0.8

	# The mutual information is still evaluated, but deliberately not returned.
	mi_metric = ClusteringMutualInformation()
	mi_metric.evaluate(predicted, reference)

	# TODO add multiclass labels and MI once the serialization works
	return accuracy
Esempio n. 5
0
def evaluation_clustering(features=fea, ground_truth=gnd_raw, ncenters=10):
	"""Cluster `features` and score the result against `ground_truth`.

	Cluster centers come from `run_clustering`, predicted labels from
	`assign_labels`; the prediction is scored with Shogun's
	`ClusteringAccuracy` and `ClusteringMutualInformation`.

	Args:
		features: Data forwarded unchanged to `run_clustering` and
			`assign_labels`.
		ground_truth: Raw reference labels; wrapped in `MulticlassLabels`.
		ncenters: Center count forwarded to `run_clustering` and
			`assign_labels`.

	Returns:
		Tuple `(reference_labels, accuracy, mutual_information)`.
	"""
	from shogun.Evaluation import ClusteringAccuracy
	from shogun.Evaluation import ClusteringMutualInformation
	from shogun.Features import MulticlassLabels

	centers = run_clustering(features, ncenters)
	predicted = assign_labels(features, centers, ncenters)
	reference = MulticlassLabels(ground_truth)

	accuracy_metric = ClusteringAccuracy()
	# NOTE(review): best_map presumably aligns the predicted cluster ids with
	# the reference ids before scoring -- confirm against the Shogun docs.
	accuracy_metric.best_map(predicted, reference)
	accuracy = accuracy_metric.evaluate(predicted, reference)

	mi_metric = ClusteringMutualInformation()
	mutual_information = mi_metric.evaluate(predicted, reference)

	return reference, accuracy, mutual_information
Esempio n. 6
0
def evaluation_clustering(features=fea, ground_truth=gnd_raw, ncenters=10):
    """Cluster `features` and score the result against `ground_truth`.

    Cluster centers come from `run_clustering`, predicted labels from
    `assign_labels`; the prediction is scored with Shogun's
    `ClusteringAccuracy` and `ClusteringMutualInformation`.

    Args:
        features: Data forwarded unchanged to `run_clustering` and
            `assign_labels`.
        ground_truth: Raw reference labels; wrapped in `MulticlassLabels`.
        ncenters: Center count forwarded to `run_clustering` and
            `assign_labels`.

    Returns:
        Tuple `(reference_labels, accuracy, mutual_information)`.
    """
    from shogun.Evaluation import ClusteringAccuracy
    from shogun.Evaluation import ClusteringMutualInformation
    from shogun.Features import MulticlassLabels

    centers = run_clustering(features, ncenters)
    predicted = assign_labels(features, centers, ncenters)
    reference = MulticlassLabels(ground_truth)

    accuracy_metric = ClusteringAccuracy()
    # NOTE(review): best_map presumably aligns the predicted cluster ids with
    # the reference ids before scoring -- confirm against the Shogun docs.
    accuracy_metric.best_map(predicted, reference)
    accuracy = accuracy_metric.evaluate(predicted, reference)

    mi_metric = ClusteringMutualInformation()
    mutual_information = mi_metric.evaluate(predicted, reference)

    return reference, accuracy, mutual_information
    return kmeans.get_cluster_centers()

def assign_labels(data, centroids, ncenters=10):
    """Label `data` by running a KNN classifier trained on `centroids`.

    A `KNN` instance is built with 1 as its first argument, a Euclidean
    distance over the centroids, and one label per centroid (1.0 through
    `ncenters`). It is trained and then applied to `data`.

    `Labels`, `RealFeatures` and `EuclidianDistance` are expected to be
    available in the enclosing module scope.

    Args:
        data: Feature matrix; wrapped in `RealFeatures`.
        centroids: Centroid matrix; wrapped in `RealFeatures`. It has to
            contain `ncenters` centroids so that labels and training vectors
            line up.
        ncenters: Number of centroids. Defaults to 10, the value that used to
            be hard-coded, so existing two-argument calls behave as before.

    Returns:
        The result of `KNN.apply` on the wrapped `data`.
    """
    from shogun.Classifier import KNN
    from numpy import arange

    # One label per centroid: 1.0, 2.0, ..., ncenters.
    labels = Labels(arange(1., ncenters + 1.))
    fea = RealFeatures(data)
    fea_centroids = RealFeatures(centroids)
    # The distance is set up on the centroids for both sides, since the
    # centroids are the training set.
    distance = EuclidianDistance(fea_centroids, fea_centroids)
    knn = KNN(1, distance, labels)
    knn.train()
    return knn.apply(fea)

if __name__ == '__main__':
    # Load the data, cluster it, and label every sample.
    fea, gnd_raw = prepare_data()
    centroids = run_clustering(fea, 10)
    gnd_hat = assign_labels(fea, centroids)
    gnd = Labels(gnd_raw)

    # Accuracy: best_map is called on the predicted labels before scoring.
    AccuracyEval = ClusteringAccuracy()
    AccuracyEval.best_map(gnd_hat, gnd)
    accuracy = AccuracyEval.evaluate(gnd_hat, gnd)
    accuracy_report = 'Clustering accuracy = %.4f' % accuracy
    print(accuracy_report)

    # Mutual information between predicted and reference labels.
    MIEval = ClusteringMutualInformation()
    mutual_info = MIEval.evaluate(gnd_hat, gnd)
    mutual_info_report = 'Clustering mutual information = %.4f' % mutual_info
    print(mutual_info_report)

Esempio n. 8
0

def assign_labels(data, centroids, ncenters=10):
    """Label `data` by running a KNN classifier trained on `centroids`.

    A `KNN` instance is built with 1 as its first argument, a Euclidean
    distance over the centroids, and one label per centroid (1.0 through
    `ncenters`). It is trained and then applied to `data`.

    `Labels`, `RealFeatures` and `EuclidianDistance` are expected to be
    available in the enclosing module scope.

    Args:
        data: Feature matrix; wrapped in `RealFeatures`.
        centroids: Centroid matrix; wrapped in `RealFeatures`. It has to
            contain `ncenters` centroids so that labels and training vectors
            line up.
        ncenters: Number of centroids. Defaults to 10, the value that used to
            be hard-coded, so existing two-argument calls behave as before.

    Returns:
        The result of `KNN.apply` on the wrapped `data`.
    """
    from shogun.Classifier import KNN
    from numpy import arange

    # One label per centroid: 1.0, 2.0, ..., ncenters.
    labels = Labels(arange(1., ncenters + 1.))
    fea = RealFeatures(data)
    fea_centroids = RealFeatures(centroids)
    # The distance is set up on the centroids for both sides, since the
    # centroids are the training set.
    distance = EuclidianDistance(fea_centroids, fea_centroids)
    knn = KNN(1, distance, labels)
    knn.train()
    return knn.apply(fea)


if __name__ == '__main__':
    # Load the data, cluster it, and label every sample.
    fea, gnd_raw = prepare_data()
    centroids = run_clustering(fea, 10)
    gnd_hat = assign_labels(fea, centroids)
    gnd = Labels(gnd_raw)

    # Accuracy: best_map is called on the predicted labels before scoring.
    AccuracyEval = ClusteringAccuracy()
    AccuracyEval.best_map(gnd_hat, gnd)
    accuracy = AccuracyEval.evaluate(gnd_hat, gnd)
    accuracy_report = 'Clustering accuracy = %.4f' % accuracy
    print(accuracy_report)

    # Mutual information between predicted and reference labels.
    MIEval = ClusteringMutualInformation()
    mutual_info = MIEval.evaluate(gnd_hat, gnd)
    mutual_info_report = 'Clustering mutual information = %.4f' % mutual_info
    print(mutual_info_report)