Beispiel #1
0
    sys.exit('Usage: python spectral.py dataset k')

## Data preprocessing
# The dataset path and the number of clusters come from the command line.
data = parse_tab(sys.argv[1])
k = int(sys.argv[2])
# The last field of every row is used as the reference class when scoring.
classes = [example[-1] for example in data]

examples = data_to_na(data)
distances = euclidean_distances(examples, examples)
# Apply gaussian kernel as suggested in the documentation:
# exp(-gamma * distance) maps a distance of 0 to similarity 1 and larger
# distances towards 0.
gamma = 0.5 # == 1 / num_features (heuristic)
similarity_matrix = numpy.exp(-distances * gamma)

## Clustering
# The estimator is fitted on the pairwise similarity matrix, not on the raw
# examples.
sc = SpectralClustering(k=k, random_state=0)
sc.fit(similarity_matrix)
labels = sc.labels_

## Performance evaluation
# Reference classes go first (labels_true) and predicted clusters second
# (labels_pred). ARI and V-measure are symmetric, but homogeneity and
# completeness trade places when the two arguments are reversed.
ari = adjusted_rand_score(classes, labels)
homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(classes, labels)
print('ARI: {0}'.format(ari))
print('Homogeneity: {0}'.format(homogeneity))
print('Completeness: {0}'.format(completeness))
print('V-measure: {0}'.format(v_measure))
addToResult('Spectral', ari, homogeneity, completeness, v_measure)

## Visualisation
# Dataset file name without directory or extension; printed and used in the
# image title.
dataset_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]
draw.scatter(examples, labels)
print(dataset_name)
draw.setImgTitle('spectral_' + dataset_name)
draw.showImage()
Beispiel #2
0
# Number of mixture components, taken from the second command-line argument.
clusters = int(sys.argv[2])
# The last field of every row is used as the reference class when scoring.
classes = [example[-1] for example in data]
examples = data_to_na(data)

## Clustering
# Fit the mixture model, then assign each example to a component.
clf = mixture.GMM(n_components=clusters)
clf.fit(examples)
labels = clf.predict(examples)

## Performance evaluation
# Reference classes go first (labels_true) and predicted clusters second
# (labels_pred). ARI and V-measure are symmetric, but homogeneity and
# completeness trade places when the two arguments are reversed.
ari = adjusted_rand_score(classes, labels)
homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(classes, labels)
print('ARI: {0}'.format(ari))
print('Homogeneity: {0}'.format(homogeneity))
print('Completeness: {0}'.format(completeness))
print('V-measure: {0}'.format(v_measure))
addToResult('GMM', ari, homogeneity, completeness, v_measure)


# Disabled experiment: contour plot of values derived from clf.eval over a
# grid spanning the first two feature dimensions of the examples.
#x = np.linspace(min([x[0] for x in examples]), max([x[0] for x in examples]))
#y = np.linspace(min([y[1] for y in examples]), max([y[1] for y in examples]))
#X, Y = np.meshgrid(x, y)
#XX = np.c_[X.ravel(), Y.ravel()]
#Z = np.log(-clf.eval(XX)[0])
#Z = Z.reshape(X.shape)
#pylab.contour(Y, X, Z)

## Visualisation
# Dataset file name without directory or extension; printed and used in the
# image title.
dataset_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]
draw.scatter(examples, labels)
print(dataset_name)
draw.setImgTitle('GMM_' + dataset_name)
draw.showImage()
Beispiel #3
0
import drawing as draw
# Start this run with an empty savedImages mapping on the drawing module.
draw.savedImages = {}

# Both the dataset path and the cluster count are required.
if len(sys.argv) < 3:
    sys.exit('Usage: python kmeans.py dataset k')

## Data preprocessing
data = parse_tab(sys.argv[1])
k = int(sys.argv[2])
# The last field of every row is used as the reference class when scoring.
classes = [example[-1] for example in data]
examples = data_to_na(data)

## Clustering
# random_state is fixed so repeated runs use the same seed.
kmeans = KMeans(k=k, random_state=0)
kmeans.fit(examples)
codebook = kmeans.cluster_centers_
labels = kmeans.predict(examples)

## Performance evaluation
# Reference classes go first (labels_true) and predicted clusters second
# (labels_pred). ARI and V-measure are symmetric, but homogeneity and
# completeness trade places when the two arguments are reversed.
ari = adjusted_rand_score(classes, labels)
homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(classes, labels)
print('ARI: {0}'.format(ari))
print('Homogeneity: {0}'.format(homogeneity))
print('Completeness: {0}'.format(completeness))
print('V-measure: {0}'.format(v_measure))
addToResult('k-means', ari, homogeneity, completeness, v_measure)

## Visualisation
# Dataset file name without directory or extension; printed and used in the
# image title.
dataset_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]
draw.scatter(examples, labels)
print(dataset_name)
draw.setImgTitle('kmeans_' + dataset_name)
draw.showImage()
Beispiel #4
0
import os
import drawing as draw
# Start this run with an empty savedImages mapping on the drawing module.
draw.savedImages = {}

# The dataset path, eps and min_samples are all required.
if len(sys.argv) < 4:
    sys.exit('Usage: python DBSCAN-comparison.py dataset eps min_samples')

## Data preprocessing
data = parse_tab(sys.argv[1])
eps = float(sys.argv[2])
min_samples = int(sys.argv[3])
# The last field of every row is used as the reference class when scoring.
classes = [example[-1] for example in data]
examples = data_to_na(data)

## Clustering
# Hyper-parameters are passed to the constructor rather than to fit():
# fit() only accepts them in very old scikit-learn releases, whereas the
# constructor form is accepted by old and current releases alike.
db = DBSCAN(eps=eps, min_samples=min_samples).fit(examples)
labels = db.labels_

## Performance evaluation
# Reference classes go first (labels_true) and predicted clusters second
# (labels_pred). ARI and V-measure are symmetric, but homogeneity and
# completeness trade places when the two arguments are reversed.
ari = adjusted_rand_score(classes, labels)
homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(classes, labels)
print('ARI: {0}'.format(ari))
print('Homogeneity: {0}'.format(homogeneity))
print('Completeness: {0}'.format(completeness))
print('V-measure: {0}'.format(v_measure))
addToResult('DBSCAN', ari, homogeneity, completeness, v_measure)

## Visualisation
# Dataset file name without directory or extension; printed and used in the
# image title.
dataset_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]
draw.scatter(examples, labels)
print(dataset_name)
draw.setImgTitle('DBSCAN_' + dataset_name)
draw.showImage()