Example #1
0
#!/usr/bin/env python
# encoding: utf-8
import sys
from utils import data_to_na, parse_tab
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score, homogeneity_completeness_v_measure
import pylab

# Both positional arguments are required: the dataset path and the number
# of clusters.
if len(sys.argv) < 3:
    sys.exit('Usage: python kmeans.py dataset k')

## Data preprocessing
data = parse_tab(sys.argv[1])
k = int(sys.argv[2])
# The last field of every parsed row is used as the reference class label.
classes = [example[-1] for example in data]
# NOTE(review): data_to_na presumably converts the rows to a numeric array
# of features -- confirm against utils.
examples = data_to_na(data)

## Clustering
# scikit-learn's KMeans takes the cluster count as `n_clusters`; the old
# `k` keyword was deprecated and later removed, so `KMeans(k=...)` raises
# TypeError on current releases.
kmeans = KMeans(n_clusters=k, random_state=0)
kmeans.fit(examples)
codebook = kmeans.cluster_centers_
labels = kmeans.predict(examples)

## Performance evaluation
# The sklearn metric signature is (labels_true, labels_pred). ARI and
# V-measure are symmetric, but homogeneity and completeness trade places
# when the arguments are swapped, so the ground truth must come first.
ari = adjusted_rand_score(classes, labels)
homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(classes, labels)
print('ARI: {0}'.format(ari))
print('Homogeneity: {0}'.format(homogeneity))
print('Completeness: {0}'.format(completeness))
print('V-measure: {0}'.format(v_measure))
Example #2
0
File: score.py Project: butara/PPRM
#!/usr/bin/env python
# encoding: utf-8
from utils import data_to_na, parse_tab
from sklearn.metrics import adjusted_rand_score, homogeneity_completeness_v_measure
import pylab

# Alternative dataset / clustering-output pairs; uncomment one pair (and
# comment out the active one) to score a different run.
#data = parse_tab('../datasets/circle-weird.tab')
#output = '../datasets/nested-circle-output'
#data = parse_tab('../datasets/half-moons.tab')
#output = '../datasets/half-moons-output'
data = parse_tab('../datasets/red-blue-clusters.tab')
output = '../datasets/red-blue-output'


# The last field of every parsed row is used as the reference class label.
classes = [example[-1] for example in data]
# NOTE(review): data_to_na presumably converts the rows to a numeric array
# of features -- confirm against utils.
examples = data_to_na(data)

# Read the whitespace-separated integer cluster labels. A real list is
# built (not a lazy `map` object) because the labels are measured with
# len() and then reused by the metrics and the plot; the `with` block
# guarantees the file handle is closed.
with open(output) as handle:
    labels = [int(token) for token in handle.read().split()]
# Sanity check: the two counts should match. Formatted through a single
# string so the output is identical on Python 2 and Python 3.
print('{0} {1}'.format(len(labels), len(classes)))

## Performance evaluation
# The sklearn metric signature is (labels_true, labels_pred). ARI and
# V-measure are symmetric, but homogeneity and completeness trade places
# when the arguments are swapped, so the ground truth must come first.
ari = adjusted_rand_score(classes, labels)
homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(classes, labels)
print('ARI: {0}'.format(ari))
print('Homogeneity: {0}'.format(homogeneity))
print('Completeness: {0}'.format(completeness))
print('V-measure: {0}'.format(v_measure))

# Scatter the first two feature columns, coloured by assigned cluster.
pylab.figure(1)
pylab.scatter(examples.T[0], examples.T[1], c=labels)
pylab.show()