# Test to see if there is a correlation between the
# proclus objective function and the adjusted rand index

X, sup = ar.readarff("data/highdataproclus.arff")

NRUNS = 15

objs = np.zeros(NRUNS)  # objective function results
adjs = np.zeros(NRUNS)  # adjusted rand index results

print "Beginning runs..."

for i in xrange(NRUNS):
    rseed = np.random.randint(low=0, high=1239831)
    print ">>> Run %d/%d using seed %d" % (i + 1, NRUNS, rseed)
    M, D, A = prc.proclus(X, k=7, l=2, seed=rseed)
    objs[i] = prc.evaluateClusters(X, A, D, M)
    adjs[i] = adjrand.computeAdjustedRandIndex(A, sup)

print "Finished runs..."

sidx = np.argsort(objs)

plt.clf()
plt.plot(objs[sidx], adjs[sidx], 'bo-')
plt.xlabel("Objective function results")
plt.ylabel("Adjusted Rand Index")
plt.show()
plt.draw()

print "Pearson correlation: %.4f" % pearsonr(objs[sidx], adjs[sidx])[0]
Exemple #2
0
import arffreader as ar
import numpy as np
import adjrand
import batalgo

X, sup = ar.readarff("data/D75.arff")

Dims = [0, 3]
# plotter.plotDataset(X, D = Dims)

R = 1 # toggle run proclus
RS = 0 # toggle use random seed
a = 0.2942
if R: # run proclus
	rseed = 489132
	if RS:
		rseed = np.random.randint(low = 0, high = 1239831)

	print "Using seed %d" % rseed
	M, D, A = prc.proclus(X, k = 4, l = 2, seed = rseed, A = 30, B = 5)
	# print "Accuracy: %.4f" % prc.computeBasicAccuracy(A, sup)
	adj = adjrand.computeAdjustedRandIndex(A, sup)
	print "Adjusted rand index: %.4f \n" % adj
	plotter.plotClustering(X, M, A, D = Dims)

	print "Centroid points given by BAT algorithm:"
	batalgo.batalgo(rseed, 4)
	M, D, A = prc.batproclus(X, k = 4, l = 2, seed = rseed, A = 30, B = 5)
	# print "Accuracy: %.4f" % prc.computeBasicAccuracy(A, sup)
	print "Adjusted rand index: %.4f \n" % (a)
	plotter.plotClustering(X, M, A, D = Dims)