Exemple #1
0
import plotter
import arffreader as ar
import numpy as np
import adjrand
import batalgo

# Demo script: cluster the D20 dataset with PROCLUS and then with the
# BAT-seeded variant, printing the adjusted Rand index against the labels
# in `sup` and plotting each clustering.
#
# NOTE(review): `prc` is never imported in the visible source; presumably it
# is the PROCLUS module (something like `import proclus as prc`) -- confirm
# and add the import, otherwise the first `prc.` call raises NameError.
X, sup = ar.readarff("data/D20.arff")

Dims = [1, 2]  # dimensions handed to the plotter
# Uncomment to preview the raw dataset before clustering:
# plotter.plotDataset(X, D=Dims)

R = 1   # toggle run proclus
RS = 0  # toggle use random seed
K = 7   # passed as k to both clustering calls and as batalgo's 2nd argument

if R:  # run proclus
    rseed = 1132743  # fixed seed for repeatable runs
    if RS:
        rseed = np.random.randint(low=0, high=1239831)

    # Single-argument print(...) emits the same text on Python 2 and is
    # also valid syntax on Python 3.
    print("Using seed %d" % rseed)
    M, D, A = prc.proclus(X, k=K, l=2, seed=rseed)
    print("Adjusted rand index: %.4f \n"
          % adjrand.computeAdjustedRandIndex(A, sup))
    plotter.plotClustering(X, M, A, D=Dims)

    print("Centroid points given by BAT algorithm:")
    batalgo.batalgo(rseed, K)
    M, D, A = prc.batproclus(X, k=K, l=2, seed=rseed)
    # Score the BAT run from its own assignment `A`; the previous version
    # printed a hard-coded constant here, so the reported figure never
    # reflected the clustering that was actually produced.
    print("Adjusted rand index: %.4f \n"
          % adjrand.computeAdjustedRandIndex(A, sup))
    plotter.plotClustering(X, M, A, D=Dims)
# Compare the PROCLUS objective function value with the adjusted rand index
# over NRUNS randomly seeded runs, plot one against the other and print
# their Pearson correlation.
#
# NOTE(review): `prc`, `plt` and `pearsonr` are not imported anywhere in the
# visible source -- presumably the PROCLUS module, matplotlib.pyplot and
# scipy.stats.pearsonr; confirm and add the imports.

X, sup = ar.readarff("data/highdataproclus.arff")

NRUNS = 15

# One slot per run.
objs = np.zeros(NRUNS)  # objective function results
adjs = np.zeros(NRUNS)  # adjusted rand index results

print("Beginning runs...")

for i in range(NRUNS):
    # Fresh random seed for every run.
    rseed = np.random.randint(low=0, high=1239831)
    progress = (i + 1, NRUNS, rseed)
    print(">>> Run %d/%d using seed %d" % progress)

    M, D, A = prc.proclus(X, k=7, l=2, seed=rseed)

    objective = prc.evaluateClusters(X, A, D, M)
    objs[i] = objective
    rand_index = adjrand.computeAdjustedRandIndex(A, sup)
    adjs[i] = rand_index

print("Finished runs...")

# Order both result arrays by ascending objective value so the line plot
# runs left to right.
sidx = np.argsort(objs)
sorted_objs = objs[sidx]
sorted_adjs = adjs[sidx]

plt.clf()
plt.plot(sorted_objs, sorted_adjs, "bo-")
plt.xlabel("Objective function results")
plt.ylabel("Adjusted Rand Index")
plt.show()
plt.draw()

# pearsonr(...)[0] is taken as the correlation coefficient.
correlation = pearsonr(sorted_objs, sorted_adjs)[0]
print("Pearson correlation: %.4f" % correlation)
Exemple #3
0
import arffreader as ar
import numpy as np
import adjrand
import batalgo

# Demo script: cluster the D05 dataset with PROCLUS and then with the
# BAT-seeded variant, printing the adjusted Rand index against the labels
# in `sup` and plotting each clustering.
#
# NOTE(review): `prc` is never imported in the visible source; presumably it
# is the PROCLUS module (something like `import proclus as prc`) -- confirm
# and add the import, otherwise the first `prc.` call raises NameError.
X, sup = ar.readarff("data/D05.arff")

Dims = [0, 1]  # dimensions handed to the plotter
# Uncomment to preview dimensions 0-1 of the raw dataset before clustering:
# plotter.plotDataset(X, D=Dims)

R = 1   # toggle run proclus
RS = 0  # toggle use random seed
K = 3   # passed as k to both clustering calls and as batalgo's 2nd argument

if R:  # run proclus
    rseed = 902884  # fixed seed for repeatable runs
    if RS:
        rseed = np.random.randint(low=0, high=1239831)

    # Single-argument print(...) emits the same text on Python 2 and is
    # also valid syntax on Python 3.
    print("Using seed %d" % rseed)
    M, D, A = prc.proclus(X, k=K, l=2, seed=rseed)
    print("Adjusted rand index without BAT algorithm: %.4f\n"
          % adjrand.computeAdjustedRandIndex(A, sup))
    plotter.plotClustering(X, M, A, D=Dims)

    print("Centroid points given by BAT algorithm:")
    batalgo.batalgo(rseed, K)
    M, D, A = prc.batproclus(X, k=K, l=2, seed=rseed)
    # Score the BAT run from its own assignment `A`; the previous version
    # printed a hard-coded constant here, so the reported figure never
    # reflected the clustering that was actually produced.
    print("Adjusted rand index with BAT algorithm: %.4f \n"
          % adjrand.computeAdjustedRandIndex(A, sup))
    plotter.plotClustering(X, M, A, D=Dims)