# Script: cluster data/D20.arff with prc.proclus, then with prc.batproclus,
# printing an adjusted Rand index and plotting the clustering after each run.
# NOTE(review): `prc` is used below but is not imported anywhere in the visible
# code; presumably it is the project's PROCLUS module -- confirm and import it.
import plotter
import arffreader as ar
import numpy as np
import adjrand
import batalgo

# `sup` is scored against the clustering output below; presumably it holds the
# reference labels shipped with the ARFF file -- TODO confirm.
X, sup = ar.readarff("data/D20.arff")
Dims = [1, 2]  # dimension indices handed to the plotter
# plotter.plotDataset(X, D=Dims)  # uncomment to plot the raw dataset first

R = 1  # toggle run proclus
RS = 0  # toggle use random seed

if R:  # run proclus
    rseed = 1132743  # fixed seed, used unless RS is switched on
    if RS:
        rseed = np.random.randint(low=0, high=1239831)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Using seed %d" % rseed)
    M, D, A = prc.proclus(X, k=7, l=2, seed=rseed)
    # print("Accuracy: %.4f" % prc.computeBasicAccuracy(A, sup))
    print("Adjusted rand index: %.4f \n"
          % adjrand.computeAdjustedRandIndex(A, sup))
    plotter.plotClustering(X, M, A, D=Dims)

    print("Centroid points given by BAT algorithm:")
    # The return value is unused; the call is kept for whatever it outputs.
    batalgo.batalgo(rseed, 7)
    M, D, A = prc.batproclus(X, k=7, l=2, seed=rseed)
    # print("Accuracy: %.4f" % prc.computeBasicAccuracy(A, sup))
    # Score the batproclus output itself (A against sup) so the reported index
    # follows the data and the seed instead of being a fixed literal.
    print("Adjusted rand index: %.4f \n"
          % adjrand.computeAdjustedRandIndex(A, sup))
    plotter.plotClustering(X, M, A, D=Dims)
# Experiment: relate the proclus objective function to the adjusted rand index.
# Runs prc.proclus NRUNS times with random seeds, records both measures per
# run, plots one against the other and prints their Pearson correlation.
# NOTE(review): ar, np, prc, adjrand, plt and pearsonr are expected to be
# imported above this chunk -- confirm.
X, sup = ar.readarff("data/highdataproclus.arff")

NRUNS = 15
objs = np.zeros(NRUNS)  # objective function results, one slot per run
adjs = np.zeros(NRUNS)  # adjusted rand index results, one slot per run

print("Beginning runs...")
for run_idx in range(NRUNS):
    # Fresh random seed for every run.
    rseed = np.random.randint(low=0, high=1239831)
    print(">>> Run %d/%d using seed %d" % (run_idx + 1, NRUNS, rseed))
    M, D, A = prc.proclus(X, k=7, l=2, seed=rseed)
    objs[run_idx] = prc.evaluateClusters(X, A, D, M)
    adjs[run_idx] = adjrand.computeAdjustedRandIndex(A, sup)
print("Finished runs...")

# Order both result arrays by ascending objective value so the line plot
# moves left to right.
order = np.argsort(objs)
sorted_objs = objs[order]
sorted_adjs = adjs[order]

plt.clf()
plt.plot(sorted_objs, sorted_adjs, 'bo-')
plt.xlabel("Objective function results")
plt.ylabel("Adjusted Rand Index")
plt.show()
plt.draw()

correlation = pearsonr(sorted_objs, sorted_adjs)[0]
print("Pearson correlation: %.4f" % correlation)
# Script: cluster data/D05.arff with prc.proclus, then with prc.batproclus,
# printing an adjusted Rand index and plotting the clustering after each run.
# NOTE(review): `prc` is used below but is not imported anywhere in the visible
# code; presumably it is the project's PROCLUS module -- confirm and import it.
import plotter  # required by the plotClustering calls below
import arffreader as ar
import numpy as np
import adjrand
import batalgo

# `sup` is scored against the clustering output below; presumably it holds the
# reference labels shipped with the ARFF file -- TODO confirm.
X, sup = ar.readarff("data/D05.arff")
Dims = [0, 1]  # dimension indices handed to the plotter
# plotter.plotDataset(X, D=Dims)  # plot 0-1 dimensions

R = 1  # toggle run proclus
RS = 0  # toggle use random seed

if R:  # run proclus
    rseed = 902884  # fixed seed, used unless RS is switched on
    if RS:
        rseed = np.random.randint(low=0, high=1239831)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Using seed %d" % rseed)
    M, D, A = prc.proclus(X, k=3, l=2, seed=rseed)
    print("Adjusted rand index without BAT algorithm: %.4f\n"
          % adjrand.computeAdjustedRandIndex(A, sup))
    plotter.plotClustering(X, M, A, D=Dims)

    print("Centroid points given by BAT algorithm:")
    # The return value is unused; the call is kept for whatever it outputs.
    batalgo.batalgo(rseed, 3)
    M, D, A = prc.batproclus(X, k=3, l=2, seed=rseed)
    # print("Accuracy: %.4f" % prc.computeBasicAccuracy(A, sup))
    # Score the batproclus output itself (A against sup) so the reported index
    # follows the data and the seed instead of being a fixed literal.
    print("Adjusted rand index with BAT algorithm: %.4f \n"
          % adjrand.computeAdjustedRandIndex(A, sup))
    plotter.plotClustering(X, M, A, D=Dims)