# Test to see if there is a correlation between the
# proclus objective function and the adjusted rand index.
#
# PROCLUS is run NRUNS times on the same dataset, each time with a freshly
# drawn random seed. For every run the value of prc.evaluateClusters and the
# adjusted rand index of the assignment against `sup` are recorded.
import proclus as prc
import plotter
import arffreader as ar
import numpy as np
import matplotlib.pyplot as plt
# Import from the public namespace: `scipy.stats.stats` is a deprecated alias.
from scipy.stats import pearsonr
import adjrand

# `sup` is the second value returned by readarff; it is used below as the
# reference labelling (presumably the ground-truth classes -- confirm in
# arffreader).
X, sup = ar.readarff("data/highdataproclus.arff")

NRUNS = 15
objs = np.zeros(NRUNS)  # objective function results
adjs = np.zeros(NRUNS)  # adjusted rand index results

# Single-argument print(...) and range() behave the same on Python 2 and 3.
print("Beginning runs...")
for i in range(NRUNS):
    rseed = np.random.randint(low=0, high=1239831)
    print(">>> Run %d/%d using seed %d" % (i + 1, NRUNS, rseed))
    M, D, A = prc.proclus(X, k=7, l=2, seed=rseed)
    objs[i] = prc.evaluateClusters(X, A, D, M)
    adjs[i] = adjrand.computeAdjustedRandIndex(A, sup)
print("Finished runs...")

# Indices that order the runs by increasing objective value.
sidx = np.argsort(objs)
# Run PROCLUS once on data/simple.arff, report how well the assignment
# matches `sup`, and plot the clustering on the dimensions listed in Dims.
import proclus as prc
import plotter
import arffreader as ar
import numpy as np
import adjrand

X, sup = ar.readarff("data/simple.arff")

Dims = [0, 1]  # dimensions handed to the plotter
# plotter.plotDataset(X, D=Dims)  # plot 0-1 dimensions

R = 1   # toggle: run proclus
RS = 0  # toggle: draw a random seed instead of the fixed one

if R:
    # Fixed seed by default so the run is repeatable; random only when RS is set.
    rseed = np.random.randint(low=0, high=1239831) if RS else 902884

    print("Using seed %d" % rseed)
    M, D, A = prc.proclus(X, k=3, l=2, seed=rseed)

    print("Accuracy: %.4f" % prc.computeBasicAccuracy(A, sup))
    print("Adjusted rand index: %.4f"
          % adjrand.computeAdjustedRandIndex(A, sup))

    plotter.plotClustering(X, M, A, D=Dims)
# Run plain PROCLUS and then the BAT-based variant (prc.batproclus) on
# data/D20.arff with the same seed, printing the adjusted rand index of each
# assignment against `sup`.
import proclus as prc
import plotter
import arffreader as ar
import numpy as np
import adjrand
import batalgo

X, sup = ar.readarff("data/D20.arff")

Dims = [1, 2]  # dimensions handed to the plotter
# plotter.plotDataset(X, D=Dims)

R = 1   # toggle run proclus
RS = 0  # toggle use random seed

if R:  # run proclus
    rseed = 1132743
    if RS:
        rseed = np.random.randint(low=0, high=1239831)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Using seed %d" % rseed)

    # --- plain PROCLUS ---
    M, D, A = prc.proclus(X, k=7, l=2, seed=rseed)
    # print("Accuracy: %.4f" % prc.computeBasicAccuracy(A, sup))
    print("Adjusted rand index: %.4f \n"
          % adjrand.computeAdjustedRandIndex(A, sup))
    plotter.plotClustering(X, M, A, D=Dims)

    # --- BAT-based PROCLUS ---
    print("Centroid points given by BAT algorithm:")
    # The return value is not used here; presumably batalgo prints the
    # centroids itself -- confirm in batalgo.
    batalgo.batalgo(rseed, 7)
    M, D, A = prc.batproclus(X, k=7, l=2, seed=rseed)
    # print("Accuracy: %.4f" % prc.computeBasicAccuracy(A, sup))
    # Score the assignment that batproclus actually produced, the same way as
    # for the plain run, so the two printed numbers are comparable.
    print("Adjusted rand index: %.4f \n"
          % adjrand.computeAdjustedRandIndex(A, sup))
# Run PROCLUS once on data/D05.arff, print the adjusted rand index of the
# assignment against `sup`, and plot the clustering on the Dims dimensions.
import proclus as prc
import plotter
import arffreader as ar
import numpy as np
import adjrand

X, sup = ar.readarff("data/D05.arff")

Dims = [0, 1]  # dimensions handed to the plotter
# plotter.plotDataset(X, D=Dims)  # plot 0-1 dimensions

R = 1   # toggle: run proclus
RS = 0  # toggle: draw a random seed instead of the fixed one

if R:
    if RS:
        rseed = np.random.randint(low=0, high=1239831)
    else:
        rseed = 902884  # fixed seed keeps the run repeatable

    print("Using seed %d" % rseed)
    M, D, A = prc.proclus(X, k=3, l=2, seed=rseed)

    # print("Accuracy: %.4f" % prc.computeBasicAccuracy(A, sup))
    print("Adjusted rand index: %.4f"
          % adjrand.computeAdjustedRandIndex(A, sup))

    plotter.plotClustering(X, M, A, D=Dims)