import proclus as prc
import plotter
import arffreader as ar
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats.stats import pearsonr
import adjrand

# Test to see if there is a correlation between the
# proclus objective function and the adjusted rand index

# Load the dataset together with the labels returned by the ARFF reader.
X, sup = ar.readarff("data/highdataproclus.arff")

NRUNS = 15  # number of independent PROCLUS runs

objs = np.zeros(NRUNS)  # objective function results, one per run
adjs = np.zeros(NRUNS)  # adjusted rand index results, one per run

# print is always called with a single parenthesised argument so that the
# script parses and prints identically under Python 2 and Python 3.
print("Beginning runs...")

# range (not the Python 2-only xrange) keeps the loop portable.
for i in range(NRUNS):
    # Draw a new seed for every run and log it next to the run number.
    rseed = np.random.randint(low=0, high=1239831)
    print(">>> Run %d/%d using seed %d" % (i + 1, NRUNS, rseed))
    M, D, A = prc.proclus(X, k=7, l=2, seed=rseed)
    objs[i] = prc.evaluateClusters(X, A, D, M)
    adjs[i] = adjrand.computeAdjustedRandIndex(A, sup)

print("Finished runs...")

# Indices that order the runs by ascending objective value.
sidx = np.argsort(objs)
import proclus as prc
import plotter
import arffreader as ar
import numpy as np
import adjrand

# Load the dataset together with the labels returned by the ARFF reader.
X, sup = ar.readarff("data/simple.arff")

Dims = [0, 1]  # dimensions handed to the plotter
# plotter.plotDataset(X, D=Dims)  # plot 0-1 dimensions

R = 1  # toggle run proclus
RS = 0  # toggle use random seed

if R:  # run proclus
    # Fixed seed by default; a random one only when RS is switched on.
    rseed = np.random.randint(low=0, high=1239831) if RS else 902884

    # Single-argument print(...) parses the same on Python 2 and Python 3.
    print("Using seed %d" % rseed)

    M, D, A = prc.proclus(X, k=3, l=2, seed=rseed)
    print("Accuracy: %.4f" % prc.computeBasicAccuracy(A, sup))
    print("Adjusted rand index: %.4f"
          % adjrand.computeAdjustedRandIndex(A, sup))

    plotter.plotClustering(X, M, A, D=Dims)
# Example #3
# 0
import proclus as prc
import plotter
import arffreader as ar
import numpy as np
import adjrand
import batalgo

# Load the dataset together with the labels returned by the ARFF reader.
X, sup = ar.readarff("data/D20.arff")

Dims = [1, 2]  # dimensions handed to the plotter
# plotter.plotDataset(X, D=Dims)

R = 1  # toggle run proclus
RS = 0  # toggle use random seed
K = 7  # cluster count passed to proclus, batalgo and batproclus alike

if R:  # run proclus
    # Fixed seed by default; a random one only when RS is switched on.
    rseed = np.random.randint(low=0, high=1239831) if RS else 1132743

    # Single-argument print(...) parses the same on Python 2 and Python 3.
    print("Using seed %d" % rseed)
    M, D, A = prc.proclus(X, k=K, l=2, seed=rseed)
    print("Adjusted rand index: %.4f \n"
          % adjrand.computeAdjustedRandIndex(A, sup))
    plotter.plotClustering(X, M, A, D=Dims)

    print("Centroid points given by BAT algorithm:")
    # NOTE(review): the return value of batalgo.batalgo is not used here;
    # presumably it prints the centroids itself -- confirm.
    batalgo.batalgo(rseed, K)
    M, D, A = prc.batproclus(X, k=K, l=2, seed=rseed)
    # Report the index computed from the assignment that prc.batproclus
    # returned; the script previously printed a hard-coded 0.5124 here.
    print("Adjusted rand index: %.4f \n"
          % adjrand.computeAdjustedRandIndex(A, sup))
# Example #4
# 0
import proclus as prc
import plotter
import arffreader as ar
import numpy as np
import adjrand

# Load the dataset together with the labels returned by the ARFF reader.
X, sup = ar.readarff("data/D05.arff")

Dims = [0, 1]  # dimensions handed to the plotter
# plotter.plotDataset(X, D=Dims)  # plot 0-1 dimensions

R = 1  # toggle run proclus
RS = 0  # toggle use random seed

if R:  # run proclus
    # Fixed seed by default; a random one only when RS is switched on.
    rseed = np.random.randint(low=0, high=1239831) if RS else 902884

    # Single-argument print(...) parses the same on Python 2 and Python 3.
    print("Using seed %d" % rseed)

    M, D, A = prc.proclus(X, k=3, l=2, seed=rseed)
    print("Adjusted rand index: %.4f"
          % adjrand.computeAdjustedRandIndex(A, sup))

    plotter.plotClustering(X, M, A, D=Dims)