def getalldata(base_dir="/home/work/Projects/EclipseProjects/thesis/Scripts"):
    """Load the train/test feature sets and their label vectors.

    base_dir -- directory holding the four CSV files; the default keeps
                the original hard-coded location, so existing callers of
                getalldata() are unaffected.

    Returns (train, test, train_label, test_label). Features are loaded
    via weeklydataset_shogun; labels are read as space-separated doubles.
    """
    import os
    # NOTE(review): fromfile/double are assumed to come from a numpy star
    # import elsewhere in this module -- confirm.
    train = weeklydataset_shogun(os.path.join(base_dir, "train_sg.csv"))
    test = weeklydataset_shogun(os.path.join(base_dir, "test_sg.csv"))
    train_label = fromfile(os.path.join(base_dir, "train_label.csv"),
                           dtype=double, sep=' ')
    test_label = fromfile(os.path.join(base_dir, "test_label.csv"),
                          dtype=double, sep=' ')
    return train, test, train_label, test_label
def main_mcmc():
    """Cluster the cpu_mod workload and build one MCMC aggregation result
    per cluster.

    Returns a list with one aggregateby10mins_sg_mcmc(...) result per
    cluster, in cluster order.
    """
    [points, label] = weeklydataset_shogun(
        '/home/work/Projects/EclipseProjects/thesis/Scripts/cpu_mod.csv', [])
    # Same clustering settings (10 clusters, 1) as clusterize().
    clusteredpoints, cdata = create_clustered_samples(points, 10, 1)
    target = []
    # enumerate replaces the original manual numcluster counter.
    for numcluster, cluster in enumerate(clusteredpoints):
        # cluster[0] appears to hold this cluster's raw points -- confirm
        # against create_clustered_samples.
        target.append(aggregateby10mins_sg_mcmc(cluster[0], numcluster))
    return target
def main(argv):
    """Entry point: load the cpu_500 workload, cluster it into 5 groups
    and derive a train/test split from the first cluster."""
    dataset_path = '/home/work/Projects/EclipseProjects/thesis/Scripts/cpu_500.csv'
    samples, sample_labels = weeklydataset_shogun(dataset_path, [])
    grouped, cluster_data = create_clustered_samples(samples, 5, 1)
    # NOTE(review): traintest is called with 3 args here but with 4 in
    # clusterize() -- confirm which signature is current.
    train, test = traintest(grouped[0], 20, 1)
def clusterize(): [points, label] = weeklydataset_shogun('/home/work/Projects/EclipseProjects/thesis/Scripts/cpu_mod.csv', []) # points, label = weeklydataset_sg_ndata('/media/4AC0AB31C0AB21E5/Documents and Settings/Claudio/Documenti/Thesis/Workloads/MSClaudio/ews/ewsdata2.csv', []) # print points clusteredpoints, cdata = create_clustered_samples(points, 10, 1) # clusteredpoints, cdata = create_clustered_samples_ndata(points, 3, 1) cluster0 = clusteredpoints[0][1] cluster1 = clusteredpoints[1][1] cluster2 = clusteredpoints[2][1] cluster3 = clusteredpoints[3][1] cluster4 = clusteredpoints[4][1] cluster5 = clusteredpoints[5][1] cluster6 = clusteredpoints[6][1] cluster7 = clusteredpoints[7][1] cluster8 = clusteredpoints[8][1] cluster9 = clusteredpoints[9][1] # # clusterlen = [len(cluster0[0]), len(cluster1[0]), len(cluster2[0]),len(cluster3[0]),len(cluster4[0]), len(cluster5[0]), # len(cluster6[0]), len(cluster7[0]), len(cluster8[0]),len(cluster9[0])] # m = max(clusterlen) # minimum = min(clusterlen) # # maxcluster = clusterlen.index(m) # mincluster = clusterlen.index(minimum) # input, target = aggregateby10mins_sg(clusteredpoints[mincluster][0]) print "Cluster0 points: %d" % len(cluster0) print "Cluster1 points: %d" % len(cluster1) print "Cluster2 points: %d" % len(cluster2) print "Cluster3 points: %d" % len(cluster3) print "Cluster4 points: %d" % len(cluster4) print "Cluster5 points: %d" % len(cluster5) print "Cluster6 points: %d" % len(cluster6) print "Cluster7 points: %d" % len(cluster7) print "Cluster8 points: %d" % len(cluster8) print "Cluster9 points: %d" % len(cluster9) input = [] target = [] numcluster = 0 for cluster in clusteredpoints: inp, tar = aggregateby10mins_sg_mean(cluster[0], numcluster) # inp, tar = aggregateby10mins_sg_ndata(cluster[1], numcluster) input.append(inp) target.append(tar) numcluster += 1 # input, target = [aggregateby10mins_sg_ndata(cluster[0]) for cluster in clusteredpoints] traininput = [] traintarget = [] testinput = [] testtarget 
= [] for i in range(len(input)): trainin, traintar, testin, testtar = traintest(input[i], target[i], 20, 1) traininput.append(trainin) traintarget.append(traintar) testinput.append(testin) testtarget.append(testtar) return traininput, traintarget, testinput, testtarget, cdata
def create_clusters(): [points, label] = weeklydataset_shogun('/home/work/Projects/EclipseProjects/thesis/Scripts/cpu_mod.csv', []) # [points, label] = weeklydataset_sg_ndata('/media/4AC0AB31C0AB21E5/Documents and Settings/Claudio/Documenti/Thesis/Workloads/MSClaudio/ews/ewsdata2.csv', []) # print points clusteredpoints, cdata = create_clustered_samples(points, 10, 1) # clusteredpoints, cdata = create_clustered_samples_ndata(points, 3, 1) cluster0 = clusteredpoints[0][1] cluster1 = clusteredpoints[1][1] cluster2 = clusteredpoints[2][1] cluster3 = clusteredpoints[3][1] cluster4 = clusteredpoints[4][1] cluster5 = clusteredpoints[5][1] cluster6 = clusteredpoints[6][1] cluster7 = clusteredpoints[7][1] cluster8 = clusteredpoints[8][1] cluster9 = clusteredpoints[9][1] print type(cluster0) savetxt("cluster0.csv", matrix(clusteredpoints[0]), delimiter=';') savetxt("cluster1.csv", matrix(clusteredpoints[1]), delimiter=';') savetxt("cluster2.csv", matrix(clusteredpoints[2]), delimiter=';') savetxt("cluster3.csv", matrix(clusteredpoints[3]), delimiter=';') savetxt("cluster4.csv", matrix(clusteredpoints[4]), delimiter=';') savetxt("cluster5.csv", matrix(clusteredpoints[5]), delimiter=';') savetxt("cluster6.csv", matrix(clusteredpoints[6]), delimiter=';') savetxt("cluster7.csv", matrix(clusteredpoints[7]), delimiter=';') savetxt("cluster8.csv", matrix(clusteredpoints[8]), delimiter=';') savetxt("cluster9.csv", matrix(clusteredpoints[9]), delimiter=';') # # clusterlen = [len(cluster0[0]), len(cluster1[0]), len(cluster2[0]),len(cluster3[0]),len(cluster4[0]), len(cluster5[0]), # len(cluster6[0]), len(cluster7[0]), len(cluster8[0]),len(cluster9[0])] # m = max(clusterlen) # minimum = min(clusterlen) # # maxcluster = clusterlen.index(m) # mincluster = clusterlen.index(minimum) # input, target = aggregateby10mins_sg(clusteredpoints[mincluster][0]) print "Cluster0 points: %d" % len(cluster0) print "Cluster1 points: %d" % len(cluster1) print "Cluster2 points: %d" % len(cluster2) print 
"Cluster3 points: %d" % len(cluster3) print "Cluster4 points: %d" % len(cluster4) print "Cluster5 points: %d" % len(cluster5) print "Cluster6 points: %d" % len(cluster6) print "Cluster7 points: %d" % len(cluster7) print "Cluster8 points: %d" % len(cluster8) print "Cluster9 points: %d" % len(cluster9)
@author: work
'''
import numpy as np
from thesis.scripts.dataset.dataset import weeklydataset_shogun
from scipy.cluster.vq import kmeans, vq
from scipy.spatial.distance import cdist, pdist
import matplotlib.pyplot as plt
from matplotlib import cm

# load the iris dataset
#fName = 'C:\\Python26\\Lib\\site-packages\\scipy\\spatial\\tests\\iris.txt'
#fp = open(fName)
#X = np.loadtxt(fp)
#fp.close()

# Load the weekly CPU dataset; X is the sample matrix, label the labels.
# NOTE(review): weeklydataset_shogun is called with [0] here but with []
# elsewhere in this module -- confirm the second argument's meaning.
[X, label] = weeklydataset_shogun('/home/work/Projects/EclipseProjects/thesis/Scripts/cpu.csv', [0])

##### cluster data into K=1..10 clusters #####
# Candidate cluster counts (actually 1..9, despite the banner above).
K = range(1,10)

# scipy.cluster.vq.kmeans -- one (centroids, distortion) pair per k.
KM = [kmeans(X,k) for k in K]
centroids = [cent for (cent,var) in KM]  # cluster centroids
#avgWithinSS = [var for (cent,var) in KM] # mean within-cluster sum of squares

# alternative: scipy.cluster.vq.vq
#Z = [vq(X,cent) for cent in centroids]
#avgWithinSS = [sum(dist)/X.shape[0] for (cIdx,dist) in Z]

# alternative: scipy.spatial.distance.cdist
# Distance of every sample to every centroid for each candidate k, then
# the index of the nearest centroid per sample (elbow-method inputs).
D_k = [cdist(X, cent, 'euclidean') for cent in centroids]
cIdx = [np.argmin(D,axis=1) for D in D_k]