def clusterize(): [points, label] = weeklydataset_shogun('/home/work/Projects/EclipseProjects/thesis/Scripts/cpu_mod.csv', []) # points, label = weeklydataset_sg_ndata('/media/4AC0AB31C0AB21E5/Documents and Settings/Claudio/Documenti/Thesis/Workloads/MSClaudio/ews/ewsdata2.csv', []) # print points clusteredpoints, cdata = create_clustered_samples(points, 10, 1) # clusteredpoints, cdata = create_clustered_samples_ndata(points, 3, 1) cluster0 = clusteredpoints[0][1] cluster1 = clusteredpoints[1][1] cluster2 = clusteredpoints[2][1] cluster3 = clusteredpoints[3][1] cluster4 = clusteredpoints[4][1] cluster5 = clusteredpoints[5][1] cluster6 = clusteredpoints[6][1] cluster7 = clusteredpoints[7][1] cluster8 = clusteredpoints[8][1] cluster9 = clusteredpoints[9][1] # # clusterlen = [len(cluster0[0]), len(cluster1[0]), len(cluster2[0]),len(cluster3[0]),len(cluster4[0]), len(cluster5[0]), # len(cluster6[0]), len(cluster7[0]), len(cluster8[0]),len(cluster9[0])] # m = max(clusterlen) # minimum = min(clusterlen) # # maxcluster = clusterlen.index(m) # mincluster = clusterlen.index(minimum) # input, target = aggregateby10mins_sg(clusteredpoints[mincluster][0]) print "Cluster0 points: %d" % len(cluster0) print "Cluster1 points: %d" % len(cluster1) print "Cluster2 points: %d" % len(cluster2) print "Cluster3 points: %d" % len(cluster3) print "Cluster4 points: %d" % len(cluster4) print "Cluster5 points: %d" % len(cluster5) print "Cluster6 points: %d" % len(cluster6) print "Cluster7 points: %d" % len(cluster7) print "Cluster8 points: %d" % len(cluster8) print "Cluster9 points: %d" % len(cluster9) input = [] target = [] numcluster = 0 for cluster in clusteredpoints: inp, tar = aggregateby10mins_sg_mean(cluster[0], numcluster) # inp, tar = aggregateby10mins_sg_ndata(cluster[1], numcluster) input.append(inp) target.append(tar) numcluster += 1 # input, target = [aggregateby10mins_sg_ndata(cluster[0]) for cluster in clusteredpoints] traininput = [] traintarget = [] testinput = [] testtarget = [] for i in range(len(input)): trainin, traintar, testin, testtar = traintest(input[i], target[i], 20, 1) traininput.append(trainin) traintarget.append(traintar) testinput.append(testin) testtarget.append(testtar) return traininput, traintarget, testinput, testtarget, cdata
def main_hmm(): # [points, label] = weeklydataset_shogun('/home/work/Projects/EclipseProjects/thesis/Scripts/cpu_mod.csv', []) ## [points, label] = weeklydataset_sg_ndata('/media/4AC0AB31C0AB21E5/Documents and Settings/Claudio/Documenti/Thesis/Workloads/MSClaudio/ews/ewsdata2.csv', []) ## print points # clusteredpoints, cdata = create_clustered_samples(points, 10, 1) ## clusteredpoints, cdata = create_clustered_samples_ndata(points, 3, 1) # clusteredpoints = [] clusteredpoints.append(genfromtxt("cluster0.csv", delimiter=';')) clusteredpoints.append(genfromtxt("cluster1.csv", delimiter=';')) clusteredpoints.append(genfromtxt("cluster2.csv", delimiter=';')) clusteredpoints.append(genfromtxt("cluster3.csv", delimiter=';')) clusteredpoints.append(genfromtxt("cluster4.csv", delimiter=';')) clusteredpoints.append(genfromtxt("cluster5.csv", delimiter=';')) clusteredpoints.append(genfromtxt("cluster6.csv", delimiter=';')) clusteredpoints.append(genfromtxt("cluster7.csv", delimiter=';')) clusteredpoints.append(genfromtxt("cluster8.csv", delimiter=';')) clusteredpoints.append(genfromtxt("cluster9.csv", delimiter=';')) cluster0 = clusteredpoints[0][1] cluster1 = clusteredpoints[1][1] cluster2 = clusteredpoints[2][1] cluster3 = clusteredpoints[3][1] cluster4 = clusteredpoints[4][1] cluster5 = clusteredpoints[5][1] cluster6 = clusteredpoints[6][1] cluster7 = clusteredpoints[7][1] cluster8 = clusteredpoints[8][1] cluster9 = clusteredpoints[9][1] ## ## clusterlen = [len(cluster0[0]), len(cluster1[0]), len(cluster2[0]),len(cluster3[0]),len(cluster4[0]), len(cluster5[0]), ## len(cluster6[0]), len(cluster7[0]), len(cluster8[0]),len(cluster9[0])] ## m = max(clusterlen) ## minimum = min(clusterlen) ## ## maxcluster = clusterlen.index(m) ## mincluster = clusterlen.index(minimum) ## input, target = aggregateby10mins_sg(clusteredpoints[mincluster][0]) print "Cluster0 points: %d" % len(cluster0) print "Cluster1 points: %d" % len(cluster1) print "Cluster2 points: %d" % len(cluster2) print "Cluster3 points: %d" % len(cluster3) print "Cluster4 points: %d" % len(cluster4) print "Cluster5 points: %d" % len(cluster5) print "Cluster6 points: %d" % len(cluster6) print "Cluster7 points: %d" % len(cluster7) print "Cluster8 points: %d" % len(cluster8) print "Cluster9 points: %d" % len(cluster9) clusterlen = [len(cluster0), len(cluster1), len(cluster2), len(cluster3), len(cluster4), len(cluster5), len(cluster6), len(cluster7), len(cluster8), len(cluster9)] minimum = min(clusterlen) mincluster = clusterlen.index(70400) input = [] target = [] numcluster = 0 # input, target = aggregateby10mins_sg_ndata(points[1], 0) for cluster in clusteredpoints: inp, tar = aggregateby10mins_sg_mean(cluster[0], numcluster) # inp, tar = aggregateby10mins_sg_ndata(cluster[1], numcluster) input.append(inp) target.append(tar) numcluster += 1 # input, target = [aggregateby10mins_sg_ndata(cluster[0]) for cluster in clusteredpoints] traininput = [] traintarget = [] testinput = [] testtarget = [] for i in range(len(input)): trainin, traintar, testin, testtar = traintest(input[i], target[i], 20, 1) traininput.append(trainin) traintarget.append(traintar) testinput.append(testin) testtarget.append(testtar) # traininput, traintarget, testinput, testtarget = traintest(input, target, 20, 1) # models = [HMM(target[j], testinput[j], testtarget[j], 6, 6, max(target[j])) for j in range(len(target))] print "cluster maximum = %f" % max(target[mincluster]) models = hmm(target[mincluster], testinput[mincluster], testtarget[mincluster], 6, 6, max(target[mincluster])) # vs = [hmm_req(models[j], target[j], testinput[j], testtarget[j], max(target[j])) for j in range(len(target)-1)] ## model = hmm(target, testinput, testtarget, 6, 6, max(target)) v = hmm_req(models, target[mincluster], testinput[mincluster], testtarget[mincluster], max(target[mincluster])) # counter = 0 # # for v in vs: lastest_states = [v[i][0][len(v[i][0])-1] for i in range(len(v)-1)] print lastest_states ttarget = [] for state in lastest_states: li = models.getEmission(state) m = (max(li)* 2.0)/3.0 el = pylab.find(array(li) > m) maxes = nlargest(10, li) maxvals = [li.index(maxval) for maxval in maxes] ttarget.append(maxvals) # counter += 1 ## sme = sme_calc(ttarget, testtarget[counter]) sme = sme_calc(ttarget, testtarget[mincluster]) print "SME = %f" % sme # counter += 1 return models