def divide_by_cmd(filename1, filename2, position):
    """Split the rows of two datasets into one CSV file per distinct command.

    Both files are loaded with ``weeklydataset`` and their rows concatenated.
    Rows are grouped by the value stored at index ``position``; each group is
    written to ``<value>_cmd.csv`` in the current working directory, using
    ``;`` as field delimiter.

    Args:
        filename1: Path of the first dataset, passed to ``weeklydataset``.
        filename2: Path of the second dataset, passed to ``weeklydataset``.
        position: Index of the field in each row that holds the command.
            The value is concatenated with ``"_cmd.csv"``, so it must be a
            string.
    """
    first_rows, _ = weeklydataset(filename1, [])
    second_rows, _ = weeklydataset(filename2, [])
    rows = first_rows + second_rows

    get_cmd = operator.itemgetter(position)
    commands = [get_cmd(row) for row in rows]
    print(commands[0:20])

    unique_cmd_set = set(commands)
    print(unique_cmd_set)
    unique_cmd = list(unique_cmd_set)
    print(unique_cmd)

    # Map each command straight to its writer so that dispatching a row is a
    # dict lookup instead of a linear list.index() scan per row.
    handles = {}
    writers = {}
    try:
        for cmd in unique_cmd:
            # Binary mode is what the Python 2 csv module expects.
            handles[cmd] = open(cmd + "_cmd.csv", "wb")
            writers[cmd] = csv.writer(handles[cmd], delimiter=";")
        for row, cmd in zip(rows, commands):
            writers[cmd].writerow(row)
    finally:
        # Close every file that was opened, even if a write failed, so
        # buffered rows are flushed and descriptors are released.
        for handle in handles.values():
            handle.close()
def test():
    """Cluster the ``nlog.csv`` workload into 4 groups and dump them to CSV.

    Loads the dataset with ``weeklydataset``, runs ``kmeans`` with 4
    clusters, prints the elapsed time, the centroids and the size of every
    cluster, then writes each cluster to
    ``/home/claudio/Workloads/WmProxyWL/cluster<i>.csv``. Each file starts
    with a header row ``["Cluster <i>", <size>]`` followed by the cluster
    members, using ``;`` as delimiter.
    """
    n_clusters = 4
    base_dir = "/home/claudio/Workloads/WmProxyWL"

    X, _ = weeklydataset(base_dir + "/nlog.csv", [])

    start_time = time()
    centroids, clusters = kmeans(X, n_clusters)
    end_time = time()

    print(end_time - start_time)
    print(centroids)
    for index in range(n_clusters):
        print(len(clusters[index]))

    for index in range(n_clusters):
        # The with-block guarantees each output file is flushed and closed.
        # Binary mode is what the Python 2 csv module expects.
        with open("%s/cluster%d.csv" % (base_dir, index), "wb") as out:
            results = csv.writer(out, delimiter=";")
            results.writerow(["Cluster %d" % index, len(clusters[index])])
            results.writerows(clusters[index])
def clusterize():
    """Cluster the combined train and test workloads into 4 groups.

    Loads ``train.csv`` and ``test.csv`` with ``weeklydataset``, concatenates
    their rows and runs ``kmeans`` with 4 clusters. Prints the elapsed time,
    the centroids and the size of every cluster, then writes each cluster to
    ``/home/claudio/Workloads/WmProxyWL/cluster<i>.csv``. Each file starts
    with a header row ``["Cluster <i>", <size>]`` followed by the cluster
    members, using ``;`` as delimiter.
    """
    n_clusters = 4
    out_dir = "/home/claudio/Workloads/WmProxyWL"

    # NOTE(review): the train file lives under /home/work while the test file
    # and the outputs live under /home/claudio -- confirm this is intended.
    train_rows, _ = weeklydataset('/home/work/Workloads/WmProxyWL/train.csv', [])
    test_rows, _ = weeklydataset('/home/claudio/Workloads/WmProxyWL/test.csv', [])
    rows = train_rows + test_rows

    start_time = time()
    centroids, clusters = kmeans(rows, n_clusters)
    end_time = time()

    print(end_time - start_time)
    print(centroids)
    for index in range(n_clusters):
        print(len(clusters[index]))

    for index in range(n_clusters):
        # The with-block guarantees each output file is flushed and closed.
        # Binary mode is what the Python 2 csv module expects.
        with open("%s/cluster%d.csv" % (out_dir, index), "wb") as out:
            results = csv.writer(out, delimiter=";")
            results.writerow(["Cluster %d" % index, len(clusters[index])])
            results.writerows(clusters[index])
''' Created on Jul 18, 2011 @author: work ''' #from Pycluster import clustercentroids, kcluster from kmeans import kmeans from numpy import matrix, float64 from thesis.scripts.dataset.dataset import weeklydataset import matplotlib.pyplot as plt #[X, label] = weeklydataset_shogun('/home/work/Projects/EclipseProjects/thesis/Scripts/cpu_mod.csv', [0]) X, label = weeklydataset('/media/DATA/Thesis/Workloads/GenericWorkloadModeler/workloads/WMproxy/wmpcommon_cmd.csv', []) #X = open('/home/work/Projects/EclipseProjects/thesis/Scripts/cpu.csv',) K = range(2,3) labels = list() error = list() nfound = list() cdata = list() cmask = list() #param = X[5:8] #parameters = matrix(X) for k in K: # tmplabels, tmperror, tmpnfound = kcluster(parameters, nclusters=k, mask=None, weight=None, transpose=1, npass=1, method='a', dist='e', initialid=None) # tmpcdata, tmpcmask = clustercentroids(parameters, None, tmplabels, 'a', 1) tmperror, tmp_cluster = kmeans(X, k) # labels.append(tmplabels) error.append(tmperror)