def run_and_draw(latitude, longitude, k):
    datList = []
    # Build one [lat, lon] feature pair from each line of the parsed text data
    for P in range(len(latitude)):
        datList.append([latitude[P], longitude[P]])
    datMat = mat(datList)
    myCentroids, clusterAssing = K_means.biKmeans(datMat, k)
    LA = mean(latitude)
    LO = mean(longitude)
    center = []
    other = []
    # Collect the cluster centre points
    for h, l in zip(myCentroids[:, 0].flatten().A[0],
                    myCentroids[:, 1].flatten().A[0]):  # unpack coordinates
        center.append([f(h), f(l)])
    for m in range(k):
        # Collect the city points assigned to cluster m
        ptsInCurrCluster = datMat[nonzero(clusterAssing[:, 0].A == m)[0], :]
        for i, j in zip(ptsInCurrCluster[:, 0].flatten().A[0],
                        ptsInCurrCluster[:, 1].flatten().A[0]):  # unpack coordinates
            other.append([f(i), f(j), m])
    print('[INFO]: attraction clustering finished')
    return center, other, LO, LA
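# The formatter f() used by run_and_draw is not defined in this snippet. A minimal
# stand-in, assuming it only rounds coordinates for display; both the behaviour and
# the precision are assumptions, not the original helper:
def f(value):
    return round(float(value), 4)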
def get_sse(data, centroid, q):
    """Accumulate the within-cluster distance for each centroid."""
    sse = []
    for i in range(len(centroid)):
        total = 0  # avoid shadowing the built-in sum()
        for j in range(len(q)):
            if q[j] == i:
                total += K_means.get_dist(data[j][0], data[j][1],
                                          centroid[i][0], centroid[i][1])
        sse.append(total)
    return sse
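# get_sse relies on K_means.get_dist, which is not shown here. A hedged sketch,
# assuming it is the plain Euclidean distance between two 2-D points:
import math

def get_dist(x1, y1, x2, y2):
    return math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)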
import os
import time

import numpy as np
import matplotlib.pyplot as plt


def draw_Image(points, m, original_data, vertical_digit, horizontal_digit):
    # Optionally preview the original image first:
    # plt.figure(figsize=(12, 9))
    # plt.imshow(original_data)  # needs plt.show() to render
    # plt.axis('off')
    # plt.show()
    # Create the output folder
    if not os.path.exists("../Home1.2_Image"):
        os.mkdir("../Home1.2_Image")
    for i in [2, 3, 5, 10, 20, 50]:
        # Create a per-K output folder
        filePackage = "../Home1.2_Image/K_equals" + str(i) + "/"
        if not os.path.exists(filePackage):
            os.mkdir(filePackage)
        print('Number of clusters:', i)
        starttime = time.time()
        # Cluster with the K-means method
        c_new, labels = K_means(points, i, m)
        # Or cluster with the K-medoid method instead:
        # c_new, labels = K_metroid(points, i, m)
        endtime = time.time()
        runningTime = endtime - starttime
        runningTimeAnnounce = "for K= " + str(i) + " the running time is " + str(runningTime) + "s"
        new_image = np.zeros((vertical_digit, horizontal_digit, 3))
        labels = np.array(labels)
        j = 0
        # Rebuild the image as a 3-D matrix, pixel by pixel
        while j < m:
            row = j // horizontal_digit
            column = j % horizontal_digit
            new_image[row, column, :] = c_new[labels[j], :]
            j = j + 1
        j = 0
        # Convert the centroids into a one-row image and plot them
        new_centroid = np.zeros((1, i, 3))
        while j < i:
            new_centroid[0, j, :] = c_new[j, :]
            j = j + 1
        plt.figure(figsize=(12, 1))
        plt.imshow(new_centroid.astype('uint8'))
        plt.axis('off')
        fileaddressKdemo = "../Home1.2_Image/" + str(i) + "K.jpg"
        plt.savefig(fileaddressKdemo)
        # plt.show()
        plt.figure(figsize=(12, 9))
        plt.imshow(new_image.astype('uint8'))
        plt.axis('off')
        plt.text(12, 4, runningTimeAnnounce)
        fileaddressdemo = filePackage + str(i) + "demo.jpg"
        plt.savefig(fileaddressdemo)
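# The pixel-rebuild loop in draw_Image can be collapsed into a single vectorized
# expression; a sketch, assuming c_new is array-like and
# len(labels) == vertical_digit * horizontal_digit:
import numpy as np

def rebuild_image(c_new, labels, vertical_digit, horizontal_digit):
    # Fancy indexing maps every pixel label to its centroid colour in one shot
    return np.asarray(c_new)[np.asarray(labels)].reshape(vertical_digit, horizontal_digit, 3)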
def run(self):
    db = self.db
    N = db['N']
    if db['H_matrix'] is None:
        db['H_matrix'] = np.eye(N) - np.ones((N, N)) / N
    if db['kernel_type'] == 'Linear Kernel':
        optimize_linear_kernel(db)
    elif db['kernel_type'] == 'Gaussian Kernel':
        output = np.empty((N, 1), dtype=np.float32)
        if self.db['prev_clust'] == 0:
            self.kdac.SetupParams(self.params)
            self.kdac.Fit(db['data'], N, db['d'])
        elif self.db['with_predefined_clustering']:
            self.kdac.SetupParams(self.params)
            self.kdac.Fit(db['data'], N, db['d'], db['Y_matrix'], N, db['C_num'])
        else:
            self.kdac.SetupParams(self.params)
            self.kdac.Fit()
        self.kdac.Predict(output, N, 1)
        db['allocation'] = output.T[0]
        db['allocation'] = db['allocation'].astype(np.int32)  # astype returns a copy
        db['allocation'] += 1  # cluster ids start from 1 instead of 0
        db['binary_allocation'] = np.zeros((N, db['C_num']))
        # Convert the allocation vector into one-hot binary_allocation rows
        for m in range(db['allocation'].shape[0]):
            db['binary_allocation'][m, int(db['allocation'][m]) - 1] = 1
        if db['Y_matrix'].shape[0] == 0:
            db['Y_matrix'] = db['binary_allocation']
        else:
            db['Y_matrix'] = np.append(db['Y_matrix'], db['binary_allocation'], axis=1)
        self.db['prev_clust'] += 1
        return
    elif self.db['kernel_type'] == 'Polynomial Kernel':
        optimize_polynomial_kernel(self.db)
    else:
        raise ValueError('Error: unknown kernel was used.')
    normalize_each_U_row(self.db)
    K_means(self.db)
    self.db['prev_clust'] += 1
def bi_kmeans(data, k):
    q = K_means.kmeans(data, 2)
    # Initialise the first two centroid nodes
    centroid = K_means.init_centroids(data, 2)
    index = 2
    while index != k:
        datasep = []
        # Pick the cluster with the largest SSE and split it in two
        a = max_key(get_sse(data, centroid, q))
        for i in range(len(data)):
            if q[i] == a:
                datasep.append(data[i])
        sep_centroid = K_means.init_centroids(datasep, 2)
        index += 1
        # Replace the split centroid with the two new ones
        centroid = copy.deepcopy(numpy.delete(centroid, a, 0))
        centroid = numpy.concatenate((centroid, sep_centroid))
        q = K_means.cost_funct(data, centroid)
        centroid = K_means.update_centroid(data, centroid, q)
    pre_centroid = centroid
    q = K_means.cost_funct(data, centroid)
    now_centroid = copy.deepcopy(pre_centroid)
    pre_centroid = K_means.update_centroid(data, pre_centroid, q)
    # Iterate until the centroids stop moving
    while (now_centroid != pre_centroid).any():
        q = K_means.cost_funct(data, pre_centroid)
        now_centroid = copy.deepcopy(pre_centroid)
        pre_centroid = K_means.update_centroid(data, pre_centroid, q)
    return q
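# max_key is not defined in this snippet; a minimal sketch, assuming it returns the
# index of the cluster with the largest SSE, i.e. the cluster bi_kmeans splits next:
def max_key(sse):
    return sse.index(max(sse))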
def run(self):
    db = self.db
    N = self.db['N']
    if db['data_type'] == 'Feature Matrix':
        self.db['data'] = self.center_data(self.db['data'])
    if self.db['kernel_type'] == 'Linear Kernel':
        optimize_linear_kernel(self.db)
    elif self.db['kernel_type'] == 'Gaussian Kernel':
        optimize_gaussian_kernel(self.db)
    elif self.db['kernel_type'] == 'Polynomial Kernel':
        optimize_polynomial_kernel(self.db)
    else:
        raise ValueError('Error: unknown kernel was used.')
    normalize_each_U_row(self.db)
    K_means(self.db)
    self.db['prev_clust'] += 1
    j = test_files[i]
    imgs.append(ref_imgs[j])
    input_infos.append(img_infos[j])
    ref_imgs = np.delete(ref_imgs, (j), axis=0)
    img_infos = np.delete(img_infos, (j), axis=0)
    distance_matrix = np.delete(distance_matrix, (j), axis=0)
    distance_matrix = np.delete(distance_matrix, (j), axis=1)

# Load precomputed clusters and centroids (allow_pickle is needed on newer NumPy
# because the stored clusters object is a Python dict)
clusters2 = np.load('clusters2.npy', allow_pickle=True).item()
centroids2 = np.genfromtxt('centroids2.csv', delimiter=',').astype(int)
clusters3 = np.load('clusters3.npy', allow_pickle=True).item()
centroids3 = np.genfromtxt('centroids3.csv', delimiter=',').astype(int)
clusters11, centroids11 = K.KMeans_clustering(ref_imgs, 1, img_infos)

# Test the accuracy of each algorithm for different maximum-iteration settings
for max_iter in range(1, 200, 1):
    print(max_iter)
    # Testing the ML-ANN algorithm
    NC = 1  # number of clusters
    weights = np.zeros(NC)  # weights for each cluster
    CDistances = np.zeros(NC)  # distances between clusters and the input image
    distances = {}
    for i in range(0, NC):
        distances[str(i)] = []
    # Cluster the reference images with the K-means algorithm
    clusters, centroids = copy.deepcopy(clusters11), copy.deepcopy(centroids11)
import K_means
import matplotlib.pyplot as plt
from numpy import *


def loadDatas():
    """Read the text file dataSet into a matrix."""
    fr = open('dataSet')
    arrys = fr.readlines()
    number_lines = len(arrys)
    return_mat = zeros((number_lines, 3))  # zeros() takes the shape as a tuple
    label_mat = zeros((1, number_lines))
    index = 0
    for line in arrys:
        line = line.strip()  # strip surrounding whitespace
        list_from_line = line.split(' ')
        return_mat[index, :] = list_from_line[0:3]
        label_mat[0, index] = list_from_line[0]
        index += 1
    return return_mat, label_mat


data_mat, labels = loadDatas()  # renamed from `mat` to avoid shadowing numpy.mat
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(data_mat[:, 0], data_mat[:, 1])
plt.show()
K_means.k_means()
def __init__(self, number_of_neurons_hidden_layer, number_of_neurons_output, is_bias,
             input_data, expected_outputs, is_aproximation=True):
    # Whether bias is enabled; bias is currently not implemented for the radial layer
    self.is_aproximation = is_aproximation
    self.is_bias = is_bias
    # Input data
    self.input_data = input_data
    self.expected_outputs = expected_outputs
    if not self.is_aproximation:
        self.amount_of_class = []
        self.num_class = int(max(self.expected_outputs))
        self.correct_list = []
        self.correct_class_vector = []
        for i in range(self.num_class):
            self.correct_class_vector.append([])
            self.amount_of_class.append([0])
        for i in self.expected_outputs:
            self.amount_of_class[int(i - 1)][0] += 1
    # Centre positions are to be drawn from the input vectors.
    # Stack the input data and the expected outputs so they can be shuffled together
    # without losing their pairing
    input_data_random_order = numpy.vstack((self.input_data.T, self.expected_outputs.T)).T
    numpy.random.shuffle(input_data_random_order)
    # Create the weights of the input layer: first allocate a matrix of the desired size
    self.hidden_layer = numpy.zeros((len(input_data_random_order[0, :-1]),
                                     number_of_neurons_hidden_layer)).T
    # Initialise the sigmas to 1
    self.scale_coefficient = numpy.ones(numpy.size(self.hidden_layer, 0))
    self.delta_scale_coefficient = numpy.zeros_like(self.scale_coefficient)
    self.hidden_layer = K_means.kmeans(input_data, number_of_neurons_hidden_layer, 1000)
    # TODO: dirty fix
    self.num_of_neurons_hid_layer = len(self.hidden_layer)
    # Compute the sigmas from the formula
    self.find_sigma()
    # print(self.scale_coefficient)
    # Momentum deltas; the hidden layer is currently not trained by backpropagation,
    # so these stay unused
    self.delta_weights_hidden_layer = numpy.zeros((len(input_data_random_order[0][:-1]),
                                                   self.num_of_neurons_hid_layer)).T
    # Create the output layer with random weights in [-1, 1], as in exercise 1
    self.output_layer = 2 * numpy.random.random((self.num_of_neurons_hid_layer,
                                                 number_of_neurons_output)).T - 1
    # print(self.output_layer)
    self.delta_weights_output_layer = numpy.zeros((self.num_of_neurons_hid_layer,
                                                   number_of_neurons_output)).T
    # If bias is enabled, create a bias weight vector for each layer
    if is_bias:
        self.bias_hidden_layer = (2 * numpy.random.random(self.num_of_neurons_hid_layer) - 1)
        self.bias_output_layer = (2 * numpy.random.random(number_of_neurons_output) - 1)
    # Otherwise create the same vectors filled with zeros; they then have no effect
    # on the computations
    else:
        self.bias_hidden_layer = numpy.zeros(self.num_of_neurons_hid_layer)
        self.bias_output_layer = numpy.zeros(number_of_neurons_output)
    # Delta vectors for the biases, matching the layer deltas
    self.bias_output_layer_delta = numpy.zeros(number_of_neurons_output)
    self.bias_hidden_layer_delta = numpy.zeros(self.num_of_neurons_hid_layer)
        if clust is None or len(clust) <= 1:
            count1 += 1
        else:
            count2 += 1
    return (count1 / len(clusters)) * 100


# =========== Main ========================
_data = Mini_Parse("stars.txt")
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Create a Kmeans object and load the data into it
kmeans = km.Kmeans(_data)
# The argument sets the number of centroids to compute
means = kmeans.CalculateMeans(15)
# Build the clusters from the means (centroids)
clusters = kmeans.FindClusters()
hullx = []
hully = []
hullz = []
# Run the Jarvis-march algorithm on each cluster
for i in range(len(clusters)):
    if len(clusters[i]) <= 1:
        continue
    else:
        temp = d3ch.JarvisMarch(clusters[i])
def myCluster(self, str_clu, str_iter, str_thres):
    try:
        # Disconnect the signal first, otherwise updates are very slow
        self.right_tableview.itemChanged.disconnect(self.item_Changed_Event)
    except Exception:
        pass
    try:
        if str_clu != '':
            num_clu = int(str_clu)
        else:
            num_clu = 2
        if str_iter != '':
            num_iter = int(str_iter)
        else:
            num_iter = 500
        if str_thres != '':
            num_thres = float(str_thres)
        else:
            num_thres = 0.0001
    except Exception:
        QtWidgets.QMessageBox.critical(self, "Error", "Invalid input")
        return
    kmeans = K_means.Kmeans(num_clu, num_iter, num_thres)
    self.SelectedToDf()
    if self.selectedDf is None:
        return
    if not self.selectedDf.empty:
        if self.selectedDf.T.shape[1] < 2:
            QtWidgets.QMessageBox.critical(self, "Error",
                                           "Input vectors have fewer than 2 dimensions; cannot cluster")
            return
        try:
            print(self.selectedDf)
            col = self.selectedDf.T.shape[1]
            data_X = self.selectedDf.T.iloc[:, :col].values
            predict_y = kmeans.predict(data_X)
            self.right_tableview.insertColumn(self.right_tableview.columnCount())
            for i in range(len(predict_y)):
                Input_item = QtWidgets.QTableWidgetItem(str(predict_y[i]))
                Input_item.setTextAlignment(Qt.AlignHCenter | Qt.AlignVCenter)
                self.right_tableview.setItem(i, self.right_tableview.columnCount() - 1,
                                             Input_item)
            temp_x = pd.DataFrame(data=data_X)
            temp_y = pd.Series(data=predict_y)
            temp_x["Predict"] = temp_y
            if self.selectedDf.T.shape[1] == 2:
                ax = self.ClusterWindow.plot_widget.add_subplot(111)
                for i in range(num_clu):
                    ax.scatter(x=temp_x[temp_x.Predict == i].iloc[:, 0],
                               y=temp_x[temp_x.Predict == i].iloc[:, 1])
            elif self.selectedDf.T.shape[1] >= 3:
                ax = Axes3D(self.ClusterWindow.plot_widget)
                for i in range(num_clu):
                    ax.scatter(temp_x[temp_x.Predict == i].iloc[:, 0],
                               temp_x[temp_x.Predict == i].iloc[:, 1],
                               temp_x[temp_x.Predict == i].iloc[:, 2])
        except Exception:
            QtWidgets.QMessageBox.critical(self, "Error",
                                           "Clustering failed; please check that the input data is valid")
            return
    try:
        self.item_Changed_Event()
        # Reconnect the signal that was disconnected above
        self.right_tableview.itemChanged.connect(self.item_Changed_Event)
    except Exception:
        pass
    self.ClusterWindow.plot_canvas.draw()
from PIL import Image

import K_means


def main():
    # file_name is defined elsewhere in the module
    x, miu, c = K_means.kmeans(file_name)
    # Recolour every pixel with its assigned centroid colour
    for i in range(len(x[:, 0])):
        x[i] = miu[int(c[i])]
    # Cast to uint8 so PIL accepts the array (assumes RGB values in 0-255)
    im = Image.fromarray(x.reshape(128, 128, 3).astype('uint8'))
    im.save('bird_new.png')
import numpy as np
import copy
import matplotlib.pyplot as plt

import K_means

# data = np.loadtxt('circle_data.csv', delimiter=',')
data = np.loadtxt('2d_span_data_centered.csv', delimiter=',')
randompoints = K_means.randomPoint(2, 2)
np.random.seed(123)
print(data.shape)
x = data[0]
y = data[1]
K_means.plot_image(x, y)
# label, center_points = K_means.k_means(data, randompoints)
# K_means.plotting(label, data, center_points)
# print(label)
    Ant, X, count = nt.main(fname)
    code = cl_ant.ant_label(Ant)
    p, e, c = eva.Evaluate_All(code, X.tolist(), d, fname)
    P = P + p
    E = E + e
    C = C + c
    # kmeans.show_plot(code, X)

print("NO-THRESHOLDS Algorithm : %s" % N)
print("P: ", P / float(N))
print("E: ", E / float(N))
print("C: ", C / float(N))
print("count:", count)

# --- K-means ---
P = 0.0
E = 0.0
C = 0.0
for i in range(N):
    # time.sleep(1)
    code, X = kmeans.main(fname, k)
    p, e, c = eva.Evaluate_All(code, X.tolist(), d, fname)
    P = P + p
    E = E + e
    C = C + c

print("K-means Algorithm : %s" % N)
print("P: ", P / float(N))
print("E: ", E / float(N))
print("C: ", C / float(N))
#---------------------------------------------------------------
# Build a codebook
x_train = pickle.load(open('./datasets/train_img.npy', 'rb'))
x_test = pickle.load(open('./datasets/test_img.npy', 'rb'))
y_train = pickle.load(open('./datasets/train_label.txt', 'rb'))
y_test = pickle.load(open('./datasets/test_label.txt', 'rb'))

strong_des = sift.dense_sift_each()  # dense SIFT
# weak_des = sift.weak_des_whole()   # original SIFT
codebook_path = './codebook/km_center_dense_200_caltech'
K_means.clustering(strong_des, codebook_path, n_cluster=200)

#---------------------------------------------------------------
# Save the level 0, 1, 2 PHOW (pyramid histogram of words) for train and test
codebooks = codebook.load_codebook(codebook_path)
tr_sl_0 = single_level(cal_train, 0, codebooks)
tr_sl_1 = single_level(cal_train, 1, codebooks)
tr_sl_2 = single_level(cal_train, 2, codebooks)
ts_sl_0 = single_level(cal_test, 0, codebooks)
ts_sl_1 = single_level(cal_test, 1, codebooks)
ts_sl_2 = single_level(cal_test, 2, codebooks)
tr_pyramid_L0 = tr_sl_0  # append to the pyramid
    R_Cramer_y=R_Cramer_y,
    R_cont_x=R_cont_x,
    R_Cramer_x=R_Cramer_x,
    dic=dic,
    normalize=True,
    verbose=True,
    path_rslt=path_rslt)
delta_time = round((datetime.now() - begin).total_seconds(), 1)
print('Data prepared in ' + str(delta_time) + 's')

# ============================================================================
# K-means
begin = datetime.now()
repartition = K_means.K_means(dfX, Y, nb_clusters_init=nb_clusters,
                              methode_prediction=method_prediction)
delta_time = round((datetime.now() - begin).total_seconds(), 1)
print('Clusters computed in ' + str(delta_time) + 's')

# ===========================================================================
# Confidence intervals
begin = datetime.now()
cluster_stat = IntConf.IntConf(repartition, alpha=0.85, path_rslt=path_rslt)
print(cluster_stat)
delta_time = round((datetime.now() - begin).total_seconds(), 1)
print('Confidence interval computed in ' + str(delta_time) + 's')

# ===========================================================================
# Prediction
index = ~Y.isnull()
def K_means(self):
    return K_means.ClusterPlot(self.file_path, self.clusters).main()
import sys, os
import numpy as np

sys.path.append(os.getcwd() + r'\Modules')
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles

import DBSCAN as db
import K_means as km

np.random.seed(0)
X, y = make_circles(n_samples=400, factor=.3, noise=.05)

dbscan = db.DBSCAN(eps=0.5, min_sample=5)
db_assignments = dbscan.fit_transform(X)

kmeans = km.K_Means(n_clusters=2)
km_centers, km_assignments = kmeans.fit_transform(X)

plt.figure(20)
plt.scatter(X[:, 0], X[:, 1], c=db_assignments)
plt.figure(21)
plt.scatter(X[:, 0], X[:, 1], c=km_assignments)
plt.show()
def clustering(NC, ref_imgs, img_infos):
    clusters, centroids = K.KMeans_clustering(ref_imgs, NC, img_infos)
    # np.save expects the file name first, then the object to store;
    # NC is an int, so it must be stringified before concatenation
    np.save('clusters' + str(NC) + '.npy', clusters)
    np.save('centroids' + str(NC) + '.npy', centroids)
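# Assumed read-back for the files saved above, mirroring the np.load(...).item()
# pattern used elsewhere in this project; allow_pickle=True is required on newer
# NumPy versions because the stored clusters object is a Python dict:
clusters = np.load('clusters2.npy', allow_pickle=True).item()
centroids = np.load('centroids2.npy', allow_pickle=True)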
    for idx in range(nP):
        # Add a random value to the gene
        for i in range(length):
            random_value = np.random.uniform(0.0, 1.0)
            cluster.populasi[i][idx][3] += random_value  # the original `+` discarded the result
    return cluster


def getCentroidPop(i):
    cluster.centroids = cluster.populasi[i]


k = 3
fitness = []
cluster = K_means.Kmeans('seeds.txt', k)

#------------------------- POPULATION, CHROMOSOME, CLUSTERING -------------------------#
createPopulation()
for i in range(0, 70):
    getCentroidPop(i)  # take the centroids from each population member
    cluster.clustering()
    centroid, SSE, acc = cluster.groupData()
    result = centroid, SSE, acc
    fitness.append(result)  # each chromosome (centroid set) has its own fitness value
    # print("SSE chromosome: %.2f" % SSE)
    # print("Acc chromosome: %.2f" % acc)

#--------------------------------- SELECTION ----------------------------------#
#!/usr/bin/env python
# coding: utf-8

# In[4]:

import K_means
import DB
import numpy as np
import pandas as pd
import sys
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# In[ ]:

test_file_name = sys.argv[1]

kmeans_labels = K_means.km_clustering(test_file_name)
km = kmeans_labels.reshape((len(kmeans_labels), 1))

dbscan_labels = DB.dbscan_clustering(test_file_name)
dbscan = dbscan_labels.reshape((len(dbscan_labels), 1))

# Shift both label sets so cluster ids start at 1
for i in range(len(kmeans_labels)):
    km[i] = km[i] + 1
    dbscan[i] = dbscan[i] + 1

final = np.append(km, dbscan, axis=1)
print('Saved the output labels in a csv file')
result = pd.DataFrame(final, columns=['K-Means', 'DBSCAN'])
result.to_csv('Label_Output.csv')
def workflow(fichier, fichier_sortie, path, path_rslt, taille_ech, suffix_table,
             begin_distributed, fich_vae, fich_deb):
    # =======================================================================
    # Statistical parameters
    nb_clusters = taille_ech // 1000
    R_cont_y = 0.25; R_Cramer_y = 0.20; R_cont_x = 0.85; R_Cramer_x = 0.80
    method_disc = 'Cramer'; method_continuous = 'regression'
    method_prediction = 'Ridge'  # 'random_forest'
    dic = {'SGMT_PF_V4': ['SGMT_PF_V4', 1],
           'SGMT_PF_AXE_FIDELITE_V4': ['SGMT_PF_AXE_FIDELITE_V4', 3],
           'REVENU_EST_M': ['REVENU_EST_M', 3]}

    # ============================================================================
    # Data preparation
    begin = datetime.now()
    dfX, Y, R_dico = data_preparation.main(
        fichier=fichier, fichier_sortie=fichier_sortie, method_disc=method_disc,
        method_continuous=method_continuous, taille_ech=taille_ech,
        R_cont_y=R_cont_y, R_Cramer_y=R_Cramer_y, R_cont_x=R_cont_x,
        R_Cramer_x=R_Cramer_x, dic=dic, normalize=True, verbose=True,
        path_rslt=path_rslt, suffix_table=suffix_table)
    delta_time = round((datetime.now() - begin).total_seconds(), 1)
    print('Data prepared in ' + str(delta_time) + 's')

    # ============================================================================
    # K-means
    begin = datetime.now()
    repartition = K_means.K_means(dfX, Y, nb_clusters_init=nb_clusters,
                                  methode_prediction=method_prediction)
    delta_time = round((datetime.now() - begin).total_seconds(), 1)
    print('Clusters computed in ' + str(delta_time) + 's')

    # ===========================================================================
    # Confidence intervals
    begin = datetime.now()
    cluster_stat = IntConf.IntConf(repartition, alpha=0.85, path_rslt=path_rslt,
                                   suffix_table=suffix_table)
    print(cluster_stat)
    delta_time = round((datetime.now() - begin).total_seconds(), 1)
    print('Confidence interval computed in ' + str(delta_time) + 's')

    # ===========================================================================
    # Prediction
    index = ~Y.isnull()
    dfX_train, dfX_test, Y_train, Y_test = train_test_split(dfX[index], Y[index],
                                                            test_size=0.4, random_state=44)
    del dfX_train, dfX_test, Y_train
    result, result_test = prediction.IC_reg(repartition, dfX, Y, path_rslt=path_rslt,
                                            suffix_table=suffix_table)
    result_1 = prediction.get_result(repartition, dfX, Y, Y_test, method=method_prediction)
    # prediction.regression_graph(Y_real=result_1['Y_real'], Y_pred=result_1['Y_pred'], col='black')
    score_abs, score_rel = prediction.get_regression_score(Y_real=result_1['Y_real'],
                                                           Y_pred=result_1['Y_pred'])
    print('Mean prediction error: ' + str(score_abs) + ' (' + str(score_rel) + '%)')
    curve_Recall, AUC_ROC = prediction.classification_curve(Y_real=result_1['Y_real'],
                                                            Y_pred=result_1['Y_pred'],
                                                            threshold_rich=100000, col='black')
    print('AUC_ROC: ' + str(AUC_ROC))
    print('AUC Precision-Recall: ' + str(curve_Recall))
    # prediction.graph_variable_influence(dfX, result_1, col='black')

    # ===========================================================================
    # Parameters file
    if not os.path.exists(path + "Compare.xlsx"):
        S = pd.DataFrame(columns=["reference", "data_x", "data_y", "method disc",
                                  "method continuous", "method prevision", "Date",
                                  "Nb ligne", "% NaN", "k cluster", "R_cont_y",
                                  "R_Cramer_y", "R_cont_x", "R_Cramer_x",
                                  "Nb var quali", "Nb var quant", "Nb regroupement",
                                  "score", "score relative", "AUC ROC", "curve Recall",
                                  "temps de calcul"])
        S.to_excel(path + "Compare.xlsx", encoding="utf-8", index=False)
    Nb_var_quant = len(R_dico['variables continues gardees'])
    Nb_var_quali = len(R_dico['variables discretes gardees'])
    Nb_regroupement = len(R_dico['groupe variables'])
    date = "%s" % datetime.now()
    NaN = len(Y[Y.isnull() == True]) / Y.shape[0]
    delta_time = round((datetime.now() - begin_distributed).total_seconds(), 1)
    wb = load_workbook(path + "Compare.xlsx")
    sheet = wb['Sheet1']
    reference = sheet.max_row + 1
    ABC = list("ABCDEFGHIJKLMNOPQRSTUV")
    liste = [reference, fich_deb, fich_vae, method_disc, method_continuous,
             method_prediction, date, taille_ech, str(round(NaN, 2)), nb_clusters,
             R_cont_y, R_Cramer_y, R_cont_x, R_Cramer_x, Nb_var_quali, Nb_var_quant,
             Nb_regroupement, score_abs, str(score_rel) + '%', str(AUC_ROC),
             str(curve_Recall), str(delta_time) + 's']
    for i in range(len(ABC)):
        sheet[ABC[i] + str(reference)].value = liste[i]
    wb.save(path + "Compare.xlsx")

    mon_fichier = open(path_rslt + "fichier" + suffix_table + ".txt", "w")
    mon_fichier.write('list of qualitative variables:')
    # sort_values replaces the DataFrame.sort call removed from newer pandas
    a = R_dico['variables discretes gardees'].sort_values(['R2'], ascending=[False])
    for i in a.values:
        mon_fichier.write('\n ' + str(i))
    a = R_dico['variables continues gardees'].sort_values(['R2'], ascending=[False])
    mon_fichier.write('\n \nlist of quantitative variables:')
    for i in a.values:
        mon_fichier.write('\n ' + str(i))
    mon_fichier.write('\n \nlist of variable groups:')
    mon_fichier.write('\n R², correlated variable group, representative variable')
    for i in R_dico['groupe variables'].values:
        mon_fichier.write('\n ' + str(i))
    delta_time = round((datetime.now() - begin_distributed).total_seconds(), 1)
    mon_fichier.write('\n \nComputation time: ' + str(delta_time) + 's')
    mon_fichier.close()
    print('Total computation time for this batch: ' + str(delta_time) + 's')
    t = np.zeros(15)
    for i in range(15):
        if i < 5:
            t[i] = 0
        elif i < 10:
            t[i] = 1
        else:
            t[i] = 2
    plt.scatter(model[:, 0], model[:, 1], s=150, c=t, cmap=mycm)
    plt.xlim([0, 10])
    plt.ylim([0, 10])
    plt.show()


if __name__ == '__main__':
    X = simulate_data.loaddata()
    t = []
    for i in range(300):
        if i < 100:
            t.append(0)
        elif i < 200:
            t.append(1)
        else:
            t.append(2)
    k_means_model = K_means.train(X)
    init_model = extract_centers(k_means_model)[0]
    model = train(X, init_model)
    draw(X, np.array(t), model)
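# extract_centers is not shown in this snippet; a hedged sketch, assuming the
# trained k-means model exposes its centroid array as an attribute named `centers`
# (both the attribute name and the tuple wrapping are assumptions inferred from the
# call site extract_centers(k_means_model)[0]):
def extract_centers(model):
    return (np.asarray(model.centers),)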
    # ... so we obtain the spanning matrix
    C_matrix = eigenvectors.T[eigenvalue_order]
    return C_matrix


if __name__ == "__main__":
    dirlist = os.listdir()
    text = None
    for file in dirlist:
        if '2d' in file:
            text = file
    data = np.loadtxt(text, delimiter=',')
    x_origin = data[0]
    y_origin = data[1]
    # Plot the original data
    K_means.plot_image(x_origin, y_origin)
    centered_data = center(data)
    C_matrix = -1 * compute_pca(centered_data, 10 ** -5)
    print("The spanning matrix:\n", C_matrix)
    # Plot the spanning vectors as arrows
    x_vector = np.zeros(3)
    y_vector = np.zeros(3)
    x_vector[:2] = C_matrix.T[0]
    y_vector[:2] = C_matrix.T[1]
    K_means.plot_image(x_vector, y_vector)
    # Now transform the original data
    data_transformed = np.dot(C_matrix, data)
    x_transformed = data_transformed[0]
    y_transformed = data_transformed[1]
    K_means.plot_image(x_transformed, y_transformed)