# Assumed context: `make_cell` and the PAGE/CSS/JAVASCRIPT templates are
# defined elsewhere in this file.
import io

import requests

import silhouette


def main():
    files = []
    for lang, addr in code_sources:
        # Fetch each source file and keep only its first 1000 lines.
        data = requests.get(addr).text
        contents = io.StringIO("\n".join(data.split('\n')[:1000]))
        outline = ''.join(
            silhouette.silhouette(contents, " ",
                                  "<span style='background: black;'>",
                                  "</span>"))
        files.append((addr, lang, outline))
    cells = [make_cell(f) for f in files]
    table = '<div class="lfbtable">{}\n</div>'
    table = table.format('\n'.join(cells))
    print(PAGE.format(CSS, table, "", JAVASCRIPT))
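# A hedged sketch of how main() might be driven; the source list below is an
# illustrative placeholder, not the project's real one.
if __name__ == '__main__':
    code_sources = [
        ('python', 'https://example.com/raw/example.py'),  # hypothetical URL
    ]
    main()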
import sys
import warnings

import cPickle
import numpy as np
# These module paths target the older scikit-learn API this Python 2 script
# was written against (cross_validation, lda, qda were removed later).
from sklearn import (cross_validation, decomposition, naive_bayes,
                     neighbors, pipeline)
from sklearn import lda, qda
from sklearn.preprocessing import scale

import silhouette

# Import some data to play with.
warnings.simplefilter("ignore")
db = cPickle.load(open(sys.argv[1], 'r'))
Y = np.array([db[i][0] for i in db.keys()]).astype(int)
X = np.array([db[i][1:] for i in db.keys()])

# Class priors: the fraction of samples carrying each label 1..Y.max().
s = float(Y.shape[0])
priors = np.array([float(np.where(Y == i)[0].shape[0]) / s
                   for i in range(1, Y.max() + 1)])

# Four PCA + classifier pipelines, each projecting onto 6 components.
classifiers = ['nb', 'knn', 'lda', 'qda']
clf = [pipeline.Pipeline([('pca', decomposition.PCA(n_components=6, whiten=False)),
                          ('nb', naive_bayes.GaussianNB())]),
       pipeline.Pipeline([('pca', decomposition.PCA(n_components=6, whiten=False)),
                          ('knn', neighbors.KNeighborsClassifier(n_neighbors=1))]),
       pipeline.Pipeline([('pca', decomposition.PCA(n_components=6, whiten=False)),
                          ('lda', lda.LDA())]),
       pipeline.Pipeline([('pca', decomposition.PCA(n_components=6, whiten=False)),
                          ('qda', qda.QDA())])]

# Median per-sample silhouette of the labelled data (labels shifted to 0-based).
s = silhouette.silhouette(X, Y - 1)
print np.median(s)

# 10-fold cross-validated weighted F1 for each pipeline.
it = cross_validation.KFold(Y.size, n_folds=10)
for c, cn in zip(clf, classifiers):
    res = cross_validation.cross_val_score(c, scale(X), Y, cv=it,
                                           scoring="f1_weighted")
    print cn + ': ', res.mean(), res.std()
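# For comparison, a minimal sketch of the same per-sample silhouette using
# scikit-learn's own silhouette_samples; this assumes the local `silhouette`
# module follows the standard per-sample definition.
from sklearn.metrics import silhouette_samples
s_check = silhouette_samples(X, Y - 1)  # one coefficient per sample, in [-1, 1]
print np.median(s_check)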
# Organize data as X/Y coordinates.
x = arcpy.da.TableToNumPyArray(input_points, "SHAPE@X").astype(float)
y = arcpy.da.TableToNumPyArray(input_points, "SHAPE@Y").astype(float)
X = np.array(list(zip(x, y)))

# Check for optimization: sweep K and record each average silhouette.
if str(k_optimized) == 'true':
    try:
        k_max = 20
        Silhouettes = []
        K = 2
        while K < k_max:
            arcpy.AddMessage("Testing K = " + str(K))
            Centroids = plusplus(X, K)  # k-means++ seeding
            Cxy, points, clusters = k_means(X, K, Centroids)
            average, sils = silhouette(points)
            Silhouettes.append(average)
            K += 1
        arcpy.AddMessage('\r\n' + "Average Silhouette Values:" + '\n\r')
        arcpy.AddMessage(str(Silhouettes))
    except Exception as e:
        exc_tb = sys.exc_info()[2]  # get the line number of the failure
        arcpy.AddError('\n' + "Error Optimizing K: \n\n\t" + "In line "
                       + str(exc_tb.tb_lineno) + ": " + str(e) + "\n")
else:
    # Control for an invalid K input.
    if k > len(X):
        arcpy.AddError("ERROR: K must be less than or equal to the number of points")
        quit()
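# Hypothetical follow-up for the optimized branch (not in the original tool):
# report the K whose average silhouette came out highest. K started at 2 in
# the sweep above, so the list index is offset by 2.
best_k = 2 + Silhouettes.index(max(Silhouettes))
arcpy.AddMessage("Best K by average silhouette: " + str(best_k))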
def dr_cluster(data, method, gamma, params, clusters, stepsize, rows_toload,
               dropped_class_numbers):
    # Kernel-PCA target dimensionality depends on the clustering method.
    if method == "Kmeans2D":
        components = 2
    if method == "Kmeans1D" or method == "Thresholding":
        components = 1
    flag = 0
    resetflag = 0
    logger.writelog(components, "Components")
    logger.result_open(method)
    print(method)
    max_sc = -100.0
    best_purity = 0.0
    best_gamma = 0.0
    serial_num = 0
    try:
        # Sweep gamma in `params` steps, then do one extra pass (i == params)
        # that reruns and logs the best setting found.
        for i in range(0, params + 1):
            transformer = KernelPCA(n_components=components, kernel='rbf',
                                    gamma=gamma)
            data_transformed = transformer.fit_transform(data)
            df = pd.DataFrame(data_transformed)
            df.to_csv(KPCA_output_path, index=False, header=None)
            del df
            gc.collect()
            if method == "Thresholding":
                # Compile the C thresholding program once, then time each run.
                if flag == 0:
                    os.system("cc c_thresholding_new.c")
                    flag = 1
                start = timeit.default_timer()
                os.system("./a.out " + str(clusters) + " " + str(rows_toload))
                end = timeit.default_timer()
                thresholding_time = end - start
                sc = silhouette.silhouette(KPCA_output_path,
                                           Thresholding_paths[1])
                (groundtruth_distribution, temp_assignment_error_matrix,
                 row_ind, col_ind, class_numbers, purity) = hungarian.hungarian(
                    't', Thresholding_paths[0], clusters, rows_toload,
                    dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, thresholding_time,
                                   gamma, sc, purity)
                # print(i + 1, thresholding_time, gamma, sc, purity)
                if i < params:
                    # Track the gamma with the best silhouette coefficient.
                    if sc > max_sc:
                        max_sc = sc
                        best_gamma = gamma
                        best_purity = purity
                        serial_num = i + 1
                    if i == params - 1:
                        # Last sweep step: rerun with the best gamma.
                        gamma = best_gamma
                        sc = max_sc
                        purity = best_purity
                if i == params:
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method,
                                       thresholding_time, best_gamma, max_sc,
                                       best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method,
                                            thresholding_time, best_gamma,
                                            max_sc, best_purity)
                    write_hungarian_result(best_gamma, clusters,
                                           groundtruth_distribution,
                                           temp_assignment_error_matrix,
                                           row_ind, col_ind, class_numbers,
                                           best_purity, method, params,
                                           stepsize, dropped_class_numbers)
            else:
                # K-means path: cluster the projected data and score it the
                # same way.
                kmeans_time = kmeans.kmeans(KPCA_output_path, KMeans_paths[1],
                                            clusters)
                kmeans.groundtruth_distribution(KMeans_paths[1],
                                                KMeans_paths[0],
                                                datafiles_names[0],
                                                datafiles_names[2], clusters)
                sc = silhouette.silhouette(KPCA_output_path, KMeans_paths[1])
                (groundtruth_distribution, temp_assignment_error_matrix,
                 row_ind, col_ind, class_numbers, purity) = hungarian.hungarian(
                    'k', KMeans_paths[0], clusters, rows_toload,
                    dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, kmeans_time, gamma,
                                   sc, purity)
                # print(i + 1, kmeans_time, gamma, sc, purity)
                if i < params:
                    if sc > max_sc:
                        max_sc = sc
                        best_gamma = gamma
                        best_purity = purity
                        serial_num = i + 1
                    if i == params - 1:
                        gamma = best_gamma
                        sc = max_sc
                        purity = best_purity
                if i == params:
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method,
                                       kmeans_time, best_gamma, max_sc,
                                       best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method,
                                            kmeans_time, best_gamma, max_sc,
                                            best_purity)
                    write_hungarian_result(best_gamma, clusters,
                                           groundtruth_distribution,
                                           temp_assignment_error_matrix,
                                           row_ind, col_ind, class_numbers,
                                           best_purity, method, params,
                                           stepsize, dropped_class_numbers)
            if i < params - 1:
                gamma = gamma + stepsize
    except (KeyboardInterrupt, SystemExit, Exception) as ex:
        ex_type, ex_value, ex_traceback = sys.exc_info()
        trace_back = traceback.extract_tb(ex_traceback)
        logger.writelog(str(ex_type.__name__), "Exception Type")
        logger.writelog(str(ex_value), "Exception Message")
        logger.writelog(str(trace_back), "Traceback")
    finally:
        logger.result_close()
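# Minimal standalone sketch of the selection idea dr_cluster implements:
# sweep gamma, score each kernel-PCA projection by its mean silhouette, and
# keep the best. This uses plain scikit-learn instead of the script's own
# kmeans/silhouette/logger helpers, so the names and defaults below are
# assumptions, not the original pipeline.
from sklearn.cluster import KMeans
from sklearn.decomposition import KernelPCA
from sklearn.metrics import silhouette_score


def best_gamma_by_silhouette(data, clusters, gammas):
    best_sc, best_gamma = -1.0, None
    for gamma in gammas:
        # Project with RBF kernel PCA at this gamma, then cluster.
        projected = KernelPCA(n_components=2, kernel='rbf',
                              gamma=gamma).fit_transform(data)
        labels = KMeans(n_clusters=clusters, n_init=10).fit_predict(projected)
        sc = silhouette_score(projected, labels)
        if sc > best_sc:
            best_sc, best_gamma = sc, gamma
    return best_gamma, best_sc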
# NOTE: `f` is assumed to be a file object opened earlier in the original
# script (the pickle holding the class labels `cl`).
with open(path + "names.pkl", "r") as g:
    cl = cPickle.load(f)
    nomes = cPickle.load(g)  # figure names

# Feature database: class label followed by the log bend-energy descriptor
# of each image.
db = {}
for im_file in nomes:
    nmbe = desc.bendenergy(path + im_file, sigma)
    db[im_file] = numpy.hstack((cl[im_file], numpy.log(nmbe())))

data1 = numpy.array([db[i] for i in db.keys()])
Y = data1[:, 0].astype(int)
X1 = scale(data1[:, 1:])

# Median distance of the per-sample silhouettes from the ideal value 1.
s = silhouette.silhouette(X1, Y - 1)
print numpy.median(numpy.abs(1. - s))

# Embed the descriptors in a low-dimensional space with metric MDS
# (Isomap kept as a commented-out alternative).
#iso = Isomap(n_neighbors=98, max_iter=2500)
mds = MDS(n_init=20, dissimilarity='euclidean', max_iter=2500)
#X1 = iso.fit_transform(data1[:, 1:])
X1 = mds.fit_transform(data1[:, 1:])

# R^2-style goodness of fit between original and embedded pairwise distances.
r = ((pdist(data1[:, 1:]) - pdist(X1)) ** 2).sum()
s = ((pdist(X1) - pdist(X1).mean()) ** 2).sum()
R2 = 1 - r / s
print R2

# Replace the database entries with (label, embedded coordinates).
data = numpy.vstack((Y, X1.transpose())).transpose()
db = dict(zip(db.keys(), data))
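# Hypothetical cross-check (not in the original script): the Pearson
# correlation between original and embedded pairwise distances should be
# close to 1 when the R2 above is high.
from scipy.stats import pearsonr
rho = pearsonr(pdist(data1[:, 1:]), pdist(X1))[0]
print rho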