# ---------------------------------------------------------------------------
# Plots for a k-means fit with 2 clusters
# ---------------------------------------------------------------------------
# Fit a two-cluster k-means model on X_used and keep the per-row labels.
kmean = sklearn.cluster.KMeans(n_clusters=2)
pred = kmean.fit_predict(X_used)

# Every image produced below shares this directory / data-set prefix.
_gap_stem = images + "/tibshirani_gap/" + data_info

# 3-d plot: the last three columns of X_used are handed to the plotting
# helper together with the labels.  save=False is passed to the helper and
# the figure is written out here with plt.savefig instead.
plt.close()
plt.figure()
three_d_plot_funct(X_used[:, -3:], pred, save=False)
plt.savefig(_gap_stem + "_tibshirani_2_clusters_3d.png")

# Pairs plot: one frame holding every column of X_used (named x0, x1, ...)
# followed by a final "prediction" column with the cluster labels.
column_labels = ["x" + str(i) for i in range(X_used.shape[-1])]
column_labels.append("prediction")
pairplotsX = pd.concat(
    [pd.DataFrame(X_used), pd.DataFrame(pred)],
    axis=1,
)
pairplotsX.columns = column_labels

plt.close()
plt.figure()
pair_plot_funct(pairplotsX, save=False)
plt.savefig(_gap_stem + "_tibshirani_2_clusters_pairs.png")
plt.close()

# Summary table with one column per reference distribution name.
# Row 0 holds the best_* values, row 1 the matching best_*_max_gap values.
bests = pd.DataFrame(
    data=[
        [best_uniform, best_gaussian, best_t],
        [best_uniform_max_gap, best_gaussian_max_gap, best_t_max_gap],
    ],
    columns=["uniform", "gaussian", "t"],
)

# a 2d example (seen after most of the above code was written)
# https://datasciencelab.wordpress.com/2013/12/27/finding-the-k-in-k-means-clustering/
# Tail of the k-means section: name the pairs-plot columns, store the
# prediction vector, and draw the pairs / 3-d plots for the current fit.
#
# NOTE(review): the original line breaks and indentation of this chunk are
# lost, and the statements that define num_data, index_clusters, kk,
# num_clusters and grey_output are not visible here.  Presumably this is the
# body of nested loops -- confirm the nesting (in particular which statements
# belong to the `if`, and at which level the final "\n" is written) against
# the original file before re-indenting.
#
# What the statements below do, in order:
#   * pairplotsX columns are named x0 .. x{len(grey_option)-1}, followed by
#     "prediction".
#   * storage_prediction starts as `prediction`.  When X and X_full differ in
#     row count, it is replaced by a vector of -1 with X_full.shape[0]
#     entries, and `prediction` is written at the positions selected by
#     kept_truth (the original comment marks the -1 entries as "for the
#     outliers").
#   * the result is stored in kmean_predictions[num_data][:, index_clusters, kk].
#   * image_extension is built from num_clusters, data_names[num_data] and
#     grey_output, and passed to both plotting helpers; the 3-d helper also
#     receives the indices of the last three columns of X.
#   * "-" is written to stdout and flushed, then "\n" is written
#     (presumably a per-iteration progress marker and an end-of-row newline --
#     TODO confirm).
pairplotsX.columns = ["x"+str(i) for i in range(len(grey_option))]+["prediction"] # storage of prediction vector (dealing with different num of observations) storage_prediction = prediction if X.shape[0]!=X_full.shape[0]: storage_prediction = np.ones((X_full.shape[0]))*-1 # for the outliers storage_prediction[kept_truth] = prediction kmean_predictions[num_data][:,index_clusters,kk]=storage_prediction # imaging image_extension = "kmeans"+"("+str(num_clusters)+")_"+data_names[num_data]+"_"+grey_output+".png" # pairs plots pair_plot_funct(pairplotsX,image_extension=image_extension) # 3d plots three_d_plot_funct(X,prediction,np.arange(X.shape[-1])[-3:], image_extension=image_extension) sys.stdout.write("-") sys.stdout.flush() sys.stdout.write("\n") ################### #### Dirichlet #### ###################