def test3_cluster2():
    """
    Exercise three_d_cluster_rotation_gen with a prediction vector supplied.

    A fixed 10-row, 3-column data set is passed together with a rotation
    step of 45 and an all-ones prediction vector; the first two values
    produced by the generator must be 0 and 45.
    """
    # One list per data column, stacked side by side into a (10, 3) float array.
    column_a = [-1, -0.5, 0, 0, 0, 0, 0, 0, 0.5, 1]
    column_b = [-1, 0, 0, 0, -0.5, 0, 0.5, 0, 0, 1]
    column_c = [-1, -0.5, 0, -0.5, 0, 5, 0, 0, 0.5, 1]
    points = np.column_stack((column_a, column_b, column_c))

    rotation_gen = three_d_cluster_rotation_gen(
        points, 45, predictions=np.ones(10)
    )

    first_value = next(rotation_gen)
    assert first_value == 0
    second_value = next(rotation_gen)
    assert second_value == 45
def test4_cluster3():
    """
    Exercise three_d_cluster_rotation_gen with type_marker="numeric".

    A fixed 10-row, 3-column data set is passed together with a rotation
    step of 45 and no prediction vector; the first two values produced by
    the generator must be 0 and 45.
    """
    # One list per data column, stacked side by side into a (10, 3) float array.
    column_a = [-1, -0.5, 0, 0, 0, 0, 0, 0, 0.5, 1]
    column_b = [-1, 0, 0, 0, -0.5, 0, 0.5, 0, 0, 1]
    column_c = [-1, -0.5, 0, -0.5, 0, 5, 0, 0, 0.5, 1]
    points = np.column_stack((column_a, column_b, column_c))

    rotation_gen = three_d_cluster_rotation_gen(
        points, 45, type_marker="numeric"
    )

    first_value = next(rotation_gen)
    assert first_value == 0
    second_value = next(rotation_gen)
    assert second_value == 45
def test2_cluster1():
    """
    Exercise three_d_cluster_rotation_gen in its most basic form.

    Only the data and a rotation step of 45 are passed (no predictions, no
    marker type); the first two values produced by the generator must be
    0 and 45.
    """
    # One list per data column, stacked side by side into a (10, 3) float array.
    column_a = [-1, -0.5, 0, 0, 0, 0, 0, 0, 0.5, 1]
    column_b = [-1, 0, 0, 0, -0.5, 0, 0.5, 0, 0, 1]
    column_c = [-1, -0.5, 0, -0.5, 0, 5, 0, 0, 0.5, 1]
    points = np.column_stack((column_a, column_b, column_c))

    rotation_gen = three_d_cluster_rotation_gen(points, 45)

    first_value = next(rotation_gen)
    assert first_value == 0
    second_value = next(rotation_gen)
    assert second_value == 45
# Script fragment: cut a hierarchical clustering into flat clusters at a
# distance threshold and draw two 3-D cluster plots of the result.
# `single_hierarchy_minus2`, `X_minus2` and `plt` are defined outside this
# fragment (presumably a linkage matrix, the matching data array, and
# matplotlib.pyplot -- confirm against the earlier part of the file).
from scipy.cluster.hierarchy import fcluster
max_d = 1.05
# criterion='distance' cuts the hierarchy at the threshold max_d.
prediction = fcluster(single_hierarchy_minus2, max_d, criterion='distance')
# number of unique clusters: len(set(prediction))

from visuals_functions import three_d_scatter_rotation_gen,three_d_cluster_rotation_gen,binary_prediction

# Plot the data columns from index 2 onward, one marker group per cluster.
# The labels are shifted down by one before being passed on; fcluster labels
# start at 1 per the SciPy docs, so this presumably gives the plotting helper
# 0-based labels -- TODO confirm.
cluster_plot_generator = three_d_cluster_rotation_gen(X_minus2[:,2:], rotation_angle=22.5,predictions=(prediction-1))
# Advance the generator a single step, then display the current figure.
next(cluster_plot_generator)
plt.show()

# http://stackoverflow.com/questions/19633336/using-numbers-as-matplotlib-plot-markers

# Second plot: the predictions are the first element of whatever
# binary_prediction returns (helper not visible here -- its semantics should
# be checked in visuals_functions), drawn with type_marker="regular".
gen=three_d_cluster_rotation_gen(X_minus2[:,2:],rotation_angle=22.5,predictions=binary_prediction(prediction)[0],type_marker="regular")
next(gen)
plt.show()

# Re-cut the same hierarchy at a slightly larger threshold; `prediction` is
# overwritten and is presumably consumed by code following this fragment.
max_d = 1.1
prediction = fcluster(single_hierarchy_minus2, max_d, criterion='distance')
# For every candidate cluster count, fit KMeans on three variants of the data
# (X_full, X_minus1, X_minus2), record the sorted cluster sizes, save one
# cluster plot per variant, and store the silhouette score.
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from which variables each statement uses -- confirm. The
# outer loop body may also continue beyond this fragment.
for j,num_clusters in enumerate(number_of_clusters):
    kmean = sklearn.cluster.KMeans(num_clusters)
    # One row per data variant, one column per cluster.
    sizes_of_groups=np.zeros((3,num_clusters))
    for i,X in enumerate([X_full,X_minus1,X_minus2]):
        prediction = kmean.fit_predict(X)
        num_obs = X.shape[0]
        # Row indices belonging to each cluster label 0..num_clusters-1.
        groupings = [np.arange(num_obs)[prediction==x] for x in np.arange(num_clusters)]
        # Cluster sizes in ascending order, so rows can be compared without
        # depending on the arbitrary label numbering.
        sizes_of_groups[i,:] = sorted([len(x) for x in groupings])

        cluster_plot_generator = three_d_cluster_rotation_gen(X[:,2:], rotation_angle=22.5,predictions=prediction)
        # The figure from the first generator step is closed unsaved; only the
        # figure from the second step is titled and written to disk
        # (presumably the view after one 22.5 rotation step -- confirm).
        next(cluster_plot_generator)
        plt.close()
        next(cluster_plot_generator)
        plt.title("Kmeans(" + str(num_clusters) + ") clustering on data with " + str(i) + " 'outlier(s)' removed")
        # `images` is defined elsewhere; presumably a directory prefix that
        # already ends with a path separator -- confirm.
        plt.savefig(images+"Kmeans("+ str(num_clusters) +")_" + str(i) + "_outlier.png")
        plt.close()

        # silhouette score collection
        # Indexed [data variant, position of this cluster count].
        silhouette_sc[i,j]=sklearn.metrics.silhouette_score(X,labels=prediction)

    print("**** Kmeans("+str(num_clusters)+") table ****")