def test3_cluster2():
    """
    Exercise three_d_cluster_rotation_gen.
    PART 2: a prediction vector (all ones) is passed in.
    """
    # Ten 3-D points, written out one coordinate axis at a time.
    axes = (
        [-1, -0.5, 0, 0, 0, 0, 0, 0, 0.5, 1],
        [-1, 0, 0, 0, -0.5, 0, 0.5, 0, 0, 1],
        [-1, -0.5, 0, -0.5, 0, 5, 0, 0, 0.5, 1],
    )
    points = np.ones((10, 3))
    for column, values in enumerate(axes):
        points[:, column] = np.array(values)

    rotation_gen = three_d_cluster_rotation_gen(
        points, 45, predictions=np.ones(10)
    )

    # The first value yielded must be 0 ...
    first_value = next(rotation_gen)
    assert first_value == 0

    # ... and the second must equal the 45 given as the second argument
    # (presumably the rotation angle -- confirm against the generator).
    second_value = next(rotation_gen)
    assert second_value == 45
def test4_cluster3():
    """
    Exercise three_d_cluster_rotation_gen.
    PART 3: no prediction vector, type_marker set to "numeric".
    """
    # Ten 3-D points, written out one coordinate axis at a time.
    axes = (
        [-1, -0.5, 0, 0, 0, 0, 0, 0, 0.5, 1],
        [-1, 0, 0, 0, -0.5, 0, 0.5, 0, 0, 1],
        [-1, -0.5, 0, -0.5, 0, 5, 0, 0, 0.5, 1],
    )
    points = np.ones((10, 3))
    for column, values in enumerate(axes):
        points[:, column] = np.array(values)

    rotation_gen = three_d_cluster_rotation_gen(
        points, 45, type_marker="numeric"
    )

    # The first value yielded must be 0 ...
    first_value = next(rotation_gen)
    assert first_value == 0

    # ... and the second must equal the 45 given as the second argument
    # (presumably the rotation angle -- confirm against the generator).
    second_value = next(rotation_gen)
    assert second_value == 45
def test2_cluster1():
    """
    Exercise three_d_cluster_rotation_gen.
    PART 1: simplest call -- only the data and the second positional value.
    """
    # Ten 3-D points, written out one coordinate axis at a time.
    axes = (
        [-1, -0.5, 0, 0, 0, 0, 0, 0, 0.5, 1],
        [-1, 0, 0, 0, -0.5, 0, 0.5, 0, 0, 1],
        [-1, -0.5, 0, -0.5, 0, 5, 0, 0, 0.5, 1],
    )
    points = np.ones((10, 3))
    for column, values in enumerate(axes):
        points[:, column] = np.array(values)

    rotation_gen = three_d_cluster_rotation_gen(points, 45)

    # The first value yielded must be 0 ...
    first_value = next(rotation_gen)
    assert first_value == 0

    # ... and the second must equal the 45 given as the second argument
    # (presumably the rotation angle -- confirm against the generator).
    second_value = next(rotation_gen)
    assert second_value == 45


from scipy.cluster.hierarchy import fcluster

# Threshold handed to fcluster below as the cut-off for forming flat clusters.
max_d = 1.05
# One flat-cluster label per observation; with criterion='distance', fcluster
# uses max_d as the limit on cophenetic distance within a cluster.
# NOTE(review): single_hierarchy_minus2 is defined outside this view --
# presumably a linkage matrix; confirm.
prediction = fcluster(single_hierarchy_minus2, max_d, criterion='distance')
# number of unique clusters:
# (bare expression: the value is only echoed in an interactive session and is
# discarded when this runs as a plain script)
len(set(prediction))




from visuals_functions import three_d_scatter_rotation_gen,three_d_cluster_rotation_gen,binary_prediction

# Build a plot generator over columns 2 onward of X_minus2, passing the
# cluster labels shifted down by one.
# NOTE(review): the "-1" presumably makes the fcluster labels zero-based --
# confirm what three_d_cluster_rotation_gen expects.
cluster_plot_generator = three_d_cluster_rotation_gen(X_minus2[:,2:],
	rotation_angle=22.5,predictions=(prediction-1))

# Advance the generator a single step, then display the current figure(s).
# NOTE(review): plt is not imported in the visible part of the file.
next(cluster_plot_generator)
plt.show()
# http://stackoverflow.com/questions/19633336/using-numbers-as-matplotlib-plot-markers


# Second plot: same data and rotation_angle, but the labels come from the
# first element of binary_prediction(prediction) and type_marker is "regular".
# NOTE(review): binary_prediction's return structure is not visible here --
# confirm what element [0] holds.
gen=three_d_cluster_rotation_gen(X_minus2[:,2:],rotation_angle=22.5,predictions=binary_prediction(prediction)[0],type_marker="regular")
next(gen)
plt.show()




# Repeat the flat-cluster cut with a slightly larger threshold (1.1 instead of
# the earlier 1.05); this rebinds both max_d and prediction.
max_d = 1.1
prediction = fcluster(single_hierarchy_minus2, max_d, criterion='distance')
# Example #5
# 0
for j,num_clusters in enumerate(number_of_clusters):

	kmean = sklearn.cluster.KMeans(num_clusters)

	sizes_of_groups=np.zeros((3,num_clusters))

	for i,X in enumerate([X_full,X_minus1,X_minus2]):
		prediction = kmean.fit_predict(X)
		num_obs    = X.shape[0]

		groupings = [np.arange(num_obs)[prediction==x] for x 
			in np.arange(num_clusters)]

		sizes_of_groups[i,:] = sorted([len(x) for x in groupings])

		cluster_plot_generator = three_d_cluster_rotation_gen(X[:,2:],
			rotation_angle=22.5,predictions=prediction)

		next(cluster_plot_generator)
		plt.close()
		next(cluster_plot_generator)

		plt.title("Kmeans(" + str(num_clusters) + ") clustering on data with " +
			str(i) + " 'outlier(s)' removed")
		plt.savefig(images+"Kmeans("+ str(num_clusters) +")_" + str(i) + 
			"_outlier.png")
		plt.close()

		# silhouette score collection
		silhouette_sc[i,j]=sklearn.metrics.silhouette_score(X,labels=prediction)

	print("**** Kmeans("+str(num_clusters)+") table ****")