Example 1
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans


def good_number_of_clusters(vals):
    """Plot WCSS against the number of clusters so the elbow can be read off."""
    wcss = []
    for ii in range(1, 30):
        kmeans = KMeans(n_clusters=ii,
                        init="k-means++",
                        n_init=10,
                        max_iter=300)
        # Fit on the data and record the within-cluster sum of squares.
        kmeans.fit_predict(vals)
        wcss.append(kmeans.inertia_)

    plt.plot(range(1, 30), wcss, 'ro-', label="WCSS")
    plt.title("Computing WCSS for KMeans++")
    plt.xlabel("Number of clusters")
    plt.ylabel("WCSS")
    plt.legend()
    plt.show()
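A minimal way to exercise this helper, assuming scikit-learn's KMeans and a synthetic dataset from make_blobs (the data and parameters here are purely illustrative):

from sklearn.datasets import make_blobs

# Illustrative data: four well-separated blobs in 2-D.
vals, _ = make_blobs(n_samples=500, centers=4, random_state=0)

# The elbow in the WCSS curve should appear around 4 clusters.
good_number_of_clusters(vals)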
Example 2
def do_KMeans_clustering(N_cluster, X, device):
    """
    Use KMeans clustering to label the training data according to its
    proximity to a cluster centroid.
    Input:
        N_cluster: number of clusters estimated by the gap statistic
        X: training data for the input layer
        device: torch device on which the tensors live
    Output:
        cluster_label: label assigned to every point
        over_coef: coefficient used by the oversampling method to increase
                   the number of points drawn from sparsely populated clusters
    """

    X = X.to(device)

    # Instantiate the kmeans object and assign every point to a cluster
    kmeans = KMeans(n_clusters=N_cluster, mode='euclidean', verbose=1)
    cluster_label = kmeans.fit_predict(X)

    # Size of each cluster (number of points assigned to that centroid)
    cluster_size = torch.zeros(N_cluster, dtype=torch.int32).to(device)
    for cluster in range(N_cluster):
        cluster_size[cluster] = len(torch.where(cluster_label == cluster)[0])

    # Oversampling coefficient: ratio of the largest cluster size to this
    # cluster's size, capped at 10 to avoid blowing up the dataset
    over_coef = torch.zeros(N_cluster, dtype=torch.int32).to(device)
    for cluster in range(N_cluster):
        over_coef[cluster] = torch.max(cluster_size) // cluster_size[cluster]
        if over_coef[cluster] > 10:
            over_coef[cluster] = 10

    return cluster_label.cpu(), over_coef.cpu()
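The over_coef returned above is described as feeding an oversampling step elsewhere in the project, which is not shown here. As a rough sketch of how such a coefficient could be applied, assuming the goal is simply to repeat points from sparse clusters (the function below is an illustration, not the project's actual oversampling code):

import torch

def oversample_by_cluster(X, cluster_label, over_coef):
    # Repeat each point over_coef[c] times, where c is the point's cluster,
    # so sparsely populated clusters contribute proportionally more samples.
    repeats = over_coef[cluster_label.long()].to(torch.long)
    return torch.repeat_interleave(X, repeats, dim=0)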
Example 3
def do_gap_statistics(X, n_var, device):
    """
    Use the gap statistic to estimate the number of clusters.
    Input:
        X: training data for the input layer
        n_var: number of design variables of the problem
        device: torch device on which the tensors live
    Output:
        N_cluster: the number of clusters that maximizes the gap
    """

    max_cluster = 30
    trials = 10
    X = X.to(device)
    count = torch.zeros(max_cluster, dtype=torch.int32).to(device)
    X_rnd = torch.randn(len(X), n_var).to(device)

    for trial in range(trials):
        gap = torch.zeros(max_cluster, dtype=torch.float32).to(device)
        gap_diff = torch.zeros(max_cluster, dtype=torch.float32).to(device)
        for cluster in range(max_cluster):
            # Cluster the real data and a random reference set with the same
            # number of clusters, then compare their within-cluster dispersions
            kmeans = KMeans(n_clusters=cluster + 1, mode='euclidean')
            labels = kmeans.fit_predict(X)
            kmeans_rnd = KMeans(n_clusters=cluster + 1, mode='euclidean')
            labels_rnd = kmeans_rnd.fit_predict(X_rnd)
            gap[cluster] = torch.log(kmeans_rnd.inertia_(X_rnd, labels_rnd)
                                     / kmeans.inertia_(X, labels))

            # Stop as soon as the gap stops growing
            if cluster == 0:
                gap_diff[0] = 0.0
            else:
                gap_diff[cluster] = gap[cluster] - gap[cluster - 1]
                if gap_diff[cluster] < 0.0:
                    break

        count[torch.argmax(gap)] = count[torch.argmax(gap)] + 1

    # +1 because cluster in range(max_cluster) starts from zero
    N_cluster = torch.argmax(count) + 1
    return N_cluster
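Here inertia_ is used as a callable on the custom KMeans class and is expected to return the within-cluster sum of squares, so the logged ratio follows the usual gap-statistic comparison gap(k) = log(W_ref / W_data). If a clustering library only exposes labels and centroids, that dispersion can be computed directly; a small sketch, assuming centroids is a (k, n_var) tensor:

import torch

def within_cluster_ss(X, labels, centroids):
    # Sum of squared distances from each point to its assigned centroid,
    # i.e. the W term used in the gap computation above.
    diffs = X - centroids[labels.long()]
    return (diffs ** 2).sum()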
Example 4
    def test_score(self):
        """
        Tests within-cluster variance
        """

        X, y, centers = generate_cluster_samples()
        n_samples = X.shape[0]
        n_features = X.shape[1]
        k = centers.shape[0]

        kmeans = KMeans(k, N_ITER)
        assignments = kmeans.fit_predict(X)

        score = np.sqrt(kmeans.score(X)) / n_samples
        self.assertLess(score, EPS)
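generate_cluster_samples, N_ITER, and EPS come from the surrounding test module and are not shown here. A plausible stand-in for the data helper, built on scikit-learn's make_blobs (purely illustrative, not the project's actual fixture):

from sklearn.datasets import make_blobs

def generate_cluster_samples(n_samples=300, k=3, random_state=0):
    # Tight, well-separated blobs so the per-sample score stays small.
    X, y, centers = make_blobs(n_samples=n_samples, centers=k,
                               cluster_std=0.1, random_state=random_state,
                               return_centers=True)
    return X, y, centers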
Example 5
    def predict_line_label(self):
        self.check()
        if self.method == '1D':
            kmeans = sklearn.cluster.KMeans(n_clusters=3)
            pred = kmeans.fit_predict(
                np.array(self.smoothed_TR).reshape(-1, 1))
            self.distance1D = scipy.spatial.distance.cdist(
                np.array(self.smoothed_TR).reshape(-1, 1),
                kmeans.cluster_centers_)
            self.label1d = np.argmin(kmeans.cluster_centers_)
            line_label = pred != self.label1d
        elif self.method == '2D':
            kmeans = KMeans(n_clusters=3, fixed_centroids={0: [0, 0]})
            norm_TR = np.linalg.norm(self.smoothed_TR[:len(self.smoothed_G)])
            norm_G = np.linalg.norm(self.smoothed_G)
            X = np.dstack((self.smoothed_TR[:len(self.smoothed_G)],
                           self.smoothed_G * norm_TR / norm_G))[0]
            pred = kmeans.fit_predict(X)
            self.distance2D = kmeans.distance
            line_label = np.hstack([pred != 0, [False] * self.alpha])
        else:
            raise Exception("Wrong method argument: " + self.method)
        return line_label
Example 6
from matplotlib.image import imread
import matplotlib.pyplot as plt
from kmeans import KMeans
import torch

# Load the image and flatten it to a (n_pixels, 3) array of RGB values.
image = imread('images/IMG_0015.jpg')
X = image.reshape(-1, 3)
X_t = torch.from_numpy(X).float()

# Cluster the pixel colours and replace each pixel with its centroid colour.
kmeans = KMeans(n_clusters=5)
labels = kmeans.fit_predict(X_t)
segmented_img = kmeans.centroids[labels]
segmented_img = segmented_img.view(image.shape)

# Convert back to a NumPy array and save the segmented image.
new_img = segmented_img.numpy()

plt.imsave('images/5.jpg', new_img.astype('uint8'))
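The indexing expression kmeans.centroids[labels] replaces every pixel with the colour of its assigned centroid, which is what produces the posterised output. The same idea with scikit-learn instead of the custom torch KMeans, as a sketch on the same (assumed) input image:

import numpy as np
from matplotlib.image import imread
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

image = imread('images/IMG_0015.jpg')
X = image.reshape(-1, 3).astype(np.float64)

# Cluster the pixel colours, then map each pixel to its cluster centre.
kmeans = KMeans(n_clusters=5, n_init=10).fit(X)
segmented = kmeans.cluster_centers_[kmeans.labels_].reshape(image.shape)

plt.imsave('images/5_sklearn.jpg', segmented.astype('uint8'))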