def kmeans(X, k):
    """Cluster the rows of ``X`` into groups with the k-means algorithm.

    Starting from the centroids produced by ``simpleInitialization(X, k)``,
    the function repeats two steps: assign every instance to its closest
    centroid (Euclidean distance), then move every centroid to the mean of
    the instances assigned to it.  Iteration stops as soon as the centroids
    stop moving, or after 100 iterations.

    Args:
        X: 2-D array-like with one instance per row.
        k: Number of clusters requested; passed to ``simpleInitialization``.

    Returns:
        1-D integer NumPy array with one entry per row of ``X``; entry ``i``
        is the index of the centroid closest to ``X[i]``.
    """
    # Imported locally so the block does not depend on which NumPy names the
    # surrounding module happens to expose.
    import numpy as np

    max_iterations = 100
    data = np.asarray(X, dtype=float)

    # Intialize centroids
    # assumes simpleInitialization returns one centroid per row -- TODO confirm
    centroids = np.array(simpleInitialization(X, k), dtype=float)
    labels = np.zeros(data.shape[0], dtype=int)

    for _ in range(max_iterations):
        # distances[i, j] is the distance from instance i to centroid j.
        distances = np.stack(
            [np.linalg.norm(data - centroid, axis=1) for centroid in centroids],
            axis=1,
        )
        labels = np.argmin(distances, axis=1)

        updated = centroids.copy()
        for j in range(len(centroids)):
            members = data[labels == j]
            # A cluster that lost all its instances keeps its old centroid;
            # the mean of an empty set would be NaN.
            if len(members) > 0:
                updated[j] = members.mean(axis=0)

        if np.array_equal(updated, centroids):
            break
        centroids = updated

    return labels
def kmeans(X, k):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    The loop runs until ``shouldStop(oldCentroids, centroids, iterations)``
    returns a truthy value.  Each pass assigns every instance to the centroid
    with the smallest ``euclideanDistance`` and then recomputes every
    centroid as the per-column mean of its assigned instances.

    Args:
        X: NumPy array with one instance per row (it is indexed with a
            boolean mask).
        k: Number of clusters.

    Returns:
        NumPy array holding, for every row of ``X``, the index of its
        nearest centroid.
    """
    # Intialize centroids
    centroids = simpleInitialization(X, k)
    # Alternative initializer left by the author:
    # centroids = kmeansppInitialization(X, k)

    # Initialize book keeping vars.
    iterations = 0
    oldCentroids = None
    # Bound up front so the function still returns an array when shouldStop
    # ends the loop before the first assignment step.
    labels = np.zeros(len(X), dtype=int)

    # Run the main k-means algorithm
    while not shouldStop(oldCentroids, centroids, iterations):
        # Save old centroids for convergence test.
        oldCentroids = centroids
        iterations += 1

        # Distance from every instance (row) to every centroid (column).
        distances = np.array(
            [[euclideanDistance(Xi, centroid) for centroid in centroids]
             for Xi in X])

        # Compute nearest centroid indices
        labels = np.array([np.argmin(distance) for distance in distances])

        # Find new centroids
        centroids = []
        for i in range(k):
            members = X[labels == i]
            if len(members) == 0:
                # An empty cluster would average to NaN, and np.argmin picks
                # NaN entries, so every instance would collapse into this
                # cluster on the next pass.  Keep the previous centroid.
                centroids.append(list(oldCentroids[i]))
            else:
                centroids.append(
                    [np.sum(col) / len(col) for col in np.transpose(members)])

    return labels
def kmeans(X, k):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    Each iteration computes the Euclidean distance from every instance to
    every centroid, assigns each instance to the closest centroid and moves
    every centroid to the mean of its instances.  The loop ends when the
    centroids are unchanged by an update or after 100 iterations.

    Args:
        X: 2-D NumPy array with one instance per row.
        k: Number of clusters.

    Returns:
        1-D integer NumPy array with the index of the nearest centroid for
        every row of ``X``.

    Side effects:
        Prints the zero-based index of the last iteration that was executed.
    """
    # Intialize centroids
    centroids = np.array(simpleInitialization(X, k), dtype=float)
    labels = np.zeros(X.shape[0], dtype=int)
    n_it = 100

    # Run main k-means algorithm
    for i in range(n_it):
        # Compute distances from the centroid to points; after the transpose
        # the rows are instances and the columns are centroids.
        distances = np.array(
            [np.linalg.norm(X - centroid, axis=1) for centroid in centroids]).T

        # Compute nearest centroid indices
        labels = np.argmin(distances, axis=1)

        # Find new centroids
        before_centroids = centroids
        centroids = before_centroids.copy()
        for cluster in range(k):
            members = X[labels == cluster]
            # The mean of an empty cluster is NaN; np.argmin would then send
            # every instance to that cluster and array_equal could never
            # report convergence.  Keep the previous centroid instead.
            if len(members) > 0:
                centroids[cluster] = np.mean(members, axis=0)

        if np.array_equal(before_centroids, centroids):
            break

    print('We found the solution in ' + str(i) + ' iterations!')
    return labels
def kmeans(X, k):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    Every iteration assigns each instance to the centroid with the smallest
    ``euclideanDistance`` and then moves each centroid to the mean of its
    instances.  The loop stops once no centroid coordinate moved by more
    than ``threshold`` in either direction, or after 100 iterations.

    Args:
        X: 2-D NumPy array; rows are instances, columns are dimensions.
        k: Number of clusters.

    Returns:
        1-D integer NumPy array with the index of the nearest centroid for
        every row of ``X``.

    Side effects:
        Prints the shapes of the centroid array and of ``X``, and the number
        of iterations that were run.
    """
    # Imported locally so the block does not depend on which NumPy names the
    # surrounding module happens to expose.
    import numpy as np

    # Intialize centroids (copied as floats so that updating a centroid
    # neither truncates the mean nor mutates the initializer's result).
    C = np.array(simpleInitialization(X, k), dtype=float)
    print(C.shape)
    print(X.shape)

    # Initialize variables
    n = X.shape[0]  # Size of the sample
    labels = np.zeros(n, dtype=int)
    threshold = 0.1
    max_iterations = 100
    iterations = 0

    while iterations < max_iterations:
        # Assignment step: the closest centroid wins (argmin, not argmax).
        for i in range(n):
            distances = [euclideanDistance(X[i, :], C[j]) for j in range(k)]
            labels[i] = np.argmin(distances)

        # Update step: cluster membership is recomputed from the current
        # labels on every pass, so no stale sizes carry over.
        oldC = C.copy()
        for j in range(k):
            members = X[labels == j]
            if len(members) > 0:
                C[j] = members.mean(axis=0)

        iterations += 1

        # Converged when no coordinate moved more than the threshold; the
        # absolute value catches movement in the negative direction too.
        if not (np.abs(C - oldC) > threshold).any():
            break

    print("k-means exectued in {} iterations".format(iterations))
    return labels
def kmeans(X, k):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    Each pass assigns every instance to the centroid with the smallest
    ``euclideanDistance`` and then recomputes the centroids from the new
    assignment.  The loop stops when two consecutive assignment steps give
    identical labels, or once more than 500 passes have run.

    Args:
        X: 2-D NumPy array; rows are instances, columns are dimensions.
        k: Number of clusters.

    Returns:
        1-D integer NumPy array with the index of the nearest centroid for
        every row of ``X``.

    Side effects:
        Prints the number of passes that were run.
    """
    # Intialize centroids
    centroids = simpleInitialization(X, k)

    # Initialize variables
    iterations = 0
    n_samples = X.shape[0]
    # Integer dtype: the labels are used as array indices below, and NumPy
    # rejects float indices.
    labels = zeros(n_samples, dtype=int)

    while True:
        # update labels
        old_labels = labels.copy()
        for i in range(n_samples):
            dist = inf
            for j in range(k):
                d = euclideanDistance(X[i], centroids[j])
                if d < dist:
                    labels[i] = j
                    dist = d

        # update centroids
        centroids = zeros((k, X.shape[1]))
        # The 1e-8 offset avoids dividing by zero for an empty cluster; such
        # a cluster ends up with its centroid at the origin.
        num_centroids = zeros((k, 1)) + 1e-8
        for i in range(n_samples):
            centroids[labels[i]] += X[i]
            num_centroids[labels[i]] += 1.
        centroids = centroids / num_centroids

        iterations += 1
        # The first pass compares against the placeholder zeros, not against
        # a real assignment, so it can never count as convergence.
        unchanged = iterations > 1 and (old_labels == labels).all()
        if unchanged or iterations > 500:
            break

    print(iterations)
    return labels
def kmeans(X, k):
    """Run k-means on the rows of ``X`` and return the cluster labels.

    The function alternates an assignment step (each instance goes to the
    centroid with the smallest ``euclideanDistance``) with an update step
    (each centroid becomes the smoothed mean of its instances).  It finishes
    when an assignment step leaves the labels of the previous assignment
    step unchanged, or once more than 500 passes have run.

    Args:
        X: 2-D NumPy array; rows are instances, columns are dimensions.
        k: Number of clusters.

    Returns:
        1-D integer NumPy array with the index of the nearest centroid for
        every row of ``X``.

    Side effects:
        Prints the number of passes that were run.
    """
    max_passes = 500
    n_samples, n_features = X.shape[0], X.shape[1]

    # Intialize centroids
    centroids = simpleInitialization(X, k)

    # Labels index into the centroid arrays, so they must be integers;
    # NumPy rejects float indices.
    labels = zeros(n_samples, dtype=int)
    iterations = 0
    notConverged = True

    while notConverged:
        previous = labels.copy()

        # Assignment step: the first strictly smaller distance wins, so ties
        # go to the lowest centroid index.
        for row, point in enumerate(X):
            best = inf
            for cluster in range(k):
                distance = euclideanDistance(point, centroids[cluster])
                if distance < best:
                    best = distance
                    labels[row] = cluster

        # Update step: per-cluster sum divided by the per-cluster count.  The
        # 1e-8 offset keeps the division defined for an empty cluster, whose
        # centroid then lands on the origin.
        sums = zeros((k, n_features))
        counts = zeros((k, 1)) + 1e-8
        for row, point in enumerate(X):
            sums[labels[row]] += point
            counts[labels[row]] += 1.
        centroids = sums / counts

        iterations += 1

        # The labels before the first pass are placeholder zeros, so only
        # later passes can establish that the assignment is stable.
        stable = iterations > 1 and (previous == labels).all()
        if stable or iterations > max_passes:
            notConverged = False

    print(iterations)
    return labels