def k_means(points, k, centroids, max_iterations=None):
    '''
    Run sequential K-Means until the set of centroids stops changing.

    Each pass assigns every point to a cluster with utils.assign_cluster and
    then replaces each centroid by Point2D.getAverage of its cluster.

    points         -- the data points to cluster
    k              -- number of clusters, forwarded to utils.assign_cluster
    centroids      -- initial centroids
    max_iterations -- optional cap on the number of passes; None (default)
                      keeps the original behaviour of looping until the
                      centroids converge

    Prints the number of passes performed and returns the centroids that
    were in use at the start of the last pass (on convergence these equal,
    as a set, the freshly computed ones).

    NOTE(review): the convergence test compares sets, so centroid objects
    must be hashable and presumably compare by value -- confirm in Point2D.
    '''
    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        iteration += 1

        # Assign data points to clusters
        cluster_lists = utils.assign_cluster(points, centroids, k)

        # Compute new centroids as the average of the points in each cluster
        new_centroids = [Point2D.getAverage(cluster)
                         for cluster in cluster_lists]

        # Converged: the centroid set did not change, regardless of order
        if set(centroids) == set(new_centroids):
            break

        # Not converged: go again with an independent copy of the new centroids
        centroids = copy.deepcopy(new_centroids)

    # Single-argument print(...) behaves the same under Python 2 and 3
    print("Took " + str(iteration) + " iteration(s)")
    return centroids
def k_means(points, k, centroids, max_iterations=None):
    '''
    Run sequential K-Means until the set of centroids stops changing.

    Each pass assigns every point to a cluster with utils.assign_cluster and
    then replaces each centroid by find_new_centroid of its cluster.

    points         -- the data points to cluster
    k              -- number of clusters, forwarded to utils.assign_cluster
    centroids      -- initial centroids
    max_iterations -- optional cap on the number of passes; None (default)
                      keeps the original behaviour of looping until the
                      centroids converge

    Prints the number of passes performed and returns the centroids that
    were in use at the start of the last pass (on convergence these equal,
    as a set, the freshly computed ones).

    NOTE(review): the convergence test compares sets, so whatever
    find_new_centroid returns must be hashable and presumably compare by
    value -- confirm against its definition.
    '''
    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        iteration += 1

        # Assign data points to clusters
        cluster_lists = utils.assign_cluster(points, centroids, k)

        # Find new centroids as the average of the points in each cluster
        new_centroid_list = [find_new_centroid(cluster)
                             for cluster in cluster_lists]

        # Converged: the centroid set did not change, regardless of order
        if set(centroids) == set(new_centroid_list):
            break

        # Not converged: go again with an independent copy of the new centroids
        centroids = copy.deepcopy(new_centroid_list)

    # Single-argument print(...) behaves the same under Python 2 and 3
    print("Took " + str(iteration) + " iteration(s)")
    return centroids
def slave_function():
    '''
    Worker side of the MPI K-Means code.

    Receives k and then this worker's data points from rank 0, and then
    loops: take the centroids broadcast by rank 0, assign the local points
    to clusters, and send back a tuple (per-cluster averages, per-cluster
    point counts). Returns when an empty centroid list is broadcast.
    '''
    world = MPI.COMM_WORLD

    # Rank 0 sends k first and the data points second; keep this order
    k = world.recv(source=0)
    data_points = world.recv(source=0)

    centroids = []
    while True:
        # Receive centroids from master
        centroids = world.bcast(centroids, root=0)

        # An empty centroid list is the done signal
        if not len(centroids):
            return

        # 2D list: clusters[i] holds the points assigned to cluster i
        clusters = utils.assign_cluster(data_points, centroids, k)

        # Local average point of each cluster
        local_centroids = [Point2D.getAverage(clusters[idx])
                           for idx in range(k)]
        # i-th entry is the number of local points in cluster i
        cluster_sizes = [len(clusters[idx]) for idx in range(k)]

        # Send the new centroids and the population count of each cluster
        world.send((local_centroids, cluster_sizes), dest=0)
def slave_function():
    '''
    Worker side of the MPI K-Means code (dimension-wise statistics variant).

    Receives k and then this worker's data points from rank 0, and then
    loops: take the centroids broadcast by rank 0, assign the local points
    to clusters, compute get_dimension_wise_stats for each cluster and send
    the list of k results back to rank 0. Returns when an empty centroid
    list is broadcast.
    '''
    # Get communication instance
    comm = MPI.COMM_WORLD

    # Rank 0 sends k first and the data points second; keep this order
    k = comm.recv(source=0)
    data_points = comm.recv(source=0)

    # Dimension is read from the first point, so at least one point must
    # have been received (an empty list raises IndexError here)
    dimension = len(data_points[0])

    centroids = []
    while True:
        # Receive centroids from master
        centroids = comm.bcast(centroids, root=0)

        # Check if done signal (empty centroid list) is received
        if len(centroids) == 0:
            return

        # Assign points to clusters (2D list of points)
        assigned_points = utils.assign_cluster(data_points, centroids, k)

        # Build the stats list afresh on every iteration. Previously a single
        # list created before the loop was appended to forever, so from the
        # second iteration on the master received stale entries from earlier
        # rounds ahead of the current ones.
        dimension_stats_all_clusters = [
            get_dimension_wise_stats(assigned_points[i], dimension)
            for i in range(k)
        ]

        # Send the dimension stats for all clusters to master node
        comm.send(dimension_stats_all_clusters, dest=0)