def master_function(points, k, centroids):
    '''
    Master functionality for K_means code

    Splits the points among the slave ranks, then loops: broadcast the
    current centroids, receive one statistics object from every slave,
    recalculate the centroids, and stop once the set of centroids no
    longer changes.

    points = list of data points
    k = number of clusters
    centroids = list of initial centroids
    return: list of final centroids

    Exits the process with status 1 if only one MPI process is running
    (i.e. there are no slave nodes to do the work).
    '''
    comm = MPI.COMM_WORLD
    num_nodes = comm.Get_size()
    num_slaves = num_nodes - 1

    if num_slaves == 0:
        print("ERROR: No slave nodes. Check number of processes requested")
        # Non-zero status so the launcher sees this run as a failure.
        sys.exit(1)

    # Partition the points for the slaves
    partition = utils.partition_points(points, num_slaves)

    # One slot per slave node, overwritten on every iteration with whatever
    # that slave sends back. Per the original note this is expected to be a
    # 3D structure of dictionaries (node x cluster x dimension) -- the inner
    # shape is decided by the slaves and is not visible here.
    dimension_stats_all_nodes = [[] for _ in xrange(num_slaves)]
    iteration = 0
    dimension = len(points[0])

    # Send k and the data points to the slaves (ranks 1 .. num_nodes - 1)
    for rank in xrange(1, num_nodes):
        comm.send(k, dest=rank)
        comm.send(partition[rank - 1], dest=rank)

    while True:
        # Send the current centroids to every slave
        comm.bcast(centroids, root=0)

        # Receive statistics on each dimension for each centroid
        for rank in xrange(1, num_nodes):
            dimension_stats_all_nodes[rank - 1] = comm.recv(source=rank)

        # Recalculate the new centroids based on the global feedback
        new_centroids = recalculate_centroids(dimension_stats_all_nodes,
                                              k, dimension)
        iteration += 1

        # If the centroids haven't changed we are done; an empty list is
        # broadcast to the slaves to signal the end.
        # NOTE(review): the set() comparison requires hashable centroids
        # and ignores ordering and duplicates -- confirm this is intended.
        if set(centroids) == set(new_centroids):
            comm.bcast([], root=0)
            print("Took " + str(iteration) + " iteration(s)")
            return centroids

        # Work on an independent copy of the new centroids next round
        centroids = copy.deepcopy(new_centroids)
def master_function(points, k, centroids):
    '''
    Master functionality K_means code

    Splits the points among the slave ranks, then loops: broadcast the
    current centroids, receive a (centroids, population counts) pair from
    every slave, recalculate the centroids, and stop once the set of
    centroids no longer changes.

    points = list of initial data points
    k = number of clusters
    centroids = initial list of centroids
    return: list of final centroids

    Exits the process with status 1 if only one MPI process is running
    (i.e. there are no slave nodes to do the work).
    '''
    comm = MPI.COMM_WORLD
    num_nodes = comm.Get_size()
    num_slaves = num_nodes - 1

    if num_slaves == 0:
        print("ERROR: No slave nodes. Check number of processes requested")
        # Non-zero status so the launcher sees this run as a failure.
        sys.exit(1)

    # Partition the points for the slaves
    partition = utils.partition_points(points, num_slaves)

    # One slot per slave node, overwritten on every iteration with the
    # two halves of the pair that slave sends back.
    centroid_slave = [[] for _ in xrange(num_slaves)]
    population_slave = [[] for _ in xrange(num_slaves)]
    iteration = 0

    # Send k and the data points to the slaves (ranks 1 .. num_nodes - 1)
    for rank in xrange(1, num_nodes):
        comm.send(k, dest=rank)
        comm.send(partition[rank - 1], dest=rank)

    while True:
        # Send the current centroids to every slave
        comm.bcast(centroids, root=0)

        # Receive centroids and population count for EACH centroid from
        # the slaves; each message must unpack into exactly two items.
        for rank in xrange(1, num_nodes):
            slot = rank - 1
            (centroid_slave[slot],
             population_slave[slot]) = comm.recv(source=rank)

        # Recalculate the new centroids based on the global feedback
        new_centroids = recalculate_centroids(centroid_slave,
                                              population_slave, k)
        iteration += 1

        # If the centroids haven't changed we are done; an empty list is
        # broadcast to the slaves to signal the end.
        # NOTE(review): the set() comparison requires hashable centroids
        # and ignores ordering and duplicates -- confirm this is intended.
        if set(centroids) == set(new_centroids):
            comm.bcast([], root=0)
            print("Took " + str(iteration) + " iteration(s)")
            return centroids

        # Work on an independent copy of the new centroids next round
        centroids = copy.deepcopy(new_centroids)