def master_function(points, k, centroids):
    '''
    Master side of the distributed K_means loop (per-dimension statistics variant).

    Partitions the points among the slave ranks (1 .. size-1), then repeatedly
    broadcasts the current centroids, receives one statistics object from every
    slave and recomputes the centroids, until the set of centroids stops changing.

    points = list of data points (points[0] is read to get the dimension,
             so the list must not be empty)
    k = number of clusters
    centroids = list of initial centroids (elements must be hashable, because
                convergence is detected by comparing sets)
    return: list of final centroids
    exits the process with a non-zero status if there are no slave ranks
    '''
    comm = MPI.COMM_WORLD
    num_slaves = comm.Get_size() - 1

    if num_slaves == 0:
        # sys.exit(<str>) writes the message to stderr and exits with status 1,
        # so the failure is visible to whatever launched the job.
        sys.exit("ERROR: No slave nodes. Check number of processes requested")

    # Partition the points for the slaves
    partition = utils.partition_points(points, num_slaves)

    # One slot per slave; each slot is overwritten with that slave's report
    # (per the original notes: for each cluster, for each dimension).
    dimension_stats_all_nodes = [[] for _ in range(num_slaves)]
    dimension = len(points[0])

    # Send k and the data points to the slaves (slave rank r gets partition r-1)
    for rank in range(1, num_slaves + 1):
        comm.send(k, dest=rank)
        comm.send(partition[rank - 1], dest=rank)

    iteration = 0
    while True:
        iteration += 1

        # Publish the current centroids to every rank
        comm.bcast(centroids, root=0)

        # Collect the statistics computed by each slave for these centroids
        for rank in range(1, num_slaves + 1):
            dimension_stats_all_nodes[rank - 1] = comm.recv(source=rank)

        # Recalculate the centroids based on the global feedback
        new_centroids = recalculate_centroids(dimension_stats_all_nodes, k, dimension)

        # Unchanged centroids mean convergence: an empty list tells the slaves to stop.
        # NOTE(review): this is an exact, order-insensitive comparison - confirm
        # that is the intended convergence criterion.
        if set(centroids) == set(new_centroids):
            comm.bcast([], root=0)
            print("Took " + str(iteration) + " iteration(s)")
            return centroids

        # Keep an independent copy for the next round
        centroids = copy.deepcopy(new_centroids)
# Example #2
# 0
def master_function(points, k, centroids):
    '''
    Master side of the distributed K_means loop (centroid + population variant).

    Partitions the points among the slave ranks (1 .. size-1), then repeatedly
    broadcasts the current centroids, receives a (centroids, populations) pair
    from every slave and recomputes the centroids, until the set of centroids
    stops changing.

    points = list of initial data points
    k = number of clusters
    centroids = initial list of centroids (elements must be hashable, because
                convergence is detected by comparing sets)
    return: list of final centroids
    exits the process with a non-zero status if there are no slave ranks
    '''
    comm = MPI.COMM_WORLD
    num_slaves = comm.Get_size() - 1

    if num_slaves == 0:
        # sys.exit(<str>) writes the message to stderr and exits with status 1,
        # so the failure is visible to whatever launched the job.
        sys.exit("ERROR: No slave nodes. Check number of processes requested")

    # Partition the points for the slaves
    partition = utils.partition_points(points, num_slaves)

    # One slot per slave, overwritten on every round with that slave's report
    centroid_slave = [[] for _ in range(num_slaves)]
    population_slave = [[] for _ in range(num_slaves)]

    # Send k and the data points to the slaves (slave rank r gets partition r-1)
    for rank in range(1, num_slaves + 1):
        comm.send(k, dest=rank)
        comm.send(partition[rank - 1], dest=rank)

    iteration = 0
    while True:
        iteration += 1

        # Publish the current centroids to every rank
        comm.bcast(centroids, root=0)

        # Each slave answers with a pair: its centroids and the population
        # count for each centroid
        for rank in range(1, num_slaves + 1):
            (centroid_slave[rank - 1], population_slave[rank - 1]) = comm.recv(source=rank)

        # Recalculate the centroids based on the global feedback
        new_centroids = recalculate_centroids(centroid_slave, population_slave, k)

        # Unchanged centroids mean convergence: an empty list tells the slaves to stop.
        # NOTE(review): this is an exact, order-insensitive comparison - confirm
        # that is the intended convergence criterion.
        if set(centroids) == set(new_centroids):
            comm.bcast([], root=0)
            print("Took " + str(iteration) + " iteration(s)")
            return centroids

        # Keep an independent copy for the next round
        centroids = copy.deepcopy(new_centroids)