Ejemplo n.º 1
0
	options["-s"]=int(options["-s"])-1


	#Gaussians data generation case:
	if options["-t"] == "random":
		print("génération de donnée aléatoire...")
		gaussienne_sample=[]
		gaussienne_sample.append({"1":{"direction":[1,0.5],"centre":[5,3]},\
			"2":{"direction":[0.1,1],"centre":[0,0]}})
		gaussienne_sample.append({"1":{"direction":[1,0.5],"centre":[2,2]},\
			"2":{"direction":[0.1,1],"centre":[0,0]}})
		gaussienne_sample.append({"1":{"direction":[1,0.5],"centre":[-2,-2]},\
			"2":{"direction":[0.1,1],"centre":[0,0]},"3":{"direction":[1,0.5],"centre":[2,2]}})
		gen_random_data(gaussienne_sample[options["-s"]])
		print("... gaussiennes "+str(options["-s"]+1)+" générées.")

	#Iris data generation case:
	elif options["-t"] == "iris":
		print("generation des données iris...")
		gen_iris_data()
		print("... données iris générées.")

	#picture data generation case:
	elif options["-t"] == "picture":
		print("génération des données d'image...")
		gen_picture_data(options["-n"])
		print("... données d'image chargées.")

	if options["-d"] == "True":
		es.display(es.read_kmeans_input(),None,"Generated datas :",True)
Ejemplo n.º 2
0
def compute_kmeans(k, population, centroids=None, display=False,
	max_iteration=99999, title=""):
	"""
	Run the k-means algorithm on a population of observations.

	The result is also handed to ``es.write_kmeans_output`` before returning.

	:arg k: the k of k-means: number of centroids
	:type k: int
	:arg population: the observations to cluster; must not be empty
	:type population: Observation[]
	:param centroids: the initial positions of the centroids. When omitted,
		k distinct observations are picked at random in the population and
		copied. When given, the list is updated in place at every iteration.
	:type centroids: Observation[]
	:param display: if True, the population (and the centroids) are
		displayed step by step through ``es.display``
	:type display: boolean
	:param max_iteration: the maximum number of iterations allowed
	:type max_iteration: int
	:arg title: prefix of the titles printed on top of the figures
	:type title: String
	:return: ``[centroids, affectation]`` where ``affectation[i]`` is the
		index of the centroid assigned to ``population[i]``
	:rtype: list
	:raises ValueError: if no initial centroids are given and k is larger
		than the population, since k distinct observations cannot be drawn
	"""

	dimension = len(population[0].values)
	size = len(population)

	# ------------------------------------------------------------------ #
	# Phase 1: initialisation
	# ------------------------------------------------------------------ #

	if centroids is None:
		# Without this guard the rejection loop below would never end.
		if k > size:
			raise ValueError(
				"cannot pick %d distinct centroids in a population of %d"
				% (k, size))

		centroids = []
		chosen = set()
		while len(centroids) < k:
			# Centroids are randomly chosen in the population; int()
			# truncates the non-negative product to an index in [0, size).
			index = int(random.random() * size)

			# Never take the same observation twice.
			if index not in chosen:
				chosen.add(index)
				centroids.append(population[index].copy())

	# Every observation starts assigned to centroid 0.
	affectation = [0] * size

	if display:
		es.display(population, None, title + "Population : ", False)

	stop = False
	iteration = 0
	while not stop and iteration < max_iteration:
		iteration += 1

		# -------------------------------------------------------------- #
		# Phase 2: affectation
		# -------------------------------------------------------------- #

		if display:
			es.display(population, centroids, title +
				"computing k-means : iteration " + str(iteration), False)

		# Assume convergence until one observation changes centroid.
		stop = True
		for i, observation in enumerate(population):
			distances = [observation.dist(centroids[j]) for j in range(k)]
			# index() returns the first minimum, so ties go to the
			# lowest centroid index.
			nearest = distances.index(min(distances))
			if affectation[i] != nearest:
				affectation[i] = nearest
				stop = False

		# -------------------------------------------------------------- #
		# Phase 3: calculation
		# -------------------------------------------------------------- #

		# Rebuild each centroid from the observations assigned to it.
		# NOTE(review): presumably Observation.add keeps a running mean of
		# what is added - confirm against the Observation class. A centroid
		# with no observation stays a fresh Observation(dimension).
		for j in range(k):
			centroid = Observation(dimension)
			for observation, assigned in zip(population, affectation):
				if assigned == j:
					centroid.add(observation)
			centroids[j] = centroid

	# Write the output files.
	es.write_kmeans_output(population, centroids, affectation)

	if display:
		es.display(population, centroids, title + "K-means computed", True)

	return [centroids, affectation]