# The "-s" option is stored back as a zero-based index into the sample list.
options["-s"] = int(options["-s"]) - 1

data_type = options["-t"]

if data_type == "random":
    # Gaussian data generation.
    print("génération de donnée aléatoire...")
    # Each sample maps a gaussian label to its direction and centre.
    gaussienne_sample = [
        {
            "1": {"direction": [1, 0.5], "centre": [5, 3]},
            "2": {"direction": [0.1, 1], "centre": [0, 0]},
        },
        {
            "1": {"direction": [1, 0.5], "centre": [2, 2]},
            "2": {"direction": [0.1, 1], "centre": [0, 0]},
        },
        {
            "1": {"direction": [1, 0.5], "centre": [-2, -2]},
            "2": {"direction": [0.1, 1], "centre": [0, 0]},
            "3": {"direction": [1, 0.5], "centre": [2, 2]},
        },
    ]
    gen_random_data(gaussienne_sample[options["-s"]])
    # Report the sample number as the user typed it (one-based).
    sample_number = str(options["-s"] + 1)
    print("... gaussiennes " + sample_number + " générées.")
elif data_type == "iris":
    # Iris data generation.
    print("generation des données iris...")
    gen_iris_data()
    print("... données iris générées.")
elif data_type == "picture":
    # Picture data generation; "-n" is forwarded unchanged to the generator.
    print("génération des données d'image...")
    gen_picture_data(options["-n"])
    print("... données d'image chargées.")

# Optionally show what was just generated. The option is compared as the
# string "True", not as a boolean.
if options["-d"] == "True":
    generated = es.read_kmeans_input()
    es.display(generated, None, "Generated datas :", True)
def compute_kmeans(k, population, centroids=None, display=False,
                   max_iteration=99999, title=""):
    """
    Run the k-means algorithm on a population of observations.

    Once the iterations are over, the population, centroids and assignments
    are handed to ``es.write_kmeans_output``.

    :arg k: the k of k-means: number of centroids
    :type k: int
    :arg population: the population of Observations to compute k-means on
    :type population: Observation[]
    :param centroids: the initial positions of the centroids. When omitted,
        k distinct observations are drawn at random from the population and
        copied. A list passed here is updated in place.
    :type centroids: Observation[]
    :param display: if True, the population and the centroids are shown
        through ``es.display`` step by step
    :type display: boolean
    :param max_iteration: the maximum number of iterations allowed
    :type max_iteration: int
    :arg title: title to print on top of the figures
    :type title: String
    :return: ``[centroids, affectation]`` where ``affectation[i]`` is the
        index of the centroid assigned to ``population[i]``
    :rtype: list
    :raises ValueError: if no initial centroids are given and k is larger
        than the population size
    """
    dimension = len(population[0].values)

    # ======================================================================= #
    #                        Phase 1: Initialisation                          #
    # ======================================================================= #
    if centroids is None:
        # Picking k distinct observations is impossible when k exceeds the
        # population size; fail loudly instead of searching forever.
        if k > len(population):
            raise ValueError(
                "k (" + str(k) + ") is larger than the population size ("
                + str(len(population)) + ")")
        # Centroids are randomly chosen in the population, never the same
        # observation twice.
        chosen_indexes = random.sample(range(len(population)), k)
        centroids = [population[index].copy() for index in chosen_indexes]

    # Every observation starts assigned to centroid 0.
    affectation = [0] * len(population)

    if display:
        es.display(population, None, title + "Population : ", False)

    stop = False
    iteration = 0
    while not stop and iteration < max_iteration:
        iteration += 1

        # =================================================================== #
        #                        Phase 2: Affectation                         #
        # =================================================================== #
        if display:
            es.display(population, centroids,
                       title + "computing k-means : iteration "
                       + str(iteration), False)

        # Assume convergence until an observation changes centroid.
        stop = True
        for i, observation in enumerate(population):
            distances = [observation.dist(centroids[j]) for j in range(k)]
            # On ties the lowest centroid index wins.
            nearest = distances.index(min(distances))
            if affectation[i] != nearest:
                affectation[i] = nearest
                stop = False

        # =================================================================== #
        #                        Phase 3: Calculation                         #
        # =================================================================== #
        # NOTE(review): each new centroid is built only by calling
        # Observation.add on its members, and a centroid without members
        # stays a fresh Observation(dimension). Whether that yields the
        # cluster mean depends on Observation.add -- confirm.
        for j in range(k):
            centroid = Observation(dimension)
            for i, observation in enumerate(population):
                if affectation[i] == j:
                    centroid.add(observation)
            centroids[j] = centroid

    # Write the output files.
    es.write_kmeans_output(population, centroids, affectation)

    if display:
        es.display(population, centroids, title + "K-means computed", True)

    return [centroids, affectation]