def get_nearest_cluster(clusters, point):
    """ Retrieves the closest cluster of a point.

    Scans every cluster in ``clusters`` and returns the one whose
    distance to ``point`` is smallest (first one wins on ties).
    """
    # Seed the search with the first cluster, then challenge it with
    # each remaining candidate.
    winner = clusters[0]
    winner_dist = distance.euclidian(winner, point)
    for candidate in clusters[1:]:
        candidate_dist = distance.euclidian(candidate, point)
        if candidate_dist < winner_dist:
            winner, winner_dist = candidate, candidate_dist
    return winner
def get_nearest_cluster(self, point):
    """ Retrieves the closest cluster of a point.

    Walks ``self._clusters`` once and returns the member with the
    smallest distance to ``point`` (the earliest one on ties).
    """
    candidates = self._clusters
    # Start with the first stored cluster as the provisional answer.
    best = candidates[0]
    best_dist = distance.euclidian(best, point)
    # Challenge the provisional answer with every other cluster.
    for current in candidates[1:]:
        current_dist = distance.euclidian(current, point)
        if current_dist < best_dist:
            best, best_dist = current, current_dist
    return best
def class_probability(self, point, c):
    """ Estimates P(label == c | point) using the K nearest neighbours.

    Returns the fraction of the ``self.k`` training samples closest to
    ``point`` whose label equals ``c`` (0.0 when the dataset is empty).
    """
    # Compute distances with all dataset.
    distances = [(distance.euclidian(self.__data[i], point), self.__labels[i])
                 for i in xrange(len(self.__data))]
    # Keep only K nearest points.  Sorting on the distance component
    # replaces the old cmp-style ``dist_cmp`` comparator.
    distances.sort(key=lambda pair: pair[0])
    neighbours = distances[:self.k]
    # Guard: an empty training set would otherwise divide by zero.
    if not neighbours:
        return 0.0
    # Count neighbours carrying the reference label ``c``.
    # NOTE: the previous version also built a per-label frequency table
    # (labels_freq / label_max / freq_max) that was never used in the
    # result -- and whose has_key() counting was off by one for the
    # first occurrence of each label.  That dead code is removed.
    ref_freq = 0.0
    for _, label in neighbours:
        if label == c:
            ref_freq += 1
    return ref_freq / len(neighbours)
def class_probability(self, point, c):
    """ Estimates P(label == c | point) using the K nearest neighbours.

    Returns the fraction of the ``self.k`` training samples closest to
    ``point`` whose label equals ``c`` (0.0 when the dataset is empty).
    """
    # Compute distances with all dataset.
    distances = [(distance.euclidian(self.__data[i], point), self.__labels[i])
                 for i in xrange(len(self.__data))]
    # Keep only K nearest points; a key sort on the distance component
    # supersedes the cmp-style ``dist_cmp`` comparator.
    distances.sort(key=lambda pair: pair[0])
    neighbours = distances[:self.k]
    # Guard: avoid ZeroDivisionError on an empty training set.
    if not neighbours:
        return 0.0
    # Fraction of the neighbourhood labelled ``c``.
    # NOTE: the former labels_freq / label_max / freq_max bookkeeping
    # never influenced the returned value (dead code) and undercounted
    # the first occurrence of every label; it has been dropped.
    matching = 0.0
    for _, label in neighbours:
        if label == c:
            matching += 1
    return matching / len(neighbours)
def clusterize(neurons, data_dimension, clusters, dataset, nb_clusters=0):
    """ Builds labelled clusters from trained neurons.

    Each neuron starts in its own cluster; every dataset element is
    assigned to (and labels) its nearest cluster; finally the two
    closest clusters are merged repeatedly until only ``nb_clusters``
    remain (this last step is skipped when ``nb_clusters`` <= 0).
    ``clusters`` is filled in place.
    """
    # Each neuron is put in its own cluster.
    for neuron in neurons:
        c = cluster.Cluster(data_dimension)
        c.add_neuron(neuron)
        c.compute_average()
        clusters.append(c)
    # Every element from the dataset is classified and the corresponding
    # cluster stores its label.
    for z in dataset:
        nearest_cluster = clusters[0]
        best_distance = distance.euclidian(clusters[0], z.data())
        for i in xrange(1, len(clusters)):
            tmp_distance = distance.euclidian(clusters[i], z.data())
            if tmp_distance < best_distance:
                nearest_cluster = clusters[i]
                best_distance = tmp_distance
        nearest_cluster.add_label(z.labels())
        nearest_cluster._neurons[0]._labels.append(z.labels())
    # Here we reduce the number of cluster to the one specified. To do
    # that we look for the two closest clusters and we merge them until
    # we have the good number of clusters.
    if nb_clusters > 0:
        while len(clusters) > nb_clusters:
            best_distance = clusters[0].distance_with(clusters[1].get_center())
            cluster1 = 0
            cluster2 = 1
            for i in xrange(len(clusters)):
                for j in xrange(len(clusters)):
                    if i != j:
                        tmp_distance = clusters[i].distance_with(
                            clusters[j].get_center())
                        if tmp_distance < best_distance:
                            cluster1 = i
                            cluster2 = j
                            best_distance = tmp_distance
            # BUG FIX: merge the winning pair found by the search above.
            # The previous code merged clusters[i] with clusters[j] --
            # the *last* indices visited by the loops (i == j) -- so the
            # computed cluster1/cluster2 were silently ignored.
            clusters[cluster1].merge_with(clusters[cluster2])
            clusters.pop(cluster2)
def clusterize(neurons, data_dimension, clusters, dataset, nb_clusters=0):
    """ Builds labelled clusters from trained neurons.

    Puts each neuron in its own cluster, lets every dataset element
    label its nearest cluster, then merges the two closest clusters
    until ``nb_clusters`` remain (skipped when ``nb_clusters`` <= 0).
    ``clusters`` is filled in place.
    """
    # Each neuron is put in its own cluster.
    for neuron in neurons:
        c = cluster.Cluster(data_dimension)
        c.add_neuron(neuron)
        c.compute_average()
        clusters.append(c)
    # Every element from the dataset is classified and the corresponding
    # cluster stores its label.
    for z in dataset:
        nearest_cluster = clusters[0]
        best_distance = distance.euclidian(clusters[0], z.data())
        for i in xrange(1, len(clusters)):
            tmp_distance = distance.euclidian(clusters[i], z.data())
            if tmp_distance < best_distance:
                nearest_cluster = clusters[i]
                best_distance = tmp_distance
        nearest_cluster.add_label(z.labels())
        nearest_cluster._neurons[0]._labels.append(z.labels())
    # Here we reduce the number of cluster to the one specified. To do
    # that we look for the two closest clusters and we merge them until
    # we have the good number of clusters.
    if nb_clusters > 0:
        while len(clusters) > nb_clusters:
            best_distance = clusters[0].distance_with(clusters[1].get_center())
            cluster1 = 0
            cluster2 = 1
            for i in xrange(len(clusters)):
                for j in xrange(len(clusters)):
                    if i != j:
                        tmp_distance = clusters[i].distance_with(
                            clusters[j].get_center())
                        if tmp_distance < best_distance:
                            cluster1 = i
                            cluster2 = j
                            best_distance = tmp_distance
            # BUG FIX: act on the winning pair (cluster1, cluster2).
            # The old code did clusters[i].merge_with(clusters[j]) with
            # the loop indices left over from the search (i == j ==
            # len(clusters) - 1), discarding the search result.
            clusters[cluster1].merge_with(clusters[cluster2])
            clusters.pop(cluster2)
def learn(self):
    """ Trains the self-organizing map.

    Repeatedly draws a random observation, finds its nearest neuron and
    pulls every neuron toward it with a Gaussian neighbourhood kernel,
    until neurons stop moving or ITERATION_MAX is reached.  Finally
    clusterizes the neurons.  Returns the number of iterations run.
    """
    if self._verbose:
        self.draw_all()
    # Parameters initialization
    neurons = self._neurons
    # Initial neighbourhood radius: distance between the middle neuron
    # and the last one.  NOTE(review): Python-2 integer division in
    # len(neurons) / 2; assumes at least 2 neurons and sigma_init > 1
    # (math.log(sigma_init) is used as a divisor below) -- confirm.
    sigma_init = distance.euclidian(neurons[len(neurons) / 2].get_position(), \
                                    neurons[len(neurons) - 1].get_position(), \
                                    dimension=len(neurons[0].get_position()))
    # Time constants of the exponential decays of sigma (t1) and of the
    # learning rate nu (t2).
    t1 = 1000 / math.log(sigma_init)
    nu0 = 0.1
    t2 = 1000
    nu = lambda n: nu0 * math.exp(-n / t2)
    iter = 1
    data = self._dataset.data()
    while (iter <= ITERATION_MAX):
        # Pick a random observation; shrink the neighbourhood radius.
        z = data[int(random.random() * len(data))]
        sigma = sigma_init * math.exp(-iter / t1)
        # Get nearest neuron for observation z
        nearest_neuron = neurons[0]
        best_distance = distance.euclidian(neurons[0], z)
        for i in xrange(1, len(neurons)):
            tmp_distance = distance.euclidian(neurons[i], z)
            if tmp_distance < best_distance:
                nearest_neuron = neurons[i]
                best_distance = tmp_distance
        # Every len(neurons) iterations, count how many neurons actually
        # move (convergence check); otherwise skip the bookkeeping.
        if (iter % len(neurons) == 0):
            changed_neurons = 0
        else:
            changed_neurons = None
        for neuron in neurons:
            # Neighbourhood weight of this neuron w.r.t. the winner.
            d = distance.euclidian(neuron.get_position(), \
                                   nearest_neuron.get_position(), \
                                   dimension=len(neuron.get_position()))
            h = gaussian_kernel(d, sigma)
            # Move the neuron toward z; dmoved accumulates the total
            # absolute displacement over all components.
            dmoved = 0
            for i in xrange(len(neuron)):
                step = nu(iter) * h * (z[i] - neuron[i])
                neuron[i] += step
                dmoved += abs(step)
            if not changed_neurons is None:
                if dmoved > 0:
                    changed_neurons += 1
        # Stop early when too few neurons are still moving.
        if not changed_neurons is None:
            perc = float(changed_neurons) / len(neurons) * 100
            if perc <= PERC_NEURONS_CHANGED:
                break
        # Progress report every 1000 iterations.
        # NOTE(review): reconstructed indentation places this at loop
        # level; it reads ``perc``, which assumes a convergence check
        # already ran (len(neurons) <= 1000) -- confirm.
        if iter % 1000 == 0 and self._verbose:
            print "%f%% of neurons moved at last check" % perc
            print "iteration %i" % iter
            self.draw_all()
        iter += 1
    if self._verbose:
        print "%i%% of neurons moved at last check" % int(perc)
        self.draw_all()
    # Group the trained neurons into labelled clusters.
    clusterize(self._neurons, self._data_dimension, self._clusters,
               self._dataset, self._nb_clusters)
    return iter - 1
def learn(self):
    """ Trains the self-organizing map.

    Draws random observations and pulls all neurons toward each one's
    winning neuron, weighted by a shrinking Gaussian neighbourhood,
    until movement stalls or ITERATION_MAX is hit; then clusterizes.
    Returns the number of iterations performed.
    """
    if self._verbose:
        self.draw_all()
    # Parameters initialization
    neurons = self._neurons
    # Initial radius: middle-to-last neuron distance.  NOTE(review):
    # Python-2 integer division in len(neurons) / 2; requires
    # sigma_init > 1 so that math.log(sigma_init) below is non-zero --
    # confirm with callers.
    sigma_init = distance.euclidian(neurons[len(neurons) / 2].get_position(), \
                                    neurons[len(neurons) - 1].get_position(), \
                                    dimension=len(neurons[0].get_position()))
    # Decay time constants for the radius (t1) and learning rate (t2).
    t1 = 1000 / math.log(sigma_init)
    nu0 = 0.1
    t2 = 1000
    nu = lambda n: nu0 * math.exp(-n / t2)
    iter = 1
    data = self._dataset.data()
    while (iter <= ITERATION_MAX):
        # Random observation and current neighbourhood radius.
        z = data[int(random.random() * len(data))]
        sigma = sigma_init * math.exp(-iter / t1)
        # Get nearest neuron for observation z
        nearest_neuron = neurons[0]
        best_distance = distance.euclidian(neurons[0], z)
        for i in xrange(1, len(neurons)):
            tmp_distance = distance.euclidian(neurons[i], z)
            if tmp_distance < best_distance:
                nearest_neuron = neurons[i]
                best_distance = tmp_distance
        # Convergence bookkeeping is sampled once every len(neurons)
        # iterations; None disables it for the other iterations.
        if (iter % len(neurons) == 0):
            changed_neurons = 0
        else:
            changed_neurons = None
        for neuron in neurons:
            # Neighbourhood weight relative to the winning neuron.
            d = distance.euclidian(neuron.get_position(), \
                                   nearest_neuron.get_position(), \
                                   dimension=len(neuron.get_position()))
            h = gaussian_kernel(d, sigma)
            # Update each component; dmoved sums absolute displacement.
            dmoved = 0
            for i in xrange(len(neuron)):
                step = nu(iter) * h * (z[i] - neuron[i])
                neuron[i] += step
                dmoved += abs(step)
            if not changed_neurons is None:
                if dmoved > 0:
                    changed_neurons += 1
        # Early exit when almost no neurons moved at the last check.
        if not changed_neurons is None:
            perc = float(changed_neurons) / len(neurons) * 100
            if perc <= PERC_NEURONS_CHANGED:
                break
        # Periodic progress report.  NOTE(review): reconstructed
        # indentation places this at loop level; it reads ``perc``,
        # which is only safe once a convergence check has run
        # (len(neurons) <= 1000) -- confirm.
        if iter % 1000 == 0 and self._verbose:
            print "%f%% of neurons moved at last check" % perc
            print "iteration %i" % iter
            self.draw_all()
        iter += 1
    if self._verbose:
        print "%i%% of neurons moved at last check" % int(perc)
        self.draw_all()
    # Build the final labelled clusters from the trained neurons.
    clusterize(self._neurons, self._data_dimension, self._clusters,
               self._dataset, self._nb_clusters)
    return iter - 1