def _mmr(lambda_score, q, data, k):
    """Greedily pick up to ``k`` documents by Maximal Marginal Relevance.

    Every round scores each remaining document as
    ``lambda_score * rel - (1 - lambda_score) * sim``, where ``rel`` is the
    similarity to the query and ``sim`` is the highest similarity to any
    document selected so far.  Similarity is ``1 / (1 + d)`` with ``d`` the
    value returned by ``euclidean_distance_square``.

    Args:
        lambda_score: Weight of relevance against redundancy.
        q: Query point.
        data: List of candidate points.  Selected points are removed from
            this very list (in place), as before.
        k: Number of documents requested.

    Returns:
        The selected documents in selection order.  Fewer than ``k`` items
        are returned when the candidates run out.
    """
    docs_unranked = data  # deliberately the caller's list: removal is in place
    docs_selected = []
    for _ in range(k):
        if not docs_unranked:
            # The old loop re-appended the previous winner here and then
            # raised ValueError while removing it a second time.
            break
        best_index = None
        best_score = float("-inf")
        for index, doc in enumerate(docs_unranked):
            sim = 0
            for chosen in docs_selected:
                sim = max(sim, 1 / (1 + euclidean_distance_square(doc, chosen)))
            rel = 1 / (1 + euclidean_distance_square(q, doc))
            score = lambda_score * rel - (1 - lambda_score) * sim
            # Strict comparison keeps the first of several equally good items.
            if score > best_score:
                best_score = score
                best_index = index
        if best_index is None:
            break  # no comparable score (e.g. NaN everywhere)
        docs_selected.append(docs_unranked.pop(best_index))
    return docs_selected
def GMM(X, K):
    """Greedy max-min selection of ``K`` mutually distant points from ``X``.

    Step 1 picks the pair with the largest ``euclidean_distance_square``
    value; the time spent is stored in the global ``gmmtimeStep1``.
    Step 2 repeatedly adds the remaining point whose smallest distance to
    the already chosen points is largest.

    Args:
        X: List of points.  Chosen points are removed from it in place.
        K: Number of points requested (the first two are always taken).

    Returns:
        The list of chosen points; shorter than ``K`` if ``X`` runs out.

    Raises:
        ValueError: If ``X`` holds no two points with a positive distance.
    """
    global gmmtimeStep1
    start = timeit.default_timer()
    a = b = None
    maxd = 0
    # Each unordered pair is measured once; strict "<" keeps the first
    # farthest pair in the same order the full double loop would find it.
    for position, i in enumerate(X):
        for j in X[position + 1:]:
            dis = euclidean_distance_square(i, j)
            if maxd < dis:
                maxd = dis
                a = i
                b = j
    if a is None:
        raise ValueError("GMM needs at least two distinct points in X")
    C = [a, b]
    X.remove(a)
    X.remove(b)
    stop = timeit.default_timer()
    gmmtimeStep1 = stop - start

    for _ in range(K - 2):
        if not X:
            break  # nothing left to choose from
        # min() replaces a 10000000 sentinel that silently capped distances.
        nearest = [min(euclidean_distance_square(i, j) for j in C) for i in X]
        index_max = int(np.argmax(nearest))
        C.append(X.pop(index_max))
    print("final C:", C)
    return C
def aug_mmr(cluster, indexMap, lambda_score, q, data, k, numberOfCluster, numberOfLevels):
    """Select ``k`` documents by MMR, drawing candidates from ``getNext``.

    While ``checkGetNext`` is set, the candidate list of a round comes from
    ``getNext`` (its run time is accumulated in the global ``getNextTime``).
    Once a returned list reaches ``stopcondcoeff * len(data)`` items, later
    rounds score the whole of ``data`` instead.

    Args:
        cluster, indexMap, numberOfCluster, numberOfLevels: Forwarded to
            ``getNext`` unchanged.
        lambda_score: Weight of relevance against redundancy.
        q: Query point.
        data: Object providing ``tolist()`` and ``len()`` (all documents).
        k: Number of rounds, i.e. number of returned entries.

    Returns:
        List with one entry per round.  A round without any candidate
        contributes the placeholder ``[0, 0]`` (behaviour kept as is).
    """
    global getNextTime
    docs_selected = []
    checkGetNext = True
    lastDoc = None
    for _ in range(k):
        mmr = -100000000
        if checkGetNext:
            start = timeit.default_timer()
            R = getNext(cluster, indexMap, q, lambda_score, lastDoc,
                        numberOfCluster, numberOfLevels)
            end = timeit.default_timer()
            getNextTime = end - start + getNextTime
            if len(R) >= stopcondcoeff * len(data):
                checkGetNext = False
        else:
            # Only materialise the full list when it is actually used.
            R = data.tolist()
        best1 = [0, 0]
        for item in docs_selected:
            if item in R:
                R.remove(item)
        for d in R:
            sim = 0
            for s in docs_selected:
                dist = euclidean_distance_square(d, s)
                if dist == 0:
                    continue  # identical to a selected document: ignored
                sim = max(sim, 1 / (1 + dist))
            rel = 1 / (1 + euclidean_distance_square(q, d))
            mmr_current = lambda_score * rel - (1 - lambda_score) * sim
            if mmr_current > mmr:
                mmr = mmr_current
                best1 = d
        docs_selected.append(best1)
        lastDoc = best1
    return docs_selected
def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
    """Assert that CF-entry distances are symmetric and match the utility functions.

    Two ``cfentry`` objects are built from the clusters, their distance is
    computed in both directions and compared, and the result is checked
    against the direct computation for the given measurement type.
    """
    first = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
    second = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

    # The distance must not depend on the direction.
    forward = first.get_distance(second, type_measurement)
    backward = second.get_distance(first, type_measurement)
    assert forward == backward

    # Compare with the reference computation.
    if type_measurement == measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
        expected = euclidean_distance_square(first.get_centroid(), second.get_centroid())
        assert forward == expected
    elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
        expected = manhattan_distance(first.get_centroid(), second.get_centroid())
        assert forward == expected
    elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
        expected = average_inter_cluster_distance(cluster1, cluster2)
        assert numpy.isclose(forward, expected) == True
    elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
        expected = average_intra_cluster_distance(cluster1, cluster2)
        assert numpy.isclose(forward, expected) == True
    elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
        expected = variance_increase_distance(cluster1, cluster2)
        assert numpy.isclose(forward, expected) == True
def __find_another_nearest_medoid(self, point_index, current_medoid_index):
    """!
    @brief Finds the medoid nearest to the specified point among all current medoids except the specified one.

    @param[in] point_index: index of the point in the data space for which the medoid is searched.
    @param[in] current_medoid_index: index of the medoid that must not be considered.

    @return (uint) index of the nearest other medoid, or -1 if there is no other medoid.

    """
    other_medoid_index = -1
    other_distance_nearest = float("inf")
    for index_medoid in self.__current:
        if index_medoid == current_medoid_index:
            continue
        # Measure to the candidate medoid itself.  Measuring to
        # current_medoid_index gave every candidate the same distance, so
        # the first other medoid always won.
        other_distance_candidate = euclidean_distance_square(
            self.__pointer_data[point_index],
            self.__pointer_data[index_medoid],
        )
        if other_distance_candidate < other_distance_nearest:
            other_distance_nearest = other_distance_candidate
            other_medoid_index = index_medoid
    return other_medoid_index
def __merge_by_average_link(self):
    """!
    @brief Merges the pair of clusters with the smallest average-link score.

    @details The score of a pair is the sum of squared distances over all cross pairs of objects
              divided by the sum of both cluster sizes.
              NOTE(review): a conventional average link divides by the number of cross pairs
              (the product of the sizes) - confirm that the sum is intended.

    """
    clusters = self.__clusters
    best_score = float('Inf')

    for first in range(len(clusters)):
        for second in range(first + 1, len(clusters)):
            score = 0.0
            for object1 in clusters[first]:
                for object2 in clusters[second]:
                    score += euclidean_distance_square(self.__pointer_data[object1],
                                                       self.__pointer_data[object2])

            score /= (len(clusters[first]) + len(clusters[second]))

            if score < best_score:
                best_score = score
                indexes = [first, second]

    clusters[indexes[0]] += clusters[indexes[1]]
    clusters.pop(indexes[1])  # the merged cluster is dropped
def __update_clusters(self, medoids):
    """!
    @brief Rebuilds clusters by assigning every point to the medoid with the smallest squared distance.

    @details Stores the chosen medoid position of every point in the belong-list, then drops
              clusters that captured no point.

    @param[in] medoids (list): indexes of the points that act as medoids.

    """
    self.__belong = [0] * len(self.__pointer_data)
    self.__clusters = [[] for _ in range(len(medoids))]

    for index_point, point in enumerate(self.__pointer_data):
        nearest, nearest_distance = -1, 0.0
        for position, medoid in enumerate(medoids):
            distance = euclidean_distance_square(point, self.__pointer_data[medoid])
            # The first medoid is always accepted as the starting candidate.
            if position == 0 or distance < nearest_distance:
                nearest, nearest_distance = position, distance

        self.__clusters[nearest].append(index_point)
        self.__belong[index_point] = nearest

    # Clusters without any captured object are removed.
    self.__clusters = [members for members in self.__clusters if len(members) > 0]
def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node_head, best_nodes):
    """!
    @brief Collects tuples (squared distance, node) for nodes of the subtree that lie within the search area.

    @param[in] point (list): Coordinates of the search center.
    @param[in] distance (double): Search radius, used to decide which branches are visited.
    @param[in] sqrt_distance (double): Squared search radius, compared with squared distances.
    @param[in] node_head (node): Root of the subtree that is searched.
    @param[in|out] best_nodes (list): Found nodes are appended here.

    """
    # Right branch: visited unless the point lies more than `distance` below the splitting value.
    if node_head.right is not None:
        if point[node_head.disc] >= node_head.data[node_head.disc] - distance:
            self.__recursive_nearest_nodes(point, distance, sqrt_distance, node_head.right, best_nodes)

    # Left branch: visited while the point lies below the splitting value plus `distance`.
    if node_head.left is not None:
        if point[node_head.disc] < node_head.data[node_head.disc] + distance:
            self.__recursive_nearest_nodes(point, distance, sqrt_distance, node_head.left, best_nodes)

    # The node itself is checked after its children.
    squared = euclidean_distance_square(point, node_head.data)
    if squared <= sqrt_distance:
        best_nodes.append((squared, node_head))
def GMM(X, K):
    """Greedy max-min selection seeded with the precomputed pair ``firstTwoItem``.

    The two seed points come from the global ``firstTwoItem``; the global
    ``gmm_step1_time`` is reset to 0 and printed.  Afterwards the remaining
    point whose smallest ``euclidean_distance_square`` to the chosen points
    is largest is added, ``K - 2`` times.

    Args:
        X: List of points.  Chosen points are removed from it in place.
        K: Number of points requested (the two seeds are always taken).

    Returns:
        The list of chosen points; shorter than ``K`` if ``X`` runs out.
    """
    global gmm_step1_time, firstTwoItem
    a = firstTwoItem[0]
    b = firstTwoItem[1]
    C = [a, b]
    X.remove(a)
    X.remove(b)
    gmm_step1_time = 0
    print('Time for gmm step 1: ', gmm_step1_time)

    for _ in range(K - 2):
        if not X:
            break  # nothing left to choose from
        # min() replaces a 10000000 sentinel that silently capped distances.
        nearest = [min(euclidean_distance_square(i, j) for j in C) for i in X]
        index_max = int(np.argmax(nearest))
        C.append(X.pop(index_max))
    print("final C:", C)
    return C
def process(self):
    """!
    @brief Runs the K-Medians clustering.

    @details With the ccore flag set, clustering is delegated to the wrapper and medians are
              recomputed afterwards. Otherwise clusters and medians are updated until the largest
              squared shift of a median is not greater than the squared tolerance.

    @remark Results of clustering can be obtained using corresponding get methods.

    @see get_clusters()
    @see get_medians()

    """
    if self.__ccore is True:
        self.__clusters = wrapper.kmedians(self.__pointer_data, self.__medians, self.__tolerance)
        self.__medians = self.__update_medians()
        return

    # Data and initial medians must have the same dimension.
    if len(self.__pointer_data[0]) != len(self.__medians[0]):
        raise NameError('Dimension of the input data and dimension of the initial cluster medians must be equal.')

    # Squared tolerance is compared with squared distances (no square roots needed).
    stop_condition = self.__tolerance * self.__tolerance
    changes = float('inf')

    while changes > stop_condition:
        self.__clusters = self.__update_clusters()
        updated_centers = self.__update_medians()

        # The shift has to be measured before the old medians are replaced.
        changes = max(
            euclidean_distance_square(self.__medians[index], updated_centers[index])
            for index in range(len(updated_centers))
        )

        self.__medians = updated_centers
def __update_clusters(self):
    """!
    @brief Assigns every point to the median with the smallest squared Euclidean distance.

    @details The distance is computed by euclidean_distance_square (not a Manhattan distance).
              Clusters that captured no point are removed from the result.

    @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.

    """
    clusters = [[] for _ in range(len(self.__medians))]
    for index_point in range(len(self.__pointer_data)):
        index_optim = -1
        dist_optim = 0.0

        for index in range(len(self.__medians)):
            dist = euclidean_distance_square(self.__pointer_data[index_point], self.__medians[index])

            # '==' instead of 'is': identity of int literals is an
            # implementation detail and triggers a SyntaxWarning.
            if (index == 0) or (dist < dist_optim):
                index_optim = index
                dist_optim = dist

        clusters[index_optim].append(index_point)

    # If cluster is not able to capture object it should be removed
    clusters = [cluster for cluster in clusters if len(cluster) > 0]

    return clusters
def __merge_by_average_link(self):
    """!
    @brief Merges the two clusters that have the smallest average-link score.

    @details For every pair of clusters the squared distances between all cross pairs of objects
              are summed and divided by the sum of the two cluster sizes.
              NOTE(review): the usual average link divides by the product of the sizes
              (number of cross pairs) - confirm that the sum is intended.

    """
    lowest = float('Inf')
    amount = len(self.__clusters)

    for left in range(0, amount):
        left_members = self.__clusters[left]
        for right in range(left + 1, amount):
            right_members = self.__clusters[right]

            accumulated = 0.0
            for left_object in left_members:
                for right_object in right_members:
                    accumulated += euclidean_distance_square(
                        self.__pointer_data[left_object], self.__pointer_data[right_object])

            accumulated /= (len(left_members) + len(right_members))

            if accumulated < lowest:
                lowest = accumulated
                indexes = [left, right]

    self.__clusters[indexes[0]] += self.__clusters[indexes[1]]
    self.__clusters.pop(indexes[1])  # remove merged cluster.
def get_distance_matrix(self):
    """!
    @brief Calculates distance matrix (U-matrix).
    @details Every cell holds the mean of euclidean_distance_square between the weight vector of
              the neuron and the weight vectors of its neighbors. A neuron without neighbors
              keeps the value 0.0.

    @return (list) Distance matrix (U-matrix).

    @see show_distance_matrix()
    @see get_density_matrix()

    """
    if self.__ccore_som_pointer is not None:
        self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

        if self._conn_type != type_conn.func_neighbor:
            self._neighbors = wrapper.som_get_neighbors(self.__ccore_som_pointer)

    # Build the grid connections once instead of once per neuron; the
    # call does not depend on the neuron being processed.
    # NOTE(review): assumes _create_connections() gives the same result
    # on every call - confirm.
    if self._conn_type == type_conn.func_neighbor and self._rows > 0 and self._cols > 0:
        self._create_connections(type_conn.grid_eight)

    distance_matrix = [[0.0] * self._cols for _ in range(self._rows)]

    for i in range(self._rows):
        for j in range(self._cols):
            neuron_index = i * self._cols + j
            neighbors = self._neighbors[neuron_index]
            if len(neighbors) == 0:
                continue  # avoids a division by zero; cell stays 0.0

            for neighbor_index in neighbors:
                distance_matrix[i][j] += euclidean_distance_square(self._weights[neuron_index],
                                                                    self._weights[neighbor_index])

            distance_matrix[i][j] /= len(neighbors)

    return distance_matrix
def get_distance(self, entry, type_measurement):
    """!
    @brief Calculates distance between this clustering feature and another one.
    @details For CENTROID_EUCLIDEAN_DISTANCE the squared Euclidean distance between centroids is
              returned; take the square root to obtain the real Euclidean distance.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
    @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.

    @return (double) Distance between two clusters.

    @exception ValueError is raised for an unsupported measurement type.

    """
    if type_measurement is measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
        return euclidean_distance_square(entry.get_centroid(), self.get_centroid())

    if type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE:
        return manhattan_distance(entry.get_centroid(), self.get_centroid())

    if type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
        return self.__get_average_inter_cluster_distance(entry)

    if type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
        return self.__get_average_intra_cluster_distance(entry)

    if type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE:
        return self.__get_variance_increase_distance(entry)

    raise ValueError("Unsupported type of measurement '%s' is specified." % type_measurement)
def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node_head, best_nodes):
    """!
    @brief Appends tuples (squared distance, node) for every node of the subtree inside the search area.

    @param[in] point (list): Coordinates of the center of the search.
    @param[in] distance (double): Radius around the center, used for choosing branches.
    @param[in] sqrt_distance (double): Squared radius, compared with squared distances to nodes.
    @param[in] node_head (node): Node from which the search is performed.
    @param[in|out] best_nodes (list): List that receives the found nodes.

    """
    right_branch = node_head.right
    if right_branch is not None:
        axis = node_head.disc
        lower_limit = node_head.data[axis] - distance
        if point[axis] >= lower_limit:
            self.__recursive_nearest_nodes(point, distance, sqrt_distance, right_branch, best_nodes)

    left_branch = node_head.left
    if left_branch is not None:
        axis = node_head.disc
        upper_limit = node_head.data[axis] + distance
        if point[axis] < upper_limit:
            self.__recursive_nearest_nodes(point, distance, sqrt_distance, left_branch, best_nodes)

    # Children first, then the node itself.
    gap = euclidean_distance_square(point, node_head.data)
    if gap <= sqrt_distance:
        best_nodes.append((gap, node_head))
def __update_clusters(self):
    """!
    @brief Assigns every non-medoid point to the medoid with the smallest squared distance.

    @details Every cluster starts with the index of its own medoid; points whose index is a
              medoid index are skipped.

    @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.

    """
    clusters = [[self.__medoid_indexes[i]] for i in range(len(self.__medoids))]
    # Set gives constant-time membership tests inside the loop.
    medoid_index_set = set(self.__medoid_indexes)

    for index_point in range(len(self.__pointer_data)):
        if index_point in medoid_index_set:
            continue

        index_optim = -1
        dist_optim = float('Inf')

        for index in range(len(self.__medoids)):
            dist = euclidean_distance_square(self.__pointer_data[index_point], self.__medoids[index])

            # '==' instead of 'is': identity of int literals is an
            # implementation detail and triggers a SyntaxWarning.
            if (dist < dist_optim) or (index == 0):
                index_optim = index
                dist_optim = dist

        clusters[index_optim].append(index_point)

    return clusters
def process(self):
    """!
    @brief Runs the K-Medoids clustering.

    @details With the ccore flag set, clustering is delegated to the wrapper and medoids are
              recomputed afterwards. Otherwise clusters and medoids are updated until the largest
              squared shift of a medoid is not greater than the squared tolerance.

    @remark Results of clustering can be obtained using corresponding get methods.

    @see get_clusters()
    @see get_medoids()

    """
    if self.__ccore is True:
        self.__clusters = wrapper.kmedoids(self.__pointer_data, self.__medoid_indexes, self.__tolerance)
        self.__medoids, self.__medoid_indexes = self.__update_medoids()
        return

    # Squared tolerance is compared with squared distances (no square roots needed).
    stop_condition = self.__tolerance * self.__tolerance
    changes = float('inf')

    while changes > stop_condition:
        self.__clusters = self.__update_clusters()
        updated_medoids, update_medoid_indexes = self.__update_medoids()

        # The shift has to be measured before the old medoids are replaced.
        changes = max(
            euclidean_distance_square(self.__medoids[index], updated_medoids[index])
            for index in range(len(updated_medoids))
        )

        self.__medoids = updated_medoids
        self.__medoid_indexes = update_medoid_indexes
def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
    """Verify symmetry of the CF-entry distance and its agreement with the utility functions.

    Builds one ``cfentry`` per cluster, asserts that the distance is the same
    in both directions and then asserts that it matches the reference
    function that belongs to ``type_measurement``.
    """
    entry_a = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
    entry_b = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

    # Symmetry check: a -> b must equal b -> a.
    distance_ab = entry_a.get_distance(entry_b, type_measurement)
    distance_ba = entry_b.get_distance(entry_a, type_measurement)
    assert distance_ab == distance_ba

    # Reference check, one branch per measurement type.
    if type_measurement == measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
        assert distance_ab == euclidean_distance_square(
            entry_a.get_centroid(), entry_b.get_centroid())

    elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
        assert distance_ab == manhattan_distance(
            entry_a.get_centroid(), entry_b.get_centroid())

    elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
        reference = average_inter_cluster_distance(cluster1, cluster2)
        assert numpy.isclose(distance_ab, reference) == True

    elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
        reference = average_intra_cluster_distance(cluster1, cluster2)
        assert numpy.isclose(distance_ab, reference) == True

    elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
        reference = variance_increase_distance(cluster1, cluster2)
        assert numpy.isclose(distance_ab, reference) == True
def createSimMatrix(q, X):
    """Map the position of every point in ``X`` to its similarity to ``q``.

    The similarity is ``1 / (1 + d)`` where ``d`` is the value returned by
    ``euclidean_distance_square(q, point)``.

    Returns:
        dict: position (0-based) -> similarity.
    """
    return {
        position: 1 / (1 + euclidean_distance_square(q, point))
        for position, point in enumerate(X)
    }
def aug_mmr(cluster, lambda_score, q, data, k):
    """Select ``k`` documents by MMR from the candidates returned by ``getNext``.

    Each round asks ``getNext`` for a candidate list (its length is
    printed), drops already selected documents from it and keeps the
    candidate with the best score
    ``lambda_score * rel - (1 - lambda_score) * sim``.  Here similarity is
    the plain reciprocal ``1 / d`` of ``euclidean_distance_square``.

    Args:
        cluster: Forwarded to ``getNext``.
        lambda_score: Weight of relevance against redundancy.
        q: Query point.
        data: All documents; forwarded to ``getNext``.
        k: Number of rounds, i.e. number of returned entries.

    Returns:
        List with one entry per round.  A round without any candidate
        contributes the placeholder ``[0, 0]`` (behaviour kept as is).
    """
    docs_unranked = data
    docs_selected = []
    for _ in range(k):
        mmr = -100000000
        R = getNext(cluster, docs_unranked, docs_selected, q, lambda_score)
        print(len(R))
        best1 = [0, 0]
        for item in docs_selected:
            if item in R:
                R.remove(item)
        for d in R:
            sim = 0
            for s in docs_selected:
                dist = euclidean_distance_square(d, s)
                if dist == 0:
                    continue  # identical to a selected document: ignored
                sim = max(sim, 1 / dist)
            query_dist = euclidean_distance_square(q, d)
            # A candidate that coincides with the query used to raise
            # ZeroDivisionError; treat it as maximally relevant instead.
            rel = float("inf") if query_dist == 0 else 1 / query_dist
            # Avoid 0 * inf (NaN) when relevance carries no weight.
            relevance_term = lambda_score * rel if lambda_score else 0.0
            mmr_current = relevance_term - (1 - lambda_score) * sim
            if mmr_current > mmr:
                mmr = mmr_current
                best1 = d
        docs_selected.append(best1)
    return docs_selected
def AugGMM(cluster, X1, indexMap, K, C1):
    """Extend ``C1`` by ``K - 2`` max-min points, pruning candidates by cluster bounds.

    For every child of ``cluster.root`` the smallest "max" and smallest
    "min" bound over the chosen points are read from ``cluster.dismatrix``.
    Only children whose max-bound exceeds the largest min-bound keep their
    elements as candidates; among those, the element whose smallest
    ``euclidean_distance_square`` to ``C1`` is largest is chosen.

    Args:
        cluster: Index structure providing ``root.children``,
            ``documentMap`` and ``dismatrix``.
        X1: List of remaining points; the chosen point is removed in place.
        indexMap: Unused; kept for interface compatibility.
        K: Total number of points wanted (``C1`` already holds the first two).
        C1: Chosen points so far; extended in place.

    Returns:
        ``C1`` after extension.

    Raises:
        ValueError: From ``np.argmax`` when pruning leaves no candidate.
    """
    level = 1  # level of the tree whose ids and distance matrix are used
    children = cluster.root.children
    for _ in range(K - 2):
        LLmin = []
        LLmax = []
        for node1 in children:
            # inf instead of the former 10000000 cap, which silently
            # truncated larger bounds.
            minmax = float("inf")
            minmin = float("inf")
            for e in C1:
                doc_id = cluster.documentMap[tuple(e)][level]
                distmax, distmin = cluster.dismatrix[level][doc_id][node1.id]
                if minmax > distmax[0]:
                    minmax = distmax[0]
                if minmin > distmin[0]:
                    minmin = distmin[0]
            LLmin.append(minmin)
            LLmax.append(minmax)

        maxofMin = max(LLmin)
        selecteditem = []
        for node1, bound in zip(children, LLmax):
            if bound > maxofMin:
                selecteditem.extend(node1.elements)

        L = [
            min((euclidean_distance_square(i, j) for j in C1), default=float("inf"))
            for i in selecteditem
        ]
        index_max = np.argmax(L)
        chosen = selecteditem[index_max]
        C1.append(chosen)
        X1.remove(chosen)
        # Child ids are 1-based positions in root.children.
        owner_id = cluster.documentMap[tuple(chosen)][level]
        children[owner_id - 1].elements.remove(chosen)
    print("Aug-GMM result:", C1)
    return C1
def _competition(self, x):
    """!
    @brief Finds the winner neuron: the one whose weights have the smallest squared distance to the pattern.

    @param[in] x (list): Input pattern from the input data set, for example it can be coordinates of point.

    @return (uint) Index of the winner neuron; the lowest index wins a tie.

    """
    winner, winner_distance = 0, euclidean_distance_square(self._weights[0], x)

    for neuron in range(1, self._size):
        distance = euclidean_distance_square(self._weights[neuron], x)
        if distance < winner_distance:
            winner, winner_distance = neuron, distance

    return winner
def __calculate_weight(self, stimulus1, stimulus2):
    """!
    @brief Calculates the weight between two neurons from their external stimuli.
    @details The weight is exp(-d / (2 * average_distance)), where d is the value returned by
              euclidean_distance_square for the two stimuli.

    @param[in] stimulus1 (list): External stimulus of the first neuron.
    @param[in] stimulus2 (list): External stimulus of the second neuron.

    @return (double) Weight between neurons that are under specified stimulus.

    """
    squared_gap = euclidean_distance_square(stimulus1, stimulus2)
    exponent = -squared_gap / (2.0 * self.__average_distance)
    return math.exp(exponent)
def __bayesian_information_criterion(self, clusters, centers):
    """!
    @brief Calculates splitting criterion for input clusters using bayesian information criterion.

    @param[in] clusters (list): Clusters for which splitting criterion should be calculated.
    @param[in] centers (list): Centers of the clusters.

    @return (double) Sum of the per-cluster scores. High value of splitting criterion means that
             current structure is much better.

    @see __minimum_noiseless_description_length(clusters, centers)

    """
    dimension = len(self.__pointer_data[0])
    K = len(clusters)

    # Noise variance estimate: summed squared distances of objects to their centers.
    sigma_sqrt = 0.0
    N = 0.0
    for index_cluster, members in enumerate(clusters):
        for index_object in members:
            sigma_sqrt += euclidean_distance_square(self.__pointer_data[index_object],
                                                    centers[index_cluster])
        N += len(members)

    if N - K > 0:
        sigma_sqrt /= (N - K)

    p = (K - 1) + dimension * K + 1

    # For identical points the variance can be zero (issue: #407).
    if sigma_sqrt <= 0.0:
        sigma_multiplier = float('-inf')
    else:
        sigma_multiplier = dimension * 0.5 * log(sigma_sqrt)

    scores = []
    for members in clusters:
        n = len(members)

        # log-likelihood of the cluster
        L = n * log(n) - n * log(N) - n * 0.5 * log(2.0 * numpy.pi) - n * sigma_multiplier - (n - K) * 0.5

        # BIC of the cluster
        scores.append(L - p * 0.5 * log(N))

    return sum(scores)
def __calculate_estimation(self):
    """!
    @brief Calculates estimation (cost) of the current clusters: the sum of squared distances
            from every point to the medoid of its cluster. Lower is better.

    @return (double) estimation of current clusters.

    """
    estimation = 0.0
    for position, members in enumerate(self.__clusters):
        medoid_index = self.__current[position]
        for point_index in members:
            estimation += euclidean_distance_square(self.__pointer_data[point_index],
                                                    self.__pointer_data[medoid_index])

    return estimation
def __initialize_distances(self, size, location):
    """!
    @brief Builds the symmetric matrix of squared distances between neurons in the SOM grid.

    @param[in] size (uint): Amount of neurons in the network.
    @param[in] location (list): List of coordinates of each neuron in the network.

    @return (list) Distance matrix between neurons in the network.

    """
    sqrt_distances = [[[] for _ in range(size)] for _ in range(size)]

    for row in range(size):
        # Only the upper triangle is computed; the value is mirrored.
        for col in range(row, size):
            value = euclidean_distance_square(location[row], location[col])
            sqrt_distances[row][col] = sqrt_distances[col][row] = value

    return sqrt_distances
def __initialize_distances(self, size, location):
    """!
    @brief Computes squared distances between all pairs of neurons in the SOM grid.

    @param[in] size (uint): Amount of neurons in the network.
    @param[in] location (list): List of coordinates of each neuron in the network.

    @return (list) Symmetric distance matrix between neurons in the network.

    """
    matrix = []
    for _ in range(size):
        matrix.append([[] for _ in range(size)])

    for first in range(size):
        second = first
        while second < size:
            # Each pair is measured once and stored in both cells.
            measured = euclidean_distance_square(location[first], location[second])
            matrix[first][second] = measured
            matrix[second][first] = measured
            second += 1

    return matrix
def __calculate_nearest_distance(self, index_cluster1, index_cluster2):
    """!
    @brief Returns the smallest squared distance between an object of the first cluster and an
            object of the second cluster.

    @param[in] (uint) Index of the first cluster.
    @param[in] (uint) Index of the second cluster.

    @return The smallest value found, or infinity when there is no pair of objects.

    """
    nearest = float('Inf')

    for first_object in self.__clusters[index_cluster1]:
        for second_object in self.__clusters[index_cluster2]:
            gap = euclidean_distance_square(self.__pointer_data[first_object],
                                            self.__pointer_data[second_object])
            if gap < nearest:
                nearest = gap

    return nearest
def __calculate_farthest_distance(self, index_cluster1, index_cluster2):
    """!
    @brief Returns the largest squared distance between an object of the first cluster and an
            object of the second cluster.

    @param[in] (uint) Index of the first cluster.
    @param[in] (uint) Index of the second cluster.

    @return The largest value found, or 0.0 when there is no pair of objects.

    """
    farthest = 0.0

    for first_object in self.__clusters[index_cluster1]:
        for second_object in self.__clusters[index_cluster2]:
            gap = euclidean_distance_square(self.__pointer_data[first_object],
                                            self.__pointer_data[second_object])
            if gap > farthest:
                farthest = gap

    return farthest
def __calculate_nearest_distance(self, index_cluster1, index_cluster2):
    """!
    @brief Finds the two nearest objects of two clusters and returns the squared distance between them.

    @param[in] (uint) Index of the first cluster.
    @param[in] (uint) Index of the second cluster.

    @return The smallest squared distance, or infinity when there is no pair of objects.

    """
    minimum = float('Inf')

    for object_a in self.__clusters[index_cluster1]:
        for object_b in self.__clusters[index_cluster2]:
            candidate = euclidean_distance_square(self.__pointer_data[object_a],
                                                  self.__pointer_data[object_b])
            if not (candidate < minimum):
                continue
            minimum = candidate

    return minimum
def __has_object_connection(self, oscillator_index1, oscillator_index2):
    """!
    @brief Checks whether the neurons encoded by two oscillators captured at least one pair of
            objects whose squared distance does not exceed the stored radius.
    @details NOTE(review): the compared value is a squared distance, so the stored radius is
              presumably squared as well - confirm.

    @param[in] oscillator_index1 (uint): Index of the first oscillator in the second layer.
    @param[in] oscillator_index2 (uint): Index of the second oscillator in the second layer.

    @return (bool) True - if there is pair of connected objects encoded by specified oscillators.

    """
    first_neuron = self._som_osc_table[oscillator_index1]
    second_neuron = self._som_osc_table[oscillator_index2]

    # any() stops at the first connected pair, like the explicit early return.
    return any(
        euclidean_distance_square(self._data[first_object], self._data[second_object]) <= self._radius
        for first_object in self._som.capture_objects[first_neuron]
        for second_object in self._som.capture_objects[second_neuron]
    )
def __has_object_connection(self, oscillator_index1, oscillator_index2):
    """!
    @brief Searches for a pair of objects, one captured by each of the two encoded neurons,
            whose squared distance is not greater than the stored radius.
    @details NOTE(review): a squared distance is compared with the radius, so the radius is
              presumably stored squared - confirm.

    @param[in] oscillator_index1 (uint): Index of the first oscillator in the second layer.
    @param[in] oscillator_index2 (uint): Index of the second oscillator in the second layer.

    @return (bool) True - if there is pair of connected objects encoded by specified oscillators.

    """
    neuron_a = self._som_osc_table[oscillator_index1]
    neuron_b = self._som_osc_table[oscillator_index2]

    connected = False
    for object_a in self._som.capture_objects[neuron_a]:
        for object_b in self._som.capture_objects[neuron_b]:
            gap = euclidean_distance_square(self._data[object_a], self._data[object_b])
            if gap <= self._radius:
                connected = True
                break
        if connected:
            break

    return connected
def __cluster_distance(self, cluster1, cluster2):
    """!
    @brief Calculates the minimal distance between clusters using their representative points.
    @details The returned value is the smallest squared distance (fast mode, no square root)
              between a representative point of the first and of the second cluster.

    @param[in] cluster1 (cure_cluster): The first cluster.
    @param[in] cluster2 (cure_cluster): The second cluster.

    @return (double) Smallest squared distance, or infinity when a cluster has no representatives.

    """
    closest = float('inf')
    for first_rep in cluster1.rep:
        for second_rep in cluster2.rep:
            gap = euclidean_distance_square(first_rep, second_rep)  # Fast mode
            if gap < closest:
                closest = gap

    return closest
def __merge_by_centroid_link(self):
    """!
    @brief Merges the two clusters whose centers have the smallest squared distance.

    @details The second cluster is appended to the first one, the center of the first one is
              recalculated, and the second cluster and its center are removed.

    """
    closest_pair = None
    smallest = float('Inf')
    amount = len(self.__centers)

    for first in range(amount):
        for second in range(first + 1, amount):
            gap = euclidean_distance_square(self.__centers[first], self.__centers[second])
            if gap < smallest:
                smallest = gap
                closest_pair = [first, second]

    kept = closest_pair[0]
    removed = closest_pair[1]

    self.__clusters[kept] += self.__clusters[removed]
    self.__centers[kept] = self.__calculate_center(self.__clusters[kept])

    self.__clusters.pop(removed)  # remove merged cluster.
    self.__centers.pop(removed)  # remove merged center.
def __find_another_nearest_medoid(self, point_index, current_medoid_index):
    """!
    @brief Finds the nearest medoid for the specified point that differs from the specified medoid.

    @param[in] point_index: index of the point in the data space for which the medoid is searched.
    @param[in] current_medoid_index: index of the medoid that must not be considered as the nearest.

    @return (uint) index of the nearest other medoid for the point, or -1 if there is none.

    """
    other_medoid_index = -1
    other_distance_nearest = float('inf')
    for index_medoid in self.__current:
        if index_medoid != current_medoid_index:
            # The distance must be taken to the candidate medoid. Using
            # current_medoid_index here made all candidates equal, so the
            # first other medoid was always returned.
            other_distance_candidate = euclidean_distance_square(self.__pointer_data[point_index],
                                                                 self.__pointer_data[index_medoid])

            if other_distance_candidate < other_distance_nearest:
                other_distance_nearest = other_distance_candidate
                other_medoid_index = index_medoid

    return other_medoid_index
def __merge_by_centroid_link(self):
    """!
    @brief Merges the most similar clusters in line with centroid link type.

    @details The pair of centers with the smallest squared distance is searched; the second
              cluster of the pair is merged into the first one and the center is recalculated.

    """
    indexes = None
    best_gap = float('Inf')

    for left in range(0, len(self.__centers)):
        left_center = self.__centers[left]
        for right in range(left + 1, len(self.__centers)):
            gap = euclidean_distance_square(left_center, self.__centers[right])
            if gap < best_gap:
                best_gap, indexes = gap, [left, right]

    target = indexes[0]
    source = indexes[1]

    self.__clusters[target] += self.__clusters[source]
    self.__centers[target] = self.__calculate_center(self.__clusters[target])

    # The merged cluster and its center are no longer needed.
    self.__clusters.pop(source)
    self.__centers.pop(source)
def __calculate_initial_clusters(self, centers):
    """!
    @brief Assigns every point of the sample to the center with the smallest squared Euclidean distance.
    @details Clusters that capture no point stay in the result as empty lists.

    @param[in] centers (list): Centers to which the points are assigned.

    @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.

    """
    clusters = [[] for _ in range(len(centers))]
    for index_point in range(len(self.__sample)):
        index_optim, dist_optim = -1, 0.0

        for index in range(len(centers)):
            dist = euclidean_distance_square(self.__sample[index_point], centers[index])

            # '==' instead of 'is': identity of int literals is an
            # implementation detail and triggers a SyntaxWarning.
            if (index == 0) or (dist < dist_optim):
                index_optim, dist_optim = index, dist

        clusters[index_optim].append(index_point)

    return clusters
def GMM(X, K, C):
    """Extend ``C`` by up to ``K - 2`` points of ``X`` using greedy max-min selection.

    Each round adds the point of ``X`` whose smallest
    ``euclidean_distance_square`` to the points in ``C`` is largest.

    Args:
        X: List of candidate points.  Chosen points are removed in place.
        K: Total number of points wanted; ``K - 2`` rounds are run.
        C: Already chosen points; extended in place.

    Returns:
        ``C`` after extension (fewer rounds are run if ``X`` runs out).
    """
    for _ in range(K - 2):
        if not X:
            break  # nothing left to choose from
        # min() replaces a 10000000 sentinel that silently capped
        # distances; with an empty C every point scores infinity.
        nearest = [
            min((euclidean_distance_square(i, j) for j in C), default=float("inf"))
            for i in X
        ]
        index_max = int(np.argmax(nearest))
        C.append(X.pop(index_max))
    print("final C:", C)
    return C
def __update_clusters(self, medoids):
    """!
    @brief Forms clusters by assigning every point to the medoid with the smallest squared distance.

    @details The position of the chosen medoid is stored for every point in the belong-list.
              Clusters that captured no point are removed afterwards; the belong-list is not
              renumbered after that removal.

    @param[in] medoids (list): indexes of the points that act as medoids.

    """
    self.__belong = [0] * len(self.__pointer_data)
    self.__clusters = [[] for _ in range(len(medoids))]

    for index_point in range(len(self.__pointer_data)):
        index_optim = -1
        dist_optim = 0.0

        for index in range(len(medoids)):
            dist = euclidean_distance_square(self.__pointer_data[index_point],
                                             self.__pointer_data[medoids[index]])

            # '==' instead of 'is': identity of int literals is an
            # implementation detail and triggers a SyntaxWarning.
            if (index == 0) or (dist < dist_optim):
                index_optim = index
                dist_optim = dist

        self.__clusters[index_optim].append(index_point)
        self.__belong[index_point] = index_optim

    # If cluster is not able to capture object it should be removed
    self.__clusters = [cluster for cluster in self.__clusters if len(cluster) > 0]
def __optimize_configuration(self):
    """!
    @brief Searches for quasi-optimal medoids by random replacement and updates clusters accordingly.

    @details A random current medoid and a random non-medoid candidate are drawn. The cost of the
              replacement is accumulated over all non-medoid points; a negative cost accepts the
              candidate, rebuilds the clusters and restarts the neighbor counter, otherwise the
              counter is increased. The search stops when the counter reaches the maximum.

    """
    index_neighbor = 0
    while index_neighbor < self.__maxneighbor:
        # Random current medoid that may be replaced, and the cluster it belongs to.
        current_medoid_index = self.__current[random.randint(0, self.__number_clusters - 1)]
        current_medoid_cluster_index = self.__belong[current_medoid_index]

        # Random candidate that is not a medoid yet.
        candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1)
        while candidate_medoid_index in self.__current:
            candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1)

        candidate_cost = 0.0
        for point_index in range(0, len(self.__pointer_data)):
            if point_index in self.__current:
                continue  # medoids do not contribute to the cost

            # Cluster and medoid of the non-medoid point.
            point_cluster_index = self.__belong[point_index]
            point_medoid_index = self.__current[point_cluster_index]

            # Nearest medoid other than the current one, and its cluster.
            other_medoid_index = self.__find_another_nearest_medoid(point_index, current_medoid_index)
            other_medoid_cluster_index = self.__belong[other_medoid_index]

            # Distances from the point to the current medoid and to the candidate.
            distance_current = euclidean_distance_square(self.__pointer_data[point_index],
                                                         self.__pointer_data[current_medoid_index])
            distance_candidate = euclidean_distance_square(self.__pointer_data[point_index],
                                                           self.__pointer_data[candidate_medoid_index])

            # Distance from the point to its own medoid (infinity when the check fails).
            # NOTE(review): the second comparison is between a medoid index and a cluster
            # index - confirm that this is intended.
            own_medoid_counts = ((point_medoid_index != candidate_medoid_index)
                                 and (point_medoid_index != current_medoid_cluster_index))
            if own_medoid_counts:
                distance_nearest = euclidean_distance_square(self.__pointer_data[point_index],
                                                             self.__pointer_data[point_medoid_index])
            else:
                distance_nearest = float('inf')

            # Cost rules.
            if point_cluster_index == current_medoid_cluster_index:
                if distance_candidate >= distance_nearest:
                    # case 1
                    candidate_cost += distance_nearest - distance_current
                else:
                    # case 2
                    candidate_cost += distance_candidate - distance_current

            elif point_cluster_index == other_medoid_cluster_index:
                # case 3: candidate is farther than the nearest medoid - nothing changes.
                # case 4: otherwise the point would move to the candidate.
                if not (distance_candidate > distance_nearest):
                    candidate_cost += distance_candidate - distance_nearest

        if candidate_cost < 0:
            # The candidate has won: replace the medoid and rebuild clusters.
            self.__current[current_medoid_cluster_index] = candidate_medoid_index
            self.__update_clusters(self.__current)

            # Start the investigation of neighbors from the beginning.
            index_neighbor = 0
        else:
            index_neighbor += 1