def _mmr(lambda_score, q, data, k):
    """Select up to ``k`` documents from ``data`` by Maximal Marginal Relevance.

    In every round each remaining candidate ``d`` is scored as
    ``lambda_score * rel - (1 - lambda_score) * sim`` where ``rel`` is
    ``1 / (1 + euclidean_distance_square(q, d))`` and ``sim`` is the largest
    ``1 / (1 + euclidean_distance_square(d, s))`` over the already selected
    documents ``s`` (0 when nothing is selected yet). The best-scoring
    candidate is moved to the result.

    Args:
        lambda_score: Weight of relevance against redundancy.
        q: Query point.
        data: Iterable of candidate points. It is copied and never modified.
        k: Number of documents to select.

    Returns:
        The selected documents in selection order. Fewer than ``k`` documents
        are returned when ``data`` holds fewer than ``k`` candidates.
    """
    # Work on a copy so the caller's collection is left intact.
    candidates = list(data)
    docs_selected = []

    # Never ask for more rounds than there are candidates.
    for _ in range(min(k, len(candidates))):
        best_index = -1
        best_score = float("-inf")

        for index, d in enumerate(candidates):
            # Redundancy: similarity to the closest already selected document.
            sim = 0
            for s in docs_selected:
                sim_current = 1 / (1 + euclidean_distance_square(d, s))
                if sim_current > sim:
                    sim = sim_current

            rel = 1 / (1 + euclidean_distance_square(q, d))
            mmr_current = lambda_score * rel - (1 - lambda_score) * sim

            # Strict comparison keeps the first candidate among equal scores.
            if mmr_current > best_score:
                best_score = mmr_current
                best_index = index

        if best_index < 0:
            # No candidate produced a comparable score (e.g. all NaN).
            break

        # Removing by position avoids an equality search over the candidates.
        docs_selected.append(candidates.pop(best_index))

    return docs_selected
Example #2
0
def GMM(X, K):
    """Greedily pick mutually distant points from ``X`` (max-min selection).

    The two points with the largest squared Euclidean distance seed the
    result. Afterwards, ``K - 2`` times, the point whose distance to its
    nearest already chosen point is largest is added. Chosen points are
    removed from ``X`` in place. The duration of the seeding step is stored
    in the module-level ``gmmtimeStep1``.

    Args:
        X: List of points; mutated (every chosen point is removed).
        K: Number of points to pick. The two seeds are always returned, even
            when ``K`` is smaller than 2.

    Returns:
        List of the chosen points, the two seeds first.

    Raises:
        ValueError: If ``X`` does not contain two points at a non-zero
            distance.
    """
    global gmmtimeStep1
    a = None
    b = None
    maxd = 0

    start = timeit.default_timer()

    # Every unordered pair is examined once. Identical points have distance 0
    # and can never beat ``maxd``, so no explicit "same point" filter is needed
    # (the former filter compared only the first two coordinates).
    size = len(X)
    for first in range(size):
        for second in range(first + 1, size):
            dis = euclidean_distance_square(X[first], X[second])
            if maxd < dis:
                maxd = dis
                a = X[first]
                b = X[second]

    if a is None:
        raise ValueError("GMM needs at least two points at a non-zero distance")

    C = [a, b]

    X.remove(a)
    X.remove(b)

    stop = timeit.default_timer()
    gmmtimeStep1 = stop - start

    for _ in range(K - 2):
        # Distance from every remaining point to its nearest chosen point.
        nearest = []
        for i in X:
            closest = float("inf")
            for j in C:
                dist = euclidean_distance_square(i, j)
                if closest > dist:
                    closest = dist
            nearest.append(closest)

        # The point that is farthest from the chosen set joins it.
        index_max = int(np.argmax(nearest))
        C.append(X.pop(index_max))

    print("final C:", C)
    return C
def aug_mmr(cluster, indexMap, lambda_score, q, data, k, numberOfCluster, numberOfLevels):
    """Select ``k`` documents by MMR over candidate sets supplied by ``getNext``.

    While candidate generation is active, every round asks ``getNext`` for a
    candidate list and adds the time spent there to the module-level
    ``getNextTime``. Once a returned list holds at least
    ``stopcondcoeff * len(data)`` items, later rounds score ``data.tolist()``
    instead. Candidates already selected are dropped, the rest are scored as
    ``lambda_score * rel - (1 - lambda_score) * sim`` with both terms of the
    form ``1 / (1 + euclidean_distance_square(...))``.

    Args:
        cluster: Passed through to ``getNext``.
        indexMap: Passed through to ``getNext``.
        lambda_score: Weight of relevance against redundancy.
        q: Query point.
        data: Object providing ``len()`` and ``tolist()`` (the full data set).
        k: Number of selection rounds.
        numberOfCluster: Passed through to ``getNext``.
        numberOfLevels: Passed through to ``getNext``.

    Returns:
        List with one entry per round, in selection order. A round that finds
        no scorable candidate appends ``[0, 0]`` while ``getNext`` is active
        and the previous pick afterwards.
    """
    global getNextTime

    docs_selected = []
    checkGetNext = True
    lastDoc = None
    best1 = [0, 0]

    for _ in range(k):
        mmr = float("-inf")

        if checkGetNext:
            start = timeit.default_timer()
            R = getNext(cluster, indexMap, q, lambda_score, lastDoc, numberOfCluster, numberOfLevels)
            end = timeit.default_timer()
            getNextTime = end - start + getNextTime
            # Stop asking for candidates once they cover enough of the data.
            if len(R) >= stopcondcoeff * len(data):
                checkGetNext = False
            best1 = [0, 0]
        else:
            # Convert the full data set only when it is actually scored.
            R = data.tolist()

        for item in docs_selected:
            if item in R:
                R.remove(item)

        for d in R:
            sim = 0
            for s in docs_selected:
                dist = euclidean_distance_square(d, s)
                # Coinciding points do not contribute to redundancy.
                if dist == 0:
                    continue
                sim_current = 1 / (1 + dist)
                if sim_current > sim:
                    sim = sim_current

            rel = 1 / (1 + euclidean_distance_square(q, d))
            mmr_current = lambda_score * rel - (1 - lambda_score) * sim

            if mmr_current > mmr:
                mmr = mmr_current
                best1 = d

        docs_selected.append(best1)
        lastDoc = best1

    return docs_selected
Example #4
0
 def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
     """!
     @brief Checks the distance between clustering features built from two clusters.
     @details Asserts that the distance is symmetric and, for the known measurement types,
              that it agrees with the corresponding stand-alone utility function.

     @param[in] cluster1 (list): Points of the first cluster.
     @param[in] cluster2 (list): Points of the second cluster.
     @param[in] type_measurement (measurement_type): Measurement type that is verified.

     """
     first = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
     second = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

     # The measurement has to be symmetric.
     forward = first.get_distance(second, type_measurement)
     backward = second.get_distance(first, type_measurement)
     assert forward == backward

     float_delta = 0.0000001  # not referenced by the checks below

     # Compare against the reference implementation of the selected measurement.
     if type_measurement == measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
         expected = euclidean_distance_square(first.get_centroid(), second.get_centroid())
         assert forward == expected

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         expected = manhattan_distance(first.get_centroid(), second.get_centroid())
         assert forward == expected

     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         assert numpy.isclose(forward, average_inter_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         assert numpy.isclose(forward, average_intra_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         assert numpy.isclose(forward, variance_increase_distance(cluster1, cluster2))
Example #5
0
    def __find_another_nearest_medoid(self, point_index, current_medoid_index):
        """!
        @brief Finds the nearest medoid for the specified point among all current medoids except the specified one.

        @param[in] point_index: index of the point in the data space for which the medoid is searched.
        @param[in] current_medoid_index: index of the medoid that must not be considered as the nearest.

        @return (uint) index of the nearest other medoid for the point, or -1 if there is no other medoid.

        """
        other_medoid_index = -1
        other_distance_nearest = float("inf")
        for index_medoid in self.__current:
            if index_medoid == current_medoid_index:
                continue

            # Measure the distance to the candidate medoid itself. Measuring it to the
            # excluded medoid gives every candidate the same value, so the first one wins.
            other_distance_candidate = euclidean_distance_square(
                self.__pointer_data[point_index],
                self.__pointer_data[index_medoid],
            )

            if other_distance_candidate < other_distance_nearest:
                other_distance_nearest = other_distance_candidate
                other_medoid_index = index_medoid

        return other_medoid_index
Example #6
0
 def __merge_by_average_link(self):
     """!
     @brief Merges the pair of clusters that has the smallest average-link score.
     @details The score of a pair is the sum of squared Euclidean distances over all cross-cluster
              object pairs divided by the sum of the two cluster sizes. The second cluster of the
              winning pair is appended to the first one and then removed from the cluster list.

     """
     # NOTE(review): the score is normalized by (size1 + size2); classic average linkage
     # normalizes by (size1 * size2) - confirm which one is intended.
     smallest_score = float('Inf')
     amount_clusters = len(self.__clusters)

     for left in range(0, amount_clusters):
         for right in range(left + 1, amount_clusters):
             # Accumulate distances between every object of `left` and every object of `right`.
             total = 0.0
             for object_left in self.__clusters[left]:
                 for object_right in self.__clusters[right]:
                     total += euclidean_distance_square(self.__pointer_data[object_left], self.__pointer_data[object_right])

             score = total / (len(self.__clusters[left]) + len(self.__clusters[right]))

             if score < smallest_score:
                 smallest_score = score
                 merge_pair = (left, right)

     # `merge_pair` is bound only when some pair scored below infinity.
     target, source = merge_pair
     self.__clusters[target] += self.__clusters[source]
     self.__clusters.pop(source)   # remove merged cluster.
Example #7
0
    def __update_clusters(self, medoids):
        """!
        @brief Assigns every point to its closest medoid and rebuilds clusters and the membership list.
        @details Closeness is measured by squared Euclidean distance; on equal distances the medoid
                 that appears first in the list wins. Clusters that captured no point are dropped.

        @param[in] medoids (list): Indexes of points in the data space that act as medoids.

        """

        data = self.__pointer_data
        self.__belong = [0] * len(data)
        self.__clusters = [[] for _ in range(len(medoids))]

        for index_point in range(len(data)):
            nearest = -1
            nearest_distance = 0.0

            for position, index_medoid in enumerate(medoids):
                candidate = euclidean_distance_square(data[index_point], data[index_medoid])

                # The first medoid is always accepted, later ones only when strictly closer.
                if (candidate < nearest_distance) or (position == 0):
                    nearest, nearest_distance = position, candidate

            self.__clusters[nearest].append(index_point)
            self.__belong[index_point] = nearest

        # If cluster is not able to capture object it should be removed
        self.__clusters = [members for members in self.__clusters if members]
Example #8
0
    def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node_head, best_nodes):
        """!
        @brief Collects nodes of the subtree whose squared distance to the point does not exceed the threshold.
        @details The right subtree is visited first, then the left subtree, and the head node itself is
                 checked last. Matches are appended to the output list as tuples (squared distance, node).

        @param[in] point (list): Coordinates of the center of the search area.
        @param[in] distance (double): Search radius that is used to prune subtrees.
        @param[in] sqrt_distance (double): Threshold that is compared with the squared distance to a node.
        @param[in] node_head (node): Node from which the search is performed.
        @param[in|out] best_nodes (list): List that receives the found nodes.

        """

        right_child = node_head.right
        if right_child is not None:
            lower_bound = node_head.data[node_head.disc] - distance
            if point[node_head.disc] >= lower_bound:
                self.__recursive_nearest_nodes(point, distance, sqrt_distance, right_child, best_nodes)

        left_child = node_head.left
        if left_child is not None:
            upper_bound = node_head.data[node_head.disc] + distance
            if point[node_head.disc] < upper_bound:
                self.__recursive_nearest_nodes(point, distance, sqrt_distance, left_child, best_nodes)

        squared = euclidean_distance_square(point, node_head.data)
        if squared <= sqrt_distance:
            found = (squared, node_head)
            best_nodes.append(found)
Example #9
0
def GMM(X, K):
    """Greedily pick mutually distant points from ``X`` using precomputed seeds.

    The two seed points are read from the module-level ``firstTwoItem``.
    Afterwards, ``K - 2`` times, the point whose squared Euclidean distance to
    its nearest already chosen point is largest is added. Chosen points are
    removed from ``X`` in place. The module-level ``gmm_step1_time`` is reset
    to 0 and printed, because the seeding step does no work here.

    Args:
        X: List of points containing both seeds; mutated (every chosen point
            is removed).
        K: Number of points to pick. The two seeds are always returned.

    Returns:
        List of the chosen points, the two seeds first.
    """
    global gmm_step1_time, firstTwoItem

    a = firstTwoItem[0]
    b = firstTwoItem[1]
    C = [a, b]

    X.remove(a)
    X.remove(b)

    gmm_step1_time = 0
    print('Time for gmm step 1: ', gmm_step1_time)

    for _ in range(K - 2):
        # Distance from every remaining point to its nearest chosen point.
        nearest = []
        for i in X:
            closest = float("inf")
            for j in C:
                dist = euclidean_distance_square(i, j)
                if closest > dist:
                    closest = dist
            nearest.append(closest)

        # The point that is farthest from the chosen set joins it.
        index_max = int(np.argmax(nearest))
        C.append(X.pop(index_max))

    print("final C:", C)
    return C
Example #10
0
 def process(self):
     """!
     @brief Runs K-Medians cluster analysis.
     @details With the core library enabled the clusters come from the wrapper and the medians are
              recomputed once. Otherwise clusters and medians are updated in turns until the largest
              squared shift of a median does not exceed the squared tolerance.

     @remark Results of clustering can be obtained using corresponding get methods.

     @see get_clusters()
     @see get_medians()

     """

     if self.__ccore is True:
         self.__clusters = wrapper.kmedians(self.__pointer_data, self.__medians, self.__tolerance)
         self.__medians = self.__update_medians()
         return

     # Shifts are measured as squared distances, so the tolerance is squared as well.
     stop_condition = self.__tolerance * self.__tolerance

     # Check for dimension
     if len(self.__pointer_data[0]) != len(self.__medians[0]):
         raise NameError('Dimension of the input data and dimension of the initial cluster medians must be equal.')

     changes = float('inf')
     while changes > stop_condition:
         self.__clusters = self.__update_clusters()
         updated_centers = self.__update_medians()

         # The shift has to be evaluated before the new medians replace the old ones.
         shifts = [euclidean_distance_square(self.__medians[index], updated_centers[index])
                   for index in range(len(updated_centers))]
         changes = max(shifts)

         self.__medians = updated_centers
Example #11
0
    def __update_clusters(self):
        """!
        @brief Assigns each point to the nearest median using squared Euclidean distance.
        @details On equal distances the median that appears first wins. Clusters that captured
                 no point are removed from the result.

        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.

        """

        clusters = [[] for _ in range(len(self.__medians))]
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1
            dist_optim = 0.0

            for index in range(len(self.__medians)):
                dist = euclidean_distance_square(
                    self.__pointer_data[index_point], self.__medians[index])

                # Compare by value: `index is 0` tests object identity, which only works
                # through an interpreter detail and triggers a SyntaxWarning on Python 3.8+.
                if (dist < dist_optim) or (index == 0):
                    index_optim = index
                    dist_optim = dist

            clusters[index_optim].append(index_point)

        # If cluster is not able to capture object it should be removed
        clusters = [cluster for cluster in clusters if len(cluster) > 0]

        return clusters
    def __merge_by_average_link(self):
        """!
        @brief Merges the pair of clusters that has the smallest average-link score.
        @details The score of a pair is the sum of squared Euclidean distances over all cross-cluster
                 object pairs divided by the sum of the two cluster sizes. The second cluster of the
                 winning pair is appended to the first one and then removed from the cluster list.

        """

        # NOTE(review): the score is normalized by (size1 + size2); classic average linkage
        # normalizes by (size1 * size2) - confirm which one is intended.
        smallest_score = float('Inf')
        amount_clusters = len(self.__clusters)

        for left in range(0, amount_clusters):
            for right in range(left + 1, amount_clusters):
                # Accumulate distances between every object of `left` and every object of `right`.
                total = 0.0
                for object_left in self.__clusters[left]:
                    for object_right in self.__clusters[right]:
                        total += euclidean_distance_square(
                            self.__pointer_data[object_left],
                            self.__pointer_data[object_right])

                score = total / (len(self.__clusters[left]) + len(self.__clusters[right]))

                if score < smallest_score:
                    smallest_score = score
                    merge_pair = (left, right)

        # `merge_pair` is bound only when some pair scored below infinity.
        target, source = merge_pair
        self.__clusters[target] += self.__clusters[source]
        self.__clusters.pop(source)  # remove merged cluster.
Example #13
0
 def get_distance_matrix(self):
     """!
     @brief Calculates distance matrix (U-matrix).
     @details Each cell holds the mean squared Euclidean distance between the weight vector of the
              neuron and the weight vectors of its neighbors on the map. A neuron without neighbors
              keeps 0.0. When the core library is used, weights (and neighbors, unless the connection
              type is func_neighbor) are fetched from it first.

     @return (list) Distance matrix (U-matrix).

     @see show_distance_matrix()
     @see get_density_matrix()

     """
     if self.__ccore_som_pointer is not None:
         self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

         if self._conn_type != type_conn.func_neighbor:
             self._neighbors = wrapper.som_get_neighbors(self.__ccore_som_pointer)

     distance_matrix = [[0.0] * self._cols for _ in range(self._rows)]

     for i in range(self._rows):
         for j in range(self._cols):
             neuron_index = i * self._cols + j

             if self._conn_type == type_conn.func_neighbor:
                 self._create_connections(type_conn.grid_eight)

             neighbors = self._neighbors[neuron_index]

             # An isolated neuron (for example on a 1x1 map) has nothing to average over;
             # leave the cell at 0.0 instead of dividing by zero.
             if len(neighbors) == 0:
                 continue

             total = 0.0
             for neighbor_index in neighbors:
                 total += euclidean_distance_square(self._weights[neuron_index], self._weights[neighbor_index])

             distance_matrix[i][j] = total / len(neighbors)

     return distance_matrix
Example #14
0
    def get_distance(self, entry, type_measurement):
        """!
        @brief Calculates distance between this clustering feature and another one.

        @details For CENTROID_EUCLIDEAN_DISTANCE the squared Euclidean distance between centroids is
                 returned; take the square root of the result to obtain the real Euclidean distance.

        @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
        @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.

        @return (double) Distance between two clusters.

        """

        # Centroid based measurements.
        if type_measurement is measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
            return euclidean_distance_square(entry.get_centroid(), self.get_centroid())

        if type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE:
            return manhattan_distance(entry.get_centroid(), self.get_centroid())

        # Measurements that are delegated to dedicated private methods.
        if type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
            return self.__get_average_inter_cluster_distance(entry)

        if type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
            return self.__get_average_intra_cluster_distance(entry)

        if type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE:
            return self.__get_variance_increase_distance(entry)

        raise ValueError("Unsupported type of measurement '%s' is specified." % type_measurement)
Example #15
0
    def __recursive_nearest_nodes(self, point, distance, sqrt_distance,
                                  node_head, best_nodes):
        """!
        @brief Collects nodes of the subtree whose squared distance to the point does not exceed the threshold.
        @details The right subtree is visited first, then the left subtree, and the head node itself is
                 checked last. Matches are appended to the output list as tuples (squared distance, node).

        @param[in] point (list): Coordinates of the center of the search area.
        @param[in] distance (double): Search radius that is used to prune subtrees.
        @param[in] sqrt_distance (double): Threshold that is compared with the squared distance to a node.
        @param[in] node_head (node): Node from which the search is performed.
        @param[in|out] best_nodes (list): List that receives the found nodes.

        """

        right_child = node_head.right
        if right_child is not None:
            lower_bound = node_head.data[node_head.disc] - distance
            if point[node_head.disc] >= lower_bound:
                self.__recursive_nearest_nodes(point, distance, sqrt_distance,
                                               right_child, best_nodes)

        left_child = node_head.left
        if left_child is not None:
            upper_bound = node_head.data[node_head.disc] + distance
            if point[node_head.disc] < upper_bound:
                self.__recursive_nearest_nodes(point, distance, sqrt_distance,
                                               left_child, best_nodes)

        squared = euclidean_distance_square(point, node_head.data)
        if squared <= sqrt_distance:
            found = (squared, node_head)
            best_nodes.append(found)
Example #16
0
    def __update_clusters(self):
        """!
        @brief Assigns each non-medoid point to the nearest medoid using squared Euclidean distance.
        @details Every cluster starts with the index of its own medoid; points whose index is a
                 medoid index are not reassigned. On equal distances the medoid that appears first wins.

        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.

        """

        clusters = [[self.__medoid_indexes[i]] for i in range(len(self.__medoids))]
        for index_point in range(len(self.__pointer_data)):
            if index_point in self.__medoid_indexes:
                continue

            index_optim = -1
            dist_optim = float('Inf')

            for index in range(len(self.__medoids)):
                dist = euclidean_distance_square(self.__pointer_data[index_point], self.__medoids[index])

                # Compare by value: `index is 0` tests object identity, which only works
                # through an interpreter detail and triggers a SyntaxWarning on Python 3.8+.
                if (dist < dist_optim) or (index == 0):
                    index_optim = index
                    dist_optim = dist

            clusters[index_optim].append(index_point)

        return clusters
Example #17
0
    def get_distance_matrix(self):
        """!
        @brief Calculates distance matrix (U-matrix).
        @details Each cell holds the mean squared Euclidean distance between the weight vector of the
                 neuron and the weight vectors of its neighbors on the map. A neuron without neighbors
                 keeps 0.0. When the core library is used, weights (and neighbors, unless the connection
                 type is func_neighbor) are fetched from it first.

        @return (list) Distance matrix (U-matrix).

        @see show_distance_matrix()
        @see get_density_matrix()

        """
        if self.__ccore_som_pointer is not None:
            self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

            if self._conn_type != type_conn.func_neighbor:
                self._neighbors = wrapper.som_get_neighbors(self.__ccore_som_pointer)

        distance_matrix = [[0.0] * self._cols for _ in range(self._rows)]

        for i in range(self._rows):
            for j in range(self._cols):
                neuron_index = i * self._cols + j

                if self._conn_type == type_conn.func_neighbor:
                    self._create_connections(type_conn.grid_eight)

                neighbors = self._neighbors[neuron_index]

                # An isolated neuron (for example on a 1x1 map) has nothing to average over;
                # leave the cell at 0.0 instead of dividing by zero.
                if len(neighbors) == 0:
                    continue

                total = 0.0
                for neighbor_index in neighbors:
                    total += euclidean_distance_square(self._weights[neuron_index],
                                                       self._weights[neighbor_index])

                distance_matrix[i][j] = total / len(neighbors)

        return distance_matrix
Example #18
0
 def process(self):
     """!
     @brief Runs K-Medoids cluster analysis.
     @details With the core library enabled the clusters come from the wrapper and the medoids are
              recomputed once. Otherwise clusters and medoids are updated in turns until the largest
              squared shift of a medoid does not exceed the squared tolerance.

     @remark Results of clustering can be obtained using corresponding get methods.

     @see get_clusters()
     @see get_medoids()

     """

     if self.__ccore is True:
         self.__clusters = wrapper.kmedoids(self.__pointer_data, self.__medoid_indexes, self.__tolerance)
         self.__medoids, self.__medoid_indexes = self.__update_medoids()
         return

     # Shifts are measured as squared distances, so the tolerance is squared as well.
     stop_condition = self.__tolerance * self.__tolerance

     changes = float('inf')
     while changes > stop_condition:
         self.__clusters = self.__update_clusters()
         updated_medoids, update_medoid_indexes = self.__update_medoids()

         # The shift has to be evaluated before the new medoids replace the old ones.
         shifts = [euclidean_distance_square(self.__medoids[index], updated_medoids[index])
                   for index in range(len(updated_medoids))]
         changes = max(shifts)

         self.__medoids = updated_medoids
         self.__medoid_indexes = update_medoid_indexes
Example #19
0
 def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
     """!
     @brief Checks the distance between clustering features built from two clusters.
     @details Asserts that the distance is symmetric and, for the known measurement types,
              that it agrees with the corresponding stand-alone utility function.

     @param[in] cluster1 (list): Points of the first cluster.
     @param[in] cluster2 (list): Points of the second cluster.
     @param[in] type_measurement (measurement_type): Measurement type that is verified.

     """
     first = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
     second = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

     # The measurement has to be symmetric.
     forward = first.get_distance(second, type_measurement)
     backward = second.get_distance(first, type_measurement)
     assert forward == backward

     float_delta = 0.0000001  # not referenced by the checks below

     # Compare against the reference implementation of the selected measurement.
     if type_measurement == measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
         expected = euclidean_distance_square(first.get_centroid(), second.get_centroid())
         assert forward == expected

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         expected = manhattan_distance(first.get_centroid(), second.get_centroid())
         assert forward == expected

     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         assert numpy.isclose(forward, average_inter_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         assert numpy.isclose(forward, average_intra_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         assert numpy.isclose(forward, variance_increase_distance(cluster1, cluster2))
def createSimMatrix(q, X):
    """Map every position in ``X`` to the similarity between ``q`` and that point.

    The similarity is ``1 / (1 + euclidean_distance_square(q, point))``.

    Args:
        q: Query point.
        X: Iterable of points.

    Returns:
        Dict keyed by the 0-based position of each point in ``X``.
    """
    return {
        position: 1 / (1 + euclidean_distance_square(q, point))
        for position, point in enumerate(X)
    }
Example #21
0
def aug_mmr(cluster, lambda_score, q, data, k):
    """Select ``k`` documents by MMR over candidate sets supplied by ``getNext``.

    Every round asks ``getNext`` for a candidate list, prints its length,
    drops candidates that were already selected and scores the rest as
    ``lambda_score * rel - (1 - lambda_score) * sim``. Both terms use
    ``1 / (1 + euclidean_distance_square(...))``, so a candidate that
    coincides with the query no longer causes a division by zero.

    Args:
        cluster: Passed through to ``getNext``.
        lambda_score: Weight of relevance against redundancy.
        q: Query point.
        data: Full data set; passed through to ``getNext``.
        k: Number of selection rounds.

    Returns:
        List with one entry per round, in selection order. A round without
        any scorable candidate appends ``[0, 0]``.
    """
    docs_unranked = data
    docs_selected = []

    for _ in range(k):
        mmr = float("-inf")
        R = getNext(cluster, docs_unranked, docs_selected, q, lambda_score)
        print(len(R))

        best1 = [0, 0]
        for item in docs_selected:
            if item in R:
                R.remove(item)

        for d in R:
            sim = 0
            for s in docs_selected:
                dist = euclidean_distance_square(d, s)
                # Coinciding points do not contribute to redundancy.
                if dist == 0:
                    continue
                sim_current = 1 / (1 + dist)
                if sim_current > sim:
                    sim = sim_current

            # The "+ 1" keeps the score finite when d coincides with the query.
            rel = 1 / (1 + euclidean_distance_square(q, d))
            mmr_current = lambda_score * rel - (1 - lambda_score) * sim

            if mmr_current > mmr:
                mmr = mmr_current
                best1 = d

        docs_selected.append(best1)

    return docs_selected
def AugGMM(cluster, X1, indexMap, K, C1):
    """Extend ``C1`` by ``K - 2`` far-apart points, pruning with cluster bounds.

    For every child node of ``cluster.root`` the smallest upper and lower
    distance bounds to the points of ``C1`` are read from
    ``cluster.dismatrix``. Only nodes whose upper bound exceeds the largest
    lower bound contribute candidates. Among these candidates the point
    whose squared Euclidean distance to its nearest point of ``C1`` is
    largest is chosen. It is appended to ``C1`` and removed from ``X1`` and
    from the elements of its node.

    Args:
        cluster: Tree object providing ``root.children``, ``documentMap`` and
            ``dismatrix``.
        X1: List of remaining points; mutated.
        indexMap: Not used by this function.
        K: Target size; ``K - 2`` points are added.
        C1: List of already chosen points; mutated and returned.

    Returns:
        ``C1`` after the additions.
    """
    level = 1

    for _ in range(K - 2):
        LLmin = []
        LLmax = []
        for node1 in cluster.root.children:
            # Smallest upper / lower bound between this node and any chosen point.
            minmax = float("inf")
            minmin = float("inf")
            for e in C1:
                doc_id = cluster.documentMap[tuple(e)][level]
                distmax, distmin = cluster.dismatrix[level][doc_id][node1.id]

                if minmax > distmax[0]:
                    minmax = distmax[0]
                if minmin > distmin[0]:
                    minmin = distmin[0]

            LLmin.append(minmin)
            LLmax.append(minmax)

        # Keep only nodes whose upper bound exceeds the largest lower bound.
        maxofMin = max(LLmin)
        selecteditem = []
        for child, upper_bound in zip(cluster.root.children, LLmax):
            if upper_bound > maxofMin:
                selecteditem.extend(child.elements)

        # Exact max-min search restricted to the surviving candidates.
        nearest = []
        for i in selecteditem:
            closest = float("inf")
            for j in C1:
                dist = euclidean_distance_square(i, j)
                if closest > dist:
                    closest = dist
            nearest.append(closest)

        index_max = np.argmax(nearest)
        chosen = selecteditem[index_max]

        C1.append(chosen)
        X1.remove(chosen)
        node_id = cluster.documentMap[tuple(chosen)][level]
        node = cluster.root.children[node_id - 1]
        node.elements.remove(chosen)

    print("Aug-GMM result:", C1)
    return C1
Example #23
0
 def _competition(self, x):
     """!
     @brief Finds the neuron whose weights are closest to the input pattern.
     @details Closeness is measured by squared Euclidean distance; on equal distances the neuron
              with the smallest index wins.

     @param[in] x (list): Input pattern from the input data set, for example it can be coordinates of point.

     @return (uint) Returns index of neuron that is winner.

     """

     winner = 0
     best = euclidean_distance_square(self._weights[0], x)

     for neuron in range(1, self._size, 1):
         challenger = euclidean_distance_square(self._weights[neuron], x)
         if challenger < best:
             winner, best = neuron, challenger

     return winner
Example #24
0
    def _competition(self, x):
        """!
        @brief Finds the neuron whose weights are closest to the input pattern.
        @details Closeness is measured by squared Euclidean distance; on equal distances the neuron
                 with the smallest index wins.

        @param[in] x (list): Input pattern from the input data set, for example it can be coordinates of point.

        @return (uint) Returns index of neuron that is winner.

        """

        winner = 0
        best = euclidean_distance_square(self._weights[0], x)

        for neuron in range(1, self._size, 1):
            challenger = euclidean_distance_square(self._weights[neuron], x)
            if challenger < best:
                winner, best = neuron, challenger

        return winner
Example #25
0
    def __calculate_weight(self, stimulus1, stimulus2):
        """!
        @brief Calculates connection weight between two neurons from their external stimuli.
        @details The weight is exp(-d / (2 * average_distance)), where d is the squared Euclidean
                 distance between the stimuli.

        @param[in] stimulus1 (list): External stimulus of the first neuron.
        @param[in] stimulus2 (list): External stimulus of the second neuron.

        @return (double) Weight between neurons that are under specified stimulus.

        """

        squared_gap = euclidean_distance_square(stimulus1, stimulus2)
        scale = 2.0 * self.__average_distance
        return math.exp(-squared_gap / scale)
Example #26
0
    def __bayesian_information_criterion(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for input clusters using bayesian information criterion.
        @details The per-cluster scores are summed. When the number of points does not exceed the
                 number of clusters, every score stays at infinity.

        @param[in] clusters (list): Clusters for which splitting criterion should be calculated.
        @param[in] centers (list): Centers of the clusters.

        @return (double) Splitting criterion in line with bayesian information criterion.
                High value of splitting criterion means that current structure is much better.

        @see __minimum_noiseless_description_length(clusters, centers)

        """

        K = len(clusters)
        scores = [float('inf')] * K  # splitting criterion
        dimension = len(self.__pointer_data[0])

        # estimation of the noise variance in the data set
        sigma_sqrt = 0.0
        N = 0.0

        for index_cluster, members in enumerate(clusters):
            for index_object in members:
                sigma_sqrt += euclidean_distance_square(
                    self.__pointer_data[index_object], centers[index_cluster])

            N += len(members)

        # Without more points than clusters the variance cannot be estimated.
        if not (N - K > 0):
            return sum(scores)

        sigma_sqrt /= (N - K)
        p = (K - 1) + dimension * K + 1

        # in case of the same points, sigma_sqrt can be zero (issue: #407)
        if sigma_sqrt <= 0.0:
            sigma_multiplier = float('-inf')
        else:
            sigma_multiplier = dimension * 0.5 * log(sigma_sqrt)

        # splitting criterion
        for index_cluster, members in enumerate(clusters):
            n = len(members)

            L = n * log(n) - n * log(N) - n * 0.5 * log(
                2.0 * numpy.pi) - n * sigma_multiplier - (n - K) * 0.5

            # BIC calculation
            scores[index_cluster] = L - p * 0.5 * log(N)

        return sum(scores)
Example #27
0
 def __calculate_estimation(self):
     """!
     @brief Calculates estimation (cost) of the current clusters.
     @details The cost is the sum of squared Euclidean distances from every point of a cluster to
              the medoid of that cluster. The lower the estimation, the better the configuration.

     @return (double) estimation of current clusters.

     """
     total = 0.0
     for position, members in enumerate(self.__clusters):
         medoid_point = self.__current[position]
         for member in members:
             total += euclidean_distance_square(self.__pointer_data[member], self.__pointer_data[medoid_point])

     return total
Example #28
0
 def __calculate_estimation(self):
     """!
     @brief Evaluates the current clusters by summing the point-to-medoid costs.
     @details For each cluster the value returned by euclidean_distance_square() is added up for
              every point of the cluster paired with the cluster's medoid. A smaller result means
              a more optimal configuration of clusters.

     @return (double) Estimation of the current clusters.

     """
     accumulated = 0.0
     for position, cluster in enumerate(self.__clusters):
         medoid_index = self.__current[position]
         for point_index in cluster:
             point, medoid = self.__pointer_data[point_index], self.__pointer_data[medoid_index]
             accumulated += euclidean_distance_square(point, medoid)

     return accumulated
Example #29
0
 def __initialize_distances(self, size, location):
     """!
     @brief Builds the symmetric matrix of pairwise distances between neurons of the SOM grid.
     @details Each entry is the value returned by euclidean_distance_square() for the pair of
              neuron coordinates. Only the upper triangle (diagonal included) is computed; the
              lower triangle is filled by mirroring.

     @param[in] size (uint): Amount of neurons in the network.
     @param[in] location (list): List of coordinates of each neuron in the network.

     @return (list) Distance matrix (size x size) between neurons in the network.

     """
     matrix = [[[] for _ in range(size)] for _ in range(size)]
     for row in range(size):
         row_location = location[row]
         for column in range(row, size):
             value = euclidean_distance_square(row_location, location[column])
             matrix[row][column] = value
             matrix[column][row] = value

     return matrix
Example #30
0
    def __initialize_distances(self, size, location):
        """!
        @brief Creates the matrix of distances between all pairs of neurons in the SOM grid.
        @details The values come from euclidean_distance_square(). Because the matrix is
                 symmetric, each pair is evaluated once and written to both mirrored cells.

        @param[in] size (uint): Amount of neurons in the network.
        @param[in] location (list): List of coordinates of each neuron in the network.

        @return (list) Distance matrix between neurons in the network.

        """
        distances = [[[] for _ in range(size)] for _ in range(size)]
        for first in range(size):
            for second in range(first, size):
                # One evaluation fills both symmetric cells.
                distances[first][second] = distances[second][first] = \
                    euclidean_distance_square(location[first], location[second])

        return distances
Example #31
0
 def __calculate_nearest_distance(self, index_cluster1, index_cluster2):
     """!
     @brief Finds the two nearest objects taken from two specified clusters and returns the distance between them.
     @details Distances are the values returned by euclidean_distance_square().

     @param[in] index_cluster1 (uint): Index of the first cluster.
     @param[in] index_cluster2 (uint): Index of the second cluster.

     @return The smallest pairwise distance between the two clusters (infinity if there is no pair of objects).

     """
     nearest = float('Inf')

     for first_object in self.__clusters[index_cluster1]:
         for second_object in self.__clusters[index_cluster2]:
             candidate = euclidean_distance_square(self.__pointer_data[first_object], self.__pointer_data[second_object])
             nearest = min(nearest, candidate)

     return nearest
Example #32
0
 def __calculate_farthest_distance(self, index_cluster1, index_cluster2):
     """!
     @brief Finds the two farthest objects taken from two specified clusters and returns the distance between them.
     @details Distances are the values returned by euclidean_distance_square().

     @param[in] index_cluster1 (uint): Index of the first cluster.
     @param[in] index_cluster2 (uint): Index of the second cluster.

     @return The largest pairwise distance between the two clusters (0.0 if there is no pair of objects).

     """
     farthest = 0.0
     for first_object in self.__clusters[index_cluster1]:
         for second_object in self.__clusters[index_cluster2]:
             candidate = euclidean_distance_square(self.__pointer_data[first_object], self.__pointer_data[second_object])
             farthest = max(farthest, candidate)

     return farthest
Example #33
0
 def __calculate_nearest_distance(self, index_cluster1, index_cluster2):
     """!
     @brief Returns the distance between the closest pair of objects from two specified clusters.
     @details Every object of the first cluster is compared with every object of the second one
              using euclidean_distance_square(); the smallest value is kept.

     @param[in] index_cluster1 (uint): Index of the first cluster.
     @param[in] index_cluster2 (uint): Index of the second cluster.

     @return The nearest distance between the two clusters, or infinity when no pair of objects exists.

     """
     best_distance = float('Inf')

     for object1 in self.__clusters[index_cluster1]:
         for object2 in self.__clusters[index_cluster2]:
             current = euclidean_distance_square(self.__pointer_data[object1], self.__pointer_data[object2])
             if current < best_distance:
                 best_distance = current

     return best_distance
Example #34
0
 def __has_object_connection(self, oscillator_index1, oscillator_index2):
     """!
     @brief Checks whether the objects encoded by two oscillators contain at least one connected pair.
     @details A pair is considered connected when the value of euclidean_distance_square() for
              the two objects does not exceed the stored connectivity radius.

     @param[in] oscillator_index1 (uint): Index of the first oscillator in the second layer.
     @param[in] oscillator_index2 (uint): Index of the second oscillator in the second layer.

     @return (bool) True - if there is pair of connected objects encoded by specified oscillators.

     """
     first_neuron = self._som_osc_table[oscillator_index1]
     second_neuron = self._som_osc_table[oscillator_index2]

     # The search stops at the first pair that satisfies the radius condition.
     return any(
         euclidean_distance_square(self._data[first_object], self._data[second_object]) <= self._radius
         for first_object in self._som.capture_objects[first_neuron]
         for second_object in self._som.capture_objects[second_neuron]
     )
Example #35
0
 def __has_object_connection(self, oscillator_index1, oscillator_index2):
     """!
     @brief Searches for a pair of objects, encoded by the specified neurons, that are connected in line with connectivity radius.
     @details Objects captured by the SOM neurons that correspond to the two oscillators are
              compared pairwise with euclidean_distance_square() against the stored radius.

     @param[in] oscillator_index1 (uint): Index of the first oscillator in the second layer.
     @param[in] oscillator_index2 (uint): Index of the second oscillator in the second layer.

     @return (bool) True - if there is pair of connected objects encoded by specified oscillators.

     """
     neuron_a = self._som_osc_table[oscillator_index1]
     neuron_b = self._som_osc_table[oscillator_index2]

     for object_a in self._som.capture_objects[neuron_a]:
         for object_b in self._som.capture_objects[neuron_b]:
             if euclidean_distance_square(self._data[object_a], self._data[object_b]) <= self._radius:
                 return True

     return False
Example #36
0
    def __cluster_distance(self, cluster1, cluster2):
        """!
        @brief Calculates the minimal distance between two clusters using their representative points.
        @details Every representative point of the first cluster is compared with every
                 representative point of the second one using euclidean_distance_square()
                 (fast mode); the smallest value is returned.

        @param[in] cluster1 (cure_cluster): The first cluster.
        @param[in] cluster2 (cure_cluster): The second cluster.

        @return (double) Minimum distance between representative points of the two clusters
                (infinity if one of the clusters has no representative points).

        """

        closest = float('inf')
        for first_point in cluster1.rep:
            for second_point in cluster2.rep:
                closest = min(closest, euclidean_distance_square(first_point, second_point))

        return closest
Example #37
0
 def __merge_by_centroid_link(self):
     """!
     @brief Merges the most similar clusters in line with centroid link type.
     @details The pair of clusters with the smallest value of euclidean_distance_square() between
              their centers is located. The second cluster of the pair is appended to the first
              one, the center of the merged cluster is recalculated, and the second cluster and
              its center are removed.

     """

     closest_pair = None
     closest_distance = float('Inf')

     for left in range(len(self.__centers)):
         for right in range(left + 1, len(self.__centers)):
             gap = euclidean_distance_square(self.__centers[left], self.__centers[right])
             if gap < closest_distance:
                 closest_distance, closest_pair = gap, (left, right)

     keep, drop = closest_pair[0], closest_pair[1]

     self.__clusters[keep] += self.__clusters[drop]
     self.__centers[keep] = self.__calculate_center(self.__clusters[keep])

     # The absorbed cluster and its center are no longer needed.
     self.__clusters.pop(drop)
     self.__centers.pop(drop)
Example #38
0
 def __find_another_nearest_medoid(self, point_index, current_medoid_index):
     """!
     @brief Finds the medoid that is nearest to the specified point among all current medoids except the specified one.
     @details Nearness is measured with euclidean_distance_square() between the point and each
              candidate medoid. On ties the medoid that comes first in the list of current medoids wins.

     @param[in] point_index (uint): Index of the point in the data for which the medoid is searched.
     @param[in] current_medoid_index (uint): Index of the medoid that must not be considered as the nearest.

     @return (uint) Index of the nearest other medoid for the point, or -1 if there is no medoid
             other than the excluded one.

     """
     other_medoid_index = -1
     other_distance_nearest = float('inf')
     point = self.__pointer_data[point_index]

     for index_medoid in self.__current:
         if index_medoid == current_medoid_index:
             continue

         # The distance has to be measured to the candidate medoid itself. Measuring it to the
         # excluded medoid gives every candidate the same value, so the first candidate would
         # always be returned regardless of where the point is.
         other_distance_candidate = euclidean_distance_square(point, self.__pointer_data[index_medoid])

         if other_distance_candidate < other_distance_nearest:
             other_distance_nearest = other_distance_candidate
             other_medoid_index = index_medoid

     return other_medoid_index
Example #39
0
 def __merge_by_centroid_link(self):
     """!
     @brief Merges the pair of clusters whose centers are the closest (centroid link).
     @details Centers are compared pairwise with euclidean_distance_square(). The cluster with
              the larger index in the chosen pair is appended to the one with the smaller index,
              whose center is then recalculated; the absorbed cluster and its center are removed.

     """

     best_distance = float('Inf')
     best_pair = None

     for first in range(len(self.__centers)):
         for second in range(first + 1, len(self.__centers)):
             current = euclidean_distance_square(self.__centers[first], self.__centers[second])
             if current < best_distance:
                 best_distance = current
                 best_pair = [first, second]

     target, source = best_pair[0], best_pair[1]

     self.__clusters[target] += self.__clusters[source]
     self.__centers[target] = self.__calculate_center(self.__clusters[target])

     self.__clusters.pop(source)   # remove merged cluster.
     self.__centers.pop(source)    # remove merged center.
Example #40
0
 def __calculate_initial_clusters(self, centers):
     """!
     @brief Assigns every point of the sample to the cluster whose center is the nearest one.
     @details Nearness is measured with euclidean_distance_square(). On ties the center with the
              lowest index wins because a later center has to be strictly closer to take over.

     @param[in] centers (list): Centers of the clusters.

     @return (list) Clusters as list of clusters, one per center. Each cluster contains indexes of
             points from the sample.

     """

     clusters = [[] for _ in range(len(centers))]
     for index_point in range(len(self.__sample)):
         index_optim, dist_optim = -1, 0.0

         for index in range(len(centers)):
             dist = euclidean_distance_square(self.__sample[index_point], centers[index])

             # The first center always becomes the initial optimum. The index is compared by
             # value: 'is' with an int literal depends on small-integer caching of the
             # interpreter and triggers SyntaxWarning on Python 3.8+.
             if (dist < dist_optim) or (index == 0):
                 index_optim, dist_optim = index, dist

         clusters[index_optim].append(index_point)

     return clusters
Example #41
0
 def __calculate_initial_clusters(self, centers):
     """!
     @brief Distributes the points of the sample between clusters by the nearest center.
     @details For each point the value of euclidean_distance_square() to every center is
              calculated and the point is captured by the cluster with the smallest value
              (the earliest center wins when values are equal).

     @param[in] centers (list): Centers of the clusters.

     @return (list) Updated clusters as list of clusters. Each cluster contains indexes of points
             from the sample; there is one cluster per center.

     """

     clusters = [[] for _ in range(len(centers))]
     for index_point in range(len(self.__sample)):
         index_optim, dist_optim = -1, 0.0

         for index, center in enumerate(centers):
             dist = euclidean_distance_square(self.__sample[index_point], center)

             # Equality instead of identity: 'index is 0' only works thanks to small-integer
             # caching and is reported as SyntaxWarning by Python 3.8+. The first center
             # unconditionally seeds the optimum.
             if (dist < dist_optim) or (index == 0):
                 index_optim, dist_optim = index, dist

         clusters[index_optim].append(index_point)

     return clusters
def GMM(X, K, C):
    """Greedily extend the centers in C with the farthest remaining points of X.

    On each of the K - 2 rounds, the point of X whose smallest distance (as
    computed by euclidean_distance_square) to the centers already in C is the
    largest gets moved from X to C. Both X and C are modified in place. The
    selection stops early when X runs out of points.

    Args:
        X: List of candidate points; selected points are removed from it.
        K: Target number of centers. K - 2 rounds are run, presumably because
            the caller has already put two centers into C (confirm with caller).
        C: List of already chosen centers; selected points are appended to it.

    Returns:
        The list C (the same object that was passed in) after the selection.
    """
    for _ in range(K - 2):
        if not X:
            # No candidates left: np.argmax would raise on an empty sequence.
            break

        # Distance from every remaining point to its closest chosen center.
        # Infinity is used as the start value so arbitrarily large distances
        # are handled (a fixed numeric cap would clip them).
        nearest_center_distances = []
        for point in X:
            nearest = float('inf')
            for center in C:
                dist = euclidean_distance_square(point, center)
                if nearest > dist:
                    nearest = dist
            nearest_center_distances.append(nearest)

        index_max = int(np.argmax(nearest_center_distances))

        # Pop by position: avoids an equality search through X for the point.
        C.append(X.pop(index_max))

    print("final C:", C)
    return C
Example #43
0
    def __update_clusters(self, medoids):
        """!
        @brief Forms clusters in line with specified medoids by calculating the distance from each point to the medoids.
        @details Each point is assigned to the medoid with the smallest value of
                 euclidean_distance_square(); on ties the medoid that comes first in the list wins.
                 The point-to-cluster map is rebuilt together with the clusters, and clusters
                 that captured no points are removed afterwards.

        @param[in] medoids (list): Indexes of points from the data that are used as medoids.

        """

        self.__belong = [0] * len(self.__pointer_data)
        self.__clusters = [[] for _ in range(len(medoids))]
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1
            dist_optim = 0.0

            for index in range(len(medoids)):
                dist = euclidean_distance_square(self.__pointer_data[index_point], self.__pointer_data[medoids[index]])

                # The first medoid always seeds the optimum. The index is compared by value:
                # 'is' with an int literal relies on small-integer caching of the interpreter
                # and triggers SyntaxWarning on Python 3.8+.
                if (dist < dist_optim) or (index == 0):
                    index_optim = index
                    dist_optim = dist

            self.__clusters[index_optim].append(index_point)
            self.__belong[index_point] = index_optim

        # If cluster is not able to capture object it should be removed.
        # NOTE(review): the point-to-cluster map stores positions taken before this filtering, so
        # it may stop matching the cluster list when an empty cluster is dropped - confirm.
        self.__clusters = [cluster for cluster in self.__clusters if len(cluster) > 0]
Example #44
0
 def __optimize_configuration(self):
     """!
     @brief Finds quasi-optimal medoids and updates clusters in line with them using randomized swaps.
     @details On every iteration a random current medoid and a random non-medoid candidate are
              chosen, and the cost of replacing the former with the latter is accumulated over all
              non-medoid points. A negative cost accepts the swap, rebuilds the clusters and resets
              the neighbor counter; otherwise the counter is increased. The search stops when the
              counter reaches the configured maximum number of neighbors.
     
     """
     index_neighbor = 0
     while (index_neighbor < self.__maxneighbor):
         # get random current medoid that is to be replaced
         current_medoid_index = self.__current[random.randint(0, self.__number_clusters - 1)]
         # cluster position recorded for that medoid in the point-to-cluster map
         current_medoid_cluster_index = self.__belong[current_medoid_index]
         
         # get new candidate to be medoid
         candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1)
         
         # re-draw until the candidate is not one of the current medoids
         while candidate_medoid_index in self.__current:
             candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1)
         
         # total change of cost if the current medoid is replaced by the candidate
         candidate_cost = 0.0
         for point_index in range(0, len(self.__pointer_data)):
             if point_index not in self.__current:
                 # get non-medoid point and its medoid
                 point_cluster_index = self.__belong[point_index]
                 point_medoid_index = self.__current[point_cluster_index]
                 
                 # get other medoid that is nearest to the point (except current and candidate)
                 other_medoid_index = self.__find_another_nearest_medoid(point_index, current_medoid_index)
                 other_medoid_cluster_index = self.__belong[other_medoid_index]
                 
                 # for optimization calculate all required distances
                 # from the point to current medoid
                 distance_current = euclidean_distance_square(self.__pointer_data[point_index], self.__pointer_data[current_medoid_index])
                 
                 # from the point to candidate medoid
                 distance_candidate = euclidean_distance_square(self.__pointer_data[point_index], self.__pointer_data[candidate_medoid_index])
                 
                 # from the point to nearest (own) medoid; stays infinite when the condition below fails
                 # NOTE(review): the second comparison checks a medoid (point) index against a
                 # cluster index - looks suspicious, confirm the intended operand.
                 distance_nearest = float('inf')
                 if ( (point_medoid_index != candidate_medoid_index) and (point_medoid_index != current_medoid_cluster_index) ):
                     distance_nearest = euclidean_distance_square(self.__pointer_data[point_index], self.__pointer_data[point_medoid_index])
                 
                 # apply rules for cost calculation
                 if (point_cluster_index == current_medoid_cluster_index):
                     # case 1: the point belongs to the cluster of the replaced medoid and its own
                     # medoid is not farther than the candidate
                     if (distance_candidate >= distance_nearest):
                         candidate_cost += distance_nearest - distance_current
                     
                     # case 2: the point belongs to the cluster of the replaced medoid and the
                     # candidate is closer
                     else:
                         candidate_cost += distance_candidate - distance_current
                 
                 elif (point_cluster_index == other_medoid_cluster_index):
                     # case 3 ('nearest medoid' is the representative object of that cluster and object is more similar to 'nearest' than to 'candidate'):
                     # the cost is left unchanged
                     if (distance_candidate > distance_nearest):
                         pass;
                     
                     # case 4: the candidate is not farther than the nearest medoid
                     else:
                         candidate_cost += distance_candidate - distance_nearest
         
         if (candidate_cost < 0):
             # set candidate that has won
             self.__current[current_medoid_cluster_index] = candidate_medoid_index
             
             # recalculate clusters
             self.__update_clusters(self.__current)
             
             # reset iterations and start the investigation from the beginning
             index_neighbor = 0
             
         else:
             # swap rejected: count one more examined neighbor
             index_neighbor += 1