Esempio n. 1
0
    def __distance_matrix__(self, data, S, U):
        """Map every index in ``S`` and ``U`` to its two smallest distances to ``S``.

        For each index, the distances from ``data[index]`` to ``data[ele]``
        for every ``ele`` in ``S`` are sorted in ascending order. The smallest
        value is stored in the first dict and the second smallest in the
        second dict. Indices of ``S`` are processed before indices of ``U``.

        Args:
            data: indexable collection of points.
            S: indices that distances are measured against.
            U: remaining indices.

        Returns:
            A two-element list ``[D, E]`` of dicts keyed by index.

        Raises:
            IndexError: if any index is processed while ``S`` holds fewer
                than two entries.
        """
        nearest, second_nearest = {}, {}
        # S first, then U, so the insertion order of both dicts is preserved.
        for group in (S, U):
            for idx in group:
                ranked = sorted(
                    euclidean_distance(data[idx], data[ref]) for ref in S)
                nearest[idx] = ranked[0]
                second_nearest[idx] = ranked[1]
        return [nearest, second_nearest]
Esempio n. 2
0
 def __within_cluster_variation(self, data, k, clusters, centers):
     """Return the summed squared distance of members to their cluster center.

     Args:
         data: indexable collection of points.
         k: number of clusters; cluster ids ``0 .. k-1`` are visited.
         clusters: mapping from cluster id to an iterable of member indices.
         centers: mapping from cluster id to the index (into ``data``) of
             that cluster's center.

     Returns:
         The total, rounded to three decimal places.
     """
     total = 0.0
     for cluster_id in range(k):
         for member in clusters[cluster_id]:
             point = data[member]
             center_point = data[centers[cluster_id]]
             # Accumulate term by term, in visiting order.
             total += euclidean_distance(point, center_point) ** 2
     return round(total, 3)
Esempio n. 3
0
 def __within_cluster_variation__(self, data, assignment, centers):
     """Return the summed squared distance of each point to its assigned center.

     Args:
         data: indexable collection of points, addressed by ``0 .. len-1``.
         assignment: mapping from point index to a key of ``centers``.
         centers: mapping from cluster key to the center point itself
             (not an index into ``data``).

     Returns:
         The total, rounded to three decimal places.
     """
     squared_terms = (
         euclidean_distance(data[idx], centers[assignment[idx]]) ** 2
         for idx in range(len(data)))

     total = 0.0
     for term in squared_terms:
         total += term
     return round(total, 3)
Esempio n. 4
0
 def __find_initial_point__(self):
     """Return the index of the point with the smallest total distance to all points.

     Used as the initial point for k-medoids. Reads ``self.data``. When
     several points share the smallest total, the lowest index wins because
     the ``(total, index)`` pairs are compared element by element.
     """
     totals = [
         (sum(euclidean_distance(self.data[idx], other)
              for other in self.data), idx)
         for idx in range(len(self.data))
     ]
     return min(totals)[1]
Esempio n. 5
0
    def __swap__(self, data, k):
        """Run the swap loop on top of ``__build__`` and return the clustering.

        Starting from the ``U, S, D, E`` produced by ``self.__build__``, every
        pair (``i`` in ``S``, ``h`` in ``U``) is scored by summing a per-object
        contribution over all ``j`` in ``U`` other than ``h``. The pair with
        the lowest score is swapped (``h`` moves into ``S``, ``i`` into ``U``)
        and ``D``/``E`` are recomputed. The loop stops once the lowest score
        is no longer negative.

        Args:
            data: indexable collection of points.
            k: number of clusters, forwarded to ``__build__`` and
                ``__assign_items__``.

        Returns:
            ``[clusters, centers]`` as produced by ``self.__assign_items__``.
        """
        U, S, D, E = self.__build__(data, k)

        print("swap started")

        while True:
            lowest_effect = float("inf")
            leaving, entering = S[0], U[0]

            for i in S:
                for h in U:
                    effect = 0  # Tih: accumulated effect of swapping i and h
                    for j in U:
                        if j == h:
                            continue
                        dist_to_i = euclidean_distance(data[j], data[i])
                        dist_to_h = euclidean_distance(data[j], data[h])
                        if dist_to_i > D[j]:
                            # i is farther from j than j's current D value
                            effect += min(dist_to_h - D[j], 0)
                        elif dist_to_i == D[j]:
                            # i sits exactly at j's current D value
                            effect += (min(dist_to_h, E[j]) - D[j])
                    # Strict comparison: the first pair reaching the minimum wins.
                    if effect < lowest_effect:
                        lowest_effect = effect
                        leaving = i
                        entering = h

            if lowest_effect >= 0:
                break

            S.remove(leaving)
            U.remove(entering)
            S.append(entering)
            U.append(leaving)
            D, E = self.__distance_matrix__(data, S, U)

        clusters, centers = self.__assign_items__(data, S, k)

        return [clusters, centers]
Esempio n. 6
0
    def __get_assignment__(self, data, cluster_means, k):
        """Assign every point to the cluster whose mean is nearest.

        Args:
            data: indexable collection of points, addressed by ``0 .. len-1``.
            cluster_means: indexable collection of cluster mean points,
                addressed by ``0 .. k-1``.
            k: number of clusters to consider.

        Returns:
            Dict mapping each point index to the chosen cluster id. The
            lowest cluster id wins ties, and a point keeps cluster ``0`` if
            no distance is below infinity.
        """
        assignment = {}
        for idx in range(len(data)):
            nearest, nearest_dist = 0, float("inf")
            for cluster in range(k):
                candidate = euclidean_distance(data[idx],
                                               cluster_means[cluster])
                if candidate < nearest_dist:
                    nearest, nearest_dist = cluster, candidate
            assignment[idx] = nearest

        return assignment
Esempio n. 7
0
    def __assign_items__(self, data, S, k):
        """Group every point index under the nearest of the centers in ``S``.

        Args:
            data: indexable collection of points, addressed by ``0 .. len-1``.
            S: sequence of indices into ``data`` used as cluster centers.
            k: number of cluster slots examined for each point.

        Returns:
            A tuple ``(objects, centers)``. ``centers`` maps slot number to
            the corresponding entry of ``S``; ``objects`` maps slot number to
            the list of point indices assigned to it. The lowest slot wins
            distance ties.
        """
        slots = range(len(S))
        centers = {slot: S[slot] for slot in slots}
        objects = {slot: [] for slot in slots}

        for idx in range(len(data)):
            ranked = [
                (euclidean_distance(data[idx], data[centers[slot]]), slot)
                for slot in range(k)
            ]
            objects[min(ranked)[1]].append(idx)
        return objects, centers
Esempio n. 8
0
    def __build__(self, data, k):
        """BUILD phase of PAM: greedily select ``k`` initial medoids.

        The first medoid comes from ``self.__find_initial_point__()``. Each
        further round computes, for every unselected candidate ``i``, the
        gain ``sum(max(Dj - d(i, j), 0))`` over the other unselected objects
        ``j``, where ``Dj`` is the distance from ``j`` to its nearest medoid
        so far. The candidate with the LARGEST gain becomes the next medoid,
        because it reduces the total dissimilarity the most. The previous
        code took the minimum gain, which picked the least useful candidate.

        Args:
            data: indexable collection of points, addressed by ``0 .. len-1``.
            k: number of medoids to select; must be greater than 1.

        Returns:
            ``[U, S, D, E]``: the unselected indices, the selected medoid
            indices, and the two dicts returned by
            ``self.__distance_matrix__``.

        Raises:
            ValueError: if the candidates run out before ``k`` medoids are
                selected (``k`` larger than the number of objects).
        """
        assert (k > 1)

        S = [self.__find_initial_point__()]  # set of selected objects
        U = [i for i in range(len(data))]  # U = O - S
        U.remove(S[0])

        while len(S) != k:
            if not U:
                raise ValueError(
                    "cannot select k medoids: k exceeds the number of objects")

            # Dj depends only on j and the current S, so compute it once per
            # round instead of once per (i, j) pair.
            nearest = {
                j: self.__min_distance_from_s(data, j, S)
                for j in U
            }

            gains = {}
            for i in U:
                gains[i] = sum(
                    max(nearest[j] - euclidean_distance(data[i], data[j]), 0)
                    for j in U if j != i)

            # max() returns the first maximal element, so the earliest
            # candidate in U wins ties.
            best = max(U, key=gains.get)
            S.append(best)
            U.remove(best)

        dis = self.__distance_matrix__(data, S, U)

        return [U, S, dis[0], dis[1]]
Esempio n. 9
0
 def __min_distance_from_s(self, data, j, S):
     """Return the smallest distance from ``data[j]`` to any ``data[i]``, ``i`` in ``S`` (Dj).

     Returns ``float("inf")`` when ``S`` is empty.
     """
     closest = float("inf")
     for medoid in S:
         candidate = euclidean_distance(data[j], data[medoid])
         if candidate < closest:
             closest = candidate
     return closest