def __distance_matrix__(self, data, S, U):
    """Compute nearest and second-nearest medoid distances for every object.

    For each index in ``S`` and then each index in ``U``, the distances from
    that object to every object indexed by ``S`` are sorted in ascending
    order. The smallest is stored in the first dict and the second smallest
    in the second dict, both keyed by the object's index. For an index that
    is itself in ``S`` the sorted distances include its distance to itself.

    Returns:
        A two-element list ``[D, E]`` of dicts (nearest, second nearest).

    Raises:
        IndexError: if ``S`` holds fewer than two indices, because no
            second-smallest distance exists.
    """
    nearest, second_nearest = {}, {}
    # Selected objects are handled before unselected ones so the dict
    # insertion order stays "S first, then U".
    for group in (S, U):
        for idx in group:
            ranked = sorted(
                euclidean_distance(data[idx], data[medoid]) for medoid in S
            )
            nearest[idx] = ranked[0]
            second_nearest[idx] = ranked[1]
    return [nearest, second_nearest]
def __within_cluster_variation(self, data, k, clusters, centers):
    """Return the summed squared distance of members to their cluster center.

    For every cluster id ``c`` in ``range(k)``, each index in ``clusters[c]``
    is compared against ``data[centers[c]]`` (``centers`` maps a cluster id
    to an index into ``data``). The total is rounded to three decimals.
    """
    total = 0.0
    for cluster_id in range(k):
        for member in clusters[cluster_id]:
            distance = euclidean_distance(
                data[member], data[centers[cluster_id]]
            )
            # Accumulate term by term so the floating-point summation
            # order stays fixed.
            total += distance ** 2
    return round(total, 3)
def __within_cluster_variation__(self, data, assignment, centers):
    """Return the summed squared distance of each object to its center.

    ``assignment[i]`` is used as the key into ``centers`` for object ``i``,
    and ``centers[...]`` is passed to ``euclidean_distance`` directly as a
    point (not as an index into ``data``). The total is rounded to three
    decimals.
    """
    total = 0.0
    for idx in range(len(data)):
        point = data[idx]
        center = centers[assignment[idx]]
        distance = euclidean_distance(point, center)
        total += distance ** 2
    return round(total, 3)
def __find_initial_point__(self):
    """Return the index of the most central object in ``self.data``.

    The most central object is the one whose summed distance to every
    element of ``self.data`` is smallest; ties go to the lowest index.

    Raises:
        ValueError: if ``self.data`` is empty.
    """
    points = self.data

    def total_distance(idx):
        # Sum of distances from object ``idx`` to every object (itself too).
        return sum(euclidean_distance(points[idx], other) for other in points)

    # min() keeps the first minimal candidate, i.e. the lowest index.
    return min(range(len(points)), key=total_distance)
def __swap__(self, data, k):
    """Refine the medoids from ``__build__`` by repeated best-improving swaps.

    Starting from the selected set ``S`` and unselected set ``U`` returned
    by ``__build__``, every pair (medoid ``i``, non-medoid ``h``) is scored
    by the summed change in nearest-medoid distance over the other
    unselected objects. The pair with the lowest score is swapped as long
    as that score is negative; after each swap the nearest / second-nearest
    tables are recomputed with ``__distance_matrix__``.

    NOTE(review): the score skips ``j == h`` and does not include the
    removed medoid ``i`` becoming a non-medoid -- confirm this matches the
    intended swap-cost definition.

    Args:
        data: indexable collection of points.
        k: number of clusters, forwarded to ``__build__`` and
            ``__assign_items__``.

    Returns:
        ``[clusters, centers]`` as produced by ``__assign_items__``.
    """
    U, S, D, E = self.__build__(data, k)
    print("swap started")
    while True:
        best = float("inf")
        # Left as None when there is no candidate pair (e.g. U is empty
        # because every object is a medoid); ``best`` then stays at +inf
        # and the loop ends instead of failing on U[0].
        swap_out = swap_in = None
        for i in S:
            # d(j, i) does not depend on the candidate h, so compute it
            # once per medoid instead of once per (medoid, candidate).
            dist_to_i = {j: euclidean_distance(data[j], data[i]) for j in U}
            for h in U:
                effect = 0  # Tih
                for j in U:
                    if j == h:
                        continue
                    dis_ij = dist_to_i[j]
                    dis_jh = euclidean_distance(data[j], data[h])
                    if dis_ij > D[j]:
                        # j is served by another medoid; it only moves
                        # if h is closer than its current medoid.
                        effect += min(dis_jh - D[j], 0)
                    elif dis_ij == D[j]:
                        # j is served by i; it falls back to h or to its
                        # second-nearest medoid, whichever is closer.
                        effect += (min(dis_jh, E[j]) - D[j])
                if effect < best:
                    best = effect
                    swap_out = i
                    swap_in = h
        if best >= 0:
            break
        S.remove(swap_out)
        U.remove(swap_in)
        S.append(swap_in)
        U.append(swap_out)
        D, E = self.__distance_matrix__(data, S, U)
    clusters, centers = self.__assign_items__(data, S, k)
    return [clusters, centers]
def __get_assignment__(self, data, cluster_means, k):
    """Map every object index to the id of its closest cluster mean.

    Cluster ids ``0 .. k-1`` are scanned in order and a later id replaces
    the current choice only when it is strictly closer, so ties go to the
    lowest id. An object with no strictly-closer candidate keeps id 0.

    Returns:
        dict mapping object index -> cluster id.
    """
    assignment = {}
    for idx in range(len(data)):
        nearest_id, nearest_distance = 0, float("inf")
        for cluster_id in range(k):
            candidate = euclidean_distance(data[idx], cluster_means[cluster_id])
            if candidate < nearest_distance:
                nearest_id, nearest_distance = cluster_id, candidate
        assignment[idx] = nearest_id
    return assignment
def __assign_items__(self, data, S, k):
    """Group every object index under its closest medoid.

    ``centers`` maps position ``p`` in ``S`` to the medoid index ``S[p]``;
    ``objects`` maps the same position to the list of object indices whose
    closest medoid (among positions ``0 .. k-1``) is that one. Equal
    distances go to the lowest position.

    Returns:
        The tuple ``(objects, centers)``.
    """
    positions = range(len(S))
    centers = {pos: S[pos] for pos in positions}
    objects = {pos: [] for pos in positions}
    for idx in range(len(data)):
        # (distance, position) pairs: min() compares distance first and
        # falls back to the position on ties.
        ranked = [
            (euclidean_distance(data[idx], data[centers[pos]]), pos)
            for pos in range(k)
        ]
        _, closest = min(ranked)
        objects[closest].append(idx)
    return objects, centers
def __build__(self, data, k):
    """Greedily pick ``k`` initial medoids (BUILD phase).

    The first medoid comes from ``__find_initial_point__``. Each further
    medoid is the unselected object ``i`` with the largest gain, where the
    gain is the sum over the other unselected objects ``j`` of
    ``max(Dj - d(i, j), 0)`` and ``Dj`` is the distance from ``j`` to its
    nearest already-selected object. Ties go to the candidate that appears
    first in ``U``.

    NOTE(review): the first medoid is derived from ``self.data`` while the
    rest of the method uses the ``data`` argument -- callers presumably
    pass the same collection; confirm.

    Args:
        data: indexable collection of points.
        k: number of medoids to select; must be greater than 1.

    Returns:
        ``[U, S, D, E]``: unselected indices, selected indices, and the
        nearest / second-nearest distance dicts from
        ``__distance_matrix__``.

    Raises:
        AssertionError: if ``k`` is not greater than 1.
        ValueError: if ``k`` exceeds the number of objects in ``data``.
    """
    assert (k > 1)
    if k > len(data):
        # The selection loop would otherwise exhaust U and fail with an
        # opaque "empty sequence" ValueError.
        raise ValueError("k cannot exceed the number of objects")
    S = [self.__find_initial_point__()]  # set of selected objects
    U = list(range(len(data)))  # U = O - S
    U.remove(S[0])
    while len(S) != k:
        # Dj depends only on j and the current S, so compute it once per
        # round rather than once per (candidate, object) pair.
        nearest = {j: self.__min_distance_from_s(data, j, S) for j in U}
        candidates = []
        for i in U:
            gain = 0
            for j in U:
                if j != i:
                    gain += max(
                        nearest[j] - euclidean_distance(data[i], data[j]), 0
                    )
            candidates.append((gain, i))
        # Take the candidate that reduces total dissimilarity the most;
        # max() with a key keeps the first of several equal gains.
        best = max(candidates, key=lambda pair: pair[0])[1]
        S.append(best)
        U.remove(best)
    dis = self.__distance_matrix__(data, S, U)
    return [U, S, dis[0], dis[1]]
def __min_distance_from_s(self, data, j, S):
    """Return the smallest distance from object ``j`` to any object in ``S``.

    ``S`` holds indices into ``data``. Returns ``float("inf")`` when ``S``
    is empty.
    """
    nearest = float("inf")
    for selected in S:
        distance = euclidean_distance(data[j], data[selected])
        if distance < nearest:
            nearest = distance
    return nearest