def find(self, X, labels, n_clusters):
    """Evaluate the index for a complete partition and cache the state.

    For every ordered pair of distinct clusters ``(a, b)`` the ratio
    ``(s_a + s_b) / d(c_a, c_b)`` is stored in ``self.sums[a][b]``, where
    ``s_*`` is the value returned by ``self.s`` for that cluster and
    ``d`` is the distance between the two centroids.  Pairs whose
    centroids coincide (distance 0) keep a ratio of 0.

    Args:
        X: data points, passed through to the helpers.
        labels: cluster index of every point.
        n_clusters: number of clusters.

    Returns:
        The mean over clusters of the largest ratio in each row.

    Side effects:
        Sets ``self.diameter``, ``self.s_clusters``, ``self.centroids``,
        ``self.points_in_clusters`` and ``self.sums``.
    """
    self.diameter = utils.find_diameter(X)
    self.s_clusters = [0. for _ in range(n_clusters)]
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.points_in_clusters = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)

    for c in range(n_clusters):
        self.s_clusters[c] = self.s(
            X, c, self.points_in_clusters, labels, self.centroids)

    self.sums = [[0] * n_clusters for _ in range(n_clusters)]
    total = 0
    for a in range(n_clusters):
        row = self.sums[a]
        for b in range(n_clusters):
            if a == b:
                continue
            gap = utils.euclidian_dist(self.centroids[a], self.centroids[b])
            if gap != 0:
                row[b] = (self.s_clusters[a] + self.s_clusters[b]) / gap
        # Worst (largest) ratio involving cluster ``a``.
        total += np.amax(row)
    return total / float(n_clusters)
def find(self, X, labels, n_clusters):
    """Compute the index from scratch and cache the intermediate state.

    The result is ``-(d_min / spread_max)`` where ``d_min`` is the
    smallest centroid-to-centroid distance between two distinct clusters
    that each own at least one point, and ``spread_max`` is the largest
    per-cluster value of
    ``2 * sum(point-to-own-centroid distances) / cluster size``.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        The negated ratio described above.

    Side effects:
        Sets ``self.diameter``, ``self.centroids``, ``self.cluster_sizes``,
        ``self.sums``, ``self.centers`` and ``self.centroid_dists``.
    """
    self.diameter = utils.find_diameter(X)
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)

    # Distance of every point to its own centroid, plus per-cluster totals.
    self.sums = [0 for _ in range(n_clusters)]
    self.centroid_dists = [0 for _ in range(len(labels))]
    for i in range(len(labels)):
        self.centroid_dists[i] = utils.euclidian_dist(
            X[i], self.centroids[labels[i]])
        self.sums[labels[i]] += self.centroid_dists[i]

    # Centroid-to-centroid distances; the diagonal keeps the sentinel.
    self.centers = np.array(
        [[sys.float_info.max] * n_clusters for _ in range(n_clusters)])
    for i in range(n_clusters):
        for j in range(n_clusters):
            if i != j:
                self.centers[i][j] = utils.euclidian_dist(
                    self.centroids[i], self.centroids[j])

    # Only pairs of clusters that actually occur in ``labels`` take part.
    # Scanning the distinct labels gives the same minimum as scanning
    # every pair of points, at O(K^2) instead of O(N^2).
    present = set(labels)
    minimum_dif_c = sys.float_info.max  # min dist in different clusters
    for a in present:
        for b in present:
            if a != b:
                minimum_dif_c = min(self.centers[a][b], minimum_dif_c)

    # ``2.0`` keeps this a true division even under integer-division rules.
    denominator = [
        self.sums[c] * (2.0 / self.cluster_sizes[c])
        for c in range(n_clusters)
    ]
    return -(minimum_dif_c / max(denominator))
def find(self, X, labels, n_clusters):
    """Compute the index from scratch and cache the intermediate state.

    With ``s_c`` the sum of distances over all same-cluster point pairs
    and ``n_w`` the number of such pairs (derived from the cluster
    sizes), the result is ``(s_c - s_min) / (s_max - s_min)`` where
    ``s_min`` / ``s_max`` are the sums of the ``n_w`` smallest / largest
    distances among all point pairs.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        The ratio described above.  No guard is applied when
        ``s_max == s_min``.

    Side effects:
        Sets ``self.centroids``, ``self.diameter``, ``self.cluster_sizes``,
        ``self.distances``, ``self.s_c``, ``self.n_w``, ``self.s_min`` and
        ``self.s_max``.
    """
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.diameter = utils.find_diameter(X)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)

    # One pass over all pairs: every distance is computed once and used
    # both for the global list and, when the labels match, for ``s_c``.
    self.distances = []
    self.s_c = 0
    n_points = len(labels)
    for i in range(n_points - 1):
        for j in range(i + 1, n_points):
            pair_dist = utils.euclidian_dist(X[i], X[j])
            self.distances.append(pair_dist)
            if labels[i] == labels[j]:
                self.s_c += pair_dist

    # Number of within-cluster pairs.
    self.n_w = 0
    for k in range(n_clusters):
        self.n_w += self.cluster_sizes[k] * (self.cluster_sizes[k] - 1) / 2

    self.s_min = heapq.nsmallest(int(self.n_w), self.distances)
    self.s_max = heapq.nlargest(int(self.n_w), self.distances)
    s_min_c = sum(self.s_min)
    s_max_c = sum(self.s_max)
    return (self.s_c - s_min_c) / (s_max_c - s_min_c)
def find(self, X, labels, n_clusters):
    """Evaluate the index for a complete partition and cache the state.

    Numerator: for every cluster, the distance from its centroid to the
    nearest other centroid, summed over clusters.
    Denominator: for every cluster, the sum of its ``ceil(0.1 * size)``
    largest point-to-centroid distances, scaled by ``10 / size``, summed
    over clusters.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``-(numerator / denominator)``.

    Side effects:
        Sets ``self.diameter``, ``self.centroids``, ``self.cluster_sizes``,
        ``self.centroid_dists`` and ``self.dists``.
    """
    self.diameter = utils.find_diameter(X)
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)

    n_points = len(labels)
    sentinel = sys.float_info.max
    self.centroid_dists = [[sentinel] * n_clusters for _ in range(n_clusters)]
    self.dists = [[0] * n_points for _ in range(n_clusters)]

    # Symmetric centroid distance matrix; the diagonal keeps the sentinel.
    for a in range(n_clusters - 1):
        for b in range(a + 1, n_clusters):
            gap = utils.euclidian_dist(self.centroids[a], self.centroids[b])
            self.centroid_dists[a][b] = gap
            self.centroid_dists[b][a] = gap

    numerator = 0.0
    for row in self.centroid_dists:
        numerator += np.amin(row)

    # Row ``c`` holds the distance to centroid ``c`` for the members of
    # cluster ``c`` and 0 for everyone else.
    for c in range(n_clusters):
        for p in range(n_points):
            if labels[p] == c:
                self.dists[c][p] = utils.euclidian_dist(
                    X[p], self.centroids[c])

    denominator = 0.0
    for c in range(n_clusters):
        size = self.cluster_sizes[c]
        # The 0.1 * |C_c| members lying farthest from the centroid.
        farthest = heapq.nlargest(int(math.ceil(0.1 * size)), self.dists[c])
        denominator += sum(farthest, 0.0) * 10.0 / size
    return -(numerator / denominator)
def find(self, X, labels, n_clusters):
    """Compute the index from scratch and cache the intermediate state.

    ``self.delta[a][b]`` (for ``a < b``) ends up holding the average
    distance over all point pairs with one point in cluster ``a`` and the
    other in cluster ``b``.  The result is the smallest non-zero such
    average divided by the largest distance between two points of the
    same cluster, negated.

    Args:
        X: 2-D array of data points (``X.shape`` is unpacked as rows, cols).
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``-min_average_between / max_within``.

    Side effects:
        Sets ``self.centroids``, ``self.cluster_sizes``, ``self.diameter``,
        ``self.dists``, ``self.dist_same_c`` and ``self.delta``.  Only the
        upper triangle (``a < b``) of ``self.delta`` is populated.
    """
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)
    self.diameter = utils.find_diameter(X)
    self.dists = [[0. for _ in range(len(labels))]
                  for _ in range(len(labels))]
    self.dist_same_c = []
    rows, _ = X.shape
    self.delta = np.array(
        [[0.0] * n_clusters for _ in range(n_clusters)])
    minimum_dif_c = sys.float_info.max  # min dist in different clusters
    maximum_same_c = sys.float_info.min  # max dist in the same cluster

    for i in range(rows - 1):
        for j in range(i + 1, rows):
            pair_dist = utils.euclidian_dist(X[i], X[j])
            self.dists[i][j] = pair_dist
            self.dists[j][i] = pair_dist
            if labels[i] != labels[j]:
                # Always accumulate above the diagonal.  Indexing with
                # (labels[i], labels[j]) directly would send the pair
                # below it whenever labels[i] > labels[j], and the
                # normalisation loop never reads that half.
                low, high = labels[i], labels[j]
                if low > high:
                    low, high = high, low
                self.delta[low][high] += pair_dist
            else:
                self.dist_same_c.append([i, j])
                maximum_same_c = max(pair_dist, maximum_same_c)

    # Turn the sums into averages and pick the smallest non-zero one.
    for i in range(n_clusters - 1):
        for j in range(i + 1, n_clusters):
            self.delta[i][j] /= float(
                self.cluster_sizes[i] * self.cluster_sizes[j])
            if self.delta[i][j] != 0:
                minimum_dif_c = min(minimum_dif_c, self.delta[i][j])
    return -minimum_dif_c / maximum_same_c
def find(self, X, labels, n_clusters):
    """Evaluate the index for a complete partition and cache the state.

    The result is the smallest centroid-to-centroid distance seen between
    the clusters of two differently-labelled points, divided by the
    largest distance between two points sharing a label, negated.

    Args:
        X: 2-D array of data points (``X.shape`` is unpacked as rows, cols).
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``-(min_between / max_within)``.

    Side effects:
        Sets ``self.diameter``, ``self.centroids``, ``self.cluster_sizes``,
        ``self.dist_same_c``, ``self.dists`` and ``self.centers``.
    """
    self.diameter = utils.find_diameter(X)
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)
    self.dist_same_c = []
    rows, _ = X.shape
    self.dists = [[0.] * rows for _ in range(rows)]

    # Centroid distance matrix; the diagonal keeps the sentinel value.
    self.centers = np.array(
        [[sys.float_info.max] * n_clusters for _ in range(n_clusters)])
    for a in range(n_clusters):
        for b in range(n_clusters):
            if a != b:
                self.centers[a][b] = utils.euclidian_dist(
                    self.centroids[a], self.centroids[b])

    closest_between = sys.float_info.max
    widest_within = sys.float_info.min
    for p in range(rows - 1):
        for q in range(p + 1, rows):
            pair_dist = utils.euclidian_dist(X[p], X[q])
            self.dists[p][q] = pair_dist
            self.dists[q][p] = pair_dist
            if labels[p] == labels[q]:
                self.dist_same_c.append([p, q])
                widest_within = max(pair_dist, widest_within)
            else:
                closest_between = min(
                    self.centers[labels[p]][labels[q]], closest_between)
    return -(closest_between / widest_within)
def find(self, X, labels, n_clusters):
    """Compute the index from scratch and cache the intermediate state.

    For each pair of distinct clusters ``self.delta[i][j]`` is the pooled
    mean point-to-own-centroid distance,
    ``(sums[i] + sums[j]) / (size[i] + size[j])``.  The result is the
    smallest such value divided by the largest distance between two
    points of the same cluster, negated.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``-(min_delta / max_within)``.

    Side effects:
        Sets ``self.diameter``, ``self.centroids``, ``self.cluster_sizes``,
        ``self.dists``, ``self.centroid_dists``, ``self.delta``,
        ``self.sums`` and ``self.dists_same_c``.
    """
    self.diameter = utils.find_diameter(X)
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)
    n_points = len(labels)
    self.dists = [[0 for _ in range(n_points)] for _ in range(n_points)]
    self.centroid_dists = [0 for _ in range(n_points)]
    self.delta = [[0 for _ in range(n_clusters)] for _ in range(n_clusters)]
    # Start from an empty list on every call; appending to an attribute
    # this method never created would raise AttributeError.
    self.dists_same_c = []
    minimum_dif_c = sys.float_info.max  # min dist in different clusters
    maximum_same_c = sys.float_info.min  # max dist in the same cluster

    self.sums = [0 for _ in range(n_clusters)]
    for i in range(n_points):
        self.centroid_dists[i] = utils.euclidian_dist(
            X[i], self.centroids[labels[i]])
        self.sums[labels[i]] += self.centroid_dists[i]

    for i in range(n_points - 1):
        for j in range(i + 1, n_points):
            pair_dist = utils.euclidian_dist(X[i], X[j])
            self.dists[i][j] = pair_dist
            self.dists[j][i] = pair_dist
            # Only pairs inside one cluster count towards the maximum.
            if labels[i] == labels[j]:
                self.dists_same_c.append([i, j])
                maximum_same_c = max(pair_dist, maximum_same_c)

    for i in range(n_clusters):
        for j in range(n_clusters):
            if i != j:
                self.delta[i][j] = (self.sums[i] + self.sums[j]) / float(
                    self.cluster_sizes[i] + self.cluster_sizes[j])
                minimum_dif_c = min(minimum_dif_c, self.delta[i][j])
    return -(minimum_dif_c / maximum_same_c)
def update(self, X, n_clusters, labels, k, l, id):
    """Re-evaluate the index after point ``id`` moved from cluster ``k`` to ``l``.

    NOTE(review): the cluster sizes are recounted from ``labels``, so this
    appears to expect ``labels`` to already reflect the move — confirm
    against callers.

    Numerator: ``self.delta`` is converted from averages back to sums with
    the previous cluster sizes, the distances between point ``id`` and the
    other points are removed from cluster ``k``'s entries and added to
    cluster ``l``'s, and the upper triangle is re-normalised with the new
    sizes.  The smallest non-zero entry is used.
    Denominator: the largest per-cluster value of
    ``2 * sum(point-to-own-centroid distances) / size``.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        n_clusters: number of clusters.
        labels: cluster index of every point.
        k: cluster the point left.
        l: cluster the point joined.
        id: index of the moved point.

    Returns:
        ``-(numerator / denominator)``.

    Side effects:
        Updates ``self.diameter``, ``self.point_in_c``, ``self.centroids``
        and ``self.delta``.
        NOTE(review): the refreshed centroid distances and per-cluster sums
        are kept in locals only; ``self.centroid_dists`` and ``self.sums``
        are not written back here — confirm that is intended.
    """
    self.diameter = utils.find_diameter(X)
    prev_point_in_c = list(self.point_in_c)
    prev_centroids = np.copy(self.centroids)
    self.point_in_c = cluster_centroid.count_cluster_sizes(labels, n_clusters)
    self.centroids = cluster_centroid.update_centroids(
        np.copy(self.centroids), np.copy(self.point_in_c), X[id], k, l)
    minimum_dif_c = sys.float_info.max  # min dist in different clusters

    # --- update numerator ---
    # Averages -> sums, using the sizes from before the move.
    for i in range(n_clusters):
        for j in range(n_clusters):
            self.delta[i][j] *= (prev_point_in_c[i] * prev_point_in_c[j])
    # NOTE(review): entries are addressed as
    # [label of the lower point index][label of the higher point index],
    # so some contributions land below the diagonal, which the
    # normalisation loop never reads.  Left unchanged because it has to
    # match however find() fills self.delta — confirm.
    for i in range(len(labels)):
        if labels[i] != k and id < i:
            self.delta[k][labels[i]] -= self.dists[id][i]
        if labels[i] != k and id > i:
            self.delta[labels[i]][k] -= self.dists[i][id]
        if labels[i] != l and id < i:
            self.delta[l][labels[i]] += self.dists[id][i]
        if labels[i] != l and id > i:
            self.delta[labels[i]][l] += self.dists[i][id]
    for i in range(n_clusters - 1):
        for j in range(i + 1, n_clusters):
            pair_count = self.point_in_c[i] * self.point_in_c[j]
            if pair_count == 0:
                # An empty cluster has no cross pairs; avoid dividing by
                # zero (the denominator below applies the same guard).
                self.delta[i][j] = 0.0
                continue
            self.delta[i][j] /= float(pair_count)
            if self.delta[i][j] != 0:
                minimum_dif_c = min(minimum_dif_c, self.delta[i][j])

    # --- update denominator ---
    new_centroid_dists = list(self.centroid_dists)
    dell = 10 ** (-math.log(len(X), 10) - 1)
    threshold = dell * self.diameter
    # The centroid shifts do not depend on the point being visited, so
    # they are measured once instead of once per point.
    k_moved = utils.euclidian_dist(
        prev_centroids[k], self.centroids[k]) > threshold
    l_moved = utils.euclidian_dist(
        prev_centroids[l], self.centroids[l]) > threshold
    for i in range(len(labels)):
        # The moved point is always refreshed: its cached distance refers
        # to the centroid of the cluster it has just left.
        if (i == id
                or (labels[i] == k and k_moved)
                or (labels[i] == l and l_moved)):
            new_centroid_dists[i] = utils.euclidian_dist(
                X[i], self.centroids[labels[i]])

    # Untouched clusters reuse their cached sums; k and l are re-summed.
    new_sums = [0 for _ in range(n_clusters)]
    for i in range(n_clusters):
        if i != k and i != l:
            new_sums[i] = self.sums[i]
    for i in range(len(labels)):
        if labels[i] == k or labels[i] == l:
            new_sums[labels[i]] += new_centroid_dists[i]

    denominator = list(new_sums)
    for i in range(n_clusters):
        if self.point_in_c[i] != 0:
            # ``2.0`` keeps this a true division under integer-division rules.
            denominator[i] *= (2.0 / self.point_in_c[i])
    return -(minimum_dif_c / max(denominator))
def find(self, X, labels, n_clusters):
    """Compute the index from scratch and cache the intermediate state.

    For every cluster ``k`` the largest ``s_k + s_l`` over the other
    clusters ``l`` is divided by the smallest centroid distance
    ``d(c_k, c_l)`` over the other clusters; the result is the mean of
    these quotients.  ``s_*`` is the value returned by ``self.s``.

    Args:
        X: data points, passed through to the helpers.
        labels: cluster index of every point.
        n_clusters: number of clusters.

    Returns:
        The mean quotient described above.

    Side effects:
        Sets ``self.centroids``, ``self.cluster_sizes``, ``self.max_s_sum``,
        ``self.min_centroids_dist``, ``self.s_clusters`` and
        ``self.diameter``.
    """
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)
    self.max_s_sum = [[sys.float_info.min for _ in range(n_clusters)]
                      for _ in range(n_clusters)]
    self.min_centroids_dist = [[sys.float_info.max for _ in range(n_clusters)]
                               for _ in range(n_clusters)]
    self.s_clusters = [0 for _ in range(n_clusters)]
    self.diameter = utils.find_diameter(X)
    for i in range(n_clusters):
        self.s_clusters[i] = self.s(
            X, i, self.cluster_sizes, labels, self.centroids)

    # Fill both halves of the matrices.  With only the ``l > k`` entries
    # set, each row's max/min would ignore lower-indexed clusters and the
    # last row would consist of sentinels alone.
    for k in range(n_clusters):
        for l in range(k + 1, n_clusters):
            s_sum = self.s_clusters[k] + self.s_clusters[l]
            centroid_gap = utils.euclidian_dist(
                self.centroids[k], self.centroids[l])
            self.max_s_sum[k][l] = s_sum
            self.max_s_sum[l][k] = s_sum
            self.min_centroids_dist[k][l] = centroid_gap
            self.min_centroids_dist[l][k] = centroid_gap

    numerator = 0.0
    for k in range(n_clusters):
        numerator += (np.max(self.max_s_sum[k])
                      / np.min(self.min_centroids_dist[k]))
    return numerator / n_clusters
def find(self, X, labels, n_clusters):
    """Compute the index from scratch and cache the intermediate state.

    For each cluster ``c``:
      * ``numerators[c]`` is the sum of its members' distances to its
        centroid;
      * for every point outside ``c`` the distance to the farthest member
        of ``c`` is stored in ``outer_min_dists[c]`` (entries for members
        of ``c``, or with a zero maximum, keep the sentinel);
      * ``accumulator[c]`` is ``numerators[c]`` divided by the smallest
        entry of ``outer_min_dists[c]``.
    The result is the sum of the accumulators divided by the number of
    points.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``sum(self.accumulator) / len(labels)``.

    Side effects:
        Sets ``self.diameter``, ``self.centroids``, ``self.cluster_sizes``,
        ``self.numerators``, ``self.inner_max_dists``,
        ``self.outer_min_dists`` and ``self.accumulator``.
    """
    self.diameter = utils.find_diameter(X)
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)
    n_points = len(labels)

    self.numerators = [0.0] * n_clusters
    for i in range(n_points):
        self.numerators[labels[i]] += utils.euclidian_dist(
            X[i], self.centroids[labels[i]])

    self.inner_max_dists = [[0 for _ in range(n_points)]
                            for _ in range(n_points)]
    self.outer_min_dists = [[sys.float_info.max for _ in range(n_points)]
                            for _ in range(n_clusters)]
    self.accumulator = [0 for _ in range(n_clusters)]

    # Distances between points of different clusters, each computed once
    # and mirrored.  Looping per cluster over (outside, inside) pairs
    # would visit every such pair twice.  Same-cluster entries stay 0.
    for i in range(n_points - 1):
        for j in range(i + 1, n_points):
            if labels[i] == labels[j]:
                continue
            pair_dist = utils.euclidian_dist(X[i], X[j])
            self.inner_max_dists[i][j] = pair_dist
            self.inner_max_dists[j][i] = pair_dist

    for c in range(n_clusters):
        for i in range(n_points):
            if labels[i] == c:
                continue
            # Farthest member of cluster ``c`` as seen from outside point i.
            row = self.inner_max_dists[i]
            farthest_member = 0
            for j in range(len(row)):
                if labels[j] == c:
                    farthest_member = max(farthest_member, row[j])
            if farthest_member != 0:
                self.outer_min_dists[c][i] = farthest_member
        outer_min_dist = np.amin(self.outer_min_dists[c])
        self.accumulator[c] = self.numerators[c] / outer_min_dist
    return sum(self.accumulator) / len(labels)
def find(self, X, labels, n_clusters):
    """Evaluate the index for a complete partition and cache the state.

    The largest centroid-to-centroid distance is divided by
    ``n_clusters`` times the sum, over all points, of the value returned
    by ``utils.d_ps`` for that point; the quotient is negated.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``-(max_centroid_distance / (sum(d_ps) * n_clusters))``.

    Side effects:
        Sets ``self.diameter``, ``self.dist_centroids`` (upper triangle
        only), ``self.dist_ps``, ``self.centroids`` and
        ``self.cluster_sizes``.
    """
    self.diameter = utils.find_diameter(X)
    n_points = len(labels)
    self.dist_centroids = [[0] * n_clusters for _ in range(n_clusters)]
    self.dist_ps = [0] * n_points
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)

    for a in range(n_clusters - 1):
        row = self.dist_centroids[a]
        for b in range(a + 1, n_clusters):
            row[b] = utils.euclidian_dist(self.centroids[a], self.centroids[b])
    widest_gap = np.amax(self.dist_centroids)

    for p in range(n_points):
        self.dist_ps[p] = utils.d_ps(
            X, labels, X[p], labels[p], self.centroids)
    total_d_ps = sum(self.dist_ps)
    return -(widest_gap / (total_d_ps * n_clusters))
def find(self, X, labels, n_clusters):
    """Evaluate the index for a complete partition and cache the state.

    The smallest distance between two points with different labels is
    divided by the largest distance between two points with the same
    label; the quotient is negated.

    Args:
        X: 2-D array of data points (``X.shape`` is unpacked as rows, cols).
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``-(min_between / max_within)``.

    Side effects:
        Sets ``self.diameter``, ``self.centroids``, ``self.dist``,
        ``self.dist_dif_c`` and ``self.dist_same_c``.
    """
    self.diameter = utils.find_diameter(X)
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    rows, _ = X.shape
    self.dist = [[0.] * rows for _ in range(rows)]
    self.dist_dif_c = []
    self.dist_same_c = []

    closest_between = sys.float_info.max
    widest_within = sys.float_info.min
    for p in range(rows - 1):
        for q in range(p + 1, rows):
            pair_dist = utils.euclidian_dist(X[p], X[q])
            self.dist[p][q] = pair_dist
            self.dist[q][p] = pair_dist
            if labels[p] == labels[q]:
                self.dist_same_c.append([p, q])
                widest_within = max(pair_dist, widest_within)
            else:
                self.dist_dif_c.append([p, q])
                closest_between = min(pair_dist, closest_between)
    return -(closest_between / widest_within)
def find(self, X, labels, n_clusters):
    """Evaluate the index for a complete partition and cache the state.

    For every ordered pair of distinct clusters ``(a, b)`` the ratio
    ``(sym_s_a + sym_s_b) / d(c_a, c_b)`` is stored in
    ``self.fractions[a][b]``, where ``sym_s_*`` is the value returned by
    ``self.sym_s`` and ``d`` is the centroid distance.

    NOTE(review): coinciding centroids are not guarded against here.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        The mean over clusters of the largest ratio in each row.

    Side effects:
        Sets ``self.diameter``, ``self.dist_ps``, ``self.sym_s_clusters``,
        ``self.fractions``, ``self.centroids`` and ``self.cluster_sizes``.
    """
    self.diameter = utils.find_diameter(X)
    n_points = len(labels)
    self.dist_ps = [0] * n_points
    self.sym_s_clusters = [0] * n_clusters
    self.fractions = [[0] * n_clusters for _ in range(n_clusters)]
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)

    for p in range(n_points):
        self.dist_ps[p] = utils.d_ps(
            X, labels, X[p], labels[p], self.centroids)
    for c in range(n_clusters):
        self.sym_s_clusters[c] = self.sym_s(
            X, labels, c, self.cluster_sizes, self.centroids)

    for a in range(n_clusters):
        for b in range(n_clusters):
            if a == b:
                continue
            scatter = self.sym_s_clusters[a] + self.sym_s_clusters[b]
            self.fractions[a][b] = scatter / utils.euclidian_dist(
                self.centroids[a], self.centroids[b])

    total = 0
    for row in self.fractions:
        total += np.amax(row)
    return total / float(n_clusters)
def find(self, X, labels, n_clusters):
    """Evaluate the index for a complete partition and cache the state.

    ``self.delta[a][b]`` is the pooled mean point-to-own-centroid
    distance of clusters ``a`` and ``b``,
    ``(sums[a] + sums[b]) / (size[a] + size[b])``.  The smallest such
    value is divided by the largest per-cluster ``2 * sums[c] / size[c]``
    and the quotient is negated.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``-(min_delta / max_spread)``.

    Side effects:
        Sets ``self.diameter``, ``self.centroids``, ``self.cluster_sizes``,
        ``self.centroid_dists``, ``self.delta`` and ``self.sums``.
    """
    self.diameter = utils.find_diameter(X)
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)
    n_points = len(labels)
    self.centroid_dists = [0] * n_points
    self.delta = [[0] * n_clusters for _ in range(n_clusters)]
    self.sums = [0] * n_clusters

    # Distance of each point to its own centroid and per-cluster totals.
    for p in range(n_points):
        self.centroid_dists[p] = utils.euclidian_dist(
            X[p], self.centroids[labels[p]])
        self.sums[labels[p]] += self.centroid_dists[p]

    smallest_delta = sys.float_info.max
    for a in range(n_clusters):
        for b in range(n_clusters):
            if a == b:
                continue
            pooled_size = float(self.cluster_sizes[a] + self.cluster_sizes[b])
            self.delta[a][b] = (self.sums[a] + self.sums[b]) / pooled_size
            smallest_delta = min(smallest_delta, self.delta[a][b])

    spreads = [
        self.sums[c] * (2 / self.cluster_sizes[c])
        for c in range(n_clusters)
    ]
    return -(smallest_delta / max(spreads))
def find(self, X, labels, n_clusters):
    """Compute the index from scratch and cache the intermediate state.

    Numerator: the sum of ``self.ov(i)`` over all points, visited cluster
    by cluster.
    Denominator: for every cluster, the sum of its ``ceil(0.1 * size)``
    largest point-to-centroid distances, scaled by ``10 / size``, summed
    over clusters.

    ``self.a`` and ``self.b`` are evaluated for every point first and
    stored in ``self.a_ss`` / ``self.b_ss``.
    NOTE(review): ``self.ov`` presumably reads those arrays — confirm.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``-(numerator / denominator)``.

    Side effects:
        Sets ``self.diameter``, ``self.centroids``, ``self.cluster_sizes``,
        ``self.dists``, ``self.dists_e``, ``self.dists_for_b``,
        ``self.max_b_ss``, ``self.b_ss_size``, ``self.a_ss`` and
        ``self.b_ss``.
    """
    self.diameter = utils.find_diameter(X)
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)
    n_points = len(labels)
    self.dists = [[0 for _ in range(n_points)] for _ in range(n_clusters)]
    self.dists_e = [[0 for _ in range(n_points)] for _ in range(n_points)]
    self.dists_for_b = [0 for _ in range(n_points)]
    self.max_b_ss = [0 for _ in range(n_points)]
    self.b_ss_size = [0 for _ in range(n_points)]

    # Pairwise distance matrix.  Both mirrored cells are written from one
    # evaluation, so only the upper triangle (and diagonal) is visited;
    # walking the full square would compute every distance twice.
    for i in range(n_points):
        for j in range(i, n_points):
            pair_dist = utils.euclidian_dist(X[i], X[j])
            self.dists_e[i][j] = pair_dist
            self.dists_e[j][i] = pair_dist

    self.a_ss = [0 for _ in range(n_points)]
    self.b_ss = [0 for _ in range(n_points)]
    for i in range(n_points):
        self.a_ss[i] = self.a(X, labels, i, labels[i])
        self.b_ss[i] = self.b(X, labels, i, labels[i])

    # Accumulate cluster by cluster to keep the summation order stable.
    numerator = 0.0
    for k in range(n_clusters):
        for i in range(n_points):
            if labels[i] != k:
                continue
            numerator += self.ov(i)

    # Row ``k`` holds the distance to centroid ``k`` for members of
    # cluster ``k`` and 0 for every other point.
    for k in range(n_clusters):
        for i in range(n_points):
            if labels[i] != k:
                continue
            self.dists[k][i] = utils.euclidian_dist(X[i], self.centroids[k])

    denominator = 0.0
    for k in range(n_clusters):
        # Sum of the 0.1 * |C_k| largest distances to the centroid.
        max_n = heapq.nlargest(
            int(math.ceil(0.1 * self.cluster_sizes[k])), self.dists[k])
        acc = sum(max_n, 0.0)
        denominator += acc * 10.0 / self.cluster_sizes[k]
    return -(numerator / denominator)
def find(self, X, labels, n_clusters):
    """Evaluate the index for a complete partition and cache the state.

    Numerator: for every point, the distance to the farthest point of its
    own cluster divided by that cluster's size, summed over all points.
    Denominator: for every cluster, the distance from its centroid to the
    nearest other centroid, summed over clusters.

    Args:
        X: 2-D array of data points (``X.shape`` is unpacked as rows, cols).
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``numerator / denominator``.

    Side effects:
        Sets ``self.diameter``, ``self.centroids``, ``self.cluster_sizes``,
        ``self.dists`` and ``self.centroids_dist``.
    """
    self.diameter = utils.find_diameter(X)
    elements, _ = X.shape
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)

    # Distances between points of the same cluster; other entries stay 0.
    self.dists = [[0] * elements for _ in range(elements)]
    for p in range(elements - 1):
        for q in range(p + 1, elements):
            if labels[p] == labels[q]:
                pair_dist = utils.euclidian_dist(X[p], X[q])
                self.dists[p][q] = pair_dist
                self.dists[q][p] = pair_dist

    # Each row's maximum is the distance from that point to the farthest
    # member of its own cluster.
    numerator = 0.0
    for p in range(elements):
        numerator += np.amax(self.dists[p]) / self.cluster_sizes[labels[p]]

    # Centroid distance matrix; the diagonal keeps the sentinel value.
    self.centroids_dist = [[sys.float_info.max] * n_clusters
                           for _ in range(n_clusters)]
    for a in range(n_clusters):
        for b in range(n_clusters):
            if a != b:
                self.centroids_dist[a][b] = utils.euclidian_dist(
                    self.centroids[a], self.centroids[b])

    denominator = 0.0
    for row in self.centroids_dist:
        denominator += np.amin(row)
    return numerator / denominator
def find(self, X, labels, n_clusters):
    """Compute the index from scratch and cache the intermediate state.

    The result is the sum of two terms:
      * ``term1``: the sum of ``self.normed_cluster_sigma`` over clusters
        divided by ``n_clusters * self.normed_sigma(X)``;
      * ``self.dens``: the average, over ordered pairs of *distinct*
        clusters ``(k, l)``, of ``den2(k, l) / max(den1(k), den1(l))``.

    Args:
        X: data points, passed through to the helpers.
        labels: cluster index of every point.
        n_clusters: number of clusters.

    Returns:
        ``term1 + self.dens``.

    Side effects:
        Sets ``self.diameter``, ``self.sigmas``, ``self.centroids``,
        ``self.cluster_sizes``, ``self.normed_sigma_x`` and ``self.dens``.
    """
    self.diameter = utils.find_diameter(X)
    self.sigmas = [0 for _ in range(n_clusters)]
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)
    for k in range(n_clusters):
        self.sigmas[k] = self.normed_cluster_sigma(X, labels, k)
    self.normed_sigma_x = self.normed_sigma(X)
    term1 = sum(self.sigmas) / (n_clusters * self.normed_sigma_x)

    stdev_val = self.stdev(n_clusters)
    # den1 depends on a single cluster only, so evaluate it once per
    # cluster instead of twice for every (k, l) pair.
    # NOTE(review): assumes den1 returns the same value on repeated calls.
    centre_density = [
        self.den1(X, labels, self.centroids, k, stdev_val)
        for k in range(n_clusters)
    ]

    self.dens = 0.0
    for k in range(n_clusters):
        for l in range(n_clusters):
            # The sum is normalised by K * (K - 1), the number of ordered
            # pairs of distinct clusters, so the k == l terms are excluded.
            if k == l:
                continue
            self.dens += (
                self.den2(X, labels, self.centroids, k, l, stdev_val)
                / max(centre_density[k], centre_density[l]))
    self.dens /= n_clusters * (n_clusters - 1)
    return (term1 + self.dens)
def find(self, X, labels, n_clusters):
    """Evaluate the index for a complete partition and cache the state.

    For every point ``i`` the values ``a_i = self.a(...)`` and
    ``b_i = self.b(...)`` are computed, and the per-point score is
    ``(b_i - a_i) / max(b_i, a_i)``.  The result is the mean score,
    negated.

    Args:
        X: data points; ``X[i]`` is the i-th point.
        labels: ``labels[i]`` is the cluster index of ``X[i]``.
        n_clusters: number of clusters.

    Returns:
        ``-(mean per-point score)``.

    Side effects:
        Sets ``self.diameter``, ``self.centroids``, ``self.cluster_sizes``,
        ``self.a_ss``, ``self.b_ss``, ``self.dists_e`` and
        ``self.dists_for_b``.
    """
    self.diameter = utils.find_diameter(X)
    self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    self.cluster_sizes = cluster_centroid.count_cluster_sizes(
        labels, n_clusters)
    n_points = len(labels)
    self.a_ss = [0] * n_points
    self.b_ss = [0] * n_points
    self.dists_e = [[0] * n_points for _ in range(n_points)]
    self.dists_for_b = [0] * n_points

    # Full pairwise distance matrix; it must be complete before the
    # a/b helpers run.
    for p in range(n_points):
        row = self.dists_e[p]
        for q in range(n_points):
            row[q] = utils.euclidian_dist(X[p], X[q])

    for p in range(n_points):
        self.a_ss[p] = self.a(X, labels, p, labels[p])
        self.b_ss[p] = self.b(X, n_clusters, labels, p, labels[p])

    total = 0
    for p in range(n_points):
        a_val = self.a_ss[p]
        b_val = self.b_ss[p]
        total += (b_val - a_val) / max(b_val, a_val)
    return -(total / float(n_points))