def cluster_cost(Data, k, medoids, cluster_assignments):
    """Return the summed point-to-medoid distance of every cluster.

    Args:
        Data: 2-D array indexed as ``Data[row, :]``; one row per point.
        k: Number of clusters; the result has one entry per cluster.
        medoids: 2-D array indexed as ``medoids[i, :]``; row ``i`` is the
            medoid of cluster ``i``.
        cluster_assignments: One cluster label per row of ``Data``. Any
            sequence is accepted; it is converted to an array before use.

    Returns:
        Float array of length ``k`` where entry ``i`` is the sum of
        ``kh.euclid_distance(point, medoids[i, :])`` over the points labelled
        ``i``. Clusters without points keep a cost of 0.
    """
    # Coerce the labels so ``labels == i`` is an element-wise mask; a plain
    # list would compare as a whole and silently select no rows at all.
    labels = np.asarray(cluster_assignments)
    price = np.zeros(k)
    for i in range(k):
        members = Data[labels == i, :]
        if np.size(members, 0) == 0:
            # Empty cluster: cost stays 0 and its medoid is never read.
            continue
        medoid = medoids[i, :]
        for point in members:
            price[i] += kh.euclid_distance(point, medoid)
    return price
def cluster(k, Data, medoids):
    """Assign every point to the medoid with the smallest distance.

    Args:
        k: Number of medoids to compare each point against.
        Data: 2-D array indexed as ``Data[i, :]``; one row per point.
        medoids: Indexable collection where ``medoids[j]`` is medoid ``j``.

    Returns:
        Array with one entry per row of ``Data`` holding the index of the
        medoid for which ``kh.euclid_distance`` is smallest. The array uses
        NumPy's default float dtype, so labels are stored as floats; on a tie
        ``argmin`` keeps the lowest medoid index.
    """
    n_points = np.size(Data, 0)
    cluster_assignments = np.empty(n_points)
    for i in range(n_points):
        # The row is the same for every medoid, so fetch it once per point.
        point = Data[i, :]
        dist = np.empty(k)
        for j in range(k):
            dist[j] = kh.euclid_distance(point, medoids[j])
        cluster_assignments[i] = dist.argmin()
    return cluster_assignments
def find_greatest_dist(cluster_assignments, Data, medoids):
    """Find, for each cluster, the point lying farthest from its medoid.

    Args:
        cluster_assignments: One cluster label per row of ``Data``; each label
            is converted with ``int`` and used to index ``medoids``.
        Data: 2-D array indexed as ``Data[i, :]``; one row per point.
        medoids: Indexable collection where ``medoids[c]`` is the medoid of
            cluster ``c``; its length sets the number of clusters.

    Returns:
        Float array of shape ``(len(medoids), 2)``. Column 0 is the largest
        ``kh.euclid_distance`` found in the cluster and column 1 is the row
        index of the point that produced it (stored as a float). A cluster
        with no points keeps the initial ``[0, 0]`` row, which cannot be told
        apart from "point 0 at distance 0".
    """
    # The cluster count comes from the medoids themselves rather than from a
    # module-level ``k``, so the function works without any global state.
    n_clusters = len(medoids)
    greatest_dist = np.zeros(shape=(n_clusters, 2), dtype=float)
    for i in range(Data.shape[0]):
        current_medoid = int(cluster_assignments[i])
        distance = float(kh.euclid_distance(Data[i, :], medoids[current_medoid]))
        # ``<=`` lets a later point replace an earlier one at equal distance.
        if greatest_dist[current_medoid, 0] <= distance:
            greatest_dist[current_medoid] = [distance, int(i)]
    # greatest_dist[:, 0] is the distance and greatest_dist[:, 1] is the id
    # of the point it came from.
    return greatest_dist