def _get_min_sq_dist(self, point: Vector) -> float:
    """
    Return the smallest squared distance from ``point`` to any centroid.

    Entries of ``self._centroids`` for which ``centroid != []`` is false
    (empty-list placeholders) are skipped. When no centroid is compared
    at all, ``math.inf`` is returned.
    """
    closest = math.inf
    for centroid in self._centroids:
        if centroid != []:
            squared = euclidean_distance(point, centroid) ** 2
            # min() keeps ``closest`` unless ``squared`` is strictly smaller.
            closest = min(closest, squared)
    return closest
def _get_dist(self, c1: Cluster, c2: Cluster, measure=min) -> float:
    """
    Get the distance between two clusters ``c1`` and ``c2``.

    Every member of ``c1`` is paired with every member of ``c2``; members
    are used as indices into ``self._dataset`` and each pair is scored with
    ``euclidean_distance``. The resulting list of pairwise distances is
    reduced with ``measure``; the default ``min`` gives single-link distance.
    """
    pairwise = [
        euclidean_distance(self._dataset[a], self._dataset[b])
        for a in c1
        for b in c2
    ]
    return measure(pairwise)
def predict(self, point: Vector) -> int:
    """
    Predict the cluster label of ``point``.

    Each of the ``self._k`` clusters is scored by the sum of the distances
    (via ``euclidean_distance``) from ``point`` to every member of that
    cluster, where members are indices into ``self._dataset``. The cluster
    with the smallest score wins; ties go to the lowest label.

    Empty clusters are skipped: their score would be ``0``, which would
    beat every real cluster and make the empty cluster the prediction.

    The chosen label is printed before it is returned.

    Returns the index of the best cluster, or ``-1`` when there is no
    non-empty cluster to compare against.
    """
    min_dist = math.inf
    best_label = -1
    for i in range(self._k):
        cluster_vectors = [self._dataset[j] for j in self._clusters[i]]
        if not cluster_vectors:
            # Nothing to measure against; a zero sum must not win.
            continue
        dist = sum(euclidean_distance(point, p) for p in cluster_vectors)
        if dist < min_dist:
            min_dist = dist
            best_label = i
    print(f"The label of {point} is {best_label}")
    return best_label
def predict(self, point: Vector) -> int:
    """
    Predict the label of the new sample point.

    Each cluster in ``self._clusters`` is scored by the distance (via
    ``euclidean_distance``) from ``point`` to its nearest member, where
    members are indices into ``self._dataset``. The cluster with the
    smallest score wins; ties go to the lowest label.

    An empty cluster scores ``math.inf`` and therefore can never be
    selected (previously it raised ``ValueError`` from ``min``).

    Returns the index of the winning cluster, or ``-1`` when no cluster
    has a member closer than infinity (e.g. there are no clusters).
    """
    min_dist = math.inf
    label = -1
    for cluster_label, cluster in enumerate(self._clusters):
        dist = min(
            (euclidean_distance(point, self._dataset[i]) for i in cluster),
            default=math.inf,  # empty cluster: never the nearest
        )
        if dist < min_dist:
            min_dist = dist
            label = cluster_label
    return label
def _get_centroid_label(self, point: Vector, centroids: DataSet) -> int:
    """
    Return the index of the centroid in ``centroids`` nearest to ``point``.

    Distances are computed with ``euclidean_distance``. When several
    centroids are equally near, the lowest index is returned. An empty
    ``centroids`` raises ``ValueError`` (from ``min``).
    """
    gaps = [euclidean_distance(point, center) for center in centroids]
    # min() over the indices returns the first index with the smallest gap.
    return min(range(len(gaps)), key=gaps.__getitem__)