def get_variances(self, one_variance=False):
    """Compute, store and return the variance (sigma^2) of every cluster.

    Each cluster's ``variance`` attribute is overwritten, and the values are
    returned as a list in the same order as ``self.clusters``.

    If ``one_variance`` is True, every cluster receives the same value::

        sigma^2 = d_max^2 / (2 * K)

    where ``d_max`` is the largest distance between any two cluster centers
    and ``K`` is ``self.K``.

    Otherwise the variance is determined per cluster::

        sigma^2 = sum ||x - mu||^2          cluster does not hold exactly
                                            one input
        sigma^2 = avg(the sigma^2's above)  cluster holds exactly one input

    A single-input cluster would otherwise always get 0, so it borrows the
    average of the variances that were actually computed. When there is
    nothing to average (every cluster holds exactly one input), those
    clusters get 0.0.

    :param one_variance: Boolean indicating how to calculate the variances.
    :return: List with one variance per cluster (empty if there are no
        clusters).
    """
    clusters = self.clusters

    if one_variance:
        # Largest center-to-center distance over all pairs of clusters.
        d_max = 0
        for cluster in clusters:
            for other in clusters:
                d_max = max(
                    d_max,
                    utils.euclidean_distance(cluster.center, other.center),
                )

        # Guarded so that an empty model never divides by K.
        if clusters:
            # sigma^2 = d_max^2 / (2K); the float literal keeps true
            # division regardless of the operand types.
            shared_variance = d_max ** 2 / (2.0 * self.K)
            for cluster in clusters:
                cluster.variance = shared_variance
    else:
        single_input_clusters = []
        computed_variances = []
        for cluster in clusters:
            if len(cluster.assigned_inputs) == 1:
                # Deferred: filled in with the average further down.
                single_input_clusters.append(cluster)
                continue
            cluster.variance = sum(
                utils.euclidean_distance(x, cluster.center) ** 2
                for x in cluster.assigned_inputs
            )
            computed_variances.append(cluster.variance)

        # Average only the variances computed above; single-input clusters
        # have no valid variance yet and must not contribute to the sum.
        if computed_variances:
            avg_variance = sum(computed_variances) / float(len(computed_variances))
        else:
            avg_variance = 0.0

        for cluster in single_input_clusters:
            cluster.variance = avg_variance

    return [cluster.variance for cluster in clusters]
def distance_to_point(other_point):
    """Return the distance from ``sample`` to ``other_point``.

    The measurement is delegated to ``utils.euclidean_distance``. ``sample``
    is not a parameter; it is looked up from the surrounding scope at call
    time.

    :param other_point: The point to measure against ``sample``.
    :return: Whatever ``utils.euclidean_distance(sample, other_point)``
        returns.
    """
    distance = utils.euclidean_distance(sample, other_point)
    return distance