Beispiel #1
0
    def get_variances(self, one_variance=False):
        """Return the variance (sigma^2) of each cluster in a list of length K.

        Every cluster's ``variance`` attribute is updated in place and the
        values are returned in the order of ``self.clusters``.

        If one_variance is set to True, every cluster gets the same variance

            sigma^2 = d_max^2 / (2 * K)

        where d_max is the largest distance between any two cluster centers.
        Otherwise, the variance is determined individually:

            sigma^2 = sum ||x - mu||^2          if the cluster does not contain
                                                exactly one input (an empty
                                                cluster therefore gets 0)
            sigma^2 = avg(all other sigma^2's)  if the cluster contains exactly
                                                one input

        If every cluster contains exactly one input there is nothing to
        average, so the shared d_max based variance is used instead of
        failing with a division by zero.

        :param one_variance: Boolean indicating how to calculate the
                             variances.
        :return: List with the variance of each cluster.
        """
        if not one_variance:
            single_input_clusters = []
            measured_clusters = []
            for cluster in self.clusters:
                if len(cluster.assigned_inputs) == 1:
                    # A lone input coincides with nothing to spread over;
                    # its variance is filled in from the average below.
                    single_input_clusters.append(cluster)
                    continue
                cluster.variance = sum(
                    utils.euclidean_distance(x, cluster.center) ** 2
                    for x in cluster.assigned_inputs)
                measured_clusters.append(cluster)

            if measured_clusters:
                # Average only the variances computed in this call; the
                # single-input clusters may still carry stale (or no) values.
                avg_variance = (sum(cluster.variance
                                    for cluster in measured_clusters)
                                / float(len(measured_clusters)))
                for cluster in single_input_clusters:
                    cluster.variance = avg_variance
                return [cluster.variance for cluster in self.clusters]
            # No cluster had a measurable variance: fall through to the
            # shared variance so the result is still well defined.

        # Largest distance between any pair of cluster centers.
        d_max = 0
        for cluster in self.clusters:
            for other in self.clusters:
                distance = utils.euclidean_distance(cluster.center,
                                                    other.center)
                if distance > d_max:
                    d_max = distance
        for cluster in self.clusters:
            # sigma^2 = d_max^2 / 2K; 2.0 keeps the division in floats.
            cluster.variance = d_max ** 2 / (2.0 * self.K)
        return [cluster.variance for cluster in self.clusters]
 def distance_to_point(other_point):
     """Return ``utils.euclidean_distance`` between ``sample`` and other_point.

     ``sample`` is not a parameter: it is read from the enclosing scope,
     which is not visible in this excerpt.

     :param other_point: The point to measure against ``sample``; passed
                         unchanged as the second argument of the helper.
     """
     return utils.euclidean_distance(sample, other_point)