Ejemplo n.º 1
0
    def summarize(self, out=sys.stdout):
        """Writes a plain-text report of the cluster to `out`.

        The report contains, in order: the number of centroids, the data
        distribution, the center values of every centroid (centroids are
        sorted by name), the distance statistics of every centroid and,
        only when there is more than one centroid, the intercentroid
        distance measures.

        """
        write = out.write
        write(u"Cluster of %s centroids\n\n" % len(self.centroids))

        write(u"Data distribution:\n")
        print_distribution(self.get_data_distribution(), out=out)
        write(u"\n\n")
        by_name = sorted(self.centroids, key=lambda item: item.name)

        write(u"Centroids features:\n")
        for current in by_name:
            write(utf8(u"\n%s: " % current.name))
            for position, (field_id, value) in enumerate(
                    current.center.items()):
                if isinstance(value, basestring):
                    # string values are shown between double quotes
                    value = u"\"%s\"" % value
                # every pair but the first one is preceded by a separator
                separator = ", " if position else ""
                field_name = self.fields[field_id]['name']
                write(utf8(u"%s%s: %s" % (separator, field_name, value)))
        write(u"\n\n")

        write(u"Data distance statistics:\n\n")
        for current in by_name:
            current.print_statistics(out=out)

        # the intercentroid section is skipped for single-centroid clusters
        if len(self.centroids) <= 1:
            return
        write(u"Intercentroids distance:\n\n")
        for current in by_name:
            write(utf8(u"To centroid: %s\n" % current.name))
            for measure, result in self.centroids_distance(current):
                write(u"%s%s: %s\n" % (INDENT, measure, result))
            write(u"\n")
Ejemplo n.º 2
0
    def summarize(self, out=sys.stdout):
        """Prints a summary of the cluster info

        Writes to `out`, in order: the number of centroids, the data
        distribution, the center values of every centroid (sorted by
        centroid name), the distance statistics of every centroid and the
        intercentroid distance measures. The last section is only written
        when the cluster has more than one centroid.

        """
        out.write(u"Cluster of %s centroids\n\n" % len(self.centroids))

        out.write(u"Data distribution:\n")
        print_distribution(self.get_data_distribution(), out=out)
        out.write(u"\n\n")
        centroids_list = sorted(self.centroids, key=lambda x: x.name)

        out.write(u"Centroids features:\n")
        for centroid in centroids_list:
            out.write(utf8(u"\n%s: " % centroid.name))
            # empty for the first pair, then a comma separates the pairs
            connector = ""
            for field_id, value in centroid.center.items():
                if isinstance(value, basestring):
                    # string values are shown between double quotes
                    value = u"\"%s\"" % value
                out.write(
                    utf8(u"%s%s: %s" %
                         (connector, self.fields[field_id]['name'], value)))
                connector = ", "
        out.write(u"\n\n")

        out.write(u"Data distance statistics:\n\n")
        for centroid in centroids_list:
            centroid.print_statistics(out=out)

        # distances between centroids need at least two centroids: with a
        # single one there is nothing to measure against, so the section
        # is left out
        if len(self.centroids) > 1:
            out.write(u"Intercentroids distance:\n\n")
            for centroid in centroids_list:
                out.write(utf8(u"To centroid: %s\n" % centroid.name))
                for measure, result in self.centroids_distance(centroid):
                    out.write(u"%s%s: %s\n" % (INDENT, measure, result))
                out.write(u"\n")
Ejemplo n.º 3
0
    def summarize(self, out=sys.stdout):
        """Writes a plain-text report of the cluster to `out`.

        The header tells whether the cluster is a G-means or a K-means one
        and how many centroids it has. It is followed by the data
        distribution (the global one first), the cluster metrics, the
        center values of every listed centroid, the distance distribution
        and, only when there is more than one centroid, the intercentroid
        distances.

        """
        if self.is_g_means:
            title = u'G-means Cluster (critical_value=%d)' % \
                self.critical_value
        else:
            title = u'K-means Cluster (k=%d)' % self.k
        size_info = ' with %d centroids\n\n' % len(self.centroids)
        out.write(title + size_info)

        out.write(u"Data distribution:\n")
        # the global distribution goes before the per-centroid one
        self.print_global_distribution(out=out)
        print_distribution(self.get_data_distribution(), out=out)
        out.write(u"\n")

        # listed entries: the global one (if any) and then the centroids
        # sorted by name
        listed = []
        if self.cluster_global:
            listed.append(self.cluster_global)
        listed.extend(sorted(self.centroids, key=lambda item: item.name))

        out.write(u"Cluster metrics:\n")
        self.print_ss_metrics(out=out)
        out.write(u"\n")

        out.write(u"Centroids:\n")
        for entry in listed:
            out.write(utf8(u"\n%s%s: " % (INDENT, entry.name)))
            for position, (field_id, value) in enumerate(
                    entry.center.items()):
                if isinstance(value, basestring):
                    # string values are shown between double quotes
                    value = u"\"%s\"" % value
                # every pair but the first one is preceded by a separator
                separator = ", " if position else ""
                field_name = self.fields[field_id]['name']
                out.write(utf8(u"%s%s: %s" % (separator, field_name, value)))
        out.write(u"\n\n")

        out.write(u"Distance distribution:\n\n")
        for entry in listed:
            entry.print_statistics(out=out)
        out.write(u"\n")

        if len(self.centroids) <= 1:
            return
        out.write(u"Intercentroid distance:\n\n")
        # the global entry, when present, comes first and is left out here
        if self.cluster_global:
            listed = listed[1:]
        for entry in listed:
            out.write(utf8(u"%sTo centroid: %s\n" % (INDENT, entry.name)))
            for measure, result in self.centroids_distance(entry):
                out.write(u"%s%s: %s\n" % (INDENT * 2, measure, result))
            out.write(u"\n")
Ejemplo n.º 4
0
    def summarize(self, out=sys.stdout):
        """Reports the cluster contents as text written to `out`.

        Sections, in order: a header (G-means or K-means plus the number
        of centroids), the data distribution starting with the global
        one, the cluster metrics, the center of each listed centroid, the
        distance distribution and the intercentroid distances. The last
        section is written only for clusters with more than one centroid.

        """
        write = out.write
        report_header = (
            u'G-means Cluster (critical_value=%d)' % self.critical_value
            if self.is_g_means else u'K-means Cluster (k=%d)' % self.k)
        write(report_header +
              ' with %d centroids\n\n' % len(self.centroids))

        write(u"Data distribution:\n")
        # "Global" is set as first entry
        self.print_global_distribution(out=out)
        print_distribution(self.get_data_distribution(), out=out)
        write(u"\n")
        global_first = [self.cluster_global] if self.cluster_global else []
        ordered = global_first + sorted(self.centroids,
                                        key=lambda node: node.name)

        write(u"Cluster metrics:\n")
        self.print_ss_metrics(out=out)
        write(u"\n")

        write(u"Centroids:\n")
        for node in ordered:
            write(utf8(u"\n%s%s: " % (INDENT, node.name)))
            prefix = ""
            for field_id, raw in node.center.items():
                # string values are wrapped in double quotes
                shown = u"\"%s\"" % raw if isinstance(raw, basestring) \
                    else raw
                write(utf8(u"%s%s: %s" % (prefix,
                                          self.fields[field_id]['name'],
                                          shown)))
                # pairs after the first one are comma separated
                prefix = ", "
        write(u"\n\n")

        write(u"Distance distribution:\n\n")
        for node in ordered:
            node.print_statistics(out=out)
        write(u"\n")

        if len(self.centroids) > 1:
            write(u"Intercentroid distance:\n\n")
            # drop the leading global entry, if there is one
            targets = ordered[1:] if self.cluster_global else ordered
            for node in targets:
                write(utf8(u"%sTo centroid: %s\n" % (INDENT, node.name)))
                for measure, result in self.centroids_distance(node):
                    write(u"%s%s: %s\n" % (INDENT * 2, measure, result))
                write(u"\n")
Ejemplo n.º 5
0
    def summarize(self, out=sys.stdout):
        """Writes the ensemble summary to `out`.

        The summary has three sections: the distribution of the training
        data, the distribution of the predictions and the field
        importance. `out` is flushed at the end.

        """
        sections = ((u"Data distribution:\n", "training"),
                    (u"Predicted distribution:\n", "predictions"))
        for heading, distribution_type in sections:
            # the distribution is retrieved before its heading is written
            current = self.get_data_distribution(distribution_type)
            out.write(heading)
            print_distribution(current, out=out)
            out.write(u"\n\n")

        out.write(u"Field importance:\n")
        self.print_importance(out=out)
        out.flush()
Ejemplo n.º 6
0
    def summarize(self, out=sys.stdout):
        """Prints the ensemble summary: the training data distribution,
        the predicted distribution and the field importance.

        Everything is written to `out`, which is flushed at the end.

        """
        def write_section(heading, distribution):
            """Writes a heading followed by the given distribution"""
            out.write(heading)
            print_distribution(distribution, out=out)
            out.write(u"\n\n")

        write_section(u"Data distribution:\n",
                      self.get_data_distribution("training"))
        write_section(u"Predicted distribution:\n",
                      self.get_data_distribution("predictions"))

        out.write(u"Field importance:\n")
        self.print_importance(out=out)
        out.flush()