Example #1
0
File: model.py Project: rmmx/python
    def summarize(self, out=sys.stdout):
        """Prints summary grouping distribution as class header and details

        The report written to `out` contains, in this order: the data
        distribution, the predicted distribution, the field importance
        (only when `self.field_importance` is truthy) and, for every
        predicted group, a header with the rules shared by all its
        subgroups followed by one line per subgroup.

        :param out: object with `write` and `flush` methods that receives
                    the report (defaults to `sys.stdout`)
        """
        tree = self.tree

        def extract_common_path(groups):
            """Extracts the common segment of the prediction path for a group

            For every group, stores in `groups[group]['total'][0]` the
            leading predicates that all the subgroup paths share and
            replaces `groups[group]['details']` with the same entries
            sorted by descending `subgroup[1]`.
            """
            for group in groups:
                details = groups[group]['details']
                common_path = []
                if details:
                    paths = [subgroup[0] for subgroup in details]
                    # Only positions present in every path can be common.
                    shortest = min(len(path) for path in paths)
                    for index in range(shortest):
                        candidate = paths[0][index]
                        # An explicit `break` is required to stop at the
                        # first divergence: assigning to the loop variable
                        # would not end a Python `for` loop, and predicates
                        # located after the divergence would be appended.
                        if any(path[index] != candidate for path in paths):
                            break
                        common_path.append(candidate)
                groups[group]['total'][0] = common_path
                if details:
                    groups[group]['details'] = sorted(details,
                                                      key=lambda x: x[1],
                                                      reverse=True)

        def confidence_error(value, impurity=None):
            """Returns confidence for categoric objective fields
               and error for numeric objective fields

               An empty string is returned when `value` is None. The
               impurity literal is only added to the confidence text, and
               only when `impurity` is a positive number.
            """
            if value is None:
                return ""
            impurity_literal = ""
            if impurity is not None and impurity > 0:
                impurity_literal = "; impurity: %.2f%%" % (round(impurity, 4))
            objective_type = self.fields[tree.objective_id]['optype']
            if objective_type == 'numeric':
                return u" [Error: %s]" % value
            return u" [Confidence: %.2f%%%s]" % ((round(value, 4) * 100),
                                                 impurity_literal)

        distribution = self.get_data_distribution()

        out.write(u"Data distribution:\n")
        print_distribution(distribution, out=out)
        out.write(u"\n\n")

        groups = self.group_prediction()
        predictions = self.get_prediction_distribution(groups)

        out.write(u"Predicted distribution:\n")
        print_distribution(predictions, out=out)
        out.write(u"\n\n")

        if self.field_importance:
            out.write(u"Field importance:\n")
            print_importance(self, out=out)

        extract_common_path(groups)

        # Groups are reported in the order given by the predicted
        # distribution; the first item of each entry is the group key.
        for group in (entry[0] for entry in predictions):
            totals = groups[group]['total']
            details = groups[group]['details']
            path = [predicate.to_rule(self.fields) for predicate in totals[0]]
            # `* 1.0` forces a float division of the counters
            data_per_group = totals[1] * 1.0 / tree.count
            pred_per_group = totals[2] * 1.0 / tree.count
            out.write(utf8(u"\n\n%s : (data %.2f%% / prediction %.2f%%) %s\n" %
                           (group,
                            round(data_per_group, 4) * 100,
                            round(pred_per_group, 4) * 100,
                            " and ".join(path))))

            if not details:
                out.write(u"    The model will never predict this class\n")
            for subgroup in details:
                # share of the group's predictions owned by this subgroup
                pred_per_sgroup = subgroup[1] * 1.0 / totals[2]
                path = [predicate.to_rule(self.fields) for
                        predicate in subgroup[0]]
                path_chain = " and ".join(path) if path else "(root node)"
                out.write(utf8(u"    · %.2f%%: %s%s\n" %
                               (round(pred_per_sgroup, 4) * 100,
                                path_chain,
                                confidence_error(subgroup[2],
                                                 impurity=subgroup[3]))))
        out.flush()
Example #2
0
    def print_importance(self, out=sys.stdout):
        """Prints ensemble field importance

        Thin wrapper: hands this object and `out` to the `print_importance`
        function. Inside the body the bare name is looked up outside the
        class namespace, so the call does not reach this method again.

        :param out: forwarded unchanged as the `out` keyword argument
                    (defaults to `sys.stdout`)
        """
        print_importance(self, out=out)
Example #3
0
    def summarize(self, out=sys.stdout):
        """Prints summary grouping distribution as class header and details

        The report written to `out` contains, in this order: the data
        distribution, the predicted distribution, the field importance
        (only when `self.field_importance` is truthy) and, for every
        predicted group, a header with the rules shared by all its
        subgroups followed by one line per subgroup.

        :param out: object with `write` and `flush` methods that receives
                    the report (defaults to `sys.stdout`)
        """
        tree = self.tree

        def extract_common_path(groups):
            """Extracts the common segment of the prediction path for a group

            For every group, stores in `groups[group]['total'][0]` the
            leading predicates that all the subgroup paths share and
            replaces `groups[group]['details']` with the same entries
            sorted by descending `subgroup[1]`.
            """
            for group in groups:
                details = groups[group]['details']
                common_path = []
                if details:
                    paths = [subgroup[0] for subgroup in details]
                    # Only positions present in every path can be common.
                    shortest = min(len(path) for path in paths)
                    for index in range(shortest):
                        candidate = paths[0][index]
                        # An explicit `break` is required to stop at the
                        # first divergence: assigning to the loop variable
                        # would not end a Python `for` loop, and predicates
                        # located after the divergence would be appended.
                        if any(path[index] != candidate for path in paths):
                            break
                        common_path.append(candidate)
                groups[group]['total'][0] = common_path
                if details:
                    groups[group]['details'] = sorted(details,
                                                      key=lambda x: x[1],
                                                      reverse=True)

        def confidence_error(value):
            """Returns confidence for categoric objective fields
               and error for numeric objective fields

               An empty string is returned when `value` is None.
            """
            if value is None:
                return ""
            objective_type = self.fields[tree.objective_field]['optype']
            if objective_type == 'numeric':
                return u" [Error: %s]" % value
            return u" [Confidence: %.2f%%]" % (round(value, 4) * 100)

        distribution = self.get_data_distribution()

        out.write(u"Data distribution:\n")
        print_distribution(distribution, out=out)
        out.write(u"\n\n")

        groups = self.group_prediction()
        predictions = self.get_prediction_distribution(groups)

        out.write(u"Predicted distribution:\n")
        print_distribution(predictions, out=out)
        out.write(u"\n\n")

        if self.field_importance:
            out.write(u"Field importance:\n")
            print_importance(self, out=out)

        extract_common_path(groups)

        # Groups are reported in the order given by the predicted
        # distribution; the first item of each entry is the group key.
        for group in (entry[0] for entry in predictions):
            totals = groups[group]['total']
            details = groups[group]['details']
            path = [predicate.to_rule(self.fields) for predicate in totals[0]]
            # `* 1.0` forces a float division of the counters
            data_per_group = totals[1] * 1.0 / tree.count
            pred_per_group = totals[2] * 1.0 / tree.count
            out.write(
                utf8(u"\n\n%s : (data %.2f%% / prediction %.2f%%) %s\n" %
                     (group, round(data_per_group, 4) * 100,
                      round(pred_per_group, 4) * 100, " and ".join(path))))

            if not details:
                out.write(u"    The model will never predict this class\n")
            for subgroup in details:
                # share of the group's predictions owned by this subgroup
                pred_per_sgroup = subgroup[1] * 1.0 / totals[2]
                path = [
                    predicate.to_rule(self.fields)
                    for predicate in subgroup[0]
                ]
                path_chain = " and ".join(path) if path else "(root node)"
                out.write(
                    utf8(u"    · %.2f%%: %s%s\n" %
                         (round(pred_per_sgroup, 4) * 100, path_chain,
                          confidence_error(subgroup[2]))))
        out.flush()
Example #4
0
    def print_importance(self, out=sys.stdout):
        """Prints ensemble field importance

        Thin wrapper: hands this object and `out` to the `print_importance`
        function. Inside the body the bare name is looked up outside the
        class namespace, so the call does not reach this method again.

        :param out: forwarded unchanged as the `out` keyword argument
                    (defaults to `sys.stdout`)
        """
        print_importance(self, out=out)
Example #5
0
def summarize(model, out=sys.stdout, format=BRIEF):
    """Prints summary grouping distribution as class header and details

    The report written to `out` contains, in this order: the data
    distribution, the predicted distribution, the field importance (only
    when `model.field_importance` is truthy) and a rules summary with one
    header per predicted group followed by the group's subgroups.

    :param model: model whose tree is summarized
    :param out: object with `write` and `flush` methods that receives the
                report (defaults to `sys.stdout`)
    :param format: forwarded as the `format` argument of `Path.to_rules`
    :raises AttributeError: when `model.boosting` is truthy
    """
    if model.boosting:
        raise AttributeError("This method is not available for boosting"
                             " models.")
    tree = model.tree

    def extract_common_path(groups):
        """Extracts the common segment of the prediction path for a group

        For every group, stores in `groups[group]['total'][0]` the leading
        predicates that all the subgroup paths share and replaces
        `groups[group]['details']` with the same entries sorted by
        descending `subgroup[1]`.
        """
        for group in groups:
            details = groups[group]['details']
            common_path = []
            if details:
                paths = [subgroup[0] for subgroup in details]
                # Only positions present in every path can be common.
                shortest = min(len(path) for path in paths)
                for index in range(shortest):
                    candidate = paths[0][index]
                    # An explicit `break` is required to stop at the first
                    # divergence: assigning to the loop variable would not
                    # end a Python `for` loop, and predicates located after
                    # the divergence would be appended.
                    if any(path[index] != candidate for path in paths):
                        break
                    common_path.append(candidate)
            groups[group]['total'][0] = common_path
            if details:
                groups[group]['details'] = sorted(details,
                                                  key=lambda x: x[1],
                                                  reverse=True)

    def confidence_error(value, impurity=None):
        """Returns confidence for categoric objective fields
           and error for numeric objective fields

           An empty string is returned when `value` is None. The impurity
           literal is only added to the confidence text, and only when
           `impurity` is a positive number.
        """
        if value is None:
            return ""
        impurity_literal = ""
        if impurity is not None and impurity > 0:
            impurity_literal = "; impurity: %.2f%%" % (round(impurity, 4))
        objective_type = model.fields[model.objective_id]['optype']
        if objective_type == 'numeric':
            return " [Error: %s]" % value
        return " [Confidence: %.2f%%%s]" % (round(value, 4) * 100,
                                            impurity_literal)

    distribution = get_data_distribution(model)

    out.write(utf8("Data distribution:\n"))
    print_distribution(distribution, out=out)
    out.write(utf8("\n\n"))

    groups = group_prediction(model)
    predictions = get_prediction_distribution(model, groups)

    out.write(utf8("Predicted distribution:\n"))
    print_distribution(predictions, out=out)
    out.write(utf8("\n\n"))

    if model.field_importance:
        out.write(utf8("Field importance:\n"))
        print_importance(model, out=out)

    extract_common_path(groups)

    out.write(utf8("\n\nRules summary:"))

    node = get_node(tree)
    # denominator used for the per-group percentages
    count = node[model.offsets["count"]]
    # Groups are reported in the order given by the predicted distribution;
    # the first item of each entry is the group key.
    for group in (entry[0] for entry in predictions):
        totals = groups[group]['total']
        details = groups[group]['details']
        path = Path(totals[0])
        # `* 1.0` forces a float division of the counters
        data_per_group = totals[1] * 1.0 / count
        pred_per_group = totals[2] * 1.0 / count
        out.write(
            utf8("\n\n%s : (data %.2f%% / prediction %.2f%%) %s" %
                 (group, round(data_per_group, 4) * 100,
                  round(pred_per_group, 4) * 100,
                  path.to_rules(model.fields, format=format))))

        if not details:
            out.write(
                utf8("\n    The model will never predict this"
                     " class\n"))
        elif len(details) == 1:
            # A single subgroup: its path is the header path, so only the
            # confidence/error text is appended to the header line.
            subgroup = details[0]
            out.write(
                utf8("%s\n" %
                     confidence_error(subgroup[2], impurity=subgroup[3])))
        else:
            out.write(utf8("\n"))
            for subgroup in details:
                # share of the group's predictions owned by this subgroup
                pred_per_sgroup = subgroup[1] * 1.0 / totals[2]
                path = Path(subgroup[0])
                path_chain = path.to_rules(model.fields, format=format) if \
                    path.predicates else "(root node)"
                out.write(
                    utf8(
                        "    · %.2f%%: %s%s\n" %
                        (round(pred_per_sgroup, 4) * 100, path_chain,
                         confidence_error(subgroup[2], impurity=subgroup[3]))))

    out.flush()