def summarize(self, out=sys.stdout):
    """Writes a plain-text summary of the model to `out`

       The report is written in this order: the data distribution, the
       predicted distribution, the field importance (only when
       `self.field_importance` is truthy) and, for every predicted group,
       a header line followed by one line per prediction path.

       `out` needs to offer `write` and `flush`. Nothing is returned.

    """
    tree = self.tree

    def extract_common_path(groups):
        """Stores the predicates shared by all the paths of each group

           The shared predicates replace `groups[name]['total'][0]` and
           the group's details are re-ordered by their second element,
           largest first.

           NOTE(review): every position is checked on its own, so a
           position where all paths agree is kept even when an earlier
           position differed; the result is not necessarily a common
           prefix. Confirm this is the intended meaning of "common".

        """
        for name in groups:
            info = groups[name]
            details = info['details']
            shared = []
            if details:
                # Only positions present in every path can be compared.
                shortest = min(len(entry[0]) for entry in details)
                for position in range(shortest):
                    candidate = details[0][0][position]
                    disagreement = any(entry[0][position] != candidate
                                       for entry in details)
                    if not disagreement:
                        shared.append(candidate)
            info['total'][0] = shared
            if details:
                info['details'] = sorted(details,
                                         key=lambda entry: entry[1],
                                         reverse=True)

    def confidence_error(value, impurity=None):
        """Builds the bracketed suffix shown after a prediction path

           Gives an empty string when `value` is None, an error literal
           when the objective field optype is 'numeric' and a confidence
           literal (optionally followed by the impurity) otherwise.

        """
        if value is None:
            return ""
        if impurity is not None and impurity > 0:
            impurity_literal = "; impurity: %.2f%%" % (round(impurity, 4))
        else:
            impurity_literal = ""
        if self.fields[tree.objective_id]['optype'] == 'numeric':
            return u" [Error: %s]" % value
        return u" [Confidence: %.2f%%%s]" % ((round(value, 4) * 100),
                                             impurity_literal)

    # Distribution of the data the model was built from.
    data_distribution = self.get_data_distribution()
    out.write(u"Data distribution:\n")
    print_distribution(data_distribution, out=out)
    out.write(u"\n\n")

    # Distribution of the predictions the model can produce.
    groups = self.group_prediction()
    predictions = self.get_prediction_distribution(groups)
    out.write(u"Predicted distribution:\n")
    print_distribution(predictions, out=out)
    out.write(u"\n\n")

    if self.field_importance:
        out.write(u"Field importance:\n")
        print_importance(self, out=out)

    extract_common_path(groups)

    group_names = [entry[0] for entry in predictions]
    for group in group_names:
        info = groups[group]
        details = info['details']
        totals = info['total']
        shared_rules = [predicate.to_rule(self.fields)
                        for predicate in totals[0]]
        data_share = totals[1] * 1.0 / tree.count
        prediction_share = totals[2] * 1.0 / tree.count
        header = u"\n\n%s : (data %.2f%% / prediction %.2f%%) %s\n" % (
            group,
            round(data_share, 4) * 100,
            round(prediction_share, 4) * 100,
            " and ".join(shared_rules))
        out.write(utf8(header))
        if not details:
            out.write(u" The model will never predict this class\n")
        for subgroup in details:
            subgroup_share = subgroup[1] * 1.0 / totals[2]
            rules = [predicate.to_rule(self.fields)
                     for predicate in subgroup[0]]
            if rules:
                path_chain = " and ".join(rules)
            else:
                # An empty path means the prediction is made at the root.
                path_chain = "(root node)"
            line = u" · %.2f%%: %s%s\n" % (
                round(subgroup_share, 4) * 100,
                path_chain,
                confidence_error(subgroup[2], impurity=subgroup[3]))
            out.write(utf8(line))
    out.flush()
def print_importance(self, out=sys.stdout):
    """Prints ensemble field importance

       Forwards this object and `out` unchanged to the `print_importance`
       name looked up when the call runs.

       NOTE(review): presumably this is a method and the callee is a
       module-level helper of the same name; if this definition were
       itself the module-level binding, the call would recurse. Confirm
       against the enclosing file.

    """
    print_importance(self, out=out)
def summarize(self, out=sys.stdout):
    """Writes a plain-text summary of the model to `out`

       Sections are written in this order: data distribution, predicted
       distribution, field importance (only when `self.field_importance`
       is truthy) and, per predicted group, a header line followed by one
       line for each of its prediction paths.

       `out` needs to offer `write` and `flush`. Nothing is returned.

    """
    tree = self.tree

    def extract_common_path(groups):
        """Records, per group, the predicates all of its paths share

           The result overwrites `groups[key]['total'][0]`; the group's
           details are then re-ordered by their second element, largest
           first.

           NOTE(review): positions are compared independently, so a
           match found after a mismatch is still collected and the
           outcome may not be a common prefix. Confirm this is intended.

        """
        for key in groups:
            record = groups[key]
            details = record['details']
            in_common = []
            if details:
                # Compare only as far as the shortest path reaches.
                limit = min(len(item[0]) for item in details)
                reference = details[0][0]
                for index in range(limit):
                    expected = reference[index]
                    differs = any(item[0][index] != expected
                                  for item in details)
                    if not differs:
                        in_common.append(expected)
            record['total'][0] = in_common
            if details:
                record['details'] = sorted(details,
                                           key=lambda item: item[1],
                                           reverse=True)

    def confidence_error(value):
        """Builds the bracketed suffix shown after a prediction path

           Gives an empty string when `value` is None, an error literal
           when the objective field optype is 'numeric' and a confidence
           literal otherwise.

        """
        if value is None:
            return ""
        optype = self.fields[tree.objective_field]['optype']
        if optype == 'numeric':
            return u" [Error: %s]" % value
        return u" [Confidence: %.2f%%]" % (round(value, 4) * 100)

    def rules_for(predicates):
        """Turns a list of predicates into their rule strings"""
        return [predicate.to_rule(self.fields) for predicate in predicates]

    data_distribution = self.get_data_distribution()
    out.write(u"Data distribution:\n")
    print_distribution(data_distribution, out=out)
    out.write(u"\n\n")

    groups = self.group_prediction()
    predictions = self.get_prediction_distribution(groups)
    out.write(u"Predicted distribution:\n")
    print_distribution(predictions, out=out)
    out.write(u"\n\n")

    if self.field_importance:
        out.write(u"Field importance:\n")
        print_importance(self, out=out)

    extract_common_path(groups)

    labels = [row[0] for row in predictions]
    for group in labels:
        record = groups[group]
        details = record['details']
        totals = record['total']
        header_rules = rules_for(totals[0])
        data_fraction = totals[1] * 1.0 / tree.count
        prediction_fraction = totals[2] * 1.0 / tree.count
        out.write(
            utf8(u"\n\n%s : (data %.2f%% / prediction %.2f%%) %s\n" %
                 (group,
                  round(data_fraction, 4) * 100,
                  round(prediction_fraction, 4) * 100,
                  " and ".join(header_rules))))
        if not details:
            out.write(u" The model will never predict this class\n")
        for subgroup in details:
            fraction = subgroup[1] * 1.0 / totals[2]
            rules = rules_for(subgroup[0])
            # A path with no predicates is a prediction made at the root.
            path_chain = " and ".join(rules) if rules else "(root node)"
            out.write(
                utf8(u" · %.2f%%: %s%s\n" %
                     (round(fraction, 4) * 100,
                      path_chain,
                      confidence_error(subgroup[2]))))
    out.flush()
def summarize(model, out=sys.stdout, format=BRIEF):
    """Writes a plain-text summary of `model` to `out`

       Sections are written in this order: data distribution, predicted
       distribution, field importance (only when `model.field_importance`
       is truthy) and a "Rules summary" with one entry per predicted
       group. A group with a single prediction path gets its confidence
       or error on the header line; a group with several paths gets one
       line per path.

       `out` needs to offer `write` and `flush`; `format` is handed to
       `Path.to_rules` as is. Nothing is returned.

       Raises AttributeError when `model.boosting` is truthy.

    """
    if model.boosting:
        raise AttributeError(
            "This method is not available for boosting models.")
    tree = model.tree

    def extract_common_path(groups):
        """Records, per group, the predicates all of its paths share

           The result overwrites `groups[key]['total'][0]`; the group's
           details are then re-ordered by their second element, largest
           first.

           NOTE(review): positions are compared independently, so a
           match found after a mismatch is still collected and the
           outcome may not be a common prefix. Confirm this is intended.

        """
        for key in groups:
            record = groups[key]
            details = record['details']
            in_common = []
            if details:
                # Compare only as far as the shortest path reaches.
                limit = min(len(item[0]) for item in details)
                for index in range(limit):
                    expected = details[0][0][index]
                    mismatch = any(item[0][index] != expected
                                   for item in details)
                    if not mismatch:
                        in_common.append(expected)
            record['total'][0] = in_common
            if details:
                record['details'] = sorted(details,
                                           key=lambda item: item[1],
                                           reverse=True)

    def confidence_error(value, impurity=None):
        """Builds the bracketed suffix shown after a prediction path

           Gives an empty string when `value` is None, an error literal
           when the objective field optype is 'numeric' and a confidence
           literal (optionally followed by the impurity) otherwise.

        """
        if value is None:
            return ""
        if impurity is not None and impurity > 0:
            impurity_literal = "; impurity: %.2f%%" % (round(impurity, 4))
        else:
            impurity_literal = ""
        optype = model.fields[model.objective_id]['optype']
        if optype == 'numeric':
            return " [Error: %s]" % value
        return " [Confidence: %.2f%%%s]" % (round(value, 4) * 100,
                                            impurity_literal)

    def emit(text):
        """Writes `text` to `out` after passing it through `utf8`"""
        out.write(utf8(text))

    data_distribution = get_data_distribution(model)
    emit("Data distribution:\n")
    print_distribution(data_distribution, out=out)
    emit("\n\n")

    groups = group_prediction(model)
    predictions = get_prediction_distribution(model, groups)
    emit("Predicted distribution:\n")
    print_distribution(predictions, out=out)
    emit("\n\n")

    if model.field_importance:
        emit("Field importance:\n")
        print_importance(model, out=out)

    extract_common_path(groups)

    emit("\n\nRules summary:")

    # The instance count is read from the root node returned by get_node.
    root = get_node(tree)
    count = root[model.offsets["count"]]

    labels = [row[0] for row in predictions]
    for group in labels:
        record = groups[group]
        details = record['details']
        totals = record['total']
        common = Path(totals[0])
        data_fraction = totals[1] * 1.0 / count
        prediction_fraction = totals[2] * 1.0 / count
        emit("\n\n%s : (data %.2f%% / prediction %.2f%%) %s" %
             (group,
              round(data_fraction, 4) * 100,
              round(prediction_fraction, 4) * 100,
              common.to_rules(model.fields, format=format)))

        how_many = len(details)
        if how_many == 0:
            emit("\n The model will never predict this class\n")
            continue
        if how_many == 1:
            # Single path: the header already shows it in full, so only
            # the confidence/error suffix is appended.
            only = details[0]
            emit("%s\n" % confidence_error(only[2], impurity=only[3]))
            continue

        emit("\n")
        for subgroup in details:
            fraction = subgroup[1] * 1.0 / totals[2]
            branch = Path(subgroup[0])
            if branch.predicates:
                path_chain = branch.to_rules(model.fields, format=format)
            else:
                path_chain = "(root node)"
            emit(" · %.2f%%: %s%s\n" %
                 (round(fraction, 4) * 100,
                  path_chain,
                  confidence_error(subgroup[2], impurity=subgroup[3])))
    out.flush()