def _score_disc_bin():
    """Score a discrete attribute using a binarized (two-branch) split.

    Relies on names from the enclosing scope (``attr``, ``attr_no``,
    ``data``, ``col_x``, ``col_y``, ``self``).

    Returns:
        * the result of ``_score_disc()`` when the attribute has exactly
          two values;
        * ``REJECT_ATTRIBUTE`` when the binarization scorer reports a
          score of 0;
        * otherwise the tuple ``(score, node, branches, 2)``, where
          ``node`` is a ``MappedDiscreteNode`` built from the remapped
          values. NOTE(review): the trailing ``2`` is presumably the
          number of branches -- confirm against the caller.
    """
    value_count = len(attr.values)
    if value_count == 2:
        # Nothing to binarize; defer to the plain discrete scorer.
        return _score_disc()

    # No missing-value correction is applied below: the score returned
    # here is already adjusted for missing attribute values.
    split_score, raw_mapping = _tree_scorers.find_binarization_MSE(
        col_x, col_y, value_count, self.min_samples_leaf)
    if split_score == 0:
        return REJECT_ATTRIBUTE

    branch_mapping, split_branches = \
        MappedDiscreteNode.branches_from_mapping(
            data.X[:, attr_no], raw_mapping, len(attr.values))
    split_node = MappedDiscreteNode(attr, attr_no, branch_mapping, None)
    return split_score, split_node, split_branches, 2
def _score_disc_bin():
    """Score a discrete attribute using a binarized (two-branch) split.

    Relies on names from the enclosing scope (``attr``, ``attr_no``,
    ``col_x``, ``col_y``, ``self``).

    Returns:
        * the result of ``_score_disc()`` when the attribute has exactly
          two values;
        * ``REJECT_ATTRIBUTE`` when the binarization scorer reports a
          score of 0;
        * otherwise the tuple ``(score, node, branches, 2)``, where
          ``node`` is a ``MappedDiscreteNode`` built from the remapped
          values. NOTE(review): the trailing ``2`` is presumably the
          number of branches -- confirm against the caller.
    """
    value_count = len(attr.values)
    if value_count == 2:
        # Nothing to binarize; defer to the plain discrete scorer.
        return _score_disc()

    # No missing-value correction is applied below: the score returned
    # here is already adjusted for missing attribute values.
    split_score, raw_mapping = _tree_scorers.find_binarization_MSE(
        col_x, col_y, value_count, self.min_samples_leaf)
    if split_score == 0:
        return REJECT_ATTRIBUTE

    branch_mapping, split_branches = \
        MappedDiscreteNode.branches_from_mapping(
            col_x, raw_mapping, len(attr.values))
    split_node = MappedDiscreteNode(attr, attr_no, branch_mapping, None)
    return split_score, split_node, split_branches, 2
def _score_disc_bin():
    """Score a discrete attribute, with binarization.

    Relies on names from the enclosing scope (``attr``, ``attr_no``,
    ``data``, ``col_x``, ``self``).

    Returns:
        * the result of ``_score_disc()`` when the attribute has at most
          two values;
        * ``REJECT_ATTRIBUTE`` when the contingency sums to zero (every
          value of the attribute is missing) or when the best
          binarization score is not positive;
        * otherwise the tuple ``(score, node, branches, 2)``, with the
          score scaled down by the share of instances counted in
          ``cont.unknowns``. NOTE(review): the trailing ``2`` is
          presumably the number of branches -- confirm against the
          caller.
    """
    value_count = len(attr.values)
    if value_count <= 2:
        # Nothing to binarize; defer to the plain discrete scorer.
        return _score_disc()

    cont = contingency.Discrete(data, attr)
    # Marginals of the contingency; instances with a missing attribute
    # value are skipped here and accounted for through cont.unknowns.
    class_totals = np.sum(cont, axis=1)
    value_totals = np.sum(cont, axis=0)
    if np.sum(value_totals) == 0:
        # All values are missing.
        return REJECT_ATTRIBUTE

    raw_score, raw_mapping = _tree_scorers.find_binarization_entropy(
        cont, class_totals, value_totals, self.min_samples_leaf)
    if raw_score <= 0:
        return REJECT_ATTRIBUTE

    # Penalize the score by the proportion of unknown attribute values.
    adjusted_score = raw_score * (1 - np.sum(cont.unknowns) / len(data))
    branch_mapping, split_branches = \
        MappedDiscreteNode.branches_from_mapping(
            col_x, raw_mapping, value_count)
    split_node = MappedDiscreteNode(attr, attr_no, branch_mapping, None)
    return adjusted_score, split_node, split_branches, 2
def _score_disc_bin():
    """Score a discrete attribute, with binarization.

    Relies on names from the enclosing scope (``attr``, ``attr_no``,
    ``data``, ``self``).

    Returns:
        * the result of ``_score_disc()`` when the attribute has at most
          two values;
        * ``REJECT_ATTRIBUTE`` when the contingency sums to zero (every
          value of the attribute is missing) or when the best
          binarization score is not positive;
        * otherwise the tuple ``(score, node, branches, 2)``, with the
          score scaled down by the share of instances counted in
          ``cont.unknowns``. NOTE(review): the trailing ``2`` is
          presumably the number of branches -- confirm against the
          caller.
    """
    value_count = len(attr.values)
    if value_count <= 2:
        # Nothing to binarize; defer to the plain discrete scorer.
        return _score_disc()

    cont = contingency.Discrete(data, attr)
    # Marginals of the contingency; instances with a missing attribute
    # value are skipped here and accounted for through cont.unknowns.
    class_totals = np.sum(cont, axis=1)
    value_totals = np.sum(cont, axis=0)
    if np.sum(value_totals) == 0:
        # All values are missing.
        return REJECT_ATTRIBUTE

    raw_score, raw_mapping = _tree_scorers.find_binarization_entropy(
        cont, class_totals, value_totals, self.min_samples_leaf)
    if raw_score <= 0:
        return REJECT_ATTRIBUTE

    # Penalize the score by the proportion of unknown attribute values.
    adjusted_score = raw_score * (1 - np.sum(cont.unknowns) / len(data))
    branch_mapping, split_branches = \
        MappedDiscreteNode.branches_from_mapping(
            data.X[:, attr_no], raw_mapping, value_count)
    split_node = MappedDiscreteNode(attr, attr_no, branch_mapping, None)
    return adjusted_score, split_node, split_branches, 2