Ejemplo n.º 1
0
 def test_classless(self):
     # Re-wrap the zoo data under a domain built from the zoo attributes
     # only (no class variable is passed to Domain).
     attribute_domain = Domain(self.zoo.domain.attributes)
     without_class = Table(attribute_domain, self.zoo[:, 0:-1])
     # Each of these scorers is expected to reject such data.
     measures = (scoring.Gini(), scoring.InfoGain(), scoring.GainRatio())
     for measure in measures:
         with self.assertRaises(ValueError):
             measure(0, without_class)
Ejemplo n.º 2
0
 def test_wrong_class_type(self):
     # These scorers must refuse the housing data with a ValueError
     # (presumably because its class variable is not discrete -- confirm
     # against the fixture definition).
     measures = (scoring.Gini(), scoring.InfoGain(), scoring.GainRatio())
     for measure in measures:
         with self.assertRaises(ValueError):
             measure(0, self.housing)
Ejemplo n.º 3
0
class TestMeasureAttr_GainRatio(testing.MeasureAttributeTestCase):
    """Bind the GainRatio scorer to the MeasureAttributeTestCase suite."""
    # Scorer instance under test; presumably read by the inherited test
    # methods of MeasureAttributeTestCase -- confirm in the base class.
    MEASURE = scoring.GainRatio()
Ejemplo n.º 4
0
 def test_gain_ratio(self):
     # Reference scores for attribute indices 0..4 of the zoo data,
     # compared to five decimal places.
     expected = [0.80351, 1.00000, 0.84754, 1.00000, 0.59376]
     gain_ratio = scoring.GainRatio()
     computed = [
         gain_ratio(index, self.zoo) for index in range(len(expected))
     ]
     np.testing.assert_almost_equal(computed, expected, decimal=5)
Ejemplo n.º 5
0
    def __call__(self, instances, weight=0):
        """Build a decision tree whose root split is restricted to one feature.

        The root node is split manually: only features whose name equals
        ``self.first_split_attr`` are offered to ``self.split`` as
        candidates.  Every non-empty subset produced by the splitter is then
        passed to the base learner, and the resulting sub-trees become the
        branches of the root node.  Optional post-pruning is applied before
        the tree is wrapped and returned.

        Arguments:
        instances -- Orange.data.Table holding learning instances

        Keyword arguments:
        weight -- meta attribute with weights of instances (optional)

        Return an ``octree.TreeClassifier`` wrapping the constructed tree.

        Side effect: the freshly created base learner is stored in
        ``self.base_learner``.

        """
        # create an (internal) Orange.core.TreeLearner object
        bl = self._base_learner()
        self.base_learner = bl

        # set the scoring criteria if it was not set by the user
        if not self._handset_split and not self.measure:
            if instances.domain.class_var.var_type == Orange.data.Type.Discrete:
                measure = fscoring.GainRatio()
            else:
                measure = fscoring.MSE()
            bl.split.continuous_split_constructor.measure = measure
            bl.split.discrete_split_constructor.measure = measure
        # set the splitter if it was set by the user
        # (identity check: never dispatch to a custom comparison operator)
        if self.splitter is not None:
            bl.example_splitter = self.splitter

        # set up a boolean list with one entry for each feature and select the
        # (single) feature that the SplitConstructor should consider
        candidate_feat = [
            feat.name == self.first_split_attr for feat in instances.domain
        ]
        # create the tree's root node
        root_node = self._new_tree_node(instances)
        # call the SplitConstructor for the root node manually; the split
        # quality and the spent feature are returned but not needed here
        bs, bd, ss, quality, spent_feature = self.split(
            instances, weight, root_node.contingency, root_node.distribution,
            candidate_feat, root_node.node_classifier)
        root_node.branch_selector = bs
        root_node.branch_descriptions = bd
        root_node.branch_sizes = ss
        # split the examples into subsets by calling the appropriate Splitter
        if self.splitter is not None:
            splitter = self.splitter
        else:
            splitter = octree.Splitter_IgnoreUnknowns()
        subsets = splitter(root_node, root_node.examples)[0]
        # build a sub-tree for each subset (which is not None) and store it as
        # a branch of the root_node
        # NOTE(review): None subsets are skipped rather than kept as
        # placeholders, so ``branches`` may be shorter than ``branch_sizes``
        # and ``branch_descriptions`` -- confirm the descender tolerates this.
        root_node.branches = []
        for subset in subsets:
            if subset is not None:
                subtree = bl(subset, weight)
                root_node.branches.append(subtree.tree)
        # create an (internal) Orange.core.TreeClassifier object
        descender = getattr(self, "descender",
                            octree.Descender_UnknownMergeAsBranchSizes())
        tree = octree._TreeClassifier(domain=instances.domain,
                                      tree=root_node,
                                      descender=descender)

        # perform post pruning
        if getattr(self, "same_majority_pruning", 0):
            tree = Pruner_SameMajority(tree)
        if getattr(self, "m_pruning", 0):
            tree = Pruner_m(tree, m=self.m_pruning)

        return octree.TreeClassifier(base_classifier=tree)