Ejemplo n.º 1
0
    def set_learner(self):
        """Rebuild the tree learner from the widget settings and publish it.

        A new ``tree.TreeLearner`` is created from the current settings,
        named after ``self.model_name`` and sent on the "Learner" output.
        When input data is present, the learner is also applied to it and
        the resulting model (or ``None`` when there is no data or fitting
        failed) is sent on the "Classification Tree" output.
        """
        learner_settings = dict(
            # The criterion is the second element of the selected score entry.
            criterion=self.scores[self.attribute_score][1],
            max_depth=self.max_depth,
            min_samples_split=self.min_internal,
            min_samples_leaf=self.min_leaf,
            preprocessors=self.preprocessors,
        )
        self.learner = tree.TreeLearner(**learner_settings)
        self.learner.name = self.model_name
        self.send("Learner", self.learner)

        # Called without a message; presumably this resets error slot 1
        # before a new fitting attempt (confirm against the widget base class).
        self.error(1)

        if self.data is None:
            self.classifier = None
        else:
            try:
                self.classifier = self.learner(self.data)
                self.classifier.name = self.model_name
                self.classifier.instances = self.data
            except Exception as exc:
                # Any failure while fitting or tagging the model is reported
                # through the widget's error slot instead of propagating.
                self.error(1, str(exc))
                self.classifier = None

        self.send("Classification Tree", self.classifier)
Ejemplo n.º 2
0
class TestClassification(testing.LearnerTestCase):
    """Learner test case configured with a depth-limited tree learner."""
    # Learner instance under test: a tree learner with max_depth=50.
    # Presumably picked up by the LearnerTestCase base class (not visible
    # here) -- confirm against its implementation.
    LEARNER = ctree.TreeLearner(max_depth=50)
Ejemplo n.º 3
0
 def setUp(self):
     """Create the stacked classification learner stored in ``self.learner``.

     The ensemble combines a default tree learner and a default naive
     Bayes learner and is constructed with ``folds=5``.
     """
     from Orange.ensemble import stacking
     from Orange.classification import tree
     from Orange.classification import bayes

     # Base learners handed to the stacking ensemble, in this order.
     base_learners = [tree.TreeLearner(), bayes.NaiveLearner()]
     self.learner = stacking.StackedClassificationLearner(
         base_learners, folds=5)
    # NOTE: Needed by the save_treegraph_image() function.
    import sys
    from OWWidget import QApplication
    # The application object is bound to a local name; it is not referenced
    # again in the visible code.
    a = QApplication(sys.argv)

    # find out the current file's location so it can be used to compute the
    # location of other files/directories
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    # path_prefix is the directory two levels above this file's directory.
    path_prefix = os.path.abspath(os.path.join(cur_dir, "../../"))

    # Base learners keyed by name (OrderedDict is imported outside the
    # visible code; presumably collections.OrderedDict, which keeps
    # insertion order).
    base_learners = OrderedDict()
    import Orange.classification.tree as octree
    import Orange.feature.scoring as fscoring
    # Split constructor configured with InfoGain as its measure; the same
    # instance is passed to both tree learners below.
    split_const = octree.SplitConstructor_ExhaustiveBinary(
        measure=fscoring.InfoGain())
    base_learners["orange_tree"] = octree.TreeLearner(split=split_const,
                                                      store_instances=True)
    # Second variant: same split constructor, additionally configured with
    # min_instances=10 and same_majority_pruning=True.
    base_learners["orange_tree_pruned"] = octree.TreeLearner(
        split=split_const,
        min_instances=10,
        same_majority_pruning=True,
        store_instances=True)

    # Names of the measures; presumably the evaluation measures reported by
    # the experiment (their consumer is not visible here).
    measures = []
    measures.append("CA")
    measures.append("AUC")

    # Learners from bin_exp keyed by name; the second one is constructed
    # with first_split_attr="id".
    learners = OrderedDict()
    learners["Tree"] = bin_exp.TreeMarkedAndMergedLearner()
    learners["ForcedTree"] = bin_exp.ForcedFirstSplitMTLLearner(
        first_split_attr="id")
    # Filter object created with no arguments (its use is not visible here).
    no_filter = prefiltering.NoFilter()
Ejemplo n.º 5
0
        # Build a subtree for every subset that is not None and attach its
        # root (subtree.tree) as a branch of root_node.
        # NOTE(review): `subset != None` invokes the subset's comparison
        # operator; `is not None` would be the idiomatic identity check.
        for subset in subsets:
            if subset != None:
                subtree = bl(subset, weight)
                root_node.branches.append(subtree.tree)
        # create an (internal) Orange.core.TreeClassifier object
        # The descender is taken from self when the attribute exists;
        # otherwise a Descender_UnknownMergeAsBranchSizes instance is used.
        descender = getattr(self, "descender",
                            octree.Descender_UnknownMergeAsBranchSizes())
        tree = octree._TreeClassifier(domain=instances.domain,
                                      tree=root_node,
                                      descender=descender)

        # perform post pruning
        # Both pruning steps are optional: a missing attribute counts as 0
        # (disabled). When both are set, same-majority pruning is applied
        # first and m-pruning is applied to its result.
        if getattr(self, "same_majority_pruning", 0):
            tree = Pruner_SameMajority(tree)
        if getattr(self, "m_pruning", 0):
            tree = Pruner_m(tree, m=self.m_pruning)

        # Wrap the internal classifier in the public octree.TreeClassifier.
        return octree.TreeClassifier(base_classifier=tree)


if __name__ == "__main__":
    # Manual demo: print a tree built by the regular learner and one built
    # by the forced-first-split learner on the "titanic" data set.
    data = Orange.data.Table("titanic")

    # Reference tree from the unmodified Orange learner.
    nt = octree.TreeLearner(data)
    print("'Normal' TreeLearner:")
    print(nt)  # should have the 'sex' attribute as the first split
    # Blank separator line between the two printed trees.
    print("")

    # Tree whose first split is requested on the 'age' attribute.
    ffst = ForcedFirstSplitTreeLearner(data, first_split_attr="age")
    print("ForcedFirstSplitTreeLearner:")
    print(ffst)  # should have 'age' attribute as the first split
Ejemplo n.º 6
0
                             "input. Model n_features is {} and input "
                             "n_features is {}".format(self.n_features_,
                                                       n_features))
        # convert numpy data to Orange
        orange_test_data = convert_numpy_data_to_orange(self.orange_domain_, X)
        # classify all examples with the previously built classifier
        # p has one row per sample and one column per class. np.empty leaves
        # it uninitialised, so this relies on the loop below filling every
        # row (assumes orange_test_data yields n_samples examples -- confirm).
        p = np.empty((n_samples, self.n_classes_))
        for i, ex in enumerate(orange_test_data):
            # The classifier is called with Orange.core.GetProbabilities;
            # its result is converted to a list and stored as row i.
            p[i, :] = list(
                self.orange_classifier_(ex, Orange.core.GetProbabilities))
        return p


if __name__ == "__main__":
    # Manual demo, part 1: print a tree built by the regular learner and one
    # built by the forced-first-split learner on the "titanic" data set.
    data = Orange.data.Table("titanic")
    nt = octree.TreeLearner(data)
    print("'Normal' TreeLearner:")
    print(nt)  # should have the 'sex' attribute as the first split
    print("")

    ffst = ForcedFirstSplitTreeLearner(data, first_split_attr="age")
    print("ForcedFirstSplitTreeLearner (with the first split forced to 'age'):")
    print(ffst)  # should have 'age' attribute as the first split
    print("")

    # Manual demo, part 2: generate a synthetic Boolean function and a
    # sample of examples, then report the share of True labels.
    import PyMTL.synthetic_data as sd
    a, d = 8, 4
    attr, func = sd.generate_boolean_function(a, d, random_seed=2)
    print("Boolean function (a={}, d={}): {}".format(a, d, func))
    X, y = sd.generate_examples(attr, func, n=100, random_state=10)
    # `y == True` is kept as an element-wise comparison (y is presumably a
    # numpy array -- confirm). The 100.0 factor forces floating-point
    # arithmetic: with integer operands Python 2's `/` floors the result,
    # which would truncate the percentage before "{:.2f}" formats it.
    true_percentage = 100.0 * sum(y == True) / len(y)
    print("% of True values in y: {:.2f}".format(true_percentage))
Ejemplo n.º 7
0
if __name__ == "__main__":
    # Script entry point: learns Orange trees on stored experiment data and
    # pickles them (the visible part ends after the pickling step).
    import os.path
    # find out the current file's location so it can be used to compute the
    # location of other files/directories
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    # path_prefix is the directory two levels above this file's directory.
    path_prefix = os.path.abspath(os.path.join(cur_dir, "../../"))

    # Directory holding the binarization experiment's result files.
    results_path = os.path.join(path_prefix,
                                "results/binarization_experiment/")

    import Orange.classification.tree as octree
    import Orange.feature.scoring as fscoring
    # Tree learner using a split constructor with InfoGain as its measure,
    # min_instances=10, same-majority pruning and stored instances.
    split_const = octree.SplitConstructor_ExhaustiveBinary(
        measure=fscoring.InfoGain())
    tree_learner = octree.TreeLearner(split=split_const,
                                      min_instances=10,
                                      same_majority_pruning=True,
                                      store_instances=True)
    from Orange.data import Table

    # TEST for equality of "original" vs. "pickled/unpickled" Orange trees
    # NOTE(review): unpickle_obj and np are not used in the visible part of
    # this block; presumably they are used further below.
    from PyMTL.util import pickle_obj, unpickle_obj
    import numpy as np
    for i in range(10):
        # The adjacent string literals are joined into a single path
        # template before .format(i) fills in the repetition index.
        data = Table(
            os.path.join(
                results_path, "bool_func-a8d4n100g2tg5nse0.0rs15"
                "nls10-seed63-complete_test/orange_merged_learn-"
                "repetition{}.tab".format(i)))
        tree = tree_learner(data)
        # The same pickle path is used on every iteration.
        pickle_path = os.path.join(results_path, "test-pickle.pkl")
        pickle_obj(tree, pickle_path)