Esempio n. 1
0
    def test_integration(self):
        """Rules without pruning must score exactly like the base tree.

        Features are extracted from ``self.files`` and split into train and
        test parts. A plain ``DecisionTreeClassifier`` and a ``TrainableRules``
        built on the same base model are fitted with identical hyperparameters
        and with branch/attribute pruning switched off; their scores are then
        required to be equal on both splits.
        """
        res = self.extractor.extract_features(self.files)
        self.assertIsNotNone(res, "Failed to parse files.")
        X, y, _ = res
        train_X, test_X, train_y, test_y = \
            model_selection.train_test_split(X, y, random_state=1989)

        # "sqrt" is what "auto" meant for classifiers; "auto" was deprecated
        # in scikit-learn 1.1 and removed in 1.3, so it is spelled explicitly.
        model = tree.DecisionTreeClassifier(min_samples_leaf=26,
                                            random_state=1989,
                                            max_depth=None,
                                            max_features="sqrt",
                                            min_samples_split=2)
        model.fit(train_X, train_y)
        # Same hyperparameters as the reference tree above.
        rules = TrainableRules(
            base_model_name="sklearn.tree.DecisionTreeClassifier",
            prune_branches_algorithms=[],
            prune_attributes=False,
            min_samples_leaf=26,
            random_state=1989,
            max_depth=None,
            max_features="sqrt",
            min_samples_split=2,
            confidence_threshold=0)
        rules.fit(train_X, train_y)
        model_score_train = model.score(train_X, train_y)
        model_score_test = model.score(test_X, test_y)
        rules_score_train = rules.score(train_X, train_y)
        rules_score_test = rules.score(test_X, test_y)
        # Exact equality is intended: the two scores must not differ at all.
        self.assertEqual(rules_score_train, model_score_train)
        self.assertEqual(rules_score_test, model_score_test)
Esempio n. 2
0
    def test_integration(self):
        """Rules without pruning must score exactly like the base tree.

        Features are extracted from ``self.files`` and split into train and
        test parts. A plain ``DecisionTreeClassifier`` and a ``TrainableRules``
        built on the same base model are fitted with identical hyperparameters
        and with branch/attribute pruning switched off; their scores are then
        required to be equal on both splits.
        """
        res = self.extractor.extract_features(self.files)
        # Fail with a readable message instead of an unpacking TypeError
        # should the extractor yield None.
        self.assertIsNotNone(res, "Failed to parse files.")
        X, y, _ = res
        train_X, test_X, train_y, test_y = \
            model_selection.train_test_split(X, y, random_state=1989)

        # "sqrt" is what "auto" meant for classifiers; "auto" was deprecated
        # in scikit-learn 1.1 and removed in 1.3, so it is spelled explicitly.
        model = tree.DecisionTreeClassifier(min_samples_leaf=26,
                                            random_state=1989,
                                            max_depth=None,
                                            max_features="sqrt",
                                            min_samples_split=2)
        model.fit(train_X, train_y)
        # Same hyperparameters as the reference tree above.
        rules = TrainableRules("sklearn.tree.DecisionTreeClassifier",
                               prune_branches_algorithms=[],
                               prune_attributes=False,
                               min_samples_leaf=26,
                               random_state=1989,
                               max_depth=None,
                               max_features="sqrt",
                               min_samples_split=2)
        rules.fit(train_X, train_y)
        model_score_train = model.score(train_X, train_y)
        model_score_test = model.score(test_X, test_y)
        rules_score_train = rules.score(train_X, train_y)
        rules_score_test = rules.score(test_X, test_y)
        # Exact equality is intended: the two scores must not differ at all.
        self.assertEqual(rules_score_train, model_score_train)
        self.assertEqual(rules_score_test, model_score_test)
Esempio n. 3
0
 def test_budget(budget):
     """Fit rules pruned with "top-down-greedy" and return their train score.

     ``budget`` becomes the second element of the ``top_down_greedy_budget``
     tuple. ``self`` is a free variable here and has to be supplied by the
     surrounding scope.
     """
     rule_options = {
         "prune_branches_algorithms": ["top-down-greedy"],
         "prune_attributes": False,
         "top_down_greedy_budget": (False, budget),
         "random_state": 1989,
     }
     pruned_rules = TrainableRules("sklearn.tree.DecisionTreeClassifier",
                                   **rule_options)
     pruned_rules.fit(self.train_x, self.train_y)
     train_score = pruned_rules.score(self.train_x, self.train_y)
     return train_score
Esempio n. 4
0
 def test_tree_attr_pruning(self):
     """Compare attribute-pruned rules against a plain tree on the test split.

     Both estimators share the same hyperparameters. The rules' test score,
     scaled by 1.1, has to be greater than the tree's test score.
     """
     shared_params = {"min_samples_leaf": 26, "random_state": 1989}

     # Reference model: an ordinary decision tree.
     model = tree.DecisionTreeClassifier(**shared_params).fit(
         self.train_x, self.train_y)

     # Rules on the same base model, with attribute pruning switched on.
     rules = TrainableRules(
         "sklearn.tree.DecisionTreeClassifier",
         prune_branches_algorithms=[],
         prune_attributes=True,
         **shared_params)
     rules.fit(self.train_x, self.train_y)

     expected = model.score(self.test_x, self.test_y)
     actual = rules.score(self.test_x, self.test_y)
     self.assertGreater(actual * 1.1, expected)
Esempio n. 5
0
 def test_tree_no_pruning(self):
     """Compare unpruned rules against a plain tree on the training split.

     Both estimators share the same hyperparameters. The rules' train score,
     scaled by 1.1, has to be greater than the tree's train score.
     """
     shared_params = {"min_samples_leaf": 26, "random_state": 1989}

     # Reference model: an ordinary decision tree.
     model = tree.DecisionTreeClassifier(**shared_params).fit(
         self.train_x, self.train_y)

     # Rules on the same base model, with every pruning option switched off.
     rules = TrainableRules(
         base_model_name="sklearn.tree.DecisionTreeClassifier",
         prune_branches_algorithms=[],
         confidence_threshold=0,
         prune_attributes=False,
         **shared_params)
     rules.fit(self.train_x, self.train_y)

     expected = model.score(self.train_x, self.train_y)
     actual = rules.score(self.train_x, self.train_y)
     self.assertGreater(actual * 1.1, expected)
Esempio n. 6
0
 def test_forest_no_pruning(self):
     """Compare unpruned rules against a random forest on the training split.

     Both estimators share the same hyperparameters. The rules' train score,
     scaled by 1.1, has to be greater than the forest's train score.
     """
     shared_params = {
         "n_estimators": 50,
         "min_samples_leaf": 26,
         "random_state": 1989,
     }

     # Reference model: an ordinary random forest.
     model = ensemble.RandomForestClassifier(**shared_params).fit(
         self.train_x, self.train_y)

     # Rules on the same base model, with every pruning option switched off.
     rules = TrainableRules(
         "sklearn.ensemble.RandomForestClassifier",
         prune_branches_algorithms=[],
         prune_attributes=False,
         **shared_params)
     rules.fit(self.train_x, self.train_y)

     expected = model.score(self.train_x, self.train_y)
     actual = rules.score(self.train_x, self.train_y)
     self.assertGreater(actual * 1.1, expected)