Ejemplo n.º 1
0
 def setUp(self):
     """Create the fixtures shared by the tests.

     Builds a forest with default arguments, a 3x3 array of
     single-letter strings (``self.X``) and a length-3 integer array
     (``self.y``).
     """
     self.decision_forest = DecisionForestClassifier()
     # Written row by row; identical to reshaping the flat nine-item list.
     self.X = np.array([
         ['A', 'B', 'A'],
         ['B', 'B', 'C'],
         ['A', 'C', 'B'],
     ])
     self.y = np.array([1, 1, 0])
Ejemplo n.º 2
0
from sklearn.model_selection import train_test_split
from examples import load_data
from proactive_forest.estimator import DecisionForestClassifier, ProactiveForestClassifier

if __name__ == '__main__':
    # Load the kr-vs-kp data and keep 33% of it aside as a test split.
    features, labels = load_data.load_kr_vs_kp()
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.33, random_state=4)

    # The two ensembles being compared; the second one is the model the
    # output below labels as "Random Forest".
    proactive_forest = ProactiveForestClassifier(alpha=0.1, bootstrap=True)
    random_forest = DecisionForestClassifier(
        split_chooser='best',
        feature_selection='log',
        bootstrap=True,
    )

    # Train both models on the same training split, reporting progress.
    proactive_forest.fit(X_train, y_train)
    print('Processed: Proactive Forest')
    random_forest.fit(X_train, y_train)
    print('Processed: Random Forest')

    # Evaluate the diversity measure of each ensemble on the held-out split
    # (proactive forest first, as before) and only then print both values.
    proactive_diversity, random_diversity = [
        model.diversity_measure(X_test, y_test)
        for model in (proactive_forest, random_forest)
    ]

    print('Proactive Forest Diversity: ', proactive_diversity)
    print('Random Forest Diversity: ', random_diversity)
Ejemplo n.º 3
0
 def test_bootstrap_admissible_value(self):
     """A forest built with ``bootstrap=True`` exposes a truthy ``bootstrap``."""
     forest = DecisionForestClassifier(bootstrap=True)
     self.decision_forest = forest
     self.assertTrue(forest.bootstrap)
Ejemplo n.º 4
0
class DecisionForestClassifierTest(TestCase):
    """Tests for ``DecisionForestClassifier`` built on a toy data set.

    Most tests replace the forest's trees and label encoder with mocks so
    that only the forest-level logic is exercised.
    """

    @staticmethod
    def _stub_tree(weight=None, prediction=None, importances=None):
        """Return a ``DecisionTree``-spec'd mock with the given pieces set.

        Only arguments that are not ``None`` are configured on the mock:
        ``weight`` sets the ``weight`` attribute, ``prediction`` the return
        value of ``predict`` and ``importances`` the return value of
        ``feature_importances``.
        """
        tree = mock.MagicMock(spec=DecisionTree)
        if weight is not None:
            tree.weight = weight
        if prediction is not None:
            tree.predict.return_value = prediction
        if importances is not None:
            tree.feature_importances.return_value = importances
        return tree

    @staticmethod
    def _two_instances():
        """Return the 2x3 array of single-letter strings used as test input."""
        return np.array([
            ['A', 'B', 'A'],
            ['C', 'A', 'A'],
        ])

    def setUp(self):
        """Create a default forest plus a 3x3 string array and its labels."""
        self.decision_forest = DecisionForestClassifier()
        self.X = np.array([
            ['A', 'B', 'A'],
            ['B', 'B', 'C'],
            ['A', 'C', 'B'],
        ])
        self.y = np.array([1, 1, 0])

    def tearDown(self):
        """Nothing to release; every fixture is rebuilt by ``setUp``."""

    def test_fit(self):
        """After ``fit`` the internals are set and one tree exists per estimator."""
        forest = self.decision_forest
        forest.fit(self.X, self.y)

        for attribute in ('_encoder', '_tree_builder', '_trees'):
            self.assertIsNotNone(getattr(forest, attribute))
        self.assertIsInstance(forest._trees, list)
        self.assertEqual(len(forest._trees), forest.n_estimators)

    def test_predict_one_instance(self):
        """``predict`` on one row returns 1 with trees stubbed to 1, 0, 1.

        All stub trees carry weight 1 and the encoder's
        ``inverse_transform`` is stubbed to return 1.
        """
        forest = self.decision_forest
        forest._n_features = 3
        forest._n_classes = 2

        stub_trees = [
            self._stub_tree(weight=1, prediction=vote) for vote in (1, 0, 1)
        ]

        encoder = mock.MagicMock(spec=LabelEncoder)
        encoder.inverse_transform.return_value = 1
        forest._encoder = encoder
        forest._trees = stub_trees

        single_row = np.array([['A', 'B', 'A']])
        self.assertEqual(1, forest.predict(single_row))

    def test_predict_two_instances(self):
        """``predict`` on two rows returns two predictions.

        The encoder's ``inverse_transform`` is stubbed to return ``[1, 1]``.
        """
        forest = self.decision_forest
        forest._n_features = 3
        forest._n_classes = 2

        stub_trees = [
            self._stub_tree(weight=1, prediction=vote) for vote in (1, 0, 1)
        ]

        encoder = mock.MagicMock(spec=LabelEncoder)
        encoder.inverse_transform.return_value = [1, 1]
        forest._encoder = encoder
        forest._trees = stub_trees

        predictions = forest.predict(self._two_instances())
        self.assertEqual(2, len(predictions))

    def test_feature_importances(self):
        """Forest importances match 0.3 per feature for trees at 0.2/0.3/0.4."""
        forest = self.decision_forest
        forest._n_features = 3
        forest._trees = [
            self._stub_tree(importances=[value] * 3)
            for value in (0.2, 0.3, 0.4)
        ]

        expected = [0.3, 0.3, 0.3]
        resulted = forest.feature_importances()

        self.assertEqual(len(expected), len(resulted))
        for wanted, got in zip(expected, resulted):
            self.assertAlmostEqual(wanted, got, places=2)

    def test_trees_mean_weight(self):
        """Tree weights 1, 0.8 and 0.8 give a mean weight of about 0.87."""
        self.decision_forest._trees = [
            self._stub_tree(weight=value) for value in (1, 0.8, 0.8)
        ]

        mean_weight = self.decision_forest.trees_mean_weight()
        self.assertAlmostEqual(0.87, mean_weight, places=2)

    def test_diversity_measure_exception(self):
        """``diversity_measure`` raises ``ValueError`` for ``diversity='kappa'``."""
        x = self._two_instances()
        y = [1, 0]

        encoder = mock.MagicMock(spec=LabelEncoder)
        encoder.transform.return_value = [1, 0]
        self.decision_forest._encoder = encoder

        self.assertRaises(ValueError,
                          self.decision_forest.diversity_measure,
                          x, y, diversity='kappa')

    def test_diversity_measure(self):
        """``diversity_measure`` with its default metric returns a value."""
        self.decision_forest._trees = [
            self._stub_tree(prediction=vote) for vote in (1, 1, 0)
        ]
        x = self._two_instances()
        y = [1, 0]

        encoder = mock.MagicMock(spec=LabelEncoder)
        encoder.transform.return_value = [1, 0]
        self.decision_forest._encoder = encoder

        self.assertIsNotNone(self.decision_forest.diversity_measure(x, y))

    def test__validate_exception_not_fitted(self):
        """``_validate`` raises ``NotFittedError`` on a forest never fitted."""
        x = self._two_instances()
        self.assertRaises(NotFittedError,
                          self.decision_forest._validate, x, False)

    def test__validate_exception_n_instances(self):
        """``_validate`` raises ``ValueError`` for input of unexpected shape.

        The forest is told it has one feature while the input has three
        columns; presumably that mismatch is what triggers the error.
        """
        # Any non-empty stand-in for the trees; no attribute is configured.
        self.decision_forest._trees = self._stub_tree()
        x = self._two_instances()
        self.decision_forest._n_features = 1
        self.assertRaises(ValueError,
                          self.decision_forest._validate, x, False)

    def test__predict_on_tree(self):
        """``_predict_on_tree`` yields the tree's stubbed prediction per row."""
        tree = self._stub_tree(prediction=1)
        x = self._two_instances()

        resulted = self.decision_forest._predict_on_tree(x, tree, False)
        for wanted, got in zip([1, 1], resulted):
            self.assertEqual(wanted, got)
Ejemplo n.º 5
0
 def test_bootstrap_exception_none_value(self):
     """Constructing a forest with ``bootstrap=None`` must raise ``ValueError``."""
     invalid_bootstrap = None
     with self.assertRaises(ValueError):
         self.decision_forest = DecisionForestClassifier(
             bootstrap=invalid_bootstrap)
Ejemplo n.º 6
0
 def test_n_estimators_admissible_value(self):
     """The ``n_estimators`` passed to the constructor can be read back."""
     requested = 12
     forest = DecisionForestClassifier(n_estimators=requested)
     self.decision_forest = forest
     self.assertEqual(forest.n_estimators, requested)
Ejemplo n.º 7
0
 def test_n_estimators_none_value(self):
     """``n_estimators=None`` is accepted and stored as ``None``."""
     forest = DecisionForestClassifier(n_estimators=None)
     self.decision_forest = forest
     self.assertIsNone(forest.n_estimators)
Ejemplo n.º 8
0
 def test_n_estimators_exception_inadmissible_value(self):
     """Constructing a forest with ``n_estimators=-1`` must raise ``ValueError``."""
     invalid_n_estimators = -1
     with self.assertRaises(ValueError):
         self.decision_forest = DecisionForestClassifier(
             n_estimators=invalid_n_estimators)
Ejemplo n.º 9
0
from sklearn.model_selection import cross_val_score, KFold
from examples import load_data
from proactive_forest.estimator import DecisionForestClassifier, ProactiveForestClassifier
import pandas as pd
import numpy as np

if __name__ == '__main__':
    # Compare both forests on the iris data with a hand-written 5-fold
    # cross-validation loop, collecting one score per fold for each model.
    X, y = load_data.load_iris()

    pf = ProactiveForestClassifier(alpha=0.1, bootstrap=False)
    rf = DecisionForestClassifier(feature_selection='log',
                                  split_chooser='best',
                                  bootstrap=False)

    # A cross_val_score-based variant was left disabled here:
    #   pf_scores = cross_val_score(pf, X, y, cv=5)
    #   print('Processed: Proactive Forest')
    #   rf_scores = cross_val_score(rf, X, y, cv=5)
    #   print('Processed: Random Forest')
    pf_scores = []
    rf_scores = []

    # ``random_state`` only takes effect together with ``shuffle=True``;
    # current scikit-learn raises ValueError when it is set without
    # shuffling, so shuffling is enabled to get reproducible random folds.
    skf = KFold(n_splits=5, shuffle=True, random_state=4)
    for train_index, test_index in skf.split(X, y):
        # ``split`` yields positional row numbers, hence ``iloc`` (``loc``
        # would only work while the index happens to be 0..n-1).
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        pf.fit(X_train, y_train)
        rf.fit(X_train, y_train)
        pf_scores.append(pf.score(X_test, y_test))
        rf_scores.append(rf.score(X_test, y_test))