예제 #1
0
    def test_anytime(self):
        # Goal: feature weights should stay as evenly distributed as possible.
        # Previously seen features are re-selected with very little restraint,
        # which can distort their earlier weight distribution, and nothing
        # corrects that afterwards -- so only loose bounds are enforced here.
        new_feature_msg = (
            "The new feature should have the weight equivalent to the average weight of all the previous features"
        )

        forest = RF(y, mtry=0.75, n_jobs=30, seed=2001)

        # Start with a batch of 20 feature columns, then append a single one.
        initial_batch = random.randint(0, 10, (5, 20))
        forest.fit(initial_batch)
        extra_column = random.randint(0, 10, (5, 1))
        weights = forest.fit(extra_column)

        # Every weight must lie strictly inside mean +/- 3 standard deviations.
        center = mean(weights)
        spread = std(weights)
        self.assertTrue(min(weights) > (center - 3 * spread))
        self.assertTrue(max(weights) < (center + 3 * spread))

        # The 21st feature (index 20) should match the mean of the first 20.
        n_previous = 20
        assert_almost_equal(
            mean(weights[0:n_previous]),
            weights[n_previous],
            err_msg=new_feature_msg,
        )

        # Append one more column: index 21 should match the mean of the 21 before it.
        extra_column = random.randint(0, 10, (5, 1))
        weights = forest.fit(extra_column)
        n_previous = 21
        assert_almost_equal(
            mean(weights[0:n_previous]),
            weights[n_previous],
            err_msg=new_feature_msg,
        )
예제 #2
0
    def test_incremental_learning(self):
        # Workflow under test: create the forest, fit a first feature block,
        # score, fit a further block, then score on both blocks stacked
        # column-wise.
        leak_msg = "Feature X0 is a leaking feature - overfit on it!"

        forest = RF(y, mtry=0.8, n_jobs=2, seed=2001)

        forest.fit(X0)
        scores_after_x0 = forest.score(X0)

        forest.fit(X1)
        scores_after_x1 = forest.score(column_stack((X0, X1)))

        # Both sets of predictions are expected to reproduce y.
        for scores in (scores_after_x0, scores_after_x1):
            assert_almost_equal(scores, y, err_msg=leak_msg)
예제 #3
0
class RFWrapper:
    """
    Adapter around an online random forest (``RF``).

    Bundles the forest with a fixed evaluation set (``X_t``, ``y_t``) so the
    AUC can be queried after each incremental ``fit`` call.
    """
    def __init__(self, y, X_t, y_t, n_jobs, mtry, random_state):
        """Create the underlying forest and remember the evaluation data.

        ``y`` is handed to ``RF`` as its first argument; ``random_state`` is
        forwarded as the forest's ``seed``. ``X_t`` and ``y_t`` are stored
        unchanged for later use by ``get_auc``.
        """
        self.rf = RF(y, seed=random_state, mtry=mtry, n_jobs=n_jobs)
        self.X_t, self.y_t = X_t, y_t

    def fit(self, x):
        """Pass ``x`` to the wrapped forest's ``fit``; its result is discarded."""
        forest = self.rf
        forest.fit(x)

    def get_auc(self):
        """Return the ROC AUC of the forest's scores on the stored evaluation set.

        Label ``1`` in ``y_t`` is treated as the positive class.
        """
        scores = self.rf.score(self.X_t)
        # Only the two rates are needed for the area; thresholds are dropped.
        false_pos_rate, true_pos_rate, _ = metrics.roc_curve(
            self.y_t, scores, pos_label=1
        )
        return metrics.auc(false_pos_rate, true_pos_rate)
예제 #4
0
    def test_seed_diff(self):
        # Forests created with different seeds must not produce identical weights.
        weights_per_seed = []
        for seed in (2001, 2002):
            forest = RF(y, mtry=0.75, n_jobs=20, seed=seed)
            forest.fit(X1)
            weights_per_seed.append(forest.fit(X2))

        first, second = weights_per_seed
        self.assertFalse(all(first == second))
예제 #5
0
    def test_seed(self):
        # Two forests created with the same seed and fed the same data must
        # end up with (almost) equal weights.
        weights_per_run = []
        for _ in range(2):
            forest = RF(y, mtry=0.75, n_jobs=20, seed=2001)
            forest.fit(X1)
            weights_per_run.append(forest.fit(X2))

        assert_almost_equal(weights_per_run[0], weights_per_run[1])
예제 #6
0
 def test_vector(self):
     # Smoke test: fit the forest on y itself. There are no assertions, so
     # the test passes as long as fit() does not raise.
     # NOTE(review): presumably checks that a 1-D vector is accepted as
     # input -- confirm against the shape of y.
     forest = RF(y, mtry=0.8, n_jobs=2, seed=2001)
     forest.fit(y)