Ejemplo n.º 1
0
    def test_raise_exceptions(self):
        """Check that ``PolicyForest.fit`` rejects invalid hyper-parameters.

        Each entry of ``invalid_configs`` is a set of constructor keyword
        arguments; constructing a forest with it and fitting on a small
        data set (n=10 rows, 2 features) must raise ``ValueError``.
        """
        X, y, _ = self._get_policy_data(10, 2, 123)

        invalid_configs = (
            # integer max_samples (20) larger than the 10 generated rows
            {'n_estimators': 20, 'max_samples': 20},
            # float max_samples above 1
            {'n_estimators': 20, 'max_samples': 1.2},
            # unrecognised criterion name
            {'n_estimators': 4, 'criterion': 'peculiar'},
            # negative structural parameters
            {'n_estimators': 4, 'max_depth': -1},
            {'n_estimators': 4, 'min_samples_split': -1},
            {'n_estimators': 4, 'min_samples_leaf': -1},
            {'n_estimators': 4, 'min_weight_fraction_leaf': -1.0},
            # max_features (10) larger than the 2 generated features
            {'n_estimators': 4, 'max_features': 10},
            # min_balancedness_tol of .55
            {'n_estimators': 4, 'min_balancedness_tol': .55},
        )

        for bad_kwargs in invalid_configs:
            with np.testing.assert_raises(ValueError):
                PolicyForest(**bad_kwargs).fit(X, y)
Ejemplo n.º 2
0
    def test_pickling(self, ):
        """Check that a warm-started ``PolicyForest`` survives a joblib round trip.

        A forest is fitted with 4 estimators, grown to 8 via ``warm_start``,
        dumped with ``joblib`` and loaded back. The loaded forest must report
        the same ``n_estimators`` and produce the same predictions on ``X``.

        The dump is written inside a temporary directory so the test leaves
        no ``forest.jbl`` artifact behind in the current working directory.
        """
        # Local imports keep this block self-contained.
        import os
        import tempfile

        n_features = 2
        n = 10
        random_state = 123
        X, y, _ = self._get_policy_data(n, n_features, random_state)

        forest = PolicyForest(n_estimators=4,
                              warm_start=True,
                              random_state=123).fit(X, y)
        forest.n_estimators = 8
        forest.fit(X, y)
        pred1 = forest.predict(X)

        # The directory (and the dump in it) is removed when the block exits,
        # whether or not dump/load succeed.
        with tempfile.TemporaryDirectory() as tmp_dir:
            dump_path = os.path.join(tmp_dir, 'forest.jbl')
            joblib.dump(forest, dump_path)
            loaded_forest = joblib.load(dump_path)

        np.testing.assert_equal(loaded_forest.n_estimators,
                                forest.n_estimators)
        np.testing.assert_allclose(loaded_forest.predict(X), pred1)
Ejemplo n.º 3
0
 def _train_policy_forest(self, X, y, config, sample_weight=None):
     """Build a ``PolicyForest`` from ``config`` and fit it on ``(X, y)``.

     ``config`` is unpacked as keyword arguments to the ``PolicyForest``
     constructor; ``sample_weight`` is forwarded to ``fit``. Returns
     whatever ``fit`` returns.
     """
     estimator = PolicyForest(**config)
     fitted = estimator.fit(X, y, sample_weight=sample_weight)
     return fitted
Ejemplo n.º 4
0
    def test_warm_start(self):
        """Check ``warm_start`` growth against a forest fitted in one go.

        A forest fitted with 4 trees and then grown to 8 must match a forest
        fitted directly with 8 trees (same ``random_state``) in predictions,
        subsample indices and per-tree random states. Along the way the test
        expects a ``UserWarning`` when refitting without changing
        ``n_estimators`` and a ``ValueError`` when ``n_estimators`` is lowered.
        """
        X, y, _ = self._get_policy_data(10, 2, 123)

        def snapshot(model):
            # Collect everything the two forests are compared on.
            predictions = model.predict(X)
            subsample_inds = model.get_subsample_inds()
            states = [tree.random_state for tree in model]
            return predictions, subsample_inds, states

        grown = PolicyForest(n_estimators=4,
                             warm_start=True,
                             random_state=123)
        grown = grown.fit(X, y)

        # Refit with an unchanged number of estimators.
        with pytest.warns(UserWarning):
            grown.fit(X, y)

        # Fewer estimators than already fitted.
        grown.n_estimators = 3
        with np.testing.assert_raises(ValueError):
            grown.fit(X, y)

        # Grow from 4 to 8 estimators.
        grown.n_estimators = 8
        grown.fit(X, y)
        grown_pred, grown_inds, grown_states = snapshot(grown)

        fresh = PolicyForest(n_estimators=8,
                             warm_start=True,
                             random_state=123)
        fresh = fresh.fit(X, y)
        fresh_pred, fresh_inds, fresh_states = snapshot(fresh)

        np.testing.assert_allclose(grown_pred, fresh_pred)
        np.testing.assert_allclose(grown_inds, fresh_inds)
        np.testing.assert_allclose(grown_states, fresh_states)
Ejemplo n.º 5
0
    def test_non_standard_input(self):
        """Check that the estimators accept lists, tuples and pandas data frames.

        A reference ``PolicyForest`` is fitted on the arrays returned by
        ``_get_policy_data``; forests refitted on other input types
        (float32 / Fortran-ordered arrays, tuples, lists, data frames) must
        reproduce its predictions. A ``DRPolicyForest`` is then fitted on a
        duplicated, flattened version of the same data and compared against
        the reference forest and against its own output on tuple / data
        frame inputs.
        """
        forest_kwargs = dict(n_estimators=20, n_jobs=1, random_state=123)
        allclose = np.testing.assert_allclose

        X, y, _ = self._get_policy_data(2000, 2, 123)

        # Reference results on plain array input.
        reference = PolicyForest(**forest_kwargs).fit(X, y)
        ref_pred = reference.predict(X)
        ref_val = reference.predict_value(X)
        ref_prob = reference.predict_proba(X)
        assert ref_prob.shape == (X.shape[0], 2)
        ref_imp = reference.feature_importances()

        # float32 features with a Fortran-ordered target, predicted on tuples.
        refit = PolicyForest(**forest_kwargs).fit(X.astype(np.float32),
                                                  np.asfortranarray(y))
        allclose(ref_pred, refit.predict(tuple(X)))
        allclose(ref_val, refit.predict_value(tuple(X)))

        # Same container type used for both fitting and predicting.
        for as_container in (tuple, list, pd.DataFrame):
            refit = PolicyForest(**forest_kwargs).fit(as_container(X),
                                                      as_container(y))
            allclose(ref_pred, refit.predict(as_container(X)))
            allclose(ref_val, refit.predict_value(as_container(X)))
            allclose(ref_prob, refit.predict_proba(as_container(X)))

        # Long-format data for the DR forest: every row of X appears twice
        # and shares a group id; T is zeros with column 1 set to one, then
        # flattened alongside y.
        groups = np.repeat(np.arange(X.shape[0]), 2)
        X_raw = X.copy()
        X_long = np.repeat(X, 2, axis=0)
        treatment = np.zeros(y.shape)
        treatment[:, 1] = 1
        T_long = treatment.flatten()
        y_long = y.flatten()

        dr_forest = DRPolicyForest(
            model_regression=DummyRegressor(strategy='constant', constant=0),
            model_propensity=DummyClassifier(strategy='uniform'),
            featurizer=PolynomialFeatures(degree=1, include_bias=False),
            cv=GroupKFold(n_splits=2),
            **forest_kwargs)
        dr_forest = dr_forest.fit(y_long, T_long, X=X_long, groups=groups)

        # Only rows whose first feature exceeds .1 in absolute value are
        # compared against the reference forest.
        keep = np.abs(X_raw[:, 0]) > .1
        allclose(ref_pred[keep], dr_forest.predict(X_raw[keep]))
        value_gap = ref_val[keep, 1] - ref_val[keep, 0]
        allclose(value_gap,
                 dr_forest.predict_value(X_raw[keep]).flatten(),
                 atol=.08)
        allclose(ref_imp, dr_forest.feature_importances(), atol=1e-4)
        allclose(ref_imp, dr_forest.feature_importances_, atol=1e-4)

        # The DR forest must give the same answers across input types.
        dr_pred = dr_forest.predict(X_long)
        dr_val = dr_forest.predict_value(X_long)
        dr_prob = dr_forest.predict_proba(X_long)
        allclose(dr_pred, dr_forest.predict(tuple(X_long)))
        allclose(dr_val, dr_forest.predict_value(tuple(X_long)))
        allclose(dr_pred, dr_forest.predict(pd.DataFrame(X_long)))
        allclose(dr_val, dr_forest.predict_value(pd.DataFrame(X_long)))
        allclose(dr_prob, dr_forest.predict_proba(pd.DataFrame(X_long)))