Beispiel #1
0
 def test_toy_data_rand(self):
     """Check forest outputs on the stored toy data against references.

     Fits one forest with the train/train kernel block and one with
     ``kernel='iid'`` on the first fold, predicts on the second fold and
     compares feature scores and predictions with the arrays stored in
     ``self.data``.
     """
     targets = self.data['y_conf'].value
     full_kernel = self.data['kernel'].value
     features = self.data['X'].value
     # This is a non-random cross validation
     training, test = utils.crossValidationScheme(2, targets.size)

     # Forest that is given the train/train block of the kernel.
     train_block = full_kernel[SP.ix_(training, training)]
     lm_forest = MF(kernel=train_block, sampsize=.5, verbose=0,
                    n_estimators=100)
     lm_forest.fit(features[training], targets[training])
     cross_block = full_kernel[SP.ix_(test, training)]
     response_tot = lm_forest.predict(features[test], cross_block)

     # Forest constructed with kernel='iid'.
     random_forest = MF(kernel='iid')
     random_forest.fit(features[training], targets[training])
     response_iid = random_forest.predict(features[test])
     # Kernel forest again, this time without passing a test kernel.
     response_fixed = lm_forest.predict(features[test])

     # Feature scores: the signed sum of the deviations from the stored
     # scores must stay below 10 in absolute value.
     score_checks = (
         (lm_forest.log_importance, 'feature_scores_lmf'),
         (random_forest.log_importance, 'feature_scores_rf'),
     )
     for scores, key in score_checks:
         deviation = (scores - self.data[key].value).sum()
         self.assertTrue(SP.absolute(deviation) < 10)

     # Predictions: summed absolute deviation, one bound per response.
     response_checks = (
         ('response_tot', response_tot, 2),
         ('response_fixed', response_fixed, 4),
         ('response_iid', response_iid, 8),
     )
     for key, predicted, bound in response_checks:
         deviation = SP.absolute(self.data[key] - predicted).sum()
         self.assertTrue(SP.absolute(deviation) < bound)
Beispiel #2
0
 def test_toy_data_rand(self):
     """Regression check of the toy-data forests against ``self.data``.

     A forest using the train/train kernel block and a forest using
     ``kernel='iid'`` are fitted on one fold and evaluated on the other;
     their feature scores and predictions must stay close to the stored
     reference arrays.
     """
     def within(value, bound):
         # Common form of every check below: |value| < bound.
         self.assertTrue(SP.absolute(value) < bound)

     y_conf = self.data['y_conf'].value
     kernel = self.data['kernel'].value
     X = self.data['X'].value
     # This is a non-random cross validation
     folds = utils.crossValidationScheme(2, y_conf.size)
     training, test = folds

     mixed = MF(kernel=kernel[SP.ix_(training, training)],
                sampsize=.5, verbose=0, n_estimators=100)
     mixed.fit(X[training], y_conf[training])
     predicted_tot = mixed.predict(X[test],
                                   kernel[SP.ix_(test, training)])

     plain = MF(kernel='iid')
     plain.fit(X[training], y_conf[training])
     predicted_iid = plain.predict(X[test])
     # Same kernel forest, predicted without a test kernel.
     predicted_fixed = mixed.predict(X[test])

     scores_mixed = mixed.log_importance
     scores_plain = plain.log_importance

     # Feature scores are compared through the signed sum of deviations.
     stored_scores = self.data['feature_scores_lmf'].value
     within((scores_mixed - stored_scores).sum(), 10)
     stored_scores = self.data['feature_scores_rf'].value
     within((scores_plain - stored_scores).sum(), 10)

     # Predictions are compared through the summed absolute deviation.
     gap = SP.absolute(self.data['response_tot'] - predicted_tot)
     within(gap.sum(), 2)
     gap = SP.absolute(self.data['response_fixed'] - predicted_fixed)
     within(gap.sum(), 4)
     gap = SP.absolute(self.data['response_iid'] - predicted_iid)
     within(gap.sum(), 8)
Beispiel #3
0
    def test_covariate_shift(self):
        """Predict under covariate shift and bound the squared error.

        Training inputs are drawn around mean .7 (variance .5**2) and test
        inputs around mean 0 (variance .3**2).  A forest is fitted with the
        train/train kernel block and its depth-0 prediction on the test
        inputs is compared with ``self.polynom(x_test)``.
        """
        n_sample = 100

        # Biased training
        var_bias = .5**2
        mean_bias = .7
        train_inputs = SP.random.randn(n_sample)*SP.sqrt(var_bias) + mean_bias
        train_targets = self.complete_sample(train_inputs)

        # Unbiased test set
        var = .3**2
        mean = 0
        test_inputs = SP.random.randn(n_sample)*SP.sqrt(var) + mean

        all_inputs = SP.hstack((train_inputs, test_inputs))
        n_train, n_total = train_inputs.size, all_inputs.size

        # Kernel over all samples: the quadratic kernel plus 10 times the
        # outer product of the inputs, passed through utils.scale_K.
        as_column = all_inputs.reshape(-1, 1)
        as_row = all_inputs.reshape(1, -1)
        kernel = utils.getQuadraticKernel(all_inputs, d=1)
        kernel = kernel + 10 * SP.dot(as_column, as_row)
        kernel = utils.scale_K(kernel)

        # Train/train block for fitting, test/train block for predicting.
        train_rows = SP.arange(n_train)
        test_rows = SP.arange(n_train, n_total)
        kernel_train = kernel[SP.ix_(train_rows, train_rows)]
        kernel_test = kernel[SP.ix_(test_rows, train_rows)]

        forest = MF(n_estimators=100, kernel=kernel_train, min_depth=0,
                    subsampling=False)
        forest.fit(train_inputs.reshape(-1, 1), train_targets.reshape(-1, 1))
        response_gp = forest.predict(test_inputs.reshape(-1, 1), kernel_test,
                                     depth=0)
        squared_error = ((response_gp - self.polynom(test_inputs))**2).sum()
        self.assertTrue(squared_error < 2.4)
Beispiel #4
0
    def test_covariate_shift(self):
        """Fit on shifted inputs, predict on unshifted ones, bound the error.

        The summed squared difference between the depth-0 forest
        prediction and ``self.polynom`` on the test inputs must be
        below 2.4.
        """
        n_sample = 100

        def draw(mean, var):
            # n_sample normal draws with the requested mean and variance.
            return SP.random.randn(n_sample) * SP.sqrt(var) + mean

        # Biased training
        x_train = draw(.7, .5**2)
        y_train = self.complete_sample(x_train)

        # Unbiased test set
        x_test = draw(0, .3**2)

        x_complete = SP.hstack((x_train, x_test))
        split = x_train.size

        # Quadratic kernel plus a scaled outer product of the inputs.
        outer = SP.dot(x_complete.reshape(-1, 1), x_complete.reshape(1, -1))
        kernel = utils.scale_K(
            utils.getQuadraticKernel(x_complete, d=1) + 10 * outer)

        # Index arrays for the training and the test part of the kernel.
        first_part = SP.arange(split)
        second_part = SP.arange(split, x_complete.size)
        kernel_train = kernel[SP.ix_(first_part, first_part)]
        kernel_test = kernel[SP.ix_(second_part, first_part)]

        options = dict(n_estimators=100,
                       kernel=kernel_train,
                       min_depth=0,
                       subsampling=False)
        mf = MF(**options)
        mf.fit(x_train.reshape(-1, 1), y_train.reshape(-1, 1))
        response_gp = mf.predict(x_test.reshape(-1, 1), kernel_test, depth=0)

        residual = response_gp - self.polynom(x_test)
        self.assertTrue((residual**2).sum() < 2.4)
Beispiel #5
0
 def test_forest_stump_recycling(self):
     """Refitting with ``recycle=True`` must give nearly the same output.

     The summed squared difference between the predictions before and
     after the recycled refit has to stay below .7.
     """
     self.setUp(m=5)
     SP.random.seed(42)
     forest_options = dict(fit_optimal_depth=True, kernel='iid',
                           build_to_opt_depth=True)
     model = MF(**forest_options)

     # First fit, predicted at the model's opt_depth.
     model.fit(self.x[self.train], self.y[self.train])
     before = model.predict(self.x[self.test], depth=model.opt_depth)

     # Second fit on the same data with recycle=True.
     model.fit(self.x[self.train], self.y[self.train], recycle=True)
     after = model.predict(self.x[self.test], depth=model.opt_depth)

     squared_gap = ((before - after)**2).sum()
     self.assertGreater(.7, squared_gap)
Beispiel #6
0
 def test_forest_stump_recycling(self):
     """Compare predictions of a fresh fit and a ``recycle=True`` refit.

     Both predictions are taken at ``model.opt_depth``; their summed
     squared difference must be smaller than .7.
     """
     self.setUp(m=5)
     SP.random.seed(42)
     model = MF(fit_optimal_depth=True,
                kernel='iid',
                build_to_opt_depth=True)

     def predict_on_test():
         # opt_depth is read at call time, i.e. after the latest fit.
         return model.predict(self.x[self.test], depth=model.opt_depth)

     model.fit(self.x[self.train], self.y[self.train])
     fresh = predict_on_test()
     model.fit(self.x[self.train], self.y[self.train], recycle=True)
     recycled = predict_on_test()

     difference = fresh - recycled
     self.assertGreater(.7, (difference**2).sum())
Beispiel #7
0
    def test_delta_updating(self):
        """Fit forests with and without delta updating on confounded data.

        Simulates a step-function signal in the first feature plus
        Gaussian noise and a kernel-correlated confounder.  One forest is
        fitted with ``update_delta=False`` (``max_depth=1``) and one with
        ``update_delta=True`` (``max_depth=5``), both on the same training
        kernel.  Each test-set prediction must contain one finite value
        per test sample.
        """
        n_sample = 100
        # n_sample x 2 feature matrix: an evenly spaced grid on [0, 1) in
        # the first column, uniform random numbers in the second.
        X = SP.empty((n_sample, 2))
        X[:, 0] = SP.arange(0, 1, 1.0 / n_sample)
        X[:, 1] = SP.random.rand(n_sample)
        sd_noise = .5
        sd_conf = .5
        noise = SP.random.randn(n_sample, 1) * sd_noise

        # The true delta equals (sd_noise**2)/(sd_conf**2).
        # The fixed effect is a step function of the first column (1 where
        # it exceeds .5, else 0) with independent Gaussian noise on top.
        y_fixed = (X[:, 0:1] > .5) * 1.0
        y_fn = y_fixed + noise

        # Random split: 66% of the samples for training, the rest for test.
        training_sample = SP.zeros(n_sample, dtype='bool')
        training_sample[SP.random.permutation(n_sample)
                        [:SP.int_(.66 * n_sample)]] = True
        test_sample = ~training_sample

        # Quadratic kernel on the first column plus a small diagonal term.
        kernel = utils.getQuadraticKernel(X[:, 0], d=0.0025) +\
            1e-3*SP.eye(n_sample)
        # Confounder: one multivariate normal draw with covariance
        # `kernel`, scaled by sd_conf.
        y_conf = sd_conf * SP.random.multivariate_normal(
            SP.zeros(n_sample), kernel, 1).reshape(-1, 1)
        y_tot = y_fn + y_conf
        # Selects rows and columns
        kernel_train = kernel[SP.ix_(training_sample, training_sample)]
        kernel_test = kernel[SP.ix_(test_sample, training_sample)]

        # Forest without delta updating.
        lm_forest = MF(kernel=kernel_train,
                       update_delta=False,
                       max_depth=1,
                       verbose=0)
        lm_forest.fit(X[training_sample], y_tot[training_sample])
        response_lmf = lm_forest.predict(X[test_sample], k=kernel_test)

        # Forest with delta updating, on the same kernel.
        random_forest = MF(kernel=kernel_train,
                           update_delta=True,
                           max_depth=5,
                           verbose=0)
        random_forest.fit(X[training_sample], y_tot[training_sample])
        response_rf = random_forest.predict(X[test_sample], k=kernel_test)

        # No reference values exist for this random data, so only verify
        # that each forest returns one finite prediction per test sample.
        n_test = test_sample.sum()
        for response in (response_lmf, response_rf):
            self.assertEqual(len(response), n_test)
            self.assertTrue(SP.isfinite(response).all())
Beispiel #8
0
    def test_delta_updating(self):
        """Run forests with ``update_delta`` off and on; check the outputs.

        The data are a step function of the first feature plus Gaussian
        noise and a confounder drawn from a multivariate normal whose
        covariance is the kernel.  Both forests use the same training
        kernel.  Each prediction on the held-out samples must have one
        finite entry per test sample.
        """
        n_sample = 100
        # n_sample x 2 feature matrix: first column is an evenly spaced
        # grid on [0, 1), second column is uniform random.
        X = SP.empty((n_sample, 2))
        X[:, 0] = SP.arange(0, 1, 1.0/n_sample)
        X[:, 1] = SP.random.rand(n_sample)
        sd_noise = .5
        sd_conf = .5
        noise = SP.random.randn(n_sample, 1)*sd_noise

        # The true delta equals (sd_noise**2)/(sd_conf**2).
        # Here, the fixed effect is a step function of the first column
        # in X (1 above .5, else 0) plus independent gaussian noise.
        y_fixed = (X[:, 0:1] > .5)*1.0
        y_fn = y_fixed + noise

        # Divide into training and test sample using 66% of the data for
        # training.
        training_sample = SP.zeros(n_sample, dtype='bool')
        training_sample[
            SP.random.permutation(n_sample)[:SP.int_(.66*n_sample)]] = True
        test_sample = ~training_sample

        # Quadratic kernel on the first column plus a small diagonal term.
        kernel = utils.getQuadraticKernel(X[:, 0], d=0.0025) +\
            1e-3*SP.eye(n_sample)
        # The confounding part of y is one draw with covariance `kernel`,
        # scaled by sd_conf.
        y_conf = sd_conf*SP.random.multivariate_normal(SP.zeros(n_sample),
                                                       kernel, 1).reshape(-1, 1)
        y_tot = y_fn + y_conf
        # Selects rows and columns
        kernel_train = kernel[SP.ix_(training_sample, training_sample)]
        kernel_test = kernel[SP.ix_(test_sample, training_sample)]

        # Forest without delta updating.
        lm_forest = MF(kernel=kernel_train, update_delta=False, max_depth=1,
                       verbose=0)
        lm_forest.fit(X[training_sample], y_tot[training_sample])
        response_lmf = lm_forest.predict(X[test_sample], k=kernel_test)

        # Forest with delta updating, fitted with the same kernel.
        random_forest = MF(kernel=kernel_train, update_delta=True, max_depth=5,
                           verbose=0)
        random_forest.fit(X[training_sample], y_tot[training_sample])
        response_rf = random_forest.predict(X[test_sample], k=kernel_test)

        # The data are random, so there is nothing to compare against;
        # check at least that every test sample got a finite prediction.
        n_test = test_sample.sum()
        self.assertEqual(len(response_lmf), n_test)
        self.assertEqual(len(response_rf), n_test)
        self.assertTrue(SP.isfinite(response_lmf).all())
        self.assertTrue(SP.isfinite(response_rf).all())
Beispiel #9
0
def mixed_forest_predictions(Y,
                             which_col,
                             X,
                             K,
                             Itrain,
                             Itest,
                             conditional=False,
                             **kwargs):
    """Fit an ``LMF`` model on one column of ``Y`` and predict held-out rows.

    Args:
        Y: 2-D array of targets; column ``which_col`` is the response.
        which_col: Integer index of the response column in ``Y``.  Negative
            indices are supported.
        X: Feature matrix with one row per sample.
        K: Kernel matrix, indexed by sample along both axes.
        Itrain: Row selector for the training samples.
        Itest: Row selector for the samples to predict.
        conditional: If true, all other columns of ``Y`` are appended to
            ``X`` as additional features.
        **kwargs: Forwarded unchanged to the ``LMF`` constructor.

    Returns:
        Whatever ``LMF.predict`` returns for the ``Itest`` rows.
    """
    y = Y[:, which_col]
    if conditional:
        # np.delete resolves negative indices.  Comparing a loop counter
        # with a negative ``which_col`` would never match and would leak
        # the response column into the features.
        other_columns = np.delete(Y, which_col, axis=1)
        if other_columns.shape[1]:
            X = np.column_stack((X, other_columns))

    # Train/train block of the kernel for fitting, test/train block for
    # prediction.
    lm_forest = LMF(kernel=K[Itrain, :][:, Itrain], **kwargs)
    lm_forest.fit(X[Itrain, :], y[Itrain])
    predictions = lm_forest.predict(X[Itest, :], K[Itest, :][:, Itrain])

    return predictions
Beispiel #10
0
 def test_depth_building(self):
     """Growing the forest further must not change opt-depth predictions.

     A forest limited to ``max_depth=3`` is fitted, predicted at
     ``model.opt_depth``, grown with ``model.further()`` and predicted
     again at ``model.opt_depth``; both predictions must agree.
     """
     self.setUp(m=10)
     X = self.x.copy()
     X -= X.mean(axis=0)
     X /= X.std(axis=0)
     kernel = SP.dot(X, X.T)
     train = SP.where(self.train)[0]
     test = SP.where(~self.train)[0]
     model = MF(fit_optimal_depth=True, max_depth=3,
                kernel=kernel[SP.ix_(train, train)])
     # NOTE(review): the model is fitted on the raw self.x but predicts on
     # the standardized copy X -- confirm this is intended.
     model.fit(self.x[self.train], self.y[self.train],
               fit_optimal_depth=True)
     # SP.ix_ selects the full test x train block.  Indexing with
     # kernel[test, train] would pair the two index arrays element-wise
     # and return a 1-D vector instead of the cross kernel.
     prediction_1 = model.predict(X[test],
                                  k=kernel[SP.ix_(test, train)],
                                  depth=model.opt_depth)
     # Grow to end
     model.further()
     # Prediction again
     prediction_2 = model.predict(X[test],
                                  k=kernel[SP.ix_(test, train)],
                                  depth=model.opt_depth)
     self.assertEqual((prediction_1 - prediction_2).sum(), 0.0)
Beispiel #11
0
    def test_normalization_kernel(self):
        """Predictions must not change when the features are standardized.

        A forest is fitted on random binary features and again, with the
        same seed and kernel, on the column-wise standardized features.
        The sums of the two test predictions must be equal.
        """
        n = 50
        m = 100
        X = (SP.random.rand(n, m) > .5)*1.
        X_test = (SP.random.rand(10, m) > .5)*1.
        K = utils.estimateKernel(X)
        y = SP.random.rand(n, 1)
        SP.random.seed(1)
        mf = MF(kernel=K)
        mf.fit(X, y)
        results_1 = mf.predict(X_test)

        # Standardize the columns in place.  A constant column has zero
        # standard deviation; dividing by it would turn the column into
        # NaN, so such columns are only centered (they become all zeros).
        X -= X.mean(axis=0)
        std_train = X.std(axis=0)
        std_train[std_train == 0] = 1.
        X /= std_train
        X_test -= X_test.mean(axis=0)
        std_test = X_test.std(axis=0)
        std_test[std_test == 0] = 1.
        X_test /= std_test

        # Same seed and kernel as for the first fit.
        SP.random.seed(1)
        mf = MF(kernel=K)
        mf.fit(X, y)
        results_2 = mf.predict(X_test)
        self.assertEqual(results_1.sum(), results_2.sum())
Beispiel #12
0
    def test_normalization_kernel(self):
        """Compare forest predictions on raw and on standardized features.

        The kernel is estimated once from the raw training features and
        used for both fits; the random seed is reset to 1 before each fit.
        The two test predictions must have the same sum.
        """
        n = 50
        m = 100
        X = (SP.random.rand(n, m) > .5) * 1.
        X_test = (SP.random.rand(10, m) > .5) * 1.
        K = utils.estimateKernel(X)
        y = SP.random.rand(n, 1)
        SP.random.seed(1)
        mf = MF(kernel=K)
        mf.fit(X, y)
        results_1 = mf.predict(X_test)

        # In-place column standardization of both feature matrices.
        for features in (X, X_test):
            features -= features.mean(axis=0)
            scale = features.std(axis=0)
            # A constant column has zero spread; dividing by zero would
            # yield NaN, so leave it centered (all zeros) instead.
            scale[scale == 0] = 1.
            features /= scale

        SP.random.seed(1)
        mf = MF(kernel=K)
        mf.fit(X, y)
        results_2 = mf.predict(X_test)
        self.assertEqual(results_1.sum(), results_2.sum())
Beispiel #13
0
 def test_depth_building(self):
     """Predictions at ``opt_depth`` must survive further tree growth.

     The forest is fitted with ``max_depth=3`` and predicted at
     ``model.opt_depth`` before and after ``model.further()``; the
     difference of the two predictions must sum to zero.
     """
     self.setUp(m=10)
     X = self.x.copy()
     X -= X.mean(axis=0)
     X /= X.std(axis=0)
     kernel = SP.dot(X, X.T)
     train = SP.where(self.train)[0]
     test = SP.where(~self.train)[0]
     model = MF(fit_optimal_depth=True,
                max_depth=3,
                kernel=kernel[SP.ix_(train, train)])
     # NOTE(review): fitting uses the raw self.x while prediction uses the
     # standardized copy X -- confirm this is intended.
     model.fit(self.x[self.train],
               self.y[self.train],
               fit_optimal_depth=True)
     # The cross kernel is the test x train block, which needs SP.ix_.
     # Plain kernel[test, train] pairs the index arrays element-wise and
     # yields a 1-D vector.
     prediction_1 = model.predict(X[test],
                                  k=kernel[SP.ix_(test, train)],
                                  depth=model.opt_depth)
     # Grow to end
     model.further()
     # Prediction again
     prediction_2 = model.predict(X[test],
                                  k=kernel[SP.ix_(test, train)],
                                  depth=model.opt_depth)
     self.assertEqual((prediction_1 - prediction_2).sum(), 0.0)