Example #1
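Both examples below appear to assume scipy imported as SP, a mixed-forest estimator MF, and kernel helpers in a utils module; the listing itself does not show the imports. A hypothetical header, with module paths that are assumptions only:

    import scipy as SP
    # Hypothetical module paths -- the listing does not show where these come from
    from lmm_forest import Forest as MF   # assumed mixed-forest estimator
    import lmm_forest.utils as utils      # assumed home of getQuadraticKernel, scale_K
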
    def test_covariate_shift(self):
        n_sample = 100
        # Biased training sample: shifted mean and larger variance than the test set
        var_bias = .5**2
        mean_bias = .7
        x_train = SP.random.randn(n_sample)*SP.sqrt(var_bias) + mean_bias
        y_train = self.complete_sample(x_train)

        # Unbiased test set
        var = .3**2
        mean = 0

        x_test = SP.random.randn(n_sample)*SP.sqrt(var) + mean
        x_complete = SP.hstack((x_train, x_test))

        # Pooled kernel over train and test points: quadratic kernel plus a
        # scaled linear kernel, normalized by scale_K
        kernel = utils.getQuadraticKernel(x_complete, d=1) +\
            10 * SP.dot(x_complete.reshape(-1, 1), x_complete.reshape(1, -1))
        kernel = utils.scale_K(kernel)
        # Train x train and test x train blocks of the pooled kernel
        kernel_train = kernel[SP.ix_(SP.arange(x_train.size),
                                     SP.arange(x_train.size))]
        kernel_test = kernel[SP.ix_(SP.arange(x_train.size, x_complete.size),
                                    SP.arange(x_train.size))]

        # Fit the mixed forest on the biased training sample, predict at the
        # unbiased test points, and check the squared error against a tolerance
        mf = MF(n_estimators=100, kernel=kernel_train, min_depth=0,
                subsampling=False)
        mf.fit(x_train.reshape(-1, 1), y_train.reshape(-1, 1))
        response_gp = mf.predict(x_test.reshape(-1, 1), kernel_test, depth=0)
        self.assertTrue(((response_gp - self.polynom(x_test))**2).sum() < 2.4)
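
A note on the indexing above: SP.ix_ takes two index vectors and forms an open mesh, so kernel[SP.ix_(rows, cols)] pulls out a rectangular block of the pooled kernel. A minimal self-contained sketch of the same slicing (plain numpy; the toy linear kernel is a hypothetical stand-in for the quadratic kernel used in the test):

    import numpy as np

    n_train, n_test = 3, 2
    n = n_train + n_test
    x = np.arange(n, dtype=float)
    # Toy linear kernel over the pooled sample (stand-in only)
    K = np.outer(x, x)
    # Rows and columns 0..n_train-1: the train x train block
    K_train = K[np.ix_(np.arange(n_train), np.arange(n_train))]
    # Rows n_train..n-1 against columns 0..n_train-1: the test x train block
    K_test = K[np.ix_(np.arange(n_train, n), np.arange(n_train))]
    assert K_train.shape == (n_train, n_train)
    assert K_test.shape == (n_test, n_train)
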
Example #2
    def test_delta_updating(self):
        n_sample = 100
        # An n_sample x 2 design matrix: an evenly spaced grid in the first
        # column, uniform random noise in the second
        X = SP.empty((n_sample, 2))
        X[:, 0] = SP.arange(0, 1, 1.0 / n_sample)
        X[:, 1] = SP.random.rand(n_sample)
        sd_noise = .5
        sd_conf = .5
        noise = SP.random.randn(n_sample, 1) * sd_noise

        # The true delta equals (sd_noise**2) / (sd_conf**2)
        # Here, the observed y is a step function of the first column of X
        # plus a little independent Gaussian noise
        y_fixed = (X[:, 0:1] > .5) * 1.0
        y_fn = y_fixed + noise

        # Divide into training and test sample using 2/3 of data for training
        training_sample = SP.zeros(n_sample, dtype='bool')
        training_sample[SP.random.permutation(n_sample)
                        [:SP.int_(.66 * n_sample)]] = True
        test_sample = ~training_sample

        kernel = utils.getQuadraticKernel(X[:, 0], d=0.0025) +\
            1e-3*SP.eye(n_sample)
        # The confounder is drawn from a zero-mean Gaussian with covariance
        # given by the kernel, then added to y_fn
        y_conf = sd_conf * SP.random.multivariate_normal(
            SP.zeros(n_sample), kernel, 1).reshape(-1, 1)
        y_tot = y_fn + y_conf
        # Select the train x train and test x train blocks of the kernel
        kernel_train = kernel[SP.ix_(training_sample, training_sample)]
        kernel_test = kernel[SP.ix_(test_sample, training_sample)]
        lm_forest = MF(kernel=kernel_train,
                       update_delta=False,
                       max_depth=1,
                       verbose=0)
        lm_forest.fit(X[training_sample], y_tot[training_sample])
        # Returns the prediction for the random effect
        response_lmf = lm_forest.predict(X[test_sample], k=kernel_test)

        # Fitting forest (delta-update)
        # Learn a random forest, not accounting for the confounding
        random_forest = MF(kernel=kernel_train,
                           update_delta=True,
                           max_depth=5,
                           verbose=0)
        random_forest.fit(X[training_sample], y_tot[training_sample])
        response_rf = random_forest.predict(X[test_sample], k=kernel_test)
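
For reference, y_conf above is a single draw from a zero-mean Gaussian whose covariance is the kernel, which is what makes the confounder vary smoothly along X[:, 0]. A self-contained sketch of that draw (numpy; the squared-exponential kernel is an assumed stand-in for utils.getQuadraticKernel):

    import numpy as np

    rng = np.random.default_rng(0)  # arbitrary seed, for reproducibility only
    n, sd_conf = 100, 0.5
    x = np.linspace(0, 1, n)
    # Assumed stand-in kernel, with the same small diagonal jitter as the test
    K = np.exp(-(x[:, None] - x[None, :]) ** 2 / (2 * 0.0025)) + 1e-3 * np.eye(n)
    # One correlated draw: y_conf ~ N(0, sd_conf**2 * K), reshaped to a column
    y_conf = sd_conf * rng.multivariate_normal(np.zeros(n), K).reshape(-1, 1)
    assert y_conf.shape == (n, 1)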