import os
import unittest

import h5py
import scipy as SP

# The mixed forest class (MF) and the utils helpers come from the
# lmm-forest package; the module header with their exact import paths is
# not part of this excerpt.


class TestLmmForest(unittest.TestCase):
    # Class name assumed for this excerpt: the methods below reference
    # self.data and the unittest assertion API.

    def test_toy_data_rand(self):
        y_conf = self.data['y_conf'].value
        kernel = self.data['kernel'].value
        X = self.data['X'].value
        # This is a non-random (deterministic) cross-validation split.
        (training, test) = utils.crossValidationScheme(2, y_conf.size)
        # Mixed forest trained with the training block of the kernel.
        lm_forest = MF(kernel=kernel[SP.ix_(training, training)],
                       sampsize=.5, verbose=0, n_estimators=100)
        lm_forest.fit(X[training], y_conf[training])
        # Prediction using the test-vs-training cross-covariance.
        response_tot = lm_forest.predict(X[test],
                                         kernel[SP.ix_(test, training)])
        # Standard random forest baseline (iid noise kernel).
        random_forest = MF(kernel='iid')
        random_forest.fit(X[training], y_conf[training])
        response_iid = random_forest.predict(X[test])
        # Fixed-effect part only: predict without passing a kernel.
        response_fixed = lm_forest.predict(X[test])
        feature_scores_lmf = lm_forest.log_importance
        feature_scores_rf = random_forest.log_importance
        # Consistency checks against the reference results stored in the
        # toy-data HDF5 file.
        err = (feature_scores_lmf -
               self.data['feature_scores_lmf'].value).sum()
        self.assertTrue(SP.absolute(err) < 10)
        err = (feature_scores_rf -
               self.data['feature_scores_rf'].value).sum()
        self.assertTrue(SP.absolute(err) < 10)
        err = SP.absolute(self.data['response_tot'] - response_tot).sum()
        self.assertTrue(SP.absolute(err) < 2)
        err = SP.absolute(self.data['response_fixed'] -
                          response_fixed).sum()
        self.assertTrue(SP.absolute(err) < 4)
        err = SP.absolute(self.data['response_iid'] - response_iid).sum()
        self.assertTrue(SP.absolute(err) < 8)
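
    # A minimal sketch of the behavior assumed of utils.crossValidationScheme
    # above: deterministically split the n sample indices into k contiguous,
    # equally sized folds (hence the "non-random" comment in the test). The
    # real lmm-forest helper may differ in detail; this stand-in is purely
    # illustrative and is not used by the tests.
    @staticmethod
    def _cross_validation_scheme_sketch(k, n):
        indices = list(range(n))
        fold_size = n // k
        return [indices[i * fold_size:(i + 1) * fold_size]
                for i in range(k)]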

    def setUp(self, n=100, m=1):
        # Load the reference toy data shipped alongside the tests.
        self.dir_name = os.path.dirname(os.path.realpath(__file__))
        self.data = h5py.File(
            os.path.join(self.dir_name,
                         'test_data/lmm_forest_toy_data.h5'), 'r')
        # Fix the seed so the simulated data are reproducible.
        SP.random.seed(1)
        self.x, self.y = utils.lin_data_cont_predictors(n=n, m=m)
        self.n, self.m = self.x.shape
        # Deterministic two-fold split into training and test indices.
        self.train, self.test = utils.crossValidationScheme(2, self.n)
        self.n_estimators = 100
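

# Standard unittest entry point, assumed here since the original excerpt
# does not show the end of the module.
if __name__ == '__main__':
    unittest.main()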