def setUp(self):
    """Build the fixtures shared by the tests: a multi-grain scan layer,
    pooling and concat layers, and both cascade variants (10-class setup).
    """
    scan_windows = [Window(7, 7, 2, 2, 0, 0), Window(11, 11, 2, 2, 0, 0)]
    extra_cfg = ExtraRandomForestConfig(n_estimators=40, min_samples_leaf=10)
    rf_cfg = RandomForestConfig(n_estimators=40, min_samples_leaf=10)
    # The same two config objects serve as the (extra-trees, random-forest)
    # estimator pair for each of the two scan windows.
    self.mgs = MultiGrainScanLayer(
        windows=scan_windows,
        est_for_windows=[[extra_cfg, rf_cfg], [extra_cfg, rf_cfg]],
        n_class=10)
    self.poolayer = PoolingLayer(
        pools=[[MaxPooling(), MaxPooling()], [MaxPooling(), MaxPooling()]])
    self.concat_layer = ConcatLayer()
    # Cascade ensemble: two extra-trees forests followed by two random forests.
    self.est_configs = (
        [ExtraRandomForestConfig(n_estimators=40) for _ in range(2)]
        + [RandomForestConfig(n_estimators=40) for _ in range(2)])
    save_root = get_data_save_base()
    self.cascade = CascadeLayer(
        est_configs=self.est_configs,
        n_classes=10,
        keep_in_mem=True,
        data_save_dir=osp.join(save_root, 'test_layer', 'cascade'))
    self.auto_cascade = AutoGrowingCascadeLayer(
        est_configs=self.est_configs,
        early_stopping_rounds=3,
        data_save_rounds=4,
        stop_by_test=True,
        n_classes=10,
        data_save_dir=osp.join(save_root, 'test_layer', 'auto_cascade'))
def _init(self, distribute=False):
    """Create a plain cascade layer and an auto-growing cascade layer for a
    binary task.

    :param distribute: forwarded to both layers' ``distribute`` flag.
    :return: tuple of (CascadeLayer, AutoGrowingCascadeLayer).
    """
    # Two extra-trees forests followed by two random forests, 20 trees each.
    self.est_configs = (
        [ExtraRandomForestConfig(n_estimators=20) for _ in range(2)]
        + [RandomForestConfig(n_estimators=20) for _ in range(2)])
    save_root = get_data_save_base()
    cascade = CascadeLayer(
        est_configs=self.est_configs,
        n_classes=2,
        data_save_dir=osp.join(save_root, 'test_layer', 'cascade'),
        distribute=distribute)
    auto_cascade = AutoGrowingCascadeLayer(
        est_configs=self.est_configs,
        early_stopping_rounds=2,
        stop_by_test=False,
        data_save_rounds=4,
        n_classes=2,
        data_save_dir=osp.join(save_root, 'test_layer', 'auto_cascade'),
        distribute=distribute)
    return cascade, auto_cascade
def _init(self):
    """Assemble the building blocks of a gcForest-style graph for a
    10-class task.

    :return: tuple of (multi-grain scan layer, pooling layer, concat layer,
             auto-growing cascade layer), in graph order.
    """
    # Cascade ensemble: two extra-trees forests then two random forests.
    self.est_configs = (
        [ExtraRandomForestConfig(n_estimators=40) for _ in range(2)]
        + [RandomForestConfig(n_estimators=40) for _ in range(2)])
    scan_windows = [Window(7, 7, 2, 2, 0, 0), Window(11, 11, 2, 2, 0, 0)]
    extra_cfg = ExtraRandomForestConfig(min_samples_leaf=10)
    rf_cfg = RandomForestConfig(min_samples_leaf=10)
    # The same config pair is reused for both scan windows.
    scan_layer = MultiGrainScanLayer(
        dtype=np.float32,
        windows=scan_windows,
        est_for_windows=[[extra_cfg, rf_cfg], [extra_cfg, rf_cfg]],
        n_class=10)
    # One 2x2 "max" pooling per (window, estimator) combination.
    pool_layer = PoolingLayer(
        pools=[[Pooling(2, 2, "max") for _ in range(2)] for _ in range(2)])
    concat_layer = ConcatLayer()
    auto_cascade = AutoGrowingCascadeLayer(
        est_configs=self.est_configs,
        early_stopping_rounds=2,
        stop_by_test=False,
        data_save_rounds=4,
        n_classes=10,
        data_save_dir=osp.join(get_data_save_base(), 'test_graph', 'auto_cascade'))
    return scan_layer, pool_layer, concat_layer, auto_cascade
print('x_test.shape: {}'.format(x_test.shape))

# Cascade ensemble: four extra-trees forests plus four random forests.
est_configs = (
    [ExtraRandomForestConfig() for _ in range(4)]
    + [RandomForestConfig() for _ in range(4)])

data_save_dir = osp.join(get_data_save_base(), 'uci_yeast')
model_save_dir = osp.join(get_model_save_base(), 'uci_yeast')

# 10-class task, local execution (distribute=False), fixed seed for
# reproducibility.
auto_cascade = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    n_classes=10,
    data_save_dir=data_save_dir,
    model_save_dir=model_save_dir,
    distribute=False,
    seed=0)

model = Graph()
model.add(auto_cascade)
model.fit_transform(x_train, y_train, x_test, y_test)
print("time cost: {}".format(time.time() - start_time))
""" # Copyright 2017 Authors NJU PASA BigData Laboratory. # Authors: Qiu Hu <huqiu00#163.com> # License: Apache-2.0 from __future__ import print_function from keras.datasets import boston_housing from forestlayer.estimators.estimator_configs import ExtraRandomForestConfig, RandomForestConfig, GBDTConfig from forestlayer.layers.layer import AutoGrowingCascadeLayer (x_train, y_train), (x_test, y_test) = boston_housing.load_data(test_split=0.25) print("x_train: {}".format(x_train.shape)) print("x_test: {}".format(x_test.shape)) est_configs = [ RandomForestConfig(), ExtraRandomForestConfig(), GBDTConfig() ] cascade = AutoGrowingCascadeLayer(task='regression', est_configs=est_configs, early_stopping_rounds=3, keep_in_mem=False) cascade.fit_transform(x_train, y_train, x_test, y_test)
# Cascade ensemble: four extra-trees forests plus four random forests.
est_configs = (
    [ExtraRandomForestConfig() for _ in range(4)]
    + [RandomForestConfig() for _ in range(4)])

data_save_dir = osp.join(get_data_save_base(), 'uci_iris')
model_save_dir = osp.join(get_model_save_base(), 'uci_iris')

# 3-class task, local execution (distribute=False, verbose_dis off),
# stop_by_test disabled, fixed seed for reproducibility.
auto_cascade = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    n_classes=3,
    stop_by_test=False,
    data_save_dir=data_save_dir,
    model_save_dir=model_save_dir,
    distribute=False,
    dis_level=0,
    verbose_dis=False,
    seed=0)

model = Graph()
model.add(auto_cascade)
model.fit_transform(x_train, y_train, x_test, y_test)
print("time cost: {}".format(time.time() - start_time))
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

from forestlayer.estimators.estimator_configs import RandomForestConfig, ExtraRandomForestConfig
from forestlayer.layers.layer import AutoGrowingCascadeLayer

# Noisy 1-D regression target: sum of two sines plus Gaussian noise.
rng = np.random.RandomState(1)
X = np.linspace(0, 6, 100)[:, np.newaxis]
y = np.sin(X).ravel() + np.sin(6 * X).ravel() + rng.normal(0, 0.1, X.shape[0])

# Auto-growing cascade in regression mode, kept in memory so it can predict.
est_configs = [RandomForestConfig(), ExtraRandomForestConfig()]
cascade = AutoGrowingCascadeLayer(task='regression',
                                  est_configs=est_configs,
                                  early_stopping_rounds=3,
                                  keep_in_mem=True)
cascade.fit(X, y)
y1 = cascade.predict(X).reshape(-1)

# Baseline: AdaBoost over depth-4 decision trees on the same data.
abr = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                        n_estimators=300,
                        random_state=rng)
abr.fit(X, y)
y2 = abr.predict(X)

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="training samples")
plt.plot(X, y2, c="g", label="n_estimators=300", linewidth=1.5)
rf2 = RandomForestConfig(n_folds=3, min_samples_leaf=10)
# Each of the three scan windows gets the same (rf1, rf2) estimator pair.
est_for_windows = [[rf1, rf2] for _ in range(3)]
mgs = MultiGrainScanLayer(windows=windows,
                          est_for_windows=est_for_windows,
                          n_class=6)

# Mean-pool every (window, estimator) output.
pools = [[MeanPooling(), MeanPooling()] for _ in range(3)]
pool_layer = PoolingLayer(pools=pools)
concat_layer = ConcatLayer()

est_configs = Basic4x2()
# NOTE(review): look_index_cycle appears to select which input groups each
# cascade round consumes — confirm against AutoGrowingCascadeLayer.
auto_cascade = AutoGrowingCascadeLayer(est_configs=est_configs,
                                       early_stopping_rounds=4,
                                       n_classes=6,
                                       look_index_cycle=[[0, 1], [2, 3], [4, 5]])

model = Graph()
model.add(mgs)
model.add(pool_layer)
# model.add(concat_layer)
model.add(auto_cascade)
model.fit_transform(x_train, y_train, x_test, y_test)
# Regression ensemble: extra-trees, random forests and GBDTs (two of each);
# the XGBoost configs stay disabled.
est_configs = [
    ExtraRandomForestConfig(),
    ExtraRandomForestConfig(),
    RandomForestConfig(n_estimators=100),
    RandomForestConfig(n_estimators=100),
    GBDTConfig(n_estimators=100),
    GBDTConfig(n_estimators=100),
    # XGBRegressorConfig(),
    # XGBRegressorConfig()
]

data_save_dir = osp.join(get_data_save_base(), 'tianchi', 'intelmanu')

# Single-layer (max_layers=1) regression cascade; keep_test_result=True so
# the test-set predictions can be read back via agc.test_results below.
agc = AutoGrowingCascadeLayer(task='regression',
                              est_configs=est_configs,
                              max_layers=1,
                              data_save_dir=data_save_dir,
                              keep_test_result=True)

# First 500 rows are the labeled training split; the remainder is the
# unlabeled test split.
agc.fit_transform(feat_dim_120[:500], label, feat_dim_120[500:])
result = agc.test_results

# Score the predictions against the ground-truth file (second column).
true_A = pd.read_csv(osp.join(get_dataset_dir(), 'tianchi', 'intelmanu', 'true_A_20180114.csv'),
                     header=None)
true = true_A.iloc[:, 1]
print("MSE Score: {}".format(mse(result, true)))
# ret = pd.DataFrame()
print('x_train.shape', x_train.shape, x_train.dtype)
print('x_test.shape', x_test.shape, x_test.dtype)

# Cascade ensemble: four extra-trees forests plus four random forests.
est_configs = (
    [ExtraRandomForestConfig() for _ in range(4)]
    + [RandomForestConfig() for _ in range(4)])

data_save_dir = osp.join(get_data_save_base(), 'imdb')
model_save_dir = osp.join(get_model_save_base(), 'imdb')

# Binary task run in distributed mode (distribute=True), fixed seed.
cascade = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    stop_by_test=True,
    n_classes=2,
    data_save_dir=data_save_dir,
    model_save_dir=model_save_dir,
    distribute=True,
    dis_level=0,
    seed=0)

cascade.fit_transform(x_train, y_train, x_test, y_test)
print("Time cost: {}".format(time.time() - start_time))
# ray.init(redis_address="192.168.x.x:6379")
(x_train, y_train, x_test, y_test) = uci_adult.load_data()

start_time = time.time()

# Eight forests, each using all cores (n_jobs=-1): four extra-trees
# followed by four random forests.
est_configs = (
    [ExtraRandomForestConfig(n_jobs=-1) for _ in range(4)]
    + [RandomForestConfig(n_jobs=-1) for _ in range(4)])

# Binary task run in distributed mode, fixed seed for reproducibility.
auto_cascade = AutoGrowingCascadeLayer(est_configs=est_configs,
                                       early_stopping_rounds=4,
                                       n_classes=2,
                                       distribute=True,
                                       seed=0)

model = Graph()
model.add(auto_cascade)
model.summary()
model.fit_transform(x_train, y_train, x_test, y_test)

end_time = time.time()
print('time cost: {}'.format(end_time - start_time))