def setUp(self):
    """Create the layer fixtures used by the tests.

    Stores on ``self``: a multi-grain scan layer (``mgs``), a pooling layer
    (``poolayer``), a concat layer (``concat_layer``), the cascade estimator
    configs (``est_configs``), a cascade layer (``cascade``) and an
    auto-growing cascade layer (``auto_cascade``).
    """
    # Two scan windows that differ only in their first two arguments.
    scan_windows = [Window(side, side, 2, 2, 0, 0) for side in (7, 11)]
    extra_forest = ExtraRandomForestConfig(n_estimators=40, min_samples_leaf=10)
    plain_forest = RandomForestConfig(n_estimators=40, min_samples_leaf=10)
    # Each window gets its own list, but the two config objects are shared.
    forests_per_window = [[extra_forest, plain_forest] for _ in scan_windows]
    self.mgs = MultiGrainScanLayer(
        windows=scan_windows,
        est_for_windows=forests_per_window,
        n_class=10,
    )

    # 2 x 2 grid of separate MaxPooling instances.
    pooling_grid = [[MaxPooling() for _ in range(2)] for _ in range(2)]
    self.poolayer = PoolingLayer(pools=pooling_grid)
    self.concat_layer = ConcatLayer()

    # Two extra-random-forest configs followed by two random-forest configs.
    self.est_configs = (
        [ExtraRandomForestConfig(n_estimators=40) for _ in range(2)]
        + [RandomForestConfig(n_estimators=40) for _ in range(2)]
    )

    cascade_dir = osp.join(get_data_save_base(), 'test_layer', 'cascade')
    self.cascade = CascadeLayer(
        est_configs=self.est_configs,
        n_classes=10,
        keep_in_mem=True,
        data_save_dir=cascade_dir,
    )

    auto_cascade_dir = osp.join(get_data_save_base(), 'test_layer', 'auto_cascade')
    self.auto_cascade = AutoGrowingCascadeLayer(
        est_configs=self.est_configs,
        early_stopping_rounds=3,
        data_save_rounds=4,
        stop_by_test=True,
        n_classes=10,
        data_save_dir=auto_cascade_dir,
    )
def _init(self, distribute=False):
    """Build a cascade layer and an auto-growing cascade layer for two classes.

    Also (re)assigns ``self.est_configs`` to the estimator configs used by
    both layers.

    Args:
        distribute: forwarded unchanged as the ``distribute`` keyword of
            both layer constructors.

    Returns:
        A ``(CascadeLayer, AutoGrowingCascadeLayer)`` tuple.
    """
    # Two extra-random-forest configs followed by two random-forest configs.
    self.est_configs = (
        [ExtraRandomForestConfig(n_estimators=20) for _ in range(2)]
        + [RandomForestConfig(n_estimators=20) for _ in range(2)]
    )

    cascade_dir = osp.join(get_data_save_base(), 'test_layer', 'cascade')
    cascade_layer = CascadeLayer(
        est_configs=self.est_configs,
        n_classes=2,
        data_save_dir=cascade_dir,
        distribute=distribute,
    )

    auto_cascade_dir = osp.join(get_data_save_base(), 'test_layer', 'auto_cascade')
    auto_cascade_layer = AutoGrowingCascadeLayer(
        est_configs=self.est_configs,
        early_stopping_rounds=2,
        stop_by_test=False,
        data_save_rounds=4,
        n_classes=2,
        data_save_dir=auto_cascade_dir,
        distribute=distribute,
    )
    return cascade_layer, auto_cascade_layer
def _init(self):
    """Build the four layers of the test graph.

    Also (re)assigns ``self.est_configs`` to the estimator configs used by
    the auto-growing cascade layer.

    Returns:
        A tuple ``(MultiGrainScanLayer, PoolingLayer, ConcatLayer,
        AutoGrowingCascadeLayer)``.
    """
    # Two extra-random-forest configs followed by two random-forest configs.
    self.est_configs = (
        [ExtraRandomForestConfig(n_estimators=40) for _ in range(2)]
        + [RandomForestConfig(n_estimators=40) for _ in range(2)]
    )

    # Two scan windows that differ only in their first two arguments.
    scan_windows = [Window(side, side, 2, 2, 0, 0) for side in (7, 11)]
    extra_forest = ExtraRandomForestConfig(min_samples_leaf=10)
    plain_forest = RandomForestConfig(min_samples_leaf=10)
    # Each window gets its own list, but the two config objects are shared.
    forests_per_window = [[extra_forest, plain_forest] for _ in scan_windows]
    scan_layer = MultiGrainScanLayer(
        dtype=np.float32,
        windows=scan_windows,
        est_for_windows=forests_per_window,
        n_class=10,
    )

    # 2 x 2 grid of separate Pooling(2, 2, "max") instances.
    pooling_grid = [[Pooling(2, 2, "max") for _ in range(2)] for _ in range(2)]
    pooling_layer = PoolingLayer(pools=pooling_grid)
    concat = ConcatLayer()

    auto_cascade_dir = osp.join(get_data_save_base(), 'test_graph', 'auto_cascade')
    growing_cascade = AutoGrowingCascadeLayer(
        est_configs=self.est_configs,
        early_stopping_rounds=2,
        stop_by_test=False,
        data_save_rounds=4,
        n_classes=10,
        data_save_dir=auto_cascade_dir,
    )
    return scan_layer, pooling_layer, concat, growing_cascade
# Pooling and concat layers; `pools` is defined before this point.
pool = PoolingLayer(pools=pools)
concatlayer = ConcatLayer()

# Four extra-random-forest configs followed by four random-forest configs,
# each a separate instance built with default arguments.
est_configs = (
    [ExtraRandomForestConfig() for _ in range(4)]
    + [RandomForestConfig() for _ in range(4)]
)

data_save_dir = osp.join(get_data_save_base(), 'mnist')
model_save_dir = osp.join(get_model_save_base(), 'mnist')
auto_cascade = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    stop_by_test=True,
    n_classes=10,
    data_save_dir=data_save_dir,
    model_save_dir=model_save_dir,
)

# Assemble the graph in order: scan -> pool -> concat -> cascade.
model = Graph()
for _layer in (mgs, pool, concatlayer, auto_cascade):
    model.add(_layer)
model.fit_transform(x_train, y_train, x_test, y_test)
# Report the shapes of the train and test feature arrays.
print('x_train shape: {}'.format(x_train.shape))
print('x_test.shape: {}'.format(x_test.shape))

# Four extra-random-forest configs followed by four random-forest configs,
# each a separate instance built with default arguments.
est_configs = (
    [ExtraRandomForestConfig() for _ in range(4)]
    + [RandomForestConfig() for _ in range(4)]
)

data_save_dir = osp.join(get_data_save_base(), 'uci_yeast')
model_save_dir = osp.join(get_model_save_base(), 'uci_yeast')
auto_cascade = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    n_classes=10,
    data_save_dir=data_save_dir,
    model_save_dir=model_save_dir,
    distribute=False,
    seed=0,
)

# The graph consists of the cascade layer only.
model = Graph()
model.add(auto_cascade)
model.fit_transform(x_train, y_train, x_test, y_test)

# `start_time` is set before this point.
print("time cost: {}".format(time.time() - start_time))
# Pooling and concat layers; `pools` is defined before this point.
pool = PoolingLayer(pools=pools)
concatlayer = ConcatLayer()

# Four extra-random-forest configs followed by four random-forest configs,
# each a separate instance built with default arguments.
est_configs = (
    [ExtraRandomForestConfig() for _ in range(4)]
    + [RandomForestConfig() for _ in range(4)]
)

data_save_dir = osp.join(get_data_save_base(), 'fashion_mnist')
model_save_dir = osp.join(get_model_save_base(), 'fashion_mnist')
auto_cascade = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    stop_by_test=True,
    n_classes=10,
    data_save_dir=data_save_dir,
    model_save_dir=model_save_dir,
)

# Assemble the graph in order: scan -> pool -> concat -> cascade.
model = Graph()
for _layer in (mgs, pool, concatlayer, auto_cascade):
    model.add(_layer)
model.fit_transform(x_train, y_train, x_test, y_test)
print('x_test.shape: {}'.format(x_test.shape))

# Reference point for timing; it is read by code after this fragment.
start_time = time.time()

# Four extra-random-forest configs followed by four random-forest configs,
# each a separate instance built with default arguments.
est_configs = (
    [ExtraRandomForestConfig() for _ in range(4)]
    + [RandomForestConfig() for _ in range(4)]
)

data_save_dir = osp.join(get_data_save_base(), 'uci_iris')
model_save_dir = osp.join(get_model_save_base(), 'uci_iris')
auto_cascade = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    n_classes=3,
    stop_by_test=False,
    data_save_dir=data_save_dir,
    model_save_dir=model_save_dir,
    distribute=False,
    dis_level=0,
    verbose_dis=False,
    seed=0,
)

# The graph consists of the cascade layer only.
model = Graph()
model.add(auto_cascade)
# Forest configs shared by every scan window.
rf1 = ExtraRandomForestConfig(
    n_folds=3,
    n_jobs=-1,
    min_samples_leaf=10,
    max_features='auto',
)
rf2 = RandomForestConfig(n_folds=3, n_jobs=-1, min_samples_leaf=10)

# Three scan windows. Only the first one spells out its padding; the other
# two rely on whatever defaults Window declares for the omitted arguments.
windows = [
    Window(win_x=8, win_y=8, stride_x=2, stride_y=2, pad_x=0, pad_y=0),
    Window(11, 11, 2, 2),
    Window(16, 16, 2, 2),
]
# One [rf1, rf2] list per window; the config objects themselves are shared.
est_for_windows = [[rf1, rf2] for _ in range(3)]

data_save_dir = osp.join(get_data_save_base(), 'cifar10')
model_save_dir = osp.join(get_model_save_base(), 'cifar10')
mgs = MultiGrainScanLayer(
    windows=windows,
    est_for_windows=est_for_windows,
    n_class=10,
    distribute=False,
    dis_level=0,
    keep_in_mem=False,
    data_save_dir=data_save_dir,
    cache_in_disk=False,
    seed=0,
)

# 3 x 2 grid of separate MeanPooling(2, 2) instances.
pools = [[MeanPooling(2, 2) for _ in range(2)] for _ in range(3)]
feat_dim_120 = auto_encoder()
# Alternative feature extraction, disabled in the original script:
# feat_dim_120 = PCA()

# Two configs of each estimator family, each a separate instance.
est_configs = (
    [ExtraRandomForestConfig() for _ in range(2)]
    + [RandomForestConfig(n_estimators=100) for _ in range(2)]
    + [GBDTConfig(n_estimators=100) for _ in range(2)]
    # Disabled in the original script:
    # XGBRegressorConfig(),
    # XGBRegressorConfig()
)

data_save_dir = osp.join(get_data_save_base(), 'tianchi', 'intelmanu')
agc = AutoGrowingCascadeLayer(
    task='regression',
    est_configs=est_configs,
    max_layers=1,
    data_save_dir=data_save_dir,
    keep_test_result=True,
)

# Rows [:500] are passed first (together with `label`), rows [500:] third.
agc.fit_transform(feat_dim_120[:500], label, feat_dim_120[500:])
result = agc.test_results

# Load the reference CSV (no header row) and keep its second column.
true_A = pd.read_csv(
    osp.join(get_dataset_dir(), 'tianchi', 'intelmanu', 'true_A_20180114.csv'),
    header=None,
)
true = true_A.iloc[:, 1]
# Report shape and dtype of the train and test feature arrays.
print('x_train.shape', x_train.shape, x_train.dtype)
print('x_test.shape', x_test.shape, x_test.dtype)

# Four extra-random-forest configs followed by four random-forest configs,
# each a separate instance built with default arguments.
est_configs = (
    [ExtraRandomForestConfig() for _ in range(4)]
    + [RandomForestConfig() for _ in range(4)]
)

data_save_dir = osp.join(get_data_save_base(), 'imdb')
model_save_dir = osp.join(get_model_save_base(), 'imdb')
cascade = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    stop_by_test=True,
    n_classes=2,
    data_save_dir=data_save_dir,
    model_save_dir=model_save_dir,
    distribute=True,
    dis_level=0,
    seed=0,
)
cascade.fit_transform(x_train, y_train, x_test, y_test)

# `start_time` is set before this point.
print("Time cost: {}".format(time.time() - start_time))
# Report the number of train rows, test rows and feature columns.
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print(x_train.shape[1], 'features')

# Four random-forest configs followed by four extra-random-forest configs,
# each a separate instance built with default arguments.
est_configs = (
    [RandomForestConfig() for _ in range(4)]
    + [ExtraRandomForestConfig() for _ in range(4)]
)

# The save directory name embeds `sz`, which is defined before this point.
data_save_dir = osp.join(get_data_save_base(), 'higgs', 'higgs-{}'.format(sz))
agc = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    max_layers=0,
    stop_by_test=True,
    n_classes=2,
    data_save_rounds=0,
    data_save_dir=data_save_dir,
    keep_in_mem=False,
    distribute=False,
    dis_level=2,
    verbose_dis=True,
    seed=0,
)
model = Graph()
# Report the shapes of the base and "plus" arrays for both splits.
print('train shape and plus shape', x_train.shape, x_train_plus.shape)
print('test shape and plus shape', x_test.shape, x_test_plus.shape)

# Forest configs shared by every scan window.
rf1 = ExtraRandomForestConfig(
    n_folds=3,
    n_jobs=-1,
    min_samples_leaf=10,
    max_features='auto',
)
rf2 = RandomForestConfig(n_folds=3, n_jobs=-1, min_samples_leaf=10)

# Three scan windows. Only the first one spells out its padding; the other
# two rely on whatever defaults Window declares for the omitted arguments.
windows = [
    Window(win_x=24, win_y=24, stride_x=2, stride_y=2, pad_x=0, pad_y=0),
    Window(34, 34, 2, 2),
    Window(48, 48, 2, 2),
]
# One [rf1, rf2] list per window; the config objects themselves are shared.
est_for_windows = [[rf1, rf2] for _ in range(3)]

data_save_dir = osp.join(get_data_save_base(), 'small_norb')
model_save_dir = osp.join(get_model_save_base(), 'small_norb')
# NOTE(review): n_class=10 is passed here; confirm it matches the number of
# label classes in the data loaded before this point.
mgs = MultiGrainScanLayer(
    windows=windows,
    est_for_windows=est_for_windows,
    n_class=10,
    distribute=False,
    keep_in_mem=False,
    data_save_dir=data_save_dir,
    cache_in_disk=True,
    seed=0,
)

model = Graph()
model.add(mgs)
# Further layers, disabled in the original script:
# model.add(pool)
# model.add(concatlayer)