예제 #1
0
    def setUp(self):
        """Build the layer fixtures shared by the test methods.

        Creates a multi-grain scan layer, a pooling layer, a concat layer,
        a plain cascade layer and an auto-growing cascade layer, storing
        each on ``self``.
        """
        # Two scan windows; both reuse the same pair of estimator configs.
        scan_windows = [Window(7, 7, 2, 2, 0, 0),
                        Window(11, 11, 2, 2, 0, 0)]

        extra_cfg = ExtraRandomForestConfig(n_estimators=40, min_samples_leaf=10)
        forest_cfg = RandomForestConfig(n_estimators=40, min_samples_leaf=10)

        # One (extra, random) forest pair per window.
        window_estimators = [[extra_cfg, forest_cfg] for _ in range(2)]

        self.mgs = MultiGrainScanLayer(windows=scan_windows,
                                       est_for_windows=window_estimators,
                                       n_class=10)

        # Max pooling for every (window, estimator) output.
        self.poolayer = PoolingLayer(
            pools=[[MaxPooling(), MaxPooling()] for _ in range(2)])
        self.concat_layer = ConcatLayer()

        # Cascade estimators: 2 extra-random forests + 2 random forests.
        self.est_configs = (
            [ExtraRandomForestConfig(n_estimators=40) for _ in range(2)] +
            [RandomForestConfig(n_estimators=40) for _ in range(2)])

        cascade_dir = osp.join(get_data_save_base(), 'test_layer', 'cascade')
        self.cascade = CascadeLayer(est_configs=self.est_configs,
                                    n_classes=10,
                                    keep_in_mem=True,
                                    data_save_dir=cascade_dir)

        auto_dir = osp.join(get_data_save_base(), 'test_layer', 'auto_cascade')
        self.auto_cascade = AutoGrowingCascadeLayer(est_configs=self.est_configs,
                                                    early_stopping_rounds=3,
                                                    data_save_rounds=4,
                                                    stop_by_test=True,
                                                    n_classes=10,
                                                    data_save_dir=auto_dir)
예제 #2
0
    def _init(self, distribute=False):
        """Construct the two cascade fixtures used by this test case.

        :param distribute: forwarded to both layers to toggle distributed mode.
        :return: tuple ``(cascade_layer, auto_growing_cascade_layer)``.
        """
        # 2 extra-random forests + 2 random forests per cascade level.
        self.est_configs = (
            [ExtraRandomForestConfig(n_estimators=20) for _ in range(2)] +
            [RandomForestConfig(n_estimators=20) for _ in range(2)])

        cascade = CascadeLayer(
            est_configs=self.est_configs,
            n_classes=2,
            data_save_dir=osp.join(get_data_save_base(), 'test_layer', 'cascade'),
            distribute=distribute)

        auto_cascade = AutoGrowingCascadeLayer(
            est_configs=self.est_configs,
            early_stopping_rounds=2,
            stop_by_test=False,
            data_save_rounds=4,
            n_classes=2,
            data_save_dir=osp.join(get_data_save_base(),
                                   'test_layer', 'auto_cascade'),
            distribute=distribute)

        return cascade, auto_cascade
예제 #3
0
    def _init(self):
        """Build the graph components for the multi-grain-scan pipeline.

        :return: tuple ``(mgs, poolayer, concat_layer, auto_cascade)``.
        """
        # 2 extra-random forests + 2 random forests per cascade level.
        self.est_configs = (
            [ExtraRandomForestConfig(n_estimators=40) for _ in range(2)] +
            [RandomForestConfig(n_estimators=40) for _ in range(2)])

        scan_windows = [Window(7, 7, 2, 2, 0, 0), Window(11, 11, 2, 2, 0, 0)]

        # The same (extra, random) forest config pair is shared by both windows.
        extra_cfg = ExtraRandomForestConfig(min_samples_leaf=10)
        forest_cfg = RandomForestConfig(min_samples_leaf=10)
        window_estimators = [[extra_cfg, forest_cfg] for _ in range(2)]

        mgs = MultiGrainScanLayer(dtype=np.float32,
                                  windows=scan_windows,
                                  est_for_windows=window_estimators,
                                  n_class=10)

        # 2x2 max pooling for every (window, estimator) output.
        poolayer = PoolingLayer(
            pools=[[Pooling(2, 2, "max"), Pooling(2, 2, "max")]
                   for _ in range(2)])

        concat_layer = ConcatLayer()

        auto_cascade = AutoGrowingCascadeLayer(
            est_configs=self.est_configs,
            early_stopping_rounds=2,
            stop_by_test=False,
            data_save_rounds=4,
            n_classes=10,
            data_save_dir=osp.join(get_data_save_base(), 'test_graph',
                                   'auto_cascade'))

        return mgs, poolayer, concat_layer, auto_cascade
예제 #4
0
print('x_test.shape: {}'.format(x_test.shape))

# 4 extra-random forests + 4 random forests per cascade level,
# all with their default settings.
est_configs = ([ExtraRandomForestConfig() for _ in range(4)] +
               [RandomForestConfig() for _ in range(4)])

data_save_dir = osp.join(get_data_save_base(), 'uci_yeast')
model_save_dir = osp.join(get_model_save_base(), 'uci_yeast')

# Local (non-distributed) auto-growing cascade for the 10-class yeast
# data; seed fixed for reproducibility.
cascade_params = {
    'est_configs': est_configs,
    'early_stopping_rounds': 4,
    'n_classes': 10,
    'data_save_dir': data_save_dir,
    'model_save_dir': model_save_dir,
    'distribute': False,
    'seed': 0,
}
auto_cascade = AutoGrowingCascadeLayer(**cascade_params)

# Single-layer graph: just the auto-growing cascade.
model = Graph()
model.add(auto_cascade)
model.fit_transform(x_train, y_train, x_test, y_test)

print("time cost: {}".format(time.time() - start_time))

예제 #5
0
"""

# Copyright 2017 Authors NJU PASA BigData Laboratory.
# Authors: Qiu Hu <huqiu00#163.com>
# License: Apache-2.0

from __future__ import print_function
from keras.datasets import boston_housing
from forestlayer.estimators.estimator_configs import ExtraRandomForestConfig, RandomForestConfig, GBDTConfig
from forestlayer.layers.layer import AutoGrowingCascadeLayer

# Boston-housing regression demo with a 75/25 train/test split.
(x_train, y_train), (x_test, y_test) = boston_housing.load_data(test_split=0.25)

print("x_train: {}".format(x_train.shape))
print("x_test: {}".format(x_test.shape))

# Mix three estimator families in a single cascade level.
est_configs = [
    RandomForestConfig(),
    ExtraRandomForestConfig(),
    GBDTConfig(),
]

# Regression cascade: grow until 3 rounds without improvement; do not
# keep the fitted per-layer estimators in memory.
cascade = AutoGrowingCascadeLayer(task='regression',
                                  est_configs=est_configs,
                                  early_stopping_rounds=3,
                                  keep_in_mem=False)

cascade.fit_transform(x_train, y_train, x_test, y_test)


예제 #6
0
# 4 extra-random forests + 4 random forests per cascade level.
est_configs = ([ExtraRandomForestConfig() for _ in range(4)] +
               [RandomForestConfig() for _ in range(4)])

data_save_dir = osp.join(get_data_save_base(), 'uci_iris')
model_save_dir = osp.join(get_model_save_base(), 'uci_iris')

# Local (non-distributed) auto-growing cascade for the 3-class iris data.
cascade_params = {
    'est_configs': est_configs,
    'early_stopping_rounds': 4,
    'n_classes': 3,
    'stop_by_test': False,
    'data_save_dir': data_save_dir,
    'model_save_dir': model_save_dir,
    'distribute': False,
    'dis_level': 0,
    'verbose_dis': False,
    'seed': 0,
}
auto_cascade = AutoGrowingCascadeLayer(**cascade_params)

# Single-layer graph: just the auto-growing cascade.
model = Graph()
model.add(auto_cascade)
model.fit_transform(x_train, y_train, x_test, y_test)

print("time cost: {}".format(time.time() - start_time))
예제 #7
0
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from forestlayer.estimators.estimator_configs import RandomForestConfig, ExtraRandomForestConfig
from forestlayer.layers.layer import AutoGrowingCascadeLayer

# Toy 1-D regression target: two sine waves plus Gaussian noise.
rand = np.random.RandomState(1)
X = np.linspace(0, 6, 100)[:, np.newaxis]
y = np.sin(X).ravel() + np.sin(6 * X).ravel() + rand.normal(0, 0.1, X.shape[0])

est_configs = [RandomForestConfig(), ExtraRandomForestConfig()]

# Regression cascade; keep_in_mem=True so predict() can reuse the
# fitted per-layer estimators after training.
cascade = AutoGrowingCascadeLayer(task='regression',
                                  est_configs=est_configs,
                                  early_stopping_rounds=3,
                                  keep_in_mem=True)
cascade.fit(X, y)
y1 = cascade.predict(X).reshape(-1)

# Baseline: AdaBoost over depth-4 decision trees, same RNG.
abr = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                        n_estimators=300,
                        random_state=rand)
abr.fit(X, y)
y2 = abr.predict(X)

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="training samples")
plt.plot(X, y2, c="g", label="n_estimators=300", linewidth=1.5)
예제 #8
0
# Second per-window estimator: a 3-fold random forest (rf1 is defined above).
rf2 = RandomForestConfig(n_folds=3, min_samples_leaf=10)

# The same (rf1, rf2) pair is applied to each of the three scan windows.
est_for_windows = [[rf1, rf2] for _ in range(3)]

mgs = MultiGrainScanLayer(windows=windows,
                          est_for_windows=est_for_windows,
                          n_class=6)

# Mean pooling for every (window, estimator) output.
pools = [[MeanPooling(), MeanPooling()] for _ in range(3)]

pool_layer = PoolingLayer(pools=pools)

concat_layer = ConcatLayer()

# Preset bundle of cascade estimator configs from forestlayer.
est_configs = Basic4x2()

# NOTE(review): look_index_cycle presumably feeds the six scan outputs to
# the cascade in pairs, one pair per growing step — confirm against the
# AutoGrowingCascadeLayer docs.
auto_cascade = AutoGrowingCascadeLayer(est_configs=est_configs,
                                       early_stopping_rounds=4,
                                       n_classes=6,
                                       look_index_cycle=[[0, 1], [2, 3],
                                                         [4, 5]])

# Assemble the pipeline: scan -> pool -> cascade (concat layer disabled).
model = Graph()
model.add(mgs)
model.add(pool_layer)
# model.add(concat_layer)
model.add(auto_cascade)
model.fit_transform(x_train, y_train, x_test, y_test)
예제 #9
0
# Cascade estimators: 2 extra-random forests, 2 random forests and
# 2 GBDTs (the XGBRegressorConfig pair is intentionally disabled).
est_configs = (
    [ExtraRandomForestConfig() for _ in range(2)] +
    [RandomForestConfig(n_estimators=100) for _ in range(2)] +
    [GBDTConfig(n_estimators=100) for _ in range(2)])
# + [XGBRegressorConfig() for _ in range(2)]

data_save_dir = osp.join(get_data_save_base(), 'tianchi', 'intelmanu')

# Single-level regression cascade; keep_test_result=True so the test-set
# predictions can be read back from `test_results` afterwards.
agc = AutoGrowingCascadeLayer(task='regression',
                              est_configs=est_configs,
                              max_layers=1,
                              data_save_dir=data_save_dir,
                              keep_test_result=True)

# First 500 rows are the labelled training data; the remainder is the
# unlabelled test portion.
agc.fit_transform(feat_dim_120[:500], label, feat_dim_120[500:])
result = agc.test_results

# Reference answers: second column of the headerless CSV.
true_A = pd.read_csv(osp.join(get_dataset_dir(), 'tianchi', 'intelmanu',
                              'true_A_20180114.csv'),
                     header=None)
true = true_A.iloc[:, 1]

print("MSE Score: {}".format(mse(result, true)))

# ret = pd.DataFrame()
예제 #10
0
print('x_train.shape', x_train.shape, x_train.dtype)
print('x_test.shape', x_test.shape, x_test.dtype)

# 4 extra-random forests + 4 random forests per cascade level.
est_configs = ([ExtraRandomForestConfig() for _ in range(4)] +
               [RandomForestConfig() for _ in range(4)])

data_save_dir = osp.join(get_data_save_base(), 'imdb')
model_save_dir = osp.join(get_model_save_base(), 'imdb')

# Distributed binary-classification cascade for IMDB; growth stops based
# on test-set performance (stop_by_test=True).
cascade_params = {
    'est_configs': est_configs,
    'early_stopping_rounds': 4,
    'stop_by_test': True,
    'n_classes': 2,
    'data_save_dir': data_save_dir,
    'model_save_dir': model_save_dir,
    'distribute': True,
    'dis_level': 0,
    'seed': 0,
}
cascade = AutoGrowingCascadeLayer(**cascade_params)

cascade.fit_transform(x_train, y_train, x_test, y_test)

print("Time cost: {}".format(time.time() - start_time))
예제 #11
0
# ray.init(redis_address="192.168.x.x:6379")

x_train, y_train, x_test, y_test = uci_adult.load_data()

start_time = time.time()

# 4 extra-random forests + 4 random forests, each using all CPU cores.
est_configs = ([ExtraRandomForestConfig(n_jobs=-1) for _ in range(4)] +
               [RandomForestConfig(n_jobs=-1) for _ in range(4)])

# Distributed binary-classification cascade for the UCI adult data.
auto_cascade = AutoGrowingCascadeLayer(est_configs=est_configs,
                                       early_stopping_rounds=4,
                                       n_classes=2,
                                       distribute=True,
                                       seed=0)

# Single-layer graph; print the structure, then train.
model = Graph()
model.add(auto_cascade)
model.summary()
model.fit_transform(x_train, y_train, x_test, y_test)

end_time = time.time()
print('time cost: {}'.format(end_time - start_time))