Example #1
0
    def test_bankdata(self):
        """Smoke-test a three-trial HyperDT random search on a bank-data sample.

        A random searcher over ``mini_dt_space`` drives the search with 'AUC'
        as the reward metric. The best trial is then retrained on the training
        split and its predictions and evaluation result are checked.
        """
        searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
        search_callbacks = [
            SummaryCallback(),
            FileLoggingCallback(searcher, output_dir=f'hotexamples_com/hyn_logs'),
        ]
        network_params = {
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        }
        hyper_model = HyperDT(searcher,
                              callbacks=search_callbacks,
                              reward_metric='AUC',
                              dnn_params=network_params)

        # Reproducible 10% sample of the data (fixed random_state), minus the 'id' column.
        bank = dsutils.load_bank().sample(frac=0.1, random_state=9527)
        bank.drop(['id'], axis=1, inplace=True)
        train_part, holdout_part = train_test_split(bank, test_size=0.2, random_state=42)
        # pop() takes the 'y' column out of each frame, leaving only the features behind.
        train_target = train_part.pop('y')
        holdout_target = holdout_part.pop('y')

        hyper_model.search(train_part, train_target, holdout_part, holdout_target, max_trials=3)
        winner = hyper_model.get_best_trial()
        assert winner

        # Retrain with the winning sample, then exercise predict/evaluate on the hold-out split.
        estimator = hyper_model.final_train(winner.space_sample, train_part, train_target)
        predictions = estimator.predict(holdout_part)
        evaluation = estimator.evaluate(holdout_part, holdout_target)
        assert len(predictions) == len(holdout_target)
        assert evaluation
        assert isinstance(estimator.model, DeepTable)
Example #2
0
    def test_boston(self):
        """Smoke-test a three-trial HyperDT search on the data returned by ``load_boston``.

        The full data and its train/test split are stored on ``self`` as
        ``X``, ``y``, ``X_train``, ``X_test``, ``y_train`` and ``y_test``.
        The best trial is retrained on the training split only and then
        checked on the held-out split.
        """
        print("Loading datasets...")
        # NOTE(review): if this is sklearn.datasets.load_boston, it was removed in
        # scikit-learn 1.2 -- confirm the scikit-learn version this test runs against.
        boston_dataset = load_boston()

        df_train = pd.DataFrame(boston_dataset.data)
        df_train.columns = boston_dataset.feature_names
        self.y = pd.Series(boston_dataset.target)
        self.X = df_train

        self.X_train, \
        self.X_test, \
        self.y_train, \
        self.y_test = train_test_split(self.X, self.y, test_size=0.2, random_state=42)

        # RootMeanSquaredError is an error metric (lower is better), so the
        # searcher has to minimize the reward; maximizing would rank the
        # worst trial as the best one.
        rs = RandomSearcher(tiny_dt_space, optimize_direction=OptimizeDirection.Minimize)
        hdt = HyperDT(rs,
                      callbacks=[SummaryCallback(), FileStorageLoggingCallback(rs, output_dir=f'hotexamples_com/hyn_logs')],
                      reward_metric='RootMeanSquaredError',
                      dnn_params={
                          'hidden_units': ((256, 0, False), (256, 0, False)),
                          'dnn_activation': 'relu',
                      },
                      )
        hdt.search(self.X_train, self.y_train, self.X_test, self.y_test, max_trials=3)

        best_trial = hdt.get_best_trial()
        # Fail with a clear assertion rather than an AttributeError below.
        assert best_trial

        # Retrain on the training split only, so the evaluation rows below
        # are not part of the data the final model was fitted on.
        estimator = hdt.final_train(best_trial.space_sample, self.X_train, self.y_train)
        score = estimator.predict(self.X_test)
        result = estimator.evaluate(self.X_test, self.y_test)
        assert len(score) == len(self.y_test)
        assert result
        assert isinstance(estimator.model, DeepTable)
Example #3
0
    def test_hyper_dt(self):
        """Smoke-test HyperDT on a small random dataset with preprocessed-data caching enabled.

        Checks both the estimator loaded from the best trial's model file and
        the estimator produced by ``final_train``.
        """
        n_rows = 100

        searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
        summary_callbacks = [SummaryCallback()]
        network_params = {
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        }
        hyper_model = HyperDT(searcher,
                              callbacks=summary_callbacks,
                              reward_metric='accuracy',
                              dnn_params=network_params,
                              cache_preprocessed_data=True,
                              cache_home=homedir + '/cache')

        # One integer column, two string columns and one float column;
        # the draw order (x1..x4, then the labels) is kept as-is.
        columns = {
            'x1': np.random.randint(0, 10, size=n_rows, dtype='int'),
            'x2': np.random.randint(0, 2, size=n_rows).astype('str'),
            'x3': np.random.randint(0, 2, size=n_rows).astype('str'),
            'x4': np.random.normal(0.0, 1.0, size=n_rows),
        }
        labels = np.random.randint(0, 2, size=n_rows, dtype='int')
        frame = pd.DataFrame(columns)

        # The same frame is used for both training and evaluation.
        hyper_model.search(frame, labels, frame, labels, max_trials=3, epochs=1)
        winner = hyper_model.get_best_trial()

        # Estimator reloaded from the file recorded on the best trial.
        loaded = hyper_model.load_estimator(winner.model_file)
        assert loaded
        loaded_predictions = loaded.predict(frame)
        loaded_evaluation = loaded.evaluate(frame, labels)
        assert len(loaded_predictions) == n_rows
        assert loaded_evaluation
        assert isinstance(loaded, DeepTable)

        # Estimator retrained from the best trial's space sample.
        retrained = hyper_model.final_train(winner.space_sample, frame, labels, epochs=1)
        retrained_predictions = retrained.predict(frame)
        retrained_evaluation = retrained.evaluate(frame, labels)
        assert len(retrained_predictions) == n_rows
        assert retrained_evaluation
        assert isinstance(retrained.model, DeepTable)
Example #4
0
def train(X_train,
          y_train,
          X_eval,
          y_eval,
          task=None,
          reward_metric=None,
          optimize_direction='max',
          **kwargs):
    """Search with a PlainModel driven by an 'mcts' searcher, then retrain the best trial.

    When ``task`` is None it is inferred from ``y_train`` through the tool
    box. When ``reward_metric`` is None it defaults to 'rmse' for regression
    tasks and 'accuracy' otherwise. Extra keyword arguments are forwarded to
    ``search``.

    Returns a ``(hyper_model, final_model)`` pair, where ``final_model`` is
    the result of ``final_train`` on the training data.
    """
    from hypernets.core.callbacks import SummaryCallback
    from hypernets.searchers import make_searcher

    if task is None:
        # infer_task_type yields a pair; only its first element is used here.
        task, _ignored = get_tool_box(y_train).infer_task_type(y_train)

    if reward_metric is None:
        if task == const.TASK_REGRESSION:
            reward_metric = 'rmse'
        else:
            reward_metric = 'accuracy'

    mcts_searcher = make_searcher('mcts',
                                  PlainSearchSpace(),
                                  optimize_direction=optimize_direction)
    plain_model = PlainModel(searcher=mcts_searcher,
                             task=task,
                             reward_metric=reward_metric,
                             callbacks=[SummaryCallback()])
    plain_model.search(X_train, y_train, X_eval, y_eval, **kwargs)

    winner = plain_model.get_best_trial()
    final_model = plain_model.final_train(winner.space_sample, X_train, y_train)
    return plain_model, final_model
Example #5
0
    def test_cnn_space_hyper_model(self):
        """Smoke-test HyperKeras with the CNN search space on a small MNIST subset."""

        def build_space():
            # Factory handed to the searcher; each call builds a new CNN search space.
            return cnn_search_space(input_shape=(28, 28, 1),
                                    output_units=10,
                                    output_activation='softmax',
                                    block_num_choices=[2, 3, 4, 5],
                                    filters_choices=[32, 64, 128],
                                    kernel_size_choices=[(1, 1), (3, 3)])

        searcher = RandomSearcher(build_space, optimize_direction='max')
        hyper_model = HyperKeras(searcher,
                                 optimizer='adam',
                                 loss='categorical_crossentropy',
                                 metrics=['accuracy'],
                                 callbacks=[SummaryCallback()])

        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

        # Add a trailing channel axis and rescale pixels from [0, 255] to [0.0, 1.0].
        x_train = x_train[..., np.newaxis] / 255.0
        x_test = x_test[..., np.newaxis] / 255.0
        y_train = tf.keras.utils.to_categorical(y_train)
        y_test = tf.keras.utils.to_categorical(y_test)
        print("Number of original training examples:", len(x_train))
        print("Number of original test examples:", len(x_test))

        # Only a small slice of the data is used: `samples` training rows
        # and a tenth as many evaluation rows.
        samples = 100
        eval_samples = int(samples / 10)
        hyper_model.search(x_train[:samples], y_train[:samples],
                           x_test[:eval_samples], y_test[:eval_samples],
                           max_trails=3, epochs=1)
        assert hyper_model.best_model
Example #6
0
    def test_dnn_space_hyper_model(self):
        """Smoke-test HyperKeras with the DNN search space on random integer data."""

        def build_space():
            # Factory handed to the searcher; each call builds a new DNN search space.
            return dnn_search_space(input_shape=10, output_units=2, output_activation='sigmoid')

        searcher = RandomSearcher(build_space, optimize_direction='max')
        hyper_model = HyperKeras(searcher,
                                 optimizer='adam',
                                 loss='sparse_categorical_crossentropy',
                                 metrics=['accuracy'],
                                 callbacks=[SummaryCallback()])

        n_rows = 100
        features = np.random.randint(0, 10000, size=(n_rows, 10))
        labels = np.random.randint(0, 2, size=n_rows, dtype='int')

        # The same arrays serve as both training and evaluation data.
        hyper_model.search(features, labels, features, labels, max_trails=3)
        assert hyper_model.best_model
Example #7
0
def create_plain_model(reward_metric='auc', optimize_direction='max',
                       with_encoder=False, with_dask=False):
    """Build a plain hyper model backed by a 'random' searcher.

    The search space enables the ``dt`` and ``lr`` options and disables
    ``nn``. ``with_encoder`` selects ``MultiLabelEncoder`` (passed as the
    class itself, not an instance) as the transformer, otherwise None.
    ``with_dask`` selects ``DaskPlainModel`` instead of ``PlainModel``.
    """
    space = PlainSearchSpace(enable_dt=True, enable_lr=True, enable_nn=False)
    random_searcher = make_searcher('random', search_space_fn=space, optimize_direction=optimize_direction)

    if with_encoder:
        transformer = MultiLabelEncoder
    else:
        transformer = None

    if with_dask:
        model_cls = DaskPlainModel
    else:
        model_cls = PlainModel

    return model_cls(searcher=random_searcher,
                     reward_metric=reward_metric,
                     callbacks=[SummaryCallback()],
                     transformer=transformer)
Example #8
0
    def train_bankdata(self, data_partition):
        """Run a three-trial HyperGBM random search and retrain the best trial.

        :param data_partition: zero-argument callable returning the tuple
            ``(X_train, X_test, y_train, y_test)``.
        :return: ``(estimator, hk)`` -- the retrained estimator and the
            HyperGBM instance that ran the search.
        """
        rs = RandomSearcher(search_space_general, optimize_direction=OptimizeDirection.Maximize)
        hk = HyperGBM(rs, task='classification', reward_metric='accuracy',
                      cache_dir=f'{test_output_dir}/hypergbm_cache',
                      callbacks=[SummaryCallback(), FileLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')])

        # All data comes from the caller-supplied partition; the dataset that
        # used to be loaded here was never used.
        X_train, X_test, y_train, y_test = data_partition()

        hk.search(X_train, y_train, X_test, y_test, max_trails=3)
        best_trial = hk.get_best_trail()

        estimator = hk.final_train(best_trial.space_sample, X_train, y_train)
        score = estimator.predict(X_test)
        # Called to exercise evaluate(); its return value is not inspected.
        estimator.evaluate(X_test, y_test)
        # One prediction per test row, whatever size the partition produced.
        assert len(score) == len(y_test)
        return estimator, hk
Example #9
0
# Script fragment: configures a HyperDT search driven by an evolution searcher,
# checks the sizes of two search spaces and loads/splits the data.

# Run-specific directory name: project name plus a second-resolution timestamp.
homedir = f'{consts.PROJECT_NAME}_run_dt_{time.strftime("%Y%m%d%H%M%S")}'
# NOTE(review): disk_trial_store is not referenced again in the visible part of this script.
disk_trial_store = DiskTrialStore(f'hotexamples_com/trial_store')

# Alternative searchers, kept for reference:
# searcher = MCTSSearcher(mini_dt_space, max_node_space=0,optimize_direction=OptimizeDirection.Maximize)
# searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
# NOTE(review): the positional 200 and 100 are presumably the population and
# sample sizes of the evolution search -- confirm against EvolutionSearcher's signature.
searcher = EvolutionSearcher(mini_dt_space,
                             200,
                             100,
                             regularized=True,
                             candidates_size=30,
                             optimize_direction=OptimizeDirection.Maximize)

# The same searcher instance is passed to the hyper model and to the logging callback.
hdt = HyperDT(searcher,
              callbacks=[
                  SummaryCallback(),
                  FileStorageLoggingCallback(searcher,
                                             output_dir=f'hotexamples_com/hyn_logs')
              ],
              reward_metric='AUC',
              earlystopping_patience=1)

# Sanity checks: the search spaces must report exactly these combination counts.
space = mini_dt_space()
assert space.combinations == 589824
space2 = default_dt_space()
assert space2.combinations == 3559292928

df = dsutils.load_adult()
# df.drop(['id'], axis=1, inplace=True)
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
# X is another name for df_train (no copy is made).
X = df_train
Example #10
0
from hypernets.searchers.mcts_searcher import MCTSSearcher
from hypernets.searchers.evolution_searcher import EvolutionSearcher
from hypernets.core.trial import DiskTrailStore
from deeptables.datasets import dsutils
from sklearn.model_selection import train_test_split
from .. import homedir

# Script fragment: configures a HyperDT search driven by an evolution searcher,
# checks the sizes of two search spaces and prepares the train/test data.

# NOTE(review): disk_trail_store is not referenced again in the visible part of this script.
disk_trail_store = DiskTrailStore(f'hotexamples_com/trail_store')

# Alternative searchers, kept for reference:
# searcher = MCTSSearcher(mini_dt_space, max_node_space=0,optimize_direction=OptimizeDirection.Maximize)
# searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
# NOTE(review): the positional 200 and 100 are presumably the population and
# sample sizes of the evolution search -- confirm against EvolutionSearcher's signature.
searcher = EvolutionSearcher(mini_dt_space, 200, 100, regularized=True, candidates_size=30,
                             optimize_direction=OptimizeDirection.Maximize)

# The same searcher instance is passed to the hyper model and to the logging callback.
hdt = HyperDT(searcher,
              callbacks=[SummaryCallback(), FileLoggingCallback(searcher, output_dir=f'hotexamples_com/hyn_logs')],
              reward_metric='AUC',
              earlystopping_patience=1)

# Sanity checks: the search spaces must report exactly these combination counts.
space = mini_dt_space()
assert space.combinations == 589824
space2 = default_dt_space()
assert space2.combinations == 3559292928

df = dsutils.load_adult()
# df.drop(['id'], axis=1, inplace=True)
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
# X is another name for df_train (no copy), so the pop() below also removes
# column 14 from X.
X = df_train
# Column 14 is taken out of both frames and used as the target
# (presumably the label column of the adult dataset -- verify).
y = df_train.pop(14)
y_test = df_test.pop(14)
# dataset_id='adult_whole_data',
Example #11
0
from hyperkeras.search_space.enas_micro import enas_micro_search_space
from hyperkeras.one_shot_model import OneShotModel

# Load MNIST as (images, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Append a channel axis and rescale the images from [0, 255] to [0.0, 1.0].
x_train = x_train[..., np.newaxis] / 255.0
x_test = x_test[..., np.newaxis] / 255.0

# Convert the integer class labels into categorical (one-hot) matrices.
y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)
print("Number of original training examples:", len(x_train))
print("Number of original test examples:", len(x_test))

# Subset size for a sampled run; the search below is given the full arrays.
samples = 100

# The MCTS searcher drives the architecture search, while a separate random
# searcher over the same space samples architectures for one-shot training.
searcher = MCTSSearcher(
    lambda: enas_micro_search_space(arch='NNRNNR', hp_dict={}),
    optimize_direction='max',
)
one_shot_sampler = RandomSearcher(
    lambda: enas_micro_search_space(arch='NNRNNR', hp_dict={}),
    optimize_direction='max',
)

model = OneShotModel(
    searcher,
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    epochs=3,
    batch_size=64,
    controller_train_per_epoch=False,  # single path
    callbacks=[SummaryCallback()],
    one_shot_train_sampler=one_shot_sampler,  # uniform sampler
    visualization=False,
)

model.search(x_train, y_train, x_test, y_test, max_trails=1000, epochs=100, callbacks=[])
assert model.best_model
Example #12
0
# model2 = space.keras_model(deepcopy=False)
# model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# model2.fit(x_train[:samples], y_train[:samples], batch_size=32)
# result2 = model.evaluate(x_train[:samples], y_train[:samples])
#
# weights_cache = LayerWeightsCache()
# space = enas_micro_search_space(arch='NR', hp_dict={}, use_input_placeholder=False, weights_cache=weights_cache)
# space.random_sample()
#
# model = SharingWeightModel(space)
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# model.fit(x_train[:samples], y_train[:samples], batch_size=32)
# result = model.evaluate(x_train[:samples], y_train[:samples])
#
# space = enas_micro_search_space(arch='NR', hp_dict={}, use_input_placeholder=False, weights_cache=weights_cache)
# space.random_sample()
# model.update_search_space(space)
# model.fit(x_train[:samples], y_train[:samples], batch_size=100)
# result = model.evaluate(x_train[:samples], y_train[:samples])

# Script fragment: one-shot HyperKeras search over the ENAS micro search space.
# x_train, y_train, x_test, y_test and samples are not defined in the visible
# part of this fragment; they are expected to come from earlier in the script.
rs = RandomSearcher(
    lambda: enas_micro_search_space(arch='NNRNNR', hp_dict={}),
    optimize_direction='max')
hk = HyperKeras(rs, optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'],
                callbacks=[SummaryCallback()], one_shot_mode=True, visualization=False)

# tenserboard = TensorBoard('./tensorboard/run_enas')
# Search on the first `samples` training rows and a tenth as many evaluation rows.
hk.search(x_train[:samples], y_train[:samples], x_test[:int(samples / 10)], y_test[:int(samples / 10)],
          max_trails=100, epochs=1, callbacks=[])
# NOTE(review): `hk.get` looks truncated -- presumably a call such as
# hk.get_best_trail() was intended; confirm against the original script.
assert hk.get
Example #13
0
def create_hyper_model(reward_metric='AUC', optimize_direction='max'):
    """Build a HyperDT model backed by a 'random' searcher over ``tiny_dt_space``.

    ``tiny_dt_space`` is handed to the searcher as a callable (it is not
    invoked here); the only callback installed is a ``SummaryCallback``.
    """
    random_searcher = make_searcher('random',
                                    search_space_fn=tiny_dt_space,
                                    optimize_direction=optimize_direction)
    return HyperDT(searcher=random_searcher,
                   reward_metric=reward_metric,
                   callbacks=[SummaryCallback()])