Example #1
    def test_hypergbm_bankdata(self):
        rs = RandomSearcher(search_space_general, optimize_direction=OptimizeDirection.Maximize)
        hk = HyperGBM(rs, task='classification', reward_metric='accuracy',
                      callbacks=[SummaryCallback(), FileLoggingCallback(rs)])

        df = pd.read_csv('cooka/test/dataset/Bank_Marketing_Data/train.csv')

        df.drop(['id'], axis=1, inplace=True)
        X_train, X_test = train_test_split(df.head(1000), test_size=0.2, random_state=42)
        y_train = X_train.pop('y')
        y_test = X_test.pop('y')

        X_train_origin = X_train.copy()

        hk.search(X_train, y_train, X_test, y_test, max_trials=1)
        assert hk.best_model
        best_trial = hk.get_best_trial()

        estimator = hk.final_train(best_trial.space_sample, X_train, y_train)

        result = estimator.evaluate(X_test, y_test)
        print(result)

        predict_result = estimator.predict(X_train_origin)
        print(predict_result)
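
The HyperGBM snippets in this listing share a common setup. A minimal sketch of the imports they appear to assume follows; the exact module paths are best-effort assumptions and may vary across hypergbm/hypernets versions.

# Imports assumed by the HyperGBM examples (module paths are assumptions).
import pandas as pd
from sklearn.model_selection import train_test_split

from hypergbm import HyperGBM
from hypergbm.search_space import search_space_general
from hypernets.core.callbacks import SummaryCallback, FileLoggingCallback
from hypernets.core.searcher import OptimizeDirection
from hypernets.searchers.random_searcher import RandomSearcher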
Example #2
    def test_hyper_dt(self):
        rs = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize, )
        hdt = HyperDT(rs,
                      callbacks=[SummaryCallback()],
                      reward_metric='accuracy',
                      dnn_params={
                          'hidden_units': ((256, 0, False), (256, 0, False)),
                          'dnn_activation': 'relu',
                      },
                      cache_preprocessed_data=True,
                      cache_home=homedir + '/cache'
                      )
        x1 = np.random.randint(0, 10, size=(100), dtype='int')
        x2 = np.random.randint(0, 2, size=(100)).astype('str')
        x3 = np.random.randint(0, 2, size=(100)).astype('str')
        x4 = np.random.normal(0.0, 1.0, size=(100))

        y = np.random.randint(0, 2, size=(100), dtype='int')
        df = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4})
        hdt.search(df, y, df, y, max_trials=3, epochs=1)
        best_trial = hdt.get_best_trial()
        model = hdt.load_estimator(best_trial.model_file)
        assert model
        score = model.predict(df)
        result = model.evaluate(df, y)
        assert len(score) == 100
        assert result
        assert isinstance(model, DeepTable)

        estimator = hdt.final_train(best_trial.space_sample, df, y, epochs=1)
        score = estimator.predict(df)
        result = estimator.evaluate(df, y)
        assert len(score) == 100
        assert result
        assert isinstance(estimator.model, DeepTable)
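
The HyperDT snippets (Examples #2 through #4) build on DeepTables. A minimal sketch of the imports they appear to assume; the module paths, in particular deeptables.models.hyper_dt, are assumptions that may differ by version.

# Imports assumed by the HyperDT examples (module paths are assumptions).
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from deeptables.models.deeptable import DeepTable
from deeptables.models.hyper_dt import HyperDT, mini_dt_space
from hypernets.core.callbacks import SummaryCallback, FileStorageLoggingCallback
from hypernets.core.searcher import OptimizeDirection
from hypernets.searchers.random_searcher import RandomSearcher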
Example #3
    def test_boston(self):

        print("Loading datasets...")
        boston_dataset = load_boston()

        df_train = pd.DataFrame(boston_dataset.data)
        df_train.columns = boston_dataset.feature_names
        self.y = pd.Series(boston_dataset.target)
        self.X = df_train

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(self.X, self.y, test_size=0.2, random_state=42)

        rs = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize, )
        hdt = HyperDT(rs,
                      callbacks=[SummaryCallback(), FileStorageLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')],
                      reward_metric='RootMeanSquaredError',
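                      # note: pairing RootMeanSquaredError with OptimizeDirection.Maximize is kept
                      # as in the original test; Minimize is the usual direction for RMSE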
                      dnn_params={
                          'hidden_units': ((256, 0, False), (256, 0, False)),
                          'dnn_activation': 'relu',
                      },
                      )
        hdt.search(self.X_train, self.y_train, self.X_test, self.y_test, max_trials=3)

        best_trial = hdt.get_best_trial()

        estimator = hdt.final_train(best_trial.space_sample, self.X, self.y)
        score = estimator.predict(self.X_test)
        result = estimator.evaluate(self.X_test, self.y_test)
        assert result
        assert isinstance(estimator.model, DeepTable)
Example #4
    def test_bankdata(self):
        rs = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize, )
        hdt = HyperDT(rs,
                      callbacks=[SummaryCallback(), FileLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')],
                      # reward_metric='accuracy',
                      reward_metric='AUC',
                      dnn_params={
                          'hidden_units': ((256, 0, False), (256, 0, False)),
                          'dnn_activation': 'relu',
                      },
                      )

        df = dsutils.load_bank().sample(frac=0.1, random_state=9527)
        df.drop(['id'], axis=1, inplace=True)
        df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
        y = df_train.pop('y')
        y_test = df_test.pop('y')

        hdt.search(df_train, y, df_test, y_test, max_trials=3, )
        best_trial = hdt.get_best_trial()
        assert best_trial

        estimator = hdt.final_train(best_trial.space_sample, df_train, y)
        score = estimator.predict(df_test)
        result = estimator.evaluate(df_test, y_test)
        assert len(score) == len(y_test)
        assert result
        assert isinstance(estimator.model, DeepTable)
Example #5
def main():
    # client = Client("tcp://127.0.0.1:64958")
    client = Client(processes=False, threads_per_worker=2, n_workers=1, memory_limit='4GB')
    print(client)

    rs = RandomSearcher(get_space_num_cat_pipeline_complex, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperGBM(rs, task='classification', reward_metric='accuracy',
                  cache_dir=f'{test_output_dir}/hypergbm_cache',
                  callbacks=[SummaryCallback(), FileLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')])

    df = dsutils.load_bank_by_dask()
    df = df.drop(['id'], axis=1)  # dask DataFrames do not support inplace drop; reassign the result
    df['y'] = dm_pre.LabelEncoder().fit_transform(df['y'])
    # df = df.sample(frac=0.1)

    # object_columns = [i for i, v in df.dtypes.items() if v == 'object']
    # for c in object_columns:
    #     df[c] = df[c].astype('category')
    # df = df.categorize(object_columns)

    X_train, X_test = train_test_split(df, test_size=0.8, random_state=42)
    y_train = X_train.pop('y')
    y_test = X_test.pop('y')

    hk.search(X_train, y_train, X_test, y_test, max_trials=50)
    print('-' * 30)

    best_trial = hk.get_best_trial()
    print(f'best_trial:{best_trial}')
    estimator = hk.final_train(best_trial.space_sample, X_train, y_train)
    score = estimator.predict(X_test)
    result = estimator.evaluate(X_test, y_test, metrics=['accuracy', 'auc', 'logloss'])
    print(f'final result:{result}')
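
A minimal entry point for the Dask example above (a sketch): with processes=False the Client runs in-process, so the __main__ guard is optional here, but it becomes required if a process-based Client is used instead.

# Hypothetical entry point for the Dask example above.
if __name__ == '__main__':
    main()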
Example #6
    def test_hypergbm_diabetes(self):
        rs = RandomSearcher(search_space_general, optimize_direction=OptimizeDirection.Maximize)
        hk = HyperGBM(rs, task='classification', reward_metric='accuracy',
                      callbacks=[SummaryCallback(), FileLoggingCallback(rs)])

        df = pd.read_csv('cooka/test/dataset/diabetes_10k.csv')

        X_train, X_test = train_test_split(df.head(1000), test_size=0.2, random_state=42)
        y_train = X_train.pop('readmitted')
        y_test = X_test.pop('readmitted')

        X_train_origin = X_train.copy()

        hk.search(X_train, y_train, X_test, y_test, max_trials=2)
        assert hk.best_model
        best_trial = hk.get_best_trial()

        estimator = hk.final_train(best_trial.space_sample, X_train, y_train)

        # result = estimator.evaluate(X_test, y_test)
        # print(result)

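        # move 'gender' to the last column (presumably checking that predict is robust to column order)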
        series_gender = X_train_origin.pop('gender')
        X_train_origin['gender'] = series_gender

        predict_result = estimator.predict(X_train_origin)
        print(predict_result)
Example #7
    def test_cnn_space_hyper_model(self):
        rs = RandomSearcher(
            lambda: cnn_search_space(input_shape=(28, 28, 1),
                                     output_units=10,
                                     output_activation='softmax',
                                     block_num_choices=[2, 3, 4, 5],
                                     filters_choices=[32, 64, 128],
                                     kernel_size_choices=[(1, 1), (3, 3)]),
            optimize_direction='max')
        hk = HyperKeras(rs, optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'],
                        callbacks=[SummaryCallback()])

        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

        # Rescale the images from [0,255] to the [0.0,1.0] range.
        x_train, x_test = x_train[..., np.newaxis] / 255.0, x_test[..., np.newaxis] / 255.0
        y_train = tf.keras.utils.to_categorical(y_train)
        y_test = tf.keras.utils.to_categorical(y_test)
        print("Number of original training examples:", len(x_train))
        print("Number of original test examples:", len(x_test))

        # sample for speed up
        samples = 100
        hk.search(x_train[:samples], y_train[:samples], x_test[:int(samples / 10)], y_test[:int(samples / 10)],
                  max_trials=3, epochs=1)
        assert hk.best_model
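
The HyperKeras snippets (Examples #7 through #9, #12, #14, and #18) assume roughly the imports below. This is a sketch; the HyperKeras import path in particular is an assumption, and the cnn_search_space/dnn_search_space helpers come from hyperkeras search-space modules whose exact paths are not guessed here.

# Imports assumed by the HyperKeras examples (module paths are assumptions).
import numpy as np
import tensorflow as tf

from hyperkeras.hyper_keras import HyperKeras  # assumed path
from hypernets.core.callbacks import SummaryCallback
from hypernets.core.searcher import OptimizeDirection
from hypernets.searchers.random_searcher import RandomSearcher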
Example #8
    def test_dnn_space_hyper_model(self):
        rs = RandomSearcher(lambda: dnn_search_space(input_shape=10, output_units=2, output_activation='sigmoid'),
                            optimize_direction='max')
        hk = HyperKeras(rs, optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'],
                        callbacks=[SummaryCallback()])

        x = np.random.randint(0, 10000, size=(100, 10))
        y = np.random.randint(0, 2, size=(100), dtype='int')

        hk.search(x, y, x, y, max_trials=3)
        assert hk.best_model
Example #9
    def test_fit_one_shot_model_epoch(self):
        rs = RandomSearcher(self.get_space_simple,
                            optimize_direction=OptimizeDirection.Maximize)
        hk = HyperKeras(rs,
                        optimizer='adam',
                        loss='sparse_categorical_crossentropy',
                        metrics=['accuracy'],
                        callbacks=[SummaryCallback()],
                        one_shot_mode=True,
                        one_shot_train_sampler=rs)
        x, y = self.get_x_y_1()
        hk.fit_one_shot_model_epoch(x, y)
Example #10
    def test_random_searcher(self):

        searcher = RandomSearcher(self.get_space,
                                  space_sample_validation_fn=lambda s: False)
        with pytest.raises(ValueError):
            searcher.sample()

        searcher = RandomSearcher(self.get_space,
                                  space_sample_validation_fn=lambda s: True)
        sample = searcher.sample()
        assert sample

        def valid_sample(sample):
            return sample.Param_Bool_1.value

        searcher = RandomSearcher(self.get_space,
                                  space_sample_validation_fn=valid_sample)
        sample = searcher.sample()
        assert sample
Example #11
def run_search():
    searchers = (RandomSearcher(get_space,
                                space_sample_validation_fn=lambda s: True),
                 MCTSSearcher(get_space, max_node_space=10),
                 EvolutionSearcher(get_space, 5, 3, regularized=False))

    for searcher in searchers:
        for i in range(100):
            space_sample = searcher.sample()
            assert space_sample.all_assigned
            print(searcher.__class__.__name__, i,
                  space_sample.params_summary())
            searcher.update_result(space_sample, np.random.uniform(0.1, 0.9))
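
run_search assumes a get_space callable returning a HyperSpace with a few assignable hyperparameters. A hypothetical minimal space follows (the Identity module and parameter names are illustrative, chosen to match the vectors asserted in Example #16):

# Hypothetical minimal search space for the searcher examples.
from hypernets.core.ops import Identity
from hypernets.core.search_space import Bool, HyperSpace, Int, Real

def get_space():
    space = HyperSpace()
    with space.as_default():
        Identity(p1=Int(0, 100), p2=Bool(), p3=Bool(), p4=Real(0.0, 1.0))
    return space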
Example #12
    def test_build_dataset_iter(self):
        rs = RandomSearcher(self.get_space,
                            optimize_direction=OptimizeDirection.Maximize)
        hk = HyperKeras(rs,
                        optimizer='adam',
                        loss='sparse_categorical_crossentropy',
                        metrics=['accuracy'],
                        callbacks=[SummaryCallback()])
        x, y = self.get_x_y_1()

        ds_iter = hk.build_dataset_iter(x, y, batch_size=10)

        batch_counter = 0
        for x_b, y_b in ds_iter:
            assert len(x_b) == 10
            assert len(y_b) == 10
            batch_counter += 1
        assert batch_counter == 10

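        # 100 samples at batch_size=32: 3 full batches plus a remainder batch of 4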
        ds_iter = hk.build_dataset_iter(x, y, batch_size=32)

        batch_counter = 0
        for x_b, y_b in ds_iter:
            if batch_counter < 3:
                assert len(x_b) == 32
                assert len(y_b) == 32
            else:
                assert len(x_b) == 4
                assert len(y_b) == 4
            batch_counter += 1
        assert batch_counter == 4

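        # repeat_count=2 streams the 100 samples twice (200 total):
        # 6 full batches of 32 plus a remainder batch of 8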
        ds_iter = hk.build_dataset_iter(x, y, batch_size=32, repeat_count=2)

        batch_counter = 0
        for x_b, y_b in ds_iter:
            if batch_counter < 6:
                assert len(x_b) == 32
                assert len(y_b) == 32
            else:
                assert len(x_b) == 8
                assert len(y_b) == 8
            batch_counter += 1
        assert batch_counter == 7
Example #13
def main():
    rs = RandomSearcher(search_space_general, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperGBM(rs, task='classification', reward_metric='auc',
                  cache_dir=f'{test_output_dir}/hypergbm_cache',
                  callbacks=[SummaryCallback(), FileLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')])

    df = dsutils.load_bank()
    df.drop(['id'], axis=1, inplace=True)
    X_train, X_test = train_test_split(df, test_size=0.8, random_state=42)
    y_train = X_train.pop('y')
    y_test = X_test.pop('y')

    hk.search(X_train, y_train, X_test, y_test, max_trials=500)
    best_trial = hk.get_best_trial()
    print(f'best_trial:{best_trial}')
    estimator = hk.final_train(best_trial.space_sample, X_train, y_train)
    score = estimator.predict(X_test)
    result = estimator.evaluate(X_test, y_test, metrics=['auc', 'accuracy'])
    print(f'final result:{result}')
Example #14
    def test_model_with_hp(self):
        rs = RandomSearcher(self.get_space,
                            optimize_direction=OptimizeDirection.Maximize)
        hk = HyperKeras(rs,
                        optimizer='adam',
                        loss='sparse_categorical_crossentropy',
                        metrics=['accuracy'],
                        callbacks=[SummaryCallback()])

        x, y = self.get_x_y()
        hk.search(x, y, x, y, max_trials=3)
        assert hk.best_model
        best_trial = hk.get_best_trial()

        estimator = hk.final_train(best_trial.space_sample, x, y)
        score = estimator.predict(x)
        result = estimator.evaluate(x, y)
        assert len(score) == 100
        assert result
Example #15
    def train_bankdata(self, data_partition):
        rs = RandomSearcher(search_space_general, optimize_direction=OptimizeDirection.Maximize)
        hk = HyperGBM(rs, task='classification', reward_metric='accuracy',
                      cache_dir=f'{test_output_dir}/hypergbm_cache',
                      callbacks=[SummaryCallback(), FileLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')])

        df = dsutils.load_bank()
        df.drop(['id'], axis=1, inplace=True)

        X_train, X_test, y_train, y_test = data_partition()

        hk.search(X_train, y_train, X_test, y_test, max_trials=3)
        best_trial = hk.get_best_trial()

        estimator = hk.final_train(best_trial.space_sample, X_train, y_train)
        score = estimator.predict(X_test)
        result = estimator.evaluate(X_test, y_test)
        assert len(score) == 200
        return estimator, hk
Example #16
    def test_set_random_state(self):
        from hypernets.core import set_random_state
        set_random_state(9527)

        searcher = RandomSearcher(self.get_space)
        vectors = []
        for i in range(1, 10):
            vectors.append(searcher.sample().vectors)
        assert vectors == [[98, 0, 0, 0.96], [9, 0, 0, 0.93], [60, 0, 1, 0.24],
                           [54, 0, 1, 0.7], [25, 0, 1, 0.73], [67, 1, 1, 0.43],
                           [57, 1, 1, 0.05], [49, 0, 0, 0.71],
                           [71, 1, 1, 0.49]]

        set_random_state(None)
        searcher = RandomSearcher(self.get_space)
        vectors = []
        for i in range(1, 10):
            vectors.append(searcher.sample().vectors)
        assert vectors != [[98, 0, 0, 0.96], [9, 0, 0, 0.93], [60, 0, 1, 0.24],
                           [54, 0, 1, 0.7], [25, 0, 1, 0.73], [67, 1, 1, 0.43],
                           [57, 1, 1, 0.05], [49, 0, 0, 0.71],
                           [71, 1, 1, 0.49]]

        set_random_state(9527)
        searcher = RandomSearcher(self.get_space)
        vectors = []
        for i in range(1, 10):
            vectors.append(searcher.sample().vectors)
        assert vectors == [[98, 0, 0, 0.96], [9, 0, 0, 0.93], [60, 0, 1, 0.24],
                           [54, 0, 1, 0.7], [25, 0, 1, 0.73], [67, 1, 1, 0.43],
                           [57, 1, 1, 0.05], [49, 0, 0, 0.71],
                           [71, 1, 1, 0.49]]

        set_random_state(1)
        searcher = RandomSearcher(self.get_space)
        vectors = []
        for i in range(1, 10):
            vectors.append(searcher.sample().vectors)
        assert vectors == [[38, 1, 0, 0.93], [10, 1, 1, 0.15],
                           [17, 1, 0, 0.39], [7, 1, 0, 0.85], [19, 0, 1, 0.44],
                           [29, 1, 0, 0.67], [88, 1, 1, 0.43], [95, 0, 0, 0.8],
                           [10, 1, 1, 0.09]]

        set_random_state(None)
Example #17
# numpy/tensorflow and the searcher/callback imports used below were missing
# from the snippet and are added here (paths are best-effort assumptions).
import numpy as np
import tensorflow as tf

from hypernets.core.callbacks import SummaryCallback
from hypernets.searchers.mcts_searcher import MCTSSearcher
from hypernets.searchers.random_searcher import RandomSearcher
from hyperkeras.search_space.enas_micro import enas_micro_search_space
from hyperkeras.one_shot_model import OneShotModel

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Rescale the images from [0,255] to the [0.0,1.0] range.
x_train, x_test = x_train[..., np.newaxis] / 255.0, x_test[..., np.newaxis] / 255.0
y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)
print("Number of original training examples:", len(x_train))
print("Number of original test examples:", len(x_test))
# sample for speed up
samples = 100

searcher = MCTSSearcher(lambda: enas_micro_search_space(arch='NNRNNR', hp_dict={}), optimize_direction='max')
one_shot_sampler = RandomSearcher(lambda: enas_micro_search_space(arch='NNRNNR', hp_dict={}), optimize_direction='max')

model = OneShotModel(searcher,
                     optimizer='adam',
                     loss='categorical_crossentropy',
                     metrics=['accuracy'],
                     epochs=3,
                     batch_size=64,
                     controller_train_per_epoch=False,  # Single path
                     callbacks=[SummaryCallback()],
                     one_shot_train_sampler=one_shot_sampler,  # uniform sampler
                     visualization=False)
# model.search(x_train[:samples], y_train[:samples], x_test[:int(samples / 10)], y_test[:int(samples / 10)],

model.search(x_train, y_train, x_test, y_test, max_trials=1000, epochs=100, callbacks=[])
assert model.best_model
Example #18

def get_space():
    space = HyperSpace()
    with space.as_default():
        in1 = Input(shape=(10, ))
        dense1 = Dense(10,
                       activation=Choice(['relu', 'tanh', None]),
                       use_bias=Bool())(in1)
        bn1 = BatchNormalization()(dense1)
        dropout1 = Dropout(Choice([0.3, 0.4, 0.5]))(bn1)
        output = Dense(2, activation='softmax', use_bias=True)(dropout1)
    return space


rs = RandomSearcher(get_space, optimize_direction=OptimizeDirection.Maximize)
hk = HyperKeras(rs,
                optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'],
                callbacks=[SummaryCallback()])

x = np.random.randint(0, 10000, size=(100, 10))
y = np.random.randint(0, 2, size=(100), dtype='int')

hk.search(x, y, x, y, max_trials=3)
best_trial = hk.get_best_trial()
assert best_trial

estimator = hk.final_train(best_trial.space_sample, x, y)
score = estimator.predict(x)
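
As in Example #14, the final estimator can also be evaluated; a short sketch assuming the same evaluate API shown there:

# Evaluate the final estimator (mirrors the evaluate usage in Example #14).
result = estimator.evaluate(x, y)
print(result)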