Example #1
0
    def test_hyper_dt(self):
        """Run a short HyperDT random search on synthetic data.

        Verifies both the model reloaded from the best trial's file and an
        estimator produced by ``final_train`` on the best space sample.
        """
        n_rows = 100
        searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
        hyper_model = HyperDT(
            searcher,
            callbacks=[SummaryCallback()],
            reward_metric='accuracy',
            dnn_params={
                'hidden_units': ((256, 0, False), (256, 0, False)),
                'dnn_activation': 'relu',
            },
            cache_preprocessed_data=True,
            cache_home=homedir + '/cache',
        )

        # Synthetic frame: one integer column, two 0/1 columns cast to str and
        # one standard-normal column. The draws happen in the order x1..x4 and
        # then the labels, so the random stream is consumed as it always was.
        features = {
            'x1': np.random.randint(0, 10, size=n_rows, dtype='int'),
            'x2': np.random.randint(0, 2, size=n_rows).astype('str'),
            'x3': np.random.randint(0, 2, size=n_rows).astype('str'),
            'x4': np.random.normal(0.0, 1.0, size=n_rows),
        }
        labels = np.random.randint(0, 2, size=n_rows, dtype='int')
        frame = pd.DataFrame(features)

        # The same frame serves as both training and evaluation data.
        hyper_model.search(frame, labels, frame, labels, max_trials=3, epochs=1)
        best = hyper_model.get_best_trial()

        # Part 1: the model stored by the best trial.
        loaded = hyper_model.load_estimator(best.model_file)
        assert loaded
        predictions = loaded.predict(frame)
        metrics = loaded.evaluate(frame, labels)
        assert len(predictions) == n_rows
        assert metrics
        assert isinstance(loaded, DeepTable)

        # Part 2: an estimator trained again from the best space sample.
        final_estimator = hyper_model.final_train(best.space_sample, frame, labels, epochs=1)
        predictions = final_estimator.predict(frame)
        metrics = final_estimator.evaluate(frame, labels)
        assert len(predictions) == n_rows
        assert metrics
        assert isinstance(final_estimator.model, DeepTable)
Example #2
0
    def test_boston(self):
        """Search and final-train HyperDT on the Boston housing regression data.

        The full data set and its 80/20 split (seed 42) are stored on ``self``
        as ``X``/``y`` and ``X_train``/``X_test``/``y_train``/``y_test``.
        After a three-trial random search, the best space sample is trained
        on the full data and evaluated on the held-out split.
        """
        print("Loading datasets...")
        # NOTE(review): load_boston has been removed from recent scikit-learn
        # releases; confirm the pinned version still provides it.
        boston_dataset = load_boston()

        self.X = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)
        self.y = pd.Series(boston_dataset.target)

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.2, random_state=42)

        # NOTE(review): the searcher maximizes while the reward metric is an
        # error measure (RootMeanSquaredError) -- confirm this direction is
        # intended before relying on which trial is reported as "best".
        rs = RandomSearcher(tiny_dt_space, optimize_direction=OptimizeDirection.Maximize)
        hdt = HyperDT(rs,
                      callbacks=[SummaryCallback(),
                                 FileStorageLoggingCallback(rs, output_dir=f'hotexamples_com/hyn_logs')],
                      reward_metric='RootMeanSquaredError',
                      dnn_params={
                          'hidden_units': ((256, 0, False), (256, 0, False)),
                          'dnn_activation': 'relu',
                      },
                      )
        hdt.search(self.X_train, self.y_train, self.X_test, self.y_test, max_trials=3)

        best_trial = hdt.get_best_trial()

        # The final model is trained on the full data set, not only the
        # training split.
        estimator = hdt.final_train(best_trial.space_sample, self.X, self.y)
        score = estimator.predict(self.X_test)
        result = estimator.evaluate(self.X_test, self.y_test)
        # The predictions were previously computed but never checked; require
        # one prediction per test row, as the sibling tests do.
        assert len(score) == len(self.y_test)
        assert result
        assert isinstance(estimator.model, DeepTable)
Example #3
0
    def test_bankdata(self):
        """Random-search HyperDT on a 10% sample of the bank data set.

        AUC is the reward metric. The best space sample is then trained on
        the training split and checked against the held-out split.
        """
        searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
        search_callbacks = [
            SummaryCallback(),
            FileLoggingCallback(searcher, output_dir=f'hotexamples_com/hyn_logs'),
        ]
        hyper_model = HyperDT(
            searcher,
            callbacks=search_callbacks,
            reward_metric='AUC',
            dnn_params={
                'hidden_units': ((256, 0, False), (256, 0, False)),
                'dnn_activation': 'relu',
            },
        )

        bank = dsutils.load_bank().sample(frac=0.1, random_state=9527)
        bank.drop(columns=['id'], inplace=True)
        train_part, test_part = train_test_split(bank, test_size=0.2, random_state=42)
        # pop() takes the label column out of each frame in place, so the
        # frames hold only features afterwards.
        train_labels = train_part.pop('y')
        test_labels = test_part.pop('y')

        hyper_model.search(train_part, train_labels, test_part, test_labels, max_trials=3)
        best = hyper_model.get_best_trial()
        assert best

        final_estimator = hyper_model.final_train(best.space_sample, train_part, train_labels)
        predictions = final_estimator.predict(test_part)
        metrics = final_estimator.evaluate(test_part, test_labels)
        assert len(predictions) == len(test_labels)
        assert metrics
        assert isinstance(final_estimator.model, DeepTable)
Example #4
0
    def train(self, X, y, X_test):
        """Search for a model with HyperDT and keep the best trial's estimator.

        ``X``/``y`` are split 70/30 with a fixed seed (9527) into a training
        and an evaluation set. The split is stratified on ``y`` unless
        ``self.task`` is ``'regression'``. ``X_test`` is accepted but not
        used by this method. Nothing is returned; the estimator loaded from
        the best trial's model file is stored on ``self.estimator``.
        """
        evolution_searcher = EvolutionSearcher(
            mini_dt_space,
            optimize_direction=OptimizeDirection.Maximize,
            population_size=30,
            sample_size=10,
            regularized=True,
            candidates_size=10,
        )
        early_stopping = EarlyStoppingCallback(
            self.earlystop_rounds,
            'max',
            time_limit=self.time_limit,
            expected_reward=self.expected_reward,
        )
        hyper_model = HyperDT(
            evolution_searcher,
            callbacks=[early_stopping],
            reward_metric=self.reward_metric,
            cache_preprocessed_data=True,
        )

        # No stratification for regression; otherwise stratify on the labels.
        stratify_on = None if self.task == 'regression' else y
        X_train, X_eval, y_train, y_eval = train_test_split(
            X, y, test_size=0.3, random_state=9527, stratify=stratify_on)

        hyper_model.search(
            X_train, y_train, X_eval, y_eval,
            max_trials=self.max_trials,
            epochs=self.epochs,
        )
        winner = hyper_model.get_best_trial()
        self.estimator = hyper_model.load_estimator(winner.model_file)
Example #5
0
# Script setup: build a HyperDT driven by an evolutionary searcher, sanity-check
# the size of two predefined search spaces, and prepare the adult data set.

# Name built from the project name plus a second-resolution timestamp of the
# moment this line runs (presumably used as a per-run output directory).
homedir = f'{consts.PROJECT_NAME}_run_dt_{time.strftime("%Y%m%d%H%M%S")}'
disk_trial_store = DiskTrialStore(f'hotexamples_com/trial_store')

# Alternative searchers, kept for reference:
# searcher = MCTSSearcher(mini_dt_space, max_node_space=0,optimize_direction=OptimizeDirection.Maximize)
# searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
# NOTE(review): the positional 200 and 100 are presumably population_size and
# sample_size -- confirm against the EvolutionSearcher signature.
searcher = EvolutionSearcher(mini_dt_space,
                             200,
                             100,
                             regularized=True,
                             candidates_size=30,
                             optimize_direction=OptimizeDirection.Maximize)

# The logging callback receives the same searcher instance that drives HyperDT.
hdt = HyperDT(searcher,
              callbacks=[
                  SummaryCallback(),
                  FileStorageLoggingCallback(searcher,
                                             output_dir=f'hotexamples_com/hyn_logs')
              ],
              reward_metric='AUC',
              earlystopping_patience=1)

# Guard against accidental changes to the predefined search spaces: these
# asserts pin the exact number of combinations each one reports.
space = mini_dt_space()
assert space.combinations == 589824
space2 = default_dt_space()
assert space2.combinations == 3559292928

df = dsutils.load_adult()
# df.drop(['id'], axis=1, inplace=True)
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
# X is the same object as df_train, so popping column 14 below also removes
# it from X; column 14 is taken out as the target for both splits.
X = df_train
y = df_train.pop(14)
y_test = df_test.pop(14)
Example #6
0
                        reduce_factor=Choice([1, 0.8]),
                        dnn_dropout=Choice([0, 0.3]),
                        use_bn=Bool(),
                        dnn_layers=2,
                        activation='relu')(dt_module)
        fit = DTFit(batch_size=Choice([128, 256]))(dt_module)

    return space


# Script setup: HyperDT driven by an MCTS searcher, followed by loading the
# bank data set.
# NOTE(review): my_space is presumably the search-space function defined just
# above this point -- confirm.
rs = MCTSSearcher(my_space, max_node_space=5)

# FileLoggingCallback is given the same searcher instance that drives HyperDT.
hdt = HyperDT(rs,
              callbacks=[SummaryCallback(),
                         FileLoggingCallback(rs)],
              reward_metric='AUC',
              dnn_params={
                  'dnn_units': ((256, 0, False), (256, 0, False)),
                  'dnn_activation': 'relu',
              })

from deeptables.datasets import dsutils

# Only the first 1000 rows of the bank data set are used.
df = dsutils.load_bank()[:1000]
print("data shape: ")
print(df.shape)

# Drop the 'id' column, then split off 'y' as the target; pop() removes it
# from df in place, so X (the same object as df) holds the remaining columns.
df.drop(['id'], axis=1, inplace=True)
y = df.pop("y")
X = df

X_train, X_test, y_train, y_test = train_test_split(X,
Example #7
0
def create_hyper_model(reward_metric='AUC', optimize_direction='max'):
    """Build a HyperDT whose searcher is created with ``make_searcher('random', ...)``.

    :param reward_metric: value forwarded to HyperDT as ``reward_metric``
    :param optimize_direction: value forwarded to ``make_searcher``
    :return: the HyperDT instance, created with a single SummaryCallback
    """
    random_searcher = make_searcher(
        'random',
        search_space_fn=tiny_dt_space,
        optimize_direction=optimize_direction,
    )
    return HyperDT(
        searcher=random_searcher,
        reward_metric=reward_metric,
        callbacks=[SummaryCallback()],
    )