Beispiel #1
0
def run(distribute_strategy=None, batch_size=32, epochs=5):
    """Train a DeepFM DeepTable on the bank dataset, then print its score.

    The data is split 80/20 with a fixed seed, the model is fitted on the
    training part, evaluated on the held-out part, and finally the count of
    each distinct predicted value is printed.

    Args:
        distribute_strategy: Forwarded unchanged to ``ModelConfig``.
        batch_size: Batch size handed to ``DeepTable.fit``.
        epochs: Number of epochs handed to ``DeepTable.fit``.
    """
    # Seeded split so repeated runs see the same train/test rows.
    train_frame, test_frame = train_test_split(
        dsutils.load_bank(), test_size=0.2, random_state=42)

    # pop() removes the label column in place, leaving only the features.
    train_target = train_frame.pop('y')
    test_target = test_frame.pop('y')

    table = deeptable.DeepTable(
        config=deeptable.ModelConfig(
            nets=deepnets.DeepFM,
            earlystopping_patience=999,
            apply_class_weight=True,
            distribute_strategy=distribute_strategy,
        ),
    )
    # fit() is unpacked as a pair; neither element is needed afterwards.
    _model, _history = table.fit(
        train_frame, train_target, batch_size=batch_size, epochs=epochs)

    print('score:', table.evaluate(test_frame, test_target, verbose=0))

    # Tally how often each distinct prediction occurs.
    labels, counts = np.unique(table.predict(test_frame), return_counts=True)
    print(dict(zip(labels, counts)))
Beispiel #2
0
    def test_bankdata(self):
        """Run a 3-trial random search on a bank-data sample and verify the final estimator."""
        searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
        search_callbacks = [
            SummaryCallback(),
            FileLoggingCallback(searcher, output_dir=f'hotexamples_com/hyn_logs'),
        ]
        dnn_settings = {
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        }
        hyper_model = HyperDT(
            searcher,
            callbacks=search_callbacks,
            reward_metric='AUC',
            dnn_params=dnn_settings,
        )

        # Seeded 10% sample of the dataset; the 'id' column is dropped before splitting.
        bank = dsutils.load_bank().sample(frac=0.1, random_state=9527)
        bank.drop(['id'], axis=1, inplace=True)
        train_part, test_part = train_test_split(bank, test_size=0.2, random_state=42)
        train_labels = train_part.pop('y')
        test_labels = test_part.pop('y')

        hyper_model.search(train_part, train_labels, test_part, test_labels, max_trials=3)
        winner = hyper_model.get_best_trial()
        assert winner

        # Retrain from the best trial's sampled space, then check its outputs.
        final_estimator = hyper_model.final_train(winner.space_sample, train_part, train_labels)
        predictions = final_estimator.predict(test_part)
        evaluation = final_estimator.evaluate(test_part, test_labels)
        assert len(predictions) == len(test_labels)
        assert evaluation
        assert isinstance(final_estimator.model, DeepTable)
Beispiel #3
0
 def test_load_data(self):
     """Check that each ``dsutils`` loader returns a frame of the expected shape."""
     # NOTE(review): these expected (rows, columns) values were previously
     # written as assertion messages and therefore never enforced; confirm
     # they still match the datasets that ship with the package.
     expected_shapes = (
         (dsutils.load_adult, (32561, 15)),
         (dsutils.load_glass_uci, (214, 11)),
         (dsutils.load_heart_disease_uci, (303, 14)),
         (dsutils.load_bank, (108504, 18)),
         (dsutils.load_boston, (506, 14)),
     )
     # Load everything first so a loader failure surfaces before any shape check.
     loaded = [(loader.__name__, loader(), expected)
               for loader, expected in expected_shapes]
     for loader_name, frame, expected in loaded:
         # Compare with ``==``: ``assert shape, expected`` would treat the tuple
         # as the failure message and pass for any non-empty shape.
         assert frame.shape == expected, (
             f'{loader_name}: expected shape {expected}, got {frame.shape}')
    def test_importances(self):
        """Fit a small DNN DeepTable and check the shape of its score importances.

        Does nothing when ``have_eli5`` is falsy.
        """
        if not have_eli5:
            return

        # First 100 rows only, with the 'id' column removed.
        bank = dsutils.load_bank().head(100)
        bank.drop(['id'], axis=1, inplace=True)
        X, X_test = train_test_split(bank, test_size=0.2, random_state=42)
        y = X.pop('y')
        y_test = X_test.pop('y')

        dt = deeptable.DeepTable(
            config=deeptable.ModelConfig(
                nets=['dnn_nets'], auto_discrete=True, metrics=['AUC']),
        )
        dt.fit(X, y, epochs=1)

        # Same shape expectation for a maximised and a minimised metric.
        for metric, mode in (('AUC', 'max'), ('log_loss', 'min')):
            importances = get_score_importances(dt, X_test, y_test, metric, 1, mode=mode)
            assert importances.shape == (16, 2)
Beispiel #5
0
    def test_shap(self):
        """Fit a small DNN DeepTable and check the shape of SHAP values for one row.

        Does nothing when ``have_shap`` is falsy.
        """
        if not have_shap:
            return

        # First 100 rows only, with the 'id' column removed.
        bank = dsutils.load_bank().head(100)
        bank.drop(['id'], axis=1, inplace=True)
        X, X_test = train_test_split(bank, test_size=0.2, random_state=42)
        y = X.pop('y')
        y_test = X_test.pop('y')

        model_config = deeptable.ModelConfig(
            nets=['dnn_nets'],
            auto_discrete=True,
            metrics=['AUC'],
        )
        dt = deeptable.DeepTable(config=model_config)
        dt.fit(X, y, epochs=1)

        explainer = DeepTablesExplainer(dt, X, num_samples=10)

        # Explain only the first training row.
        first_row = X[:1]
        shap_values = explainer.get_shap_values(first_row, nsamples='auto')
        assert shap_values[0].shape == (1, 16)
Beispiel #6
0

from deeptables.datasets import dsutils

# Searcher over ``my_space`` (defined outside this snippet).
rs = MCTSSearcher(my_space, max_node_space=5)

hdt = HyperDT(
    rs,
    callbacks=[SummaryCallback(), FileLoggingCallback(rs)],
    reward_metric='AUC',
    dnn_params={
        'dnn_units': ((256, 0, False), (256, 0, False)),
        'dnn_activation': 'relu',
    },
)

# Work on the first 1000 rows only.
df = dsutils.load_bank()[:1000]
print("data shape: ", df.shape, sep="\n")

# Remove the 'id' column, then split the label column off from the features.
df.drop(['id'], axis=1, inplace=True)
y = df.pop("y")
X = df

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=12)

# Timestamp taken immediately before the search starts.
t1 = time.time()
# NOTE(review): the keyword is spelled ``max_trails`` here; confirm it matches
# the ``search`` signature of the installed Hypernets version (``max_trials``
# is the other spelling in circulation).
hdt.search(X_train, y_train, X_test, y_test, max_trails=1)
 def setup_class(cls):
     """Load the datasets used by this class and store them as class attributes."""
     # Unseeded 1% sample of the bank data, so the rows differ between runs.
     bank_full = dsutils.load_bank()
     cls.df_bank = bank_full.sample(frac=0.01)
     cls.df_movielens = dsutils.load_movielens()