def run(distribute_strategy=None, batch_size=32, epochs=5):
    """Train a DeepFM model on the bank dataset, then evaluate and score it.

    Args:
        distribute_strategy: optional tf.distribute strategy passed through to
            the model config (None = single-device training).
        batch_size: mini-batch size used while fitting.
        epochs: number of training epochs.
    """
    # Load the data and hold out 20% for evaluation.
    data = dsutils.load_bank()
    train_df, test_df = train_test_split(data, test_size=0.2, random_state=42)
    target = train_df.pop('y')
    target_test = test_df.pop('y')

    # Build the model configuration and fit.
    conf = deeptable.ModelConfig(
        nets=deepnets.DeepFM,
        earlystopping_patience=999,
        apply_class_weight=True,
        distribute_strategy=distribute_strategy,
    )
    table = deeptable.DeepTable(config=conf)
    model, history = table.fit(train_df, target, batch_size=batch_size, epochs=epochs)

    # Evaluate on the held-out split.
    result = table.evaluate(test_df, target_test, verbose=0)
    print('score:', result)

    # Score the test split and print the predicted-class counts.
    preds = table.predict(test_df)
    labels, counts = np.unique(preds, return_counts=True)
    print(dict(zip(labels, counts)))
def test_bankdata(self):
    """Run a short random hyper-parameter search on a bank-data sample,
    then retrain the best trial and sanity-check its outputs."""
    searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
    hdt = HyperDT(
        searcher,
        callbacks=[SummaryCallback(),
                   FileLoggingCallback(searcher, output_dir=f'hotexamples_com/hyn_logs')],
        reward_metric='AUC',
        dnn_params={
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        },
    )

    # Small, reproducible sample keeps the search fast.
    data = dsutils.load_bank().sample(frac=0.1, random_state=9527)
    data.drop(['id'], axis=1, inplace=True)
    train_df, test_df = train_test_split(data, test_size=0.2, random_state=42)
    y_train = train_df.pop('y')
    y_holdout = test_df.pop('y')

    hdt.search(train_df, y_train, test_df, y_holdout, max_trials=3)
    best_trial = hdt.get_best_trial()
    assert best_trial

    # Retrain the winning sample and check prediction/evaluation work.
    estimator = hdt.final_train(best_trial.space_sample, train_df, y_train)
    score = estimator.predict(test_df)
    result = estimator.evaluate(test_df, y_holdout)
    assert len(score) == len(y_holdout)
    assert result
    assert isinstance(estimator.model, DeepTable)
def test_load_data(self):
    """Verify each bundled dataset loads with its expected shape.

    Bug fixed: the original used ``assert df.shape, (rows, cols)``, which
    Python parses as ``assert <expr>, <message>`` — the tuple is only the
    failure message, so the asserts could never fail.  The shapes are now
    compared explicitly with ``==``.
    """
    df_adult = dsutils.load_adult()
    df_glass = dsutils.load_glass_uci()
    df_hd = dsutils.load_heart_disease_uci()
    df_bank = dsutils.load_bank()
    df_boston = dsutils.load_boston()

    assert df_adult.shape == (32561, 15)
    assert df_glass.shape == (214, 11)
    assert df_hd.shape == (303, 14)
    assert df_bank.shape == (108504, 18)
    assert df_boston.shape == (506, 14)
def test_importances(self):
    """Smoke-test permutation importances (eli5) for both max- and min-mode metrics."""
    if not have_eli5:
        return  # eli5 not installed; nothing to test

    # Tiny sample so the single training epoch stays fast.
    data = dsutils.load_bank().head(100)
    data.drop(['id'], axis=1, inplace=True)
    train_df, holdout_df = train_test_split(data, test_size=0.2, random_state=42)
    y_train = train_df.pop('y')
    y_holdout = holdout_df.pop('y')

    conf = deeptable.ModelConfig(nets=['dnn_nets'], auto_discrete=True, metrics=['AUC'])
    table = deeptable.DeepTable(config=conf)
    table.fit(train_df, y_train, epochs=1)

    # One importance matrix per metric direction; 16 features, 2 columns each.
    fi_auc = get_score_importances(table, holdout_df, y_holdout, 'AUC', 1, mode='max')
    assert fi_auc.shape == (16, 2)
    fi_loss = get_score_importances(table, holdout_df, y_holdout, 'log_loss', 1, mode='min')
    assert fi_loss.shape == (16, 2)
def test_shap(self):
    """Smoke-test SHAP value computation for a one-epoch DNN model."""
    if not have_shap:
        return  # shap not installed; nothing to test

    # Tiny sample so the single training epoch stays fast.
    data = dsutils.load_bank().head(100)
    data.drop(['id'], axis=1, inplace=True)
    train_df, holdout_df = train_test_split(data, test_size=0.2, random_state=42)
    y_train = train_df.pop('y')
    y_holdout = holdout_df.pop('y')

    conf = deeptable.ModelConfig(nets=['dnn_nets'], auto_discrete=True, metrics=['AUC'])
    table = deeptable.DeepTable(config=conf)
    table.fit(train_df, y_train, epochs=1)

    # Explain a single row; expect one (1, n_features) array per output.
    explainer = DeepTablesExplainer(table, train_df, num_samples=10)
    shap_values = explainer.get_shap_values(train_df[:1], nsamples='auto')
    assert shap_values[0].shape == (1, 16)
# Run a single-trial MCTS hyper-parameter search over the bank dataset.
rs = MCTSSearcher(my_space, max_node_space=5)
hdt = HyperDT(rs,
              callbacks=[SummaryCallback(), FileLoggingCallback(rs)],
              reward_metric='AUC',
              dnn_params={
                  'dnn_units': ((256, 0, False), (256, 0, False)),
                  'dnn_activation': 'relu',
              })

from deeptables.datasets import dsutils

df = dsutils.load_bank()[:1000]
print("data shape: ")
print(df.shape)
df.drop(['id'], axis=1, inplace=True)
y = df.pop("y")
X = df
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=12)

t1 = time.time()
# Bug fixed: the keyword was misspelled 'max_trails'; the other search()
# call in this file uses 'max_trials'.  NOTE(review): very old hypernets
# releases actually spelled the parameter 'max_trails' — confirm against
# the installed hypernets version.
hdt.search(X_train, y_train, X_test, y_test, max_trials=1)
def setup_class(cls):
    """Load the shared dataset fixtures once for the whole test class."""
    # 1% sample of the bank data keeps the fixture small and fast.
    bank_sample = dsutils.load_bank().sample(frac=0.01)
    cls.df_bank = bank_sample
    cls.df_movielens = dsutils.load_movielens()