Example #1
0
    def test_cache_preprocessed_data(self):
        """Fit two DeepTable instances that share one cache-enabled preprocessor.

        Both fits use the same ``DefaultPreprocessor`` object, which is
        constructed with ``use_cache=True`` and a cache directory under
        ``homedir``.
        """
        model_config = deeptable.ModelConfig(
            metrics=['AUC'],
            apply_gbm_features=False,
            apply_class_weight=True,
        )
        features = dsutils.load_adult().head(100)
        labels = features.pop(14).values

        shared_preprocessor = DefaultPreprocessor(
            model_config,
            cache_home=homedir + '/cache',
            use_cache=True,
        )

        # Two independent DeepTable objects, one shared preprocessor.
        for _ in range(2):
            table = deeptable.DeepTable(config=model_config,
                                        preprocessor=shared_preprocessor)
            table.fit(features, labels, epochs=1)
Example #2
0
    def test_predict_unseen_data(self):
        """Fit on a dask frame, then predict rows drawn from wider value ranges.

        Training data uses ``x1`` in [0, 10), ``x2`` as the strings '0'/'1' and
        ``x3`` ~ N(0, 1).  The prediction frame widens all three (``x1`` in
        [0, 50), ``x2`` as the strings '0'..'9', ``x3`` ~ N(0, 2)), so it can
        contain values that never occurred during fitting.  The test passes
        when ``predict`` returns one prediction per input row.
        """
        x1 = np.random.randint(0, 10, size=(100), dtype='int')
        x2 = np.random.randint(0, 2, size=(100)).astype('str')
        x3 = np.random.normal(0.0, 1.0, size=(100))
        y = np.random.randint(0, 2, size=(100), dtype='int')

        df = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'y': y})
        df = dd.from_pandas(df, npartitions=1)
        y = df.pop('y')

        dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            apply_gbm_features=False,
            auto_categorize=True,
            auto_discrete=True,
            # nets=['linear', 'cin_nets', 'fm_nets', 'afm_nets', 'pnn_nets', 'dnn2_nets', 'dcn_nets',
            #       'autoint_nets', 'fibi_dnn_nets'],
            # 'fg_nets', 'fgcnn_cin_nets', 'fgcnn_fm_nets', 'fgcnn_ipnn_nets',
            #          'fgcnn_dnn_nets', ]
        ))
        dt.fit(df, y)

        xt_1 = np.random.randint(0, 50, size=(10), dtype='int')
        xt_2 = np.random.randint(0, 10, size=(10)).astype('str')
        xt_3 = np.random.normal(0.0, 2.0, size=(10))

        dft = pd.DataFrame({'x1': xt_1, 'x2': xt_2, 'x3': xt_3})
        dft = dd.from_pandas(dft, npartitions=2)
        preds = dt.predict(dft)
        # Compare explicitly: `assert len(preds), 10` would only test
        # truthiness and treat 10 as the failure message.
        assert len(preds) == 10
Example #3
0
    def run_nets(self, nets, **kwargs):
        """Train and evaluate a DeepTable built from ``nets`` on 100 adult rows.

        Extra keyword arguments are forwarded to ``deeptable.ModelConfig``.
        Returns the ``(DeepTable, evaluation result)`` pair after asserting
        that the result carries a non-negative ``'AUC'`` entry.
        """
        features = dsutils.load_adult().head(100)
        labels = features.pop(14).values

        base_options = dict(
            nets=nets,
            metrics=['AUC'],
            fixed_embedding_dim=True,
            embeddings_output_dim=2,
            apply_gbm_features=False,
            apply_class_weight=True,
        )
        table = deeptable.DeepTable(
            config=deeptable.ModelConfig(**base_options, **kwargs))

        split = train_test_split(features, labels, test_size=0.2, random_state=42)
        train_x, test_x, train_y, test_y = split
        _model, _history = table.fit(train_x, train_y, epochs=1)

        scores = table.evaluate(test_x, test_y)
        assert scores['AUC'] >= 0.0

        # NOTE: a save-to-disk / reload-in-a-subprocess round trip used to be
        # exercised here; it is currently disabled.

        return table, scores
Example #4
0
def run(distribute_strategy=None, batch_size=32, epochs=5):
    """Train a DeepFM DeepTable on the bank dataset and print its results.

    Prints the evaluation result on the held-out 20% split, followed by a
    mapping from each distinct predicted value to how often it occurred.
    """
    # Data: 80/20 split with the 'y' column separated out as the target.
    bank = dsutils.load_bank()
    train_part, test_part = train_test_split(bank, test_size=0.2, random_state=42)
    train_target = train_part.pop('y')
    test_target = test_part.pop('y')

    # Training.
    table = deeptable.DeepTable(config=deeptable.ModelConfig(
        nets=deepnets.DeepFM,
        earlystopping_patience=999,
        apply_class_weight=True,
        distribute_strategy=distribute_strategy,
    ))
    _model, _history = table.fit(train_part,
                                 train_target,
                                 batch_size=batch_size,
                                 epochs=epochs)

    # Evaluation.
    print('score:', table.evaluate(test_part, test_target, verbose=0))

    # Scoring: count how many times each predicted value appears.
    values, counts = np.unique(table.predict(test_part), return_counts=True)
    print(dict(zip(values, counts)))
Example #5
0
 def run_dt(self, config):
     """Fit a DeepTable built from ``config`` on the first 1000 adult rows.

     Returns the DeepTable together with both values returned by ``fit``.
     """
     features = dsutils.load_adult().head(1000)
     labels = features.pop(14).values
     table = deeptable.DeepTable(config=config)
     fitted_model, fit_history = table.fit(features, labels, epochs=1)
     return table, fitted_model, fit_history
Example #6
0
    def test_fit(self):
        """Fit a 'multilabel' DeepTable on random features with two label columns.

        The test only checks that ``fit`` completes; nothing is asserted on
        the returned values.
        """
        print("Loading datasets...")
        n_rows = 100
        features = pd.DataFrame({
            'x1': np.random.randint(0, 10, size=(n_rows), dtype='int'),
            'x2': np.random.normal(0.0, 1.0, size=(n_rows)),
            'x3': np.random.normal(0.0, 1.0, size=(n_rows)),
        })
        targets = pd.DataFrame({
            'y1': np.random.randint(0, 2, size=(n_rows), dtype='int'),
            'y2': np.random.randint(0, 2, size=(n_rows), dtype='int'),
        })

        table = deeptable.DeepTable(config=deeptable.ModelConfig(
            metrics=['AUC'],
            nets=['dnn_nets'],
            apply_gbm_features=False,
            task='multilabel',
        ))
        split = train_test_split(features, targets, test_size=0.2, random_state=42)
        train_x, _test_x, train_y, _test_y = split
        # Labels are handed over as a plain array rather than a DataFrame.
        _model, _history = table.fit(train_x, train_y.values, batch_size=10, epochs=1)
Example #7
0
    def test_custom_dnn(self):
        """Fit 'dnn_nets' with a custom DNN builder and look up its named layers.

        The fitted Keras model must expose the four layers named
        ``dnn_custom_dense_1``, ``dnn_custom_dropout_1``, ``dnn_custom_bn_1``
        and ``dnn_custom_dense_2``.
        """
        features = dsutils.load_adult().head(100)
        labels = features.pop(14).values

        custom_dnn_params = {
            'custom_dnn_fn': deepnets.custom_dnn_D_A_D_B,
            'hidden_units': ((128, 0.2, True), (64, 0, False)),
        }
        conf = deeptable.ModelConfig(
            nets=['dnn_nets'],
            dnn_params=custom_dnn_params,
            metrics=['AUC'],
            fixed_embedding_dim=True,
            embeddings_output_dim=2,
            apply_gbm_features=False,
            apply_class_weight=True,
        )
        fitted, _history = deeptable.DeepTable(config=conf).fit(features, labels, epochs=1)

        expected_layer_names = (
            'dnn_custom_dense_1',
            'dnn_custom_dropout_1',
            'dnn_custom_bn_1',
            'dnn_custom_dense_2',
        )
        # Resolve every layer first, then check each one.
        layers = [fitted.model.get_layer(name) for name in expected_layer_names]
        for layer in layers:
            assert layer
Example #8
0
    def setup_class(self):
        """Prepare dask-backed adult data and run a 3-fold cross-validated fit.

        Stores the full data, the train/eval split, the DeepTable and the
        three values returned by ``fit_cross_validation`` as attributes.
        """
        setup_dask(self)

        print("Loading datasets...")
        adult = dsutils.load_adult().head(1000)
        self.X = dd.from_pandas(adult, npartitions=2)
        self.y = self.X.pop(14)

        self.dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            metrics=['AUC'],
            apply_gbm_features=False,
            auto_categorize=False,
            auto_discrete=False,
        ))

        split = train_test_split(self.X, self.y, test_size=0.2, random_state=42)
        self.X_train, self.X_eval, self.y_train, self.y_test = split

        cv_outputs = self.dt.fit_cross_validation(
            self.X_train,
            self.y_train,
            self.X_eval,
            num_folds=3,
            epochs=1,
            n_jobs=1,
        )
        self.oof_proba, self.eval_proba, self.test_proba = cv_outputs
 def test_class_weights(self):
     """Fit with ``apply_class_weight=True`` and expect a positive first-epoch AUC."""
     weighted_conf = deeptable.ModelConfig(
         metrics=['AUC'],
         apply_gbm_features=False,
         apply_class_weight=True,
     )
     table = deeptable.DeepTable(config=weighted_conf)
     _model, fit_history = table.fit(self.X_train, self.y_train, epochs=1)
     first_epoch_auc = fit_history.history['AUC'][0]
     assert first_epoch_auc > 0
Example #10
0
    def test_only_1_categorical(self, net):
        """Fit a regression model for ``net`` using ``movie_id`` as the only feature.

        Notes on nets and embeddings:
          * afm_nets needs an embedding array with at least 2 elements.
          * opnn_nets, ipnn_nets and pnn_nets need at least 2 embeddings to
            build ``layers.InnerProduct``.
          * dnn_nets, cross_dnn_nets, cross_nets and dcn_nets do not use
            embeddings.
          * fibi_nets and fibi_dnn_nets need at least 2 embeddings because of
            ``BilinearInteraction``.
        """
        movielens = dsutils.load_movielens()
        ratings = movielens['rating'].values.astype('float32')
        movie_ids = movielens[['movie_id']]

        table = deeptable.DeepTable(config=deeptable.ModelConfig(
            nets=[net],
            task=consts.TASK_REGRESSION,
            categorical_columns=["movie_id"],
            metrics=['mse'],
            fixed_embedding_dim=True,
            embeddings_output_dim=4,
            apply_gbm_features=False,
            apply_class_weight=True,
            earlystopping_patience=5,
        ))

        fit_options = dict(validation_split=0.2, epochs=10, batch_size=32)
        fitted, _history = table.fit(movie_ids, ratings, **fit_options)
        assert fitted
    def test_var_categorical_feature(self):
        """Fit with ``genres`` as a variable-length categorical column.

        ``genres`` is configured with separator "|" and the "max" setting; the
        fitted Keras model must list ``'genres'`` among its input names.
        """
        features = self.df.copy()
        ratings = features.pop('rating').values.astype('float32')

        fixed_length_categoricals = [
            "movie_id", "user_id", "gender", "occupation", "zip", "title", "age",
        ]
        conf = deeptable.ModelConfig(
            nets=['dnn_nets'],
            task=consts.TASK_REGRESSION,
            categorical_columns=fixed_length_categoricals,
            metrics=['mse'],
            fixed_embedding_dim=True,
            embeddings_output_dim=4,
            apply_gbm_features=False,
            apply_class_weight=True,
            earlystopping_patience=5,
            var_len_categorical_columns=[('genres', "|", "max")],
        )
        table = deeptable.DeepTable(config=conf)

        # No random_state here, so the split differs between runs.
        train_x, valid_x, train_y, valid_y = train_test_split(
            features, ratings, test_size=0.2)

        fitted, _history = table.fit(
            train_x,
            train_y,
            validation_data=(valid_x, valid_y),
            epochs=10,
            batch_size=32,
        )

        input_names = fitted.model.input_names
        assert 'genres' in input_names
Example #12
0
 def _train_and_asset(self, X, y, conf: deeptable.ModelConfig):
     """Fit a DeepTable from ``conf`` and assert the model has exactly one input."""
     fit_options = dict(validation_split=0.2, epochs=2, batch_size=32)
     table = deeptable.DeepTable(config=conf)
     fitted, _history = table.fit(X, y, **fit_options)
     input_count = len(fitted.model.input_names)
     assert input_count == 1
Example #13
0
    def setup_class(self):
        """Load data via ``self.load_data`` and fit a regression DeepTable for 100 epochs.

        The data is split 80/20 with the tool box matching ``self.X``; the
        fitted model and its history are kept on ``self`` for the tests.
        """
        self.X, self.y = self.load_data()

        regression_conf = deeptable.ModelConfig(
            task=consts.TASK_REGRESSION,
            metrics=[r2_c, 'RootMeanSquaredError'],
            apply_gbm_features=False,
        )
        self.dt = deeptable.DeepTable(config=regression_conf)

        tool_box = get_tool_box(self.X)
        split = tool_box.train_test_split(self.X, self.y, test_size=0.2, random_state=42)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        fit_result = self.dt.fit(self.X_train, self.y_train, batch_size=32, epochs=100)
        self.model, self.history = fit_result
Example #14
0
    def setup_class(self):
        """Load the glass dataset as a dask frame and fit a DeepTable for 3 epochs.

        Every part of the train/test split is persisted before fitting.
        """
        setup_dask(self)

        print("Loading datasets...")
        glass = dd.from_pandas(dsutils.load_glass_uci(), npartitions=2)
        self.y = glass.pop(10).values
        self.X = glass

        self.dt = deeptable.DeepTable(
            config=deeptable.ModelConfig(metrics=['AUC'], apply_gbm_features=False))

        parts = get_tool_box(glass).train_test_split(
            self.X, self.y, test_size=0.2, random_state=42)
        persisted = [part.persist() for part in parts]
        self.X_train, self.X_test, self.y_train, self.y_test = persisted

        fit_result = self.dt.fit(self.X_train, self.y_train, batch_size=32, epochs=3)
        self.model, self.history = fit_result
Example #15
0
    def test_modelinfo(self):
        """Wrap a fitted model in ``modelset.ModelInfo`` and check score and history.

        After two epochs the info object must report a positive ``'val_auc'``
        score and keep two ``'AUC'`` entries in its stored history.
        """
        features = dsutils.load_adult()
        labels = features.pop(14).values

        table = deeptable.DeepTable(config=deepmodels.ModelConfig(metrics=['AUC']))
        fitted, fit_history = table.fit(features, labels, epochs=2)

        info = modelset.ModelInfo('val', 'm1', fitted, {}, history=fit_history.history)

        assert info.score['val_auc'] > 0
        recorded_auc = info.meta['history']['AUC']
        assert len(recorded_auc) == 2
Example #16
0
    def setup_class(self):
        """Fit a class-weighted DeepTable for one epoch on 1000 adult rows.

        Keeps the data, the 80/20 split, the DeepTable and the fit results on
        ``self`` for the tests of this class.
        """
        print("Loading datasets...")
        adult = dsutils.load_adult().head(1000)
        self.y = adult.pop(14).values
        self.X = adult

        self.dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            metrics=['AUC'],
            apply_gbm_features=False,
            apply_class_weight=True,
        ))

        split = train_test_split(self.X, self.y, test_size=0.2, random_state=42)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        fit_result = self.dt.fit(self.X_train, self.y_train, epochs=1)
        self.model, self.history = fit_result
Example #17
0
    def test_embeddings_output_dim(self):
        """Fit with ``fixed_embedding_dim=False`` and ``embeddings_output_dim=0``.

        The test only checks that ``fit`` completes; nothing is asserted on
        the returned values.
        """
        print("Loading datasets...")
        features = dsutils.load_adult().head(1000)
        labels = features.pop(14).values

        table = deeptable.DeepTable(config=deeptable.ModelConfig(
            fixed_embedding_dim=False,
            embeddings_output_dim=0,
        ))

        split = train_test_split(features, labels, test_size=0.2, random_state=42)
        train_x, _test_x, train_y, _test_y = split
        _model, _history = table.fit(train_x, train_y, epochs=1)
Example #18
0
    def test_duplicate_columns(self):
        """``fit`` must raise ``ValueError`` when two columns share a name."""
        frame = pd.DataFrame({
            'x1': np.random.randint(0, 10, size=(100), dtype='int'),
            'x2': np.random.randint(0, 2, size=(100)).astype('str'),
            'x3': np.random.normal(0.0, 1.0, size=(100)),
        })
        labels = np.random.randint(0, 2, size=(100), dtype='int')
        # Rename 'x2' to 'x1' so the frame carries a duplicated column name.
        frame.columns = ['x1', 'x1', 'x3']

        conf = deeptable.ModelConfig(
            apply_gbm_features=False,
            auto_categorize=True,
            auto_discrete=True,
        )
        table = deeptable.DeepTable(config=conf)

        with pytest.raises(ValueError) as excinfo:
            table.fit(frame, labels)

        message = str(excinfo.value)
        assert "Columns with duplicate names in X:" in message
        assert excinfo.type == ValueError
    def setup_class(self):
        """Fit a regression DeepTable for 100 epochs on the Boston housing data.

        NOTE(review): if ``load_boston`` is scikit-learn's loader, it has been
        removed from recent scikit-learn releases -- confirm the pinned version.
        """
        print("Loading datasets...")
        boston = load_boston()

        features = pd.DataFrame(boston.data)
        features.columns = boston.feature_names
        self.y = pd.Series(boston.target)
        self.X = features

        regression_conf = deeptable.ModelConfig(
            task=consts.TASK_REGRESSION,
            metrics=[r2_c, 'RootMeanSquaredError'],
            apply_gbm_features=False,
        )
        self.dt = deeptable.DeepTable(config=regression_conf)

        split = train_test_split(self.X, self.y, test_size=0.2, random_state=42)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        fit_result = self.dt.fit(self.X_train, self.y_train, epochs=100)
        self.model, self.history = fit_result
    def test_importances(self):
        """Compute score importances for AUC and log loss on a tiny bank sample.

        Does nothing when ``have_eli5`` is false.  Each importance result is
        expected to have shape ``(16, 2)``.
        """
        if not have_eli5:
            return

        bank = dsutils.load_bank().head(100)
        bank.drop(columns=['id'], inplace=True)
        train_x, test_x = train_test_split(bank, test_size=0.2, random_state=42)
        train_y = train_x.pop('y')
        test_y = test_x.pop('y')

        conf = deeptable.ModelConfig(nets=['dnn_nets'],
                                     auto_discrete=True,
                                     metrics=['AUC'])
        table = deeptable.DeepTable(config=conf)
        table.fit(train_x, train_y, epochs=1)

        # (scorer name, optimisation direction) pairs, checked in this order.
        for scorer, direction in (('AUC', 'max'), ('log_loss', 'min')):
            importances = get_score_importances(
                table, test_x, test_y, scorer, 1, mode=direction)
            assert importances.shape == (16, 2)
Example #21
0
def run(distribute_strategy=None, batch_size=32, epochs=5):
    """Train DeepFM on the dask bank data, save it, reload it and score it.

    The model is saved to ``'model_by_dask'``; evaluation and scoring are
    done with the instance loaded back from that path.  Prints the save
    location, the evaluation result and the accuracy/auc scores.
    """
    # Data: split with the tool box matching the frame, then persist all parts.
    bank = dsutils.load_bank_by_dask()
    tool_box = get_tool_box(bank)
    train_part, test_part = tool_box.train_test_split(
        bank, test_size=0.2, random_state=42)

    train_target = train_part.pop('y')
    test_target = test_part.pop('y')
    train_part, train_target, test_part, test_target = dask.persist(
        train_part, train_target, test_part, test_target)

    # Training.
    table = deeptable.DeepTable(config=deeptable.ModelConfig(
        nets=deepnets.DeepFM,
        earlystopping_patience=5,
        distribute_strategy=distribute_strategy,
    ))
    _model, _history = table.fit(train_part,
                                 train_target,
                                 batch_size=batch_size,
                                 epochs=epochs)

    # Save.
    model_path = 'model_by_dask'
    table.save(model_path)
    print(f'saved to {model_path}')

    # Evaluation, using the copy loaded back from disk.
    reloaded = deeptable.DeepTable.load(model_path)
    print('score:', reloaded.evaluate(test_part, test_target, batch_size=512, verbose=0))

    # Scoring.
    preds = reloaded.predict(test_part, batch_size=512)
    proba = reloaded.predict_proba(test_part, batch_size=512)
    scores = get_tool_box(test_target).metrics.calc_score(
        test_target, preds, proba, metrics=['accuracy', 'auc'])
    print(scores)
Example #22
0
    def test_shap(self):
        """Compute SHAP values for a single row of a tiny bank sample.

        Does nothing when ``have_shap`` is false.  The first element of the
        returned SHAP values is expected to have shape ``(1, 16)``.
        """
        if not have_shap:
            return

        bank = dsutils.load_bank().head(100)
        bank.drop(columns=['id'], inplace=True)
        train_x, test_x = train_test_split(bank, test_size=0.2, random_state=42)
        train_y = train_x.pop('y')
        test_y = test_x.pop('y')

        conf = deeptable.ModelConfig(nets=['dnn_nets'], auto_discrete=True, metrics=['AUC'])
        table = deeptable.DeepTable(config=conf)
        table.fit(train_x, train_y, epochs=1)

        explainer = DeepTablesExplainer(table, train_x, num_samples=10)

        first_row = train_x[:1]
        shap_values = explainer.get_shap_values(first_row, nsamples='auto')
        assert shap_values[0].shape == (1, 16)
Example #23
0
 def test_no_input(self):
     """``fit`` must raise ``ValueError`` when given a frame with no columns.

     The target is the ``rating`` column of a copy of ``self.df_movielens``;
     the feature frame is an empty ``pd.DataFrame``.
     """
     df: pd.DataFrame = self.df_movielens.copy()
     X = pd.DataFrame()
     y = df['rating']
     conf = deeptable.ModelConfig(
         nets=['dnn_nets'],
         task=consts.TASK_REGRESSION,
         metrics=['mse'],
         fixed_embedding_dim=True,
         embeddings_output_dim=4,
         apply_gbm_features=False,
         apply_class_weight=True,
         earlystopping_patience=3,
     )
     dt = deeptable.DeepTable(config=conf)
     with pytest.raises(ValueError) as err_info:
         dt.fit(X, y, validation_split=0.2, epochs=2, batch_size=32)
     # Printed after the block: inside it, nothing following the raising
     # call would ever run.
     print(err_info)
    def setup_class(self):
        """Fit a DeepTable for one epoch on the glass dataset.

        Column 10 is popped as the target; the 80/20 split, the DeepTable and
        the fit results are kept on ``self``.
        """
        print("Loading datasets...")
        glass = dsutils.load_glass_uci()
        self.y = glass.pop(10).values
        self.X = glass

        self.dt = deeptable.DeepTable(
            config=deeptable.ModelConfig(metrics=['AUC'], apply_gbm_features=False))

        split = train_test_split(self.X, self.y, test_size=0.2, random_state=42)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        fit_result = self.dt.fit(self.X_train, self.y_train, epochs=1)
        self.model, self.history = fit_result
Example #25
0
    def setup_class(self):
        """Load data via ``self.load_data`` and run a 3-fold cross-validated fit.

        Column 14 is popped as the target.  The three values returned by
        ``fit_cross_validation`` are stored as ``oof_proba``, ``eval_proba``
        and ``test_proba``.
        """
        self.X = self.load_data()
        self.y = self.X.pop(14)

        self.dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            metrics=['AUC'],
            apply_gbm_features=False,
            auto_categorize=False,
            auto_discrete=False,
        ))

        tool_box = get_tool_box(self.X)
        split = tool_box.train_test_split(self.X, self.y, test_size=0.2, random_state=42)
        self.X_train, self.X_eval, self.y_train, self.y_test = split

        cv_outputs = self.dt.fit_cross_validation(
            self.X_train,
            self.y_train,
            self.X_eval,
            num_folds=3,
            epochs=1,
            n_jobs=1,
        )
        self.oof_proba, self.eval_proba, self.test_proba = cv_outputs
Example #26
0
    def run_nets(self, nets):
        """Train and evaluate a DeepTable built from ``nets`` on 100 adult rows.

        Returns the ``(DeepTable, evaluation result)`` pair after asserting
        that the result carries a non-negative ``'AUC'`` entry.
        """
        features = dsutils.load_adult().head(100)
        labels = features.pop(14).values

        table = deeptable.DeepTable(config=deeptable.ModelConfig(
            nets=nets,
            metrics=['AUC'],
            fixed_embedding_dim=True,
            embeddings_output_dim=2,
            apply_gbm_features=False,
            apply_class_weight=True,
        ))

        split = train_test_split(features, labels, test_size=0.2, random_state=42)
        train_x, test_x, train_y, test_y = split
        _model, _history = table.fit(train_x, train_y, epochs=1)

        scores = table.evaluate(test_x, test_y)
        assert scores['AUC'] >= 0.0
        return table, scores
Example #27
0
    def test_fit_cross_validation(self):
        """Cross-validate a 'multilabel' DeepTable and check prediction widths.

        Both the out-of-fold and the test predictions must have as many
        columns (last axis) as the label frame.
        """
        print("Loading datasets...")
        n_rows = 100
        features = pd.DataFrame({
            'x1': np.random.randint(0, 10, size=(n_rows), dtype='int'),
            'x2': np.random.normal(0.0, 1.0, size=(n_rows)),
            'x3': np.random.normal(0.0, 1.0, size=(n_rows)),
        })
        targets = pd.DataFrame({
            'y1': np.random.randint(0, 2, size=(n_rows), dtype='int'),
            'y2': np.random.randint(0, 2, size=(n_rows), dtype='int'),
        })

        table = deeptable.DeepTable(config=deeptable.ModelConfig(
            metrics=['AUC'],
            nets=['dnn_nets'],
            apply_gbm_features=False,
            task='multilabel',
        ))
        # The training features double as the test set.
        cv_outputs = table.fit_cross_validation(
            features, targets, X_test=features, num_folds=3)
        oof_predict, _, test_predict = cv_outputs

        label_count = targets.shape[-1]
        assert oof_predict.shape[-1] == label_count
        assert test_predict.shape[-1] == label_count
    def test_callback_injection(self):
        """Pass a ``ModelCheckpoint`` callback to ``fit`` and check its output.

        A fresh temporary directory serves both as the DeepTable ``home_dir``
        and as the checkpoint target; after one epoch it must contain
        ``saved_model.pb``.
        """
        print("Loading datasets...")
        adult = dsutils.load_adult()
        self.y = adult.pop(14).values
        self.X = adult

        path = tempfile.mkdtemp()
        self.dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            metrics=['AUC'],
            apply_gbm_features=False,
            auto_discrete=False,
            home_dir=path,
        ))

        checkpoint = ModelCheckpoint(
            path,
            'val_auc',
            verbose=0,
            save_best_only=False,
            save_weights_only=False,
            mode='max',
            save_freq='epoch',
        )

        split = train_test_split(self.X, self.y, test_size=0.2, random_state=42)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        fit_result = self.dt.fit(self.X_train,
                                 self.y_train,
                                 epochs=1,
                                 callbacks=[checkpoint])
        self.model, self.history = fit_result

        assert 'saved_model.pb' in os.listdir(path)
Example #29
0
# -*- encoding: utf-8 -*-
from deeptables.models import deeptable, ModelConfig
import pickle as pkl

# Names of all network architectures combined into the model below.
nets = [
    'dnn_nets', 'linear', 'cin_nets', 'fm_nets', 'afm_nets', 'opnn_nets',
    'ipnn_nets', 'pnn_nets', 'cross_nets', 'cross_dnn_nets', 'dcn_nets',
    'autoint_nets', 'fg_nets', 'fgcnn_cin_nets', 'fgcnn_fm_nets',
    'fgcnn_ipnn_nets', 'fgcnn_dnn_nets', 'fibi_nets', 'fibi_dnn_nets',
]
config = ModelConfig(nets=nets)

# A pickled ModelConfig can be substituted for the one above, e.g.:
#     with open('/Users/wuhf/Downloads/model_config.pkl', 'rb') as f:
#         config = pkl.load(f)
# The file is no longer opened when it is not read, so the script does not
# depend on it being present.
dt = deeptable.DeepTable(config=config)

print(dt)

# SECURITY: pickle.load can execute arbitrary code embedded in the file;
# only point this at files from a trusted source.
with open('/Users/wuhf/Downloads/7c4373fe6880477185d4bb0674f99ba2_1.pkl', 'rb') as f:
    df = pkl.load(f)
X_train = df
# `pop` removes the target column from `df` (and therefore from `X_train`).
# The misspelled name is kept so the module's global names stay unchanged.
y_trian = df.pop('y')

dt.fit(X_train, y_trian)



Example #30
0
    def run_dt(self, config, df=None, target=None, fit_kwargs=None):
        """Fit a DeepTable and exercise its main API surface end to end.

        Covers fit, evaluate, model selection, prediction, ``apply`` (with and
        without a transformer) and ``probe_evaluate``, asserting on each step.

        Args:
            config: Configuration handed to ``deeptable.DeepTable``.
                ``config.metrics[0]`` must be present in the evaluation result.
            df: Frame holding the features and the target column.  When either
                ``df`` or ``target`` is ``None``, a copy of ``self.df`` and
                ``self.target`` are used instead.
            target: Name of the target column popped from both split frames.
            fit_kwargs: Extra keyword arguments for ``dt.fit``.  ``epochs``
                defaults to 1 unless overridden here.  ``None`` (the default)
                means no extra arguments.

        Returns:
            ``(dt, dm)``: the DeepTable and the first value returned by
            ``dt.fit``.
        """
        if df is None or target is None:
            df = self.df.copy()
            target = self.target

        X_train, X_test = dex.train_test_split(df,
                                               test_size=0.2,
                                               random_state=9527)
        y_train = X_train.pop(target)
        y_test = X_test.pop(target)
        # Number of test rows, resolved to a concrete value via dex.compute.
        test_size = dex.compute(X_test.shape)[0][0]

        dt = deeptable.DeepTable(config=config)

        # Build a fresh dict so the caller's mapping is never modified.
        extra_fit_kwargs = {} if fit_kwargs is None else fit_kwargs
        fit_kwargs = {'epochs': 1, **extra_fit_kwargs}

        dm, history = dt.fit(X_train, y_train, **fit_kwargs)
        assert dm is not None
        assert history is not None

        # test evaluate
        result = dt.evaluate(X_test, y_test)
        assert result.get(config.metrics[0]) is not None
        print('evaluate:', result)

        # test_model_selector(self):
        # All three selectors are expected to resolve to the same object.
        m1 = dt.get_model(consts.MODEL_SELECTOR_CURRENT)
        m2 = dt.get_model(consts.MODEL_SELECTOR_BEST)
        m3 = dt.get_model('dnn_nets')

        assert isinstance(m1, deepmodel.DeepModel)
        assert m1 is m2
        assert m2 is m3

        # test_best_model(self):
        model = dt.best_model
        assert isinstance(model, deepmodel.DeepModel)

        if dt.task in [consts.TASK_BINARY, consts.TASK_MULTICLASS]:
            # test_predict_proba(self):
            num_classes = dt.num_classes
            proba = dt.predict_proba(X_test)

            assert proba.shape == (test_size, num_classes)

            # test_proba2predict(self):
            # predict() and proba2predict(predict_proba()) must agree row by row.
            proba = dt.predict_proba(X_test)
            preds = dt.predict(X_test)
            preds2 = dt.proba2predict(proba)
            assert proba.shape == (test_size, num_classes)
            assert (preds == preds2).sum() == test_size
            assert preds2.shape == (test_size, )
        elif dt.task in [
                consts.TASK_REGRESSION,
        ]:
            preds = dt.predict(X_test)
            assert preds.shape == (test_size, 1)

        # test_apply(self):
        # Several output layers yield a sequence of 2-D feature arrays ...
        features = dt.apply(
            X_test,
            output_layers=['flatten_embeddings', 'dnn_dense_1', 'dnn_dense_2'])
        assert len(features) == 3
        assert len(features[0].shape) == 2

        # ... while a single output layer yields one 2-D array directly.
        features = dt.apply(X_test, output_layers=['flatten_embeddings'])
        assert len(features.shape) == 2

        #  test_apply_with_transformer(self):
        tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)

        # Draw half of the test row positions at random (unseeded), then
        # rebuild a frame from those rows with the original column names.
        out1 = random.sample(range(test_size), test_size // 2)
        # X_sample = X_test.iloc[out1,]
        X_test_values = X_test.to_dask_array(lengths=True)
        samples = dex.make_chunk_size_known(X_test_values[out1])
        X_sample = dex.dd.from_array(samples, columns=X_test.columns)

        features = dt.apply(
            X_sample,
            output_layers=['flatten_embeddings', 'dnn_dense_1'],
            transformer=tsne)
        assert len(features) == 2
        assert len(features[0].shape) == 2
        # The transformer was built with n_components=2, hence two columns.
        assert features[0].shape[1] == 2
        assert features[1].shape[1] == 2

        # def test_probe_evaluate(self):
        # With an empty score_fn the result is expected to report 'accuracy'.
        result = deeptable.probe_evaluate(dt,
                                          X_train,
                                          y_train,
                                          X_test,
                                          y_test,
                                          layers=['flatten_embeddings'],
                                          score_fn={})

        assert len(result) == 1
        assert result['flatten_embeddings']['accuracy'] > 0

        # Classification tasks additionally score AUC and F1.
        scores = {'MSE': mean_squared_error}
        if dt.task in [consts.TASK_BINARY, consts.TASK_MULTICLASS]:
            scores = {'AUC': roc_auc_score, 'F1': f1_score, **scores}
        result = deeptable.probe_evaluate(
            dt,
            X_train,
            y_train,
            X_test,
            y_test,
            layers=['flatten_embeddings', 'dnn_dense_1', 'dnn_dense_2'],
            score_fn=scores)

        assert len(result) == 3
        assert len(result['flatten_embeddings']) == len(scores)
        if dt.task in [consts.TASK_BINARY, consts.TASK_MULTICLASS]:
            assert result['flatten_embeddings']['AUC'] > 0
            assert result['dnn_dense_2']['AUC'] > 0

        return dt, dm