Esempio n. 1
0
    def test_probe_evaluate(self):
        """Check ``deeptable.probe_evaluate`` on one layer and on several layers.

        The first call passes an empty ``score_fn`` and expects an
        ``'accuracy'`` entry for the probed layer; the second call passes
        three named scorers and expects one entry per scorer and per layer.
        """
        # Train/test split shared by both probe calls, in positional order.
        split = (self.X_train, self.y_train, self.X_test, self.y_test)

        # Single layer, no explicit scorers.
        single_layer = deeptable.probe_evaluate(
            self.dt, *split, layers=['flatten_embeddings'], score_fn={})

        assert len(single_layer) == 1
        assert single_layer['flatten_embeddings']['accuracy'] > 0

        # Several layers, each scored with every entry of ``scorers``.
        probed_layers = ['flatten_embeddings', 'dnn_dense_1', 'dnn_dense_2']
        scorers = {
            'AUC': roc_auc_score,
            'F1': f1_score,
            'MSE': mean_squared_error
        }
        multi_layer = deeptable.probe_evaluate(
            self.dt, *split, layers=probed_layers, score_fn=scorers)

        assert len(multi_layer) == 3
        assert len(multi_layer['flatten_embeddings']) == 3
        assert multi_layer['flatten_embeddings']['AUC'] > 0
        assert multi_layer['dnn_dense_2']['AUC'] > 0
Esempio n. 2
0
    def run_dt(self, config, df=None, target=None, fit_kwargs=None):
        """Fit a ``DeepTable`` on a train/test split and exercise its API.

        Args:
            config: Configuration handed to ``deeptable.DeepTable``. Its
                ``metrics[0]`` entry must be present in the evaluate result.
            df: Frame holding the features and the target column. When
                either ``df`` or ``target`` is ``None`` a copy of ``self.df``
                and ``self.target`` are used instead. Presumably a Dask
                DataFrame, since ``to_dask_array`` is called on the split.
            target: Name of the target column popped from the split frames.
            fit_kwargs: Optional extra keyword arguments forwarded to
                ``dt.fit``. ``epochs`` defaults to 1 unless overridden here.

        Returns:
            Tuple ``(dt, dm)``: the ``DeepTable`` instance and the first
            value returned by ``dt.fit``.
        """
        if df is None or target is None:
            df = self.df.copy()
            target = self.target

        X_train, X_test = dex.train_test_split(df,
                                               test_size=0.2,
                                               random_state=9527)
        y_train = X_train.pop(target)
        y_test = X_test.pop(target)
        test_size = dex.compute(X_test.shape)[0][0]

        dt = deeptable.DeepTable(config=config)

        # The default is None rather than a shared mutable dict; caller
        # supplied values override the single-epoch default.
        fit_kwargs = {'epochs': 1, **(fit_kwargs or {})}

        dm, history = dt.fit(X_train, y_train, **fit_kwargs)
        assert dm is not None
        assert history is not None

        # Evaluate: the first configured metric must be reported.
        result = dt.evaluate(X_test, y_test)
        assert result.get(config.metrics[0]) is not None
        print('evaluate:', result)

        # Model selectors: current, best and 'dnn_nets' are expected to
        # resolve to the very same model object.
        m1 = dt.get_model(consts.MODEL_SELECTOR_CURRENT)
        m2 = dt.get_model(consts.MODEL_SELECTOR_BEST)
        m3 = dt.get_model('dnn_nets')

        assert isinstance(m1, deepmodel.DeepModel)
        assert m1 is m2
        assert m2 is m3

        # Best model accessor.
        model = dt.best_model
        assert isinstance(model, deepmodel.DeepModel)

        if dt.task in [consts.TASK_BINARY, consts.TASK_MULTICLASS]:
            # predict_proba: one row per test sample, one column per class.
            num_classes = dt.num_classes
            proba = dt.predict_proba(X_test)

            assert proba.shape == (test_size, num_classes)

            # proba2predict must agree with predict on every test row.
            proba = dt.predict_proba(X_test)
            preds = dt.predict(X_test)
            preds2 = dt.proba2predict(proba)
            assert proba.shape == (test_size, num_classes)
            assert (preds == preds2).sum() == test_size
            assert preds2.shape == (test_size, )
        elif dt.task in [
                consts.TASK_REGRESSION,
        ]:
            # Regression predictions are expected as a single column.
            preds = dt.predict(X_test)
            assert preds.shape == (test_size, 1)

        # apply: several output layers give one 2-D result per layer ...
        features = dt.apply(
            X_test,
            output_layers=['flatten_embeddings', 'dnn_dense_1', 'dnn_dense_2'])
        assert len(features) == 3
        assert len(features[0].shape) == 2

        # ... while a single output layer gives one 2-D result directly.
        features = dt.apply(X_test, output_layers=['flatten_embeddings'])
        assert len(features.shape) == 2

        # apply with a transformer: TSNE reduces every layer to 2 columns.
        tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)

        # Pick half of the test rows by position on the array form of
        # X_test, then wrap the sample back into a frame with the same
        # columns.
        out1 = random.sample(range(test_size), test_size // 2)
        X_test_values = X_test.to_dask_array(lengths=True)
        samples = dex.make_chunk_size_known(X_test_values[out1])
        X_sample = dex.dd.from_array(samples, columns=X_test.columns)

        features = dt.apply(
            X_sample,
            output_layers=['flatten_embeddings', 'dnn_dense_1'],
            transformer=tsne)
        assert len(features) == 2
        assert len(features[0].shape) == 2
        assert features[0].shape[1] == 2
        assert features[1].shape[1] == 2

        # probe_evaluate with an empty score_fn: an 'accuracy' entry is
        # expected for the probed layer.
        result = deeptable.probe_evaluate(dt,
                                          X_train,
                                          y_train,
                                          X_test,
                                          y_test,
                                          layers=['flatten_embeddings'],
                                          score_fn={})

        assert len(result) == 1
        assert result['flatten_embeddings']['accuracy'] > 0

        # probe_evaluate with explicit scorers: MSE always, AUC and F1 only
        # for classification tasks.
        scores = {'MSE': mean_squared_error}
        if dt.task in [consts.TASK_BINARY, consts.TASK_MULTICLASS]:
            scores = {'AUC': roc_auc_score, 'F1': f1_score, **scores}
        result = deeptable.probe_evaluate(
            dt,
            X_train,
            y_train,
            X_test,
            y_test,
            layers=['flatten_embeddings', 'dnn_dense_1', 'dnn_dense_2'],
            score_fn=scores)

        assert len(result) == 3
        assert len(result['flatten_embeddings']) == len(scores)
        if dt.task in [consts.TASK_BINARY, consts.TASK_MULTICLASS]:
            assert result['flatten_embeddings']['AUC'] > 0
            assert result['dnn_dense_2']['AUC'] > 0

        return dt, dm