def test_probe_evaluate(self):
    """Probe-evaluate trained layers: default scoring first, then a custom score_fn set."""
    dt, X_train, y_train = self.dt, self.X_train, self.y_train
    X_test, y_test = self.X_test, self.y_test

    # An empty score_fn yields a single default 'accuracy' score per layer.
    single = deeptable.probe_evaluate(dt, X_train, y_train, X_test, y_test,
                                      layers=['flatten_embeddings'], score_fn={})
    assert len(single) == 1
    assert single['flatten_embeddings']['accuracy'] > 0

    probe_layers = ['flatten_embeddings', 'dnn_dense_1', 'dnn_dense_2']
    metrics = {
        'AUC': roc_auc_score,
        'F1': f1_score,
        'MSE': mean_squared_error
    }
    multi = deeptable.probe_evaluate(dt, X_train, y_train, X_test, y_test,
                                     layers=probe_layers, score_fn=metrics)
    assert len(multi) == 3
    assert len(multi['flatten_embeddings']) == 3
    assert multi['flatten_embeddings']['AUC'] > 0
    assert multi['dnn_dense_2']['AUC'] > 0
def run_dt(self, config, df=None, target=None, fit_kwargs=None):
    """Fit a DeepTable on (df, target) and exercise its full API surface.

    Trains one model (1 epoch unless overridden via fit_kwargs), then checks
    evaluate, model selection, predict/predict_proba/proba2predict, apply
    (with and without a transformer) and probe_evaluate.

    Args:
        config: DeepTable configuration (must define `metrics`).
        df: input frame; defaults to a copy of ``self.df``.
        target: target column name; defaults to ``self.target``.
        fit_kwargs: extra kwargs merged over ``{'epochs': 1}``.
            Defaults to None (was a mutable ``{}`` default — fixed; the
            None path produces the same ``{'epochs': 1}`` result).

    Returns:
        (dt, dm): the fitted DeepTable and the DeepModel from fit().
    """
    if df is None or target is None:
        df = self.df.copy()
        target = self.target

    X_train, X_test = dex.train_test_split(df, test_size=0.2, random_state=9527)
    y_train = X_train.pop(target)
    y_test = X_test.pop(target)
    # dask shapes are lazy; compute to get the concrete row count
    test_size = dex.compute(X_test.shape)[0][0]

    dt = deeptable.DeepTable(config=config)

    # Always train a single epoch unless the caller overrides it.
    if fit_kwargs is None:
        fit_kwargs = {'epochs': 1}
    else:
        fit_kwargs = {'epochs': 1, **fit_kwargs}
    dm, history = dt.fit(X_train, y_train, **fit_kwargs)
    assert dm is not None
    assert history is not None

    # evaluate
    result = dt.evaluate(X_test, y_test)
    assert result.get(config.metrics[0]) is not None
    print('evaluate:', result)

    # model selector: current / best / by-name all resolve to the same model here
    m1 = dt.get_model(consts.MODEL_SELECTOR_CURRENT)
    m2 = dt.get_model(consts.MODEL_SELECTOR_BEST)
    m3 = dt.get_model('dnn_nets')
    assert isinstance(m1, deepmodel.DeepModel)
    assert m1 is m2
    assert m2 is m3

    # best_model property
    model = dt.best_model
    assert isinstance(model, deepmodel.DeepModel)

    if dt.task in [consts.TASK_BINARY, consts.TASK_MULTICLASS]:
        # predict_proba
        num_classes = dt.num_classes
        proba = dt.predict_proba(X_test)
        assert proba.shape == (test_size, num_classes)

        # proba2predict must agree with predict on every row
        proba = dt.predict_proba(X_test)
        preds = dt.predict(X_test)
        preds2 = dt.proba2predict(proba)
        assert proba.shape == (test_size, num_classes)
        assert (preds == preds2).sum() == test_size
        assert preds2.shape == (test_size,)
    elif dt.task in [consts.TASK_REGRESSION, ]:
        preds = dt.predict(X_test)
        assert preds.shape == (test_size, 1)

    # apply: multiple output layers -> list of 2-D feature arrays
    features = dt.apply(
        X_test,
        output_layers=['flatten_embeddings', 'dnn_dense_1', 'dnn_dense_2'])
    assert len(features) == 3
    assert len(features[0].shape) == 2

    # apply: single output layer -> one 2-D feature array
    features = dt.apply(X_test, output_layers=['flatten_embeddings'])
    assert len(features.shape) == 2

    # apply with a dimensionality-reducing transformer
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    out1 = random.sample(range(test_size), test_size // 2)
    # NOTE: dask frames need array-style row selection; the old
    # `X_test.iloc[out1,]` pandas sampling was dead code and is removed.
    X_test_values = X_test.to_dask_array(lengths=True)
    samples = dex.make_chunk_size_known(X_test_values[out1])
    X_sample = dex.dd.from_array(samples, columns=X_test.columns)
    features = dt.apply(X_sample,
                        output_layers=['flatten_embeddings', 'dnn_dense_1'],
                        transformer=tsne)
    assert len(features) == 2
    assert len(features[0].shape) == 2
    assert features[0].shape[1] == 2
    assert features[1].shape[1] == 2

    # probe_evaluate: empty score_fn -> single default 'accuracy' score
    result = deeptable.probe_evaluate(dt, X_train, y_train, X_test, y_test,
                                      layers=['flatten_embeddings'], score_fn={})
    assert len(result) == 1
    assert result['flatten_embeddings']['accuracy'] > 0

    # probe_evaluate: task-dependent custom score set
    scores = {'MSE': mean_squared_error}
    if dt.task in [consts.TASK_BINARY, consts.TASK_MULTICLASS]:
        scores = {'AUC': roc_auc_score, 'F1': f1_score, **scores}
    result = deeptable.probe_evaluate(
        dt, X_train, y_train, X_test, y_test,
        layers=['flatten_embeddings', 'dnn_dense_1', 'dnn_dense_2'],
        score_fn=scores)
    assert len(result) == 3
    assert len(result['flatten_embeddings']) == len(scores)
    if dt.task in [consts.TASK_BINARY, consts.TASK_MULTICLASS]:
        assert result['flatten_embeddings']['AUC'] > 0
        assert result['dnn_dense_2']['AUC'] > 0

    return dt, dm