def test_averaging():
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {'objective': 'binary', 'max_depth': 8}

    with tempfile.TemporaryDirectory() as temp_path:
        for i in range(3):
            params['seed'] = i
            ret_single = run_experiment(params, X_train, y_train, X_test,
                                        os.path.join(temp_path, 'seed{}'.format(i)))

        df = average_results([os.path.join(temp_path, 'seed{}'.format(i)) for i in range(3)],
                             os.path.join(temp_path, 'average.csv'))

        score = roc_auc_score(y_test, df[df.columns[-1]])
        assert score >= 0.85
        assert score >= roc_auc_score(y_test, ret_single.test_prediction)

def test_experiment_sklearn_multiclass(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=0,
                                  n_classes=5, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {'n_neighbors': 10}

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            algorithm_type=KNeighborsClassifier, with_auto_prep=False)

    # making sure prediction is not binarized
    assert len(np.unique(result.oof_prediction[:, 0])) > 5
    assert len(np.unique(result.test_prediction[:, 0])) > 5

    assert result.oof_prediction.shape == (len(y_train), 5)
    assert result.test_prediction.shape == (len(y_test), 5)

    _check_file_exists(tmpdir_name)

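# The _check_file_exists helper used throughout these tests is defined elsewhere in the test
# suite. The sketch below is only an illustration of what it presumably asserts, covering the
# call shapes seen in this section (default artifacts, an explicit tuple of file names,
# submission_filename, with_mlflow); the name carries a "_sketch" suffix to make clear it is
# hypothetical and not the real helper, and the default artifact list is an assumption.
def _check_file_exists_sketch(directory, files=None, submission_filename=None, with_mlflow=False):
    if files is None:
        # assumed default artifacts written by run_experiment
        files = ('oof_prediction.npy', 'metrics.txt', 'params.json')
    files = list(files)
    if submission_filename is not None:
        files.append(submission_filename)
    if with_mlflow:
        files.append('mlflow.json')
    for f in files:
        assert os.path.exists(os.path.join(directory, f)), 'missing artifact: {}'.format(f)
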
def test_with_feature_attachment():
    X, y = make_classification_df(n_num_features=5, class_sep=0.7)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }

    with get_temp_directory() as temp_feature_path:
        cols = list(X.columns)
        for i, c in enumerate(cols):
            if X.shape[1] == 1:
                break
            save_feature(X[[c]], i, directory=temp_feature_path)
            X.drop(c, axis=1, inplace=True)

        X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

        with get_temp_directory() as temp_path:
            result_wo_feature = run_experiment(params, X_train, y_train, X_test,
                                               logging_directory=temp_path)

        with get_temp_directory() as temp_path:
            result_w_feature = run_experiment(params, X_train, y_train, X_test,
                                              logging_directory=temp_path,
                                              feature_list=[0, 1, 2, 3],
                                              feature_directory=temp_feature_path)

        assert result_w_feature.metrics[-1] > result_wo_feature.metrics[-1]

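# get_temp_directory (used above and in several tests below) comes from the project's testing
# utilities and is not defined in this section. It presumably behaves like a context manager
# around a temporary directory; a minimal sketch of an equivalent helper, under that assumption:
#
# from contextlib import contextmanager
#
# @contextmanager
# def get_temp_directory():
#     with tempfile.TemporaryDirectory() as tmpdir:
#         yield tmpdir
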
def test_experiment_fit_params(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {'objective': 'binary', 'max_depth': 8, 'n_estimators': 500}

    result1 = run_experiment(params, X_train, y_train, X_test, os.path.join(tmpdir_name, '1'),
                             fit_params={'early_stopping_rounds': None})
    result2 = run_experiment(params, X_train, y_train, X_test, os.path.join(tmpdir_name, '2'),
                             fit_params={'early_stopping_rounds': 5})

    assert result1.models[-1].booster_.num_trees() == params['n_estimators']
    assert result2.models[-1].booster_.num_trees() < params['n_estimators']

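# Many tests in this section take a tmpdir_name argument, which is a pytest fixture defined
# elsewhere (e.g. in conftest.py). A minimal sketch of an equivalent fixture, assuming it simply
# yields a fresh temporary directory path per test (the real fixture may differ):
#
# @pytest.fixture
# def tmpdir_name():
#     with tempfile.TemporaryDirectory() as tmpdir:
#         yield tmpdir
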
def test_experiment_lgb_multiclass():
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  n_classes=5, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {'objective': 'multiclass', 'max_depth': 8}

    with get_temp_directory() as temp_path:
        result = run_experiment(params, X_train, y_train, X_test, temp_path)

        # making sure prediction is not binarized
        assert len(np.unique(result.oof_prediction[:, 0])) > 5
        assert len(np.unique(result.test_prediction[:, 0])) > 5

        assert result.oof_prediction.shape == (len(y_train), 5)
        assert result.test_prediction.shape == (len(y_test), 5)

        _check_file_exists(temp_path, ('oof_prediction.npy', 'test_prediction.npy', 'metrics.txt'))

def test_experiment_sample_submission_multiclass():
    X, y = make_classification_df(n_classes=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    sample_df = pd.DataFrame()
    sample_df['target_id_abc'] = np.arange(len(y_test)) + 10000
    for i in range(5):
        sample_df['target_class_{}'.format(i)] = 0

    params = {
        'objective': 'multiclass',
        'max_depth': 8
    }

    with get_temp_directory() as temp_path:
        result = run_experiment(params, X_train, y_train, X_test, temp_path,
                                sample_submission=sample_df)

        assert list(result.submission_df.columns) == ['target_id_abc',
                                                      'target_class_0',
                                                      'target_class_1',
                                                      'target_class_2',
                                                      'target_class_3',
                                                      'target_class_4']

        log_loss_trained = log_loss(y_test, result.submission_df.drop('target_id_abc', axis=1),
                                    labels=[0, 1, 2, 3, 4])
        log_loss_default = log_loss(y_test, np.full((len(y_test), 5), 0.2), labels=[0, 1, 2, 3, 4])
        assert log_loss_trained < log_loss_default

def test_submission_filename():
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {'objective': 'binary', 'max_depth': 8}

    with get_temp_directory() as temp_path:
        run_experiment(params, X_train, y_train, X_test, temp_path, submission_filename='sub.csv')

        df = pd.read_csv(os.path.join(temp_path, 'sub.csv'))
        assert list(df.columns) == ['id', 'target']

def test_experiment_mlflow():
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {'objective': 'binary', 'max_depth': 8}

    with get_temp_directory() as temp_path:
        run_experiment(params, X_train, y_train, None, temp_path, with_mlflow=True)

        _check_file_exists(temp_path, ('oof_prediction.npy', 'metrics.txt', 'mlflow.json'))

        # test if output files are also stored in the mlflow artifact uri
        with open(os.path.join(temp_path, 'mlflow.json'), 'r') as f:
            mlflow_meta = json.load(f)
            p = unquote(urlparse(mlflow_meta['artifact_uri']).path)
            if os.name == 'nt' and p.startswith("/"):
                p = p[1:]
            _check_file_exists(p, ('oof_prediction.npy', 'metrics.txt'))

def test_cv_lgbm_df():
    X, y = make_classification_df(n_samples=1024, n_num_features=20, n_cat_features=1,
                                  class_sep=0.98, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    models = [LGBMClassifier(n_estimators=300) for _ in range(5)]

    pred_oof, pred_test, scores, importance = cross_validate(models, X_train, y_train, X_test,
                                                             cv=5, eval_func=roc_auc_score)

    print(scores)
    assert len(scores) == 5 + 1
    assert scores[-1] >= 0.85  # overall roc_auc
    assert roc_auc_score(y_train, pred_oof) == scores[-1]
    assert roc_auc_score(y_test, pred_test) >= 0.85  # test roc_auc
    assert roc_auc_score(y_test, models[0].predict_proba(X_test)[:, 1]) >= 0.85  # make sure models are trained
    assert len(importance) == 5
    assert list(importance[0].columns) == ['feature', 'importance']
    assert len(importance[0]) == 20 + 1
    assert models[0].booster_.num_trees() < 300  # making sure early stopping worked

def test_experiment_sklearn_classifier(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=0,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {'C': 0.1}

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            eval_func=roc_auc_score,
                            algorithm_type=LogisticRegression,
                            with_auto_prep=False)

    # making sure prediction is not binarized
    assert len(np.unique(result.oof_prediction)) > 5
    assert len(np.unique(result.test_prediction)) > 5

    assert roc_auc_score(y_train, result.oof_prediction) >= 0.8
    assert roc_auc_score(y_test, result.test_prediction) >= 0.8

    _check_file_exists(tmpdir_name)

def test_experiment_lgb_classifier(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {'objective': 'binary', 'max_depth': 8}

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name, eval_func=roc_auc_score)

    # making sure prediction is not binarized
    assert len(np.unique(result.oof_prediction)) > 5
    assert len(np.unique(result.test_prediction)) > 5

    assert roc_auc_score(y_train, result.oof_prediction) >= 0.9
    assert roc_auc_score(y_test, result.test_prediction) >= 0.9

    _check_file_exists(tmpdir_name)

def test_averaging_with_metrics():
    X, y = make_classification_df()
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    oof, test = _make_1st_stage_preds(X_train, y_train, X_test)

    result = averaging(test, oof, y_train, eval_func=roc_auc_score)

    assert result.score == roc_auc_score(y_train, result.oof_prediction)

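# The averaging/stacking tests in this section rely on a _make_1st_stage_preds helper defined
# elsewhere in the test suite: it returns lists of out-of-fold and test predictions from a few
# first-stage models. The sketch below is a hypothetical equivalent (hence the "_sketch" suffix);
# the particular model choice and the use of cross_val_predict are assumptions, not the real
# implementation.
def _make_1st_stage_preds_sketch(X, y, X_test):
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_predict
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.tree import DecisionTreeClassifier

    models = [
        LogisticRegression(max_iter=1000),
        KNeighborsClassifier(n_neighbors=10),
        DecisionTreeClassifier(max_depth=5, random_state=0),
    ]

    oof, test = [], []
    for m in models:
        # out-of-fold probabilities on the training set
        proba_oof = cross_val_predict(m, X, y, cv=5, method='predict_proba')
        m.fit(X, y)
        proba_test = m.predict_proba(X_test)
        if proba_oof.shape[1] == 2:
            # binary case: keep only the positive-class probability (tests expect 1-D arrays)
            proba_oof, proba_test = proba_oof[:, 1], proba_test[:, 1]
        oof.append(proba_oof)
        test.append(proba_test)
    return oof, test
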
def test_weight_averaging():
    X, y = make_classification_df()
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    oof, test = _make_1st_stage_preds(X_train, y_train, X_test)

    result = averaging(test, oof, y_train, weights=[0.2, 0.4, 0.3])

    assert_array_almost_equal(0.2 * test[0] + 0.4 * test[1] + 0.3 * test[2], result.test_prediction)
    assert_array_almost_equal(0.2 * oof[0] + 0.4 * oof[1] + 0.3 * oof[2], result.oof_prediction)
    assert result.score is None

def test_averaging_multiclass():
    X, y = make_classification_df(n_classes=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    oof, test = _make_1st_stage_preds(X_train, y_train, X_test)

    result = averaging(test, oof, y_train)

    assert_array_almost_equal((test[0] + test[1] + test[2]) / 3, result.test_prediction)
    assert_array_almost_equal((oof[0] + oof[1] + oof[2]) / 3, result.oof_prediction)
    assert result.score is None

def test_log_params(tmpdir_name):
    params = {'objective': 'binary', 'max_depth': 8}
    X, y = make_classification_df()

    run_experiment(params, X, y, logging_directory=tmpdir_name)

    with open(os.path.join(tmpdir_name, 'params.json'), 'r') as f:
        recorded_params = json.load(f)

        assert recorded_params['model_params.max_depth'] == 8
        assert recorded_params['model_params.objective'] == 'binary'
        assert recorded_params['fit_params'] == 'None'

def test_custom_experiment(tmpdir_name):
    params = {
        'objective': 'binary',
        'max_depth': 8
    }
    X, y = make_classification_df()

    with Experiment(tmpdir_name, with_mlflow=True) as e:
        run_experiment(params, X, y, logging_directory='foobar', inherit_experiment=e)

    # all files are logged into e.logging_directory, instead of 'foobar'
    _check_file_exists(tmpdir_name, with_mlflow=True)

def test_rank_averaging():
    X, y = make_classification_df(n_samples=1024)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    oof, test = _make_1st_stage_preds(X_train, y_train, X_test)

    result = averaging(test, rank_averaging=True)

    test_rank = [stats.rankdata(t) / len(X_test) for t in test]

    assert_array_almost_equal((test_rank[0] + test_rank[1] + test_rank[2]) / 3, result.test_prediction)
    assert result.score is None

def test_with_long_params(tmpdir_name):
    X, y = make_classification_df(1024, n_num_features=5, n_cat_features=400)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }

    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

    # just to make sure the experiment finishes
    run_experiment(params, X_train, y_train, X_test, logging_directory=tmpdir_name, with_mlflow=True)

def test_stacking():
    X, y = make_classification_df()
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    oof, test = _make_1st_stage_preds(X_train, y_train, X_test)

    worst_base_roc = min(roc_auc_score(y_train, oof[0]),
                         roc_auc_score(y_train, oof[1]),
                         roc_auc_score(y_train, oof[2]))

    result = stacking(test, oof, y_train, eval_func=roc_auc_score)

    assert roc_auc_score(y_train, result.oof_prediction) > worst_base_roc

def test_adv():
    X, y = make_classification_df(1024)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    X_train['target'] = 0
    X_test['target'] = 1

    auc, importance = adversarial_validate(X_train, X_test)

    assert importance['feature'][0] == 'target'
    assert auc >= 0.9

def test_experiment_manual_cv_int(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }

    # pass cv as an int, matching the test name (the KFold variant is tested below)
    result = run_experiment(params, X_train, y_train, None, tmpdir_name, cv=2)
    assert len(result.models) == 2
    assert len(result.metrics) == 2 + 1

def test_experiment_manual_cv_kfold():
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }

    with get_temp_directory() as temp_path:
        result = run_experiment(params, X_train, y_train, None, temp_path, cv=KFold(4))
        assert len(result.models) == 4
        assert len(result.metrics) == 4 + 1

def test_experiment_without_test_data(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }

    result = run_experiment(params, X_train, y_train, None, tmpdir_name)

    assert roc_auc_score(y_train, result.oof_prediction) >= 0.9
    _check_file_exists(tmpdir_name)

def test_rank_averaging_opt_maximize():
    X, y = make_classification_df(n_samples=1024)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    oof, test = _make_1st_stage_preds(X_train, y_train, X_test)

    best_single_model = max(roc_auc_score(y_train, oof[0]),
                            roc_auc_score(y_train, oof[1]),
                            roc_auc_score(y_train, oof[2]))

    result = averaging_opt(test, oof, y_train, roc_auc_score,
                           higher_is_better=True, rank_averaging=True)

    assert result.score >= best_single_model

    result_simple_avg = averaging(test, oof, y_train, eval_func=roc_auc_score, rank_averaging=True)
    assert result.score >= result_simple_avg.score

def test_experiment_sample_submission_binary(tmpdir_name):
    X, y = make_classification_df()
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    sample_df = pd.DataFrame()
    sample_df['target_id_abc'] = np.arange(len(y_test)) + 10000
    sample_df['target_value_abc'] = 0

    params = {
        'objective': 'binary',
        'max_depth': 8
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name, sample_submission=sample_df)

    assert list(result.submission_df.columns) == ['target_id_abc', 'target_value_abc']
    assert roc_auc_score(y_test, result.submission_df['target_value_abc']) > 0.8

def test_experiment_already_exists(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }

    run_experiment(params, X_train, y_train, None, tmpdir_name)

    # overwriting an existing result requires if_exists='replace'
    run_experiment(params, X_train, y_train, None, tmpdir_name, if_exists='replace')

    # results are not overwritten by default, so re-running without if_exists raises
    with pytest.raises(Exception):
        run_experiment(params, X_train, y_train, None, tmpdir_name)

def test_experiment_without_test_data():
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {'objective': 'binary', 'max_depth': 8}

    with get_temp_directory() as temp_path:
        result = experiment_gbdt(params, X_train, y_train, None, temp_path)

        assert roc_auc_score(y_train, result.oof_prediction) >= 0.9
        _check_file_exists(temp_path, ('oof_prediction.npy', 'metrics.txt'))

def test_ignore_errors_in_mlflow_params(tmpdir_name):
    mlflow.start_run()
    mlflow.log_param('features', 'ABC')
    mlflow.log_metric('Overall', -99)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }
    X, y = make_classification_df()

    result = run_experiment(params, X, y, with_mlflow=True, logging_directory=tmpdir_name, feature_list=[])

    client = mlflow.tracking.MlflowClient()
    data = client.get_run(mlflow.active_run().info.run_id).data

    assert data.metrics['Overall'] == result.metrics[-1]
    assert data.params['features'] == 'ABC'  # params cannot be overwritten

    mlflow.end_run()

def test_inherit_outer_scope_run(tmpdir_name):
    mlflow.start_run()
    mlflow.log_param('foo', 1)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }
    X, y = make_classification_df()

    run_experiment(params, X, y, with_mlflow=True, logging_directory=tmpdir_name)

    assert mlflow.active_run() is not None  # still valid

    client = mlflow.tracking.MlflowClient()
    data = client.get_run(mlflow.active_run().info.run_id).data
    assert data.metrics['Overall'] > 0  # recorded

    mlflow.end_run()

def test_experiment_cat_multiclass(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                  n_classes=5, class_sep=0.98, random_state=0,
                                  id_column='user_id', target_name='tgt')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'max_depth': 8,
        'num_boost_round': 100
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            algorithm_type='cat',
                            type_of_target='multiclass',
                            submission_filename='submission.csv',
                            with_auto_prep=True)

    assert result.oof_prediction.shape == (len(y_train), 5)
    assert result.test_prediction.shape == (len(y_test), 5)
    assert list(pd.read_csv(os.path.join(tmpdir_name, 'submission.csv')).columns) == \
        ['id', '0', '1', '2', '3', '4']

    _check_file_exists(tmpdir_name, submission_filename='submission.csv')