def test_custom_priors():
    """Run ``fit`` with ``--priors`` and ``--feature-priors`` and return the eval file.

    Trains Logloss on the ``adult`` learn file only (no ``-t`` is passed) and
    wraps the produced eval file with ``local_canonical_file``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')

    learn_args = (
        '--loss-function', 'Logloss',
        '-f', data_file('adult', 'train_small'),
        '--column-description', data_file('adult', 'train.cd'),
        '-i', '10', '-T', '4', '-r', '0',
        '-m', model_path,
    )
    prior_args = (
        '--priors', '-2:0:8',
        '--feature-priors', '0:1,1:2,2:-1:0',
    )
    fit_cmd = (CATBOOST_PATH, 'fit') + learn_args + prior_args + ('--eval-file', eval_path)

    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_crossentropy():
    """Run ``fit`` with the CrossEntropy loss on ``adult_crossentropy`` and return the eval file."""
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult_crossentropy'

    dataset_args = (
        '-f', data_file(pool, 'train_proba'),
        '-t', data_file(pool, 'test_proba'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = ('-m', model_path, '--eval-file', eval_path)

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'CrossEntropy') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_train_dir():
    """Check that the expected training artifacts appear inside ``--train-dir``.

    Runs a short RMSE fit on the ``no_split`` pool through ``fit_catboost_gpu``
    with a relative model path and ``--train-dir trainDir``, then asserts that
    every expected output (the logs, ``meta.tsv`` and the model itself) exists
    as a regular file inside that directory.
    """
    pool = 'no_split'
    output_model_path = 'model.bin'
    train_dir_path = 'trainDir'
    params = (
        '--use-best-model', 'false',
        '--loss-function', 'RMSE',
        '-f', data_file(pool, 'train_full3'),
        '-t', data_file(pool, 'test3'),
        '--column-description', data_file(pool, 'train_full3.cd'),
        '-i', '10',
        '-T', '4',
        '-r', '0',
        '-m', output_model_path,
        '--train-dir', train_dir_path,
    )
    fit_catboost_gpu(params)

    outputs = ['time_left.tsv', 'learn_error.tsv', 'test_error.tsv', 'meta.tsv', output_model_path]
    for output in outputs:
        # os.path.join keeps the path portable; the message names the missing file on failure.
        output_path = os.path.join(train_dir_path, output)
        assert os.path.isfile(output_path), 'expected output file is missing: ' + output_path
def test_meta(loss_function, boosting_type):
    """Fit through ``fit_catboost_gpu`` with an experiment name and return ``meta.tsv``.

    :param loss_function: value passed to ``--loss-function``.
    :param boosting_type: value passed to ``--boosting-type``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    meta_file = 'meta.tsv'
    pool = 'adult'

    fit_args = (
        '--use-best-model', 'false',
        '--loss-function', loss_function,
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
        '--boosting-type', boosting_type,
        '-i', '10', '-T', '4', '-r', '0',
        '-m', model_path,
        '--name', 'test experiment',
    )

    # meta.tsv is an implicit output: it is never named on the command line,
    # only declared through output_data.
    fit_catboost_gpu(fit_args, output_data={meta_file: meta_file})
    return [local_canonical_file(meta_file)]
def test_deep_tree_classification(depth):
    """Run a Logloss ``fit`` on the ``adult`` pool with ``--depth`` taken from *depth*.

    :param depth: tree depth; converted with ``str`` before being passed on the command line.
    :return: single-element list with the eval file wrapped by ``local_canonical_file``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult'

    dataset_args = (
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0', '--depth', str(depth))
    output_args = ('-m', model_path, '--eval-file', eval_path)

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'Logloss') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_queryaverage():
    """Run a QueryRMSE ``fit`` with the ``QueryAverage:top=2`` custom metric.

    Returns the learn and test error logs, each wrapped by ``local_canonical_file``.
    """
    learn_log = yatest.common.test_output_path('learn_error.tsv')
    test_log = yatest.common.test_output_path('test_error.tsv')
    pool = 'querywise_pool'

    dataset_args = (
        '-f', data_file(pool, 'train_full3'),
        '-t', data_file(pool, 'test3'),
        '--column-description', data_file(pool, 'train_full3.cd'),
    )
    run_args = ('-i', '20', '-T', '4', '-r', '0')
    metric_args = (
        '--custom-metric', 'QueryAverage:top=2',
        '--learn-err-log', learn_log,
        '--test-err-log', test_log,
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'QueryRMSE') + dataset_args + run_args + metric_args
    yatest.common.execute(fit_cmd)

    return [local_canonical_file(path) for path in (learn_log, test_log)]
def test_weights(boosting_type):
    """Fit on the ``adult_weight`` pool, apply the model to the test file, return the eval file.

    :param boosting_type: value passed to ``--boosting-type``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')

    pool = 'adult_weight'
    column_description = data_file(pool, 'train.cd')
    test_pool_path = data_file(pool, 'test_weight')

    # Options are passed as a mapping here (option name -> value).
    fit_options = {
        '--use-best-model': 'false',
        '--loss-function': 'Logloss',
        '-f': data_file(pool, 'train_weight'),
        '-t': test_pool_path,
        '--column-description': column_description,
        '--boosting-type': boosting_type,
        '-i': '10',
        '-T': '4',
        '-r': '0',
        '-m': model_path,
    }

    fit_catboost_gpu(fit_options)
    apply_catboost(model_path, test_pool_path, column_description, eval_path)
    return [local_canonical_file(eval_path)]
def test_prediction_type(prediction_type, loss_function):
    """Run ``fit`` on the ``adult`` pool with a given loss and ``--prediction-type``.

    :param prediction_type: value passed to ``--prediction-type``.
    :param loss_function: value passed to ``--loss-function``.
    :return: single-element list with the eval file wrapped by ``local_canonical_file``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult'

    dataset_args = (
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--prediction-type', prediction_type,
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', loss_function) + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_feature_border_types(border_type):
    """Run a Logloss ``fit`` on the ``adult`` pool with ``--feature-border-type`` set.

    :param border_type: value passed to ``--feature-border-type``.
    :return: single-element list with the eval file wrapped by ``local_canonical_file``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult'

    dataset_args = (
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--feature-border-type', border_type,
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'Logloss') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_queryrmse_approx_on_full_history():
    """Run a QueryRMSE ``fit`` with the ``--approx-on-full-history`` flag and return the eval file."""
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'querywise_pool'

    dataset_args = (
        '-f', data_file(pool, 'train_full3'),
        '-t', data_file(pool, 'test3'),
        '--column-description', data_file(pool, 'train_full3.cd'),
    )
    run_args = ('--approx-on-full-history', '-i', '20', '-T', '4', '-r', '0')
    output_args = ('-m', model_path, '--eval-file', eval_path)

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'QueryRMSE') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_baseline():
    """Run a Logloss ``fit`` on ``adult_weight`` using the ``train_adult_baseline.cd`` description.

    Returns the eval file wrapped by ``local_canonical_file``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult_weight'

    dataset_args = (
        '-f', data_file(pool, 'train_weight'),
        '-t', data_file(pool, 'test_weight'),
        # The column description is a top-level data file, not one from the pool directory.
        '--column-description', data_file('train_adult_baseline.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = ('-m', model_path, '--eval-file', eval_path)

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'Logloss') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_newton():
    """Run a Logloss ``fit`` on the ``adult`` pool with ``--leaf-estimation-method Newton``."""
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult'

    dataset_args = (
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--leaf-estimation-method', 'Newton',
        '--eval-file', eval_path,
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'Logloss') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_fold_len_multiplier():
    """Run a MultiClass ``fit`` on the ``adult`` pool with ``--fold-len-multiplier 1.5``."""
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult'

    dataset_args = (
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--fold-len-multiplier', '1.5',
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'MultiClass') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_custom_loss(custom_loss_function):
    """Run an RMSE ``fit`` on ``no_split`` with an additional ``--custom-loss`` metric.

    :param custom_loss_function: value passed to ``--custom-loss``.
    :return: the learn and test error logs, each wrapped by ``local_canonical_file``.
    """
    dataset = 'no_split'
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    learn_log = yatest.common.test_output_path('learn_error.tsv')
    test_log = yatest.common.test_output_path('test_error.tsv')

    dataset_args = (
        '-f', data_file(dataset, 'train_full3'),
        '-t', data_file(dataset, 'test3'),
        '--column-description', data_file(dataset, 'train_full3.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--custom-loss', custom_loss_function,
        '--learn-err-log', learn_log,
        '--test-err-log', test_log,
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'RMSE') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)

    # Only the metric logs are returned; the eval file is produced but not returned.
    return [local_canonical_file(path) for path in (learn_log, test_log)]
def test_bootstrap(boosting_type):
    """Check that three bootstrap configurations yield identical predictions.

    For each of the ``No``, ``Bayesian`` (``--bagging-temperature 0.0``) and
    ``Bernoulli`` (``--subsample 1.0``) bootstrap settings a model is fitted
    through ``fit_catboost_gpu`` and applied to the test file. The eval files
    of the ``bayes`` and ``bernoulli`` runs must be byte-identical to the one
    from the ``no`` run.

    :param boosting_type: value passed to ``--boosting-type``.
    :return: single-element list with the ``no``-bootstrap eval file wrapped by
        ``local_canonical_file``.
    """
    bootstrap_option = {
        'no': {'--bootstrap-type': 'No'},
        'bayes': {'--bootstrap-type': 'Bayesian', '--bagging-temperature': '0.0'},
        'bernoulli': {'--bootstrap-type': 'Bernoulli', '--subsample': '1.0'}
    }

    test_file = data_file('adult', 'test_small')
    cd_file = data_file('adult', 'train.cd')

    params = {
        '--use-best-model': 'false',
        '--loss-function': 'Logloss',
        '--random-strength': '0',
        '-f': data_file('adult', 'train_small'),
        '-t': test_file,
        '--column-description': cd_file,
        '--boosting-type': boosting_type,
        '-i': '10',
        '-w': '0.03',
        '-T': '4',
        '-r': '0',
    }

    for bootstrap, bootstrap_params in bootstrap_option.items():
        model_path = yatest.common.test_output_path('model_' + bootstrap + '.bin')
        eval_path = yatest.common.test_output_path('test_' + bootstrap + '.eval')
        model_option = {'-m': model_path}

        run_params = combine_dicts(params, bootstrap_params, model_option)

        fit_catboost_gpu(run_params)
        apply_catboost(model_path, test_file, cd_file, eval_path)

    ref_eval_path = yatest.common.test_output_path('test_no.eval')
    for bootstrap in ('bayes', 'bernoulli'):
        other_eval_path = yatest.common.test_output_path('test_' + bootstrap + '.eval')
        # shallow=False forces a content comparison. The default (shallow=True)
        # treats files with equal os.stat signatures (type, size, mtime) as equal
        # without reading them, which could hide a real difference.
        assert filecmp.cmp(ref_eval_path, other_eval_path, shallow=False), \
            'eval file for bootstrap "' + bootstrap + '" differs from the "no" bootstrap reference'

    return [local_canonical_file(ref_eval_path)]
def test_has_time(boosting_type):
    """Fit with the ``--has-time`` flag, apply the model to the test file, return the eval file.

    :param boosting_type: value passed to ``--boosting-type``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')

    pool = 'adult'
    column_description = data_file(pool, 'train.cd')
    test_pool_path = data_file(pool, 'test_small')

    fit_args = (
        '--use-best-model', 'false',
        '--loss-function', 'Logloss',
        '-f', data_file(pool, 'train_small'),
        '-t', test_pool_path,
        '--column-description', column_description,
        '--boosting-type', boosting_type,
        '-i', '10', '-T', '4', '-r', '0',
        '--has-time',
        '-m', model_path,
    )

    fit_catboost_gpu(fit_args)
    apply_catboost(model_path, test_pool_path, column_description, eval_path)
    return [local_canonical_file(eval_path)]
def test_custom_priors(boosting_type):
    """Fit with explicit ``--ctr`` and ``--per-feature-ctr`` priors and return the eval file.

    The model is trained through ``fit_catboost_gpu`` and then applied to the
    test file with ``apply_catboost``.

    :param boosting_type: value passed to ``--boosting-type``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')

    pool = 'adult'
    test_pool_path = data_file(pool, 'test_small')
    column_description = data_file(pool, 'train.cd')

    # Priors applied to all CTRs.
    ctr_description = (
        'Borders:Prior=-2:Prior=0:Prior=8/3:Prior=1:Prior=-1:Prior=3,'
        'FeatureFreq:Prior=0'
    )
    # Overrides for features 4, 6 and 8, separated by ';'.
    per_feature_ctr_description = (
        '4:Borders:Prior=0.444,FeatureFreq:Prior=0.444;'
        '6:Borders:Prior=0.666,FeatureFreq:Prior=0.666;'
        '8:Borders:Prior=-0.888:Prior=2/3,FeatureFreq:Prior=-0.888:Prior=0.888'
    )

    fit_args = (
        '--use-best-model', 'false',
        '--loss-function', 'Logloss',
        '-f', data_file(pool, 'train_small'),
        '-t', test_pool_path,
        '--column-description', column_description,
        '--boosting-type', boosting_type,
        '-i', '10', '-T', '4', '-r', '0',
        '-m', model_path,
        '--ctr', ctr_description,
        '--per-feature-ctr', per_feature_ctr_description,
    )

    fit_catboost_gpu(fit_args)
    apply_catboost(model_path, test_pool_path, column_description, eval_path)
    return [local_canonical_file(eval_path)]
def test_pairlogit_no_target():
    """Run a PairLogit ``fit`` on the ``zen`` pool using ``zen_target_aux.cd`` and pair files.

    Returns the eval file wrapped by ``local_canonical_file``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'zen'

    dataset_args = (
        '-f', data_file(pool, 'learn_small.tsv'),
        '-t', data_file(pool, 'test_small.tsv'),
        '--column-description', data_file(pool, 'zen_target_aux.cd'),
        '--learn-pairs', data_file(pool, 'learn_pairs.tsv'),
        '--test-pairs', data_file(pool, 'test_pairs.tsv'),
    )
    run_args = ('-i', '20', '-T', '4', '-r', '0')
    output_args = ('-m', model_path, '--eval-file', eval_path)

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'PairLogit') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_target_border(border_type):
    """Run an RMSE ``fit`` with a Borders CTR using three target borders of the given type.

    :param border_type: appended to ``TargetBorderType=`` inside the ``--ctr`` value.
    :return: single-element list with the eval file wrapped by ``local_canonical_file``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult_crossentropy'

    dataset_args = (
        '-f', data_file(pool, 'train_proba'),
        '-t', data_file(pool, 'test_proba'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '3', '-T', '4', '-r', '0')
    ctr_description = 'Borders:TargetBorderCount=3:TargetBorderType=' + border_type
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--ctr', ctr_description,
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'RMSE') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def get_cuda_setup_error():
    """Return a string describing why GPU tests cannot run, or ``None``.

    Two checks are made in order:

    1. If any pytest flag starts with ``HAVE_CUDA=`` followed by ``0``, ``no``
       or ``false`` (case-insensitive), that flag is returned.
    2. Otherwise a 5-iteration GPU ``fit`` is executed as a probe. If it raises
       and the exception text contains one of the known messages, that message
       is returned.

    Any other failure of the probe is swallowed and ``None`` is returned.
    """
    cuda_disabled = re.compile('HAVE_CUDA=(0|no|false)', flags=re.IGNORECASE)
    for flag in pytest.config.option.flags:
        if cuda_disabled.match(flag):
            return flag

    known_reasons = (
        'GPU support was not compiled',
        'CUDA driver version is insufficient',
    )

    try:
        # The command is assembled inside the try so that failures while
        # resolving data files are handled the same way as a failed run.
        probe_cmd = (
            CATBOOST_PATH, 'fit',
            '--task-type', 'GPU',
            '--devices', '0',
            '--gpu-ram-part', '0.25',
            '--use-best-model', 'false',
            '--loss-function', 'Logloss',
            '-f', data_file('adult', 'train_small'),
            '-t', data_file('adult', 'test_small'),
            '--column-description', data_file('adult', 'train.cd'),
            '--boosting-type', 'Plain',
            '-i', '5',
            '-T', '4',
            '-r', '0',
        )
        yatest.common.execute(probe_cmd)
    except Exception as e:
        error_text = str(e)
        for reason in known_reasons:
            if reason in error_text:
                return reason

    return None
def test_permutation_block():
    """Run a Logloss ``fit`` on the ``adult`` pool with ``--fold-permutation-block 239``."""
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult'

    dataset_args = (
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--fold-permutation-block', '239',
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'Logloss') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_lost_class():
    """Run a MultiClass ``fit`` on ``cloudness_lost_class`` with ``--classes-count 3``."""
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'cloudness_lost_class'

    dataset_args = (
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--classes-count', '3',
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'MultiClass') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_params_from_file():
    """Run a Logloss ``fit`` that also receives ``--params-file params.json``."""
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult'

    dataset_args = (
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '6', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--params-file', data_file('params.json'),
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'Logloss') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_class_names_multiclass(loss_function):
    """Run ``fit`` on ``precipitation_small`` with an explicit ``--class-names`` list.

    :param loss_function: value passed to ``--loss-function``.
    :return: single-element list with the eval file wrapped by ``local_canonical_file``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'precipitation_small'

    dataset_args = (
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--class-names', '0.,0.5,1.,0.25,0.75',
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', loss_function) + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_quantile_targets(loss_function):
    """Run ``fit`` on ``no_split`` with ``:alpha=0.9`` appended to the loss function.

    :param loss_function: loss name; ``':alpha=0.9'`` is appended before it is passed on.
    :return: single-element list with the eval file wrapped by ``local_canonical_file``.
    """
    dataset = 'no_split'
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')

    loss_description = loss_function + ':alpha=0.9'
    dataset_args = (
        '-f', data_file(dataset, 'train_full3'),
        '-t', data_file(dataset, 'test3'),
        '--column-description', data_file(dataset, 'train_full3.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = ('-m', model_path, '--eval-file', eval_path)

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', loss_description) + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_ctr_type(ctr_type, boosting_type):
    """Fit RMSE with a given ``--ctr`` value, apply the model, return the eval file.

    :param ctr_type: value passed to ``--ctr``.
    :param boosting_type: value passed to ``--boosting-type``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')

    pool = 'adult_crossentropy'
    column_description = data_file(pool, 'train.cd')
    test_pool_path = data_file(pool, 'test_proba')

    fit_args = (
        '--use-best-model', 'false',
        '--loss-function', 'RMSE',
        '-f', data_file(pool, 'train_proba'),
        '-t', test_pool_path,
        '--column-description', column_description,
        '--boosting-type', boosting_type,
        '-i', '3', '-T', '4', '-r', '0',
        '-m', model_path,
        '--ctr', ctr_type,
    )

    fit_catboost_gpu(fit_args)
    apply_catboost(model_path, test_pool_path, column_description, eval_path)
    return [local_canonical_file(eval_path)]
def test_feature_id_fstr():
    """Train a model, then run ``fstr`` on it with the ``adult_with_id.cd`` description.

    Returns the fstr output wrapped by ``local_canonical_file`` (a single
    object, not a list).
    """
    trained_model = yatest.common.test_output_path('adult_model.bin')
    fstr_output = yatest.common.test_output_path('fstr.tsv')

    # Step 1: train on the adult learn file with the regular column description.
    fit_args = (
        '--loss-function', 'Logloss',
        '-f', data_file('adult', 'train_small'),
        '--column-description', data_file('adult', 'train.cd'),
        '-i', '10', '-T', '4', '-r', '0',
        '-m', trained_model,
    )
    yatest.common.execute((CATBOOST_PATH, 'fit') + fit_args)

    # Step 2: compute fstr for the same learn file, but with a different column description.
    fstr_args = (
        '--input-path', data_file('adult', 'train_small'),
        '--column-description', data_file('adult_with_id.cd'),
        '-m', trained_model,
        '-o', fstr_output,
    )
    yatest.common.execute((CATBOOST_PATH, 'fstr') + fstr_args)

    return local_canonical_file(fstr_output)
def test_allow_writing_files_and_used_ram_limit():
    """Run a 100-iteration ``fit`` with ``--allow-writing-files false`` and ``--used-ram-limit 1024``."""
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    pool = 'adult'

    limit_args = (
        '--allow-writing-files', 'false',
        '--used-ram-limit', '1024',
    )
    dataset_args = (
        '--loss-function', 'Logloss',
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '100', '-T', '4', '-r', '0')
    output_args = ('-m', model_path, '--eval-file', eval_path)

    fit_cmd = (CATBOOST_PATH, 'fit') + limit_args + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(eval_path)]
def test_meta():
    """Run an RMSE ``fit`` with ``--name 'test experiment'`` and return ``meta.tsv``.

    The meta file is referenced by the relative name ``meta.tsv``; it is not
    passed to the command.
    """
    dataset = 'no_split'
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    meta_file = 'meta.tsv'

    dataset_args = (
        '-f', data_file(dataset, 'train_full3'),
        '-t', data_file(dataset, 'test3'),
        '--column-description', data_file(dataset, 'train_full3.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--name', 'test experiment',
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'RMSE') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)
    return [local_canonical_file(meta_file)]
def test_custom_loss_for_multiclassification():
    """Run a MultiClass ``fit`` on ``cloudness_small`` with several ``--custom-loss`` metrics.

    Returns the learn and test error logs, each wrapped by ``local_canonical_file``.
    """
    model_path = yatest.common.test_output_path('model.bin')
    eval_path = yatest.common.test_output_path('test.eval')
    learn_log = yatest.common.test_output_path('learn_error.tsv')
    test_log = yatest.common.test_output_path('test_error.tsv')
    pool = 'cloudness_small'

    dataset_args = (
        '-f', data_file(pool, 'train_small'),
        '-t', data_file(pool, 'test_small'),
        '--column-description', data_file(pool, 'train.cd'),
    )
    run_args = ('-i', '10', '-T', '4', '-r', '0')
    output_args = (
        '-m', model_path,
        '--eval-file', eval_path,
        '--custom-loss', 'AUC,Accuracy,Precision,Recall,F1,TotalF1,MultiClassOneVsAll,MCC',
        '--learn-err-log', learn_log,
        '--test-err-log', test_log,
    )

    fit_cmd = (CATBOOST_PATH, 'fit', '--loss-function', 'MultiClass') + dataset_args + run_args + output_args
    yatest.common.execute(fit_cmd)

    # Only the metric logs are returned; the eval file is produced but not returned.
    return [local_canonical_file(path) for path in (learn_log, test_log)]
import hashlib import math import pytest import time import numpy as np from pandas import read_table, DataFrame, Series from catboost import Pool, CatBoost, CatBoostClassifier, CatBoostRegressor, CatboostError, cv, train from catboost_pytest_lib import data_file, local_canonical_file, remove_time_from_json import yatest.common EPS = 1e-5 TRAIN_FILE = data_file('adult', 'train_small') TEST_FILE = data_file('adult', 'test_small') CD_FILE = data_file('adult', 'train.cd') NAN_TRAIN_FILE = data_file('adult_nan', 'train_small') NAN_TEST_FILE = data_file('adult_nan', 'test_small') NAN_CD_FILE = data_file('adult_nan', 'train.cd') CLOUDNESS_TRAIN_FILE = data_file('cloudness_small', 'train_small') CLOUDNESS_TEST_FILE = data_file('cloudness_small', 'test_small') CLOUDNESS_CD_FILE = data_file('cloudness_small', 'train.cd') QUERYWISE_TRAIN_FILE = data_file('querywise', 'train') QUERYWISE_TEST_FILE = data_file('querywise', 'test') QUERYWISE_CD_FILE = data_file('querywise', 'train.cd') QUERYWISE_TRAIN_PAIRS_FILE = data_file('querywise', 'train.pairs')
def test_cv_with_not_binarized_target():
    """Run ``cv`` with Logloss on the ``adult_not_binarized`` pool.

    Returns the JSON log at ``JSON_LOG_PATH``, passed through
    ``remove_time_from_json`` and wrapped by ``local_canonical_file``.
    """
    dataset = 'adult_not_binarized'
    learn_path = data_file(dataset, 'train_small')
    column_description = data_file(dataset, 'train.cd')

    learn_pool = Pool(learn_path, column_description=column_description)
    cv_params = {"iterations": 5, "random_seed": 0, "loss_function": "Logloss"}
    cv(learn_pool, cv_params)

    cleaned_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(cleaned_log)
import pytest import math import numpy as np from pandas import read_table, DataFrame, Series from catboost import Pool, CatBoost, CatBoostClassifier, CatBoostRegressor, CatboostError, cv from catboost_pytest_lib import data_file, local_canonical_file import yatest.common EPS = 1e-5 TRAIN_FILE = data_file('adult', 'train_small') TEST_FILE = data_file('adult', 'test_small') CD_FILE = data_file('adult', 'train.cd') NAN_TRAIN_FILE = data_file('adult_nan', 'train_small') NAN_TEST_FILE = data_file('adult_nan', 'test_small') NAN_CD_FILE = data_file('adult_nan', 'train.cd') CLOUDNESS_TRAIN_FILE = data_file('cloudness_small', 'train_small') CLOUDNESS_TEST_FILE = data_file('cloudness_small', 'test_small') CLOUDNESS_CD_FILE = data_file('cloudness_small', 'train.cd') ZEN_TRAIN_FILE = data_file('zen', 'learn_small.tsv') ZEN_TRAIN_PAIRS_FILE = data_file('zen', 'learn_pairs.tsv') ZEN_TEST_FILE = data_file('zen', 'test_small.tsv') ZEN_TEST_PAIRS_FILE = data_file('zen', 'test_pairs.tsv') ZEN_CD_FILE = data_file('zen', 'zen.cd')