def test_round_trip(self):
    """Serialize a model to Protobuf, reload it, and verify that the
    reloaded model passes the full prediction pipeline (quantized and
    non-quantized)."""
    cases = [
        ('xgboost', 'mushroom/mushroom.model', 'mushroom/agaricus.test',
         './agaricus{}', 'mushroom/agaricus.test.prob',
         'mushroom/agaricus.test.margin', False),
        ('xgboost', 'dermatology/dermatology.model',
         'dermatology/dermatology.test', './dermatology{}',
         'dermatology/dermatology.test.prob',
         'dermatology/dermatology.test.margin', True),
        ('lightgbm', 'toy_categorical/toy_categorical_model.txt',
         'toy_categorical/toy_categorical.test', './toycat{}', None,
         'toy_categorical/toy_categorical.test.pred', False),
    ]
    for (fmt, model_path, dtest_path, libname_fmt, expected_prob_path,
         expected_margin_path, multiclass) in cases:
        model_path = os.path.join(dpath, model_path)
        # Round-trip: native format -> Protobuf -> back into memory
        original = treelite.Model.load(model_path, model_format=fmt)
        original.export_protobuf('./my.buffer')
        restored = treelite.Model.load('./my.buffer',
                                       model_format='protobuf')
        for quantize in (False, True):
            run_pipeline_test(model=restored, dtest_path=dtest_path,
                              libname_fmt=libname_fmt,
                              expected_prob_path=expected_prob_path,
                              expected_margin_path=expected_margin_path,
                              multiclass=multiclass, use_annotation=None,
                              use_quantize=quantize, use_parallel_comp=None)
def test_basic(self):
    """
    Test a basic workflow: load a model, compile and export as shared lib,
    and make predictions
    """
    cases = [
        ('mushroom/mushroom.model', 'mushroom/agaricus.train',
         'mushroom/agaricus.test', './agaricus{}',
         'mushroom/agaricus.test.prob',
         'mushroom/agaricus.test.margin', False),
        ('dermatology/dermatology.model', 'dermatology/dermatology.train',
         'dermatology/dermatology.test', './dermatology{}',
         'dermatology/dermatology.test.prob',
         'dermatology/dermatology.test.margin', True),
    ]
    for (model_path, dtrain_path, dtest_path, libname_fmt,
         expected_prob_path, expected_margin_path, multiclass) in cases:
        model_path = os.path.join(dpath, model_path)
        model = treelite.Model.load(model_path, model_format='xgboost')
        # Branch annotation is produced once per model and reused below
        make_annotation(model=model, dtrain_path=dtrain_path,
                        annotation_path='./annotation.json')
        for annotation in ('./annotation.json', None):
            for quantize in (True, False):
                run_pipeline_test(
                    model=model, dtest_path=dtest_path,
                    libname_fmt=libname_fmt,
                    expected_prob_path=expected_prob_path,
                    expected_margin_path=expected_margin_path,
                    multiclass=multiclass, use_annotation=annotation,
                    use_quantize=quantize)
def test_sparse_categorical_model(self):
    """
    LightGBM is able to produce categorical splits directly, so that
    categorical data don't have to be one-hot encoded. Test if Treelite is
    able to handle categorical splits.

    This example produces a model with high-cardinality categorical
    variables. The training data has many missing values, so we need to
    match LightGBM when it comes to handling missing values
    """
    cases = [
        ('sparse_categorical/sparse_categorical_model.txt',
         'sparse_categorical/sparse_categorical.test', './sparsecat{}',
         None, 'sparse_categorical/sparse_categorical.test.margin', False),
    ]
    for (model_path, dtest_path, libname_fmt, expected_prob_path,
         expected_margin_path, multiclass) in cases:
        model_path = os.path.join(dpath, model_path)
        model = treelite.Model.load(model_path, model_format='lightgbm')
        for quantize in (False, True):
            # Parallel compilation is intentionally left disabled here
            for parallel_comp in (None,):
                run_pipeline_test(
                    model=model, dtest_path=dtest_path,
                    libname_fmt=libname_fmt,
                    expected_prob_path=expected_prob_path,
                    expected_margin_path=expected_margin_path,
                    multiclass=multiclass, use_annotation=None,
                    use_quantize=quantize,
                    use_parallel_comp=parallel_comp,
                    use_toolchains=['gcc'])
def test_code_folding(self):
    """Exercise the code-folding compiler option at several fold factors,
    with and without quantization, for XGBoost and LightGBM models."""
    cases = [
        ('xgboost', 'mushroom/mushroom.model', 'mushroom/agaricus.train',
         'mushroom/agaricus.test', './agaricus{}',
         'mushroom/agaricus.test.prob',
         'mushroom/agaricus.test.margin', False, None),
        ('xgboost', 'dermatology/dermatology.model',
         'dermatology/dermatology.train', 'dermatology/dermatology.test',
         './dermatology{}', 'dermatology/dermatology.test.prob',
         'dermatology/dermatology.test.margin', True, None),
        ('lightgbm', 'toy_categorical/toy_categorical_model.txt', None,
         'toy_categorical/toy_categorical.test', './toycat{}', None,
         'toy_categorical/toy_categorical.test.pred', False, 2),
    ]
    for (fmt, model_path, dtrain_path, dtest_path, libname_fmt,
         expected_prob_path, expected_margin_path, multiclass,
         parallel_comp) in cases:
        model_path = os.path.join(dpath, model_path)
        model = treelite.Model.load(model_path, model_format=fmt)
        # Only models that ship training data get a branch annotation
        if dtrain_path is None:
            annotation = None
        else:
            make_annotation(model=model, dtrain_path=dtrain_path,
                            annotation_path='./annotation.json')
            annotation = './annotation.json'
        for quantize in (False, True):
            for fold_factor in (1.0, 2.0, 3.0):
                run_pipeline_test(model=model, dtest_path=dtest_path,
                                  libname_fmt=libname_fmt,
                                  expected_prob_path=expected_prob_path,
                                  expected_margin_path=expected_margin_path,
                                  multiclass=multiclass,
                                  use_annotation=annotation,
                                  use_quantize=quantize,
                                  use_parallel_comp=parallel_comp,
                                  use_code_folding=fold_factor)
def test_categorical_data(self):
    """
    LightGBM is able to produce categorical splits directly, so that
    categorical data don't have to be one-hot encoded. Test if Treelite is
    able to handle categorical splits.

    This toy example contains two features, both of which are categorical.
    The first has cardinality 3 and the second 5. The label was generated
    using the formula

       y = f(x0) + g(x1) + [noise with std=0.1]

    where f and g are given by the tables

       x0  f(x0)      x1  g(x1)
        0    -20       0     -2
        1    -10       1     -1
        2      0       2      0
                       3      1
                       4      2
    """
    cases = [
        ('toy_categorical/toy_categorical_model.txt',
         'toy_categorical/toy_categorical.test', './toycat{}', None,
         'toy_categorical/toy_categorical.test.pred', False),
    ]
    for (model_path, dtest_path, libname_fmt, expected_prob_path,
         expected_margin_path, multiclass) in cases:
        model_path = os.path.join(dpath, model_path)
        model = treelite.Model.load(model_path, model_format='lightgbm')
        for quantize in (False, True):
            for parallel_comp in (None, 2):
                run_pipeline_test(
                    model=model, dtest_path=dtest_path,
                    libname_fmt=libname_fmt,
                    expected_prob_path=expected_prob_path,
                    expected_margin_path=expected_margin_path,
                    multiclass=multiclass, use_annotation=None,
                    use_quantize=quantize,
                    use_parallel_comp=parallel_comp)
def test_basic(self):
    """
    Test a basic workflow: load a model, compile and export as shared lib,
    and make predictions
    """
    is_linux = sys.platform.startswith('linux')
    cases = [
        ('mushroom/mushroom.model', 'mushroom/agaricus.train',
         'mushroom/agaricus.test', './agaricus{}',
         'mushroom/agaricus.test.prob',
         'mushroom/agaricus.test.margin', False),
        ('dermatology/dermatology.model', 'dermatology/dermatology.train',
         'dermatology/dermatology.test', './dermatology{}',
         'dermatology/dermatology.test.prob',
         'dermatology/dermatology.test.margin', True),
    ]
    for (model_path, dtrain_path, dtest_path, libname_fmt,
         expected_prob_path, expected_margin_path, multiclass) in cases:
        model_path = os.path.join(dpath, model_path)
        model = treelite.Model.load(model_path, model_format='xgboost')
        make_annotation(model=model, dtrain_path=dtrain_path,
                        annotation_path='./annotation.json')
        for annotation in ('./annotation.json', None):
            for quantize in (True, False):
                for parallel_comp in (None, 2):
                    run_pipeline_test(
                        model=model, dtest_path=dtest_path,
                        libname_fmt=libname_fmt,
                        expected_prob_path=expected_prob_path,
                        expected_margin_path=expected_margin_path,
                        multiclass=multiclass,
                        use_annotation=annotation,
                        use_quantize=quantize,
                        use_parallel_comp=parallel_comp)
        # ELF output is only supported on Linux; elsewhere run without it
        elf_options = [True, False] if is_linux else [False]
        for elf in elf_options:
            run_pipeline_test(model=model, dtest_path=dtest_path,
                              libname_fmt=libname_fmt,
                              expected_prob_path=expected_prob_path,
                              expected_margin_path=expected_margin_path,
                              multiclass=multiclass, use_elf=elf,
                              use_compiler='failsafe')
        if not is_linux:
            # Expect to see an exception when using ELF in non-Linux OS
            with pytest.raises(treelite.common.util.TreeliteError):
                run_pipeline_test(
                    model=model, dtest_path=dtest_path,
                    libname_fmt=libname_fmt,
                    expected_prob_path=expected_prob_path,
                    expected_margin_path=expected_margin_path,
                    multiclass=multiclass, use_elf=True,
                    use_compiler='failsafe')
    # LETOR
    model_path = os.path.join(dpath, 'letor/mq2008.model')
    model = treelite.Model.load(model_path, model_format='xgboost')
    make_annotation(model=model, dtrain_path='letor/mq2008.train',
                    annotation_path='./annotation.json')
    if os_platform() != 'windows':
        # NOTE: inside this branch os_platform() is never 'windows', so the
        # conditional below always selects 'gcc'; kept as-is from original
        run_pipeline_test(
            model=model, dtest_path='letor/mq2008.test',
            libname_fmt='./mq2008{}', expected_prob_path=None,
            expected_margin_path='letor/mq2008.test.pred',
            multiclass=False, use_annotation='./annotation.json',
            use_quantize=1, use_parallel_comp=700,
            use_toolchains=[
                'msvc' if os_platform() == 'windows' else 'gcc'
            ])
    run_pipeline_test(model=model, dtest_path='letor/mq2008.test',
                      libname_fmt='./mq2008{}', expected_prob_path=None,
                      expected_margin_path='letor/mq2008.test.pred',
                      multiclass=False, use_elf=is_linux,
                      use_compiler='failsafe')
def test_basic(self):
    """
    Test a basic workflow: load a model, compile and export as shared lib,
    and make predictions
    """
    cases = [
        ('mushroom/mushroom.model', 'mushroom/agaricus.train',
         'mushroom/agaricus.test', './agaricus{}',
         'mushroom/agaricus.test.prob',
         'mushroom/agaricus.test.margin', False),
        ('dermatology/dermatology.model', 'dermatology/dermatology.train',
         'dermatology/dermatology.test', './dermatology{}',
         'dermatology/dermatology.test.prob',
         'dermatology/dermatology.test.margin', True),
    ]
    for (model_path, dtrain_path, dtest_path, libname_fmt,
         expected_prob_path, expected_margin_path, multiclass) in cases:
        model_path = os.path.join(dpath, model_path)
        model = treelite.Model.load(model_path, model_format='xgboost')
        make_annotation(model=model, dtrain_path=dtrain_path,
                        annotation_path='./annotation.json')
        for annotation in ('./annotation.json', None):
            for quantize in (True, False):
                for parallel_comp in (None, 2):
                    run_pipeline_test(
                        model=model, dtest_path=dtest_path,
                        libname_fmt=libname_fmt,
                        expected_prob_path=expected_prob_path,
                        expected_margin_path=expected_margin_path,
                        multiclass=multiclass,
                        use_annotation=annotation,
                        use_quantize=quantize,
                        use_parallel_comp=parallel_comp)
        # Also exercise the failsafe compiler once per model
        run_pipeline_test(model=model, dtest_path=dtest_path,
                          libname_fmt=libname_fmt,
                          expected_prob_path=expected_prob_path,
                          expected_margin_path=expected_margin_path,
                          multiclass=multiclass, use_compiler='failsafe')
    # LETOR
    model_path = os.path.join(dpath, 'letor/mq2008.model')
    model = treelite.Model.load(model_path, model_format='xgboost')
    make_annotation(model=model, dtrain_path='letor/mq2008.train',
                    annotation_path='./annotation.json')
    run_pipeline_test(model=model, dtest_path='letor/mq2008.test',
                      libname_fmt='./mq2008{}', expected_prob_path=None,
                      expected_margin_path='letor/mq2008.test.pred',
                      multiclass=False, use_annotation='./annotation.json',
                      use_quantize=1, use_parallel_comp=700,
                      use_toolchains=['gcc'])
    run_pipeline_test(model=model, dtest_path='letor/mq2008.test',
                      libname_fmt='./mq2008{}', expected_prob_path=None,
                      expected_margin_path='letor/mq2008.test.pred',
                      multiclass=False, use_compiler='failsafe')