Esempio n. 1
0
def make_annotation(model, dtrain_path, annotation_path):
    """Annotate the branches of ``model`` with training data and save the result.

    ``dtrain_path`` is resolved relative to ``tests/examples/`` under the
    current working directory. The annotation is written to
    ``annotation_path`` exactly as given.
    """
    examples_dir = os.path.abspath(
        os.path.join(os.getcwd(), 'tests/examples/'))
    training_matrix = treelite.DMatrix(os.path.join(examples_dir, dtrain_path))
    branch_annotator = treelite.Annotator()
    branch_annotator.annotate_branch(model=model,
                                     dmat=training_matrix,
                                     verbose=True)
    branch_annotator.save(path=annotation_path)
Esempio n. 2
0
def run_pipeline_test(model, dtest_path, libname_fmt, expected_prob_path,
                      expected_margin_path, multiclass, use_annotation,
                      use_quantize):
    """Compile ``model`` with every compatible toolchain and verify predictions.

    All data paths are resolved relative to ``tests/examples/`` under the
    current working directory. For each toolchain the model is exported to a
    shared library, loaded back through the runtime, and both the default and
    the margin predictions are compared against the expected values.

    ``use_annotation`` (when not None) is passed as the ``annotate_in``
    compiler parameter; a truthy ``use_quantize`` sets ``quantize`` to 1.
    """
    examples_dir = os.path.abspath(
        os.path.join(os.getcwd(), 'tests/examples/'))
    test_file = os.path.join(examples_dir, dtest_path)
    libpath = libname(libname_fmt)
    dtest = treelite.DMatrix(test_file)
    batch = treelite.runtime.Batch.from_csr(dtest)

    prob_file = os.path.join(examples_dir, expected_prob_path)
    margin_file = os.path.join(examples_dir, expected_margin_path)
    expected_prob = load_txt(prob_file)
    expected_margin = load_txt(margin_file)
    if multiclass:
        # One row per test instance; the column count is inferred.
        row_shape = (dtest.shape[0], -1)
        expected_prob = expected_prob.reshape(row_shape)
        expected_margin = expected_margin.reshape(row_shape)

    params = {}
    if use_annotation is not None:
        params['annotate_in'] = use_annotation
    if use_quantize:
        params['quantize'] = 1

    tolerance = {'atol': 1e-11, 'rtol': 1e-6}
    for toolchain in os_compatible_toolchains():
        model.export_lib(toolchain=toolchain,
                         libpath=libpath,
                         params=params,
                         verbose=True)
        predictor = treelite.runtime.Predictor(libpath=libpath, verbose=True)
        actual_prob = predictor.predict(batch)
        assert np.allclose(actual_prob, expected_prob, **tolerance)
        actual_margin = predictor.predict(batch, pred_margin=True)
        assert np.allclose(actual_margin, expected_margin, **tolerance)
Esempio n. 3
0
def check_predictor(predictor, dataset):
    """Run ``predictor`` on the test set of ``dataset`` and validate the output.

    The margin prediction and the default prediction are both computed and
    handed, together with the shape of the test matrix, to
    ``check_predictor_output``.
    """
    test_matrix = treelite.DMatrix(dataset_db[dataset].dtest)
    test_batch = treelite_runtime.Batch.from_csr(test_matrix)
    margin = predictor.predict(test_batch, pred_margin=True)
    prob = predictor.predict(test_batch)
    check_predictor_output(dataset, test_matrix.shape, margin, prob)
Esempio n. 4
0
  def test_srcpkg(self):
    """Test feature to export a source package (zip archive)

    Exports the mushroom model as ``./srcpkg.zip``, extracts the archive into
    the current directory, builds it with ``make`` and checks that the
    resulting library reproduces the expected probabilities.
    """
    model_path = os.path.join(dpath, 'mushroom/mushroom.model')
    dmat_path = os.path.join(dpath, 'mushroom/agaricus.test')
    libpath = libname('./mushroom/mushroom{}')
    model = treelite.Model.load(model_path, model_format='xgboost')

    # Only the first compatible toolchain is exercised here.
    toolchain = os_compatible_toolchains()[0]
    model.export_srcpkg(platform=os_platform(), toolchain=toolchain,
                        pkgpath='./srcpkg.zip', libname=libpath,
                        params={}, verbose=True)
    with ZipFile('./srcpkg.zip', 'r') as zip_ref:
      zip_ref.extractall('.')
    # check_call raises CalledProcessError on a non-zero exit status, so a
    # broken build is reported here rather than as a confusing failure when
    # the predictor tries to load a library that was never produced.
    subprocess.check_call(['make', '-C', 'mushroom'])

    predictor = treelite.runtime.Predictor(libpath='./mushroom', verbose=True)

    X, _ = load_svmlight_file(dmat_path, zero_based=True)
    dmat = treelite.DMatrix(X)
    batch = treelite.runtime.Batch.from_csr(dmat)

    expected_prob_path = os.path.join(dpath, 'mushroom/agaricus.test.prob')
    expected_prob = load_txt(expected_prob_path)
    out_prob = predictor.predict(batch)
    assert_almost_equal(out_prob, expected_prob)
Esempio n. 5
0
def test_skl_converter_regressor(tmpdir, clazz, toolchain):  # pylint: disable=too-many-locals
    """Convert scikit-learn regressor

    Fits ``clazz`` on the Boston housing data, imports the fitted estimator
    into treelite, annotates and compiles it with ``toolchain``, then checks
    that the compiled predictor reports the expected metadata and reproduces
    the scikit-learn predictions to 5 decimal places.
    """
    X, y = load_boston(return_X_y=True)
    kwargs = {}
    if clazz == GradientBoostingRegressor:
        # NOTE(review): presumably chosen so the ensemble has no initial
        # offset, matching the global_bias == 0.0 assertion below -- confirm.
        kwargs['init'] = 'zero'
    clf = clazz(max_depth=3, random_state=0, n_estimators=10, **kwargs)
    clf.fit(X, y)
    expected_pred = clf.predict(X)

    # Metadata of the imported (not yet compiled) model.
    model = treelite.sklearn.import_model(clf)
    assert model.num_feature == clf.n_features_
    assert model.num_output_group == 1
    assert model.num_tree == clf.n_estimators

    # Annotate branches with the training data and save for the compiler.
    dtrain = treelite.DMatrix(X)
    annotation_path = os.path.join(tmpdir, 'annotation.json')
    annotator = treelite.Annotator()
    annotator.annotate_branch(model=model, dmat=dtrain, verbose=True)
    annotator.save(path=annotation_path)

    libpath = os.path.join(tmpdir, 'skl' + _libext())
    model.export_lib(toolchain=toolchain,
                     libpath=libpath,
                     params={'annotate_in': annotation_path},
                     verbose=True)

    # Metadata of the compiled predictor. The output group count is checked
    # on the predictor itself; the model's value was already asserted above.
    predictor = treelite_runtime.Predictor(libpath=libpath)
    assert predictor.num_feature == clf.n_features_
    assert predictor.num_output_group == 1
    assert predictor.pred_transform == 'identity'
    assert predictor.global_bias == 0.0
    assert predictor.sigmoid_alpha == 1.0
    batch = treelite_runtime.Batch.from_npy2d(X)
    out_pred = predictor.predict(batch)
    np.testing.assert_almost_equal(out_pred, expected_pred, decimal=5)
Esempio n. 6
0
def run_pipeline_test(model,
                      dtest_path,
                      libname_fmt,
                      expected_prob_path,
                      expected_margin_path,
                      multiclass,
                      use_annotation=None,
                      use_quantize=None,
                      use_parallel_comp=None,
                      use_code_folding=None,
                      use_toolchains=None,
                      use_compiler=None):
    """Compile ``model`` with the selected toolchains and verify predictions.

    Data paths are resolved relative to ``tests/examples/`` under the current
    working directory. ``expected_prob_path`` may be None, in which case the
    probability comparison is skipped and only margins are checked.

    Optional arguments map onto compiler parameters: ``use_annotation`` ->
    ``annotate_in``, truthy ``use_quantize`` -> ``quantize`` = 1,
    ``use_parallel_comp`` -> ``parallel_comp``, ``use_code_folding`` ->
    ``code_folding_req``. When ``use_compiler`` is None, the default of the
    ``compiler`` parameter of ``model.export_lib`` is used. When
    ``use_toolchains`` is None, all OS-compatible toolchains are tried;
    otherwise each ``'gcc'`` entry is replaced by the ``GCC_PATH`` environment
    variable (falling back to ``'gcc'``).
    """
    examples_dir = os.path.abspath(
        os.path.join(os.getcwd(), 'tests/examples/'))
    dtest_path = os.path.join(examples_dir, dtest_path)
    libpath = libname(libname_fmt)
    dtest = treelite.DMatrix(dtest_path)
    batch = treelite.runtime.Batch.from_csr(dtest)

    if expected_prob_path is not None:
        expected_prob_path = os.path.join(examples_dir, expected_prob_path)
    expected_margin_path = os.path.join(examples_dir, expected_margin_path)
    expected_prob = load_txt(expected_prob_path)
    expected_margin = load_txt(expected_margin_path)
    if multiclass:
        nrow = dtest.shape[0]
        if expected_prob is not None:
            expected_prob = expected_prob.reshape((nrow, -1))
        expected_margin = expected_margin.reshape((nrow, -1))

    params = {}
    if use_annotation is not None:
        params['annotate_in'] = use_annotation
    if use_quantize:
        params['quantize'] = 1
    for key, value in (('parallel_comp', use_parallel_comp),
                       ('code_folding_req', use_code_folding)):
        if value is not None:
            params[key] = value

    if use_compiler is None:
        # Fall back to whatever default export_lib itself declares.
        import inspect
        export_signature = inspect.signature(model.export_lib)
        use_compiler = export_signature.parameters['compiler'].default

    if use_toolchains is not None:
        gcc_executable = os.environ.get('GCC_PATH', 'gcc')
        toolchains = [gcc_executable if name == 'gcc' else name
                      for name in use_toolchains]
    else:
        toolchains = os_compatible_toolchains()

    for toolchain in toolchains:
        model.export_lib(toolchain=toolchain,
                         libpath=libpath,
                         compiler=use_compiler,
                         params=params,
                         verbose=True)
        predictor = treelite.runtime.Predictor(libpath=libpath, verbose=True)
        actual_prob = predictor.predict(batch)
        if expected_prob is not None:
            assert_almost_equal(actual_prob, expected_prob)
        actual_margin = predictor.predict(batch, pred_margin=True)
        assert_almost_equal(actual_margin, expected_margin)
Esempio n. 7
0
 def compute_annotation(dataset):
     """Return the branch annotation of ``dataset``'s model as a JSON string.

     The model is loaded first; if the dataset has no training data, None is
     returned. Otherwise the annotation is saved to ``<dataset>.json`` under
     ``tmpdir`` and the file contents are returned.
     """
     entry = dataset_db[dataset]
     model = treelite.Model.load(entry.model, model_format=entry.format)
     if entry.dtrain is None:
         return None
     training_matrix = treelite.DMatrix(entry.dtrain)
     branch_annotator = treelite.Annotator()
     branch_annotator.annotate_branch(model=model,
                                      dmat=training_matrix,
                                      verbose=True)
     annotation_path = os.path.join(tmpdir, f'{dataset}.json')
     branch_annotator.save(annotation_path)
     with open(annotation_path, 'r') as annotation_file:
         contents = annotation_file.read()
     return contents
Esempio n. 8
0
def run_pipeline_test(model,
                      dtest_path,
                      libname_fmt,
                      expected_prob_path,
                      expected_margin_path,
                      multiclass,
                      use_annotation,
                      use_quantize,
                      use_parallel_comp,
                      use_code_folding=None,
                      use_all_toolchains=True):
    """Compile ``model`` and compare runtime predictions with expected values.

    Data paths are resolved relative to ``tests/examples/`` under the current
    working directory. ``expected_prob_path`` may be None, in which case only
    margin predictions are compared. With ``use_all_toolchains`` false, only
    the first OS-compatible toolchain is used.
    """
    base_dir = os.path.abspath(os.path.join(os.getcwd(), 'tests/examples/'))
    dtest_path = os.path.join(base_dir, dtest_path)
    libpath = libname(libname_fmt)
    dtest = treelite.DMatrix(dtest_path)
    batch = treelite.runtime.Batch.from_csr(dtest)

    if expected_prob_path is not None:
        expected_prob_path = os.path.join(base_dir, expected_prob_path)
    expected_margin_path = os.path.join(base_dir, expected_margin_path)
    expected_prob = load_txt(expected_prob_path)
    expected_margin = load_txt(expected_margin_path)
    if multiclass:
        # One row per test instance; the column count is inferred.
        per_row = (dtest.shape[0], -1)
        if expected_prob is not None:
            expected_prob = expected_prob.reshape(per_row)
        expected_margin = expected_margin.reshape(per_row)

    # Compiler parameters are only set when the caller asked for them.
    params = {}
    if use_annotation is not None:
        params['annotate_in'] = use_annotation
    if use_quantize:
        params['quantize'] = 1
    if use_parallel_comp is not None:
        params['parallel_comp'] = use_parallel_comp
    if use_code_folding is not None:
        params['code_folding_req'] = use_code_folding

    candidates = os_compatible_toolchains()
    toolchains = candidates if use_all_toolchains else [candidates[0]]
    for toolchain in toolchains:
        model.export_lib(toolchain=toolchain,
                         libpath=libpath,
                         params=params,
                         verbose=True)
        predictor = treelite.runtime.Predictor(libpath=libpath, verbose=True)
        prob = predictor.predict(batch)
        if expected_prob is not None:
            assert_almost_equal(prob, expected_prob)
        margin = predictor.predict(batch, pred_margin=True)
        assert_almost_equal(margin, expected_margin)
Esempio n. 9
0
    def test_lightgbm_binary_classification(self):
        """Train LightGBM binary classifiers and check treelite reproduces them.

        For each of the objectives 'binary', 'xentlambda' and 'xentropy', a
        booster is trained on the mushroom data, saved, loaded into treelite,
        compiled with every compatible toolchain, and its probability and
        margin predictions are compared with LightGBM's own.
        """
        try:
            import lightgbm
        except ImportError:
            # skip this test if LightGBM is not installed
            raise nose.SkipTest()

        dtrain_path = os.path.join(dpath, 'mushroom/agaricus.train')
        dtest_path = os.path.join(dpath, 'mushroom/agaricus.test')
        dtrain = lightgbm.Dataset(dtrain_path)
        dtest = lightgbm.Dataset(dtest_path, reference=dtrain)
        watchlist = [dtrain, dtest]
        watchlist_names = ['train', 'test']
        num_round = 10
        param = {
            'task': 'train',
            'boosting_type': 'gbdt',
            'metric': 'auc',
            'num_leaves': 7,
            'learning_rate': 0.1
        }
        model_file = './mushroom_lightgbm.txt'

        for objective in ('binary', 'xentlambda', 'xentropy'):
            param['objective'] = objective
            booster = lightgbm.train(param, dtrain, num_round, watchlist,
                                     watchlist_names)
            booster.save_model(model_file)

            expected_prob = booster.predict(dtest_path)
            expected_margin = booster.predict(dtest_path, raw_score=True)

            model = treelite.Model.load(model_file, model_format='lightgbm')
            libpath = libname('./agaricus_{}{{}}'.format(objective))
            test_matrix = treelite.DMatrix(dtest_path)
            batch = treelite.runtime.Batch.from_csr(test_matrix)
            for toolchain in os_compatible_toolchains():
                model.export_lib(toolchain=toolchain,
                                 libpath=libpath,
                                 params={},
                                 verbose=True)
                predictor = treelite.runtime.Predictor(libpath, verbose=True)
                actual_prob = predictor.predict(batch)
                assert_almost_equal(actual_prob, expected_prob)
                actual_margin = predictor.predict(batch, pred_margin=True)
                assert_almost_equal(actual_margin, expected_margin)
Esempio n. 10
0
def test_skl_converter_multiclass_classifier(tmpdir, clazz, toolchain):
    # pylint: disable=too-many-locals
    """Convert scikit-learn multi-class classifier

    Fits ``clazz`` on the iris data, imports it into treelite, annotates and
    compiles it with ``toolchain``, then checks the predictor metadata and
    that its probabilities match ``predict_proba``. The random forest case is
    marked as an expected failure up front.
    """
    if clazz == RandomForestClassifier:
        pytest.xfail(
            reason='Need to use float64 thresholds to obtain correct result')

    is_gradient_boosting = clazz == GradientBoostingClassifier

    X, y = load_iris(return_X_y=True)
    kwargs = {'init': 'zero'} if is_gradient_boosting else {}
    clf = clazz(max_depth=3, random_state=0, n_estimators=10, **kwargs)
    clf.fit(X, y)
    expected_prob = clf.predict_proba(X)

    model = treelite.sklearn.import_model(clf)
    assert model.num_feature == clf.n_features_
    assert model.num_output_group == clf.n_classes_
    # Gradient boosting is expected to hold n_classes_ trees per estimator.
    trees_per_estimator = clf.n_classes_ if is_gradient_boosting else 1
    assert model.num_tree == clf.n_estimators * trees_per_estimator

    training_matrix = treelite.DMatrix(X)
    annotation_path = os.path.join(tmpdir, 'annotation.json')
    branch_annotator = treelite.Annotator()
    branch_annotator.annotate_branch(model=model,
                                     dmat=training_matrix,
                                     verbose=True)
    branch_annotator.save(path=annotation_path)

    libpath = os.path.join(tmpdir, 'skl' + _libext())
    model.export_lib(toolchain=toolchain,
                     libpath=libpath,
                     params={'annotate_in': annotation_path},
                     verbose=True)
    predictor = treelite_runtime.Predictor(libpath=libpath)
    assert predictor.num_feature == clf.n_features_
    assert predictor.num_output_group == clf.n_classes_
    if is_gradient_boosting:
        expected_transform = 'softmax'
    else:
        expected_transform = 'identity_multiclass'
    assert predictor.pred_transform == expected_transform
    assert predictor.global_bias == 0.0
    assert predictor.sigmoid_alpha == 1.0
    batch = treelite_runtime.Batch.from_npy2d(X)
    actual_prob = predictor.predict(batch)
    np.testing.assert_almost_equal(actual_prob, expected_prob)
def test_lightgbm_binary_classification(tmpdir, objective, toolchain):
    # pylint: disable=too-many-locals
    """Test a binary classifier

    Trains a LightGBM booster with ``objective`` on the mushroom dataset,
    saves it under ``tmpdir``, loads it into treelite, compiles it with
    ``toolchain`` and checks that probability and margin predictions match
    LightGBM's to 5 decimal places.
    """
    mushroom = dataset_db['mushroom']
    model_path = os.path.join(tmpdir, 'mushroom_lightgbm.txt')
    dtest_path = mushroom.dtest

    dtrain = lightgbm.Dataset(mushroom.dtrain)
    dtest = lightgbm.Dataset(dtest_path, reference=dtrain)
    training_params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': objective,
        'metric': 'auc',
        'num_leaves': 7,
        'learning_rate': 0.1
    }
    booster = lightgbm.train(training_params,
                             dtrain,
                             num_boost_round=10,
                             valid_sets=[dtrain, dtest],
                             valid_names=['train', 'test'])
    booster.save_model(model_path)

    expected_prob = booster.predict(dtest_path)
    expected_margin = booster.predict(dtest_path, raw_score=True)

    model = treelite.Model.load(model_path, model_format='lightgbm')
    libpath = os.path.join(tmpdir, f'agaricus_{objective}' + _libext())
    test_matrix = treelite.DMatrix(dtest_path)
    batch = treelite_runtime.Batch.from_csr(test_matrix)
    model.export_lib(toolchain=toolchain,
                     libpath=libpath,
                     params={},
                     verbose=True)
    predictor = treelite_runtime.Predictor(libpath, verbose=True)
    actual_prob = predictor.predict(batch)
    np.testing.assert_almost_equal(actual_prob, expected_prob, decimal=5)
    actual_margin = predictor.predict(batch, pred_margin=True)
    np.testing.assert_almost_equal(actual_margin, expected_margin, decimal=5)
Esempio n. 12
0
import treelite.runtime
import xgboost
from fast_svmlight_loader import load_svmlight
import sys
import os
import numpy as np
import time

# Each entry is (training data file, annotation output file, compiled library
# output path, XGBoost model file), in the order unpacked by the loops below.
files = [('allstate/allstate0.train.libsvm', 'allstate/allstate.json',
          'allstate/allstate.so', 'allstate/allstate-1600-0.010000.model'),
         ('yahoo/yahoo.train', 'yahoo/yahoo.json', 'yahoo/yahoo.so',
          'yahoo/yahoo-1600-0.01.model')]

# First pass: for every model, annotate its branches with the training data,
# save the annotation, and compile a shared library with gcc using both
# quantization and the freshly written annotation file.
for datafile, annotfile, libfile, modfile in files:
    model = treelite.Model.load(modfile, 'xgboost')
    dmat = treelite.DMatrix(datafile)
    annotator = treelite.Annotator()
    annotator.annotate_branch(model, dmat, verbose=True)
    annotator.save(annotfile)
    model.export_lib(toolchain='gcc',
                     libpath=libfile,
                     params={
                         'quantize': 1,
                         'annotate_in': annotfile
                     },
                     verbose=True)

# Second pass: reload each data file with the fast svmlight loader, recording
# the start time before loading.
# NOTE(review): the snippet appears to be cut off here -- `start`, `dmat`,
# `libfile` and `modfile` are not used within the visible lines.
for datafile, _, libfile, modfile in files:
    print('Loading data file {}'.format(datafile))
    start = time.time()
    dmat = load_svmlight(filename=datafile, verbose=False)
Esempio n. 13
0
def test_model_builder(tmpdir, use_annotation, quantize, toolchain):
    """A simple model

    Builds a two-tree model node by node with the model builder, optionally
    annotates it with the mushroom training data, compiles it with
    ``toolchain`` and checks the compiled predictor on the mushroom dataset.
    """
    num_feature = 127
    pred_transform = 'sigmoid'
    builder = treelite.ModelBuilder(num_feature=num_feature,
                                    random_forest=False,
                                    pred_transform=pred_transform)

    # Every test node in both trees uses this same threshold.
    split_threshold = -9.53674316e-07

    def grow_tree(node_specs):
        """Create a tree from node specs, applied in the order listed.

        A 4-tuple ``(key, feature_id, left_key, right_key)`` describes a
        numerical test node; a 2-tuple ``(key, leaf_value)`` describes a leaf.
        Node 0 is marked as the root once all nodes are set.
        """
        tree = treelite.ModelBuilder.Tree()
        for key, *payload in node_specs:
            if len(payload) == 1:
                tree[key].set_leaf_node(leaf_value=payload[0])
            else:
                feature_id, left_key, right_key = payload
                tree[key].set_numerical_test_node(feature_id=feature_id,
                                                  opname='<',
                                                  threshold=split_threshold,
                                                  default_left=True,
                                                  left_child_key=left_key,
                                                  right_child_key=right_key)
        tree[0].set_root()
        return tree

    # Build mushroom model
    first_tree = [
        (0, 29, 1, 2),
        (1, 56, 3, 4),
        (3, 60, 7, 8),
        (7, 1.89899647),
        (8, -1.94736838),
        (4, 21, 9, 10),
        (9, 1.78378379),
        (10, -1.98135197),
        (2, 109, 5, 6),
        (5, 67, 11, 12),
        (11, -1.9854598),
        (12, 0.938775539),
        (6, 1.87096775),
    ]
    second_tree = [
        (0, 29, 1, 2),
        (1, 21, 3, 4),
        (3, 1.14607906),
        (4, 36, 7, 8),
        (7, -6.87994671),
        (8, -0.10659159),
        (2, 109, 5, 6),
        (5, 39, 9, 10),
        (9, -0.0930657759),
        (10, -1.15261209),
        (6, 1.00423074),
    ]
    for node_specs in (first_tree, second_tree):
        builder.append(grow_tree(node_specs))

    model = builder.commit()
    assert model.num_feature == num_feature
    assert model.num_output_group == 1
    assert model.num_tree == 2

    annotation_path = os.path.join(tmpdir, 'annotation.json')
    annotate_in = 'NULL'
    if use_annotation:
        training_matrix = treelite.DMatrix(dataset_db['mushroom'].dtrain)
        branch_annotator = treelite.Annotator()
        branch_annotator.annotate_branch(model=model,
                                         dmat=training_matrix,
                                         verbose=True)
        branch_annotator.save(path=annotation_path)
        annotate_in = annotation_path

    params = {
        'annotate_in': annotate_in,
        'quantize': (1 if quantize else 0),
        'parallel_comp': model.num_tree
    }
    libpath = os.path.join(tmpdir, 'mushroom' + _libext())
    model.export_lib(toolchain=toolchain,
                     libpath=libpath,
                     params=params,
                     verbose=True)
    predictor = treelite_runtime.Predictor(libpath=libpath, verbose=True)
    check_predictor(predictor, 'mushroom')