Beispiel #1
0
  def test_lightgbm_multiclass_classification(self):
    """Compare compiled Treelite predictions with LightGBM on the iris data.

    For each multiclass objective a LightGBM booster is trained, saved to
    disk, loaded back through Treelite, compiled with every compatible
    toolchain, and its predictions are checked against LightGBM's own.
    The test is skipped when LightGBM cannot be imported.
    """
    try:
      import lightgbm
    except ImportError:
      raise nose.SkipTest()  # LightGBM is optional; skip when it is absent

    features, labels = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, shuffle=False)
    train_set = lightgbm.Dataset(X_train, y_train, free_raw_data=False)
    test_set = lightgbm.Dataset(X_test, y_test, reference=train_set,
                                free_raw_data=False)
    base_params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'metric': 'multi_logloss',
        'num_class': 3,
        'num_leaves': 31,
        'learning_rate': 0.05,
    }
    n_rounds = 10
    eval_sets = [train_set, test_set]
    eval_names = ['train', 'test']
    model_file = './iris_lightgbm.txt'

    for objective in ('multiclass', 'multiclassova'):
      base_params['objective'] = objective
      booster = lightgbm.train(base_params, train_set, n_rounds,
                               eval_sets, eval_names)
      booster.save_model(model_file)

      # Reference predictions straight from LightGBM
      expected_pred = booster.predict(X_test)

      model = treelite.Model.load(model_file, model_format='lightgbm')
      # One library name per objective; the trailing '{}' is left for libname
      libpath = libname('./iris_{}{{}}'.format(objective))
      batch = treelite.runtime.Batch.from_npy2d(X_test)
      for toolchain in os_compatible_toolchains():
        model.export_lib(toolchain=toolchain, libpath=libpath,
                         params={}, verbose=True)
        predictor = treelite.runtime.Predictor(libpath=libpath, verbose=True)
        assert_almost_equal(predictor.predict(batch), expected_pred)
Beispiel #2
0
  def test_srcpkg(self):
    """Test feature to export a source package and build it with make.

    The mushroom XGBoost model is exported as a zipped source package, the
    archive is unpacked into the current directory, the package is built
    with ``make``, and predictions from the resulting library are compared
    with the stored expected probabilities.

    Raises:
      subprocess.CalledProcessError: if the ``make`` build fails.
    """
    model_path = os.path.join(dpath, 'mushroom/mushroom.model')
    dmat_path = os.path.join(dpath, 'mushroom/agaricus.test')
    libpath = libname('./mushroom/mushroom{}')
    model = treelite.Model.load(model_path, model_format='xgboost')

    # Only the first compatible toolchain is exercised here
    toolchain = os_compatible_toolchains()[0]
    model.export_srcpkg(platform=os_platform(), toolchain=toolchain,
                        pkgpath='./srcpkg.zip', libname=libpath,
                        params={}, verbose=True)
    with ZipFile('./srcpkg.zip', 'r') as zip_ref:
      zip_ref.extractall('.')
    # check_call (rather than call) so that a failed build stops the test
    # right here instead of surfacing later as a library-loading error.
    subprocess.check_call(['make', '-C', 'mushroom'])

    predictor = treelite.runtime.Predictor(libpath='./mushroom', verbose=True)

    X, _ = load_svmlight_file(dmat_path, zero_based=True)
    dmat = treelite.DMatrix(X)
    batch = treelite.runtime.Batch.from_csr(dmat)

    expected_prob_path = os.path.join(dpath, 'mushroom/agaricus.test.prob')
    expected_prob = load_txt(expected_prob_path)
    out_prob = predictor.predict(batch)
    assert_almost_equal(out_prob, expected_prob)
    def test_xgb_iris(self):
        """Compare compiled Treelite predictions with XGBoost on iris.

        A multiclass XGBoost booster is trained on the iris data, converted
        with ``treelite.Model.from_xgboost``, compiled with every compatible
        toolchain, and the compiled predictions are checked against the
        booster's own predictions on the held-out split.
        """
        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test \
          = train_test_split(X, y, test_size=0.2, shuffle=False)
        dtrain = xgboost.DMatrix(X_train, label=y_train)
        dtest = xgboost.DMatrix(X_test, label=y_test)
        param = {
            'max_depth': 6,
            'eta': 0.05,
            'num_class': 3,
            'silent': 1,
            'objective': 'multi:softmax',
            # XGBoost's key for the evaluation metric is 'eval_metric';
            # a plain 'metric' entry is not a recognised parameter, so the
            # watchlist would not report mlogloss.
            'eval_metric': 'mlogloss'
        }
        num_round = 10
        watchlist = [(dtrain, 'train'), (dtest, 'test')]
        bst = xgboost.train(param, dtrain, num_round, watchlist)

        # Reference predictions straight from XGBoost
        expected_pred = bst.predict(dtest)

        model = treelite.Model.from_xgboost(bst)
        libpath = libname('./iris{}')
        batch = treelite.runtime.Batch.from_npy2d(X_test)
        for toolchain in os_compatible_toolchains():
            model.export_lib(toolchain=toolchain,
                             libpath=libpath,
                             params={},
                             verbose=True)
            predictor = treelite.runtime.Predictor(libpath=libpath,
                                                   verbose=True)
            out_pred = predictor.predict(batch)
            assert_almost_equal(out_pred, expected_pred)
Beispiel #4
0
  def run_non_linear_objective(self, objective, max_label, global_bias):
    """Train a small XGBoost model with `objective` and verify Treelite.

    Args:
      objective: value passed to XGBoost as the ``objective`` parameter;
        also used to build the compiled library's name.
      max_label: exclusive upper bound of the random integer labels.
      global_bias: value expected from ``predictor.global_bias`` (compared
        to 6 decimal places).
    """
    n_rows, n_cols = 16, 8
    np.random.seed(0)  # fixed seed so the random data is reproducible
    features = np.random.randn(n_rows, n_cols)
    labels = np.random.randint(0, max_label, size=n_rows)
    # Make sure the sampled labels span the whole range [0, max_label)
    assert labels.min() == 0
    assert labels.max() == max_label - 1

    train_data = xgboost.DMatrix(features, labels)
    xgb_model = xgboost.train({'objective': objective}, dtrain=train_data,
                              num_boost_round=4)
    expected_pred = xgb_model.predict(train_data)

    model = treelite.Model.from_xgboost(xgb_model)
    lib_template = './' + objective + '{}'
    libpath = libname(lib_template)
    batch = treelite.runtime.Batch.from_npy2d(features)
    for toolchain in os_compatible_toolchains():
      model.export_lib(toolchain=toolchain, libpath=libpath,
                       params={}, verbose=True)
      predictor = treelite.runtime.Predictor(libpath=libpath, verbose=True)
      assert_almost_equal(predictor.predict(batch), expected_pred)
      assert predictor.num_feature == n_cols
      np.testing.assert_almost_equal(predictor.global_bias, global_bias,
                                     decimal=6)
Beispiel #5
0
  def test_xgb(self):
    """Compare compiled Treelite predictions with XGBoost on load_boston data.

    Besides the predictions, the compiled predictor's metadata (feature
    count, output groups, prediction transform, global bias and sigmoid
    alpha) is checked against the values expected for this model.
    """
    features, target = load_boston(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, shuffle=False)
    train_data = xgboost.DMatrix(X_train, label=y_train)
    test_data = xgboost.DMatrix(X_test, label=y_test)
    train_params = {
        'max_depth': 8,
        'eta': 1,
        'silent': 1,
        'objective': 'reg:linear',
    }
    n_rounds = 10
    evals = [(train_data, 'train'), (test_data, 'test')]
    booster = xgboost.train(train_params, train_data, n_rounds, evals)

    # Reference predictions straight from XGBoost
    expected_pred = booster.predict(test_data)

    model = treelite.Model.from_xgboost(booster)
    libpath = libname('./boston{}')
    batch = treelite.runtime.Batch.from_npy2d(X_test)
    for toolchain in os_compatible_toolchains():
      model.export_lib(toolchain=toolchain, libpath=libpath,
                       params={}, verbose=True)
      predictor = treelite.runtime.Predictor(libpath=libpath, verbose=True)
      assert_almost_equal(predictor.predict(batch), expected_pred)
      # Metadata exposed by the compiled predictor
      assert predictor.num_feature == 13
      assert predictor.num_output_group == 1
      assert predictor.pred_transform == 'identity'
      assert predictor.global_bias == 0.5
      assert predictor.sigmoid_alpha == 1.0
    def test_xgb(self):
        """Compare compiled Treelite predictions with XGBoost (load_boston).

        The test is skipped when XGBoost cannot be imported. Otherwise a
        regression booster is trained, converted with
        ``treelite.Model.from_xgboost``, compiled with every compatible
        toolchain, and its predictions are compared with XGBoost's own.
        """
        try:
            import xgboost
        except ImportError:
            raise nose.SkipTest()  # XGBoost is optional; skip when absent

        features, target = load_boston(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, shuffle=False)
        train_data = xgboost.DMatrix(X_train, label=y_train)
        test_data = xgboost.DMatrix(X_test, label=y_test)
        train_params = {'max_depth': 8, 'eta': 1, 'silent': 1,
                        'objective': 'reg:linear'}
        n_rounds = 10
        evals = [(train_data, 'train'), (test_data, 'test')]
        booster = xgboost.train(train_params, train_data, n_rounds, evals)

        # Reference predictions straight from XGBoost
        expected_pred = booster.predict(test_data)

        model = treelite.Model.from_xgboost(booster)
        libpath = libname('./boston{}')
        batch = treelite.runtime.Batch.from_npy2d(X_test)
        for toolchain in os_compatible_toolchains():
            model.export_lib(toolchain=toolchain, libpath=libpath,
                             params={}, verbose=True)
            predictor = treelite.runtime.Predictor(libpath=libpath,
                                                   verbose=True)
            assert_almost_equal(predictor.predict(batch), expected_pred)
Beispiel #7
0
    def run_pipeline_test(self, model, dtest_path, libname_fmt,
                          expected_prob_path, expected_margin_path, multiclass,
                          use_annotation, use_quantize):
        """Compile `model` and check single-instance predictions row by row.

        Args:
            model: object providing ``export_lib``.
            dtest_path: svmlight test file, relative to ``tests/examples/``
                under the current working directory.
            libname_fmt: format string handed to ``libname``.
            expected_prob_path: file with expected probabilities, relative
                to the same examples directory.
            expected_margin_path: file with expected margins, relative to
                the same examples directory.
            multiclass: when true, expected outputs are reshaped to one row
                per test instance.
            use_annotation: value for the ``annotate_in`` compiler parameter,
                or None to leave it unset.
            use_quantize: when true, ``quantize`` is set to 1.
        """
        examples_dir = os.path.abspath(
            os.path.join(os.getcwd(), 'tests/examples/'))
        libpath = libname(libname_fmt)
        X_test, _ = load_svmlight_file(os.path.join(examples_dir, dtest_path),
                                       zero_based=True)

        expected_prob = load_txt(
            os.path.join(examples_dir, expected_prob_path))
        expected_margin = load_txt(
            os.path.join(examples_dir, expected_margin_path))
        n_rows = X_test.shape[0]
        if multiclass:
            expected_prob = expected_prob.reshape((n_rows, -1))
            expected_margin = expected_margin.reshape((n_rows, -1))

        params = {}
        if use_annotation is not None:
            params['annotate_in'] = use_annotation
        if use_quantize:
            params['quantize'] = 1

        for toolchain in os_compatible_toolchains():
            model.export_lib(toolchain=toolchain, libpath=libpath,
                             params=params, verbose=True)
            predictor = treelite.runtime.Predictor(libpath=libpath,
                                                   verbose=True)

            def check_instance(instance, row_id, **predict_kwargs):
                """Predict prob and margin for one row; compare with both."""
                out_prob = predictor.predict_instance(instance,
                                                      **predict_kwargs)
                out_margin = predictor.predict_instance(instance,
                                                        pred_margin=True,
                                                        **predict_kwargs)
                assert_almost_equal(out_prob, expected_prob[row_id])
                assert_almost_equal(out_margin, expected_margin[row_id])

            for row_id in range(n_rows):
                # Scipy CSR matrix
                sparse_row = X_test[row_id, :]
                check_instance(sparse_row, row_id)
                # NumPy 1D array with 0 as missing value
                dense_row = sparse_row.toarray().flatten()
                check_instance(dense_row, row_id, missing=0.0)
                # NumPy 1D array with np.nan as missing value
                # (zeros are overwritten in place)
                np.place(dense_row, dense_row == 0.0, [np.nan])
                check_instance(dense_row, row_id, missing=np.nan)
                # NumPy 1D array with np.nan as missing value
                # (default when `missing` parameter is unspecified)
                check_instance(dense_row, row_id)
Beispiel #8
0
 def test_deficient_matrix(self):
   """
   Check that prediction works on a sparse matrix that has fewer columns
   than the data the model was trained on. The expectation is that the
   matrix gets padded with zeros rather than causing a crash.
   """
   toolchain = os_compatible_toolchains()[0]
   libpath = libname('./mushroom{}')
   model = treelite.Model.load(os.path.join(dpath, 'mushroom/mushroom.model'),
                               model_format='xgboost')
   model.export_lib(toolchain=toolchain, libpath=libpath,
                    params={'quantize': 1}, verbose=True)

   # Empty 3x3 sparse matrix
   narrow = csr_matrix(([], ([], [])), shape=(3, 3))
   batch = treelite.runtime.Batch.from_csr(narrow)
   predictor = treelite.runtime.Predictor(libpath=libpath, verbose=True)
   predictor.predict(batch)  # passing means no exception was raised
Beispiel #9
0
    def test_lightgbm_binary_classification(self):
        """Compare compiled Treelite output with LightGBM on mushroom data.

        For each binary objective a LightGBM booster is trained, saved,
        loaded through Treelite and compiled with every compatible
        toolchain; both probabilities and raw margins are then compared
        with LightGBM's own predictions. The test is skipped when LightGBM
        cannot be imported.
        """
        try:
            import lightgbm
        except ImportError:
            # LightGBM is optional; skip when it is absent
            raise nose.SkipTest()

        train_file = os.path.join(dpath, 'mushroom/agaricus.train')
        test_file = os.path.join(dpath, 'mushroom/agaricus.test')
        train_set = lightgbm.Dataset(train_file)
        test_set = lightgbm.Dataset(test_file, reference=train_set)
        eval_sets = [train_set, test_set]
        eval_names = ['train', 'test']
        train_params = {'task': 'train', 'boosting_type': 'gbdt',
                        'metric': 'auc', 'num_leaves': 7,
                        'learning_rate': 0.1}
        n_rounds = 10
        model_file = './mushroom_lightgbm.txt'

        for objective in ('binary', 'xentlambda', 'xentropy'):
            train_params['objective'] = objective
            booster = lightgbm.train(train_params, train_set, n_rounds,
                                     eval_sets, eval_names)
            booster.save_model(model_file)

            # Reference outputs straight from LightGBM
            expected_prob = booster.predict(test_file)
            expected_margin = booster.predict(test_file, raw_score=True)

            model = treelite.Model.load(model_file, model_format='lightgbm')
            # One library name per objective; '{}' is left for libname
            libpath = libname('./agaricus_{}{{}}'.format(objective))
            test_matrix = treelite.DMatrix(test_file)
            batch = treelite.runtime.Batch.from_csr(test_matrix)
            for toolchain in os_compatible_toolchains():
                model.export_lib(toolchain=toolchain, libpath=libpath,
                                 params={}, verbose=True)
                predictor = treelite.runtime.Predictor(libpath, verbose=True)
                assert_almost_equal(predictor.predict(batch), expected_prob)
                assert_almost_equal(
                    predictor.predict(batch, pred_margin=True),
                    expected_margin)
Beispiel #10
0
 def test_too_wide_matrix(self):
   """
   Check how prediction behaves on a sparse matrix that has more columns
   than the data the model was trained on. Here the predictor is expected
   to raise a TreeliteError.
   """
   import treelite_runtime

   toolchain = os_compatible_toolchains()[0]
   libpath = libname('./mushroom{}')
   model = treelite.Model.load(os.path.join(dpath, 'mushroom/mushroom.model'),
                               model_format='xgboost')
   model.export_lib(toolchain=toolchain, libpath=libpath,
                    params={'quantize': 1}, verbose=True)

   # Empty sparse matrix with 1000 columns
   wide = csr_matrix(([], ([], [])), shape=(3, 1000))
   batch = treelite.runtime.Batch.from_csr(wide)
   predictor = treelite.runtime.Predictor(libpath=libpath, verbose=True)
   expected_error = treelite_runtime.common.util.TreeliteError
   assert_raises(expected_error, predictor.predict, batch)  # must raise