def test_round_trip(self):
    """
    Load each model, export it to Protobuf, re-load the exported copy,
    and verify that the reconstituted model yields the expected predictions.
    """
    cases = [
        ('xgboost', 'mushroom/mushroom.model', 'mushroom/agaricus.test',
         './agaricus{}', 'mushroom/agaricus.test.prob',
         'mushroom/agaricus.test.margin', False),
        ('xgboost', 'dermatology/dermatology.model',
         'dermatology/dermatology.test', './dermatology{}',
         'dermatology/dermatology.test.prob',
         'dermatology/dermatology.test.margin', True),
        ('lightgbm', 'toy_categorical/toy_categorical_model.txt',
         'toy_categorical/toy_categorical.test', './toycat{}',
         None, 'toy_categorical/toy_categorical.test.pred', False),
    ]
    for (fmt, mpath, dtest, libfmt, prob, margin, is_multi) in cases:
        original = treelite.Model.load(os.path.join(dpath, mpath),
                                       model_format=fmt)
        # Round-trip through the Protobuf serialization format.
        original.export_protobuf('./my.buffer')
        restored = treelite.Model.load('./my.buffer',
                                       model_format='protobuf')
        for quantize in (False, True):
            run_pipeline_test(model=restored,
                              dtest_path=dtest,
                              libname_fmt=libfmt,
                              expected_prob_path=prob,
                              expected_margin_path=margin,
                              multiclass=is_multi,
                              use_annotation=None,
                              use_quantize=quantize,
                              use_parallel_comp=None)
# Example #2
# 0
 def test_basic(self):
     """
     Test a basic workflow: load a model, compile and export as shared lib,
     and make predictions
     """
     cases = [
         ('mushroom/mushroom.model', 'mushroom/agaricus.train',
          'mushroom/agaricus.test', './agaricus{}',
          'mushroom/agaricus.test.prob',
          'mushroom/agaricus.test.margin', False),
         ('dermatology/dermatology.model', 'dermatology/dermatology.train',
          'dermatology/dermatology.test', './dermatology{}',
          'dermatology/dermatology.test.prob',
          'dermatology/dermatology.test.margin', True),
     ]
     for (mpath, dtrain, dtest, libfmt, prob, margin, is_multi) in cases:
         model = treelite.Model.load(os.path.join(dpath, mpath),
                                     model_format='xgboost')
         make_annotation(model=model,
                         dtrain_path=dtrain,
                         annotation_path='./annotation.json')
         # Exercise every annotation/quantization combination.
         for annotation in ('./annotation.json', None):
             for quantize in (True, False):
                 run_pipeline_test(
                     model=model,
                     dtest_path=dtest,
                     libname_fmt=libfmt,
                     expected_prob_path=prob,
                     expected_margin_path=margin,
                     multiclass=is_multi,
                     use_annotation=annotation,
                     use_quantize=quantize)
    def test_sparse_categorical_model(self):
        """
        LightGBM is able to produce categorical splits directly, so that
        categorical data don't have to be one-hot encoded. Test if Treelite is
        able to handle categorical splits.

        This example produces a model with high-cardinality categorical
        variables. The training data has many missing values, so we need to
        match LightGBM when it comes to handling missing values
        """
        # Single test case: a LightGBM model with sparse categorical splits.
        model_path = os.path.join(
            dpath, 'sparse_categorical/sparse_categorical_model.txt')
        model = treelite.Model.load(model_path, model_format='lightgbm')
        for quantize in (False, True):
            run_pipeline_test(
                model=model,
                dtest_path='sparse_categorical/sparse_categorical.test',
                libname_fmt='./sparsecat{}',
                expected_prob_path=None,
                expected_margin_path='sparse_categorical/sparse_categorical.test.margin',
                multiclass=False,
                use_annotation=None,
                use_quantize=quantize,
                use_parallel_comp=None,
                use_toolchains=['gcc'])
# Example #4
# 0
 def test_code_folding(self):
     """
     Compile each model with code folding enabled at several annotation
     thresholds and verify that predictions are unchanged.
     """
     cases = [
         ('xgboost', 'mushroom/mushroom.model', 'mushroom/agaricus.train',
          'mushroom/agaricus.test', './agaricus{}',
          'mushroom/agaricus.test.prob',
          'mushroom/agaricus.test.margin', False, None),
         ('xgboost', 'dermatology/dermatology.model',
          'dermatology/dermatology.train', 'dermatology/dermatology.test',
          './dermatology{}', 'dermatology/dermatology.test.prob',
          'dermatology/dermatology.test.margin', True, None),
         ('lightgbm', 'toy_categorical/toy_categorical_model.txt', None,
          'toy_categorical/toy_categorical.test', './toycat{}',
          None, 'toy_categorical/toy_categorical.test.pred', False, 2),
     ]
     for (fmt, mpath, dtrain, dtest, libfmt, prob, margin,
          is_multi, parallel_comp) in cases:
         model = treelite.Model.load(os.path.join(dpath, mpath),
                                     model_format=fmt)
         # Branch annotation is only available when training data is given.
         if dtrain is None:
             annotation = None
         else:
             make_annotation(model=model, dtrain_path=dtrain,
                             annotation_path='./annotation.json')
             annotation = './annotation.json'
         for quantize in (False, True):
             for fold_threshold in (1.0, 2.0, 3.0):
                 run_pipeline_test(model=model,
                                   dtest_path=dtest,
                                   libname_fmt=libfmt,
                                   expected_prob_path=prob,
                                   expected_margin_path=margin,
                                   multiclass=is_multi,
                                   use_annotation=annotation,
                                   use_quantize=quantize,
                                   use_parallel_comp=parallel_comp,
                                   use_code_folding=fold_threshold)
# Example #5
# 0
    def test_categorical_data(self):
        """
        LightGBM is able to produce categorical splits directly, so that
        categorical data don't have to be one-hot encoded. Test if Treelite is
        able to handle categorical splits.

        This toy example contains two features, both of which are categorical.
        The first has cardinality 3 and the second 5. The label was generated
        using the formula

           y = f(x0) + g(x1) + [noise with std=0.1]

        where f and g are given by the tables

           x0  f(x0)        x1  g(x1)
            0    -20         0     -2
            1    -10         1     -1
            2      0         2      0
                             3      1
                             4      2
        """
        # Single test case: a toy LightGBM model with two categorical features.
        model_path = os.path.join(
            dpath, 'toy_categorical/toy_categorical_model.txt')
        model = treelite.Model.load(model_path, model_format='lightgbm')
        for quantize in (False, True):
            for parallel_comp in (None, 2):
                run_pipeline_test(
                    model=model,
                    dtest_path='toy_categorical/toy_categorical.test',
                    libname_fmt='./toycat{}',
                    expected_prob_path=None,
                    expected_margin_path='toy_categorical/toy_categorical.test.pred',
                    multiclass=False,
                    use_annotation=None,
                    use_quantize=quantize,
                    use_parallel_comp=parallel_comp)
# Example #6
# 0
    def test_basic(self):
        """
        Test a basic workflow: load a model, compile and export as shared lib,
        and make predictions
        """
        is_linux = sys.platform.startswith('linux')

        for model_path, dtrain_path, dtest_path, libname_fmt, \
            expected_prob_path, expected_margin_path, multiclass in \
            [('mushroom/mushroom.model', 'mushroom/agaricus.train',
              'mushroom/agaricus.test', './agaricus{}',
              'mushroom/agaricus.test.prob',
              'mushroom/agaricus.test.margin', False),
             ('dermatology/dermatology.model', 'dermatology/dermatology.train',
              'dermatology/dermatology.test', './dermatology{}',
              'dermatology/dermatology.test.prob',
              'dermatology/dermatology.test.margin', True)]:
            model_path = os.path.join(dpath, model_path)
            model = treelite.Model.load(model_path, model_format='xgboost')
            make_annotation(model=model,
                            dtrain_path=dtrain_path,
                            annotation_path='./annotation.json')
            # Exercise every annotation/quantization/parallel-compilation
            # combination with the default compiler.
            for use_annotation in ['./annotation.json', None]:
                for use_quantize in [True, False]:
                    for use_parallel_comp in [None, 2]:
                        run_pipeline_test(
                            model=model,
                            dtest_path=dtest_path,
                            libname_fmt=libname_fmt,
                            expected_prob_path=expected_prob_path,
                            expected_margin_path=expected_margin_path,
                            multiclass=multiclass,
                            use_annotation=use_annotation,
                            use_quantize=use_quantize,
                            use_parallel_comp=use_parallel_comp)
            # The 'failsafe' compiler; ELF output is only attempted on Linux.
            for use_elf in [True, False] if is_linux else [False]:
                run_pipeline_test(model=model,
                                  dtest_path=dtest_path,
                                  libname_fmt=libname_fmt,
                                  expected_prob_path=expected_prob_path,
                                  expected_margin_path=expected_margin_path,
                                  multiclass=multiclass,
                                  use_elf=use_elf,
                                  use_compiler='failsafe')
            if not is_linux:
                # Expect to see an exception when using ELF in non-Linux OS
                with pytest.raises(treelite.common.util.TreeliteError):
                    run_pipeline_test(
                        model=model,
                        dtest_path=dtest_path,
                        libname_fmt=libname_fmt,
                        expected_prob_path=expected_prob_path,
                        expected_margin_path=expected_margin_path,
                        multiclass=multiclass,
                        use_elf=True,
                        use_compiler='failsafe')

        # LETOR
        model_path = os.path.join(dpath, 'letor/mq2008.model')
        model = treelite.Model.load(model_path, model_format='xgboost')
        make_annotation(model=model,
                        dtrain_path='letor/mq2008.train',
                        annotation_path='./annotation.json')
        if os_platform() != 'windows':
            run_pipeline_test(
                model=model,
                dtest_path='letor/mq2008.test',
                libname_fmt='./mq2008{}',
                expected_prob_path=None,
                expected_margin_path='letor/mq2008.test.pred',
                multiclass=False,
                use_annotation='./annotation.json',
                use_quantize=True,  # was the int 1; boolean conveys the intent
                use_parallel_comp=700,
                # This branch excludes Windows, so the former
                # `'msvc' if os_platform() == 'windows' else 'gcc'` could
                # never select 'msvc'; 'gcc' is the only reachable value.
                use_toolchains=['gcc'])
        run_pipeline_test(model=model,
                          dtest_path='letor/mq2008.test',
                          libname_fmt='./mq2008{}',
                          expected_prob_path=None,
                          expected_margin_path='letor/mq2008.test.pred',
                          multiclass=False,
                          use_elf=is_linux,
                          use_compiler='failsafe')
# Example #7
# 0
 def test_basic(self):
     """
     Test a basic workflow: load a model, compile and export as shared lib,
     and make predictions
     """
     for model_path, dtrain_path, dtest_path, libname_fmt, \
         expected_prob_path, expected_margin_path, multiclass in \
         [('mushroom/mushroom.model', 'mushroom/agaricus.train',
           'mushroom/agaricus.test', './agaricus{}',
           'mushroom/agaricus.test.prob',
           'mushroom/agaricus.test.margin', False),
          ('dermatology/dermatology.model', 'dermatology/dermatology.train',
           'dermatology/dermatology.test', './dermatology{}',
           'dermatology/dermatology.test.prob',
           'dermatology/dermatology.test.margin', True)]:
         model_path = os.path.join(dpath, model_path)
         model = treelite.Model.load(model_path, model_format='xgboost')
         make_annotation(model=model,
                         dtrain_path=dtrain_path,
                         annotation_path='./annotation.json')
         # Exercise every annotation/quantization/parallel-compilation
         # combination with the default compiler.
         for use_annotation in ['./annotation.json', None]:
             for use_quantize in [True, False]:
                 for use_parallel_comp in [None, 2]:
                     run_pipeline_test(
                         model=model,
                         dtest_path=dtest_path,
                         libname_fmt=libname_fmt,
                         expected_prob_path=expected_prob_path,
                         expected_margin_path=expected_margin_path,
                         multiclass=multiclass,
                         use_annotation=use_annotation,
                         use_quantize=use_quantize,
                         use_parallel_comp=use_parallel_comp)
         # Also compile once with the 'failsafe' compiler.
         run_pipeline_test(model=model,
                           dtest_path=dtest_path,
                           libname_fmt=libname_fmt,
                           expected_prob_path=expected_prob_path,
                           expected_margin_path=expected_margin_path,
                           multiclass=multiclass,
                           use_compiler='failsafe')
     # LETOR
     model_path = os.path.join(dpath, 'letor/mq2008.model')
     model = treelite.Model.load(model_path, model_format='xgboost')
     make_annotation(model=model,
                     dtrain_path='letor/mq2008.train',
                     annotation_path='./annotation.json')
     run_pipeline_test(model=model,
                       dtest_path='letor/mq2008.test',
                       libname_fmt='./mq2008{}',
                       expected_prob_path=None,
                       expected_margin_path='letor/mq2008.test.pred',
                       multiclass=False,
                       use_annotation='./annotation.json',
                       use_quantize=True,  # was the int 1; boolean conveys the intent
                       use_parallel_comp=700,
                       use_toolchains=['gcc'])
     run_pipeline_test(model=model,
                       dtest_path='letor/mq2008.test',
                       libname_fmt='./mq2008{}',
                       expected_prob_path=None,
                       expected_margin_path='letor/mq2008.test.pred',
                       multiclass=False,
                       use_compiler='failsafe')