Esempio n. 1
0
def test_node_insert_delete(tmpdir, toolchain):
    """Check that tree nodes can be created, deleted and re-created.

    A small tree is first built with node 1 as its root, then nodes 1 and 5
    are deleted and node 5 is re-created as a categorical split that becomes
    the new root.  The committed model is compiled into a shared library and
    its predictions are compared against the expected output of the final
    tree for a grid of inputs that includes missing values.
    """
    num_feature = 3
    builder = treelite.ModelBuilder(num_feature=num_feature)
    builder.append(treelite.ModelBuilder.Tree())

    # First version of the tree: node 1 is the root and splits on feature 2.
    builder[0][1].set_root()
    builder[0][1].set_numerical_test_node(
        feature_id=2, opname='<', threshold=-0.5, default_left=True,
        left_child_key=5, right_child_key=10)
    builder[0][5].set_leaf_node(-1)
    builder[0][10].set_numerical_test_node(
        feature_id=0, opname='<=', threshold=0.5, default_left=False,
        left_child_key=7, right_child_key=8)
    builder[0][7].set_leaf_node(0.0)
    builder[0][8].set_leaf_node(1.0)

    # Drop the old root and its left leaf, then reuse key 5 for a categorical
    # split on feature 1 that takes over as root (node 10 is kept as its
    # right child).
    del builder[0][1]
    del builder[0][5]
    builder[0][5].set_categorical_test_node(
        feature_id=1, left_categories=[1, 2, 4], default_left=True,
        left_child_key=20, right_child_key=10)
    builder[0][20].set_leaf_node(2.0)
    builder[0][5].set_root()

    model = builder.commit()
    assert model.num_feature == num_feature
    assert model.num_output_group == 1
    assert model.num_tree == 1

    libpath = os.path.join(tmpdir, 'libtest' + _libext())
    model.export_lib(toolchain=toolchain, libpath=libpath, verbose=True)
    predictor = treelite_runtime.Predictor(libpath=libpath)
    assert predictor.num_feature == num_feature
    assert predictor.num_output_group == 1
    assert predictor.pred_transform == 'identity'
    assert predictor.global_bias == 0.0
    assert predictor.sigmoid_alpha == 1.0

    # The expected output never depends on f2: the only node that tested
    # feature 2 was deleted above.
    for f0 in [-0.5, 0.5, 1.5, np.nan]:
        for f1 in [0, 1, 2, 3, 4, np.nan]:
            for f2 in [-1.0, -0.5, 1.0, np.nan]:
                x = np.array([f0, f1, f2])
                pred = predictor.predict_instance(x)
                # Root: listed categories and missing f1 go to leaf 20.
                if np.isnan(f1) or f1 in [1, 2, 4]:
                    expected_pred = 2.0
                # Node 10: missing f0 defaults right, as does f0 > 0.5.
                elif np.isnan(f0) or f0 > 0.5:
                    expected_pred = 1.0
                else:
                    expected_pred = 0.0
                if pred != expected_pred:
                    raise ValueError(
                        f'Prediction wrong for f0={f0}, f1={f1}, f2={f2}: ' +
                        f'expected_pred = {expected_pred} vs actual_pred = {pred}'
                    )
Esempio n. 2
0
    def process_model(cls, sklearn_model):
        """Convert a multi-class GradientBoostingClassifier into a Treelite model.

        Args:
            sklearn_model: fitted scikit-learn gradient boosting classifier that
                was trained with ``init='zero'``.

        Returns:
            The model produced by committing the Treelite model builder.

        Raises:
            treelite.TreeliteError: if ``sklearn_model.init`` is not ``'zero'``.
        """
        # Check for init='zero'
        if sklearn_model.init != 'zero':
            raise treelite.TreeliteError(
                "Gradient boosted trees must be trained with "
                "the option init='zero'")
        # Initialize Treelite model builder
        # Set random_forest=False for gradient boosted trees
        # Set num_output_group for multi-class classification
        # Set pred_transform='softmax' to obtain probability predictions
        builder = treelite.ModelBuilder(
            num_feature=sklearn_model.n_features_,
            num_output_group=sklearn_model.n_classes_,
            random_forest=False,
            pred_transform='softmax',
            threshold_type='float64',
            leaf_output_type='float64')
        # Walk the fitted boosting stages themselves instead of
        # range(n_estimators): with early stopping (n_iter_no_change)
        # scikit-learn keeps fewer stages in estimators_ than the n_estimators
        # hyperparameter asks for, and indexing by the hyperparameter would
        # run past the end of the array.
        for stage in sklearn_model.estimators_:
            # Each stage holds one regression tree per class.
            for k in range(sklearn_model.n_classes_):
                builder.append(
                    cls.process_tree(stage[k].tree_, sklearn_model))

        return builder.commit()
Esempio n. 3
0
    def process_model(cls, sklearn_model):
        """Turn a binary RandomForestClassifier into a Treelite model.

        Every member tree of the forest is converted with ``cls.process_tree``
        and appended to a model builder whose tree outputs are averaged; the
        committed model is returned.
        """
        builder = treelite.ModelBuilder(
            num_feature=sklearn_model.n_features_,
            average_tree_output=True,
            threshold_type='float64',
            leaf_output_type='float64')
        # Lazily fetch the low-level tree of each of the n_estimators members.
        member_trees = (sklearn_model.estimators_[idx].tree_
                        for idx in range(sklearn_model.n_estimators))
        for member_tree in member_trees:
            builder.append(cls.process_tree(member_tree, sklearn_model))

        return builder.commit()
Esempio n. 4
0
def toy_model_fixture():
    """Return a committed two-feature model made of one categorical stump.

    The root (node 0) tests feature 1 against the category list ``[0]`` and
    has two leaf children: node 1 (value -1.0) on the left and node 2
    (value 1.0) on the right.
    """
    builder = treelite.ModelBuilder(num_feature=2)
    stump = treelite.ModelBuilder.Tree()

    root_split = {
        'feature_id': 1,
        'left_categories': [0],
        'default_left': True,
        'left_child_key': 1,
        'right_child_key': 2,
    }
    stump[0].set_categorical_test_node(**root_split)
    for leaf_key, leaf_value in ((1, -1.0), (2, 1.0)):
        stump[leaf_key].set_leaf_node(leaf_value)
    stump[0].set_root()

    builder.append(stump)
    return builder.commit()
def convert_model(model_name, num_feature, start_feature=0):
    """Build a Treelite model from a JSON tree dump and export it for XGBoost.

    Reads ``<model_name>.json``, which must decode to an iterable with one
    entry per tree.  Each entry is handed to ``dfs_build`` (defined elsewhere
    in this file) together with a fresh tree whose node 0 is already marked
    as root.  The committed model is written to ``<model_name>.model`` via
    ``export_as_xgboost`` with the ``'binary:logistic'`` objective.

    Args:
        model_name: path prefix shared by the input ``.json`` file and the
            output ``.model`` file.
        num_feature: number of features passed to the model builder.
        start_feature: forwarded unchanged to ``dfs_build``.

    Returns:
        The committed Treelite model.
    """
    builder = treelite.ModelBuilder(num_feature)
    # JSON text is UTF-8 by specification, so do not depend on the platform's
    # default text encoding when reading the dump.
    with open(model_name + '.json', encoding='utf-8') as f:
        ensemble = json.load(f)
    for tree_json in ensemble:
        tree = treelite.ModelBuilder.Tree()
        tree[0].set_root()
        dfs_build(tree, tree_json, start_feature)
        builder.append(tree)
    model = builder.commit()

    model.export_as_xgboost(model_name + '.model', 'binary:logistic')

    return model
Esempio n. 6
0
    def process_model(cls, sklearn_model):
        """Turn a RandomForestRegressor into a Treelite model.

        Each member tree is converted with ``cls.process_tree`` and appended
        to a model builder created with ``random_forest=True``; the committed
        model is returned.
        """
        # random_forest=True tells the builder this ensemble is a random forest
        builder = treelite.ModelBuilder(
            num_feature=sklearn_model.n_features_, random_forest=True)

        # Lazily fetch the low-level tree of each of the n_estimators members.
        member_trees = (sklearn_model.estimators_[idx].tree_
                        for idx in range(sklearn_model.n_estimators))
        for member_tree in member_trees:
            converted = cls.process_tree(member_tree, sklearn_model)
            builder.append(converted)

        return builder.commit()
Esempio n. 7
0
    def process_model(cls, sklearn_model):
        """Turn a multi-class RandomForestClassifier into a Treelite model.

        The builder is configured with one output group per class and the
        ``'identity_multiclass'`` prediction transform; every member tree is
        converted with ``cls.process_tree`` and appended before committing.
        """
        # Multi-class output needs both num_output_group and pred_transform
        builder_options = {
            'num_feature': sklearn_model.n_features_,
            'num_output_group': sklearn_model.n_classes_,
            'random_forest': True,
            'pred_transform': 'identity_multiclass',
        }
        builder = treelite.ModelBuilder(**builder_options)

        tree_index = 0
        tree_count = sklearn_model.n_estimators
        while tree_index < tree_count:
            member_tree = sklearn_model.estimators_[tree_index].tree_
            builder.append(cls.process_tree(member_tree, sklearn_model))
            tree_index += 1

        return builder.commit()
Esempio n. 8
0
    def process_model(cls, sklearn_model):
        """Convert a GradientBoostingRegressor into a Treelite model.

        Args:
            sklearn_model: fitted scikit-learn gradient boosting regressor that
                was trained with ``init='zero'``.

        Returns:
            The model produced by committing the Treelite model builder.

        Raises:
            treelite.TreeliteError: if ``sklearn_model.init`` is not ``'zero'``.
        """
        # Check for init='zero'
        if sklearn_model.init != 'zero':
            raise treelite.TreeliteError("Gradient boosted trees must be trained with "
                                         "the option init='zero'")
        # Initialize Treelite model builder
        # Set random_forest=False for gradient boosted trees
        builder = treelite.ModelBuilder(
            num_feature=sklearn_model.n_features_, random_forest=False,
            threshold_type='float64', leaf_output_type='float64')
        # Walk the fitted boosting stages themselves instead of
        # range(n_estimators): with early stopping (n_iter_no_change)
        # scikit-learn keeps fewer stages in estimators_ than the n_estimators
        # hyperparameter asks for, and indexing by the hyperparameter would
        # run past the end of the array.
        for stage in sklearn_model.estimators_:
            # The tree of each stage is read from column 0.
            builder.append(cls.process_tree(stage[0].tree_, sklearn_model))

        return builder.commit()
Esempio n. 9
0
    def process_model(cls, sklearn_model):
        """Convert a binary GradientBoostingClassifier into a Treelite model.

        Args:
            sklearn_model: fitted scikit-learn gradient boosting classifier that
                was trained with ``init='zero'``.

        Returns:
            The model produced by committing the Treelite model builder.

        Raises:
            treelite.TreeliteError: if ``sklearn_model.init`` is not ``'zero'``.
        """
        # Check for init='zero'
        if sklearn_model.init != 'zero':
            raise treelite.TreeliteError("Gradient boosted trees must be trained with "
                                         "the option init='zero'")
        # Initialize Treelite model builder
        # Set average_tree_output=False for gradient boosted trees
        # Set pred_transform='sigmoid' to obtain probability predictions
        builder = treelite.ModelBuilder(
            num_feature=sklearn_model.n_features_, average_tree_output=False,
            pred_transform='sigmoid', threshold_type='float64', leaf_output_type='float64')
        # Walk the fitted boosting stages themselves instead of
        # range(n_estimators): with early stopping (n_iter_no_change)
        # scikit-learn keeps fewer stages in estimators_ than the n_estimators
        # hyperparameter asks for, and indexing by the hyperparameter would
        # run past the end of the array.
        for stage in sklearn_model.estimators_:
            # The tree of each stage is read from column 0.
            builder.append(cls.process_tree(stage[0].tree_, sklearn_model))

        return builder.commit()
Esempio n. 10
0
def test_model_builder(tmpdir, use_annotation, quantize, toolchain,
                       test_round_trip):
    # pylint: disable=R0914,R0915
    """Build the two-tree mushroom model by hand, compile it and check it.

    Both trees are assembled node by node with the model builder.  The
    committed model is optionally serialized and deserialized
    (``test_round_trip``), optionally annotated using the mushroom training
    data (``use_annotation``), exported as a shared library with quantization
    switched on or off (``quantize``), and the resulting predictor is handed
    to ``check_predictor``.
    """
    num_feature = 127
    pred_transform = 'sigmoid'
    builder = treelite.ModelBuilder(num_feature=num_feature,
                                    average_tree_output=False,
                                    pred_transform=pred_transform)

    # Every split in this model shares the operator '<', this threshold and
    # default_left=True; only the feature and the child keys vary.
    split_threshold = -9.53674316e-07

    # One node table per tree, listed in creation order.
    #   4-tuple: (node key, feature id, left child key, right child key)
    #   2-tuple: (node key, leaf value)
    mushroom_trees = (
        (
            (0, 29, 1, 2),
            (1, 56, 3, 4),
            (3, 60, 7, 8),
            (7, 1.89899647),
            (8, -1.94736838),
            (4, 21, 9, 10),
            (9, 1.78378379),
            (10, -1.98135197),
            (2, 109, 5, 6),
            (5, 67, 11, 12),
            (11, -1.9854598),
            (12, 0.938775539),
            (6, 1.87096775),
        ),
        (
            (0, 29, 1, 2),
            (1, 21, 3, 4),
            (3, 1.14607906),
            (4, 36, 7, 8),
            (7, -6.87994671),
            (8, -0.10659159),
            (2, 109, 5, 6),
            (5, 39, 9, 10),
            (9, -0.0930657759),
            (10, -1.15261209),
            (6, 1.00423074),
        ),
    )
    for node_table in mushroom_trees:
        tree = treelite.ModelBuilder.Tree()
        for node_key, *payload in node_table:
            if len(payload) == 1:
                tree[node_key].set_leaf_node(leaf_value=payload[0])
                continue
            feature_id, left_key, right_key = payload
            tree[node_key].set_numerical_test_node(
                feature_id=feature_id,
                opname='<',
                threshold=split_threshold,
                default_left=True,
                left_child_key=left_key,
                right_child_key=right_key)
        tree[0].set_root()
        builder.append(tree)

    model = builder.commit()
    if test_round_trip:
        # Replace the model with a copy that went through a checkpoint file.
        checkpoint_path = os.path.join(tmpdir, 'checkpoint.bin')
        model.serialize(checkpoint_path)
        model = treelite.Model.deserialize(checkpoint_path)
    assert model.num_feature == num_feature
    assert model.num_class == 1
    assert model.num_tree == 2

    annotation_path = os.path.join(tmpdir, 'annotation.json')
    if use_annotation:
        loaded = load_svmlight_file(dataset_db['mushroom'].dtrain,
                                    zero_based=True)
        dtrain = treelite_runtime.DMatrix(loaded[0], dtype='float32')
        annotator = treelite.Annotator()
        annotator.annotate_branch(model=model, dmat=dtrain, verbose=True)
        annotator.save(path=annotation_path)

    # 'NULL' is passed as the annotation input when annotation is disabled.
    annotate_in = annotation_path if use_annotation else 'NULL'
    quantize_flag = 1 if quantize else 0
    params = {
        'annotate_in': annotate_in,
        'quantize': quantize_flag,
        'parallel_comp': model.num_tree
    }
    libpath = os.path.join(tmpdir, 'mushroom' + _libext())
    model.export_lib(toolchain=toolchain, libpath=libpath, params=params,
                     verbose=True)
    predictor = treelite_runtime.Predictor(libpath=libpath, verbose=True)
    check_predictor(predictor, 'mushroom')