def test_node_insert_delete(tmpdir, toolchain):
    """Verify that tree nodes can be deleted and re-created before commit.

    A single tree is built with node 1 as root, nodes 1 and 5 are then
    deleted, and node 5 is re-created as a categorical split that becomes
    the new root. The model is compiled into a shared library and its
    predictions are compared with the expected leaf value over a grid of
    inputs that includes NaN.
    """
    num_feature = 3
    builder = treelite.ModelBuilder(num_feature=num_feature)
    builder.append(treelite.ModelBuilder.Tree())

    # Initial layout: node 1 (root) -> leaf 5 / numerical split 10 -> leaves 7, 8
    builder[0][1].set_root()
    builder[0][1].set_numerical_test_node(
        feature_id=2, opname='<', threshold=-0.5, default_left=True,
        left_child_key=5, right_child_key=10)
    builder[0][5].set_leaf_node(-1)
    builder[0][10].set_numerical_test_node(
        feature_id=0, opname='<=', threshold=0.5, default_left=False,
        left_child_key=7, right_child_key=8)
    builder[0][7].set_leaf_node(0.0)
    builder[0][8].set_leaf_node(1.0)

    # Drop the old root and its left leaf, then reuse key 5 for the new root
    del builder[0][1]
    del builder[0][5]
    builder[0][5].set_categorical_test_node(
        feature_id=1, left_categories=[1, 2, 4], default_left=True,
        left_child_key=20, right_child_key=10)
    builder[0][20].set_leaf_node(2.0)
    builder[0][5].set_root()

    model = builder.commit()
    assert model.num_feature == num_feature
    assert model.num_output_group == 1
    assert model.num_tree == 1

    libpath = os.path.join(tmpdir, 'libtest' + _libext())
    model.export_lib(toolchain=toolchain, libpath=libpath, verbose=True)

    predictor = treelite_runtime.Predictor(libpath=libpath)
    assert predictor.num_feature == num_feature
    assert predictor.num_output_group == 1
    assert predictor.pred_transform == 'identity'
    assert predictor.global_bias == 0.0
    assert predictor.sigmoid_alpha == 1.0

    def _expected_leaf(f0, f1):
        """Return the leaf value the rebuilt tree should give; it depends only on f0 and f1."""
        if f1 in [1, 2, 4] or np.isnan(f1):
            return 2.0
        if f0 <= 0.5 and not np.isnan(f0):
            return 0.0
        return 1.0

    for f0 in [-0.5, 0.5, 1.5, np.nan]:
        for f1 in [0, 1, 2, 3, 4, np.nan]:
            for f2 in [-1.0, -0.5, 1.0, np.nan]:
                x = np.array([f0, f1, f2])
                pred = predictor.predict_instance(x)
                expected_pred = _expected_leaf(f0, f1)
                if pred != expected_pred:
                    raise ValueError(
                        f'Prediction wrong for f0={f0}, f1={f1}, f2={f2}: ' +
                        f'expected_pred = {expected_pred} vs actual_pred = {pred}'
                    )
def process_model(cls, sklearn_model):
    """Process a GradientBoostingClassifier (multi-class classifier) to convert it
    into a Treelite model

    Parameters
    ----------
    cls :
        Class providing ``process_tree``, which converts one fitted tree.
    sklearn_model :
        Fitted scikit-learn model; must have been trained with ``init='zero'``.

    Returns
    -------
    The object produced by ``ModelBuilder.commit()``.

    Raises
    ------
    treelite.TreeliteError
        If ``sklearn_model.init`` is not ``'zero'``.
    """
    # Check for init='zero'
    if sklearn_model.init != 'zero':
        raise treelite.TreeliteError(
            "Gradient boosted trees must be trained with "
            "the option init='zero'")
    # Initialize Treelite model builder
    # Set random_forest=False for gradient boosted trees
    # Set num_output_group for multi-class classification
    # Set pred_transform='softmax' to obtain probability predictions
    builder = treelite.ModelBuilder(
        num_feature=sklearn_model.n_features_,
        num_output_group=sklearn_model.n_classes_,
        random_forest=False,
        pred_transform='softmax',
        threshold_type='float64',
        leaf_output_type='float64')
    # Process [number of fitted iterations] * [number of classes] trees.
    # Walk the fitted stages instead of range(n_estimators): with early stopping
    # (n_iter_no_change) scikit-learn keeps fewer stages in estimators_ than
    # n_estimators, and indexing by n_estimators would raise IndexError.
    for stage in sklearn_model.estimators_:
        for k in range(sklearn_model.n_classes_):
            builder.append(cls.process_tree(stage[k].tree_, sklearn_model))

    return builder.commit()
def process_model(cls, sklearn_model):
    """Convert a RandomForestClassifier (binary classifier) into a Treelite model.

    Each fitted estimator's ``tree_`` is converted with ``cls.process_tree``
    and appended to a builder configured with ``average_tree_output=True``.
    """
    builder_options = {
        'num_feature': sklearn_model.n_features_,
        'average_tree_output': True,
        'threshold_type': 'float64',
        'leaf_output_type': 'float64',
    }
    builder = treelite.ModelBuilder(**builder_options)

    for tree_id in range(sklearn_model.n_estimators):
        # Convert the tree_id-th fitted tree and hand it to the builder
        fitted_tree = sklearn_model.estimators_[tree_id].tree_
        builder.append(cls.process_tree(fitted_tree, sklearn_model))

    return builder.commit()
def toy_model_fixture():
    """Build and commit a two-feature model with one tree: a categorical split
    on feature 1 (left categories ``[0]``) over two leaves, -1.0 and 1.0."""
    model_builder = treelite.ModelBuilder(num_feature=2)
    toy_tree = treelite.ModelBuilder.Tree()

    # Root split: left child is node 1, right child is node 2
    toy_tree[0].set_categorical_test_node(
        feature_id=1,
        left_categories=[0],
        default_left=True,
        left_child_key=1,
        right_child_key=2,
    )
    toy_tree[1].set_leaf_node(-1.0)
    toy_tree[2].set_leaf_node(1.0)
    toy_tree[0].set_root()

    model_builder.append(toy_tree)
    return model_builder.commit()
def convert_model(model_name, num_feature, start_feature=0):
    """Build a Treelite model from ``<model_name>.json`` and export it in XGBoost format.

    Parameters
    ----------
    model_name :
        Path prefix; ``model_name + '.json'`` is read and
        ``model_name + '.model'`` is written.
    num_feature :
        Number of features passed to ``treelite.ModelBuilder``.
    start_feature :
        Forwarded unchanged to ``dfs_build`` for every tree.

    Returns
    -------
    The committed model, after it has been exported with the
    ``'binary:logistic'`` objective name.
    """
    builder = treelite.ModelBuilder(num_feature)
    # JSON interchange files are UTF-8; do not depend on the locale's default encoding.
    with open(model_name + '.json', encoding='utf-8') as f:
        ensemble = json.load(f)
    # The loaded JSON is iterated as a sequence with one entry per tree.
    for tree_json in ensemble:
        tree = treelite.ModelBuilder.Tree()
        tree[0].set_root()
        # dfs_build is defined elsewhere; it is handed the tree whose node 0 is the root
        dfs_build(tree, tree_json, start_feature)
        builder.append(tree)

    model = builder.commit()
    model.export_as_xgboost(model_name + '.model', 'binary:logistic')
    return model
def process_model(cls, sklearn_model):
    """Convert a RandomForestRegressor into a Treelite model.

    Each fitted estimator's ``tree_`` is converted with ``cls.process_tree``
    and appended to a builder created with ``random_forest=True``.
    """
    # random_forest=True marks the ensemble as a random forest
    builder = treelite.ModelBuilder(
        num_feature=sklearn_model.n_features_,
        random_forest=True,
    )

    # Convert the fitted trees one by one, in order
    for tree_id in range(sklearn_model.n_estimators):
        fitted_tree = sklearn_model.estimators_[tree_id].tree_
        converted = cls.process_tree(fitted_tree, sklearn_model)
        builder.append(converted)

    return builder.commit()
def process_model(cls, sklearn_model):
    """Convert a RandomForestClassifier (multi-class classifier) into a Treelite model.

    The builder is created with one output group per class and the
    ``'identity_multiclass'`` prediction transform; each fitted estimator's
    ``tree_`` is converted with ``cls.process_tree`` and appended.
    """
    # num_output_group and pred_transform are both required for multi-class
    builder_options = {
        'num_feature': sklearn_model.n_features_,
        'num_output_group': sklearn_model.n_classes_,
        'random_forest': True,
        'pred_transform': 'identity_multiclass',
    }
    builder = treelite.ModelBuilder(**builder_options)

    for tree_id in range(sklearn_model.n_estimators):
        # Convert the tree_id-th fitted tree and hand it to the builder
        fitted_tree = sklearn_model.estimators_[tree_id].tree_
        builder.append(cls.process_tree(fitted_tree, sklearn_model))

    return builder.commit()
def process_model(cls, sklearn_model):
    """Process a GradientBoostingRegressor to convert it into a Treelite model

    Parameters
    ----------
    cls :
        Class providing ``process_tree``, which converts one fitted tree.
    sklearn_model :
        Fitted scikit-learn model; must have been trained with ``init='zero'``.

    Returns
    -------
    The object produced by ``ModelBuilder.commit()``.

    Raises
    ------
    treelite.TreeliteError
        If ``sklearn_model.init`` is not ``'zero'``.
    """
    # Check for init='zero'
    if sklearn_model.init != 'zero':
        raise treelite.TreeliteError("Gradient boosted trees must be trained with "
                                     "the option init='zero'")
    # Initialize Treelite model builder
    # Set random_forest=False for gradient boosted trees
    builder = treelite.ModelBuilder(
        num_feature=sklearn_model.n_features_,
        random_forest=False,
        threshold_type='float64',
        leaf_output_type='float64')
    # Walk the fitted stages instead of range(n_estimators): with early stopping
    # (n_iter_no_change) scikit-learn keeps fewer stages in estimators_ than
    # n_estimators, and indexing by n_estimators would raise IndexError.
    for stage in sklearn_model.estimators_:
        # Each stage holds a single regression tree at index 0
        builder.append(cls.process_tree(stage[0].tree_, sklearn_model))

    return builder.commit()
def process_model(cls, sklearn_model):
    """Process a GradientBoostingClassifier (binary classifier) to convert it
    into a Treelite model

    Parameters
    ----------
    cls :
        Class providing ``process_tree``, which converts one fitted tree.
    sklearn_model :
        Fitted scikit-learn model; must have been trained with ``init='zero'``.

    Returns
    -------
    The object produced by ``ModelBuilder.commit()``.

    Raises
    ------
    treelite.TreeliteError
        If ``sklearn_model.init`` is not ``'zero'``.
    """
    # Check for init='zero'
    if sklearn_model.init != 'zero':
        raise treelite.TreeliteError("Gradient boosted trees must be trained with "
                                     "the option init='zero'")
    # Initialize Treelite model builder
    # Set average_tree_output=False for gradient boosted trees
    # Set pred_transform='sigmoid' to obtain probability predictions
    builder = treelite.ModelBuilder(
        num_feature=sklearn_model.n_features_,
        average_tree_output=False,
        pred_transform='sigmoid',
        threshold_type='float64',
        leaf_output_type='float64')
    # Walk the fitted stages instead of range(n_estimators): with early stopping
    # (n_iter_no_change) scikit-learn keeps fewer stages in estimators_ than
    # n_estimators, and indexing by n_estimators would raise IndexError.
    for stage in sklearn_model.estimators_:
        # Only the tree at index 0 of each stage is read for the binary case
        builder.append(cls.process_tree(stage[0].tree_, sklearn_model))

    return builder.commit()
def test_model_builder(tmpdir, use_annotation, quantize, toolchain, test_round_trip):
    # pylint: disable=R0914,R0915
    """Hand-build a two-tree model, compile it, and check its predictions.

    The model is optionally round-tripped through serialize/deserialize and
    optionally compiled with branch annotations and quantization before the
    resulting predictor is checked against the 'mushroom' dataset.
    """
    num_feature = 127
    pred_transform = 'sigmoid'
    builder = treelite.ModelBuilder(num_feature=num_feature, average_tree_output=False,
                                    pred_transform=pred_transform)

    # Every split in both trees uses the same operator, threshold and default direction
    split_threshold = -9.53674316e-07

    def add_split(target, key, feature_id, left_key, right_key):
        """Turn node `key` of `target` into a '<' test on `feature_id`."""
        target[key].set_numerical_test_node(feature_id=feature_id, opname='<',
                                            threshold=split_threshold, default_left=True,
                                            left_child_key=left_key,
                                            right_child_key=right_key)

    def add_leaf(target, key, value):
        """Turn node `key` of `target` into a leaf with output `value`."""
        target[key].set_leaf_node(leaf_value=value)

    # Build mushroom model: first tree
    first_tree = treelite.ModelBuilder.Tree()
    add_split(first_tree, 0, 29, 1, 2)
    add_split(first_tree, 1, 56, 3, 4)
    add_split(first_tree, 3, 60, 7, 8)
    add_leaf(first_tree, 7, 1.89899647)
    add_leaf(first_tree, 8, -1.94736838)
    add_split(first_tree, 4, 21, 9, 10)
    add_leaf(first_tree, 9, 1.78378379)
    add_leaf(first_tree, 10, -1.98135197)
    add_split(first_tree, 2, 109, 5, 6)
    add_split(first_tree, 5, 67, 11, 12)
    add_leaf(first_tree, 11, -1.9854598)
    add_leaf(first_tree, 12, 0.938775539)
    add_leaf(first_tree, 6, 1.87096775)
    first_tree[0].set_root()
    builder.append(first_tree)

    # Second tree
    second_tree = treelite.ModelBuilder.Tree()
    add_split(second_tree, 0, 29, 1, 2)
    add_split(second_tree, 1, 21, 3, 4)
    add_leaf(second_tree, 3, 1.14607906)
    add_split(second_tree, 4, 36, 7, 8)
    add_leaf(second_tree, 7, -6.87994671)
    add_leaf(second_tree, 8, -0.10659159)
    add_split(second_tree, 2, 109, 5, 6)
    add_split(second_tree, 5, 39, 9, 10)
    add_leaf(second_tree, 9, -0.0930657759)
    add_leaf(second_tree, 10, -1.15261209)
    add_leaf(second_tree, 6, 1.00423074)
    second_tree[0].set_root()
    builder.append(second_tree)

    model = builder.commit()
    if test_round_trip:
        # Write the model to disk and read it back before compiling
        checkpoint_path = os.path.join(tmpdir, 'checkpoint.bin')
        model.serialize(checkpoint_path)
        model = treelite.Model.deserialize(checkpoint_path)
    assert model.num_feature == num_feature
    assert model.num_class == 1
    assert model.num_tree == 2

    annotation_path = os.path.join(tmpdir, 'annotation.json')
    annotate_in = 'NULL'
    if use_annotation:
        features = load_svmlight_file(dataset_db['mushroom'].dtrain, zero_based=True)[0]
        dtrain = treelite_runtime.DMatrix(features, dtype='float32')
        annotator = treelite.Annotator()
        annotator.annotate_branch(model=model, dmat=dtrain, verbose=True)
        annotator.save(path=annotation_path)
        annotate_in = annotation_path

    if quantize:
        quantize_flag = 1
    else:
        quantize_flag = 0

    params = {
        'annotate_in': annotate_in,
        'quantize': quantize_flag,
        'parallel_comp': model.num_tree
    }

    libpath = os.path.join(tmpdir, 'mushroom' + _libext())
    model.export_lib(toolchain=toolchain, libpath=libpath, params=params, verbose=True)
    predictor = treelite_runtime.Predictor(libpath=libpath, verbose=True)
    check_predictor(predictor, 'mushroom')