Esempio n. 1
0
def test_plot_estimator_and_lightgbm(tmpdir):
    """Plot each tree of a pygbm classifier alongside a LightGBM classifier.

    The test is skipped when ``graphviz`` or ``lightgbm`` cannot be imported.
    Both estimators are fitted on the same synthetic 3-class dataset, then
    ``plot_tree`` is called once for every tree index in
    ``range(n_trees * n_classes)`` and the test checks that an output file
    was written for each call.

    Parameters
    ----------
    tmpdir : pytest fixture
        Temporary directory in which the plot files are created.
    """
    pytest.importorskip('graphviz')
    lightgbm = pytest.importorskip('lightgbm')
    from pygbm.plotting import plot_tree

    n_classes = 3
    X, y = make_classification(n_samples=150,
                               n_classes=n_classes,
                               n_features=5,
                               n_informative=3,
                               n_redundant=0,
                               random_state=0)

    n_trees = 3
    est_pygbm = GradientBoostingClassifier(max_iter=n_trees,
                                           n_iter_no_change=None)
    est_pygbm.fit(X, y)
    est_lightgbm = lightgbm.LGBMClassifier(n_estimators=n_trees)
    est_lightgbm.fit(X, y)

    # Presumably one tree per class per boosting iteration -- the loop below
    # relies on n_trees * n_classes being the number of valid tree indices.
    n_total_trees = n_trees * n_classes
    for i in range(n_total_trees):
        # Use a distinct file per tree: with a single shared path the
        # existence check would pass trivially after the first iteration,
        # even if a later plot_tree call wrote nothing.
        filename = tmpdir.join('plot_mixed_predictors_{}.pdf'.format(i))
        plot_tree(est_pygbm,
                  est_lightgbm=est_lightgbm,
                  tree_index=i,
                  view=False,
                  filename=filename)
        assert filename.exists()
Esempio n. 2
0
def test_plot_grower(tmpdir, classification_data):
    """Grow one tree by hand on the classification fixture and plot it.

    The test is skipped when ``graphviz`` cannot be imported. Gradients and
    hessians are prepared with a ``BinaryCrossEntropy`` loss starting from
    its baseline prediction, a ``TreeGrower`` limited to 5 leaf nodes is
    grown on them, and the resulting plot file is checked for existence.

    Parameters
    ----------
    tmpdir : pytest fixture
        Temporary directory receiving the plot file.
    classification_data : pytest fixture
        Indexable whose first two items are passed to ``Dataset``
        (presumably features and targets -- confirm against the fixture).
    """
    pytest.importorskip('graphviz')
    from pygbm.plotting import plot_tree

    features, targets = classification_data[0], classification_data[1]
    dataset = Dataset(features, targets)
    trees_per_iter = 1
    loss = BinaryCrossEntropy()

    clf = GradientBoostingClassifier()

    sample_count = dataset.shape[0]
    gradients, hessians = loss.init_gradients_and_hessians(
        n_samples=sample_count, prediction_dim=trees_per_iter)
    encoded_y = clf._encode_y(dataset.y)
    baseline = loss.get_baseline_prediction(encoded_y, 1)

    # Start every raw prediction at the baseline value; the in-place add
    # keeps the (n_samples, trees_per_iter) shape of the zero array.
    raw_predictions = np.zeros(shape=(sample_count, trees_per_iter),
                               dtype=baseline.dtype)
    raw_predictions += baseline

    loss.update_gradients_and_hessians(
        gradients, hessians, encoded_y, raw_predictions)

    # Reuse the classifier's parameters, overriding only the leaf limit.
    options = OptionSet(clf.parameter_dict)
    options['max_leaf_nodes'] = 5

    tree_grower = TreeGrower(dataset, gradients, hessians, options)
    tree_grower.grow()

    out_path = tmpdir.join('plot_grower.pdf')
    plot_tree(tree_grower, view=False, filename=out_path)
    assert out_path.exists()
Esempio n. 3
0
def test_plot_estimator(tmpdir):
    """Plot every tree of a fitted ``GradientBoostingRegressor``.

    The test is skipped when ``graphviz`` cannot be imported. The regressor
    is fitted on the module-level ``X`` and ``y`` with ``max_iter=3``, then
    ``plot_tree`` is called once per tree index and the test checks that an
    output file was written for each call.

    Parameters
    ----------
    tmpdir : pytest fixture
        Temporary directory in which the plot files are created.
    """
    pytest.importorskip('graphviz')
    from pygbm.plotting import plot_tree

    n_trees = 3
    est = GradientBoostingRegressor(max_iter=n_trees)
    est.fit(X, y)
    for i in range(n_trees):
        # Use a distinct file per tree: with a single shared path the
        # existence check would pass trivially after the first iteration,
        # even if a later plot_tree call wrote nothing.
        filename = tmpdir.join('plot_predictor_{}.pdf'.format(i))
        plot_tree(est, tree_index=i, view=False, filename=filename)
        assert filename.exists()
Esempio n. 4
0
def test_plot_grower(tmpdir):
    """Grow a single tree on the module-level data and plot it.

    The test is skipped when ``graphviz`` cannot be imported. The
    module-level ``X`` is binned with a default ``BinMapper``, ``y`` (cast
    to float32) is used as the gradients, and a ``TreeGrower`` limited to
    5 leaf nodes is grown and plotted.

    Parameters
    ----------
    tmpdir : pytest fixture
        Temporary directory receiving the plot file.
    """
    pytest.importorskip('graphviz')
    from pygbm.plotting import plot_tree

    mapper = BinMapper()
    binned = mapper.fit_transform(X)

    # The targets double as gradients; the hessians are a single-element
    # array of ones (presumably meaning "constant hessian" -- confirm
    # against TreeGrower).
    grads = np.asarray(y, dtype=np.float32).copy()
    hess = np.ones(shape=1, dtype=np.float32)

    tree_grower = TreeGrower(binned, grads, hess, max_leaf_nodes=5)
    tree_grower.grow()

    out_path = tmpdir.join('plot_grower.pdf')
    plot_tree(tree_grower, view=False, filename=out_path)
    assert out_path.exists()
Esempio n. 5
0
def test_plot_estimator_and_lightgbm(tmpdir):
    """Plot each tree of a pygbm model alongside a LightGBM regressor.

    The test is skipped when ``graphviz`` or ``lightgbm`` cannot be imported.
    Both estimators are fitted on the module-level ``X`` and ``y`` with the
    same number of trees, then ``plot_tree`` is called once per tree index
    and the test checks that an output file was written for each call.

    Parameters
    ----------
    tmpdir : pytest fixture
        Temporary directory in which the plot files are created.
    """
    pytest.importorskip('graphviz')
    lightgbm = pytest.importorskip('lightgbm')
    from pygbm.plotting import plot_tree

    n_trees = 3
    est_pygbm = GradientBoostingMachine(max_iter=n_trees)
    est_pygbm.fit(X, y)
    est_lightgbm = lightgbm.LGBMRegressor(n_estimators=n_trees)
    est_lightgbm.fit(X, y)

    for i in range(n_trees):
        # Use a distinct file per tree: with a single shared path the
        # existence check would pass trivially after the first iteration,
        # even if a later plot_tree call wrote nothing.
        filename = tmpdir.join('plot_mixed_predictors_{}.pdf'.format(i))
        plot_tree(est_pygbm,
                  est_lightgbm=est_lightgbm,
                  tree_index=i,
                  view=False,
                  filename=filename)
        assert filename.exists()
from pygbm.binning import BinMapper
from pygbm.grower import TreeGrower
from pygbm import plotting

# A single random state is handed first to the data generator and then to the
# bin mapper, so the statements below are kept in this order.
rng = np.random.RandomState(0)

n_samples = int(1e7)
n_leaf_nodes = 5
X, y = make_classification(
    n_samples=n_samples,
    n_classes=2,
    n_features=5,
    n_informative=3,
    n_redundant=0,
    random_state=rng,
)

bin_mapper_ = BinMapper(random_state=rng)
X_binned = bin_mapper_.fit_transform(X)

# The class labels (cast to float32) serve as gradients; the hessians are a
# single-element array of ones.
gradients = np.asarray(y, dtype=np.float32).copy()
hessians = np.ones(shape=1, dtype=np.float32)


def _new_grower():
    """Build a fresh TreeGrower on the binned data with the leaf limit."""
    return TreeGrower(X_binned, gradients, hessians,
                      max_leaf_nodes=n_leaf_nodes)


# Warm-up run: triggers the compilation of the numba jit methods so that the
# compiler overhead is not recorded in the profile report.
_new_grower().grow()

# Second run, used to collect the timing statistics that will be included in
# the plot.
grower = _new_grower()
grower.grow()
plotting.plot_tree(grower)