Example #1
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder
from skgarden.mondrian import MondrianTreeClassifier
from skgarden.utils.testing import assert_array_equal
from skgarden.utils.testing import assert_array_almost_equal


def test_proba_classif_convergence():
    X_train, _, y_train, _ = load_scaled_boston()
    y_train = np.round(y_train)
    mc = MondrianTreeClassifier(random_state=0)
    mc.fit(X_train, y_train)

    lb = LabelBinarizer()
    y_bin = lb.fit_transform(y_train)

    le = LabelEncoder()
    y_enc = le.fit_transform(y_train)

    proba = mc.predict_proba(X_train)
    labels = mc.predict(X_train)
    assert_array_equal(proba, y_bin)
    assert_array_equal(labels, lb.inverse_transform(y_bin))

    # For points far away from the training data, the predicted
    # probabilities should converge to the empirical distribution
    # of the training labels. X is scaled to lie between -1.0 and 1.0.
    X_inf = np.vstack(
        (30.0 * np.ones(X_train.shape[1]), -30.0 * np.ones(X_train.shape[1])))
    inf_proba = mc.predict_proba(X_inf)
    emp_proba = np.bincount(y_enc) / float(len(y_enc))
    assert_array_almost_equal(inf_proba, [emp_proba, emp_proba])
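All of these examples rely on a load_scaled_boston helper that is not shown on this page. Judging from the comment above (features scaled between -1.0 and 1.0) and the MinMaxScaler import in Example #5, a plausible reconstruction is the sketch below; the split parameters are assumptions, and note that load_boston was removed from scikit-learn in version 1.2:

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


def load_scaled_boston():
    # Scale every feature to [-1.0, 1.0] so that points at +/-30.0 lie far
    # outside the training range, as the convergence check above assumes.
    X, y = load_boston(return_X_y=True)
    X = MinMaxScaler(feature_range=(-1.0, 1.0)).fit_transform(X)
    return train_test_split(X, y, random_state=0)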
Example #2
def test_proba_classif_convergence():
    X_train, _, y_train, _ = load_scaled_boston()
    y_train = np.round(y_train)
    mc = MondrianTreeClassifier(random_state=0)
    mc.fit(X_train, y_train)
    check_proba_classif_convergence(X_train, y_train, mc)

    mc.partial_fit(X_train, y_train)
    check_proba_classif_convergence(X_train, y_train, mc)
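The check_proba_classif_convergence helper called here is local to the test suite and is not shown. Based on Example #1, which performs the same checks inline, a plausible sketch (using the same imports as Example #1) is:

def check_proba_classif_convergence(X_train, y_train, clf):
    # On the training points themselves, the tree should reproduce the
    # one-hot encoding of the labels exactly.
    lb = LabelBinarizer()
    y_bin = lb.fit_transform(y_train)
    assert_array_equal(clf.predict_proba(X_train), y_bin)
    assert_array_equal(clf.predict(X_train), lb.inverse_transform(y_bin))

    # Far from the training data, the predicted probabilities should fall
    # back to the empirical distribution of the training labels.
    le = LabelEncoder()
    y_enc = le.fit_transform(y_train)
    X_inf = np.vstack((30.0 * np.ones(X_train.shape[1]),
                       -30.0 * np.ones(X_train.shape[1])))
    emp_proba = np.bincount(y_enc) / float(len(y_enc))
    assert_array_almost_equal(clf.predict_proba(X_inf),
                              [emp_proba, emp_proba])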
Example #3
def test_weighted_decision_path_classif():
    X_train, X_test, y_train, y_test = load_scaled_boston()
    y_train = np.round(y_train)
    y_test = np.round(y_test)

    mtc = MondrianTreeClassifier(random_state=0)
    mtc.fit(X_train, y_train)
    check_weighted_decision_path_classif(mtc, X_test)

    mtc.partial_fit(X_train, y_train)
    check_weighted_decision_path_classif(mtc, X_test)
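check_weighted_decision_path_classif is another unshown local helper; Example #4 below carries out essentially the same verification inline, reconstructing predict_proba from the weighted decision path.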
Example #4
def test_weighted_decision_path_test_classif():
    X_train, X_test, y_train, y_test = load_scaled_boston()
    y_train = np.round(y_train)
    y_test = np.round(y_test)
    n_train = X_train.shape[0]

    mtc = MondrianTreeClassifier(random_state=0)
    mtc.fit(X_train, y_train)
    weights = mtc.weighted_decision_path(X_test)
    # Per-node class distributions: class counts at each node divided by
    # the number of samples that reached it.
    node_probas = (mtc.tree_.value[:, 0, :] /
                   np.expand_dims(mtc.tree_.n_node_samples, axis=1))
    probas1 = []

    # weights is a sparse CSR matrix with one row per test sample; the
    # nonzero entries of a row hold the weights of the nodes that sample
    # passes through. Averaging node_probas with these weights should
    # reproduce predict_proba.
    for startptr, endptr in zip(weights.indptr[:-1], weights.indptr[1:]):
        curr_nodes = weights.indices[startptr:endptr]
        curr_weights = np.expand_dims(weights.data[startptr:endptr], axis=1)
        curr_probas = node_probas[curr_nodes]
        probas1.append(np.sum(curr_weights * curr_probas, axis=0))

    probas2 = mtc.predict_proba(X_test)
    assert_array_almost_equal(probas1, probas2, 5)
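As an aside, the row-by-row CSR walk in Example #4 is equivalent to a single sparse matrix product; the loop and assertion could be replaced by this sketch, reusing the same weights, node_probas, and probas2:

# Multiplying the sparse (n_samples, n_nodes) weights matrix by the dense
# (n_nodes, n_classes) node_probas array yields the same weighted averages,
# one row per test sample.
probas1_alt = weights.dot(node_probas)
assert_array_almost_equal(probas1_alt, probas2, 5)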
Example #5
import numpy as np

from sklearn.base import clone
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from skgarden.utils.testing import assert_array_equal
from skgarden.utils.testing import assert_array_almost_equal
from skgarden.utils.testing import assert_almost_equal
from skgarden.utils.testing import assert_equal
from skgarden.utils.testing import assert_false
from skgarden.utils.testing import assert_less
from skgarden.utils.testing import assert_true

from skgarden.mondrian import MondrianTreeClassifier
from skgarden.mondrian import MondrianTreeRegressor

estimators = [
    MondrianTreeRegressor(random_state=0),
    MondrianTreeClassifier(random_state=0)
]


def test_tree_predict():
    X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
    y = [-1, -1, -1, 1, 1, 1]
    T = [[-1, -1], [2, 2], [3, 2]]

    # This test depends on the random state, since the feature and
    # the threshold selected at every split are independent of the
    # labels.
    for est_true in estimators:
        est = clone(est_true)
        est.set_params(random_state=0, max_depth=1)
        est.fit(X, y)