import numpy as np

from sklearn.base import clone
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

from skgarden.utils.testing import assert_array_equal
from skgarden.utils.testing import assert_array_almost_equal
from skgarden.utils.testing import assert_almost_equal
from skgarden.utils.testing import assert_equal
from skgarden.utils.testing import assert_false
from skgarden.utils.testing import assert_less
from skgarden.utils.testing import assert_true

from skgarden.mondrian import MondrianTreeClassifier
from skgarden.mondrian import MondrianTreeRegressor


def check_proba_classif_convergence(X_train, y_train, mc):
    lb = LabelBinarizer()
    y_bin = lb.fit_transform(y_train)
    le = LabelEncoder()
    y_enc = le.fit_transform(y_train)

    # On the training points themselves, the fully grown tree places each
    # sample in a pure leaf, so the predicted probabilities are one-hot
    # vectors matching the labels.
    proba = mc.predict_proba(X_train)
    labels = mc.predict(X_train)
    assert_array_equal(proba, y_bin)
    assert_array_equal(labels, lb.inverse_transform(y_bin))

    # For points completely far away from the training data, the
    # predictions should converge to the empirical distribution of the
    # labels. X is scaled to lie between -1.0 and 1.0.
    X_inf = np.vstack(
        (30.0 * np.ones(X_train.shape[1]),
         -30.0 * np.ones(X_train.shape[1])))
    inf_proba = mc.predict_proba(X_inf)
    emp_proba = np.bincount(y_enc) / float(len(y_enc))
    assert_array_almost_equal(inf_proba, [emp_proba, emp_proba])
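# The tests below rely on a load_scaled_boston helper that is not defined
# in this excerpt. This is a minimal, hypothetical sketch of it, assuming
# it splits the Boston housing data and scales the features to [-1, 1]
# with the MinMaxScaler imported above (the convergence check relies on
# that range); the split parameters here are guesses.
def load_scaled_boston():
    from sklearn.datasets import load_boston
    from sklearn.model_selection import train_test_split

    X, y = load_boston(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=0)
    scaler = MinMaxScaler(feature_range=(-1.0, 1.0))
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test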
def test_proba_classif_convergence():
    X_train, _, y_train, _ = load_scaled_boston()
    y_train = np.round(y_train)
    mc = MondrianTreeClassifier(random_state=0)
    mc.fit(X_train, y_train)
    check_proba_classif_convergence(X_train, y_train, mc)

    mc.partial_fit(X_train, y_train)
    check_proba_classif_convergence(X_train, y_train, mc)
def test_weighted_decision_path_classif():
    X_train, X_test, y_train, y_test = load_scaled_boston()
    y_train = np.round(y_train)
    y_test = np.round(y_test)
    mtc = MondrianTreeClassifier(random_state=0)
    mtc.fit(X_train, y_train)
    check_weighted_decision_path_classif(mtc, X_test)

    mtc.partial_fit(X_train, y_train)
    check_weighted_decision_path_classif(mtc, X_test)
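# check_weighted_decision_path_classif is not defined in this excerpt.
# A minimal sketch of it, assuming it performs the same node-weight
# decomposition that test_weighted_decision_path_test_classif inlines
# below: predict_proba should equal the weighted sum of the class
# distributions at the nodes along each decision path.
def check_weighted_decision_path_classif(tree, X_test):
    weights = tree.weighted_decision_path(X_test)
    node_probas = (tree.tree_.value[:, 0, :] /
                   np.expand_dims(tree.tree_.n_node_samples, axis=1))
    probas = []
    for startptr, endptr in zip(weights.indptr[:-1], weights.indptr[1:]):
        curr_nodes = weights.indices[startptr:endptr]
        curr_weights = np.expand_dims(weights.data[startptr:endptr], axis=1)
        probas.append(np.sum(curr_weights * node_probas[curr_nodes], axis=0))
    assert_array_almost_equal(probas, tree.predict_proba(X_test), 5)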
def test_weighted_decision_path_test_classif():
    X_train, X_test, y_train, y_test = load_scaled_boston()
    y_train = np.round(y_train)
    y_test = np.round(y_test)
    mtc = MondrianTreeClassifier(random_state=0)
    mtc.fit(X_train, y_train)

    # Each row of the returned CSR matrix holds, for one test point, the
    # nodes it passes through and the weight each node contributes.
    weights = mtc.weighted_decision_path(X_test)
    node_probas = (mtc.tree_.value[:, 0, :] /
                   np.expand_dims(mtc.tree_.n_node_samples, axis=1))

    # Reconstruct predict_proba by hand: a weighted sum of the class
    # distributions at the nodes along each decision path.
    probas1 = []
    for startptr, endptr in zip(weights.indptr[:-1], weights.indptr[1:]):
        curr_nodes = weights.indices[startptr:endptr]
        curr_weights = np.expand_dims(weights.data[startptr:endptr], axis=1)
        curr_probas = node_probas[curr_nodes]
        probas1.append(np.sum(curr_weights * curr_probas, axis=0))

    probas2 = mtc.predict_proba(X_test)
    assert_array_almost_equal(probas1, probas2, 5)
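# Illustrative helper, not part of the original suite: pulls out the
# (node, weight) pairs for a single test point from the CSR matrix
# returned by weighted_decision_path, the same layout the loop above
# walks manually via indptr/indices/data.
def _nodes_and_weights_for_point(tree, X_test, i):
    weights = tree.weighted_decision_path(X_test)
    row = weights.getrow(i)  # CSR row for the i-th test point
    return row.indices, row.data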
estimators = [
    MondrianTreeRegressor(random_state=0),
    MondrianTreeClassifier(random_state=0)
]


def test_tree_predict():
    X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
    y = [-1, -1, -1, 1, 1, 1]
    T = [[-1, -1], [2, 2], [3, 2]]

    # This test depends on the random state, since the feature and the
    # threshold selected at every split are independent of the labels.
    for est_true in estimators:
        est = clone(est_true)
        est.set_params(random_state=0, max_depth=1)
        est.fit(X, y)