def test_depth_dist():
    """
    test for depth_dist function

    Fits a single-tree random forest on two 2d Gaussian clusters, builds the
    kernel matrix from that forest and checks that ``smooth_rf.depth_dist``
    returns a matrix that

    * has the same shape as the kernel matrix,
    * has a diagonal made only of zeros, and
    * contains no negative entries.

    If ``remove_0_from_Ut_prime`` leaves nothing behind, the kernel itself is
    expected to compare equal to 0 instead.
    """
    X_train = np.concatenate(
        (np.random.normal(loc=(1, 2), scale=.6, size=(100, 2)),
         np.random.normal(loc=(-1.2, -.5), scale=.6, size=(100, 2))),
        axis=0)
    # builtin ``int`` replaces the ``np.int`` alias, which newer NumPy
    # releases no longer provide
    y_train = np.concatenate((np.zeros(100, dtype=int),
                              np.ones(100, dtype=int)))
    amount = 200

    data = np.array(X_train[:amount, :])

    # creating a random forest
    rf_class_known = sklearn.ensemble.RandomForestClassifier(
        n_estimators=1, min_samples_leaf=1)
    random_forest = rf_class_known.fit(X=data,
                                       y=y_train[:amount].ravel())

    depth_dict, max_depth = smooth_rf.calc_depth_for_forest(random_forest,
                                                            verbose=False)

    Vt_dict = smooth_rf.make_Vt_mat(random_forest,
                                    data,
                                    depth_dict=depth_dict,
                                    verbose=False)

    Ut_prime_dict = smooth_rf.make_Ut_prime_mat_no_sym(Vt_dict,
                                                       Vt_dict,
                                                       max_depth=max_depth,
                                                       verbose=False)

    Ut_prime_dict = smooth_rf.remove_0_from_Ut_prime(Ut_prime_dict)

    # the kernel is needed in both branches below
    K_mat = smooth_rf.make_kernel(Ut_prime_dict)

    if len(Ut_prime_dict) > 0:
        DD_mat = smooth_rf.depth_dist(K_mat)

        assert K_mat.shape == DD_mat.shape, \
            "dimensions between K_mat and DD_mat should be the same"

        # np.diag needs a dense array, so sparse results are densified first;
        # the public ``sparse.COO`` name is used rather than a private
        # module path
        if isinstance(DD_mat, sparse.COO):
            DD_dense = DD_mat.todense()
        else:
            DD_dense = DD_mat

        assert np.all(np.diag(DD_dense) == 0), \
            "diagonal should be naturally 0 (has error)"

        assert np.all(DD_mat >= 0), \
            "all entries should be positive in DD (has error)"
    else:
        assert K_mat == 0, \
            "when you provide an empty Ut_prime_dict you should get a 0"
def test_make_kernel():
    """
    test for make_kernel

    Fits a single-tree random forest on two 2d Gaussian clusters, derives the
    Ut_prime matrices from it and checks that ``smooth_rf.make_kernel``
    returns a 2d matrix whose first two dimensions are equal. If
    ``remove_0_from_Ut_prime`` leaves nothing behind, the kernel is expected
    to compare equal to 0 instead.
    """
    X_train = np.concatenate(
        (np.random.normal(loc=(1, 2), scale=.6, size=(100, 2)),
         np.random.normal(loc=(-1.2, -.5), scale=.6, size=(100, 2))),
        axis=0)
    # builtin ``int`` replaces the ``np.int`` alias, which newer NumPy
    # releases no longer provide
    y_train = np.concatenate((np.zeros(100, dtype=int),
                              np.ones(100, dtype=int)))
    amount = 200

    data = np.array(X_train[:amount, :])

    # creating a random forest
    rf_class_known = sklearn.ensemble.RandomForestClassifier(
        n_estimators=1, min_samples_leaf=1)
    random_forest = rf_class_known.fit(X=data,
                                       y=y_train[:amount].ravel())

    depth_dict, max_depth = smooth_rf.calc_depth_for_forest(random_forest,
                                                            verbose=False)

    Vt_dict = smooth_rf.make_Vt_mat(random_forest,
                                    data,
                                    depth_dict=depth_dict,
                                    verbose=False)

    Ut_prime_dict = smooth_rf.make_Ut_prime_mat_no_sym(Vt_dict,
                                                       Vt_dict,
                                                       max_depth=max_depth,
                                                       verbose=False)

    Ut_prime_dict = smooth_rf.remove_0_from_Ut_prime(Ut_prime_dict)

    # the kernel is needed in both branches below
    K_mat = smooth_rf.make_kernel(Ut_prime_dict)

    if len(Ut_prime_dict) > 0:
        # both Vt inputs were the same, so the kernel should be square
        n_rows = K_mat.shape[0]
        assert K_mat.shape[0:2] == (n_rows, n_rows), \
            "returned K matrix is not symmetric when the inputs were."

        assert len(K_mat.shape) == 2, \
            "returned K matrix is not 2d as expected."
    else:
        assert K_mat == 0, \
            "when you provide an empty Ut_prime_dict you should get a 0"
def test_make_Vt_mat():
    """
    tests for make_Vt_mat

    Fits a 100-tree random forest on two 2d Gaussian clusters and checks that
    ``smooth_rf.make_Vt_mat`` returns one entry per tree. For 10 randomly
    drawn trees it further checks that the entry

    * is a dictionary,
    * holds matrices with as many rows as there are observations, and
    * holds matrices whose column counts add up to the length of that
      tree's ``children_left`` array.
    """
    n_trees = 100

    X_train = np.concatenate(
        (np.random.normal(loc=(1, 2), scale=.6, size=(100, 2)),
         np.random.normal(loc=(-1.2, -.5), scale=.6, size=(100, 2))),
        axis=0)
    # builtin ``int`` replaces the ``np.int`` alias, which newer NumPy
    # releases no longer provide
    y_train = np.concatenate((np.zeros(100, dtype=int),
                              np.ones(100, dtype=int)))
    amount = 200

    data = np.array(X_train[:amount, :])

    # creating a random forest
    rf_class_known = sklearn.ensemble.RandomForestClassifier(
        n_estimators=n_trees, min_samples_leaf=1)
    random_forest = rf_class_known.fit(X=data,
                                       y=y_train[:amount].ravel())

    Vt_dict = smooth_rf.make_Vt_mat(random_forest, data, verbose=False)

    assert len(Vt_dict) == n_trees, \
        "incorrect number of trees suggested in the full Vt output"

    # spot-check a handful of randomly selected trees
    for _ in range(10):
        r_idx = np.random.randint(n_trees)
        random_Vt_dict = Vt_dict[r_idx]

        assert isinstance(random_Vt_dict, dict), \
            "output of _make_Vt_mat_tree is not a dictionary"

        assert all(x.shape[0] == data.shape[0]
                   for x in random_Vt_dict.values()), \
            "output of _make_Vt_mat_tree elements have incorrect number of rows"

        total_columns = sum(x.shape[1] for x in random_Vt_dict.values())
        tree = random_forest.estimators_[r_idx].tree_

        assert total_columns == len(tree.children_left), \
            "output of split of Vt matrices have more columns than a" + \
            " full Vt mat would"