Example #1
def test_octavo(
    num_points,
    classes,
    xbound,
    ybound,
    zbound,
    max_depth,
    min_node_size,
    min_loss,
    expected,
):
    xy_parent = data_for_tests.make_octavo(
        num_points, classes, xbound, ybound, zbound
    ).values
    X = xy_parent[:, :-1]
    y = xy_parent[:, -1]

    forest = random_forest.grow_random_forest(
        X, y, num_trees=20, max_features=2, min_node_size=1
    )
    predictions = random_forest.forest_predict(forest, X)
    targets = y
    tfpns = evaluation.tfpn(predictions, targets)
    cm = evaluation.make_confusion_matrix(*tfpns, percentage=True)
    result = np.array(
        [evaluation.precision(cm), evaluation.sensitivity(cm), evaluation.fpr(cm)]
    )
    expected = np.array(expected)
    assert np.all(np.abs(expected - result) < 0.01)  # every metric within tolerance
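The snippet above is only the test body; its arguments come from a pytest parametrization that was not captured with it. Below is a minimal sketch of what the missing decorator could look like. The argument values are placeholders chosen for illustration, not the project's actual fixtures.

import pytest

# Hypothetical parametrization for the test above; the values are placeholders.
@pytest.mark.parametrize(
    "num_points, classes, xbound, ybound, zbound, max_depth, min_node_size, "
    "min_loss, expected",
    [
        (1000, [0, 1], [0, 1], [0, 1], [0, 1], 10, 1, 0.0, [0.95, 0.95, 0.05]),
    ],
)
def test_octavo(num_points, classes, xbound, ybound, zbound,
                max_depth, min_node_size, min_loss, expected):
    ...  # body as in Example #1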
Example #2
def test_random_forest_accuracy():
    X, t = load_wine(return_X_y=True)
    X_train, X_test, t_train, t_test = util.split_data(X, t, seed=0)
    forest = rf.random_forest(X_train, t_train)
    predictions = rf.forest_predict(X_test, forest)
    count = 0
    for i in range(len(t_test)):
        if predictions[i] == t_test[i]:
            count += 1
    rf_score = count / len(t_test)
    assert rf_score > 0.8, (
        f"Prediction of bagged trees not good enough: accuracy {rf_score:.3f}."
    )
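The element-by-element counting loop can also be collapsed into a single vectorized expression. A minimal equivalent, assuming predictions and t_test are array-like of the same length:

import numpy as np

# Boolean comparison followed by the mean gives the fraction of correct predictions.
rf_score = float(np.mean(np.asarray(predictions) == np.asarray(t_test)))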
Example #3
def test_diagonal_ndim(num_points, dim, max_features, expected, precision_bound):
    xy_parent = data_for_tests.make_diagonal_ndim(num_points, dim).values
    X = xy_parent[:, :-1]
    y = xy_parent[:, -1]

    forest = random_forest.grow_random_forest(
        X, y, num_trees=30, max_depth=20, max_features=max_features, min_node_size=1
    )
    predictions = random_forest.forest_predict(forest, X)
    targets = y
    tfpns = evaluation.tfpn(predictions, targets)
    cm = evaluation.make_confusion_matrix(*tfpns, percentage=True)
    result = np.array(
        [evaluation.precision(cm), evaluation.sensitivity(cm), evaluation.fpr(cm)]
    )
    expected = np.array(expected)
    assert np.all(np.abs(expected - result) < precision_bound)  # every metric within tolerance
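The three numbers compared against expected are standard confusion-matrix metrics. As a reminder of the definitions (the project's evaluation functions take a confusion matrix, so the count-based signatures below are illustrative only):

def precision(tp, fp):
    # fraction of positive predictions that are correct
    return tp / (tp + fp)

def sensitivity(tp, fn):
    # true-positive rate, also called recall
    return tp / (tp + fn)

def fpr(fp, tn):
    # false-positive rate
    return fp / (fp + tn)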
Example #4
def test_random_forest_cv_accuracy():
    X, t = load_wine(return_X_y=True)
    cv_data = util.crossvalidate_data(X, t, seed=0)
    K = len(cv_data)
    rf_score = 0
    for i in range(K):
        X_train = cv_data[i][0]
        t_train = cv_data[i][1]
        X_test = cv_data[i][2]
        t_test = cv_data[i][3]
        forest = rf.random_forest(X_train, t_train)
        predictions = rf.forest_predict(X_test, forest)
        count = 0
        for j in range(len(t_test)):
            if predictions[j] == t_test[j]:
                count += 1
        rf_score += count / len(t_test)
    # average the per-fold accuracies over the K folds
    rf_score = rf_score / K
    assert rf_score > 0.8, (
        f"Prediction of bagged trees not good enough: accuracy {rf_score:.3f}."
    )
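The loop above assumes util.crossvalidate_data returns a list of K tuples laid out as (X_train, t_train, X_test, t_test). A minimal sketch of a K-fold split with that layout, written as an assumption about the interface rather than the actual implementation:

import numpy as np

def crossvalidate_data_sketch(X, t, k=5, seed=0):
    # Shuffle the row indices once, then cut them into k disjoint folds.
    rng = np.random.default_rng(seed)
    folds = np.array_split(rng.permutation(len(t)), k)
    splits = []
    for i in range(k):
        test_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
        splits.append((X[train_idx], t[train_idx], X[test_idx], t[test_idx]))
    return splits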
Example #5
import numpy as np

import data_for_tests
import evaluation
import random_forest

num_points, dim, max_features = 1000, 5, 2

# generate data
xy_parent = data_for_tests.make_diagonal_ndim(num_points, dim).values
X = xy_parent[:, :-1]
y = xy_parent[:, -1]

# train the model -- grow the forest
forest = random_forest.grow_random_forest(
    X, y, num_trees=30, max_depth=20, max_features=max_features, min_node_size=1
)

# make predictions
predictions = random_forest.forest_predict(forest, X)

# calculate the numbers of true positives, false positives, true negatives, false negatives
tfpns = evaluation.tfpn(predictions, y)

# calculate the confusion matrix
cm = evaluation.make_confusion_matrix(*tfpns, percentage=True)

# calculate metrics: precision, sensitivity, false-positive-rate
metrics = np.array(
    [evaluation.precision(cm), evaluation.sensitivity(cm), evaluation.fpr(cm)]
)

print(
f"{num_points} points are randomly generated in the unit cube in {dim}-dimensions.\n \
Those with the sum of coordinates >= {dim}/2 are labeled 1, \n those below are \