def test_octavo(
    num_points,
    classes,
    xbound,
    ybound,
    zbound,
    max_depth,
    min_node_size,
    min_loss,
    expected,
):
    """Grow a random forest on the synthetic 'octavo' dataset and check its
    classification metrics against expected values.

    Parameters
    ----------
    num_points, classes, xbound, ybound, zbound :
        Forwarded to ``data_for_tests.make_octavo`` to build the dataset.
    max_depth, min_node_size, min_loss :
        Accepted for parametrization; note the forest below is grown with
        its own fixed hyperparameters (num_trees=20, max_features=2,
        min_node_size=1) — TODO confirm whether these params should be wired in.
    expected :
        Sequence of three expected metrics: [precision, sensitivity, fpr].
    """
    # Build the labeled dataset; last column is the target.
    xy_parent = data_for_tests.make_octavo(
        num_points, classes, xbound, ybound, zbound
    ).values
    X = xy_parent[:, :-1]
    y = xy_parent[:, -1]

    # Train the forest and predict on the training data itself.
    forest = random_forest.grow_random_forest(
        X, y, num_trees=20, max_features=2, min_node_size=1
    )
    predictions = random_forest.forest_predict(forest, X)
    targets = y

    # Confusion-matrix-based metrics: precision, sensitivity, false-positive rate.
    tfpns = evaluation.tfpn(predictions, targets)
    cm = evaluation.make_confusion_matrix(*tfpns, percentage=True)
    result = np.array(
        [evaluation.precision(cm), evaluation.sensitivity(cm), evaluation.fpr(cm)]
    )
    expected = np.array(expected)

    # BUG FIX: the original used np.any, which passes when even a single
    # metric is within tolerance. All three metrics must match expectations.
    assert np.all(np.abs(expected - result) < 0.01)
def test_random_forest_accuracy():
    """Train a random forest on the wine dataset and require >80% accuracy
    on a held-out test split.
    """
    X, t = load_wine(return_X_y=True)
    X_train, X_test, t_train, t_test = util.split_data(X, t, seed=0)

    forest = rf.random_forest(X_train, t_train)
    predictions = rf.forest_predict(X_test, forest)

    # Fraction of correctly classified test samples (idiomatic zip/sum
    # instead of an index-based counting loop).
    count = sum(1 for pred, actual in zip(predictions, t_test) if pred == actual)
    rf_score = count / len(t_test)

    # f-string message instead of a tuple: a tuple message renders poorly
    # in pytest failure output.
    assert rf_score > 0.8, (
        f"Prediction of bagged trees not good enough with {rf_score} accuracy."
    )
def test_diagonal_ndim(num_points, dim, max_features, expected, precision_bound):
    """Grow a random forest on the n-dimensional diagonal dataset and check
    classification metrics against expectations within ``precision_bound``.

    Parameters
    ----------
    num_points : number of points to generate.
    dim : dimensionality of the dataset.
    max_features : features considered per split when growing the forest.
    expected : sequence of three expected metrics [precision, sensitivity, fpr].
    precision_bound : absolute tolerance for each metric comparison.
    """
    # Build the labeled dataset; last column is the target.
    xy_parent = data_for_tests.make_diagonal_ndim(num_points, dim).values
    X = xy_parent[:, :-1]
    y = xy_parent[:, -1]

    # Train the forest and predict on the training data itself.
    forest = random_forest.grow_random_forest(
        X, y, num_trees=30, max_depth=20, max_features=max_features, min_node_size=1
    )
    predictions = random_forest.forest_predict(forest, X)
    targets = y

    # Confusion-matrix-based metrics: precision, sensitivity, false-positive rate.
    tfpns = evaluation.tfpn(predictions, targets)
    cm = evaluation.make_confusion_matrix(*tfpns, percentage=True)
    result = np.array(
        [evaluation.precision(cm), evaluation.sensitivity(cm), evaluation.fpr(cm)]
    )
    expected = np.array(expected)

    # BUG FIX: np.any passed when a single metric was within tolerance;
    # all three metrics must match. Also removed a leftover debug print.
    assert np.all(np.abs(expected - result) < precision_bound)
def test_random_forest_cv_accuracy():
    """K-fold cross-validated random-forest accuracy on the wine dataset;
    the mean accuracy across folds must exceed 80%.
    """
    X, t = load_wine(return_X_y=True)
    cv_data = util.crossvalidate_data(X, t, seed=0)
    K = len(cv_data)

    rf_score = 0
    # Renamed the fold index from `i`: the original reused `i` for the inner
    # counting loop, shadowing the fold index within each iteration.
    for fold in range(K):
        X_train = cv_data[fold][0]
        t_train = cv_data[fold][1]
        X_test = cv_data[fold][2]
        t_test = cv_data[fold][3]

        forest = rf.random_forest(X_train, t_train)
        predictions = rf.forest_predict(X_test, forest)

        # Per-fold accuracy, accumulated for the average below.
        count = sum(1 for pred, actual in zip(predictions, t_test) if pred == actual)
        rf_score += count / len(t_test)

    # BUG FIX: average over the actual number of folds K, not hard-coded 5.
    rf_score = rf_score / K

    assert rf_score > 0.8, (
        f"Prediction of bagged trees not good enough with {rf_score} accuracy."
    )
import data_for_tests num_points, dim, max_features = 1000, 5, 2 # generate data xy_parent = data_for_tests.make_diagonal_ndim(num_points, dim).values X = xy_parent[:, :-1] y = xy_parent[:, -1] # train the model -- grow the forest forest = random_forest.grow_random_forest( X, y, num_trees=30, max_depth=20, max_features=max_features, min_node_size=1 ) # make predictions predictions = random_forest.forest_predict(forest, X) # calculate the numbers of true positives, false positives, true negatives, false negatives tfpns = evaluation.tfpn(predictions, y) # calculate the confusion matrix cm = evaluation.make_confusion_matrix(*tfpns, percentage=True) # calculate metrics: precision, sensitivity, false-positive-rate metrics = np.array( [evaluation.precision(cm), evaluation.sensitivity(cm), evaluation.fpr(cm)] ) print( f"{num_points} points are randomly generated in the unit cube in {dim}-dimensions.\n \ Those with the sum of coordinates >= {dim}/2 are labeled 1, \n those below are \