def main():
    """Train and evaluate a random forest classifier on the Iris dataset.

    Loads the data with ``load_iris``, randomly assigns 80% of the samples
    to the training set and the rest to the test set, fits a
    ``RandomForestClassifier`` on ds-arrays built from the training data,
    and prints a table of true vs. predicted labels followed by the
    classifier's score.

    The random split is drawn from NumPy's global random state, so callers
    wanting a reproducible run can call ``np.random.seed`` beforehand.
    """
    x, y = load_iris(return_X_y=True)

    # Draw a random permutation of the sample indices directly. The previous
    # code called ``shuffle(indices)`` and ignored the return value, which is
    # only correct if the imported ``shuffle`` permutes its argument in
    # place; a copy-returning shuffle would leave the split unrandomised.
    indices = np.random.permutation(len(x))

    # use 80% of samples for training
    split = int(0.8 * len(x))
    train_idx = indices[:split]
    test_idx = indices[split:]

    # Train the RF classifier
    print("- Training Random Forest classifier with %s samples of Iris "
          "dataset." % len(train_idx))
    # NOTE(review): the second argument to ds.array is presumably the block
    # size (rows, columns) - confirm against the dislib documentation.
    x_train = ds.array(x[train_idx], (10, 4))
    # Labels are reshaped into a single column before being wrapped.
    y_train = ds.array(y[train_idx][:, np.newaxis], (10, 1))
    # NOTE(review): 10 is presumably the number of trees - confirm.
    forest = RandomForestClassifier(10)
    forest.fit(x_train, y_train)

    # Test the trained RF classifier
    # end='' keeps the cursor on this line so that the "Predicted values"
    # message printed below continues the same line.
    print("- Testing the classifier.", end='')
    x_test = ds.array(x[test_idx], (10, 4))
    y_real = ds.array(y[test_idx][:, np.newaxis], (10, 1))
    y_pred = forest.predict(x_test)
    # The value returned by score() is passed through compss_wait_on before
    # it is printed.
    score = compss_wait_on(forest.score(x_test, y_real))

    # Put results in fancy dataframe and print the accuracy
    df = pd.DataFrame(data=list(zip(y[test_idx], y_pred.collect())),
                      columns=['Label', 'Predicted'])
    print(" Predicted values: \n\n%s" % df)
    print("\n- Classifier accuracy: %s" % score)
def test_make_classification_hard_vote_predict(self):
    """Check RandomForestClassifier prediction accuracy with hard_vote=True.

    A synthetic 3-class problem is generated; even-indexed samples are used
    for training and odd-indexed samples for testing. The fraction of
    correctly predicted test labels must exceed 0.7.
    """
    dataset_params = dict(
        n_samples=3000,
        n_features=10,
        n_classes=3,
        n_informative=4,
        n_redundant=2,
        n_repeated=1,
        n_clusters_per_class=2,
        shuffle=True,
        random_state=0,
    )
    x, y = make_classification(**dataset_params)

    # Interleaved split: even positions train, odd positions test.
    train_samples, train_labels = x[::2], y[::2]
    test_samples, y_test = x[1::2], y[1::2]

    x_train = ds.array(train_samples, (300, 10))
    # Training labels are wrapped as a single-column ds-array.
    y_train = ds.array(train_labels[:, np.newaxis], (300, 1))
    x_test = ds.array(test_samples, (300, 10))

    rf = RandomForestClassifier(
        random_state=0,
        sklearn_max=10,
        hard_vote=True,
    )
    rf.fit(x_train, y_train)

    y_pred = rf.predict(x_test).collect()
    n_matches = np.count_nonzero(y_pred == y_test)
    accuracy = n_matches / len(y_test)
    self.assertGreater(accuracy, 0.7)
def test_make_classification_predict_and_distr_depth(self):
    """Check RandomForestClassifier fit/predict accuracy with distr_depth=2.

    A synthetic 3-class problem is generated; the first half of the samples
    is used for training and the second half for testing. The fraction of
    correctly predicted test labels must exceed 0.7.
    """
    dataset_params = dict(
        n_samples=3000,
        n_features=10,
        n_classes=3,
        n_informative=4,
        n_redundant=2,
        n_repeated=1,
        n_clusters_per_class=2,
        shuffle=True,
        random_state=0,
    )
    x, y = make_classification(**dataset_params)

    # Contiguous split at the midpoint of each array.
    x_mid = len(x) // 2
    y_mid = len(y) // 2

    x_train = ds.array(x[:x_mid], (300, 10))
    # Training labels are wrapped as a single-column ds-array.
    y_train = ds.array(y[:y_mid][:, np.newaxis], (300, 1))
    x_test = ds.array(x[x_mid:], (300, 10))
    y_test = y[y_mid:]

    rf = RandomForestClassifier(distr_depth=2, random_state=0)
    rf.fit(x_train, y_train)

    y_pred = rf.predict(x_test).collect()
    n_matches = np.count_nonzero(y_pred == y_test)
    accuracy = n_matches / len(y_test)
    self.assertGreater(accuracy, 0.7)