Example #1
0
def main():
    """Train and evaluate a random forest classifier on the Iris dataset.

    The sample indices are shuffled, the first 80% are used for fitting and
    the remainder for evaluation. The per-sample predictions and the score
    returned by the classifier are printed to standard output.
    """
    features, labels = load_iris(return_X_y=True)
    n_samples = len(features)

    order = np.arange(n_samples)
    shuffle(order)

    # Split point: the first 80% of the shuffled indices go to training
    cut = int(0.8 * n_samples)
    train_idx, test_idx = order[:cut], order[cut:]

    # Fit the forest on the training partition
    print("- Training Random Forest classifier with %s samples of Iris "
          "dataset." % len(train_idx))
    x_train = ds.array(features[train_idx], (10, 4))
    y_train = ds.array(labels[train_idx][:, np.newaxis], (10, 1))
    forest = RandomForestClassifier(10)
    forest.fit(x_train, y_train)

    # Evaluate on the held-out partition
    print("- Testing the classifier.", end='')
    test_labels = labels[test_idx]
    x_test = ds.array(features[test_idx], (10, 4))
    y_real = ds.array(test_labels[:, np.newaxis], (10, 1))
    y_pred = forest.predict(x_test)

    score = compss_wait_on(forest.score(x_test, y_real))

    # Tabulate true vs. predicted labels and report the score
    rows = list(zip(test_labels, y_pred.collect()))
    df = pd.DataFrame(data=rows, columns=['Label', 'Predicted'])
    print(" Predicted values: \n\n%s" % df)
    print("\n- Classifier accuracy: %s" % score)
Example #2
0
    def test_make_classification_hard_vote_predict(self):
        """Check that prediction with hard_vote=True exceeds 70% accuracy.

        Even-indexed samples of a synthetic three-class dataset are used for
        fitting, odd-indexed samples for evaluation.
        """
        dataset_args = dict(
            n_samples=3000,
            n_features=10,
            n_classes=3,
            n_informative=4,
            n_redundant=2,
            n_repeated=1,
            n_clusters_per_class=2,
            shuffle=True,
            random_state=0,
        )
        x, y = make_classification(**dataset_args)

        # Interleaved split: even rows train, odd rows test
        even = slice(None, None, 2)
        odd = slice(1, None, 2)
        x_train = ds.array(x[even], (300, 10))
        y_train = ds.array(y[even][:, np.newaxis], (300, 1))
        x_test = ds.array(x[odd], (300, 10))
        y_test = y[odd]

        rf = RandomForestClassifier(
            random_state=0, sklearn_max=10, hard_vote=True
        )
        rf.fit(x_train, y_train)

        y_pred = rf.predict(x_test).collect()
        hits = np.count_nonzero(y_pred == y_test)
        self.assertGreater(hits / len(y_test), 0.7)
Example #3
0
    def test_make_classification_predict_and_distr_depth(self):
        """Check that fit/predict with distr_depth=2 exceeds 70% accuracy.

        The first half of a synthetic three-class dataset is used for
        fitting, the second half for evaluation.
        """
        x, y = make_classification(
            n_samples=3000,
            n_features=10,
            n_classes=3,
            n_informative=4,
            n_redundant=2,
            n_repeated=1,
            n_clusters_per_class=2,
            shuffle=True,
            random_state=0,
        )

        # x and y hold one entry per sample, so one midpoint splits both
        half = len(x) // 2
        x_train = ds.array(x[:half], (300, 10))
        y_train = ds.array(y[:half][:, np.newaxis], (300, 1))
        x_test = ds.array(x[half:], (300, 10))
        y_test = y[half:]

        rf = RandomForestClassifier(distr_depth=2, random_state=0)
        rf.fit(x_train, y_train)

        y_pred = rf.predict(x_test).collect()
        hits = np.count_nonzero(y_pred == y_test)
        self.assertGreater(hits / len(y_test), 0.7)