Python RandomForestClassifier Exemples, dislib.classification.RandomForestClassifier Python Exemples

Exemple #1

0

Afficher le fichier

    def test_make_classification_hard_vote_score_mix(self):
        """Score a forest combining hard_vote, sklearn_max, distr_depth and
        max_depth.

        The forest is fitted on the first half of a synthetic 3-class
        dataset and scored on the second half; the accuracy must exceed 0.7.
        """
        features, labels = make_classification(
            n_samples=3000,
            n_features=10,
            n_classes=3,
            n_informative=4,
            n_redundant=2,
            n_repeated=1,
            n_clusters_per_class=2,
            shuffle=True,
            random_state=0,
        )
        half = len(features) // 2
        x_blocks, y_blocks = (300, 10), (300, 1)

        x_train = ds.array(features[:half], x_blocks)
        y_train = ds.array(labels[:half][:, np.newaxis], y_blocks)
        x_test = ds.array(features[half:], x_blocks)
        y_test = ds.array(labels[half:][:, np.newaxis], y_blocks)

        forest = RandomForestClassifier(
            random_state=0,
            sklearn_max=100,
            distr_depth=2,
            max_depth=12,
            hard_vote=True,
        )
        forest.fit(x_train, y_train)

        # The score is passed through compss_wait_on before it is compared.
        accuracy = compss_wait_on(forest.score(x_test, y_test))
        self.assertGreater(accuracy, 0.7)

Exemple #2

0

Afficher le fichier

    def test_make_classification_hard_vote_predict(self):
        """Check predict() accuracy of a hard-voting forest.

        Even-indexed samples of a synthetic 3-class dataset are used for
        fitting and odd-indexed samples for prediction; the fraction of
        correct predictions must exceed 0.7.
        """
        features, labels = make_classification(n_samples=3000,
                                               n_features=10,
                                               n_classes=3,
                                               n_informative=4,
                                               n_redundant=2,
                                               n_repeated=1,
                                               n_clusters_per_class=2,
                                               shuffle=True,
                                               random_state=0)
        even, odd = slice(0, None, 2), slice(1, None, 2)

        x_train = ds.array(features[even], (300, 10))
        y_train = ds.array(labels[even][:, np.newaxis], (300, 1))
        x_test = ds.array(features[odd], (300, 10))
        expected = labels[odd]

        forest = RandomForestClassifier(random_state=0,
                                        sklearn_max=10,
                                        hard_vote=True)
        forest.fit(x_train, y_train)

        predicted = forest.predict(x_test).collect()
        hits = np.count_nonzero(predicted == expected)
        accuracy = hits / len(expected)
        self.assertGreater(accuracy, 0.7)

Exemple #3

0

Afficher le fichier

Fichier : test_gridsearch.py Projet : vibhatha/dislib

    def test_fit(self):
        """Check the results GridSearchCV.fit() exposes for a 2x2 grid."""
        iris_x, iris_y = datasets.load_iris(return_X_y=True)
        x = ds.array(iris_x, (30, 4))
        y = ds.array(iris_y[:, np.newaxis], (30, 1))

        grid = {'n_estimators': (2, 4), 'max_depth': range(3, 5)}
        searcher = GridSearchCV(RandomForestClassifier(), grid)
        searcher.fit(x, y)

        # Five per-split score columns are expected, matching the
        # n_splits_ == 5 assertion at the end of this test.
        wanted_keys = {'split%d_test_score' % i for i in range(5)}
        wanted_keys |= {
            'param_max_depth', 'param_n_estimators', 'params',
            'mean_test_score', 'std_test_score', 'rank_test_score'
        }
        self.assertSetEqual(set(searcher.cv_results_.keys()), wanted_keys)

        # Each (max_depth, n_estimators) pair must be reported exactly once:
        # pairs are crossed off as they are seen and none may remain.
        pending = [(3, 2), (3, 4), (4, 2), (4, 4)]
        for candidate in searcher.cv_results_['params']:
            pair = (candidate['max_depth'], candidate['n_estimators'])
            self.assertIn(pair, pending)
            pending.remove(pair)
        self.assertEqual(len(pending), 0)

        for attribute in ('best_estimator_', 'best_score_', 'best_params_',
                          'best_index_', 'scorer_'):
            self.assertTrue(hasattr(searcher, attribute))
        self.assertEqual(searcher.n_splits_, 5)

Exemple #4

0

Afficher le fichier

Fichier : test_gridsearch.py Projet : vibhatha/dislib

    def test_scoring_callable(self):
        """Check GridSearchCV when ``scoring`` is a callable.

        A callable that delegates to the estimator's own score() must
        produce a fully populated searcher; a callable returning a string
        must make fit() raise ValueError.
        """
        iris_x, iris_y = datasets.load_iris(return_X_y=True)
        x = ds.array(iris_x, (30, 4))
        y = ds.array(iris_y[:, np.newaxis], (30, 1))

        grid = {'n_estimators': (2, 4)}
        forest = RandomForestClassifier()

        def scoring(clf, x_score, y_real):
            return clf.score(x_score, y_real)

        def invalid_scoring(clf, x_score, y_score):
            return '2'

        searcher = GridSearchCV(forest, grid, cv=3, scoring=scoring)
        searcher.fit(x, y)

        for attribute in ('cv_results_', 'best_estimator_', 'best_score_',
                          'best_params_', 'best_index_', 'scorer_'):
            self.assertTrue(hasattr(searcher, attribute))

        searcher = GridSearchCV(forest, grid, cv=3, scoring=invalid_scoring)
        expected_message = 'scoring must return a number'
        with self.assertRaisesRegex(ValueError, expected_message):
            searcher.fit(x, y)

Exemple #5

0

Afficher le fichier

def main():
    """Run a 5-fold grid search of a random forest on iris and print results.

    Prints the parameter sets, the mean test scores, the full results table
    and the searcher's summary attributes.
    """
    iris_x, iris_y = datasets.load_iris(return_X_y=True)
    x = ds.array(iris_x, (30, 4))
    y = ds.array(iris_y[:, np.newaxis], (30, 1))

    grid = {
        'n_estimators': (1, 2, 4, 8, 16, 32),
        'max_depth': range(3, 5)
    }
    searcher = GridSearchCV(RandomForestClassifier(), grid, cv=5)

    # NumPy's global RNG is seeded right before fitting.
    np.random.seed(0)
    searcher.fit(x, y)

    results = searcher.cv_results_
    print(results['params'])
    print(results['mean_test_score'])

    table = pd.DataFrame.from_dict(searcher.cv_results_)
    print(table[['params', 'mean_test_score']])
    # Lift pandas' row/column display limits so the whole table is printed.
    with pd.option_context('display.max_rows', None,
                           'display.max_columns', None):
        print(table)

    for attribute in ('best_estimator_', 'best_score_', 'best_params_',
                      'best_index_', 'scorer_', 'n_splits_'):
        print(getattr(searcher, attribute))

Exemple #6

0

Afficher le fichier

Fichier : test_gridsearch.py Projet : vibhatha/dislib

    def test_refit_callable(self):
        """Check GridSearchCV when ``refit`` is a callable.

        The callable returns whatever ``chosen_index`` currently holds, so
        the same function is reused for a valid index (1), a non-integer
        ('str', expecting TypeError) and an out-of-range index (-1,
        expecting IndexError).
        """
        iris_x, iris_y = datasets.load_iris(return_X_y=True)
        x = ds.array(iris_x, (30, 4))
        y = ds.array(iris_y[:, np.newaxis], (30, 1))
        grid = {'n_estimators': (2, 4)}
        forest = RandomForestClassifier()

        chosen_index = 1

        def refit(results):
            # Reads the enclosing variable at call time, so rebinding it
            # below changes what later searches receive.
            return chosen_index

        searcher = GridSearchCV(forest, grid, cv=3, refit=refit)
        searcher.fit(x, y)

        for attribute in ('cv_results_', 'best_estimator_'):
            self.assertTrue(hasattr(searcher, attribute))
        self.assertFalse(hasattr(searcher, 'best_score_'))
        for attribute in ('best_params_', 'best_index_', 'scorer_'):
            self.assertTrue(hasattr(searcher, attribute))

        failing_cases = (('str', TypeError), (-1, IndexError))
        for bad_index, expected_error in failing_cases:
            chosen_index = bad_index
            searcher = GridSearchCV(forest, grid, cv=3, refit=refit)
            with self.assertRaises(expected_error):
                searcher.fit(x, y)

Exemple #7

0

Afficher le fichier

    def test_iris(self):
        """Check a one-tree, depth-one forest on iris.

        Even-indexed samples are used for fitting and odd-indexed samples
        for validation.
        """
        samples, targets = datasets.load_iris(return_X_y=True)
        x_blocks, y_blocks = (30, 2), (30, 1)

        ds_fit = ds.array(samples[::2], block_size=x_blocks)
        fit_y = ds.array(targets[::2].reshape(-1, 1), block_size=y_blocks)
        ds_validate = ds.array(samples[1::2], block_size=x_blocks)
        validate_y = ds.array(targets[1::2].reshape(-1, 1),
                              block_size=y_blocks)

        forest = RandomForestClassifier(n_estimators=1,
                                        max_depth=1,
                                        random_state=0)
        forest.fit(ds_fit, fit_y)
        accuracy = compss_wait_on(forest.score(ds_validate, validate_y))

        # Accuracy should be <= 2/3 for any seed, often exactly equal.
        expected_accuracy = 2 / 3
        self.assertAlmostEqual(accuracy, expected_accuracy)

Exemple #8

0

Afficher le fichier

def main():
    """Measure RandomForestClassifier.fit on the KDD99 and mnist datasets.

    Both datasets are loaded from fixed GPFS paths; each fit uses a fresh
    100-estimator forest with ``distr_depth=2``.
    """
    kdd = ds.load_txt_file(
        "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/kdd99/train.csv",
        block_size=(11482, 122))

    # Column 121 is used as the label; columns 0-120 are the features.
    kdd_labels = kdd[:, 121:122]
    kdd_features = kdd[:, :121]

    mnist_features, mnist_labels = ds.load_svmlight_file(
        "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/mnist/train.scaled",
        block_size=(5000, 780),
        n_features=780,
        store_sparse=False)

    kdd_forest = RandomForestClassifier(n_estimators=100, distr_depth=2)
    performance.measure("RF", "KDD99", kdd_forest.fit, kdd_features,
                        kdd_labels)

    mnist_forest = RandomForestClassifier(n_estimators=100, distr_depth=2)
    performance.measure("RF", "mnist", mnist_forest.fit, mnist_features,
                        mnist_labels)

Exemple #9

0

Afficher le fichier

def main():
    """Measure RandomForestClassifier.fit on the KDD99 training file.

    The file is loaded from a fixed scratch path and fitted with a
    100-estimator forest using ``distr_depth=2``.
    """
    kdd = ds.load_txt_file(
        "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/train.csv",
        block_size=(11482, 122))

    # Column 121 is used as the label; columns 0-120 are the features.
    labels = kdd[:, 121:122]
    features = kdd[:, :121]

    forest = RandomForestClassifier(n_estimators=100, distr_depth=2)
    performance.measure("RF", "KDD99", forest.fit, features, labels)

Exemple #10

0

Afficher le fichier

Fichier : rf_mnist.py Projet : mmarcrr/dislib

def main():
    """Measure RandomForestClassifier.fit on the scaled mnist training file.

    The SVMLight file is loaded densely (``store_sparse=False``) from a
    fixed scratch path and fitted with a 100-estimator forest using
    ``distr_depth=2``.
    """
    features, labels = ds.load_svmlight_file(
        "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/train.scaled",
        block_size=(5000, 780),
        n_features=780,
        store_sparse=False)

    forest = RandomForestClassifier(n_estimators=100, distr_depth=2)
    performance.measure("RF", "mnist", forest.fit, features, labels)

Exemple #11

0

Afficher le fichier

Fichier : test_gridsearch.py Projet : vibhatha/dislib

 def test_cv_invalid(self):
     """Check that an empty dict as ``cv`` makes GridSearchCV raise
     ValueError.

     Construction and fit() both run inside the assertRaises context, so
     the error may come from either.
     """
     iris_x, iris_y = datasets.load_iris(return_X_y=True)
     x = ds.array(iris_x, (30, 4))
     y = ds.array(iris_y[:, np.newaxis], (30, 1))
     forest = RandomForestClassifier()
     grid = {'n_estimators': (2, 4)}
     with self.assertRaises(ValueError):
         GridSearchCV(forest, grid, cv={}).fit(x, y)

Exemple #12

0

Afficher le fichier

    def test_make_classification_fit_predict(self):
        """Check chained fit().predict() with default forest parameters.

        The forest is fitted on the first half of a synthetic 3-class
        dataset and predicts that same half; the fraction of correct
        predictions must exceed 0.7.
        """
        features, labels = make_classification(
            n_samples=3000,
            n_features=10,
            n_classes=3,
            n_informative=4,
            n_redundant=2,
            n_repeated=1,
            n_clusters_per_class=2,
            shuffle=True,
            random_state=0,
        )
        half = len(features) // 2
        x_train = ds.array(features[:half], (300, 10))
        y_train = ds.array(labels[:half][:, np.newaxis], (300, 1))

        forest = RandomForestClassifier(random_state=0)
        fitted = forest.fit(x_train, y_train)

        predicted = fitted.predict(x_train).collect()
        expected = y_train.collect()
        hits = np.count_nonzero(predicted == expected)
        accuracy = hits / len(expected)
        self.assertGreater(accuracy, 0.7)

Exemple #13

0

Afficher le fichier

    def test_make_classification_score(self):
        """Check fit() followed by score() with default forest parameters.

        The forest is fitted on the first half of a synthetic 3-class
        dataset and scored on the second half; accuracy must exceed 0.7.
        """
        features, labels = make_classification(
            n_samples=3000,
            n_features=10,
            n_classes=3,
            n_informative=4,
            n_redundant=2,
            n_repeated=1,
            n_clusters_per_class=2,
            shuffle=True,
            random_state=0,
        )
        half = len(features) // 2
        x_blocks, y_blocks = (300, 10), (300, 1)

        x_train = ds.array(features[:half], x_blocks)
        y_train = ds.array(labels[:half][:, np.newaxis], y_blocks)
        x_test = ds.array(features[half:], x_blocks)
        y_test = ds.array(labels[half:][:, np.newaxis], y_blocks)

        forest = RandomForestClassifier(random_state=0)
        forest.fit(x_train, y_train)

        accuracy = compss_wait_on(forest.score(x_test, y_test))
        self.assertGreater(accuracy, 0.7)

Exemple #14

0

Afficher le fichier

def main():
    """Train and evaluate a random forest on a random 80/20 split of iris.

    Prints the number of training samples, a table of true versus predicted
    labels for the held-out samples, and the classifier accuracy.
    """
    x, y = load_iris(return_X_y=True)

    # Draw the permutation directly with NumPy. The previous code called
    # ``shuffle(indices)`` and discarded the result, which leaves the
    # indices in sorted order whenever ``shuffle`` returns a shuffled copy
    # rather than shuffling in place (as sklearn.utils.shuffle does).
    indices = np.random.permutation(len(x))

    # use 80% of samples for training
    n_train = int(0.8 * len(x))
    train_idx = indices[:n_train]
    test_idx = indices[n_train:]

    # Train the RF classifier
    print("- Training Random Forest classifier with %s samples of Iris "
          "dataset." % len(train_idx))
    x_train = ds.array(x[train_idx], (10, 4))
    y_train = ds.array(y[train_idx][:, np.newaxis], (10, 1))
    forest = RandomForestClassifier(10)
    forest.fit(x_train, y_train)

    # Test the trained RF classifier
    print("- Testing the classifier.", end='')
    x_test = ds.array(x[test_idx], (10, 4))
    y_real = ds.array(y[test_idx][:, np.newaxis], (10, 1))
    y_pred = forest.predict(x_test)

    score = compss_wait_on(forest.score(x_test, y_real))

    # Put results in fancy dataframe and print the accuracy
    df = pd.DataFrame(data=list(zip(y[test_idx], y_pred.collect())),
                      columns=['Label', 'Predicted'])
    print(" Predicted values: \n\n%s" % df)
    print("\n- Classifier accuracy: %s" % score)

Exemple #15

0

Afficher le fichier

    def test_make_classification_sklearn_max_predict_proba(self):
        """Check predict_proba() of a forest configured with sklearn_max.

        Even-indexed samples of a synthetic 3-class dataset are used for
        fitting and odd-indexed samples for prediction. The predicted class
        is the entry of ``rf.classes`` at the column with the highest
        probability; the fraction of correct predictions must exceed 0.7.
        """
        features, labels = make_classification(n_samples=3000,
                                               n_features=10,
                                               n_classes=3,
                                               n_informative=4,
                                               n_redundant=2,
                                               n_repeated=1,
                                               n_clusters_per_class=2,
                                               shuffle=True,
                                               random_state=0)
        even, odd = slice(0, None, 2), slice(1, None, 2)

        x_train = ds.array(features[even], (300, 10))
        y_train = ds.array(labels[even][:, np.newaxis], (300, 1))
        x_test = ds.array(features[odd], (300, 10))
        expected = labels[odd]

        rf = RandomForestClassifier(random_state=0, sklearn_max=10)
        rf.fit(x_train, y_train)

        probabilities = rf.predict_proba(x_test).collect()
        # The classes attribute is replaced by its compss_wait_on result
        # before it is indexed.
        rf.classes = compss_wait_on(rf.classes)
        best_columns = np.argmax(probabilities, axis=1)
        predicted = rf.classes[best_columns]

        hits = np.count_nonzero(predicted == expected)
        accuracy = hits / len(expected)
        self.assertGreater(accuracy, 0.7)

Exemple #16

0

Afficher le fichier

    def test_make_classification_predict_and_distr_depth(self):
        """Check fit() and predict() of a forest built with distr_depth=2.

        The forest is fitted on the first half of a synthetic 3-class
        dataset and predicts the second half; the fraction of correct
        predictions must exceed 0.7.
        """
        features, labels = make_classification(
            n_samples=3000,
            n_features=10,
            n_classes=3,
            n_informative=4,
            n_redundant=2,
            n_repeated=1,
            n_clusters_per_class=2,
            shuffle=True,
            random_state=0,
        )
        half = len(features) // 2
        x_train = ds.array(features[:half], (300, 10))
        y_train = ds.array(labels[:half][:, np.newaxis], (300, 1))
        x_test = ds.array(features[half:], (300, 10))
        expected = labels[half:]

        forest = RandomForestClassifier(distr_depth=2, random_state=0)
        forest.fit(x_train, y_train)

        predicted = forest.predict(x_test).collect()
        hits = np.count_nonzero(predicted == expected)
        accuracy = hits / len(expected)
        self.assertGreater(accuracy, 0.7)

Exemple #17

0

Afficher le fichier

Fichier : test_gridsearch.py Projet : vibhatha/dislib

    def test_scoring_invalid(self):
        """Check that fit() raises ValueError when ``scoring`` is the
        string 'roc_auc'."""
        iris_x, iris_y = datasets.load_iris(return_X_y=True)
        x = ds.array(iris_x, (30, 4))
        y = ds.array(iris_y[:, np.newaxis], (30, 1))

        search_options = dict(cv=3, scoring='roc_auc', refit=False)
        searcher = GridSearchCV(RandomForestClassifier(),
                                {'n_estimators': (2, 4)},
                                **search_options)

        with self.assertRaises(ValueError):
            searcher.fit(x, y)

Exemple #18

0

Afficher le fichier

Fichier : test_gridsearch.py Projet : vibhatha/dislib

    def test_cv_class(self):
        """Check GridSearchCV when ``cv`` is a KFold(4) instance."""
        iris_x, iris_y = datasets.load_iris(return_X_y=True)
        x = ds.array(iris_x, (30, 4))
        y = ds.array(iris_y[:, np.newaxis], (30, 1))

        forest = RandomForestClassifier()
        grid = {'n_estimators': (2, 4)}
        searcher = GridSearchCV(forest, grid, cv=KFold(4))
        searcher.fit(x, y)

        for attribute in ('cv_results_', 'best_estimator_', 'best_score_',
                          'best_params_', 'best_index_', 'scorer_'):
            self.assertTrue(hasattr(searcher, attribute))

Exemple #19

0

Afficher le fichier

Fichier : test_gridsearch.py Projet : vibhatha/dislib

    def test_scoring_dict(self):
        """Check GridSearchCV when ``scoring`` is a dict of scorers.

        With ``refit=False`` the search must expose cv_results_ and scorer_
        but none of the best_* attributes; with ``refit=True`` fit() must
        raise ValueError.
        """
        iris_x, iris_y = datasets.load_iris(return_X_y=True)
        x = ds.array(iris_x, (30, 4))
        y = ds.array(iris_y[:, np.newaxis], (30, 1))

        grid = {'n_estimators': (2, 4)}
        forest = RandomForestClassifier()

        def hard_vote_score(rand_forest, x, y):
            # Score with hard voting switched on, then switch it back off.
            rand_forest.hard_vote = True
            score = rand_forest.score(x, y)
            rand_forest.hard_vote = False
            return score

        scoring = {'default_score': None, 'custom_score': hard_vote_score}

        searcher = GridSearchCV(forest, grid, cv=3, scoring=scoring,
                                refit=False)
        searcher.fit(x, y)

        self.assertTrue(hasattr(searcher, 'cv_results_'))
        for attribute in ('best_estimator_', 'best_score_', 'best_params_',
                          'best_index_'):
            self.assertFalse(hasattr(searcher, attribute))
        self.assertTrue(hasattr(searcher, 'scorer_'))

        searcher = GridSearchCV(forest, grid, cv=3, scoring=scoring,
                                refit=True)
        with self.assertRaises(ValueError):
            searcher.fit(x, y)

Exemple #20

0

Afficher le fichier

def _split_label_column(data):
    """Split *data* into (features, labels), taking the last column as the
    labels and every preceding column as the features."""
    n_cols = data.shape[1]
    labels = data[:, n_cols - 1:n_cols]
    features = data[:, :n_cols - 1]
    return features, labels


def main():
    """Fit a RandomForestClassifier on a CSV or SVMLight file and print a
    summary.

    Command-line options select the input format, number of estimators,
    block size, maximum depth, distribution depth, number of features
    (SVMLight only), dense storage and an optional test file.

    The printed list contains, in order: n_estimators, distr_depth,
    max_depth, read time, fit time and, when a test file is given, the
    score on that file. Read time is only measured with --detailed_times;
    otherwise it is reported as 0 and the fit time includes loading.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--svmlight",
                        help="read files in SVMLight format",
                        action="store_true")
    parser.add_argument("-dt",
                        "--detailed_times",
                        help="get detailed execution times (read and fit)",
                        action="store_true")
    parser.add_argument("-e",
                        "--estimators",
                        metavar="N_ESTIMATORS",
                        type=int,
                        help="default is 10",
                        default=10)
    parser.add_argument("-b",
                        "--block_size",
                        metavar="BLOCK_SIZE",
                        type=str,
                        help="two comma separated ints that represent the "
                        "size of the blocks in which to divide the input "
                        "data (default is 100,100)",
                        default="100,100")
    parser.add_argument("-md",
                        "--max_depth",
                        metavar="MAX_DEPTH",
                        type=int,
                        help="default is np.inf",
                        required=False)
    parser.add_argument("-dd",
                        "--dist_depth",
                        metavar="DIST_DEPTH",
                        type=int,
                        help="default is auto",
                        required=False)
    parser.add_argument("-f",
                        "--features",
                        metavar="N_FEATURES",
                        help="number of features of the input data "
                        "(only for SVMLight files)",
                        type=int,
                        default=None,
                        required=False)
    parser.add_argument("--dense",
                        help="use dense data structures",
                        action="store_true")
    parser.add_argument("-t",
                        "--test-file",
                        metavar="TEST_FILE_PATH",
                        help="test file path",
                        type=str,
                        required=False)
    parser.add_argument("train_data",
                        help="input file in CSV or SVMLight format",
                        type=str)
    args = parser.parse_args()

    train_data = args.train_data

    s_time = time.time()
    read_time = 0

    sparse = not args.dense

    bsize = args.block_size.split(",")
    block_size = (int(bsize[0]), int(bsize[1]))

    if args.svmlight:
        x, y = ds.load_svmlight_file(train_data, block_size, args.features,
                                     sparse)
    else:
        # The labels are the LAST column. The previous code took the
        # second-to-last column as labels while still keeping it among the
        # features, and dropped the real label column; the test-file branch
        # below already used the last column.
        x, y = _split_label_column(ds.load_txt_file(train_data, block_size))

    if args.detailed_times:
        # Call barrier() so the read time is taken separately from the fit
        # time, then restart the timer.
        barrier()
        read_time = time.time() - s_time
        s_time = time.time()

    # Unset (or zero) options fall back to the documented defaults.
    dist_depth = args.dist_depth or "auto"
    max_depth = args.max_depth or np.inf

    forest = RandomForestClassifier(n_estimators=args.estimators,
                                    max_depth=max_depth,
                                    distr_depth=dist_depth)
    forest.fit(x, y)

    barrier()
    fit_time = time.time() - s_time

    out = [
        forest.n_estimators, forest.distr_depth, forest.max_depth, read_time,
        fit_time
    ]

    if args.test_file:
        if args.svmlight:
            x_test, y_test = ds.load_svmlight_file(args.test_file, block_size,
                                                   args.features, sparse)
        else:
            x_test, y_test = _split_label_column(
                ds.load_txt_file(args.test_file, block_size))

        out.append(compss_wait_on(forest.score(x_test, y_test)))

    print(out)

Exemple #21

0

Afficher le fichier

Fichier : classifier_comparison.py Projet : vibhatha/dislib

def main():
    """Plot how three dislib classifiers behave on three 2-D toy datasets.

    The first pass fits every classifier on every dataset and stores the
    returned scores and mesh outputs without collecting them. The second
    pass collects those results while drawing a grid of subplots: one row
    per dataset, whose first column shows the raw data and whose remaining
    columns show each classifier's output over the mesh together with its
    score.
    """
    step = .02  # step size in the mesh

    names = ["Linear C-SVM", "RBF C-SVM", "Random forest"]

    classifiers = [
        CascadeSVM(kernel="linear", c=0.025, max_iter=5),
        CascadeSVM(gamma=2, c=1, max_iter=5),
        RandomForestClassifier(random_state=1)
    ]

    x, y = make_classification(n_features=2,
                               n_redundant=0,
                               n_informative=2,
                               random_state=1,
                               n_clusters_per_class=1)
    # Add uniform noise to the generated points.
    rng = np.random.RandomState(2)
    x += 2 * rng.uniform(size=x.shape)

    datasets = [
        make_moons(noise=0.3, random_state=0),
        make_circles(noise=0.2, factor=0.5, random_state=1),
        (x, y),
    ]

    prepared = {}
    scores = {}
    mesh_outputs = {}
    for row, (x, y) in enumerate(datasets):
        # preprocess dataset, split into training and test part
        x = StandardScaler().fit_transform(x)
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=.4, random_state=42)
        ds_x_train = ds.array(x_train, block_size=(20, 2))
        ds_y_train = ds.array(y_train.reshape(-1, 1), block_size=(20, 1))
        ds_x_test = ds.array(x_test, block_size=(20, 2))
        ds_y_test = ds.array(y_test.reshape(-1, 1), block_size=(20, 1))

        # Mesh covering the data range with a 0.5 margin on every side.
        first, second = x[:, 0], x[:, 1]
        xx, yy = np.meshgrid(
            np.arange(first.min() - .5, first.max() + .5, step),
            np.arange(second.min() - .5, second.max() + .5, step))
        prepared[row] = (x, x_train, x_test, y_train, y_test, xx, yy)

        for name, clf in zip(names, classifiers):
            clf.fit(ds_x_train, ds_y_train)
            scores[(row, name)] = clf.score(ds_x_test, ds_y_test)

            mesh = np.c_[xx.ravel(), yy.ravel()]
            mesh_array = ds.array(mesh, (mesh.shape[0], 2))

            # Use decision_function when the classifier has one, otherwise
            # predict_proba.
            if hasattr(clf, "decision_function"):
                evaluate = clf.decision_function
            else:
                evaluate = clf.predict_proba
            mesh_outputs[(row, name)] = evaluate(mesh_array)

    # Synchronize while plotting the results
    plt.figure(figsize=(27, 9))
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    n_rows = len(datasets)
    n_cols = len(classifiers) + 1

    def draw_points(ax, x_train, y_train, x_test, y_test, xx, yy):
        # Training points opaque, testing points semi-transparent; axes are
        # clipped to the mesh extent and ticks are removed.
        ax.scatter(x_train[:, 0], x_train[:, 1],
                   c=y_train, cmap=cm_bright, edgecolors='k')
        ax.scatter(x_test[:, 0], x_test[:, 1],
                   c=y_test, cmap=cm_bright, alpha=0.6, edgecolors='k')
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())

    for row in range(n_rows):
        x, x_train, x_test, y_train, y_test, xx, yy = prepared[row]
        is_first_row = row == 0
        # Subplot indices are 1-based and advance row by row.
        row_start = row * n_cols + 1

        # just plot the dataset first
        ax = plt.subplot(n_rows, n_cols, row_start)
        if is_first_row:
            ax.set_title("Input data")
        draw_points(ax, x_train, y_train, x_test, y_test, xx, yy)

        # iterate over classifiers
        for col, (name, clf) in enumerate(zip(names, classifiers), start=1):
            ax = plt.subplot(n_rows, n_cols, row_start + col)

            score = compss_wait_on(scores[(row, name)])
            Z = mesh_outputs[(row, name)].collect()
            if not hasattr(clf, "decision_function"):
                # predict_proba output: keep only column 1.
                Z = Z[:, 1]

            # Put the result into a color plot
            Z = Z.reshape(xx.shape)
            ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

            draw_points(ax, x_train, y_train, x_test, y_test, xx, yy)
            if is_first_row:
                ax.set_title(name)
            # Score label near the bottom-right corner, without the
            # leading zero.
            ax.text(xx.max() - .3,
                    yy.min() + .3, ('%.2f' % score).lstrip('0'),
                    size=15,
                    horizontalalignment='right')

    plt.tight_layout()
    plt.show()