def test_init_param_metric(self):
     pwc = PWC()
     self.assertEqual(pwc.metric, 'rbf')
     pwc = PWC(metric='Test')
     self.assertEqual(pwc.metric, 'Test')
     pwc = PWC(missing_label='nan', metric='Test')
     self.assertRaises(ValueError, pwc.fit, X=self.X, y=self.y)
 def test_init_param_metric_dict(self):
     pwc = PWC(missing_label=-1)
     self.assertEqual(pwc.metric_dict, None)
     pwc = PWC(missing_label='nan', metric_dict='Test')
     self.assertRaises(TypeError, pwc.fit, X=self.X, y=self.y)
     pwc = PWC(missing_label='nan', metric_dict=['gamma'])
     self.assertRaises(TypeError, pwc.fit, X=self.X, y=self.y)
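# --- Hedged usage sketch (not part of the scraped tests above) ------------
# A minimal, self-contained illustration of the `metric`/`metric_dict`
# parameters the tests above exercise. The import path is an assumption
# based on older skactiveml releases and may differ in your version.
import numpy as np
from skactiveml.classifier import PWC  # assumed import path

X_demo = np.array([[0.0], [1.0], [2.0]])
y_demo = np.array([0, 1, 1])
# 'rbf' is the default metric; metric_dict forwards keyword arguments
# (here the kernel bandwidth) to the pairwise kernel computation.
pwc_demo = PWC(classes=[0, 1], metric='rbf', metric_dict={'gamma': 2})
pwc_demo.fit(X_demo, y_demo)
print(pwc_demo.predict_proba(X_demo))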
 def test_init_param_n_neighbors(self):
     pwc = PWC()
     self.assertIsNone(pwc.n_neighbors)
     pwc = PWC(n_neighbors=1)
     self.assertEqual(pwc.n_neighbors, 1)
     pwc = PWC(missing_label='nan', n_neighbors=0)
     self.assertRaises(ValueError, pwc.fit, X=self.X, y=self.y)
     pwc = PWC(missing_label='nan', n_neighbors=-1)
     self.assertRaises(ValueError, pwc.fit, X=self.X, y=self.y)
     pwc = PWC(missing_label='nan', n_neighbors=1.5)
     self.assertRaises(TypeError, pwc.fit, X=self.X, y=self.y)
 def test_fit(self):
     pwc = PWC(classes=[1, 2])
     gnb = SklearnClassifier(GaussianNB(), classes=[1, 2])
     clf = MultiAnnotEnsemble(estimators=[('PWC', pwc)], classes=[1, 2])
     np.testing.assert_array_equal(clf.classes, gnb.classes)
     np.testing.assert_array_equal(clf.classes, pwc.classes)
     pwc = PWC(classes=np.arange(3))
     gnb = SklearnClassifier(GaussianNB(), classes=np.arange(3))
     clf = MultiAnnotEnsemble(estimators=[('PWC', pwc), ('GNB', gnb)],
                              voting='soft',
                              classes=np.arange(3))
     self.assertRaises(ValueError, clf.fit, X=self.X, y=self.y[:, 0])
 def test_query(self):
     ensemble_classifiers = [
         SklearnClassifier(classes=self.classes,
                           estimator=GaussianProcessClassifier()),
         SklearnClassifier(classes=self.classes,
                           estimator=GaussianProcessClassifier()),
         SklearnClassifier(classes=self.classes,
                           estimator=GaussianProcessClassifier()),
     ]
     pwc = PWC(classes=self.classes)
     ensemble_bagging = SklearnClassifier(
         estimator=BaggingClassifier(base_estimator=pwc),
         classes=self.classes)
     ensemble_voting = SklearnClassifier(
         VotingClassifier(estimators=ensemble_classifiers, voting='soft'))
     ensemble_list = [
         self.ensemble, ensemble_classifiers, ensemble_bagging,
         ensemble_voting
     ]
     for ensemble in ensemble_list:
         for method in ['KL_divergence', 'vote_entropy']:
             selector = QBC(method=method)
             idx, u = selector.query(X_cand=self.X_cand,
                                     ensemble=ensemble,
                                     X=self.X,
                                     y=self.y,
                                     return_utilities=True)
             self.assertEqual(len(idx), 1)
             self.assertEqual(len(u), 1)
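# --- Hedged usage sketch (not part of the scraped tests above) ------------
# Query-by-committee as exercised in the test: the committee is fit on the
# labeled data passed to `query` and its disagreement over the candidates is
# turned into utilities. Import paths are assumptions based on older
# skactiveml releases.
import numpy as np
from sklearn.gaussian_process import GaussianProcessClassifier
from skactiveml.classifier import SklearnClassifier  # assumed import path
from skactiveml.pool import QBC                      # assumed import path

X_lab = np.array([[0.0], [1.0], [2.0], [3.0]])
y_lab = np.array([0, 0, 1, 1])
X_cand_demo = np.array([[1.5], [2.5]])
committee = [SklearnClassifier(estimator=GaussianProcessClassifier(),
                               classes=[0, 1]) for _ in range(3)]
idx, utilities = QBC(method='KL_divergence').query(
    X_cand=X_cand_demo, ensemble=committee, X=X_lab, y=y_lab,
    return_utilities=True)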
    def setUp(self):
        self.MISSING_LABEL = MISSING_LABEL
        self.X, self.y_true = make_blobs(n_samples=10,
                                         n_features=2,
                                         centers=2,
                                         cluster_std=1,
                                         random_state=1)
        self.budget = 5
        self.clf = PWC(classes=np.unique(self.y_true),
                       missing_label=MISSING_LABEL,
                       random_state=0)
        self.cmm = CMM(classes=np.unique(self.y_true),
                       missing_label=MISSING_LABEL,
                       random_state=0)
        self.ensemble = SklearnClassifier(
            classes=np.unique(self.y_true),
            missing_label=MISSING_LABEL,
            estimator=RandomForestClassifier(random_state=0),
            random_state=0)

        self.y_missing_label = np.full(self.y_true.shape, self.MISSING_LABEL)
        # Label only the first three samples; the rest stay missing.
        self.y = self.y_missing_label.copy()
        self.y[:3] = self.y_true[:3]
        self.query_strategies = {}
        for qs_name in pool.__all__:
            qs = getattr(pool, qs_name)
            if inspect.isclass(qs) and \
                    issubclass(qs, SingleAnnotPoolBasedQueryStrategy):
                self.query_strategies[qs_name] = qs
        print(self.query_strategies.keys())
    def test_selection_strategies(self):
        # Create data set for testing.
        rand = np.random.RandomState(0)
        stream_length = 1000
        train_init_size = 10
        training_size = 100
        X, y = make_classification(
            n_samples=stream_length + train_init_size,
            random_state=rand.randint(2**31 - 1),
            shuffle=True,
        )

        clf = PWC(classes=[0, 1], random_state=rand.randint(2**31 - 1))

        X_init = X[:train_init_size, :]
        y_init = y[:train_init_size]

        X_stream = X[train_init_size:, :]
        y_stream = y[train_init_size:]


        # Test predictions of classifiers.
        for qs_name, qs_class in self.query_strategies.items():
            self._test_query_strategy(rand.randint(2**31 - 1), qs_class, clf,
                                      X_init, y_init, X_stream, y_stream,
                                      training_size, qs_name)
            self._test_update_before_query(rand.randint(2**31 - 1), qs_class,
                                           clf, X_init, y_init, X_stream,
                                           y_stream, training_size, qs_name)
    def test_query(self):
        mcpal = McPAL()
        self.assertRaises(ValueError,
                          mcpal.query,
                          X_cand=[],
                          clf=self.clf,
                          X=[],
                          y=[])
        self.assertRaises(ValueError,
                          mcpal.query,
                          X_cand=[],
                          clf=self.clf,
                          X=self.X,
                          y=self.y)
        self.assertRaises(ValueError,
                          mcpal.query,
                          X_cand=self.X_cand,
                          clf=self.clf,
                          X=self.X,
                          y=[0, 1, 4, 0, 2, 1])

        # Test missing labels
        X_cand = [[0], [1], [2], [3]]
        clf = PWC(classes=[0, 1])
        mcpal = McPAL()
        _, utilities = mcpal.query(X_cand,
                                   clf, [[1]], [MISSING_LABEL],
                                   return_utilities=True)
        self.assertEqual(utilities.shape, (1, len(X_cand)))
        self.assertEqual(len(np.unique(utilities)), 1)

        _, utilities = mcpal.query(X_cand,
                                   clf,
                                   X=[[0], [1], [2]],
                                   y=[0, 1, MISSING_LABEL],
                                   return_utilities=True)
        self.assertGreater(utilities[0, 2], utilities[0, 1])
        self.assertGreater(utilities[0, 2], utilities[0, 0])

        # Test scenario
        X_cand = [[0], [1], [2], [5]]
        mcpal = McPAL()

        best_indices = mcpal.query(X_cand, clf, X=[[1]], y=[0])
        np.testing.assert_array_equal(best_indices, np.array([3]))

        _, utilities = mcpal.query(X_cand,
                                   clf,
                                   X=[[1]],
                                   y=[0],
                                   return_utilities=True)
        min_utility_index = np.argmin(utilities)
        np.testing.assert_array_equal(min_utility_index, np.array([1]))

        best_indices = mcpal.query(X_cand=[[0], [1], [2]],
                                   clf=clf,
                                   X=[[0], [2]],
                                   y=[0, 1])
        np.testing.assert_array_equal(best_indices, [1])
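# --- Hedged usage sketch (not part of the scraped tests above) ------------
# The McPAL call pattern exercised above: utilities are returned with shape
# (1, n_candidates) and the best candidate index is selected. Import paths
# are assumptions based on older skactiveml releases.
import numpy as np
from skactiveml.classifier import PWC       # assumed import path
from skactiveml.pool import McPAL           # assumed import path
from skactiveml.utils import MISSING_LABEL  # assumed import path

clf_demo = PWC(classes=[0, 1])
X_cand_demo = [[0], [1], [2], [5]]
# Only the first pool sample is labeled; MISSING_LABEL marks the rest.
best_idx, utilities = McPAL().query(X_cand_demo, clf_demo,
                                    X=[[1], [3]], y=[0, MISSING_LABEL],
                                    return_utilities=True)
print(best_idx, utilities.shape)  # utilities.shape == (1, 4)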
 def test_init_param_voting(self):
     pwc = PWC()
     gnb = SklearnClassifier(GaussianNB())
     estimators = [('pwc', pwc), ('gnb', gnb)]
     clf = MultiAnnotEnsemble(estimators=estimators, voting='Test')
     self.assertRaises(ValueError, clf.fit, X=self.X, y=self.y)
     clf = MultiAnnotEnsemble(estimators=estimators, voting=1)
     self.assertRaises(ValueError, clf.fit, X=self.X, y=self.y)
 def setUp(self):
     self.X_cand = np.zeros((100, 2))
     self.X = np.zeros((6, 2))
     self.y = [0, 1, 1, 0, 2, 1]
     self.classes = [0, 1, 2]
     self.cost_matrix = np.array([[0, 2, 3], [4, 0, 6], [7, 8, 0]])
     self.regressor = SVR()
     self.pwc = PWC()
 def setUp(self):
     self.query_strategies = {}
     for qs_name in stream.__all__:
         qs = getattr(stream, qs_name)
         if inspect.isclass(qs) and \
                 issubclass(qs, SingleAnnotStreamBasedQueryStrategy):
             self.query_strategies[qs_name] = qs
     self.clf = PWC()
 def setUp(self):
     self.X = np.zeros((6, 2))
     self.utility_weight = np.ones(len(self.X)) / len(self.X)
     self.X_cand = np.zeros((2, 2))
     self.y = [0, 1, 1, 0, 2, 1]
     self.classes = [0, 1, 2]
     self.C = np.eye(3)
     self.clf = PWC(classes=self.classes)
 def test_init_param_estimators(self):
     clf = MultiAnnotEnsemble(estimators='Test')
     self.assertEqual(clf.estimators, 'Test')
     self.assertRaises(ValueError, clf.fit, X=self.X, y=self.y)
     clf = MultiAnnotEnsemble(estimators=None)
     self.assertRaises(ValueError, clf.fit, X=self.X, y=self.y)
     clf = MultiAnnotEnsemble(estimators=[('GNB', GaussianNB())])
     self.assertRaises(TypeError, clf.fit, X=self.X, y=self.y)
     clf = MultiAnnotEnsemble(estimators=[('PWC', PWC(missing_label=0))])
     self.assertRaises(TypeError, clf.fit, X=self.X, y=self.y)
     clf = MultiAnnotEnsemble(estimators=[('PWC', PWC(missing_label='a'))])
     self.assertRaises(TypeError, clf.fit, X=self.X, y=self.y)
     clf = MultiAnnotEnsemble(classes=[0, 1],
                              estimators=[('PWC', PWC(classes=[0, 2]))])
     self.assertRaises(ValueError, clf.fit, X=self.X, y=self.y)
     clf = MultiAnnotEnsemble(estimators=[('PWC', PWC(classes=[0, 1]))])
     self.assertRaises(ValueError, clf.fit, X=self.X, y=self.y)
     perc = SklearnClassifier(Perceptron())
     clf = MultiAnnotEnsemble(estimators=[('perc', perc)], voting='soft')
     self.assertRaises(ValueError, clf.fit, X=self.X, y=self.y)
 def setUp(self):
     self.random_state = 1
     self.X_cand = np.array([[8, 1], [9, 1], [5, 1]])
     self.X = np.array([[1, 2], [5, 8], [8, 4], [5, 4]])
     self.y = np.array([0, 0, 1, 1])
     self.classes = np.array([0, 1])
     self.clf = PWC()
     self.kwargs = dict(X_cand=self.X_cand,
                        clf=self.clf,
                        X=self.X,
                        y=self.y)
    def test_query(self):
        selector = EpistemicUncertainty()

        # return_utilities
        L = list(
            selector.query(**self.kwargs, clf=self.clf, return_utilities=True))
        self.assertTrue(len(L) == 2)
        L = list(
            selector.query(**self.kwargs, clf=self.clf,
                           return_utilities=False))
        self.assertTrue(len(L) == 1)

        # batch_size
        bs = 3
        selector = EpistemicUncertainty()
        best_idx = selector.query(**self.kwargs, clf=self.clf, batch_size=bs)
        self.assertEqual(bs, len(best_idx))

        # query - PWC
        clf = PWC(classes=self.classes, random_state=self.random_state)
        selector = EpistemicUncertainty()
        selector.query(**self.kwargs, clf=clf)
        selector.query(**self.kwargs_MISSING_LABEL, clf=clf)

        best_indices, utilities = selector.query(**self.kwargs,
                                                 clf=clf,
                                                 return_utilities=True)
        self.assertEqual(utilities.shape, (1, len(self.X_cand)))
        self.assertEqual(best_indices.shape, (1, ))

        # query - logistic regression
        clf = SklearnClassifier(LogisticRegression(),
                                classes=self.classes,
                                random_state=self.random_state)

        selector = EpistemicUncertainty()
        selector.query(**self.kwargs, clf=clf)
        selector.query(**self.kwargs_MISSING_LABEL, clf=clf)

        best_indices, utilities = selector.query(**self.kwargs,
                                                 clf=clf,
                                                 return_utilities=True)
        self.assertEqual(utilities.shape, (1, len(self.X_cand)))
        self.assertEqual(best_indices.shape, (1, ))

        best_indices_s, utilities_s = selector.query(**self.kwargs,
                                                     clf=clf,
                                                     return_utilities=True,
                                                     sample_weight=[
                                                         0.5, 1, 1, 1
                                                     ])
        comp = utilities_s == utilities
        self.assertTrue(not comp.all())
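# --- Hedged usage sketch (not part of the scraped tests above) ------------
# Epistemic uncertainty sampling with a PWC model, mirroring the calls
# above: utilities come back with shape (1, n_candidates) and the returned
# index array has shape (1,). Import paths are assumptions based on older
# skactiveml releases.
import numpy as np
from skactiveml.classifier import PWC               # assumed import path
from skactiveml.pool import EpistemicUncertainty    # assumed import path

X_lab = np.array([[1, 2], [5, 8], [8, 4], [5, 4]])
y_lab = np.array([0, 0, 1, 1])
X_cand_demo = np.array([[8, 1], [9, 1], [5, 1]])
best_idx, utilities = EpistemicUncertainty().query(
    X_cand=X_cand_demo, clf=PWC(classes=[0, 1]), X=X_lab, y=y_lab,
    return_utilities=True)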
 def test_fit(self):
     pwc = PWC(classes=['tokyo', 'paris', 'new york'], missing_label='nan')
     pwc.fit(X=self.X, y=self.y_nan)
     self.assertIsNone(pwc.cost_matrix)
     np.testing.assert_array_equal(1 - np.eye(3), pwc.cost_matrix_)
     np.testing.assert_array_equal(np.zeros((3, 3)), pwc.V_)
     pwc.fit(X=self.X, y=self.y)
     self.assertIsNone(pwc.cost_matrix)
     np.testing.assert_array_equal(1 - np.eye(3), pwc.cost_matrix_)
     np.testing.assert_array_equal([[0, 0, 1], [0, 0, 0], [0, 1, 0]],
                                   pwc.V_)
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     np.testing.assert_array_equal([[0, 0, 2], [0, 0, 0], [0, 1, 0]],
                                   pwc.V_)
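# --- Hedged usage sketch (not part of the scraped tests above) ------------
# What the assertions above inspect: after fitting, PWC exposes a per-sample
# class-frequency matrix V_ (rows of unlabeled samples stay zero and
# sample_weight scales the counts) and a cost_matrix_ that defaults to
# 1 - np.eye(n_classes). Import path is an assumption based on older
# skactiveml releases.
import numpy as np
from skactiveml.classifier import PWC  # assumed import path

X_demo = np.array([[0.0], [1.0], [2.0]])
y_demo = np.array(['tokyo', 'paris', 'nan'])   # 'nan' marks a missing label
w_demo = np.array([1.0, 2.0, 1.0])
pwc_demo = PWC(classes=['tokyo', 'paris', 'new york'], missing_label='nan')
pwc_demo.fit(X=X_demo, y=y_demo, sample_weight=w_demo)
print(pwc_demo.V_)             # 3 rows (samples) x 3 columns (classes)
print(pwc_demo.cost_matrix_)   # equals 1 - np.eye(3) here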
 def setUp(self):
     self.random_state = 1
     self.X_cand = np.array([[8, 1], [9, 1], [5, 1]])
     self.X = np.array([[1, 2], [5, 8], [8, 4], [5, 4]])
     self.y = np.array([0, 0, 1, 1])
     self.y_MISSING_LABEL = np.array(
         [MISSING_LABEL, MISSING_LABEL, MISSING_LABEL, MISSING_LABEL])
     self.classes = np.array([0, 1])
     self.clf = PWC(classes=self.classes, random_state=self.random_state)
     self.kwargs = dict(X_cand=self.X_cand, X=self.X, y=self.y)
     self.kwargs_MISSING_LABEL = dict(X_cand=self.X_cand,
                                      X=self.X,
                                      y=self.y_MISSING_LABEL)
 def test_predict_proba(self):
     pwc = PWC()
     gnb = SklearnClassifier(GaussianNB())
     clf = MultiAnnotEnsemble(estimators=[('PWC', pwc), ('GNB', gnb)],
                              voting='soft')
     self.assertRaises(NotFittedError, clf.predict_proba, X=self.X)
     clf.fit(X=self.X, y=self.y)
     P = clf.predict_proba(X=self.X)
     np.testing.assert_allclose(np.ones(len(P)), P.sum(axis=1))
     clf.voting = 'hard'
     clf.fit(X=self.X, y=self.y)
     P = clf.predict_proba(X=self.X)
     np.testing.assert_allclose(np.ones(len(P)), P.sum(axis=1))
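# --- Hedged usage sketch (not part of the scraped tests above) ------------
# MultiAnnotEnsemble as exercised above: member estimators are combined via
# soft or hard voting, and fit is called with a multi-annotator label matrix
# (one column per annotator), as in the surrounding tests. The import paths
# (in particular the module holding MultiAnnotEnsemble) are assumptions
# based on older skactiveml releases.
import numpy as np
from sklearn.naive_bayes import GaussianNB
from skactiveml.classifier import PWC, SklearnClassifier     # assumed
from skactiveml.classifier.multi import MultiAnnotEnsemble   # assumed

X_demo = np.array([[0.0], [1.0], [2.0], [3.0]])
y_demo = np.array([[0, 0], [0, 1], [1, 1], [1, 1]])  # two annotators
ens_demo = MultiAnnotEnsemble(
    estimators=[('PWC', PWC()),
                ('GNB', SklearnClassifier(GaussianNB()))],
    voting='soft', random_state=0)
ens_demo.fit(X=X_demo, y=y_demo)
print(ens_demo.predict_proba(X_demo).sum(axis=1))  # rows sum to 1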
 def test_query_param_ensemble(self):
     selector = QBC()
     ensemble_list = [
         None, 'test', 1,
         GaussianProcessClassifier(),
         SklearnClassifier(GaussianProcessClassifier, classes=self.classes),
         PWC(classes=self.classes)
     ]
     for ensemble in ensemble_list:
         self.assertRaises(TypeError,
                           selector.query,
                           X_cand=self.X_cand,
                           X=self.X,
                           y=self.y,
                           ensemble=ensemble)
    def setUp(self):
        # initialise valid data to test uncertainty parameters
        rand = np.random.RandomState(0)
        stream_length = 1000
        train_init_size = 10
        X, y = make_classification(
            n_samples=stream_length + train_init_size,
            random_state=rand.randint(2**31 - 1),
            shuffle=True,
        )

        self.X = X[:train_init_size, :]
        self.X_cand = X[[train_init_size], :]
        self.y = y[:train_init_size]
        self.clf = PWC()
        self.kwargs = dict(X_cand=self.X_cand,
                           clf=self.clf,
                           X=self.X,
                           y=self.y)
 def test_predict(self):
     pwc = PWC(random_state=0)
     gnb = SklearnClassifier(GaussianNB(), random_state=0)
     clf = MultiAnnotEnsemble(estimators=[('PWC', pwc), ('GNB', gnb)],
                              voting='soft',
                              random_state=0)
     self.assertRaises(NotFittedError, clf.predict, X=self.X)
     clf.fit(X=self.X, y=self.y)
     y_pred_soft = clf.predict(X=self.X)
     self.assertEqual(len(y_pred_soft), len(self.X))
     self.assertGreaterEqual(clf.score(self.X, self.y_true), 0.8)
     clf.voting = 'hard'
     clf.fit(X=self.X, y=self.y)
     y_pred_hard = clf.predict(X=self.X)
     self.assertEqual(len(y_pred_hard), len(self.X))
     self.assertGreaterEqual(clf.score(self.X, self.y_true), 0.8)
     clf.fit(X=self.X, y=self.y, sample_weight=np.ones_like(self.y))
     y_pred_hard = clf.predict(X=self.X)
     self.assertEqual(len(y_pred_hard), len(self.X))
     self.assertGreaterEqual(clf.score(self.X, self.y_true), 0.8)
    def test_epistemic_uncertainty_pwc(self):
        freq = np.empty((121, 2))
        for n in range(11):
            for p in range(11):
                freq[n * 11 + p] = n, p

        indices = [39, 27, 18, 68, 20]
        expected = np.array([
            0.23132135217407046, 0.22057583593855598, 0.056099946963575974,
            0.16316360415548017, 0.021220951860586187
        ])

        utilities, arr = _epistemic_uncertainty_pwc(freq, None)
        self.assertEqual(utilities.shape, (121, ))
        np.testing.assert_allclose(expected, utilities[indices])
        _epistemic_uncertainty_pwc(np.array([[2.5, 1.5]]), None)

        val_utilities = utilities
        precompute_array = np.full((1, 1), np.nan)

        utilities, precompute_array = _epistemic_uncertainty_pwc(
            freq, precompute_array)
        np.testing.assert_array_equal(val_utilities, utilities)
        np.testing.assert_array_equal(val_utilities,
                                      precompute_array[:11, :11].flatten())

        class Dummy_PWC(PWC):
            def predict_freq(self, X):
                return freq

        selector = EpistemicUncertainty(precompute=True)
        _, utilities = selector.query(**self.kwargs,
                                      clf=Dummy_PWC(classes=self.classes),
                                      return_utilities=True)
        np.testing.assert_array_equal(val_utilities, utilities[0])

        selector = EpistemicUncertainty()
        self.assertRaises(ValueError,
                          selector.query,
                          clf=PWC(classes=[0, 1, 2]),
                          **self.kwargs)
class TestFeatureSpace(unittest.TestCase):
    def setUp(self):
        self.path_prefix = os.path.dirname(visualization.__file__) + \
                           '/multi/tests/images/'

        self.X, self.y_true = make_classification(n_features=2,
                                                  n_redundant=0,
                                                  random_state=0)

        self.n_samples = self.X.shape[0]
        self.n_annotators = 5

        rng = np.random.default_rng(seed=0)

        noise = rng.binomial(n=1,
                             p=.2,
                             size=(self.n_samples, self.n_annotators))

        self.y = (self.y_true.reshape(-1, 1) + noise) % 2

        estimators = []
        for a in range(self.n_annotators):
            estimators.append((f'pwc_{a}', PWC(random_state=0)))
        self.clf_multi = MultiAnnotEnsemble(estimators=estimators,
                                            voting='soft')
        self.clf = PWC(random_state=0)
        self.ma_qs = IEThresh(random_state=0, n_annotators=self.n_annotators)

        testing.set_font_settings_for_testing()
        testing.set_reproducibility_for_testing()
        testing.setup()

    def test_ma_plot_data_set_X(self):
        self.assertRaises(ValueError, plot_ma_data_set, self.X.T, self.y,
                          self.y_true)
        self.assertRaises(ValueError, plot_ma_data_set, self.X, self.y,
                          self.y_true.reshape(-1, 1))
        self.assertRaises(TypeError,
                          plot_ma_data_set,
                          self.X,
                          self.y,
                          self.y_true,
                          fig=4)

    def test_ma_plot_data_set(self):
        y = np.array(self.y, dtype=float)
        y[np.arange(5), np.arange(5)] = np.nan
        fig = plot_ma_data_set(self.X,
                               y,
                               self.y_true,
                               fig_size=(12, 3),
                               legend_dict={
                                   'loc': 'lower center',
                                   'bbox_to_anchor': (0.5, 0.1),
                                   'ncol': 3
                               },
                               tick_dict={
                                   'labelbottom': True,
                                   'labelleft': True
                               })

        fig.tight_layout()
        fig.savefig(self.path_prefix + 'data_set_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'data_set_expected_result.pdf',
            self.path_prefix + 'data_set_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_data_set_mc(self):
        X_prime, y_true_prime = make_classification(n_features=2,
                                                    n_redundant=0,
                                                    n_clusters_per_class=1,
                                                    n_classes=4,
                                                    random_state=0)
        rng = np.random.default_rng(seed=0)

        noise = np.sum(rng.multinomial(n=1, pvals=[.7, .1, .1, .1],
                                       size=(self.n_samples,
                                             self.n_annotators)) \
                       * np.arange(4).reshape(1, 1, 4), axis=2)

        y_prime = (self.y_true.reshape(-1, 1) + noise) % 4

        fig = plot_ma_data_set(X_prime, y_prime, y_true_prime)
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'data_set_mf_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'data_set_mf_expected_result.pdf',
            self.path_prefix + 'data_set_mf_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_utility_args(self):
        y = np.array(self.y, dtype=float)
        y[np.arange(5), np.arange(5)] = np.nan
        maqs_arg_dict = {
            'clf': self.clf,
            'X': self.X,
            'y': self.y,
            'X_cand': self.X
        }
        bound = check_bound(X=self.X)
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)
        maqs_arg_dict = {
            'clf': self.clf,
            'X': self.X,
            'y': self.y,
            'A_cand': np.ones((self.n_samples, self.n_annotators))
        }
        bound = check_bound(X=self.X)
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)
        maqs_arg_dict = {'clf': self.clf, 'X': self.X, 'y': self.y}
        self.ma_qs.n_annotators = None
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)
        fig, _ = plt.subplots(ncols=7)
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          A_cand=np.ones((100, 5)),
                          fig=fig,
                          feature_bound=bound)
        self.ma_qs.n_annotators = 5
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          fig=fig,
                          feature_bound=bound)

    def test_ma_plot_utility(self):
        y = np.array(self.y, dtype=float)
        y[np.arange(5), np.arange(5)] = np.nan
        maqs_arg_dict = {'clf': self.clf_multi, 'X': self.X, 'y': self.y}
        bound = check_bound(X=self.X)
        fig = plot_ma_utility(self.ma_qs,
                              maqs_arg_dict,
                              feature_bound=bound,
                              title='utility',
                              fig_size=(20, 5))
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'plot_utility_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'plot_utility_expected_result.pdf',
            self.path_prefix + 'plot_utility_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_utility_with_X(self):
        maqs_arg_dict = {'clf': self.clf_multi, 'X': self.X, 'y': self.y}
        A_cand = np.ones((self.n_samples, self.n_annotators))
        fig = plot_ma_utility(self.ma_qs,
                              maqs_arg_dict,
                              X_cand=self.X,
                              A_cand=A_cand)
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'plot_utility_X_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'plot_utility_X_expected_result.pdf',
            self.path_prefix + 'plot_utility_X_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_decision_boundary_args(self):
        bound = check_bound(X=self.X)
        self.assertRaises(ValueError, plot_ma_decision_boundary, self.clf,
                          bound)

    def test_ma_plot_decision_boundary(self):
        bound = check_bound(X=self.X)
        self.clf.fit(self.X, majority_vote(self.y, random_state=0))
        fig = plot_ma_decision_boundary(self.clf,
                                        bound,
                                        n_annotators=self.n_annotators)
        fig.tight_layout()
        fig.savefig(self.path_prefix +
                    'plot_decision_boundary_returned_result.pdf')
        comparison = compare_images(self.path_prefix + 'plot_decision_boundary'
                                    '_expected_result.pdf',
                                    self.path_prefix + 'plot_decision_boundary'
                                    '_returned_result.pdf',
                                    tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_current_state(self):
        maqs_arg_dict = {'clf': self.clf, 'X': self.X, 'y': self.y}
        self.clf.fit(self.X, majority_vote(self.y, random_state=0))
        fig = plot_ma_current_state(self.X, self.y, self.y_true, self.ma_qs,
                                    self.clf, maqs_arg_dict)
        fig.tight_layout()
        fig.savefig(self.path_prefix +
                    'ma_plot_current_state_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'ma_plot_current_state_expected_result.pdf',
            self.path_prefix + 'ma_plot_current_state_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)
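# --- Hedged usage sketch (not part of the scraped tests above) ------------
# A condensed version of the multi-annotator plotting flow above: build a
# noisy annotator label matrix, fit a classifier on the majority vote, then
# plot the data set and the per-annotator decision boundaries. Import paths
# are assumptions based on older skactiveml releases.
import numpy as np
from sklearn.datasets import make_classification
from skactiveml.classifier import PWC                     # assumed
from skactiveml.utils import check_bound, majority_vote   # assumed
from skactiveml.visualization.multi import (              # assumed
    plot_ma_data_set, plot_ma_decision_boundary)

X_demo, y_true_demo = make_classification(n_features=2, n_redundant=0,
                                           random_state=0)
rng_demo = np.random.default_rng(0)
noise_demo = rng_demo.binomial(n=1, p=0.2, size=(len(X_demo), 3))
y_demo = (y_true_demo.reshape(-1, 1) + noise_demo) % 2  # 3 annotators
clf_demo = PWC(random_state=0)
clf_demo.fit(X_demo, majority_vote(y_demo, random_state=0))
fig_data = plot_ma_data_set(X_demo, y_demo, y_true_demo)
fig_dec = plot_ma_decision_boundary(clf_demo, check_bound(X=X_demo),
                                    n_annotators=3)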
 def test_predict_proba(self):
     pwc = PWC(classes=['tokyo', 'paris'], missing_label='nan')
     self.assertRaises(NotFittedError, pwc.predict_proba, X=self.X)
     pwc.fit(X=self.X, y=self.y_nan)
     P = pwc.predict_proba(X=self.X)
     np.testing.assert_array_equal(np.ones((len(self.X), 2)) * 0.5, P)
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     P = pwc.predict_proba(X=[self.X[0]])
     np.testing.assert_array_equal([[1 / 3, 2 / 3]], P)
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               n_neighbors=1,
               metric='precomputed',
               class_prior=1)
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     P = pwc.predict_proba(X=[[1, 0, 0]])
     np.testing.assert_array_equal([[1 / 5, 1 / 5, 3 / 5]], P)
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               n_neighbors=1,
               metric='precomputed',
               class_prior=[0, 0, 1])
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     P = pwc.predict_proba(X=[[1, 0, 0]])
     np.testing.assert_array_equal([[0, 0, 1]], P)
class TestFeatureSpace(unittest.TestCase):
    def setUp(self):
        self.path_prefix = os.path.dirname(visualization.__file__) + \
                           '/tests/images/'
        np.random.seed(0)
        self.X, self.y = make_classification(n_features=2,
                                             n_redundant=0,
                                             random_state=0)
        train_indices = np.random.randint(0, len(self.X), size=20)
        cand_indices = np.setdiff1d(np.arange(len(self.X)), train_indices)
        self.X_train = self.X[train_indices]
        self.y_train = self.y[train_indices]
        self.X_cand = self.X[cand_indices]
        self.clf = PWC()
        self.clf.fit(self.X_train, self.y_train)
        self.qs = UncertaintySampling()
        self.qs_dict = {'clf': self.clf, 'X': self.X_train, 'y': self.y_train}

        x1_min = min(self.X[:, 0])
        x1_max = max(self.X[:, 0])
        x2_min = min(self.X[:, 1])
        x2_max = max(self.X[:, 1])
        self.bound = [[x1_min, x2_min], [x1_max, x2_max]]

        self.cmap = 'jet'

        testing.set_font_settings_for_testing()
        testing.set_reproducibility_for_testing()
        testing.setup()

    # Tests for plot_decision_boundary function
    def test_decision_boundary_clf(self):
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.qs,
                          feature_bound=self.bound)
        clf = TestClassifier()
        self.assertRaises(AttributeError,
                          plot_decision_boundary,
                          clf=clf,
                          feature_bound=self.bound)

    def test_decision_boundary_bound(self):
        self.assertRaises(ValueError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=[0, 0, 1, 1])

    def test_decision_boundary_res(self):
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          res='string')

    def test_decision_boundary_ax(self):
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          ax=3)

    def test_decision_boundary_confidence(self):
        self.assertRaises(ValueError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence=0.0)
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence='string')
        plot_decision_boundary(self.clf, self.bound, confidence=None)
        svc = LinearSVC()
        svc.fit(self.X_train, self.y_train)
        self.assertWarns(Warning,
                         plot_decision_boundary,
                         clf=svc,
                         feature_bound=self.bound,
                         confidence=0.75)

    def test_decision_boundary_cmap(self):
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          cmap=4)

    def test_decision_boundary_boundary_dict(self):
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          boundary_dict='string')
        plot_decision_boundary(clf=self.clf,
                               feature_bound=self.bound,
                               boundary_dict={'colors': 'r'})

    def test_decision_boundary_confidence_dict(self):
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence_dict='string')
        plot_decision_boundary(clf=self.clf,
                               feature_bound=self.bound,
                               confidence_dict={'linestyles': ':'})

    # Tests for plot_utility function
    def test_utility_qs(self):
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.clf,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound)

    def test_utility_qs_dict(self):
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict={0, 1, 2},
                          feature_bound=self.bound)

        qs_dict = self.qs_dict
        qs_dict['X_cand'] = []
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=qs_dict,
                          feature_bound=self.bound)

    def test_utility_X_cand(self):
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict)

    def test_utility_res(self):
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          res=-3)

    def test_utility_ax(self):
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          ax=2)

    def test_utility_contour_dict(self):
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          contour_dict='string')
        plot_utility(qs=self.qs,
                     qs_dict=self.qs_dict,
                     feature_bound=self.bound,
                     contour_dict={'linestyles': '.'})

    # Graphical tests
    def test_no_candidates(self):
        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, feature_bound=self.bound, ax=ax)
        ax.scatter(self.X_cand[:, 0], self.X_cand[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(self.clf, self.bound, ax=ax, cmap=self.cmap)

        fig.savefig(self.path_prefix + 'dec_bound_wo_cand.pdf')
        comparison = compare_images(self.path_prefix +
                                    'dec_bound_wo_cand_base.pdf',
                                    self.path_prefix + 'dec_bound_wo_cand.pdf',
                                    tol=0)
        self.assertIsNone(comparison)

    def test_with_candidates(self):
        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, X_cand=self.X_cand, ax=ax)
        ax.scatter(self.X[:, 0], self.X[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(self.clf, self.bound, ax=ax, cmap=self.cmap)

        fig.savefig(self.path_prefix + 'dec_bound_w_cand.pdf')
        comparison = compare_images(self.path_prefix +
                                    'dec_bound_w_cand_base.pdf',
                                    self.path_prefix + 'dec_bound_w_cand.pdf',
                                    tol=0)
        self.assertIsNone(comparison)

    def test_multi_class(self):
        X, y = make_classification(n_features=2,
                                   n_redundant=0,
                                   random_state=0,
                                   n_classes=3,
                                   n_clusters_per_class=1)
        train_indices = np.random.randint(0, len(X), size=20)
        cand_indices = np.setdiff1d(np.arange(len(X)), train_indices)
        X_train = X[train_indices]
        y_train = y[train_indices]
        X_cand = X[cand_indices]
        clf = PWC()
        clf.fit(X_train, y_train)
        qs = UncertaintySampling()
        bound = [[min(X[:, 0]), min(X[:, 1])], [max(X[:, 0]), max(X[:, 1])]]

        fig, ax = plt.subplots()
        plot_utility(qs, {
            'clf': clf,
            'X': X_train,
            'y': y_train
        },
                     feature_bound=bound,
                     ax=ax)
        ax.scatter(X_cand[:, 0], X_cand[:, 1], c='k', marker='.')
        ax.scatter(X_train[:, 0],
                   X_train[:, 1],
                   c=y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(clf, bound, ax=ax, res=101, cmap=self.cmap)
        fig.savefig(self.path_prefix + 'dec_bound_multiclass.pdf')
        comparison = compare_images(
            self.path_prefix + 'dec_bound_multiclass_base.pdf',
            self.path_prefix + 'dec_bound_multiclass.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_svc(self):
        svc = LinearSVC()
        svc.fit(self.X_train, self.y_train)

        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, X_cand=self.X_cand, ax=ax)
        ax.scatter(self.X[:, 0], self.X[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(svc, self.bound, ax=ax, cmap=self.cmap)

        fig.savefig(self.path_prefix + 'dec_bound_svc.pdf')
        comparison = compare_images(self.path_prefix +
                                    'dec_bound_svc_base.pdf',
                                    self.path_prefix + 'dec_bound_svc.pdf',
                                    tol=0)
        self.assertIsNone(comparison)
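# --- Hedged usage sketch (not part of the scraped tests above) ------------
# The single-annotator plotting pattern used by the graphical tests above:
# draw the query strategy's utility contours and the classifier's decision
# boundary on one axis. Import paths are assumptions based on older
# skactiveml releases.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from skactiveml.classifier import PWC                          # assumed
from skactiveml.pool import UncertaintySampling                # assumed
from skactiveml.visualization import (plot_decision_boundary,  # assumed
                                      plot_utility)

X_demo, y_demo = make_classification(n_features=2, n_redundant=0,
                                     random_state=0)
clf_demo = PWC().fit(X_demo, y_demo)
bound_demo = [[X_demo[:, 0].min(), X_demo[:, 1].min()],
              [X_demo[:, 0].max(), X_demo[:, 1].max()]]
fig_demo, ax_demo = plt.subplots()
plot_utility(UncertaintySampling(),
             {'clf': clf_demo, 'X': X_demo, 'y': y_demo},
             feature_bound=bound_demo, ax=ax_demo)
plot_decision_boundary(clf_demo, bound_demo, ax=ax_demo)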
 def test_predict(self):
     pwc = PWC(classes=['tokyo', 'paris'],
               missing_label='nan',
               random_state=0)
     self.assertRaises(NotFittedError, pwc.predict, X=self.X)
     pwc.fit(X=self.X, y=self.y_nan)
     y = pwc.predict(self.X)
     np.testing.assert_array_equal(['tokyo', 'paris', 'tokyo'], y)
     pwc = PWC(classes=['tokyo', 'paris'],
               missing_label='nan',
               random_state=1)
     pwc.fit(X=self.X, y=self.y_nan)
     y = pwc.predict(self.X)
     np.testing.assert_array_equal(['tokyo', 'tokyo', 'paris'], y)
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     y = pwc.predict(self.X)
     np.testing.assert_array_equal(['tokyo', 'tokyo', 'tokyo'], y)
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               cost_matrix=[[0, 1, 4], [10, 0, 5], [2, 2, 0]])
     pwc.fit(X=self.X, y=self.y_nan)
     y = pwc.predict(self.X)
     np.testing.assert_array_equal(['paris', 'paris', 'paris'], y)
     pwc = PWC(classes=['tokyo', 'paris'],
               missing_label='nan',
               cost_matrix=[[0, 1], [10, 0]])
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     y = pwc.predict(self.X)
     np.testing.assert_array_equal(['paris', 'paris', 'paris'], y)
 def test_predict_freq(self):
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               n_neighbors=10,
               metric='rbf',
               metric_dict={'gamma': 2})
     self.assertRaises(NotFittedError, pwc.predict_freq, X=self.X)
     pwc.fit(X=self.X, y=self.y_nan)
     F = pwc.predict_freq(X=self.X)
     np.testing.assert_array_equal(np.zeros((len(self.X), 3)), F)
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     F = pwc.predict_freq(X=[self.X[0]])
     np.testing.assert_array_equal([[0, 1, 2]], F)
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               n_neighbors=1)
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     F = pwc.predict_freq(X=[self.X[0]])
     np.testing.assert_array_equal([[0, 1, 0]], F)
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               n_neighbors=1,
               metric='precomputed')
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     self.assertRaises(ValueError, pwc.predict_freq, X=[[1, 0]])
     self.assertRaises(ValueError, pwc.predict_freq, X=[[1], [0]])
     F = pwc.predict_freq(X=[[1, 0, 0]])
     np.testing.assert_array_equal([[0, 0, 2]], F)
     rbf_kernel = lambda x, y, gamma: np.exp(-gamma * np.sum((x - y)**2))
     pwc = PWC(classes=['tokyo', 'paris'],
               missing_label='nan',
               random_state=0,
               metric=rbf_kernel,
               metric_dict={'gamma': 2})
     F_call = pwc.fit(X=self.X, y=self.y).predict_freq(np.ones_like(self.X))
     pwc = PWC(classes=['tokyo', 'paris'],
               missing_label='nan',
               metric='rbf',
               metric_dict={'gamma': 2},
               random_state=0)
     F_rbf = pwc.fit(X=self.X, y=self.y).predict_freq(np.ones_like(self.X))
     np.testing.assert_array_equal(F_call, F_rbf)