def test_predict(self):
     """Check ``PWC.predict`` for different seeds and cost matrices.

     The expected labels are tied to the fixtures ``self.X``, ``self.y``,
     ``self.y_nan`` and ``self.w``, which are defined outside this method.
     """
     clf = PWC(classes=['tokyo', 'paris'],
               missing_label='nan',
               random_state=0)

     # Calling ``predict`` before ``fit`` has to raise ``NotFittedError``.
     with self.assertRaises(NotFittedError):
         clf.predict(X=self.X)

     # Seed 0, fitted on ``self.y_nan``.
     labels = clf.fit(X=self.X, y=self.y_nan).predict(self.X)
     np.testing.assert_array_equal(['tokyo', 'paris', 'tokyo'], labels)

     # Seed 1 on the same data: a different labelling is expected.
     clf = PWC(classes=['tokyo', 'paris'],
               missing_label='nan',
               random_state=1)
     labels = clf.fit(X=self.X, y=self.y_nan).predict(self.X)
     np.testing.assert_array_equal(['tokyo', 'tokyo', 'paris'], labels)

     # Refit the very same estimator on ``self.y`` with sample weights.
     labels = clf.fit(X=self.X, y=self.y,
                      sample_weight=self.w).predict(self.X)
     np.testing.assert_array_equal(['tokyo', 'tokyo', 'tokyo'], labels)

     # Three classes with an asymmetric cost matrix, fitted on ``y_nan``.
     clf = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               cost_matrix=[[0, 1, 4], [10, 0, 5], [2, 2, 0]])
     labels = clf.fit(X=self.X, y=self.y_nan).predict(self.X)
     np.testing.assert_array_equal(['paris', 'paris', 'paris'], labels)

     # Two classes with an asymmetric cost matrix and weighted samples.
     clf = PWC(classes=['tokyo', 'paris'],
               missing_label='nan',
               cost_matrix=[[0, 1], [10, 0]])
     labels = clf.fit(X=self.X, y=self.y,
                      sample_weight=self.w).predict(self.X)
     np.testing.assert_array_equal(['paris', 'paris', 'paris'], labels)
 def test_predict_proba(self):
     """Check ``PWC.predict_proba`` against the expected probabilities.

     The expected values are floating-point quotients such as ``1 / 3``,
     so results are compared with a tight tolerance rather than
     bit-for-bit: exact equality would make the test depend on rounding
     details of the implementation.
     """
     # ``rtol`` absorbs rounding of non-zero entries; ``atol`` is required
     # because a purely relative check can only pass for an expected zero
     # when the result is exactly zero.
     tol = {'rtol': 1e-7, 'atol': 1e-12}

     pwc = PWC(classes=['tokyo', 'paris'], missing_label='nan')
     # Calling ``predict_proba`` before ``fit`` must raise.
     self.assertRaises(NotFittedError, pwc.predict_proba, X=self.X)

     # Fitted on ``self.y_nan``: both classes are expected at 0.5.
     pwc.fit(X=self.X, y=self.y_nan)
     P = pwc.predict_proba(X=self.X)
     np.testing.assert_allclose(P, np.full((len(self.X), 2), 0.5), **tol)

     # Fitted on ``self.y`` with sample weights.
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     P = pwc.predict_proba(X=[self.X[0]])
     np.testing.assert_allclose(P, [[1 / 3, 2 / 3]], **tol)

     # Precomputed metric with a scalar class prior.
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               n_neighbors=1,
               metric='precomputed',
               class_prior=1)
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     P = pwc.predict_proba(X=[[1, 0, 0]])
     np.testing.assert_allclose(P, [[1 / 5, 1 / 5, 3 / 5]], **tol)

     # Precomputed metric with a per-class prior.
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               n_neighbors=1,
               metric='precomputed',
               class_prior=[0, 0, 1])
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     P = pwc.predict_proba(X=[[1, 0, 0]])
     np.testing.assert_allclose(P, [[0, 0, 1]], **tol)
 def test_predict_freq(self):
     """Check ``PWC.predict_freq`` for several metrics and inputs.

     Covers the unfitted case, fitting on ``self.y_nan``, weighted
     samples, a precomputed metric (including malformed input) and the
     agreement between a callable RBF kernel and the built-in ``'rbf'``.
     """
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               n_neighbors=10,
               metric='rbf',
               metric_dict={'gamma': 2})
     # Calling ``predict_freq`` before ``fit`` must raise.
     self.assertRaises(NotFittedError, pwc.predict_freq, X=self.X)

     # Fitted on ``self.y_nan``: all frequencies are expected to be zero.
     pwc.fit(X=self.X, y=self.y_nan)
     F = pwc.predict_freq(X=self.X)
     np.testing.assert_array_equal(np.zeros((len(self.X), 3)), F)

     # Fitted on ``self.y`` with sample weights.
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     F = pwc.predict_freq(X=[self.X[0]])
     np.testing.assert_array_equal([[0, 1, 2]], F)

     # Default metric restricted to a single neighbour.
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               n_neighbors=1)
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     F = pwc.predict_freq(X=[self.X[0]])
     np.testing.assert_array_equal([[0, 1, 0]], F)

     # Precomputed metric: these two inputs must be rejected with
     # ``ValueError``, the three-column row is accepted.
     pwc = PWC(classes=['tokyo', 'paris', 'new york'],
               missing_label='nan',
               n_neighbors=1,
               metric='precomputed')
     pwc.fit(X=self.X, y=self.y, sample_weight=self.w)
     self.assertRaises(ValueError, pwc.predict_freq, X=[[1, 0]])
     self.assertRaises(ValueError, pwc.predict_freq, X=[[1], [0]])
     F = pwc.predict_freq(X=[[1, 0, 0]])
     np.testing.assert_array_equal([[0, 0, 2]], F)

     def rbf_kernel(x, y, gamma):
         """Return the RBF kernel value for a single pair of samples."""
         return np.exp(-gamma * np.sum((x - y)**2))

     # A callable kernel has to agree with the built-in 'rbf' metric.
     pwc = PWC(classes=['tokyo', 'paris'],
               missing_label='nan',
               random_state=0,
               metric=rbf_kernel,
               metric_dict={'gamma': 2})
     F_call = pwc.fit(X=self.X, y=self.y).predict_freq(np.ones_like(self.X))
     pwc = PWC(classes=['tokyo', 'paris'],
               missing_label='nan',
               metric='rbf',
               metric_dict={'gamma': 2},
               random_state=0)
     F_rbf = pwc.fit(X=self.X, y=self.y).predict_freq(np.ones_like(self.X))
     # The two kernels are evaluated by different code, so compare with a
     # tight tolerance instead of demanding bit-identical floats.
     np.testing.assert_allclose(F_call, F_rbf, rtol=1e-7, atol=1e-12)
    def test_multi_class(self):
        """Render utility and decision boundary for three classes and
        compare the saved PDF with the stored baseline image.

        The training indices come from NumPy's global random state, so the
        result depends on how that state was seeded before this test runs.
        """
        X, y = make_classification(n_features=2,
                                   n_redundant=0,
                                   random_state=0,
                                   n_classes=3,
                                   n_clusters_per_class=1)
        train_indices = np.random.randint(0, len(X), size=20)
        cand_indices = np.setdiff1d(np.arange(len(X)), train_indices)
        X_train = X[train_indices]
        y_train = y[train_indices]
        X_cand = X[cand_indices]
        clf = PWC()
        clf.fit(X_train, y_train)
        qs = UncertaintySampling()
        bound = [[min(X[:, 0]), min(X[:, 1])], [max(X[:, 0]), max(X[:, 1])]]

        result_path = self.path_prefix + 'dec_bound_multiclass.pdf'
        baseline_path = self.path_prefix + 'dec_bound_multiclass_base.pdf'

        fig, ax = plt.subplots()
        try:
            plot_utility(qs, {
                'clf': clf,
                'X': X_train,
                'y': y_train
            },
                         feature_bound=bound,
                         ax=ax)
            ax.scatter(X_cand[:, 0], X_cand[:, 1], c='k', marker='.')
            ax.scatter(X_train[:, 0],
                       X_train[:, 1],
                       c=y_train,
                       cmap=self.cmap,
                       alpha=.9,
                       marker='.')
            plot_decision_boundary(clf, bound, ax=ax, res=101, cmap=self.cmap)
            fig.savefig(result_path)
            comparison = compare_images(baseline_path, result_path, tol=0)
            self.assertIsNone(comparison)
        finally:
            # Release the figure even when plotting or the comparison
            # fails, so open figures do not pile up across tests.
            plt.close(fig)
 def test_fit(self):
     """Check the attributes ``PWC.fit`` sets: ``cost_matrix_`` and ``V_``.

     The ``cost_matrix`` init parameter has to stay ``None`` after
     fitting, while the fitted ``cost_matrix_`` must equal ``1 - eye(3)``.
     """
     expected_costs = 1 - np.eye(3)
     clf = PWC(classes=['tokyo', 'paris', 'new york'], missing_label='nan')

     # Fitted on ``self.y_nan``: ``V_`` is expected to be all zeros.
     clf.fit(X=self.X, y=self.y_nan)
     self.assertIsNone(clf.cost_matrix)
     np.testing.assert_array_equal(expected_costs, clf.cost_matrix_)
     np.testing.assert_array_equal(np.zeros((3, 3)), clf.V_)

     # Fitted on ``self.y`` without sample weights.
     clf.fit(X=self.X, y=self.y)
     self.assertIsNone(clf.cost_matrix)
     np.testing.assert_array_equal(expected_costs, clf.cost_matrix_)
     unweighted_votes = [[0, 0, 1], [0, 0, 0], [0, 1, 0]]
     np.testing.assert_array_equal(unweighted_votes, clf.V_)

     # Fitted on ``self.y`` with sample weights ``self.w``.
     clf.fit(X=self.X, y=self.y, sample_weight=self.w)
     weighted_votes = [[0, 0, 2], [0, 0, 0], [0, 1, 0]]
     np.testing.assert_array_equal(weighted_votes, clf.V_)
class TestFeatureSpace(unittest.TestCase):
    """Tests for ``plot_decision_boundary`` and ``plot_utility``.

    Argument-validation tests check the raised exception types; the
    graphical tests save a PDF below ``self.path_prefix`` and compare it
    with a stored ``*_base.pdf`` baseline image.
    """

    def setUp(self):
        """Build the seeded data set, fitted classifier and query strategy
        shared by all tests."""
        self.path_prefix = os.path.dirname(visualization.__file__) + \
                           '/tests/images/'
        # The training indices below come from NumPy's global random state.
        np.random.seed(0)
        self.X, self.y = make_classification(n_features=2,
                                             n_redundant=0,
                                             random_state=0)
        train_indices = np.random.randint(0, len(self.X), size=20)
        cand_indices = np.setdiff1d(np.arange(len(self.X)), train_indices)
        self.X_train = self.X[train_indices]
        self.y_train = self.y[train_indices]
        self.X_cand = self.X[cand_indices]
        self.clf = PWC()
        self.clf.fit(self.X_train, self.y_train)
        self.qs = UncertaintySampling()
        self.qs_dict = {'clf': self.clf, 'X': self.X_train, 'y': self.y_train}

        # Plotting bounds: [[x1_min, x2_min], [x1_max, x2_max]].
        x1_min = self.X[:, 0].min()
        x1_max = self.X[:, 0].max()
        x2_min = self.X[:, 1].min()
        x2_max = self.X[:, 1].max()
        self.bound = [[x1_min, x2_min], [x1_max, x2_max]]

        self.cmap = 'jet'

        testing.set_font_settings_for_testing()
        testing.set_reproducibility_for_testing()
        testing.setup()

    def tearDown(self):
        """Close every figure opened by a test.

        Several tests create figures (explicitly via ``plt.subplots`` or
        through the plotting functions) and never close them; without this
        they stay open for the whole test run.
        """
        plt.close('all')

    def _assert_matches_baseline(self, fig, stem):
        """Save ``fig`` as ``<stem>.pdf`` and compare with ``<stem>_base.pdf``.

        Both files live below ``self.path_prefix``. ``compare_images`` is
        expected to return ``None`` when the images match at ``tol=0``.
        """
        result_path = self.path_prefix + stem + '.pdf'
        baseline_path = self.path_prefix + stem + '_base.pdf'
        fig.savefig(result_path)
        comparison = compare_images(baseline_path, result_path, tol=0)
        self.assertIsNone(comparison)

    # Tests for plot_decision_boundary function
    def test_decision_boundary_clf(self):
        """An invalid ``clf`` raises ``TypeError`` or ``AttributeError``."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.qs,
                          feature_bound=self.bound)
        clf = TestClassifier()
        self.assertRaises(AttributeError,
                          plot_decision_boundary,
                          clf=clf,
                          feature_bound=self.bound)

    def test_decision_boundary_bound(self):
        """A flat list as ``feature_bound`` raises ``ValueError``."""
        self.assertRaises(ValueError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=[0, 0, 1, 1])

    def test_decision_boundary_res(self):
        """A string as ``res`` raises ``TypeError``."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          res='string')

    def test_decision_boundary_ax(self):
        """An integer as ``ax`` raises ``TypeError``."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          ax=3)

    def test_decision_boundary_confidence(self):
        """Check validation of ``confidence`` and the warning for ``LinearSVC``."""
        self.assertRaises(ValueError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence=0.0)
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence='string')
        # ``confidence=None`` must be accepted without raising.
        plot_decision_boundary(self.clf, self.bound, confidence=None)
        svc = LinearSVC()
        svc.fit(self.X_train, self.y_train)
        # With a ``LinearSVC`` a warning is expected when a confidence
        # level is requested.
        self.assertWarns(Warning,
                         plot_decision_boundary,
                         clf=svc,
                         feature_bound=self.bound,
                         confidence=0.75)

    def test_decision_boundary_cmap(self):
        """An integer as ``cmap`` raises ``TypeError``."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          cmap=4)

    def test_decision_boundary_boundary_dict(self):
        """``boundary_dict`` must not be a string; a dict is accepted."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          boundary_dict='string')
        plot_decision_boundary(clf=self.clf,
                               feature_bound=self.bound,
                               boundary_dict={'colors': 'r'})

    def test_decision_boundary_confidence_dict(self):
        """``confidence_dict`` must not be a string; a dict is accepted."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence_dict='string')
        plot_decision_boundary(clf=self.clf,
                               feature_bound=self.bound,
                               confidence_dict={'linestyles': ':'})

    # Tests for plot_utility function
    def test_utility_qs(self):
        """Passing a classifier as ``qs`` raises ``TypeError``."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.clf,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound)

    def test_utility_qs_dict(self):
        """A set as ``qs_dict`` raises ``TypeError``; a dict containing
        ``'X_cand'`` raises ``ValueError``."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict={0, 1, 2},
                          feature_bound=self.bound)

        # Work on a copy so the shared fixture dict is left untouched.
        qs_dict = dict(self.qs_dict, X_cand=[])
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=qs_dict,
                          feature_bound=self.bound)

    def test_utility_X_cand(self):
        """Omitting both ``X_cand`` and ``feature_bound`` raises ``ValueError``."""
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict)

    def test_utility_res(self):
        """A negative ``res`` raises ``ValueError``."""
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          res=-3)

    def test_utility_ax(self):
        """An integer as ``ax`` raises ``TypeError``."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          ax=2)

    def test_utility_contour_dict(self):
        """``contour_dict`` must not be a string; a dict is accepted."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          contour_dict='string')
        plot_utility(qs=self.qs,
                     qs_dict=self.qs_dict,
                     feature_bound=self.bound,
                     contour_dict={'linestyles': '.'})

    # Graphical tests
    def test_no_candidates(self):
        """Utility plotted over ``feature_bound`` (no ``X_cand``)."""
        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, feature_bound=self.bound, ax=ax)
        ax.scatter(self.X_cand[:, 0], self.X_cand[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(self.clf, self.bound, ax=ax, cmap=self.cmap)

        self._assert_matches_baseline(fig, 'dec_bound_wo_cand')

    def test_with_candidates(self):
        """Utility plotted for the explicit candidates ``self.X_cand``."""
        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, X_cand=self.X_cand, ax=ax)
        ax.scatter(self.X[:, 0], self.X[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(self.clf, self.bound, ax=ax, cmap=self.cmap)

        self._assert_matches_baseline(fig, 'dec_bound_w_cand')

    def test_multi_class(self):
        """Utility and decision boundary for a three-class data set."""
        X, y = make_classification(n_features=2,
                                   n_redundant=0,
                                   random_state=0,
                                   n_classes=3,
                                   n_clusters_per_class=1)
        # Continues the global random stream seeded in ``setUp``.
        train_indices = np.random.randint(0, len(X), size=20)
        cand_indices = np.setdiff1d(np.arange(len(X)), train_indices)
        X_train = X[train_indices]
        y_train = y[train_indices]
        X_cand = X[cand_indices]
        clf = PWC()
        clf.fit(X_train, y_train)
        qs = UncertaintySampling()
        bound = [[min(X[:, 0]), min(X[:, 1])], [max(X[:, 0]), max(X[:, 1])]]

        fig, ax = plt.subplots()
        plot_utility(qs, {
            'clf': clf,
            'X': X_train,
            'y': y_train
        },
                     feature_bound=bound,
                     ax=ax)
        ax.scatter(X_cand[:, 0], X_cand[:, 1], c='k', marker='.')
        ax.scatter(X_train[:, 0],
                   X_train[:, 1],
                   c=y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(clf, bound, ax=ax, res=101, cmap=self.cmap)

        self._assert_matches_baseline(fig, 'dec_bound_multiclass')

    def test_svc(self):
        """Decision boundary of a ``LinearSVC`` on top of the utility plot."""
        svc = LinearSVC()
        svc.fit(self.X_train, self.y_train)

        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, X_cand=self.X_cand, ax=ax)
        ax.scatter(self.X[:, 0], self.X[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(svc, self.bound, ax=ax, cmap=self.cmap)

        self._assert_matches_baseline(fig, 'dec_bound_svc')
# Example #7
# 0  (NOTE(review): residue of the listing this snippet was copied from; meaning unconfirmed)
class TestFeatureSpace(unittest.TestCase):
    """Tests for the multi-annotator plotting helpers.

    Covers ``plot_ma_data_set``, ``plot_ma_utility``,
    ``plot_ma_decision_boundary`` and ``plot_ma_current_state``. Graphical
    tests save ``<stem>_returned_result.pdf`` below ``self.path_prefix``
    and compare it with the stored ``<stem>_expected_result.pdf``.
    """

    def setUp(self):
        """Build a two-feature data set with noisy labels of five
        annotators, plus the classifiers and query strategy under test."""
        self.path_prefix = os.path.dirname(visualization.__file__) + \
                           '/multi/tests/images/'

        self.X, self.y_true = make_classification(n_features=2,
                                                  n_redundant=0,
                                                  random_state=0)

        self.n_samples = self.X.shape[0]
        self.n_annotators = 5

        rng = np.random.default_rng(seed=0)

        # One 0/1 draw (p=0.2) per sample and annotator; adding it modulo 2
        # changes the true label wherever the draw is 1.
        noise = rng.binomial(n=1,
                             p=.2,
                             size=(self.n_samples, self.n_annotators))

        self.y = (self.y_true.reshape(-1, 1) + noise) % 2

        estimators = [(f'pwc_{a}', PWC(random_state=0))
                      for a in range(self.n_annotators)]
        self.clf_multi = MultiAnnotEnsemble(estimators=estimators,
                                            voting='soft')
        self.clf = PWC(random_state=0)
        self.ma_qs = IEThresh(random_state=0, n_annotators=self.n_annotators)

        testing.set_font_settings_for_testing()
        testing.set_reproducibility_for_testing()
        testing.setup()

    def tearDown(self):
        """Close every figure opened by a test.

        The plotting helpers return figures and
        ``test_ma_plot_utility_args`` opens one explicitly; none of them
        is closed by the tests themselves.
        """
        plt.close('all')

    def _assert_matches_expected(self, fig, stem):
        """Lay out and save ``fig``, then compare it with the stored image.

        The figure is written to ``<stem>_returned_result.pdf`` and compared
        with ``<stem>_expected_result.pdf``; ``compare_images`` is expected
        to return ``None`` when the images match at ``tol=0``.
        """
        returned_path = self.path_prefix + stem + '_returned_result.pdf'
        expected_path = self.path_prefix + stem + '_expected_result.pdf'
        fig.tight_layout()
        fig.savefig(returned_path)
        comparison = compare_images(expected_path, returned_path, tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_data_set_X(self):
        """Invalid arguments of ``plot_ma_data_set`` raise the expected errors."""
        # Transposed ``X``.
        self.assertRaises(ValueError, plot_ma_data_set, self.X.T, self.y,
                          self.y_true)
        # ``y_true`` given as a column vector.
        self.assertRaises(ValueError, plot_ma_data_set, self.X, self.y,
                          self.y_true.reshape(-1, 1))
        # ``fig`` that is not a figure.
        self.assertRaises(TypeError,
                          plot_ma_data_set,
                          self.X,
                          self.y,
                          self.y_true,
                          fig=4)

    def test_ma_plot_data_set(self):
        """Plot the data set with some labels missing and custom dicts."""
        y = np.array(self.y, dtype=float)
        # Mark the label of annotator ``i`` for sample ``i`` (i < 5) as NaN.
        y[np.arange(5), np.arange(5)] = np.nan
        fig = plot_ma_data_set(self.X,
                               y,
                               self.y_true,
                               fig_size=(12, 3),
                               legend_dict={
                                   'loc': 'lower center',
                                   'bbox_to_anchor': (0.5, 0.1),
                                   'ncol': 3
                               },
                               tick_dict={
                                   'labelbottom': True,
                                   'labelleft': True
                               })

        self._assert_matches_expected(fig, 'data_set')

    def test_ma_plot_data_set_mc(self):
        """Plot a four-class data set with noisy annotator labels."""
        X_prime, y_true_prime = make_classification(n_features=2,
                                                    n_redundant=0,
                                                    n_clusters_per_class=1,
                                                    n_classes=4,
                                                    random_state=0)
        rng = np.random.default_rng(seed=0)

        # One-hot multinomial draws turned into an offset in {0, 1, 2, 3}
        # per sample and annotator.
        one_hot = rng.multinomial(n=1,
                                  pvals=[.7, .1, .1, .1],
                                  size=(self.n_samples, self.n_annotators))
        noise = np.sum(one_hot * np.arange(4).reshape(1, 1, 4), axis=2)

        # NOTE(review): the annotator labels are derived from the binary
        # ``self.y_true`` rather than from ``y_true_prime``. This looks
        # unintended, but the stored expected image was presumably produced
        # with it, so it is left unchanged here.
        y_prime = (self.y_true.reshape(-1, 1) + noise) % 4

        fig = plot_ma_data_set(X_prime, y_prime, y_true_prime)
        self._assert_matches_expected(fig, 'data_set_mf')

    def test_ma_plot_utility_args(self):
        """Each invalid argument combination below must make
        ``plot_ma_utility`` raise ``ValueError``."""
        bound = check_bound(X=self.X)

        # 'X_cand' supplied through the query-strategy arguments.
        maqs_arg_dict = {
            'clf': self.clf,
            'X': self.X,
            'y': self.y,
            'X_cand': self.X
        }
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)

        # 'A_cand' supplied through the query-strategy arguments.
        maqs_arg_dict = {
            'clf': self.clf,
            'X': self.X,
            'y': self.y,
            'A_cand': np.ones((self.n_samples, self.n_annotators))
        }
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)

        # Query strategy without a number of annotators.
        maqs_arg_dict = {'clf': self.clf, 'X': self.X, 'y': self.y}
        self.ma_qs.n_annotators = None
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)

        # Figure with seven axes, while ``A_cand`` / the query strategy
        # describe five annotators.
        fig, _ = plt.subplots(ncols=7)
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          A_cand=np.ones((100, 5)),
                          fig=fig,
                          feature_bound=bound)
        self.ma_qs.n_annotators = 5
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          fig=fig,
                          feature_bound=bound)

    def test_ma_plot_utility(self):
        """Plot the utilities over ``feature_bound`` with title and size."""
        maqs_arg_dict = {'clf': self.clf_multi, 'X': self.X, 'y': self.y}
        bound = check_bound(X=self.X)
        fig = plot_ma_utility(self.ma_qs,
                              maqs_arg_dict,
                              feature_bound=bound,
                              title='utility',
                              fig_size=(20, 5))
        self._assert_matches_expected(fig, 'plot_utility')

    def test_ma_plot_utility_with_X(self):
        """Plot the utilities for explicit ``X_cand`` and ``A_cand``."""
        maqs_arg_dict = {'clf': self.clf_multi, 'X': self.X, 'y': self.y}
        A_cand = np.ones((self.n_samples, self.n_annotators))
        fig = plot_ma_utility(self.ma_qs,
                              maqs_arg_dict,
                              X_cand=self.X,
                              A_cand=A_cand)
        self._assert_matches_expected(fig, 'plot_utility_X')

    def test_ma_plot_decision_boundary_args(self):
        """Calling without ``n_annotators`` or a figure raises ``ValueError``."""
        bound = check_bound(X=self.X)
        self.assertRaises(ValueError, plot_ma_decision_boundary, self.clf,
                          bound)

    def test_ma_plot_decision_boundary(self):
        """Plot the boundary of a classifier fitted on majority votes."""
        bound = check_bound(X=self.X)
        self.clf.fit(self.X, majority_vote(self.y, random_state=0))
        fig = plot_ma_decision_boundary(self.clf,
                                        bound,
                                        n_annotators=self.n_annotators)
        self._assert_matches_expected(fig, 'plot_decision_boundary')

    def test_ma_plot_current_state(self):
        """Plot the combined current-state figure."""
        maqs_arg_dict = {'clf': self.clf, 'X': self.X, 'y': self.y}
        self.clf.fit(self.X, majority_vote(self.y, random_state=0))
        fig = plot_ma_current_state(self.X, self.y, self.y_true, self.ma_qs,
                                    self.clf, maqs_arg_dict)
        self._assert_matches_expected(fig, 'ma_plot_current_state')