def test_predict(self):
    """Check the labels returned by ``PWC.predict`` in several setups."""
    # An estimator that has not been fitted must refuse to predict.
    clf = PWC(classes=['tokyo', 'paris'], missing_label='nan',
              random_state=0)
    with self.assertRaises(NotFittedError):
        clf.predict(X=self.X)

    # Fitted on self.y_nan, the expected labels differ between the two
    # random states used below.
    clf.fit(X=self.X, y=self.y_nan)
    predicted = clf.predict(self.X)
    np.testing.assert_array_equal(['tokyo', 'paris', 'tokyo'], predicted)

    clf = PWC(classes=['tokyo', 'paris'], missing_label='nan',
              random_state=1)
    clf.fit(X=self.X, y=self.y_nan)
    predicted = clf.predict(self.X)
    np.testing.assert_array_equal(['tokyo', 'tokyo', 'paris'], predicted)

    # Refit the same estimator on the labeled, weighted data.
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    predicted = clf.predict(self.X)
    np.testing.assert_array_equal(['tokyo', 'tokyo', 'tokyo'], predicted)

    # Three classes with an explicit cost matrix.
    clf = PWC(
        classes=['tokyo', 'paris', 'new york'],
        missing_label='nan',
        cost_matrix=[[0, 1, 4], [10, 0, 5], [2, 2, 0]],
    )
    clf.fit(X=self.X, y=self.y_nan)
    predicted = clf.predict(self.X)
    np.testing.assert_array_equal(['paris', 'paris', 'paris'], predicted)

    # Two classes with an explicit cost matrix and sample weights.
    clf = PWC(
        classes=['tokyo', 'paris'],
        missing_label='nan',
        cost_matrix=[[0, 1], [10, 0]],
    )
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    predicted = clf.predict(self.X)
    np.testing.assert_array_equal(['paris', 'paris', 'paris'], predicted)
def test_predict_proba(self):
    """Check the class probabilities returned by ``PWC.predict_proba``."""
    clf = PWC(classes=['tokyo', 'paris'], missing_label='nan')

    # Calling predict_proba before fit must raise.
    with self.assertRaises(NotFittedError):
        clf.predict_proba(X=self.X)

    # After fitting on self.y_nan every class is expected to get 0.5.
    clf.fit(X=self.X, y=self.y_nan)
    proba = clf.predict_proba(X=self.X)
    uniform = np.full((len(self.X), 2), 0.5)
    np.testing.assert_array_equal(uniform, proba)

    # Weighted fit, probabilities of the first sample only.
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    proba = clf.predict_proba(X=[self.X[0]])
    np.testing.assert_array_equal([[1 / 3, 2 / 3]], proba)

    # Precomputed metric with a scalar class prior.
    clf = PWC(
        classes=['tokyo', 'paris', 'new york'],
        missing_label='nan',
        n_neighbors=1,
        metric='precomputed',
        class_prior=1,
    )
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    proba = clf.predict_proba(X=[[1, 0, 0]])
    np.testing.assert_array_equal([[1 / 5, 1 / 5, 3 / 5]], proba)

    # Precomputed metric with a per-class prior.
    clf = PWC(
        classes=['tokyo', 'paris', 'new york'],
        missing_label='nan',
        n_neighbors=1,
        metric='precomputed',
        class_prior=[0, 0, 1],
    )
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    proba = clf.predict_proba(X=[[1, 0, 0]])
    np.testing.assert_array_equal([[0, 0, 1]], proba)
def test_predict_freq(self):
    """Check the class frequency estimates of ``PWC.predict_freq``."""
    clf = PWC(
        classes=['tokyo', 'paris', 'new york'],
        missing_label='nan',
        n_neighbors=10,
        metric='rbf',
        metric_dict={'gamma': 2},
    )

    # Calling predict_freq before fit must raise.
    with self.assertRaises(NotFittedError):
        clf.predict_freq(X=self.X)

    # After fitting on self.y_nan all frequencies are expected to be zero.
    clf.fit(X=self.X, y=self.y_nan)
    freq = clf.predict_freq(X=self.X)
    np.testing.assert_array_equal(np.zeros((len(self.X), 3)), freq)

    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    freq = clf.predict_freq(X=[self.X[0]])
    np.testing.assert_array_equal([[0, 1, 2]], freq)

    # Restricting the estimate to a single neighbor.
    clf = PWC(classes=['tokyo', 'paris', 'new york'], missing_label='nan',
              n_neighbors=1)
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    freq = clf.predict_freq(X=[self.X[0]])
    np.testing.assert_array_equal([[0, 1, 0]], freq)

    # Precomputed metric: the two malformed inputs below must raise a
    # ValueError, the well-formed row must be accepted.
    clf = PWC(classes=['tokyo', 'paris', 'new york'], missing_label='nan',
              n_neighbors=1, metric='precomputed')
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    with self.assertRaises(ValueError):
        clf.predict_freq(X=[[1, 0]])
    with self.assertRaises(ValueError):
        clf.predict_freq(X=[[1], [0]])
    freq = clf.predict_freq(X=[[1, 0, 0]])
    np.testing.assert_array_equal([[0, 0, 2]], freq)

    # A callable metric must give the same result as the 'rbf' string.
    def rbf_kernel(x, y, gamma):
        return np.exp(-gamma * np.sum((x - y)**2))

    query = np.ones_like(self.X)
    clf = PWC(classes=['tokyo', 'paris'], missing_label='nan',
              random_state=0, metric=rbf_kernel, metric_dict={'gamma': 2})
    freq_callable = clf.fit(X=self.X, y=self.y).predict_freq(query)

    clf = PWC(classes=['tokyo', 'paris'], missing_label='nan',
              metric='rbf', metric_dict={'gamma': 2}, random_state=0)
    freq_builtin = clf.fit(X=self.X, y=self.y).predict_freq(
        np.ones_like(self.X))
    np.testing.assert_array_equal(freq_callable, freq_builtin)
def test_multi_class(self):
    """Render utility and decision boundary for a 3-class problem.

    The figure is written to ``dec_bound_multiclass.pdf`` below
    ``self.path_prefix`` and compared against
    ``dec_bound_multiclass_base.pdf`` with zero tolerance.
    """
    X, y = make_classification(n_features=2, n_redundant=0,
                               random_state=0, n_classes=3,
                               n_clusters_per_class=1)
    # No seed is set in this method, so the drawn indices depend on the
    # current state of the global NumPy random generator.
    train_indices = np.random.randint(0, len(X), size=20)
    cand_indices = np.setdiff1d(np.arange(len(X)), train_indices)
    X_train = X[train_indices]
    y_train = y[train_indices]
    X_cand = X[cand_indices]

    clf = PWC()
    clf.fit(X_train, y_train)
    qs = UncertaintySampling()
    bound = [[min(X[:, 0]), min(X[:, 1])], [max(X[:, 0]), max(X[:, 1])]]

    fig, ax = plt.subplots()
    try:
        plot_utility(qs, {
            'clf': clf,
            'X': X_train,
            'y': y_train
        },
                     feature_bound=bound,
                     ax=ax)
        ax.scatter(X_cand[:, 0], X_cand[:, 1], c='k', marker='.')
        ax.scatter(X_train[:, 0],
                   X_train[:, 1],
                   c=y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(clf, bound, ax=ax, res=101, cmap=self.cmap)
        fig.savefig(self.path_prefix + 'dec_bound_multiclass.pdf')
    finally:
        # Release the figure even if plotting fails, so it does not stay
        # registered with pyplot for the rest of the test run.
        plt.close(fig)

    comparison = compare_images(
        self.path_prefix + 'dec_bound_multiclass_base.pdf',
        self.path_prefix + 'dec_bound_multiclass.pdf',
        tol=0)
    self.assertIsNone(comparison)
def test_fit(self):
    """Check the attributes that ``PWC.fit`` sets on the estimator."""
    clf = PWC(classes=['tokyo', 'paris', 'new york'], missing_label='nan')
    # Expected cost matrix: zeros on the diagonal, ones elsewhere.
    expected_costs = 1 - np.eye(3)

    # Fit on self.y_nan: the cost_matrix parameter stays None and V_ is
    # expected to be all zeros.
    clf.fit(X=self.X, y=self.y_nan)
    self.assertIsNone(clf.cost_matrix)
    np.testing.assert_array_equal(expected_costs, clf.cost_matrix_)
    np.testing.assert_array_equal(np.zeros((3, 3)), clf.V_)

    # Fit on the labeled data without weights.
    clf.fit(X=self.X, y=self.y)
    self.assertIsNone(clf.cost_matrix)
    np.testing.assert_array_equal(expected_costs, clf.cost_matrix_)
    np.testing.assert_array_equal(
        [[0, 0, 1], [0, 0, 0], [0, 1, 0]], clf.V_)

    # Fit on the labeled data with sample weights.
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    np.testing.assert_array_equal(
        [[0, 0, 2], [0, 0, 0], [0, 1, 0]], clf.V_)
class TestFeatureSpace(unittest.TestCase):
    """Tests for ``plot_decision_boundary`` and ``plot_utility``.

    Covers argument validation of both functions and image-comparison
    tests that write a PDF below ``self.path_prefix`` and compare it with
    a stored ``*_base.pdf`` file at zero tolerance.
    """

    def setUp(self):
        """Create the data set, classifier, query strategy and bounds."""
        self.path_prefix = os.path.dirname(visualization.__file__) + \
                           '/tests/images/'
        # Seed the global generator: the index draws below (and the one in
        # test_multi_class) use np.random directly.
        np.random.seed(0)
        self.X, self.y = make_classification(n_features=2,
                                             n_redundant=0,
                                             random_state=0)
        train_indices = np.random.randint(0, len(self.X), size=20)
        cand_indices = np.setdiff1d(np.arange(len(self.X)), train_indices)
        self.X_train = self.X[train_indices]
        self.y_train = self.y[train_indices]
        self.X_cand = self.X[cand_indices]
        self.clf = PWC()
        self.clf.fit(self.X_train, self.y_train)
        self.qs = UncertaintySampling()
        self.qs_dict = {'clf': self.clf, 'X': self.X_train, 'y': self.y_train}

        x1_min = min(self.X[:, 0])
        x1_max = max(self.X[:, 0])
        x2_min = min(self.X[:, 1])
        x2_max = max(self.X[:, 1])
        self.bound = [[x1_min, x2_min], [x1_max, x2_max]]

        self.cmap = 'jet'

        testing.set_font_settings_for_testing()
        testing.set_reproducibility_for_testing()
        testing.setup()

    def tearDown(self):
        """Close every pyplot figure opened by the test that just ran."""
        # Tests create figures via plt.subplots() or through plotting calls
        # without an explicit ``ax``; none of them closes its figure.
        plt.close('all')

    # Tests for plot_decision_boundary function
    def test_decision_boundary_clf(self):
        """Invalid ``clf`` arguments must raise."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.qs,
                          feature_bound=self.bound)
        clf = TestClassifier()
        self.assertRaises(AttributeError,
                          plot_decision_boundary,
                          clf=clf,
                          feature_bound=self.bound)

    def test_decision_boundary_bound(self):
        """A flat ``feature_bound`` list must raise a ValueError."""
        self.assertRaises(ValueError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=[0, 0, 1, 1])

    def test_decision_boundary_res(self):
        """A string ``res`` must raise a TypeError."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          res='string')

    def test_decision_boundary_ax(self):
        """A non-axes ``ax`` must raise a TypeError."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          ax=3)

    def test_decision_boundary_confidence(self):
        """Check validation and accepted values of ``confidence``."""
        self.assertRaises(ValueError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence=0.0)
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence='string')
        plot_decision_boundary(self.clf, self.bound, confidence=None)
        # Requesting a confidence line for a LinearSVC must emit a warning.
        svc = LinearSVC()
        svc.fit(self.X_train, self.y_train)
        self.assertWarns(Warning,
                         plot_decision_boundary,
                         clf=svc,
                         feature_bound=self.bound,
                         confidence=0.75)

    def test_decision_boundary_cmap(self):
        """An integer ``cmap`` must raise a TypeError."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          cmap=4)

    def test_decision_boundary_boundary_dict(self):
        """``boundary_dict`` must be rejected as string, accepted as dict."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          boundary_dict='string')
        plot_decision_boundary(clf=self.clf,
                               feature_bound=self.bound,
                               boundary_dict={'colors': 'r'})

    def test_decision_boundary_confidence_dict(self):
        """``confidence_dict`` must be rejected as string, accepted as dict."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence_dict='string')
        plot_decision_boundary(clf=self.clf,
                               feature_bound=self.bound,
                               confidence_dict={'linestyles': ':'})

    # Tests for plot_utility function
    def test_utility_qs(self):
        """Passing a classifier as ``qs`` must raise a TypeError."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.clf,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound)

    def test_utility_qs_dict(self):
        """Check validation of ``qs_dict``."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict={0, 1, 2},
                          feature_bound=self.bound)

        # This mutates self.qs_dict in place; setUp rebuilds it per test.
        qs_dict = self.qs_dict
        qs_dict['X_cand'] = []
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=qs_dict,
                          feature_bound=self.bound)

    def test_utility_X_cand(self):
        """Omitting both ``X_cand`` and ``feature_bound`` must raise."""
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict)

    def test_utility_res(self):
        """A negative ``res`` must raise a ValueError."""
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          res=-3)

    def test_utility_ax(self):
        """A non-axes ``ax`` must raise a TypeError."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          ax=2)

    def test_utility_contour_dict(self):
        """``contour_dict`` must be rejected as string, accepted as dict."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          contour_dict='string')
        plot_utility(qs=self.qs,
                     qs_dict=self.qs_dict,
                     feature_bound=self.bound,
                     contour_dict={'linestyles': '.'})

    # Graphical tests
    def test_no_candidates(self):
        """Compare the plot drawn from ``feature_bound`` with its baseline."""
        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, feature_bound=self.bound, ax=ax)
        ax.scatter(self.X_cand[:, 0], self.X_cand[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(self.clf, self.bound, ax=ax, cmap=self.cmap)

        fig.savefig(self.path_prefix + 'dec_bound_wo_cand.pdf')
        comparison = compare_images(self.path_prefix +
                                    'dec_bound_wo_cand_base.pdf',
                                    self.path_prefix + 'dec_bound_wo_cand.pdf',
                                    tol=0)
        self.assertIsNone(comparison)

    def test_with_candidates(self):
        """Compare the plot drawn from ``X_cand`` with its baseline."""
        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, X_cand=self.X_cand, ax=ax)
        ax.scatter(self.X[:, 0], self.X[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(self.clf, self.bound, ax=ax, cmap=self.cmap)

        fig.savefig(self.path_prefix + 'dec_bound_w_cand.pdf')
        comparison = compare_images(self.path_prefix +
                                    'dec_bound_w_cand_base.pdf',
                                    self.path_prefix + 'dec_bound_w_cand.pdf',
                                    tol=0)
        self.assertIsNone(comparison)

    def test_multi_class(self):
        """Compare the 3-class plot with its baseline."""
        X, y = make_classification(n_features=2,
                                   n_redundant=0,
                                   random_state=0,
                                   n_classes=3,
                                   n_clusters_per_class=1)
        # Continues the global random stream seeded in setUp.
        train_indices = np.random.randint(0, len(X), size=20)
        cand_indices = np.setdiff1d(np.arange(len(X)), train_indices)
        X_train = X[train_indices]
        y_train = y[train_indices]
        X_cand = X[cand_indices]
        clf = PWC()
        clf.fit(X_train, y_train)
        qs = UncertaintySampling()
        bound = [[min(X[:, 0]), min(X[:, 1])], [max(X[:, 0]), max(X[:, 1])]]

        fig, ax = plt.subplots()
        plot_utility(qs, {
            'clf': clf,
            'X': X_train,
            'y': y_train
        },
                     feature_bound=bound,
                     ax=ax)
        ax.scatter(X_cand[:, 0], X_cand[:, 1], c='k', marker='.')
        ax.scatter(X_train[:, 0],
                   X_train[:, 1],
                   c=y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(clf, bound, ax=ax, res=101, cmap=self.cmap)
        fig.savefig(self.path_prefix + 'dec_bound_multiclass.pdf')
        comparison = compare_images(
            self.path_prefix + 'dec_bound_multiclass_base.pdf',
            self.path_prefix + 'dec_bound_multiclass.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_svc(self):
        """Compare the plot with a LinearSVC boundary with its baseline."""
        svc = LinearSVC()
        svc.fit(self.X_train, self.y_train)
        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, X_cand=self.X_cand, ax=ax)
        ax.scatter(self.X[:, 0], self.X[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(svc, self.bound, ax=ax, cmap=self.cmap)

        fig.savefig(self.path_prefix + 'dec_bound_svc.pdf')
        comparison = compare_images(self.path_prefix +
                                    'dec_bound_svc_base.pdf',
                                    self.path_prefix + 'dec_bound_svc.pdf',
                                    tol=0)
        self.assertIsNone(comparison)
class TestFeatureSpace(unittest.TestCase):
    """Tests for the multi-annotator plotting functions.

    Covers ``plot_ma_data_set``, ``plot_ma_utility``,
    ``plot_ma_decision_boundary`` and ``plot_ma_current_state``. Graphical
    tests write a ``*_returned_result.pdf`` below ``self.path_prefix`` and
    compare it with the matching ``*_expected_result.pdf`` at zero
    tolerance.
    """

    def setUp(self):
        """Create noisy annotator labels, classifiers and query strategy."""
        self.path_prefix = os.path.dirname(visualization.__file__) + \
                           '/multi/tests/images/'
        self.X, self.y_true = make_classification(n_features=2,
                                                  n_redundant=0,
                                                  random_state=0)

        self.n_samples = self.X.shape[0]
        self.n_annotators = 5

        # Each annotator's label is the true label shifted by a Bernoulli
        # noise term (p=.2), modulo 2.
        rng = np.random.default_rng(seed=0)
        noise = rng.binomial(n=1,
                             p=.2,
                             size=(self.n_samples, self.n_annotators))

        self.y = (self.y_true.reshape(-1, 1) + noise) % 2

        estimators = []
        for a in range(self.n_annotators):
            estimators.append((f'pwc_{a}', PWC(random_state=0)))
        self.clf_multi = MultiAnnotEnsemble(estimators=estimators,
                                            voting='soft')
        self.clf = PWC(random_state=0)
        self.ma_qs = IEThresh(random_state=0, n_annotators=self.n_annotators)

        testing.set_font_settings_for_testing()
        testing.set_reproducibility_for_testing()
        testing.setup()

    def tearDown(self):
        """Close every pyplot figure opened by the test that just ran."""
        # None of the tests closes the figures it creates or receives.
        plt.close('all')

    def test_ma_plot_data_set_X(self):
        """Invalid arguments of ``plot_ma_data_set`` must raise."""
        self.assertRaises(ValueError, plot_ma_data_set, self.X.T, self.y,
                          self.y_true)
        self.assertRaises(ValueError, plot_ma_data_set, self.X, self.y,
                          self.y_true.reshape(-1, 1))
        self.assertRaises(TypeError,
                          plot_ma_data_set,
                          self.X,
                          self.y,
                          self.y_true,
                          fig=4)

    def test_ma_plot_data_set(self):
        """Compare the data set plot (with NaN labels) with its baseline."""
        # Set one label per annotator (on the diagonal of the first five
        # rows) to NaN.
        y = np.array(self.y, dtype=float)
        y[np.arange(5), np.arange(5)] = np.nan
        fig = plot_ma_data_set(self.X,
                               y,
                               self.y_true,
                               fig_size=(12, 3),
                               legend_dict={
                                   'loc': 'lower center',
                                   'bbox_to_anchor': (0.5, 0.1),
                                   'ncol': 3
                               },
                               tick_dict={
                                   'labelbottom': True,
                                   'labelleft': True
                               })
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'data_set_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'data_set_expected_result.pdf',
            self.path_prefix + 'data_set_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_data_set_mc(self):
        """Compare the 4-class data set plot with its baseline."""
        X_prime, y_true_prime = make_classification(n_features=2,
                                                    n_redundant=0,
                                                    n_clusters_per_class=1,
                                                    n_classes=4,
                                                    random_state=0)

        # One-hot multinomial draws are turned into a class offset in 0..3
        # by weighting each category with its index and summing.
        rng = np.random.default_rng(seed=0)
        noise = np.sum(
            rng.multinomial(n=1,
                            pvals=[.7, .1, .1, .1],
                            size=(self.n_samples, self.n_annotators))
            * np.arange(4).reshape(1, 1, 4),
            axis=2)

        # NOTE(review): the noise is added to self.y_true (labels created
        # in setUp), not to y_true_prime. This looks unintended, but a
        # change would alter the figure compared against the stored
        # expected PDF, so it is kept as is -- confirm intent.
        y_prime = (self.y_true.reshape(-1, 1) + noise) % 4

        fig = plot_ma_data_set(X_prime, y_prime, y_true_prime)
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'data_set_mf_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'data_set_mf_expected_result.pdf',
            self.path_prefix + 'data_set_mf_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_utility_args(self):
        """Invalid argument combinations of ``plot_ma_utility`` must raise."""
        # NOTE(review): labels are passed unmasked (self.y); confirm whether
        # a NaN-masked label matrix was intended for these checks.

        # 'X_cand' inside the argument dict is rejected.
        maqs_arg_dict = {
            'clf': self.clf,
            'X': self.X,
            'y': self.y,
            'X_cand': self.X
        }
        bound = check_bound(X=self.X)
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)

        # 'A_cand' inside the argument dict is rejected.
        maqs_arg_dict = {
            'clf': self.clf,
            'X': self.X,
            'y': self.y,
            'A_cand': np.ones((self.n_samples, self.n_annotators))
        }
        bound = check_bound(X=self.X)
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)

        # Query strategy without a configured number of annotators.
        maqs_arg_dict = {'clf': self.clf, 'X': self.X, 'y': self.y}
        self.ma_qs.n_annotators = None
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)

        # A figure with 7 axes is rejected for 5 annotators, both with an
        # explicit A_cand and with n_annotators set on the query strategy.
        fig, _ = plt.subplots(ncols=7)
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          A_cand=np.ones((100, 5)),
                          fig=fig,
                          feature_bound=bound)
        self.ma_qs.n_annotators = 5
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          fig=fig,
                          feature_bound=bound)

    def test_ma_plot_utility(self):
        """Compare the utility plot drawn from bounds with its baseline."""
        # NOTE(review): labels are passed unmasked (self.y); confirm whether
        # a NaN-masked label matrix was intended here.
        maqs_arg_dict = {'clf': self.clf_multi, 'X': self.X, 'y': self.y}
        bound = check_bound(X=self.X)
        fig = plot_ma_utility(self.ma_qs,
                              maqs_arg_dict,
                              feature_bound=bound,
                              title='utility',
                              fig_size=(20, 5))
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'plot_utility_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'plot_utility_expected_result.pdf',
            self.path_prefix + 'plot_utility_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_utility_with_X(self):
        """Compare the utility plot drawn from candidates with its baseline."""
        maqs_arg_dict = {'clf': self.clf_multi, 'X': self.X, 'y': self.y}
        A_cand = np.ones((self.n_samples, self.n_annotators))
        fig = plot_ma_utility(self.ma_qs,
                              maqs_arg_dict,
                              X_cand=self.X,
                              A_cand=A_cand)
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'plot_utility_X_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'plot_utility_X_expected_result.pdf',
            self.path_prefix + 'plot_utility_X_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_decision_boundary_args(self):
        """Calling without ``n_annotators`` (or a figure) must raise."""
        bound = check_bound(X=self.X)
        self.assertRaises(ValueError, plot_ma_decision_boundary, self.clf,
                          bound)

    def test_ma_plot_decision_boundary(self):
        """Compare the decision boundary plot with its baseline."""
        bound = check_bound(X=self.X)
        # The single classifier is trained on majority-vote labels.
        self.clf.fit(self.X, majority_vote(self.y, random_state=0))
        fig = plot_ma_decision_boundary(self.clf,
                                        bound,
                                        n_annotators=self.n_annotators)
        fig.tight_layout()
        fig.savefig(self.path_prefix +
                    'plot_decision_boundary_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'plot_decision_boundary_expected_result.pdf',
            self.path_prefix + 'plot_decision_boundary_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_current_state(self):
        """Compare the combined current-state plot with its baseline."""
        maqs_arg_dict = {'clf': self.clf, 'X': self.X, 'y': self.y}
        self.clf.fit(self.X, majority_vote(self.y, random_state=0))
        fig = plot_ma_current_state(self.X, self.y, self.y_true, self.ma_qs,
                                    self.clf, maqs_arg_dict)
        fig.tight_layout()
        fig.savefig(self.path_prefix +
                    'ma_plot_current_state_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'ma_plot_current_state_expected_result.pdf',
            self.path_prefix + 'ma_plot_current_state_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)