def test_init_param_metric(self):
    """Check storage and validation of the ``metric`` parameter of PWC."""
    # The constructor stores the value as given: default first, then custom.
    self.assertEqual(PWC().metric, 'rbf')
    self.assertEqual(PWC(metric='Test').metric, 'Test')

    # The unsupported metric name is only rejected when ``fit`` is called.
    invalid_clf = PWC(missing_label='nan', metric='Test')
    with self.assertRaises(ValueError):
        invalid_clf.fit(X=self.X, y=self.y)
def test_init_param_metric_dict(self):
    """Check storage and validation of the ``metric_dict`` parameter."""
    default_clf = PWC(missing_label=-1)
    self.assertEqual(default_clf.metric_dict, None)

    # Neither a string nor a list is accepted; ``fit`` must raise TypeError.
    for invalid_metric_dict in ('Test', ['gamma']):
        clf = PWC(missing_label='nan', metric_dict=invalid_metric_dict)
        with self.assertRaises(TypeError):
            clf.fit(X=self.X, y=self.y)
def setUp(self):
    """Create data, a fitted classifier and plot settings for the tests."""
    module_dir = os.path.dirname(visualization.__file__)
    self.path_prefix = module_dir + '/tests/images/'

    # Seed the global NumPy generator before the training indices are drawn.
    np.random.seed(0)
    self.X, self.y = make_classification(n_features=2, n_redundant=0,
                                         random_state=0)
    n_samples = len(self.X)
    idx_train = np.random.randint(0, n_samples, size=20)
    idx_cand = np.setdiff1d(np.arange(n_samples), idx_train)
    self.X_train, self.y_train = self.X[idx_train], self.y[idx_train]
    self.X_cand = self.X[idx_cand]

    self.clf = PWC()
    self.clf.fit(self.X_train, self.y_train)
    self.qs = UncertaintySampling()
    self.qs_dict = {'clf': self.clf, 'X': self.X_train, 'y': self.y_train}

    # Feature bound: per-feature minima first, per-feature maxima second.
    feature_1, feature_2 = self.X[:, 0], self.X[:, 1]
    self.bound = [[min(feature_1), min(feature_2)],
                  [max(feature_1), max(feature_2)]]
    self.cmap = 'jet'

    testing.set_font_settings_for_testing()
    testing.set_reproducibility_for_testing()
    testing.setup()
def setUp(self):
    """Create noisy multi-annotator labels, classifiers and a strategy."""
    module_dir = os.path.dirname(visualization.__file__)
    self.path_prefix = module_dir + '/multi/tests/images/'

    self.X, self.y_true = make_classification(n_features=2, n_redundant=0,
                                              random_state=0)
    self.n_samples = self.X.shape[0]
    self.n_annotators = 5

    # Every annotator label is the true label plus a binomial(1, 0.2) draw,
    # taken modulo 2.
    generator = np.random.default_rng(seed=0)
    label_shape = (self.n_samples, self.n_annotators)
    flips = generator.binomial(n=1, p=.2, size=label_shape)
    self.y = (self.y_true.reshape(-1, 1) + flips) % 2

    # One PWC per annotator forms the ensemble.
    members = [(f'pwc_{annot}', PWC(random_state=0))
               for annot in range(self.n_annotators)]
    self.clf_multi = MultiAnnotEnsemble(estimators=members, voting='soft')
    self.clf = PWC(random_state=0)
    self.ma_qs = IEThresh(random_state=0, n_annotators=self.n_annotators)

    testing.set_font_settings_for_testing()
    testing.set_reproducibility_for_testing()
    testing.setup()
def test_init_param_n_neighbors(self):
    """Check storage and validation of the ``n_neighbors`` parameter."""
    self.assertIsNone(PWC().n_neighbors)
    self.assertEqual(PWC(n_neighbors=1).n_neighbors, 1)

    # Each invalid value is paired with the error ``fit`` has to raise.
    invalid_cases = [(0, ValueError), (-1, ValueError), (1.5, TypeError)]
    for n_neighbors, expected_error in invalid_cases:
        clf = PWC(missing_label='nan', n_neighbors=n_neighbors)
        with self.assertRaises(expected_error):
            clf.fit(X=self.X, y=self.y)
def test_fit(self):
    """Check the ``classes`` attribute and label validation in ``fit``."""
    # The ensemble's ``classes`` must equal those of both classifiers.
    parzen = PWC(classes=[1, 2])
    naive_bayes = SklearnClassifier(GaussianNB(), classes=[1, 2])
    ensemble = MultiAnnotEnsemble(estimators=[('PWC', parzen)],
                                  classes=[1, 2])
    for reference in (naive_bayes, parzen):
        np.testing.assert_array_equal(ensemble.classes, reference.classes)

    # Fitting with only the first column of ``self.y`` must raise.
    parzen = PWC(classes=np.arange(3))
    naive_bayes = SklearnClassifier(GaussianNB(), classes=np.arange(3))
    members = [('PWC', parzen), ('GNB', naive_bayes)]
    ensemble = MultiAnnotEnsemble(estimators=members, voting='soft',
                                  classes=np.arange(3))
    first_label_column = self.y[:, 0]
    with self.assertRaises(ValueError):
        ensemble.fit(X=self.X, y=first_label_column)
def test_query(self):
    """Run ``QBC.query`` for several ensemble types and both methods."""
    committee = [
        SklearnClassifier(classes=self.classes,
                          estimator=GaussianProcessClassifier())
        for _ in range(3)
    ]
    base_clf = PWC(classes=self.classes)
    bagging = SklearnClassifier(
        estimator=BaggingClassifier(base_estimator=base_clf),
        classes=self.classes)
    voting = SklearnClassifier(
        VotingClassifier(estimators=committee, voting='soft'))

    # Every ensemble/method combination must yield one index and one row of
    # utilities.
    for ensemble in (self.ensemble, committee, bagging, voting):
        for method in ('KL_divergence', 'vote_entropy'):
            qbc = QBC(method=method)
            query_idx, utilities = qbc.query(X_cand=self.X_cand,
                                             ensemble=ensemble,
                                             X=self.X,
                                             y=self.y,
                                             return_utilities=True)
            self.assertEqual(len(query_idx), 1)
            self.assertEqual(len(utilities), 1)
def setUp(self):
    """Create toy data, classifiers and the registry of pool strategies.

    ``self.query_strategies`` maps each name in ``pool.__all__`` that refers
    to a subclass of ``SingleAnnotPoolBasedQueryStrategy`` to that class.
    """
    self.MISSING_LABEL = MISSING_LABEL
    self.X, self.y_true = make_blobs(n_samples=10, n_features=2, centers=2,
                                     cluster_std=1, random_state=1)
    self.budget = 5

    classes = np.unique(self.y_true)
    self.clf = PWC(classes=classes, missing_label=MISSING_LABEL,
                   random_state=0)
    self.cmm = CMM(classes=classes, missing_label=MISSING_LABEL,
                   random_state=0)
    self.ensemble = SklearnClassifier(
        classes=classes,
        missing_label=MISSING_LABEL,
        estimator=RandomForestClassifier(random_state=0),
        random_state=0)

    self.y_missing_label = np.full(self.y_true.shape, self.MISSING_LABEL)
    self.y = self.y_true.copy()
    # NOTE(review): this assignment is a no-op because ``self.y`` is already
    # a copy of ``self.y_true``; possibly ``self.y`` was meant to start from
    # ``self.y_missing_label`` -- confirm before changing.
    self.y[:3] = self.y_true[:3]

    # Collect the strategy classes; no output is printed during set-up so
    # the test log stays clean.
    self.query_strategies = {}
    for qs_name in pool.__all__:
        qs = getattr(pool, qs_name)
        if inspect.isclass(qs) and \
                issubclass(qs, SingleAnnotPoolBasedQueryStrategy):
            self.query_strategies[qs_name] = qs
def test_selection_strategies(self):
    """Run the shared checks for every registered stream query strategy."""
    # All seeds are drawn from one generator, so the order of the ``randint``
    # calls below determines the seeds each component receives.
    rng = np.random.RandomState(0)
    seed_bound = 2**31 - 1
    n_stream = 1000
    n_init = 10
    training_size = 100

    X, y = make_classification(
        n_samples=n_stream + n_init,
        random_state=rng.randint(seed_bound),
        shuffle=True,
    )
    clf = PWC(classes=[0, 1], random_state=rng.randint(seed_bound))

    # The first ``n_init`` samples are the initial set, the rest the stream.
    X_init, X_stream = X[:n_init, :], X[n_init:, :]
    y_init, y_stream = y[:n_init], y[n_init:]

    for qs_name, qs_class in self.query_strategies.items():
        shared_args = (qs_class, clf, X_init, y_init, X_stream, y_stream,
                       training_size, qs_name)
        self._test_query_strategy(rng.randint(seed_bound), *shared_args)
        self._test_update_before_query(rng.randint(seed_bound),
                                       *shared_args)
def test_query(self):
    """Check input validation and the utilities returned by ``McPAL``."""
    # Invalid inputs: empty data, empty candidates, labels outside classes.
    strategy = McPAL()
    invalid_calls = [
        dict(X_cand=[], clf=self.clf, X=[], y=[]),
        dict(X_cand=[], clf=self.clf, X=self.X, y=self.y),
        dict(X_cand=self.X_cand, clf=self.clf, X=self.X,
             y=[0, 1, 4, 0, 2, 1]),
    ]
    for query_kwargs in invalid_calls:
        with self.assertRaises(ValueError):
            strategy.query(**query_kwargs)

    # Missing labels: with a single unlabeled sample all candidates are
    # expected to share one utility value.
    candidates = [[0], [1], [2], [3]]
    binary_clf = PWC(classes=[0, 1])
    strategy = McPAL()
    _, utils = strategy.query(candidates, binary_clf, [[1]],
                              [MISSING_LABEL], return_utilities=True)
    self.assertEqual(utils.shape, (1, len(candidates)))
    self.assertEqual(len(np.unique(utils)), 1)

    # The candidate at the unlabeled position must have the highest utility.
    _, utils = strategy.query(candidates, binary_clf,
                              X=[[0], [1], [2]],
                              y=[0, 1, MISSING_LABEL],
                              return_utilities=True)
    for labeled_pos in (1, 0):
        self.assertGreater(utils[0, 2], utils[0, labeled_pos])

    # Scenario: one labeled sample at position 1.
    candidates = [[0], [1], [2], [5]]
    strategy = McPAL()
    selected = strategy.query(candidates, binary_clf, X=[[1]], y=[0])
    np.testing.assert_array_equal(selected, np.array([3]))

    _, utils = strategy.query(candidates, binary_clf, X=[[1]], y=[0],
                              return_utilities=True)
    least_useful = np.argmin(utils)
    np.testing.assert_array_equal(least_useful, np.array([1]))

    selected = strategy.query(X_cand=[[0], [1], [2]], clf=binary_clf,
                              X=[[0], [2]], y=[0, 1])
    np.testing.assert_array_equal(selected, [1])
def test_init_param_voting(self):
    """Check that invalid ``voting`` values are rejected by ``fit``."""
    parzen = PWC()
    naive_bayes = SklearnClassifier(GaussianNB())
    members = [('pwc', parzen), ('gnb', naive_bayes)]

    for invalid_voting in ('Test', 1):
        ensemble = MultiAnnotEnsemble(estimators=members,
                                      voting=invalid_voting)
        with self.assertRaises(ValueError):
            ensemble.fit(X=self.X, y=self.y)
def setUp(self):
    """Create constant toy data, a cost matrix and two estimators."""
    self.classes = [0, 1, 2]
    self.y = [0, 1, 1, 0, 2, 1]

    # All samples and candidates are zero vectors with two features.
    n_features = 2
    self.X = np.zeros((6, n_features))
    self.X_cand = np.zeros((100, n_features))

    self.cost_matrix = np.array([
        [0, 2, 3],
        [4, 0, 6],
        [7, 8, 0],
    ])
    self.pwc = PWC()
    self.regressor = SVR()
def setUp(self):
    """Collect all stream query strategy classes and create a classifier."""
    # Keep only names from ``stream.__all__`` that refer to classes derived
    # from ``SingleAnnotStreamBasedQueryStrategy``.
    exported = ((name, getattr(stream, name)) for name in stream.__all__)
    self.query_strategies = {
        name: obj
        for name, obj in exported
        if inspect.isclass(obj)
        and issubclass(obj, SingleAnnotStreamBasedQueryStrategy)
    }
    self.clf = PWC()
def setUp(self):
    """Create constant toy data, uniform weights and a classifier."""
    self.classes = [0, 1, 2]
    self.y = [0, 1, 1, 0, 2, 1]
    self.X = np.zeros((6, 2))
    self.X_cand = np.zeros((2, 2))

    # Uniform weights over the samples in ``self.X``.
    n_samples = len(self.X)
    self.utility_weight = np.full(n_samples, 1 / n_samples)

    self.C = np.eye(3)
    self.clf = PWC(classes=self.classes)
def test_init_param_estimators(self):
    """Check that invalid ``estimators`` settings are rejected by ``fit``."""
    # The constructor stores the value untouched; validation is in ``fit``.
    clf = MultiAnnotEnsemble(estimators='Test')
    self.assertEqual(clf.estimators, 'Test')
    with self.assertRaises(ValueError):
        clf.fit(X=self.X, y=self.y)

    # Each entry pairs the expected error with the constructor arguments.
    perceptron = SklearnClassifier(Perceptron())
    invalid_setups = [
        (ValueError, dict(estimators=None)),
        (TypeError, dict(estimators=[('GNB', GaussianNB())])),
        (TypeError, dict(estimators=[('PWC', PWC(missing_label=0))])),
        (TypeError, dict(estimators=[('PWC', PWC(missing_label='a'))])),
        (ValueError, dict(classes=[0, 1],
                          estimators=[('PWC', PWC(classes=[0, 2]))])),
        (ValueError, dict(estimators=[('PWC', PWC(classes=[0, 1]))])),
        (ValueError, dict(estimators=[('perc', perceptron)],
                          voting='soft')),
    ]
    for expected_error, init_kwargs in invalid_setups:
        clf = MultiAnnotEnsemble(**init_kwargs)
        with self.assertRaises(expected_error):
            clf.fit(X=self.X, y=self.y)
def setUp(self):
    """Create a small labeled data set, candidates and query arguments."""
    self.random_state = 1
    self.classes = np.array([0, 1])
    self.X = np.array([[1, 2], [5, 8], [8, 4], [5, 4]])
    self.y = np.array([0, 0, 1, 1])
    self.X_cand = np.array([[8, 1], [9, 1], [5, 1]])
    self.clf = PWC()

    # Keyword arguments shared by the ``query`` calls of the tests.
    self.kwargs = {
        'X_cand': self.X_cand,
        'clf': self.clf,
        'X': self.X,
        'y': self.y,
    }
def test_query(self):
    """Check the outputs of ``EpistemicUncertainty.query``."""

    def query_and_check(clf):
        """Query with and without labels and verify the output shapes."""
        strategy = EpistemicUncertainty()
        strategy.query(**self.kwargs, clf=clf)
        strategy.query(**self.kwargs_MISSING_LABEL, clf=clf)
        best, utils = strategy.query(**self.kwargs, clf=clf,
                                     return_utilities=True)
        self.assertEqual(utils.shape, (1, len(self.X_cand)))
        self.assertEqual(best.shape, (1, ))
        return strategy, utils

    # ``return_utilities`` switches between two and one returned items.
    strategy = EpistemicUncertainty()
    with_utils = list(
        strategy.query(**self.kwargs, clf=self.clf, return_utilities=True))
    self.assertEqual(len(with_utils), 2)
    without_utils = list(
        strategy.query(**self.kwargs, clf=self.clf, return_utilities=False))
    self.assertEqual(len(without_utils), 1)

    # ``batch_size`` determines the number of returned indices.
    batch_size = 3
    strategy = EpistemicUncertainty()
    batch = strategy.query(**self.kwargs, clf=self.clf,
                           batch_size=batch_size)
    self.assertEqual(batch_size, len(batch))

    # Query with a PWC.
    parzen = PWC(classes=self.classes, random_state=self.random_state)
    query_and_check(parzen)

    # Query with logistic regression.
    log_reg = SklearnClassifier(LogisticRegression(),
                                classes=self.classes,
                                random_state=self.random_state)
    strategy, plain_utils = query_and_check(log_reg)

    # Non-uniform sample weights must change at least one utility.
    _, weighted_utils = strategy.query(**self.kwargs,
                                       clf=log_reg,
                                       return_utilities=True,
                                       sample_weight=[0.5, 1, 1, 1])
    unchanged = weighted_utils == plain_utils
    self.assertFalse(unchanged.all())
def test_fit(self):
    """Check ``cost_matrix_`` and ``V_`` after fitting a PWC."""
    clf = PWC(classes=['tokyo', 'paris', 'new york'], missing_label='nan')

    # Unweighted fits: labels paired with the expected ``V_``.
    unweighted_cases = [
        (self.y_nan, np.zeros((3, 3))),
        (self.y, [[0, 0, 1], [0, 0, 0], [0, 1, 0]]),
    ]
    for labels, expected_V in unweighted_cases:
        clf.fit(X=self.X, y=labels)
        # The parameter stays ``None``; the fitted matrix is 1 - identity.
        self.assertIsNone(clf.cost_matrix)
        np.testing.assert_array_equal(1 - np.eye(3), clf.cost_matrix_)
        np.testing.assert_array_equal(expected_V, clf.V_)

    # Sample weights change the expected ``V_``.
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    np.testing.assert_array_equal([[0, 0, 2], [0, 0, 0], [0, 1, 0]],
                                  clf.V_)
def setUp(self):
    """Create labeled and unlabeled toy data plus shared query arguments."""
    self.random_state = 1
    self.classes = np.array([0, 1])
    self.X = np.array([[1, 2], [5, 8], [8, 4], [5, 4]])
    self.y = np.array([0, 0, 1, 1])
    # Label vector in which all four entries are ``MISSING_LABEL``.
    self.y_MISSING_LABEL = np.array([MISSING_LABEL] * 4)
    self.X_cand = np.array([[8, 1], [9, 1], [5, 1]])
    self.clf = PWC(classes=self.classes, random_state=self.random_state)

    # ``clf`` is deliberately not part of these dictionaries; the tests pass
    # it to ``query`` separately.
    self.kwargs = {'X_cand': self.X_cand, 'X': self.X, 'y': self.y}
    self.kwargs_MISSING_LABEL = {
        'X_cand': self.X_cand,
        'X': self.X,
        'y': self.y_MISSING_LABEL,
    }
def test_predict_proba(self):
    """Check that predicted probabilities sum to one for both votings."""
    members = [('PWC', PWC()), ('GNB', SklearnClassifier(GaussianNB()))]
    ensemble = MultiAnnotEnsemble(estimators=members, voting='soft')

    def fit_and_check_rows():
        """Refit the ensemble and verify that each row sums to one."""
        ensemble.fit(X=self.X, y=self.y)
        proba = ensemble.predict_proba(X=self.X)
        np.testing.assert_allclose(np.ones(len(proba)), proba.sum(axis=1))

    with self.assertRaises(NotFittedError):
        ensemble.predict_proba(X=self.X)

    fit_and_check_rows()
    ensemble.voting = 'hard'
    fit_and_check_rows()
def test_multi_class(self):
    """Compare the three-class utility/boundary plot with its baseline."""
    X, y = make_classification(n_features=2, n_redundant=0, random_state=0,
                               n_classes=3, n_clusters_per_class=1)

    # The labeled indices come from the global NumPy generator
    # (presumably seeded in ``setUp`` -- confirm against the fixture).
    n_samples = len(X)
    labeled_idx = np.random.randint(0, n_samples, size=20)
    unlabeled_idx = np.setdiff1d(np.arange(n_samples), labeled_idx)
    X_labeled, y_labeled = X[labeled_idx], y[labeled_idx]
    X_unlabeled = X[unlabeled_idx]

    model = PWC()
    model.fit(X_labeled, y_labeled)
    strategy = UncertaintySampling()
    lower = [min(X[:, 0]), min(X[:, 1])]
    upper = [max(X[:, 0]), max(X[:, 1])]
    feature_bound = [lower, upper]

    # Drawing order: utilities, candidates, labeled samples, boundary.
    fig, ax = plt.subplots()
    strategy_args = {'clf': model, 'X': X_labeled, 'y': y_labeled}
    plot_utility(strategy, strategy_args, feature_bound=feature_bound,
                 ax=ax)
    ax.scatter(X_unlabeled[:, 0], X_unlabeled[:, 1], c='k', marker='.')
    ax.scatter(X_labeled[:, 0], X_labeled[:, 1], c=y_labeled,
               cmap=self.cmap, alpha=.9, marker='.')
    plot_decision_boundary(model, feature_bound, ax=ax, res=101,
                           cmap=self.cmap)

    result_path = self.path_prefix + 'dec_bound_multiclass.pdf'
    baseline_path = self.path_prefix + 'dec_bound_multiclass_base.pdf'
    fig.savefig(result_path)
    self.assertIsNone(compare_images(baseline_path, result_path, tol=0))
def test_query_param_ensemble(self):
    """Check that ``QBC.query`` rejects invalid ``ensemble`` arguments."""
    qbc = QBC()
    # NOTE(review): the ``SklearnClassifier`` entry wraps the
    # ``GaussianProcessClassifier`` class itself, not an instance.
    invalid_ensembles = (
        None,
        'test',
        1,
        GaussianProcessClassifier(),
        SklearnClassifier(GaussianProcessClassifier, classes=self.classes),
        PWC(classes=self.classes),
    )
    for invalid in invalid_ensembles:
        with self.assertRaises(TypeError):
            qbc.query(X_cand=self.X_cand, X=self.X, y=self.y,
                      ensemble=invalid)
def setUp(self):
    """Create valid data and query arguments for the parameter tests."""
    rng = np.random.RandomState(0)
    n_stream = 1000
    n_init = 10
    X, y = make_classification(
        n_samples=n_stream + n_init,
        random_state=rng.randint(2**31 - 1),
        shuffle=True,
    )

    # The first ``n_init`` samples are labeled data; the sample right after
    # them is the single candidate.
    self.X, self.y = X[:n_init, :], y[:n_init]
    self.X_cand = X[[n_init], :]
    self.clf = PWC()
    self.kwargs = {
        'X_cand': self.X_cand,
        'clf': self.clf,
        'X': self.X,
        'y': self.y,
    }
def test_predict(self):
    """Check ``predict`` and the accuracy of the ensemble for both votings.

    The ensemble must raise ``NotFittedError`` before fitting. After each
    fit the number of predictions has to match the number of samples and
    the accuracy on ``self.y_true`` has to be at least 0.8.
    """
    pwc = PWC(random_state=0)
    gnb = SklearnClassifier(GaussianNB(), random_state=0)
    clf = MultiAnnotEnsemble(estimators=[('PWC', pwc), ('GNB', gnb)],
                             voting='soft',
                             random_state=0)
    self.assertRaises(NotFittedError, clf.predict, X=self.X)

    # ``assertTrue(score, 0.8)`` would treat 0.8 as the failure message and
    # pass for any non-zero score, so the threshold is compared explicitly.
    clf.fit(X=self.X, y=self.y)
    y_pred_soft = clf.predict(X=self.X)
    self.assertEqual(len(y_pred_soft), len(self.X))
    self.assertGreaterEqual(clf.score(self.X, self.y_true), 0.8)

    clf.voting = 'hard'
    clf.fit(X=self.X, y=self.y)
    y_pred_hard = clf.predict(X=self.X)
    self.assertEqual(len(y_pred_hard), len(self.X))
    self.assertGreaterEqual(clf.score(self.X, self.y_true), 0.8)

    # Hard voting again, this time fitted with uniform sample weights.
    clf.fit(X=self.X, y=self.y, sample_weight=np.ones_like(self.y))
    y_pred_hard = clf.predict(X=self.X)
    self.assertEqual(len(y_pred_hard), len(self.X))
    self.assertGreaterEqual(clf.score(self.X, self.y_true), 0.8)
def test_epistemic_uncertainty_pwc(self):
    """Check ``_epistemic_uncertainty_pwc`` with and without precomputing."""
    # All frequency pairs (n, p) with n, p in 0..10; row index is n * 11 + p.
    freq = np.array([(n, p) for n in range(11) for p in range(11)],
                    dtype=float)
    probe_rows = [39, 27, 18, 68, 20]
    probe_values = np.array([
        0.23132135217407046, 0.22057583593855598, 0.056099946963575974,
        0.16316360415548017, 0.021220951860586187
    ])

    reference, _ = _epistemic_uncertainty_pwc(freq, None)
    self.assertEqual(reference.shape, (121, ))
    np.testing.assert_allclose(probe_values, reference[probe_rows])

    # Non-integer frequencies must be processed without raising.
    _epistemic_uncertainty_pwc(np.array([[2.5, 1.5]]), None)

    # A NaN-filled precompute array must give the same utilities; the
    # returned array holds them in its upper-left 11 x 11 block.
    cache = np.full((1, 1), np.nan)
    cached_utils, cache = _epistemic_uncertainty_pwc(freq, cache)
    np.testing.assert_array_equal(reference, cached_utils)
    np.testing.assert_array_equal(reference, cache[:11, :11].flatten())

    class Dummy_PWC(PWC):
        """PWC whose predicted frequencies are always ``freq``."""

        def predict_freq(self, X):
            return freq

    strategy = EpistemicUncertainty(precompute=True)
    _, query_utils = strategy.query(**self.kwargs,
                                    clf=Dummy_PWC(classes=self.classes),
                                    return_utilities=True)
    np.testing.assert_array_equal(reference, query_utils[0])

    # A PWC configured with three classes is rejected.
    strategy = EpistemicUncertainty()
    three_class_clf = PWC(classes=[0, 1, 2])
    with self.assertRaises(ValueError):
        strategy.query(clf=three_class_clf, **self.kwargs)
class TestFeatureSpace(unittest.TestCase):
    """Tests for the multi-annotator feature space plotting functions.

    The graphical tests save a PDF below ``self.path_prefix`` and compare it
    with a stored expected PDF via ``compare_images`` using ``tol=0``.
    """

    def setUp(self):
        """Create noisy annotator labels, classifiers and a query strategy."""
        self.path_prefix = os.path.dirname(visualization.__file__) + \
                           '/multi/tests/images/'
        self.X, self.y_true = make_classification(n_features=2,
                                                  n_redundant=0,
                                                  random_state=0)
        self.n_samples = self.X.shape[0]
        self.n_annotators = 5
        rng = np.random.default_rng(seed=0)
        # Binomial(1, 0.2) draws are added to the true labels modulo 2 to
        # obtain one label column per annotator.
        noise = rng.binomial(n=1,
                             p=.2,
                             size=(self.n_samples, self.n_annotators))
        self.y = (self.y_true.reshape(-1, 1) + noise) % 2
        estimators = []
        for a in range(self.n_annotators):
            estimators.append((f'pwc_{a}', PWC(random_state=0)))
        self.clf_multi = MultiAnnotEnsemble(estimators=estimators,
                                            voting='soft')
        self.clf = PWC(random_state=0)
        self.ma_qs = IEThresh(random_state=0, n_annotators=self.n_annotators)
        testing.set_font_settings_for_testing()
        testing.set_reproducibility_for_testing()
        testing.setup()

    def test_ma_plot_data_set_X(self):
        """Check that ``plot_ma_data_set`` rejects invalid arguments."""
        # Transposed X, 2-d true labels and a non-figure ``fig`` must raise.
        self.assertRaises(ValueError, plot_ma_data_set, self.X.T, self.y,
                          self.y_true)
        self.assertRaises(ValueError, plot_ma_data_set, self.X, self.y,
                          self.y_true.reshape(-1, 1))
        self.assertRaises(TypeError,
                          plot_ma_data_set,
                          self.X,
                          self.y,
                          self.y_true,
                          fig=4)

    def test_ma_plot_data_set(self):
        """Compare the data set plot (with NaN labels) to the expected PDF."""
        y = np.array(self.y, dtype=float)
        # Set the entries (i, i) for i = 0..4 to NaN.
        y[np.arange(5), np.arange(5)] = np.nan
        fig = plot_ma_data_set(self.X,
                               y,
                               self.y_true,
                               fig_size=(12, 3),
                               legend_dict={
                                   'loc': 'lower center',
                                   'bbox_to_anchor': (0.5, 0.1),
                                   'ncol': 3
                               },
                               tick_dict={
                                   'labelbottom': True,
                                   'labelleft': True
                               })
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'data_set_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'data_set_expected_result.pdf',
            self.path_prefix + 'data_set_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_data_set_mc(self):
        """Compare the four-class data set plot to the expected PDF."""
        X_prime, y_true_prime = make_classification(n_features=2,
                                                    n_redundant=0,
                                                    n_clusters_per_class=1,
                                                    n_classes=4,
                                                    random_state=0)
        rng = np.random.default_rng(seed=0)
        # Each multinomial draw is multiplied with the values 0..3 and summed
        # over the last axis, giving one integer in 0..3 per entry.
        noise = np.sum(rng.multinomial(n=1, pvals=[.7, .1, .1, .1],
                                       size=(self.n_samples,
                                             self.n_annotators)) \
                       * np.arange(4).reshape(1, 1, 4), axis=2)
        # NOTE(review): the noisy labels are derived from ``self.y_true``
        # rather than ``y_true_prime``; the stored expected image depends on
        # this, so confirm the intent before changing it.
        y_prime = (self.y_true.reshape(-1, 1) + noise) % 4
        fig = plot_ma_data_set(X_prime, y_prime, y_true_prime)
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'data_set_mf_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'data_set_mf_expected_result.pdf',
            self.path_prefix + 'data_set_mf_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_utility_args(self):
        """Check that ``plot_ma_utility`` rejects invalid arguments."""
        # NOTE(review): ``y`` is built here but never used; the argument
        # dictionaries below pass ``self.y``.
        y = np.array(self.y, dtype=float)
        y[np.arange(5), np.arange(5)] = np.nan
        # 'X_cand' inside the argument dictionary must raise.
        maqs_arg_dict = {
            'clf': self.clf,
            'X': self.X,
            'y': self.y,
            'X_cand': self.X
        }
        bound = check_bound(X=self.X)
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)
        # 'A_cand' inside the argument dictionary must raise.
        maqs_arg_dict = {
            'clf': self.clf,
            'X': self.X,
            'y': self.y,
            'A_cand': np.ones((self.n_samples, self.n_annotators))
        }
        bound = check_bound(X=self.X)
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)
        maqs_arg_dict = {'clf': self.clf, 'X': self.X, 'y': self.y}
        # With ``n_annotators`` unset on the strategy the call must raise.
        self.ma_qs.n_annotators = None
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          feature_bound=bound)
        # A figure with seven axes columns is passed in both remaining
        # calls, once with and once without ``A_cand``.
        fig, _ = plt.subplots(ncols=7)
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          A_cand=np.ones((100, 5)),
                          fig=fig,
                          feature_bound=bound)
        self.ma_qs.n_annotators = 5
        self.assertRaises(ValueError,
                          plot_ma_utility,
                          self.ma_qs,
                          maqs_arg_dict,
                          fig=fig,
                          feature_bound=bound)

    def test_ma_plot_utility(self):
        """Compare the utility plot over the bound to the expected PDF."""
        # NOTE(review): ``y`` is built here but never used; the argument
        # dictionary below passes ``self.y``.
        y = np.array(self.y, dtype=float)
        y[np.arange(5), np.arange(5)] = np.nan
        maqs_arg_dict = {'clf': self.clf_multi, 'X': self.X, 'y': self.y}
        bound = check_bound(X=self.X)
        fig = plot_ma_utility(self.ma_qs,
                              maqs_arg_dict,
                              feature_bound=bound,
                              title='utility',
                              fig_size=(20, 5))
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'plot_utility_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'plot_utility_expected_result.pdf',
            self.path_prefix + 'plot_utility_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_utility_with_X(self):
        """Compare the utility plot for given candidates to the expected PDF."""
        maqs_arg_dict = {'clf': self.clf_multi, 'X': self.X, 'y': self.y}
        # All-ones ``A_cand`` with one entry per sample-annotator pair.
        A_cand = np.ones((self.n_samples, self.n_annotators))
        fig = plot_ma_utility(self.ma_qs,
                              maqs_arg_dict,
                              X_cand=self.X,
                              A_cand=A_cand)
        fig.tight_layout()
        fig.savefig(self.path_prefix + 'plot_utility_X_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'plot_utility_X_expected_result.pdf',
            self.path_prefix + 'plot_utility_X_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_decision_boundary_args(self):
        """Check that the call without ``n_annotators`` raises ValueError."""
        bound = check_bound(X=self.X)
        self.assertRaises(ValueError, plot_ma_decision_boundary, self.clf,
                          bound)

    def test_ma_plot_decision_boundary(self):
        """Compare the decision boundary plot to the expected PDF."""
        bound = check_bound(X=self.X)
        # The classifier is fitted on the majority vote of the annotators.
        self.clf.fit(self.X, majority_vote(self.y, random_state=0))
        fig = plot_ma_decision_boundary(self.clf,
                                        bound,
                                        n_annotators=self.n_annotators)
        fig.tight_layout()
        fig.savefig(self.path_prefix +
                    'plot_decision_boundary_returned_result.pdf')
        comparison = compare_images(self.path_prefix +
                                    'plot_decision_boundary'
                                    '_expected_result.pdf',
                                    self.path_prefix +
                                    'plot_decision_boundary'
                                    '_returned_result.pdf',
                                    tol=0)
        self.assertIsNone(comparison)

    def test_ma_plot_current_state(self):
        """Compare the current state plot to the expected PDF."""
        maqs_arg_dict = {'clf': self.clf, 'X': self.X, 'y': self.y}
        # The classifier is fitted on the majority vote of the annotators.
        self.clf.fit(self.X, majority_vote(self.y, random_state=0))
        fig = plot_ma_current_state(self.X, self.y, self.y_true, self.ma_qs,
                                    self.clf, maqs_arg_dict)
        fig.tight_layout()
        fig.savefig(self.path_prefix +
                    'ma_plot_current_state_returned_result.pdf')
        comparison = compare_images(
            self.path_prefix + 'ma_plot_current_state_expected_result.pdf',
            self.path_prefix + 'ma_plot_current_state_returned_result.pdf',
            tol=0)
        self.assertIsNone(comparison)
def test_predict_proba(self):
    """Check the probabilities predicted by a PWC."""
    clf = PWC(classes=['tokyo', 'paris'], missing_label='nan')
    with self.assertRaises(NotFittedError):
        clf.predict_proba(X=self.X)

    # After fitting on ``self.y_nan`` every probability must be 0.5.
    clf.fit(X=self.X, y=self.y_nan)
    proba = clf.predict_proba(X=self.X)
    np.testing.assert_array_equal(np.full((len(self.X), 2), 0.5), proba)

    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    proba = clf.predict_proba(X=[self.X[0]])
    np.testing.assert_array_equal([[1 / 3, 2 / 3]], proba)

    # Precomputed metric with one neighbor: each ``class_prior`` value is
    # paired with the expected probabilities.
    prior_cases = [
        (1, [[1 / 5, 1 / 5, 3 / 5]]),
        ([0, 0, 1], [[0, 0, 1]]),
    ]
    for class_prior, expected in prior_cases:
        clf = PWC(classes=['tokyo', 'paris', 'new york'],
                  missing_label='nan',
                  n_neighbors=1,
                  metric='precomputed',
                  class_prior=class_prior)
        clf.fit(X=self.X, y=self.y, sample_weight=self.w)
        proba = clf.predict_proba(X=[[1, 0, 0]])
        np.testing.assert_array_equal(expected, proba)
class TestFeatureSpace(unittest.TestCase):
    """Tests for ``plot_decision_boundary`` and ``plot_utility``.

    The graphical tests save a PDF below ``self.path_prefix`` and compare it
    with a stored baseline PDF via ``compare_images`` using ``tol=0``.
    """

    def setUp(self):
        """Create data, a fitted classifier and plot settings for the tests."""
        self.path_prefix = os.path.dirname(visualization.__file__) + \
                           '/tests/images/'
        # Seed the global NumPy generator used by ``np.random.randint``.
        np.random.seed(0)
        self.X, self.y = make_classification(n_features=2,
                                             n_redundant=0,
                                             random_state=0)
        train_indices = np.random.randint(0, len(self.X), size=20)
        # Candidates are all samples that were not drawn for training.
        cand_indices = np.setdiff1d(np.arange(len(self.X)), train_indices)
        self.X_train = self.X[train_indices]
        self.y_train = self.y[train_indices]
        self.X_cand = self.X[cand_indices]
        self.clf = PWC()
        self.clf.fit(self.X_train, self.y_train)
        self.qs = UncertaintySampling()
        self.qs_dict = {'clf': self.clf, 'X': self.X_train, 'y': self.y_train}

        # Feature bound: per-feature minima first, per-feature maxima second.
        x1_min = min(self.X[:, 0])
        x1_max = max(self.X[:, 0])
        x2_min = min(self.X[:, 1])
        x2_max = max(self.X[:, 1])
        self.bound = [[x1_min, x2_min], [x1_max, x2_max]]

        self.cmap = 'jet'

        testing.set_font_settings_for_testing()
        testing.set_reproducibility_for_testing()
        testing.setup()

    # Tests for plot_decision_boundary function
    def test_decision_boundary_clf(self):
        """Check that invalid ``clf`` arguments are rejected."""
        # A query strategy instead of a classifier must raise TypeError.
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.qs,
                          feature_bound=self.bound)
        clf = TestClassifier()
        self.assertRaises(AttributeError,
                          plot_decision_boundary,
                          clf=clf,
                          feature_bound=self.bound)

    def test_decision_boundary_bound(self):
        """Check that a flat ``feature_bound`` list raises ValueError."""
        self.assertRaises(ValueError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=[0, 0, 1, 1])

    def test_decision_boundary_res(self):
        """Check that a string ``res`` raises TypeError."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          res='string')

    def test_decision_boundary_ax(self):
        """Check that an integer ``ax`` raises TypeError."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          ax=3)

    def test_decision_boundary_confidence(self):
        """Check the handling of the ``confidence`` parameter."""
        self.assertRaises(ValueError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence=0.0)
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence='string')
        # ``confidence=None`` must be accepted without raising.
        plot_decision_boundary(self.clf, self.bound, confidence=None)
        # A fitted ``LinearSVC`` together with a confidence value must warn.
        svc = LinearSVC()
        svc.fit(self.X_train, self.y_train)
        self.assertWarns(Warning,
                         plot_decision_boundary,
                         clf=svc,
                         feature_bound=self.bound,
                         confidence=0.75)

    def test_decision_boundary_cmap(self):
        """Check that an integer ``cmap`` raises TypeError."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          cmap=4)

    def test_decision_boundary_boundary_dict(self):
        """Check that ``boundary_dict`` must be a dictionary."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          boundary_dict='string')
        # A valid dictionary must be accepted without raising.
        plot_decision_boundary(clf=self.clf,
                               feature_bound=self.bound,
                               boundary_dict={'colors': 'r'})

    def test_decision_boundary_confidence_dict(self):
        """Check that ``confidence_dict`` must be a dictionary."""
        self.assertRaises(TypeError,
                          plot_decision_boundary,
                          clf=self.clf,
                          feature_bound=self.bound,
                          confidence_dict='string')
        # A valid dictionary must be accepted without raising.
        plot_decision_boundary(clf=self.clf,
                               feature_bound=self.bound,
                               confidence_dict={'linestyles': ':'})

    # Tests for plot_utility function
    def test_utility_qs(self):
        """Check that a classifier passed as ``qs`` raises TypeError."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.clf,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound)

    def test_utility_qs_dict(self):
        """Check that invalid ``qs_dict`` arguments are rejected."""
        # A set instead of a dictionary must raise TypeError.
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict={0, 1, 2},
                          feature_bound=self.bound)

        # 'X_cand' inside the dictionary must raise ValueError; note that
        # this modifies ``self.qs_dict`` in place.
        qs_dict = self.qs_dict
        qs_dict['X_cand'] = []
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=qs_dict,
                          feature_bound=self.bound)

    def test_utility_X_cand(self):
        """Check that omitting ``X_cand`` and ``feature_bound`` raises."""
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict)

    def test_utility_res(self):
        """Check that a negative ``res`` raises ValueError."""
        self.assertRaises(ValueError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          res=-3)

    def test_utility_ax(self):
        """Check that an integer ``ax`` raises TypeError."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          ax=2)

    def test_utility_contour_dict(self):
        """Check that ``contour_dict`` must be a dictionary."""
        self.assertRaises(TypeError,
                          plot_utility,
                          qs=self.qs,
                          qs_dict=self.qs_dict,
                          feature_bound=self.bound,
                          contour_dict='string')
        # A valid dictionary must be accepted without raising.
        plot_utility(qs=self.qs,
                     qs_dict=self.qs_dict,
                     feature_bound=self.bound,
                     contour_dict={'linestyles': '.'})

    # Graphical tests
    def test_no_candidates(self):
        """Compare the plot with utilities over the bound to its baseline."""
        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, feature_bound=self.bound, ax=ax)
        ax.scatter(self.X_cand[:, 0], self.X_cand[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(self.clf, self.bound, ax=ax, cmap=self.cmap)

        fig.savefig(self.path_prefix + 'dec_bound_wo_cand.pdf')
        comparison = compare_images(self.path_prefix +
                                    'dec_bound_wo_cand_base.pdf',
                                    self.path_prefix + 'dec_bound_wo_cand.pdf',
                                    tol=0)
        self.assertIsNone(comparison)

    def test_with_candidates(self):
        """Compare the plot with utilities for candidates to its baseline."""
        fig, ax = plt.subplots()
        plot_utility(self.qs, self.qs_dict, X_cand=self.X_cand, ax=ax)
        ax.scatter(self.X[:, 0], self.X[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(self.clf, self.bound, ax=ax, cmap=self.cmap)

        fig.savefig(self.path_prefix + 'dec_bound_w_cand.pdf')
        comparison = compare_images(self.path_prefix +
                                    'dec_bound_w_cand_base.pdf',
                                    self.path_prefix + 'dec_bound_w_cand.pdf',
                                    tol=0)
        self.assertIsNone(comparison)

    def test_multi_class(self):
        """Compare the three-class plot to its baseline."""
        X, y = make_classification(n_features=2,
                                   n_redundant=0,
                                   random_state=0,
                                   n_classes=3,
                                   n_clusters_per_class=1)
        # Uses the global NumPy generator in the state left by ``setUp``.
        train_indices = np.random.randint(0, len(X), size=20)
        cand_indices = np.setdiff1d(np.arange(len(X)), train_indices)
        X_train = X[train_indices]
        y_train = y[train_indices]
        X_cand = X[cand_indices]
        clf = PWC()
        clf.fit(X_train, y_train)
        qs = UncertaintySampling()
        bound = [[min(X[:, 0]), min(X[:, 1])], [max(X[:, 0]), max(X[:, 1])]]

        fig, ax = plt.subplots()
        plot_utility(qs, {
            'clf': clf,
            'X': X_train,
            'y': y_train
        },
                     feature_bound=bound,
                     ax=ax)
        ax.scatter(X_cand[:, 0], X_cand[:, 1], c='k', marker='.')
        ax.scatter(X_train[:, 0],
                   X_train[:, 1],
                   c=y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(clf, bound, ax=ax, res=101, cmap=self.cmap)
        fig.savefig(self.path_prefix + 'dec_bound_multiclass.pdf')
        comparison = compare_images(
            self.path_prefix + 'dec_bound_multiclass_base.pdf',
            self.path_prefix + 'dec_bound_multiclass.pdf',
            tol=0)
        self.assertIsNone(comparison)

    def test_svc(self):
        """Compare the plot with a ``LinearSVC`` boundary to its baseline."""
        svc = LinearSVC()
        svc.fit(self.X_train, self.y_train)

        fig, ax = plt.subplots()
        # Utilities still come from ``self.qs_dict``; only the decision
        # boundary is drawn for the SVC.
        plot_utility(self.qs, self.qs_dict, X_cand=self.X_cand, ax=ax)
        ax.scatter(self.X[:, 0], self.X[:, 1], c='k', marker='.')
        ax.scatter(self.X_train[:, 0],
                   self.X_train[:, 1],
                   c=self.y_train,
                   cmap=self.cmap,
                   alpha=.9,
                   marker='.')
        plot_decision_boundary(svc, self.bound, ax=ax, cmap=self.cmap)

        fig.savefig(self.path_prefix + 'dec_bound_svc.pdf')
        comparison = compare_images(self.path_prefix +
                                    'dec_bound_svc_base.pdf',
                                    self.path_prefix + 'dec_bound_svc.pdf',
                                    tol=0)
        self.assertIsNone(comparison)
def test_predict(self):
    """Check the class labels predicted by a PWC."""

    def predict_after_fit(clf, **fit_kwargs):
        """Fit ``clf`` on ``self.X`` and return its predictions for it."""
        clf.fit(X=self.X, **fit_kwargs)
        return clf.predict(self.X)

    clf = PWC(classes=['tokyo', 'paris'], missing_label='nan',
              random_state=0)
    with self.assertRaises(NotFittedError):
        clf.predict(X=self.X)

    # The predictions after fitting on ``self.y_nan`` differ between the two
    # random states.
    np.testing.assert_array_equal(['tokyo', 'paris', 'tokyo'],
                                  predict_after_fit(clf, y=self.y_nan))
    clf = PWC(classes=['tokyo', 'paris'], missing_label='nan',
              random_state=1)
    np.testing.assert_array_equal(['tokyo', 'tokyo', 'paris'],
                                  predict_after_fit(clf, y=self.y_nan))

    # Refit the same classifier with labels and sample weights.
    weighted = predict_after_fit(clf, y=self.y, sample_weight=self.w)
    np.testing.assert_array_equal(['tokyo', 'tokyo', 'tokyo'], weighted)

    # With a cost matrix, 'paris' is expected for every sample.
    clf = PWC(classes=['tokyo', 'paris', 'new york'],
              missing_label='nan',
              cost_matrix=[[0, 1, 4], [10, 0, 5], [2, 2, 0]])
    np.testing.assert_array_equal(['paris', 'paris', 'paris'],
                                  predict_after_fit(clf, y=self.y_nan))

    clf = PWC(classes=['tokyo', 'paris'],
              missing_label='nan',
              cost_matrix=[[0, 1], [10, 0]])
    weighted = predict_after_fit(clf, y=self.y, sample_weight=self.w)
    np.testing.assert_array_equal(['paris', 'paris', 'paris'], weighted)
def test_predict_freq(self):
    """Check the class frequency estimates predicted by a PWC."""
    cities = ['tokyo', 'paris', 'new york']

    clf = PWC(classes=list(cities),
              missing_label='nan',
              n_neighbors=10,
              metric='rbf',
              metric_dict={'gamma': 2})
    with self.assertRaises(NotFittedError):
        clf.predict_freq(X=self.X)

    # After fitting on ``self.y_nan`` all frequencies must be zero.
    clf.fit(X=self.X, y=self.y_nan)
    freq = clf.predict_freq(X=self.X)
    np.testing.assert_array_equal(np.zeros((len(self.X), 3)), freq)

    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    freq = clf.predict_freq(X=[self.X[0]])
    np.testing.assert_array_equal([[0, 1, 2]], freq)

    # Restricting to one neighbor changes the expected frequencies.
    clf = PWC(classes=list(cities), missing_label='nan', n_neighbors=1)
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    freq = clf.predict_freq(X=[self.X[0]])
    np.testing.assert_array_equal([[0, 1, 0]], freq)

    # Precomputed metric: inputs with fewer than three columns must raise.
    clf = PWC(classes=list(cities),
              missing_label='nan',
              n_neighbors=1,
              metric='precomputed')
    clf.fit(X=self.X, y=self.y, sample_weight=self.w)
    for malformed in ([[1, 0]], [[1], [0]]):
        with self.assertRaises(ValueError):
            clf.predict_freq(X=malformed)
    freq = clf.predict_freq(X=[[1, 0, 0]])
    np.testing.assert_array_equal([[0, 0, 2]], freq)

    # A callable kernel must give the same result as the built-in 'rbf'.
    def rbf_kernel(x, y, gamma):
        return np.exp(-gamma * np.sum((x - y)**2))

    query_points = np.ones_like(self.X)
    clf = PWC(classes=['tokyo', 'paris'],
              missing_label='nan',
              random_state=0,
              metric=rbf_kernel,
              metric_dict={'gamma': 2})
    freq_callable = clf.fit(X=self.X, y=self.y).predict_freq(query_points)

    query_points = np.ones_like(self.X)
    clf = PWC(classes=['tokyo', 'paris'],
              missing_label='nan',
              metric='rbf',
              metric_dict={'gamma': 2},
              random_state=0)
    freq_builtin = clf.fit(X=self.X, y=self.y).predict_freq(query_points)
    np.testing.assert_array_equal(freq_callable, freq_builtin)