import unittest
from unittest.mock import patch

import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import confusion_matrix

# NOTE: the import paths of StandardAnnot and BaseAnnot are assumptions; adjust
# them to the actual package layout of this project. The enclosing test-case
# class for the StandardAnnot tests is not named in the original excerpt, so
# TestStandardAnnot is an assumed name.
from annotlib import StandardAnnot
from annotlib.base import BaseAnnot


class TestStandardAnnot(unittest.TestCase):

    def setUp(self):
        self.X, self.y_true = load_iris(return_X_y=True)
        self.C = np.random.uniform(0, 1, len(self.X) * 3).reshape((len(self.X), 3))
        self.y_missing = np.full(len(self.X), np.nan)
        self.annot = StandardAnnot(
            X=self.X, Y=np.array([self.y_true, self.y_true, self.y_missing]).T,
            C=self.C)

    def test_n_queries(self):
        annotator = StandardAnnot(self.X, self.Y, self.C)
        # query class labels of the selected annotator with query_value=3
        ids = [0]
        annotator.class_labels(self.X[0:2], ids, query_value=3)
        # test number of queries per annotator
        np.testing.assert_array_equal([3, 0], annotator.n_queries())
    def test_init(self):
        # test initialisation with invalid parameters
        self.assertRaises(ValueError, StandardAnnot, self.X, self.Y[:3],
                          self.C[:3])
        self.assertRaises(ValueError, StandardAnnot, self.X, self.Y, self.C[:3])
        self.assertRaises(ValueError, StandardAnnot, self.X,
                          self.Y[:, 0].reshape(-1, 1), self.C)
        # test initialisation with correct parameters
        self.assertEqual(StandardAnnot(self.X, self.Y, self.C).n_annotators(), 2)
        np.testing.assert_array_equal(self.C.shape,
                                      StandardAnnot(self.X, self.Y).C_.shape)
    def test_confidence_noise(self):
        # test invalid confidence noise parameters
        self.assertRaises(ValueError, StandardAnnot, self.X, self.Y, self.C,
                          [.2, .3, .5], 42, False)
        # test valid confidence noise: noisy confidences must stay in [0, 1]
        annotator = StandardAnnot(self.X, self.Y, np.copy(self.C), [.3, 200],
                                  42, True)
        self.assertTrue(
            np.logical_and(annotator.C_ >= 0, annotator.C_ <= 1).all())
    def test_confidence_scores(self):
        annotator = StandardAnnot(self.X, self.Y, self.C)
        # test querying confidence scores
        ids = [0, 2, 3]
        X = self.X[ids]
        C = annotator.confidence_scores(X)
        np.testing.assert_array_equal(self.C[ids], C)
        # test querying confidence scores of missing samples
        X = np.array([[-1, -1], [-2, -3]])
        C = annotator.confidence_scores(X)
        np.testing.assert_array_equal(
            np.array([[np.nan, np.nan], [np.nan, np.nan]]), C)
        # test querying confidence scores of selected annotators
        ids = [0]
        C = annotator.confidence_scores(self.X[0:2], ids)
        np.testing.assert_array_equal(
            np.array([[self.C[0, 0], np.nan], [self.C[1, 0], np.nan]]), C)
    def test_class_labels(self):
        annotator = StandardAnnot(self.X, self.Y, self.C)
        # test querying class labels
        ids = [0, 2, 3]
        X = self.X[ids]
        Y = annotator.class_labels(X)
        np.testing.assert_array_equal(self.Y[ids], Y)
        # test querying class labels of missing samples
        X = np.array([[-1, -1], [-2, -3]])
        Y = annotator.class_labels(X)
        np.testing.assert_array_equal(
            np.array([[np.nan, np.nan], [np.nan, np.nan]]), Y)
        # test querying class labels of selected annotators
        ids = [0]
        Y = annotator.class_labels(self.X[0:2], ids)
        np.testing.assert_array_equal(
            np.array([[self.Y[0, 0], np.nan], [self.Y[1, 0], np.nan]]), Y)
    def test_queried_samples(self):
        annotator = StandardAnnot(self.X, self.Y, self.C)
        # query class labels of the selected annotator
        ids = [0]
        annotator.class_labels(self.X[0:2], ids)
        # test queried samples per annotator
        np.testing.assert_array_equal(self.X[0:2],
                                      annotator.queried_samples()[0])
        np.testing.assert_array_equal(np.array([]).reshape(0, 2),
                                      annotator.queried_samples()[1])
class TestBaseAnnot(unittest.TestCase):

    def setUp(self):
        self.X, self.y_true = load_iris(return_X_y=True)
        self.C = np.random.uniform(0, 1, len(self.X) * 3).reshape((len(self.X), 3))
        self.y_missing = np.full(len(self.X), np.nan)
        self.annot = StandardAnnot(
            X=self.X, Y=np.array([self.y_true, self.y_true, self.y_missing]).T,
            C=self.C)

    @patch.multiple(BaseAnnot, __abstractmethods__=set())
    def test_interface(self):
        base_annot = BaseAnnot()
        base_annot.n_annotators()
        base_annot.n_queries()
        base_annot.queried_samples()
        base_annot.class_labels(X=None, annotator_ids=None, query_value=None)
        base_annot.confidence_scores(X=None, annotator_ids=None)

    def test_labelling_performance(self):
        # test accuracy as default measure of labelling performance
        accuracies = self.annot.labelling_performance(X=self.X,
                                                      y_true=self.y_true)
        np.testing.assert_array_equal([1, 1, np.nan], accuracies)
        # test confusion matrix as measure of labelling performance
        conf_matrices = self.annot.labelling_performance(
            X=self.X, y_true=self.y_true, perf_func=confusion_matrix)
        correct_matrix = np.array([[50, 0, 0], [0, 50, 0], [0, 0, 50]])
        self.assertEqual(len(conf_matrices), self.annot.n_annotators())
        np.testing.assert_array_equal(correct_matrix, conf_matrices[0])
        np.testing.assert_array_equal(correct_matrix, conf_matrices[1])
        np.testing.assert_array_equal(np.nan, conf_matrices[2])

    def test_plot_labelling_accuracy(self):
        # test wrong annotator ids
        self.assertRaises(ValueError, self.annot.plot_labelling_accuracy,
                          self.X, self.y_true, [-1])
        # test correct annotator ids
        fig, ax = self.annot.plot_labelling_accuracy(X=self.X,
                                                     y_true=self.y_true,
                                                     figsize=(5, 5))
        self.assertEqual(5, fig.get_figheight())
        self.assertEqual(5, fig.get_figwidth())
        np.testing.assert_array_equal([0, 1, 2], ax.get_xticks())

    def test_plot_labelling_confusion_matrices(self):
        # test wrong annotator ids
        self.assertRaises(ValueError,
                          self.annot.plot_labelling_confusion_matrices,
                          self.X, self.y_true, np.unique(self.y_true), [-1])
        # test correct annotator ids
        fig, ax = self.annot.plot_labelling_confusion_matrices(
            X=self.X, y_true=self.y_true, y_unique=np.unique(self.y_true),
            figsize=(5, 5))
        self.assertEqual(5, fig.get_figwidth())
        self.assertEqual(5 * self.annot.n_annotators(), fig.get_figheight())
        self.assertEqual(self.annot.n_annotators(), len(ax))

    def test_plot_class_labels(self):
        # test wrong annotator ids
        self.assertRaises(ValueError, self.annot.plot_class_labels, self.X,
                          None, [-1])
        # test wrong feature ids
        self.assertRaises(ValueError, self.annot.plot_class_labels, self.X,
                          [-1])
        # test different options of correct usage
        fig, ax = self.annot.plot_class_labels(X=self.X, y_true=self.y_true,
                                               plot_confidences=True,
                                               figsize=(5, 5))
        self.assertEqual(5, fig.get_figwidth())
        self.assertEqual(self.annot.n_annotators() * 5, fig.get_figheight())
        self.assertEqual(self.annot.n_annotators(), len(ax))
        fig, ax = self.annot.plot_class_labels(X=self.X, y_true=None,
                                               plot_confidences=True,
                                               figsize=(5, 5))
        self.assertEqual(5, fig.get_figwidth())
        self.assertEqual(self.annot.n_annotators() * 5, fig.get_figheight())
        self.assertEqual(self.annot.n_annotators(), len(ax))
        fig, ax = self.annot.plot_class_labels(X=self.X, y_true=self.y_true,
                                               plot_confidences=False,
                                               figsize=(5, 5))
        self.assertEqual(5, fig.get_figwidth())
        self.assertEqual(self.annot.n_annotators() * 5, fig.get_figheight())
        self.assertEqual(self.annot.n_annotators(), len(ax))
        fig, ax = self.annot.plot_class_labels(X=self.X, y_true=self.y_true,
                                               plot_confidences=True,
                                               features_ids=[0],
                                               figsize=(5, 5))
        self.assertEqual(5, fig.get_figwidth())
        self.assertEqual(self.annot.n_annotators() * 5, fig.get_figheight())
        self.assertEqual(self.annot.n_annotators(), len(ax))
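# The two test cases above are plain unittest classes; the entry point below is
# an assumption (not part of the original excerpt) that allows the test module
# to be executed directly with `python <module>.py`.
if __name__ == '__main__':
    unittest.main()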
def run(results_path, data_set, query_strategy, budget, test_ratio, seed):
    """
    Run experiments to compare query selection strategies. Experimental results
    are stored in a .csv-file.

    Parameters
    ----------
    results_path: str
        Absolute path to store results.
    data_set: str
        Name of the data set.
    query_strategy: str
        Determines query strategy.
    budget: int or float
        Maximal number of labeled samples (int) or ratio of the labelable
        samples (float in (0, 1]).
    test_ratio: float in (0, 1)
        Ratio of test samples.
    seed: int
        Random seed.
    """
    # --------------------------------------------- LOAD DATA ---------------------------------------------
    is_cosine = 'reports' in data_set
    X, y_true, y = load_data(data_set_name=data_set)
    n_features = np.size(X, axis=1)
    n_classes = len(np.unique(y))
    n_annotators = np.size(y, axis=1)
    print(data_set + ': ' + str(investigate_data_set(data_set)))

    # convert budget into an absolute number of label acquisitions
    budget_str = str(budget)
    if budget > len(X) * n_annotators * (1 - test_ratio):
        budget = int(math.floor(len(X) * n_annotators * (1 - test_ratio)))
    elif budget > 1:
        budget = int(budget)
    elif 0 < budget <= 1:
        budget = int(math.floor(len(X) * n_annotators * (1 - test_ratio) * budget))
    else:
        raise ValueError(
            "'budget' must be a float in (0, 1] or an integer in [0, n_samples]")
    budget = np.min((budget, 1000))

    # --------------------------------------------- STATISTICS --------------------------------------------
    # define storage for performances
    results = {}

    # define performance functions
    C = 1 - np.eye(n_classes)
    perf_funcs = {
        'micro-misclf-rate': [partial(misclassification_costs, C=C, average='micro'), {}],
        'macro-misclf-rate': [partial(misclassification_costs, C=C, average='macro'), {}]
    }

    # ----------------------------------------- TRAIN-TEST SPLIT ------------------------------------------
    print('seed: {}'.format(str(seed)))
    X_train, X_test, y_true_train, y_true_test, y_train, y_test = train_test_split(
        X, y_true, y, test_size=test_ratio, random_state=seed)
    # resplit until train and test set contain the same classes
    while not np.array_equal(np.unique(y_true_train), np.unique(y_true_test)):
        X_train, X_test, y_true_train, y_true_test, y_train, y_test = train_test_split(
            X, y_true, y, random_state=seed, test_size=test_ratio)
        seed += 1000
        print('new seed: {}'.format(seed))
    n_samples = len(X_train)

    # --------------------------------------------- CSV NAME ----------------------------------------------
    csv_name = '{}_{}_{}_{}_{}.csv'.format(data_set, query_strategy, budget_str,
                                           test_ratio, seed)

    # ------------------------------------------ PREPROCESS DATA ------------------------------------------
    if is_cosine:
        kwargs = {'metric': 'cosine'}
    else:
        # standardize data
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        # compute bandwidth of the RBF kernel
        bandwidth = estimate_bandwidth(n_samples=n_samples, n_features=n_features)
        print('bandwidth: {}'.format(str(bandwidth)))
        gamma = 0.5 * (bandwidth ** (-2))
        kwargs = {'metric': 'rbf', 'gamma': gamma}

    # set up classifiers
    pwc_train = PWC(n_classes=n_classes, combine_labels=False,
                    random_state=seed, **kwargs)
    S_train = pairwise_kernels(X_train, X_train, **kwargs)
    pwc_test = PWC(n_classes=n_classes, metric='precomputed',
                   combine_labels=False, probabilistic=False, random_state=seed)
    S_test = pairwise_kernels(X_test, X_train, **kwargs)

    # set up data set and annotators
    data_set = DataSet(X_train, n_annotators=n_annotators)
    annotators = StandardAnnot(X=X_train, Y=y_train)

    # create query strategy
    if query_strategy == 'ceal':
        query_strategy = CEAL(data_set=data_set, n_classes=n_classes,
                              clf=pwc_train, n_neighbors=10,
                              label_proportion=0.2 * budget / n_annotators,
                              random_state=seed, **kwargs)
    elif query_strategy == 'alio':
        query_strategy = ALIO(data_set=data_set, n_classes=n_classes,
                              clf=pwc_train,
                              label_proportion=0.2 * budget / n_annotators,
                              random_state=seed)
    elif query_strategy == 'proactive':
        query_strategy = Proactive(data_set=data_set, n_classes=n_classes,
                                   clf=pwc_train, n_components=20,
                                   label_proportion=0.2 * budget / n_annotators,
                                   random_state=seed)
    elif 'mapal' in query_strategy:
        # parse the strategy string: the dash-separated fields after 'mapal'
        # encode mean prior, sum prior, m_max, alpha, and weights type
        params = query_strategy.split('-')
        mean_prior = float(params[1])
        sum_prior = (np.sum(S_train) - n_samples) / (n_samples ** 2 - n_samples) \
            if params[2] == 'mean' else float(params[2])
        prior = np.array([mean_prior, 1 - mean_prior])
        prior /= np.sum(prior)
        prior *= sum_prior
        print('prior = {}'.format(prior))
        m_max = int(params[3])
        alpha = float(params[4])
        weights_type = str(params[5])
        bam = BAM(n_classes=n_classes, weights_type=weights_type, prior=prior,
                  random_state=seed, **kwargs)
        query_strategy = MAPAL(data_set=data_set, m_max=m_max,
                               n_classes=n_classes, S=S_train, bam=bam,
                               alpha_x=alpha, alpha_c=alpha, random_state=seed)
    elif query_strategy == 'ie-adj-cost':
        query_strategy = IEAdjCost(data_set=data_set, clf=pwc_train,
                                   n_classes=n_classes, delta=0.4, lmbda=0.4,
                                   alpha=0.05, epsilon=0.8, random_state=seed)
    elif query_strategy == 'ie-thresh':
        query_strategy = IEThresh(data_set=data_set, clf=pwc_train,
                                  n_classes=n_classes, epsilon=0.8, alpha=0.05,
                                  random_state=seed)
    elif query_strategy == 'random':
        query_strategy = RS(data_set=data_set, random_state=seed)
    else:
        raise ValueError(
            "query strategy must be in ['ceal', 'alio', 'proactive', "
            "'mapal-...', 'ie-adj-cost', 'ie-thresh', 'random']")

    # ----------------------------------------- ACTIVE LEARNING CYCLE -------------------------------------
    times = [0]
    for b in range(budget):
        print("budget: {}".format(b))

        # evaluate results
        eval_perfs(clf=pwc_test, X_train=S_train, y_train=y_true_train,
                   X_test=S_test, y_test=y_true_test, perf_results=results,
                   perf_funcs=perf_funcs)
        eval_annot_stats(y=data_set.y_, y_true=y_true_train, results=results)

        # select sample and annotator
        t = time()
        selection = query_strategy.make_query()
        times.append(time() - t)
        sample_id = selection[0, 0]
        annotator_id = [selection[0, 1]]
        print("selected sample: {}".format(sample_id))
        print("selected annotator: {}".format(annotator_id))

        # query selected annotator for labeling selected sample
        X_query = [X_train[sample_id]]
        y_query = annotators.class_labels(X_query, annotator_ids=annotator_id)
        print('class label: {}'.format(y_query[0, annotator_id[0]]))

        # update training data
        data_set.update_entries(sample_id, y_query)
        print(data_set.len_labeled(per_annotator=True))

        # retrain classifier
        pwc_test.fit(X=data_set.X_, y=data_set.y_, c=data_set.c_)

    # evaluate final results
    eval_perfs(clf=pwc_test, X_train=S_train, y_train=y_true_train,
               X_test=S_test, y_test=y_true_test, perf_results=results,
               perf_funcs=perf_funcs)
    eval_annot_stats(y=data_set.y_, y_true=y_true_train, results=results)

    # store performance results
    results['times'] = times
    df = pd.DataFrame(results)
    df.to_csv('{}/{}'.format(results_path, csv_name), index_label='index')
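# Sketch of a possible command-line entry point for run(). The argument names
# mirror the function parameters, while every default value below (results
# directory, data set name, strategy, budget, test ratio, seed) is an
# illustrative assumption rather than a setting taken from the original script.
# The experiment also requires the usual third-party imports at module level,
# e.g. math, numpy, pandas, functools.partial, time.time, and the sklearn
# utilities train_test_split, StandardScaler, and pairwise_kernels, plus the
# project-specific classes and helpers (PWC, BAM, DataSet, StandardAnnot, the
# query strategies, load_data, investigate_data_set, estimate_bandwidth,
# misclassification_costs, eval_perfs, eval_annot_stats) from this repository.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Compare query selection strategies for multi-annotator '
                    'active learning.')
    parser.add_argument('--results_path', type=str, default='results')
    parser.add_argument('--data_set', type=str, default='iris')
    parser.add_argument('--query_strategy', type=str, default='random')
    parser.add_argument('--budget', type=float, default=0.4)
    parser.add_argument('--test_ratio', type=float, default=0.4)
    parser.add_argument('--seed', type=int, default=42)
    args = parser.parse_args()

    run(results_path=args.results_path, data_set=args.data_set,
        query_strategy=args.query_strategy, budget=args.budget,
        test_ratio=args.test_ratio, seed=args.seed)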