def test_LogisticRegression(self):
    clf = sklearn.linear_model.LogisticRegression()
    clf.fit(self.X_train, self.y_train)
    lr = LogisticRegression()
    lr.train(Dataset(self.X_train, self.y_train))
    assert_array_equal(
        clf.predict(self.X_train), lr.predict(self.X_train))
    assert_array_equal(
        clf.predict(self.X_test), lr.predict(self.X_test))
    self.assertEqual(
        clf.score(self.X_train, self.y_train),
        lr.score(Dataset(self.X_train, self.y_train)))
    self.assertEqual(
        clf.score(self.X_test, self.y_test),
        lr.score(Dataset(self.X_test, self.y_test)))
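# A minimal sketch of the fixture the test above assumes (hypothetical; the
# actual setUp is defined elsewhere in the test class). Any small labeled
# classification set works; iris is used here purely for illustration.
def setUp(self):
    import sklearn.datasets
    from sklearn.model_selection import train_test_split
    X, y = sklearn.datasets.load_iris(return_X_y=True)
    self.X_train, self.X_test, self.y_train, self.y_test = \
        train_test_split(X, y, test_size=0.3, random_state=1126)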
def main():
    global pos_filepath, dataset_filepath, csv_filepath, vectors_list, ids_list
    dataset_filepath = "/Users/dndesign/Desktop/active_learning/vecteurs_et_infos/vectors_2015.txt"
    csv_filepath = "/Users/dndesign/Desktop/active_learning/donnees/corpus_2015_id-time-text.csv"
    pos_filepath = "/Users/dndesign/Desktop/active_learning/donnees/oriane_pos_id-time-text.csv"
    vectors_list, ids_list = get_vectors_list(dataset_filepath)

    timestr = time.strftime("%Y%m%d_%H%M%S")
    text_file = codecs.open("task_" + str(timestr) + ".txt", "w", "utf-8")

    print("Loading data...")
    text_file.write("Loading data...\n")

    # Open the dataset file and count its entries
    t0 = time.time()
    file = openfile_txt(dataset_filepath)
    num_lines = sum(1 for line in file)
    print("Treating " + str(num_lines) + " entries...")
    text_file.write("Treating : %s entries...\n" % str(num_lines))

    # Number of queries to ask the human annotator to label
    quota = 10
    E_out1, E_out2, E_out3, E_out4, E_out6, E_out7 = [], [], [], [], [], []

    trn_ds, tst_ds = split_train_test(csv_filepath)

    model = SVM(kernel='linear')
    # model = LogisticRegression()

    '''
    UncertaintySampling (Least Confident)

    UncertaintySampling queries the instances about whose labels it is least
    certain. With the least-confident criterion, it queries the instance whose
    posterior probability of being positive is nearest 0.5.
    '''
    qs = UncertaintySampling(trn_ds, method='lc', model=LogisticRegression(C=.01))
    model.train(trn_ds)
    E_out1 = np.append(E_out1, 1 - model.score(tst_ds))

    '''
    UncertaintySampling (Max Margin)
    '''
    trn_ds2 = copy.deepcopy(trn_ds)
    qs2 = USampling(trn_ds2, method='mm', model=SVM(kernel='linear'))
    model.train(trn_ds2)
    E_out2 = np.append(E_out2, 1 - model.score(tst_ds))

    '''
    CMB Sampling

    Combination of active learning algorithms: distance-based (DIST) and
    diversity-based (DIV).
    '''
    trn_ds3 = copy.deepcopy(trn_ds)
    qs3 = CMBSampling(trn_ds3, model=SVM(kernel='linear'))
    model.train(trn_ds3)
    E_out3 = np.append(E_out3, 1 - model.score(tst_ds))

    '''
    Random Sampling

    Chooses a query at random.
    '''
    trn_ds4 = copy.deepcopy(trn_ds)
    qs4 = RandomSampling(trn_ds4, random_state=1126)
    model.train(trn_ds4)
    E_out4 = np.append(E_out4, 1 - model.score(tst_ds))

    '''
    QueryByCommittee (Vote Entropy)

    QueryByCommittee keeps a committee of classifiers and queries the instance
    on which the committee members disagree; it examines unlabeled examples
    and selects only those that are most informative for labeling. Vote
    entropy is one way of measuring that disagreement.
    Disadvantage: it does not consider the committee members' class
    distributions, and it misses some informative unlabeled examples.
    '''
    trn_ds6 = copy.deepcopy(trn_ds)
    qs6 = QueryByCommittee(trn_ds6, disagreement='vote',
                           models=[LogisticRegression(C=1.0),
                                   LogisticRegression(C=0.01),
                                   LogisticRegression(C=100)],
                           random_state=1126)
    model.train(trn_ds6)
    E_out6 = np.append(E_out6, 1 - model.score(tst_ds))

    '''
    QueryByCommittee (Kullback-Leibler Divergence)

    Selects the unlabeled example whose committee predictions diverge most.
    Disadvantage: it misses some examples on which committee members disagree.
    '''
    trn_ds7 = copy.deepcopy(trn_ds)
    qs7 = QueryByCommittee(trn_ds7, disagreement='kl_divergence',
                           models=[LogisticRegression(C=1.0),
                                   LogisticRegression(C=0.01),
                                   LogisticRegression(C=100)],
                           random_state=1126)
    model.train(trn_ds7)
    E_out7 = np.append(E_out7, 1 - model.score(tst_ds))

    with sns.axes_style("darkgrid"):
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)

    query_num = np.arange(0, 1)
    p1, = ax.plot(query_num, E_out1, 'red')
    p2, = ax.plot(query_num, E_out2, 'blue')
    p3, = ax.plot(query_num, E_out3, 'green')
    p4, = ax.plot(query_num, E_out4, 'orange')
    p6, = ax.plot(query_num, E_out6, 'black')
    p7, = ax.plot(query_num, E_out7, 'purple')
    plt.legend(('Least Confident', 'Max Margin', 'Distance Diversity CMB',
                'Random Sampling', 'Vote Entropy', 'KL Divergence'), loc=1)
    plt.ylabel('Error')  # the curves track 1 - accuracy
    plt.xlabel('Number of Queries')
    plt.title('Active Learning - Query choice strategies')
    plt.ylim([0, 1])
    plt.show(block=False)

    for i in range(quota):
        print("\n#################################################")
        print("Query number " + str(i) + " : ")
        print("#################################################\n")
        text_file.write("\n#################################################\n")
        text_file.write("Query number %s : " % str(i))
        text_file.write("\n#################################################\n")

        # Uncertainty Sampling (Least Confident)
        ask_id = qs.make_query()
        print("\033[4mUsing Uncertainty Sampling (Least confident) :\033[0m")
        print("Tweet :" + define_tweet_by_id(ask_id), end='', flush=True)
        print("Simulating human response : " + str(simulate_human_decision(ask_id)) + " \n")
        text_file.write("Using Uncertainty Sampling (Least confident) :\n")
        text_file.write("Tweet : %s \n" % str(define_tweet_by_id(ask_id)))
        text_file.write("Simulating human response : %s \n\n" % str(simulate_human_decision(ask_id)))
        trn_ds.update(ask_id, simulate_human_decision(ask_id))
        model.train(trn_ds)
        E_out1 = np.append(E_out1, 1 - model.score(tst_ds))

        # Uncertainty Sampling (Max Margin)
        ask_id = qs2.make_query()
        print("\033[4mUsing Uncertainty Sampling (Max Margin) :\033[0m")
        print("Tweet :" + define_tweet_by_id(ask_id), end='', flush=True)
        print("Simulating human response : " + str(simulate_human_decision(ask_id)) + " \n")
        text_file.write("Using Uncertainty Sampling (Max Margin) :\n")
        text_file.write("Tweet : %s \n" % str(define_tweet_by_id(ask_id)))
        text_file.write("Simulating human response : %s \n\n" % str(simulate_human_decision(ask_id)))
        trn_ds2.update(ask_id, simulate_human_decision(ask_id))
        model.train(trn_ds2)
        E_out2 = np.append(E_out2, 1 - model.score(tst_ds))

        # CMB Distance-Diversity Sampling
        ask_id = qs3.make_query()
        print("\033[4mUsing CMB Distance-Diversity Sampling :\033[0m")
        print("Tweet :" + define_tweet_by_id(ask_id), end='', flush=True)
        print("Simulating human response : " + str(simulate_human_decision(ask_id)) + " \n")
        text_file.write("Using CMB Distance-Diversity Sampling :\n")
        text_file.write("Tweet : %s \n" % str(define_tweet_by_id(ask_id)))
        text_file.write("Simulating human response : %s \n\n" % str(simulate_human_decision(ask_id)))
        trn_ds3.update(ask_id, simulate_human_decision(ask_id))
        model.train(trn_ds3)
        E_out3 = np.append(E_out3, 1 - model.score(tst_ds))

        # Random Sampling
        ask_id = qs4.make_query()
        print("\033[4mUsing Random Sampling :\033[0m")
        print("Tweet :" + define_tweet_by_id(ask_id), end='', flush=True)
        print("Simulating human response : " + str(simulate_human_decision(ask_id)) + " \n")
        text_file.write("Using Random Sampling :\n")
        text_file.write("Tweet : %s \n" % str(define_tweet_by_id(ask_id)))
        text_file.write("Simulating human response : %s \n\n" % str(simulate_human_decision(ask_id)))
        trn_ds4.update(ask_id, simulate_human_decision(ask_id))
        model.train(trn_ds4)
        E_out4 = np.append(E_out4, 1 - model.score(tst_ds))

        # QueryByCommittee (Vote Entropy)
        ask_id = qs6.make_query()
        print("\033[4mUsing QueryByCommittee (Vote Entropy) :\033[0m")
        print("Tweet :" + define_tweet_by_id(ask_id), end='', flush=True)
        print("Simulating human response : " + str(simulate_human_decision(ask_id)) + " \n")
        text_file.write("Using QueryByCommittee (Vote Entropy) :\n")
        text_file.write("Tweet : %s \n" % str(define_tweet_by_id(ask_id)))
        text_file.write("Simulating human response : %s \n\n" % str(simulate_human_decision(ask_id)))
        trn_ds6.update(ask_id, simulate_human_decision(ask_id))
        model.train(trn_ds6)
        E_out6 = np.append(E_out6, 1 - model.score(tst_ds))

        # QueryByCommittee (KL Divergence)
        ask_id = qs7.make_query()
        print("\033[4mUsing QueryByCommittee (KL Divergence) :\033[0m")
        print("Tweet :" + define_tweet_by_id(ask_id), end='', flush=True)
        print("Simulating human response : " + str(simulate_human_decision(ask_id)) + " \n")
        text_file.write("Using QueryByCommittee (KL Divergence) :\n")
        text_file.write("Tweet : %s \n" % str(define_tweet_by_id(ask_id)))
        text_file.write("Simulating human response : %s \n\n" % str(simulate_human_decision(ask_id)))
        trn_ds7.update(ask_id, simulate_human_decision(ask_id))
        model.train(trn_ds7)
        E_out7 = np.append(E_out7, 1 - model.score(tst_ds))

        # Refresh the live error plot with the new scores
        ax.set_xlim((0, i + 1))
        ax.set_ylim((0, max(max(E_out1), max(E_out2), max(E_out3),
                            max(E_out4), max(E_out6), max(E_out7)) + 0.2))
        query_num = np.arange(0, i + 2)
        p1.set_xdata(query_num)
        p1.set_ydata(E_out1)
        p2.set_xdata(query_num)
        p2.set_ydata(E_out2)
        p3.set_xdata(query_num)
        p3.set_ydata(E_out3)
        p4.set_xdata(query_num)
        p4.set_ydata(E_out4)
        p6.set_xdata(query_num)
        p6.set_ydata(E_out6)
        p7.set_xdata(query_num)
        p7.set_ydata(E_out7)
        plt.draw()

    t2 = time.time()
    time_total = t2 - t0
    print("\n\n\n#################################################\n")
    print("Execution time : %fs \n\n" % time_total)
    text_file.write("\n\n\n#################################################\n")
    text_file.write("Execution time : %fs \n" % time_total)
    text_file.close()
    input("Press any key to save the plot...")
    plt.savefig('task_' + str(timestr) + '.png')
    print("Done")
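# Hypothetical sketch of the simulated oracle used above (the real
# simulate_human_decision is defined elsewhere in this project). The
# assumption here: pos_filepath lists the ids of positively labeled tweets,
# so the simulated "human" answer is simply membership in that set.
def simulate_human_decision(ask_id):
    positive_ids = set(line.split(',')[0].strip()
                       for line in codecs.open(pos_filepath, 'r', 'utf-8'))
    return 1 if ids_list[ask_id] in positive_ids else 0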
def main():
    quota = 10  # ask human to label 10 samples
    n_classes = 5
    E_out1, E_out2 = [], []

    trn_ds, tst_ds, ds = split_train_test(n_classes)
    trn_ds2 = copy.deepcopy(trn_ds)
    # print(trn_ds.get_entries())
    # print(len(trn_ds))

    qs = UncertaintySampling(trn_ds, method='lc', model=LogisticRegression())
    qs2 = RandomSampling(trn_ds2)

    model = LogisticRegression()

    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)
    ax.set_xlabel('Number of Queries')
    ax.set_ylabel('Error')

    model.train(trn_ds)
    E_out1 = np.append(E_out1, 1 - model.score(tst_ds))
    model.train(trn_ds2)
    E_out2 = np.append(E_out2, 1 - model.score(tst_ds))

    query_num = np.arange(0, 1)
    p1, = ax.plot(query_num, E_out1, 'g', label='qs Eout')
    p2, = ax.plot(query_num, E_out2, 'k', label='random Eout')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
               fancybox=True, shadow=True, ncol=5)
    plt.show(block=False)

    img_ax = fig.add_subplot(2, 1, 2)
    box = img_ax.get_position()
    img_ax.set_position(
        [box.x0, box.y0 - box.height * 0.1, box.width, box.height * 0.9])

    # Give each label its name (labels are from 0 to n_classes-1)
    lbr = InteractiveLabeler(label_name=[str(lbl) for lbl in range(n_classes)])

    for i in range(quota):
        ask_id = qs.make_query()
        print("asking sample from Uncertainty Sampling")
        # reshape the image to its width and height
        lb = lbr.label(trn_ds.data[ask_id][0].reshape(8, 8))
        trn_ds.update(ask_id, lb)
        model.train(trn_ds)
        E_out1 = np.append(E_out1, 1 - model.score(tst_ds))

        ask_id = qs2.make_query()
        print("asking sample from Random Sample")
        lb = lbr.label(trn_ds2.data[ask_id][0].reshape(8, 8))
        trn_ds2.update(ask_id, lb)
        model.train(trn_ds2)
        E_out2 = np.append(E_out2, 1 - model.score(tst_ds))

        ax.set_xlim((0, i + 1))
        ax.set_ylim((0, max(max(E_out1), max(E_out2)) + 0.2))
        query_num = np.arange(0, i + 2)
        p1.set_xdata(query_num)
        p1.set_ydata(E_out1)
        p2.set_xdata(query_num)
        p2.set_ydata(E_out2)
        plt.draw()

    input("Press any key to continue...")
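# A plausible sketch of the split_train_test helper the example above assumes
# (the real one is defined alongside it): it loads the scikit-learn digits
# data, reveals only a handful of labels in the training Dataset, and hides
# the rest behind None so the query strategies have a pool to draw from.
def split_train_test(n_classes):
    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split

    n_labeled = 5
    digits = load_digits(n_class=n_classes)
    X_train, X_test, y_train, y_test = train_test_split(
        digits.data, digits.target, test_size=0.33)
    trn_ds = Dataset(X_train, np.concatenate(
        [y_train[:n_labeled], [None] * (len(y_train) - n_labeled)]))
    tst_ds = Dataset(X_test, y_test)
    return trn_ds, tst_ds, digits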
def test_VarianceReduction(self):
    trn_ds = Dataset(
        self.X, np.concatenate([self.y[:2], [None] * (len(self.y) - 2)]))
    qs = VarianceReduction(trn_ds, model=LogisticRegression(), sigma=0.1)
    qseq = run_qs(trn_ds, qs, self.y, self.quota)
    assert_array_equal(qseq, np.array([4, 5, 2, 3]))
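# A minimal sketch of the run_qs test helper, assuming the signature used in
# the test above (the actual helper lives in the shared test module): it
# repeatedly asks the strategy for a query, reveals the true label, and
# records the order in which samples were chosen.
def run_qs(trn_ds, qs, truth, quota):
    ret = []
    for _ in range(quota):
        ask_id = qs.make_query()
        trn_ds.update(ask_id, truth[ask_id])
        ret.append(ask_id)
    return np.array(ret)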
def main():
    quota = 10  # ask human to label 10 samples
    n_classes = 5
    E_out1, E_out2 = [], []

    trn_ds, tst_ds, ds = split_train_test(n_classes)
    trn_ds2 = copy.deepcopy(trn_ds)

    qs = UncertaintySampling(trn_ds, method='lc', model=LogisticRegression())
    qs2 = RandomSampling(trn_ds2)

    model = LogisticRegression()

    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)
    ax.set_xlabel('Number of Queries')
    ax.set_ylabel('Error')

    model.train(trn_ds)
    E_out1 = np.append(E_out1, 1 - model.score(tst_ds))
    model.train(trn_ds2)
    E_out2 = np.append(E_out2, 1 - model.score(tst_ds))

    query_num = np.arange(0, 1)
    p1, = ax.plot(query_num, E_out1, 'g', label='qs Eout')
    p2, = ax.plot(query_num, E_out2, 'k', label='random Eout')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
               fancybox=True, shadow=True, ncol=5)
    plt.show(block=False)

    img_ax = fig.add_subplot(2, 1, 2)
    box = img_ax.get_position()
    img_ax.set_position(
        [box.x0, box.y0 - box.height * 0.1, box.width, box.height * 0.9])

    # Give each label its name (labels are from 0 to n_classes-1)
    lbr = InteractiveLabeler(label_name=[str(lbl) for lbl in range(n_classes)])

    for i in range(quota):
        ask_id = qs.make_query()
        print("asking sample from Uncertainty Sampling")
        # reshape the image to its width and height
        lb = lbr.label(trn_ds.data[ask_id][0].reshape(8, 8))
        trn_ds.update(ask_id, lb)
        model.train(trn_ds)
        E_out1 = np.append(E_out1, 1 - model.score(tst_ds))

        ask_id = qs2.make_query()
        print("asking sample from Random Sample")
        lb = lbr.label(trn_ds2.data[ask_id][0].reshape(8, 8))
        trn_ds2.update(ask_id, lb)
        model.train(trn_ds2)
        E_out2 = np.append(E_out2, 1 - model.score(tst_ds))

        ax.set_xlim((0, i + 1))
        ax.set_ylim((0, max(max(E_out1), max(E_out2)) + 0.2))
        query_num = np.arange(0, i + 2)
        p1.set_xdata(query_num)
        p1.set_ydata(E_out1)
        p2.set_xdata(query_num)
        p2.set_ydata(E_out2)
        plt.draw()

    input("Press any key to continue...")
def test_quire(self):
    trn_ds = init_toyexample(self.X, self.y)
    qs = QUIRE(trn_ds)
    model = LogisticRegression()
    qseq = run_qs(trn_ds, self.lbr, model, qs, self.quota)
    assert_array_equal(qseq, np.array([6, 7, 9, 8]))
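# Hypothetical sketch of init_toyexample (the real fixture is defined in the
# test module): it wraps the toy features in a Dataset with only the first
# six labels revealed, leaving indices 6-9 as the unlabeled pool that QUIRE
# queries above. Note that this test module's run_qs takes a labeler and a
# model, unlike the variant sketched earlier.
def init_toyexample(X, y):
    return Dataset(X, np.concatenate([y[:6], [None] * (len(y) - 6)]))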
def main():
    test_size = 0.25  # the percentage of samples in the dataset that will be
                      # randomly selected and assigned to the test set
    result = {'E1': [], 'E2': [], 'E3': [], 'E4': [], 'E5': [], 'E6': []}
    for i in range(10):  # repeat experiment
        trn_ds, tst_ds, fully_labeled_trn_ds = split_train_test(test_size)
        trn_ds2 = copy.deepcopy(trn_ds)
        trn_ds3 = copy.deepcopy(trn_ds)
        trn_ds4 = copy.deepcopy(trn_ds)
        trn_ds5 = copy.deepcopy(trn_ds)
        trn_ds6 = copy.deepcopy(trn_ds)
        lbr = IdealLabeler(fully_labeled_trn_ds)
        model = BinaryRelevance(LogisticRegression())

        quota = 150  # number of samples to query

        qs = MMC(trn_ds, br_base=LogisticRegression())
        _, E_out_1 = run(trn_ds, tst_ds, lbr, model, qs, quota)
        result['E1'].append(E_out_1)

        qs2 = RandomSampling(trn_ds2)
        _, E_out_2 = run(trn_ds2, tst_ds, lbr, model, qs2, quota)
        result['E2'].append(E_out_2)

        qs3 = MultilabelWithAuxiliaryLearner(
            trn_ds3,
            BinaryRelevance(LogisticRegression()),
            BinaryRelevance(SVM()),
            criterion='hlr')
        _, E_out_3 = run(trn_ds3, tst_ds, lbr, model, qs3, quota)
        result['E3'].append(E_out_3)

        qs4 = MultilabelWithAuxiliaryLearner(
            trn_ds4,
            BinaryRelevance(LogisticRegression()),
            BinaryRelevance(SVM()),
            criterion='shlr')
        _, E_out_4 = run(trn_ds4, tst_ds, lbr, model, qs4, quota)
        result['E4'].append(E_out_4)

        qs5 = MultilabelWithAuxiliaryLearner(
            trn_ds5,
            BinaryRelevance(LogisticRegression()),
            BinaryRelevance(SVM()),
            criterion='mmr')
        _, E_out_5 = run(trn_ds5, tst_ds, lbr, model, qs5, quota)
        result['E5'].append(E_out_5)

        qs6 = BinaryMinimization(trn_ds6, LogisticRegression())
        _, E_out_6 = run(trn_ds6, tst_ds, lbr, model, qs6, quota)
        result['E6'].append(E_out_6)

    E_out_1 = np.mean(result['E1'], axis=0)
    E_out_2 = np.mean(result['E2'], axis=0)
    E_out_3 = np.mean(result['E3'], axis=0)
    E_out_4 = np.mean(result['E4'], axis=0)
    E_out_5 = np.mean(result['E5'], axis=0)
    E_out_6 = np.mean(result['E6'], axis=0)

    print("MMC: ", E_out_1[::5].tolist())
    print("Random: ", E_out_2[::5].tolist())
    print("MultilabelWithAuxiliaryLearner_hlr: ", E_out_3[::5].tolist())
    print("MultilabelWithAuxiliaryLearner_shlr: ", E_out_4[::5].tolist())
    print("MultilabelWithAuxiliaryLearner_mmr: ", E_out_5[::5].tolist())
    print("BinaryMinimization: ", E_out_6[::5].tolist())

    query_num = np.arange(1, quota + 1)
    fig = plt.figure(figsize=(9, 6))
    ax = plt.subplot(111)
    ax.plot(query_num, E_out_1, 'g', label='MMC')
    ax.plot(query_num, E_out_2, 'k', label='Random')
    ax.plot(query_num, E_out_3, 'r', label='AuxiliaryLearner_hlr')
    ax.plot(query_num, E_out_4, 'b', label='AuxiliaryLearner_shlr')
    ax.plot(query_num, E_out_5, 'c', label='AuxiliaryLearner_mmr')
    ax.plot(query_num, E_out_6, 'm', label='BinaryMinimization')

    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
    plt.legend(loc=2, bbox_to_anchor=(1.05, 1), borderaxespad=0.)
    plt.xlabel('Number of Queries')
    plt.ylabel('Loss')
    plt.title('Experiment Result (Hamming Loss)')
    plt.show()
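# A minimal sketch of the run helper assumed above (the real one is defined
# with this example): each round it queries, labels via the IdealLabeler,
# retrains, and records in-sample and out-of-sample loss. The plot title
# suggests criterion='hamming' is the measure used; treat that as an
# assumption.
def run(trn_ds, tst_ds, lbr, model, qs, quota):
    E_in, E_out = [], []
    for _ in range(quota):
        ask_id = qs.make_query()
        X, _ = zip(*trn_ds.data)
        lb = lbr.label(X[ask_id])
        trn_ds.update(ask_id, lb)
        model.train(trn_ds)
        E_in = np.append(E_in, model.score(trn_ds, criterion='hamming'))
        E_out = np.append(E_out, model.score(tst_ds, criterion='hamming'))
    return E_in, E_out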
labeled_train = list(set(range(labeled_entries)).difference(test_index))
trn_ds = Dataset(X[train_index],
                 Y.iloc[labeled_train].values.tolist()
                 + [None] * (len(train_index) - len(labeled_train)))
tst_ds = Dataset(X[test_index], Y.iloc[test_index].values.tolist())
data_CV_train = data_CV_concat.iloc[train_index]

''' MAIN FUNCTION '''
result = {'Hamming': [], 'F1': []}
model = BinaryRelevance(LogisticRegression())
quota = 20  # number of samples to query

# EXECUTE FROM HERE FOR ITERATIONS
qs1 = MultilabelWithAuxiliaryLearner(
    trn_ds,
    BinaryRelevance(LogisticRegression()),
    BinaryRelevance(SVM()),
    criterion='hlr')
run(data_CV_train, trn_ds, qs1, quota)
model.train(trn_ds)
def initialModelSetup(modelID, modelParams=None, fixRandomState=False):
    if modelID == 0:
        # Single random forest
        if modelParams is None:
            modelParams = (1000, )
        model = [SklearnProbaAdapter(RandomForestClassifier(
            random_state=42 if fixRandomState else None,
            n_estimators=modelParams[0]))]
    elif modelID == 1:
        # List of random forests
        if modelParams is None:
            modelParams = (100, 15)
        model = [SklearnProbaAdapter(RandomForestClassifier(
                     random_state=i if fixRandomState else None,
                     n_estimators=modelParams[0]))
                 for i in range(modelParams[1])]
    elif modelID == 2:
        # Small varied committee
        # SVC can be made a probabilistic model with probability=True,
        # but that slows down the fit
        model = [LogisticRegression(C=1.0,
                                    random_state=0 if fixRandomState else None),
                 SklearnProbaAdapter(RandomForestClassifier(
                     n_estimators=100,
                     random_state=1 if fixRandomState else None)),
                 SklearnAdapter(SVC(random_state=2 if fixRandomState else None)),
                 SklearnProbaAdapter(DecisionTreeClassifier(
                     random_state=3 if fixRandomState else None)),
                 SklearnProbaAdapter(KNeighborsClassifier(n_neighbors=19))]
    elif modelID == 3:
        # Large varied committee
        # SVC can be made a probabilistic model with probability=True,
        # but that slows down the fit
        model = [LogisticRegression(C=1.0,
                                    random_state=0 if fixRandomState else None),
                 LogisticRegression(C=0.1,
                                    random_state=1 if fixRandomState else None),
                 SklearnProbaAdapter(RandomForestClassifier(
                     n_estimators=100,
                     random_state=2 if fixRandomState else None)),
                 SklearnProbaAdapter(RandomForestClassifier(
                     n_estimators=200,
                     random_state=3 if fixRandomState else None)),
                 SklearnProbaAdapter(RandomForestClassifier(
                     n_estimators=300,
                     random_state=4 if fixRandomState else None)),
                 SklearnProbaAdapter(RandomForestClassifier(
                     n_estimators=400,
                     random_state=5 if fixRandomState else None)),
                 SklearnProbaAdapter(RandomForestClassifier(
                     n_estimators=500,
                     random_state=6 if fixRandomState else None)),
                 SklearnAdapter(SVC(C=1.0,  # kernel='rbf'
                                    random_state=7 if fixRandomState else None)),
                 SklearnAdapter(SVC(C=0.1, kernel='rbf',
                                    random_state=8 if fixRandomState else None)),
                 SklearnAdapter(SVC(C=1.0, kernel='poly', degree=3,
                                    random_state=9 if fixRandomState else None)),
                 SklearnAdapter(SVC(C=0.1, kernel='poly', degree=3,
                                    random_state=10 if fixRandomState else None)),
                 SklearnProbaAdapter(DecisionTreeClassifier(
                     random_state=11 if fixRandomState else None)),
                 SklearnProbaAdapter(KNeighborsClassifier(
                     n_neighbors=19, weights='distance')),
                 SklearnProbaAdapter(KNeighborsClassifier(n_neighbors=19)),
                 SklearnProbaAdapter(KNeighborsClassifier(n_neighbors=9))]
    return model
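# Illustrative usage (hypothetical; trn_ds is assumed to be an existing
# libact Dataset): the varied committees from modelID 2 or 3 are a natural
# fit for QueryByCommittee, while modelID 0 yields a single evaluation model.
committee = initialModelSetup(2, fixRandomState=True)
qs = QueryByCommittee(trn_ds, models=committee)
model = initialModelSetup(0)[0]
model.train(trn_ds)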
def main(args):
    acc_reviewer, acc_train, acc_test = [], [], []

    trn_ds, tst_ds, y_train = split_train_test()

    # query strategy
    # https://libact.readthedocs.io/en/latest/libact.query_strategies.html
    # #libact-query-strategies-uncertainty-sampling-module
    qs = UncertaintySampling(trn_ds, method='lc', model=LogisticRegression())

    # The passive learning model. The model given in the query strategy is
    # not the same. Have a look at this one.
    model = LogisticRegression()

    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)
    ax.set_xlabel('Number of Queries')
    ax.set_ylabel('Accuracy')

    oracle = y_train[get_indices_labeled_entries(trn_ds)]
    review = [label for feat, label in trn_ds.get_labeled_entries()]
    reviewer_acc = accuracy_score(oracle, review)

    # Train the model on the train dataset.
    # Append the scores.
    model.train(trn_ds)
    acc_reviewer = np.append(acc_reviewer, reviewer_acc)
    acc_train = np.append(
        acc_train,
        model.model.score([x[0] for x in trn_ds.get_entries()], y_train))
    acc_test = np.append(acc_test, model.score(tst_ds))

    query_num = np.arange(0, 1)
    p0, = ax.plot(query_num, acc_reviewer, 'g', label='Acc reviewer')
    p1, = ax.plot(query_num, acc_train, 'b', label='Acc train')
    p2, = ax.plot(query_num, acc_test, 'r', label='Acc test')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
               fancybox=True, shadow=True, ncol=5)
    plt.show(block=False)

    img_ax = fig.add_subplot(2, 1, 2)
    box = img_ax.get_position()
    img_ax.set_position(
        [box.x0, box.y0 - box.height * 0.1, box.width, box.height * 0.9])

    # Give each label its name (labels are from 0 to n_classes-1)
    lbr = InteractiveLabeler(label_name=["0", "1"])
    # lbr = InteractivePaperLabeler(label_name=["0", "1"])

    for i in range(args.quota):
        # make a query from the pool
        ask_id = qs.make_query()
        print("asking sample from Uncertainty Sampling")

        # reshape the image to its width and height
        data_point = trn_ds.data[ask_id][0].reshape(8, 8)
        lb = lbr.label(data_point)

        # update the label in the train dataset
        trn_ds.update(ask_id, lb)

        # train the model again
        model.train(trn_ds)

        # compute accuracy of the reviewer
        oracle = y_train[get_indices_labeled_entries(trn_ds)]
        review = [label for feat, label in trn_ds.get_labeled_entries()]
        reviewer_acc = accuracy_score(oracle, review)

        # append the scores
        acc_reviewer = np.append(acc_reviewer, reviewer_acc)
        acc_train = np.append(
            acc_train,
            model.model.score([x[0] for x in trn_ds.get_entries()], y_train))
        acc_test = np.append(acc_test, model.score(tst_ds))

        # adjust the limits of the axes
        ax.set_xlim((0, i + 1))
        ax.set_ylim((0, max(acc_test) + 0.2))

        query_num = np.arange(0, i + 2)
        p0.set_xdata(query_num)
        p0.set_ydata(acc_reviewer)
        p1.set_xdata(query_num)
        p1.set_ydata(acc_train)
        p2.set_xdata(query_num)
        p2.set_ydata(acc_test)
        plt.draw()

    input("Press any key to continue...")
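# Hypothetical sketch of get_indices_labeled_entries (defined elsewhere in
# this project): a libact Dataset stores (feature, label) pairs with None
# marking unlabeled entries, so the labeled indices can be recovered directly.
def get_indices_labeled_entries(dataset):
    return [idx for idx, (feat, label) in enumerate(dataset.data)
            if label is not None]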