def learn(self):
    # seeding: pick one labelled example from every grade class
    classes = self.short_df['grades_round'].unique()
    seed_index = []
    for i in classes:
        seed_index.append(
            self.short_df['grades_round'][self.short_df['grades_round'] == i].index[0])

    act_data = self.short_df.copy()
    accuracy_list = []

    # initialising
    train_idx = seed_index
    X_train = self.X[train_idx]
    y_train = self.Y[train_idx]

    # generating the pool
    X_pool = np.delete(self.X, train_idx, axis=0)
    y_pool = np.delete(self.Y, train_idx)
    act_data = act_data.drop(axis=0, index=train_idx)
    act_data.reset_index(drop=True, inplace=True)

    initiated_committee = []
    for learner_idx, model in enumerate(self.learners):
        learner = ActiveLearner(
            estimator=model,
            X_training=X_train,
            y_training=y_train,
        )
        initiated_committee.append(learner)

    # Committee creation
    committee = Committee(
        learner_list=initiated_committee,
        # query_strategy=vote_entropy_sampling
    )
    committee.teach(X_train, y_train)

    # pool-based sampling: label self.percent % of the data, chosen at random
    n_queries = int(len(self.X) / (100 / self.percent))
    for idx in range(n_queries):
        query_idx = np.random.choice(range(len(X_pool)))
        committee.teach(
            X=X_pool[query_idx].reshape(1, -1),
            y=y_pool[query_idx].reshape(1, ),
        )
        # remove queried instance from pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx)
        act_data = act_data.drop(axis=0, index=query_idx)
        act_data.reset_index(drop=True, inplace=True)
        accuracy_list.append(accuracy_score(y_pool, committee.predict(X_pool)))
        # print('Accuracy after query no. %d: %f'
        #       % (idx + 1, accuracy_score(y_pool, committee.predict(X_pool))))

    print("By labelling only", round(n_queries * 100.0 / len(self.X), 2),
          "% of the data, an accuracy of",
          round(accuracy_score(y_pool, committee.predict(X_pool)), 3),
          "is achieved on the unseen pool")
    model_pred = committee.predict(X_pool)
    model_f1 = f1_score(y_pool, model_pred, average='weighted')
    return accuracy_list, model_f1
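# The learn() method above relies on attributes set elsewhere in its class
# (self.short_df, self.X, self.Y, self.learners, self.percent) and on a few
# module-level imports. The sketch below is a hypothetical constructor showing
# one plausible shape for that state; the class name, feature handling and
# default estimators are assumptions, not the original implementation.
import numpy as np
from modAL.models import ActiveLearner, Committee
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score


class GradeCommitteeLearner:
    def __init__(self, short_df, feature_cols, percent=10, learners=None):
        # dataframe with a 'grades_round' column used for seeding
        self.short_df = short_df.reset_index(drop=True)
        # feature matrix and label vector consumed by learn()
        self.X = self.short_df[feature_cols].to_numpy()
        self.Y = self.short_df['grades_round'].to_numpy()
        # percentage of the pool that learn() will query
        self.percent = percent
        # base estimators that learn() wraps into ActiveLearners
        self.learners = learners or [RandomForestClassifier(),
                                     KNeighborsClassifier()]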
class Committee2Level:
    """Two-level committee: osCommittee separates class 2 from everything else,
    pnCommittee resolves the remaining (non-2) classes."""

    def __init__(self, estimators_1, estimators_2, X, Y):
        indexs2 = list(filter(lambda i: Y[i] != 2, range(len(Y))))
        X1 = X
        Y1 = np.array([i if i == 2 else 1 for i in Y])
        X2 = X[indexs2]
        Y2 = Y[indexs2]
        learners1 = [ActiveLearner(e, X_training=X1, y_training=Y1) for e in estimators_1]
        learners2 = [ActiveLearner(e, X_training=X2, y_training=Y2) for e in estimators_2]
        self.osCommittee = Committee(learners1)
        self.pnCommittee = Committee(learners2)
        self.choice_indx = 0

    def teach(self, X, Y):
        indexs2 = list(filter(lambda i: Y[i] != 2, range(len(Y))))
        X1 = X
        Y1 = np.array([i if i == 2 else 1 for i in Y])
        X2 = X[indexs2]
        Y2 = Y[indexs2]
        self.osCommittee.teach(X1, Y1)
        if len(Y2) > 0:
            self.pnCommittee.teach(X2, Y2)

    def predict(self, X):
        predicts1 = self.osCommittee.predict(X)
        indexs = range(len(X))
        predicts2 = self.pnCommittee.predict(X)
        # keep the first committee's answer where it says "class 2",
        # otherwise defer to the second committee
        predicts = [predicts1[i] if predicts1[i] == 2 else predicts2[i] for i in indexs]
        return predicts

    def query(self, X, Y):
        indexs2 = list(filter(lambda i: Y[i] != 2, range(len(Y))))
        X1 = X
        X2 = X[indexs2]
        q2 = ([], None)
        if len(X2) > 0:
            q2_idx, q2_inst = self.pnCommittee.query(X2)
            # map subset indices back to positions in the full pool X,
            # so the caller can index X and Y with the returned indices
            q2 = (np.asarray(indexs2)[q2_idx], q2_inst)
        # alternate between the two committees' suggestions
        alternatives = [self.osCommittee.query(X1), q2]
        r = alternatives[self.choice_indx % 2]
        self.choice_indx += 1
        if len(r[0]) == 0:
            return alternatives[self.choice_indx % 2]
        return r
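# A minimal sketch of how Committee2Level might be driven. It assumes the class
# and the modAL imports defined above; the estimator choices, the synthetic
# 3-class pool, the seed size and the number of queries are all illustrative
# assumptions, not taken from the original code.
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

rng = np.random.default_rng(0)
X_pool = rng.normal(size=(300, 4))          # synthetic features
y_pool = rng.integers(1, 4, size=300)       # synthetic labels in {1, 2, 3}

# seed both committees with a handful of labelled points
seed = np.arange(30)
two_level = Committee2Level(
    estimators_1=[LogisticRegression(max_iter=1000), DecisionTreeClassifier()],
    estimators_2=[LogisticRegression(max_iter=1000), DecisionTreeClassifier()],
    X=X_pool[seed], Y=y_pool[seed],
)
X_pool, y_pool = np.delete(X_pool, seed, axis=0), np.delete(y_pool, seed)

# simple pool-based loop: query, label, teach, shrink the pool
for _ in range(20):
    query_idx, _ = two_level.query(X_pool, y_pool)
    query_idx = np.atleast_1d(query_idx)
    two_level.teach(X_pool[query_idx], y_pool[query_idx])
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx)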
def run(X, y, n_samples_for_intial, n_queries, n_comittee_members, estimator):
    # start timer
    start_time = time.time()

    # init list of different learners
    learners = []
    X_train, y_train, X_pool, y_pool = create_random_pool_and_initial_sets(
        X, y, n_samples_for_intial)
    for member_idx in range(n_comittee_members):
        learners.append(ActiveLearner(estimator=estimator,
                                      X_training=X_train,
                                      y_training=y_train))

    # init committee
    committee = Committee(learner_list=learners,
                          query_strategy=max_disagreement_sampling)
    unqueried_score = committee.score(X, y)
    print('Score over unqueried samples {:0.4f}'.format(unqueried_score))

    performance_history = []
    f1_score = 0
    index = 0
    # keep querying until the micro-averaged F1 score reaches 0.65
    while f1_score < 0.65:
        index += 1
        # get sample from pool
        query_idx, query_instance = committee.query(X_pool)

        # retrain the committee with the new sample
        committee.teach(
            X=X_pool[query_idx].reshape(1, -1),
            y=y_pool[query_idx].reshape(1, ),
        )

        # remove queried instance from pool
        X_pool = delete_rows_csr(X_pool, query_idx)
        y_pool = np.delete(y_pool, query_idx)

        y_pred = committee.predict(X)
        f1_score = metrics.f1_score(y, y_pred, average='micro')
        if index % 100 == 0:
            print('F1 score after {n} training samples: {f1:0.4f}'.format(
                n=index, f1=f1_score))

        # save the F1 score for this iteration
        performance_history.append(f1_score)

    print("--- %s seconds ---" % (time.time() - start_time))
    print(performance_history)
    return index
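# run() depends on two helpers that are not shown in this snippet. The sketches
# below are plausible stand-ins, not the original implementations: a random
# initial/pool split and a row-deletion utility for scipy CSR matrices.
import numpy as np


def create_random_pool_and_initial_sets(X, y, n_samples_for_intial):
    # randomly pick the initial labelled set; everything else becomes the pool
    initial_idx = np.random.choice(range(X.shape[0]),
                                   size=n_samples_for_intial,
                                   replace=False)
    mask = np.ones(X.shape[0], dtype=bool)
    mask[initial_idx] = False
    X_train, y_train = X[initial_idx], y[initial_idx]
    X_pool, y_pool = X[mask], y[mask]
    return X_train, y_train, X_pool, y_pool


def delete_rows_csr(mat, indices):
    # drop the given rows from a CSR matrix by boolean masking
    mask = np.ones(mat.shape[0], dtype=bool)
    mask[np.asarray(indices)] = False
    return mat[mask]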
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_members * 7, 7))
    for learner_idx, learner in enumerate(committee):
        plt.subplot(1, n_members, learner_idx + 1)
        plt.scatter(x=pca[:, 0], y=pca[:, 1],
                    c=learner.predict(iris['data']), cmap='viridis', s=50)
        plt.title('Learner no. %d initial predictions' % (learner_idx + 1))
    plt.show()

# visualizing the initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = committee.predict(iris['data'])
    plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50)
    plt.title('Committee initial predictions, accuracy = %1.3f'
              % committee.score(iris['data'], iris['target']))
    plt.show()

# query by committee
n_queries = 10
for idx in range(n_queries):
    query_idx, query_instance = committee.query(X_pool)
    committee.teach(X=X_pool[query_idx].reshape(1, -1),
                    y=y_pool[query_idx].reshape(1, ))
    # remove queried instance from pool
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx)
# assembling the Committee
committee = Committee(learner_list)

# visualizing every learner in the Committee
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7 * n_learners, 7))
    for learner_idx, learner in enumerate(committee):
        plt.subplot(1, n_learners, learner_idx + 1)
        plt.imshow(learner.predict(X_pool).reshape(im_height, im_width))
        plt.title('Learner no. %d' % (learner_idx + 1))
    plt.show()

# visualizing the Committee's consensus predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    plt.imshow(committee.predict(X_pool).reshape(im_height, im_width))
    plt.title('Committee consensus predictions')
    plt.show()

# rebagging the data
committee.rebag()

# visualizing the learners in the retrained Committee
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7 * n_learners, 7))
    for learner_idx, learner in enumerate(committee):
        plt.subplot(1, n_learners, learner_idx + 1)
        plt.imshow(learner.predict(X_pool).reshape(im_height, im_width))
        plt.title('Learner no. %d after rebagging' % (learner_idx + 1))
    plt.show()
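# The snippet above assumes a pre-built learner_list plus X_pool, im_height and
# im_width. The sketch below shows one plausible setup on a 2D grid; the
# synthetic "disk" data, estimator choice and sample counts are assumptions for
# illustration only.
import numpy as np
from modAL.models import ActiveLearner, Committee
from sklearn.ensemble import RandomForestClassifier

im_width, im_height = 100, 100
# pool: every pixel of an im_width x im_height grid; label 1 inside a disk
X_pool = np.transpose([np.tile(np.arange(im_width), im_height),
                       np.repeat(np.arange(im_height), im_width)])
y_pool = (np.linalg.norm(X_pool - [im_width // 2, im_height // 2], axis=1) < 30).astype(int)

# each learner starts from its own bootstrap-style random sample of labels
n_learners = 3
learner_list = []
for _ in range(n_learners):
    idx = np.random.choice(len(X_pool), size=200, replace=True)
    learner_list.append(ActiveLearner(estimator=RandomForestClassifier(),
                                      X_training=X_pool[idx],
                                      y_training=y_pool[idx]))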
def learn(self):
    # seeding: pick one labelled example from every grade class
    classes = self.short_df['grades_round'].unique()
    seed_index = []
    for i in classes:
        seed_index.append(
            self.short_df['grades_round'][self.short_df['grades_round'] == i].index[0])

    act_data = self.short_df.copy()
    accuracy_list = []
    f1_total_list = []
    kappa_total_list = []

    # initialising
    train_idx = seed_index
    X_train = self.X[train_idx]
    y_train = self.Y[train_idx]

    # generating the pool
    X_pool = np.delete(self.X, train_idx, axis=0)
    y_pool = np.delete(self.Y, train_idx)
    act_data = act_data.drop(axis=0, index=train_idx)
    act_data.reset_index(drop=True, inplace=True)

    initiated_committee = []
    for learner_idx, model in enumerate(self.learners):
        learner = ActiveLearner(estimator=model,
                                X_training=X_train,
                                y_training=y_train)
        initiated_committee.append(learner)

    # Committee creation
    committee = Committee(
        learner_list=initiated_committee,
        # query_strategy=vote_entropy_sampling
    )
    committee.teach(X_train, y_train)

    # pool-based sampling: label self.percent % of the data, chosen at random
    n_queries = int(len(self.X) / (100 / self.percent))
    for idx in range(n_queries):
        query_idx = np.random.choice(range(len(X_pool)))
        committee.teach(X=X_pool[query_idx].reshape(1, -1),
                        y=y_pool[query_idx].reshape(1, ))

        # remove queried instance from pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx)
        act_data = act_data.drop(axis=0, index=query_idx)
        act_data.reset_index(drop=True, inplace=True)

        # track accuracy, weighted F1 and Cohen's kappa on the remaining pool
        accuracy_list.append(accuracy_score(y_pool, committee.predict(X_pool)))
        model_pred = committee.predict(X_pool)
        f1_total_list.append(f1_score(y_pool, model_pred, average="weighted",
                                      labels=np.unique(model_pred)))
        kappa_total_list.append(cohen_kappa_score(y_pool, model_pred))

    return accuracy_list, f1_total_list, kappa_total_list
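# A small, optional sketch of how the three metric histories returned by this
# learn() variant might be plotted. The matplotlib usage and the 'model'
# instance are assumptions added for illustration, not part of the original code.
import matplotlib.pyplot as plt

accuracy_list, f1_total_list, kappa_total_list = model.learn()  # 'model' is an assumed instance

plt.plot(accuracy_list, label='accuracy')
plt.plot(f1_total_list, label='weighted F1')
plt.plot(kappa_total_list, label="Cohen's kappa")
plt.xlabel('number of queries')
plt.ylabel('score on the remaining pool')
plt.legend()
plt.show()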
# assembling the committee
committee = Committee(learner_list=learner_list)

# visualizing each learner's initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_members * 7, 7))
    for learner_idx, learner in enumerate(committee):
        plt.subplot(1, n_members, learner_idx + 1)
        plt.scatter(x=pca[:, 0], y=pca[:, 1],
                    c=learner.predict(iris['data']), cmap='viridis', s=50)
        plt.title('Learner no. %d initial predictions' % (learner_idx + 1))
    plt.show()

# visualizing the Committee's initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = committee.predict(iris['data'])
    plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50)
    plt.title('Committee initial predictions')
    plt.show()

# query by committee
n_queries = 10
for idx in range(n_queries):
    query_idx, query_instance = committee.query(X_pool)
    committee.teach(
        X=X_pool[query_idx].reshape(1, -1),
        y=y_pool[query_idx].reshape(1, ),
    )
    # remove queried instance from pool
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx)
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_members * 7, 7))
    for learner_idx, learner in enumerate(committee):
        plt.subplot(1, n_members, learner_idx + 1)
        plt.scatter(x=pca[:, 0], y=pca[:, 1],
                    c=learner.predict(data), cmap='viridis', s=50)
        plt.title('Learner no. %d initial predictions' % (learner_idx + 1))
    plt.show()

# visualizing the initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = committee.predict(data)
    plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50)
    plt.title('Committee initial predictions, accuracy = %1.3f'
              % committee.score(data, target))
    plt.show()

# query by committee
n_queries = 10
for idx in range(n_queries):
    query_idx, query_instance = committee.query(X_pool)
    committee.teach(X=X_pool[query_idx].reshape(1, -1),
                    y=y_pool[query_idx].reshape(1, ))
    # remove queried instance from pool
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx)
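# After the query loop it is common to re-plot the committee's predictions to
# see the effect of the queries. This short block is an illustrative addition
# using only the names already present in the snippet above (data, target, pca,
# committee, n_queries); it is not part of the original code.
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = committee.predict(data)
    plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50)
    plt.title('Committee predictions after %d queries, accuracy = %1.3f'
              % (n_queries, committee.score(data, target)))
    plt.show()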
X_initial, y_initial, X_pool, y_pool = init_data_sampling(xtrain, ytrain)

# creating learners: if committee_flag is True, build a committee from a subset
# of the returned learners; otherwise use only the first learner
learner1, learner2, learner3 = create_learners(qs, X_initial, y_initial)
if committee_flag:
    learner = Committee(
        learner_list=[learner1, learner3],
        query_strategy=vote_entropy_sampling,
    )
else:
    learner = learner1

# the active learning loop
update_results(ytest, learner.predict(xtest), i)
n_queries = 10
for idx in range(n_queries):
    query_idx, query_instance = learner.query(X_pool, n_instances=300)
    learner.teach(
        X=X_pool[query_idx],
        y=y_pool[query_idx],
    )
    # remove queried instances from the pool
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx, axis=0)
    print(idx)
    update_results(ytest, learner.predict(xtest), i)

plots(i)
reset_tables()
f1_plot()
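# The snippet above calls a create_learners helper that is not shown. Below is
# a plausible sketch of such a helper: three ActiveLearners sharing the query
# strategy qs. The estimator choices are assumptions for illustration only.
from modAL.models import ActiveLearner
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier


def create_learners(qs, X_initial, y_initial):
    estimators = [RandomForestClassifier(),
                  LogisticRegression(max_iter=1000),
                  KNeighborsClassifier()]
    learners = [ActiveLearner(estimator=est,
                              query_strategy=qs,
                              X_training=X_initial,
                              y_training=y_initial)
                for est in estimators]
    return learners[0], learners[1], learners[2]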