def committee(X, y, n_learners=3, n_queries=50):
    '''Query by committee: compare vote-entropy sampling against random sampling.

    :param X: Data matrix
    :param y: labels
    :param n_learners: number of committee members
    :param n_queries: number of calls to the oracle
    :returns max_cost_idx: query index with the largest absolute accuracy gap
        between the two committees
    :returns entropy_acc: accuracy of the entropy-sampling committee at that query
    :returns random_acc: accuracy of the random-sampling committee at that query
    '''
    X_training, X_pool_entropy, y_training, y_pool_entropy, X_test, y_test = split_data(
        X, y)
    # Give the random committee its own copy of the pool so the two runs
    # consume independent pools.
    X_pool_random, y_pool_random = copy.deepcopy(
        X_pool_entropy), copy.deepcopy(y_pool_entropy)

    # One member per seed for each committee; each member gets its own
    # identically-seeded RandomForest.
    learner_entropy_list = [
        active_learner(RandomForestClassifier(random_state=seed),
                       X_training, y_training, vote_entropy_sampling, seed)
        for seed in range(n_learners)
    ]
    learner_random_list = [
        active_learner(RandomForestClassifier(random_state=seed),
                       X_training, y_training, random_query, seed)
        for seed in range(n_learners)
    ]

    committee_entropy = Committee(learner_list=learner_entropy_list,
                                  query_strategy=vote_entropy_sampling)
    _, acc_list_entropy, confusion_matrices = learn(committee_entropy, n_queries,
                                                    X_pool_entropy, y_pool_entropy,
                                                    X_test, y_test, True)

    committee_random = Committee(learner_list=learner_random_list,
                                 query_strategy=random_query)
    _, acc_list_random = learn(committee_random, n_queries,
                               X_pool_random, y_pool_random, X_test, y_test)

    for m in confusion_matrices:
        print(m)

    acc_list = [acc_list_entropy, acc_list_random]
    plots(acc_list, ["Uncertainty sampling", "Random Sampling"],
          "Committee Test Accuracy Over Queries", "committee_accuracies.png")

    # Locate the query where the two strategies disagree the most.
    gap = np.absolute(np.asarray(acc_list_entropy) - np.asarray(acc_list_random))
    max_cost_idx = np.argmax(gap)
    entropy_acc = acc_list_entropy[max_cost_idx]
    random_acc = acc_list_random[max_cost_idx]
    return max_cost_idx, entropy_acc, random_acc
def __init__(self,estimators_1,estimators_2,X,Y):
    """Set up two committees: `osCommittee` is trained on all samples with every
    non-2 label collapsed to 1; `pnCommittee` is trained only on the samples
    whose label is not 2 (with their original labels)."""
    # Positions of the samples whose label differs from 2.
    indexs2 = [i for i in range(len(Y)) if Y[i] != 2]
    X1 = X
    # Binary view of the labels: keep 2, map everything else to 1.
    Y1 = np.array([lab if lab == 2 else 1 for lab in Y])
    X2 = X[indexs2]
    Y2 = Y[indexs2]
    learners1 = []
    for est in estimators_1:
        learners1.append(ActiveLearner(est, X_training=X1, y_training=Y1))
    learners2 = []
    for est in estimators_2:
        learners2.append(ActiveLearner(est, X_training=X2, y_training=Y2))
    self.osCommittee = Committee(learners1)
    self.pnCommittee = Committee(learners2)
    self.choice_indx = 0
def learn(self):
    """Run random-sampling committee learning over the pool.

    Returns (accuracy_list, model_f1): per-query accuracy on the remaining
    pool, and the weighted F1 of the final committee on the remaining pool.
    """
    # seeding: take the first row of each distinct rounded grade as the seed set
    classes = self.short_df['grades_round'].unique()
    seed_index = []
    for i in classes:
        seed_index.append(self.short_df['grades_round'][self.short_df['grades_round']==i].index[0])
    seed_index  # NOTE(review): bare expression, has no effect
    act_data = self.short_df.copy()
    accuracy_list = []
    # initialising the labelled training set from the seed rows
    train_idx = seed_index
    X_train = self.X[train_idx]
    y_train = self.Y[train_idx]
    # generating the pool: everything except the seed rows
    X_pool = np.delete(self.X, train_idx, axis=0)
    y_pool = np.delete(self.Y, train_idx)
    act_data = act_data.drop(axis=0,index = train_idx)
    act_data.reset_index(drop = True,inplace=True)
    # one ActiveLearner per configured estimator, all on the same seed data
    initiated_committee = []
    for learner_idx,model in enumerate(self.learners):
        learner = ActiveLearner(
            estimator=model,
            X_training=X_train,
            y_training=y_train
        )
        initiated_committee.append(learner)
    # Commitee creation (default query strategy; vote entropy left disabled)
    committee = Committee(
        learner_list= initiated_committee,
        # query_strategy=vote_entropy_sampling
    )
    committee.teach(X_train,y_train)
    # pool-based sampling: label self.percent % of the data, chosen at random
    # NOTE(review): `X` here is a free variable, not `self.X` — presumably a
    # module-level global; confirm, or this raises NameError.
    n_queries = int(len(X)/(100/self.percent))
    for idx in range(n_queries):
        # random sampling: the committee's query strategy is bypassed
        query_idx = np.random.choice(range(len(X_pool)))
        committee.teach(
            X=X_pool[query_idx].reshape(1, -1),
            y=y_pool[query_idx].reshape(1, )
        )
        # remove queried instance from pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx)
        act_data = act_data.drop(axis=0,index = query_idx)
        act_data.reset_index(drop=True, inplace=True)
        # accuracy is measured on the shrinking unlabelled pool
        accuracy_list.append(accuracy_score(committee.predict(X_pool),y_pool))
    print("By just labelling ",round(n_queries*100.0/len(X),2),"% of total data accuracy of ",
          round(accuracy_score(committee.predict(X_pool),y_pool),3),
          " % is achieved on the unseen data" )
    model_pred = committee.predict(X_pool)
    model_f1 = f1_score(y_pool,model_pred,average='weighted')
    return accuracy_list,model_f1
def run(X, y, n_samples_for_intial, n_queries, n_comittee_members, estimator):
    """Query-by-committee with max-disagreement sampling until micro-F1 on X
    reaches 0.65; returns the number of oracle queries used.

    Note: the `n_queries` parameter is accepted but not used by this loop.
    """
    # start timer
    started = time.time()
    X_train, y_train, X_pool, y_pool = create_random_pool_and_initial_sets(X, y, n_samples_for_intial)
    # identical members, all seeded with the same initial labelled set
    members = [ActiveLearner(estimator=estimator, X_training=X_train, y_training=y_train)
               for _ in range(n_comittee_members)]
    committee = Committee(learner_list=members, query_strategy=max_disagreement_sampling)
    unqueried_score = committee.score(X, y)
    print('Score over unqueried samples {:0.4f}'.format(unqueried_score))
    performance_history = []
    f1_score = 0
    index = 0
    while f1_score < 0.65:
        index += 1
        # ask the committee which pool sample it disagrees on most
        query_idx, _query_instance = committee.query(X_pool)
        # retrain the committee with the newly labelled sample
        committee.teach(
            X=X_pool[query_idx].reshape(1, -1),
            y=y_pool[query_idx].reshape(1, )
        )
        # remove the queried instance from the (sparse) pool
        X_pool = delete_rows_csr(X_pool, query_idx)
        y_pool = np.delete(y_pool, query_idx)
        f1_score = metrics.f1_score(y, committee.predict(X), average='micro')
        if index % 100 == 0:
            print('F1 score after {n} training samples: {f1:0.4f}'.format(n=index, f1=f1_score))
        # record the score after every query
        performance_history.append(f1_score)
    print("--- %s seconds ---" % (time.time() - started))
    print(performance_history)
    return index
def cmte_loop(estimator1,estimator2,X_0,Y_0,X_train,Y_train,X_test,Y_test,indexs,n=5):
    """Active-learning loop that queries until the pool is exhausted.

    Builds one committee from `estimator1` (each member seeded with X_0/Y_0),
    then repeatedly queries, teaches, and moves the queried sample from the
    pool into the labelled set. `estimator2` and `n` are currently unused.

    Returns (committee, accuracies) where accuracies holds the result of
    `evaluate` after every query.
    """
    pool_X = deepcopy(np.delete(X_train, indexs, axis=0))
    pool_Y = deepcopy(np.delete(Y_train, indexs))
    committee = Committee([ActiveLearner(e, X_training=X_0, y_training=Y_0)
                           for e in estimator1])
    accuracies = []
    while len(pool_X) > 0:
        picked, _ = committee.query(pool_X)
        committee.teach(pool_X[picked], pool_Y[picked])
        # grow the labelled set with the queried sample
        X_0 = np.append(X_0, pool_X[picked], axis=0)
        Y_0 = np.append(Y_0, pool_Y[picked][0])
        # and shrink the pool accordingly
        pool_X = np.delete(pool_X, picked, axis=0)
        pool_Y = np.delete(pool_Y, picked, axis=0)
        accuracies.append(evaluate(committee, X_0, Y_0, X_test, Y_test))
    return (committee, accuracies)
def modAL_QBC(X, y, n_queries):
    """Query-by-committee over X with two logistic-regression members, both
    seeded on samples 0, 50 and 100; performs `n_queries` query/teach rounds."""
    seed_rows = [0, 50, 100]
    members = []
    for _ in range(2):
        members.append(
            ActiveLearner(LogisticRegression(solver='liblinear', n_jobs=1, multi_class='ovr'),
                          X_training=X[seed_rows], y_training=y[seed_rows]))
    modAL_learner = Committee(members)
    for _ in range(n_queries):
        query_idx, _query_inst = modAL_learner.query(X)
        modAL_learner.teach(X[query_idx], y[query_idx])
def cmte_loop(estimator, X_0, Y_0, X_train, Y_train, X_test, Y_test, indexs, n=5):
    """Committee active-learning loop that drains the whole pool.

    One committee member per entry of `estimator`, each seeded with X_0/Y_0.
    Each round queries exactly one sample (raises if more are returned),
    teaches it, and moves it from the pool to the labelled set. `n` is unused.

    Returns (committee, accuracies) with one `evaluate` result per query.
    """
    print(len(indexs))
    pool_X = deepcopy(np.delete(X_train, indexs, axis=0))
    pool_Y = deepcopy(np.delete(Y_train, indexs))
    members = [ActiveLearner(estimator=est, X_training=X_0, y_training=Y_0)
               for est in estimator]
    committee = Committee(learner_list=members)
    index = 0
    accuracies = []
    while len(pool_X) > 0:
        picked, _ = committee.query(pool_X)
        # the loop assumes single-instance queries; bail out loudly otherwise
        if len(picked) > 1:
            raise Exception("NOOOOOOOOOOOOOOO")
        committee.teach(X=pool_X[picked], y=pool_Y[picked])
        X_0 = np.append(X_0, pool_X[picked], axis=0)
        Y_0 = np.append(Y_0, pool_Y[picked][0])
        pool_X = np.delete(pool_X, picked, axis=0)
        pool_Y = np.delete(pool_Y, picked, axis=0)
        accuracies.append(evaluate(committee, X_0, Y_0, X_test, Y_test))
        index += 1
    return (committee, accuracies)
def get_committee(query_strategy):
    """Assemble a five-member RandomForest committee, each member trained on
    the module-level X_train / y_train, using the given query strategy."""
    global X_train, y_train
    n_members = 5
    members = [
        ActiveLearner(estimator=RandomForestClassifier(),
                      X_training=X_train,
                      y_training=y_train)
        for _ in range(n_members)
    ]
    return Committee(learner_list=members, query_strategy=query_strategy)
def learn(self):
    """Random-sampling committee learning over the pool.

    Returns (accuracy_list, f1_total_list, kappa_total_list): per-query
    accuracy, weighted F1, and Cohen's kappa, each measured on the remaining
    unlabelled pool.
    """
    # seeding: first row of each distinct rounded grade becomes the seed set
    classes = self.short_df['grades_round'].unique()
    seed_index = []
    for i in classes:
        seed_index.append(self.short_df['grades_round'][
            self.short_df['grades_round'] == i].index[0])
    seed_index  # NOTE(review): bare expression, has no effect
    act_data = self.short_df.copy()
    accuracy_list = []
    f1_total_list = []
    kappa_total_list = []
    # initialising the labelled training set from the seed rows
    train_idx = seed_index
    X_train = self.X[train_idx]
    y_train = self.Y[train_idx]
    # generating the pool: everything except the seed rows
    X_pool = np.delete(self.X, train_idx, axis=0)
    y_pool = np.delete(self.Y, train_idx)
    act_data = act_data.drop(axis=0, index=train_idx)
    act_data.reset_index(drop=True, inplace=True)
    # one ActiveLearner per configured estimator, all on the same seed data
    initiated_committee = []
    for learner_idx, model in enumerate(self.learners):
        learner = ActiveLearner(estimator=model,
                                X_training=X_train,
                                y_training=y_train)
        initiated_committee.append(learner)
    # Commitee creation (default query strategy; vote entropy left disabled)
    committee = Committee(
        learner_list=initiated_committee,
        # query_strategy=vote_entropy_sampling
    )
    committee.teach(X_train, y_train)
    # pool-based sampling: label self.percent % of the data, chosen at random
    # NOTE(review): `X` here is a free variable, not `self.X` — presumably a
    # module-level global; confirm, or this raises NameError.
    n_queries = int(len(X) / (100 / self.percent))
    for idx in range(n_queries):
        query_idx = np.random.choice(range(len(X_pool)))
        committee.teach(X=X_pool[query_idx].reshape(1, -1),
                        y=y_pool[query_idx].reshape(1, ))
        # remove queried instance from pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx)
        act_data = act_data.drop(axis=0, index=query_idx)
        act_data.reset_index(drop=True, inplace=True)
        # all metrics are measured on the shrinking unlabelled pool
        accuracy_list.append(
            accuracy_score(committee.predict(X_pool), y_pool))
        model_pred = committee.predict(X_pool)
        f1_total_list.append(
            f1_score(y_pool, model_pred, average="weighted",
                     labels=np.unique(model_pred)))
        kappa_total_list.append(cohen_kappa_score(y_pool, model_pred))
    return accuracy_list, f1_total_list, kappa_total_list
def query():
    """Flask endpoint: receive an uploaded image archive, build a dataset,
    and start an active-learning session with the selected strategy.

    Side effects: extracts the archive under UPLOAD_FOLDER, constructs a
    module-level `Data` object, and delegates the response to `helper()`
    (which presumably reads that state — confirm against the caller).
    """
    strategy = None
    classifier = None
    file = request.files['file']
    # if user does not select a file, the browser submits an empty part
    # without a filename
    filename = secure_filename(file.filename)
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file.save(os.path.join(UPLOAD_FOLDER, filename))
        # .rar goes through patool; everything else is treated as a zip
        if(filename.split(".")[1]=="rar"):
            patoolib.extract_archive(os.path.join(UPLOAD_FOLDER, filename), outdir=os.path.join(UPLOAD_FOLDER))
        else:
            zip_ref = zipfile.ZipFile(os.path.join(UPLOAD_FOLDER, filename), 'r')
            zip_ref.extractall(UPLOAD_FOLDER)
            zip_ref.close()
    print("Succesfull")  # NOTE(review): typo in output ("Successful")
    st = request.form.get('strategy_select')
    cl = request.form.get('classifier_select')
    option = int(request.form.get('structure_select'))
    print(cl)
    if(str(cl)=='Random Forest'):
        classifier = RandomForestClassifier()
    elif(str(cl)=='KNN'):
        classifier = KNeighborsClassifier()
    else:
        classifier = DecisionTreeClassifier()
    n_queries = request.form['queries']
    print(st)
    classlist =[]
    classes = {}
    data = {}
    data['image'] = []
    data['label'] = []
    filename = secure_filename(file.filename)
    print(filename)
    # option 0: one sub-directory per class; otherwise class is derived from
    # the alphabetic part of each file name
    if option == 0:
        for dirname, _, filenames in os.walk(os.path.join(UPLOAD_FOLDER,filename.split(".")[0])):
            print(filenames)
            for filename in filenames:
                if('.jpg' in filename or 'jpeg' in filename or 'png' in filename):
                    image = Image.open(os.path.join(dirname, filename))
                    image = image.resize((200,200), Image.ANTIALIAS)
                    size = np.array(image).size
                    if(len(classes)==0):
                        data['image'] = np.array(numpy.array(image)).reshape((1,size))
                    else:
                        try:
                            x = numpy.array(image).reshape((1,size))
                            data['image'] = np.append(data['image'],x,axis=0)
                        except:  # NOTE(review): bare except silently drops unreadable/odd-shaped images
                            continue
                    # NOTE(review): '\\' split is Windows-specific; breaks on POSIX paths
                    if(dirname.split('\\')[-1] not in classes.keys()):
                        classlist.append({'name':dirname.split('\\')[-1],'number':len(classes)})
                        classes[dirname.split('\\')[-1]] = len(classes)
                    data['label'].append(classes[dirname.split('\\')[-1]])
        print(classes)
    else:
        for imfile in os.listdir(os.path.join(UPLOAD_FOLDER,filename.split(".")[0])):
            if imfile.endswith(".jpg") or imfile.endswith(".jpeg") or imfile.endswith("png"):
                image = Image.open(os.path.join(os.path.join(UPLOAD_FOLDER,filename.split(".")[0]), imfile))
                image = image.resize((200,200), Image.ANTIALIAS)
                size = np.array(image).size
                if(len(classes)==0):
                    data['image'] = np.array(numpy.array(image)).reshape((1,size))
                else:
                    try:
                        x = numpy.array(image).reshape((1,size))
                        data['image'] = np.append(data['image'],x,axis=0)
                    except:
                        continue
                # class name = alphabetic part of the file name (digits stripped)
                if(("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))) not in classes.keys()):
                    classlist.append({'name':("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))),'number':len(classes)})
                    classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))] = len(classes)
                data['label'].append(classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))])
                print(classes)
            else:
                continue
    X = data['image']
    y = data['label']
    n_initial = 100
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
    X_initial=[]
    y_initial = []
    print(type(X_initial))
    for i in range(n_initial):
        v = np.array(X_train[initial_idx[i]]).reshape((1,size))
        # NOTE(review): likely bug — label taken at position i, but the image
        # is taken at initial_idx[i]; labels and images are misaligned
        y_initial.append(y_train[i])
        if(i==0):
            X_initial = np.array(X_train[initial_idx[i]]).reshape((1,size))
            print(X_initial.shape)
        else:
            X_initial = np.append(X_initial,v,axis=0)
    X_pool, y_pool = np.delete(X_train, initial_idx, axis=0), np.delete(y_train, initial_idx, axis=0)
    print(X.shape)
    print(X[0].shape)
    print(X_initial.shape)
    params = {}
    params["X_test"] = X_test
    params["y_test"] = y_test
    params["counter"] = n_queries
    params["X_pool"] = X_pool
    params["y_pool"] = y_pool
    # Dispatch on the selected strategy. NOTE(review): `data` (the dict built
    # above) is overwritten by the `Data(...)` session object in every branch.
    if(str(st)=='Uncertainty Sampling'):
        print(classifier)
        print(cl)
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=uncertainty_sampling,
            X_training=X_initial, y_training=y_initial
        )
        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries)
        print("Calling Helper")
        return helper()
    elif(str(st)=='Entropy Sampling'):
        print(classifier)
        print(cl)
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=entropy_sampling,
            X_training=X_initial, y_training=y_initial
        )
        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
    elif(str(st)=='Random Sampling'):
        # NOTE(review): unlike the branches above, this trains on the full
        # X_train/y_train rather than the initial subset
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=random_sampling,
            X_training=X_train, y_training=y_train
        )
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
    # The five committee branches differ only in query_strategy.
    elif(str(st)=='Query By Committee(Vote Entropy Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=vote_entropy_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
    elif(str(st)=='Query By Committee(Uncertainty Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=uncertainty_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
    elif(str(st)=='Query By Committee(Max Disagreement Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=max_disagreement_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
    elif(str(st)=='Query By Committee(Max STD Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=max_std_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
    elif(str(st)=='Query By Committee(Consensus Entropy Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=consensus_entropy_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
# Compare query strategies; random sampling ('rs') is only added when a
# committee is used.
q_strategies = ['us', 'es', 'ms']
if committee_flag:
    q_strategies.append("rs")  # random sampling did not work without committee
for i in q_strategies:
    # map the short strategy code to the corresponding modAL strategy function
    if i == 'us':
        qs = uncertainty_sampling
    elif i == 'es':
        qs = entropy_sampling
    elif i == 'ms':
        qs = margin_sampling
    elif i == 'rs':
        qs = random_sampling
    X_initial, y_initial, X_pool, y_pool = init_data_sampling(xtrain, ytrain)
    # creating learners. if committee_flag == True then create a committee
    # from a subset of the returned learners; else use only the first learner
    learner1, learner2, learner3 = create_learners(qs, X_initial, y_initial)
    if committee_flag:
        # NOTE(review): learner2 is intentionally left out of the committee
        learner = Committee(
            learner_list=[learner1, learner3],
            query_strategy=vote_entropy_sampling
        )
    else:
        learner = learner1
    # record baseline results before any querying
    update_results(ytest, learner.predict(xtest), i)
    # the active learning loop: 10 rounds of 300-instance batch queries
    n_queries = 10
    for idx in range(n_queries):
        query_idx, query_instance = learner.query(X_pool, n_instances=300)
        learner.teach(
            X=X_pool[query_idx],
            y=y_pool[query_idx]
        )
        # remove queried instances from pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx, axis=0)
replace=False) X_train = X_pool[train_idx] y_train = y_pool[train_idx] # creating a reduced copy of the data with the known instances removed X_pool = np.delete(X_pool, train_idx, axis=0) y_pool = np.delete(y_pool, train_idx) # initializing learner learner = ActiveLearner(estimator=RandomForestClassifier(), X_training=X_train, y_training=y_train) learner_list.append(learner) # assembling the committee committee = Committee(learner_list=learner_list, query_strategy=vote_entropy_sampling) ''' with plt.style.context('seaborn-white'): plt.figure(figsize=(n_members*7, 7)) for learner_idx, learner in enumerate(committee): plt.subplot(1, n_members, learner_idx + 1) plt.scatter(x=pca[:, 0], y=pca[:, 1], c=learner.predict(iris['data']), cmap='viridis', s=50) plt.title('Learner no. %d initial predictions' % (learner_idx + 1)) plt.show() ''' unqueried_score = committee.score(iris['data'], iris['target']) ''' with plt.style.context('seaborn-white'): plt.figure(figsize=(7, 7)) prediction = committee.predict(iris['data'])
def query():
    """Flask endpoint: receive a training archive and a separate test archive,
    build both datasets, and start an active-learning session with the
    selected strategy.

    Side effects: extracts both archives under UPLOAD_FOLDER, constructs a
    module-level `Data` object, and delegates the response to `helper()`
    (which presumably reads that state — confirm against the caller).
    """
    strategy = None
    classifier = None
    file = request.files['file']
    test = request.files['test_file']
    filename = secure_filename(file.filename)
    test_filename = secure_filename(test.filename)
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file.save(os.path.join(UPLOAD_FOLDER, filename))
        zip_ref = zipfile.ZipFile(os.path.join(UPLOAD_FOLDER, filename), 'r')
        zip_ref.extractall(UPLOAD_FOLDER)
        zip_ref.close()
    if test and allowed_file(test.filename):
        filename = secure_filename(test.filename)
        test.save(os.path.join(UPLOAD_FOLDER, filename))
        zip_ref = zipfile.ZipFile(os.path.join(UPLOAD_FOLDER, filename), 'r')
        zip_ref.extractall(UPLOAD_FOLDER)
        zip_ref.close()
    st = request.form.get('strategy_select')
    cl = request.form.get('classifier_select')
    option = int(request.form.get('structure_select'))
    if(str(cl)=='Random Forest'):
        classifier = RandomForestClassifier()
    elif(str(cl)=='KNN'):
        classifier = KNeighborsClassifier()
    else:
        classifier = DecisionTreeClassifier()
    n_queries = request.form['queries']
    classlist =[]
    classes = {}
    data = {}
    data['image'] = []
    data['label'] = []
    data['image_name'] = []
    # maps the raw bytes of each flattened image back to its file path
    image_data = {}
    filename = secure_filename(file.filename)
    # --- training set ---
    # option 0: one sub-directory per class; otherwise class is derived from
    # the alphabetic part of each file name
    if option == 0:
        for root,dirs,filename in os.walk(os.path.join(UPLOAD_FOLDER,filename.split(".")[0])):
            for name in filename:
                if name.endswith(".jpg") or name.endswith(".jpeg") or name.endswith(".png"):
                    image_file_name = os.path.join(root,name)
                    image = Image.open(image_file_name)
                    image = image.resize((200,200), Image.ANTIALIAS)
                    size = np.array(image).size
                    if(len(classes)==0):
                        data['image'] = np.array(numpy.array(image)).reshape((1,size))
                        image_data[(numpy.array(image).reshape((1,size))).tobytes()] = image_file_name
                    else:
                        try:
                            x = numpy.array(image).reshape((1,size))
                            image_data[(numpy.array(image).reshape((1,size))).tobytes()] = image_file_name
                            data['image'] = np.append(data['image'],x,axis=0)
                        except:  # NOTE(review): bare except silently drops unreadable/odd-shaped images
                            continue
                    # NOTE(review): '\\' split is Windows-specific; breaks on POSIX paths
                    if root.split("\\")[-1] not in classes.keys():
                        classlist.append({'name':root.split('\\')[-1],'number':len(classes)})
                        classes[root.split('\\')[-1]] = len(classes)
                    data['label'].append(classes[root.split('\\')[-1]])
                    data['image_name'].append(image_file_name)
    else:
        for imfile in os.listdir(os.path.join(UPLOAD_FOLDER,filename.split(".")[0])):
            if imfile.endswith(".jpg") or imfile.endswith(".jpeg") or imfile.endswith(".png"):
                image_file_name = os.path.join(os.path.join(UPLOAD_FOLDER,filename.split(".")[0]), imfile)
                image = Image.open(os.path.join(os.path.join(UPLOAD_FOLDER,filename.split(".")[0]), imfile))
                image = image.resize((200,200), Image.ANTIALIAS)
                size = np.array(image).size
                if(len(classes)==0):
                    data['image'] = np.array(numpy.array(image)).reshape((1,size))
                    image_data[(numpy.array(image).reshape((1,size))).tobytes()] = image_file_name
                else:
                    try:
                        x = numpy.array(image).reshape((1,size))
                        image_data[(numpy.array(image).reshape((1,size))).tobytes()] = image_file_name
                        data['image'] = np.append(data['image'],x,axis=0)
                    except:
                        continue
                # class name = alphabetic part of the file name (digits stripped)
                if(("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))) not in classes.keys()):
                    classlist.append({'name':("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))),'number':len(classes)})
                    classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))] = len(classes)
                data['label'].append(classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))])
                data['image_name'].append(imfile)
            else:
                continue
    # --- test set (same two layouts, built independently) ---
    test_classlist =[]
    test_classes = {}
    test_data = {}
    test_data['image'] = []
    test_data['label'] = []
    test_data['image_name'] = []
    if option == 0:
        for dirname, _, filenames in os.walk(os.path.join(UPLOAD_FOLDER,test_filename.split(".")[0])):
            for filename in filenames:
                if('.jpg' in filename or 'jpeg' in filename or 'png' in filename):
                    image = Image.open(os.path.join(dirname, filename))
                    image = image.resize((200,200), Image.ANTIALIAS)
                    size = np.array(image).size
                    if(len(test_classes)==0):
                        test_data['image'] = np.array(numpy.array(image)).reshape((1,size))
                    else:
                        try:
                            x = numpy.array(image).reshape((1,size))
                            test_data['image'] = np.append(test_data['image'],x,axis=0)
                        except:
                            continue
                    if(dirname.split('\\')[-1] not in test_classes.keys()):
                        test_classlist.append({'name':dirname.split('\\')[-1],'number':len(test_classes)})
                        test_classes[dirname.split('\\')[-1]] = len(test_classes)
                    test_data['label'].append(test_classes[dirname.split('\\')[-1]])
                    test_data['image_name'].append(filename)
    else:
        for imfile in os.listdir(os.path.join(UPLOAD_FOLDER,test_filename.split(".")[0])):
            if imfile.endswith(".jpg") or imfile.endswith(".jpeg") or imfile.endswith("png"):
                image = Image.open(os.path.join(os.path.join(UPLOAD_FOLDER,test_filename.split(".")[0]), imfile))
                image = image.resize((200,200), Image.ANTIALIAS)
                size = np.array(image).size
                if(len(test_classes)==0):
                    test_data['image'] = np.array(numpy.array(image)).reshape((1,size))
                else:
                    try:
                        x = numpy.array(image).reshape((1,size))
                        test_data['image'] = np.append(test_data['image'],x,axis=0)
                    except:
                        continue
                if(("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))) not in test_classes.keys()):
                    test_classlist.append({'name':("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))),'number':len(test_classes)})
                    test_classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))] = len(test_classes)
                test_data['label'].append(test_classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))])
                test_data['image_name'].append(imfile)
            else:
                continue
    X_train = data['image']
    y_train = data['label']
    X_test = test_data['image']
    y_test = test_data['label']
    # seed the learner with 100 randomly chosen training images
    n_initial = 100
    initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
    X_initial=[]
    y_initial = []
    for i in range(n_initial):
        v = np.array(X_train[initial_idx[i]]).reshape((1,size))
        # NOTE(review): likely bug — label taken at position i, but the image
        # is taken at initial_idx[i]; labels and images are misaligned
        y_initial.append(y_train[i])
        if(i==0):
            X_initial = np.array(X_train[initial_idx[i]]).reshape((1,size))
        else:
            X_initial = np.append(X_initial,v,axis=0)
    X_pool, y_pool = np.delete(X_train, initial_idx, axis=0), np.delete(y_train, initial_idx, axis=0)
    params = {}
    params["X_test"] = X_test
    params["y_test"] = y_test
    params["counter"] = n_queries
    params["X_pool"] = X_pool
    params["y_pool"] = y_pool
    # Dispatch on the selected strategy. NOTE(review): `data` (the dict built
    # above) is overwritten by the `Data(...)` session object in every branch.
    if(str(st)=='Uncertainty Sampling'):
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=uncertainty_sampling,
            X_training=X_initial, y_training=y_initial
        )
        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
    elif(str(st)=='Entropy Sampling'):
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=entropy_sampling,
            X_training=X_initial, y_training=y_initial
        )
        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
    elif(str(st)=='Random Sampling'):
        # NOTE(review): unlike the branches above, this trains on the full
        # X_train/y_train rather than the initial subset
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=random_sampling,
            X_training=X_train, y_training=y_train
        )
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
    # The four committee branches differ only in query_strategy.
    elif(str(st)=='Query By Committee(Vote Entropy Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=vote_entropy_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
    elif(str(st)=='Query By Committee(Uncertainty Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=uncertainty_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
    elif(str(st)=='Query By Committee(Max Disagreement Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=max_disagreement_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
    elif(str(st)=='Query By Committee(Consensus Entropy Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=consensus_entropy_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
estimator=RandomForestClassifier(n_estimators=1), query_strategy=strategy) member3 = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=RandomForestClassifier(n_estimators=10), query_strategy=strategy) member4 = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=KNeighborsClassifier(n_neighbors=8), query_strategy=strategy) member5 = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=KNeighborsClassifier(n_neighbors=10), query_strategy=strategy) committee = Committee( learner_list=[member1, member2, member3, member4, member5]) # In[8]: print("Initial accuracy =", committee.score(X, Y)) # In[9]: member1r = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=RandomForestClassifier(n_estimators=8), query_strategy=strategy) member2r = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=RandomForestClassifier(n_estimators=1), query_strategy=strategy)
def active_learn(df1, first_item_index_of_each_category): train_idx = first_item_index_of_each_category # X_train = iris['data'][train_idx] # y_train = iris['target'][train_idx] # initial training data data = df1.values[:, 1:] target = df1['label'].values X_full = df1.values[:, 1:] y_full = df1['label'].values X_train = df1.values[:, 1:][ train_idx] #item from second column as the first column is the label.. y_train = df1['label'].values[train_idx] # X_pool = np.delete(data, train_idx, axis=0) # y_pool = np.delete(target, train_idx) X_pool = deepcopy(X_full) y_pool = deepcopy(y_full) # initializing Committee members n_members = 2 learner_list = list() for member_idx in range(n_members): # initial training data # n_initial = 5 # train_idx = np.random.choice(range(X_pool.shape[0]), size=n_initial, replace=False) # X_train = X_pool[train_idx] # y_train = y_pool[train_idx] # creating a reduced copy of the data with the known instances removed X_pool = np.delete(X_pool, train_idx, axis=0) y_pool = np.delete(y_pool, train_idx) # initializing learner learner = ActiveLearner(estimator=RandomForestClassifier(), X_training=X_train, y_training=y_train) learner_list.append(learner) # assembling the committee committee = Committee(learner_list=learner_list) # print('Committee initial predictions, accuracy = %1.3f' % committee.score(data, target)) print('%1.3f' % committee.score(data, target)) performance_array = [] n_queries = 505 for idx in range(n_queries): query_idx, query_instance = committee.query(X_pool) committee.teach(X=X_pool[query_idx].reshape(1, -1), y=y_pool[query_idx].reshape(1, )) # remove queried instance from pool X_pool = np.delete(X_pool, query_idx, axis=0) y_pool = np.delete(y_pool, query_idx) learner_score = committee.score(data, target) # print('Committee %d th query predictions, accuracy = %1.3f' % (idx , learner_score)) print('%1.3f' % (learner_score)) if (idx % 100 == 0): performance_array.append(learner_score) percentage_increase(performance_array)
# initial training data: 100 random pixels initial_idx = np.random.choice(range(len(X_pool)), size=100) # initializing the learners n_learners = 3 learner_list = [] for _ in range(n_learners): learner = ActiveLearner( estimator=RandomForestClassifier(), X_training=X_pool[initial_idx], y_training=y_pool[initial_idx], bootstrap_init=True ) learner_list.append(learner) # assembling the Committee committee = Committee(learner_list) # ensemble active learner from the Committee ensemble_learner = ActiveLearner( estimator=committee ) query_idx, query_instance = ensemble_learner.query(X_pool) # ... # ... obtain label from the Oracle ... # ... ensemble_learner.teach(X_pool[query_idx], y_pool[query_idx], bootstrap=True)
estimator=RandomForestClassifier(n_estimators=1), query_strategy=strategy) member3 = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=RandomForestClassifier(n_estimators=10), query_strategy=strategy) member4 = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=KNeighborsClassifier(n_neighbors=8), query_strategy=strategy) member5 = ActiveLearner(X_training=X_train, y_training=Y_train, estimator=KNeighborsClassifier(n_neighbors=10), query_strategy=strategy) committee = Committee( learner_list=[member1, member2, member3, member4, member5]) # In[65]: import math unlab_length = X_unlab.shape[0] disagreement = np.zeros(unlab_length * 2).reshape(unlab_length, 2) for i in range(unlab_length): index = [i] predict = [-1, -1, -1, -1, -1] predict[0] = member1.predict(X_unlab[index])[0] predict[1] = member2.predict(X_unlab[index])[0] predict[2] = member3.predict(X_unlab[index])[0] predict[3] = member4.predict(X_unlab[index])[0] predict[4] = member5.predict(X_unlab[index])[0]