def bidirectional_active_expert():
    """Bidirectional active learning ("_ALBT_man" experiment).

    Each active round scores every sample in the oracle pool by softmax
    entropy, then selects two queues via class-balanced round-robin over a
    shuffled class order:

    * ``tag_queue``  -- the QUEUE_SIZE highest-entropy samples, sent to the
      human oracle (their true labels are used);
    * ``tag_queue2`` -- the GOOD_SIZE lowest-entropy samples, self-labeled
      with the network's own predictions (expert pseudo-labels).

    Both queues are moved from the oracle pool into the training set
    ("not put back, x_oracle" strategy: the training set is built purely
    from queried oracle samples), the network is fine-tuned for EPOCH_all
    epochs, and the best checkpoint of each round is saved.

    NOTE(review): relies on module-level globals (load_process, SharePart,
    MissionPart, SoftmaxWithLoss, Test, train_net, unit, BATCH_SIZE,
    CLASS_NUM, QUEUE_SIZE, GOOD_SIZE, EPOCH_all, ACTIVE_TIME, MODEL_PATH,
    PRETRAIN_MODEL_NAME, Experiment_NAME) defined elsewhere in this file.
    """
    TrainData, TrainLabels, OracleData, OracleLabels, \
        TestData, TestLabels, _, _, lenn_s, lenn_t, _ = load_process()

    # Graph: shared feature extractor + mission head, plus the entropy and
    # prediction ops used for the active-learning query.
    batch = tf.placeholder(tf.float32, [None, unit.H, unit.W, unit.Channel])
    label = tf.placeholder(tf.uint8, [None])
    keep_prop = tf.placeholder(tf.float32)
    feature = SharePart(batch, keep_prop)
    result = MissionPart(feature)
    loss = SoftmaxWithLoss(result, label)
    acc = Test(result, label)
    opt = train_net(loss)
    softmax = tf.nn.softmax(result)
    entropy = tf.reduce_sum(-softmax * tf.log(softmax), 1)
    predict_class = tf.cast(tf.argmax(softmax, axis=1), tf.uint8)

    saver = tf.train.Saver(max_to_keep=ACTIVE_TIME)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.45
    config.gpu_options.allow_growth = False
    init = tf.global_variables_initializer()
    sess = tf.Session(config=config)
    sess.run(init)
    saver.restore(sess, PRETRAIN_MODEL_NAME)

    def test():
        # Mean accuracy over the lenn_t test batches (dropout disabled).
        ACC = 0
        for i in range(lenn_t):
            test_batch = TestData[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            test_label = TestLabels[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            ACC += sess.run(acc, feed_dict={batch: test_batch,
                                            label: test_label,
                                            keep_prop: 1.0})
        return ACC / lenn_t

    '''Begin Active Learning!'''
    log_file = open('./simple_log/' + Experiment_NAME + '_ALBT_man.txt', 'w')
    pretrain_accuracy = test()
    print('the pre train model accuracy is : %s' % pretrain_accuracy)
    log_file.write("the pre train model accuracy is " + str(pretrain_accuracy))
    log_file.write("\n")

    for a in range(ACTIVE_TIME):
        oracle_idx = np.arange(len(OracleData))
        oracle_que = []
        # Score each pool sample: (index, entropy, predicted class).
        # BUG FIX: one sess.run fetches both tensors instead of running two
        # separate forward passes per sample.
        for i in oracle_idx:
            candidate_entropy, candidate_predict = sess.run(
                [entropy, predict_class],
                feed_dict={batch: unit.changeshape_1(OracleData[i]),
                           keep_prop: 1.0})
            oracle_que.append((i, candidate_entropy[0], candidate_predict[0]))
        oracle_que = sorted(oracle_que, key=lambda candidate: candidate[1],
                            reverse=True)

        # Bucket candidates by predicted class (keys are str(class)); each
        # bucket ends up sorted by entropy, highest first.
        temp = {}
        tag_queue = []
        tag_queue2 = []
        tag_queue2_labels = []
        for k in range(CLASS_NUM):
            temp[str(k)] = []
        for k in range(len(oracle_que)):
            temp[str(oracle_que[k][2])].append(oracle_que[k])
        for k in temp:
            temp[k] = sorted(temp[k], key=lambda x: x[1], reverse=True)

        # Shuffled class order for class-balanced round-robin selection.
        temp_order = list(range(CLASS_NUM))
        shuffle(temp_order)

        # Queue 1: highest-entropy samples, for the human oracle.
        idx = 0
        temp_class = 0
        while idx < QUEUE_SIZE:
            bucket = temp[str(temp_order[temp_class])]
            if bucket:
                tag_queue.append(bucket.pop(0)[0])
                idx += 1
            temp_class = (temp_class + 1) % CLASS_NUM
            # BUG FIX: bail out if the pool runs dry; the original spun
            # forever once every bucket was empty.
            if idx < QUEUE_SIZE and not any(temp[str(c)] for c in range(CLASS_NUM)):
                break

        # Queue 2: lowest-entropy samples, trusted with their CNN labels.
        idx = 0
        temp_class = 0
        while idx < GOOD_SIZE:
            bucket = temp[str(temp_order[temp_class])]
            if bucket:
                tag_temporary = bucket.pop()
                tag_queue2.append(tag_temporary[0])
                tag_queue2_labels.append(tag_temporary[2])
                idx += 1
            temp_class = (temp_class + 1) % CLASS_NUM
            if idx < GOOD_SIZE and not any(temp[str(c)] for c in range(CLASS_NUM)):
                break

        # True labels of the self-labeled queue, kept only to measure the
        # CNN's pseudo-label accuracy below.
        tag_queue2_rlabels = OracleLabels[tag_queue2]

        # "not put back, x_oracle": the training set accumulates queried
        # samples; tag_queue2 uses predicted labels, not true labels.
        if a == 0:
            TrainData = np.concatenate((OracleData[tag_queue],
                                        OracleData[tag_queue2]))
            TrainLabels = np.concatenate((OracleLabels[tag_queue],
                                          np.array(tag_queue2_labels)))
        else:
            TrainData = np.concatenate((TrainData, OracleData[tag_queue]))
            TrainData = np.concatenate((TrainData, OracleData[tag_queue2]))
            TrainLabels = np.concatenate((TrainLabels, OracleLabels[tag_queue]))
            TrainLabels = np.concatenate((TrainLabels,
                                          np.array(tag_queue2_labels)))

        # BUG FIX: np.delete returns a new array -- the original discarded
        # the result (and omitted axis), so queried samples stayed in the
        # pool and could be selected again in later rounds.
        removed = tag_queue + tag_queue2
        OracleData = np.delete(OracleData, removed, axis=0)
        OracleLabels = np.delete(OracleLabels, removed, axis=0)

        # Fine-tune and keep the best checkpoint of this round.
        train_queue = np.arange(len(TrainData))
        best = 0
        for i in range(EPOCH_all):
            shuffle(train_queue)
            for j in range(len(TrainData) // BATCH_SIZE):
                sel = train_queue[j * BATCH_SIZE:(j + 1) * BATCH_SIZE]
                sess.run(opt, feed_dict={batch: TrainData[sel],
                                         label: TrainLabels[sel],
                                         keep_prop: 0.5})
            accuracy = test()
            print('the %d time acmodel acc is: %s' % (a + 1, accuracy))
            if accuracy > best:
                best = accuracy
                saver.save(sess, MODEL_PATH + Experiment_NAME + '_ALBT_man_'
                           + str(a + 1) + '.ckpt')
        print('the %d time acmodel best acc is: %s' % (a + 1, best))
        # Accuracy of the CNN's pseudo-labels against the held-back truth.
        cnn_acc = float(np.sum(np.equal(tag_queue2_rlabels,
                                        np.array(tag_queue2_labels)))) / GOOD_SIZE
        log_file.write("the " + str(a + 1) + "time acmodel best acc is " + str(best))
        log_file.write("\n")
        log_file.write("the " + str(a + 1) + "time cnn_que acc is " + str(cnn_acc))
        log_file.write("\n")
    log_file.close()
    return
def Test_model_process():
    """Evaluate every saved active-learning checkpoint on the test set.

    For each query strategy in ``ac_methods`` and each active round, the
    matching checkpoint is restored, a confusion matrix over the test set is
    built, saved (.npy + plot), and a per-class precision / recall / f1 /
    support report is appended to a per-method text file.

    NOTE(review): relies on module-level globals (unit, FILE_PATH,
    MODEL_PATH, Experiment_NAME, ACTIVE_TIME, CLASS_NUM, SharePart,
    MissionPart, SoftmaxWithLoss, Test, train_net, plotconfusion) defined
    elsewhere in this file.
    """
    file_test = FILE_PATH + Experiment_NAME + '_x_test.txt'
    # One report per query strategy: _ALBT_, _ALBT_man_, _ALST_, _ALRA_.
    ac_methods = ['_ALBT_', '_ALBT_man_', '_ALST_', '_ALRA_']
    TestList = unit.LoadCarTxT(file_test)
    TestData, TestLabels = unit.Getlist(TestList)

    # Graph must match the one the checkpoints were trained with.
    batch = tf.placeholder(tf.float32, [None, unit.H, unit.W, unit.Channel])
    label = tf.placeholder(tf.uint8, [None])
    keep_prop = tf.placeholder(tf.float32)
    feature = SharePart(batch, keep_prop)
    result = MissionPart(feature)
    loss = SoftmaxWithLoss(result, label)
    acc = Test(result, label)
    opt = train_net(loss)
    softmax = tf.nn.softmax(result)
    entropy = tf.reduce_sum(-softmax * tf.log(softmax), 1)
    predict_class = tf.cast(tf.argmax(result, axis=1), tf.uint8)

    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    config.gpu_options.allow_growth = False
    init = tf.global_variables_initializer()
    sess = tf.Session(config=config)
    sess.run(init)

    def _safe_div(num, den, digits=3):
        # Per-class metrics can divide by zero (empty row/column in the
        # confusion matrix).  BUG FIX: the original wrapped these divisions
        # in try/except, but numpy float division yields nan instead of
        # raising, so the fallback (which also had a 'apprend' typo) never
        # triggered.
        return round(num / den, digits) if den else round(0, digits)

    # BUG FIX: confunsion_matix was never initialized in the active code
    # path (only in a commented-out section), so .append() raised NameError.
    confunsion_matix = []
    for ac_method in ac_methods:
        file_report = open('./confusion_matrix/' + Experiment_NAME + ac_method + 'report.txt', 'w')
        for a in range(ACTIVE_TIME):
            recall = []
            precision = []
            f1 = []
            support = []
            accuracy = 0
            c_m_t = np.zeros([CLASS_NUM, CLASS_NUM])
            saver.restore(sess, MODEL_PATH + Experiment_NAME + ac_method + str(a + 1) + '.ckpt')
            # Confusion matrix: rows = true label, cols = predicted label.
            for i in range(len(TestData)):
                predict_label = sess.run(predict_class,
                                         feed_dict={batch: unit.changeshape_1(TestData[i]),
                                                    keep_prop: 1.0})
                c_m_t[TestLabels[i], predict_label] += 1
            confunsion_matix.append(c_m_t)
            np.save('./confusion_matrix/' + Experiment_NAME + ac_method + str(a + 1) + '.npy', c_m_t)
            file_report.write(str(a + 1) + " stage " + "confusion_matrix with testsets\n")
            file_report.write(" " + "precision".rjust(10) + "recall".rjust(10) + "f1-score".rjust(10) + "support".rjust(10) + '\n')
            plotconfusion(c_m_t, './confusion_matrix/' + Experiment_NAME + ac_method + str(a + 1), CLASS_NUM)
            for i in range(CLASS_NUM):
                accuracy += c_m_t[i, i]
                recall.append(_safe_div(c_m_t[i, i], np.sum(c_m_t[i])))
                precision.append(_safe_div(c_m_t[i, i], np.sum(c_m_t[:, i])))
                f1.append(_safe_div(2 * recall[i] * precision[i], recall[i] + precision[i]))
                support.append(np.sum(c_m_t[i]))
                file_report.write(str(i).rjust(10) + str(precision[i]).rjust(10) + str(recall[i]).rjust(10) + str(f1[i]).rjust(10) + str(support[i]).rjust(10) + '\n')
            recall_avg = _safe_div(np.sum(np.array(recall)), CLASS_NUM)
            precision_avg = _safe_div(np.sum(np.array(precision)), CLASS_NUM)
            f1_avg = _safe_div(np.sum(np.array(f1)), CLASS_NUM)
            support_num = np.sum(np.array(support))
            accuracy = _safe_div(accuracy, support_num, 5)
            file_report.write("average".rjust(10) + str(precision_avg).rjust(10) + str(recall_avg).rjust(10) + str(f1_avg).rjust(10) + str(support_num).rjust(10) + '\n')
            file_report.write(str(a + 1) + " stage acc is " + str(accuracy))
            file_report.write("\n\n\n\n")
        # BUG FIX: close each method's report inside the loop; the original
        # reassigned file_report per method but closed only the last handle,
        # leaking the earlier ones.
        file_report.close()
    return
def entropy_active():
    """Entropy-based active learning ("_ALST" experiment).

    Each round scores the oracle pool by softmax entropy, selects the
    QUEUE_SIZE highest-entropy samples via class-balanced round-robin over a
    shuffled class order, moves them (with their true labels) into the
    training set, fine-tunes for EPOCH_all epochs, and saves the best
    checkpoint of the round.

    NOTE(review): relies on module-level globals (load_process, SharePart,
    MissionPart, SoftmaxWithLoss, Test, train_net, unit, BATCH_SIZE,
    CLASS_NUM, QUEUE_SIZE, EPOCH_all, ACTIVE_TIME, MODEL_PATH,
    PRETRAIN_MODEL_NAME, Experiment_NAME) defined elsewhere in this file.
    """
    # Load the datasets.
    TrainData, TrainLabels, OracleData, OracleLabels, \
        TestData, TestLabels, _, _, lenn_s, lenn_t, _ = load_process()

    batch = tf.placeholder(tf.float32, [None, unit.H, unit.W, unit.Channel])
    label = tf.placeholder(tf.uint8, [None])
    keep_prop = tf.placeholder(tf.float32)
    feature = SharePart(batch, keep_prop)
    result = MissionPart(feature)
    loss = SoftmaxWithLoss(result, label)
    acc = Test(result, label)
    opt = train_net(loss)
    softmax = tf.nn.softmax(result)
    entropy = tf.reduce_sum(-softmax * tf.log(softmax), 1)
    predict_class = tf.cast(tf.argmax(softmax, axis=1), tf.uint8)

    saver = tf.train.Saver(max_to_keep=ACTIVE_TIME)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.45
    config.gpu_options.allow_growth = False
    init = tf.global_variables_initializer()
    sess = tf.Session(config=config)
    sess.run(init)
    saver.restore(sess, PRETRAIN_MODEL_NAME)

    def test():
        # Mean accuracy over the lenn_t test batches (dropout disabled).
        ACC = 0
        for i in range(lenn_t):
            test_batch = TestData[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            test_label = TestLabels[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            ACC += sess.run(acc, feed_dict={batch: test_batch,
                                            label: test_label,
                                            keep_prop: 1.0})
        return ACC / lenn_t

    '''Begin Active Learning!'''
    log_file = open('./simple_log/' + Experiment_NAME + '_ALST.txt', 'w')
    pretrain_accuracy = test()
    print('the pre train model accuracy is : %s' % pretrain_accuracy)
    log_file.write("the pre train model accuracy is " + str(pretrain_accuracy))
    log_file.write("\n")

    # The whole active-learning process.
    for a in range(ACTIVE_TIME):
        oracle_idx = np.arange(len(OracleData))
        oracle_que = []
        # Entropy of every pool sample.  BUG FIX: one sess.run fetches both
        # the entropy and the prediction instead of two forward passes.
        for i in oracle_idx:
            candidate_entropy, candidate_predict = sess.run(
                [entropy, predict_class],
                feed_dict={batch: unit.changeshape_1(OracleData[i]),
                           keep_prop: 1.0})
            oracle_que.append((i, candidate_entropy[0], candidate_predict[0]))
        # oracle_que entries: (sample index, entropy, predicted label).
        oracle_que = sorted(oracle_que, key=lambda candidate: candidate[1],
                            reverse=True)

        # Bucket by predicted class (string keys, matching class index);
        # each bucket sorted by entropy, highest first.
        temp = {}
        tag_queue = []
        for k in range(CLASS_NUM):
            temp[str(k)] = []
        for k in range(len(oracle_que)):
            temp[str(oracle_que[k][2])].append(oracle_que[k])
        for k in temp:
            temp[k] = sorted(temp[k], key=lambda x: x[1], reverse=True)

        # Shuffle the class order, then pick one sample per class in turn
        # until QUEUE_SIZE samples are selected.
        idx = 0
        temp_class = 0
        temp_order = list(range(CLASS_NUM))
        shuffle(temp_order)
        while idx < QUEUE_SIZE:
            bucket = temp[str(temp_order[temp_class])]
            if bucket:
                tag_queue.append(bucket.pop(0)[0])
                idx += 1
            temp_class = (temp_class + 1) % CLASS_NUM
            # BUG FIX: stop when the pool is exhausted (the original looped
            # forever once every bucket was empty).
            if idx < QUEUE_SIZE and not any(temp[str(c)] for c in range(CLASS_NUM)):
                break

        # Move the newly labeled samples from the pool into the training set.
        if a == 0:
            TrainData = OracleData[tag_queue]
            TrainLabels = OracleLabels[tag_queue]
        else:
            TrainData = np.concatenate((TrainData, OracleData[tag_queue]))
            TrainLabels = np.concatenate((TrainLabels, OracleLabels[tag_queue]))
        # BUG FIX: np.delete returns a new array; the original discarded the
        # result (and omitted axis), so labeled samples were never removed
        # from the pool and could be queried again.
        OracleData = np.delete(OracleData, tag_queue, axis=0)
        OracleLabels = np.delete(OracleLabels, tag_queue, axis=0)

        # Fine-tune and keep the best checkpoint of this round.
        train_queue = np.arange(len(TrainData))
        best = 0
        for i in range(EPOCH_all):
            shuffle(train_queue)
            for j in range(len(TrainData) // BATCH_SIZE):
                sel = train_queue[j * BATCH_SIZE:(j + 1) * BATCH_SIZE]
                sess.run(opt, feed_dict={batch: TrainData[sel],
                                         label: TrainLabels[sel],
                                         keep_prop: 0.5})
            accuracy = test()
            print('the %d time acmodel acc is: %s' % (a + 1, accuracy))
            if accuracy > best:
                best = accuracy
                saver.save(sess, MODEL_PATH + Experiment_NAME + '_ALST_'
                           + str(a + 1) + '.ckpt')
        print('the %d time acmodel best acc is: %s' % (a + 1, best))
        log_file.write("the " + str(a + 1) + "time acmodel best acc is " + str(best))
        log_file.write("\n")
    # BUG FIX: the original never closed log_file.
    log_file.close()
    return