def loadModel(self, dataset):
    path = self.__C.CKPTS_PATH + 'ckpt_1305312/epoch5.pkl'
    print("loaded path: ", path)

    net = Net(self.__C)
    net.cuda()

    # Load the network parameters
    ckpt = torch.load(path)
    net.load_state_dict(ckpt['state_dict'])

    loader_params = {'batch_size': 8, 'num_gpus': 1}
    dataloader = TheLoader.from_dataset(dataset, **loader_params)

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    g = open("golds.txt", "w")
    p = open("preds.txt", "w")

    for b, (time_per_batch, batch) in enumerate(time_batch(dataloader)):
        x, goldsentence = net(**batch)
        goldsentence = goldsentence[:, 1:]
        x = x[:, :31, :]
        pred_argmax = np.argmax(x.cpu().data.numpy(), axis=2)

        for i in range(pred_argmax.shape[0]):
            pred = pred_argmax[i, :]
            gold = goldsentence[i, :]
            pred_tokens = tokenizer.convert_ids_to_tokens(pred)
            gold_tokens = tokenizer.convert_ids_to_tokens(gold)
            pred_string = listToString(pred_tokens)
            gold_string = listToString(gold_tokens)
            encoded_pred_string = str(pred_string)  # .encode('utf-8').strip()
            encoded_gold_string = str(gold_string)  # .encode('utf-8').strip()
            print(encoded_pred_string)
            print(encoded_gold_string)
            p.write(encoded_pred_string + '\n')
            g.write(encoded_gold_string + '\n')

    g.close()
    p.close()
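# Note: `listToString` is used above but is not defined anywhere in this listing.
# A minimal sketch of what it is assumed to do (join the WordPiece tokens returned
# by tokenizer.convert_ids_to_tokens() into a single space-separated string):
def listToString(tokens):
    # hypothetical helper -- joins a list of token strings into one line
    return ' '.join(tokens)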
def train(self, dataset, dataset_eval=None):

    # Obtain needed information
    data_size = dataset.data_size
    token_size = dataset.token_size
    ans_size = dataset.ans_size
    pretrained_emb = dataset.pretrained_emb

    # Define the MCAN model
    net = Net(self.__C, pretrained_emb, token_size, ans_size)
    net.cuda()
    net.train()

    # Define the Question-only model
    qnet = QNet(self.__C, pretrained_emb, token_size, ans_size)
    qnet.cuda()
    qnet.train()

    # Watch net & qnet
    wandb.watch(net)
    wandb.watch(qnet)

    # Define the multi-gpu training if needed
    if self.__C.N_GPU > 1:
        net = nn.DataParallel(net, device_ids=self.__C.DEVICES)

    # Define the binary cross entropy losses
    # loss_fn = torch.nn.BCELoss(size_average=False).cuda()
    loss_qm = torch.nn.BCELoss(reduction='sum').cuda()
    loss_qo = torch.nn.BCELoss(reduction='sum').cuda()

    # Load checkpoint if resume training
    if self.__C.RESUME:  # default -> FALSE
        print(' ========== Resume training')
        if self.__C.CKPT_PATH is not None:
            print('Warning: you are now using CKPT_PATH args, '
                  'CKPT_VERSION and CKPT_EPOCH will not work')
            path = self.__C.CKPT_PATH
        else:
            path = self.__C.CKPTS_PATH + 'ckpt_' + self.__C.CKPT_VERSION + \
                   '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl'

        # Load the network parameters
        print('Loading ckpt {}'.format(path))
        ckpt = torch.load(path)
        print('Finish!')
        net.load_state_dict(ckpt['state_dict'])

        # Load the optimizer parameters
        # params = list(net.parameters()) + list(qnet.parameters())
        optim = get_optim(self.__C, net, data_size, ckpt['lr_base'])
        optim._step = int(data_size / self.__C.BATCH_SIZE * self.__C.CKPT_EPOCH)
        optim.optimizer.load_state_dict(ckpt['optimizer'])
        # NOTE: the question-only optimizer is also used below (zero_grad, adjust_lr,
        # step), so it must be created on resume as well; its state is not stored in
        # the checkpoint, so it is simply re-created here.
        optim_q = get_optim(self.__C, qnet, data_size)
        start_epoch = self.__C.CKPT_EPOCH
    else:
        if ('ckpt_' + self.__C.VERSION) in os.listdir(self.__C.CKPTS_PATH):
            shutil.rmtree(self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION)
        os.mkdir(self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION)

        # params = net.parameters() + qnet.parameters()
        optim = get_optim(self.__C, net, data_size)
        optim_q = get_optim(self.__C, qnet, data_size)
        start_epoch = 0

    loss_sum = 0
    L_qo_sum = 0
    L_qm_sum = 0
    named_params = list(net.named_parameters()) + list(qnet.named_parameters())
    grad_norm = np.zeros(len(named_params))

    # Define multi-thread dataloader
    if self.__C.SHUFFLE_MODE in ['external']:
        dataloader = Data.DataLoader(dataset,
                                     batch_size=self.__C.BATCH_SIZE,
                                     shuffle=False,
                                     num_workers=self.__C.NUM_WORKERS,
                                     pin_memory=self.__C.PIN_MEM,
                                     drop_last=True)
    else:
        dataloader = Data.DataLoader(dataset,
                                     batch_size=self.__C.BATCH_SIZE,
                                     shuffle=True,
                                     num_workers=self.__C.NUM_WORKERS,
                                     pin_memory=self.__C.PIN_MEM,
                                     drop_last=True)

    # Training script
    for epoch in range(start_epoch, self.__C.MAX_EPOCH):

        # Save log information
        logfile = open(self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')
        logfile.write('nowTime: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '\n')
        logfile.close()

        # Learning Rate Decay
        if epoch in self.__C.LR_DECAY_LIST:
            adjust_lr(optim, self.__C.LR_DECAY_R)
            adjust_lr(optim_q, self.__C.LR_DECAY_R)

        # Externally shuffle
        if self.__C.SHUFFLE_MODE == 'external':
            shuffle_list(dataset.ans_list)

        time_start = time.time()

        # Iteration
        for step, (img_feat_iter, ques_ix_iter, ans_iter) in enumerate(dataloader):
            optim.zero_grad()
            optim_q.zero_grad()

            img_feat_iter = img_feat_iter.cuda()
            ques_ix_iter = ques_ix_iter.cuda()
            ans_iter = ans_iter.cuda()

            for accu_step in range(self.__C.GRAD_ACCU_STEPS):
                sub_img_feat_iter = img_feat_iter[accu_step * self.__C.SUB_BATCH_SIZE:
                                                  (accu_step + 1) * self.__C.SUB_BATCH_SIZE]
                sub_ques_ix_iter = ques_ix_iter[accu_step * self.__C.SUB_BATCH_SIZE:
                                                (accu_step + 1) * self.__C.SUB_BATCH_SIZE]
                sub_ans_iter = ans_iter[accu_step * self.__C.SUB_BATCH_SIZE:
                                        (accu_step + 1) * self.__C.SUB_BATCH_SIZE]

                out, q_emb, lang_feat_mask = net(sub_img_feat_iter, sub_ques_ix_iter)
                pred_qo, q_out = qnet(q_emb, lang_feat_mask)
                # print(pred_qo.shape, sub_ans_iter.shape)
                # print(torch.argmax(sub_ans_iter.long(), dim=1))

                ans_idx = torch.argmax(sub_ans_iter.long(), dim=1)
                # predicted answer index from the question-only branch
                # (note: .long() truncates the scores to integers before the argmax)
                pred_idx = torch.argmax(pred_qo.long(), dim=1)

                qo_scale = pred_qo.detach().clone()
                for i in range(self.__C.SUB_BATCH_SIZE):
                    if ans_idx[i] == pred_idx[i]:
                        qo_scale[i, :] = torch.ones(3129)

                L_qo = loss_qo(q_out, sub_ans_iter)
                L_qm = loss_qm(torch.sigmoid(out * torch.sigmoid(qo_scale)), sub_ans_iter)
                # L_qo = loss_qo(q_out, sub_ans_iter)
                # L_qm = loss_qm(torch.sigmoid(out * torch.sigmoid(pred_qo)), sub_ans_iter)
                loss = L_qo + L_qm

                # Only mean reduction needs to be divided by GRAD_ACCU_STEPS.
                # Removing this line wouldn't change the results because of how the Adam
                # optimizer works, but it would be necessary with an SGD optimizer.
                # loss /= self.__C.GRAD_ACCU_STEPS
                loss.backward()
                loss_sum += loss.cpu().data.numpy() * self.__C.GRAD_ACCU_STEPS
                L_qo_sum += L_qo.cpu().data.numpy() * self.__C.GRAD_ACCU_STEPS
                L_qm_sum += L_qm.cpu().data.numpy() * self.__C.GRAD_ACCU_STEPS

                wandb.log({
                    "Training loss": loss.cpu().data.numpy() / self.__C.SUB_BATCH_SIZE,
                    "Question only loss": L_qo.cpu().data.numpy() / self.__C.SUB_BATCH_SIZE,
                    "Fusion loss": L_qm.cpu().data.numpy() / self.__C.SUB_BATCH_SIZE
                })

                # Tracking training loss
                if self.__C.VERBOSE:  # print loss every step -> TRUE
                    if dataset_eval is not None:
                        mode_str = self.__C.SPLIT['train'] + '->' + self.__C.SPLIT['val']
                    else:
                        mode_str = self.__C.SPLIT['train'] + '->' + self.__C.SPLIT['test']

                    print("\r[version %s][epoch %2d][step %4d/%4d][%s] loss: %.4f, lr: %.2e" % (
                        self.__C.VERSION,
                        epoch + 1,
                        step,
                        int(data_size / self.__C.BATCH_SIZE),
                        mode_str,
                        loss.cpu().data.numpy() / self.__C.SUB_BATCH_SIZE,
                        optim._rate), end=' ')

            # Gradient norm clipping
            if self.__C.GRAD_NORM_CLIP > 0:
                nn.utils.clip_grad_norm_(net.parameters(), self.__C.GRAD_NORM_CLIP)

            # Save the gradient information
            for name in range(len(named_params)):
                norm_v = torch.norm(named_params[name][1].grad).cpu().data.numpy() \
                    if named_params[name][1].grad is not None else 0
                grad_norm[name] += norm_v * self.__C.GRAD_ACCU_STEPS
                # print('Param %-3s Name %-80s Grad_Norm %-20s' %
                #       (str(grad_wt),
                #        params[grad_wt][0],
                #        str(norm_v)))

            optim.step()
            optim_q.step()

        time_end = time.time()
        print('Finished in {}s'.format(int(time_end - time_start)))
        # print('')

        epoch_finish = epoch + 1

        # Save checkpoint
        state = {
            'state_dict': net.state_dict(),
            'optimizer': optim.optimizer.state_dict(),
            'lr_base': optim.lr_base
        }
        torch.save(state,
                   self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION + '/epoch' + str(epoch_finish) + '.pkl')

        # Logging
        logfile = open(self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')
        logfile.write('epoch = ' + str(epoch_finish) +
                      ' Q loss = ' + str(L_qo_sum / data_size) +
                      ' fusion loss = ' + str(L_qm_sum / data_size) +
                      ' loss = ' + str(loss_sum / data_size) +
                      '\n' + 'lr = ' + str(optim._rate) + '\n\n')
        logfile.close()

        # Eval after every epoch
        if dataset_eval is not None:
            self.eval(dataset_eval, state_dict=net.state_dict(), valid=True)

        # if self.__C.VERBOSE:
        #     logfile = open(
        #         self.__C.LOG_PATH +
        #         'log_run_' + self.__C.VERSION + '.txt',
        #         'a+'
        #     )
        #     for name in range(len(named_params)):
        #         logfile.write(
        #             'Param %-3s Name %-80s Grad_Norm %-25s\n' % (
        #                 str(name),
        #                 named_params[name][0],
        #                 str(grad_norm[name] / data_size * self.__C.BATCH_SIZE)
        #             )
        #         )
        #     logfile.write('\n')
        #     logfile.close()

        loss_sum = 0
        L_qo_sum = 0
        L_qm_sum = 0
        grad_norm = np.zeros(len(named_params))
def eval(self, dataset, state_dict=None, valid=False):

    # Load parameters
    if self.__C.CKPT_PATH is not None:
        print('Warning: you are now using CKPT_PATH args, '
              'CKPT_VERSION and CKPT_EPOCH will not work')
        path = self.__C.CKPT_PATH
    else:
        path = self.__C.CKPTS_PATH + 'ckpt_' + self.__C.CKPT_VERSION + \
               '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl'

    val_ckpt_flag = False
    if state_dict is None:
        val_ckpt_flag = True
        print('Loading ckpt {}'.format(path))
        state_dict = torch.load(path)['state_dict']
        print('Finish!')

    # Store the prediction list
    qid_list = [ques['question_id'] for ques in dataset.ques_list]
    ans_ix_list = []
    pred_list = []

    data_size = dataset.data_size
    token_size = dataset.token_size
    ans_size = dataset.ans_size
    pretrained_emb = dataset.pretrained_emb

    net = Net(self.__C, pretrained_emb, token_size, ans_size)
    net.cuda()
    net.eval()

    if self.__C.N_GPU > 1:
        net = nn.DataParallel(net, device_ids=self.__C.DEVICES)

    net.load_state_dict(state_dict)

    dataloader = Data.DataLoader(dataset,
                                 batch_size=self.__C.EVAL_BATCH_SIZE,
                                 shuffle=False,
                                 num_workers=self.__C.NUM_WORKERS,
                                 pin_memory=True)

    for step, (img_feat_iter, ques_ix_iter, ans_iter) in enumerate(dataloader):
        print("\rEvaluation: [step %4d/%4d]" % (
            step,
            int(data_size / self.__C.EVAL_BATCH_SIZE),
        ), end=' ')

        img_feat_iter = img_feat_iter.cuda()
        ques_ix_iter = ques_ix_iter.cuda()

        pred = net(img_feat_iter, ques_ix_iter)
        # print(pred)
        pred_np = pred[0].cpu().data.numpy()
        pred_argmax = np.argmax(pred_np, axis=1)

        # Save the answer index
        if pred_argmax.shape[0] != self.__C.EVAL_BATCH_SIZE:
            pred_argmax = np.pad(
                pred_argmax,
                (0, self.__C.EVAL_BATCH_SIZE - pred_argmax.shape[0]),
                mode='constant',
                constant_values=-1)
        ans_ix_list.append(pred_argmax)

        # Save the whole prediction vector
        if self.__C.TEST_SAVE_PRED:
            if pred_np.shape[0] != self.__C.EVAL_BATCH_SIZE:
                pred_np = np.pad(
                    pred_np,
                    ((0, self.__C.EVAL_BATCH_SIZE - pred_np.shape[0]), (0, 0)),
                    mode='constant',
                    constant_values=-1)
            pred_list.append(pred_np)

    print('')
    ans_ix_list = np.array(ans_ix_list).reshape(-1)

    result = [{
        'answer': dataset.ix_to_ans[str(ans_ix_list[qix])],  # ix_to_ans (loaded with json) keys are of type string
        'question_id': int(qid_list[qix])
    } for qix in range(qid_list.__len__())]

    # Write the results to result file
    if valid:
        if val_ckpt_flag:
            result_eval_file = self.__C.CACHE_PATH + 'result_run_' + self.__C.CKPT_VERSION + '.json'
        else:
            result_eval_file = self.__C.CACHE_PATH + 'result_run_' + self.__C.VERSION + '.json'
    else:
        if self.__C.CKPT_PATH is not None:
            result_eval_file = self.__C.RESULT_PATH + 'result_run_' + self.__C.CKPT_VERSION + '.json'
        else:
            result_eval_file = self.__C.RESULT_PATH + 'result_run_' + self.__C.CKPT_VERSION + \
                               '_epoch' + str(self.__C.CKPT_EPOCH) + '.json'

    print('Save the result to file: {}'.format(result_eval_file))
    json.dump(result, open(result_eval_file, 'w'))

    # Save the whole prediction vector
    if self.__C.TEST_SAVE_PRED:
        if self.__C.CKPT_PATH is not None:
            ensemble_file = self.__C.PRED_PATH + 'result_run_' + self.__C.CKPT_VERSION + '.json'
        else:
            ensemble_file = self.__C.PRED_PATH + 'result_run_' + self.__C.CKPT_VERSION + \
                            '_epoch' + str(self.__C.CKPT_EPOCH) + '.json'

        print('Save the prediction vector to file: {}'.format(ensemble_file))

        pred_list = np.array(pred_list).reshape(-1, ans_size)
        result_pred = [{
            'pred': pred_list[qix],
            'question_id': int(qid_list[qix])
        } for qix in range(qid_list.__len__())]

        pickle.dump(result_pred, open(ensemble_file, 'wb+'), protocol=-1)

    # Run validation script
    if valid:
        # create vqa object and vqaRes object
        ques_file_path = self.__C.QUESTION_PATH['test']
        ans_file_path = self.__C.ANSWER_PATH['test']

        vqa = VQA(ans_file_path, ques_file_path)
        vqaRes = vqa.loadRes(result_eval_file, ques_file_path)

        # create vqaEval object by taking vqa and vqaRes
        vqaEval = VQAEval(vqa, vqaRes, n=2)  # n is the precision of accuracy (number of places after decimal), default is 2

        # evaluate results
        """
        If you have a list of question ids on which you would like to evaluate your results,
        pass it as a list to the function below.
        By default it uses all the question ids in the annotation file.
        """
        vqaEval.evaluate()

        # print accuracies
        print("\n")
        print("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall']))
        # print("Per Question Type Accuracy is the following:")
        # for quesType in vqaEval.accuracy['perQuestionType']:
        #     print("%s : %.02f" % (quesType, vqaEval.accuracy['perQuestionType'][quesType]))
        # print("\n")
        print("Per Answer Type Accuracy is the following:")
        for ansType in vqaEval.accuracy['perAnswerType']:
            print("%s : %.02f" % (ansType, vqaEval.accuracy['perAnswerType'][ansType]))
        print("\n")

        if val_ckpt_flag:
            print('Write to log file: {}'.format(
                self.__C.LOG_PATH + 'log_run_' + self.__C.CKPT_VERSION + '.txt'))
            logfile = open(
                self.__C.LOG_PATH + 'log_run_' + self.__C.CKPT_VERSION + '.txt', 'a+')
        else:
            print('Write to log file: {}'.format(
                self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt'))
            logfile = open(
                self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')

        logfile.write("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall']))
        for ansType in vqaEval.accuracy['perAnswerType']:
            logfile.write("%s : %.02f " % (ansType, vqaEval.accuracy['perAnswerType'][ansType]))
        logfile.write("\n\n")
        logfile.close()
def eval(hyper, dataset, state_dict=None, valid=False): # Load parameters if hyper.CKPT_PATH is not None: print('Warning: you are now using CKPT_PATH args, ' 'CKPT_VERSION and CKPT_EPOCH will not work') path = hyper.CKPT_PATH else: path = hyper.CKPTS_PATH + \ 'ckpt_' + hyper.CKPT_VERSION + \ '/epoch' + str(hyper.CKPT_EPOCH) + '.pkl' val_ckpt_flag = False if state_dict is None: val_ckpt_flag = True print('Loading ckpt {}'.format(path)) state_dict = torch.load(path)['state_dict'] print('Finish!') # Store the prediction list qid_list = [ques['question_id'] for ques in dataset.ques_list] q_list = [ques['question'] for ques in dataset.ques_list] im_id_list = [ques['image_id'] for ques in dataset.ques_list] ans_ix_list = [] pred_list = [] data_size = dataset.data_size token_size = dataset.token_size ans_size = dataset.ans_size pretrained_emb = dataset.pretrained_emb net = Net(hyper, pretrained_emb, token_size, ans_size) net.cuda() net.eval() if hyper.N_GPU > 1: net = nn.DataParallel(net, device_ids=hyper.DEVICES) net.load_state_dict(state_dict) dataloader = Data.DataLoader(dataset, batch_size=hyper.EVAL_BATCH_SIZE, shuffle=False, num_workers=hyper.NUM_WORKERS, pin_memory=True) for step, (img_feat_iter, ques_ix_iter, ans_iter) in enumerate(dataloader): print("\rEvaluation: [step %4d/%4d]" % ( step, int(data_size / hyper.EVAL_BATCH_SIZE), ), end=' ') img_feat_iter = img_feat_iter.cuda() ques_ix_iter = ques_ix_iter.cuda() pred = net(img_feat_iter, ques_ix_iter) pred_np = pred.cpu().data.numpy() pred_argmax = np.argmax(pred_np, axis=1) # Save the answer index if pred_argmax.shape[0] != hyper.EVAL_BATCH_SIZE: pred_argmax = np.pad( pred_argmax, (0, hyper.EVAL_BATCH_SIZE - pred_argmax.shape[0]), mode='constant', constant_values=-1) ans_ix_list.append((pred_argmax)) break #st.write(dataset.ix_to_ans[str([pred_argmax])]) # Save the whole prediction vector if hyper.TEST_SAVE_PRED: if pred_np.shape[0] != hyper.EVAL_BATCH_SIZE: pred_np = np.pad( pred_np, ((0, hyper.EVAL_BATCH_SIZE - pred_np.shape[0]), (0, 0)), mode='constant', constant_values=-1) pred_list.append(pred_np) print('') ans_ix_list = np.array(ans_ix_list).reshape(-1) old = 0 st.header("Question and Answer") for qix in range(qid_list.__len__()): bbb = int(qid_list[qix]) aaa = dataset.ix_to_ans[str(ans_ix_list[qix])] if old != int(im_id_list[qix]): num_digit = len(str(im_id_list[qix])) name = 'COCO_val2014_' for x in range(0, 12 - num_digit): name = name + '0' image = Image.open('./datasets/coco_extract/images/' + name + str(im_id_list[qix]) + '.jpg') cap = q_list[qix] + " " + aaa # st.image(image) old = int(im_id_list[qix]) q_a = ("(" + str(qix + 1) + ") " + q_list[qix] + " " + aaa) st.subheader(q_a) result = [{ 'answer': aaa, # ix_to_ans(load with json) keys are type of string 'question_id': bbb }] # Write the results to result file if valid: if val_ckpt_flag: result_eval_file = \ hyper.CACHE_PATH + \ 'result_run_' + hyper.CKPT_VERSION + \ '.json' else: result_eval_file = \ hyper.CACHE_PATH + \ 'result_run_' + hyper.VERSION + \ '.json' else: if hyper.CKPT_PATH is not None: result_eval_file = \ hyper.RESULT_PATH + \ 'result_run_' + hyper.CKPT_VERSION + \ '.json' else: result_eval_file = \ hyper.RESULT_PATH + \ 'result_run_' + hyper.CKPT_VERSION + \ '_epoch' + str(hyper.CKPT_EPOCH) + \ '.json' print('Save the result to file: {}'.format(result_eval_file)) json.dump(result, open(result_eval_file, 'w')) # Save the whole prediction vector if hyper.TEST_SAVE_PRED: if hyper.CKPT_PATH is not None: ensemble_file = \ hyper.PRED_PATH + \ 'result_run_' + 
hyper.CKPT_VERSION + \ '.json' else: ensemble_file = \ hyper.PRED_PATH + \ 'result_run_' + hyper.CKPT_VERSION + \ '_epoch' + str(hyper.CKPT_EPOCH) + \ '.json' print('Save the prediction vector to file: {}'.format(ensemble_file)) pred_list = np.array(pred_list).reshape(-1, ans_size) result_pred = [{ 'pred': pred_list[qix], 'question_id': int(qid_list[qix]) } for qix in range(qid_list.__len__())] pickle.dump(result_pred, open(ensemble_file, 'wb+'), protocol=-1)
def train(self, dataset, dataset_eval=None):
    # 1.3 Before training starts, obtain the needed information: dataset size,
    #     question token vocabulary size, answer set size, and the pretrained text embeddings
    data_size = dataset.data_size
    token_size = dataset.token_size  # 18405
    ans_size = dataset.ans_size
    pretrained_emb = dataset.pretrained_emb

    # 1.4 With that information, define the MCAN model; it takes these parameters
    #     and outputs the fused multi-modal feature proj_feat
    net = Net(self.__C, pretrained_emb, token_size, ans_size)
    net.cuda()
    net.train()
    # 1.5 train() is called to start training (what comes before / after this step?)

    # Define multi-GPU training if needed
    if self.__C.N_GPU > 1:
        net = nn.DataParallel(net, device_ids=self.__C.DEVICES)

    loss_fn = torch.nn.BCELoss(reduction='sum').cuda()

    # Load the checkpoint if resuming training
    if self.__C.RESUME:
        print(' ========== Resume training ==========')
        if self.__C.CKPT_PATH is not None:
            print('Warning: you are now using the CKPT_PATH arg, '
                  'CKPT_VERSION and CKPT_EPOCH will not work')
            path = self.__C.CKPT_PATH  # CKPT_PATH must point to a checkpoint here; it cannot be None
        else:
            path = self.__C.CKPTS_PATH + 'ckpt_' + self.__C.CKPT_VERSION + \
                   '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl'

        # Load the network parameters
        print('Loading ckpt {}'.format(path))
        ckpt = torch.load(path)
        print('Finished loading parameters!')
        # ............ what exactly is stored in state_dict here? ............
        net.load_state_dict(ckpt['state_dict'])

        # Load the optimizer parameters
        optim = get_optim(self.__C, net, data_size, ckpt['lr_base'])
        optim._step = int(data_size / self.__C.BATCH_SIZE * self.__C.CKPT_EPOCH)
        optim.optimizer.load_state_dict(ckpt['optimizer'])
        # epoch
        start_epoch = self.__C.CKPT_EPOCH

    # Otherwise start training from scratch
    else:
        if ('ckpt_' + self.__C.VERSION) in os.listdir(self.__C.CKPTS_PATH):
            shutil.rmtree(self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION)
        os.mkdir(self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION)

        optim = get_optim(self.__C, net, data_size)
        start_epoch = 0

    loss_sum = 0
    named_params = list(net.named_parameters())  # named parameters
    grad_norm = np.zeros(len(named_params))      # gradient norms

    # Define the multi-worker dataloader
    if self.__C.SHUFFLE_MODE in ['external']:
        dataloader = Data.DataLoader(dataset,
                                     batch_size=self.__C.BATCH_SIZE,
                                     shuffle=False,
                                     num_workers=self.__C.NUM_WORKERS,  # number of worker processes
                                     pin_memory=self.__C.PIN_MEM,
                                     drop_last=True)
    else:
        dataloader = Data.DataLoader(dataset,
                                     batch_size=self.__C.BATCH_SIZE,
                                     shuffle=True,
                                     num_workers=self.__C.NUM_WORKERS,
                                     pin_memory=self.__C.PIN_MEM,
                                     drop_last=True)

    # Training loop (MAX_EPOCH is set to 1 here)
    for epoch in range(start_epoch, self.__C.MAX_EPOCH):
        # Save log information
        logfile = open(self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')
        # Write log information
        logfile.write('nowTime: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '\n')
        logfile.close()

        # Learning rate decay
        if epoch in self.__C.LR_DECAY_LIST:
            adjust_lr(optim, self.__C.LR_DECAY_R)

        # Externally shuffle
        if self.__C.SHUFFLE_MODE == 'external':
            shuffle_list(dataset.ans_list)

        time_start = time.time()

        # Iterate over batches of image features, question indices and answers
        for step, (img_feat_iter, ques_ix_iter, ans_iter) in enumerate(dataloader):
            optim.zero_grad()  # zero the gradients

            img_feat_iter = img_feat_iter.cuda()
            ques_ix_iter = ques_ix_iter.cuda()
            ans_iter = ans_iter.cuda()

            # GRAD_ACCU_STEPS: gradient accumulation, used when GPU memory is limited.
            # It effectively enlarges the batch size: with batch_size=6, 24 samples and
            # grad_accu_steps=2 the number of parameter updates is 24/6=4; shrinking the
            # per-step batch size to 6/2=3 leaves the number of updates unchanged.
            for accu_step in range(self.__C.GRAD_ACCU_STEPS):
                sub_img_feat_iter = img_feat_iter[accu_step * self.__C.SUB_BATCH_SIZE:
                                                  (accu_step + 1) * self.__C.SUB_BATCH_SIZE]
                sub_ques_ix_iter = ques_ix_iter[accu_step * self.__C.SUB_BATCH_SIZE:
                                                (accu_step + 1) * self.__C.SUB_BATCH_SIZE]
                sub_ans_iter = ans_iter[accu_step * self.__C.SUB_BATCH_SIZE:
                                        (accu_step + 1) * self.__C.SUB_BATCH_SIZE]

                pred = net(sub_img_feat_iter,   # [5, 100, 2048]
                           sub_ques_ix_iter)    # [5, 14]

                loss = loss_fn(pred, sub_ans_iter)
                # only mean reduction would need to be divided by GRAD_ACCU_STEPS
                loss.backward()  # backpropagate to compute the current gradients
                loss_sum += loss.cpu().data.numpy() * self.__C.GRAD_ACCU_STEPS

                # Print the loss at every training step
                if self.__C.VERBOSE:
                    if dataset_eval is not None:
                        mode_str = self.__C.SPLIT['train']
                    else:
                        mode_str = self.__C.SPLIT['train'] + '->' + self.__C.SPLIT['train']

                    print("\r[version %s][epoch %2d][step %4d/%4d][%s] loss: %.4f, lr: %.2e" % (
                        self.__C.VERSION,
                        epoch + 1,
                        step,
                        int(data_size / self.__C.BATCH_SIZE),
                        mode_str,
                        loss.cpu().data.numpy() / self.__C.SUB_BATCH_SIZE,
                        optim._rate), end=' ')

            # Gradient norm clipping
            if self.__C.GRAD_NORM_CLIP > 0:
                nn.utils.clip_grad_norm_(net.parameters(), self.__C.GRAD_NORM_CLIP)

            # Save the gradient information
            for name in range(len(named_params)):
                norm_v = torch.norm(named_params[name][1].grad).cpu().data.numpy() \
                    if named_params[name][1].grad is not None else 0
                grad_norm[name] += norm_v * self.__C.GRAD_ACCU_STEPS

            optim.step()

        # with open('One_epoch_data.txt', 'w') as F:
        #     F.write(net.state_dict() + optim.optimizer.state_dict() + optim.lr_base)
        time_end = time.time()
        print('Finished in {}s'.format(int(time_end - time_start)))
        # print('')

        epoch_finish = epoch + 1

        # Save checkpoint
        state = {
            'state_dict': net.state_dict(),
            'optimizer': optim.optimizer.state_dict(),
            'lr_base': optim.lr_base
        }
        print('=========== state of the trained model =====')
        print(state)
        torch.save(state,
                   self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION + '/epoch' + str(epoch_finish) + '.pkl')

        # Open the log file
        logfile = open(self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')
        logfile.write('epoch = ' + str(epoch_finish) +
                      ' loss = ' + str(loss_sum / data_size) +
                      '\n' + 'lr = ' + str(optim._rate) + '\n\n')
        logfile.close()

        # After each epoch, evaluate the model by calling eval()
        if dataset_eval is not None:
            self.eval(dataset_eval, state_dict=net.state_dict(), valid=True)

        loss_sum = 0
        grad_norm = np.zeros(len(named_params))
def eval(self, dataset, state_dict=None, valid=False):
    # Evaluate the model. The dataset passed in is the validation set; the epoch-1
    # checkpoint .pkl file is used to measure validation accuracy and to report the
    # accuracy for each answer type.

    # Load model parameters
    if self.__C.CKPT_PATH is not None:
        print('Warning: you are now using CKPT_PATH args, '
              'CKPT_VERSION and CKPT_EPOCH will not work')
        path = self.__C.CKPT_PATH
    else:
        path = self.__C.CKPTS_PATH + 'ckpt_' + self.__C.CKPT_VERSION + \
               '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl'

    val_ckpt_flag = False
    if state_dict is None:
        val_ckpt_flag = True
        print('Loading ckpt {}'.format(path))
        state_dict = torch.load(path)['state_dict']
        print('state_dict of the network being evaluated:', state_dict)
        print('Finish!')

    # Store the prediction lists: question ids and answers
    qid_list = [ques['question_id'] for ques in dataset.ques_list]
    ans_ix_list = []
    pred_list = []

    data_size = dataset.data_size
    token_size = dataset.token_size
    ans_size = dataset.ans_size
    pretrained_emb = dataset.pretrained_emb

    # Build the network, same as in train
    net = Net(self.__C, pretrained_emb, token_size, ans_size)
    net.cuda()
    # Evaluation mode
    net.eval()

    if self.__C.N_GPU > 1:
        net = nn.DataParallel(net, device_ids=self.__C.DEVICES)

    net.load_state_dict(state_dict)

    # Load the dataset (the validation set here)
    dataloader = Data.DataLoader(dataset,
                                 batch_size=self.__C.EVAL_BATCH_SIZE,
                                 shuffle=False,
                                 num_workers=self.__C.NUM_WORKERS,
                                 pin_memory=True)

    for step, (img_feat_iter, ques_ix_iter, ans_iter) in enumerate(dataloader):
        print("\rEvaluation: [step %4d/%4d]" % (
            step,
            int(data_size / self.__C.EVAL_BATCH_SIZE),
        ), end=' ')

        img_feat_iter = img_feat_iter.cuda()
        ques_ix_iter = ques_ix_iter.cuda()

        # Predicted answer score vector
        pred = net(img_feat_iter, ques_ix_iter)
        # Move to CPU and convert to numpy
        pred_np = pred.cpu().data.numpy()
        # argmax along the answer axis: pick the highest-scoring answer, i.e. the most likely one
        pred_argmax = np.argmax(pred_np, axis=1)
        np.savetxt("pre_np.txt", pred_np)
        np.savetxt("pre_argmax.txt", pred_argmax)

        # Save the index of the predicted answer
        if pred_argmax.shape[0] != self.__C.EVAL_BATCH_SIZE:
            pred_argmax = np.pad(
                pred_argmax,
                (0, self.__C.EVAL_BATCH_SIZE - pred_argmax.shape[0]),
                mode='constant',
                constant_values=-1)

        # Append pred_argmax to ans_ix_list
        ans_ix_list.append(pred_argmax)
        file = open('ans_ix_list.txt', 'w')
        file.write(str(ans_ix_list))
        file.close()

        # Save the whole prediction vector
        if self.__C.TEST_SAVE_PRED:
            if pred_np.shape[0] != self.__C.EVAL_BATCH_SIZE:
                pred_np = np.pad(
                    pred_np,
                    ((0, self.__C.EVAL_BATCH_SIZE - pred_np.shape[0]), (0, 0)),
                    mode='constant',
                    constant_values=-1)
            pred_list.append(pred_np)

    print('')
    ans_ix_list = np.array(ans_ix_list).reshape(-1)

    result = [{
        'answer': dataset.ix_to_ans[str(ans_ix_list[qix])],  # ix_to_ans (loaded with json) keys are of type string
        'question_id': int(qid_list[qix])
    } for qix in range(qid_list.__len__())]

    # Write the results to the result file: question id -> predicted answer
    if valid:
        if val_ckpt_flag:
            result_eval_file = self.__C.CACHE_PATH + 'result_run_' + self.__C.CKPT_VERSION + '.json'
        else:
            result_eval_file = self.__C.CACHE_PATH + 'result_run_' + self.__C.VERSION + '.json'
    else:
        if self.__C.CKPT_PATH is not None:
            result_eval_file = self.__C.RESULT_PATH + 'result_run_' + self.__C.CKPT_VERSION + '.json'
        else:
            result_eval_file = self.__C.RESULT_PATH + 'result_run_' + self.__C.CKPT_VERSION + \
                               '_epoch' + str(self.__C.CKPT_EPOCH) + '.json'

    print('Save the result to file: {}'.format(result_eval_file))
    json.dump(result, open(result_eval_file, 'w'))

    # Save the whole prediction vector
    if self.__C.TEST_SAVE_PRED:
        if self.__C.CKPT_PATH is not None:
            ensemble_file = self.__C.PRED_PATH + 'result_run_' + self.__C.CKPT_VERSION + '.json'
        else:
            ensemble_file = self.__C.PRED_PATH + 'result_run_' + self.__C.CKPT_VERSION + \
                            '_epoch' + str(self.__C.CKPT_EPOCH) + '.json'

        print('Save the prediction vector to file: {}'.format(ensemble_file))

        pred_list = np.array(pred_list).reshape(-1, ans_size)
        file = open('pred_list.txt', 'w')
        file.write(str(pred_list))
        file.close()

        result_pred = [{
            'pred': pred_list[qix],
            'question_id': int(qid_list[qix])
        } for qix in range(qid_list.__len__())]

        pickle.dump(result_pred, open(ensemble_file, 'wb+'), protocol=-1)

    # Run the validation script
    if valid:
        # Create the vqa and vqaRes objects
        ques_file_path = self.__C.QUESTION_PATH['train']
        ans_file_path = self.__C.ANSWER_PATH['train']

        vqa = VQA(ans_file_path, ques_file_path)
        vqaRes = vqa.loadRes(result_eval_file, ques_file_path)

        # Create the vqaEval object from vqa and vqaRes
        vqaEval = VQAEval(vqa, vqaRes, n=2)  # n is the precision of accuracy (number of places after decimal), default is 2

        # Evaluate results
        """
        If you have a list of question ids on which you would like to evaluate your results,
        pass it as a list to the function below.
        By default it uses all the question ids in the annotation file.
        """
        vqaEval.evaluate()

        # print accuracies
        print("\n")
        # Overall accuracy
        print("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall']))
        # Accuracy for each answer type: yes/no, number, other
        print("Per Answer Type Accuracy is the following:")
        for ansType in vqaEval.accuracy['perAnswerType']:
            print("%s : %.02f" % (ansType, vqaEval.accuracy['perAnswerType'][ansType]))
        print("\n")

        # Write the evaluation results to the log file
        if val_ckpt_flag:
            print('Write to log file: {}'.format(
                self.__C.LOG_PATH + 'log_run_' + self.__C.CKPT_VERSION + '.txt'))
            logfile = open(
                self.__C.LOG_PATH + 'log_run_' + self.__C.CKPT_VERSION + '.txt', 'a+')
        else:
            print('Write to log file: {}'.format(
                self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt'))
            logfile = open(
                self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')

        logfile.write("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall']))
        for ansType in vqaEval.accuracy['perAnswerType']:
            logfile.write("%s : %.02f " % (ansType, vqaEval.accuracy['perAnswerType'][ansType]))
        logfile.write("\n\n")
        logfile.close()
def train(self, dataset, dataset_eval=None):

    # Obtain needed information
    data_size = dataset.data_size
    token_size = dataset.token_size
    ans_size = dataset.ans_size
    pretrained_emb = dataset.pretrained_emb

    # Define the MCAN model
    net = Net(self.__C, pretrained_emb, token_size, ans_size)
    net.cuda()
    net.train()

    # Define the multi-gpu training if needed
    if self.__C.N_GPU > 1:
        net = nn.DataParallel(net, device_ids=self.__C.DEVICES)

    # Define the binary cross entropy loss
    # loss_fn = torch.nn.BCELoss(size_average=False).cuda()
    loss_fn = torch.nn.BCELoss(reduction='sum').cuda()

    # Load checkpoint if resume training
    if self.__C.RESUME:
        print(' ========== Resume training')
        if self.__C.CKPT_PATH is not None:
            print('Warning: you are now using CKPT_PATH args, '
                  'CKPT_VERSION and CKPT_EPOCH will not work')
            path = self.__C.CKPT_PATH
        else:
            path = self.__C.CKPTS_PATH + 'ckpt_' + self.__C.CKPT_VERSION + \
                   '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl'

        # Load the network parameters
        print('Loading ckpt {}'.format(path))
        ckpt = torch.load(path)
        print('Finish!')
        net.load_state_dict(ckpt['state_dict'])

        # Load the optimizer parameters
        optim = get_optim(self.__C, net, data_size, ckpt['lr_base'])
        optim._step = int(data_size / self.__C.BATCH_SIZE * self.__C.CKPT_EPOCH)
        optim.optimizer.load_state_dict(ckpt['optimizer'])
        start_epoch = self.__C.CKPT_EPOCH
    else:
        if ('ckpt_' + self.__C.VERSION) in os.listdir(self.__C.CKPTS_PATH):
            shutil.rmtree(self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION)
        os.mkdir(self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION)

        optim = get_optim(self.__C, net, data_size)
        start_epoch = 0

    loss_sum = 0
    named_params = list(net.named_parameters())
    grad_norm = np.zeros(len(named_params))

    # Define multi-thread dataloader
    if self.__C.SHUFFLE_MODE in ['external']:
        dataloader = Data.DataLoader(dataset,
                                     batch_size=self.__C.BATCH_SIZE,
                                     shuffle=False,
                                     num_workers=self.__C.NUM_WORKERS,
                                     pin_memory=self.__C.PIN_MEM,
                                     drop_last=True)
    else:
        dataloader = Data.DataLoader(dataset,
                                     batch_size=self.__C.BATCH_SIZE,
                                     shuffle=True,
                                     num_workers=self.__C.NUM_WORKERS,
                                     pin_memory=self.__C.PIN_MEM,
                                     drop_last=True)

    # Training script
    for epoch in range(start_epoch, self.__C.MAX_EPOCH):

        # Save log information
        logfile = open(self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')
        logfile.write('nowTime: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '\n')
        logfile.close()

        # Learning Rate Decay
        if epoch in self.__C.LR_DECAY_LIST:
            adjust_lr(optim, self.__C.LR_DECAY_R)

        # Externally shuffle
        if self.__C.SHUFFLE_MODE == 'external':
            shuffle_list(dataset.ans_list)

        time_start = time.time()

        # Iteration
        for step, (img_feat_iter, ques_ix_iter, ans_iter, fact_idx_iter) in enumerate(dataloader):
            optim.zero_grad()

            img_feat_iter = img_feat_iter.cuda()
            ques_ix_iter = ques_ix_iter.cuda()
            ans_iter = ans_iter.cuda()
            fact_idx_iter = fact_idx_iter.cuda()

            for accu_step in range(self.__C.GRAD_ACCU_STEPS):
                sub_img_feat_iter = img_feat_iter[accu_step * self.__C.SUB_BATCH_SIZE:
                                                  (accu_step + 1) * self.__C.SUB_BATCH_SIZE]
                sub_ques_ix_iter = ques_ix_iter[accu_step * self.__C.SUB_BATCH_SIZE:
                                                (accu_step + 1) * self.__C.SUB_BATCH_SIZE]
                sub_ans_iter = ans_iter[accu_step * self.__C.SUB_BATCH_SIZE:
                                        (accu_step + 1) * self.__C.SUB_BATCH_SIZE]
                sub_fact_idx_iter = fact_idx_iter[accu_step * self.__C.SUB_BATCH_SIZE:
                                                  (accu_step + 1) * self.__C.SUB_BATCH_SIZE]

                pred = net(sub_img_feat_iter, sub_ques_ix_iter, sub_fact_idx_iter)

                loss = loss_fn(pred, sub_ans_iter)
                loss /= self.__C.GRAD_ACCU_STEPS
                loss.backward()
                loss_sum += loss.cpu().data.numpy() * self.__C.GRAD_ACCU_STEPS

                if self.__C.VERBOSE:
                    if dataset_eval is not None:
                        mode_str = self.__C.SPLIT['train'] + '->' + self.__C.SPLIT['val']
                    else:
                        mode_str = self.__C.SPLIT['train'] + '->' + self.__C.SPLIT['test']

                    print("\r[version %s][epoch %2d][step %4d/%4d][%s] loss: %.4f, lr: %.2e" % (
                        self.__C.VERSION,
                        epoch + 1,
                        step,
                        int(data_size / self.__C.BATCH_SIZE),
                        mode_str,
                        loss.cpu().data.numpy() / self.__C.SUB_BATCH_SIZE,
                        optim._rate), end=' ')

            # Gradient norm clipping
            if self.__C.GRAD_NORM_CLIP > 0:
                nn.utils.clip_grad_norm_(net.parameters(), self.__C.GRAD_NORM_CLIP)

            # Save the gradient information
            for name in range(len(named_params)):
                norm_v = torch.norm(named_params[name][1].grad).cpu().data.numpy() \
                    if named_params[name][1].grad is not None else 0
                grad_norm[name] += norm_v * self.__C.GRAD_ACCU_STEPS
                # print('Param %-3s Name %-80s Grad_Norm %-20s' %
                #       (str(grad_wt),
                #        params[grad_wt][0],
                #        str(norm_v)))

            optim.step()

        time_end = time.time()
        print('Finished in {}s'.format(int(time_end - time_start)))
        # print('')

        epoch_finish = epoch + 1

        # Save checkpoint
        state = {
            'state_dict': net.state_dict(),
            'optimizer': optim.optimizer.state_dict(),
            'lr_base': optim.lr_base
        }
        torch.save(state,
                   self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION + '/epoch' + str(epoch_finish) + '.pkl')

        # Logging
        logfile = open(self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')
        logfile.write('epoch = ' + str(epoch_finish) +
                      ' loss = ' + str(loss_sum / data_size) +
                      '\n' + 'lr = ' + str(optim._rate) + '\n\n')
        logfile.close()

        # Eval after every epoch
        if dataset_eval is not None:
            self.eval(dataset_eval, state_dict=net.state_dict(), valid=True)

        # if self.__C.VERBOSE:
        #     logfile = open(
        #         self.__C.LOG_PATH +
        #         'log_run_' + self.__C.VERSION + '.txt',
        #         'a+'
        #     )
        #     for name in range(len(named_params)):
        #         logfile.write(
        #             'Param %-3s Name %-80s Grad_Norm %-25s\n' % (
        #                 str(name),
        #                 named_params[name][0],
        #                 str(grad_norm[name] / data_size * self.__C.BATCH_SIZE)
        #             )
        #         )
        #     logfile.write('\n')
        #     logfile.close()

        loss_sum = 0
        grad_norm = np.zeros(len(named_params))
def eval(self, dataset, state_dict=None, valid=False): from core.model.net import Net # Load parameters if self.__C.CKPT_PATH is not None: print('Warning: you are now using CKPT_PATH args, ' 'CKPT_VERSION and CKPT_EPOCH will not work') path = self.__C.CKPT_PATH else: path = self.__C.CKPTS_PATH + \ 'ckpt_' + self.__C.CKPT_VERSION + \ '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl' val_ckpt_flag = False if state_dict is None: val_ckpt_flag = True print('Loading ckpt {}'.format(path)) state_dict = torch.load(path)['state_dict'] print('Finish!') # Store the prediction list qid_list = [ques['question_id'] for ques in dataset.ques_list] ans_ix_list = [] pred_list = [] data_size = dataset.data_size token_size = dataset.token_size ans_size = dataset.ans_size pretrained_emb = dataset.pretrained_emb net = Net( self.__C, pretrained_emb, token_size, ans_size ) net.cuda() net.eval() if self.__C.N_GPU > 1: net = nn.DataParallel(net, device_ids=self.__C.DEVICES) net.load_state_dict(state_dict) dataloader = Data.DataLoader( dataset, batch_size=self.__C.EVAL_BATCH_SIZE, shuffle=False, num_workers=self.__C.NUM_WORKERS, pin_memory=True ) for step, ( img_feat_iter, ques_ix_iter, ans_iter ) in enumerate(dataloader): print("\rEvaluation: [step %4d/%4d]" % ( step, int(data_size / self.__C.EVAL_BATCH_SIZE), ), end=' ') img_feat_iter = img_feat_iter.cuda() ques_ix_iter = ques_ix_iter.cuda() pred, recon_loss = net( img_feat_iter, ques_ix_iter ) pred_np = pred.cpu().data.numpy() pred_argmax = np.argmax(pred_np, axis=1) # Save the answer index if pred_argmax.shape[0] != self.__C.EVAL_BATCH_SIZE: pred_argmax = np.pad( pred_argmax, (0, self.__C.EVAL_BATCH_SIZE - pred_argmax.shape[0]), mode='constant', constant_values=-1 ) ans_ix_list.append(pred_argmax) # Save the whole prediction vector if self.__C.TEST_SAVE_PRED: if pred_np.shape[0] != self.__C.EVAL_BATCH_SIZE: pred_np = np.pad( pred_np, ((0, self.__C.EVAL_BATCH_SIZE - pred_np.shape[0]), (0, 0)), mode='constant', constant_values=-1 ) pred_list.append(pred_np) print('') ans_ix_list = np.array(ans_ix_list).reshape(-1) pickle.dump(ans_ix_list, open("ans_ix_list.pkl", "wb+")) result = [{ 'answer': dataset.ix_to_ans[ans_ix_list[qix]], # ix_to_ans(load with json) keys are type of string 'question_id': int(qid_list[qix]) }for qix in range(qid_list.__len__())] ground_truth = dataset.qid_to_ques total = 0 num_correct = 0 sub_num = { 0: { "num":0, "corr":0 }, 1: { "num":0, "corr":0 }, 2: { "num":0, "corr":0 }, 3: { "num":0, "corr":0 } } for result_ in result: grth = ground_truth[str(result_['question_id'])] sub_num[grth['question_type']]['num'] = sub_num[grth['question_type']]['num'] + 1 total += 1 if grth['answer'] == result_['answer']: sub_num[grth['question_type']]['corr'] = sub_num[grth['question_type']]['corr'] + 1 num_correct += 1 test_total = 0 test_corr = 0 for key, value in sub_num.items(): test_total += value['num'] test_corr += value['corr'] assert test_total == total and test_corr == num_correct # Write the results to result file if valid: if val_ckpt_flag: result_eval_file = \ self.__C.CACHE_PATH + \ 'result_run_' + self.__C.CKPT_VERSION + \ '.json' else: result_eval_file = \ self.__C.CACHE_PATH + \ 'result_run_' + self.__C.VERSION + \ '.json' else: if self.__C.CKPT_PATH is not None: result_eval_file = \ self.__C.RESULT_PATH + \ 'result_run_' + self.__C.CKPT_VERSION + \ '.json' else: result_eval_file = \ self.__C.RESULT_PATH + \ 'result_run_' + self.__C.CKPT_VERSION + \ '_epoch' + str(self.__C.CKPT_EPOCH) + \ '.json' print('Save the result to file: 
{}'.format(result_eval_file)) print(result_eval_file) json.dump(result, open(result_eval_file, 'w')) if val_ckpt_flag: print('Write to log file: {}'.format( self.__C.LOG_PATH + 'log_run_' + self.__C.CKPT_VERSION + '.txt', 'a+') ) logfile = open( self.__C.LOG_PATH + 'log_run_' + self.__C.CKPT_VERSION + '.txt', 'a+' ) else: print('Write to log file: {}'.format( self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+') ) logfile = open( self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+' ) for k, v in sub_num.items(): if k == 0: print("Acc on object is %.2f" % (100.0 * v['corr']/v['num'])) logfile.write("Acc on object is %.2f\n" % (100.0 * v['corr']/v['num'])) if k == 1: print("Acc on number is %.2f" % (100.0 * v['corr']/v['num'])) logfile.write("Acc on number is %.2f\n" % (100.0 * v['corr']/v['num'])) if k == 2: print("Acc on color is %.2f" % (100.0 * v['corr']/v['num'])) logfile.write("Acc on color is %.2f\n" % (100.0 * v['corr']/v['num'])) if k == 3: print("Acc on location is %.2f" % (100.0 * v['corr']/v['num'])) logfile.write("Acc on location is %.2f\n" % (100.0 * v['corr']/v['num'])) print("Total ACC is %.2f" % (100.0*num_correct/total)) logfile.write("Total ACC is %.2f\n" % (100.0*num_correct/total)) logfile.close()
def eval(self, dataset, state_dict=None, valid=False): st.title('VQA Validation') # Load parameters if self.__C.CKPT_PATH is not None: print('Warning: you are now using CKPT_PATH args, ' 'CKPT_VERSION and CKPT_EPOCH will not work') path = self.__C.CKPT_PATH else: path = self.__C.CKPTS_PATH + \ 'ckpt_' + self.__C.CKPT_VERSION + \ '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl' val_ckpt_flag = False if state_dict is None: val_ckpt_flag = True print('Loading ckpt {}'.format(path)) state_dict = torch.load(path)['state_dict'] print('Finish!') # Store the prediction list qid_list = [ques['question_id'] for ques in dataset.ques_list] q_list = [ques['question'] for ques in dataset.ques_list] im_id_list = [ques['image_id'] for ques in dataset.ques_list] images = [] if st.button('New set of images'): for x in range(0, 10): index = randint(0, len(im_id_list)) num_digit = len(str(index)) name = 'COCO_val2014_' for x in range(0, 12 - num_digit): name = name + '0' image = Image.open(name + '.jpg') images.append(image) pick_img = st.sidebar.radio("Which image?", [x for x in range(1, len(images))]) image_iterator = paginator("Select a sunset page", images) indices_on_page, images_on_page = map(list, zip(*image_iterator)) st.write('result:') st.image(images_on_page, width=200, caption=indices_on_page) # do something with what the user selected here if pick_img: st.write('yass') ans_ix_list = [] pred_list = [] data_size = dataset.data_size token_size = dataset.token_size ans_size = dataset.ans_size pretrained_emb = dataset.pretrained_emb net = Net(self.__C, pretrained_emb, token_size, ans_size) net.cuda() net.eval() if self.__C.N_GPU > 1: net = nn.DataParallel(net, device_ids=self.__C.DEVICES) net.load_state_dict(state_dict) dataloader = Data.DataLoader(dataset, batch_size=self.__C.EVAL_BATCH_SIZE, shuffle=False, num_workers=self.__C.NUM_WORKERS, pin_memory=True) for step, (img_feat_iter, ques_ix_iter, ans_iter) in enumerate(dataloader): print("\rEvaluation: [step %4d/%4d]" % ( step, int(data_size / self.__C.EVAL_BATCH_SIZE), ), end=' ') img_feat_iter = img_feat_iter.cuda() ques_ix_iter = ques_ix_iter.cuda() pred = net(img_feat_iter, ques_ix_iter) pred_np = pred.cpu().data.numpy() pred_argmax = np.argmax(pred_np, axis=1) # Save the answer index if pred_argmax.shape[0] != self.__C.EVAL_BATCH_SIZE: pred_argmax = np.pad( pred_argmax, (0, self.__C.EVAL_BATCH_SIZE - pred_argmax.shape[0]), mode='constant', constant_values=-1) ans_ix_list.append((pred_argmax)) break #st.write(dataset.ix_to_ans[str([pred_argmax])]) # Save the whole prediction vector if self.__C.TEST_SAVE_PRED: if pred_np.shape[0] != self.__C.EVAL_BATCH_SIZE: pred_np = np.pad( pred_np, ((0, self.__C.EVAL_BATCH_SIZE - pred_np.shape[0]), (0, 0)), mode='constant', constant_values=-1) pred_list.append(pred_np) print('') ans_ix_list = np.array(ans_ix_list).reshape(-1) old = 0 for qix in range(qid_list.__len__()): bbb = int(qid_list[qix]) aaa = dataset.ix_to_ans[str(ans_ix_list[qix])] if old != int(im_id_list[qix]): image = Image.open('/home/akshay/Downloads/val2014/' + 'COCO_val2014_000000' + str(im_id_list[qix]) + '.jpg') cap = q_list[qix] + " " + aaa st.image(image) old = int(im_id_list[qix]) st.write(q_list[qix], " ", aaa) result = [{ 'answer': aaa, # ix_to_ans(load with json) keys are type of string 'question_id': bbb }] # Write the results to result file if valid: if val_ckpt_flag: result_eval_file = \ self.__C.CACHE_PATH + \ 'result_run_' + self.__C.CKPT_VERSION + \ '.json' else: result_eval_file = \ self.__C.CACHE_PATH + \ 'result_run_' + 
self.__C.VERSION + \ '.json' else: if self.__C.CKPT_PATH is not None: result_eval_file = \ self.__C.RESULT_PATH + \ 'result_run_' + self.__C.CKPT_VERSION + \ '.json' else: result_eval_file = \ self.__C.RESULT_PATH + \ 'result_run_' + self.__C.CKPT_VERSION + \ '_epoch' + str(self.__C.CKPT_EPOCH) + \ '.json' print('Save the result to file: {}'.format(result_eval_file)) json.dump(result, open(result_eval_file, 'w')) # Save the whole prediction vector if self.__C.TEST_SAVE_PRED: if self.__C.CKPT_PATH is not None: ensemble_file = \ self.__C.PRED_PATH + \ 'result_run_' + self.__C.CKPT_VERSION + \ '.json' else: ensemble_file = \ self.__C.PRED_PATH + \ 'result_run_' + self.__C.CKPT_VERSION + \ '_epoch' + str(self.__C.CKPT_EPOCH) + \ '.json' print( 'Save the prediction vector to file: {}'.format(ensemble_file)) pred_list = np.array(pred_list).reshape(-1, ans_size) result_pred = [{ 'pred': pred_list[qix], 'question_id': int(qid_list[qix]) } for qix in range(qid_list.__len__())] pickle.dump(result_pred, open(ensemble_file, 'wb+'), protocol=-1) # Run validation script if valid: # create vqa object and vqaRes object ques_file_path = self.__C.QUESTION_PATH['val'] ans_file_path = self.__C.ANSWER_PATH['val'] vqa = VQA(ans_file_path, ques_file_path) vqaRes = vqa.loadRes(result_eval_file, ques_file_path) # create vqaEval object by taking vqa and vqaRes vqaEval = VQAEval( vqa, vqaRes, n=2 ) # n is precision of accuracy (number of places after decimal), default is 2 # evaluate results """ If you have a list of question ids on which you would like to evaluate your results, pass it as a list to below function By default it uses all the question ids in annotation file """ vqaEval.evaluate() # print accuracies print("\n") print("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall'])) # print("Per Question Type Accuracy is the following:") # for quesType in vqaEval.accuracy['perQuestionType']: # print("%s : %.02f" % (quesType, vqaEval.accuracy['perQuestionType'][quesType])) # print("\n") print("Per Answer Type Accuracy is the following:") for ansType in vqaEval.accuracy['perAnswerType']: print("%s : %.02f" % (ansType, vqaEval.accuracy['perAnswerType'][ansType])) print("\n") if val_ckpt_flag: print('Write to log file: {}'.format( self.__C.LOG_PATH + 'log_run_' + self.__C.CKPT_VERSION + '.txt', 'a+')) logfile = open( self.__C.LOG_PATH + 'log_run_' + self.__C.CKPT_VERSION + '.txt', 'a+') else: print('Write to log file: {}'.format( self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')) logfile = open( self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+') logfile.write("Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall'])) for ansType in vqaEval.accuracy['perAnswerType']: logfile.write( "%s : %.02f " % (ansType, vqaEval.accuracy['perAnswerType'][ansType])) logfile.write("\n\n") logfile.close()
def train(self, dataset):
    net = Net(self.__C)
    net.cuda()
    net.train()

    # Create checkpoint
    if ('ckpt_' + self.__C.VERSION) in os.listdir(self.__C.CKPTS_PATH):
        shutil.rmtree(self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION)
    os.mkdir(self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION)

    loader_params = {'batch_size': 16, 'num_gpus': 1}
    dataloader = TheLoader.from_dataset(dataset, **loader_params)

    loss_sum = 0
    named_params = list(net.named_parameters())
    grad_norm = np.zeros(len(named_params))

    loss_fn = torch.nn.NLLLoss().cuda()

    # Load checkpoint if resume training
    if self.__C.RESUME:
        print(' ========== Resume training')
        path = self.__C.CKPTS_PATH + 'ckpt_' + self.__C.CKPT_VERSION + \
               '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl'

        # Load the network parameters
        print('Loading ckpt {}'.format(path))
        ckpt = torch.load(path)
        print('Finish!')
        net.load_state_dict(ckpt['state_dict'])

        # Load the optimizer parameters
        optim = get_optim(self.__C, net, len(dataloader), ckpt['lr_base'])
        optim._step = int(len(dataloader) / self.__C.BATCH_SIZE * self.__C.CKPT_EPOCH)
        optim.optimizer.load_state_dict(ckpt['optimizer'])
        start_epoch = self.__C.CKPT_EPOCH
    else:
        optim = get_optim(self.__C, net, len(dataloader))
        start_epoch = 0

    for epoch in range(start_epoch, self.__C.MAX_EPOCH):
        print("Training epoch...", epoch)

        # Learning Rate Decay
        if epoch in self.__C.LR_DECAY_LIST:
            adjust_lr(optim, self.__C.LR_DECAY_R)

        time_start = time.time()
        print("time_start:", time_start)
        pred_argmax = []

        for b, (time_per_batch, batch) in enumerate(time_batch(dataloader)):
            optim.zero_grad()

            x, goldsentence = net(**batch)
            goldsentence = goldsentence[:, 1:]
            x = x[:, :31, :]
            pred_argmax = np.argmax(x.cpu().data.numpy(), axis=2)

            loss = loss_fn(x.permute(0, 2, 1), goldsentence)
            loss /= self.__C.GRAD_ACCU_STEPS
            loss.backward()
            loss_sum += loss.cpu().data.numpy() * self.__C.GRAD_ACCU_STEPS

            mode_str = self.__C.SPLIT['train']
            print("\r[version %s][epoch %2d][%s] loss: %.4f, lr: %.2e" % (
                self.__C.VERSION,
                epoch + 1,
                mode_str,
                loss.cpu().data.numpy() / self.__C.SUB_BATCH_SIZE,
                optim._rate), end=' ')

            # Gradient norm clipping
            if self.__C.GRAD_NORM_CLIP > 0:
                nn.utils.clip_grad_norm_(net.parameters(), self.__C.GRAD_NORM_CLIP)

            # Save the gradient information
            for name in range(len(named_params)):
                norm_v = torch.norm(named_params[name][1].grad).cpu().data.numpy() \
                    if named_params[name][1].grad is not None else 0
                grad_norm[name] += norm_v * self.__C.GRAD_ACCU_STEPS

            optim.step()

        time_end = time.time()
        print('Finished in {}s'.format(int(time_end - time_start)))

        epoch_finish = epoch + 1
        loss_sum = 0
        grad_norm = np.zeros(len(named_params))

        # Save checkpoint
        state = {
            'state_dict': net.state_dict(),
            'optimizer': optim.optimizer.state_dict(),
            'lr_base': optim.lr_base
        }
        torch.save(state,
                   self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION + '/epoch' + str(epoch_finish) + '.pkl')

        print("Gold sentence: ", str(goldsentence.cpu().data))
        print("A sample prediction: ", pred_argmax)
        print("Checkpoint saved.")
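# Note: `time_batch` is used by loadModel() and the train() above but is not defined
# in this listing. A minimal sketch of what it is assumed to do (yield each batch
# together with the time spent fetching it):
def time_batch(dataloader):
    # hypothetical helper -- wraps a dataloader and reports per-batch fetch time
    import time
    last = time.time()
    for batch in dataloader:
        now = time.time()
        yield now - last, batch
        last = time.time()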
def eval(self, dataset, state_dict=None, valid=False):

    # Load parameters
    if self.__C.CKPT_PATH is not None:
        print('Warning: you are now using CKPT_PATH args, '
              'CKPT_VERSION and CKPT_EPOCH will not work')
        path = self.__C.CKPT_PATH
    else:
        path = self.__C.CKPTS_PATH + 'ckpt_' + self.__C.CKPT_VERSION + \
               '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl'

    val_ckpt_flag = False
    if state_dict is None:
        val_ckpt_flag = True
        print('Loading ckpt {}'.format(path))
        state_dict = torch.load(path)['state_dict']
        print('Finish!')

    groundtruth_ans_list = dataset.ans_list
    groundtruth_question_to_ans_dict = {}
    for _ in groundtruth_ans_list:
        if _['multiple_choice_answer'] in dataset.ans_to_ix:
            groundtruth_question_to_ans_dict[_['question_id']] = \
                dataset.ans_to_ix[_['multiple_choice_answer']]
        else:
            groundtruth_question_to_ans_dict[_['question_id']] = dataset.ans_to_ix['0']

    # Store the prediction list
    qid_list = [ques['question_id'] for ques in dataset.ques_list]
    ans_ix_list = []
    pred_list = []

    data_size = dataset.data_size
    # token_size = dataset.token_size
    ans_size = dataset.ans_size
    # pretrained_emb = dataset.pretrained_emb

    net = Net(self.__C, ans_size)
    net.cuda()
    net.eval()

    if self.__C.N_GPU > 1:
        net = nn.DataParallel(net, device_ids=self.__C.DEVICES)

    net.load_state_dict(state_dict)

    dataloader = Data.DataLoader(dataset,
                                 batch_size=self.__C.EVAL_BATCH_SIZE,
                                 shuffle=False,
                                 num_workers=self.__C.NUM_WORKERS,
                                 pin_memory=True)

    for step, (img_feat_iter, ans_iter, ques_input_idx, ques_attention_mask) in enumerate(dataloader):
        print("\rEvaluation: [step %4d/%4d]" % (
            step,
            int(data_size / self.__C.EVAL_BATCH_SIZE),
        ), end=' ')

        img_feat_iter = img_feat_iter.cuda()
        ans_iter = ans_iter.cuda()
        ques_input_idx = ques_input_idx.cuda()
        ques_attention_mask = ques_attention_mask.cuda()

        pred = net(img_feat_iter, ques_input_idx.squeeze(1), ques_attention_mask.squeeze(1))

        pred_np = pred.cpu().data.numpy()
        pred_argmax = np.argmax(pred_np, axis=1)

        # Save the answer index
        if pred_argmax.shape[0] != self.__C.EVAL_BATCH_SIZE:
            pred_argmax = np.pad(
                pred_argmax,
                (0, self.__C.EVAL_BATCH_SIZE - pred_argmax.shape[0]),
                mode='constant',
                constant_values=-1)
        ans_ix_list.append(pred_argmax)

        # Save the whole prediction vector
        if self.__C.TEST_SAVE_PRED:
            if pred_np.shape[0] != self.__C.EVAL_BATCH_SIZE:
                pred_np = np.pad(
                    pred_np,
                    ((0, self.__C.EVAL_BATCH_SIZE - pred_np.shape[0]), (0, 0)),
                    mode='constant',
                    constant_values=-1)
            pred_list.append(pred_np)

    print('')
    ans_ix_list = np.array(ans_ix_list).reshape(-1)

    result = [{
        'answer': dataset.ix_to_ans[str(ans_ix_list[qix])],  # ix_to_ans (loaded with json) keys are of type string
        'question_id': int(qid_list[qix]),
        'answer_id': int(str(ans_ix_list[qix]))
    } for qix in range(qid_list.__len__())]

    y_true = []
    y_pred = []
    for _ in result:
        qid = _['question_id']
        if qid in groundtruth_question_to_ans_dict:
            y_pred.append(_['answer_id'])
            y_true.append(groundtruth_question_to_ans_dict[qid])

    acc = accuracy_score(y_true, y_pred)
    print('acc :', acc)

    logfile = open(
        self.__C.CACHE_PATH + 'result_runacc_' + self.__C.CKPT_VERSION + '.txt', 'a+')
    logfile.write('acc = ' + str(acc))
    logfile.close()
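# Note: this eval() variant consumes BERT-style question tensors (ques_input_idx,
# ques_attention_mask) instead of GloVe token indices. A minimal sketch of how a
# dataset might produce them with the HuggingFace tokenizer; max_length=14 is an
# assumption, not taken from this listing:
from transformers import BertTokenizer

_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def encode_question(question):
    # Returns tensors of shape [1, max_length]; the extra leading dimension is what
    # the eval loop above removes with .squeeze(1) after batching.
    enc = _tokenizer(question, padding='max_length', truncation=True,
                     max_length=14, return_tensors='pt')
    return enc['input_ids'], enc['attention_mask']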
def show(self, dataset, state_dict=None):

    # Load parameters
    if self.__C.CKPT_PATH is not None:
        print('Warning: you are now using CKPT_PATH args, '
              'CKPT_VERSION and CKPT_EPOCH will not work')
        path = self.__C.CKPT_PATH
    else:
        path = self.__C.CKPTS_PATH + 'ckpt_' + self.__C.CKPT_VERSION + \
               '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl'

    val_ckpt_flag = False
    if state_dict is None:
        val_ckpt_flag = True
        print('========== Loading ckpt {}'.format(path))
        state_dict = torch.load(path)['state_dict']
        print('========== Finished!')

    ques_list = [ques['question'] for ques in dataset.ques_list]
    qid_list = [ques['question_id'] for ques in dataset.ques_list]
    ans_ix_list = []
    pred_list = []
    result = []

    data_size = dataset.data_size
    token_size = dataset.token_size
    ans_size = dataset.ans_size
    pretrained_emb = dataset.pretrained_emb

    net = Net(self.__C, pretrained_emb, token_size, ans_size)
    net.cuda()
    net.eval()
    net.load_state_dict(state_dict)

    dataloader = Data.DataLoader(dataset,
                                 batch_size=self.__C.BATCH_SIZE,
                                 shuffle=False,
                                 num_workers=self.__C.NUM_WORKERS,
                                 pin_memory=True)

    for step, (img_feat_iter, ques_ix_iter, ans_iter) in enumerate(dataloader):
        img_feat_iter = img_feat_iter.cuda()
        ques_ix_iter = ques_ix_iter.cuda()

        pred = net(img_feat_iter, ques_ix_iter)
        pred_np = pred.cpu().data.numpy()
        pred_argmax = np.argmax(pred_np, axis=1)  # [835]

        # Save the answer index
        ans_ix_list.append(pred_argmax)

    ans_ix_list = np.array(ans_ix_list).reshape(-1)

    for qix in range(qid_list.__len__()):
        result.append({
            'question': ques_list[qix],
            'answer': dataset.ix_to_ans[str(ans_ix_list[qix])]
        })

    print('========== result')
    print(result)
def train(self, dataset, dataset_eval=None):
    super_time_start = time.time()

    # Obtain needed information
    data_size = dataset.data_size
    token_size = dataset.token_size
    ans_size = dataset.ans_size
    pretrained_emb = dataset.pretrained_emb

    # Define the MCAN model
    net = Net(self.__C, pretrained_emb, token_size, ans_size)
    net.cuda()
    net.train()

    # Define the binary cross entropy loss
    loss_fn = torch.nn.BCELoss(reduction='sum').cuda()

    # Load checkpoint if resume training
    if self.__C.RESUME:
        print('========== Resume training')
        if self.__C.CKPT_PATH is not None:
            print('Warning: you are now using CKPT_PATH args, '
                  'CKPT_VERSION and CKPT_EPOCH will not work')
            path = self.__C.CKPT_PATH
        else:
            path = self.__C.CKPTS_PATH + 'ckpt_' + self.__C.CKPT_VERSION + \
                   '/epoch' + str(self.__C.CKPT_EPOCH) + '.pkl'

        # Load the network parameters
        print('========== Loading ckpt {}'.format(path))
        ckpt = torch.load(path)
        print('========== Finished!')
        net.load_state_dict(ckpt['state_dict'])

        # Load the optimizer parameters
        optim = get_optim(self.__C, net, data_size, ckpt['lr_base'])
        optim._step = int(data_size / self.__C.BATCH_SIZE * self.__C.CKPT_EPOCH)
        optim.optimizer.load_state_dict(ckpt['optimizer'])
        start_epoch = self.__C.CKPT_EPOCH
    else:
        if ('ckpt_' + self.__C.VERSION) in os.listdir(self.__C.CKPTS_PATH):
            shutil.rmtree(self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION)
        os.mkdir(self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION)

        optim = get_optim(self.__C, net, data_size)
        start_epoch = 0

    loss_sum = 0
    named_params = list(net.named_parameters())
    grad_norm = np.zeros(len(named_params))

    # Define multi-thread dataloader
    dataloader = Data.DataLoader(dataset,
                                 batch_size=self.__C.BATCH_SIZE,
                                 shuffle=False,
                                 num_workers=self.__C.NUM_WORKERS,
                                 pin_memory=self.__C.PIN_MEM,
                                 drop_last=True)

    # Training script
    for epoch in range(start_epoch, self.__C.MAX_EPOCH):
        epoch_finish = epoch + 1

        # Save log information
        logfile = open(self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')
        logfile.write('nowTime: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '\n')
        logfile.close()

        # Learning Rate Decay
        if epoch in self.__C.LR_DECAY_LIST:
            adjust_lr(optim, self.__C.LR_DECAY_R)

        # Externally shuffle
        shuffle_list(dataset.ans_list)

        time_start = time.time()

        # Iteration
        for step, (img_feat_iter, ques_ix_iter, ans_iter) in enumerate(dataloader):
            optim.zero_grad()

            img_feat_iter = img_feat_iter.cuda()
            ques_ix_iter = ques_ix_iter.cuda()
            ans_iter = ans_iter.cuda()

            pred = net(img_feat_iter, ques_ix_iter)
            loss = loss_fn(pred, ans_iter)
            loss.backward()
            loss_sum += loss.cpu().data.numpy()

            if self.__C.VERBOSE:  # print loss every step
                if dataset_eval is not None:
                    mode_str = self.__C.SPLIT['train'] + '->' + self.__C.SPLIT['val']
                else:
                    mode_str = self.__C.SPLIT['train'] + '->' + self.__C.SPLIT['test']

                print("\r[version %s][epoch %2d][step %4d/%4d][%s] loss: %.4f, lr: %.2e" % (
                    self.__C.VERSION,
                    epoch_finish,
                    step,
                    int(data_size / self.__C.BATCH_SIZE),
                    mode_str,
                    loss.cpu().data.numpy() / self.__C.BATCH_SIZE,
                    optim._rate), end=' ')

            # Save the gradient information
            for name in range(len(named_params)):
                if named_params[name][1].grad is not None:
                    norm_v = torch.norm(named_params[name][1].grad).cpu().data.numpy()
                else:
                    norm_v = 0
                grad_norm[name] += norm_v

            optim.step()

        time_end = time.time()
        print('========== Finished in {}s'.format(int(time_end - time_start)))

        # Save checkpoint
        state = {
            'state_dict': net.state_dict(),
            'optimizer': optim.optimizer.state_dict(),
            'lr_base': optim.lr_base
        }
        torch.save(state,
                   self.__C.CKPTS_PATH + 'ckpt_' + self.__C.VERSION + '/epoch' + str(epoch_finish) + '.pkl')

        # Logging
        logfile = open(self.__C.LOG_PATH + 'log_run_' + self.__C.VERSION + '.txt', 'a+')
        logfile.write('epoch = ' + str(epoch_finish) +
                      ' loss = ' + str(loss_sum / data_size) +
                      '\n' + 'lr = ' + str(optim._rate) + '\n\n')
        logfile.close()

        # Eval after every epoch
        if dataset_eval is not None:
            self.eval(dataset_eval, state_dict=net.state_dict(), valid=True)

        loss_sum = 0
        grad_norm = np.zeros(len(named_params))

    print('========== Total Training time is {}s'.format(int(time.time() - super_time_start)))