class VQA:
    def __init__(self):
        # Datasets
        self.train_tuple = get_data_tuple(args.train, bs=args.batch_size,
                                          shuffle=True, drop_last=True)
        if args.valid != "":
            self.valid_tuple = get_data_tuple(args.valid, bs=1024,
                                              shuffle=False, drop_last=False)
        else:
            self.valid_tuple = None

        # Model
        self.model = VQAModel(self.train_tuple.dataset.num_answers)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa, self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr, warmup=0.1, t_total=t_total)
        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)

        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)

    def train(self, train_tuple, eval_tuple):
        dset, loader, evaluator = train_tuple
        iter_wrapper = (lambda x: tqdm(x, total=len(loader))) if args.tqdm else (lambda x: x)

        best_valid = 0.
        for epoch in range(args.epochs):
            quesid2ans = {}
            for i, (ques_id, feats, boxes, sent, target) in iter_wrapper(enumerate(loader)):
                self.model.train()
                self.optim.zero_grad()

                feats, boxes, target = feats.cuda(), boxes.cuda(), target.cuda()
                logit = self.model(feats, boxes, sent)
                assert logit.dim() == target.dim() == 2
                loss = self.bce_loss(logit, target)
                loss = loss * logit.size(1)

                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), 5.)
                self.optim.step()

                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans

            log_str = "\nEpoch %d: Train %0.2f\n" % (epoch, evaluator.evaluate(quesid2ans) * 100.)

            if self.valid_tuple is not None:  # Do Validation
                valid_score = self.evaluate(eval_tuple)
                if valid_score > best_valid:
                    best_valid = valid_score
                    self.save("BEST")

                log_str += "Epoch %d: Valid %0.2f\n" % (epoch, valid_score * 100.) + \
                           "Epoch %d: Best %0.2f\n" % (epoch, best_valid * 100.)

            print(log_str, end='')

            with open(self.output + "/log.log", 'a') as f:
                f.write(log_str)
                f.flush()

        self.save("LAST")

    def predict(self, eval_tuple: DataTuple, dump=None):
        """
        Predict the answers to questions in a data split.

        :param eval_tuple: The data tuple to be evaluated.
        :param dump: The path of saved file to dump results.
        :return: A dict of question_id to answer.
        """
        self.model.eval()
        dset, loader, evaluator = eval_tuple
        quesid2ans = {}
        for i, datum_tuple in enumerate(loader):
            ques_id, feats, boxes, sent = datum_tuple[:4]  # Avoid seeing ground truth
            with torch.no_grad():
                feats, boxes = feats.cuda(), boxes.cuda()
                logit = self.model(feats, boxes, sent)
                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans
        if dump is not None:
            evaluator.dump_result(quesid2ans, dump)
        return quesid2ans

    def evaluate(self, eval_tuple: DataTuple, dump=None):
        """Evaluate all data in data_tuple."""
        quesid2ans = self.predict(eval_tuple, dump)
        return eval_tuple.evaluator.evaluate(quesid2ans)

    @staticmethod
    def oracle_score(data_tuple):
        dset, loader, evaluator = data_tuple
        quesid2ans = {}
        for i, (ques_id, feats, boxes, sent, target) in enumerate(loader):
            _, label = target.max(1)
            for qid, l in zip(ques_id, label.cpu().numpy()):
                ans = dset.label2ans[l]
                quesid2ans[qid.item()] = ans
        return evaluator.evaluate(quesid2ans)

    def save(self, name):
        torch.save(self.model.state_dict(),
                   os.path.join(self.output, "%s.pth" % name))

    def load(self, path):
        print("Load model from %s" % path)
        state_dict = torch.load("%s.pth" % path)
        self.model.load_state_dict(state_dict)
class VQA:
    def __init__(self):
        # Datasets
        self.train_tuple = get_data_tuple(args.train, bs=args.batch_size,
                                          shuffle=True, drop_last=True)
        if args.valid != "":
            self.valid_tuple = get_data_tuple(args.valid, bs=1024,
                                              shuffle=False, drop_last=False)
        else:
            self.valid_tuple = None

        # Model
        self.model = VQAModel(self.train_tuple.dataset.num_answers)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa, self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr, warmup=0.1, t_total=t_total)
        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)

        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)

    def train(self, train_tuple, eval_tuple):
        dset, loader, evaluator = train_tuple
        iter_wrapper = (lambda x: tqdm(x, total=len(loader))) if args.tqdm else (lambda x: x)

        best_valid = 0.
        for epoch in range(args.epochs):
            quesid2ans = {}
            for i, (ques_id, feats, boxes, sent, target) in iter_wrapper(enumerate(loader)):
                self.model.train()
                self.optim.zero_grad()

                feats, boxes, target = feats.cuda(), boxes.cuda(), target.cuda()
                logit = self.model(feats, boxes, sent)
                assert logit.dim() == target.dim() == 2
                loss = self.bce_loss(logit, target)
                loss = loss * logit.size(1)

                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), 5.)
                self.optim.step()

                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans

            log_str = "\nEpoch %d: Train %0.2f\n" % (epoch, evaluator.evaluate(quesid2ans) * 100.)

            if self.valid_tuple is not None:  # Do Validation
                valid_score = self.evaluate(eval_tuple)
                if valid_score > best_valid:
                    best_valid = valid_score
                    self.save("BEST")

                log_str += "Epoch %d: Valid %0.2f\n" % (epoch, valid_score * 100.) + \
                           "Epoch %d: Best %0.2f\n" % (epoch, best_valid * 100.)

            print(log_str, end='')

            with open(self.output + "/log.log", 'a') as f:
                f.write(log_str)
                f.flush()

        self.save("LAST")

    def predict(self, eval_tuple: DataTuple, dump=None):
        """
        Predict the answers to questions in a data split.

        :param eval_tuple: The data tuple to be evaluated.
        :param dump: The path of saved file to dump results.
        :return: A dict of question_id to answer.
        """
        self.model.eval()
        dset, loader, evaluator = eval_tuple

        question_id2img_id = {x["question_id"]: x["img_id"] for x in dset.data}
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
        plt.rcParams['figure.figsize'] = (12, 10)
        num_regions = 36
        count = 0

        def plot_attention_figure(attention_map, direction, datapoint, layer, head):
            # Draw the three image panels with numbered region boxes, plus the
            # token-by-region attention heatmap, then save the figure. (The
            # lang2vis and vis2lang branches were originally two verbatim copies
            # of this block differing only in title, map, and filename prefix.)
            img_id = question_id2img_id[ques_id[datapoint].item()]
            img_path = os.path.join(
                "/mnt/8tera/claudio.greco/mscoco_trainval_2014", img_id) + ".jpg"

            plt.clf()
            for panel, title in enumerate(["Image (regions 0-7)",
                                           "Image (regions 8-15)",
                                           "Image (regions 16-35)"]):
                plt.subplot(2, 3, panel + 1)
                plt.gca().set_axis_off()
                plt.title(title)
                im = cv2.imread(img_path)
                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
                plt.imshow(im)

            # Undo the 0-1 box normalization to draw boxes in pixel space.
            img_info = loader.dataset.imgid2img[img_id]
            img_h, img_w = img_info['img_h'], img_info['img_w']
            unnormalized_boxes = boxes[datapoint].clone()
            unnormalized_boxes[:, (0, 2)] *= img_w
            unnormalized_boxes[:, (1, 3)] *= img_h

            for region_idx, bbox in enumerate(unnormalized_boxes):
                if region_idx < 8:
                    plt.subplot(2, 3, 1)
                elif region_idx < 16:
                    plt.subplot(2, 3, 2)
                else:
                    plt.subplot(2, 3, 3)
                bbox = [bbox[0].item(), bbox[1].item(),
                        bbox[2].item(), bbox[3].item()]
                # Nudge boxes touching the image border so they stay visible.
                if bbox[0] == 0:
                    bbox[0] = 2
                if bbox[1] == 0:
                    bbox[1] = 2
                plt.gca().add_patch(
                    plt.Rectangle((bbox[0], bbox[1]),
                                  bbox[2] - bbox[0] - 4,
                                  bbox[3] - bbox[1] - 4,
                                  fill=False, edgecolor='red', linewidth=1))
                plt.gca().text(bbox[0], bbox[1] - 2, '%s' % region_idx,
                               bbox=dict(facecolor='blue'),
                               fontsize=9, color='white')

            ax = plt.subplot(2, 1, 2)
            plt.title("Cross-modal attention %s" % direction)
            tokenized_question = tokenizer.tokenize(sent[datapoint])
            tokenized_question = ["<CLS>"] + tokenized_question + ["<SEP>"]
            attention_map = attention_map[:len(tokenized_question), :num_regions]
            plt.imshow(attention_map, vmin=0, vmax=1)
            for row in range(len(tokenized_question)):
                for col in range(num_regions):
                    att_value = round(attention_map[row, col], 1)
                    ax.text(col, row, att_value, ha="center", va="center",
                            color="w" if att_value <= 0.5 else "b", fontsize=6)
            ax.set_xticks(np.arange(num_regions))
            ax.set_xticklabels(list(range(num_regions)))
            ax.set_yticks(np.arange(len(tokenized_question)))
            ax.set_yticklabels(tokenized_question)
            plt.tight_layout()
            plt.savefig(
                "/mnt/8tera/claudio.greco/guesswhat_lxmert/guesswhat/visualization_vqa/"
                "{}_question_{}_layer_{}_head_{}.png".format(
                    direction, ques_id[datapoint].item(), layer, head),
                bbox_inches='tight', pad_inches=0.5)
            plt.close()

        quesid2ans = {}
        for i, datum_tuple in enumerate(loader):
            ques_id, feats, boxes, sent = datum_tuple[:4]  # Avoid seeing ground truth
            with torch.no_grad():
                feats, boxes = feats.cuda(), boxes.cuda()
                logit = self.model(feats, boxes, sent)

                for layer in [0, 4]:
                    for head in [0, 1]:
                        for datapoint in range(len(sent)):
                            print(count, len(sent))
                            count += 1

                            lang2vis_attention_probs = self.model.lxrt_encoder.model.bert.encoder \
                                .x_layers[layer].lang_att_map[datapoint][head].detach().cpu().numpy()
                            vis2lang_attention_probs = self.model.lxrt_encoder.model.bert.encoder \
                                .x_layers[layer].visn_att_map[datapoint][head].detach().cpu().numpy()

                            for direction, att_map in (
                                    ("lang2vis", lang2vis_attention_probs),
                                    ("vis2lang", vis2lang_attention_probs.transpose())):
                                plot_attention_figure(att_map, direction,
                                                      datapoint, layer, head)

                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans
        if dump is not None:
            evaluator.dump_result(quesid2ans, dump)
        return quesid2ans

    def evaluate(self, eval_tuple: DataTuple, dump=None):
        """Evaluate all data in data_tuple."""
        quesid2ans = self.predict(eval_tuple, dump)
        return eval_tuple.evaluator.evaluate(quesid2ans)

    @staticmethod
    def oracle_score(data_tuple):
        dset, loader, evaluator = data_tuple
        quesid2ans = {}
        for i, (ques_id, feats, boxes, sent, target) in enumerate(loader):
            _, label = target.max(1)
            for qid, l in zip(ques_id, label.cpu().numpy()):
                ans = dset.label2ans[l]
                quesid2ans[qid.item()] = ans
        return evaluator.evaluate(quesid2ans)

    def save(self, name):
        torch.save(self.model.state_dict(),
                   os.path.join(self.output, "%s.pth" % name))

    def load(self, path):
        print("Load model from %s" % path)
        state_dict = torch.load("%s.pth" % path)
        self.model.load_state_dict(state_dict)
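# A minimal, self-contained sketch of the heatmap rendering that predict() above
# performs for each (layer, head, datapoint), using random data in place of real
# LXMERT cross-attention (the token list and region count below are made up):
import numpy as np
import matplotlib.pyplot as plt

tokens = ["<CLS>", "what", "color", "is", "the", "cat", "?", "<SEP>"]
num_regions = 36
att = np.random.dirichlet(np.ones(num_regions), size=len(tokens))  # rows sum to 1

fig, ax = plt.subplots(figsize=(12, 4))
ax.imshow(att, vmin=0, vmax=1)
for row in range(len(tokens)):
    for col in range(num_regions):
        v = round(float(att[row, col]), 1)
        ax.text(col, row, v, ha="center", va="center",
                color="w" if v <= 0.5 else "b", fontsize=6)
ax.set_xticks(np.arange(num_regions))
ax.set_yticks(np.arange(len(tokens)))
ax.set_yticklabels(tokens)
plt.tight_layout()
plt.savefig("attention_demo.png", bbox_inches="tight", pad_inches=0.5)
plt.close(fig)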
class VQA:
    def __init__(self, folder="/", load=True):
        # Datasets
        if load:
            self.train_tuple = get_data_tuple(
                args.train, bs=args.batch_size,
                shuffle=True, drop_last=True, folder=folder
            )
            if args.valid != "":
                self.valid_tuple = get_data_tuple(
                    args.valid, bs=128,
                    shuffle=False, drop_last=False, folder=folder
                )
            else:
                self.valid_tuple = None

        # Model
        # self.model = VQAModel(self.train_tuple.dataset.num_answers)
        self.model = VQAModel(3129)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa, self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()

        # Indices of the "yes"/"no" answers in the 3129-way answer vocabulary,
        # and a mask selecting just those two logits.
        self.yes_index = 425
        self.no_index = 1403
        # Was torch.zeros(len(self.indexlist)); self.indexlist is never defined,
        # and the yes/no logits are selected out of the 3129-way answer space
        # (see the view(-1, 3129) in select_yesnoprobs), so size 3129 is assumed.
        self.mask_yes = torch.zeros(3129).cuda()
        self.mask_yes[self.yes_index] = 1.0
        self.mask_yes[self.no_index] = 1.0

        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.mseloss = nn.MSELoss()  # was missing; constraint_loss() calls self.mseloss
        if load:
            if 'bert' in args.optim:
                batch_per_epoch = len(self.train_tuple.loader)
                t_total = int(batch_per_epoch * args.epochs)
                print("BertAdam Total Iters: %d" % t_total)
                from lxrt.optimization import BertAdam
                self.optim = BertAdam(list(self.model.parameters()),
                                      lr=args.lr, warmup=0.1, t_total=t_total)
            else:
                self.optim = args.optimizer(self.model.parameters(), args.lr)

        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)

    def train(self, train_tuple, eval_tuple):
        dset, loader, evaluator = train_tuple
        iter_wrapper = (lambda x: tqdm(x, total=len(loader), ascii=True)) if args.tqdm else (lambda x: x)

        best_valid = 0.
        for epoch in range(args.epochs):
            quesid2ans = {}
            for i, (ques_id, feats, boxes, ques, op, q1, q2,
                    typetarget, q1typetarget, q2typetarget,
                    yesnotypetargets, q1yntypetargets, q2yntypetargets,
                    target, q1_target, q2_target) in iter_wrapper(enumerate(loader)):
                self.model.train()
                self.optim.zero_grad()

                feats, boxes, target, yntypetarget, typetarget = \
                    feats.cuda(), boxes.cuda(), target.cuda(), \
                    yesnotypetargets.cuda(), typetarget.cuda()
                op, q1typetarget, q2typetarget, q1yntypetargets, q2yntypetargets, \
                    q1_target, q2_target = \
                    op.cuda(), q1typetarget.cuda(), q2typetarget.cuda(), \
                    q1yntypetargets.cuda(), q2yntypetargets.cuda(), \
                    q1_target.cuda(), q2_target.cuda()

                logit = self.model(feats, boxes, ques)  # was `sent`, which is never bound here
                assert logit.dim() == target.dim() == 2
                loss = self.bce_loss(logit, target)
                loss = loss * logit.size(1)

                q1logit = self.model(feats, boxes, q1)
                q2logit = self.model(feats, boxes, q2)
                constraint_loss = self.constraint_loss(logit, q1logit, q2logit, op)
                loss = 0.5 * loss + 0.5 * constraint_loss

                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), 5.)
                self.optim.step()

                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans

            log_str = "\nEpoch %d: Train %0.2f\n" % (epoch, evaluator.evaluate(quesid2ans) * 100.)

            if self.valid_tuple is not None:  # Do Validation
                valid_score = self.evaluate(eval_tuple)
                if valid_score > best_valid:
                    best_valid = valid_score
                    self.save("BEST")

                log_str += "Epoch %d: Valid %0.2f\n" % (epoch, valid_score * 100.) + \
                           "Epoch %d: Best %0.2f\n" % (epoch, best_valid * 100.)

            print(log_str, end='')
            with open(self.output + "/log.log", 'a') as f:
                f.write(log_str)
                f.flush()

        self.save("LAST")
        return best_valid

    def rangeloss(self, x, lower, upper, lamb=4):
        """Penalize values of x outside [lower, upper] with a Gaussian-shaped
        well centered on the interval."""
        mean = (lower + upper) / 2
        sigma = (upper - lower + 0.00001) / lamb
        loss = 1 - torch.exp(-0.5 * torch.pow(torch.div(x - mean, sigma), 2))
        return loss.sum()

    def select_yesnoprobs(self, logit, x, op):
        """For examples whose operator id equals x, softmax over the yes/no
        logits and return P(yes)."""
        op_mask = torch.eq(op, x)
        logit = logit[op_mask].view(-1, 3129)
        logit_m = logit * self.mask_yes
        m = logit_m == 0
        logit_m = logit_m[~m].view(-1, 2)
        logit_m = torch.softmax(logit_m, 1)
        return logit_m.select(dim=1, index=0).view(-1, 1)

    def constraint_loss(self, logit, q1_logit, q2_logit, op):
        # Renamed from `constraintloss`, which did not match the
        # `self.constraint_loss(...)` call in train().
        total_loss = torch.zeros([1]).cuda()
        for x in range(1, 11):
            logit_m = self.select_yesnoprobs(logit, x, op)
            q1_logit_m = self.select_yesnoprobs(q1_logit, x, op)
            q2_logit_m = self.select_yesnoprobs(q2_logit, x, op)
            if logit_m.nelement() == 0:
                continue
            # op_map (defined elsewhere) maps each operator id to a function
            # combining the sub-questions' yes-probabilities.
            ideal_logit_m = op_map[x](q1_logit_m, q2_logit_m)
            rangeloss = self.mseloss(logit_m, ideal_logit_m)
            total_loss += rangeloss
        return total_loss

    def predict(self, eval_tuple: DataTuple, dump=None):
        """
        Predict the answers to questions in a data split.

        :param eval_tuple: The data tuple to be evaluated.
        :param dump: The path of saved file to dump results.
        :return: A dict of question_id to answer.
        """
        self.model.eval()
        dset, loader, evaluator = eval_tuple
        quesid2ans = {}
        for i, datum_tuple in tqdm(enumerate(loader), ascii=True, desc="Evaluating"):
            # ques_id, feats, boxes, sent = datum_tuple[:4]  # Avoid seeing ground truth
            ques_id, feats, boxes, ques, op, q1, q2, \
                typetarget, q1typetarget, q2typetarget, \
                yesnotypetargets, q1yntypetargets, q2yntypetargets, \
                target, q1_target, q2_target = datum_tuple
            with torch.no_grad():
                feats, boxes = feats.cuda(), boxes.cuda()
                logit = self.model(feats, boxes, ques)  # was `sent`, which is never bound here
                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans
        if dump is not None:
            evaluator.dump_result(quesid2ans, dump)
        return quesid2ans

    def evaluate(self, eval_tuple: DataTuple, dump=None):
        """Evaluate all data in data_tuple."""
        quesid2ans = self.predict(eval_tuple, dump)
        return eval_tuple.evaluator.evaluate(quesid2ans)

    @staticmethod
    def oracle_score(data_tuple):
        dset, loader, evaluator = data_tuple
        quesid2ans = {}
        for i, datum_tuple in enumerate(loader):
            ques_id, feats, boxes, ques, op, q1, q2, \
                typetarget, q1typetarget, q2typetarget, \
                yesnotypetargets, q1yntypetargets, q2yntypetargets, \
                target, q1_target, q2_target = datum_tuple
            _, label = target.max(1)
            for qid, l in zip(ques_id, label.cpu().numpy()):
                ans = dset.label2ans[l]
                quesid2ans[qid.item()] = ans
        return evaluator.evaluate(quesid2ans)
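# constraint_loss() above depends on an external op_map that turns the two
# sub-questions' yes-probabilities into the ideal yes-probability of the composed
# question. Its definition is not part of this file; the sketch below is an
# assumed product-fuzzy-logic version (operator ids and formulas are illustrative,
# not the repo's actual mapping):
import torch

op_map = {
    1: lambda p1, p2: p1 * p2,               # AND: both sub-answers must be yes
    2: lambda p1, p2: p1 + p2 - p1 * p2,     # OR: inclusion-exclusion
    3: lambda p1, p2: 1.0 - p1,              # NOT: second argument ignored
}

p1 = torch.tensor([[0.9], [0.2]])
p2 = torch.tensor([[0.8], [0.7]])
print(op_map[1](p1, p2))   # ideal P(yes) for the conjunction of the sub-questions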
class VQA:
    def __init__(self):
        # Datasets
        self.train_tuple = get_data_tuple(args.train, bs=args.batch_size,
                                          shuffle=True, drop_last=True)
        if args.valid != "":
            valid_bsize = args.get("valid_batch_size", 16)
            self.valid_tuple = get_data_tuple(args.valid, bs=valid_bsize,
                                              shuffle=False, drop_last=False)
        else:
            self.valid_tuple = None

        # Model
        self.model = VQAModel(self.train_tuple.dataset.num_answers)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.get("load_lxmert_pretrain", None) is not None:
            load_lxmert_from_pretrain_noqa(args.load_lxmert_pretrain, self.model)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa, self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()
            self.model.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr, warmup=0.1, t_total=t_total)
        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)

        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)

    def train(self, train_tuple, eval_tuple):
        dset, loader, evaluator = train_tuple
        iter_wrapper = (lambda x: tqdm(x, total=len(loader))) if args.tqdm else (lambda x: x)

        best_valid = 0.
        train_results = []
        report_every = args.get("report_every", 100)
        for epoch in range(args.epochs):
            quesid2ans = {}
            for i, batch in iter_wrapper(enumerate(loader)):
                ques_id, feats, boxes, sent, tags, target = zip(*batch)
                self.model.train()
                self.optim.zero_grad()

                target = torch.stack(target).cuda()
                logit = self.model(feats, boxes, sent, tags)
                assert logit.dim() == target.dim() == 2
                loss = self.bce_loss(logit, target)
                loss = loss * logit.size(1)

                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), 5.)
                self.optim.step()

                train_results.append(pd.Series({"loss": loss.detach().mean().item()}))

                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid] = ans

                if i % report_every == 0 and i > 0:
                    print("Epoch: {}, Iter: {}/{}".format(epoch, i, len(loader)))
                    print("    {}\n~~~~~~~~~~~~~~~~~~\n".format(
                        pd.DataFrame(train_results[-report_every:]).mean()))

            log_str = "\nEpoch %d: Train %0.2f\n" % (epoch, evaluator.evaluate(quesid2ans) * 100.)

            if self.valid_tuple is not None:  # Do Validation
                valid_score = self.evaluate(eval_tuple)
                if valid_score > best_valid and not args.get("special_test", False):
                    best_valid = valid_score
                    self.save("BEST")

                log_str += "Epoch %d: Valid %0.2f\n" % (epoch, valid_score * 100.) + \
                           "Epoch %d: Best %0.2f\n" % (epoch, best_valid * 100.)

            if epoch >= 5:
                self.save("Epoch{}".format(epoch))

            print(log_str, end='')
            print(args.output)

        self.save("LAST")

    def predict(self, eval_tuple: DataTuple, dump=None):
        """
        Predict the answers to questions in a data split.

        :param eval_tuple: The data tuple to be evaluated.
        :param dump: The path of saved file to dump results.
        :return: A dict of question_id to answer.
        """
        self.model.eval()
        dset, loader, evaluator = eval_tuple
        quesid2ans = {}
        for i, batch in enumerate(tqdm(loader)):
            _ = list(zip(*batch))
            ques_id, feats, boxes, sent, tags = _[:5]  # , target = zip(*batch)
            with torch.no_grad():
                # target = torch.stack(target).cuda()
                logit = self.model(feats, boxes, sent, tags)
                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid] = ans
        if dump is not None:
            evaluator.dump_result(quesid2ans, dump)
        return quesid2ans

    def evaluate(self, eval_tuple: DataTuple, dump=None):
        """Evaluate all data in data_tuple."""
        quesid2ans = self.predict(eval_tuple, dump)
        return eval_tuple.evaluator.evaluate(quesid2ans)

    @staticmethod
    def oracle_score(data_tuple):
        dset, loader, evaluator = data_tuple
        quesid2ans = {}
        for i, (ques_id, feats, boxes, sent, target) in enumerate(loader):
            _, label = target.max(1)
            for qid, l in zip(ques_id, label.cpu().numpy()):
                ans = dset.label2ans[l]
                quesid2ans[qid.item()] = ans
        return evaluator.evaluate(quesid2ans)

    def save(self, name):
        torch.save(self.model.state_dict(),
                   os.path.join(self.output, "%s.pth" % name))

    def load(self, path):
        print("Load model from %s" % path)
        state_dict = torch.load("%s.pth" % path)
        self.model.load_state_dict(state_dict)
class VQA:
    MAX_SIZE = 1333
    MIN_SIZE = 800

    def __init__(self):
        # Datasets
        self.train_tuple = get_data_tuple(args.train, bs=args.batch_size,
                                          shuffle=True, drop_last=True)
        if args.valid != "":
            self.valid_tuple = get_data_tuple(args.valid, bs=args.batch_size,
                                              shuffle=False, drop_last=False)
        else:
            self.valid_tuple = None

        # Model
        self.model = VQAModel(self.train_tuple.dataset.num_answers)
        self.args = self.model.args

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa, self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr, warmup=0.1, t_total=t_total)
        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)

        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)

    def _image_transform(self, path):
        img = Image.open(path)
        im = np.array(img).astype(np.float32)
        # IndexError: too many indices for array, grayscale images
        if len(im.shape) < 3:
            im = np.repeat(im[:, :, np.newaxis], 3, axis=2)
        im = im[:, :, ::-1]
        im -= np.array([102.9801, 115.9465, 122.7717])
        im_shape = im.shape
        im_height = im_shape[0]
        im_width = im_shape[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])

        # Scale based on minimum size
        im_scale = self.MIN_SIZE / im_size_min

        # Prevent the biggest axis from being more than max_size
        # If bigger, scale it down
        if np.round(im_scale * im_size_max) > self.MAX_SIZE:
            im_scale = self.MAX_SIZE / im_size_max

        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        img = torch.from_numpy(im).permute(2, 0, 1)

        im_info = {"width": im_width, "height": im_height}
        return img, im_scale, im_info

    def _process_feature_extraction(self, output, im_scales, im_infos,
                                    feature_name="fc6", conf_thresh=0):
        batch_size = len(output[0]["proposals"])
        n_boxes_per_image = [len(boxes) for boxes in output[0]["proposals"]]
        score_list = output[0]["scores"].split(n_boxes_per_image)
        score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
        feats = output[0][feature_name].split(n_boxes_per_image)
        cur_device = score_list[0].device

        feat_list = []
        info_list = []

        for i in range(batch_size):
            dets = output[0]["proposals"][i].bbox / im_scales[i]
            scores = score_list[i]
            max_conf = torch.zeros((scores.shape[0])).to(cur_device)
            conf_thresh_tensor = torch.full_like(max_conf, conf_thresh)
            start_index = 1
            # Column 0 of the scores matrix is for the background class
            if self.args.background:
                start_index = 0
            for cls_ind in range(start_index, scores.shape[1]):
                cls_scores = scores[:, cls_ind]
                keep = nms(dets, cls_scores, 0.5)
                max_conf[keep] = torch.where(
                    # Better than max one till now and minimally greater than conf_thresh
                    (cls_scores[keep] > max_conf[keep])
                    & (cls_scores[keep] > conf_thresh_tensor[keep]),
                    cls_scores[keep],
                    max_conf[keep],
                )

            sorted_scores, sorted_indices = torch.sort(max_conf, descending=True)
            num_boxes = (sorted_scores[:self.args.num_features] != 0).sum()
            keep_boxes = sorted_indices[:self.args.num_features]
            feat = feats[i][keep_boxes]
            feat_list.append(feat)
            bbox = output[0]["proposals"][i][keep_boxes].bbox / im_scales[i]
            # Normalize the boxes (to 0 ~ 1)
            img_h, img_w = im_infos[i]['height'], im_infos[i]['width']
            bbox[:, (0, 2)] /= img_w
            bbox[:, (1, 3)] /= img_h
            info_list.append(bbox)

        return feat_list, info_list

    def predict(self, eval_tuple: DataTuple, dump=None):
        """
        Predict the answers to questions in a data split.

        :param eval_tuple: The data tuple to be evaluated.
        :param dump: The path of saved file to dump results.
        :return: A dict of question_id to answer.
        """
        self.model.eval()
        dset, loader, evaluator = eval_tuple
        quesid2ans = {}

        import time
        import pickle
        from tqdm import tqdm
        import torchprof

        def run_batch(datum_tuple):
            # End-to-end pass: raw images -> detection features -> VQA logits.
            # (The warm-up and profiled loops below were originally two verbatim
            # copies of this body.)
            ques_id, img_paths, sent = datum_tuple[:3]  # Avoid seeing ground truth
            img_tensor, im_scales, im_infos = [], [], []
            for img_path in img_paths:
                im, im_scale, im_info = self._image_transform(img_path)
                img_tensor.append(im)
                im_scales.append(im_scale)
                im_infos.append(im_info)
            current_img_list = to_image_list(img_tensor, size_divisible=32)
            current_img_list = current_img_list.to("cuda")
            output = self.model.detection_model(current_img_list)
            # Get bounding boxes and pooled region features.
            feat_list, info_list = self._process_feature_extraction(
                output, im_scales, im_infos,
                self.args.feature_name,
                self.args.confidence_threshold,
            )
            feats = torch.stack(feat_list)
            boxes = torch.stack(info_list)
            logit = self.model(feats, boxes, sent)
            score, label = logit.max(1)
            return ques_id, label

        start = time.time()
        print('model set up, starting warming up prediction...')
        count = 0
        batches = 0
        # Warm up on two batches outside the profiler so CUDA initialization
        # does not pollute the timings.
        with torch.no_grad():
            for i, datum_tuple in tqdm(enumerate(loader)):
                run_batch(datum_tuple)
                batches += 1
                if batches >= 2:
                    break

        batches = 0
        count = 0
        print('model warmed up, starting predicting...')
        with torch.no_grad(), torchprof.Profile(self.model, use_cuda=True) as prof:
            for i, datum_tuple in tqdm(enumerate(loader)):
                ques_id, label = run_batch(datum_tuple)
                batches += 1
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans
                    count += 1
        print(prof.display(show_events=False))
        end = time.time()

        trace, event_lists_dict = prof.raw()
        with open(args.profile_save or 'profile.pk', 'wb') as f:
            pickle.dump(event_lists_dict, f)

        print('prediction finished!', end - start, batches, count)
        if dump is not None:
            evaluator.dump_result(quesid2ans, dump)
        return quesid2ans

    def evaluate(self, eval_tuple: DataTuple, dump=None):
        """Evaluate all data in data_tuple."""
        quesid2ans = self.predict(eval_tuple, dump)
        return eval_tuple.evaluator.evaluate(quesid2ans)

    @staticmethod
    def oracle_score(data_tuple):
        dset, loader, evaluator = data_tuple
        quesid2ans = {}
        for i, (ques_id, feats, boxes, sent, target) in enumerate(loader):
            _, label = target.max(1)
            for qid, l in zip(ques_id, label.cpu().numpy()):
                ans = dset.label2ans[l]
                quesid2ans[qid.item()] = ans
        return evaluator.evaluate(quesid2ans)

    def save(self, name):
        torch.save(self.model.state_dict(),
                   os.path.join(self.output, "%s.pth" % name))

    def load(self, path):
        # FIXME: load correct checkpoints
        print("Load model from %s" % path)
        state_dict = torch.load("%s.pth" % path)
        print(self.args.model_file)
        checkpoint = torch.load(self.args.model_file,
                                map_location=torch.device("cpu"))
        detection_stat_dict = checkpoint.pop("model")
        state_dict.update(detection_stat_dict)
        self.model.load_state_dict(state_dict)
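# _image_transform() above follows the usual detectron-style resize rule: scale the
# image so its short side reaches MIN_SIZE, unless that would push the long side
# past MAX_SIZE, in which case scale by the long side instead. The scale
# computation in isolation:
def compute_scale(height, width, min_size=800, max_size=1333):
    short, long = min(height, width), max(height, width)
    scale = min_size / short
    if round(scale * long) > max_size:   # cap the long side
        scale = max_size / long
    return scale

print(compute_scale(600, 1000))   # short side drives the scale: 800/600
print(compute_scale(500, 2000))   # long side capped instead: 1333/2000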
class VQA:
    def __init__(self):
        # Datasets
        self.train_tuple = get_data_tuple(args.train, bs=args.batch_size,
                                          shuffle=True, drop_last=True)
        if args.valid != "":
            self.valid_tuple = get_data_tuple(args.valid, bs=1024,
                                              shuffle=False, drop_last=False)
        else:
            self.valid_tuple = None

        # Model
        self.model = VQAModel(self.train_tuple.dataset.num_answers,
                              finetune_strategy=args.finetune_strategy)

        # If the fine-tune strategy is SpotTune, build the policy network
        if args.finetune_strategy in PolicyStrategies:
            self.policy_model = PolicyLXRT(PolicyStrategies[args.finetune_strategy])

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa, self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.finetune_strategy in PolicyStrategies:
            self.policy_model = self.policy_model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()
            self.policy_model.policy_lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr, warmup=0.1, t_total=t_total)
        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)

        # Optimizer for policy net
        if args.finetune_strategy in PolicyStrategies:
            self.policy_optim = args.policy_optimizer(self.policy_model.parameters(),
                                                      args.policy_lr)

        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)

    def train(self, train_tuple, eval_tuple, visualizer=None):
        dset, loader, evaluator = train_tuple
        iter_wrapper = (lambda x: tqdm(x, total=len(loader))) if args.tqdm else (lambda x: x)

        wandb.watch(self.model, log='all')
        if args.finetune_strategy in PolicyStrategies:
            wandb.watch(self.policy_model, log='all')

        best_valid = 0.
        for epoch in range(args.epochs):
            # For policy-vector plotting
            if args.finetune_strategy in PolicyStrategies:
                policy_save = torch.zeros(PolicyStrategies[args.finetune_strategy] // 2).cpu()
                policy_max = 0

            quesid2ans = {}
            for i, (ques_id, feats, boxes, sent, target) in iter_wrapper(enumerate(loader)):
                self.model.train()
                self.optim.zero_grad()
                if args.finetune_strategy in PolicyStrategies:
                    self.policy_model.train()
                    self.policy_optim.zero_grad()

                feats, boxes, target = feats.cuda(), boxes.cuda(), target.cuda()
                if args.finetune_strategy in PolicyStrategies:
                    # Calculate the policy vector here
                    policy_vec = self.policy_model(feats, boxes, sent)
                    policy_action = gumbel_softmax(policy_vec.view(policy_vec.size(0), -1, 2))
                    policy = policy_action[:, :, 1]
                    policy_save = policy_save + policy.clone().detach().cpu().sum(0)
                    policy_max += policy.size(0)
                    logit = self.model(feats, boxes, sent, policy)
                else:
                    logit = self.model(feats, boxes, sent)

                assert logit.dim() == target.dim() == 2
                loss = self.bce_loss(logit, target)
                loss = loss * logit.size(1)

                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), 5.)
                self.optim.step()
                if args.finetune_strategy in PolicyStrategies:
                    self.policy_optim.step()

                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans

            # Check if visualizer is not None
            if visualizer is not None:
                print(f'Creating training visualizations for epoch {epoch}')
                visualizer.plot(policy_save, policy_max, epoch=epoch, mode='train')

            train_acc = evaluator.evaluate(quesid2ans) * 100.
            log_str = "\nEpoch %d: Train %0.2f\n" % (epoch, train_acc)
            wandb.log({'Training Accuracy': train_acc})

            if self.valid_tuple is not None:  # Do Validation
                valid_score = self.evaluate(eval_tuple, epoch=epoch, visualizer=visualizer)
                if valid_score > best_valid:
                    best_valid = valid_score
                    self.save("BEST")

                log_str += "Epoch %d: Valid %0.2f\n" % (epoch, valid_score * 100.) + \
                           "Epoch %d: Best %0.2f\n" % (epoch, best_valid * 100.)
                wandb.log({'Validation Accuracy': valid_score * 100.})

            print(log_str, end='')

            with open(self.output + "/log.log", 'a') as f:
                f.write(log_str)
                f.flush()

        self.save("LAST")

    def predict(self, eval_tuple: DataTuple, dump=None, epoch=0, visualizer=None):
        """
        Predict the answers to questions in a data split.

        :param eval_tuple: The data tuple to be evaluated.
        :param dump: The path of saved file to dump results.
        :return: A dict of question_id to answer.
        """
        self.model.eval()
        if args.finetune_strategy in PolicyStrategies:
            self.policy_model.eval()
            policy_save = torch.zeros(PolicyStrategies[args.finetune_strategy] // 2)
            policy_max = 0

        dset, loader, evaluator = eval_tuple
        quesid2ans = {}
        for i, datum_tuple in enumerate(loader):
            ques_id, feats, boxes, sent = datum_tuple[:4]  # Avoid seeing ground truth
            with torch.no_grad():
                feats, boxes = feats.cuda(), boxes.cuda()
                if args.finetune_strategy in PolicyStrategies:
                    # Calculate the policy vector here
                    policy_vec = self.policy_model(feats, boxes, sent)
                    policy_action = gumbel_softmax(policy_vec.view(policy_vec.size(0), -1, 2))
                    policy = policy_action[:, :, 1]
                    policy_save = policy_save + policy.clone().detach().cpu().sum(0)
                    policy_max += policy.size(0)
                    logit = self.model(feats, boxes, sent, policy)
                else:
                    logit = self.model(feats, boxes, sent)
                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans

        if visualizer is not None:
            print(f'Creating validation visualization for epoch {epoch}...')
            visualizer.plot(policy_save, policy_max, epoch=epoch, mode='val')

        if dump is not None:
            evaluator.dump_result(quesid2ans, dump)
        return quesid2ans

    def evaluate(self, eval_tuple: DataTuple, dump=None, epoch=0, visualizer=None):
        """Evaluate all data in data_tuple."""
        quesid2ans = self.predict(eval_tuple, dump, epoch=epoch, visualizer=visualizer)
        return eval_tuple.evaluator.evaluate(quesid2ans)

    @staticmethod
    def oracle_score(data_tuple):
        dset, loader, evaluator = data_tuple
        quesid2ans = {}
        for i, (ques_id, feats, boxes, sent, target) in enumerate(loader):
            _, label = target.max(1)
            for qid, l in zip(ques_id, label.cpu().numpy()):
                ans = dset.label2ans[l]
                quesid2ans[qid.item()] = ans
        return evaluator.evaluate(quesid2ans)

    def save(self, name):
        torch.save(self.model.state_dict(),
                   os.path.join(self.output, "%s.pth" % name))

    def load(self, path):
        print("Load model from %s" % path)
        state_dict = torch.load("%s.pth" % path)
        self.model.load_state_dict(state_dict)
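# The SpotTune policy above draws a near-binary keep/adapt decision per block from
# the policy logits. The repo's gumbel_softmax helper isn't shown here; PyTorch's
# built-in F.gumbel_softmax(hard=True) behaves similarly (hard one-hot forward,
# soft gradients backward). A toy sketch with an assumed block count:
import torch
import torch.nn.functional as F

num_blocks = 12
policy_vec = torch.randn(4, num_blocks * 2)     # (batch, blocks * 2)
policy_action = F.gumbel_softmax(
    policy_vec.view(policy_vec.size(0), -1, 2), tau=1.0, hard=True, dim=-1)
policy = policy_action[:, :, 1]                 # 1 = route through fine-tuned block
print(policy.shape, policy.unique())            # (4, 12), values in {0, 1}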
class VQA:
    def __init__(self):
        # Datasets
        self.train_tuple = get_data_tuple(args.train, bs=args.batch_size,
                                          shuffle=True, drop_last=True)
        if args.valid != "":
            self.valid_tuple = get_data_tuple(args.valid, bs=1024,
                                              shuffle=False, drop_last=False)
        else:
            self.valid_tuple = None

        # Model
        self.model = VQAModel(self.train_tuple.dataset.num_answers)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa, self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr, warmup=0.1, t_total=t_total)
        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)

        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)

    def train(self, train_tuple, eval_tuple, adversarial=False,
              adv_batch_prob=0.0, attack_name=None, attack_params={}):
        dset, loader, evaluator = train_tuple
        iter_wrapper = (lambda x: tqdm(x, total=len(loader))) if args.tqdm else (lambda x: x)

        use_adv_batch = False
        best_valid = 0.
        for epoch in range(args.epochs):
            quesid2ans = {}
            # Count the number of batches that were adversarially perturbed
            n_adv_batches = 0
            for i, (ques_id, feats, boxes, sent, target) in iter_wrapper(enumerate(loader)):
                self.model.train()
                self.optim.zero_grad()

                feats, boxes, target = feats.cuda(), boxes.cuda(), target.cuda()

                # If doing adversarial training, perturb input features
                # with probability adv_batch_prob
                if adversarial:
                    rand = random.uniform(0, 1)
                    use_adv_batch = rand <= adv_batch_prob

                if use_adv_batch:
                    # Create adversary from given class name and parameters
                    n_adv_batches += 1
                    AdversaryClass_ = getattr(advertorch_module, attack_name)
                    adversary = AdversaryClass_(lambda x: self.model(x, boxes, sent),
                                                loss_fn=self.bce_loss,
                                                **attack_params)
                    # Perturb feats using adversary
                    feats = adversary.perturb(feats, target)

                logit = self.model(feats, boxes, sent)
                assert logit.dim() == target.dim() == 2
                loss = self.bce_loss(logit, target)
                loss = loss * logit.size(1)

                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), 5.)
                self.optim.step()

                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans

            log_str = "\nEpoch %d: Train %0.2f\n" % (epoch, evaluator.evaluate(quesid2ans) * 100.) + \
                      "Epoch %d: Num adversarial batches %d / %d\n" % (epoch, n_adv_batches, i + 1)

            if self.valid_tuple is not None:  # Do Validation
                valid_score = self.evaluate(eval_tuple)
                if valid_score > best_valid:
                    best_valid = valid_score
                    self.save("BEST")

                log_str += "Epoch %d: Valid %0.2f\n" % (epoch, valid_score * 100.) + \
                           "Epoch %d: Best %0.2f\n" % (epoch, best_valid * 100.)

            print(log_str, end='')

            with open(self.output + "/log.log", 'a') as f:
                f.write(log_str)
                f.flush()

        self.save("LAST")

    def predict(self, eval_tuple: DataTuple, dump=None):
        """
        Predict the answers to questions in a data split.

        :param eval_tuple: The data tuple to be evaluated.
        :param dump: The path of saved file to dump results.
        :return: A dict of question_id to answer.
        """
        self.model.eval()
        dset, loader, evaluator = eval_tuple
        quesid2ans = {}
        for i, datum_tuple in enumerate(loader):
            ques_id, feats, boxes, sent = datum_tuple[:4]  # Avoid seeing ground truth
            with torch.no_grad():
                feats, boxes = feats.cuda(), boxes.cuda()
                logit = self.model(feats, boxes, sent)
                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans
        if dump is not None:
            evaluator.dump_result(quesid2ans, dump)
        return quesid2ans

    def adversarial_predict(self, eval_tuple: DataTuple, dump=None,
                            attack_name='GradientAttack', attack_params={}):
        """
        Predict the answers to questions in a data split, but using a
        specified adversarial attack on the inputs.

        :param eval_tuple: The data tuple to be evaluated.
        :param dump: The path of saved file to dump results.
        :return: A dict of question_id to answer.
        """
        self.model.eval()
        dset, loader, evaluator = eval_tuple
        quesid2ans = {}
        sim_trace = []  # Track avg cos similarity across batches
        for i, datum_tuple in enumerate(tqdm(loader)):
            ques_id, feats, boxes, sent, target = datum_tuple
            feats, boxes, target = feats.cuda(), boxes.cuda(), target.cuda()

            # Create adversary from given class name and parameters
            AdversaryClass_ = getattr(advertorch_module, attack_name)
            adversary = AdversaryClass_(lambda x: self.model(x, boxes, sent),
                                        loss_fn=self.bce_loss,
                                        **attack_params)

            # Perturb feats using adversary
            feats_adv = adversary.perturb(feats, target)

            # Compute average cosine similarity between true
            # and perturbed features
            sim_trace.append(self.avg_cosine_sim(feats, feats_adv))

            # Compute prediction on adversarial examples
            with torch.no_grad():
                feats_adv = feats_adv.cuda()
                logit = self.model(feats_adv, boxes, sent)
                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans
        if dump is not None:
            evaluator.dump_result(quesid2ans, dump)
        print(f"Average cosine similarity across batches: {torch.mean(torch.Tensor(sim_trace))}")
        return quesid2ans

    def evaluate(self, eval_tuple: DataTuple, dump=None):
        """Evaluate all data in data_tuple."""
        quesid2ans = self.predict(eval_tuple, dump)
        return eval_tuple.evaluator.evaluate(quesid2ans)

    def adversarial_evaluate(self, eval_tuple: DataTuple, dump=None,
                             attack_name='GradientAttack', attack_params={}):
        """Evaluate model on adversarial inputs"""
        quesid2ans = self.adversarial_predict(eval_tuple, dump,
                                              attack_name, attack_params)
        return eval_tuple.evaluator.evaluate(quesid2ans)

    def avg_cosine_sim(self, feats: torch.Tensor, feats_adv: torch.Tensor):
        """Computes the average cosine similarity between true and
        adversarial examples"""
        return nn.functional.cosine_similarity(feats, feats_adv, dim=-1).mean()

    @staticmethod
    def oracle_score(data_tuple):
        dset, loader, evaluator = data_tuple
        quesid2ans = {}
        for i, (ques_id, feats, boxes, sent, target) in enumerate(loader):
            _, label = target.max(1)
            for qid, l in zip(ques_id, label.cpu().numpy()):
                ans = dset.label2ans[l]
                quesid2ans[qid.item()] = ans
        return evaluator.evaluate(quesid2ans)

    def save(self, name):
        torch.save(self.model.state_dict(),
                   os.path.join(self.output, "%s.pth" % name))

    def load(self, path):
        print("Load model from %s" % path)
        state_dict = torch.load("%s.pth" % path)
        self.model.load_state_dict(state_dict)
class VQA:
    def __init__(self, attention=False):
        # Datasets
        print("Fetching data")
        self.train_tuple = get_data_tuple(args.train, bs=args.batch_size,
                                          shuffle=True, drop_last=True,
                                          dataset_name="test")
        print("Got data")
        print("fetching val data")
        if args.valid != "":
            self.valid_tuple = get_data_tuple(args.valid, bs=args.batch_size,
                                              shuffle=False, drop_last=False,
                                              dataset_name="test")
            print("got data")
        else:
            self.valid_tuple = None
        print("Got data")

        # Model
        print("Making model")
        self.model = VQAModel(self.train_tuple.dataset.num_answers, attention)
        print("Ready model")

        # Print model info:
        print("Num of answers:")
        print(self.train_tuple.dataset.num_answers)
        # print("Model info:")
        # print(self.model)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa, self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr, warmup=0.1, t_total=t_total)
        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)

        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)

    def train(self, train_tuple, eval_tuple):
        log_freq = 810
        dset, loader, evaluator = train_tuple
        iter_wrapper = (lambda x: tqdm(x, total=len(loader))) if args.tqdm else (lambda x: x)

        best_valid = 0.
        flag = True
        for epoch in range(args.epochs):
            quesid2ans = {}
            correct = 0
            total_loss = 0
            total = 0
            print("Len of the dataloader: ", len(loader))
            # Our new TGIFQA-Dataset returns:
            #     return gif_tensor, self.questions[i], self.ans2id[self.answer[i]]
            for i, (feats1, feats2, sent, target) in iter_wrapper(enumerate(loader)):
                ques_id, boxes = -1, None
                self.model.train()
                self.optim.zero_grad()

                feats1, feats2, target = feats1.cuda(), feats2.cuda(), target.cuda()
                feats = [feats1, feats2]
                logit = self.model(feats, boxes, sent)
                assert logit.dim() == target.dim() == 2
                loss = self.bce_loss(logit, target)
                loss = loss * logit.size(1)
                total_loss += loss.item()

                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), 5.)
                self.optim.step()

                score, label = logit.max(1)
                score_t, target = target.max(1)
                correct += (label == target).sum().cpu().numpy()
                total += len(label)

                # if epoch > -1:
                #     for l, s, t in zip(label, sent, target):
                #         print(l)
                #         print(s)
                #         print("Prediction", loader.dataset.label2ans[int(l.cpu().numpy())])
                #         print("Answer", loader.dataset.label2ans[int(t.cpu().numpy())])

                if i % log_freq == 1 and i > 1:
                    results = []
                    for l, s, t in zip(label, sent, target):
                        result = []
                        result.append(s)
                        result.append("Prediction: {}".format(
                            loader.dataset.label2ans[int(l.cpu().numpy())]))
                        result.append("Answer: {}".format(
                            loader.dataset.label2ans[int(t.cpu().numpy())]))
                        results.append(result)
                    torch.cuda.empty_cache()
                    val_loss, val_acc, val_results = self.val(eval_tuple)
                    logger.log(total_loss / total, correct / total * 100,
                               val_loss, val_acc, epoch, results, val_results)

            print("==" * 30)
            print("Accuracy = ", correct / total * 100)
            print("Loss =", total_loss / total)
            print("==" * 30)

            # log_str = "\nEpoch %d: Train %0.2f\n" % (epoch, evaluator.evaluate(quesid2ans) * 100.)
            # if self.valid_tuple is not None:  # Do Validation
            #     valid_score = self.evaluate(eval_tuple)
            #     if valid_score > best_valid:
            #         best_valid = valid_score
            #         self.save("BEST")
            #     log_str += "Epoch %d: Valid %0.2f\n" % (epoch, valid_score * 100.) + \
            #                "Epoch %d: Best %0.2f\n" % (epoch, best_valid * 100.)
            # print(log_str, end='')
            # with open(self.output + "/log.log", 'a') as f:
            #     f.write(log_str)
            #     f.flush()

            self.save("Check" + str(epoch))

    def val(self, eval_tuple):
        dset, loader, evaluator = eval_tuple
        iter_wrapper = (lambda x: tqdm(x, total=len(loader))) if args.tqdm else (lambda x: x)
        self.model.eval()

        best_valid = 0.
        flag = True
        quesid2ans = {}
        correct = 0
        total_loss = 0
        total = 0
        results = []
        print("Len of the dataloader: ", len(loader))
        # Our new TGIFQA-Dataset returns:
        #     return gif_tensor, self.questions[i], self.ans2id[self.answer[i]]
        with torch.no_grad():
            for i, (feats1, feats2, sent, target) in iter_wrapper(enumerate(loader)):
                ques_id, boxes = -1, None
                feats1, feats2, target = feats1.cuda(), feats2.cuda(), target.cuda()
                feats = [feats1, feats2]
                logit = self.model(feats, boxes, sent)
                assert logit.dim() == target.dim() == 2
                loss = self.bce_loss(logit, target)
                loss = loss * logit.size(1)
                total_loss += loss.item()

                score, label = logit.max(1)
                score_t, target = target.max(1)
                correct += (label == target).sum().cpu().numpy()
                total += len(label)

                for l, s, t in zip(label, sent, target):
                    result = []
                    result.append(s)
                    result.append("Prediction: {}".format(
                        loader.dataset.label2ans[int(l.cpu().numpy())]))
                    result.append("Answer: {}".format(
                        loader.dataset.label2ans[int(t.cpu().numpy())]))
                    results.append(result)

        return total_loss / total, correct / total * 100, results

    def predict(self, eval_tuple: DataTuple, dump=None):
        """
        Predict the answers to questions in a data split.

        :param eval_tuple: The data tuple to be evaluated.
        :param dump: The path of saved file to dump results.
        :return: A dict of question_id to answer.
        """
        self.model.eval()
        dset, loader, evaluator = eval_tuple
        quesid2ans = {}
        for i, datum_tuple in enumerate(loader):
            ques_id, feats, boxes, sent = datum_tuple[:4]  # Avoid seeing ground truth
            with torch.no_grad():
                feats, boxes = feats.cuda(), boxes.cuda()
                logit = self.model(feats, boxes, sent)
                score, label = logit.max(1)
                for qid, l in zip(ques_id, label.cpu().numpy()):
                    ans = dset.label2ans[l]
                    quesid2ans[qid.item()] = ans
        if dump is not None:
            evaluator.dump_result(quesid2ans, dump)
        return quesid2ans

    def evaluate(self, eval_tuple: DataTuple, dump=None):
        """Evaluate all data in data_tuple."""
        quesid2ans = self.predict(eval_tuple, dump)
        return eval_tuple.evaluator.evaluate(quesid2ans)

    @staticmethod
    def oracle_score(data_tuple):
        dset, loader, evaluator = data_tuple
        quesid2ans = {}
        for i, (ques_id, feats, boxes, sent, target) in enumerate(loader):
            _, label = target.max(1)
            for qid, l in zip(ques_id, label.cpu().numpy()):
                ans = dset.label2ans[l]
                quesid2ans[qid.item()] = ans
        return evaluator.evaluate(quesid2ans)

    def save(self, name):
        torch.save(self.model.state_dict(),
                   os.path.join(self.output, "%s.pth" % name))

    def load(self, path):
        print("Load model from %s" % path)
        state_dict = torch.load("%s.pth" % path)
        self.model.load_state_dict(state_dict)
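# val() above scores accuracy by comparing the predicted argmax against the argmax
# of the (possibly soft) target distribution. A self-contained illustration with
# toy logits and targets:
import torch

logit = torch.tensor([[0.1, 2.0, -1.0], [1.5, 0.0, 0.2]])
target = torch.tensor([[0.0, 1.0, 0.0], [0.0, 0.3, 0.9]])
_, label = logit.max(1)    # predicted answer ids -> tensor([1, 0])
_, gold = target.max(1)    # highest-scoring gold answers -> tensor([1, 2])
correct = (label == gold).sum().item()
print(correct / len(label) * 100)   # 50.0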