def __getitem__(self, idx):
    ent = self.data[idx]
    sent_id = ent['sentence']['sent_id']
    file_name = ent['image_info']['file_name']
    img_id = ent['image_id']
    ans = ent['category_id']
    W = ent['image_info']['width']
    H = ent['image_info']['height']
    que = ent['sentence']['sent']
    gtbox = ent['gtbox']
    gtbox = torch.tensor(gtbox)
    # boxes from refcoco are in xywh format
    gtboxorig = convert_xywh_x1y1x2y2(gtbox.unsqueeze(0)).squeeze(0)
    box_coords = ent['boxes']
    box_coords = torch.tensor(box_coords)
    L, W, H, box_feats, box_coords_6d, box_coordsorig = self._load_image_coco(img_id)
    box_coords_6d = torch.from_numpy(box_coords_6d)
    # boxes in h4files are in x1 y1 x2 y2 format
    iou = getIOU(gtboxorig.unsqueeze(0), torch.from_numpy(box_coordsorig))
    correct = iou > 0.5
    _, idx = torch.max(iou, dim=0)
    # print(iou, iou.shape, box_coordsorig, "index", idx)
    gtboxiou = box_coordsorig[idx]
    gtboxiou = torch.from_numpy(gtboxiou)
    tokens = tokenize_ques(self.dictionary, que)
    qfeat = torch.from_numpy(tokens).long()
    Lvec = torch.zeros(100).long()
    Lvec[:L] = 1
    return sent_id, ans, box_feats, box_coordsorig, box_coords_6d.float(), \
        gtboxorig.float(), qfeat, Lvec, idx, correct.view(-1)
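# --- Assumed helper sketches (not the repo's actual implementations) ---
# The loaders in this file call convert_xywh_x1y1x2y2 and getIOU, which are
# defined elsewhere. The minimal versions below only illustrate the conventions
# the call sites assume: boxes are converted from [x, y, w, h] to
# [x1, y1, x2, y2], and getIOU computes IoU elementwise over the last dim with
# normal broadcasting. Treat them as a sketch, not the original code.
import torch

def convert_xywh_x1y1x2y2(boxes):
    """[x, y, w, h] -> [x1, y1, x2, y2] over the last dim (assumed behavior)."""
    x1y1 = boxes[..., :2]
    x2y2 = boxes[..., :2] + boxes[..., 2:]
    return torch.cat([x1y1, x2y2], dim=-1)

def getIOU(boxes_a, boxes_b):
    """IoU of [x1, y1, x2, y2] boxes, broadcast over leading dims (assumed behavior)."""
    a = boxes_a.float()
    b = boxes_b.float()
    lt = torch.max(a[..., :2], b[..., :2])          # intersection top-left
    rb = torch.min(a[..., 2:], b[..., 2:])          # intersection bottom-right
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    area_a = (a[..., 2] - a[..., 0]) * (a[..., 3] - a[..., 1])
    area_b = (b[..., 2] - b[..., 0]) * (b[..., 3] - b[..., 1])
    return inter / (area_a + area_b - inter)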
def __getitem__(self, idx):
    ent = self.data[idx]
    sent_id = ent['question_id']
    img_id = ent['image_id']
    ans = 0
    W = ent['width']
    H = ent['height']
    que = ent['question']
    # 0 to N ground-truth boxes
    gtbox = ent['gtbox']
    L_gtboxes = len(gtbox)
    Max_box = 15  # the max number of boxes in VQD
    if len(gtbox[0]) == 0:
        gtbox = [[0, 0, 1, 1.0]] * Max_box
    else:
        gtbox = gtbox + [[0, 0, 1, 1.0]] * (Max_box - L_gtboxes)
    gtbox = torch.tensor(gtbox).float()
    # boxes from refcoco are in xywh format
    gtboxorig = convert_xywh_x1y1x2y2(gtbox)
    L, W, H, box_feats, box_coords_6d, box_coordsorig = self._load_image_coco(img_id)
    box_coords_6d = torch.from_numpy(box_coords_6d)
    # boxes in h4files are in x1 y1 x2 y2 format
    iou = getIOU(gtboxorig.unsqueeze(1), torch.from_numpy(box_coordsorig)).squeeze(-1)
    correct = iou > 0.5
    correct = correct.sum(dim=0).clamp(max=1)
    _, idxall = torch.max(iou, dim=1)
    # more than one index may reach the max IoU; sampling one was the intent,
    # but the first index is used here
    # idx = torch.tensor([int(np.random.choice(idxall))])
    idx = torch.tensor([int(idxall[0])])
    # print(iou, iou.shape, box_coordsorig, "index", idx)
    gtboxiou = box_coordsorig[idx]
    gtboxiou = torch.from_numpy(gtboxiou)
    tokens = tokenize_ques(self.dictionary, que)
    qfeat = torch.from_numpy(tokens).long()
    # total number of entries
    N = box_coordsorig.shape[0]
    Lvec = torch.zeros(N).long()
    Lvec[:L] = 1
    return sent_id, ans, box_feats, box_coordsorig, box_coords_6d.float(), \
        gtboxorig[0].float(), qfeat, Lvec, idx, correct.view(-1)
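# Assumed sketch of tokenize_ques used by the loaders above. It is treated as a
# dict-like mapping from lowercased words to integer ids, and questions are
# zero-padded or truncated to a fixed length; the max_len of 14 is an
# assumption, not necessarily the repo's value.
import numpy as np

def tokenize_ques(dictionary, question, max_len=14):
    """Map a question string to a fixed-length array of word ids (assumed behavior)."""
    words = question.lower().replace("?", " ").split()
    ids = [dictionary.get(w, 0) for w in words]   # 0 doubles as <pad>/<unk> here
    tokens = np.zeros(max_len, dtype=np.int64)
    tokens[:min(len(ids), max_len)] = ids[:max_len]
    return tokens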
def saveimage(ent, boxes):
    image = ent['image_info']['file_name']
    if "train2014" in image:
        image = os.path.join('/home/manoj/train2014', image)
    else:
        image = os.path.join('/home/manoj/val2014', image)
    image_id = ent['image_id']
    sent_id = ent['sentence']['sent_id']
    npimg = Image.open(image)
    plt.figure()
    plt.imshow(npimg)
    ansidx = ent['gtnms']
    scores = ent['scores']
    classify = ent['cls']
    clspred = ent['pred']
    cocogtbox = torch.tensor(ent['gtbox'])
    cocogtbox = convert_xywh_x1y1x2y2(cocogtbox.unsqueeze(0)).squeeze(0)
    predbox = torch.tensor(boxes[clspred]).unsqueeze(0)
    bottomupgtbox = torch.tensor(boxes[ansidx])
    iou_cocogt = getIOU(predbox, cocogtbox).item()
    iou_bottomupgt = getIOU(predbox, bottomupgtbox).item()
    for i in range(ent['L']):
        xmin, ymin, xmax, ymax = boxes[i]
        x = [xmin, ymin, xmax, ymax]
        rect = retbox(x)
        alpha = np.abs(scores[i]) / np.max(np.abs(scores))
        # alpha = abs(scores[i])
        if i == ansidx:
            plt.plot(rect[:, 0], rect[:, 1], 'g', linewidth=3.0)
            loc = (xmin, 0.5 * (ymin + ymax))
            plt.text(*loc, "{:.2f}, {:d}".format(scores[i], classify[i]), color='g', fontsize=8)
        if i == clspred:
            plt.plot(rect[:, 0], rect[:, 1], 'r-.', linewidth=2.0)
            loc = (0.5 * (xmin + xmax), 0.5 * (ymin + ymax))
            plt.text(*loc, "{:.2f}, {:d}".format(scores[i], classify[i]), color='r', fontsize=8)
        else:
            plt.plot(rect[:, 0], rect[:, 1], 'y', alpha=alpha, linewidth=1.0)
            plt.text(xmin, ymin, "{:.2f}[{:d}]".format(scores[i], classify[i]), color='c', fontsize=7, alpha=alpha)
    cocogt = retbox(ent['gtbox'], format='xywh')
    plt.plot(cocogt[:, 0], cocogt[:, 1], 'k', linewidth=3.0)
    question = ent['sentence']['raw']
    imglast = image.split("/")[-1]
    plt.title("Pred index: {} .. G = GT, K = COCOGT, R = pred".format(clspred))
    plt.xlabel("{}".format(question))
    plt.ylabel("IOU COCO: {:.2f}, BUP: {:.2f}".format(iou_cocogt, iou_bottomupgt))
    path = os.path.join(DIR, "ann_{}_{}".format(sent_id, imglast))
    if iou_cocogt >= 0.5:
        plt.savefig(path, dpi=150)
    plt.close()
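# Assumed sketch of retbox used by saveimage above. From the call sites it
# appears to return the closed corner polyline of a box so plt.plot can trace
# its outline; the format='xywh' keyword is taken from the call above, and the
# conversion shown here is an assumption about its behavior.
import numpy as np

def retbox(box, format='x1y1x2y2'):
    """Return a (5, 2) array of corners tracing the box outline (assumed behavior)."""
    if format == 'xywh':
        x, y, w, h = box
        x1, y1, x2, y2 = x, y, x + w, y + h
    else:
        x1, y1, x2, y2 = box
    # close the loop so the last point repeats the first corner
    return np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2], [x1, y1]])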
def __getitem__(self, idx):
    ent = self.data[idx]
    sent_id = ent['sentence']['sent_id']
    file_name = ent['image_info']['file_name']
    img_id = ent['image_id']
    ans = ent['category_id']
    W = ent['image_info']['width']
    H = ent['image_info']['height']
    que = ent['sentence']['sent']
    gtbox = ent['gtbox']
    gtbox = torch.tensor(gtbox)
    # boxes from refcoco are in xywh format
    gtboxorig = convert_xywh_x1y1x2y2(gtbox.unsqueeze(0)).squeeze(0)
    box_coords = ent['boxes']
    box_coords = torch.tensor(box_coords)
    if 'train' in file_name:
        pk = pickle.load(open(osp.join("feats", "train2014", file_name + ".pkl"), "rb"))
    elif 'val' in file_name:
        pk = pickle.load(open(osp.join("feats", "val2014", file_name + ".pkl"), "rb"))
    L = len(pk) - 1  # number of box entries in the pickle file (last entry is the whole image)
    if L <= 0:
        L = 1
    N = 20  # maximum entries to use
    L = 20  # comment this line out if you want variable L
    if L > N:
        L = N
    box_feats = np.zeros((N, 2048), dtype=np.float32)
    box_coords = np.zeros((N, 4))
    for i, box_ent in enumerate(pk[:-1]):  # renamed from `ent` to avoid shadowing the data entry
        if i == N:
            break
        box_feats[i, :] = box_ent['feat'].flatten()
        box_coords[i, :] = np.array(box_ent['coords'])
    lastent = pk[-1]
    wholefeat = lastent['image']
    W = lastent['w']
    H = lastent['h']
    box_feats = torch.from_numpy(box_feats)
    box_coordsorig = box_coords
    box_coords_6d = self._process_boxes(box_coords, W, H)
    box_coords_6d = torch.from_numpy(box_coords_6d)
    # boxes in h4files are in x1 y1 x2 y2 format
    iou = getIOU(gtboxorig.unsqueeze(0), torch.from_numpy(box_coordsorig).float())
    correct = iou > 0.5
    _, idx = torch.max(iou, dim=0)
    # print(iou, iou.shape, box_coordsorig, "index", idx)
    gtboxiou = box_coordsorig[idx]
    gtboxiou = torch.from_numpy(gtboxiou)
    tokens = tokenize_ques(self.dictionary, que)
    qfeat = torch.from_numpy(tokens).long()
    Lvec = torch.zeros(N).long()
    Lvec[:L] = 1
    return sent_id, ans, box_feats, box_coordsorig, box_coords_6d.float(), \
        gtboxorig.float(), qfeat, Lvec, idx, correct.view(-1)
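# Assumed sketch of _process_boxes (a method in the dataset class above, written
# here as a free function). box_coords_6d appears to be a normalized spatial
# encoding of each [x1, y1, x2, y2] box; the six features chosen below
# (normalized corners plus normalized width and height) are a common choice but
# are an assumption, not necessarily the repo's exact encoding.
import numpy as np

def _process_boxes(box_coords, W, H):
    """Normalize (N, 4) x1y1x2y2 boxes into an (N, 6) spatial feature (assumed encoding)."""
    boxes = np.asarray(box_coords, dtype=np.float32)
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    feats = np.stack([x1 / W, y1 / H, x2 / W, y2 / H,
                      (x2 - x1) / W, (y2 - y1) / H], axis=1)
    return feats.astype(np.float32)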
print(kid, len(qid2ent[kid]))

#%%
# box functions

def xywh_to_xyxy(boxes):
    """Convert [x y w h] box format to [x1 y1 x2 y2] format."""
    return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1))

def xyxy_to_xywh(boxes):
    """Convert [x1 y1 x2 y2] box format to [x y w h] format."""
    return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))

acc = 0
L = len(testds.data)
for ent in testds.data:
    imgid = ent['image_id']
    gtbox_xywh = np.array([ent['gtbox']])
    boxes_xywh = np.array([b['box'] for b in qid2ent[imgid]])
    gtbox_xyxy = torch.from_numpy(xywh_to_xyxy(gtbox_xywh))
    boxes_xyxy = torch.from_numpy(xywh_to_xyxy(boxes_xywh))
    ious = eval_extra.getIOU(gtbox_xyxy, boxes_xyxy) > 0.5
    iou = ious.sum().item()
    if iou >= 1:
        acc += 1.0

print("\nAccuracy using Mattnet Boxes {:.2f}%".format(100 * acc / L))