def __call__(self, cri_out, net_out, batch):
    """Compute top-k accuracy for every prediction head.

    `net_out` is iterated as a sequence of per-head logit tensors; one
    metric entry is emitted per (k, head) pair.
    """
    results = {}
    # Ground-truth class ids, moved to CPU once for all heads.
    targets = batch['class_id'].data.cpu()
    for head_idx, head_logits in enumerate(net_out):
        head_acc = accuracy(head_logits.data.cpu(), targets, topk=self.topk)
        for value, k in zip(head_acc, self.topk):
            results['accuracy_top{}_head,{}'.format(k, head_idx)] = value
    return results
def forward(self, cri_out, net_out, batch):
    """Report top-k accuracy for the plain, RuBi-fused, and question-only logits.

    One metric per (branch suffix, k) pair, keyed 'accuracy{suffix}_top{k}'.
    """
    metrics = {}
    # Targets are shared by all three branches; fetch them once.
    targets = batch['class_id'].data.cpu()
    for suffix in ('', '_rubi', '_q'):
        branch_scores = accuracy(
            net_out[f'logits{suffix}'].data.cpu(), targets, topk=self.topk)
        for k, score in zip(self.topk, branch_scores):
            metrics[f'accuracy{suffix}_top{k}'] = score
    return metrics
def __call__(self, cri_out, net_out, batch):
    """Return the top-k accuracies of `net_out['logits']` against the batch labels."""
    scores = accuracy(
        net_out['logits'].data.cpu(),
        batch['class_id'].data.cpu(),
        topk=self.topk,
    )
    return {'accuracy_top{}'.format(k): v for k, v in zip(self.topk, scores)}
def forward(self, cri_out, net_out, batch):
    """Evaluate a counting-VQA batch: accuracy metrics plus COCO grounding IoU.

    FIX: removed two live ``breakpoint()`` calls left in the evaluation path
    (one before the hard-attention append, one in the mAP ``except`` handler);
    they would halt any non-interactive run waiting for a debugger. Dead
    commented-out code was dropped as well. All other behavior is unchanged.

    Args:
        cri_out: criterion output (unused here).
        net_out: dict with at least 'logits'; optionally 'pred',
            'counter-pred', 'final_attention_map', 'scores'.
        batch: dict with 'class_id', 'answer', 'raw_question', and optionally
            'issimple', 'gt_bboxes', 'coord', image sizes, ids.

    Returns:
        dict of scalar metrics ('accuracy_top{k}', and 'iou' when grounding
        inputs are present). Per-category answers / IoU lists are accumulated
        on ``self`` as a side effect.
    """
    out = {}
    logits = net_out["logits"].data.cpu()
    class_id = batch["class_id"]
    acc_out = accuracy(logits, class_id.data.cpu(), topk=self.topk)
    for i, k in enumerate(self.topk):
        out["accuracy_top{}".format(k)] = acc_out[i]

    # Compute accuracy on simple and difficult examples, bucketed into
    # overlapping categories per example.
    answers = torch.argmax(logits, dim=1)
    for i in range(len(net_out["logits"])):
        pred = answers[i].item()
        gt = batch["answer"][i]
        categories = {"overall"}
        # simple/complex split is only available when the dataset tags it.
        if "issimple" in batch and batch["issimple"][i]:
            main_cat = "simple"
            categories.add("simple")
        elif "issimple" in batch and not batch["issimple"][i]:
            main_cat = "complex"
            categories.add("complex")
        else:
            main_cat = None
        # Per-answer-number categories.
        categories.add(f"overall-{gt}")
        if main_cat is not None:
            categories.add(f"{main_cat}-{gt}")
        # Question-phrasing categories.
        if any(word in batch["raw_question"][i] for word in [
                "left of", "right of", "behind", "front of",
        ]):
            categories.add("positional")
        if any(word in batch["raw_question"][i] for word in ["type", "types"]):
            categories.add("type")
        if int(batch["answer"][i]) % 2 == 0:
            categories.add("even")
        if int(batch["answer"][i]) % 2 == 1:
            categories.add("odd")
        # Datasets may declare "own"/"opposite" answer-number splits.
        if hasattr(self.engine.dataset[self.mode], "own_numbers"):
            own_numbers = self.engine.dataset[self.mode].own_numbers
            opposite_numbers = self.engine.dataset[self.mode].opposite_numbers
            if int(batch["answer"][i]) in own_numbers:
                categories.add("overall-own")
                if main_cat is not None:
                    categories.add(f"{main_cat}-own")
            if int(batch["answer"][i]) in opposite_numbers:
                categories.add("overall-opposite")
                if main_cat is not None:
                    categories.add(f"{main_cat}-opposite")
        # Record this example's prediction under every category it belongs to.
        for cat in categories:
            if "pred" in net_out:
                self.answers[cat]["pred"].append(net_out["pred"][i].item())
            self.answers[cat]["ans"].append(pred)
            self.answers[cat]["gt"].append(gt)
            if "final_attention_map" in net_out:
                # Hard count: number of attention cells above 0.5.
                thresh_prediction = (
                    net_out["final_attention_map"][i] > 0.5).sum()
                self.answers[cat]["hard.ans"].append(
                    thresh_prediction.item())

    # GROUNDING
    if "scores" in net_out and "gt_bboxes" in batch:
        Logger()("Computing COCO grounding")
        bsize = logits.shape[0]
        # Method 1: IoU between GT boxes and score-thresholded proposals.
        ious = []
        threshold = self.score_threshold_grounding
        for i in range(bsize):
            gt = batch["answer"][i]
            scores = net_out["scores"][i]  # presumably (regions, 1) — TODO confirm
            selection = (scores >= threshold).view((scores.shape[0], ))
            coords = batch["coord"][i]
            coord_thresh = coords[selection]
            iou, inter, union, ioo, iogt = compute_iou(
                batch["gt_bboxes"][i], coord_thresh.cpu().numpy())
            ious.append(iou)
            self.ious["overall"].append(iou)
            self.ious[gt].append(iou)
            if batch["answer"][i] != 0:
                self.ious_nonzero["overall"].append(iou)
                self.ious_nonzero[gt].append(iou)
            # Method 2: pixel-mask IoU with score-accumulated proposal map.
            width = batch["img_width"][i]
            height = batch["img_height"][i]
            img_gt = np.full((width, height), False, dtype=bool)  # (x, y)
            img_proposed = np.zeros((width, height))
            for bbox in batch["gt_bboxes"][i]:
                x, y, x2, y2 = [round(x) for x in bbox]
                img_gt[x:x2, y:y2] = True
            scores = net_out["scores"][i]
            candidate_bbox = list(
                zip(
                    batch["coord"][i].tolist(),
                    scores.view((scores.shape[0], )).cpu().tolist(),
                ))
            for bbox, score in candidate_bbox:
                x, y, x2, y2 = [round(x) for x in bbox]
                img_proposed[x:x2, y:y2] += score
            thresh = img_proposed >= threshold
            intersection = thresh & img_gt
            union = thresh | img_gt
            union_sum = union.sum()
            inter_sum = intersection.sum()
            thresh_sum = thresh.sum()
            img_gt_sum = img_gt.sum()
            # Empty denominators count as a perfect score by convention here.
            if union_sum == 0:
                iou_sum = 1.0
            else:
                iou_sum = inter_sum / union_sum
            if thresh_sum != 0:
                ioo_sum = inter_sum / thresh_sum
            else:
                ioo_sum = 1.0
            if img_gt_sum != 0:
                iogt_sum = inter_sum / img_gt_sum
            else:
                iogt_sum = 1.0
            self.ious_sum["overall"].append(iou_sum)
            self.ious_sum[gt].append(iou_sum)
            if batch["answer"][i] != 0:
                self.ious_sum_nonzero["overall"].append(iou_sum)
                self.ious_sum_nonzero[gt].append(iou_sum)
            # Method 3: score-weighted per-box IoU, normalized by the count.
            iou_boxes = 0
            iogt_boxes = 0
            ioo_boxes = 0
            for bbox, score in candidate_bbox:
                iou_box, _, _, ioo_box, iogt_box = compute_iou(
                    batch["gt_bboxes"][i], [bbox])
                iou_boxes += iou_box * score
                ioo_boxes += ioo_box * score
                iogt_boxes += iogt_box * score
            if "pred" in net_out:
                pred = net_out["pred"][i].item()
            elif "counter-pred" in net_out:
                pred = net_out["counter-pred"][i].item()
            # NOTE(review): if neither key is present, `pred` is stale from
            # the accuracy loop above; and pred == 0 raises ZeroDivisionError.
            # Behavior kept as-is — confirm upstream guarantees.
            iou_boxes_norm = iou_boxes / pred
            ioo_boxes_norm = ioo_boxes / pred
            iogt_boxes_norm = iogt_boxes / pred
            # Average precision inputs:
            #   predicted boxes [n, 4], classes [n], confidences [n];
            #   ground-truth boxes [m, 4], classes [m].
            pred_bb = coords.cpu().numpy()
            pred_cls = np.zeros((len(pred_bb)))
            pred_conf = scores.view((scores.shape[0], )).cpu().numpy()
            gt_bb = np.array(batch["gt_bboxes"][i])
            gt_cls = np.zeros(len(gt_bb))
            if len(gt_bb) > 0:
                for t in THRESHOLDS_mAP:
                    try:
                        self.mean_ap[t].evaluate(pred_bb, pred_cls,
                                                 pred_conf, gt_bb, gt_cls)
                    except IndexError:
                        # Best-effort: log and continue instead of halting
                        # the whole evaluation (breakpoint() removed).
                        traceback.print_exc()
            self.all_ious.append([
                batch["question_id"][i],
                batch["name"][i],
                gt,
                batch["gt_bboxes"][i],
                pred,
                round(pred),
                candidate_bbox,
                iou,
                iou_sum,
                ioo,
                ioo_sum,
                iogt,
                iogt_sum,
                iou_boxes,
                iou_boxes_norm,
                ioo_boxes,
                ioo_boxes_norm,
                iogt_boxes,
                iogt_boxes_norm,
            ])
        iou = np.mean(ious)
        out["iou"] = iou
    return out
def forward(self, cri_out, net_out, batch):
    """Evaluate predicate classification and accumulate recall@50/100 stats.

    Computes top-1/top-5 predicate accuracy on positive targets, then builds
    per-image detection triplets (using ground-truth subject/object boxes and
    classes with the top-k predicted predicates) and feeds them to
    ``vrd_utils.eval_batch``, accumulating tp/fp/score/num_gt counters on
    ``self`` for recall at R in {50, 100}.

    Args:
        cri_out: criterion output (unused here).
        net_out: dict with 'rel_scores' — presumably (num_pairs, num_predicates)
            relation logits; TODO confirm shape against the model.
        batch: dict with 'rels', 'objects', 'batch_id', 'target_cls_id',
            'subject_cls_id', 'object_cls_id', raw box tensors.

    Returns:
        dict with 'rel_scores', 'accuracy_top1', 'accuracy_top5'.
    """
    out = {}
    nb_classes = net_out['rel_scores'].size(1)
    out['rel_scores'] = net_out['rel_scores']
    det_labels, det_boxes = [], []
    gt_labels, gt_boxes = [], []
    # Multi-label scores: sigmoid, not softmax.
    rel_scores = torch.sigmoid(net_out['rel_scores'])
    # Accuracy is measured only on pairs with a positive predicate id.
    acc_out = accuracy(
        rel_scores[(batch['target_cls_id'] > 0)],
        batch['target_cls_id'][batch['target_cls_id'] > 0],
        topk=[1,5])
    out['accuracy_top1'] = acc_out[0].item()
    out['accuracy_top5'] = acc_out[1].item()
    for batch_id in range(len(batch['rels'])):
        # Mask of pairs belonging to this image AND having a positive target.
        _index = (batch['batch_id'] == batch_id) * (batch['target_cls_id'] > 0)
        n_index = int(_index.int().sum().cpu().data)
        # object_id -> [x, y, w, h] lookup for this image.
        oid_to_box = {
            obj['object_id']: [obj['x'], obj['y'], obj['w'], obj['h']] \
            for obj in batch['objects'][batch_id]
        }
        rel_pred = rel_scores[_index]
        subj_pred_boxes = batch['subject_boxes_raw'][_index]
        obj_pred_boxes = batch['object_boxes_raw'][_index]
        subj_gt_boxes = np.array([
            oid_to_box[rel['subject_id']] \
            for rel in batch['rels'][batch_id]
        ])
        obj_gt_boxes = np.array([
            oid_to_box[rel['object_id']] \
            for rel in batch['rels'][batch_id]
        ])
        # Convert [x, y, w, h] to [x1, y1, x2, y2] in place.
        subj_gt_boxes[:,2] += subj_gt_boxes[:,0]
        obj_gt_boxes[:,2] += obj_gt_boxes[:,0]
        subj_gt_boxes[:,3] += subj_gt_boxes[:,1]
        obj_gt_boxes[:,3] += obj_gt_boxes[:,1]
        # (n, 2, 4): subject box and object box per relation.
        _gt_boxes = np.concatenate([
            subj_gt_boxes[:,None,:], obj_gt_boxes[:,None,:]
        ], 1)
        # (n, 3): [subject class, predicate class, object class].
        _gt_labels = torch.cat([
            batch['subject_cls_id'][_index][:,None],
            batch['target_cls_id'][_index][:,None],
            batch['object_cls_id'][_index][:,None]
        ],1).cpu().data.numpy()
        # Top-k predicted predicates and their scores per relation.
        top_score, top_pred = rel_pred.topk(self.k)
        top_score = top_score.cpu().data.numpy()
        top_pred = top_pred.cpu().data.numpy()
        _det_labels, _det_boxes = [], []
        for i in range(n_index):
            s = _gt_labels[i,0]
            o = _gt_labels[i,2]
            box_s = _gt_boxes[i,0]
            box_o = _gt_boxes[i,1]
            # (k, 6) rows: [subj score=1, rel score, obj score=1,
            #               subj cls, predicted rel cls, obj cls].
            _det_labels.append(
                np.concatenate([
                    np.ones((self.k,1)),
                    top_score[i][:,None],
                    np.ones((self.k,1)),
                    s*np.ones((self.k,1)),
                    top_pred[i][:,None],
                    [o]*np.ones((self.k,1))
                ], 1))
            # Same GT box pair repeated for each of the k candidates.
            _det_boxes.append(
                np.tile(
                    _gt_boxes[i][None,:,:],
                    (self.k,1,1)))
        det_labels.append(np.vstack(_det_labels))
        det_boxes.append(np.vstack(_det_boxes))
        gt_labels.append(_gt_labels)
        gt_boxes.append(_gt_boxes)
    # Accumulate recall@R statistics; finalized elsewhere (not in this view).
    for R in [50, 100]:
        _tp, _fp, _score, _num_gts = vrd_utils.eval_batch(
            [det_labels, det_boxes],
            [gt_labels, gt_boxes],
            num_dets=R)
        self.total_num_gts[R] += _num_gts
        self.tps[R] += _tp
        self.fps[R] += _fp
        self.scores[R] += _score
    return out