Example #1
 def __call__(self, cri_out, net_out, batch):
     # report top-k accuracy separately for each output head
     out = {}
     for j, x in enumerate(net_out):
         acc_out = accuracy(x.data.cpu(),
                            batch['class_id'].data.cpu(),
                            topk=self.topk)
         for i, k in enumerate(self.topk):
             out['accuracy_top{}_head{}'.format(k, j)] = acc_out[i]
     return out
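All of the snippets on this page call an accuracy helper that they do not define. A minimal sketch of the usual top-k implementation (in the style of the PyTorch ImageNet example) is given below; the exact helper these projects use may differ.

import torch

def accuracy(output, target, topk=(1,)):
    # Top-k accuracy: the percentage of samples whose true class id
    # appears among the k highest-scoring predictions.
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()                                   # (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res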
Example #2
 def forward(self, cri_out, net_out, batch):
     # report top-k accuracy for each RUBi head: the fused logits (''),
     # the RUBi-masked branch ('_rubi') and the question-only branch ('_q')
     out = {}
     class_id = batch['class_id'].data.cpu()
     for key in ['', '_rubi', '_q']:
         logits = net_out[f'logits{key}'].data.cpu()
         acc_out = accuracy(logits, class_id, topk=self.topk)
         for i, k in enumerate(self.topk):
             out[f'accuracy{key}_top{k}'] = acc_out[i]
     return out
Example #3
    def __call__(self, cri_out, net_out, batch):
        out = {}
        logits = net_out['logits'].data.cpu()
        class_id = batch['class_id'].data.cpu()
        acc_out = accuracy(logits,
                           class_id,
                           topk=self.topk)

        for i, k in enumerate(self.topk):
            out['accuracy_top{}'.format(k)] = acc_out[i]
        return out
Example #4
    def forward(self, cri_out, net_out, batch):
        out = {}
        logits = net_out["logits"].data.cpu()
        class_id = batch["class_id"]
        acc_out = accuracy(logits, class_id.data.cpu(), topk=self.topk)

        for i, k in enumerate(self.topk):
            out["accuracy_top{}".format(k)] = acc_out[i]

        # compute accuracy separately on simple and complex examples
        answers = torch.argmax(logits, dim=1)

        for i in range(len(net_out["logits"])):
            pred = answers[i].item()
            gt = batch["answer"][i]

            categories = {"overall"}
            if "issimple" in batch:
                main_cat = "simple" if batch["issimple"][i] else "complex"
                categories.add(main_cat)
            else:
                main_cat = None

            # add per-answer-count categories
            categories.add(f"overall-{gt}")
            if main_cat is not None:
                categories.add(f"{main_cat}-{gt}")

            if any(phrase in batch["raw_question"][i] for phrase in [
                    "left of",
                    "right of",
                    "behind",
                    "front of",
            ]):
                categories.add("positional")

            if any(word in batch["raw_question"][i]
                   for word in ["type", "types"]):
                categories.add("type")

            if int(batch["answer"][i]) % 2 == 0:
                categories.add("even")
            else:
                categories.add("odd")

            if hasattr(self.engine.dataset[self.mode], "own_numbers"):
                dataset = self.engine.dataset[self.mode]
                own_numbers = dataset.own_numbers
                opposite_numbers = dataset.opposite_numbers
                if int(batch["answer"][i]) in own_numbers:
                    categories.add("overall-own")
                    if main_cat is not None:
                        categories.add(f"{main_cat}-own")
                if int(batch["answer"][i]) in opposite_numbers:
                    categories.add("overall-opposite")
                    if main_cat is not None:
                        categories.add(f"{main_cat}-opposite")

            for cat in categories:
                if "pred" in net_out:
                    self.answers[cat]["pred"].append(net_out["pred"][i].item())
                self.answers[cat]["ans"].append(pred)
                self.answers[cat]["gt"].append(gt)

                if "final_attention_map" in net_out:
                    thresh_prediction = (net_out["final_attention_map"][i] >
                                         0.5).sum()
                    # breakpoint()
                    self.answers[cat]["hard.ans"].append(
                        thresh_prediction.item())

        # GROUNDING
        if "scores" in net_out and "gt_bboxes" in batch:
            Logger()("Computing COCO grounding")
            bsize = logits.shape[0]
            # compute grounding
            ious = []
            threshold = self.score_threshold_grounding
            for i in range(bsize):
                gt = batch["answer"][i]
                scores = net_out["scores"][i]  # (regions, 1)
                selection = (scores >= threshold).view((scores.shape[0], ))
                coords = batch["coord"][i]
                coord_thresh = coords[selection]
                iou, inter, union, ioo, iogt = compute_iou(
                    batch["gt_bboxes"][i],
                    coord_thresh.cpu().numpy())

                ious.append(iou)
                self.ious["overall"].append(iou)
                self.ious[gt].append(iou)
                if batch["answer"][i] != 0:
                    self.ious_nonzero["overall"].append(iou)
                    self.ious_nonzero[gt].append(iou)

                # second method: rasterise ground truth and score-weighted
                # proposals at pixel level, then threshold
                width = batch["img_width"][i]
                height = batch["img_height"][i]
                img_gt = np.full((width, height), False, dtype=bool)  # (x, y)
                img_proposed = np.zeros((width, height))
                for bbox in batch["gt_bboxes"][i]:
                    x, y, x2, y2 = [round(c) for c in bbox]
                    img_gt[x:x2, y:y2] = True
                scores = net_out["scores"][i]
                candidate_bbox = list(
                    zip(
                        batch["coord"][i].tolist(),
                        scores.view((scores.shape[0], )).cpu().tolist(),
                    ))

                for bbox, score in candidate_bbox:
                    x, y, x2, y2 = [round(c) for c in bbox]
                    img_proposed[x:x2, y:y2] += score
                thresh = img_proposed >= threshold
                intersection = thresh & img_gt
                union = thresh | img_gt
                union_sum = union.sum()
                inter_sum = intersection.sum()
                thresh_sum = thresh.sum()
                img_gt_sum = img_gt.sum()

                iou_sum = inter_sum / union_sum if union_sum != 0 else 1.0
                ioo_sum = inter_sum / thresh_sum if thresh_sum != 0 else 1.0
                iogt_sum = inter_sum / img_gt_sum if img_gt_sum != 0 else 1.0

                self.ious_sum["overall"].append(iou_sum)
                self.ious_sum[gt].append(iou_sum)
                if batch["answer"][i] != 0:
                    self.ious_sum_nonzero["overall"].append(iou_sum)
                    self.ious_sum_nonzero[gt].append(iou_sum)

                # third method: score-weighted average of per-box IoUs
                iou_boxes = 0
                iogt_boxes = 0
                ioo_boxes = 0
                for bbox, score in candidate_bbox:
                    iou_box, _, _, ioo_box, iogt_box = compute_iou(
                        batch["gt_bboxes"][i], [bbox])
                    iou_boxes += iou_box * score
                    ioo_boxes += ioo_box * score
                    iogt_boxes += iogt_box * score

                if "pred" in net_out:
                    pred = net_out["pred"][i].item()
                elif "counter-pred" in net_out:
                    pred = net_out["counter-pred"][i].item()
                    # print("counter-pred", pred)

                iou_boxes_norm = iou_boxes / pred
                ioo_boxes_norm = ioo_boxes / pred
                iogt_boxes_norm = iogt_boxes / pred

                # average precision inputs:
                #   predicted boxes [n, 4], classes [n], confidences [n]
                #   ground-truth boxes [m, 4], classes [m]
                pred_bb = coords.cpu().numpy()
                pred_cls = np.zeros((len(pred_bb)))
                pred_conf = scores.view((scores.shape[0], )).cpu().numpy()
                gt_bb = np.array(batch["gt_bboxes"][i])
                gt_cls = np.zeros(len(gt_bb))
                if len(gt_bb) > 0:
                    for t in THRESHOLDS_mAP:
                        try:
                            self.mean_ap[t].evaluate(pred_bb, pred_cls,
                                                     pred_conf, gt_bb, gt_cls)
                        except IndexError:
                            traceback.print_exc()

                self.all_ious.append([
                    batch["question_id"][i],
                    batch["name"][i],
                    gt,
                    batch["gt_bboxes"][i],
                    pred,
                    round(pred),
                    candidate_bbox,
                    iou,
                    iou_sum,
                    ioo,
                    ioo_sum,
                    iogt,
                    iogt_sum,
                    iou_boxes,
                    iou_boxes_norm,
                    ioo_boxes,
                    ioo_boxes_norm,
                    iogt_boxes,
                    iogt_boxes_norm,
                ])

            iou = np.mean(ious)
            out["iou"] = iou
        return out
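Example #4 relies on a compute_iou helper that is not shown. From the call sites alone, it takes a list of ground-truth boxes and an array of predicted boxes and returns the 5-tuple (iou, inter, union, ioo, iogt). A hypothetical mask-based sketch that matches that signature (the real helper may differ):

import numpy as np

def compute_iou(gt_boxes, pred_boxes, size=1024):
    # Hypothetical sketch: rasterise both (x1, y1, x2, y2) box sets
    # onto boolean masks and compare areas. ioo is intersection over
    # the proposed area, iogt is intersection over the ground truth.
    def rasterise(boxes):
        mask = np.zeros((size, size), dtype=bool)
        for box in np.asarray(boxes, dtype=float).reshape(-1, 4):
            x1, y1, x2, y2 = [int(round(c)) for c in box]
            mask[x1:x2, y1:y2] = True
        return mask

    gt, pred = rasterise(gt_boxes), rasterise(pred_boxes)
    inter = int((gt & pred).sum())
    union = int((gt | pred).sum())
    iou = inter / union if union else 1.0
    ioo = inter / int(pred.sum()) if pred.sum() else 1.0
    iogt = inter / int(gt.sum()) if gt.sum() else 1.0
    return iou, inter, union, ioo, iogt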
Example #5
    def forward(self, cri_out, net_out, batch):
        out = {}
        nb_classes = net_out['rel_scores'].size(1)
        out['rel_scores'] = net_out['rel_scores']
        det_labels, det_boxes = [], []
        gt_labels, gt_boxes = [], []
        rel_scores = torch.sigmoid(net_out['rel_scores'])

        # top-1 / top-5 accuracy over relations with a positive predicate id
        positive = batch['target_cls_id'] > 0
        acc_out = accuracy(rel_scores[positive],
                           batch['target_cls_id'][positive],
                           topk=[1, 5])
        out['accuracy_top1'] = acc_out[0].item()
        out['accuracy_top5'] = acc_out[1].item()

        for batch_id in range(len(batch['rels'])):
            _index = (batch['batch_id'] == batch_id) & (batch['target_cls_id'] > 0)
            n_index = int(_index.int().sum().cpu().data)
            oid_to_box = {
                obj['object_id']: [obj['x'], obj['y'], obj['w'], obj['h']]
                for obj in batch['objects'][batch_id]
            }
            rel_pred = rel_scores[_index]
            subj_pred_boxes = batch['subject_boxes_raw'][_index]
            obj_pred_boxes = batch['object_boxes_raw'][_index]

            subj_gt_boxes = np.array([
                oid_to_box[rel['subject_id']]
                for rel in batch['rels'][batch_id]
            ])
            obj_gt_boxes = np.array([
                oid_to_box[rel['object_id']]
                for rel in batch['rels'][batch_id]
            ])

            # convert ground-truth boxes from (x, y, w, h) to (x1, y1, x2, y2)
            subj_gt_boxes[:, 2] += subj_gt_boxes[:, 0]
            subj_gt_boxes[:, 3] += subj_gt_boxes[:, 1]
            obj_gt_boxes[:, 2] += obj_gt_boxes[:, 0]
            obj_gt_boxes[:, 3] += obj_gt_boxes[:, 1]

            _gt_boxes = np.concatenate([
                subj_gt_boxes[:,None,:], obj_gt_boxes[:,None,:]
            ], 1)
            _gt_labels = torch.cat([
                batch['subject_cls_id'][_index][:,None],
                batch['target_cls_id'][_index][:,None],
                batch['object_cls_id'][_index][:,None]
            ],1).cpu().data.numpy()
            top_score, top_pred = rel_pred.topk(self.k)
            top_score = top_score.cpu().data.numpy()
            top_pred = top_pred.cpu().data.numpy()
            _det_labels, _det_boxes = [], []
            for i in range(n_index):
                s = _gt_labels[i,0]
                o = _gt_labels[i,2]
                box_s = _gt_boxes[i,0]
                box_o = _gt_boxes[i,1]
                # det label columns: subj score (1), predicate score,
                # obj score (1), subj class, predicate class, obj class
                _det_labels.append(
                    np.concatenate([
                        np.ones((self.k, 1)),
                        top_score[i][:, None],
                        np.ones((self.k, 1)),
                        s * np.ones((self.k, 1)),
                        top_pred[i][:, None],
                        o * np.ones((self.k, 1))
                    ], 1))
                _det_boxes.append(
                    np.tile(
                        _gt_boxes[i][None,:,:],
                        (self.k,1,1)))
            det_labels.append(np.vstack(_det_labels))
            det_boxes.append(np.vstack(_det_boxes))
            gt_labels.append(_gt_labels)
            gt_boxes.append(_gt_boxes)

        for R in [50, 100]:
            _tp, _fp, _score, _num_gts = vrd_utils.eval_batch(
                [det_labels, det_boxes],
                [gt_labels, gt_boxes],
                num_dets=R)
            self.total_num_gts[R] += _num_gts
            self.tps[R] += _tp
            self.fps[R] += _fp
            self.scores[R] += _score

        return out
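Example #5 only accumulates per-batch statistics; recall@50 and recall@100 are presumably reduced at epoch end. A hypothetical reduction, assuming tps[R] and total_num_gts[R] accumulate scalar counts (the vrd_utils internals are not shown here):

def recall_summary(tps, total_num_gts, Rs=(50, 100)):
    # recall@R = pooled true positives / total ground-truth triplets
    return {R: tps[R] / max(total_num_gts[R], 1) for R in Rs}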