Example #1
    def _average_precision(self, all_scores):
        aps = [0.] * self.num_classes
        valid = [0] * self.num_classes

        ind_all = np.arange(self.gt_classes.shape[0])
        num_cls = self.num_classes
        num_ins = ind_all.shape[0]

        for i, c in enumerate(self._classes):
            if i == 0:
                continue  # skip the background class
            gt_this = (self.gt_classes == i).astype(np.float32)
            num_this = np.sum(gt_this)
            if i % 10 == 0:
                print('AP for %s: %d/%d' % (c, i, num_cls))
            if num_this > 0:
                valid[i] = num_this
                sco_this = all_scores[ind_all, i]

                # Rank this class's scores from high to low.
                ind_sorted = np.argsort(-sco_this)

                tp = gt_this[ind_sorted]
                # Truncate after the last true positive: the dropped tail adds
                # no recall, so the AP integral below is unchanged.
                max_ind = num_ins - np.argmax(tp[::-1])
                tp = tp[:max_ind]
                fp = 1. - tp

                tp = np.cumsum(tp)
                fp = np.cumsum(fp)
                rec = tp / float(num_this)
                prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)

                aps[i] = voc_ap(rec, prec)

        # Mean AP over classes that actually have ground-truth instances.
        mcls_ap = np.mean([s for s, v in zip(aps, valid) if v])

        # Compute the overall (per-instance) score: a sample counts as a true
        # positive iff its highest-scoring foreground class is its GT class.
        max_inds = np.argmax(all_scores[:, 1:], axis=1) + 1
        max_scores = np.zeros_like(all_scores)
        max_scores[ind_all, max_inds] = 1.
        pred_all = max_scores[ind_all, self.gt_classes]
        sco_all = all_scores[ind_all, self.gt_classes]
        ind_sorted = np.argsort(-sco_all)

        tp = pred_all[ind_sorted]
        fp = 1. - tp

        tp = np.cumsum(tp)
        fp = np.cumsum(fp)
        rec = tp / float(num_ins)
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)

        mins_ap = voc_ap(rec, prec)
        return aps[1:], mcls_ap, mins_ap
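
All of the examples on this page hand the final precision/recall integration to a voc_ap helper that is not shown here. For reference, a sketch following the widely used py-faster-rcnn implementation (an assumption, not code from these repositories), covering both the VOC07 11-point metric and the exact area-under-curve variant:

import numpy as np

def voc_ap(rec, prec, use_07_metric=False):
    """Average precision from cumulative precision/recall arrays."""
    if use_07_metric:
        # VOC07: average the max precision at 11 evenly spaced recall levels.
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            p = 0. if np.sum(rec >= t) == 0 else np.max(prec[rec >= t])
            ap += p / 11.
    else:
        # Exact area under the monotonically decreasing precision envelope.
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
        i = np.where(mrec[1:] != mrec[:-1])[0]
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap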
Example #2
    def evaluate(self,
                 all_image_ids,
                 all_detection_bboxes,
                 all_detection_vertices,
                 metric='apk'):
        if metric == 'ap':

            npos = 0
            for img_folder in self.annotations:
                for img_name in self.annotations[img_folder]:
                    npos += len(self.annotations[img_folder][img_name])

            tp = np.zeros(len(all_image_ids))
            fp = np.zeros(len(all_image_ids))
            for i, (img_id,
                    bbox) in enumerate(zip(all_image_ids,
                                           all_detection_bboxes)):
                image_name = img_id.split('/')[-1]
                image_folder = img_id.split('/')[0]
                gt_bboxes = [
                    obj['bbox']
                    for obj in self.annotations[image_folder][image_name]
                ]
                if len(gt_bboxes) == 0:
                    # No ground truth in this image: the detection is a false positive.
                    fp[i] = 1
                    continue
                gt_bboxes = torch.Tensor(gt_bboxes)
                bbox = torch.Tensor(bbox).unsqueeze(0)
                ious = BBox.iou(bbox, gt_bboxes)
                max_ious, _ = ious.max(dim=2)
                # IoU > 0.5 against any GT box counts as a true positive.
                if max_ious.item() > 0.5:
                    tp[i] = 1
                else:
                    fp[i] = 1

            fp = np.cumsum(fp)
            tp = np.cumsum(tp)
            rec = tp / float(npos)
            # avoid divide by zero in case the first detection matches a difficult
            # ground truth
            prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)

            score = voc_ap(rec, prec)
        else:
            raise ValueError('unsupported metric: %s' % metric)

        return score, metric
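
This example relies on a BBox.iou helper whose definition is not shown; judging by the ious.max(dim=2) call, it appears to return a batched (1, N, M) IoU tensor. A minimal stand-in under that assumption (the name pairwise_iou is hypothetical):

import torch

def pairwise_iou(boxes, gt_boxes):
    """IoU of each box in `boxes` (N, 4) against `gt_boxes` (M, 4), boxes as
    (x1, y1, x2, y2); returned as (1, N, M) so ious.max(dim=2) works."""
    a = boxes.unsqueeze(1)      # (N, 1, 4)
    b = gt_boxes.unsqueeze(0)   # (1, M, 4)
    iw = (torch.min(a[..., 2], b[..., 2]) - torch.max(a[..., 0], b[..., 0])).clamp(min=0)
    ih = (torch.min(a[..., 3], b[..., 3]) - torch.max(a[..., 1], b[..., 1])).clamp(min=0)
    inter = iw * ih             # (N, M)
    area_a = (a[..., 2] - a[..., 0]) * (a[..., 3] - a[..., 1])  # (N, 1)
    area_b = (b[..., 2] - b[..., 0]) * (b[..., 3] - b[..., 1])  # (1, M)
    return (inter / (area_a + area_b - inter)).unsqueeze(0)     # (1, N, M)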
Example #3
def vg_eval(detpath,
            gt_roidb,
            image_index,
            classindex,
            ovthresh=0.5,
            use_07_metric=False,
            eval_attributes=False):
    """rec, prec, ap, sorted_scores, npos = voc_eval(
                                detpath, 
                                gt_roidb,
                                image_index,
                                classindex,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the Visual Genome evaluation.

    detpath: Path to detections
    gt_roidb: List of ground truth structs.
    image_index: List of image ids.
    classindex: Category index
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11-point AP computation
        (default False)
    [eval_attributes]: Evaluate attribute labels instead of object classes
        (default False)
    """
    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for item, imagename in zip(gt_roidb, image_index):
        if eval_attributes:
            bbox = item['boxes'][np.where(
                np.any(item['gt_attributes'].toarray() ==
                       classindex, axis=1))[0], :]
        else:
            bbox = item['boxes'][np.where(
                item['gt_classes'] == classindex)[0], :]
        difficult = np.zeros((bbox.shape[0], )).astype(bool)  # np.bool was removed in NumPy 1.24
        det = [False] * bbox.shape[0]
        npos = npos + sum(~difficult)
        class_recs[str(imagename)] = {
            'bbox': bbox,
            'difficult': difficult,
            'det': det
        }
    if npos == 0:
        # No ground truth examples
        return 0, 0, 0, 0, npos

    # read dets
    with open(detpath, 'r') as f:
        lines = f.readlines()
    if len(lines) == 0:
        # No detection examples
        return 0, 0, 0, 0, npos

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    sorted_scores = -np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            # the +1 follows the VOC convention of inclusive pixel coordinates
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1  # each GT box may be claimed only once
                else:
                    fp[d] = 1.  # duplicate detection of an already-matched GT
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap, sorted_scores, npos
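
The detections file that vg_eval parses holds one detection per line: an image id, a confidence, then the four box coordinates. A hypothetical call (the file name and class index below are made up for illustration):

# Each line of detpath: <image_id> <confidence> <x1> <y1> <x2> <y2>
# e.g.  107899 0.912 48.0 12.0 211.0 196.0
rec, prec, ap, sorted_scores, npos = vg_eval(
    'dets/person.txt',   # hypothetical detections file
    gt_roidb, image_index, classindex=5, ovthresh=0.5)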
Example #4
def eval_seg_ap(results_all,
                dat_list,
                nb_class=59,
                ovthresh_seg=0.5,
                Sparse=False,
                From_pkl=False,
                task_id=None):
    '''
    From_pkl: load results from pickle files
    Sparse: indicates that the masks in the results are sparse matrices
    '''
    confidence = []
    image_ids = []
    BB = []
    Local_segs_ptr = []

    for imagename in tqdm(results_all.keys(), desc='Loading results ..'):
        if From_pkl:
            results = pickle.load(gzip.open(results_all[imagename]))
        else:
            results = results_all[imagename]

        det_rects = results['DETS']
        for idx, rect in enumerate(det_rects):
            image_ids.append(imagename)
            confidence.append(rect[-1])
            BB.append(rect[:4])
            Local_segs_ptr.append(idx)

    confidence = np.array(confidence)
    BB = np.array(BB)
    Local_segs_ptr = np.array(Local_segs_ptr)

    sorted_ind = np.argsort(-confidence)
    sorted_scores = np.sort(-confidence)
    BB = BB[sorted_ind, :]
    Local_segs_ptr = Local_segs_ptr[sorted_ind]
    image_ids = [image_ids[x] for x in sorted_ind]

    class_recs, npos = get_gt(dat_list, task_id=task_id)
    nd = len(image_ids)
    tp_seg = np.zeros(nd)
    fp_seg = np.zeros(nd)
    pcp_list = []

    for d in trange(nd, desc='Finding AP^P at thres %f..' % ovthresh_seg):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        jmax = -1
        if From_pkl:
            results = pickle.load(gzip.open(results_all[image_ids[d]]))
        else:
            results = results_all[image_ids[d]]

        mask0 = results['MASKS'][Local_segs_ptr[d]]
        if Sparse:
            # decode sparse array if it is one (np.int was removed in NumPy 1.24)
            mask_pred = mask0.toarray().astype(int)
        else:
            mask_pred = mask0.astype(int)

        for i in range(len(R['anno_adds'])):  # xrange is Python 2 only
            mask_gt = np.array(Image.open(R['anno_adds'][i]))
            if len(mask_gt.shape) == 3:
                # Make sure the annotation is a two-dimensional array.
                mask_gt = mask_gt[:, :, 0]

            seg_iou = cal_one_mean_iou(mask_pred.astype(np.uint8), mask_gt,
                                       nb_class)

            mean_seg_iou = np.nanmean(seg_iou)
            if mean_seg_iou > ovmax:
                ovmax = mean_seg_iou
                seg_iou_max = seg_iou
                jmax = i
                mask_gt_u = np.unique(mask_gt)

        if ovmax > ovthresh_seg:
            if not R['det'][jmax]:
                tp_seg[d] = 1.
                R['det'][jmax] = 1
                pcp_d = len(mask_gt_u[np.logical_and(mask_gt_u > 0,
                                                     mask_gt_u < nb_class)])
                pcp_n = float(np.sum(seg_iou_max[1:] > ovthresh_seg))
                if pcp_d > 0:
                    pcp_list.append(pcp_n / pcp_d)
                else:
                    pcp_list.append(0.0)
            else:
                fp_seg[d] = 1.
        else:
            fp_seg[d] = 1.

    # compute precision recall
    fp_seg = np.cumsum(fp_seg)
    tp_seg = np.cumsum(tp_seg)
    rec_seg = tp_seg / float(npos)
    # guard against divide-by-zero, as in the examples above
    prec_seg = tp_seg / np.maximum(tp_seg + fp_seg, np.finfo(np.float64).eps)

    ap_seg = voc_ap(rec_seg, prec_seg)

    assert (np.max(tp_seg) == len(pcp_list)), "%d vs %d" % (np.max(tp_seg),
                                                            len(pcp_list))
    pcp_list.extend([0.0] * (npos - len(pcp_list)))
    pcp = np.mean(pcp_list)

    print('AP_seg, PCP:', ap_seg, pcp)
    return ap_seg, pcp
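
Both this example and the next assume a cal_one_mean_iou helper. From the np.nanmean(seg_iou) and seg_iou_max[1:] usages, it evidently returns a length-nb_class array of per-class IoUs, with NaN for classes absent from both masks. A plausible sketch under that assumption:

import numpy as np

def cal_one_mean_iou(pred, gt, nb_class):
    # Per-class IoU between two label maps; NaN where a class appears in
    # neither mask, so np.nanmean skips it.
    ious = np.full(nb_class, np.nan)
    for c in range(nb_class):
        p, g = (pred == c), (gt == c)
        union = np.logical_or(p, g).sum()
        if union > 0:
            ious[c] = np.logical_and(p, g).sum() / float(union)
    return ious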
Example #5
def eval_seg_ap(results_all,
                dat_list,
                nb_class=59,
                ovthresh_seg_list=[0.5],
                Sparse=False,
                From_pkl=False,
                task_id=None):
    '''
    From_pkl: load results from pickle files
    Sparse: indicates that the masks in the results are sparse matrices
    '''
    # Save all the info corresponding to a specific person.
    confidence = []
    image_ids = []
    BB = []
    Local_segs_ptr = []

    for imagename in tqdm(results_all.keys(), desc='Loading results ..'):
        if From_pkl:
            results = pickle.load(gzip.open(results_all[imagename]))
        else:
            results = results_all[imagename]

        det_rects = results['DETS']
        for idx, rect in enumerate(det_rects):
            image_ids.append(imagename)
            confidence.append(rect[-1])
            BB.append(rect[:4])
            Local_segs_ptr.append(idx)

    confidence = np.array(confidence)
    BB = np.array(BB)
    Local_segs_ptr = np.array(Local_segs_ptr)

    # Sort the boxes based on their score.
    sorted_ind = np.argsort(-confidence)
    # sorted_scores = np.sort(-confidence)
    BB = BB[sorted_ind, :]
    Local_segs_ptr = Local_segs_ptr[sorted_ind]
    image_ids = [image_ids[x] for x in sorted_ind]

    class_recs, npos = get_gt(dat_list, len(ovthresh_seg_list), task_id=task_id)
    nd = len(image_ids)

    final_results = {}
    final_results['ap_list'], final_results['pcp_list'] = [], []

    tp_seg = [np.zeros(nd) for i in range(len(ovthresh_seg_list))]
    fp_seg = [np.zeros(nd) for i in range(len(ovthresh_seg_list))]
    pcp_list = [[] for i in range(len(ovthresh_seg_list))]

    for d in trange(nd, desc="Computing"):
        R = class_recs[image_ids[d]]
        # bb = BB[d, :].astype(float)
        ovmax = -np.inf
        jmax = -1
        if From_pkl:
            results = pickle.load(gzip.open(results_all[image_ids[d]]))
        else:
            results = results_all[image_ids[d]]

        mask0 = results['MASKS'][Local_segs_ptr[d]]
        if Sparse:
            # decode sparse array if it is one (np.int was removed in NumPy 1.24)
            mask_pred = mask0.toarray().astype(int)
        else:
            mask_pred = mask0.astype(int)

        for i in range(len(R['anno_adds'])):
            mask_gt = np.array(Image.open(R['anno_adds'][i]))
            if len(mask_gt.shape) == 3:
                # Make sure ann is a two dimensional np array.
                mask_gt = mask_gt[:, :, 0]
            # Get the mean IoU of one person.
            seg_iou = cal_one_mean_iou(mask_pred.astype(np.uint8),
                                       mask_gt,
                                       nb_class)

            mean_seg_iou = np.nanmean(seg_iou)
            if mean_seg_iou > ovmax:
                ovmax = mean_seg_iou
                seg_iou_max = seg_iou
                jmax = i
                mask_gt_u = np.unique(mask_gt)

        for i, ovthresh_seg in enumerate(ovthresh_seg_list):
            if ovmax > ovthresh_seg:
                # If the best-matching GT person was already claimed by a
                # higher-scoring detection, this detection counts as a
                # false positive (standard greedy matching).
                if not R['det'][i][jmax]:
                    tp_seg[i][d] = 1.
                    R['det'][i][jmax] = 1
                    pcp_d = len(mask_gt_u[np.logical_and(mask_gt_u > 0,
                                                         mask_gt_u < nb_class)])
                    pcp_n = float(np.sum(seg_iou_max[1:] > ovthresh_seg))
                    if pcp_d > 0:
                        pcp_list[i].append(pcp_n / pcp_d)
                    else:
                        pcp_list[i].append(0.0)
                else:
                    fp_seg[i][d] = 1.
            else:
                fp_seg[i][d] = 1.

    # compute precision recall
    for i, ovthresh_seg in enumerate(ovthresh_seg_list):
        fp_seg[i] = np.cumsum(fp_seg[i])
        tp_seg[i] = np.cumsum(tp_seg[i])

    rec_seg = [tp_seg[i] / float(npos) for i in range(len(tp_seg))]
    prec_seg = [tp_seg[i] / np.maximum(tp_seg[i] + fp_seg[i],
                                       np.finfo(np.float64).eps)
                for i in range(len(tp_seg))]

    for i, ovthresh_seg in enumerate(ovthresh_seg_list):
        ap_seg = voc_ap(rec_seg[i], prec_seg[i])

        # assert(np.max(tp_seg) == len(pcp_list),
        #         "%d vs %d" % (np.max(tp_seg), len(pcp_list)))
        pcp_list[i].extend([0.0] * (npos - len(pcp_list[i])))
        pcp = np.mean(pcp_list[i])

        print('AP_seg, PCP under thresh {}:'.format(ovthresh_seg), ap_seg, pcp)
        final_results['ap_list'].append(ap_seg)
        final_results['pcp_list'].append(pcp)

    return final_results
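
A hypothetical call sweeping several overlap thresholds; the i-th entries of 'ap_list' and 'pcp_list' correspond to ovthresh_seg_list[i]:

# results_all and dat_list come from the surrounding pipeline
res = eval_seg_ap(results_all, dat_list, nb_class=59,
                  ovthresh_seg_list=[0.5, 0.6, 0.7], Sparse=True)
print(res['ap_list'], res['pcp_list'])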
Example #6
    def evaluate(self,
                 all_image_ids,
                 all_detection_bboxes,
                 all_detection_vertices,
                 metric='apk'):

        if metric == 'apk':
            y_true = np.zeros(len(all_image_ids) * 8)

            for i, (img_id, vert, bbox) in enumerate(
                    zip(all_image_ids, all_detection_vertices,
                        all_detection_bboxes)):
                best_nb_correct = -1
                width = bbox[2] - bbox[0]
                height = bbox[3] - bbox[1]
                max_dim = max(width, height)
                for ann in self.annotations[img_id]:
                    gt_vert = ann['vertices']

                    vert_tensor = torch.Tensor(vert)
                    gt_vert_tensor = torch.Tensor(gt_vert)
                    t_dist = [
                        torch.norm(vert_tensor[:, j] - gt_vert_tensor[:, j], 2)
                        for j in range(8)
                    ]

                    correct = [d.item() < 0.1 * max_dim for d in t_dist]
                    nb_correct = sum(correct)
                    if nb_correct > best_nb_correct:
                        best_nb_correct = nb_correct
                        # each detection owns its own 8-slot span in y_true
                        y_true[i * 8:(i + 1) * 8] = np.asarray(correct)

            # Fraction of projected vertices within 10% of the box's larger side.
            score = np.sum(y_true) / float(len(y_true))

        elif metric == 'ap':

            npos = 0
            for img_name in self.annotations:
                npos += len(self.annotations[img_name])

            tp = np.zeros(len(all_image_ids))
            fp = np.zeros(len(all_image_ids))
            for i, (img_id,
                    bbox) in enumerate(zip(all_image_ids,
                                           all_detection_bboxes)):
                gt_bboxes = [obj['bbox'] for obj in self.annotations[img_id]]
                if len(gt_bboxes) == 0:
                    # No ground truth in this image: the detection is a false positive.
                    fp[i] = 1
                    continue
                gt_bboxes = torch.Tensor(gt_bboxes)
                bbox = torch.Tensor(bbox).unsqueeze(0)
                ious = BBox.iou(bbox, gt_bboxes)
                max_ious, _ = ious.max(dim=2)
                if max_ious.item() > 0.5:
                    tp[i] = 1
                else:
                    fp[i] = 1

            fp = np.cumsum(fp)
            tp = np.cumsum(tp)
            rec = tp / float(npos)
            # avoid divide by zero in case the first detection matches a difficult
            # ground truth
            prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)

            score = voc_ap(rec, prec)
        else:
            raise ValueError('unsupported metric: %s' % metric)

        return score, metric
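
The 'apk' branch counts a projected cuboid corner as correct when its L2 error is under 10% of the detection box's larger side, so for a 100x180 box anything within 18 px passes. A tiny self-contained check of that criterion, with made-up tensors:

import torch

max_dim = 180.0                      # larger side of a hypothetical box
vert = torch.zeros(2, 8)             # predicted corners, one (x, y) column each
gt_vert = torch.full((2, 8), 10.0)   # GT shifted by sqrt(200) ~= 14.1 px
dists = [torch.norm(vert[:, j] - gt_vert[:, j], 2).item() for j in range(8)]
print([d < 0.1 * max_dim for d in dists])   # all True, since 14.1 < 18.0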