Example 1
def filter_annotations(img_all_annotations, used_classes):
    """Filters out annotations from the unused classes and dontcare regions.

  Filters out the annotations that belong to classes we do not wish to use and
  (optionally) also removes all boxes that overlap with dontcare regions.

  Args:
    img_all_annotations: A list of annotation dictionaries. See documentation of
      read_annotation_file for more details about the format of the annotations.
    used_classes: A list of strings listing the classes we want to keep. If
      the list contains "dontcare", all bounding boxes overlapping with
      dontcare regions will also be filtered out.

  Returns:
    img_filtered_annotations: A list of annotation dictionaries that have passed
      the filtering.
  """

    img_filtered_annotations = {}

    # Filter the type of the objects.
    relevant_annotation_indices = [
        i for i, x in enumerate(img_all_annotations['type'])
        if x in used_classes
    ]

    for key in img_all_annotations.keys():
        img_filtered_annotations[key] = (
            img_all_annotations[key][relevant_annotation_indices])

    if 'dontcare' in used_classes:
        dont_care_indices = [
            i for i, x in enumerate(img_filtered_annotations['type'])
            if x == 'dontcare'
        ]

        # bounding box format: [y_min, x_min, y_max, x_max]
        all_boxes = np.stack([
            img_filtered_annotations['2d_bbox_top'],
            img_filtered_annotations['2d_bbox_left'],
            img_filtered_annotations['2d_bbox_bottom'],
            img_filtered_annotations['2d_bbox_right']
        ], axis=1)

        ious = iou(boxes1=all_boxes, boxes2=all_boxes[dont_care_indices])

        # Remove all bounding boxes that overlap with a dontcare region.
        if ious.size > 0:
            boxes_to_remove = np.amax(ious, axis=1) > 0.0
            for key in img_all_annotations.keys():
                img_filtered_annotations[key] = (img_filtered_annotations[key][
                    np.logical_not(boxes_to_remove)])

    return img_filtered_annotations
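A usage sketch with a hypothetical annotation dict (real dicts come from read_annotation_file, which is not shown; values must be NumPy arrays for the fancy indexing above to work, and iou is assumed to be an array-based helper returning an [N, M] matrix):

import numpy as np

# Hypothetical single-image annotations; keys follow the KITTI-style
# fields used in the function body.
img_all_annotations = {
    'type': np.array(['car', 'pedestrian', 'dontcare']),
    '2d_bbox_top': np.array([10., 40., 55.]),
    '2d_bbox_left': np.array([10., 40., 55.]),
    '2d_bbox_bottom': np.array([50., 90., 60.]),
    '2d_bbox_right': np.array([50., 90., 60.]),
}
filtered = filter_annotations(img_all_annotations, ['car', 'dontcare'])
# The pedestrian is dropped by class; the dontcare box removes itself
# (it overlaps its own region), leaving only the car.
print(filtered['type'])  # -> ['car']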
Example 2
def iou(boxlist1, boxlist2):
    """Computes pairwise intersection-over-union between box collections.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes

  Returns:
    a numpy array with shape [N, M] representing pairwise iou scores.
  """
    return np_box_ops.iou(boxlist1.get(), boxlist2.get())
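The snippets throughout call np_box_ops.iou; below is a self-contained NumPy sketch of that pairwise computation (my own reconstruction from the [ymin, xmin, ymax, xmax] convention used here, not the library source):

import numpy as np

def pairwise_iou(boxes1, boxes2):
    # boxes1: [N, 4], boxes2: [M, 4], both [ymin, xmin, ymax, xmax];
    # returns an [N, M] matrix of IoU scores.
    y_min1, x_min1, y_max1, x_max1 = np.split(boxes1, 4, axis=1)  # [N, 1] each
    y_min2, x_min2, y_max2, x_max2 = np.split(boxes2, 4, axis=1)  # [M, 1] each
    # Pairwise intersection extents, clipped at zero for disjoint boxes.
    inter_h = np.maximum(
        0.0, np.minimum(y_max1, y_max2.T) - np.maximum(y_min1, y_min2.T))
    inter_w = np.maximum(
        0.0, np.minimum(x_max1, x_max2.T) - np.maximum(x_min1, x_min2.T))
    intersection = inter_h * inter_w                # [N, M]
    area1 = (y_max1 - y_min1) * (x_max1 - x_min1)   # [N, 1]
    area2 = (y_max2 - y_min2) * (x_max2 - x_min2)   # [M, 1]
    return intersection / (area1 + area2.T - intersection)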
Example 3
def bb_cls_matching(det_bb, gt_bb, gt_cls, iou_thresh=0.5):
    det_cls = np.zeros(len(det_bb))

    iou_array = iou(det_bb, gt_bb)
    pos_ind = np.where(np.amax(iou_array, axis=1) >= iou_thresh)
    if not len(pos_ind[0]):
        print('Warning: No matching bb!')
    iou_array = iou_array[pos_ind]
    pos_det_cls = gt_cls[np.argmax(iou_array, axis=1)]
    det_cls[pos_ind] = pos_det_cls

    return det_cls, pos_ind[0]
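A quick sketch of how the two return values are used (hypothetical arrays; iou here is assumed to take plain [N, 4] arrays in [ymin, xmin, ymax, xmax] order):

det_bb = np.array([[0., 0., 10., 10.], [20., 20., 30., 30.]])
gt_bb = np.array([[1., 1., 10., 10.], [100., 100., 110., 110.]])
gt_cls = np.array([3, 7])

det_cls, matched = bb_cls_matching(det_bb, gt_bb, gt_cls, iou_thresh=0.5)
# det_cls -> [3., 0.]: the first detection inherits the class of its
# best-overlapping ground-truth box; unmatched detections keep class 0.
# matched -> [0]: indices of the detections that found a match.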
Example 4
    def _calc_single_sample_det_info(self,
                                     ground_bbox_list,
                                     det_bbox_list,
                                     iou_threshold=0.5):
        '''
        Return the detection statistics for a single image
        (true positives, false alarms, and missed detections).
        :param ground_bbox_list: list of ground-truth boxes, [ymin, xmin, ymax, xmax]
        :param det_bbox_list: list of detected boxes, [ymin, xmin, ymax, xmax]
        :param iou_threshold: minimum IoU for a detection to match a ground-truth box
        :return: tp, fp, fn, gt_iou_list (best IoU per ground-truth box)
        '''
        if len(ground_bbox_list) == 0:
            # No ground truth: every detection is a false alarm.
            return 0, len(det_bbox_list), 0, []

        if len(det_bbox_list) == 0:
            # No detections: every ground-truth box is missed.
            return 0, 0, len(ground_bbox_list), [0.0] * len(ground_bbox_list)

        iou_mat = box_ops.iou(np.array(ground_bbox_list),
                              np.array(det_bbox_list))

        # Detections whose IoU clears the threshold count as true positives (TP).
        tp = 0
        gt_over_map = {}
        det_over_map = {}
        gt_iou_list = []
        for i in range(len(ground_bbox_list)):
            if i in gt_over_map:
                continue
            max_j = -1
            max_iou = 0
            gt_iou = 0
            for j in range(len(det_bbox_list)):
                if j in det_over_map:
                    continue
                if iou_mat[i, j] > gt_iou:
                    gt_iou = iou_mat[i, j]
                if iou_mat[i, j] >= iou_threshold and iou_mat[i, j] > max_iou:
                    max_iou = iou_mat[i, j]
                    max_j = j
            gt_iou_list.append(gt_iou)
            if max_j >= 0:
                tp += 1
                gt_over_map[i] = True
                det_over_map[max_j] = True

        # Detected boxes with no corresponding ground-truth box are
        # false positives (false alarms), FP.
        fp = len(det_bbox_list) - tp

        # Ground-truth boxes that were never matched are false negatives
        # (missed detections), FN.
        fn = len(ground_bbox_list) - tp

        return tp, fp, fn, gt_iou_list
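A hedged usage sketch (the evaluator instance and box values are hypothetical; boxes follow the [ymin, xmin, ymax, xmax] order from the docstring):

gt = [[0., 0., 10., 10.], [50., 50., 60., 60.]]
det = [[1., 1., 10., 10.], [80., 80., 90., 90.]]
tp, fp, fn, gt_ious = evaluator._calc_single_sample_det_info(gt, det)
# Only the first pair overlaps (IoU = 81/100), so tp=1, fp=1, fn=1;
# from these, precision = tp / (tp + fp) = 0.5 and recall = tp / (tp + fn) = 0.5.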
Example 5
def track_dets(dets):
    """Perform tracking for each object class, for multi-object multi-class tracking.

    Note: classes in dets may already have been merged by prepare_dets / merge_classes.
    """

    num_frames = len(dets[list(dets.keys())[0]])
    tracks = collections.defaultdict(dict)
    for class_id in dets:
        tracker = Sort(max_age=2, min_hits=3)
        tracker.reset()

        # run the tracker frame by frame
        for fn in range(num_frames):
            ndets = len(dets[class_id][fn]['boxes'])
            # prepare box and score for tracker
            fn_bxsc = np.array(dets[class_id][fn]['bxsc'])
            tracks[class_id][fn] = tracker.update(fn_bxsc)

    ## associate the tid with the dets
    # loop over object classes, and frames in video
    for class_id in dets:
        for fn in dets[class_id]:
            # get current det and track boxes
            det_boxes = np.array(dets[class_id][fn]['boxes'])
            trk_boxes = tracks[class_id][fn][:, :4]

            # if no dets or no tracks, go to next frame
            if det_boxes.size == 0 or trk_boxes.size == 0:
                continue

            # do munkres iou between det-boxes and track-boxes
            iou = np_box_ops.iou(det_boxes, trk_boxes)
            dd, tt = munkres(-iou)
            for d, t in zip(dd, tt):
                dets[class_id][fn]['tid'][d] = int(tracks[class_id][fn][t, 4])

    return dets
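munkres() is not defined in these excerpts; given the call sites (cost matrix -iou, returning paired row and column indices), it is presumably a Hungarian-assignment wrapper. A minimal sketch using SciPy:

from scipy.optimize import linear_sum_assignment

def munkres(cost):
    # Hungarian assignment on a cost matrix; called above with -iou, so it
    # maximizes total IoU over matched (detection, track) pairs. The real
    # helper may additionally drop low-IoU matches.
    rows, cols = linear_sum_assignment(cost)
    return rows, cols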
Example 6
def non_max_suppression(boxlist,
                        max_output_size=10000,
                        iou_threshold=1.0,
                        score_threshold=-10.0):
    """Non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes. In each iteration, the detected bounding box with
  highest score in the available pool is selected.

  Args:
    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
      representing detection scores. All scores belong to the same class.
    max_output_size: maximum number of retained boxes
    iou_threshold: intersection over union threshold.
    score_threshold: minimum score threshold. Boxes with scores below this
                     value are removed. The default of -10 is low enough to
                     pass essentially all boxes unless the user sets a
                     different threshold.

  Returns:
    a BoxList holding M boxes where M <= max_output_size
  Raises:
    ValueError: if 'scores' field does not exist
    ValueError: if threshold is not in [0, 1]
    ValueError: if max_output_size < 0
  """
    if not boxlist.has_field('scores'):
        raise ValueError('Field scores does not exist')
    if iou_threshold < 0. or iou_threshold > 1.0:
        raise ValueError('IOU threshold must be in [0, 1]')
    if max_output_size < 0:
        raise ValueError('max_output_size must be non-negative.')

    boxlist = filter_scores_greater_than(boxlist, score_threshold)
    if boxlist.num_boxes() == 0:
        return boxlist

    boxlist = sort_by_field(boxlist, 'scores')

    # Prevent further computation if NMS is disabled.
    if iou_threshold == 1.0:
        if boxlist.num_boxes() > max_output_size:
            selected_indices = np.arange(max_output_size)
            return gather(boxlist, selected_indices)
        else:
            return boxlist

    boxes = boxlist.get()
    num_boxes = boxlist.num_boxes()
    # is_index_valid is True for boxes that are still candidates for selection.
    is_index_valid = np.full(num_boxes, 1, dtype=bool)
    selected_indices = []
    num_output = 0
    for i in range(num_boxes):
        if num_output < max_output_size:
            if is_index_valid[i]:
                num_output += 1
                selected_indices.append(i)
                is_index_valid[i] = False
                valid_indices = np.where(is_index_valid)[0]
                if valid_indices.size == 0:
                    break

                intersect_over_union = np_box_ops.iou(
                    np.expand_dims(boxes[i, :], axis=0),
                    boxes[valid_indices, :])
                intersect_over_union = np.squeeze(intersect_over_union, axis=0)
                is_index_valid[valid_indices] = np.logical_and(
                    is_index_valid[valid_indices],
                    intersect_over_union <= iou_threshold)
    return gather(boxlist, np.array(selected_indices))
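A hedged usage sketch, assuming the BoxList container from the same codebase (e.g. np_box_list in the TF Object Detection API) with boxes in [ymin, xmin, ymax, xmax] order:

boxes = np.array([[0., 0., 1., 1.],
                  [0., 0., 1.05, 1.05],  # near-duplicate of box 0 (IoU ~0.91)
                  [2., 2., 3., 3.]], dtype=float)
boxlist = np_box_list.BoxList(boxes)
boxlist.add_field('scores', np.array([0.9, 0.8, 0.7], dtype=float))
kept = non_max_suppression(boxlist, max_output_size=10, iou_threshold=0.5)
# kept holds boxes 0 and 2; the near-duplicate is suppressed.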
Example 7
    def testIOU(self):
        iou = np_box_ops.iou(self.boxes1, self.boxes2)
        expected_iou = np.array(
            [[2.0 / 16.0, 0.0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]],
            dtype=float)
        self.assertAllClose(iou, expected_iou)
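The fixtures are not shown; the following setUp values are one set consistent with the expected matrix (e.g. boxes1[0] vs boxes2[0]: intersection 2, union 6 + 12 - 2 = 16, hence 2/16):

    def setUp(self):
        # Assumed fixtures in [ymin, xmin, ymax, xmax] order that
        # reproduce the expected_iou matrix above.
        self.boxes1 = np.array(
            [[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]], dtype=float)
        self.boxes2 = np.array(
            [[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
             [0.0, 0.0, 20.0, 20.0]], dtype=float)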
Example 8
def sthelse_annots_masks(vid_id, sthelse_annots, obj_iou_th=0.3):
    """Create segmentation masks based on sth-else annotations
    """

    oH, oW, oN, _ = sth_dataset.video_properties(vid_id)

    ### load pickle segmentation data
    pkl_fname = SEG_FNAME_TEMPLATE % vid_id
    with open(pkl_fname, 'rb') as fid:
        seg_data = pickle.load(fid)

    # get image H, W
    _, H, W = seg_data[0]['pred_masks'].shape

    ### combine segmentation results with sthelse bounding boxes
    hand_masks = collections.defaultdict(None)
    objs_masks = collections.defaultdict(dict)
    bbox_keys = ['x1', 'y1', 'x2', 'y2']
    # assumes first frame annotation exists, get indices to objects
    num_obj = list(range(len(sthelse_annots[0]['gt_placeholders'])))
    for fn in range(oN):
        # if no labels for this frame, skip
        if fn not in sthelse_annots:
            continue

        # get mask-rcnn data
        seg_frame = seg_data[fn]
        classes = seg_frame['pred_classes']
        boxes = seg_frame['pred_boxes'].astype('int64')  # x1, y1, x2, y2
        masks = seg_frame['pred_masks']  # N x HxW masks

        # process label
        annots = sthelse_annots[fn]['labels']
        hand_label = [label for label in annots if label['category'] == 'hand']
        objs_label = [label for label in annots if label['category'] != 'hand']

        # process hand annotations
        if hand_label:
            # should not have more than one hand in videos
            if len(hand_label) > 1:
                pdb.set_trace()

            person_idx = np.where(classes == 0)[0]
            # If the mask-RCNN outputs have no "person" category, leave the
            # entry unset; empty masks are filled in later.
            if person_idx.size > 0:
                hand_bbox = [[label['box2d'][key] for key in bbox_keys] \
                                for label in hand_label]
                hand_bbox = np.array(hand_bbox).astype(
                    'int64')  # M=1 x 4 for computing iou

                # compute iou with mask-rcnn "person" category boxes
                ious = np_box_ops.iou(hand_bbox,
                                      boxes[person_idx])[0]  # M=1 x N --> N
                pick_mask = person_idx[np.argmax(ious)]

                # filter mask with label bbox
                hand_masks[fn] = filter_mask(masks[pick_mask], hand_bbox[0])

        # process obj box annotations
        if objs_label:
            obj_idx = np.where(classes != 0)[0]
            # If the mask-RCNN outputs have no "non-person" category, leave
            # the entries unset; empty masks are filled in later.
            if obj_idx.size > 0:
                objs_bbox = [[label['box2d'][key] for key in bbox_keys] \
                                for label in objs_label]
                objs_bbox = np.array(objs_bbox).astype(
                    'int64')  # M x 4 for computing iou

                # compute iou with mask-rcnn "non-person" category boxes
                ious = np_box_ops.iou(objs_bbox, boxes[obj_idx])  # M x N
                # compute amount of object pixels

                # do assignment with munkres
                ll, mm = munkres(-ious)  # label indices, mask-rcnn indices
                for l, m in zip(ll, mm):
                    if ious[l, m] > obj_iou_th:
                        obj_id = int(annots[l]['gt_annotation'].split()[1])
                        objs_masks[fn][obj_id] = filter_mask(
                            masks[obj_idx[m]], objs_bbox[l])

    ### replace None or empty masks by all-zero frames
    all_zero_mask = np.zeros((H, W)).astype(bool)
    for fn in range(oN):
        if fn not in hand_masks:
            hand_masks[fn] = all_zero_mask.copy()
        for obj_id in num_obj:
            if obj_id not in objs_masks[fn]:
                objs_masks[fn][obj_id] = all_zero_mask.copy()

    # make into lists
    list_hand_masks = [hand_masks[k] for k in sorted(hand_masks.keys())]
    list_objs_masks = [[objs_masks[k][o] for k in sorted(objs_masks.keys())] \
                        for o in sorted(num_obj)]

    print('id: {}, hand: {}, obj: {}'.format(vid_id, len(list_hand_masks),
                                             len(list_objs_masks)))
    return list_hand_masks, list_objs_masks
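filter_mask() is not shown; from its call sites (a predicted HxW mask plus an annotated integer [x1, y1, x2, y2] box) it presumably restricts the mask to the box. A minimal sketch under that assumption:

def filter_mask(mask, bbox):
    # Assumed behavior: keep mask pixels inside the [x1, y1, x2, y2]
    # box and zero out everything else.
    x1, y1, x2, y2 = bbox
    out = np.zeros_like(mask)
    out[y1:y2, x1:x2] = mask[y1:y2, x1:x2]
    return out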
Example 9
def davidhandobj_masks(video_id,
                       david_preds,
                       dets,
                       seg_data,
                       obj_iou_th=0.3,
                       num_obj=[0]):
    """Create segmentation masks based on a mixture of David's hand-object model and MaskRCNN
    """
    oN = len(seg_data)
    # get image H, W
    _, H, W = seg_data[0]['pred_masks'].shape

    ### combine segmentation results with sthelse bounding boxes
    hand_masks = collections.defaultdict(None)
    objs_masks = collections.defaultdict(dict)

    ### process each frame where hand exists
    # we'll do tracking for rest of the frames later
    hobj_info = {}
    for fn in range(oN):
        hobj_info[fn] = {
            'hand': False,
            'hbox_sc': None,
            'o0': False,
            'o0box_sc': None,
            'o1': False,
            'o1box_sc': None
        }

        # david predictions
        state = david_preds[fn]
        # mask-rcnn
        seg_frame = seg_data[fn]
        classes = seg_frame['pred_classes']
        boxes = seg_frame['pred_boxes'].astype('int64')  # x1, y1, x2, y2
        masks = seg_frame['pred_masks']  # N x HxW masks

        ## check if 'hand' in david's predictions
        if state['hand'] is not None:
            # Is there a person class in seg-data? If the mask-RCNN outputs
            # have no "person" category, leave the entry unset; empty masks
            # are filled in later.
            person_idx = np.where(classes == 0)[0]
            if person_idx.size > 0:
                hobj_info[fn]['hand'] = True

                # M=1 x 4 for computing iou
                hand_bbox = np.array([state['hand'][:4].astype('int64')])
                hobj_info[fn]['hbox_sc'] = \
                        np.hstack((hand_bbox[0], state['hand'][4:5]))

                # compute iou with mask-rcnn "person" category boxes
                ious = np_box_ops.iou(hand_bbox,
                                      boxes[person_idx])[0]  # M=1 x N --> N
                pick_mask = person_idx[np.argmax(ious)]

                # filter mask with label bbox
                hand_masks[fn] = filter_mask(masks[pick_mask], hand_bbox[0])

            ## add object information only if hand is present
            if state['obj'] is not None:
                obj_idx = np.where(classes != 0)[0]
                # If the mask-RCNN outputs have no "non-person" category,
                # leave the entry unset; empty masks are filled in later.
                if obj_idx.size > 0:
                    hobj_info[fn]['o0'] = True

                    idx = 0
                    nobjs, _ = state['obj'].shape
                    if nobjs > 1:
                        idx = np.argmax(state['obj'][:, 4])
                    obj_bbox = state['obj'][idx][:4].astype('int64')
                    # M=1 x 4 for computing iou
                    obj_bbox = np.array([obj_bbox])
                    hobj_info[fn]['o0box_sc'] = \
                        np.hstack((obj_bbox[0], state['obj'][idx][4:5]))

    ### do tracking to fill in other object boxes for frames
    tracker = Sort(max_age=5, min_hits=0)
    tracker.reset()
    # process for each frame
    tracks = {fn: {0: None, 1: None} for fn in range(oN)}
    for fn in range(oN):
        ## object tracker
        # box selected by David's hand-object model
        if hobj_info[fn]['hand'] and hobj_info[fn]['o0']:
            o0box_sc = np.array([hobj_info[fn]['o0box_sc']])
            tracks[fn][0] = tracker.update(o0box_sc)

        elif dets[1][fn]['bxsc']:  # mask-rcnn detection
            bxsc = np.array(dets[1][fn]['bxsc'])
            tracks[fn][0] = tracker.update(bxsc)

        else:  # no box
            tracks[fn][0] = tracker.update([])

    # merge
    # tracks = merge_tracklets(video_id, tracks, obj_id=0, obj_sim_thr=0.95)

    # get track ids for main object
    pick_tid = [-1, -1]
    tid = []
    for fn in range(oN):
        if hobj_info[fn]['o0']:
            tid.append(tracks[fn][0][0][-1])
    if np.unique(tid).size != 1:
        print('WARNING: David object in more than one track. Using mode')
        pick_tid[0] = mode(tid).mode[0]
    else:
        pick_tid[0] = np.unique(tid)[0]

    # show all boxes for this track
    # for fn in range(oN): idx = np.where(tracks[fn][0][:, 4] == uniq_tid)[0]; print(fn, hobj_info[fn]['o0'], tracks[fn][0][idx])
    # pdb.set_trace()

    ### if looking for more than one object, track all mask-rcnn boxes to get stable object
    if len(num_obj) == 2:
        tracker1 = Sort(max_age=12, min_hits=0)
        for fn in range(oN):
            tracks[fn][1] = tracker1.update(np.array(dets[1][fn]['bxsc']))

        # object 0 final boxes
        o0boxes = {}
        for fn in tracks.keys():
            fn_tids = tracks[fn][0][:, 4]
            intrack_idx = np.where(fn_tids == pick_tid[0])[0]
            if intrack_idx.size > 0:
                # which box is track-det?
                o0boxes[fn] = np.array(tracks[fn][0][intrack_idx, :4])
            else:
                o0boxes[fn] = None

        # tid specific information
        tid2fn = collections.defaultdict(list)  # which frames they appear in
        fn2tid = collections.defaultdict(list)  # inverse map
        tid_area = collections.defaultdict(list)  # compute normalized area
        tid_olap = collections.defaultdict(
            list)  # overlap between object 0 and object 1
        for fn in range(oN):
            for x1, y1, x2, y2, tid in tracks[fn][1]:
                fn2tid[fn].append(tid)
                tid2fn[tid].append(fn)
                tid_area[tid].append((x2 - x1) * (y2 - y1) / (H * W))
                # get box of object-0 (manipulated)
                fn_tids = tracks[fn][0][:, 4]
                intrack_idx = np.where(fn_tids == pick_tid[0])[0]
                if intrack_idx.size > 0:
                    # which box is track-det?
                    o0box = np.array(tracks[fn][0][intrack_idx, :4])
                    iou = np_box_ops.iou(np.array([[x1, y1, x2, y2]]), o0box)
                    tid_olap[tid].append(iou[0, 0])
                else:
                    tid_olap[tid].append(0.)

        for tid in tid_area.keys():
            tid_area[tid] = np.mean(tid_area[tid])
            tid_olap[tid] = np.mean(tid_olap[tid])

        # fraction of frames each track covers; assume the secondary object
        # corresponds to a long-lived track
        tid_lens = {k: 1. * len(v) / oN for k, v in tid2fn.items()}

        # normalize lengths by area of coverage and overlap with object 0
        tid_norm = {
            k: tid_lens[k] * (1 - tid_area[k]) * (1 - tid_olap[k])
            for k in tid_lens
        }
        pick_tid[1] = max(tid_norm, key=tid_norm.get)

        # show boxes of this track id
        # for fn in range(oN): idx = np.where(tracks[fn][1][:, 4] == 24)[0]; print(fn, tracks[fn][1][idx])

    ### replace None or empty masks by all-zero frames
    all_zero_mask = np.zeros((H, W)).astype(bool)
    for fn in range(oN):
        # put zeros for hand-masks
        if fn not in hand_masks:
            hand_masks[fn] = all_zero_mask.copy()
        # put mask-rcnn / zeros for obj-masks
        for obj_id in num_obj:
            # check whether the object's track id exists in this frame, then copy the mask
            fn_tids = tracks[fn][obj_id][:, 4]
            intrack_idx = np.where(fn_tids == pick_tid[obj_id])[0]
            if intrack_idx.size > 0:
                # which box is track-det?
                fn_tbox = tracks[fn][obj_id][intrack_idx, :4]
                frame_boxes = np.array(dets[1][fn]['boxes'])
                iousc = np_box_ops.iou(fn_tbox, frame_boxes)
                fn_dets_idx = np.argmax(iousc)
                # print(fn, np.max(iousc), iousc)
                if np.max(iousc) > obj_iou_th:
                    fn_mask_idx = dets[1][fn]['idx'][fn_dets_idx]
                    objs_masks[fn][obj_id] = seg_data[fn]['pred_masks'][
                        fn_mask_idx]
                else:
                    objs_masks[fn][obj_id] = all_zero_mask.copy()

            else:
                # track id absent in this frame: use an all-zero mask
                objs_masks[fn][obj_id] = all_zero_mask.copy()

    # make into lists
    list_hand_masks = [hand_masks[k] for k in sorted(hand_masks.keys())]
    list_objs_masks = [[objs_masks[k][o] for k in sorted(objs_masks.keys())] \
                        for o in sorted(num_obj)]

    print('id: {}, hand: {}, obj: {}'.format(video_id, len(list_hand_masks),
                                             len(list_objs_masks)))
    return list_hand_masks, list_objs_masks
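Both tracking functions rely on a SORT interface (as in Bewley et al.'s reference implementation, with the reset() extension seen above): update() takes an [N, 5] array of [x1, y1, x2, y2, score] rows and returns [M, 5] rows of [x1, y1, x2, y2, track_id]. A minimal usage sketch with hypothetical boxes:

tracker = Sort(max_age=2, min_hits=0)
tracker.reset()
for box_score in [[10., 10., 50., 50., 0.9],
                  [12., 11., 52., 51., 0.8]]:
    trk = tracker.update(np.array([box_score]))
    # Each row of trk is [x1, y1, x2, y2, track_id]; the id stays
    # stable across frames while the box is tracked.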