def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""

    # blobs = {'data': [], 'im_info': [], 'roidb': []}

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE *
                            2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(field_stride, anchor_sizes,
                                                  anchor_aspect_ratios)
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES,
                                              cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors

    # dataset_names = []
    for im_i, entry in enumerate(roidb['frames_info']):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)
        # dataset_names.append(entry['dataset'])

        # if 'training_16frames' in entry['file_name']:
        #     blobs['dataset_name'].append('training')
        # elif 'validation_16frames' in entry['file_name']:
        #     blobs['dataset_name'].append('validation')
        # elif 'test_16frames' in entry['file_name']:
        #     blobs['dataset_name'].append('test')
        # else:
        #     error('rpn.py: add dataset_name, not in train, val, or test')

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    minimal_roidb = list(roidb['frames_info'])

    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb
    # blobs['dataset_name'] = dataset_names

    # Always return valid=True, since RPN minibatches are valid by design
    return True
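For reference, here is a minimal sketch of the per-level schedule the FPN loop above produces, assuming the common Detectron defaults (RPN_MIN_LEVEL = 2, RPN_MAX_LEVEL = 6, RPN_ANCHOR_START_SIZE = 32); both the feature stride and the single per-level anchor size double at each level:

# Sketch only: the values below are assumed defaults, not read from cfg.
k_min, k_max = 2, 6    # assumed cfg.FPN.RPN_MIN_LEVEL / cfg.FPN.RPN_MAX_LEVEL
start_size = 32        # assumed cfg.FPN.RPN_ANCHOR_START_SIZE

for lvl in range(k_min, k_max + 1):
    field_stride = 2. ** lvl                        # anchor grid stride
    anchor_size = start_size * 2. ** (lvl - k_min)  # one anchor size per level
    print('lvl %d: stride %3d, anchor size %3d' % (lvl, field_stride, anchor_size))
# lvl 2: stride   4, anchor size  32
# ...
# lvl 6: stride  64, anchor size 512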
Example #2
def _create_anchors():
    blobs = {}
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            blobs['anchors_%d' % lvl] = foa.cell_anchors
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors  # unused here; kept for parity with add_rpn_blobs
        blobs['anchors'] = foa.cell_anchors
    return blobs
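The distinction between `cell_anchors` and `field_of_anchors` matters here: the former is the small set of per-cell anchor templates, the latter is that set tiled over the whole feature grid. A hypothetical stand-in (not the actual data_utils implementation) for generating the per-cell templates:

import numpy as np

def make_cell_anchors(size, aspect_ratios):
    # Hypothetical sketch of per-cell anchor templates: one (x1, y1, x2, y2)
    # box per aspect ratio, centered on the origin, with area ~ size**2.
    anchors = []
    for ar in aspect_ratios:
        w = size / np.sqrt(ar)   # aspect ratio taken as h / w
        h = size * np.sqrt(ar)
        anchors.append([-w / 2., -h / 2., w / 2., h / 2.])
    return np.array(anchors, dtype=np.float32)

print(make_cell_anchors(32., (0.5, 1., 2.)))  # 3 x 4 array, one row per ratio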
Example #3
def generate_all_anchors():
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2. ** lvl
        for octave in range(scales_per_octave):
            octave_scale = 2 ** (octave / float(scales_per_octave))
            for idx in range(num_aspect_ratios):
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect_ratios[idx], )
                foa = data_utils.get_field_of_anchors(
                    stride, anchor_sizes, anchor_aspect_ratios, octave, idx)
                foas.append(foa)
    return foas
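Since this builds one field of anchors per (level, octave, aspect ratio) triple, the size of `foas` is easy to sanity-check. A small count, assuming the usual RetinaNet settings (levels 3-7, 3 scales per octave, 3 aspect ratios, ANCHOR_SCALE = 4):

# Assumed values; the real ones come from cfg.FPN / cfg.RETINANET.
k_min, k_max = 3, 7
scales_per_octave = 3
num_aspect_ratios = 3
anchor_scale = 4

num_levels = k_max - k_min + 1
print(num_levels * scales_per_octave * num_aspect_ratios)  # 45 fields of anchors
print(scales_per_octave * num_aspect_ratios)               # 9 anchors per cell

# Anchor sizes grow with both the level stride and the octave:
for lvl in range(k_min, k_max + 1):
    stride = 2. ** lvl
    sizes = [stride * 2 ** (o / float(scales_per_octave)) * anchor_scale
             for o in range(scales_per_octave)]
    print(lvl, ['%.1f' % s for s in sizes])  # e.g. lvl 3 -> 32.0, 40.3, 50.8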
Example #4
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            # field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            field_stride = min(16., 2.**lvl)
            # anchor_sizes = (min(128., cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min)), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois
            )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
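The notable edit in this variant is `field_stride = min(16., 2.**lvl)`: the anchor grid stride is capped at 16, so levels 5 and 6 tile their larger anchors as densely as level 4 instead of at strides 32 and 64. A minimal illustration of the capped schedule, assuming levels 2-6:

for lvl in range(2, 7):
    default_stride = 2. ** lvl
    capped_stride = min(16., default_stride)
    print('lvl %d: default %2d -> capped %2d' % (lvl, default_stride, capped_stride))
# lvl 5: default 32 -> capped 16
# lvl 6: default 64 -> capped 16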
Example #5
def add_retinanet_blobs(blobs, im_scales, roidb, image_width,
                        image_height):  # this is called each iteration
    """Add RetinaNet blobs."""
    # RetinaNet is applied to many feature levels, as in the FPN paper
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE
    # get anchors from all levels for all scales/aspect ratios
    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2.**lvl
        for octave in range(scales_per_octave):
            octave_scale = 2**(octave / float(scales_per_octave))
            for idx in range(num_aspect_ratios):
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect_ratios[idx], )
                foa = data_utils.get_field_of_anchors(stride, anchor_sizes,
                                                      anchor_aspect_ratios,
                                                      octave, idx)
                foas.append(foa)
    all_anchors = np.concatenate([f.field_of_anchors for f in foas])

    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth for image is not allowed. Please check.'

        gt_rois = entry['boxes'][gt_inds, :] * scale
        gt_classes = entry['gt_classes'][gt_inds]

        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, image_width, image_height)
        for i, foa in enumerate(foas):
            for k, v in retinanet_blobs[i].items():
                # the way it stacks is:
                # [[anchors for image 1] + [anchors for image 2]]
                level = int(np.log2(foa.stride))
                key = '{}_fpn{}'.format(k, level)
                if k == 'retnet_roi_fg_bbox_locs':
                    v[:, 0] = im_i
                    # loc_stride: 80 * 4 if class-specific else 4
                    loc_stride = 4  # 4 coordinates per bbox prediction
                    if cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                        loc_stride *= (cfg.MODEL.NUM_CLASSES - 1)
                    anchor_ind = foa.octave * num_aspect_ratios + foa.aspect
                    # v[:, 1] is the class label (range 0-80) when bbox
                    # regression is class-specific, otherwise 0. The result is
                    # a channel offset: loc_stride * anchor_ind selects the
                    # anchor's slot, and class_label * 4 the four coords
                    # within it
                    v[:, 1] *= 4
                    v[:, 1] += loc_stride * anchor_ind

                blobs[key].append(v)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)
    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)

    N = len(roidb)
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            # compute number of anchors
            A = int(len(v) / N)
            # For the cls branch labels (per FPN level),
            # blobs['retnet_cls_labels_fpn{}'] is still a list at this step,
            # of length N x A, where N = num_images and A = num_anchors
            # (for example, N = 2, A = 9).
            # Each element of the list has shape 1 x 1 x H x W, where H, W are
            # the spatial dimensions of the current FPN level. Let a{i} denote
            # the element for anchor i (9 anchors total), so the list is
            # ordered [[a0, ..., a8], [a0, ..., a8]].
            # However, the network predicts blobs like 2 x (9 * 80) x H x W,
            # so we first concatenate the elements of each image into one
            # array and then concatenate the two images to get 2 x 9 x H x W.

            if k.find('retnet_cls_labels') >= 0:
                tmp = []
                # concat anchors within an image
                for i in range(0, len(v), A):
                    tmp.append(np.concatenate(v[i:i + A], axis=1))
                # concat images
                blobs[k] = np.concatenate(tmp, axis=0)
            else:
                # For the bbox branch elements (per FPN level), we have the
                # targets and the fg box locations in shape M x 4, where M is
                # the number of fg locations in a given image at the current
                # FPN level; the list is ordered [[a0, ..., a8], [a0, ..., a8]].
                # Concatenate them to form M x 4
                blobs[k] = np.concatenate(v, axis=0)

    return True
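The re-stacking of `retnet_cls_labels_fpn*` above is the subtle part: the list holds N * A arrays of shape 1 x 1 x H x W in image-major order, and the two concatenations produce an N x A x H x W blob. A toy sketch with assumed sizes:

import numpy as np

N, A, H, W = 2, 9, 5, 5                       # assumed toy sizes
v = [np.full((1, 1, H, W), i) for i in range(N * A)]

tmp = []
for i in range(0, len(v), A):                 # concat anchors within an image
    tmp.append(np.concatenate(v[i:i + A], axis=1))
labels = np.concatenate(tmp, axis=0)          # concat images
print(labels.shape)                           # (2, 9, 5, 5) == N x A x H x W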
Example #6
def add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height):
    """Add RetinaNet blobs."""
    # RetinaNet is applied to many feature levels, as in the FPN paper
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    # get anchors from all levels for all scales/aspect ratios
    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2. ** lvl
        for octave in range(scales_per_octave):
            octave_scale = 2 ** (octave / float(scales_per_octave))
            for idx in range(num_aspect_ratios):
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect_ratios[idx], )
                foa = data_utils.get_field_of_anchors(
                    stride, anchor_sizes, anchor_aspect_ratios, octave, idx)
                foas.append(foa)
    all_anchors = np.concatenate([f.field_of_anchors for f in foas])

    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth for image is not allowed. Please check.'

        gt_rois = entry['boxes'][gt_inds, :] * scale
        gt_classes = entry['gt_classes'][gt_inds]

        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, image_width, image_height)
        for i, foa in enumerate(foas):
            for k, v in retinanet_blobs[i].items():
                # the way it stacks is:
                # [[anchors for image 1] + [anchors for image 2]]
                level = int(np.log2(foa.stride))
                key = '{}_fpn{}'.format(k, level)
                if k == 'retnet_roi_fg_bbox_locs':
                    v[:, 0] = im_i
                    # loc_stride: 80 * 4 if class-specific else 4
                    loc_stride = 4  # 4 coordinates per bbox prediction
                    if cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                        loc_stride *= (cfg.MODEL.NUM_CLASSES - 1)
                    anchor_ind = foa.octave * num_aspect_ratios + foa.aspect
                    # v[:, 1] is the class label (range 0-80) when bbox
                    # regression is class-specific, otherwise 0. The result is
                    # a channel offset: loc_stride * anchor_ind selects the
                    # anchor's slot, and class_label * 4 the four coords
                    # within it
                    v[:, 1] *= 4
                    v[:, 1] += loc_stride * anchor_ind
                blobs[key].append(v)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)
    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)

    N = len(roidb)
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            # compute number of anchors
            A = int(len(v) / N)
            # For the cls branch labels (per FPN level),
            # blobs['retnet_cls_labels_fpn{}'] is still a list at this step,
            # of length N x A, where N = num_images and A = num_anchors
            # (for example, N = 2, A = 9).
            # Each element of the list has shape 1 x 1 x H x W, where H, W are
            # the spatial dimensions of the current FPN level. Let a{i} denote
            # the element for anchor i (9 anchors total), so the list is
            # ordered [[a0, ..., a8], [a0, ..., a8]].
            # However, the network predicts blobs like 2 x (9 * 80) x H x W,
            # so we first concatenate the elements of each image into one
            # array and then concatenate the two images to get 2 x 9 x H x W.

            if k.find('retnet_cls_labels') >= 0:
                tmp = []
                # concat anchors within an image
                for i in range(0, len(v), A):
                    tmp.append(np.concatenate(v[i: i + A], axis=1))
                # concat images
                blobs[k] = np.concatenate(tmp, axis=0)
            else:
                # For the bbox branch elements (per FPN level), we have the
                # targets and the fg box locations in shape M x 4, where M is
                # the number of fg locations in a given image at the current
                # FPN level; the list is ordered [[a0, ..., a8], [a0, ..., a8]].
                # Concatenate them to form M x 4
                blobs[k] = np.concatenate(v, axis=0)
    return True
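The `v[:, 1]` bookkeeping for `retnet_roi_fg_bbox_locs` turns a class label into a channel offset into the bbox prediction map: `loc_stride * anchor_ind` selects the anchor's slot and `class_label * 4` the four coordinates within it. A toy check with assumed values (3 aspect ratios, class-agnostic boxes):

import numpy as np

num_aspect_ratios = 3
loc_stride = 4                                    # class-agnostic: 4 channels/anchor
octave, aspect = 1, 2                             # assumed foa.octave / foa.aspect
anchor_ind = octave * num_aspect_ratios + aspect  # -> anchor 5 of the cell

v = np.zeros((3, 2))                              # 3 fg locations; col 1 = label 0
v[:, 1] *= 4
v[:, 1] += loc_stride * anchor_ind
print(v[:, 1])                                    # [20. 20. 20.] -> channel offset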
Example #7
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE *
                            2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(field_stride, anchor_sizes,
                                                  anchor_aspect_ratios)
            blobs['anchors_%d' % lvl] = foa.cell_anchors
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES,
                                              cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors
        blobs['anchors'] = foa.cell_anchors

    # Add per-image im_info and RPN targets (per FPN level when enabled)
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(im_height, im_width, foas, all_anchors,
                                       gt_rois)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(im_height, im_width, [foa], all_anchors,
                                       gt_rois)
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    if cfg.TRAIN.CPP_RPN == 'all':
        for im_i, entry in enumerate(roidb):
            scale = im_scales[im_i]
            gt_inds = np.where(entry['gt_classes'] > 0)[0]
            # gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
            # blobs['gt_boxes_%02d' % im_i] = entry['boxes'][gt_inds, :] * scale
            gt_boxes = entry['boxes'][gt_inds, :] * scale
            gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] +
                        1.) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.)
            blobs['gt_boxes_%02d' % im_i] = np.hstack(
                (gt_boxes, gt_areas[:, np.newaxis]))
            # blobs['gt_boxes_%02d' % im_i] = entry['boxes'][gt_inds, :]
            blobs['gt_classes_%02d' % im_i] = entry['gt_classes'][gt_inds]
    else:
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_attributes', 'gt_overlaps', 'is_crowd',
            'box_to_gt_ind_map', 'gt_keypoints'
        ]

        minimal_roidb = [{} for _ in range(len(roidb))]
        for i, e in enumerate(roidb):
            for k in valid_keys:
                if k in e:
                    minimal_roidb[i][k] = e[k]
        blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
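In the `cfg.TRAIN.CPP_RPN == 'all'` branch, each image gets a `gt_boxes_%02d` blob of scaled boxes augmented with their areas (legacy +1 pixel convention). A minimal sketch of that layout with one assumed box:

import numpy as np

gt_boxes = np.array([[10., 10., 49., 29.]])   # one assumed, already-scaled box
gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) * \
           (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.)
blob = np.hstack((gt_boxes, gt_areas[:, np.newaxis]))
print(blob)  # [[10. 10. 49. 29. 800.]] -> a 40 x 20 box and its area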
Example #8
def add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height):
    """Add RetinaNet blobs."""
    # RetinaNet is applied to many feature levels, as in the FPN paper
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    # get anchors from all levels for all scales/aspect ratios
    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2.**lvl
        for octave in range(scales_per_octave):
            octave_scale = 2**(octave / float(scales_per_octave))
            for idx in range(num_aspect_ratios):
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect_ratios[idx], )
                foa = data_utils.get_field_of_anchors(stride, anchor_sizes,
                                                      anchor_aspect_ratios,
                                                      octave, idx)
                foas.append(foa)
    all_anchors = np.concatenate([f.field_of_anchors for f in foas])

    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth for image is not allowed. Please check.'

        gt_rois = entry['boxes'][gt_inds, :] * scale
        gt_classes = entry['gt_classes'][gt_inds]

        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, image_width, image_height)
        for i, foa in enumerate(foas):
            for k, v in retinanet_blobs[i].items():
                level = int(np.log2(foa.stride))
                key = '{}_fpn{}'.format(k, level)
                blobs[key].append(v)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)
    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)

    N = len(roidb)
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            # compute number of anchors
            A = int(len(v) / N)
            # For the cls branch labels (per FPN level),
            # blobs['retnet_cls_labels_fpn{}'] is still a list at this step,
            # of length N x A, where N = num_images and A = num_anchors
            # (for example, N = 2, A = 9).
            # Each element of the list has shape 1 x 1 x H x W, where H, W are
            # the spatial dimensions of the current FPN level. Let a{i} denote
            # the element for anchor i (9 anchors total), so the list is
            # ordered [[a0, ..., a8], [a0, ..., a8]].
            # However, the network predicts blobs like 2 x (9 * 80) x H x W,
            # so we first concatenate the elements of each image into one
            # array and then concatenate the two images to get 2 x 9 x H x W.

            if k.find('retnet_cls_labels') >= 0 \
                    or k.find('retnet_roi_bbox_targets') >= 0:
                tmp = []
                # concat anchors within an image
                for i in range(0, len(v), A):
                    tmp.append(np.concatenate(v[i:i + A], axis=1))
                # concat images
                blobs[k] = np.concatenate(tmp, axis=0)
            else:
                # For the bbox branch elements (per FPN level), we have the
                # targets and the fg box locations in shape M x 4, where M is
                # the number of fg locations in a given image at the current
                # FPN level; the list is ordered [[a0, ..., a8], [a0, ..., a8]].
                # Concatenate them to M x 4 and add a leading batch axis
                blobs[k] = np.expand_dims(np.concatenate(v, axis=0), axis=0)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb

    return True
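Unlike Examples #5 and #6, this variant wraps the concatenated bbox blobs in an extra leading axis via `np.expand_dims`, and also routes `retnet_roi_bbox_targets` through the anchor-wise concatenation path. The shape effect of the extra axis, with assumed toy inputs:

import numpy as np

v = [np.zeros((3, 4)), np.zeros((2, 4))]      # fg targets from two images
flat = np.concatenate(v, axis=0)
print(flat.shape)                             # (5, 4): the M x 4 layout
print(np.expand_dims(flat, axis=0).shape)     # (1, 5, 4): with a batch axis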
Example #9
def add_rpn_blobs(blobs, im_scales, roidb):  # LJY: called by minibatch.py
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE *
                            2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(  # foa: field of anchors
                field_stride, anchor_sizes, anchor_aspect_ratios)
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES,
                                              cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors  # LJY: 105840 x 4 array

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale  # LJY: scale them

        # filter gt_rois based on SNIP
        ind = snip_valid(gt_rois)
        gt_rois = gt_rois[ind]

        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        # gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        # gt_boxes[:, 0] = im_i  # batch inds
        # gt_boxes[:, 1:5] = gt_rois
        # gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(im_height, im_width, foas, all_anchors,
                                       gt_rois)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(  # LJY: call the function below
                im_height, im_width, [foa], all_anchors, gt_rois)
            # rpn_blobs: 'rpn_labels_int32_wide', 'rpn_bbox_targets_wide',
            # 'rpn_bbox_inside_weights_wide', 'rpn_bbox_outside_weights_wide'
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        # if k == 'has_mask':
        #     ForkedPdb().set_trace()
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints',
        'boxes',
        'segms',
        'seg_areas',
        'gt_classes',
        'gt_overlaps',
        'is_crowd',
        'box_to_gt_ind_map',
        'gt_keypoints',
        'has_mask',
        'image',
        'parts_list',
        'part_boxes'  # LJ add image parts keys
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb

    # Always return valid=True, since RPN minibatches are valid by design
    return True
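`snip_valid` is not shown in this snippet. As a rough, hypothetical sketch of a SNIP-style filter, one could keep only the ground-truth boxes whose scaled size falls within a valid range for the current resolution; the bounds below are invented for illustration:

import numpy as np

def snip_valid(gt_rois, lo=32., hi=320.):     # hypothetical; bounds are made up
    ws = gt_rois[:, 2] - gt_rois[:, 0] + 1.
    hs = gt_rois[:, 3] - gt_rois[:, 1] + 1.
    sizes = np.sqrt(ws * hs)                  # geometric mean of box sides
    return np.where((sizes >= lo) & (sizes <= hi))[0]

boxes = np.array([[0., 0., 15., 15.], [0., 0., 99., 99.]])
print(snip_valid(boxes))                      # [1]: the 16 x 16 box is dropped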
Example #10
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois
            )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
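`blob_utils.serialize` is external to these snippets; a plausible sketch (an assumption, not the verified Detectron implementation) is to pickle the roidb into a flat uint8 array so it can travel through the blob pipeline:

import pickle
import numpy as np

def serialize(obj):                           # assumed behavior, for illustration
    return np.frombuffer(pickle.dumps(obj), dtype=np.uint8)

def deserialize(arr):
    return pickle.loads(arr.tobytes())

minimal_roidb = [{'boxes': np.zeros((1, 4))}]
blob = serialize(minimal_roidb)
print(blob.dtype, blob.ndim)                  # uint8 1
print(deserialize(blob)[0]['boxes'].shape)    # (1, 4)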
Example #11
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE *
                            2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(field_stride, anchor_sizes,
                                                  anchor_aspect_ratios)
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES,
                                              cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        # Added to ignore anchors that have overlap with crowd area
        ignore_inds = np.where(entry['is_crowd'][gt_inds] == 1)[0]
        if len(ignore_inds) == 0:
            ignore_inds = None

        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs, vis_labels, vis_anchors = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois, ignore_inds)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs, vis_labels, vis_anchors = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois, ignore_inds)
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

        if cfg.TRAIN.VIS_ANCHOR:
            im = blobs['data'][0, :, :, :].squeeze() + np.array(
                cfg.PIXEL_MEANS).transpose((2, 0, 1))
            idx = np.where(vis_labels == 1)[0]
            anchor_bboxes = vis_anchors[idx, :]
            if not osp.exists(cfg.TRAIN.VIS_ANCHOR_DIR):
                os.makedirs(cfg.TRAIN.VIS_ANCHOR_DIR)
            save_path = osp.join(
                cfg.TRAIN.VIS_ANCHOR_DIR,
                osp.splitext(os.path.basename(entry['image']))[0])
            vis2d_utils.draw_pred_and_gt_tensor(im, gt_rois, save_path,
                                                anchor_bboxes)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    if cfg.LESION.USE_POSITION or cfg.LESION.POSITION_RCNN or cfg.LESION.SHALLOW_POSITION or cfg.LESION.MM_POS:
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
            'gt_keypoints', 'z_position'
        ]
    else:
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
            'gt_keypoints'
        ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb

    # Always return valid=True, since RPN minibatches are valid by design
    return True
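One thing to note about the crowd-ignore logic in this variant: `gt_inds` already selects entries with `is_crowd == 0`, so `entry['is_crowd'][gt_inds] == 1` can never match and `ignore_inds` is always `None` as written. A minimal check with toy labels:

import numpy as np

gt_classes = np.array([1, 2, 3])
is_crowd = np.array([0, 1, 0])

gt_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0]
ignore_inds = np.where(is_crowd[gt_inds] == 1)[0]
print(gt_inds, ignore_inds)  # [0 2] [] -> empty, so the branch yields None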
Example #12
def add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height):
    """Add RetinaNet blobs."""
    # RetinaNet is applied to many feature levels, as in the FPN paper
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    # get anchors from all levels for all scales/aspect ratios
    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2. ** lvl
        for octave in range(scales_per_octave):
            octave_scale = 2 ** (octave / float(scales_per_octave))
            for idx in range(num_aspect_ratios):
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect_ratios[idx], )
                foa = data_utils.get_field_of_anchors(
                    stride, anchor_sizes, anchor_aspect_ratios, octave, idx)
                foas.append(foa)
    all_anchors = np.concatenate([f.field_of_anchors for f in foas])

    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth for image is not allowed. Please check.'


        # --------------------------- vis image -------------------
        # print("image:", entry['image'])
        img = cv2.imread(entry['image'])
        bb, gg, rr = cv2.split(img)
        img = cv2.merge([rr, gg, bb])
        if entry['flipped']:
            img = cv2.flip(img, 1)

        # --------------------- segms --------------------
        rects = []
        coss = []
        rois = []

        for seg_index in range(len(entry['segms'])):
            seg = entry['segms'][seg_index]
            seg = [y for s in seg for y in s]
            # print(seg)
            try:
                seg = np.array(seg, dtype=np.float32) * scale
                cnt = np.reshape(seg, (-1, 1, 2))
                rect = cv2.minAreaRect(cnt)
            except Exception:
                # fall back to an axis-aligned rect built from the gt box
                w = entry['boxes'][seg_index][2] - entry['boxes'][seg_index][0] + 1
                h = entry['boxes'][seg_index][3] - entry['boxes'][seg_index][1] + 1
                x = entry['boxes'][seg_index][0] + 0.5 * w
                y = entry['boxes'][seg_index][1] + 0.5 * h
                rect = ((x * scale, y * scale), (w * scale, h * scale), 0.0)

            # ---------- restrict theta ---------------------
            # if (rect[1][0] == rect[1][1]):   # a = b
            #     rect = ((rect[0][0], rect[0][1]), (rect[1][0], rect[1][1]), 0.0)
            # if (rect < -45):
            #     rect = ((rect[0][0], rect[0][1]), (rect[1][1], rect[1][0]), rect[2] + 90.0)

            # --------- restrict a > b & theta -------------------
            if rect[1][0] < rect[1][1]:
                rect = ((rect[0][0], rect[0][1]), (rect[1][1], rect[1][0]), rect[2] + 90.0)

            # -------------- vis rect -----------------------
            # w = entry['boxes'][seg_index][2] - entry['boxes'][seg_index][0] + 1
            # h = entry['boxes'][seg_index][3] - entry['boxes'][seg_index][1] + 1
            # left = entry['boxes'][seg_index][0]
            # top = entry['boxes'][seg_index][1]
            # cv2.rectangle(img, (int(left), int(top)), (int(left + w), int(top + h)), (0, 255, 0), 2)

            # color = [int(random.random() * 255), int(random.random() * 255), int(random.random() * 255)]
            # ell = (rect[0][0] / scale, rect[0][1] / scale), (rect[1][0] / scale, rect[1][1] / scale), rect[2]
            # cv2.ellipse(img, ell, color, 2)

            # if rect[1][0] < rect[1][1]:
            #     rect = ((rect[0][0], rect[0][1]), (rect[1][1], rect[1][0]), rect[2] - 90.0)

            # print("rect", rect)
            x1 = rect[0][0]
            y1 = rect[0][1]
            a = rect[1][0]
            b = rect[1][1]
            theta = rect[2]

            # axis-aligned bounding box of the rotated ellipse via its
            # quadratic form M = T^T C T
            theta_pi = theta * np.pi / 180
            T = np.array([[np.cos(theta_pi), -np.sin(theta_pi)],
                          [np.sin(theta_pi), np.cos(theta_pi)]])
            C = np.array([[4.0 / (a * a), 0], [0, 4.0 / (b * b)]])
            M = np.dot(np.dot(np.transpose(T), C), T)
            dy = np.sqrt(4 * M[0][0] / (4 * M[0][0] * M[1][1] - (M[0][1] + M[1][0]) ** 2))
            dx = np.sqrt(4 * M[1][1] / (4 * M[0][0] * M[1][1] - (M[0][1] + M[1][0]) ** 2))

            x_min = x1 - dx
            x_max = x1 + dx
            y_min = y1 - dy
            y_max = y1 + dy

            # bbox_fit = [max(x_min, 0), max(y_min, 0), x_max - x_min, y_max - y_min]
            # print(x_min, x_max, y_min, y_max)

            rois.append([x_min, y_min, x_max, y_max])
            rects.append([(x_min + x_max) / 2.0, (y_min + y_max) / 2.0,
                          x_max - x_min, y_max - y_min])
            # rois.append([rect[0][0] - rect[1][0] / 2.0, rect[0][1] - rect[1][1] / 2.0, rect[0][0] + rect[1][0] / 2.0, rect[0][1] + rect[1][1] / 2.0])
            # rects.append([rect[0][0], rect[0][1], rect[1][0], rect[1][1]])  # x, y, a, b

            cos = []
            theta_segs = 4
            for i in range(theta_segs):
                cos.append(math.cos((rect[2] * 2.0 - i * (360.0 / theta_segs)) / 180.0 * math.pi))
            coss.append(cos)

        # -------------------- gt rects -------------
        gt_rects = np.array(rects)[gt_inds, :]  # [[x, y, a, b]]
        gt_cos = np.array(coss)[gt_inds, :]     # [[cos(2t - i * 90), ...]]
        gt_rois = np.array(rois, dtype=np.float32)[gt_inds, :]

        # gt_rois = entry['boxes'][gt_inds, :] * scale
        gt_classes = entry['gt_classes'][gt_inds]

        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # ------------------ edit func -----------------------------
        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, gt_rects, gt_cos, image_width, image_height)

        for i, foa in enumerate(foas):
            for k, v in retinanet_blobs[i].items():
                # the way it stacks is:
                # [[anchors for image 1] + [anchors for image 2]]
                level = int(np.log2(foa.stride))
                key = '{}_fpn{}'.format(k, level)
                if k == 'retnet_roi_fg_bbox_locs':
                    v[:, 0] = im_i
                    # loc_stride: 80 * 4 if class-specific else 4
                    loc_stride = 4  # 4 coordinates per bbox prediction
                    if cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                        loc_stride *= (cfg.MODEL.NUM_CLASSES - 1)
                    anchor_ind = foa.octave * num_aspect_ratios + foa.aspect
                    # v[:, 1] is the class label (range 0-80) when bbox
                    # regression is class-specific, otherwise 0. The result is
                    # a channel offset: loc_stride * anchor_ind selects the
                    # anchor's slot, and class_label * 4 the four coords
                    # within it
                    v[:, 1] *= 4
                    v[:, 1] += loc_stride * anchor_ind
                blobs[key].append(v)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)
    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)

    N = len(roidb)
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            # compute number of anchors
            A = int(len(v) / N)
            # For the cls branch labels (per FPN level),
            # blobs['retnet_cls_labels_fpn{}'] is still a list at this step,
            # of length N x A, where N = num_images and A = num_anchors
            # (for example, N = 2, A = 9).
            # Each element of the list has shape 1 x 1 x H x W, where H, W are
            # the spatial dimensions of the current FPN level. Let a{i} denote
            # the element for anchor i (9 anchors total), so the list is
            # ordered [[a0, ..., a8], [a0, ..., a8]].
            # However, the network predicts blobs like 2 x (9 * 80) x H x W,
            # so we first concatenate the elements of each image into one
            # array and then concatenate the two images to get 2 x 9 x H x W.

            if k.find('retnet_cls_labels') >= 0:
                tmp = []
                # concat anchors within an image
                for i in range(0, len(v), A):
                    tmp.append(np.concatenate(v[i: i + A], axis=1))
                # concat images
                blobs[k] = np.concatenate(tmp, axis=0)
            else:
                # For the bbox branch elements (per FPN level), we have the
                # targets and the fg box locations in shape M x 4, where M is
                # the number of fg locations in a given image at the current
                # FPN level; the list is ordered [[a0, ..., a8], [a0, ..., a8]].
                # Concatenate them to form M x 4
                blobs[k] = np.concatenate(v, axis=0)
    return True
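The quadratic-form block in this example computes the axis-aligned bounding box of a rotated ellipse with full axes a, b and angle theta; it agrees with the closed form dx = sqrt((a/2 * cos t)^2 + (b/2 * sin t)^2), and dy with sin/cos swapped. A small numeric check with assumed values:

import numpy as np

a, b, theta = 40., 20., 30.                   # assumed full axes and angle (deg)
t = np.deg2rad(theta)

# closed-form half-extents of the rotated ellipse's bounding box
dx_ref = np.sqrt((a / 2. * np.cos(t)) ** 2 + (b / 2. * np.sin(t)) ** 2)
dy_ref = np.sqrt((a / 2. * np.sin(t)) ** 2 + (b / 2. * np.cos(t)) ** 2)

# quadratic-form version used in the snippet
T = np.array([[np.cos(t), -np.sin(t)], [np.sin(t), np.cos(t)]])
C = np.array([[4. / (a * a), 0.], [0., 4. / (b * b)]])
M = T.T.dot(C).dot(T)
den = 4 * M[0, 0] * M[1, 1] - (M[0, 1] + M[1, 0]) ** 2
dx = np.sqrt(4 * M[1, 1] / den)
dy = np.sqrt(4 * M[0, 0] / den)
print(np.allclose([dx, dy], [dx_ref, dy_ref]))  # True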