Example #1
def decode(boxes, scores, all_anchors, ih, iw):
    """Decode outputs into boxes
  Parameters
  ---------
  boxes: an array of shape (1, h, w, Ax4)
  scores: an array of shape (1, h, w, Ax2),
  all_anchors: an array of shape (1, h, w, Ax4), [x1, y1, x2, y2]
  
  Returns
  --------
  final_boxes: of shape (R x 4)
  classes: of shape (R) in {0,1,2,3... K-1}
  scores: of shape (R) in [0 ~ 1]
  """
    h, w = boxes.shape[1], boxes.shape[2]
    if all_anchors is None:
        stride = 2**int(round(np.log2((iw + 0.0) / w)))
        all_anchors = anchors_plane(h, w, stride=stride)
    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
      'Anchor layer shape error %d vs %d vs %d' % (scores.shape[0], boxes.shape[0], all_anchors.shape[0])
    boxes = bbox_transform_inv(all_anchors, boxes)
    classes = np.argmax(scores, axis=1)
    scores = scores[:, 1]
    final_boxes = np.zeros((boxes.shape[0], 4))
    for i in np.arange(final_boxes.shape[0]):
        c = classes[i] * 4
        final_boxes[i, 0:4] = boxes[i, c:c + 4]
    final_boxes = clip_boxes(final_boxes, (ih, iw))
    return final_boxes, classes, scores
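
As a small aside, the stride inference inside decode() recovers the downscale factor by rounding the ratio of the input width to the feature-map width to the nearest power of two. A minimal, self-contained sketch of just that step, using made-up sizes:

import numpy as np

# Made-up (input_width, feature_width) pairs; the expression mirrors the
# stride computation used in decode() when all_anchors is None.
for iw, w in [(512, 128), (512, 64), (800, 50)]:
    stride = 2 ** int(round(np.log2((iw + 0.0) / w)))
    print(iw, w, stride)   # -> strides 4, 8, 16
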
def _build_anchors(self):
    if len(self.ANCHORS) == 0:
        ih, iw = cfg.input_size
        all_anchors = []
        for i, stride in enumerate(cfg.strides):
            height, width = int(ih / stride), int(iw / stride)
            scales = cfg.anchor_scales[i] if isinstance(cfg.anchor_scales[i], list) else cfg.anchor_scales
            anchors = anchors_plane(height, width, stride,
                                    scales=scales,
                                    ratios=cfg.anchor_ratios,
                                    base=cfg.anchor_base)
            all_anchors.append(anchors)
        self.ANCHORS = all_anchors
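
The loop in _build_anchors() walks the feature pyramid once per stride and caches one anchor plane per level. A rough standalone sketch of the per-level grid sizes it produces, assuming a 512x512 input and the example strides from the encode() docstrings below (the config values here are placeholders, not the real cfg):

ih, iw = 512, 512                 # assumed input size
strides = [4, 8, 16, 32]          # example strides, as in the encode() docstrings
for stride in strides:
    height, width = int(ih / stride), int(iw / stride)
    # anchors_plane(height, width, stride, ...) would return an
    # (height, width, A, 4) array of [x1, y1, x2, y2] anchors for this level
    print(stride, height, width)  # 4 -> 128x128, 8 -> 64x64, 16 -> 32x32, 32 -> 16x16
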
Example #3
def encode(gt_boxes, all_anchors, height, width, stride):
    """Matching and Encoding groundtruth into learning targets
  Sampling
  
  Parameters
  ---------
  gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
  all_anchors: an array of shape (h, w, A, 4),
  width: width of feature
  height: height of feature
  stride: downscale factor w.r.t the input size, e.g., [4, 8, 16, 32]
  Returns
  --------
  labels:   Nx1 array in [0, num_classes]
  anchors:  Sampled anchors
  bbox_targets: N x (4) regression targets
  bbox_inside_weights: N x (4), in {0, 1} indicating to which class is assigned.
  """
    # TODO: speedup this module
    if all_anchors is None:
        all_anchors = anchors_plane(height, width, stride=stride)

    # anchors, inds_inside, total_anchors
    all_anchors = all_anchors.reshape((-1, 4))
    inds_inside = np.where((all_anchors[:, 0] >= 0) & (all_anchors[:, 1] >= 0)
                           & (all_anchors[:, 2] < width * stride)
                           & (all_anchors[:, 3] < height * stride))[0]
    anchors = all_anchors[inds_inside, :]
    total_anchors = all_anchors.shape[0]

    # choose boxes to assign to this stride
    # TODO gt assignment outside
    areas = (gt_boxes[:, 3] - gt_boxes[:, 1] + 1) * (gt_boxes[:, 2] -
                                                     gt_boxes[:, 0] + 1)
    ks = np.floor(4 + np.log2(np.sqrt(areas) / 224.0))
    K = int(np.log2(stride))
    inds = np.where((K == ks + 4))[0]
    if inds.size > 0:
        gt_boxes = gt_boxes[inds]
    else:
        labels = np.zeros((total_anchors), dtype=np.float32)
        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
        bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
        return labels, bbox_targets, bbox_inside_weights

    labels = np.zeros((anchors.shape[0], ), dtype=np.float32)
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

    gt_assignment = overlaps.argmax(axis=1)  # (A)
    max_overlaps = overlaps[np.arange(len(inds_inside)), gt_assignment]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]

    if False:
        # this is sensitive to boxes with little overlap, no need!
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # fg label: for each gt, assign the anchor with the highest overlap, regardless of its overlap value
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.fg_threshold] = 1
    # print (np.min(labels), np.max(labels))

    # subsample positive labels if there are too many
    num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(fg_inds,
                                        size=(len(fg_inds) - num_fg),
                                        replace=False)
        labels[disable_inds] = -1

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many
    num_bg = cfg.FLAGS.rpn_batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds,
                                        size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 1

    # mapping to whole outputs
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)

    labels = labels.reshape((1, height, width, -1))
    bbox_targets = bbox_targets.reshape((1, height, width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1))

    return labels, bbox_targets, bbox_inside_weights
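
The ks/K computation above routes each ground-truth box to a pyramid level by its area; encode() then compares that per-box level against log2(stride) to decide which boxes this call is responsible for. A small standalone check of just the area-to-level formula, using made-up boxes:

import numpy as np

# Made-up ground-truth boxes [x1, y1, x2, y2]; by the formula a box whose
# sqrt(area) is about 224 lands on k = 4, smaller boxes on lower k.
gt = np.array([[0, 0, 55, 55],
               [0, 0, 223, 223],
               [0, 0, 450, 450]], dtype=np.float32)
areas = (gt[:, 3] - gt[:, 1] + 1) * (gt[:, 2] - gt[:, 0] + 1)
ks = np.floor(4 + np.log2(np.sqrt(areas) / 224.0))
print(ks)   # -> [2. 4. 5.]
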
Example #4
def encode(gt_boxes, all_anchors, height, width, stride):
    """Matching and Encoding groundtruth into learning targets
  Sampling
  
  Parameters
  ---------
  gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
  all_anchors: an array of shape (h, w, A, 4),
  width: width of feature
  height: height of feature
  stride: downscale factor w.r.t the input size, e.g., [4, 8, 16, 32]
  Returns
  --------
  labels:   Nx1 array in [0, num_classes]
  bbox_targets: N x (4) regression targets
  bbox_inside_weights: N x (4), in {0, 1} indicating to which class is assigned.
  """
    # TODO: speedup this module
    if all_anchors is None:
        all_anchors = anchors_plane(height, width, stride=stride)

    # anchors, inds_inside, total_anchors
    border = cfg.FLAGS.allow_border
    all_anchors = all_anchors.reshape((-1, 4))
    inds_inside = np.where((all_anchors[:, 0] >= -border)
                           & (all_anchors[:, 1] >= -border)
                           & (all_anchors[:, 2] < (width * stride) + border) &
                           (all_anchors[:, 3] < (height * stride) + border))[0]
    anchors = all_anchors[inds_inside, :]
    total_anchors = all_anchors.shape[0]

    labels = np.zeros((anchors.shape[0], ), dtype=np.float32)

    if gt_boxes.size > 0:
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

        # if _DEBUG:
        #     print ('gt_boxes shape: ', gt_boxes.shape)
        #     print ('anchors shape: ', anchors.shape)
        #     print ('overlaps shape: ', overlaps.shape)

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(len(inds_inside)), gt_assignment]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]

        if True:
            # this is sensitive to boxes with little overlap, no need!
            # gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

            # fg label: for each gt, hard-assign the anchor with the highest overlap, regardless of its overlap value
            labels[gt_argmax_overlaps] = 1

            # exclude examples with little overlaps
            # added later
            excludes = np.where(gt_max_overlaps < cfg.FLAGS.bg_threshold)[0]
            labels[gt_argmax_overlaps[excludes]] = -1

            if _DEBUG:
                min_ov = np.min(gt_max_overlaps)
                max_ov = np.max(gt_max_overlaps)
                mean_ov = np.mean(gt_max_overlaps)
                if min_ov < cfg.FLAGS.bg_threshold:
                    LOG('ANCHOREncoder: overlaps: (min %.3f mean:%.3f max:%.3f), stride: %d, shape:(h:%d, w:%d)'
                        % (min_ov, mean_ov, max_ov, stride, height, width))
                    worst = gt_boxes[np.argmin(gt_max_overlaps)]
                    anc = anchors[
                        gt_argmax_overlaps[np.argmin(gt_max_overlaps)], :]
                    LOG('ANCHOREncoder: worst case: overlap: %.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)'
                        % (min_ov, worst[0], worst[1], worst[2], worst[3],
                           worst[4], anc[0], anc[1], anc[2], anc[3]))

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.FLAGS.fg_threshold] = 1
        # print (np.min(labels), np.max(labels))

        # subsample positive labels if there are too many
        num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(fg_inds,
                                            size=(len(fg_inds) - num_fg),
                                            replace=False)
            labels[disable_inds] = -1
    else:
        # if there is no gt
        labels[:] = 0

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many
    num_fg = np.sum(labels == 1)
    num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds,
                                        size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 1

    # mapping to whole outputs
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)

    labels = labels.reshape((1, height, width, -1))
    bbox_targets = bbox_targets.reshape((1, height, width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1))

    return labels, bbox_targets, bbox_inside_weights
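
Both encode() variants cap how many anchors enter the loss by flipping surplus positives (and negatives) to -1, which marks them as ignored. A minimal standalone sketch of that subsampling step, with a made-up label vector and cap (the real limits come from cfg.FLAGS):

import numpy as np

np.random.seed(0)
# Made-up labels: 1 = foreground, 0 = background, -1 = ignored.
labels = np.random.choice([0, 1], size=100, p=[0.8, 0.2]).astype(np.float32)

num_fg = 16                          # assumed cap; cfg.FLAGS values in the real code
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
    disable_inds = np.random.choice(fg_inds,
                                    size=len(fg_inds) - num_fg,
                                    replace=False)
    labels[disable_inds] = -1        # excess positives are ignored by the loss

print(np.sum(labels == 1), np.sum(labels == 0), np.sum(labels == -1))
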
def data_layer(img_name,
               bboxes,
               classes,
               masks,
               mask,
               is_training,
               ANCHORS=[]):
    """ Returns the learning labels
    1. resize image, boxes, masks, mask
    2. data augmentation
    3. build learning labels
    """
    im = cv2.imread(img_name).astype(np.float32)
    if im.size == im.shape[0] * im.shape[1]:
        im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
    im = im.astype(np.float32)

    strides = cfg.strides

    if is_training:
        im, bboxes, classes, masks, mask, ori_im = \
            preprocess_train(im, bboxes, classes, masks, mask, cfg.input_size, cfg.min_size,
                             use_augment=cfg.use_augment, training_scale=cfg.training_scale)
        gt_boxes = np.hstack((bboxes, classes[:, np.newaxis]))
        # layer_ids = assign.assign_boxes(gt_boxes, min_k=int(np.log2(strides[0])), max_k=int(np.log2(strides[-1])),
        #                                 base_size=cfg.base_size)
    else:
        im, ori_im = \
            preprocess_test(im, cfg.input_size)
        masks, mask = [], []

    ih, iw = im.shape[0:2]

    ANNOTATIONS = []
    # if is_training:
    ANNOTATIONS = [bboxes, classes]

    if len(ANCHORS) == 0:
        for i, stride in enumerate(strides):

            height, width = int(ih / stride), int(iw / stride)
            scales = cfg.anchor_scales[i] if isinstance(
                cfg.anchor_scales[i], list) else cfg.anchor_scales
            all_anchors = anchors_plane(height,
                                        width,
                                        stride,
                                        scales=scales,
                                        ratios=cfg.anchor_ratios,
                                        base=cfg.anchor_base)
            ANCHORS.append(all_anchors)

    all_anchors = []
    for i in range(len(ANCHORS)):
        all_anchors.append(ANCHORS[i].reshape((-1, 4)))
    all_anchors = np.vstack(all_anchors)

    # building learning labels
    TARGETS = []
    if is_training:
        labels, label_weights, bbox_targets, bbox_inside_weights = \
            anchor.encode(gt_boxes, all_anchors)
        TARGETS = [labels, label_weights, bbox_targets,
                   bbox_inside_weights]  # flat (N, ), (N, 4), (N, 4)

    # if _DEBUG:
    #     np.set_printoptions(precision=3)
    #     bb = bbox_targets[labels > 0, :]
    #     mean = np.abs(bb).mean(0)
    #     max = np.abs(bb).max()
    #     s = bbox_targets[labels > 0, :].std()

    return im, TARGETS, masks, mask, ori_im, ANNOTATIONS
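
data_layer() packs the per-image annotations into the (G, 5) gt_boxes layout that encode() expects, one [x1, y1, x2, y2, class] row per object. A tiny standalone illustration of that hstack, with made-up boxes and class ids:

import numpy as np

# Made-up annotations: two boxes [x1, y1, x2, y2] and their class ids.
bboxes = np.array([[10., 20., 100., 120.],
                   [30., 40., 200., 220.]], dtype=np.float32)
classes = np.array([1., 3.], dtype=np.float32)

# Same hstack as in data_layer(): each row becomes [x1, y1, x2, y2, class].
gt_boxes = np.hstack((bboxes, classes[:, np.newaxis]))
print(gt_boxes.shape)   # (2, 5)
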
Example #6
        s = np.random.randint(20, 50, (50, 2))
        s = boxes + s
        boxes = np.concatenate((boxes, s), axis=1)
        gt_boxes = np.hstack((boxes, classes))
        # gt_boxes = boxes

        N = 100
        rois = np.random.randint(10, 50, (N, 2))
        s = np.random.randint(0, 20, (N, 2))
        s = rois + s
        rois = np.concatenate((rois, s), axis=1)
        indexs = np.arange(N)

        all_anchors = anchors_plane(200,
                                    300,
                                    stride=4,
                                    scales=[2, 4, 8, 16, 32],
                                    ratios=[0.5, 1, 2.0],
                                    base=16)
        labels, bbox_targets, bbox_inside_weights = encode(
            gt_boxes,
            all_anchors=all_anchors,
            height=200,
            width=300,
            stride=4,
            indexs=indexs)

        all_anchors = anchors_plane(100,
                                    150,
                                    stride=8,
                                    scales=[2, 4, 8, 16, 32],
                                    ratios=[0.5, 1, 2.0],