Exemple #1
0
def decode(mask_targets, rois, classes, ih, iw):
    """Decode per-ROI mask predictions into one painted label image.

    Params
    ------
    mask_targets: of shape (N, h, w, K), one mask per ROI and per class
    rois: of shape (N, 4) [x1, y1, x2, y2]
    classes: of shape (N,) or (N, 1), the class-id of each roi
    ih: image height
    iw: image width

    Returns
    ------
    M: a painted float32 image with all masks, of shape (ih, iw), in [0, K]
    """
    Mask = np.zeros((ih, iw), dtype=np.float32)
    assert rois.shape[0] == mask_targets.shape[0], \
        '%s rois vs %d masks' % (rois.shape[0], mask_targets.shape[0])
    num = rois.shape[0]

    # Flatten so that each class id is a scalar; indexing with a length-1
    # array would add an extra axis to the selected mask.
    class_ids = np.asarray(classes).reshape(-1)

    # cv2.resize and slice bounds both require integers, while the clipped
    # ROIs may be floating point.
    rois = np.asarray(clip_boxes(rois, (ih, iw))).astype(np.int64)

    for i in range(num):
        k = int(class_ids[i])
        mask = mask_targets[i, :, :, k]
        x, y = int(rois[i, 0]), int(rois[i, 1])
        # Box extents are inclusive, hence the +1.
        w = int(rois[i, 2]) - x + 1
        h = int(rois[i, 3]) - y + 1
        # cv2.resize takes the target size as (width, height).
        mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
        mask *= k

        # paint
        # NOTE(review): the whole ROI rectangle is overwritten, so a later
        # ROI erases earlier masks it overlaps, including where it is zero.
        Mask[y:y + h, x:x + w] = mask

    return Mask
Exemple #2
0
def decode(boxes, scores, all_anchors, ih, iw, num_classes=None):
    """Decode anchor-relative outputs into clipped boxes.

    Parameters
    ---------
    boxes: an array reshaped here to (R, 4), e.g. of shape (1, h, w, Ax4)
    scores: an array reshaped here to (R, num_classes)
    all_anchors: an array reshaped here to (R, 4), [x1, y1, x2, y2]
    ih: image height used for clipping
    iw: image width used for clipping
    num_classes: number of score columns; defaults to cfg.num_classes

    Returns
    --------
    final_boxes: of shape (R x 4)
    classes: of shape (R), int32 argmax over the score columns
    scores: of shape (R, num_classes), the reshaped input scores
    """
    num_classes = cfg.num_classes if num_classes is None else num_classes
    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, num_classes))
    # The message must read `.shape`; `.reshape[0]` would raise TypeError and
    # hide the real shape mismatch.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])
    # For any other encoding value the raw `boxes` are passed through
    # undecoded (unchanged from the original behaviour).
    if cfg.rpn_box_encoding == 'fastrcnn':
        boxes = bbox_transform.bbox_transform_inv(all_anchors, boxes)
    elif cfg.rpn_box_encoding == 'linear':
        boxes = bbox_transform.bbox_transform_inv_linear(all_anchors, boxes)
    classes = np.argmax(scores, axis=1).astype(np.int32)
    final_boxes = bbox_transform.clip_boxes(boxes, (ih, iw))
    return final_boxes, classes, scores
Exemple #3
0
def decode(boxes, scores, all_anchors, ih, iw):
    """Decode anchor-relative outputs into clipped boxes.

    Parameters
    ---------
    boxes: an array of shape (1, h, w, Ax4)
    scores: an array of shape (1, h, w, Ax2)
    all_anchors: an array of shape (1, h, w, Ax4), [x1, y1, x2, y2], or None
        to generate the anchors from the feature-map size
    ih: image height used for clipping
    iw: image width, used for clipping and to derive the stride

    Returns
    --------
    final_boxes: of shape (R x 4)
    classes: of shape (R), argmax over the two score columns
    scores: of shape (R), column 1 of the reshaped scores
    """
    h, w = boxes.shape[1], boxes.shape[2]
    # `is None`, not `== None`: comparing an ndarray with `==` is elementwise
    # and its truth value is ambiguous.
    if all_anchors is None:
        # Stride is the nearest power of two to the image/feature width ratio.
        stride = 2 ** int(round(np.log2((iw + 0.0) / w)))
        all_anchors = anchors_plane(h, w, stride=stride)
    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])
    boxes = bbox_transform_inv(all_anchors, boxes)
    classes = np.argmax(scores, axis=1)
    scores = scores[:, 1]
    # `boxes` was flattened to four columns above, so there is no per-class
    # column group to pick from; slicing at classes[i] * 4 would be empty for
    # class 1. Presumably bbox_transform_inv keeps that (R, 4) shape - confirm.
    final_boxes = clip_boxes(boxes, (ih, iw))
    return final_boxes, classes, scores
Exemple #4
0
def decode(boxes, scores, all_anchors, ih, iw):
    """Decode anchor-relative outputs into clipped boxes.

    Parameters
    ---------
    boxes: an array of shape (1, h, w, Ax4)
    scores: an array of shape (1, h, w, Ax2)
    all_anchors: an array of shape (1, h, w, Ax4), [x1, y1, x2, y2]
        (per the original note, pyramid_network.py already passes these in
        the flattened form [-1, 4])
    ih: image height used for clipping
    iw: image width used for clipping

    Returns
    --------
    final_boxes: of shape (R x 4)
    classes: of shape (R), int32 argmax over the two score columns
    scores: of shape (R), column 1 of the reshaped scores
    """
    # all_anchors must be supplied by the caller; it is not regenerated here.
    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))
    # The message must read `.shape`; `.reshape[0]` would raise TypeError and
    # hide the real shape mismatch.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])
    boxes = bbox_transform_inv(all_anchors, boxes)
    classes = np.argmax(scores, axis=1).astype(np.int32)
    # 0 is background, 1 is foreground: keep the foreground probability.
    scores = scores[:, 1]
    # Per the original note, clipping does not reduce the number of rois.
    final_boxes = clip_boxes(boxes, (ih, iw))
    return final_boxes, classes, scores
Exemple #5
0
def decode(mask_targets, rois, classes, ih, iw):
  """Decode per-ROI mask predictions into one painted label image.

  Params
  ------
  mask_targets: of shape (N, h, w, K), one mask per ROI and per class
  rois: of shape (N, 4) [x1, y1, x2, y2]
  classes: of shape (N,) or (N, 1), the class-id of each roi
  ih: image height
  iw: image width

  Returns
  ------
  M: a painted float32 image with all masks, of shape (ih, iw), in [0, K]
  """
  Mask = np.zeros((ih, iw), dtype=np.float32)
  assert rois.shape[0] == mask_targets.shape[0], \
    '%s rois vs %d masks' % (rois.shape[0], mask_targets.shape[0])
  num = rois.shape[0]

  # Flatten so that each class id is a scalar; indexing with a length-1
  # array would add an extra axis to the selected mask.
  class_ids = np.asarray(classes).reshape(-1)

  # cv2.resize and slice bounds both require integers, while the clipped
  # ROIs may be floating point.
  rois = np.asarray(clip_boxes(rois, (ih, iw))).astype(np.int64)

  for i in range(num):
    k = int(class_ids[i])
    mask = mask_targets[i, :, :, k]
    x, y = int(rois[i, 0]), int(rois[i, 1])
    # Box extents are inclusive, hence the +1.
    w = int(rois[i, 2]) - x + 1
    h = int(rois[i, 3]) - y + 1
    # cv2.resize takes the target size as (width, height).
    mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
    mask *= k

    # paint
    # NOTE(review): the whole ROI rectangle is overwritten, so a later
    # ROI erases earlier masks it overlaps, including where it is zero.
    Mask[y:y + h, x:x + w] = mask

  return Mask
Exemple #6
0
def decode(boxes, scores, all_anchors, image_height, image_width):
    """Decode anchor-relative outputs into clipped boxes.

    Parameters
    ---------
    boxes: an array of shape (1, h, w, Ax4)
    scores: an array of shape (1, h, w, Ax2)
    all_anchors: an array of shape (1, h, w, Ax4), [x1, y1, x2, y2]
    image_height: image height used for clipping
    image_width: image width used for clipping

    Returns
    --------
    final_boxes: of shape (R x 4)
    classes: of shape (R), int32 argmax over the two score columns
    scores: of shape (R), column 1 of the reshaped scores
    """
    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))

    # The message must read `.shape`; `.reshape[0]` would raise TypeError and
    # hide the real shape mismatch.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])

    boxes = bbox_transform_inv(all_anchors, boxes)
    boxes = clip_boxes(boxes, (image_height, image_width))
    classes = np.argmax(scores, axis=1).astype(np.int32)
    scores = scores[:, 1]

    return boxes, classes, scores
def _offset_boxes(boxes, im_shape, scale, offs, flip):
    if len(boxes) == 0:
        return boxes
    boxes = np.asarray(boxes, dtype=np.float)
    boxes *= scale
    boxes[:, 0::2] -= offs[0]
    boxes[:, 1::2] -= offs[1]
    boxes = clip_boxes(boxes, im_shape)

    if flip:
        boxes_x = np.copy(boxes[:, 0])
        boxes[:, 0] = im_shape[1] - boxes[:, 2]
        boxes[:, 2] = im_shape[1] - boxes_x
    return boxes
Exemple #8
0
def decode(boxes, scores, rois, ih, iw):
    """Turn per-class box regression outputs into one box per ROI.

    For every ROI the class with the highest score is selected, and the four
    columns belonging to that class are taken from the decoded boxes.

    Parameters
    ---------
    boxes: an array of shape (R, Kx4), K consecutive groups of four columns
    scores: an array of shape (R, K)
    rois: an array of shape (R, 4), [x1, y1, x2, y2]
    ih: image height used for clipping
    iw: image width used for clipping

    Returns
    --------
    final_boxes: of shape (R x 4)
    classes: of shape (R), argmax over the score columns
    scores: of shape (R), the maximum score of each row
    """
    decoded = bbox_transform_inv(rois, deltas=boxes)
    best_class = np.argmax(scores, axis=1)
    best_score = np.max(scores, axis=1)

    num_rois = decoded.shape[0]
    picked = np.zeros((num_rois, 4))
    for row in range(num_rois):
        # Columns [4c, 4c + 4) hold the box for class c.
        start = best_class[row] * 4
        picked[row, 0:4] = decoded[row, start:start + 4]

    picked = clip_boxes(picked, (ih, iw))
    return picked, best_class, best_score
Exemple #9
0
def decode(boxes, scores, rois, ih, iw):
  """Turn per-class box regression outputs into one box per ROI.

  For every ROI the class with the highest score is selected, and the four
  columns belonging to that class are taken from the decoded boxes.

  Parameters
  ---------
  boxes: an array of shape (R, Kx4), K consecutive groups of four columns
  scores: an array of shape (R, K)
  rois: an array of shape (R, 4), [x1, y1, x2, y2]
  ih: image height used for clipping
  iw: image width used for clipping

  Returns
  --------
  final_boxes: of shape (R x 4), built as float32
  classes: of shape (R), int32 argmax over the score columns
  scores: of shape (R), the maximum score of each row
  """
  decoded = bbox_transform_inv(rois, deltas=boxes)
  best_class = np.argmax(scores, axis=1).astype(np.int32)
  best_score = np.max(scores, axis=1)

  num_rois = decoded.shape[0]
  picked = np.zeros((num_rois, 4), dtype=np.float32)
  for row in range(num_rois):
    # Columns [4c, 4c + 4) hold the box for class c.
    start = best_class[row] * 4
    picked[row, 0:4] = decoded[row, start:start + 4]

  picked = clip_boxes(picked, (ih, iw))
  return picked, best_class, best_score
Exemple #10
0
def encode(gt_boxes,
           all_anchors,
           feature_height,
           feature_width,
           stride,
           image_height,
           image_width,
           ignore_cross_boundary=True):
    """Match anchors to ground truth and encode them as learning targets.

    Anchors are labelled foreground/background by IoU with the (jittered,
    clipped) ground-truth boxes, then both groups are randomly subsampled.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4); flattened to (N, 4) here
    feature_height: height of feature
    feature_width: width of feature
    stride: downscale factor w.r.t the input size, e.g., [4, 8, 16, 32]
        (not used inside this function)
    image_height: height of image
    image_width: width of image
    ignore_cross_boundary: when True, anchors reported by
        _get_cross_boundary are set to the ignore label

    Returns
    --------
    labels: int32 array of shape (1, feature_height, feature_width, -1) with
        -1 (ignored), 0 (background) or 1 (foreground)
    bbox_targets: regression targets reshaped to
        (1, feature_height, feature_width, -1); all zeros when there is no gt
    bbox_inside_weights: float32 array reshaped the same way, 1.0 on the
        rows of foreground anchors and 0 elsewhere
    """
    # TODO: speedup this module
    allow_border = cfg.FLAGS.allow_border
    all_anchors = all_anchors.reshape([-1, 4])
    total_anchors = all_anchors.shape[0]

    # Every anchor starts as "ignore" (-1).
    labels = np.empty((total_anchors, ), dtype=np.int32)
    labels.fill(-1)

    jittered_gt_boxes = jitter_gt_boxes(gt_boxes[:, :4])
    clipped_gt_boxes = clip_boxes(jittered_gt_boxes,
                                  (image_height, image_width))

    if gt_boxes.size > 0:
        # np.float was removed from NumPy; np.float64 is the type it aliased.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(all_anchors, dtype=np.float64),
            np.ascontiguousarray(clipped_gt_boxes, dtype=np.float64))

        # Best gt for each anchor, and best overlap achieved for each gt.
        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]

        # bg label: less than threshold IOU
        labels[max_overlaps < cfg.FLAGS.rpn_bg_threshold] = 0
        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.FLAGS.rpn_fg_threshold] = 1

        # ignore cross-boundary anchors
        if ignore_cross_boundary is True:
            cb_inds = _get_cross_boundary(all_anchors, image_height,
                                          image_width, allow_border)
            labels[cb_inds] = -1

        # This is sensitive to boxes with little overlap, use with caution!
        # NOTE(review): if a gt overlaps no anchor its max overlap is 0, and
        # every anchor with zero overlap to it matches here - confirm intent.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        # fg label: for each gt, hard-assign the anchor(s) with the highest
        # overlap regardless of the thresholds (applied after the
        # cross-boundary step, so it overrides it).
        labels[gt_argmax_overlaps] = 1

        # subsample positive labels if there are too many
        num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(fg_inds,
                                            size=(len(fg_inds) - num_fg),
                                            replace=False)
            labels[disable_inds] = -1
    else:
        # if there is no gt
        labels[:] = 0

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many: at most three
    # backgrounds per foreground within the batch budget, but never fewer
    # than 8.
    num_fg = np.sum(labels == 1)
    num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds,
                                        size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        # Targets are computed against the original (un-jittered) gt rows.
        bbox_targets = _compute_targets(all_anchors,
                                        gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 1.0  #0.1

    # Restore the feature-map layout; the last axis absorbs the anchors.
    labels = labels.reshape((1, feature_height, feature_width, -1))
    bbox_targets = bbox_targets.reshape((1, feature_height, feature_width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape(
        (1, feature_height, feature_width, -1))

    return labels, bbox_targets, bbox_inside_weights