Example #1
def make_one_rcnn_target(cfg, proposals, truth_boxes, truth_labels):
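    """
    sample fg/bg proposals for one image and build RCNN training targets.
    1. drop proposals that are too small
    2. label the remaining proposals fg/bg by overlap with the truth boxes
    3. balance fg/bg into one training batch
    4. encode box-regression targets for the fg samples
    :param proposals: [[i, x0, y0, x1, y1, score, label], ...]
    :param truth_boxes: [[x0, y0, x1, y1], ...]
    :param truth_labels: (N, ) ndarray of int64
    :return: sampled_proposal, sampled_label, sampled_assign, sampled_target
    """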
    sampled_proposal = torch.zeros((0, 7), dtype=torch.float32).to(cfg.device)
    sampled_label = torch.zeros((0, ), dtype=torch.int64).to(cfg.device)
    sampled_assign = np.zeros((0, ), np.int32)
    sampled_target = torch.zeros((0, 4), dtype=torch.float32).to(cfg.device)

    # filter invalid proposals
    num_proposal = len(proposals)
    valid = []
    for i in range(num_proposal):
        box = proposals[i, 1:5]
        if not (is_small_box(box, min_size=cfg.mask_train_min_size)):
            valid.append(i)
    proposals = proposals[valid]

    # assign fg/bg to each box
    num_proposal = len(proposals)
    if len(truth_boxes) > 0 and num_proposal > 0:
        box = proposals[:, 1:5]
        # for each bbox, the index of the gt with which it has max overlap
        overlap = cython_box_overlap(box, truth_boxes)
        argmax_overlap = np.argmax(overlap, 1)
        max_overlap = overlap[np.arange(num_proposal), argmax_overlap]

        fg_index = np.where(max_overlap >= cfg.rcnn_train_fg_thresh_low)[0]
        bg_index = np.where((max_overlap < cfg.rcnn_train_bg_thresh_high) & \
                            (max_overlap >= cfg.rcnn_train_bg_thresh_low))[0]

        fg_index, bg_index, num_fg = balance(fg_index, bg_index,
                                             cfg.rcnn_train_batch_size,
                                             cfg.rcnn_train_fg_fraction,
                                             num_proposal)

        # selecting both fg and bg
        fg_bg_index = np.concatenate([fg_index, bg_index], 0)
        sampled_proposal = proposals[fg_bg_index]

        # label
        sampled_assign = argmax_overlap[fg_bg_index]
        sampled_label = truth_labels[sampled_assign]
        sampled_label[num_fg:] = 0  # Clamp labels for the background to 0

        # target
        if num_fg > 0:
            target_truth_box = truth_boxes[sampled_assign[:num_fg]]
            target_box = sampled_proposal[:num_fg][:, 1:5]
            sampled_target = rcnn_encode(target_box, target_truth_box)

        sampled_target = to_tensor(sampled_target, cfg.device)
        sampled_label = to_tensor(sampled_label, cfg.device)
        sampled_proposal = to_tensor(sampled_proposal, cfg.device)

    return sampled_proposal, sampled_label, sampled_assign, sampled_target
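A minimal usage sketch (not from the repo) for make_one_rcnn_target. It assumes the surrounding module provides is_small_box, cython_box_overlap, balance, rcnn_encode and to_tensor, as referenced in the function body; the cfg field values below are illustrative placeholders, not the repo's defaults.

import numpy as np
from types import SimpleNamespace

# illustrative config values only; the real cfg comes from the repo
cfg = SimpleNamespace(device='cpu',
                      mask_train_min_size=8,
                      rcnn_train_fg_thresh_low=0.5,
                      rcnn_train_bg_thresh_high=0.5,
                      rcnn_train_bg_thresh_low=0.0,
                      rcnn_train_batch_size=64,
                      rcnn_train_fg_fraction=0.25)

# proposals: [i, x0, y0, x1, y1, score, label]
proposals = np.array([[0, 10, 10, 50, 50, 0.9, 1],
                      [0, 60, 60, 90, 90, 0.8, 1]], np.float32)
truth_boxes = np.array([[12, 12, 48, 48]], np.float32)
truth_labels = np.array([1], np.int64)

sampled_proposal, sampled_label, sampled_assign, sampled_target = \
    make_one_rcnn_target(cfg, proposals, truth_boxes, truth_labels)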
Example #2
def make_one_rpn_target(cfg, anchor_boxes, truth_boxes):
    """
    labeling windows for one image
    :param image: input image
    :param anchor_boxes: [[x0, y0, x1, y1]]: (N, 4) ndarray of float32
    :param truth_boxes:  [[x0, y0, x1, y1]]: (N, 4) ndarray of float32
    :param truth_labels: [1, 1, 1, ...], (N, ) ndarray of int64
    :return:
        anchor_labels: 1 for pos, 0 for neg
        anchor_assigns: which truth box is assigned to the anchor box
        label_weight: pos=1, neg \in (0, 1] by rareness, otherwise 0 (don't care)
        delta: bboxes' offsets
        delta_weight: same as label_weight
    """
    num_anchor_boxes = len(anchor_boxes)
    anchor_labels  = np.zeros((num_anchor_boxes,), np.int64)
    anchor_assigns = np.zeros((num_anchor_boxes,), np.int64)
    label_weight   = np.ones((num_anchor_boxes,), np.float32)  # <todo> why use 1 for init ?
    delta          = np.zeros((num_anchor_boxes, 4), np.float32)
    delta_weight   = np.zeros((num_anchor_boxes,), np.float32)

    num_truth_box = len(truth_boxes)
    if num_truth_box != 0:

        overlap = cython_box_overlap(anchor_boxes, truth_boxes)
        argmax_overlap = np.argmax(overlap, 1)
        max_overlap = overlap[np.arange(num_anchor_boxes), argmax_overlap]
        # anchor_labels 1/0 for each anchor
        bg_index = max_overlap < cfg.rpn_train_bg_thresh_high
        anchor_labels[bg_index] = 0
        label_weight[bg_index] = 1

        fg_index = max_overlap >= cfg.rpn_train_fg_thresh_low
        anchor_labels[fg_index] = 1
        label_weight[fg_index] = 1
        anchor_assigns[...] = argmax_overlap

        # for each truth box, take the anchor(s) with the highest overlap (ties included)
        # and force them to be foreground, so every truth box gets at least one anchor
        argmax_overlap = np.argmax(overlap, 0)
        max_overlap = overlap[argmax_overlap, np.arange(num_truth_box)]
        anchor_assignto_gt, gt_assignto_anchor = np.where(overlap == max_overlap)

        fg_index = anchor_assignto_gt
        anchor_labels[fg_index] = 1
        label_weight[fg_index] = 1
        anchor_assigns[fg_index] = gt_assignto_anchor

        # regression
        fg_index = np.where(anchor_labels != 0)
        target_window = anchor_boxes[fg_index]
        target_truth_box = truth_boxes[anchor_assigns[fg_index]]
        delta[fg_index] = rpn_encode(target_window, target_truth_box)
        delta_weight[fg_index] = 1

        # weights for class balancing
        fg_index = np.where((label_weight != 0) & (anchor_labels != 0))[0]
        bg_index = np.where((label_weight != 0) & (anchor_labels == 0))[0]

        num_fg = len(fg_index)
        num_bg = len(bg_index)
        label_weight[fg_index] = 1
        label_weight[bg_index] = num_fg / max(num_bg, 1)  # guard against num_bg == 0

        # task balancing
        delta_weight[fg_index] = label_weight[fg_index]

    # save
    anchor_labels  = to_tensor(anchor_labels,  cfg.device)
    anchor_assigns = to_tensor(anchor_assigns, cfg.device)
    label_weight   = to_tensor(label_weight,   cfg.device)
    delta          = to_tensor(delta,          cfg.device)
    delta_weight   = to_tensor(delta_weight,   cfg.device)

    return anchor_labels, anchor_assigns, label_weight, delta, delta_weight
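A minimal usage sketch (not from the repo) for make_one_rpn_target, assuming cython_box_overlap, rpn_encode and to_tensor come from the surrounding module as above; the two thresholds on cfg are illustrative values only.

import numpy as np
from types import SimpleNamespace

# illustrative thresholds only; the real values live in the repo's cfg
cfg = SimpleNamespace(device='cpu',
                      rpn_train_bg_thresh_high=0.3,
                      rpn_train_fg_thresh_low=0.7)

anchor_boxes = np.array([[0, 0, 16, 16],
                         [8, 8, 40, 40],
                         [30, 30, 60, 60]], np.float32)
truth_boxes = np.array([[10, 10, 38, 38]], np.float32)

anchor_labels, anchor_assigns, label_weight, delta, delta_weight = \
    make_one_rpn_target(cfg, anchor_boxes, truth_boxes)
# anchor_labels is 1 for anchors matched to a truth box, 0 for background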
Example #3
def make_one_mask_target(cfg, image, proposals, truth_box, truth_label,
                         truth_instance):
    """
    make mask targets for one image.
    1. assign truth box to each proposals by threshold for fg/bg
    2. crop assigned instance into bbox size
    3. resize to maskhead's_train output size.
    :param image: image as (H, W, C) numpy array
    :param proposals: list of regional proposals generated by RCNN. e.g.
        [[i, x0, y0, x1, y1, score, label], ...]
    :param truth_box: list of truth boxes. e.g.
        [[x0, y0, x1, y1], ...]
    :param truth_label: 1s
        maskhead are used to predict mask,
        all masks are positive proposals. (foreground)
        here we have 2 classes so it's_train fixed to 1
    :param truth_instance: list of truth instances, (H, W)
    :return:
        sampled_proposal: same as proposals
        sampled_label: same as truth_label
        sampled_instance: cropped instance, matching maskhead's_train output
        sampled_assign: index of truth_box each proposals belongs to
    """
    sampled_proposal = torch.FloatTensor(0, 7).to(cfg.device)
    sampled_label = torch.LongTensor(0, 1).to(cfg.device)
    sampled_instance = torch.FloatTensor(0, 1, 1).to(cfg.device)

    # filter invalid proposals like small proposals
    _, height, width = image.size()
    num_proposal = len(proposals)

    valid = []
    for i in range(num_proposal):
        box = proposals[i, 1:5]
        if not is_small_box(box, min_size=cfg.mask_train_min_size):  # is_small_box_at_boundary
            valid.append(i)
    proposals = proposals[valid]

    num_proposal = len(proposals)
    if len(truth_box) > 0 and num_proposal > 0:
        # assign bbox to proposals by overlap threshold
        box = proposals[:, 1:5]
        # for each bbox, the index of gt which has max overlap with it
        overlap = cython_box_overlap(box, truth_box)
        argmax_overlap = np.argmax(overlap, 1)
        max_overlap = overlap[np.arange(num_proposal), argmax_overlap]

        fg_index = np.where(max_overlap >= cfg.mask_train_fg_thresh_low)[0]

        if len(fg_index) > 0:
            fg_length = len(fg_index)
            num_fg = cfg.mask_train_batch_size
            fg_index = fg_index[np.random.choice(fg_length,
                                                 size=num_fg,
                                                 replace=fg_length < num_fg)]

            sampled_proposal = proposals[fg_index]
            sampled_assign = argmax_overlap[fg_index]    # assign a gt to each bbox
            sampled_label = truth_label[sampled_assign]  # assign the gt's label to each bbox
            sampled_instance = []
            for i in range(len(fg_index)):
                # for each positive bbox, find the instance it belongs to
                instance = truth_instance[sampled_assign[i]]
                box = sampled_proposal[i, 1:5]
                # crop the instance to the box and resize to the mask head's output size
                crop = resize_instance(instance, box, cfg.mask_size)
                sampled_instance.append(crop[np.newaxis, :, :])

            # save (only when at least one fg proposal was sampled,
            # otherwise np.vstack on the empty initial tensor would fail)
            sampled_instance = np.vstack(sampled_instance)

            sampled_proposal = to_tensor(sampled_proposal, cfg.device)
            sampled_label = to_tensor(sampled_label, cfg.device)
            sampled_instance = to_tensor(sampled_instance, cfg.device)

    return sampled_proposal, sampled_label, sampled_instance
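A minimal usage sketch (not from the repo) for make_one_mask_target. The cfg values are illustrative; resize_instance, is_small_box, cython_box_overlap and to_tensor are assumed to come from the surrounding module, as in the function body above.

import numpy as np
import torch
from types import SimpleNamespace

# illustrative config values only
cfg = SimpleNamespace(device='cpu',
                      mask_train_min_size=8,
                      mask_train_fg_thresh_low=0.5,
                      mask_train_batch_size=32,
                      mask_size=28)

image = torch.zeros(3, 128, 128)                       # (C, H, W)
proposals = np.array([[0, 10, 10, 60, 60, 0.9, 1]], np.float32)
truth_box = np.array([[12, 12, 58, 58]], np.float32)
truth_label = np.array([1], np.int64)
truth_instance = np.zeros((1, 128, 128), np.float32)
truth_instance[0, 12:58, 12:58] = 1                    # one square ground-truth instance

sampled_proposal, sampled_label, sampled_instance = \
    make_one_mask_target(cfg, image, proposals, truth_box, truth_label,
                         truth_instance)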
Example #4
def _nms(cfg,
         mode,
         head,
         decode,
         images,
         logits,
         deltas,
         anchor_boxes=None,
         rpn_proposals=None):
    """
    used for rpn and rcnn nms_func
    This function:
    1. Do non-maximum suppression on given window and logistic score
    2. filter small ret_proposals, crop border
    3. decode bbox regression

    :param cfg: configure
    :param mode: mode. e.g. 'train', 'test', 'eval'
    :param images: a batch of input images
    :param anchor_boxes: all anchor boxes in a batch, list of coords, e.g.
               [[x0, y0, x1, y1], ...], a total of 16*16*3 + 32*32*3 + 64*64*3 + 128*128*3
    :param logits_np: (B, N, 2) NOT nomalized
               [[0.7, 0.5], ...]
    :param deltas_np: (B, N, 2, 4)
               [[[t1, t2, t3, t4], [t1, t2, t3, t4]], ...]
    :return: all proposals in a batch. e.g.
        [i, x0, y0, x1, y1, score, label]
        proposals[0]:   image idx in the batch
        proposals[1:5]: bbox
        proposals[5]:   probability of foreground (background skipped)
        proposals[6]:   class label, 1 fore foreground, 0 for background, here we only return 1
    """
    if mode in ['train']:
        nms_prob_threshold = cfg.rpn_train_nms_pre_score_threshold if head == 'rpn' else cfg.rcnn_train_nms_pre_score_threshold
        nms_overlap_threshold = cfg.rpn_train_nms_overlap_threshold if head == 'rpn' else cfg.rcnn_train_nms_overlap_threshold
        nms_min_size = cfg.rpn_train_nms_min_size if head == 'rpn' else cfg.rcnn_train_nms_min_size

    elif mode in ['valid', 'test', 'eval']:
        nms_prob_threshold = cfg.rpn_test_nms_pre_score_threshold if head == 'rpn' else cfg.rcnn_test_nms_pre_score_threshold
        nms_overlap_threshold = cfg.rpn_test_nms_overlap_threshold if head == 'rpn' else cfg.rcnn_test_nms_overlap_threshold
        nms_min_size = cfg.rpn_test_nms_min_size if head == 'rpn' else cfg.rcnn_test_nms_min_size

        if mode in ['eval']:
            nms_prob_threshold = 0.05  # use a low threshold so an ROC curve can be drawn
    else:
        raise ValueError('_nms(): invalid mode = %s' % mode)

    num_classes = 2 if head == 'rpn' else cfg.num_classes
    logits_np = logits.detach().cpu().numpy()
    if head == 'rpn':
        deltas_np = deltas.detach().cpu().numpy()
    else:
        deltas_np = deltas.detach().cpu().numpy().reshape(-1, num_classes, 4)
    batch_size, _, height, width = images.size()

    # non-max suppression
    ret_proposals = []
    for img_idx in range(batch_size):
        pic_proposals = [np.empty((0, 7), np.float32)]
        if head == 'rpn':
            assert anchor_boxes is not None
            raw_box = anchor_boxes
            prob_distrib = np_softmax(logits_np[img_idx])  # (N, 2)
            delta_distrib = deltas_np[img_idx]  # (N, 2, 4)
        else:  # rcnn
            rpn_proposals_np = rpn_proposals.detach().cpu().numpy()
            select = np.where(rpn_proposals_np[:, 0] == img_idx)[0]
            if len(select) == 0:
                return torch.zeros((1, 7)).to(cfg.device)
            raw_box = rpn_proposals_np[select, 1:5]
            prob_distrib = np_softmax(
                logits_np[select])  # <todo>why not use np_sigmoid?
            delta_distrib = deltas_np[select]

        # skip background
        for cls_idx in range(1, num_classes):  # 0 for background, 1+ for foreground
            index = np.where(prob_distrib[:, cls_idx] > nms_prob_threshold)[0]
            if len(index) > 0:
                valid_box = raw_box[index]
                prob = prob_distrib[index, cls_idx].reshape(-1, 1)
                delta = delta_distrib[index, cls_idx]
                # bbox regression, do some clip/filter
                box = decode(valid_box, delta)
                box = clip_boxes(box, width, height)  # take care of borders
                keep = filter_boxes(
                    box, min_size=nms_min_size)  # get rid of small boxes

                if len(keep) > 0:
                    box = box[keep]
                    prob = prob[keep]
                    keep = nms_func(np.hstack((box, prob)),
                                    nms_overlap_threshold)

                    proposal = np.zeros((len(keep), 7), np.float32)
                    proposal[:, 0] = img_idx
                    proposal[:, 1:5] = np.around(box[keep], 0)
                    proposal[:, 5] = prob[keep, 0]
                    proposal[:, 6] = cls_idx
                    pic_proposals.append(proposal)

        pic_proposals = np.vstack(pic_proposals)
        ret_proposals.append(pic_proposals)

    ret_proposals = np.vstack(ret_proposals)
    ret_proposals = to_tensor(ret_proposals, cfg.device)
    return ret_proposals
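A minimal sketch (not from the repo) of calling _nms for the RPN head in 'train' mode. The cfg thresholds are illustrative, and identity_decode is a stand-in for the head's real decode function (the counterpart of rpn_encode); np_softmax, clip_boxes, filter_boxes and nms_func are assumed to be the module-level helpers referenced above.

import numpy as np
import torch
from types import SimpleNamespace

# illustrative thresholds only; the real values come from the repo's cfg
cfg = SimpleNamespace(device='cpu',
                      rpn_train_nms_pre_score_threshold=0.5,
                      rpn_train_nms_overlap_threshold=0.7,
                      rpn_train_nms_min_size=5)

def identity_decode(boxes, deltas):
    # stand-in for the real decode function: ignore the deltas, keep the boxes
    return boxes

images = torch.zeros(1, 3, 128, 128)                 # (B, C, H, W)
anchor_boxes = np.array([[10, 10, 60, 60],
                         [12, 12, 62, 62]], np.float32)
logits = torch.tensor([[[0.0, 2.0],
                        [0.0, 1.0]]])                # (B, N, 2) raw scores
deltas = torch.zeros(1, 2, 2, 4)                     # (B, N, 2, 4)

proposals = _nms(cfg, 'train', 'rpn', identity_decode,
                 images, logits, deltas, anchor_boxes=anchor_boxes)
# proposals: (M, 7) tensor of [i, x0, y0, x1, y1, score, label]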
Example #5
def mask_nms(cfg, images, proposals, mask_logits):
    """
    1. do non-maximum suppression to remove overlapping segmentations
    2. resize the masks from mask head output (28*28) into box size
    3. paste the masks into input image
    :param cfg:
    :param images: (B, C, H, W)
    :param proposals: (B, 7) [i, x0, y0, x1, y1, score, label]
    :param mask_logits: (B, num_classes, 2*crop_size, 2*crop_size)
    :return:
        b_multi_masks: (B, H, W) masks labelled with 1,2,...N (total number of masks)
        b_mask_instances: (B*N, H, W) masks with prob
        b_mask_proposals: (B*N, ) proposals
    """
    overlap_threshold = cfg.mask_test_nms_overlap_threshold
    pre_score_threshold = cfg.mask_test_nms_pre_score_threshold
    mask_threshold = cfg.mask_test_mask_threshold
    mask_min_area = cfg.mask_test_mask_min_area

    proposals = proposals.detach().cpu().numpy()
    mask_logits = mask_logits.detach().cpu().numpy()
    mask_probs = np_sigmoid(mask_logits)

    b_multi_masks = []
    b_mask_proposals = []
    b_mask_instances = []
    batch_size, C, H, W = images.size()
    for b in range(batch_size):
        multi_masks = np.zeros((H, W), np.float32)
        mask_proposals = []
        mask_instances = []
        num_keeps = 0

        index = np.where((proposals[:, 0] == b)
                         & (proposals[:, 5] > pre_score_threshold))[0]
        if len(index) > 0:
            instances = []  # all instances
            boxes = []  # all boxes
            for i in index:
                mask = np.zeros((H, W), np.float32)

                x0, y0, x1, y1 = proposals[i, 1:5].astype(np.int32)
                h, w = y1 - y0 + 1, x1 - x0 + 1
                label = int(proposals[i, 6])  # get label of the instance
                crop = mask_probs[i, label]  # get mask channel of the label
                crop = cv2.resize(crop, (w, h), interpolation=cv2.INTER_LINEAR)
                # crop = crop > mask_threshold  # turn prob feature map into 0/1 mask
                mask[y0:y1 + 1, x0:x1 + 1] = crop  # paste mask into empty mask

                instances.append(mask)
                boxes.append([x0, y0, x1, y1])

            # compute box overlap, do cython_nms
            L = len(index)
            binary = [
                instance_to_binary(m, mask_threshold, mask_min_area)
                for m in instances
            ]
            boxes = np.array(boxes, np.float32)
            box_overlap = cython_box_overlap(boxes, boxes)
            instance_overlap = np.zeros((L, L), np.float32)

            # calculate instance overlapping iou
            for i in range(L):
                instance_overlap[i, i] = 1
                for j in range(i + 1, L):
                    if box_overlap[i, j] < 0.01:
                        continue

                    x0 = int(min(boxes[i, 0], boxes[j, 0]))
                    y0 = int(min(boxes[i, 1], boxes[j, 1]))
                    x1 = int(max(boxes[i, 2], boxes[j, 2]))
                    y1 = int(max(boxes[i, 3], boxes[j, 3]))

                    mi = binary[i][y0:y1, x0:x1]
                    mj = binary[j][y0:y1, x0:x1]

                    intersection = (mi & mj).sum()
                    union = (mi | mj).sum()
                    instance_overlap[i, j] = intersection / (union + 1e-12)
                    instance_overlap[j, i] = instance_overlap[i, j]

            # non-max-suppression to remove overlapping segmentation
            score = proposals[index, 5]
            sort_idx = list(np.argsort(-score))

            # https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
            keep = []
            while len(sort_idx) > 0:
                i = sort_idx[0]
                keep.append(i)
                delete_index = list(
                    np.where(instance_overlap[i] > overlap_threshold)[0])
                sort_idx = [e for e in sort_idx if e not in delete_index]
            # filter instances & proposals
            num_keeps = len(keep)
            for i in range(num_keeps):
                k = keep[i]
                multi_masks[np.where(binary[k])] = i + 1
                mask_instances.append(instances[k].reshape(1, H, W))

                t = index[k]  # t is the index of the box before NMS
                b, x0, y0, x1, y1, score, label = proposals[t]
                mask_proposals.append(
                    np.array([b, x0, y0, x1, y1, score, label], np.float32))

        if num_keeps == 0 or len(index) == 0:
            mask_proposals = np.zeros((0, 7), np.float32)
            mask_instances = np.zeros((0, H, W), np.float32)
        else:
            mask_proposals = np.vstack(mask_proposals)
            mask_instances = np.vstack(mask_instances)

        b_mask_proposals.append(mask_proposals)
        b_mask_instances.append(mask_instances)
        b_multi_masks.append(multi_masks)

    b_mask_proposals = np.vstack(b_mask_proposals)
    b_mask_proposals = to_tensor(b_mask_proposals, cfg.device)
    return b_multi_masks, b_mask_instances, b_mask_proposals
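A minimal sketch (not from the repo) of calling mask_nms on two overlapping detections of the same object. The cfg thresholds are illustrative; np_sigmoid, instance_to_binary and cython_box_overlap are assumed to be the module-level helpers used in the function body.

import torch
from types import SimpleNamespace

# illustrative thresholds only; the real values come from the repo's cfg
cfg = SimpleNamespace(device='cpu',
                      mask_test_nms_overlap_threshold=0.5,
                      mask_test_nms_pre_score_threshold=0.5,
                      mask_test_mask_threshold=0.5,
                      mask_test_mask_min_area=4)

images = torch.zeros(1, 3, 128, 128)
# two overlapping detections of the same object: [i, x0, y0, x1, y1, score, label]
proposals = torch.tensor([[0, 10, 10, 60, 60, 0.95, 1],
                          [0, 12, 12, 62, 62, 0.80, 1]], dtype=torch.float32)
# mask head logits: (num_proposals, num_classes, 2*crop_size, 2*crop_size)
mask_logits = torch.full((2, 2, 56, 56), 5.0)        # sigmoid(5) ~ 0.99 everywhere

multi_masks, mask_instances, mask_proposals = \
    mask_nms(cfg, images, proposals, mask_logits)
# the lower-scoring duplicate is suppressed, leaving one instance in the image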