Example #1
def compute_precision_for_box(box, truth_box, truth_label, threshold=(0.5,)):

    num_truth_box = len(truth_box)
    num_box = len(box)

    overlap = cython_box_overlap(box, truth_box)
    argmax_overlap = np.argmax(overlap, 0)
    max_overlap = overlap[argmax_overlap, np.arange(num_truth_box)]

    # overlap against "don't care" truth boxes (negative labels)
    invalid_truth_box = truth_box[truth_label < 0]
    invalid_overlap = cython_box_overlap(box, invalid_truth_box)

    precision = []
    recall = []
    result = []
    truth_result = []

    for t in threshold:
        # truth_result: classify each truth box at this threshold
        truth_r = np.full(num_truth_box, INVALID, np.int32)
        truth_r[(max_overlap < t) & (truth_label > 0)] = MISS
        truth_r[(max_overlap >= t) & (truth_label > 0)] = HIT

        # result: classify each predicted box
        r = np.full(num_box, FP, np.int32)
        r[argmax_overlap[truth_r == HIT]] = TP

        # false positives that overlap a "don't care" truth box are ignored
        index = np.where(r == FP)[0]
        if len(index) > 0:
            hits_invalid = (invalid_overlap[index] > t).any(axis=1)
            r[index[hits_invalid]] = INVALID

        num_truth = (truth_r != INVALID).sum()
        num_hit = (truth_r == HIT).sum()
        num_miss = (truth_r == MISS).sum()
        rec = num_hit / max(num_truth, 1e-12)  # guard against zero truths

        num_tp = (r == TP).sum()
        num_fp = (r == FP).sum()
        prec = num_tp / max(num_tp + num_fp + num_miss, 1e-12)

        precision.append(prec)
        recall.append(rec)
        result.append(r)
        truth_result.append(truth_r)

    # if len(threshold) == 1, callers may prefer scalars:
    #     precision, recall = precision[0], recall[0]
    #     result, truth_result = result[0], truth_result[0]

    return precision, recall, result, truth_result
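
# The outcome codes and cython_box_overlap above come from the surrounding
# repo. A minimal NumPy stand-in, assuming [x0, y0, x1, y1] boxes with
# inclusive corners and the code values below (both are assumptions):

import numpy as np

HIT, MISS, INVALID = 1, 0, -1   # per-truth outcomes (assumed values)
TP, FP = 1, 0                   # per-prediction outcomes (assumed values)

def box_overlap_sketch(boxes_a, boxes_b):
    # pairwise IoU between (N, 4) and (M, 4) boxes
    area_a = (boxes_a[:, 2] - boxes_a[:, 0] + 1) * (boxes_a[:, 3] - boxes_a[:, 1] + 1)
    area_b = (boxes_b[:, 2] - boxes_b[:, 0] + 1) * (boxes_b[:, 3] - boxes_b[:, 1] + 1)
    iw = np.minimum(boxes_a[:, None, 2], boxes_b[None, :, 2]) \
       - np.maximum(boxes_a[:, None, 0], boxes_b[None, :, 0]) + 1
    ih = np.minimum(boxes_a[:, None, 3], boxes_b[None, :, 3]) \
       - np.maximum(boxes_a[:, None, 1], boxes_b[None, :, 1]) + 1
    inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)
    return inter / (area_a[:, None] + area_b[None, :] - inter)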
Example #2
def _make_one_rpn_target(cfg, image, anchor_boxes, truth_boxes, truth_labels):
    """
    labeling windows for one image
    :param image: input image
    :param anchor_boxes: list of bboxes e.g. [x0, y0, x1, y1]
    :param truth_boxes: list of boxes, e.g. [x0, y0, x1, y1]
    :param truth_labels: 1 for sure
    :return:
        label: 1 for pos, 0 for neg
        label_assign: which truth box is assigned to the window
        label_weight: pos=1, neg \in (0, 1] by rareness, otherwise 0 (don't care)
        target: bboxes' offsets
        target_weight: same as label_weight
    """
    num_anchor_boxes = len(anchor_boxes)
    label = np.zeros((num_anchor_boxes, ), np.float32)
    label_assign = np.zeros((num_anchor_boxes, ), np.int32)
    label_weight = np.ones((num_anchor_boxes, ), np.float32)  # <todo> why init to 1?
    target = np.zeros((num_anchor_boxes, 4), np.float32)
    target_weight = np.zeros((num_anchor_boxes, ), np.float32)

    num_truth_box = len(truth_boxes)
    if num_truth_box != 0:
        _, height, width = image.size()

        overlap = cython_box_overlap(anchor_boxes, truth_boxes)
        argmax_overlap = np.argmax(overlap, 1)
        max_overlap = overlap[np.arange(num_anchor_boxes), argmax_overlap]
        # label 1/0 for each anchor
        bg_index = max_overlap < cfg.rpn_train_bg_thresh_high
        label[bg_index] = 0
        label_weight[bg_index] = 1

        fg_index = max_overlap >= cfg.rpn_train_fg_thresh_low
        label[fg_index] = 1
        label_weight[fg_index] = 1
        label_assign[...] = argmax_overlap

        # for each truth box, find the anchor(s) with highest overlap
        # (including ties) and force-assign them as foreground
        argmax_overlap = np.argmax(overlap, 0)
        max_overlap = overlap[argmax_overlap, np.arange(num_truth_box)]
        anchor_assignto_gt, gt_assignto_anchor = np.where(
            overlap == max_overlap)

        fg_index = anchor_assignto_gt
        label[fg_index] = 1
        label_weight[fg_index] = 1
        label_assign[fg_index] = gt_assignto_anchor

        # regression
        fg_index = np.where(label != 0)
        target_window = anchor_boxes[fg_index]
        target_truth_box = truth_boxes[label_assign[fg_index]]
        target[fg_index] = rpn_encode(target_window, target_truth_box)
        target_weight[fg_index] = 1

        # don't care
        invalid_truth_label = np.where(truth_labels < 0)[0]
        invalid_index = np.isin(label_assign,
                                invalid_truth_label) & (label != 0)
        label_weight[invalid_index] = 0
        target_weight[invalid_index] = 0

        # weights for class balancing
        fg_index = np.where((label_weight != 0) & (label != 0))[0]
        bg_index = np.where((label_weight != 0) & (label == 0))[0]

        num_fg = len(fg_index)
        num_bg = len(bg_index)
        label_weight[fg_index] = 1
        if num_bg > 0:  # guard against division by zero when there is no background
            label_weight[bg_index] = num_fg / num_bg

        if cfg.rpn_train_scale_balance:
            # weights for scale balancing
            num_scales = len(cfg.rpn_scales)
            num_bases = [len(b) for b in cfg.rpn_base_apsect_ratios]
            start = 0
            for l in range(num_scales):
                h, w = int(height // 2**l), int(width // 2**l)
                end = start + h * w * num_bases[l]
                label_weight[start:end] *= (2**l)**2
                start = end

        # task balancing
        target_weight[fg_index] = label_weight[fg_index]

    # save
    label = Variable(torch.from_numpy(label)).cuda()
    label_assign = Variable(torch.from_numpy(label_assign)).cuda()
    label_weight = Variable(torch.from_numpy(label_weight)).cuda()
    target = Variable(torch.from_numpy(target)).cuda()
    target_weight = Variable(torch.from_numpy(target_weight)).cuda()
    return label, label_assign, label_weight, target, target_weight
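
# rpn_encode is defined elsewhere in the repo. A common Faster R-CNN style
# box encoding, sketched here as an assumption about what it computes:

import numpy as np

def rpn_encode_sketch(windows, truth_boxes):
    # offsets (dx, dy, dw, dh) from anchor windows to assigned truth boxes
    wx0, wy0, wx1, wy1 = np.split(windows.astype(np.float32), 4, axis=1)
    tx0, ty0, tx1, ty1 = np.split(truth_boxes.astype(np.float32), 4, axis=1)
    ww, wh = wx1 - wx0 + 1, wy1 - wy0 + 1
    tw, th = tx1 - tx0 + 1, ty1 - ty0 + 1
    dx = (tx0 + 0.5 * tw - (wx0 + 0.5 * ww)) / ww   # center shift, in window units
    dy = (ty0 + 0.5 * th - (wy0 + 0.5 * wh)) / wh
    dw, dh = np.log(tw / ww), np.log(th / wh)       # log scale change
    return np.hstack([dx, dy, dw, dh])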
Example #3
def _make_one_rcnn_target(cfg, image, proposals, truth_boxes, truth_labels):
    """
    make rcnn target for ONE IMAGE, sampling labels
    https://github.com/ruotianluo/pytorch-faster-rcnn
    :param image: input image
    :param proposals: i is the index if image in batch:
        [i, x0, y0, x1, y1, score, label, 0]
    :param truth_boxes: list of boxes, e.g.
        [x0, y0, x1, y1]
    :param truth_labels: label of each truth box
    :return:
        sampled_proposal: 1 for pos, 0 for neg
        sampled_label: label of sampled truth box
        sampled_assign: which truth box is assigned to the sampled proposal
        sampled_target: bboxes' offsets from sampled proposals to truth boxes
    """
    # empty placeholders; note torch.FloatTensor(0, 8) (sizes as separate
    # arguments) builds an empty tensor, unlike torch.FloatTensor((0, 8)),
    # which builds a tensor holding the values 0 and 8
    sampled_proposal = Variable(torch.FloatTensor(0, 8)).cuda()
    sampled_label = Variable(torch.LongTensor(0, 1)).cuda()
    sampled_assign = np.zeros((0, ), np.int32)
    sampled_target = Variable(torch.FloatTensor(0, 4)).cuda()

    if len(truth_boxes) == 0 or len(proposals) == 0:
        return sampled_proposal, sampled_label, sampled_assign, sampled_target

    # filter invalid proposals
    _, height, width = image.size()
    num_proposal = len(proposals)

    valid = []
    for i in range(num_proposal):
        box = proposals[i, 1:5]
        if not is_small_box(box, min_size=cfg.mask_train_min_size):
            valid.append(i)

    if len(valid) == 0:
        return sampled_proposal, sampled_label, sampled_assign, sampled_target

    proposals = proposals[valid]
    # assign fg/bg to each proposal
    num_proposal = len(proposals)
    box = proposals[:, 1:5]
    # for each bbox, the index of gt which has max overlap with it
    overlap = cython_box_overlap(box, truth_boxes)
    argmax_overlap = np.argmax(overlap, 1)
    max_overlap = overlap[np.arange(num_proposal), argmax_overlap]

    fg_index = np.where(max_overlap >= cfg.rcnn_train_fg_thresh_low)[0]
    bg_index = np.where((max_overlap < cfg.rcnn_train_bg_thresh_high) & \
                        (max_overlap >= cfg.rcnn_train_bg_thresh_low))[0]

    # sampling for class balance
    num_classes = cfg.num_classes
    num = cfg.rcnn_train_batch_size
    num_fg = int(
        np.round(cfg.rcnn_train_fg_fraction * cfg.rcnn_train_batch_size))

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    # https://github.com/precedenceguo/mx-rcnn/commit/3853477d9155c1f340241c04de148166d146901d
    fg_length = len(fg_index)
    bg_length = len(bg_index)

    if fg_length > 0 and bg_length > 0:
        num_fg = min(num_fg, fg_length)
        fg_index = fg_index[np.random.choice(fg_length,
                                             size=num_fg,
                                             replace=fg_length < num_fg)]
        num_bg = num - num_fg
        bg_index = bg_index[np.random.choice(bg_length,
                                             size=num_bg,
                                             replace=bg_length < num_bg)]
    # no bgs
    elif fg_length > 0:
        num_fg = num
        num_bg = 0
        fg_index = fg_index[np.random.choice(fg_length,
                                             size=num_fg,
                                             replace=fg_length < num_fg)]
    # no fgs
    elif bg_length > 0:
        num_fg = 0
        num_bg = num
        bg_index = bg_index[np.random.choice(bg_length,
                                             size=num_bg,
                                             replace=bg_length < num_bg)]
    # no bgs and no fgs?
    else:
        num_fg = 0
        num_bg = num
        bg_index = np.random.choice(num_proposal,
                                    size=num_bg,
                                    replace=num_proposal < num_bg)

    assert ((num_fg + num_bg) == num)

    # selecting both fg and bg
    index = np.concatenate([fg_index, bg_index], 0)
    sampled_proposal = proposals[index]

    # label
    sampled_assign = argmax_overlap[index]
    sampled_label = truth_labels[sampled_assign]
    sampled_label[num_fg:] = 0  # Clamp labels for the background to 0

    # target
    if num_fg > 0:
        target_truth_box = truth_boxes[sampled_assign[:num_fg]]
        target_box = sampled_proposal[:num_fg, 1:5]
        sampled_target = bbox_encode(target_box, target_truth_box)
    else:
        sampled_target = np.zeros((0, 4), np.float32)  # no foreground sampled

    sampled_target = Variable(torch.from_numpy(sampled_target)).cuda()
    sampled_label = Variable(torch.from_numpy(sampled_label)).long().cuda()
    sampled_proposal = Variable(torch.from_numpy(sampled_proposal)).cuda()
    return sampled_proposal, sampled_label, sampled_assign, sampled_target
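
# is_small_box and bbox_encode are repo helpers. A plausible reading of
# is_small_box, for reference (an assumption, not the repo's code):

def is_small_box_sketch(box, min_size):
    # True if either side of [x0, y0, x1, y1] is shorter than min_size
    x0, y0, x1, y1 = box
    return (x1 - x0 + 1) < min_size or (y1 - y0 + 1) < min_size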
Example #4
def mask_nms(cfg, images, proposals, mask_logits):
    """
    1. do non-maximum suppression to remove overlapping segmentations
    2. resize the masks from mask head output (28*28) into proposal size
    3. paste the masks into input image
    #<todo> better nms for mask
    :param cfg:
    :param images: (B, C, H, W)
    :param proposals: (B, 8) [i, x0, y0, x1, y1, score, label, z]
    :param mask_logits: (B, num_classes, 2*crop_size, 2*crop_size)
    :return:
        b_multi_masks: (B, H, W) masks labelled with 1,2,...N (total number of masks)
        b_mask_instances: (B*N, H, W) masks with prob
        b_mask_proposals: (B*N, ) proposals
    """
    overlap_threshold   = cfg.mask_test_nms_overlap_threshold
    pre_score_threshold = cfg.mask_test_nms_pre_score_threshold
    mask_threshold      = cfg.mask_test_mask_threshold
    mask_min_area       = cfg.mask_test_mask_min_area

    proposals   = proposals.cpu().data.numpy()
    mask_logits = mask_logits.cpu().data.numpy()
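    # np_sigmoid is a repo helper; presumably elementwise 1 / (1 + np.exp(-x))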
    mask_probs  = np_sigmoid(mask_logits)

    b_multi_masks = []
    b_mask_proposals = []
    b_mask_instances = []
    batch_size, C, H, W = images.size()
    for b in range(batch_size):
        multi_masks = np.zeros((H, W), np.float32)  # multi-instance mask for one image
        mask_proposals = []  # kept proposals for one image
        mask_instances = []  # kept instances for one image
        num_keeps = 0

        index = np.where((proposals[:, 0] == b) & (proposals[:, 5] > pre_score_threshold))[0]
        if len(index) != 0:
            instances = []    # all instances
            boxes = []        # all boxes
            for i in index:
                mask = np.zeros((H, W), np.float32)

                x0, y0, x1, y1 = proposals[i, 1:5].astype(np.int32)
                h, w = y1-y0+1, x1-x0+1
                label = int(proposals[i, 6])    # get label of the instance
                crop = mask_probs[i, label]     # get mask channel of the label
                crop = cv2.resize(crop, (w, h), interpolation=cv2.INTER_LINEAR)
                # crop = crop > mask_threshold  # turn prob feature map into 0/1 mask
                mask[y0:y1+1, x0:x1+1] = crop   # paste mask into empty mask

                instances.append(mask)
                boxes.append([x0, y0, x1, y1])

            # compute box overlap, do nms
            L = len(index)
            binary = [instance_to_binary(m, mask_threshold, mask_min_area) for m in instances]
            boxes = np.array(boxes, np.float32)
            box_overlap = cython_box_overlap(boxes, boxes)
            instance_overlap = np.zeros((L, L), np.float32)

            # calculate instance overlapping iou
            for i in range(L):
                instance_overlap[i, i] = 1
                for j in range(i+1, L):
                    if box_overlap[i, j] < 0.01:
                        continue

                    x0 = int(min(boxes[i, 0], boxes[j, 0]))
                    y0 = int(min(boxes[i, 1], boxes[j, 1]))
                    x1 = int(max(boxes[i, 2], boxes[j, 2]))
                    y1 = int(max(boxes[i, 3], boxes[j, 3]))

                    # corners are inclusive, so slice one past the far edge
                    mi = binary[i][y0:y1 + 1, x0:x1 + 1]
                    mj = binary[j][y0:y1 + 1, x0:x1 + 1]

                    intersection = (mi & mj).sum()
                    union = (mi | mj).sum()
                    instance_overlap[i, j] = intersection/(union + 1e-12)
                    instance_overlap[j, i] = instance_overlap[i, j]

            # non-max-suppression to remove overlapping segmentation
            score = proposals[index, 5]
            sort_idx = list(np.argsort(-score))

            # https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
            keep = []
            while len(sort_idx) > 0:
                i = sort_idx[0]
                keep.append(i)
                delete_index = list(np.where(instance_overlap[i] > overlap_threshold)[0])
                sort_idx = [e for e in sort_idx if e not in delete_index]
            # filter instances & proposals
            num_keeps = len(keep)
            for i in range(num_keeps):
                k = keep[i]
                multi_masks[np.where(binary[k])] = i + 1
                mask_instances.append(instances[k].reshape(1, H, W))

                t = index[k]
                bi, x0, y0, x1, y1, score, label, _ = proposals[t]  # bi == b; avoid shadowing the batch index
                mask_proposals.append(np.array([bi, x0, y0, x1, y1, score, label, t], np.float32))

        if num_keeps == 0:
            mask_proposals = np.zeros((0, 8), np.float32)
            mask_instances = np.zeros((0, H, W), np.float32)
        else:
            mask_proposals = np.vstack(mask_proposals)
            mask_instances = np.vstack(mask_instances)

        b_mask_proposals.append(mask_proposals)
        b_mask_instances.append(mask_instances)
        b_multi_masks.append(multi_masks)

    b_mask_proposals = Variable(torch.from_numpy(np.vstack(b_mask_proposals))).cuda()
    return b_multi_masks, b_mask_instances, b_mask_proposals
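
# instance_to_binary is a repo helper. A plausible reading (an assumption):
# threshold the probability mask and drop masks that are too small.

def instance_to_binary_sketch(instance, mask_threshold, mask_min_area):
    binary = instance > mask_threshold
    if binary.sum() < mask_min_area:
        binary[...] = False  # too small to keep
    return binary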
Example #5
def _make_one_mask_target(cfg, mode, image, proposals, truth_box, truth_label, truth_instance):
    """
    make mask targets for one image.
    1. assign truth box to each proposals by threshold for fg/bg
    2. crop assigned instance into bbox size
    3. resize to maskhead's_train output size.
    :param image: image as (H, W, C) numpy array
    :param proposals: list of regional proposals generated by RCNN. e.g.
        [[i, x0, y0, x1, y1, score, label], ...]
    :param truth_box: list of truth boxes. e.g.
        [[x0, y0, x1, y1], ...]
    :param truth_label: 1s
        maskhead are used to predict mask,
        all masks are positive proposals. (foreground)
        here we have 2 classes so it's_train fixed to 1
    :param truth_instance: list of truth instances, (H, W)
    :return:
        sampled_proposal: same as proposals
        sampled_label: same as truth_label
        sampled_instance: cropped instance, matching maskhead's_train output
        sampled_assign: index of truth_box each proposals belongs to
    """
    sampled_proposal = Variable(torch.FloatTensor(0, 8)).cuda()
    sampled_label    = Variable(torch.LongTensor (0, 1)).cuda()
    sampled_instance = Variable(torch.FloatTensor(0, 1, 1)).cuda()

    if len(truth_box) == 0 or len(proposals) == 0:
        return sampled_proposal, sampled_label, sampled_instance

    # filter invalid proposals like small proposals
    _, height, width = image.size()
    num_proposal = len(proposals)

    valid = []
    for i in range(num_proposal):
        box = proposals[i, 1:5]
        if not is_small_box(box, min_size=cfg.mask_train_min_size):  # alternative: is_small_box_at_boundary
            valid.append(i)

    if len(valid) == 0:
        return sampled_proposal, sampled_label, sampled_instance

    proposals = proposals[valid]
    # assign bbox to proposals by overlap threshold
    num_proposal = len(proposals)
    box = proposals[:, 1:5]
    # for each bbox, the index of gt which has max overlap with it
    overlap = cython_box_overlap(box, truth_box)
    argmax_overlap = np.argmax(overlap, 1)
    max_overlap = overlap[np.arange(num_proposal), argmax_overlap]

    fg_index = np.where(max_overlap >= cfg.mask_train_fg_thresh_low)[0]

    if len(fg_index) == 0:
        return sampled_proposal, sampled_label, sampled_instance

    fg_length = len(fg_index)
    num_fg = cfg.mask_train_batch_size
    fg_index = fg_index[
        np.random.choice(fg_length, size=num_fg, replace=fg_length < num_fg)
    ]

    sampled_proposal = proposals[fg_index]
    sampled_assign   = argmax_overlap[fg_index]     # assign a gt to each bbox
    sampled_label    = truth_label[sampled_assign]  # assign the gt's label to each bbox
    sampled_instance = []
    for i in range(len(fg_index)):
        instance = truth_instance[sampled_assign[i]]  # for each positive bbox, find instance it belongs to
        box  = sampled_proposal[i, 1:5]
        crop = _crop_instance(instance, box, cfg.mask_size)  # crop the instance by box
        sampled_instance.append(crop[np.newaxis, :, :])

    # save
    sampled_instance = np.vstack(sampled_instance)
    sampled_proposal = Variable(torch.from_numpy(sampled_proposal)).cuda()
    sampled_label    = Variable(torch.from_numpy(sampled_label)).long().cuda()
    sampled_instance = Variable(torch.from_numpy(sampled_instance)).cuda()
    return sampled_proposal, sampled_label, sampled_instance
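
# _crop_instance is a repo helper. A minimal sketch of what it likely does
# (an assumption): crop the (H, W) truth instance to the box and resize to
# the maskhead's output resolution.

import cv2
import numpy as np

def _crop_instance_sketch(instance, box, mask_size):
    x0, y0, x1, y1 = np.rint(box).astype(np.int32)
    crop = instance[y0:y1 + 1, x0:x1 + 1].astype(np.float32)
    crop = cv2.resize(crop, (mask_size, mask_size), interpolation=cv2.INTER_LINEAR)
    return (crop > 0.5).astype(np.float32)  # back to a hard 0/1 mask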