Ejemplo n.º 1
0
def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image.

    Args:
        rois: 2-D array with one RoI per row. Rows are handed unchanged to
            ``twin_overlaps`` and ``twin_transform``.
        overlaps: 1-D array with one overlap score per RoI. A score of
            exactly 1 marks the RoI as a ground-truth RoI.
        labels: 1-D array with one class label per RoI.

    Returns:
        A ``(rois.shape[0], 3)`` float32 array. For every RoI whose overlap
        is at least ``cfg.TRAIN.TWIN_THRESH``, column 0 holds its label and
        columns 1-2 hold the ``twin_transform`` output against the
        ground-truth RoI it overlaps most. All other rows are zero; the whole
        array is zero when there is no ground-truth RoI.
    """
    targets = np.zeros((rois.shape[0], 3), dtype=np.float32)

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if gt_inds.size == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.TWIN_THRESH)[0]
    ex_rois = rois[ex_inds, :]

    # Get IoU overlap between each ex ROI and gt ROI.
    # np.float64 is used instead of the removed ``np.float`` alias; both name
    # the same dtype.
    ex_gt_overlaps = twin_overlaps(
        np.ascontiguousarray(ex_rois, dtype=np.float64),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float64))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = twin_transform(ex_rois, gt_rois)
    return targets
Ejemplo n.º 2
0
def _sample_positive_rois(all_rois, gt_wins, captions, fc_features):
    """Keep the foreground RoIs together with their matched caption data.

    No random sampling happens here: every RoI whose best overlap with a
    ground-truth window reaches ``cfg.TRAIN.CAPTION_FG_THRESH`` is kept, in
    its original order.

    Args:
        all_rois: 2-D array of RoIs; columns 1:3 are the window passed to
            ``twin_overlaps``.
        gt_wins: 2-D array of ground-truth windows; the first two columns
            are the window passed to ``twin_overlaps``.
        captions: array indexed as ``captions[gt_index, slot, :]``. Slot 0 is
            returned as ``input_sent``, slot 1 as ``cont_sent`` and slot 2 as
            ``target_sent``.
        fc_features: 2-D array with one feature row per RoI.

    Returns:
        ``(cont_sent, input_sent, target_sent, fc_features, rois,
        keep_inds)``. The three sentence arrays have one column per kept
        RoI; ``fc_features`` and ``rois`` hold the kept rows; ``keep_inds``
        are the indices of the kept RoIs in ``all_rois``.
    """
    # overlaps: (rois x gt_wins)
    # np.float64 is used instead of the removed ``np.float`` alias; both name
    # the same dtype.
    overlaps = twin_overlaps(
        np.ascontiguousarray(all_rois[:, 1:3], dtype=np.float64),
        np.ascontiguousarray(gt_wins[:, :2], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    num_rois = gt_assignment.shape[0]

    # Select foreground RoIs as those with >= CAPTION_FG_THRESH overlap;
    # these are the only indices we keep.
    keep_inds = np.where(max_overlaps >= cfg.TRAIN.CAPTION_FG_THRESH)[0]

    def _sentences(slot):
        # Caption slot of each RoI's matched window, flattened per RoI and
        # laid out as one column per RoI, then restricted to the kept RoIs.
        per_roi = captions[gt_assignment, slot, :].reshape((num_rois, -1))
        return per_roi.transpose((1, 0))[:, keep_inds]

    input_sent = _sentences(0)
    cont_sent = _sentences(1)
    target_sent = _sentences(2)

    rois = all_rois[keep_inds]
    fc_features = fc_features[keep_inds, :]

    return cont_sent, input_sent, target_sent, fc_features, rois, keep_inds
Ejemplo n.º 3
0
def _sample_rois(all_rois, gt_wins, fg_rois_per_image, rois_per_image,
                 num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: 2-D array of RoIs; columns 1:3 are the window passed to
            ``twin_overlaps``.
        gt_wins: 2-D array of ground-truth windows; columns 0:2 are the
            window and column 2 is the class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: maximum total number of RoIs to sample.
        num_classes: forwarded to ``_get_twin_regression_labels``.

    Returns:
        ``(labels, rois, twin_targets, twin_inside_weights)`` for the sampled
        RoIs. Foreground samples come first; every label after them is
        clamped to 0. The last two entries are whatever
        ``_get_twin_regression_labels`` returns.
    """
    # overlaps: (rois x gt_wins)
    # np.float64 is used instead of the removed ``np.float`` alias; both name
    # the same dtype.
    overlaps = twin_overlaps(
        np.ascontiguousarray(all_rois[:, 1:3], dtype=np.float64),
        np.ascontiguousarray(gt_wins[:, :2], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_wins[gt_assignment, 2]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = min(rois_per_image - fg_rois_per_this_image,
                                 bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Fancy indexing copies, so clamping below does not touch gt_wins.
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    twin_target_data = _compute_targets(rois[:, 1:3],
                                        gt_wins[gt_assignment[keep_inds], :2],
                                        labels)

    twin_targets, twin_inside_weights = \
        _get_twin_regression_labels(twin_target_data, num_classes)

    return labels, rois, twin_targets, twin_inside_weights
Ejemplo n.º 4
0
def _sample_all_rois(all_rois, gt_wins, num_classes):
    """Generate all RoIs comprising foreground and background examples.

    Unlike a sampling routine, this keeps every RoI: each one is labelled
    with the class of the ground-truth window it overlaps most.

    Args:
        all_rois: 2-D array of RoIs; columns 1:3 are the window passed to
            ``twin_overlaps``.
        gt_wins: 2-D array of ground-truth windows; columns 0:2 are the
            window and column 2 is the class label.
        num_classes: forwarded to ``_get_twin_regression_labels``.

    Returns:
        ``(labels, rois, twin_targets, twin_inside_weights)`` where ``rois``
        is ``all_rois`` itself and the last two entries are whatever
        ``_get_twin_regression_labels`` returns.
    """
    # overlaps: (rois x gt_wins)
    # np.float64 is used instead of the removed ``np.float`` alias; both name
    # the same dtype.
    overlaps = twin_overlaps(
        np.ascontiguousarray(all_rois[:, 1:3], dtype=np.float64),
        np.ascontiguousarray(gt_wins[:, :2], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    labels = gt_wins[gt_assignment, 2]
    rois = all_rois

    twin_target_data = _compute_targets(rois[:, 1:3],
                                        gt_wins[gt_assignment, :2], labels)

    twin_targets, twin_inside_weights = \
        _get_twin_regression_labels(twin_target_data, num_classes)

    return labels, rois, twin_targets, twin_inside_weights
Ejemplo n.º 5
0
    def forward(self, bottom, top):
        """Assign a label and regression target to every temporal anchor.

        Reads:
            bottom[0].data: only its shape is used. The batch dimension must
                be 1 and the last three dimensions are taken as
                (length, height, width).
            bottom[1].data: ground-truth segments, one per row; the original
                author documents the layout as (x1, x2, label).
            bottom[2].data: only ``shape[2]`` is used, as the upper bound
                for anchors that count as "inside" the input.

        Writes (``A`` is ``self._num_anchors``):
            top[0]: labels, shape (1, 1, A * length, height, width);
                1 is positive, 0 is negative, -1 is "don't care".
            top[1]: regression targets, shape
                (1, A * 2, length, height, width).
            top[2]: inside weights, same shape as top[1].
            top[3]: outside weights, same shape as top[1].

        When the module-level ``DEBUG`` flag is set, running statistics on
        ``self`` (``_sums``, ``_squared_sums``, ``_counts``, ``_fg_sum``,
        ``_bg_sum``, ``_count``) are updated and printed as well.
        """
        # Algorithm:
        #
        # for each temporal location i along `length`
        #   generate A anchor segments shifted to location i
        # filter out anchors that fall outside the input
        # measure GT overlap and derive labels, targets and weights

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., L, H, W)
        length, height, width = bottom[0].data.shape[-3:]
        # GT boxes (x1, x2, label).
        # NOTE(review): the original author notes that inputs without any GT
        # segment are already filtered out upstream - confirm against caller.
        gt_boxes = bottom[1].data

        if DEBUG:
            print('')
            print('length, height, width: ({}, {}, {})'.format(length, height, width))
            print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
            print('rpn: gt_boxes {}'.format(gt_boxes))

        # 1. Generate shifted anchors
        shifts = np.arange(0, length) * self._feat_stride
        # add A anchors (1, A, 2) to
        # K shifts (K, 1, 1), broadcast over both segment endpoints, to get
        # shifted anchors (K, A, 2)
        # reshape to (K*A, 2) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        all_anchors = (self._anchors.reshape((1, A, 2)) +
                       shifts.reshape((1, K, 1)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 2))
        total_anchors = int(K * A)

        # only keep anchors inside the input
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] < bottom[2].data.shape[2] + self._allowed_border)  # length
        )[0]

        if DEBUG:
            print('total_anchors {}'.format(total_anchors))
            print('inds_inside {}'.format(len(inds_inside)))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print('anchors.shape {}'.format(anchors.shape))
            print('anchors {}'.format(anchors))

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is used instead of the removed ``np.float`` alias; both
        # name the same dtype.
        overlaps = twin_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))
        # best gt for each anchor
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        # best anchor(s) for each gt; the np.where pass also picks up every
        # anchor that ties with a gt's best overlap
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if DEBUG:
            print('max_overlaps {}'.format(max_overlaps))
            print('gt_max_overlaps {}'.format(gt_max_overlaps))

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1

        # regression target of every inside anchor against its best gt
        twin_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        if DEBUG:
            print('twin_targets {}'.format(twin_targets))

        # only positive anchors contribute to the regression loss
        twin_inside_weights = np.zeros((len(inds_inside), 2), dtype=np.float32)
        twin_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_TWIN_INSIDE_WEIGHTS)

        twin_outside_weights = np.zeros((len(inds_inside), 2), dtype=np.float32)
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 2)) * 1.0 / num_examples
            negative_weights = np.ones((1, 2)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        twin_outside_weights[labels == 1, :] = positive_weights
        twin_outside_weights[labels == 0, :] = negative_weights

        if DEBUG:
            self._sums += twin_targets[labels == 1, :].sum(axis=0)
            self._squared_sums += (twin_targets[labels == 1, :] ** 2).sum(axis=0)
            self._counts += np.sum(labels == 1)
            means = self._sums / self._counts
            stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
            print('means:')
            print(means)
            print('stdevs:')
            print(stds)

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        twin_targets = _unmap(twin_targets, total_anchors, inds_inside, fill=0)
        twin_inside_weights = _unmap(twin_inside_weights, total_anchors, inds_inside, fill=0)
        twin_outside_weights = _unmap(twin_outside_weights, total_anchors, inds_inside, fill=0)

        if DEBUG:
            print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
            print('rpn: num_positive {}'.format(np.sum(labels == 1)))
            print('rpn: num_negative {}'.format(np.sum(labels == 0)))
            self._fg_sum += np.sum(labels == 1)
            self._bg_sum += np.sum(labels == 0)
            self._count += 1
            print('rpn: num_positive avg {}'.format(self._fg_sum / self._count))
            print('rpn: num_negative avg {}'.format(self._bg_sum / self._count))

        # These two lines are printed on every call, regardless of DEBUG.
        print('rpn: num_positive {}'.format(np.sum(labels == 1)))
        print('rpn: num_negative {}'.format(np.sum(labels == 0)))

        # NOTE(review): anchors are generated along `length` only
        # (total_anchors == length * A), so the reshapes below presumably
        # rely on height * width == 1 - confirm against the network layout.

        # labels
        labels = labels.reshape((1, length, height, width, A)).transpose(0, 4, 1, 2, 3)
        labels = labels.reshape((1, 1, A * length, height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # twin_targets
        twin_targets = twin_targets \
            .reshape((1, length, height, width, A * 2)).transpose(0, 4, 1, 2, 3)
        top[1].reshape(*twin_targets.shape)
        top[1].data[...] = twin_targets

        # twin_inside_weights
        twin_inside_weights = twin_inside_weights \
            .reshape((1, length, height, width, A * 2)).transpose(0, 4, 1, 2, 3)
        assert twin_inside_weights.shape[3] == height
        assert twin_inside_weights.shape[4] == width
        top[2].reshape(*twin_inside_weights.shape)
        top[2].data[...] = twin_inside_weights

        # twin_outside_weights
        twin_outside_weights = twin_outside_weights \
            .reshape((1, length, height, width, A * 2)).transpose(0, 4, 1, 2, 3)
        assert twin_outside_weights.shape[3] == height
        assert twin_outside_weights.shape[4] == width
        top[3].reshape(*twin_outside_weights.shape)
        top[3].data[...] = twin_outside_weights