def __init__(self, dim=512, input_dim=1024): """ :param aspect_ratios: Aspect ratios for the anchors. NOTE - this can't be changed now as it depends on other things in the C code... """ super(RPNHead, self).__init__() self.anchor_target_dim = 6 self.stride = 16 self.conv = nn.Sequential( nn.Conv2d(input_dim, dim, kernel_size=3, padding=1), nn.ReLU6(inplace=True), # Tensorflow docs use Relu6, so let's use it too.... nn.Conv2d(dim, self.anchor_target_dim * self._A, kernel_size=1) ) ans_np = generate_anchors(base_size=ANCHOR_SIZE, feat_stride=self.stride, anchor_scales=ANCHOR_SCALES, anchor_ratios=ANCHOR_RATIOS, ) self.register_buffer('anchors', torch.FloatTensor(ans_np))
def anchor_target_layer(gt_boxes, im_size, allowed_border=0): """ Assign anchors to ground-truth targets. Produces anchor classification labels and bounding-box regression targets. for each (H, W) location i generate 3 anchor boxes centered on cell i filter out-of-image anchors measure GT overlap :param gt_boxes: [x1, y1, x2, y2] boxes. These are assumed to be at the same scale as the image (IM_SCALE) :param im_size: Size of the image (h, w). This is assumed to be scaled to IM_SCALE """ if max(im_size) != IM_SCALE: raise ValueError("im size is {}".format(im_size)) h, w = im_size # Get the indices of the anchors in the feature map. # h, w, A, 4 ans_np = generate_anchors( base_size=ANCHOR_SIZE, feat_stride=16, anchor_scales=ANCHOR_SCALES, anchor_ratios=ANCHOR_RATIOS, ) ans_np_flat = ans_np.reshape((-1, 4)) inds_inside = np.where((ans_np_flat[:, 0] >= -allowed_border) & (ans_np_flat[:, 1] >= -allowed_border) & (ans_np_flat[:, 2] < w + allowed_border) & # width (ans_np_flat[:, 3] < h + allowed_border) # height )[0] good_ans_flat = ans_np_flat[inds_inside] if good_ans_flat.size == 0: raise ValueError( "There were no good anchors for an image of size {} with boxes {}". format(im_size, gt_boxes)) # overlaps between the anchors and the gt boxes [num_anchors, num_gtboxes] overlaps = bbox_overlaps(good_ans_flat, gt_boxes) anchor_to_gtbox = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(anchor_to_gtbox.shape[0]), anchor_to_gtbox] gtbox_to_anchor = overlaps.argmax(axis=0) gt_max_overlaps = overlaps[gtbox_to_anchor, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] # Good anchors are those that match SOMEWHERE within a decent tolerance # label: 1 is positive, 0 is negative, -1 is dont care. # assign bg labels first so that positive labels can clobber them labels = (-1) * np.ones(overlaps.shape[0], dtype=np.int64) labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0 labels[gt_argmax_overlaps] = 1 labels[max_overlaps >= RPN_POSITIVE_OVERLAP] = 1 # subsample positive labels if we have too many num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: labels[npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)] = -1 # subsample negative labels if we have too many num_bg = RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: labels[npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)] = -1 # print("{} fg {} bg ratio{:.3f} inds inside {}".format(RPN_BATCHSIZE-num_bg, num_bg, (RPN_BATCHSIZE-num_bg)/RPN_BATCHSIZE, inds_inside.shape[0])) # Get the labels at the original size labels_unmap = (-1) * np.ones(ans_np_flat.shape[0], dtype=np.int64) labels_unmap[inds_inside] = labels # h, w, A labels_unmap_res = labels_unmap.reshape(ans_np.shape[:-1]) anchor_inds = np.column_stack(np.where(labels_unmap_res >= 0)) # These ought to be in the same order anchor_inds_flat = np.where(labels >= 0)[0] anchors = good_ans_flat[anchor_inds_flat] bbox_targets = gt_boxes[anchor_to_gtbox[anchor_inds_flat]] labels = labels[anchor_inds_flat] assert np.all(labels >= 0) # Anchors: [num_used, 4] # Anchor_inds: [num_used, 3] (h, w, A) # bbox_targets: [num_used, 4] # labels: [num_used] return anchors, anchor_inds, bbox_targets, labels