def __init__(self, feature_depth, cfg, mode='all', phase='train'):
        '''
        Build the layers of the region proposal network.

        feature_depth: number of channels of the incoming feature map --> int
        cfg: config; indexed with the keys 'sliding_windows_scales',
             'sliding_windows_ratio' and 'feature_stride'
        mode: only stored on the instance here.
              'all'  --> (default) normal mode
              'RPN'  --> per the original note, return rpn_out.detach()
                         (that logic is not part of this constructor)
        phase: 'train' / 'test', only stored on the instance here
        '''
        super(RegionProposalNetwork, self).__init__()

        # anchor configuration read from the config object
        scales = cfg['sliding_windows_scales']
        ratios = cfg['sliding_windows_ratio']
        anchors_per_location = len(scales) * len(ratios)
        stride = cfg['feature_stride']

        self.depth = feature_depth
        self.anchor_scales = scales
        self.anchor_ratios = ratios
        self.k = anchors_per_location
        self.feat_stride = stride
        self.mode = mode
        self.phase = phase

        # shared 3x3 convolution followed by ReLU, 256 output channels
        shared_conv = nn.Conv2d(in_channels=feature_depth, out_channels=256,
                                kernel_size=3, stride=1, padding=1)
        self.rpn_conv = nn.Sequential(shared_conv, nn.ReLU(inplace=True))

        # 1x1 heads: 2 outputs per anchor for classification, 4 per anchor for regression
        self.cls_layer = nn.Conv2d(in_channels=256,
                                   out_channels=2 * anchors_per_location,
                                   kernel_size=1, stride=1, padding=0)
        self.reg_layer = nn.Conv2d(in_channels=256,
                                   out_channels=4 * anchors_per_location,
                                   kernel_size=1, stride=1, padding=0)

        # proposal layer built from the same stride / scales / ratios
        self.RPN_proposal = ProposalLayer(stride, scales, ratios)
Exemple #2
0
 def __init__(self,
              in_ch=512,
              out_ch=512,
              n_anchors=9,
              feat_stride=16,
              anchor_scales=None,
              num_classes=21,
              rpn_sigma=3.0):
     """Create the RPN links and its anchor-target / proposal layers.

     Args:
         in_ch: input channels of the 3x3 convolution.
         out_ch: output channels of the 3x3 convolution (and input channels
             of the two 1x1 convolutions).
         n_anchors: anchors per location; the score head has
             ``2 * n_anchors`` outputs and the bbox head ``4 * n_anchors``.
         feat_stride: passed to ``AnchorTargetLayer`` and ``ProposalLayer``.
         anchor_scales: passed to ``ProposalLayer``. ``None`` (default)
             means ``[8, 16, 32]``.
         num_classes: accepted for interface compatibility; not used in
             this constructor.
         rpn_sigma: stored as ``self.rpn_sigma``.
     """
     # A list literal as a default argument is created once and shared by
     # every call; build a fresh list per instance instead.
     if anchor_scales is None:
         anchor_scales = [8, 16, 32]
     super(RPN, self).__init__(
         rpn_conv_3x3=L.Convolution2D(in_ch, out_ch, 3, 1, 1),
         rpn_cls_score=L.Convolution2D(out_ch, 2 * n_anchors, 1, 1, 0),
         rpn_bbox_pred=L.Convolution2D(out_ch, 4 * n_anchors, 1, 1, 0))
     self.anchor_target_layer = AnchorTargetLayer(feat_stride)
     self.proposal_layer = ProposalLayer(feat_stride, anchor_scales)
     self.rpn_sigma = rpn_sigma
Exemple #3
0
    def __init__(self, input_dim, num_anchors_per_frame, output_dim):
        """
        Create the two linear RPN heads and the proposal layer.

        input_dim: input feature size of both linear heads
        num_anchors_per_frame: number of anchors per frame; scales the size of
                               both head outputs
        output_dim: stored as ``num_class``; number of score outputs per anchor
        """
        super(RPN, self).__init__()

        # output widths of the two heads
        score_width = num_anchors_per_frame * output_dim  # scores per anchor * num anchors
        bbox_width = num_anchors_per_frame * 2  # 2 coords * num anchors

        self.input_dim = input_dim
        self.num_class = output_dim
        self.num_anchors_per_frame = num_anchors_per_frame
        # window size limits come from the module-level config
        self.min_window_size = cfg.MIN_WINDOW_SIZE
        self.max_window_size = cfg.MAX_WINDOW_SIZE
        self.num_score_out = score_width
        self.num_bbox_out = bbox_width

        # classification head
        self.cls_score_RPN = nn.Linear(input_dim, score_width, bias=True)
        # bounding-box head
        self.bbox_score_RPN = nn.Linear(input_dim, bbox_width, bias=True)

        self.RPN_proposal_layer = ProposalLayer(num_anchors_per_frame,
                                                self.min_window_size,
                                                self.max_window_size)
Exemple #4
0
def build_model(dataset,
                frcn_rois_per_img,
                train_pre_nms_N=12000,
                train_post_nms_N=2000,
                test_pre_nms_N=6000,
                test_post_nms_N=300,
                inference=False):
    """
    Assemble the Faster-RCNN model; for inference the proposal layer is returned as well.

    The model combines the VGG layers, the Region Proposal Network (RPN) and the ROI
    classification network (ROI-pooling + fully connected layers) into a tree with
    three branch points (b1, b2, b3):

    VGG -> b1 -> Conv (3x3) -> b2 -> Conv (1x1)  [objectness, PixelwiseSoftmax]
                               b2 -> Conv (1x1)  [bounding box regression]
           b1 -> PropLayer -> ROI -> Affine -> Affine -> b3 -> Affine [class scores, Softmax]
                                                         b3 -> Affine [bounding box regression]

    No cost/loss objects are constructed in this function.

    Training and inference differ as follows:
    - The number of regions kept before / after non-max suppression is
      (train_pre_nms_N, train_post_nms_N), default (12000, 2000), for training and
      (test_pre_nms_N, test_post_nms_N), default (6000, 300), for inference.
    - The inference flag is forwarded to the ProposalLayer constructor.
    - The return value is a (model, proposalLayer) tuple for inference.

    Arguments:
        dataset (objectlocalization): Dataset object; supplies num_classes and is passed
                                      to the ProposalLayer.
        frcn_rois_per_img (int): Number of ROIs per image considered by the classification
                                 network (ProposalLayer num_rois).
        train_pre_nms_N (int): Regions kept before NMS when inference is False.
        train_post_nms_N (int): Regions kept after NMS when inference is False.
        test_pre_nms_N (int): Regions kept before NMS when inference is True.
        test_post_nms_N (int): Regions kept after NMS when inference is True.
        inference (bool): Construct the model for inference. Default is False.

    Returns:
        model (Model): Faster-RCNN model.
        proposalLayer (proposalLayer): Reference to proposalLayer in the model.
                                       Returned only for inference=True.
    """
    num_classes = dataset.num_classes

    # branch points of the tree
    b1 = BranchNode(name="conv_branch")
    b2 = BranchNode(name="rpn_branch")
    b3 = BranchNode(name="roi_branch")

    # feature extractor
    vgg_layers = util.add_vgg_layers()

    # RPN convolutions; all three share the same stride / init / bias settings
    rpn_conv_args = dict(strides=1, init=Gaussian(scale=0.01), bias=Constant(0))
    rpn_conv = Conv((3, 3, 512), activation=Rectlin(), padding=1, **rpn_conv_args)
    # the two 1x1 convolutions are also handed to the ProposalLayer below
    rpn_objectness = Conv((1, 1, 18),
                          activation=PixelwiseSoftmax(c=2),
                          padding=0,
                          **rpn_conv_args)
    rpn_bbox = Conv((1, 1, 36),
                    activation=Identity(),
                    padding=0,
                    **rpn_conv_args)

    # NMS limits depend on whether the model is built for inference
    if inference:
        pre_nms_N, post_nms_N = test_pre_nms_N, test_post_nms_N  # defaults 6000, 300
    else:
        pre_nms_N, post_nms_N = train_pre_nms_N, train_post_nms_N  # defaults 12000, 2000

    proposalLayer = ProposalLayer([rpn_objectness, rpn_bbox],
                                  dataset,
                                  pre_nms_N=pre_nms_N,
                                  post_nms_N=post_nms_N,
                                  num_rois=frcn_rois_per_img,
                                  inference=inference)

    # ROI classification network: proposals -> ROI pooling -> two 4096-unit layers
    roi_layers = [
        proposalLayer,
        RoiPooling(HW=(7, 7)),
        Affine(nout=4096,
               init=Gaussian(scale=0.005),
               bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096,
               init=Gaussian(scale=0.005),
               bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5)
    ]

    # output heads of the classification network
    roi_category = Affine(nout=num_classes,
                          init=Gaussian(scale=0.01),
                          bias=Constant(0),
                          activation=Softmax())
    roi_bbox = Affine(nout=4 * num_classes,
                      init=Gaussian(scale=0.001),
                      bias=Constant(0),
                      activation=Identity())

    # classification sub-tree: both heads hang off branch point b3
    category_path = roi_layers + [b3, roi_category]
    bbox_path = [b3, roi_bbox]
    frcn_tree = Tree([category_path, bbox_path])

    # full model: two RPN paths plus the classification sub-tree attached at b1
    rpn_objectness_path = vgg_layers + [b1, rpn_conv, b2, rpn_objectness]
    rpn_bbox_path = [b2, rpn_bbox]
    frcn_path = [b1, frcn_tree]
    model = Model(layers=Tree([rpn_objectness_path, rpn_bbox_path, frcn_path]))

    return (model, proposalLayer) if inference else model
def test_proposal_layer(backend_default, fargs):
    """
    Compare ProposalLayer output with the PyCaffe reference layers.

    The proposals and scores produced by ProposalLayer are checked against
    PyCaffeProposalLayer, then the label / bbox target buffers are checked against
    PyCaffeProposalTargetLayer.

    Arguments:
        backend_default: backend used for tensor allocation; its batch size is set to 1.
        fargs (tuple): (_conv_size, im_shape_arr, SCALE, pre_nms_topN, post_nms_topN,
                       nms_thresh, min_size).
    """
    np.random.seed(seed=0)

    # Get a backend for tensor allocation
    be = backend_default
    be.bsz = 1

    _conv_size, im_shape_arr, SCALE, pre_nms_topN, post_nms_topN, nms_thresh, min_size = fargs

    im_shape = be.zeros((2, 1), dtype=np.float32)
    im_shape[:] = np.array(im_shape_arr)
    im_scale = be.ones((1, 1), dtype=np.float32).fill(1.0 / 16.0)
    SCALE = be.ones((1, 1), dtype=np.float32).fill(SCALE)

    # size of the feature map region covered by the image (image size * 1/16)
    real_H = np.round(im_shape.get()[1] * im_scale.get()).astype(int).reshape((1,))[0]
    real_W = np.round(im_shape.get()[0] * im_scale.get()).astype(int).reshape((1,))[0]

    # target buffers handed to the mock loader and compared at the end of the test
    frcn_labels = be.zeros((21, 128), dtype=np.int32)
    frcn_labels_mask = be.zeros(frcn_labels.shape, dtype=np.int32)
    frcn_bbtargets = be.zeros((21 * 4, 128), dtype=np.float32)
    frcn_bbmask = be.zeros(frcn_bbtargets.shape, dtype=np.float32)

    gt_boxes = be.zeros((64, 4), dtype=np.float32)
    gt_boxes[:3, :] = np.array([[262, 210, 323, 338],
                               [164, 263, 252, 371],
                               [240, 193, 294, 298]])

    gt_classes = be.zeros((64, 1), dtype=np.int32)
    gt_classes[:3, :] = np.array([[9], [9], [9]])
    num_gt_boxes = be.zeros((1, 1), dtype=np.int32).fill(3)

    # distinct random scores (sampled without replacement) and random bbox deltas
    num_scores = 2 * 9 * _conv_size * _conv_size
    rpn_obj_scores_dev = be.array(np.random.choice(num_scores * 2, size=num_scores,
                                  replace=False) / float(num_scores * 2.0))
    rpn_bbox_deltas_dev = be.array(np.random.random((4 * 9 * _conv_size * _conv_size, 1)))

    # mock RPN_1x1_obj and RPN_1x1_bbox
    RPN_1x1_obj = mock_layer(rpn_obj_scores_dev)
    RPN_1x1_bbox = mock_layer(rpn_bbox_deltas_dev)

    # Mock loader
    mock_loader = mock_dataloader(_conv_size, im_scale, im_shape, SCALE,
                                  gt_boxes, gt_classes, num_gt_boxes,
                                  frcn_labels, frcn_labels_mask, frcn_bbtargets, frcn_bbmask)

    # inference=False is passed here; the proposal targets are verified further down
    prop_layer = ProposalLayer([[RPN_1x1_obj], [RPN_1x1_bbox]], mock_loader,
                               pre_nms_N=pre_nms_topN, post_nms_N=post_nms_topN,
                               nms_thresh=nms_thresh, min_bbox_size=min_size, num_rois=128,
                               deterministic=True, inference=False, debug=True)

    prop_layer.configure(mock_layer([]))
    prop_layer.allocate()

    # mock input (is not used)
    inputs = []
    inputs, dev_proposals = prop_layer.fprop(inputs, inference=False)

    # extract final proposals and scores from the layer without buffered memory like dev_proposals
    target_proposals = prop_layer.proposals
    target_scores = prop_layer.scores

    # Prepare PyCaffe Reference Layer
    prop_layer_ref = PyCaffeProposalLayer()

    # Re-initialize inputs to same as above
    rpn_obj_scores = rpn_obj_scores_dev.get()
    rpn_bbox_deltas = rpn_bbox_deltas_dev.get()

    # bbox deltas: (4KHW, 1) -> crop to (real_H, real_W) -> 2-D array with 4 columns
    # NB: pycaffe uses A where we use K
    rpn_bbox_deltas = rpn_bbox_deltas.reshape((4, -1, _conv_size, _conv_size))
    rpn_bbox_deltas = rpn_bbox_deltas[:, :, :real_H, :real_W].reshape((4, -1)).T

    # reshape from (2KHW, 1) -> (1, K2, H, W)
    rpn_obj_scores = rpn_obj_scores.reshape((2, -1, _conv_size, _conv_size))
    rpn_obj_scores = rpn_obj_scores[:, :, :real_H, :real_W]
    rpn_obj_scores = rpn_obj_scores.reshape((1, -1, real_H, real_W))

    bottom = [None, None, None]
    bottom[0] = rpn_obj_scores
    bottom[1] = rpn_bbox_deltas
    bottom[2] = [im_shape[1], im_shape[0], SCALE]

    top = [None, None]

    prop_layer_ref.setup(bottom, top, pre_nms_topN=pre_nms_topN, post_nms_topN=post_nms_topN,
                         nms_thresh=nms_thresh, min_size=min_size)
    prop_layer_ref.forward(bottom, top)

    # Compare proposals and scores from proposal layer
    # (column 0 of the reference proposals is skipped)
    assert np.allclose(top[0][:, 1:], target_proposals, atol=1e-5, rtol=1e-4)
    assert np.allclose(top[1], target_scores, atol=1e-5, rtol=1e-4)

    # Now testing proposal target layer
    t_bottom = [0, 1]
    # use target proposals from neon RPN, with a leading column of zeros
    zeros = np.zeros((target_proposals.shape[0], 1), dtype=target_proposals.dtype)
    t_bottom[0] = np.hstack((zeros, target_proposals))
    # convert format of gt_boxes from (num_classes, 4) to (num_gt_boxes, 5)
    # concat the boxes and the classes and clip to num_gt_boxes and pass it in
    t_bottom[1] = np.hstack((prop_layer.gt_boxes.get(),
                            prop_layer.gt_classes.get()))[:prop_layer.num_gt_boxes.get()[0][0]]

    t_top = [None, None, None, None, None]

    prop_target_layer_ref = PyCaffeProposalTargetLayer()
    prop_target_layer_ref.setup(t_bottom, t_top, deterministic=True)
    prop_target_layer_ref.forward(t_bottom, t_top)

    # transpose the reference targets into the layout of the neon buffers
    frcn_bbtargets_reference = np.zeros(frcn_bbtargets.shape, dtype=np.float32)
    frcn_bbmask_reference = np.zeros(frcn_bbmask.shape, dtype=np.float32)

    frcn_bbtargets_reference[:t_top[2].shape[0]] = t_top[2].T
    frcn_bbmask_reference[:t_top[3].shape[0]] = t_top[3].T

    neon_labels = np.zeros((frcn_labels.shape[1],))
    label_mat = (frcn_labels.get() * frcn_labels_mask.get())

    # Convert the masked (class, roi) label matrix into one class index per ROI
    for class_idx in range(frcn_labels.shape[0]):
        for idx, elem in enumerate(label_mat[class_idx]):
            if elem != 0:
                neon_labels[idx] = class_idx

    # Test proposal layer targets against pycaffe layer
    # np.all replaces np.alltrue, which was removed in NumPy 2.0
    assert np.all(t_top[1] == neon_labels)  # target labels
    assert np.allclose(frcn_bbtargets_reference, frcn_bbtargets.get(), atol=1e-4)  # target bbox
    assert np.all(frcn_bbmask_reference == frcn_bbmask.get())  # target bbox mask
Exemple #6
0
def test_proposal_layer(backend_default, fargs):
    """
    Compare ProposalLayer output with the PyCaffe reference layers.

    The proposals and scores produced by ProposalLayer are checked against
    PyCaffeProposalLayer, then the label / bbox target buffers are checked
    against PyCaffeProposalTargetLayer.

    Arguments:
        backend_default: backend used for tensor allocation; its batch size
                         is set to 1.
        fargs (tuple): (_conv_size, im_shape_arr, SCALE, pre_nms_topN,
                       post_nms_topN, nms_thresh, min_size).
    """
    np.random.seed(seed=0)

    # Get a backend for tensor allocation
    be = backend_default
    be.bsz = 1

    _conv_size, im_shape_arr, SCALE, pre_nms_topN, post_nms_topN, nms_thresh, min_size = fargs

    im_shape = be.zeros((2, 1), dtype=np.float32)
    im_shape[:] = np.array(im_shape_arr)
    im_scale = be.ones((1, 1), dtype=np.float32).fill(1.0 / 16.0)
    SCALE = be.ones((1, 1), dtype=np.float32).fill(SCALE)

    # size of the feature map region covered by the image (image size * 1/16)
    real_H = np.round(im_shape.get()[1] * im_scale.get()).astype(int).reshape(
        (1, ))[0]
    real_W = np.round(im_shape.get()[0] * im_scale.get()).astype(int).reshape(
        (1, ))[0]

    # target buffers handed to the mock loader and compared at the end
    frcn_labels = be.zeros((21, 128), dtype=np.int32)
    frcn_labels_mask = be.zeros(frcn_labels.shape, dtype=np.int32)
    frcn_bbtargets = be.zeros((21 * 4, 128), dtype=np.float32)
    frcn_bbmask = be.zeros(frcn_bbtargets.shape, dtype=np.float32)

    gt_boxes = be.zeros((64, 4), dtype=np.float32)
    gt_boxes[:3, :] = np.array([[262, 210, 323, 338], [164, 263, 252, 371],
                                [240, 193, 294, 298]])

    gt_classes = be.zeros((64, 1), dtype=np.int32)
    gt_classes[:3, :] = np.array([[9], [9], [9]])
    num_gt_boxes = be.zeros((1, 1), dtype=np.int32).fill(3)

    # distinct random scores (sampled without replacement) and random deltas
    num_scores = 2 * 9 * _conv_size * _conv_size
    rpn_obj_scores_dev = be.array(
        np.random.choice(num_scores * 2, size=num_scores, replace=False) /
        float(num_scores * 2.0))
    rpn_bbox_deltas_dev = be.array(
        np.random.random((4 * 9 * _conv_size * _conv_size, 1)))

    # mock RPN_1x1_obj and RPN_1x1_bbox
    RPN_1x1_obj = mock_layer(rpn_obj_scores_dev)
    RPN_1x1_bbox = mock_layer(rpn_bbox_deltas_dev)

    # Mock loader
    mock_loader = mock_dataloader(_conv_size, im_scale, im_shape, SCALE,
                                  gt_boxes, gt_classes, num_gt_boxes,
                                  frcn_labels, frcn_labels_mask,
                                  frcn_bbtargets, frcn_bbmask)

    # inference=False is passed here; the proposal targets are verified below
    prop_layer = ProposalLayer([[RPN_1x1_obj], [RPN_1x1_bbox]],
                               mock_loader,
                               pre_nms_N=pre_nms_topN,
                               post_nms_N=post_nms_topN,
                               nms_thresh=nms_thresh,
                               min_bbox_size=min_size,
                               num_rois=128,
                               deterministic=True,
                               inference=False,
                               debug=True)

    prop_layer.configure(mock_layer([]))
    prop_layer.allocate()

    # mock input (is not used)
    inputs = []
    inputs, dev_proposals = prop_layer.fprop(inputs, inference=False)

    # extract final proposals and scores from the layer without buffered memory like dev_proposals
    target_proposals = prop_layer.proposals
    target_scores = prop_layer.scores

    # Prepare PyCaffe Reference Layer
    prop_layer_ref = PyCaffeProposalLayer()

    # Re-initialize inputs to same as above
    rpn_obj_scores = rpn_obj_scores_dev.get()
    rpn_bbox_deltas = rpn_bbox_deltas_dev.get()

    # bbox deltas: (4KHW, 1) -> crop to (real_H, real_W) -> 2-D array with
    # 4 columns
    # NB: pycaffe uses A where we use K
    rpn_bbox_deltas = rpn_bbox_deltas.reshape((4, -1, _conv_size, _conv_size))
    rpn_bbox_deltas = rpn_bbox_deltas[:, :, :real_H, :real_W].reshape(
        (4, -1)).T

    # reshape from (2KHW, 1) -> (1, K2, H, W)
    rpn_obj_scores = rpn_obj_scores.reshape((2, -1, _conv_size, _conv_size))
    rpn_obj_scores = rpn_obj_scores[:, :, :real_H, :real_W]
    rpn_obj_scores = rpn_obj_scores.reshape((1, -1, real_H, real_W))

    bottom = [None, None, None]
    bottom[0] = rpn_obj_scores
    bottom[1] = rpn_bbox_deltas
    bottom[2] = [im_shape[1], im_shape[0], SCALE]

    top = [None, None]

    prop_layer_ref.setup(bottom,
                         top,
                         pre_nms_topN=pre_nms_topN,
                         post_nms_topN=post_nms_topN,
                         nms_thresh=nms_thresh,
                         min_size=min_size)
    prop_layer_ref.forward(bottom, top)

    # Compare proposals and scores from proposal layer
    # (column 0 of the reference proposals is skipped)
    assert np.allclose(top[0][:, 1:], target_proposals, atol=1e-5, rtol=1e-4)
    assert np.allclose(top[1], target_scores, atol=1e-5, rtol=1e-4)

    # Now testing proposal target layer
    t_bottom = [0, 1]
    # use target proposals from neon RPN, with a leading column of zeros
    zeros = np.zeros((target_proposals.shape[0], 1),
                     dtype=target_proposals.dtype)
    t_bottom[0] = np.hstack((zeros, target_proposals))
    # convert format of gt_boxes from (num_classes, 4) to (num_gt_boxes, 5)
    # concat the boxes and the classes and clip to num_gt_boxes and pass it in
    t_bottom[1] = np.hstack(
        (prop_layer.gt_boxes.get(),
         prop_layer.gt_classes.get()))[:prop_layer.num_gt_boxes.get()[0][0]]

    t_top = [None, None, None, None, None]

    prop_target_layer_ref = PyCaffeProposalTargetLayer()
    prop_target_layer_ref.setup(t_bottom, t_top, deterministic=True)
    prop_target_layer_ref.forward(t_bottom, t_top)

    # transpose the reference targets into the layout of the neon buffers
    frcn_bbtargets_reference = np.zeros(frcn_bbtargets.shape, dtype=np.float32)
    frcn_bbmask_reference = np.zeros(frcn_bbmask.shape, dtype=np.float32)

    frcn_bbtargets_reference[:t_top[2].shape[0]] = t_top[2].T
    frcn_bbmask_reference[:t_top[3].shape[0]] = t_top[3].T

    neon_labels = np.zeros((frcn_labels.shape[1], ))
    label_mat = (frcn_labels.get() * frcn_labels_mask.get())

    # Convert the masked (class, roi) label matrix into one class index per ROI
    for class_idx in range(frcn_labels.shape[0]):
        for idx, elem in enumerate(label_mat[class_idx]):
            if elem != 0:
                neon_labels[idx] = class_idx

    # Test proposal layer targets against pycaffe layer
    # np.all replaces np.alltrue, which was removed in NumPy 2.0
    assert np.all(t_top[1] == neon_labels)  # target labels
    assert np.allclose(frcn_bbtargets_reference,
                       frcn_bbtargets.get(),
                       atol=1e-4)  # target bbox
    assert np.all(frcn_bbmask_reference == frcn_bbmask.get())  # target bbox mask