Esempio n. 1
0
    def generate(self):
        """Build the RPN network spec and return the result of ``self.save``.

        ``self.train`` selects the variant. The training net gets a Python
        data layer, an anchor-target Python layer, the two RPN losses and a
        frozen dummy fc6/fc7 stack; otherwise the net takes ``LT.input()``
        and ends with ``LT.roi_proposal``.

        Returns:
            Whatever ``self.save`` returns for the assembled ``NetSpec``.
        """
        net = caffe.NetSpec()

        # Input layer, plus the parameter spec used for the RPN predictors.
        if self.train:
            rpn_param = LT.learned_param
            roi_data_cfg = dict(
                module='roi_data_layer.layer',
                layer='RoIDataLayer',
                param_str="num_classes: " + str(self.num_classes))
            # Extra tops are requested by name through the ``top`` keyword.
            net.data = L.Python(top=["im_info", 'gt_boxes'],
                                python_param=roi_data_cfg)
        else:
            rpn_param = LT.frozen_param
            net.data, net.im_info = LT.input()

        # Shared conv1..conv5 body; learnable only when configured so.
        if self.conv_1_to_5_learn:
            backbone_param = LT.learned_param
        else:
            backbone_param = LT.frozen_param
        LT.conv1_to_5(net, backbone_param)

        rpn_tops = LT.rpn_class_and_bbox_predictors(net, self, rpn_param)
        (net.rpn_conv1, net.rpn_relu1,
         net.rpn_cls_score, net.rpn_bbox_pred) = rpn_tops
        net.rpn_cls_score_reshape = LT.reshape(net.rpn_cls_score,
                                               [0, 2, -1, 0])

        if not self.train:
            # Test-time net: finish with the proposal layers and stop here.
            (net.rpn_cls_prob, net.rpn_cls_prob_reshape,
             net.rois) = LT.roi_proposal(net, self)
            return self.save(net)

        # Training only: anchor targets and the two RPN losses. Bottoms and
        # tops are wired up by blob name rather than by Top object.
        anchor_cfg = dict(
            module='rpn.anchor_target_layer',
            layer='AnchorTargetLayer',
            param_str=LT.anchor_params(self.anchor_feat_stride,
                                       self.anchor_scales,
                                       self.anchor_ratios))
        net.rpn_labels = L.Python(
            bottom=["rpn_cls_score", "gt_boxes", "im_info", "data"],
            top=['rpn_bbox_targets', "rpn_bbox_inside_weights",
                 "rpn_bbox_outside_weights"],
            python_param=anchor_cfg)
        net.loss_cls = LT.soft_max_with_loss(
            ["rpn_cls_score_reshape", "rpn_labels"])
        net.loss_bbox = L.SmoothL1Loss(
            bottom=["rpn_bbox_pred", "rpn_bbox_targets",
                    "rpn_bbox_inside_weights", "rpn_bbox_outside_weights"],
            loss_weight=1)

        # Dummy RCNN layers: frozen fc6/fc7 fed by generated data, with the
        # fc7 output silenced.
        dummy_cfg = dict(shape=dict(dim=[1, 9216]),
                         data_filler=LT.WEIGHT_FILLER)
        net.dummy_roi_pool_conv_5 = L.DummyData(dummy_data_param=dummy_cfg)
        net.fc6, net.relu6 = LT.fc_relu(net.dummy_roi_pool_conv_5,
                                        4096,
                                        param=LT.frozen_param)
        net.fc7 = L.InnerProduct(net.fc6,
                                 num_output=4096,
                                 param=LT.frozen_param)
        net.silence_fc7 = LT.silence(net.fc7)

        return self.save(net)
Esempio n. 2
0
    def resnet_mask_end2end(self):
        """Assemble the end-to-end ResNet detection + mask net on ``self.net``.

        ``self.deploy`` selects the variant: the training net adds the RPN,
        classification, box-regression and mask losses, while the deploy net
        ends in the ``cls_prob`` and ``mask_prob`` outputs.

        Reads ``self.channals``, ``self.classes``, ``self.module``,
        ``self.stages`` and (training only) ``self.rois_num``.

        Returns:
            The value of ``self.net.to_proto()``.
        """
        channels = self.channals
        if not self.deploy:
            data, im_info, gt_boxes, ins = \
                data_layer_train_with_ins(self.net, self.classes, with_rpn=True)
        else:
            data, im_info = data_layer_test(self.net)
            gt_boxes = None
        conv1 = conv_factory(self.net, "conv1", data, 7, channels, 2, 3, bias_term=True)
        pool1 = pooling_layer(self.net, 3, 2, 'MAX', 'pool1', conv1)
        index = 1
        out = pool1

        # Pick the block builder under a *different* local name. Assigning to
        # ``residual_block`` here would make that name local to this method
        # and raise UnboundLocalError when reading the module-level function.
        if self.module == "normal":
            block_fn = residual_block
        else:
            block_fn = residual_block_basic

        # Every stage but the last runs on the full feature map.
        for depth in self.stages[:-1]:
            index += 1
            for j in range(depth):
                block_name = "res" + str(index) + ascii_lowercase[j]
                if j == 0:
                    # The first block of a stage carries the stride; the
                    # stage with index 2 keeps stride 1.
                    stride = 1 if index == 2 else 2
                    out = block_fn(self.net, block_name, out, channels, stride)
                else:
                    out = block_fn(self.net, block_name, out, channels)
            channels *= 2

        if not self.deploy:
            rpn_cls_loss, rpn_loss_bbox, rpn_cls_score_reshape, rpn_bbox_pred = \
                rpn(self.net, out, gt_boxes, im_info, data, fixed=False)
            rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights, mask_roi, masks = \
                roi_proposals(self.net, rpn_cls_score_reshape, rpn_bbox_pred, im_info, gt_boxes)
            # Detection and mask RoIs go through the shared head as one batch.
            self.net["rois_cat"] = L.Concat(rois, mask_roi, name="rois_cat", axis=0)
            rois = self.net["rois_cat"]
        else:
            rpn_cls_score_reshape, rpn_bbox_pred = rpn(self.net, out, gt_boxes, im_info, data)
            rois, scores = roi_proposals(self.net, rpn_cls_score_reshape, rpn_bbox_pred, im_info, gt_boxes)

        out = roi_align(self.net, "det_mask", out, rois)

        # Last stage runs on the RoI-aligned features, first block at stride 1.
        index += 1
        for j in range(self.stages[-1]):
            block_name = "res" + str(index) + ascii_lowercase[j]
            if j == 0:
                out = block_fn(self.net, block_name, out, channels, 1)
            else:
                out = block_fn(self.net, block_name, out, channels)

        if not self.deploy:
            # Split the batch at ``self.rois_num`` along dim 0 into detection
            # and mask features. Only Top objects may be passed as layer
            # inputs, so the NetSpec itself must not be given to L.Slice.
            self.net["det_feat"], self.net["mask_feat"] = L.Slice(
                out, ntop=2, name='slice',
                slice_param=dict(slice_dim=0, slice_point=self.rois_num))
            feat_mask = self.net["mask_feat"]
            out = self.net["det_feat"]

        # for bbox detection
        pool5 = ave_pool(self.net, 7, 1, "pool5", out)
        cls_score, bbox_pred = final_cls_bbox(self.net, pool5)

        if not self.deploy:
            self.net["loss_cls"] = L.SoftmaxWithLoss(cls_score, labels, loss_weight=1, propagate_down=[1, 0])
            self.net["loss_bbox"] = L.SmoothL1Loss(bbox_pred, bbox_targets, bbox_inside_weights,
                                                   bbox_outside_weights, loss_weight=1)
        else:
            self.net["cls_prob"] = L.Softmax(cls_score)

        # for mask prediction: training uses the dedicated mask slice, deploy
        # reuses the detection features.
        if not self.deploy:
            mask_feat_aligned = feat_mask
        else:
            mask_feat_aligned = out
        out = L.Deconvolution(mask_feat_aligned, name="mask_deconv1",
                              convolution_param=dict(kernel_size=2, stride=2,
                                                     num_output=256, pad=0, bias_term=False,
                                                     weight_filler=dict(type='msra')))
        out = L.BatchNorm(out, name="bn_mask_deconv1", in_place=True,
                          batch_norm_param=dict(use_global_stats=self.deploy))
        out = L.Scale(out, name="scale_mask_deconv1", in_place=True, scale_param=dict(bias_term=True))
        out = L.ReLU(out, name="mask_deconv1_relu", in_place=True)
        mask_out = conv_factory(self.net, "mask_out", out, 1, self.classes-1, 1, 0, bias_term=True)

        if not self.deploy:
            self.net["loss_mask"] = L.SigmoidCrossEntropyLoss(
                mask_out, masks, loss_weight=1, propagate_down=[1, 0],
                loss_param=dict(normalization=1, ignore_label=-1))
        else:
            self.net["mask_prob"] = L.Sigmoid(mask_out)

        return self.net.to_proto()
Esempio n. 3
0
    def generate(self):
        """Build the RoI-pooling detection-head net and return ``self.save``'s result.

        With ``self.train`` set, the net reads RoIs and targets from a Python
        data layer and ends in the classification and box-regression losses;
        a frozen set of RPN predictors is appended afterwards. Otherwise the
        net takes ``LT.input()``, generates RoIs through the RPN and
        ``LT.roi_proposal``, and ends in ``cls_prob``. In both variants the
        RPN score and box outputs are silenced at the end.

        Returns:
            Whatever ``self.save`` returns for the assembled ``NetSpec``.
        """
        net = caffe.NetSpec()
        if self.train:
            head_param = LT.learned_param
        else:
            head_param = LT.frozen_param

        if self.train:
            roi_data_cfg = dict(
                module='roi_data_layer.layer',
                layer='RoIDataLayer',
                param_str="num_classes: " + str(self.num_classes))
            # Extra tops are requested by name through the ``top`` keyword.
            net.data = L.Python(
                top=["rois", 'labels', 'bbox_targets', 'bbox_inside_weights',
                     'bbox_outside_weights'],
                python_param=roi_data_cfg)
        else:
            net.data, net.im_info = LT.input()

        # Shared conv1..conv5 body; learnable only when configured so.
        if self.conv_1_to_5_learn:
            backbone_param = LT.learned_param
        else:
            backbone_param = LT.frozen_param
        LT.conv1_to_5(net, backbone_param)

        if not self.train:
            # Test-time net: RoIs come from the RPN and the proposal layers.
            rpn_tops = LT.rpn_class_and_bbox_predictors(net, self, head_param)
            (net.rpn_conv1, net.rpn_relu1,
             net.rpn_cls_score, net.rpn_bbox_pred) = rpn_tops
            net.rpn_cls_score_reshape = LT.reshape(net.rpn_cls_score,
                                                   [0, 2, -1, 0])
            (net.rpn_cls_prob, net.rpn_cls_prob_reshape,
             net.rois) = LT.roi_proposal(net, self)

        # Bottoms are referenced by blob name here.
        net.roi_pool = L.ROIPooling(bottom=["conv5", "rois"],
                                    pooled_w=6,
                                    pooled_h=6,
                                    spatial_scale=0.0625)

        # fc6 -> dropout -> fc7 -> dropout
        net.fc6, net.relu6 = LT.fc_relu(net.roi_pool, 4096, param=head_param)
        drop6_top = L.Dropout(net.relu6,
                              in_place=True,
                              dropout_ratio=0.5,
                              scale_train=False)
        net.drop6 = drop6_top
        net.fc7, net.relu7 = LT.fc_relu(drop6_top, 4096, param=head_param)
        drop7_top = L.Dropout(net.relu7,
                              in_place=True,
                              dropout_ratio=0.5,
                              scale_train=False)
        net.drop7 = drop7_top

        # Fillers are only specified for the training net.
        if self.train:
            w_filler = LT.WEIGHT_FILLER
            b_filler = LT.BIAS_FILLER
        else:
            w_filler = dict()
            b_filler = dict()

        net.cls_score = L.InnerProduct(drop7_top,
                                       num_output=self.num_classes,
                                       weight_filler=w_filler,
                                       bias_filler=b_filler,
                                       param=LT.learned_param)
        net.bbox_pred = L.InnerProduct(drop7_top,
                                       num_output=self.num_classes * 4,
                                       weight_filler=w_filler,
                                       bias_filler=b_filler,
                                       param=LT.learned_param)

        if self.train:
            net.loss_cls = LT.soft_max_with_loss(["cls_score", "labels"])
            net.loss_bbox = L.SmoothL1Loss(
                bottom=["bbox_pred", "bbox_targets", "bbox_inside_weights",
                        "bbox_outside_weights"],
                loss_weight=1)
        else:
            net.cls_prob = L.Softmax(net.cls_score,
                                     loss_param=dict(ignore_label=-1,
                                                     normalize=True))

        if self.train:
            # Training net: add the RPN predictors with frozen parameters.
            frozen_rpn_tops = LT.rpn_class_and_bbox_predictors(
                net, self, LT.frozen_param)
            (net.rpn_conv1, net.rpn_relu1,
             net.rpn_cls_score, net.rpn_bbox_pred) = frozen_rpn_tops

        net.silence_rpn_cls_score = LT.silence(net.rpn_cls_score)
        net.silence_rpn_bbox_pred = LT.silence(net.rpn_bbox_pred)

        # Hand the finished spec to self.save and return what it gives back.
        return self.save(net)
Esempio n. 4
0
def rpn(net,
        bottom,
        gt_boxes,
        im_info,
        data,
        anchors,
        feat_stride,
        scales,
        fixed=False,
        deploy=False):
    """Add the RPN layers to ``net`` on top of ``bottom``.

    Args:
        net: mapping-like net spec the layers are registered on by name.
        bottom: input of the 3x3 RPN convolution.
        gt_boxes, im_info, data: extra inputs of the anchor-target layer
            (only used when the training layers are added).
        anchors: multiplier for the predictor widths; the score conv has
            ``2 * anchors`` outputs and the box conv ``4 * anchors``.
        feat_stride, scales: formatted into the anchor-target ``param_str``.
        fixed: when true, all three convolutions get ``lr_mult`` 0 for both
            parameter blobs (instead of 1 and 2) and no training layers are
            added.
        deploy: when true, no training layers are added.

    Returns:
        ``(rpn_cls_loss, rpn_loss_bbox, rpn_cls_score_reshape, rpn_bbox_pred)``
        when neither ``deploy`` nor ``fixed`` is set, otherwise
        ``(rpn_cls_score_reshape, rpn_bbox_pred)``.
    """
    # The three convolutions differ only in input, width, kernel size and
    # padding; the learning-rate multipliers depend solely on ``fixed``.
    first_lr, second_lr = (0, 0) if fixed else (1, 2)

    def rpn_conv(source, num_output, kernel_size, pad):
        return L.Convolution(source,
                             kernel_size=kernel_size,
                             stride=1,
                             num_output=num_output,
                             pad=pad,
                             param=[{'lr_mult': first_lr},
                                    {'lr_mult': second_lr}],
                             weight_filler=dict(type='gaussian', std=0.01),
                             bias_filler=dict(type='constant', value=0),
                             engine=2)

    net["rpn_conv/3x3"] = rpn_conv(bottom, 512, 3, 1)
    net["rpn_relu/3x3"] = L.ReLU(net["rpn_conv/3x3"], in_place=True)
    net["rpn_cls_score"] = rpn_conv(net["rpn_relu/3x3"], 2 * anchors, 1, 0)
    net["rpn_bbox_pred"] = rpn_conv(net["rpn_relu/3x3"], 4 * anchors, 1, 0)
    net["rpn_cls_score_reshape"] = L.Reshape(
        net["rpn_cls_score"],
        reshape_param={"shape": {"dim": [0, 2, -1, 0]}})

    if deploy or fixed:
        # No anchor targets or losses in this configuration.
        return net["rpn_cls_score_reshape"], net["rpn_bbox_pred"]

    # Training: anchor-target layer followed by the two RPN losses.
    anchor_cfg = dict(
        module='rpn.anchor_target_layer',
        layer='AnchorTargetLayer',
        param_str='{"feat_stride": %s,"scales": %s}' % (feat_stride, scales))
    labels, bbox_targets, inside_weights, outside_weights = L.Python(
        net["rpn_cls_score"], gt_boxes, im_info, data,
        name='rpn-data',
        python_param=anchor_cfg,
        ntop=4)
    net["rpn_labels"] = labels
    net["rpn_bbox_targets"] = bbox_targets
    net["rpn_bbox_inside_weights"] = inside_weights
    net["rpn_bbox_outside_weights"] = outside_weights

    net["rpn_cls_loss"] = L.SoftmaxWithLoss(
        net["rpn_cls_score_reshape"],
        net["rpn_labels"],
        name="rpn_loss_cls",
        propagate_down=[1, 0],
        loss_weight=1,
        loss_param={"ignore_label": -1, "normalize": True})
    net["rpn_loss_bbox"] = L.SmoothL1Loss(
        net["rpn_bbox_pred"],
        net["rpn_bbox_targets"],
        net["rpn_bbox_inside_weights"],
        net["rpn_bbox_outside_weights"],
        name="loss_bbox",
        loss_weight=1,
        smooth_l1_loss_param={"sigma": 3.0})

    return (net["rpn_cls_loss"], net["rpn_loss_bbox"],
            net["rpn_cls_score_reshape"], net["rpn_bbox_pred"])
Esempio n. 5
0
def add_train_rfcn_layers(net,
                          split_to_rpn_layer,
                          end_body_layer=None,
                          prefix=""):
    """Register the training-time RPN and R-FCN head layers on ``net``.

    Every blob this function creates, and every explicit layer name it sets,
    is ``prefix`` plus a fixed suffix. ``net['im_info']`` and ``net['data']``
    are read without the prefix, ``net[prefix + 'gt_boxes']`` with it; all
    three must already exist on ``net``.

    ``anchors_num``, ``position_num``, ``num_classes``, ``feat_stride`` and
    ``cfg`` are not parameters; they are resolved from the enclosing module
    scope.

    Args:
        net: mapping-like net spec the layers are registered on.
        split_to_rpn_layer: input of the 3x3 RPN convolution.
        end_body_layer: input of the 1x1 ``conv_new_1`` convolution.
            NOTE(review): the ``None`` default is forwarded to
            ``L.Convolution`` unchanged - callers presumably always pass a
            real layer; confirm.
        prefix: string prepended to the blob and layer names.
            Presumably keeps names unique when several branches share one
            net - TODO confirm.

    Returns:
        The same ``net`` object.
    """

    def key(suffix):
        # Name of a blob or layer belonging to this branch.
        return prefix + suffix

    # Settings shared by every convolution added below.
    conv_kwargs = {
        'param': [
            dict(lr_mult=1, decay_mult=1),
            dict(lr_mult=2, decay_mult=0),
        ],
        'weight_filler': dict(type='msra'),
        'bias_filler': dict(type='constant', value=0.0),
    }
    proposal_param = dict(
        ratio=cfg.ANCHOR_GENERATOR.RATIOS,
        scale=cfg.ANCHOR_GENERATOR.SCALES,
        base_size=cfg.FEAT_STRIDE,
        feat_stride=cfg.FEAT_STRIDE,
        pre_nms_topn=cfg.TRAIN.RPN_PRE_NMS_TOP_N,
        post_nms_topn=cfg.TRAIN.RPN_POST_NMS_TOP_N,
        nms_thresh=cfg.TRAIN.RPN_NMS_THRESH,
        min_size=cfg.TRAIN.DATA_AUG.MIN_SIZE,
    )

    # --- RPN trunk and predictors ---------------------------------------
    net[key('rpn_output')] = L.Convolution(
        split_to_rpn_layer,
        num_output=256, pad=1, kernel_size=3, stride=1,
        **conv_kwargs)
    net[key('rpn_output_relu')] = L.ReLU(net[key('rpn_output')],
                                         in_place=True)
    net[key('rpn_cls_score')] = L.Convolution(
        net[key('rpn_output_relu')],
        num_output=anchors_num * 2, pad=0, kernel_size=1, stride=1,
        **conv_kwargs)
    net[key('rpn_bbox_pred')] = L.Convolution(
        net[key('rpn_output_relu')],
        num_output=anchors_num * 4, pad=0, kernel_size=1, stride=1,
        **conv_kwargs)
    net[key('rpn_cls_score_reshape')] = L.Reshape(
        net[key('rpn_cls_score')],
        reshape_param={'shape': {'dim': [0, 2, -1, 0]}})

    # --- RPN targets and losses -----------------------------------------
    anchor_tops = L.Python(
        net[key('rpn_cls_score')],
        net[key('gt_boxes')],
        net['im_info'],
        net['data'],
        name=key("rpn-data"),
        python_param={
            'module': "rpn.anchor_target_layer",
            'layer': "AnchorTargetLayer",
        },
        ntop=4)
    (net[key('rpn_labels')],
     net[key('rpn_bbox_targets')],
     net[key('rpn_bbox_inside_weights')],
     net[key('rpn_bbox_outside_weights')]) = anchor_tops

    net[key('rpn_loss_cls')] = L.SoftmaxWithLoss(
        net[key('rpn_cls_score_reshape')],
        net[key('rpn_labels')],
        loss_weight=1.0,
        propagate_down=[True, False],
        loss_param={"ignore_label": -1, "normalize": True})
    net[key('rpn_loss_bbox')] = L.SmoothL1Loss(
        net[key('rpn_bbox_pred')],
        net[key('rpn_bbox_targets')],
        net[key('rpn_bbox_inside_weights')],
        net[key('rpn_bbox_outside_weights')],
        loss_weight=1.0,
        smooth_l1_loss_param={'sigma': 3.0})

    # --- Proposals and per-RoI targets ----------------------------------
    net[key('rpn_cls_prob')] = L.Softmax(net[key('rpn_cls_score_reshape')])
    net[key('rpn_cls_prob_reshape')] = L.Reshape(
        net[key('rpn_cls_prob')],
        reshape_param={'shape': {'dim': [0, 2 * anchors_num, -1, 0]}})
    proposal_tops = L.Proposal(
        net[key('rpn_cls_prob_reshape')],
        net[key('rpn_bbox_pred')],
        net['im_info'],
        name=key("proposal"),
        proposal_param=proposal_param,
        ntop=2)
    net[key('rpn_rois')], net[key('rpn_scores')] = proposal_tops
    # The proposal scores have no consumer, so they are silenced.
    net[key('rpn_scores_silence')] = L.Silence(net[key('rpn_scores')],
                                               ntop=0)

    roi_tops = L.Python(
        net[key('rpn_rois')],
        net[key('gt_boxes')],
        net['data'],
        name=key("roi-data"),
        python_param={
            'module': "rpn.proposal_target_layer",
            'layer': "ProposalTargetLayer",
        },
        ntop=6)
    (net[key('rois')],
     net[key('labels')],
     net[key('bbox_targets')],
     net[key('bbox_inside_weights')],
     net[key('bbox_outside_weights')],
     net[key('pos_num')]) = roi_tops

    # --- R-FCN score maps ------------------------------------------------
    net[key('conv_new_1')] = L.Convolution(
        end_body_layer,
        num_output=256, pad=0, kernel_size=1, stride=1,
        **conv_kwargs)
    net[key('conv_new_1_relu')] = L.ReLU(net[key('conv_new_1')],
                                         in_place=True)
    net[key('rfcn_cls')] = L.Convolution(
        net[key('conv_new_1_relu')],
        num_output=position_num**2 * num_classes,
        pad=0, kernel_size=1, stride=1,
        **conv_kwargs)
    net[key('rfcn_bbox')] = L.Convolution(
        net[key('conv_new_1_relu')],
        num_output=position_num**2 * 8,
        pad=0, kernel_size=1, stride=1,
        **conv_kwargs)

    # --- PSROI pooling followed by average pooling -----------------------
    net[key('psroipooled_cls_rois')] = L.PSROIPooling(
        net[key('rfcn_cls')],
        net[key('rois')],
        psroi_pooling_param={
            'spatial_scale': 1.0 / feat_stride,
            'output_dim': num_classes,
            'group_size': position_num,
        })
    net[key('cls_score')] = L.Pooling(
        net[key('psroipooled_cls_rois')],
        name=key("ave_cls_score_rois"),
        pool=P.Pooling.AVE,
        kernel_size=position_num,
        stride=position_num)
    net[key('psroipooled_loc_rois')] = L.PSROIPooling(
        net[key('rfcn_bbox')],
        net[key('rois')],
        psroi_pooling_param={
            'spatial_scale': 1.0 / feat_stride,
            'output_dim': 8,
            'group_size': position_num,
        })
    net[key('bbox_pred')] = L.Pooling(
        net[key('psroipooled_loc_rois')],
        name=key("ave_bbox_pred_rois"),
        pool=P.Pooling.AVE,
        kernel_size=position_num,
        stride=position_num)

    # --- OHEM: per-RoI losses (zero weight, no backprop) and annotator ----
    ohem_cls_tops = L.SoftmaxWithLossOHEM(
        net[key('cls_score')],
        net[key('labels')],
        name=key("per_roi_loss_cls"),
        loss_weight=[0.0, 0.0, 0.0],
        propagate_down=[False, False],
        ntop=3)
    (net[key('temp_loss_cls')],
     net[key('temp_prob_cls')],
     net[key('per_roi_loss_cls')]) = ohem_cls_tops

    ohem_bbox_tops = L.SmoothL1LossOHEM(
        net[key('bbox_pred')],
        net[key('bbox_targets')],
        net[key('bbox_inside_weights')],
        name=key("per_roi_loss_bbox"),
        loss_weight=[0.0, 0.0],
        propagate_down=[False, False, False],
        ntop=2)
    net[key('temp_loss_bbox')], net[key('per_roi_loss_bbox')] = ohem_bbox_tops

    net[key('per_roi_loss')] = L.Eltwise(
        net[key('per_roi_loss_cls')],
        net[key('per_roi_loss_bbox')],
        propagate_down=[False, False])

    annotator_tops = L.BoxAnnotatorOHEM(
        net[key('rois')],
        net[key('per_roi_loss')],
        net[key('labels')],
        net[key('bbox_inside_weights')],
        name=key("annotator_detector"),
        propagate_down=[False, False, False, False],
        box_annotator_ohem_param={
            'roi_per_img': cfg.TRAIN.ROI_PER_IMG,
            'ignore_label': -1,
        },
        ntop=2)
    net[key('labels_ohem')], net[key('bbox_loss_weights_ohem')] = annotator_tops

    # Blobs that nothing else consumes are routed into a Silence layer.
    net[key('silence')] = L.Silence(
        net[key('bbox_outside_weights')],
        net[key('temp_loss_cls')],
        net[key('temp_prob_cls')],
        net[key('temp_loss_bbox')],
        ntop=0)

    # --- Final losses and accuracy on the OHEM-selected labels -----------
    net[key('loss_cls')] = L.SoftmaxWithLoss(
        net[key('cls_score')],
        net[key('labels_ohem')],
        name=key("loss"),
        loss_weight=1.0,
        propagate_down=[True, False],
        loss_param={'ignore_label': -1})
    net[key('accuracy')] = L.Accuracy(
        net[key('cls_score')],
        net[key('labels_ohem')],
        propagate_down=[False, False],
        accuracy_param={'ignore_label': -1})
    # Created through the generic ``L.Loss`` entry with an explicit ``type``
    # keyword naming the layer type.
    net[key('loss_bbox')] = L.Loss(
        net[key('bbox_pred')],
        net[key('bbox_targets')],
        net[key('bbox_loss_weights_ohem')],
        net[key('pos_num')],
        type="SmoothL1LossOHEM",
        loss_weight=1.0,
        propagate_down=[True, False, False, False],
        loss_param={'normalization': P.Loss.POS_NUM})

    return net
Esempio n. 6
0
def gen_rpn_prototxt(basemodel, num_classes, deploy=False, cpp_version=False):
    """Return the prototxt text of a Faster-RCNN region proposal network.

    Args:
        basemodel: Backbone identifier such as a name followed by an optional
            depth (letters then digits). Its lower-cased form must be listed
            by ``list_models()``.
        num_classes: Forwarded to ``rcnn.add_input_data`` when building a
            training net; not used for deploy nets.
        deploy: When false, a training net is built (input data layer,
            anchor target layer and the two RPN losses). When true, an
            inference net is built (softmax over RPN scores plus a proposal
            layer producing ``rois``) and the first two layers are replaced
            by plain ``input`` declarations for ``data`` and ``im_info``.
        cpp_version: Only consulted when ``deploy`` is true. Selects the
            ``RPNProposal`` layer instead of the Python ``ProposalLayer``.

    Returns:
        The network definition as a prototxt string, prefixed with a
        ``name:`` line containing ``basemodel``.

    Raises:
        AssertionError: If ``basemodel`` is not a supported model.
    """
    assert basemodel.lower() in list_models(
    ), 'Unsupported basemodel: %s' % basemodel

    # Split e.g. "<name><depth>" into its non-digit and digit runs.
    model_parts = re.findall(r'\d+|\D+', basemodel)
    model_name = model_parts[0].lower()
    model_depth = -1 if len(model_parts) == 1 else int(model_parts[1])

    rcnn = mzoo.FasterRCNN()
    model = model_dict[model_name]

    n = caffe.NetSpec()
    if not deploy:
        rcnn.add_input_data(n, num_classes)
    else:
        # Placeholders only: both layers are overwritten with "input:"
        # declarations once the net has been serialised (see below).
        n.data = caffe.layers.Layer()
        n.im_info = caffe.layers.Layer()

    model.add_body_for_feature(n, depth=model_depth, lr=1, deploy=deploy)
    bottom = mzoo.last_layer(n)
    lr = 1.0

    # rpn
    n['rpn_conv/3x3'], n['rpn_relu/3x3'] = mzoo.conv_relu(
        bottom,
        nout=256,
        ks=3,
        stride=1,
        pad=1,
        lr=lr,
        deploy=deploy,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0))

    # NOTE(review): the factor 9 below (and 18 = 2 * 9 in the deploy reshape)
    # is presumably the number of anchors per location -- confirm against the
    # anchor/proposal layer configuration.
    n.rpn_cls_score = mzoo.conv(n['rpn_relu/3x3'],
                                nout=2 * 9,
                                ks=1,
                                stride=1,
                                pad=0,
                                lr=lr,
                                deploy=deploy,
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))
    n.rpn_bbox_pred = mzoo.conv(n['rpn_relu/3x3'],
                                nout=4 * 9,
                                ks=1,
                                stride=1,
                                pad=0,
                                lr=lr,
                                deploy=deploy,
                                weight_filler=dict(type='gaussian', std=0.01),
                                bias_filler=dict(type='constant', value=0))

    n.rpn_cls_score_reshape = L.Reshape(
        n.rpn_cls_score, reshape_param=dict(shape=dict(dim=[0, 2, -1, 0])))

    if not deploy:
        # Training nets stop at the RPN losses; no proposal layer is emitted.
        (n.rpn_labels, n.rpn_bbox_targets, n.rpn_bbox_inside_weights,
         n.rpn_bbox_outside_weights) = L.Python(
             n.rpn_cls_score,
             n.gt_boxes,
             n.im_info,
             n.data,
             ntop=4,
             python_param=dict(module='rpn.anchor_target_layer',
                               layer='AnchorTargetLayer',
                               param_str="'feat_stride': 16"))

        n.rpn_loss_cls = L.SoftmaxWithLoss(n.rpn_cls_score_reshape,
                                           n.rpn_labels,
                                           propagate_down=[1, 0],
                                           loss_weight=1,
                                           loss_param=dict(ignore_label=-1,
                                                           normalize=True))
        n.rpn_loss_bbox = L.SmoothL1Loss(n.rpn_bbox_pred,
                                         n.rpn_bbox_targets,
                                         n.rpn_bbox_inside_weights,
                                         n.rpn_bbox_outside_weights,
                                         loss_weight=1,
                                         smooth_l1_loss_param=dict(sigma=3.0))
    else:
        # roi proposal
        n.rpn_cls_prob = L.Softmax(n.rpn_cls_score_reshape)
        n.rpn_cls_prob_reshape = L.Reshape(
            n.rpn_cls_prob, reshape_param=dict(shape=dict(dim=[0, 18, -1, 0])))
        if cpp_version:
            n['rois'] = L.RPNProposal(n.rpn_cls_prob_reshape,
                                      n.rpn_bbox_pred,
                                      n.im_info,
                                      ntop=1,
                                      rpn_proposal_param=dict(feat_stride=16))
        else:
            n['rois'] = L.Python(
                n.rpn_cls_prob_reshape,
                n.rpn_bbox_pred,
                n.im_info,
                ntop=1,
                python_param=dict(module='rpn.proposal_layer',
                                  layer='ProposalLayer',
                                  param_str="'feat_stride': 16"))

    # Re-split the serialised net into one string per layer so individual
    # layers can be swapped out textually.
    layers = [
        'layer {' + chunk for chunk in str(n.to_proto()).split('layer {')[1:]
    ]
    if deploy:
        # Replace the two placeholder layers with input declarations. This
        # relies on "data" and "im_info" being the first two layers emitted.
        layers[0] = ('input: "data"\n'
                     'input_shape {\n'
                     '  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n'
                     '}\n')
        layers[1] = ('input: "im_info"\n'
                     'input_shape {\n'
                     '  dim: 1\n  dim: 3\n'
                     '}\n')
    proto_str = ''.join(layers)
    # The serialised text escapes the single quotes inside param_str; undo it.
    proto_str = proto_str.replace("\\'", "'")

    return 'name: "Faster-RCNN-%s"\n' % basemodel + proto_str
Esempio n. 7
0
def net(split, vocab_size, opts):
    """Build the query-grounding network and return it as a proto message.

    The net reads nine tops from the Python ``DataProviderLayer``, embeds the
    query tokens, runs them through an LSTM, keeps the last of the
    ``cfg.QUERY_MAXLEN`` slices of the LSTM output, and combines it with the
    image features (via ``proc_img`` and ``concat``) to predict a per-proposal
    score and a 4-value box output.

    Args:
        split: Passed to the data layer inside its JSON ``param_str``.
        vocab_size: ``input_dim`` of the word embedding layer.
        opts: Accepted for interface compatibility; not used in this function.

    Returns:
        The result of ``NetSpec.to_proto()`` for the assembled network.
    """
    n = caffe.NetSpec()
    param_str = json.dumps({'split': split, 'batchsize': cfg.BATCHSIZE})
    (n.qvec, n.cvec, n.img_feat, n.spt_feat, n.query_label,
     n.query_label_mask, n.query_bbox_targets, n.query_bbox_inside_weights,
     n.query_bbox_outside_weights) = L.Python(
         name='data',
         module='networks.data_layer',
         layer='DataProviderLayer',
         param_str=param_str,
         ntop=9)

    n.embed_ba = L.Embed(n.qvec,
                         input_dim=vocab_size,
                         num_output=cfg.WORD_EMB_SIZE,
                         weight_filler=dict(type='xavier'))
    n.embed = L.TanH(n.embed_ba)
    word_emb = n.embed

    # LSTM1
    n.lstm1 = L.LSTM(word_emb,
                     n.cvec,
                     recurrent_param=dict(num_output=cfg.RNN_DIM,
                                          weight_filler=dict(type='xavier')))

    # Slice the LSTM output along axis 0 into QUERY_MAXLEN tops. Every slice
    # but the last is registered and silenced so it is not left dangling.
    tops1 = L.Slice(n.lstm1, ntop=cfg.QUERY_MAXLEN, slice_param={'axis': 0})
    last_step = cfg.QUERY_MAXLEN - 1
    # `range` (not the Python-2-only `xrange`) keeps this runnable on both
    # Python 2 and Python 3.
    for i in range(last_step):
        setattr(n, 'slice_first' + str(i), tops1[i])
        setattr(n, 'silence_data_first' + str(i),
                L.Silence(tops1[i], ntop=0))
    n.lstm1_out = tops1[last_step]
    n.lstm1_reshaped = L.Reshape(
        n.lstm1_out, reshape_param=dict(shape=dict(dim=[-1, cfg.RNN_DIM])))
    n.lstm1_droped = L.Dropout(
        n.lstm1_reshaped, dropout_param={'dropout_ratio': cfg.DROPOUT_RATIO})
    n.lstm_l2norm = L.L2Normalize(n.lstm1_droped)
    n.q_emb = L.Reshape(n.lstm_l2norm,
                        reshape_param=dict(shape=dict(dim=[0, -1])))
    q_layer = n.q_emb  # (N, 1024)

    v_layer = proc_img(n, n.img_feat, n.spt_feat)  # out: (N, 100, 2053)
    out_layer = concat(n, q_layer, v_layer)

    # predict score
    n.query_score_fc = L.InnerProduct(out_layer,
                                      num_output=1,
                                      weight_filler=dict(type='xavier'))
    n.query_score_pred = L.Reshape(
        n.query_score_fc,
        reshape_param=dict(shape=dict(dim=[-1, cfg.RPN_TOPN])))
    # Both loss variants take the same bottoms and settings; only the layer
    # type depends on cfg.USE_KLD.
    score_loss = L.SoftmaxKLDLoss if cfg.USE_KLD else L.SoftmaxWithLoss
    n.loss_query_score = score_loss(n.query_score_pred,
                                    n.query_label,
                                    n.query_label_mask,
                                    propagate_down=[1, 0, 0],
                                    loss_weight=1.0)

    # predict bbox
    n.query_bbox_pred = L.InnerProduct(out_layer,
                                       num_output=4,
                                       weight_filler=dict(type='xavier'))
    if cfg.USE_REG:
        n.loss_query_bbox = L.SmoothL1Loss(n.query_bbox_pred,
                                           n.query_bbox_targets,
                                           n.query_bbox_inside_weights,
                                           n.query_bbox_outside_weights,
                                           loss_weight=1.0)
    else:
        # Without the regression loss these blobs have no consumer, so each
        # one is fed to its own Silence layer.
        for blob in ('query_bbox_pred', 'query_bbox_targets',
                     'query_bbox_inside_weights',
                     'query_bbox_outside_weights'):
            setattr(n, 'silence_' + blob, L.Silence(getattr(n, blob), ntop=0))
    return n.to_proto()
Esempio n. 8
0
def network(split):
    """Assemble the detection network for ``split`` and return its proto.

    ``split == 'train'`` reads data from the Python ``RoIDataLayer`` and adds
    the RPN label/loss layers, proposal-target sampling, the RCNN head and
    its two losses. Any other value uses fixed-shape ``Input`` layers for
    ``data`` and ``im_info``; the proposal layer, RCNN head and final softmax
    are added only when ``split == 'test'``.

    Args:
        split: Name of the phase to build the network for.

    Returns:
        The result of ``NetSpec.to_proto()`` for the assembled network.
    """
    num_chns = int(360 / cfg.LD_INTERVAL) + 1
    net = caffe.NetSpec()
    corners = ('tl', 'tr', 'br', 'bl')

    # The small factories below return fresh objects on every call so that
    # no two layers share a parameter dict or list.
    def conv_lr():
        return [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]

    def fc_lr():
        return [dict(lr_mult=1), dict(lr_mult=2)]

    def xavier_init():
        return dict(weight_filler=dict(type='xavier'),
                    bias_filler=dict(type='constant'))

    def gaussian_init(std):
        return dict(weight_filler=dict(type='gaussian', std=std),
                    bias_filler=dict(type='constant', value=0))

    def fully_connected(bottom, width, fillers):
        return L.InnerProduct(
            bottom, param=fc_lr(),
            inner_product_param=dict(num_output=width, **fillers))

    def propose(ntop):
        # Proposal layer fed by im_info and the four corner probability maps.
        return L.Python(
            net.im_info, net.rpn_prob_tl, net.rpn_prob_tr, net.rpn_prob_br,
            net.rpn_prob_bl,
            module='rpn.proposal_layer', layer='ProposalLayer', ntop=ntop,
            param_str=str(dict(feat_stride=8)))

    def add_rcnn_head():
        # Layers shared by the train and test graphs: rotated ROI pooling,
        # slice + eltwise merge, two FC/ReLU/Dropout stages and the two
        # output FC layers.
        net.dual_pool5 = L.RotateROIPooling(
            net.conv_hyper, net.rois, name='roi_pool5_dual',
            rotate_roi_pooling_param=dict(
                pooled_w=7, pooled_h=7, spatial_scale=0.125))
        net.pool5_a, net.pool5_b = L.Slice(
            net.dual_pool5, slice_param=dict(axis=0), ntop=2, name='slice')
        net.pool5 = L.Eltwise(
            net.pool5_a, net.pool5_b, name='roi_pool5',
            eltwise_param=dict(operation=1))
        net.fc6 = fully_connected(net.pool5, 4096, xavier_init())
        net.fc6_relu = L.ReLU(net.fc6, in_place=True)
        net.drop6 = L.Dropout(net.fc6, dropout_ratio=0.5, in_place=True)
        net.fc7 = fully_connected(net.fc6, 4096, xavier_init())
        net.fc7_relu = L.ReLU(net.fc7, in_place=True)
        net.drop7 = L.Dropout(net.fc7, dropout_ratio=0.5, in_place=True)
        net.cls_score = fully_connected(net.fc7, 2, gaussian_init(0.01))
        net.bbox_pred = fully_connected(net.fc7, 16, gaussian_init(0.001))

    # Inputs
    if split == 'train':
        net.data, net.im_info, net.gt_boxes = L.Python(
            module='roi_data_layer.layer', layer='RoIDataLayer', ntop=3,
            param_str=str(dict(num_classes=2)))
    else:
        net.data = L.Input(
            name='data',
            input_param=dict(shape=dict(dim=[1, 3, 512, 512])))
        net.im_info = L.Input(
            name='im_info', input_param=dict(shape=dict(dim=[1, 3])))

    # Backbone: (stage, number of conv layers, output channels). Stages 1-4
    # end with a max-pool; stage 5 does not.
    stages = ((1, 2, 64), (2, 2, 128), (3, 3, 256), (4, 3, 512), (5, 3, 512))
    feat = net.data
    for stage, depth, width in stages:
        for idx in range(1, depth + 1):
            tag = '%d_%d' % (stage, idx)
            if tag == '1_1':
                # Only the very first convolution passes pad explicitly.
                conv, relu = conv_relu(feat, width, pad=1)
            else:
                conv, relu = conv_relu(feat, width)
            setattr(net, 'conv' + tag, conv)
            setattr(net, 'relu' + tag, relu)
            feat = relu
        if stage < 5:
            feat = max_pool(feat)
            setattr(net, 'pool%d' % stage, feat)

    # Hyper Feature: bring conv3_3 down and conv5_3 up, then concatenate both
    # with conv4_3 and fuse them with a 3x3 convolution.
    net.downsample = L.Convolution(
        net.conv3_3, num_output=64, kernel_size=3, pad=1, stride=2,
        param=conv_lr(), **xavier_init())
    net.relu_downsample = L.ReLU(net.downsample, in_place=True)
    net.upsample = L.Deconvolution(
        net.conv5_3, param=conv_lr(),
        convolution_param=dict(
            num_output=512, kernel_size=2, stride=2, **xavier_init()))
    net.relu_upsample = L.ReLU(net.upsample, in_place=True)
    net.fuse = L.Concat(
        net.downsample, net.upsample, net.conv4_3, name='concat',
        concat_param={'concat_dim': 1})
    net.conv_hyper = L.Convolution(
        net.fuse, num_output=512, kernel_size=3, pad=1,
        param=conv_lr(), **xavier_init())
    net.relu_conv_hyper = L.ReLU(net.conv_hyper, in_place=True)

    # RPN: one shared 3x3 conv, then a 1x1 score conv and a softmax for each
    # of the four corner branches.
    net.conv_rpn = L.Convolution(
        net.conv_hyper, num_output=128, kernel_size=3, pad=1,
        param=conv_lr(), **xavier_init())
    net.conv_rpn_relu = L.ReLU(net.conv_rpn, in_place=True)
    for corner in corners:
        setattr(net, 'rpn_score_' + corner, L.Convolution(
            net.conv_rpn, num_output=num_chns, kernel_size=1, pad=0,
            param=conv_lr(), **gaussian_init(0.01)))
    for corner in corners:
        setattr(net, 'rpn_prob_' + corner,
                L.Softmax(getattr(net, 'rpn_score_' + corner)))

    if split == 'train':
        # RPN supervision: one label map and one loss per corner branch.
        (net.rpn_label_tl, net.rpn_label_tr, net.rpn_label_br,
         net.rpn_label_bl) = L.Python(
             net.conv_rpn, net.im_info, net.gt_boxes,
             module='rpn.labelmap_layer', layer='LabelMapLayer', ntop=4)
        for corner in corners:
            setattr(net, 'loss_rpn_' + corner, L.BalancedSoftmaxWithLoss(
                getattr(net, 'rpn_score_' + corner),
                getattr(net, 'rpn_label_' + corner),
                propagate_down=[1, 0],
                loss_param=dict(normalize=True, ignore_label=-1)))

        # Proposals, then sampling of training ROIs and their targets.
        net.quads = propose(1)
        (net.rois, net.labels, net.bbox_targets, net.bbox_inside_weights,
         net.bbox_outside_weights) = L.Python(
             net.quads, net.gt_boxes,
             module='rpn.proposal_target_layer', layer='ProposalTargetLayer',
             name='roi-data', ntop=5)

        # RCNN
        add_rcnn_head()
        net.loss_cls = L.SoftmaxWithLoss(
            net.cls_score, net.labels, propagate_down=[1, 0], loss_weight=1)
        net.loss_bbox = L.SmoothL1Loss(
            net.bbox_pred, net.bbox_targets, net.bbox_inside_weights,
            net.bbox_outside_weights, loss_weight=1)
    elif split == 'test':
        # At test time the proposal layer emits both quads and rois.
        net.quads, net.rois = propose(2)

        # RCNN
        add_rcnn_head()
        net.cls_prob = L.Softmax(net.cls_score)

    return net.to_proto()