Example 1
    def attach(self, netspec, bottom):

        label = bottom[0]
        mbox_source_layers = self.params['mbox_source_layers']
        num_classes = self.params['num_classes']
        normalizations = self.params['normalizations']
        aspect_ratios = self.params['aspect_ratios']
        min_sizes = self.params['min_sizes']
        max_sizes = self.params['max_sizes']
        is_train = self.params['is_train']

        use_global_stats = not is_train

        loc = []
        conf = []
        prior = []

        for i, layer in enumerate(mbox_source_layers):
            if normalizations[i] != -1:
                norm_name = "{}_norm".format(layer)
                norm_layer = BaseLegoFunction(
                    'Normalize',
                    dict(name=norm_name,
                         scale_filler=dict(type="constant",
                                           value=normalizations[i]),
                         across_spatial=False,
                         channel_shared=False)).attach(netspec,
                                                       [netspec[layer]])
                layer_name = norm_name
            else:
                layer_name = layer

            # Estimate the number of priors per location from the provided
            # parameters: one box for min_size, plus one extra box when a
            # max_size is given.
            aspect_ratio = []
            if len(aspect_ratios) > i:
                aspect_ratio = aspect_ratios[i]
                if type(aspect_ratio) is not list:
                    aspect_ratio = [aspect_ratio]
            if max_sizes and max_sizes[i]:
                num_priors_per_location = 2 + len(aspect_ratio)
            else:
                num_priors_per_location = 1 + len(aspect_ratio)

            # Each aspect ratio also contributes its flipped (1/r) variant.
            num_priors_per_location += len(aspect_ratio)

            params = dict(name=layer_name,
                          num_classes=num_classes,
                          num_priors_per_location=num_priors_per_location,
                          min_size=min_sizes[i],
                          max_size=max_sizes[i],
                          aspect_ratio=aspect_ratio,
                          use_global_stats=use_global_stats)

            # Use a linear prediction head; the commented lines switch it to a
            # 3-layer deep head.
            params['deep_mult'] = 4
            params['type'] = 'linear'
            # params['type'] = 'deep'
            # params['depth'] = 3

            arr = MBoxUnitLego(params).attach(
                netspec, [netspec[layer_name], netspec['data']])
            loc.append(arr[0])
            conf.append(arr[1])
            prior.append(arr[2])

        # Concatenate the per-layer predictions once, after all source layers
        # have been attached (outside the loop, so each Concat is created once).
        mbox_layers = []
        locs = BaseLegoFunction('Concat',
                                dict(name='mbox_loc',
                                     axis=1)).attach(netspec, loc)
        mbox_layers.append(locs)
        confs = BaseLegoFunction('Concat',
                                 dict(name='mbox_conf',
                                      axis=1)).attach(netspec, conf)
        mbox_layers.append(confs)
        priors = BaseLegoFunction('Concat',
                                  dict(name='mbox_priorbox',
                                       axis=2)).attach(netspec, prior)
        mbox_layers.append(priors)

        # MultiBoxLoss parameters.
        share_location = True
        background_label_id = 0
        train_on_diff_gt = True
        normalization_mode = P.Loss.VALID
        code_type = P.PriorBox.CENTER_SIZE
        neg_pos_ratio = 3.
        loc_weight = (neg_pos_ratio + 1.) / 4.
        multibox_loss_param = {
            'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
            'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
            'loc_weight': loc_weight,
            'num_classes': num_classes,
            'share_location': share_location,
            'match_type': P.MultiBoxLoss.PER_PREDICTION,
            'overlap_threshold': 0.5,
            'use_prior_for_matching': True,
            'background_label_id': background_label_id,
            'use_difficult_gt': train_on_diff_gt,
            'do_neg_mining': True,
            'neg_pos_ratio': neg_pos_ratio,
            'neg_overlap': 0.5,
            'code_type': code_type,
        }
        loss_param = {
            'normalization': normalization_mode,
        }

        mbox_layers.append(label)

        BaseLegoFunction(
            'MultiBoxLoss',
            dict(name='mbox_loss',
                 multibox_loss_param=multibox_loss_param,
                 loss_param=loss_param,
                 include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                 propagate_down=[True, True, False,
                                 False])).attach(netspec, mbox_layers)

        if not is_train:
            # parameters for generating detection output.
            det_out_param = {
                'num_classes': num_classes,
                'share_location': True,
                'background_label_id': 0,
                'nms_param': {
                    'nms_threshold': 0.45,
                    'top_k': 400
                },
                'save_output_param': {
                    'output_directory':
                    "./models/voc2007/resnet_36_with4k_inception_trick/expt1/detection/",
                    'output_name_prefix': "comp4_det_test_",
                    'output_format': "VOC",
                    'label_map_file': "data/VOC0712/labelmap_voc.prototxt",
                    'name_size_file': "data/VOC0712/test_name_size.txt",
                    'num_test_image': 4952,
                },
                'keep_top_k': 200,
                'confidence_threshold': 0.01,
                'code_type': P.PriorBox.CENTER_SIZE,
            }

            # parameters for evaluating detection results.
            det_eval_param = {
                'num_classes': num_classes,
                'background_label_id': 0,
                'overlap_threshold': 0.5,
                'evaluate_difficult_gt': False,
                'name_size_file': "data/VOC0712/test_name_size.txt",
            }

            conf_name = "mbox_conf"
            reshape_name = "{}_reshape".format(conf_name)
            netspec[reshape_name] = L.Reshape(
                netspec[conf_name], shape=dict(dim=[0, -1, num_classes]))
            softmax_name = "{}_softmax".format(conf_name)
            netspec[softmax_name] = L.Softmax(netspec[reshape_name], axis=2)
            flatten_name = "{}_flatten".format(conf_name)
            netspec[flatten_name] = L.Flatten(netspec[softmax_name], axis=1)
            mbox_layers[1] = netspec[flatten_name]

            netspec.detection_out = L.DetectionOutput(
                *mbox_layers,
                detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
            netspec.detection_eval = L.DetectionEvaluate(
                netspec.detection_out,
                netspec.label,
                detection_evaluate_param=det_eval_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
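
The prior count above follows the usual SSD recipe: one box for min_size, one extra when max_size is set, one per aspect ratio, doubled again for the flipped (1/r) ratios. A minimal sketch of that arithmetic, assuming the same convention as attach() (count_priors is a hypothetical helper, not part of the source):

def count_priors(aspect_ratio, has_max_size):
    n = 2 if has_max_size else 1   # min_size box (+ sqrt(min*max) box)
    n += len(aspect_ratio)         # one box per listed aspect ratio
    n += len(aspect_ratio)         # flipped (1/r) variants
    return n

# e.g. a layer with ratios [2] and a max_size yields 4 priors per location.
assert count_priors([2], True) == 4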
Example 2
def SsdDetector(net, train=True, data_layer="data", gt_label="label", \
                net_width=300, net_height=300, basenet="VGG", \
                visualize=False, extra_data="data", eval_enable=True, **ssdparam):
    """
    创建SSD检测器。
    train: TRAIN /TEST
    data_layer/gt_label: 数据输入和label输入。
    net_width/net_height: 网络的输入尺寸
    num_classes: 估计分类的数量。
    basenet: "vgg"/"res101",特征网络
    ssdparam: ssd检测器使用的参数列表。

    返回:整个SSD检测器网络。
    """
    # BaseNetWork
    if basenet == "VGG":
        net = VGG16Net(net, from_layer=data_layer, fully_conv=True, reduced=True, \
                dilated=True, dropout=False)
        base_feature_layers = ['conv4_3', 'fc7']
        add_layers = 3
        first_channels = 256
        second_channels = 512
    elif basenet == "Res101":
        net = ResNet101Net(net, from_layer=data_layer, use_pool5=False)
        # 1/8, 1/16, 1/32
        base_feature_layers = ['res3b3', 'res4b22', 'res5c']
        add_layers = 2
        first_channels = 256
        second_channels = 512
    elif basenet == "Res50":
        net = ResNet50Net(net, from_layer=data_layer, use_pool5=False)
        base_feature_layers = ['res3d', 'res4f', 'res5c']
        add_layers = 2
        first_channels = 256
        second_channels = 512
    elif basenet == "PVA":
        net = PvaNet(net, from_layer=data_layer)
        # 1/8, 1/16, 1/32
        base_feature_layers = [
            'conv4_1/incep/pre', 'conv5_1/incep/pre', 'conv5_4'
        ]
        add_layers = 2
        first_channels = 256
        second_channels = 512
    elif basenet == "Yolo":
        net = YoloNet(net, from_layer=data_layer)
        base_feature_layers = ssdparam.get("multilayers_feature_map", [])
        # add_layers = 2
        # first_channels = 256
        # second_channels = 512
        feature_layers = base_feature_layers

    else:
        raise ValueError(
            "only VGG16, Res50/101, PVANet and Yolo are supported in current version."
        )

    # For non-Yolo base nets, feature_layers was never set above; treat each
    # base feature layer as its own single-layer group for the merge below.
    if basenet != "Yolo":
        feature_layers = [[layer] for layer in base_feature_layers]

    result = []
    for item in feature_layers:
        if len(item) == 1:
            result.append(item[0])
            continue
        name = ""
        for layers in item:
            name += layers
        tags = ["Down", "Ref"]
        down_methods = [["Reorg"]]
        UnifiedMultiScaleLayers(net,layers=item, tags=tags, \
                              unifiedlayer=name, dnsampleMethod=down_methods)
        result.append(name)
    feature_layers = result

    # Add extra layers
    # extralayers_use_batchnorm=True, extralayers_lr_mult=1, \
    # net, feature_layers = AddSsdExtraConvLayers(net, \
    #     use_batchnorm=ssdparam.get("extralayers_use_batchnorm",False), \
    #     feature_layers=base_feature_layers, add_layers=add_layers, \
    #     first_channels=first_channels, second_channels=second_channels)
    # create SSD detector header
    mbox_layers = SsdDetectorHeaders(net, \
         min_ratio=ssdparam.get("multilayers_min_ratio",15), \
         max_ratio=ssdparam.get("multilayers_max_ratio",90), \
         boxsizes=ssdparam.get("multilayers_boxsizes", []), \
         net_width=net_width, \
         net_height=net_height, \
         data_layer=data_layer, \
         num_classes=ssdparam.get("num_classes",2), \
         from_layers=feature_layers, \
         use_batchnorm=ssdparam.get("multilayers_use_batchnorm",True), \
         prior_variance = ssdparam.get("multilayers_prior_variance",[0.1,0.1,0.2,0.2]), \
         normalizations=ssdparam.get("multilayers_normalizations",[]), \
         aspect_ratios=ssdparam.get("multilayers_aspect_ratios",[]), \
         flip=ssdparam.get("multilayers_flip",True), \
         clip=ssdparam.get("multilayers_clip",False), \
         inter_layer_channels=ssdparam.get("multilayers_inter_layer_channels",[]), \
         kernel_size=ssdparam.get("multilayers_kernel_size",3), \
         pad=ssdparam.get("multilayers_pad",1))
    if train:
        loss_param = get_loss_param(normalization=ssdparam.get(
            "multiloss_normalization", P.Loss.VALID))
        mbox_layers.append(net[gt_label])
        # create loss
        if not ssdparam["combine_yolo_ssd"]:
            multiboxloss_param = get_multiboxloss_param( \
               loc_loss_type=ssdparam.get("multiloss_loc_loss_type",P.MultiBoxLoss.SMOOTH_L1), \
               conf_loss_type=ssdparam.get("multiloss_conf_loss_type",P.MultiBoxLoss.SOFTMAX), \
               loc_weight=ssdparam.get("multiloss_loc_weight",1), \
               conf_weight=ssdparam.get("multiloss_conf_weight",1), \
               num_classes=ssdparam.get("num_classes",2), \
               share_location=ssdparam.get("multiloss_share_location",True), \
               match_type=ssdparam.get("multiloss_match_type",P.MultiBoxLoss.PER_PREDICTION), \
               overlap_threshold=ssdparam.get("multiloss_overlap_threshold",0.5), \
               use_prior_for_matching=ssdparam.get("multiloss_use_prior_for_matching",True), \
               background_label_id=ssdparam.get("multiloss_background_label_id",0), \
               use_difficult_gt=ssdparam.get("multiloss_use_difficult_gt",False), \
               do_neg_mining=ssdparam.get("multiloss_do_neg_mining",True), \
               neg_pos_ratio=ssdparam.get("multiloss_neg_pos_ratio",3), \
               neg_overlap=ssdparam.get("multiloss_neg_overlap",0.5), \
               code_type=ssdparam.get("multiloss_code_type",P.PriorBox.CENTER_SIZE), \
               encode_variance_in_target=ssdparam.get("multiloss_encode_variance_in_target",False), \
               map_object_to_agnostic=ssdparam.get("multiloss_map_object_to_agnostic",False), \
               name_to_label_file=ssdparam.get("multiloss_name_to_label_file",""))

            net["mbox_loss"] = L.MultiBoxLoss(*mbox_layers, \
                                              multibox_loss_param=multiboxloss_param, \
                                              loss_param=loss_param, \
                                              include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                                              propagate_down=[True, True, False, False])
        else:
            multimcboxloss_param = get_multimcboxloss_param( \
               loc_loss_type=ssdparam.get("multiloss_loc_loss_type",P.MultiBoxLoss.SMOOTH_L1), \
               loc_weight=ssdparam.get("multiloss_loc_weight",1), \
               conf_weight=ssdparam.get("multiloss_conf_weight",1), \
               num_classes=ssdparam.get("num_classes",2), \
               share_location=ssdparam.get("multiloss_share_location",True), \
               match_type=ssdparam.get("multiloss_match_type",P.MultiBoxLoss.PER_PREDICTION), \
               overlap_threshold=ssdparam.get("multiloss_overlap_threshold",0.5), \
               use_prior_for_matching=ssdparam.get("multiloss_use_prior_for_matching",True), \
               background_label_id=ssdparam.get("multiloss_background_label_id",0), \
               use_difficult_gt=ssdparam.get("multiloss_use_difficult_gt",False), \
               do_neg_mining=ssdparam.get("multiloss_do_neg_mining",True), \
               neg_pos_ratio=ssdparam.get("multiloss_neg_pos_ratio",3), \
               neg_overlap=ssdparam.get("multiloss_neg_overlap",0.5), \
               code_type=ssdparam.get("multiloss_code_type",P.PriorBox.CENTER_SIZE), \
               encode_variance_in_target=ssdparam.get("multiloss_encode_variance_in_target",False), \
               map_object_to_agnostic=ssdparam.get("multiloss_map_object_to_agnostic",False), \
               name_to_label_file=ssdparam.get("multiloss_name_to_label_file",""),\
               rescore=ssdparam.get("multiloss_rescore",True),\
               object_scale=ssdparam.get("multiloss_object_scale",1),\
               noobject_scale=ssdparam.get("multiloss_noobject_scale",1),\
               class_scale=ssdparam.get("multiloss_class_scale",1),\
               loc_scale=ssdparam.get("multiloss_loc_scale",1))
            net["mbox_loss"] = L.MultiMcBoxLoss(*mbox_layers, \
                                              multimcbox_loss_param=multimcboxloss_param, \
                                              loss_param=loss_param, \
                                              include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                                              propagate_down=[True, True, False, False])

        return net
    else:
        # create conf softmax layer
        # mbox_layers[1]
        if not ssdparam["combine_yolo_ssd"]:
            if ssdparam.get("multiloss_conf_loss_type",
                            P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.SOFTMAX:
                reshape_name = "mbox_conf_reshape"
                net[reshape_name] = L.Reshape(mbox_layers[1], \
                        shape=dict(dim=[0, -1, ssdparam.get("num_classes",2)]))
                softmax_name = "mbox_conf_softmax"
                net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
                flatten_name = "mbox_conf_flatten"
                net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
                mbox_layers[1] = net[flatten_name]
            elif ssdparam.get(
                    "multiloss_conf_loss_type",
                    P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.LOGISTIC:
                sigmoid_name = "mbox_conf_sigmoid"
                net[sigmoid_name] = L.Sigmoid(mbox_layers[1])
                mbox_layers[1] = net[sigmoid_name]
            else:
                raise ValueError("Unknown conf loss type.")
        det_out_param = get_detection_out_param( \
            num_classes=ssdparam.get("num_classes",2), \
            share_location=ssdparam.get("multiloss_share_location",True), \
            background_label_id=ssdparam.get("multiloss_background_label_id",0), \
            code_type=ssdparam.get("multiloss_code_type",P.PriorBox.CENTER_SIZE), \
            variance_encoded_in_target=ssdparam.get("multiloss_encode_variance_in_target",False), \
            conf_threshold=ssdparam.get("detectionout_conf_threshold",0.01), \
            nms_threshold=ssdparam.get("detectionout_nms_threshold",0.45), \
            boxsize_threshold=ssdparam.get("detectionout_boxsize_threshold",0.001), \
            top_k=ssdparam.get("detectionout_top_k",30), \
            visualize=ssdparam.get("detectionout_visualize",False), \
            visual_conf_threshold=ssdparam.get("detectionout_visualize_conf_threshold", 0.5), \
            visual_size_threshold=ssdparam.get("detectionout_visualize_size_threshold", 0), \
            display_maxsize=ssdparam.get("detectionout_display_maxsize",1000), \
            line_width=ssdparam.get("detectionout_line_width",4), \
            color=ssdparam.get("detectionout_color",[[0,255,0],]))
        if visualize:
            mbox_layers.append(net[extra_data])
        if not ssdparam["combine_yolo_ssd"]:
            net.detection_out = L.DetectionOutput(*mbox_layers, \
         detection_output_param=det_out_param, \
         include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.detection_out = L.DetectionMultiMcOutput(*mbox_layers, \
                detection_output_param=det_out_param, \
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        if not visualize and eval_enable:
            # create eval layer
            det_eval_param = get_detection_eval_param( \
                 num_classes=ssdparam.get("num_classes",2), \
                 background_label_id=ssdparam.get("multiloss_background_label_id",0), \
                 evaluate_difficult_gt=ssdparam.get("detectioneval_evaluate_difficult_gt",False), \
                 boxsize_threshold=ssdparam.get("detectioneval_boxsize_threshold",[0,0.01,0.05,0.1,0.15,0.2,0.25]), \
                 iou_threshold=ssdparam.get("detectioneval_iou_threshold",[0.9,0.75,0.5]), \
                 name_size_file=ssdparam.get("detectioneval_name_size_file",""))
            net.detection_eval = L.DetectionEvaluate(net.detection_out, net[gt_label], \
               detection_evaluate_param=det_eval_param, \
               include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        if not eval_enable:
            net.silence = L.Silence(net.detection_out, ntop=0, \
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        return net
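
A minimal usage sketch for this builder, assuming a NetSpec with data/label layers already attached; the keyword values below are illustrative, and combine_yolo_ssd must be supplied because the function reads it directly rather than via .get():

import caffe

net = caffe.NetSpec()
# ... attach data ("data") and ground-truth ("label") layers here ...
net = SsdDetector(net, train=True, data_layer="data", gt_label="label",
                  net_width=300, net_height=300, basenet="VGG",
                  num_classes=21,
                  combine_yolo_ssd=False,
                  multilayers_aspect_ratios=[[2], [2, 3], [2, 3], [2], [2]])
print(net.to_proto())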
Example 3
def get_caffe_layer(node, net, input_dims):
    """Generate caffe layer for corresponding mxnet op.

    Args:
        node (iterable from MxnetParser): Mxnet op summary generated by MxnetParser
        net (caffe.net): Caffe netspec object
        input_dims (tuple): input image dimensions, used to scale prior box sizes

    Returns:
        caffe.layers: Equivalent caffe layer
    """
    if node['type'] == 'Convolution':
        assert len(node['inputs']) == 1, \
            'Convolution layers can have only one input'
        conv_params = node['attr']
        kernel_size = make_list(conv_params['kernel'])
        num_filters = make_list(conv_params['num_filter'])[0]
        if 'stride' in conv_params:
            stride = make_list(conv_params['stride'])[0]
        else:
            stride = 1
        padding = make_list(conv_params['pad'])
        if 'dilate' in conv_params:
            dilation = make_list(conv_params['dilate'])[0]
        else:
            dilation = 1
        convolution_param = {
            'pad': padding,
            'kernel_size': kernel_size,
            'num_output': num_filters,
            'stride': stride,
            'dilation': dilation
        }
        return layers.Convolution(net[node['inputs'][0]],
                                  convolution_param=convolution_param)
    if node['type'] == 'Activation':
        assert len(node['inputs']) == 1, \
            'Activation layers can have only one input'
        assert node['attr']['act_type'] == 'relu'
        return layers.ReLU(net[node['inputs'][0]])

    if node['type'] == 'Pooling':
        assert len(node['inputs']) == 1, \
            'Pooling layers can have only one input'
        kernel_size = make_list(node['attr']['kernel'])
        stride = make_list(node['attr']['stride'])
        pooling_type = node['attr']['pool_type']
        if 'pad' in node['attr']:
            padding = make_list(node['attr']['pad'])
        else:
            padding = [0]
        if pooling_type == 'max':
            pooling = params.Pooling.MAX
        elif pooling_type == 'avg':
            pooling = params.Pooling.AVG
        else:
            raise ValueError("Unsupported pool_type: {}".format(pooling_type))
        pooling_param = {
            'pool': pooling,
            'pad': padding[0],
            'kernel_size': kernel_size[0],
            'stride': stride[0]
        }
        return layers.Pooling(net[node['inputs'][0]],
                              pooling_param=pooling_param)

    if node['type'] == 'L2Normalization':
        across_spatial = node['attr']['mode'] != 'channel'
        channel_shared = False
        scale_filler = {
            'type': "constant",
            'value': constants.NORMALIZATION_FACTOR
        }
        norm_param = {
            'across_spatial': across_spatial,
            'scale_filler': scale_filler,
            'channel_shared': channel_shared
        }
        return layers.Normalize(net[node['inputs'][0]], norm_param=norm_param)

    # Note - this layer has been implemented
    # only in WeiLiu's ssd branch of caffe not in caffe master
    if node['type'] == 'transpose':
        order = make_list(node['attr']['axes'])
        return layers.Permute(net[node['inputs'][0]],
                              permute_param={'order': order})

    if node['type'] == 'Flatten':
        if node['inputs'][0].endswith('anchors'):
            axis = 2
        else:
            axis = 1
        return layers.Flatten(net[node['inputs'][0]],
                              flatten_param={'axis': axis})

    if node['type'] == 'Concat':
        # In the ssd model, always concatenate along last axis,
        # since anchor boxes have an extra dimension in caffe (that includes variance).
        axis = -1
        concat_inputs = [net[inp] for inp in node['inputs']]
        return layers.Concat(*concat_inputs, concat_param={'axis': axis})

    if node['type'] == 'Reshape':
        if node['name'] == 'multibox_anchors':
            reshape_dims = [1, 2, -1]
        else:
            reshape_dims = make_list(node['attr']['shape'])
        return layers.Reshape(net[node['inputs'][0]],
                              reshape_param={'shape': {
                                  'dim': reshape_dims
                              }})

    if node['type'] == '_contrib_MultiBoxPrior':
        priorbox_inputs = [net[inp] for inp in node['inputs']] + [net["data"]]
        sizes = make_list(node["attr"]["sizes"])
        min_size = sizes[0] * input_dims[0]
        max_size = int(round((sizes[1] * input_dims[0])**2 / min_size))
        aspect_ratio = make_list(node["attr"]["ratios"])
        steps = make_list(node["attr"]["steps"])
        param = {
            'clip': node["attr"]["clip"] == "true",
            'flip': False,
            'min_size': min_size,
            'max_size': max_size,
            'aspect_ratio': aspect_ratio,
            'variance': [0.1, 0.1, 0.2, 0.2],
            'step': int(round(steps[0] * input_dims[0])),
        }
        return layers.PriorBox(*priorbox_inputs, prior_box_param=param)

    if node['type'] == '_contrib_MultiBoxDetection':
        multibox_inputs = [net[inp] for inp in node['inputs']]
        bottom_order = [1, 0, 2]
        multibox_inputs = [multibox_inputs[i] for i in bottom_order]
        param = {
            'num_classes': constants.NUM_CLASSES,
            'share_location': True,
            'background_label_id': 0,
            'nms_param': {
                'nms_threshold': float(node['attr']['nms_threshold']),
                'top_k': int(node['attr']['nms_topk'])
            },
            'keep_top_k': make_list(node['attr']['nms_topk'])[0],
            'confidence_threshold': 0.01,
            'code_type': params.PriorBox.CENTER_SIZE,
        }
        return layers.DetectionOutput(*multibox_inputs,
                                      detection_output_param=param)

    if node['type'] in ['SoftmaxActivation', 'SoftmaxOutput']:
        if 'mode' not in node['attr']:
            axis = 1
        elif node['attr']['mode'] == 'channel':
            axis = 1
        else:
            axis = 0
        # note: caffe expects confidence scores to be flattened before detection output layer receives it
        return layers.Flatten(layers.Permute(
            layers.Softmax(net[node['inputs'][0]], axis=axis),
            permute_param={'order': [0, 2, 1]}),
                              flatten_param={'axis': 1})
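
The node dicts this converter walks follow the MxnetParser summary shape used above: a type, a list of input blob names, and an attr dict of string-encoded values. A hypothetical example (names invented for illustration):

conv_node = {
    'name': 'conv1',
    'type': 'Convolution',
    'inputs': ['data'],
    'attr': {                    # attribute values arrive as strings
        'kernel': '(3, 3)',
        'num_filter': '64',
        'stride': '(1, 1)',
        'pad': '(1, 1)',
    },
}
net['conv1'] = get_caffe_layer(conv_node, net, input_dims=(300, 300))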
Example 4
conf_name = "mbox_conf"
if multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.SOFTMAX:
  reshape_name = "{}_reshape".format(conf_name)
  net[reshape_name] = L.Reshape(net[conf_name], shape=dict(dim=[0, -1, num_classes]))
  softmax_name = "{}_softmax".format(conf_name)
  net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
  flatten_name = "{}_flatten".format(conf_name)
  net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
  mbox_layers[1] = net[flatten_name]
elif multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.LOGISTIC:
  sigmoid_name = "{}_sigmoid".format(conf_name)
  net[sigmoid_name] = L.Sigmoid(net[conf_name])
  mbox_layers[1] = net[sigmoid_name]

net.detection_out = L.DetectionOutput(*mbox_layers,
    detection_output_param=det_out_param,
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))
net.detection_eval = L.DetectionEvaluate(net.detection_out, net.label,
    detection_evaluate_param=det_eval_param,
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))

with open(test_net_file, 'w') as f:
    print('name: "{}_test"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(test_net_file, job_dir)

# Create deploy net.
# Remove the first and last layer from test net.
deploy_net = net
with open(deploy_net_file, 'w') as f:
    net_param = deploy_net.to_proto()
    del net_param.layer[0]
    del net_param.layer[-1]
    print(net_param, file=f)

# parameters for generating detection output (the head of this dict was
# truncated; reconstructed minimally to match the other examples).
det_out_param = {
    'num_classes': num_classes,
    'share_location': True,
    'background_label_id': 0,
    'keep_top_k': 200,
    'confidence_threshold': 0.01,
    'code_type': P.PriorBox.CENTER_SIZE,
}

# parameters for evaluating detection results.
det_eval_param = {
    'num_classes': num_classes,
    'background_label_id': 0,
    'overlap_threshold': 0.5,
    'evaluate_difficult_gt': False,
}

detection_out_name = "detection_out"
tmp_layer = net.layer.add()
tmp_layer.CopyFrom(
    L.DetectionOutput(
        detection_output_param=det_out_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST'))).to_proto().layer[0])
tmp_layer.name = detection_out_name
tmp_layer.top[0] = detection_out_name
tmp_layer.bottom.append(loc_name)
tmp_layer.bottom.append(flatten_name)
tmp_layer.bottom.append("mbox_priorbox")

#print(str(net))

# net.detection_eval = L.DetectionEvaluate(net.detection_out, net.label,
#     detection_evaluate_param=det_eval_param,
#     include=dict(phase=caffe_pb2.Phase.Value('TEST')))

outFn = './newNet.prototxt'
with open(outFn, 'w') as f:
    f.write(str(net))
Example 6
def run(args):
    with open(args.cfg_name) as f:
        cfg = edict(json.load(f))

    net = caffe_pb2.NetParameter()
    with open(args.prototxt_name) as f:
        s = f.read()
        txtf.Merge(s, net)

    CreateMultiBoxHead(net,
                       data_layer='data',
                       from_layers=cfg.mbox_source_layers,
                       use_batchnorm=False,
                       min_sizes=cfg.min_sizes,
                       max_sizes=cfg.max_sizes,
                       aspect_ratios=cfg.aspect_ratios,
                       steps=cfg.steps,
                       normalizations=cfg.normalizations,
                       num_classes=cfg.num_classes,
                       share_location=cfg.share_location,
                       flip=cfg.flip,
                       clip=cfg.clip,
                       prior_variance=cfg.prior_variance,
                       kernel_size=3,
                       pad=1,
                       lr_mult=1)

    conf_name = 'mbox_conf'
    reshape_name = "{}_reshape".format(conf_name)
    reshape_layer = net.layer.add()
    reshape_layer.CopyFrom(
        L.Reshape(shape=dict(
            dim=[0, -1, cfg.num_classes])).to_proto().layer[0])
    reshape_layer.name = reshape_name
    reshape_layer.top[0] = reshape_name
    reshape_layer.bottom.append(cfg.conf_layer)

    softmax_name = "{}_softmax".format(conf_name)
    softmax_layer = net.layer.add()
    softmax_layer.CopyFrom(L.Softmax(axis=2).to_proto().layer[0])
    softmax_layer.name = softmax_name
    softmax_layer.top[0] = softmax_name
    softmax_layer.bottom.append(reshape_name)

    flatten_name = "{}_flatten".format(conf_name)
    flatten_layer = net.layer.add()
    flatten_layer.CopyFrom(L.Flatten(axis=1).to_proto().layer[0])
    flatten_layer.name = flatten_name
    flatten_layer.top[0] = flatten_name
    flatten_layer.bottom.append(softmax_name)

    det_out_param = {
        'num_classes': cfg.num_classes,
        'share_location': cfg.share_location,
        'background_label_id': 0,
        'nms_param': {
            'nms_threshold': 0.45,
            'top_k': 200
        },
        'keep_top_k': 100,
        'confidence_threshold': 0.01,
        'code_type': P.PriorBox.CENTER_SIZE,
    }

    # parameters for evaluating detection results.
    det_eval_param = {
        'num_classes': cfg.num_classes,
        'background_label_id': 0,
        'overlap_threshold': 0.5,
        'evaluate_difficult_gt': False,
    }

    detection_out_name = "detection_out"
    detection_out_layer = net.layer.add()
    detection_out_layer.CopyFrom(
        L.DetectionOutput(
            detection_output_param=det_out_param,
            include=dict(
                phase=caffe_pb2.Phase.Value('TEST'))).to_proto().layer[0])
    detection_out_layer.name = detection_out_name
    detection_out_layer.top[0] = detection_out_name
    detection_out_layer.bottom.append(cfg.loc_layer)
    detection_out_layer.bottom.append(flatten_name)
    detection_out_layer.bottom.append("mbox_priorbox")

    with open(args.save_name, 'w') as f:
        f.write(str(net))
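
A hypothetical minimal cfg for run(), shown as the Python dict the JSON file would parse into; the field names match the attributes read above, while the values are placeholders:

example_cfg = {
    "mbox_source_layers": ["conv4_3", "fc7"],
    "min_sizes": [30.0, 60.0],
    "max_sizes": [60.0, 111.0],
    "aspect_ratios": [[2], [2, 3]],
    "steps": [8, 16],
    "normalizations": [20, -1],
    "num_classes": 21,
    "share_location": True,
    "flip": True,
    "clip": False,
    "prior_variance": [0.1, 0.1, 0.2, 0.2],
    "conf_layer": "mbox_conf",   # bottom of the conf reshape layer
    "loc_layer": "mbox_loc",     # first bottom of DetectionOutput
}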
Example 7
def main(args):
    '''main '''

    # The database file for training data. Created by data/VOC0712/create_data.sh
    train_data = "{}/lmdb/{}_trainval_lmdb".format(CF_tool_root, args.gen_dir)
    # The database file for testing data. Created by data/VOC0712/create_data.sh
    test_data = "{}/lmdb/{}_test_lmdb".format(CF_tool_root, args.gen_dir)

    # Specify the batch sampler.
    resize_width = args.image_resize
    resize_height = args.image_resize
    resize = "{}x{}".format(resize_width, resize_height)

    batch_sampler = [
        {
            'sampler': {},
            'max_trials': 1,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.1,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.3,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.5,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.7,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.9,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'max_jaccard_overlap': 1.0,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
    ]
    train_transform_param = {
        'mirror': True,
        'mean_value': [104, 117, 123],
        'resize_param': {
            'prob': 1,
            'resize_mode': P.Resize.WARP,
            'height': resize_height,
            'width': resize_width,
            'interp_mode': [
                P.Resize.LINEAR,
                P.Resize.AREA,
                P.Resize.NEAREST,
                P.Resize.CUBIC,
                P.Resize.LANCZOS4,
            ],
        },
        'distort_param': {
            'brightness_prob': 0.5,
            'brightness_delta': 32,
            'contrast_prob': 0.5,
            'contrast_lower': 0.5,
            'contrast_upper': 1.5,
            'hue_prob': 0.5,
            'hue_delta': 18,
            'saturation_prob': 0.5,
            'saturation_lower': 0.5,
            'saturation_upper': 1.5,
            'random_order_prob': 0.0,
        },
        'expand_param': {
            'prob': 0.5,
            'max_expand_ratio': 4.0,
        },
        'emit_constraint': {
            'emit_type': caffe_pb2.EmitConstraint.CENTER,
        }
    }
    test_transform_param = {
        'mean_value': [104, 117, 123],
        'resize_param': {
            'prob': 1,
            'resize_mode': P.Resize.WARP,
            'height': resize_height,
            'width': resize_width,
            'interp_mode': [P.Resize.LINEAR],
        },
    }

    # If true, use batch norm for all newly added layers.
    # Currently only the non batch norm version has been tested.
    use_batchnorm = False
    lr_mult = 2
    if use_batchnorm:
        base_lr = 0.0004
    else:
        base_lr = 0.00004 / 10

    # Modify the job name if you want.
    job_name = "FSSD_{}_{}".format(args.gen_dir, resize)
    # The name of the model. Modify it if you want.
    model_name = "VGG_{}_{}".format(args.gen_dir, job_name)

    # Directory which stores the model .prototxt file.
    save_dir = "{}/models/{}".format(CF_tool_root, job_name)
    # Directory which stores the snapshot of models.
    snapshot_dir = "{}/snapshot_models/{}".format(CF_tool_root, job_name)
    # Directory which stores the job script and log file.
    job_dir = "{}/jobs/{}".format(CF_tool_root, job_name)
    # Directory which stores the detection results.
    output_result_dir = job_dir + '/predict_ss'

    # model definition files.
    train_net_file = "{}/train.prototxt".format(save_dir)
    test_net_file = "{}/test.prototxt".format(save_dir)
    deploy_net_file = "{}/deploy.prototxt".format(save_dir)
    solver_file = "{}/solver.prototxt".format(save_dir)
    # snapshot prefix.
    snapshot_prefix = "{}/{}".format(snapshot_dir, model_name)
    # job script path.
    job_file = "{}/{}.sh".format(job_dir, model_name)

    # Stores the test image names and sizes. Created by data/VOC0712/create_list.sh
    name_size_file = "{}/data/{}/ssd/test_name_size.txt".format(
        CF_tool_root, args.gen_dir)
    # The pretrained model. We use the Fully convolutional reduced (atrous) VGGNet.
    #pretrain_model = "{}/models/VGGNet/VGG_ILSVRC_16_layers_fc_reduced.caffemodel".format(CF_tool_root)
    #pretrain_model = "{}/snapshot_models/SSD_300x300/VGG_VOC0712_SSD_300x300_iter_120000.caffemodel".format(CF_tool_root)
    pretrain_model = args.model_weights
    # Stores LabelMapItem.
    label_map_file = args.labelmap_file
    #label_map_file = "{}/data/{}/ssd/label_map.txt".format(CF_tool_root, args.gen_dir)

    # MultiBoxLoss parameters.
    num_classes = int(args.num_classes)
    share_location = True
    background_label_id = 0
    train_on_diff_gt = True
    normalization_mode = P.Loss.VALID
    code_type = P.PriorBox.CENTER_SIZE
    ignore_cross_boundary_bbox = False
    mining_type = P.MultiBoxLoss.MAX_NEGATIVE
    neg_pos_ratio = 3.
    loc_weight = (neg_pos_ratio + 1.) / 4.
    multibox_loss_param = {
        'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
        'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
        'loc_weight': loc_weight,
        'num_classes': num_classes,
        'share_location': share_location,
        'match_type': P.MultiBoxLoss.PER_PREDICTION,
        'overlap_threshold': 0.5,
        'use_prior_for_matching': True,
        'background_label_id': background_label_id,
        'use_difficult_gt': train_on_diff_gt,
        'mining_type': mining_type,
        'neg_pos_ratio': neg_pos_ratio,
        'neg_overlap': 0.5,
        'code_type': code_type,
        'ignore_cross_boundary_bbox': ignore_cross_boundary_bbox,
    }
    loss_param = {
        'normalization': normalization_mode,
    }

    # parameters for generating priors.
    # minimum dimension of input image
    min_dim = 300
    mbox_source_layers = [
        'fea_concat_bn_ds_1', 'fea_concat_bn_ds_2', 'fea_concat_bn_ds_4',
        'fea_concat_bn_ds_8', 'fea_concat_bn_ds_16', 'fea_concat_bn_ds_32'
    ]
    # in percent %
    min_ratio = 20
    max_ratio = 90
    step = int(
        math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))
    min_sizes = []
    max_sizes = []
    for ratio in xrange(min_ratio, max_ratio + 1, step):
        min_sizes.append(min_dim * ratio / 100.)
        max_sizes.append(min_dim * (ratio + step) / 100.)

    min_sizes = [min_dim * 10 / 100.] + min_sizes
    max_sizes = [min_dim * 20 / 100.] + max_sizes
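    # With the values above: step = floor((90 - 20) / (6 - 2)) = 17, so the
    # loop ratios are 20, 37, 54, 71, 88, giving min_sizes [60, 111, 162,
    # 213, 264] and max_sizes [111, 162, 213, 264, 315]; prepending the
    # 10%/20% pair covers all six source layers.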
    steps = []
    aspect_ratios = [[2], [2, 3], [2, 3], [2], [2], [2]]
    normalizations = [-1, -1, -1, -1, -1, -1]

    # variance used to encode/decode prior bboxes.
    if code_type == P.PriorBox.CENTER_SIZE:
        prior_variance = [0.1, 0.1, 0.2, 0.2]
    else:
        prior_variance = [0.1]
    flip = True
    clip = False

    # Solver parameters.
    # Defining which GPUs to use.
    gpus = "0"
    gpulist = gpus.split(",")
    num_gpus = len(gpulist)

    batch_size = 8
    accum_batch_size = 32
    iter_size = accum_batch_size / batch_size
    solver_mode = P.Solver.CPU
    device_id = 0
    batch_size_per_device = batch_size
    if num_gpus > 0:
        batch_size_per_device = int(math.ceil(float(batch_size) / num_gpus))
        iter_size = int(
            math.ceil(
                float(accum_batch_size) / (batch_size_per_device * num_gpus)))
        solver_mode = P.Solver.GPU
        device_id = int(gpulist[0])

    if normalization_mode == P.Loss.NONE:
        base_lr /= batch_size_per_device
    elif normalization_mode == P.Loss.VALID:
        base_lr *= 25. / loc_weight
    elif normalization_mode == P.Loss.FULL:
        # Roughly there are 2000 prior bboxes per image.
        # TODO(weiliu89): Estimate the exact # of priors.
        base_lr *= 2000.

    num_test_image = 4952
    test_batch_size = 8
    test_iter = int(math.ceil(float(num_test_image) / test_batch_size))

    solver_param = {
        # Note: this hand-tuned value overrides the base_lr computed above.
        'base_lr': 0.0005,
        'weight_decay': 0.0005,
        'lr_policy': "multistep",
        'stepvalue': [40000, 60000, 80000],
        'gamma': 0.1,
        'momentum': 0.9,
        'iter_size': iter_size,
        'max_iter': 80000,
        'snapshot': 5000,
        'display': 10,
        'average_loss': 10,
        'type': "SGD",
        'solver_mode': solver_mode,
        'device_id': device_id,
        'debug_info': False,
        'snapshot_after_train': True,
        'test_iter': [test_iter],
        'test_interval': 5000,
        'eval_type': "detection",
        'ap_version': "11point",
        'test_initialization': False,
        'show_per_class_result': True,
    }

    det_out_param = {
        'num_classes': num_classes,
        'share_location': share_location,
        'background_label_id': background_label_id,
        'nms_param': {
            'nms_threshold': 0.45,
            'top_k': 400
        },
        'save_output_param': {
            'output_directory': output_result_dir,
            'output_name_prefix': "comp4_det_test_",
            'output_format': "VOC",
            'label_map_file': label_map_file,
            'name_size_file': name_size_file,
            'num_test_image': num_test_image,
        },
        'keep_top_k': 200,
        'confidence_threshold': 0.01,
        'code_type': code_type,
    }

    det_eval_param = {
        'num_classes': num_classes,
        'background_label_id': background_label_id,
        'overlap_threshold': 0.5,
        'evaluate_difficult_gt': False,
        'name_size_file': name_size_file,
    }

    check_if_exist(train_data)
    check_if_exist(test_data)
    check_if_exist(label_map_file)
    check_if_exist(pretrain_model)
    make_if_not_exist(save_dir)
    make_if_not_exist(job_dir)
    make_if_not_exist(snapshot_dir)

    net = caffe.NetSpec()
    net.data, net.label = CreateAnnotatedDataLayer(
        train_data,
        batch_size=batch_size_per_device,
        train=True,
        output_label=True,
        label_map_file=label_map_file,
        transform_param=train_transform_param,
        batch_sampler=batch_sampler)

    VGGNetBody(net,
               from_layer='data',
               fully_conv=True,
               reduced=True,
               dilated=True,
               dropout=False)

    AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

    mbox_layers = CreateMultiBoxHead(net,
                                     data_layer='data',
                                     from_layers=mbox_source_layers,
                                     use_batchnorm=use_batchnorm,
                                     min_sizes=min_sizes,
                                     max_sizes=max_sizes,
                                     aspect_ratios=aspect_ratios,
                                     steps=steps,
                                     normalizations=normalizations,
                                     num_classes=num_classes,
                                     share_location=share_location,
                                     flip=flip,
                                     clip=clip,
                                     prior_variance=prior_variance,
                                     kernel_size=3,
                                     pad=1,
                                     lr_mult=lr_mult)

    name = "mbox_loss"
    mbox_layers.append(net.label)
    net[name] = L.MultiBoxLoss(
        *mbox_layers,
        multibox_loss_param=multibox_loss_param,
        loss_param=loss_param,
        include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
        propagate_down=[True, True, False, False])
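    # propagate_down: backprop into the loc/conf predictions but not into the
    # prior boxes or the ground-truth labels.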

    with open(train_net_file, 'w') as f:
        print('name: "{}_train"'.format(model_name), file=f)
        print(net.to_proto(), file=f)
    shutil.copy(train_net_file, job_dir)

    net = caffe.NetSpec()
    net.data, net.label = CreateAnnotatedDataLayer(
        test_data,
        batch_size=test_batch_size,
        train=False,
        output_label=True,
        label_map_file=label_map_file,
        transform_param=test_transform_param)

    VGGNetBody(net,
               from_layer='data',
               fully_conv=True,
               reduced=True,
               dilated=True,
               dropout=False)

    AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

    mbox_layers = CreateMultiBoxHead(net,
                                     data_layer='data',
                                     from_layers=mbox_source_layers,
                                     use_batchnorm=use_batchnorm,
                                     min_sizes=min_sizes,
                                     max_sizes=max_sizes,
                                     aspect_ratios=aspect_ratios,
                                     steps=steps,
                                     normalizations=normalizations,
                                     num_classes=num_classes,
                                     share_location=share_location,
                                     flip=flip,
                                     clip=clip,
                                     prior_variance=prior_variance,
                                     kernel_size=3,
                                     pad=1,
                                     lr_mult=lr_mult)

    conf_name = "mbox_conf"
    if multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.SOFTMAX \
           or multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.FOCALLOSS:
        reshape_name = "{}_reshape".format(conf_name)
        net[reshape_name] = L.Reshape(net[conf_name],
                                      shape=dict(dim=[0, -1, num_classes]))
        softmax_name = "{}_softmax".format(conf_name)
        net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
        flatten_name = "{}_flatten".format(conf_name)
        net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
        mbox_layers[1] = net[flatten_name]
    elif multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.LOGISTIC:
        sigmoid_name = "{}_sigmoid".format(conf_name)
        net[sigmoid_name] = L.Sigmoid(net[conf_name])
        mbox_layers[1] = net[sigmoid_name]

    net.detection_out = L.DetectionOutput(
        *mbox_layers,
        detection_output_param=det_out_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    net.detection_eval = L.DetectionEvaluate(
        net.detection_out,
        net.label,
        detection_evaluate_param=det_eval_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    with open(test_net_file, 'w') as f:
        print('name: "{}_test"'.format(model_name), file=f)
        print(net.to_proto(), file=f)
    shutil.copy(test_net_file, job_dir)

    deploy_net = net
    with open(deploy_net_file, 'w') as f:
        net_param = deploy_net.to_proto()

        # Strip the data layer (first) and DetectionEvaluate (last); the
        # deploy net declares a raw 'data' input instead.
        del net_param.layer[0]
        del net_param.layer[-1]
        net_param.name = '{}_deploy'.format(model_name)
        net_param.input.extend(['data'])
        net_param.input_shape.extend(
            [caffe_pb2.BlobShape(dim=[1, 3, resize_height, resize_width])])
        print(net_param, file=f)
    shutil.copy(deploy_net_file, job_dir)

    solver = caffe_pb2.SolverParameter(train_net=train_net_file,
                                       test_net=[test_net_file],
                                       snapshot_prefix=snapshot_prefix,
                                       **solver_param)

    with open(solver_file, 'w') as f:
        print(solver, file=f)
    shutil.copy(solver_file, job_dir)

    max_iter = 0
    for file in os.listdir(snapshot_dir):
        if file.endswith(".solverstate"):
            basename = os.path.splitext(file)[0]
            iter = int(basename.split("{}_iter_".format(model_name))[1])
            if iter > max_iter:
                max_iter = iter

    train_src_param = '--weights="{}" \\\n'.format(pretrain_model)
    # resume_training / remove_old_models / run_soon are assumed module-level
    # flags in the original script.
    if resume_training and max_iter > 0:
        train_src_param = '--snapshot="{}_iter_{}.solverstate" \\\n'.format(
            snapshot_prefix, max_iter)

    if remove_old_models:
        # Delete all but the newest snapshot/model files.
        for file in os.listdir(snapshot_dir):
            if file.endswith(".solverstate") or file.endswith(".caffemodel"):
                basename = os.path.splitext(file)[0]
                iter = int(basename.split("{}_iter_".format(model_name))[1])
                if max_iter > iter:
                    os.remove("{}/{}".format(snapshot_dir, file))

    import time
    timestamp = time.strftime('%Y%m%d%H%M%S')
    with open(job_file, 'w') as f:
        #f.write('cd {}\n'.format(caffe_root))
        f.write('{}/build/tools/caffe train \\\n'.format(caffe_root))
        f.write('--solver="{}" \\\n'.format(solver_file))
        f.write(train_src_param)
        if solver_param['solver_mode'] == P.Solver.GPU:
            f.write('--gpu {} 2>&1 | tee {}/{}_{}.log\n'.format(
                gpus, job_dir, model_name, timestamp))
        else:
            f.write('2>&1 | tee {}/{}_{}.log\n'.format(job_dir, model_name,
                                                       timestamp))

    # Copy the python script to job_dir.
    py_file = os.path.abspath(__file__)
    shutil.copy(py_file, job_dir)

    # Run the job.
    print("Run file: {}".format(job_file))
    os.chmod(job_file, stat.S_IRWXU)
    if run_soon:
        subprocess.call(job_file, shell=True)
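
A hedged sketch of the command-line entry point this main(args) implies; the flag names mirror the args.* attributes read above, and the defaults are illustrative only:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Generate and launch an FSSD job.')
    parser.add_argument('--gen_dir', default='VOC0712')      # dataset stem
    parser.add_argument('--image_resize', type=int, default=300)
    parser.add_argument('--num_classes', default='21')       # int() applied above
    parser.add_argument('--model_weights', required=True)    # pretrained .caffemodel
    parser.add_argument('--labelmap_file', required=True)
    main(parser.parse_args())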
Example 8
def Yolo_SsdDetector(net, train=True, data_layer="data", gt_label="label", \
                net_width=300, net_height=300, basenet="Res50",\
                visualize=False, extra_data="data", eval_enable=True, use_layers=2,**yolo_ssd_param):
    """
    创建YOLO检测器。
    train: TRAIN /TEST
    data_layer/gt_label: 数据输入和label输入。
    net_width/net_height: 网络的输入尺寸
    basenet: "vgg"/"res101"/"res50"/pva
    yoloparam: yolo检测器使用的参数列表。
    """
    # BaseNetwork
    # Build the base network and select the feature layers.
    final_layer_channels = 0
    if basenet == "VGG":
        net = VGG16Net(net, from_layer=data_layer, need_fc=False)
        final_layer_channels = 512
        # conv4_3 -> 1/8
        # conv5_3 -> 1/16
        if use_layers == 2:
            base_feature_layers = ['conv5_3']
        elif use_layers == 3:
            base_feature_layers = ['conv4_3', 'conv5_3']
        else:
            base_feature_layers = []
        # define added layers onto the top-layer
        # (extra_top_layers / extra_top_depth are assumed module-level settings)
        add_layers = extra_top_layers
        add_channels = extra_top_depth
        if add_layers > 0:
            final_layer_channels = add_channels
        net, feature_layers = AddTopExtraConvLayers(net, use_pool=True, \
            use_batchnorm=True, num_layers=add_layers, channels=add_channels, \
            feature_layers=base_feature_layers)
    elif basenet == "Res101":
        net = ResNet101Net(net, from_layer=data_layer, use_pool5=False)
        final_layer_channels = 2048
        # res3b3-> 1/8
        # res4b22 -> 1/16
        # res5c -> 1/32
        if use_layers == 2:
            base_feature_layers = ['res4b22']
        elif use_layers == 3:
            base_feature_layers = ['res3b3', 'res4b22']
        else:
            base_feature_layers = []
        # define added layers onto the top-layer
        add_layers = extra_top_layers
        add_channels = extra_top_depth
        if add_layers > 0:
            final_layer_channels = add_channels
        net, feature_layers = AddTopExtraConvLayers(net, use_pool=False, \
            use_batchnorm=True, num_layers=add_layers, channels=add_channels, \
            feature_layers=base_feature_layers)
    elif basenet == "Res50":
        net = ResNet50Net(net, from_layer=data_layer, use_pool5=False)
        final_layer_channels = 2048
        # res3d-> 1/8
        # res4f -> 1/16
        # res5c -> 1/32
        if use_layers == 2:
            base_feature_layers = ['res4f']
        elif use_layers == 3:
            base_feature_layers = ['res3d', 'res4f']
        else:
            base_feature_layers = []
        # define added layers onto the top-layer
        add_layers = extra_top_layers
        add_channels = extra_top_depth
        if add_layers > 0:
            final_layer_channels = add_channels
        net, feature_layers = AddTopExtraConvLayers(net, use_pool=False, \
            use_batchnorm=True, num_layers=add_layers, channels=add_channels, \
            feature_layers=base_feature_layers)
    elif basenet == "PVA":
        net = PvaNet(net, from_layer=data_layer)
        final_layer_channels = 384
        if use_layers == 2:
            base_feature_layers = ['conv5_1/incep/pre', 'conv5_4']
        elif use_layers == 3:
            base_feature_layers = [
                'conv4_1/incep/pre', 'conv5_1/incep/pre', 'conv5_4'
            ]
        else:
            base_feature_layers = ['conv5_4']
        # Note: we do not add extra top layers for pvaNet
        feature_layers = base_feature_layers
    elif basenet == "Yolo":
        net = YoloNet(net, from_layer=data_layer)
        final_layer_channels = 1024
        if use_layers == 2:
            base_feature_layers = ['conv5_5', 'conv6_6']
        elif use_layers == 3:
            base_feature_layers = ['conv4_3', 'conv5_5', 'conv6_6']
        else:
            base_feature_layers = ['conv6_6']
        # Note: we do not add extra top layers for YoloNet
        feature_layers = base_feature_layers
    else:
        raise ValueError(
            "only VGG16, Res50/101, PVA and Yolo are supported in current version."
        )

    # concat the feature_layers
    num_layers = len(feature_layers)
    if num_layers == 1:
        tags = ["Ref"]
    elif num_layers == 2:
        tags = ["Down", "Ref"]
        down_methods = [["Reorg"]]
    else:
        if basenet == "Yolo":
            tags = ["Down", "Down", "Ref"]
            down_methods = [["MaxPool", "Reorg"], ["Reorg"]]
        else:
            tags = ["Down", "Ref", "Up"]
            down_methods = [["Reorg"]]
    # if use VGG, Norm may be used.
    # the interlayers can also be used if needed.
    # upsampleChannels must be the channels of Layers added onto the top.
    UnifiedMultiScaleLayers(net,layers=feature_layers, tags=tags, \
                            unifiedlayer="msfMap", dnsampleMethod=down_methods, \
                            upsampleMethod="Deconv", \
                            upsampleChannels=final_layer_channels)
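    # All selected feature maps are merged into the single "msfMap" blob that
    # the detector headers below consume via from_layers=["msfMap"].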

    mbox_layers = Yolo_SsdDetectorHeaders(net, \
         boxsizes=yolo_ssd_param.get("multilayers_boxsizes", []), \
         net_width=net_width, \
         net_height=net_height, \
         data_layer=data_layer, \
         num_classes=yolo_ssd_param.get("num_classes",2), \
         from_layers=["msfMap"], \
         use_batchnorm=yolo_ssd_param.get("multilayers_use_batchnorm",True), \
         prior_variance = yolo_ssd_param.get("multilayers_prior_variance",[0.1,0.1,0.2,0.2]), \
         normalizations=yolo_ssd_param.get("multilayers_normalizations",[]), \
         aspect_ratios=yolo_ssd_param.get("multilayers_aspect_ratios",[]), \
         flip=yolo_ssd_param.get("multilayers_flip",False), \
         clip=yolo_ssd_param.get("multilayers_clip",False), \
         inter_layer_channels=yolo_ssd_param.get("multilayers_inter_layer_channels",[]), \
         kernel_size=yolo_ssd_param.get("multilayers_kernel_size",3), \
         pad=yolo_ssd_param.get("multilayers_pad",1))

    if train:
        # create loss
        multiboxloss_param = get_multiboxloss_param(
            loc_loss_type=yolo_ssd_param.get("multiloss_loc_loss_type",
                                             P.MultiBoxLoss.SMOOTH_L1),
            conf_loss_type=yolo_ssd_param.get("multiloss_conf_loss_type",
                                              P.MultiBoxLoss.SOFTMAX),
            loc_weight=yolo_ssd_param.get("multiloss_loc_weight", 1),
            conf_weight=yolo_ssd_param.get("multiloss_conf_weight", 1),
            num_classes=yolo_ssd_param.get("num_classes", 2),
            share_location=yolo_ssd_param.get("multiloss_share_location", True),
            match_type=yolo_ssd_param.get("multiloss_match_type",
                                          P.MultiBoxLoss.PER_PREDICTION),
            overlap_threshold=yolo_ssd_param.get("multiloss_overlap_threshold", 0.5),
            use_prior_for_matching=yolo_ssd_param.get(
                "multiloss_use_prior_for_matching", True),
            background_label_id=yolo_ssd_param.get("multiloss_background_label_id", 0),
            use_difficult_gt=yolo_ssd_param.get("multiloss_use_difficult_gt", False),
            do_neg_mining=yolo_ssd_param.get("multiloss_do_neg_mining", True),
            neg_pos_ratio=yolo_ssd_param.get("multiloss_neg_pos_ratio", 3),
            neg_overlap=yolo_ssd_param.get("multiloss_neg_overlap", 0.5),
            code_type=yolo_ssd_param.get("multiloss_code_type",
                                         P.PriorBox.CENTER_SIZE),
            encode_variance_in_target=yolo_ssd_param.get(
                "multiloss_encode_variance_in_target", False),
            map_object_to_agnostic=yolo_ssd_param.get(
                "multiloss_map_object_to_agnostic", False),
            name_to_label_file=yolo_ssd_param.get("multiloss_name_to_label_file", ""))
        loss_param = get_loss_param(normalization=yolo_ssd_param.get(
            "multiloss_normalization", P.Loss.VALID))
        mbox_layers.append(net[gt_label])
        net["mbox_loss"] = L.MultiBoxLoss(*mbox_layers, \
                                          multibox_loss_param=multiboxloss_param, \
                                          loss_param=loss_param, \
                                          include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                                          propagate_down=[True, True, False, False])
        return net
    else:
        # apply the confidence activation (softmax or sigmoid) to mbox_layers[1]
        if yolo_ssd_param.get(
                "multiloss_conf_loss_type",
                P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.SOFTMAX:
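            # Reshape [N, priors*classes] -> [N, priors, classes] so Softmax
            # normalizes over the class axis (axis=2), then flatten back to
            # the [N, priors*classes] layout DetectionOutput expects.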
            reshape_name = "mbox_conf_reshape"
            net[reshape_name] = L.Reshape(
                mbox_layers[1],
                shape=dict(dim=[0, -1, yolo_ssd_param.get("num_classes", 2)]))
            softmax_name = "mbox_conf_softmax"
            net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
            flatten_name = "mbox_conf_flatten"
            net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
            mbox_layers[1] = net[flatten_name]
        elif yolo_ssd_param.get(
                "multiloss_conf_loss_type",
                P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.LOGISTIC:
            sigmoid_name = "mbox_conf_sigmoid"
            net[sigmoid_name] = L.Sigmoid(mbox_layers[1])
            mbox_layers[1] = net[sigmoid_name]
        else:
            raise ValueError("Unknown conf loss type.")
        det_out_param = get_detection_out_param(
            num_classes=yolo_ssd_param.get("num_classes", 2),
            share_location=yolo_ssd_param.get("multiloss_share_location", True),
            background_label_id=yolo_ssd_param.get("multiloss_background_label_id", 0),
            code_type=yolo_ssd_param.get("multiloss_code_type",
                                         P.PriorBox.CENTER_SIZE),
            variance_encoded_in_target=yolo_ssd_param.get(
                "multiloss_encode_variance_in_target", False),
            conf_threshold=yolo_ssd_param.get("detectionout_conf_threshold", 0.01),
            nms_threshold=yolo_ssd_param.get("detectionout_nms_threshold", 0.45),
            boxsize_threshold=yolo_ssd_param.get("detectionout_boxsize_threshold", 0.001),
            top_k=yolo_ssd_param.get("detectionout_top_k", 30),
            visualize=yolo_ssd_param.get("detectionout_visualize", False),
            visual_conf_threshold=yolo_ssd_param.get(
                "detectionout_visualize_conf_threshold", 0.5),
            visual_size_threshold=yolo_ssd_param.get(
                "detectionout_visualize_size_threshold", 0),
            display_maxsize=yolo_ssd_param.get("detectionout_display_maxsize", 1000),
            line_width=yolo_ssd_param.get("detectionout_line_width", 4),
            color=yolo_ssd_param.get("detectionout_color", [[0, 255, 0]]))
        if visualize:
            mbox_layers.append(net[extra_data])

        net.detection_out = L.DetectionOutput(
            *mbox_layers,
            detection_output_param=det_out_param,
            include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        if not visualize and eval_enable:
            # create eval layer
            det_eval_param = get_detection_eval_param(
                num_classes=yolo_ssd_param.get("num_classes", 2),
                background_label_id=yolo_ssd_param.get(
                    "multiloss_background_label_id", 0),
                evaluate_difficult_gt=yolo_ssd_param.get(
                    "detectioneval_evaluate_difficult_gt", False),
                boxsize_threshold=yolo_ssd_param.get(
                    "detectioneval_boxsize_threshold",
                    [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
                iou_threshold=yolo_ssd_param.get("detectioneval_iou_threshold",
                                                 [0.9, 0.75, 0.5]),
                name_size_file=yolo_ssd_param.get("detectioneval_name_size_file", ""))
            net.detection_eval = L.DetectionEvaluate(
                net.detection_out,
                net[gt_label],
                detection_evaluate_param=det_eval_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        if not eval_enable:
            net.silence = L.Silence(
                net.detection_out,
                ntop=0,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        return net
Example #9
def vgg16_ssd_seg(source,
                  bbox_seg_data_param,
                  kwargs,
                  use_batchnorm=False,
                  lr_mult=1,
                  crop_layers=None,
                  is_crop_last=False,
                  is_cls=False,
                  is_deploy=False,
                  is_crop_all=False,
                  is_crop_cls=False,
                  is_crop_merge_feature=False):

    if crop_layers is None:
        crop_layers = [
            'conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2'
        ]
    net = caffe.NetSpec()
    if is_deploy:
        net.data = L.Input(input_param=dict(shape=dict(dim=[1, 3, 320, 320])))
        net.cls_specific_bbox = L.Input(input_param=dict(shape=dict(
            dim=[1, 1, 1, 8])))
        if is_cls:
            net.cls = L.Input(input_param=dict(shape=dict(dim=[1, 20])))
    else:
        net.data, net.bbox, net.seg = L.BBoxSegData(
            name="data",
            annotated_data_param=bbox_seg_data_param,
            data_param=dict(batch_size=8, backend=P.Data.LMDB, source=source),
            ntop=3,
            **kwargs)

        net.cls_specific_bbox, net.binary_mask, net.cls = L.SelectBinary(
            net.bbox, net.seg, random_select=True, num_class=20, ntop=3)
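        # SelectBinary randomly picks one annotated class (random_select=True)
        # and emits its class-specific bbox, a binary mask, and the class vector.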

    VGGNetBody(net,
               from_layer='data',
               fully_conv=True,
               reduced=True,
               dilated=True,
               dropout=False,
               pool_mask=True,
               freeze_all=True)

    AddExtraLayers(net, use_batchnorm, lr_mult=0)

    if is_deploy:
        # MultiBoxLoss parameters.
        num_classes = 21
        share_location = True
        background_label_id = 0
        train_on_diff_gt = True
        normalization_mode = P.Loss.VALID
        code_type = P.PriorBox.CENTER_SIZE
        ignore_cross_boundary_bbox = False
        mining_type = P.MultiBoxLoss.MAX_NEGATIVE
        neg_pos_ratio = 3.
        loc_weight = (neg_pos_ratio + 1.) / 4.
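        # with neg_pos_ratio = 3, loc_weight = (3. + 1.) / 4. = 1.0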
        multibox_loss_param = {
            'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
            'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
            'loc_weight': loc_weight,
            'num_classes': num_classes,
            'share_location': share_location,
            'match_type': P.MultiBoxLoss.PER_PREDICTION,
            'overlap_threshold': 0.5,
            'use_prior_for_matching': True,
            'background_label_id': background_label_id,
            'use_difficult_gt': train_on_diff_gt,
            'mining_type': mining_type,
            'neg_pos_ratio': neg_pos_ratio,
            'neg_overlap': 0.5,
            'code_type': code_type,
            'ignore_cross_boundary_bbox': ignore_cross_boundary_bbox,
        }

        # parameters for generating priors.
        # minimum dimension of input image
        min_dim = 320
        # conv4_3 ==> 40 x 40
        # fc7 ==> 20 x 20
        # conv6_2 ==> 10 x 10
        # conv7_2 ==> 5 x 5
        # conv8_2 ==> 3 x 3
        # conv9_2 ==> 1 x 1
        mbox_source_layers = [
            'conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2'
        ]
        # in percent %
        min_ratio = 20
        max_ratio = 90
        step = int(
            math.floor(
                (max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))
        min_sizes = []
        max_sizes = []
        for ratio in range(min_ratio, max_ratio + 1, step):
            min_sizes.append(min_dim * ratio / 100.)
            max_sizes.append(min_dim * (ratio + step) / 100.)
        min_sizes = [min_dim * 10 / 100.] + min_sizes
        max_sizes = [min_dim * 20 / 100.] + max_sizes
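        # For min_dim = 320 this gives step = floor(70 / 4) = 17, ratios
        # 20, 37, 54, 71, 88, and hence
        #   min_sizes = [32.0, 64.0, 118.4, 172.8, 227.2, 281.6]
        #   max_sizes = [64.0, 118.4, 172.8, 227.2, 281.6, 336.0]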
        steps = [8, 16, 32, 64, 100, 320]
        aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
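        # With flip=True (set below), each ratio r also contributes 1/r, so
        # together with the two square priors (min_size and sqrt(min*max))
        # these give 4, 6, 6, 6, 4, 4 priors per location respectively.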
        # L2 normalize conv4_3.
        normalizations = [20, -1, -1, -1, -1, -1]
        # variance used to encode/decode prior bboxes.
        if code_type == P.PriorBox.CENTER_SIZE:
            prior_variance = [0.1, 0.1, 0.2, 0.2]
        else:
            prior_variance = [0.1]
        flip = True
        clip = False

        # parameters for generating detection output.
        det_out_param = {
            'num_classes': num_classes,
            'share_location': share_location,
            'background_label_id': background_label_id,
            'nms_param': {
                'nms_threshold': 0.45,
                'top_k': 400
            },
            'keep_top_k': 200,
            'confidence_threshold': 0.01,
            'code_type': code_type,
        }
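        # nms_param.top_k limits candidates kept per class before NMS;
        # keep_top_k caps the final detections per image after NMS.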

        mbox_layers = CreateMultiBoxHead(net,
                                         data_layer='data',
                                         from_layers=mbox_source_layers,
                                         use_batchnorm=use_batchnorm,
                                         min_sizes=min_sizes,
                                         max_sizes=max_sizes,
                                         aspect_ratios=aspect_ratios,
                                         steps=steps,
                                         normalizations=normalizations,
                                         num_classes=num_classes,
                                         share_location=share_location,
                                         flip=flip,
                                         clip=clip,
                                         prior_variance=prior_variance,
                                         kernel_size=3,
                                         pad=1,
                                         lr_mult=lr_mult)

        conf_name = "mbox_conf"
        if multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.SOFTMAX:
            reshape_name = "{}_reshape".format(conf_name)
            net[reshape_name] = L.Reshape(net[conf_name],
                                          shape=dict(dim=[0, -1, num_classes]))
            softmax_name = "{}_softmax".format(conf_name)
            net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
            flatten_name = "{}_flatten".format(conf_name)
            net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
            mbox_layers[1] = net[flatten_name]
        elif multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.LOGISTIC:
            sigmoid_name = "{}_sigmoid".format(conf_name)
            net[sigmoid_name] = L.Sigmoid(net[conf_name])
            mbox_layers[1] = net[sigmoid_name]

        net.detection_out = L.DetectionOutput(
            *mbox_layers,
            detection_output_param=det_out_param,
            include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    if not is_cls:
        if not is_deploy:
            net.cls_silence = L.Silence(net.cls, ntop=0)
    else:
        # embed the class vector so the deconvolution net can produce
        # class-specific semantic segmentation
        net.cls_reshape = L.Reshape(net.cls, shape=dict(dim=[0, 0, 1, 1]))

    # add top-down deconvolution net
    # mbox_source_layers = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']
    AddExtraTopDownLayers(net,
                          use_batchnorm=True,
                          lr_mult=1,
                          crop_layers=crop_layers,
                          is_cls=is_cls,
                          is_crop_all=is_crop_all,
                          is_crop_cls=is_crop_cls)

    DeVGGNetBody(net,
                 from_layer='deconv6_1',
                 fully_conv=True,
                 reduced=True,
                 dilated=True,
                 dropout=False,
                 pool_mask=True,
                 extra_crop_layers=crop_layers,
                 is_crop_all=is_crop_all,
                 is_crop_cls=is_crop_cls,
                 is_crop_merge_feature=is_crop_merge_feature)

    dekwargs = {
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)
    }
    deparam = {
        'param':
        [dict(lr_mult=1, decay_mult=1),
         dict(lr_mult=2, decay_mult=0)]
    }
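    # Standard Caffe convention: the bias (second param entry) learns at twice
    # the weight learning rate and is excluded from weight decay.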
    net.seg_score = L.Deconvolution(net.derelu1_1,
                                    convolution_param=dict(num_output=2,
                                                           pad=1,
                                                           kernel_size=3,
                                                           **dekwargs),
                                    **deparam)

    if is_crop_last:
        out_layer = "seg_score_crop"
        net[out_layer] = L.CropBBox(net["seg_score"],
                                    net["cls_specific_bbox"],
                                    is_crop_score_map=True)
    else:
        out_layer = "seg_score"

    if is_deploy:
        net.seg_prob = L.Softmax(net[out_layer])
    else:
        net.seg_loss = L.SoftmaxWithLoss(net[out_layer],
                                         net.binary_mask,
                                         loss_param=dict(ignore_label=255))

    return net.to_proto()
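
A minimal usage sketch (hypothetical, not from the original source): it exercises only the deploy branch, where source, bbox_seg_data_param, and kwargs are never read, so placeholders are safe; the output path is made up.

# Hypothetical usage: write out a deploy prototxt.
deploy_proto = vgg16_ssd_seg(source=None,
                             bbox_seg_data_param=None,
                             kwargs={},
                             is_deploy=True)
with open('vgg16_ssd_seg_deploy.prototxt', 'w') as f:
    f.write(str(deploy_proto))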