예제 #1
0
def Face_train(net, from_layer="data", label="label", lr=1, decay=1):
    """Add the face-detection training layers to ``net`` and return it.

    The function builds a ``YoloNetPart`` backbone on ``from_layer``, fuses
    backbone layers into three feature maps ("featuremap11", "featuremap22",
    "featuremap33"), attaches an SSD header to those maps and stores a
    ``BBoxLoss`` layer as ``net.bbox_loss``.

    Args:
        net: network specification the layers are added to. It is indexed by
            blob name (presumably a caffe ``NetSpec`` -- confirm).
        from_layer: name of the blob fed into the backbone.
        label: name of the ground-truth blob appended to the loss inputs.
        lr: currently unused; the backbone is built with ``lr=0``.
        decay: currently unused; the backbone is built with ``decay=0``.

    Returns:
        The network with the detection layers and ``bbox_loss`` attached.
    """
    # Backbone. lr/decay are pinned to 0 here rather than taken from the
    # arguments (NOTE(review): presumably to freeze the backbone -- confirm).
    net = YoloNetPart(net,
                      from_layer=from_layer,
                      use_bn=True,
                      use_layers=6,
                      use_sub_layers=7,
                      lr=0,
                      decay=0)

    # Stride-2 1x1 convolution on conv2; its output "conv2_pool" is one of
    # the inputs fused into featuremap11 below.
    ConvBNUnitLayer(net,
                    "conv2",
                    "conv2_pool",
                    use_bn=True,
                    use_relu=True,
                    num_output=64,
                    kernel_size=1,
                    pad=0,
                    stride=2)

    # Fuse backbone layers into the three feature maps used by the header.
    net = UnifiedMultiScaleLayers(net,
                                  layers=["conv2_pool", "conv3_3", "conv4_3"],
                                  tags=["Down", "Down", "Ref"],
                                  unifiedlayer="featuremap11",
                                  dnsampleMethod=[["Conv"], ["MaxPool"]],
                                  dnsampleChannels=64)
    net = UnifiedMultiScaleLayers(net,
                                  layers=["conv4_3", "conv5_5"],
                                  tags=["Down", "Ref"],
                                  unifiedlayer="featuremap22",
                                  dnsampleMethod=[["MaxPool"]])
    net = UnifiedMultiScaleLayers(net,
                                  layers=["conv5_5", "conv6_7"],
                                  tags=["Down", "Ref"],
                                  unifiedlayer="featuremap33",
                                  dnsampleMethod=[["MaxPool"]],
                                  pad=True)

    mbox_layers = SSDHeader(
        net,
        data_layer="data",
        from_layers=["featuremap11", "featuremap22", "featuremap33"],
        input_height=Input_Height,
        input_width=Input_Width,
        **ssdparam)

    # Look the ground-truth blob up by the ``label`` argument instead of the
    # hard-coded ``net.label`` attribute, so a non-default name is honoured.
    mbox_layers.append(net[label])
    net.bbox_loss = L.BBoxLoss(*mbox_layers,
                               name="BBoxLoss",
                               bbox_loss_param=bbox_loss_param)
    return net
예제 #2
0
def MaskNet_Train(net, from_layer="data", label="label", lr=1, decay=1):
    """Add the detection + keypoint + mask training layers to ``net``.

    The graph consists of a ``YoloNetPartCompress`` backbone extended by
    ``addconv6``, an SSD header with a ``BBoxLoss``, a ``BoxMatching`` /
    ``RoiAlign`` stage producing "roi_maps", and two heads on those maps
    (``KpsHeader`` and ``MaskHeader``) each with its own loss.

    Args:
        net: network specification the layers are added to. It is indexed by
            blob name (presumably a caffe ``NetSpec`` -- confirm).
        from_layer: name of the blob fed into the backbone.
        label: name of the ground-truth blob that ``SplitLabel`` splits into
            ``net.bbox``, ``net.kps`` and ``net.mask``.
        lr: forwarded to ``MaskHeader`` only; the backbone, conv6 and
            keypoint head use the fixed values written below.
        decay: forwarded to ``KpsHeader`` and ``MaskHeader``.

    Returns:
        The network with ``bbox_loss``, ``kps_loss`` and ``mask_loss`` set.
    """
    # ==========================================================================
    # Backbone. The input blob now follows ``from_layer`` (previously the
    # argument was ignored in favour of a literal "data").
    net = YoloNetPartCompress(net,
                              from_layer=from_layer,
                              use_bn=True,
                              use_layers=5,
                              use_sub_layers=5,
                              strid_conv=[1, 1, 1, 0, 0],
                              final_pool=False,
                              lr=0.01,
                              decay=1)
    out_layer = "conv5_5"
    net = addconv6(net,
                   from_layer=out_layer,
                   use_bn=True,
                   conv6_output=[128, 128, 128, 128, 128, 128],
                   conv6_kernal_size=[3, 3, 3, 3, 3, 3],
                   pre_name="conv6",
                   start_pool=True,
                   lr_mult=0.1,
                   decay_mult=1,
                   n_group=1)
    # ==========================================================================
    # Label Split: one input blob -> bbox / kps / mask ground truth.
    net.bbox, net.kps, net.mask = L.SplitLabel(net[label],
                                               name="SplitLabel",
                                               ntop=3)
    # ==========================================================================
    # featuremap2: fuse [conv5_5, conv6_6]
    net = UnifiedMultiScaleLayers(net,
                                  layers=["conv5_5", "conv6_6"],
                                  tags=["Down", "Ref"],
                                  unifiedlayer="featuremap2",
                                  dnsampleMethod=[["MaxPool"]])
    # featuremap1: fuse [conv4_3, conv5_5] using Reorg down-sampling
    net = UnifiedMultiScaleLayers(net,
                                  layers=["conv4_3", "conv5_5"],
                                  tags=["Down", "Ref"],
                                  unifiedlayer="featuremap1",
                                  dnsampleMethod=[["Reorg"]])
    # convf_mask: fuse [conv4_3, conv5_5] using MaxPool down-sampling; this
    # is the feature map later sampled by RoiAlign.
    net = UnifiedMultiScaleLayers(net,
                                  layers=["conv4_3", "conv5_5"],
                                  tags=["Down", "Ref"],
                                  unifiedlayer="convf_mask",
                                  dnsampleMethod=[["MaxPool"]])
    # ==========================================================================
    # Create SSD Header
    mbox_layers = SSDHeader(net,
                            data_layer="data",
                            from_layers=["featuremap1", "featuremap2"],
                            input_height=Input_Height,
                            input_width=Input_Width,
                            **ssdparam)
    # BBox Loss
    mbox_layers.append(net.bbox)
    net.bbox_loss = L.BBoxLoss(*mbox_layers,
                               name="BBoxLoss",
                               bbox_loss_param=bbox_loss_param)
    # ==========================================================================
    # ROIs from the prior boxes and the ground-truth boxes.
    net.roi = L.BoxMatching(net["mbox_priorbox"],
                            net.bbox,
                            box_matching_param=box_matching_param)
    # ROI Align
    net.roi_maps = L.RoiAlign(net.convf_mask,
                              net.roi,
                              roi_align_param=roi_align_param)
    # ==========================================================================
    # Kps Layers (note: lr is fixed at 10 here, not taken from ``lr``)
    net = KpsHeader(net,
                    from_layer="roi_maps",
                    out_layer="kps_maps",
                    use_layers=kps_use_conv_layers,
                    num_channels=channels_of_kps,
                    all_kernel_size=kernel_size_of_kps,
                    pad=pad_of_kps,
                    use_deconv_layers=kps_use_deconv_layers,
                    lr=10,
                    decay=decay)
    net.kps_flatten = L.Flatten(net.kps_maps,
                                flatten_param=dict(axis=2, end_axis=-1))

    # ==========================================================================
    # Mask Layers
    net = MaskHeader(net,
                     from_layer="roi_maps",
                     out_layer="mask_maps",
                     use_layers=mask_use_conv_layers,
                     num_channels=channels_of_mask,
                     kernel_size=kernel_size_of_mask,
                     pad=pad_of_mask,
                     use_deconv_layers=mask_use_deconv_layers,
                     lr=lr,
                     decay=decay)
    # ==========================================================================
    # Labels for ROIs of Kps and Mask
    # kps-label gen
    net.kps_label_map, net.kps_label_flags = L.KpsGen(
        net.roi,
        net.kps,
        name="KpsGen",
        ntop=2,
        kps_gen_param=dict(resized_height=Rh_Kps,
                           resized_width=Rw_Kps,
                           use_softmax=True))

    # mask-label gen
    net.mask_label_map, net.mask_label_flags = L.MaskGen(
        net.roi,
        net.mask,
        name="MaskGen",
        ntop=2,
        mask_gen_param=dict(height=Input_Height,
                            width=Input_Width,
                            resized_height=Rh_Mask,
                            resized_width=Rw_Mask))
    # ==========================================================================
    # Kps-Loss & Mask-Loss
    net.kps_loss = L.MaskSoftmaxWithLoss(
        net.kps_flatten,
        net.kps_label_map,
        net.kps_label_flags,
        mask_loss_param=dict(scale=loss_scale_kps),
        loss_param=dict(normalization=0),
        softmax_param=dict(axis=2))
    net.mask_loss = L.MaskSigmoidCrossEntropyLoss(
        net.mask_maps,
        net.mask_label_map,
        net.mask_label_flags,
        mask_loss_param=dict(scale=loss_scale_mask))

    return net
예제 #3
0
def FaceBoxFPNNet(net, train=True, data_layer="data", gt_label="label",
                  net_width=512, net_height=288):
    """Build the FaceBox network with its stage-2 SSD head on ``net``.

    Layout, in the order layers are added:
      1. three stride-2 3x3 convolutions ("conv1".."conv3");
      2. four ``InceptionOfficialLayer`` units ("conv4_1".."conv4_4");
      3. an optional FPN section (conv5_x / conv6_x plus top-down fusion),
         currently disabled because ``flag_handusefpn`` is hard-coded False;
      4. a deconvolution + in-place ReLU on the last feature layer;
      5. SSD detector headers configured from the module-level
         ``ssd_Param_2`` dict (``stage=2``);
      6. in training mode a bounding-box loss ("mbox_2_loss"); otherwise a
         detection-output layer ("detection_out_2") followed by a
         ``DetEval`` layer ("det_accu") configured from ``eval_Param``.

    All layers built directly here use ``lr_mult=0`` / ``decay_mult=0``
    except the final deconvolution, which relies on ``getDecovArgs(64)``
    defaults.

    Args:
        net: network specification the layers are added to. It is indexed by
            blob name (presumably a caffe ``NetSpec`` -- confirm).
        train: build the loss (True) or the detection/evaluation layers.
        data_layer: name of the input blob.
        gt_label: name of the ground-truth blob.
        net_width: forwarded to ``SsdDetectorHeaders``.
        net_height: forwarded to ``SsdDetectorHeaders``.

    Returns:
        The network with all layers attached.

    Raises:
        ValueError: in test mode, when the configured confidence loss type
            is neither SOFTMAX nor LOGISTIC.
    """
    flag_handusefpn = False
    lr = 0
    decay = 0
    use_bn = False
    from_layer = data_layer

    # ------------------------------------------------------------------
    # Stem: conv1..conv3.
    num_channels = [32, 64, 128]
    k_sizes = [3, 3, 3]
    strides = [2, 2, 2]
    for i, (n_out, k_size, stride) in enumerate(zip(num_channels, k_sizes, strides)):
        add_layer = "conv{}".format(i + 1)
        # ``//`` keeps the padding an integer on Python 2 and Python 3 alike.
        ConvBNUnitLayer(net, from_layer, add_layer, use_bn=use_bn, use_relu=True, leaky=False,
                        num_output=n_out, kernel_size=k_size, pad=(k_size - 1) // 2, stride=stride,
                        use_scale=True, n_group=1, lr_mult=lr, decay_mult=decay)
        from_layer = add_layer
    layer_cnt = len(num_channels)

    # ------------------------------------------------------------------
    # Inception units: conv4_1..conv4_4, chained through their "/incep" tops.
    num_channels = [192, 192, 192, 192]
    divide_scale = 4
    f4_depth = len(num_channels)
    for i, n_chan in enumerate(num_channels):
        add_layer = "conv{}_{}".format(layer_cnt + 1, i + 1)
        net = InceptionOfficialLayer(net, from_layer, add_layer,
                                     channels_1=n_chan // divide_scale,
                                     channels_3=[n_chan // 8, n_chan // 4],
                                     channels_5=[n_chan // 8, n_chan // 4],
                                     channels_ave=n_chan // divide_scale,
                                     inter_bn=use_bn, leaky=False,
                                     lr=lr, decay=decay)
        from_layer = add_layer + "/incep"

    # ------------------------------------------------------------------
    # Optional FPN section (dead code while flag_handusefpn is False).
    if flag_handusefpn:
        # conv5_x
        layer_cnt += 1
        num_channels = [256, 128, 256, 128, 256]
        kernels = [3, 1, 3, 1, 3]
        strides = [2, 1, 1, 1, 1]
        f5_depth = len(num_channels)
        for i, (n_out, k_size, stride) in enumerate(zip(num_channels, kernels, strides)):
            add_layer = "conv{}_{}".format(layer_cnt + 1, i + 1)
            ConvBNUnitLayer(net, from_layer, add_layer, use_bn=use_bn, use_relu=True, leaky=False,
                            num_output=n_out, kernel_size=k_size, pad=k_size // 2, stride=stride,
                            use_scale=True, n_group=1, lr_mult=lr, decay_mult=decay)
            from_layer = add_layer
        # conv6_x
        layer_cnt += 1
        num_channels = [128, 128, 128, 128, 128]
        kernels = [3, 3, 3, 3, 3]
        strides = [2, 1, 1, 1, 1]
        f6_depth = len(num_channels)
        for i, (n_out, k_size, stride) in enumerate(zip(num_channels, kernels, strides)):
            add_layer = "conv{}_{}".format(layer_cnt + 1, i + 1)
            ConvBNUnitLayer(net, from_layer, add_layer, use_bn=use_bn, use_relu=True, leaky=False,
                            num_output=n_out, kernel_size=k_size, pad=k_size // 2, stride=stride,
                            use_scale=True, n_group=1, lr_mult=lr, decay_mult=decay)
            from_layer = add_layer
        # Top-down fusion.
        # f3 is the last conv6 layer.
        f3 = 'conv6_{}'.format(f6_depth)
        # feat5 = deconv(f3) + 1x1(last conv5 layer)
        out_layer_1 = f3 + '_deconv'
        net[out_layer_1] = L.Deconvolution(net[f3], **(getDecovArgs(256, lr, decay)))
        f2 = 'conv5_{}'.format(f5_depth)
        out_layer_2 = f2 + '_1x1'
        ConvBNUnitLayer(net, f2, out_layer_2, use_bn=False, use_relu=False, num_output=256,
                        kernel_size=1, pad=0, stride=1, lr_mult=lr, decay_mult=decay)
        out_layer = 'feat5'
        net[out_layer] = L.Eltwise(net[out_layer_2], net[out_layer_1],
                                   eltwise_param=dict(operation=P.Eltwise.SUM))
        net['feat5_relu'] = L.ReLU(net['feat5'], in_place=True)
        # feat4 = deconv(feat5) + 1x1(last conv4 inception output)
        out_layer_1 = out_layer + '_deconv'
        net[out_layer_1] = L.Deconvolution(net['feat5_relu'], **(getDecovArgs(192, lr, decay)))
        f1 = 'conv4_{}/incep'.format(f4_depth)
        out_layer_2 = f1 + '_1x1'
        ConvBNUnitLayer(net, f1, out_layer_2, use_bn=False, use_relu=False, num_output=192,
                        kernel_size=1, pad=0, stride=1, lr_mult=lr, decay_mult=decay)
        out_layer = 'feat4'
        net[out_layer] = L.Eltwise(net[out_layer_2], net[out_layer_1],
                                   eltwise_param=dict(operation=P.Eltwise.SUM))
        net['feat4_relu'] = L.ReLU(net['feat4'], in_place=True)
        from_layer = "feat4"

    # ------------------------------------------------------------------
    # Deconvolution + in-place ReLU on the final feature layer.
    add_layer = from_layer + "_deconv"
    net[add_layer] = L.Deconvolution(net[from_layer], **(getDecovArgs(64)))
    from_layer = add_layer
    add_layer = from_layer + "_relu"
    net[add_layer] = L.ReLU(net[from_layer], in_place=True)
    # Debug output kept from the original; the call form prints the same
    # text on Python 2 and is valid syntax on Python 3.
    print(net.keys())

    # ------------------------------------------------------------------
    # Detector headers for SSD2.
    use_dense_boxes = ssd_Param_2.get('bboxloss_use_dense_boxes', False)
    mbox_2_layers = SsdDetectorHeaders(
        net,
        net_width=net_width, net_height=net_height, data_layer=data_layer,
        from_layers=ssd_Param_2.get('feature_layers', []),
        num_classes=ssd_Param_2.get("num_classes", 2),
        boxsizes=ssd_Param_2.get("anchor_boxsizes", []),
        aspect_ratios=ssd_Param_2.get("anchor_aspect_ratios", []),
        prior_variance=ssd_Param_2.get("anchor_prior_variance", [0.1, 0.1, 0.2, 0.2]),
        flip=ssd_Param_2.get("anchor_flip", True),
        clip=ssd_Param_2.get("anchor_clip", True),
        normalizations=ssd_Param_2.get("interlayers_normalizations", []),
        use_batchnorm=ssd_Param_2.get("interlayers_use_batchnorm", True),
        inter_layer_channels=ssd_Param_2.get("interlayers_channels_kernels", []),
        use_focus_loss=ssd_Param_2.get("bboxloss_using_focus_loss", False),
        use_dense_boxes=ssd_Param_2.get('bboxloss_use_dense_boxes', False),
        stage=2)

    # ------------------------------------------------------------------
    # Loss (train) or detection output + evaluation (test) for SSD2.
    if train:
        loss_param = get_loss_param(normalization=ssd_Param_2.get("bboxloss_normalization", P.Loss.VALID))
        mbox_2_layers.append(net[gt_label])
        # The dense and regular losses only differ in the default confidence
        # loss type and in a handful of extra keys added below.
        if use_dense_boxes:
            default_conf_loss = P.MultiBoxLoss.LOGISTIC
        else:
            default_conf_loss = P.MultiBoxLoss.SOFTMAX
        bboxloss_param = {
            'gt_labels': ssd_Param_2.get('gt_labels', []),
            'target_labels': ssd_Param_2.get('target_labels', []),
            'num_classes': ssd_Param_2.get("num_classes", 2),
            'alias_id': ssd_Param_2.get("alias_id", 0),
            'loc_loss_type': ssd_Param_2.get("bboxloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
            'conf_loss_type': ssd_Param_2.get("bboxloss_conf_loss_type", default_conf_loss),
            'loc_weight': ssd_Param_2.get("bboxloss_loc_weight", 1),
            'conf_weight': ssd_Param_2.get("bboxloss_conf_weight", 1),
            'overlap_threshold': ssd_Param_2.get("bboxloss_overlap_threshold", 0.5),
            'neg_overlap': ssd_Param_2.get("bboxloss_neg_overlap", 0.5),
            'size_threshold': ssd_Param_2.get("bboxloss_size_threshold", 0.0001),
            'do_neg_mining': ssd_Param_2.get("bboxloss_do_neg_mining", True),
            'neg_pos_ratio': ssd_Param_2.get("bboxloss_neg_pos_ratio", 3),
            'using_focus_loss': ssd_Param_2.get("bboxloss_using_focus_loss", False),
            'gama': ssd_Param_2.get("bboxloss_focus_gama", 2),
            'use_difficult_gt': ssd_Param_2.get("bboxloss_use_difficult_gt", False),
            'code_type': ssd_Param_2.get("bboxloss_code_type", P.PriorBox.CENTER_SIZE),
            'use_prior_for_matching': True,
            'encode_variance_in_target': False,
        }
        if use_dense_boxes:
            bboxloss_param.update({
                'flag_noperson': ssd_Param_2.get('flag_noperson', False),
                'size_threshold_max': ssd_Param_2.get("bboxloss_size_threshold_max", 2),
                'flag_showdebug': ssd_Param_2.get("flag_showdebug", False),
                'flag_forcematchallgt': ssd_Param_2.get("flag_forcematchallgt", False),
                'flag_areamaxcheckinmatch': ssd_Param_2.get("flag_areamaxcheckinmatch", False),
            })
            net["mbox_2_loss"] = L.DenseBBoxLoss(*mbox_2_layers,
                                                 dense_bbox_loss_param=bboxloss_param,
                                                 loss_param=loss_param,
                                                 include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                                                 propagate_down=[True, True, False, False])
        else:
            bboxloss_param.update({
                'match_type': P.MultiBoxLoss.PER_PREDICTION,
                'share_location': True,
                'background_label_id': 0,
                'map_object_to_agnostic': False,
            })
            net["mbox_2_loss"] = L.BBoxLoss(*mbox_2_layers,
                                            bbox_loss_param=bboxloss_param,
                                            loss_param=loss_param,
                                            include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                                            propagate_down=[True, True, False, False])
    else:
        # Turn the raw confidence blob (index 1 of the header outputs) into
        # scores according to the configured confidence loss type.
        conf_loss_type = ssd_Param_2.get("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
        if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
            reshape_name = "mbox_2_conf_reshape"
            net[reshape_name] = L.Reshape(mbox_2_layers[1],
                                          shape=dict(dim=[0, -1, ssd_Param_2.get("num_classes", 2)]))
            softmax_name = "mbox_2_conf_softmax"
            net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
            flatten_name = "mbox_2_conf_flatten"
            net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
            mbox_2_layers[1] = net[flatten_name]
        elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
            sigmoid_name = "mbox_2_conf_sigmoid"
            net[sigmoid_name] = L.Sigmoid(mbox_2_layers[1])
            mbox_2_layers[1] = net[sigmoid_name]
        else:
            raise ValueError("Unknown conf loss type.")
        # Det-out param
        det_out_param = {
            'num_classes': ssd_Param_2.get("num_classes", 2),
            'target_labels': ssd_Param_2.get('detout_target_labels', []),
            'alias_id': ssd_Param_2.get("alias_id", 0),
            'conf_threshold': ssd_Param_2.get("detout_conf_threshold", 0.01),
            'nms_threshold': ssd_Param_2.get("detout_nms_threshold", 0.45),
            'size_threshold': ssd_Param_2.get("detout_size_threshold", 0.0001),
            'top_k': ssd_Param_2.get("detout_top_k", 30),
            'share_location': True,
            'code_type': P.PriorBox.CENTER_SIZE,
            'background_label_id': 0,
            'variance_encoded_in_target': False,
        }
        if use_dense_boxes:
            net.detection_out_2 = L.DenseDetOut(*mbox_2_layers,
                                                detection_output_param=det_out_param,
                                                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.detection_out_2 = L.DetOut(*mbox_2_layers,
                                           detection_output_param=det_out_param,
                                           include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        # Evaluation layer, test mode only.
        det_eval_param = {
            'gt_labels': eval_Param.get('eval_gt_labels', []),
            'num_classes': eval_Param.get("eval_num_classes", 2),
            'evaluate_difficult_gt': eval_Param.get("eval_difficult_gt", False),
            'boxsize_threshold': eval_Param.get("eval_boxsize_threshold", [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
            'iou_threshold': eval_Param.get("eval_iou_threshold", [0.9, 0.75, 0.5]),
            'background_label_id': 0,
        }
        net.det_accu = L.DetEval(net['detection_out_2'], net[gt_label],
                                 detection_evaluate_param=det_eval_param,
                                 include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    return net
예제 #4
0
def DAPNetVGGDark(net, train=True, data_layer="data", gt_label="label", \
           net_width=512, net_height=288):
    # BaseNet
    flag_withparamname = True
    net = VGGDarkNet(net,
                     data_layer="data",
                     flag_withparamname=flag_withparamname)
    # Add Conv6
    conv6_output = Conv6_Param.get('conv6_output', [])
    conv6_kernal_size = Conv6_Param.get('conv6_kernal_size', [])
    out_layer = "pool5"
    net = addconv6(net, from_layer=out_layer, use_bn=True, conv6_output=conv6_output, \
        conv6_kernal_size=conv6_kernal_size, pre_name="conv6",start_pool=False,lr_mult=1, decay_mult=1.0,n_group=1)
    featuremap1 = ["conv2", "conv3_3"]
    tags = ["Down", "Ref"]
    down_methods = [["MaxPool"]]
    out_layer = "featuremap1"
    UnifiedMultiScaleLayers(net,
                            layers=featuremap1,
                            tags=tags,
                            unifiedlayer=out_layer,
                            dnsampleMethod=down_methods)
    # Concat FM2
    featuremap2 = ["conv3_3", "conv4_5"]
    tags = ["Ref", "Ref"]
    down_methods = [["MaxPool"]]
    out_layer = "featuremap2"
    UnifiedMultiScaleLayers(net,
                            layers=featuremap2,
                            tags=tags,
                            unifiedlayer=out_layer,
                            dnsampleMethod=down_methods)
    # Concat FM3
    c6_layer = 'conv6_{}'.format(len(Conv6_Param['conv6_output']))
    featuremap3 = ["pool5", c6_layer]
    tags = ["Ref", "Ref"]
    down_methods = [["MaxPool"]]
    out_layer = "featuremap3"
    UnifiedMultiScaleLayers(net,
                            layers=featuremap3,
                            tags=tags,
                            unifiedlayer=out_layer,
                            dnsampleMethod=down_methods)
    # Create SSD Header for SSD1
    lr_mult = 1
    decay_mult = 1.0
    mbox_1_layers = SsdDetectorHeaders(net, \
         net_width=net_width, net_height=net_height, data_layer=data_layer, \
         from_layers=ssd_Param_1.get('feature_layers',[]), \
         num_classes=ssd_Param_1.get("num_classes",2), \
         boxsizes=ssd_Param_1.get("anchor_boxsizes", []), \
         aspect_ratios=ssd_Param_1.get("anchor_aspect_ratios",[]), \
         prior_variance = ssd_Param_1.get("anchor_prior_variance",[0.1,0.1,0.2,0.2]), \
         flip=ssd_Param_1.get("anchor_flip",True), \
         clip=ssd_Param_1.get("anchor_clip",True), \
         normalizations=ssd_Param_1.get("interlayers_normalizations",[]), \
         use_batchnorm=ssd_Param_1.get("interlayers_use_batchnorm",True), \
         inter_layer_channels=ssd_Param_1.get("interlayers_channels_kernels",[]), \
         use_focus_loss=ssd_Param_1.get("bboxloss_using_focus_loss",False), \
         use_dense_boxes=ssd_Param_1.get('bboxloss_use_dense_boxes',False), \
         stage=1,lr_mult=lr_mult, decay_mult=decay_mult)
    # make Loss or Detout for SSD1
    if train:
        loss_param = get_loss_param(normalization=ssd_Param_1.get(
            "bboxloss_normalization", P.Loss.VALID))
        mbox_1_layers.append(net[gt_label])
        use_dense_boxes = ssd_Param_1.get('bboxloss_use_dense_boxes', False)
        if use_dense_boxes:
            bboxloss_param = {
                'gt_labels':
                ssd_Param_1.get('gt_labels', []),
                'target_labels':
                ssd_Param_1.get('target_labels', []),
                'num_classes':
                ssd_Param_1.get("num_classes", 2),
                'alias_id':
                ssd_Param_1.get("alias_id", 0),
                'loc_loss_type':
                ssd_Param_1.get("bboxloss_loc_loss_type",
                                P.MultiBoxLoss.SMOOTH_L1),
                'conf_loss_type':
                ssd_Param_1.get("bboxloss_conf_loss_type",
                                P.MultiBoxLoss.LOGISTIC),
                'loc_weight':
                ssd_Param_1.get("bboxloss_loc_weight", 1),
                'conf_weight':
                ssd_Param_1.get("bboxloss_conf_weight", 1),
                'overlap_threshold':
                ssd_Param_1.get("bboxloss_overlap_threshold", 0.5),
                'neg_overlap':
                ssd_Param_1.get("bboxloss_neg_overlap", 0.5),
                'size_threshold':
                ssd_Param_1.get("bboxloss_size_threshold", 0.0001),
                'do_neg_mining':
                ssd_Param_1.get("bboxloss_do_neg_mining", True),
                'neg_pos_ratio':
                ssd_Param_1.get("bboxloss_neg_pos_ratio", 3),
                'using_focus_loss':
                ssd_Param_1.get("bboxloss_using_focus_loss", False),
                'gama':
                ssd_Param_1.get("bboxloss_focus_gama", 2),
                'use_difficult_gt':
                ssd_Param_1.get("bboxloss_use_difficult_gt", False),
                'code_type':
                ssd_Param_1.get("bboxloss_code_type", P.PriorBox.CENTER_SIZE),
                'use_prior_for_matching':
                True,
                'encode_variance_in_target':
                False,
                'flag_noperson':
                ssd_Param_1.get('flag_noperson', False),
            }
            net["mbox_1_loss"] = L.DenseBBoxLoss(*mbox_1_layers, dense_bbox_loss_param=bboxloss_param, \
                                    loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                                    propagate_down=[True, True, False, False])
        else:
            bboxloss_param = {
                'gt_labels':
                ssd_Param_1.get('gt_labels', []),
                'target_labels':
                ssd_Param_1.get('target_labels', []),
                'num_classes':
                ssd_Param_1.get("num_classes", 2),
                'alias_id':
                ssd_Param_1.get("alias_id", 0),
                'loc_loss_type':
                ssd_Param_1.get("bboxloss_loc_loss_type",
                                P.MultiBoxLoss.SMOOTH_L1),
                'conf_loss_type':
                ssd_Param_1.get("bboxloss_conf_loss_type",
                                P.MultiBoxLoss.SOFTMAX),
                'loc_weight':
                ssd_Param_1.get("bboxloss_loc_weight", 1),
                'conf_weight':
                ssd_Param_1.get("bboxloss_conf_weight", 1),
                'overlap_threshold':
                ssd_Param_1.get("bboxloss_overlap_threshold", 0.5),
                'neg_overlap':
                ssd_Param_1.get("bboxloss_neg_overlap", 0.5),
                'size_threshold':
                ssd_Param_1.get("bboxloss_size_threshold", 0.0001),
                'do_neg_mining':
                ssd_Param_1.get("bboxloss_do_neg_mining", True),
                'neg_pos_ratio':
                ssd_Param_1.get("bboxloss_neg_pos_ratio", 3),
                'using_focus_loss':
                ssd_Param_1.get("bboxloss_using_focus_loss", False),
                'gama':
                ssd_Param_1.get("bboxloss_focus_gama", 2),
                'use_difficult_gt':
                ssd_Param_1.get("bboxloss_use_difficult_gt", False),
                'code_type':
                ssd_Param_1.get("bboxloss_code_type", P.PriorBox.CENTER_SIZE),
                'match_type':
                P.MultiBoxLoss.PER_PREDICTION,
                'share_location':
                True,
                'use_prior_for_matching':
                True,
                'background_label_id':
                0,
                'encode_variance_in_target':
                False,
                'map_object_to_agnostic':
                False,
            }
            net["mbox_1_loss"] = L.BBoxLoss(*mbox_1_layers, bbox_loss_param=bboxloss_param, \
                        loss_param=loss_param,include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                        propagate_down=[True, True, False, False])
    else:
        if ssd_Param_1.get("bboxloss_conf_loss_type",
                           P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.SOFTMAX:
            reshape_name = "mbox_1_conf_reshape"
            net[reshape_name] = L.Reshape(mbox_1_layers[1], \
                    shape=dict(dim=[0, -1, ssd_Param_1.get("num_classes",2)]))
            softmax_name = "mbox_1_conf_softmax"
            net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
            flatten_name = "mbox_1_conf_flatten"
            net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
            mbox_1_layers[1] = net[flatten_name]
        elif ssd_Param_1.get(
                "bboxloss_conf_loss_type",
                P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.LOGISTIC:
            sigmoid_name = "mbox_1_conf_sigmoid"
            net[sigmoid_name] = L.Sigmoid(mbox_1_layers[1])
            mbox_1_layers[1] = net[sigmoid_name]
        else:
            raise ValueError("Unknown conf loss type.")
        # Det-out param
        det_out_param = {
            'num_classes': ssd_Param_1.get("num_classes", 2),
            'target_labels': ssd_Param_1.get('detout_target_labels', []),
            'alias_id': ssd_Param_1.get("alias_id", 0),
            'conf_threshold': ssd_Param_1.get("detout_conf_threshold", 0.01),
            'nms_threshold': ssd_Param_1.get("detout_nms_threshold", 0.45),
            'size_threshold': ssd_Param_1.get("detout_size_threshold", 0.0001),
            'top_k': ssd_Param_1.get("detout_top_k", 30),
            'share_location': True,
            'code_type': P.PriorBox.CENTER_SIZE,
            'background_label_id': 0,
            'variance_encoded_in_target': False,
        }
        use_dense_boxes = ssd_Param_1.get('bboxloss_use_dense_boxes', False)
        if use_dense_boxes:
            net.detection_out_1 = L.DenseDetOut(*mbox_1_layers, \
            detection_output_param=det_out_param, \
            include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.detection_out_1 = L.DetOut(*mbox_1_layers, \
         detection_output_param=det_out_param, \
         include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    # make Loss & Detout for SSD2
    lr_mult = 1.0
    decay_mult = 1.0
    if use_ssd2_for_detection:
        mbox_2_layers = SsdDetectorHeaders(net, \
             net_width=net_width, net_height=net_height, data_layer=data_layer, \
             from_layers=ssd_Param_2.get('feature_layers',[]), \
             num_classes=ssd_Param_2.get("num_classes",2), \
             boxsizes=ssd_Param_2.get("anchor_boxsizes", []), \
             aspect_ratios=ssd_Param_2.get("anchor_aspect_ratios",[]), \
             prior_variance = ssd_Param_2.get("anchor_prior_variance",[0.1,0.1,0.2,0.2]), \
             flip=ssd_Param_2.get("anchor_flip",True), \
             clip=ssd_Param_2.get("anchor_clip",True), \
             normalizations=ssd_Param_2.get("interlayers_normalizations",[]), \
             use_batchnorm=ssd_Param_2.get("interlayers_use_batchnorm",True), \
             inter_layer_channels=ssd_Param_2.get("interlayers_channels_kernels",[]), \
             use_focus_loss=ssd_Param_2.get("bboxloss_using_focus_loss",False), \
             use_dense_boxes=ssd_Param_2.get('bboxloss_use_dense_boxes',False), \
             stage=2,lr_mult=lr_mult, decay_mult=decay_mult)
        # make Loss or Detout for SSD1
        if train:
            loss_param = get_loss_param(normalization=ssd_Param_2.get(
                "bboxloss_normalization", P.Loss.VALID))
            mbox_2_layers.append(net[gt_label])
            use_dense_boxes = ssd_Param_2.get('bboxloss_use_dense_boxes',
                                              False)
            if use_dense_boxes:
                bboxloss_param = {
                    'gt_labels':
                    ssd_Param_2.get('gt_labels', []),
                    'target_labels':
                    ssd_Param_2.get('target_labels', []),
                    'num_classes':
                    ssd_Param_2.get("num_classes", 2),
                    'alias_id':
                    ssd_Param_2.get("alias_id", 0),
                    'loc_loss_type':
                    ssd_Param_2.get("bboxloss_loc_loss_type",
                                    P.MultiBoxLoss.SMOOTH_L1),
                    'conf_loss_type':
                    ssd_Param_2.get("bboxloss_conf_loss_type",
                                    P.MultiBoxLoss.LOGISTIC),
                    'loc_weight':
                    ssd_Param_2.get("bboxloss_loc_weight", 1),
                    'conf_weight':
                    ssd_Param_2.get("bboxloss_conf_weight", 1),
                    'overlap_threshold':
                    ssd_Param_2.get("bboxloss_overlap_threshold", 0.5),
                    'neg_overlap':
                    ssd_Param_2.get("bboxloss_neg_overlap", 0.5),
                    'size_threshold':
                    ssd_Param_2.get("bboxloss_size_threshold", 0.0001),
                    'do_neg_mining':
                    ssd_Param_2.get("bboxloss_do_neg_mining", True),
                    'neg_pos_ratio':
                    ssd_Param_2.get("bboxloss_neg_pos_ratio", 3),
                    'using_focus_loss':
                    ssd_Param_2.get("bboxloss_using_focus_loss", False),
                    'gama':
                    ssd_Param_2.get("bboxloss_focus_gama", 2),
                    'use_difficult_gt':
                    ssd_Param_2.get("bboxloss_use_difficult_gt", False),
                    'code_type':
                    ssd_Param_2.get("bboxloss_code_type",
                                    P.PriorBox.CENTER_SIZE),
                    'use_prior_for_matching':
                    True,
                    'encode_variance_in_target':
                    False,
                    'flag_noperson':
                    ssd_Param_2.get('flag_noperson', False),
                }
                net["mbox_2_loss"] = L.DenseBBoxLoss(*mbox_2_layers, dense_bbox_loss_param=bboxloss_param, \
                                        loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                                        propagate_down=[True, True, False, False])
            else:
                bboxloss_param = {
                    'gt_labels':
                    ssd_Param_2.get('gt_labels', []),
                    'target_labels':
                    ssd_Param_2.get('target_labels', []),
                    'num_classes':
                    ssd_Param_2.get("num_classes", 2),
                    'alias_id':
                    ssd_Param_2.get("alias_id", 0),
                    'loc_loss_type':
                    ssd_Param_2.get("bboxloss_loc_loss_type",
                                    P.MultiBoxLoss.SMOOTH_L1),
                    'conf_loss_type':
                    ssd_Param_2.get("bboxloss_conf_loss_type",
                                    P.MultiBoxLoss.SOFTMAX),
                    'loc_weight':
                    ssd_Param_2.get("bboxloss_loc_weight", 1),
                    'conf_weight':
                    ssd_Param_2.get("bboxloss_conf_weight", 1),
                    'overlap_threshold':
                    ssd_Param_2.get("bboxloss_overlap_threshold", 0.5),
                    'neg_overlap':
                    ssd_Param_2.get("bboxloss_neg_overlap", 0.5),
                    'size_threshold':
                    ssd_Param_2.get("bboxloss_size_threshold", 0.0001),
                    'do_neg_mining':
                    ssd_Param_2.get("bboxloss_do_neg_mining", True),
                    'neg_pos_ratio':
                    ssd_Param_2.get("bboxloss_neg_pos_ratio", 3),
                    'using_focus_loss':
                    ssd_Param_2.get("bboxloss_using_focus_loss", False),
                    'gama':
                    ssd_Param_2.get("bboxloss_focus_gama", 2),
                    'use_difficult_gt':
                    ssd_Param_2.get("bboxloss_use_difficult_gt", False),
                    'code_type':
                    ssd_Param_2.get("bboxloss_code_type",
                                    P.PriorBox.CENTER_SIZE),
                    'match_type':
                    P.MultiBoxLoss.PER_PREDICTION,
                    'share_location':
                    True,
                    'use_prior_for_matching':
                    True,
                    'background_label_id':
                    0,
                    'encode_variance_in_target':
                    False,
                    'map_object_to_agnostic':
                    False,
                }
                net["mbox_2_loss"] = L.BBoxLoss(*mbox_2_layers, bbox_loss_param=bboxloss_param, \
                            loss_param=loss_param,include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                            propagate_down=[True, True, False, False])
        else:
            if ssd_Param_2.get(
                    "bboxloss_conf_loss_type",
                    P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.SOFTMAX:
                reshape_name = "mbox_2_conf_reshape"
                net[reshape_name] = L.Reshape(mbox_2_layers[1], \
                        shape=dict(dim=[0, -1, ssd_Param_2.get("num_classes",2)]))
                softmax_name = "mbox_2_conf_softmax"
                net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
                flatten_name = "mbox_2_conf_flatten"
                net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
                mbox_2_layers[1] = net[flatten_name]
            elif ssd_Param_2.get(
                    "bboxloss_conf_loss_type",
                    P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.LOGISTIC:
                sigmoid_name = "mbox_2_conf_sigmoid"
                net[sigmoid_name] = L.Sigmoid(mbox_2_layers[1])
                mbox_2_layers[1] = net[sigmoid_name]
            else:
                raise ValueError("Unknown conf loss type.")
            # Det-out param
            det_out_param = {
                'num_classes': ssd_Param_2.get("num_classes", 2),
                'target_labels': ssd_Param_2.get('detout_target_labels', []),
                'alias_id': ssd_Param_2.get("alias_id", 0),
                'conf_threshold': ssd_Param_2.get("detout_conf_threshold",
                                                  0.01),
                'nms_threshold': ssd_Param_2.get("detout_nms_threshold", 0.45),
                'size_threshold': ssd_Param_2.get("detout_size_threshold",
                                                  0.0001),
                'top_k': ssd_Param_2.get("detout_top_k", 30),
                'share_location': True,
                'code_type': P.PriorBox.CENTER_SIZE,
                'background_label_id': 0,
                'variance_encoded_in_target': False,
            }
            use_dense_boxes = ssd_Param_2.get('bboxloss_use_dense_boxes',
                                              False)
            if use_dense_boxes:
                net.detection_out_2 = L.DenseDetOut(*mbox_2_layers, \
                detection_output_param=det_out_param, \
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
            else:
                net.detection_out_2 = L.DetOut(*mbox_2_layers, \
             detection_output_param=det_out_param, \
             include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    # EVAL in TEST MODE
    if not train:
        det_eval_param = {
            'gt_labels':
            eval_Param.get('eval_gt_labels', []),
            'num_classes':
            eval_Param.get("eval_num_classes", 2),
            'evaluate_difficult_gt':
            eval_Param.get("eval_difficult_gt", False),
            'boxsize_threshold':
            eval_Param.get("eval_boxsize_threshold",
                           [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
            'iou_threshold':
            eval_Param.get("eval_iou_threshold", [0.9, 0.75, 0.5]),
            'background_label_id':
            0,
        }
        if use_ssd2_for_detection:
            det_out_layers = []
            det_out_layers.append(net['detection_out_1'])
            det_out_layers.append(net['detection_out_2'])
            name = 'det_out'
            net[name] = L.Concat(*det_out_layers, axis=2)
            net.det_accu = L.DetEval(net[name], net[gt_label], \
                   detection_evaluate_param=det_eval_param, \
                   include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.det_accu = L.DetEval(net['detection_out_1'], net[gt_label], \
                   detection_evaluate_param=det_eval_param, \
                   include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    return net
예제 #5
0
def HandNet_DarkBase(net, train=True, data_layer="data", gt_label="label",
                     net_width=512, net_height=288):
    """Attach a dark-base backbone and the SSD2 hand-detection head to ``net``.

    The function builds, in order:
      1. a ``VGG16_BaseNet_ChangeChannel`` backbone fed from ``data_layer``
         (called with ``lr_mult=0`` and ``decay_mult=0``),
      2. a ``Deconv`` unit on top of ``conv4_5``,
      3. the SSD headers returned by ``SsdDetectorHeaders`` (``stage=2``),
      4. either the training loss layer ``mbox_2_loss`` or, in test mode, the
         ``detection_out_2`` and ``det_accu`` layers.

    All detector/loss/evaluation settings are read from the module-level
    dictionaries ``ssd_Param_2`` and ``eval_Param``.

    Args:
        net: Network specification the layers are added to (presumably a
            caffe ``NetSpec`` -- confirm against the caller).
        train: Truthy to emit the TRAIN-phase loss layer, falsy to emit the
            TEST-phase detection output and evaluation layers.
        data_layer: Name of the input layer; used as ``from_layer`` of the
            backbone and forwarded to ``SsdDetectorHeaders``.
        gt_label: Key under which the ground-truth blob is stored in ``net``.
        net_width: Forwarded to ``SsdDetectorHeaders``.
        net_height: Forwarded to ``SsdDetectorHeaders``.

    Returns:
        The same ``net`` object with the new layers attached.

    Raises:
        ValueError: In test mode, when ``bboxloss_conf_loss_type`` is neither
            ``P.MultiBoxLoss.SOFTMAX`` nor ``P.MultiBoxLoss.LOGISTIC``.
    """
    # Backbone configuration. ``lr_mult`` is also passed as ``decay_mult``
    # below, so both multipliers handed to the backbone are 0.
    use_bn = False
    lr_mult = 0
    use_global_stats = None
    channels = ((32, ), (64, ), (128, 64, 128), (192, 96, 192, 96, 192))
    strides = (True, True, True, False)
    kernels = ((3, ), (3, ), (3, 1, 3), (3, 1, 3, 1, 3))
    pool_last = (False, False, False, True)
    net = VGG16_BaseNet_ChangeChannel(net,
                                      from_layer=data_layer,
                                      channels=channels,
                                      strides=strides,
                                      use_bn=use_bn,
                                      kernels=kernels,
                                      freeze_layers=[],
                                      pool_last=pool_last,
                                      lr_mult=lr_mult,
                                      decay_mult=lr_mult,
                                      use_global_stats=use_global_stats)

    # Hard-coded build switches kept from the original experiment setup.
    flag_with_deconv = True
    flag_eltwise = False
    if flag_with_deconv:
        Deconv(net,
               "conv4_5",
               num_output=64,
               group=1,
               kernel_size=2,
               stride=2,
               lr_mult=1.0,
               decay_mult=1.0,
               use_bn=True,
               use_scale=True,
               use_relu=False)
    # Debug listing of the layers created so far. Written as a call so the
    # line parses on both Python 2 and Python 3 (same output on Python 2).
    print(net.keys())
    if flag_eltwise:
        # Currently disabled: sums a strided conv on "conv1" with a deconv
        # output and applies an in-place ReLU.
        # NOTE(review): the deconv above is built from "conv4_5" but this
        # branch reads "conv4_3_deconv" -- confirm the name Deconv registers
        # before enabling flag_eltwise.
        ConvBNUnitLayer(net,
                        "conv1",
                        "conv2_hand",
                        use_bn=True,
                        use_relu=False,
                        num_output=64,
                        kernel_size=3,
                        pad=1,
                        stride=2,
                        use_scale=True,
                        leaky=False,
                        lr_mult=1,
                        decay_mult=1)
        net["hand_multiscale"] = L.Eltwise(
            net["conv2_hand"],
            net["conv4_3_deconv"],
            eltwise_param=dict(operation=P.Eltwise.SUM))
        net["hand_multiscale_relu"] = L.ReLU(net["hand_multiscale"],
                                             in_place=True)

    # SSD headers for the SSD2 branch, configured from ssd_Param_2.
    cfg = ssd_Param_2
    mbox_2_layers = SsdDetectorHeaders(
        net,
        net_width=net_width,
        net_height=net_height,
        data_layer=data_layer,
        from_layers=cfg.get('feature_layers', []),
        num_classes=cfg.get("num_classes", 2),
        boxsizes=cfg.get("anchor_boxsizes", []),
        aspect_ratios=cfg.get("anchor_aspect_ratios", []),
        prior_variance=cfg.get("anchor_prior_variance",
                               [0.1, 0.1, 0.2, 0.2]),
        flip=cfg.get("anchor_flip", True),
        clip=cfg.get("anchor_clip", True),
        normalizations=cfg.get("interlayers_normalizations", []),
        use_batchnorm=cfg.get("interlayers_use_batchnorm", True),
        inter_layer_channels=cfg.get("interlayers_channels_kernels", []),
        use_focus_loss=cfg.get("bboxloss_using_focus_loss", False),
        use_dense_boxes=cfg.get('bboxloss_use_dense_boxes', False),
        stage=2)

    if train:
        # Training: attach the SSD2 loss layer.
        loss_param = get_loss_param(
            normalization=cfg.get("bboxloss_normalization", P.Loss.VALID))
        mbox_2_layers.append(net[gt_label])
        use_dense_boxes = cfg.get('bboxloss_use_dense_boxes', False)
        # The dense and regular losses differ only in the fallback used for
        # conf_loss_type and in a handful of variant-specific entries.
        if use_dense_boxes:
            default_conf_loss = P.MultiBoxLoss.LOGISTIC
        else:
            default_conf_loss = P.MultiBoxLoss.SOFTMAX
        bboxloss_param = {
            'gt_labels': cfg.get('gt_labels', []),
            'target_labels': cfg.get('target_labels', []),
            'num_classes': cfg.get("num_classes", 2),
            'alias_id': cfg.get("alias_id", 0),
            'loc_loss_type': cfg.get("bboxloss_loc_loss_type",
                                     P.MultiBoxLoss.SMOOTH_L1),
            'conf_loss_type': cfg.get("bboxloss_conf_loss_type",
                                      default_conf_loss),
            'loc_weight': cfg.get("bboxloss_loc_weight", 1),
            'conf_weight': cfg.get("bboxloss_conf_weight", 1),
            'overlap_threshold': cfg.get("bboxloss_overlap_threshold", 0.5),
            'neg_overlap': cfg.get("bboxloss_neg_overlap", 0.5),
            'size_threshold': cfg.get("bboxloss_size_threshold", 0.0001),
            'do_neg_mining': cfg.get("bboxloss_do_neg_mining", True),
            'neg_pos_ratio': cfg.get("bboxloss_neg_pos_ratio", 3),
            'using_focus_loss': cfg.get("bboxloss_using_focus_loss", False),
            'gama': cfg.get("bboxloss_focus_gama", 2),
            'use_difficult_gt': cfg.get("bboxloss_use_difficult_gt", False),
            'code_type': cfg.get("bboxloss_code_type",
                                 P.PriorBox.CENTER_SIZE),
            'use_prior_for_matching': True,
            'encode_variance_in_target': False,
        }
        if use_dense_boxes:
            bboxloss_param.update({
                'flag_noperson':
                cfg.get('flag_noperson', False),
                'size_threshold_max':
                cfg.get("bboxloss_size_threshold_max", 2),
                'flag_showdebug':
                cfg.get("flag_showdebug", False),
                'flag_forcematchallgt':
                cfg.get("flag_forcematchallgt", False),
                'flag_areamaxcheckinmatch':
                cfg.get("flag_areamaxcheckinmatch", False),
            })
            net["mbox_2_loss"] = L.DenseBBoxLoss(
                *mbox_2_layers,
                dense_bbox_loss_param=bboxloss_param,
                loss_param=loss_param,
                include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        else:
            bboxloss_param.update({
                'match_type': P.MultiBoxLoss.PER_PREDICTION,
                'share_location': True,
                'background_label_id': 0,
                'map_object_to_agnostic': False,
            })
            net["mbox_2_loss"] = L.BBoxLoss(
                *mbox_2_layers,
                bbox_loss_param=bboxloss_param,
                loss_param=loss_param,
                include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
    else:
        # Test: turn the raw confidence blob (index 1 of the header list)
        # into scores, then attach detection output and evaluation layers.
        conf_loss_type = cfg.get("bboxloss_conf_loss_type",
                                 P.MultiBoxLoss.SOFTMAX)
        if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
            net["mbox_2_conf_reshape"] = L.Reshape(
                mbox_2_layers[1],
                shape=dict(dim=[0, -1, cfg.get("num_classes", 2)]))
            net["mbox_2_conf_softmax"] = L.Softmax(
                net["mbox_2_conf_reshape"], axis=2)
            net["mbox_2_conf_flatten"] = L.Flatten(
                net["mbox_2_conf_softmax"], axis=1)
            mbox_2_layers[1] = net["mbox_2_conf_flatten"]
        elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
            net["mbox_2_conf_sigmoid"] = L.Sigmoid(mbox_2_layers[1])
            mbox_2_layers[1] = net["mbox_2_conf_sigmoid"]
        else:
            raise ValueError("Unknown conf loss type.")

        # Detection-output parameters.
        det_out_param = {
            'num_classes': cfg.get("num_classes", 2),
            'target_labels': cfg.get('detout_target_labels', []),
            'alias_id': cfg.get("alias_id", 0),
            'conf_threshold': cfg.get("detout_conf_threshold", 0.01),
            'nms_threshold': cfg.get("detout_nms_threshold", 0.45),
            'size_threshold': cfg.get("detout_size_threshold", 0.0001),
            'top_k': cfg.get("detout_top_k", 30),
            'share_location': True,
            'code_type': P.PriorBox.CENTER_SIZE,
            'background_label_id': 0,
            'variance_encoded_in_target': False,
        }
        if cfg.get('bboxloss_use_dense_boxes', False):
            net.detection_out_2 = L.DenseDetOut(
                *mbox_2_layers,
                detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.detection_out_2 = L.DetOut(
                *mbox_2_layers,
                detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))

        # Evaluation layer, configured from eval_Param.
        det_eval_param = {
            'gt_labels': eval_Param.get('eval_gt_labels', []),
            'num_classes': eval_Param.get("eval_num_classes", 2),
            'evaluate_difficult_gt': eval_Param.get("eval_difficult_gt",
                                                    False),
            'boxsize_threshold': eval_Param.get(
                "eval_boxsize_threshold",
                [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
            'iou_threshold': eval_Param.get("eval_iou_threshold",
                                            [0.9, 0.75, 0.5]),
            'background_label_id': 0,
        }
        net.det_accu = L.DetEval(
            net['detection_out_2'],
            net[gt_label],
            detection_evaluate_param=det_eval_param,
            include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    return net
예제 #6
0
def DAP_HandNet(net, train=True, data_layer="data", gt_label="label",
                net_width=512, net_height=288):
    """Build the DAP hand detector: ``HandBase`` backbone plus the SSD2 head.

    Settings come from the module-level ``ssd_Param_2`` and ``eval_Param``
    dictionaries. With a truthy ``train`` the TRAIN-phase ``mbox_2_loss``
    layer is attached; otherwise the TEST-phase ``detection_out_2`` and
    ``det_accu`` layers are attached.

    Args:
        net: Network specification the layers are added to (presumably a
            caffe ``NetSpec`` -- confirm against the caller).
        train: Selects the loss layer (truthy) or the detection/evaluation
            layers (falsy).
        data_layer: Name of the input layer, forwarded to ``HandBase`` and
            ``SsdDetectorHeaders``.
        gt_label: Key under which the ground-truth blob is stored in ``net``.
        net_width: Forwarded to ``SsdDetectorHeaders``.
        net_height: Forwarded to ``SsdDetectorHeaders``.

    Returns:
        The same ``net`` object with the new layers attached.

    Raises:
        ValueError: In test mode, when ``bboxloss_conf_loss_type`` is neither
            ``P.MultiBoxLoss.SOFTMAX`` nor ``P.MultiBoxLoss.LOGISTIC``.
    """
    # Backbone ("Base of ZhangM"). An earlier, now disabled, variant used
    # ResidualVariant_Base_A with only conv1 & pool1 and a zero learning rate.
    net = HandBase(net, data_layer=data_layer, use_bn=True)

    # Shortcut for reading the SSD2 configuration.
    opt = ssd_Param_2.get

    # SSD headers for the SSD2 branch.
    header_kwargs = dict(
        net_width=net_width,
        net_height=net_height,
        data_layer=data_layer,
        from_layers=opt('feature_layers', []),
        num_classes=opt("num_classes", 2),
        boxsizes=opt("anchor_boxsizes", []),
        aspect_ratios=opt("anchor_aspect_ratios", []),
        prior_variance=opt("anchor_prior_variance", [0.1, 0.1, 0.2, 0.2]),
        flip=opt("anchor_flip", True),
        clip=opt("anchor_clip", True),
        normalizations=opt("interlayers_normalizations", []),
        use_batchnorm=opt("interlayers_use_batchnorm", True),
        inter_layer_channels=opt("interlayers_channels_kernels", []),
        use_focus_loss=opt("bboxloss_using_focus_loss", False),
        use_dense_boxes=opt('bboxloss_use_dense_boxes', False),
        stage=2)
    mbox_2_layers = SsdDetectorHeaders(net, **header_kwargs)

    if train:
        # ---- Training: SSD2 loss layer ----
        loss_param = get_loss_param(
            normalization=opt("bboxloss_normalization", P.Loss.VALID))
        mbox_2_layers.append(net[gt_label])
        dense = opt('bboxloss_use_dense_boxes', False)

        # Entries common to both loss flavours; only the fallback for
        # conf_loss_type depends on the flavour.
        bboxloss_param = {}
        bboxloss_param['gt_labels'] = opt('gt_labels', [])
        bboxloss_param['target_labels'] = opt('target_labels', [])
        bboxloss_param['num_classes'] = opt("num_classes", 2)
        bboxloss_param['alias_id'] = opt("alias_id", 0)
        bboxloss_param['loc_loss_type'] = opt("bboxloss_loc_loss_type",
                                              P.MultiBoxLoss.SMOOTH_L1)
        bboxloss_param['conf_loss_type'] = opt(
            "bboxloss_conf_loss_type",
            P.MultiBoxLoss.LOGISTIC if dense else P.MultiBoxLoss.SOFTMAX)
        bboxloss_param['loc_weight'] = opt("bboxloss_loc_weight", 1)
        bboxloss_param['conf_weight'] = opt("bboxloss_conf_weight", 1)
        bboxloss_param['overlap_threshold'] = opt(
            "bboxloss_overlap_threshold", 0.5)
        bboxloss_param['neg_overlap'] = opt("bboxloss_neg_overlap", 0.5)
        bboxloss_param['size_threshold'] = opt("bboxloss_size_threshold",
                                               0.0001)
        bboxloss_param['do_neg_mining'] = opt("bboxloss_do_neg_mining", True)
        bboxloss_param['neg_pos_ratio'] = opt("bboxloss_neg_pos_ratio", 3)
        bboxloss_param['using_focus_loss'] = opt("bboxloss_using_focus_loss",
                                                 False)
        bboxloss_param['gama'] = opt("bboxloss_focus_gama", 2)
        bboxloss_param['use_difficult_gt'] = opt("bboxloss_use_difficult_gt",
                                                 False)
        bboxloss_param['code_type'] = opt("bboxloss_code_type",
                                          P.PriorBox.CENTER_SIZE)
        bboxloss_param['use_prior_for_matching'] = True
        bboxloss_param['encode_variance_in_target'] = False

        train_only = dict(phase=caffe_pb2.Phase.Value('TRAIN'))
        if dense:
            bboxloss_param['flag_noperson'] = opt('flag_noperson', False)
            bboxloss_param['size_threshold_max'] = opt(
                "bboxloss_size_threshold_max", 2)
            bboxloss_param['flag_showdebug'] = opt("flag_showdebug", False)
            bboxloss_param['flag_forcematchallgt'] = opt(
                "flag_forcematchallgt", False)
            bboxloss_param['flag_areamaxcheckinmatch'] = opt(
                "flag_areamaxcheckinmatch", False)
            net["mbox_2_loss"] = L.DenseBBoxLoss(
                *mbox_2_layers,
                dense_bbox_loss_param=bboxloss_param,
                loss_param=loss_param,
                include=train_only,
                propagate_down=[True, True, False, False])
        else:
            bboxloss_param['match_type'] = P.MultiBoxLoss.PER_PREDICTION
            bboxloss_param['share_location'] = True
            bboxloss_param['background_label_id'] = 0
            bboxloss_param['map_object_to_agnostic'] = False
            net["mbox_2_loss"] = L.BBoxLoss(
                *mbox_2_layers,
                bbox_loss_param=bboxloss_param,
                loss_param=loss_param,
                include=train_only,
                propagate_down=[True, True, False, False])
        return net

    # ---- Test: confidence activation, detection output, evaluation ----
    conf_kind = opt("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
    if conf_kind == P.MultiBoxLoss.SOFTMAX:
        # reshape -> softmax over the class axis -> flatten back
        reshape_name = "mbox_2_conf_reshape"
        softmax_name = "mbox_2_conf_softmax"
        flatten_name = "mbox_2_conf_flatten"
        net[reshape_name] = L.Reshape(
            mbox_2_layers[1],
            shape=dict(dim=[0, -1, opt("num_classes", 2)]))
        net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
        net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
        mbox_2_layers[1] = net[flatten_name]
    elif conf_kind == P.MultiBoxLoss.LOGISTIC:
        sigmoid_name = "mbox_2_conf_sigmoid"
        net[sigmoid_name] = L.Sigmoid(mbox_2_layers[1])
        mbox_2_layers[1] = net[sigmoid_name]
    else:
        raise ValueError("Unknown conf loss type.")

    # Detection-output parameters.
    det_out_param = dict([
        ('num_classes', opt("num_classes", 2)),
        ('target_labels', opt('detout_target_labels', [])),
        ('alias_id', opt("alias_id", 0)),
        ('conf_threshold', opt("detout_conf_threshold", 0.01)),
        ('nms_threshold', opt("detout_nms_threshold", 0.45)),
        ('size_threshold', opt("detout_size_threshold", 0.0001)),
        ('top_k', opt("detout_top_k", 30)),
        ('share_location', True),
        ('code_type', P.PriorBox.CENTER_SIZE),
        ('background_label_id', 0),
        ('variance_encoded_in_target', False),
    ])
    # Pick the detection-output layer type matching the box flavour.
    if opt('bboxloss_use_dense_boxes', False):
        make_det_out = L.DenseDetOut
    else:
        make_det_out = L.DetOut
    net.detection_out_2 = make_det_out(
        *mbox_2_layers,
        detection_output_param=det_out_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    # Evaluation layer, configured from eval_Param.
    eval_opt = eval_Param.get
    det_eval_param = dict([
        ('gt_labels', eval_opt('eval_gt_labels', [])),
        ('num_classes', eval_opt("eval_num_classes", 2)),
        ('evaluate_difficult_gt', eval_opt("eval_difficult_gt", False)),
        ('boxsize_threshold',
         eval_opt("eval_boxsize_threshold",
                  [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25])),
        ('iou_threshold', eval_opt("eval_iou_threshold", [0.9, 0.75, 0.5])),
        ('background_label_id', 0),
    ])
    net.det_accu = L.DetEval(
        net['detection_out_2'],
        net[gt_label],
        detection_evaluate_param=det_eval_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    return net
예제 #7
0
def DAPPoseNet(net, train=True, data_layer="data", gt_label="label",net_width=512, net_height=288):
    """Build the joint detection + pose network on ``net`` and return it.

    Layers are added in this order: two base-network passes, the conv6
    extension, one or two SSD detection branches, the detection evaluation
    layer (TEST only), the pose-label slicing, a deconvolution block followed
    by the multi-stage pose head, and finally the pose post-processing and
    evaluation layers (TEST only).

    Besides its arguments the function reads several names that are not
    defined here and must be available at module scope: ``pose_string``,
    ``Conv6_Param``, ``ssd_Param_1``, ``ssd_Param_2``, ``eval_Param`` and
    ``use_ssd2_for_detection``.

    Args:
        net: net specification the layers are attached to.
        train: when true the TRAIN-phase loss layers are built, otherwise
            the TEST-phase detection-output and evaluation layers.
        data_layer: name of the input blob of the first base network; the
            second base network reads ``data_layer + pose_string``.
        gt_label: name of the detection ground-truth blob. The pose labels
            are always read from ``"label" + pose_string``.
        net_width: forwarded to ``SsdDetectorHeaders``.
        net_height: forwarded to ``SsdDetectorHeaders``.

    Returns:
        The populated net.
    """
    # BaseNet: the same architecture is instantiated twice, once on the
    # detection input and once on the pose input (suffix ``pose_string``).
    channels = ((32,), (32,), (64, 32, 128), (128, 64, 128, 64, 256), (256, 128, 256, 128, 256))
    strides = (True, True, True, False, False)
    kernels = ((3,), (3,), (3, 1, 3), (3, 1, 3, 1, 3), (3, 1, 3, 1, 3))
    pool_last = (False, False, False, True, True)
    net = VGG16_BaseNet_ChangeChannel(net, from_layer=data_layer, channels=channels, strides=strides,
                                      kernels=kernels, freeze_layers=[], pool_last=pool_last,
                                      flag_withparamname=True,)
    net = VGG16_BaseNet_ChangeChannel(net, from_layer=data_layer + pose_string, channels=channels,
                                      strides=strides, kernels=kernels, freeze_layers=[],
                                      pool_last=pool_last, flag_withparamname=True,
                                      pose_string=pose_string)

    # conv6 extension on top of pool5.
    conv6_output = Conv6_Param.get('conv6_output', [])
    conv6_kernal_size = Conv6_Param.get('conv6_kernal_size', [])
    out_layer = "pool5"
    net = addconv6(net, from_layer=out_layer, use_bn=True, conv6_output=conv6_output,
                   conv6_kernal_size=conv6_kernal_size, pre_name="conv6", start_pool=False,
                   lr_mult=1, decay_mult=1, n_group=1)

    # SSD1: headers plus loss (TRAIN) or detection output (TEST).
    _dap_ssd_stage(net, ssd_Param_1, 1, train, data_layer, gt_label,
                   net_width, net_height, lr_mult=1, decay_mult=1.0)
    # SSD2 is optional and built exactly like SSD1 from its own parameters.
    if use_ssd2_for_detection:
        _dap_ssd_stage(net, ssd_Param_2, 2, train, data_layer, gt_label,
                       net_width, net_height, lr_mult=1.0, decay_mult=1.0)

    # EVAL in TEST MODE
    if not train:
        det_eval_param = {
            'gt_labels': eval_Param.get('eval_gt_labels', []),
            'num_classes': eval_Param.get("eval_num_classes", 2),
            'evaluate_difficult_gt': eval_Param.get("eval_difficult_gt", False),
            'boxsize_threshold': eval_Param.get("eval_boxsize_threshold",
                                                [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
            'iou_threshold': eval_Param.get("eval_iou_threshold", [0.9, 0.75, 0.5]),
            'background_label_id': 0,
        }
        if use_ssd2_for_detection:
            # Both detectors are evaluated together: concatenate their outputs.
            det_out_layers = [net['detection_out_1'], net['detection_out_2']]
            name = 'det_out'
            net[name] = L.Concat(*det_out_layers, axis=2)
            eval_input = net[name]
        else:
            eval_input = net['detection_out_1']
        net.det_accu = L.DetEval(eval_input, net[gt_label],
                                 detection_evaluate_param=det_eval_param,
                                 include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    # Split the pose label blob along axis 1. The TEST label carries one
    # extra slice (``gt``) that is consumed by the pose evaluation layer.
    if train:
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp = \
            L.Slice(net["label" + pose_string], ntop=4,
                    slice_param=dict(slice_point=[34, 52, 86], axis=1))
    else:
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp, net.gt = \
            L.Slice(net["label" + pose_string], ntop=5,
                    slice_param=dict(slice_point=[34, 52, 86, 104], axis=1))
    # label: element-wise product of each mask with its template.
    net.vec_label = L.Eltwise(net.vec_mask, net.vec_temp, eltwise_param=dict(operation=P.Eltwise.PROD))
    net.heat_label = L.Eltwise(net.heat_mask, net.heat_temp, eltwise_param=dict(operation=P.Eltwise.PROD))

    ### pose
    pose_test_kwargs = {
        # nms
        'nms_threshold': 0.05,
        'nms_max_peaks': 500,
        'nms_num_parts': 18,
        # connect
        'conn_is_type_coco': True,
        'conn_max_person': 10,
        'conn_max_peaks_use': 20,
        'conn_iters_pa_cal': 10,
        'conn_connect_inter_threshold': 0.05,
        'conn_connect_inter_min_nums': 8,
        'conn_connect_min_subset_cnt': 3,
        'conn_connect_min_subset_score': 0.4,
        # visual
        'eval_area_thre': 64 * 64,
        'eval_oks_thre': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9],
    }

    bn_kwargs = {
        'param': [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
        'eps': 0.001,
    }
    sb_kwargs = {
        'bias_term': True,
        'param': [dict(lr_mult=1, decay_mult=0), dict(lr_mult=1, decay_mult=0)],
        'filler': dict(type='constant', value=1.0),
        'bias_filler': dict(type='constant', value=0.2),
    }
    deconv_param = {
        'num_output': 128,
        'kernel_size': 2,
        'pad': 0,
        'stride': 2,
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0),
        'group': 1,
    }
    kwargs_deconv = {
        'param': [dict(lr_mult=1, decay_mult=1)],
        'convolution_param': deconv_param
    }

    # Deconvolution (kernel 2, stride 2) of the pose branch's conv5_5,
    # followed by in-place BatchNorm / Scale / ReLU.
    from_layer = "conv5_5" + pose_string
    add_layer = from_layer + "_deconv"
    net[add_layer] = L.Deconvolution(net[from_layer], **kwargs_deconv)
    bn_name = add_layer + '_bn'
    net[bn_name] = L.BatchNorm(net[add_layer], in_place=True, **bn_kwargs)
    sb_name = add_layer + '_scale'
    net[sb_name] = L.Scale(net[add_layer], in_place=True, **sb_kwargs)
    relu_name = add_layer + '_relu'
    net[relu_name] = L.ReLU(net[add_layer], in_place=True)

    # Multi-stage pose head.
    baselayer = add_layer
    use_stage = 3
    use_3_layers = 5
    use_1_layers = 0
    n_channel = 64
    lrdecay = 1.0
    kernel_size = 3
    flag_output_sigmoid = False
    # ``range`` instead of the Python-2-only ``xrange``; iteration is identical.
    for stage in range(use_stage):
        if stage == 0:
            from_layer = baselayer
        else:
            from_layer = "concat_stage{}".format(stage)
        outlayer = "concat_stage{}".format(stage + 1)
        # Every stage except the last one is built with short_cut=True.
        short_cut = stage != use_stage - 1
        net = mPose_StageX_Train(net, from_layer=from_layer, out_layer=outlayer, stage=stage + 1,
                                 mask_vec="vec_mask", mask_heat="heat_mask",
                                 label_vec="vec_label", label_heat="heat_label",
                                 use_3_layers=use_3_layers, use_1_layers=use_1_layers,
                                 short_cut=short_cut, base_layer=baselayer, lr=0.1,
                                 decay=lrdecay, num_channels=n_channel,
                                 kernel_size=kernel_size, flag_sigmoid=flag_output_sigmoid)

    # for Test
    if not train:
        # Names of the last stage's output layers; "_sig" is appended when
        # the stages were built with a sigmoid output.
        suffix = "_sig" if flag_output_sigmoid else ""
        last_conv = use_3_layers + use_1_layers
        conv_vec = "stage{}_conv{}_vec".format(use_stage, last_conv) + suffix
        conv_heat = "stage{}_conv{}_heat".format(use_stage, last_conv) + suffix
        net.vec_out = L.Eltwise(net.vec_mask, net[conv_vec], eltwise_param=dict(operation=P.Eltwise.PROD))
        net.heat_out = L.Eltwise(net.heat_mask, net[conv_heat], eltwise_param=dict(operation=P.Eltwise.PROD))
        feaLayers = [net.heat_out, net.vec_out]
        # Same name the last loop iteration used as its out_layer
        # (use_stage == 3, so this is still "concat_stage3").
        outlayer = "concat_stage{}".format(use_stage)
        net[outlayer] = L.Concat(*feaLayers, axis=1)
        # Resize
        resize_kwargs = {
            'factor': pose_test_kwargs.get("resize_factor", 8),
            'scale_gap': pose_test_kwargs.get("resize_scale_gap", 0.3),
            'start_scale': pose_test_kwargs.get("resize_start_scale", 1.0),
        }
        net.resized_map = L.ImResize(net[outlayer], name="resize", imresize_param=resize_kwargs)
        # Nms
        nms_kwargs = {
            'threshold': pose_test_kwargs.get("nms_threshold", 0.05),
            'max_peaks': pose_test_kwargs.get("nms_max_peaks", 100),
            'num_parts': pose_test_kwargs.get("nms_num_parts", 18),
        }
        net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_kwargs)
        # ConnectLimbs
        connect_kwargs = {
            'is_type_coco': pose_test_kwargs.get("conn_is_type_coco", True),
            'max_person': pose_test_kwargs.get("conn_max_person", 10),
            'max_peaks_use': pose_test_kwargs.get("conn_max_peaks_use", 20),
            'iters_pa_cal': pose_test_kwargs.get("conn_iters_pa_cal", 10),
            'connect_inter_threshold': pose_test_kwargs.get("conn_connect_inter_threshold", 0.05),
            'connect_inter_min_nums': pose_test_kwargs.get("conn_connect_inter_min_nums", 8),
            'connect_min_subset_cnt': pose_test_kwargs.get("conn_connect_min_subset_cnt", 3),
            'connect_min_subset_score': pose_test_kwargs.get("conn_connect_min_subset_score", 0.4),
        }
        net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_kwargs)
        # Eval
        eval_kwargs = {
            'stride': 8,
            'area_thre': pose_test_kwargs.get("eval_area_thre", 64 * 64),
            'oks_thre': pose_test_kwargs.get("eval_oks_thre", [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9]),
        }
        net.eval = L.PoseEval(net.limbs, net.gt, pose_eval_param=eval_kwargs)

    return net


def _dap_ssd_stage(net, ssd_param, stage, train, data_layer, gt_label,
                   net_width, net_height, lr_mult, decay_mult):
    """Attach SSD branch number ``stage`` to ``net`` (helper of DAPPoseNet).

    Creates the detector headers from ``ssd_param`` and then either the
    TRAIN-phase loss layer ``mbox_<stage>_loss`` (``train`` true) or the
    confidence activation layers followed by the TEST-phase layer
    ``detection_out_<stage>``.

    Args:
        net: net specification the layers are attached to (modified in place).
        ssd_param: mapping with the branch's anchor / loss / det-out settings.
        stage: branch index, used in layer names and passed to the headers.
        train: selects the loss (true) or detection-output (false) variant.
        data_layer, net_width, net_height, lr_mult, decay_mult: forwarded to
            ``SsdDetectorHeaders``.
        gt_label: name of the ground-truth blob appended as loss input.

    Raises:
        ValueError: in TEST mode when the configured confidence loss type is
            neither SOFTMAX nor LOGISTIC.
    """
    prefix = "mbox_{}".format(stage)
    mbox_layers = SsdDetectorHeaders(
        net,
        net_width=net_width, net_height=net_height, data_layer=data_layer,
        from_layers=ssd_param.get('feature_layers', []),
        num_classes=ssd_param.get("num_classes", 2),
        boxsizes=ssd_param.get("anchor_boxsizes", []),
        aspect_ratios=ssd_param.get("anchor_aspect_ratios", []),
        prior_variance=ssd_param.get("anchor_prior_variance", [0.1, 0.1, 0.2, 0.2]),
        flip=ssd_param.get("anchor_flip", True),
        clip=ssd_param.get("anchor_clip", True),
        normalizations=ssd_param.get("interlayers_normalizations", []),
        use_batchnorm=ssd_param.get("interlayers_use_batchnorm", True),
        inter_layer_channels=ssd_param.get("interlayers_channels_kernels", []),
        use_focus_loss=ssd_param.get("bboxloss_using_focus_loss", False),
        use_dense_boxes=ssd_param.get('bboxloss_use_dense_boxes', False),
        stage=stage, lr_mult=lr_mult, decay_mult=decay_mult)

    if train:
        loss_param = get_loss_param(normalization=ssd_param.get("bboxloss_normalization", P.Loss.VALID))
        mbox_layers.append(net[gt_label])
        use_dense_boxes = ssd_param.get('bboxloss_use_dense_boxes', False)
        # The dense loss defaults to a LOGISTIC confidence loss, the regular
        # one to SOFTMAX; every other shared setting is identical.
        if use_dense_boxes:
            default_conf_loss = P.MultiBoxLoss.LOGISTIC
        else:
            default_conf_loss = P.MultiBoxLoss.SOFTMAX
        bboxloss_param = {
            'gt_labels': ssd_param.get('gt_labels', []),
            'target_labels': ssd_param.get('target_labels', []),
            'num_classes': ssd_param.get("num_classes", 2),
            'alias_id': ssd_param.get("alias_id", 0),
            'loc_loss_type': ssd_param.get("bboxloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
            'conf_loss_type': ssd_param.get("bboxloss_conf_loss_type", default_conf_loss),
            'loc_weight': ssd_param.get("bboxloss_loc_weight", 1),
            'conf_weight': ssd_param.get("bboxloss_conf_weight", 1),
            'overlap_threshold': ssd_param.get("bboxloss_overlap_threshold", 0.5),
            'neg_overlap': ssd_param.get("bboxloss_neg_overlap", 0.5),
            'size_threshold': ssd_param.get("bboxloss_size_threshold", 0.0001),
            'do_neg_mining': ssd_param.get("bboxloss_do_neg_mining", True),
            'neg_pos_ratio': ssd_param.get("bboxloss_neg_pos_ratio", 3),
            'using_focus_loss': ssd_param.get("bboxloss_using_focus_loss", False),
            'gama': ssd_param.get("bboxloss_focus_gama", 2),
            'use_difficult_gt': ssd_param.get("bboxloss_use_difficult_gt", False),
            'code_type': ssd_param.get("bboxloss_code_type", P.PriorBox.CENTER_SIZE),
        }
        train_phase = dict(phase=caffe_pb2.Phase.Value('TRAIN'))
        if use_dense_boxes:
            bboxloss_param['use_prior_for_matching'] = True
            bboxloss_param['encode_variance_in_target'] = False
            bboxloss_param['flag_noperson'] = ssd_param.get('flag_noperson', False)
            net[prefix + "_loss"] = L.DenseBBoxLoss(*mbox_layers,
                                                    dense_bbox_loss_param=bboxloss_param,
                                                    loss_param=loss_param, include=train_phase,
                                                    propagate_down=[True, True, False, False])
        else:
            bboxloss_param['match_type'] = P.MultiBoxLoss.PER_PREDICTION
            bboxloss_param['share_location'] = True
            bboxloss_param['use_prior_for_matching'] = True
            bboxloss_param['background_label_id'] = 0
            bboxloss_param['encode_variance_in_target'] = False
            bboxloss_param['map_object_to_agnostic'] = False
            net[prefix + "_loss"] = L.BBoxLoss(*mbox_layers, bbox_loss_param=bboxloss_param,
                                               loss_param=loss_param, include=train_phase,
                                               propagate_down=[True, True, False, False])
        return

    # TEST: turn the raw confidences (second header output) into scores.
    # NOTE(review): the default here is SOFTMAX even when dense boxes are
    # enabled, while the dense loss above defaults to LOGISTIC - confirm that
    # configs using dense boxes always set "bboxloss_conf_loss_type".
    conf_loss_type = ssd_param.get("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
    if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
        reshape_name = prefix + "_conf_reshape"
        net[reshape_name] = L.Reshape(mbox_layers[1],
                                      shape=dict(dim=[0, -1, ssd_param.get("num_classes", 2)]))
        softmax_name = prefix + "_conf_softmax"
        net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
        flatten_name = prefix + "_conf_flatten"
        net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
        mbox_layers[1] = net[flatten_name]
    elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
        sigmoid_name = prefix + "_conf_sigmoid"
        net[sigmoid_name] = L.Sigmoid(mbox_layers[1])
        mbox_layers[1] = net[sigmoid_name]
    else:
        raise ValueError("Unknown conf loss type.")

    # Det-out param
    det_out_param = {
        'num_classes': ssd_param.get("num_classes", 2),
        'target_labels': ssd_param.get('detout_target_labels', []),
        'alias_id': ssd_param.get("alias_id", 0),
        'conf_threshold': ssd_param.get("detout_conf_threshold", 0.01),
        'nms_threshold': ssd_param.get("detout_nms_threshold", 0.45),
        'size_threshold': ssd_param.get("detout_size_threshold", 0.0001),
        'top_k': ssd_param.get("detout_top_k", 30),
        'share_location': True,
        'code_type': P.PriorBox.CENTER_SIZE,
        'background_label_id': 0,
        'variance_encoded_in_target': False,
    }
    test_phase = dict(phase=caffe_pb2.Phase.Value('TEST'))
    if ssd_param.get('bboxloss_use_dense_boxes', False):
        det_out = L.DenseDetOut(*mbox_layers, detection_output_param=det_out_param,
                                include=test_phase)
    else:
        det_out = L.DetOut(*mbox_layers, detection_output_param=det_out_param,
                           include=test_phase)
    # Attribute assignment, exactly like ``net.detection_out_<stage> = ...``.
    setattr(net, "detection_out_{}".format(stage), det_out)
예제 #8
0
def SsdDetector(net, train=True, data_layer="data", gt_label="label", \
                net_width=512, net_height=288, \
                eval_enable=True, **ssdparam):
    """Build a single-branch SSD detector on ``net`` and return it.

    The base network and the conv6 extension are added first, then three
    fused feature maps, the SSD headers, and finally either the TRAIN-phase
    ``mbox_loss`` layer or the TEST-phase ``detection_out`` and ``det_accu``
    layers.

    Args:
        net: net specification the layers are attached to.
        train: build the loss layer when true, otherwise the detection
            output and evaluation layers.
        data_layer: name of the input blob.
        gt_label: name of the ground-truth blob.
        net_width: forwarded to ``SsdDetectorHeaders``.
        net_height: forwarded to ``SsdDetectorHeaders``.
        eval_enable: accepted for interface compatibility; this function
            does not read it.
        **ssdparam: detector settings, looked up by key with the defaults
            given at each ``ssdparam.get`` call below.

    Returns:
        The populated net.

    Raises:
        ValueError: in TEST mode when ``multiloss_conf_loss_type`` is neither
            SOFTMAX nor LOGISTIC.
    """
    conv6_output = ssdparam.get("multilayers_conv6_output", [])
    conv6_kernal_size = ssdparam.get("multilayers_conv6_kernal_size", [])
    use_sub_layers = (6, 7)
    num_channels = (144, 288)
    output_channels = (128, 0)
    channel_scale = 4
    add_strs = "_recon"
    net = ResidualVariant_Base_A(net, data_layer=data_layer, use_sub_layers=use_sub_layers,
                                 num_channels=num_channels, output_channels=output_channels,
                                 channel_scale=channel_scale, lr=0.1, decay=0.1, add_strs=add_strs,)
    # Conv6
    out_layer = "conv3_7_recon_relu"
    net = addconv6(net, from_layer=out_layer, use_bn=True, conv6_output=conv6_output,
                   conv6_kernal_size=conv6_kernal_size, pre_name="conv6", start_pool=True,
                   lr_mult=1, decay_mult=1, n_group=1)

    # Concat FM1..FM3: each feature map fuses a max-pooled ("Down") layer
    # with a reference ("Ref") layer. Fresh tag / method lists are passed on
    # every call, as the original per-feature-map code did.
    fusion_specs = (
        ("featuremap1", ["pool1_recon", "conv2_6_recon_relu"]),
        ("featuremap2", ["conv2_6_recon_relu", "conv3_7_recon_relu"]),
        ("featuremap3", ["conv3_7_recon_relu", "conv6_6"]),
    )
    feature_layers = []
    for out_layer, source_layers in fusion_specs:
        UnifiedMultiScaleLayers(net, layers=source_layers, tags=["Down", "Ref"],
                                unifiedlayer=out_layer, dnsampleMethod=[["MaxPool"]])
        feature_layers.append(out_layer)

    # Create SSD Header
    mbox_layers = SsdDetectorHeaders(
        net,
        boxsizes=ssdparam.get("multilayers_boxsizes", []),
        net_width=net_width,
        net_height=net_height,
        data_layer=data_layer,
        num_classes=ssdparam.get("num_classes", 2),
        from_layers=feature_layers,
        use_batchnorm=ssdparam.get("multilayers_use_batchnorm", True),
        prior_variance=ssdparam.get("multilayers_prior_variance", [0.1, 0.1, 0.2, 0.2]),
        normalizations=ssdparam.get("multilayers_normalizations", []),
        aspect_ratios=ssdparam.get("multilayers_aspect_ratios", []),
        flip=ssdparam.get("multilayers_flip", True),
        clip=ssdparam.get("multilayers_clip", True),
        inter_layer_channels=ssdparam.get("multilayers_inter_layer_channels", []),
        kernel_size=ssdparam.get("multilayers_kernel_size", 3),
        pad=ssdparam.get("multilayers_pad", 1),
        use_focus_loss=ssdparam.get("multiloss_using_focus_loss", False))

    # Loss & Det-eval
    if train:
        loss_param = get_loss_param(normalization=ssdparam.get("multiloss_normalization", P.Loss.VALID))
        mbox_layers.append(net[gt_label])
        bboxloss_param = {
            'loc_loss_type': ssdparam.get("multiloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
            'conf_loss_type': ssdparam.get("multiloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX),
            'loc_weight': ssdparam.get("multiloss_loc_weight", 1),
            'conf_weight': ssdparam.get("multiloss_conf_weight", 1),
            'num_classes': ssdparam.get("num_classes", 2),
            'share_location': ssdparam.get("multiloss_share_location", True),
            'match_type': ssdparam.get("multiloss_match_type", P.MultiBoxLoss.PER_PREDICTION),
            'overlap_threshold': ssdparam.get("multiloss_overlap_threshold", 0.5),
            'use_prior_for_matching': True,
            'background_label_id': 0,
            'use_difficult_gt': ssdparam.get("multiloss_use_difficult_gt", False),
            'do_neg_mining': ssdparam.get("multiloss_do_neg_mining", True),
            'neg_pos_ratio': ssdparam.get("multiloss_neg_pos_ratio", 3),
            'neg_overlap': ssdparam.get("multiloss_neg_overlap", 0.5),
            'code_type': ssdparam.get("multiloss_code_type", P.PriorBox.CENTER_SIZE),
            'encode_variance_in_target': False,
            'map_object_to_agnostic': False,
            'size_threshold': ssdparam.get("multiloss_size_threshold", 0.0001),
            'alias_id': ssdparam.get("multiloss_alias_id", 0),
            'using_focus_loss': ssdparam.get("multiloss_using_focus_loss", False),
            'gama': ssdparam.get("multiloss_focus_gama", 2),
            'loc_class': 1,
        }
        net["mbox_loss"] = L.BBoxLoss(*mbox_layers,
                                      bbox_loss_param=bboxloss_param,
                                      loss_param=loss_param,
                                      include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                                      propagate_down=[True, True, False, False])
    else:
        # Turn the raw confidences (second header output) into scores with
        # the activation that matches the configured confidence loss.
        conf_loss_type = ssdparam.get("multiloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
        if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
            reshape_name = "mbox_conf_reshape"
            net[reshape_name] = L.Reshape(mbox_layers[1],
                                          shape=dict(dim=[0, -1, ssdparam.get("num_classes", 2)]))
            softmax_name = "mbox_conf_softmax"
            net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
            flatten_name = "mbox_conf_flatten"
            net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
            mbox_layers[1] = net[flatten_name]
        elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
            sigmoid_name = "mbox_conf_sigmoid"
            net[sigmoid_name] = L.Sigmoid(mbox_layers[1])
            mbox_layers[1] = net[sigmoid_name]
        else:
            raise ValueError("Unknown conf loss type.")
        # Det-out param
        det_out_param = {
            'num_classes': ssdparam.get("num_classes", 2),
            'share_location': ssdparam.get("multiloss_share_location", True),
            'background_label_id': 0,
            'code_type': ssdparam.get("multiloss_code_type", P.PriorBox.CENTER_SIZE),
            'variance_encoded_in_target': False,
            'conf_threshold': ssdparam.get("detectionout_conf_threshold", 0.01),
            'nms_threshold': ssdparam.get("detectionout_nms_threshold", 0.45),
            'size_threshold': ssdparam.get("detectionout_size_threshold", 0.001),
            'top_k': ssdparam.get("detectionout_top_k", 200),
            # The original line ended in a double comma, which is a
            # SyntaxError inside a dict literal.
            'alias_id': ssdparam.get("multiloss_alias_id", 0),
        }
        net.detection_out = L.DetOut(*mbox_layers,
                                     detection_output_param=det_out_param,
                                     include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        # Det-eval
        det_eval_param = {
            'num_classes': ssdparam.get("num_classes", 2),
            'background_label_id': 0,
            'evaluate_difficult_gt': ssdparam.get("detectioneval_evaluate_difficult_gt", False),
            'boxsize_threshold': ssdparam.get("detectioneval_boxsize_threshold",
                                              [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
            'iou_threshold': ssdparam.get("detectioneval_iou_threshold", [0.9, 0.75, 0.5]),
        }
        net.det_accu = L.DetEval(net.detection_out, net[gt_label],
                                 detection_evaluate_param=det_eval_param,
                                 include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    return net