예제 #1
0
def VGG19_PoseNet_COCO_3S_Train(net, data_layer="data", label_layer="label", train=True, **pose_test_kwargs):
    """Attach a VGG19-based, three-stage pose network to ``net``.

    The label blob is sliced along axis 1 into mask/template tops, masked
    labels are formed by element-wise products, and the VGG19 base followed
    by two CPM convolutions feeds ``Pose_Stage1_COCO_train`` and two
    ``Pose_StageX_COCO_train`` stages.  When ``train`` is false the stage-3
    outputs are additionally masked, concatenated and run through the
    ImResize, Nms, Connectlimb and PoseEval layers.

    Args:
        net: network specification the layers are attached to.
        data_layer: name of the input blob handed to ``VGG19Net_Pre10``.
        label_layer: name of the blob in ``net`` that is sliced.
        train: when true only the training graph is built; otherwise a fifth
            ``gt`` top is sliced off and the test-time layers are appended.
        **pose_test_kwargs: optional overrides for the test-time layer
            parameters (keys prefixed ``resize_``, ``nms_``, ``conn_`` and
            ``eval_``); missing keys use the defaults written below.

    Returns:
        The network with the new layers attached.
    """

    def _masked(mask_blob, value_blob):
        # Element-wise product of a mask with the blob it gates.
        return L.Eltwise(mask_blob, value_blob,
                         eltwise_param=dict(operation=P.Eltwise.PROD))

    # Split the label blob; the test graph needs one extra top ("gt").
    label_blob = net[label_layer]
    if train:
        sliced = L.Slice(label_blob, ntop=4,
                         slice_param=dict(slice_point=[38, 57, 95], axis=1))
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp = sliced
    else:
        sliced = L.Slice(label_blob, ntop=5,
                         slice_param=dict(slice_point=[38, 57, 95, 114], axis=1))
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp, net.gt = sliced

    # Masked labels.
    net.vec_label = _masked(net.vec_mask, net.vec_temp)
    net.heat_label = _masked(net.heat_mask, net.heat_temp)

    # VGG19 base network.
    net = VGG19Net_Pre10(net, from_layer=data_layer)

    # Two CPM convolutions on top of relu4_2, sharing filler / lr settings.
    conv_common = dict(
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0),
    )
    net.conv4_3_CPM = L.Convolution(net.relu4_2, num_output=256, pad=1, kernel_size=3, **conv_common)
    net.relu4_3_CPM = L.ReLU(net.conv4_3_CPM, in_place=True)
    net.conv4_4_CPM = L.Convolution(net.relu4_3_CPM, num_output=128, pad=1, kernel_size=3, **conv_common)
    net.relu4_4_CPM = L.ReLU(net.conv4_4_CPM, in_place=True)

    # Every stage is given the same mask / label blob names.
    supervision = dict(mask_L1="vec_mask", mask_L2="heat_mask",
                       label_L1="vec_label", label_L2="heat_label")

    # Stage 1.
    net = Pose_Stage1_COCO_train(net, from_layer="relu4_4_CPM", out_layer="concat_stage2",
                                 lr=1, decay=1, **supervision)
    # Stages 2 and 3; only stage 2 requests the short-cut to the base layer.
    net = Pose_StageX_COCO_train(net, from_layer="concat_stage2", out_layer="concat_stage3", stage=2,
                                 short_cut=True, base_layer="relu4_4_CPM", lr=4, decay=1,
                                 **supervision)
    net = Pose_StageX_COCO_train(net, from_layer="concat_stage3", out_layer="concat_stage4", stage=3,
                                 short_cut=False, lr=4, decay=1, **supervision)

    if train:
        return net

    # ---- Test-only layers ----
    option = pose_test_kwargs.get

    # Mask the stage-3 predictions and stack them heat-first along axis 1.
    net.vec_out = _masked(net.vec_mask, net.Mconv7_stage3_L1)
    net.heat_out = _masked(net.heat_mask, net.Mconv7_stage3_L2)
    net["concat_stage4"] = L.Concat(net.heat_out, net.vec_out, axis=1)

    # Resize
    resize_cfg = dict(
        factor=option("resize_factor", 8),
        scale_gap=option("resize_scale_gap", 0.3),
        start_scale=option("resize_start_scale", 1.0),
    )
    net.resized_map = L.ImResize(net.concat_stage4, name="resize", imresize_param=resize_cfg)

    # Nms
    nms_cfg = dict(
        threshold=option("nms_threshold", 0.05),
        max_peaks=option("nms_max_peaks", 64),
        num_parts=option("nms_num_parts", 18),
    )
    net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_cfg)

    # ConnectLimbs
    connect_cfg = dict(
        is_type_coco=option("conn_is_type_coco", True),
        max_person=option("conn_max_person", 20),
        max_peaks_use=option("conn_max_peaks_use", 32),
        iters_pa_cal=option("conn_iters_pa_cal", 10),
        connect_inter_threshold=option("conn_connect_inter_threshold", 0.05),
        connect_inter_min_nums=option("conn_connect_inter_min_nums", 8),
        connect_min_subset_cnt=option("conn_connect_min_subset_cnt", 3),
        connect_min_subset_score=option("conn_connect_min_subset_score", 0.3),
    )
    net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_cfg)

    # Eval (the stride is fixed at 8 and cannot be overridden by the caller).
    eval_cfg = dict(
        stride=8,
        area_thre=option("eval_area_thre", 96 * 96),
        oks_thre=option("eval_oks_thre", [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9]),
    )
    net.eval = L.PoseEval(net.limbs, net.gt, pose_eval_param=eval_cfg)
    return net
예제 #2
0
def VGG19_PoseNet_Stage3_COCO_Test(net, from_layer="data", frame_layer="orig_data", **pose_kwargs):
    """Attach a VGG19-based, three-stage pose network for inference/visualisation.

    Builds the VGG19 base on ``from_layer``, two CPM convolutions, stage 1
    (``Pose_Stage1_COCO``) and stages 2-3 (``Pose_StageX_COCO``), then
    concatenates the stage-3 outputs and appends the ImResize, Nms,
    Connectlimb and Visualizepose layers.

    Args:
        net: network specification the layers are attached to.
        from_layer: name of the input blob; it must already exist in ``net``
            and is the blob handed to ``VGG19Net_Pre10``.
        frame_layer: name of the blob in ``net`` passed to the Visualizepose
            layer as its first bottom.
        **pose_kwargs: optional overrides for the post-processing layer
            parameters (keys prefixed ``resize_``, ``nms_``, ``conn_`` and
            ``visual_``); missing keys use the defaults written below.

    Returns:
        The network with the new layers attached.

    Raises:
        AssertionError: if ``from_layer`` is not a key of ``net``.
    """
    # baseNet-VGG19
    assert from_layer in net.keys(), \
        "input layer '{}' not found in net".format(from_layer)
    # Use the caller-supplied input layer; it was previously validated above
    # but then ignored in favour of a hard-coded "data".
    net = VGG19Net_Pre10(net, from_layer=from_layer)
    # conv4_3_CPM & conv4_4_CPM share the same lr / filler settings
    kwargs = {
            'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
            'weight_filler': dict(type='gaussian', std=0.01),
            'bias_filler': dict(type='constant', value=0)}
    # conv4_3_CPM
    net.conv4_3_CPM = L.Convolution(net.relu4_2, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu4_3_CPM = L.ReLU(net.conv4_3_CPM, in_place=True)
    # conv4_4_CPM
    net.conv4_4_CPM = L.Convolution(net.relu4_3_CPM, num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu4_4_CPM = L.ReLU(net.conv4_4_CPM, in_place=True)
    # Stage1
    net = Pose_Stage1_COCO(net, from_layer="relu4_4_CPM", out_layer="concat_stage2", lr=1, decay=1)
    # Stage2-3 (only stage 2 requests the short-cut to the base layer)
    net = Pose_StageX_COCO(net, from_layer="concat_stage2", out_layer="concat_stage3", stage=2, short_cut=True, base_layer="relu4_4_CPM", lr=4, decay=1)
    net = Pose_StageX_COCO(net, from_layer="concat_stage3", out_layer="concat_stage4", stage=3, short_cut=False, lr=4, decay=1)
    # concat the stage-3 output layers along axis 1, L2 first then L1
    feaLayers = [net["Mconv7_stage3_L2"], net["Mconv7_stage3_L1"]]
    net["concat_stage4"] = L.Concat(*feaLayers, axis=1)
    # Resize
    resize_kwargs = {
        'factor': pose_kwargs.get("resize_factor", 8),
        'scale_gap': pose_kwargs.get("resize_scale_gap", 0.3),
        'start_scale': pose_kwargs.get("resize_start_scale", 1.0),
    }
    net.resized_map = L.ImResize(net.concat_stage4, name="resize", imresize_param=resize_kwargs)
    # Nms
    nms_kwargs = {
        'threshold': pose_kwargs.get("nms_threshold", 0.05),
        'max_peaks': pose_kwargs.get("nms_max_peaks", 64),
        'num_parts': pose_kwargs.get("nms_num_parts", 18),
    }
    net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_kwargs)
    # ConnectLimbs
    connect_kwargs = {
        'is_type_coco': pose_kwargs.get("conn_is_type_coco", True),
        'max_person': pose_kwargs.get("conn_max_person", 20),
        'max_peaks_use': pose_kwargs.get("conn_max_peaks_use", 32),
        'iters_pa_cal': pose_kwargs.get("conn_iters_pa_cal", 10),
        'connect_inter_threshold': pose_kwargs.get("conn_connect_inter_threshold", 0.05),
        'connect_inter_min_nums': pose_kwargs.get("conn_connect_inter_min_nums", 8),
        'connect_min_subset_cnt': pose_kwargs.get("conn_connect_min_subset_cnt", 3),
        'connect_min_subset_score': pose_kwargs.get("conn_connect_min_subset_score", 0.3),
    }
    net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_kwargs)
    # VisualizePose ('is_type_coco' deliberately reuses the "conn_is_type_coco" key)
    visual_kwargs = {
        'is_type_coco': pose_kwargs.get("conn_is_type_coco", True),
        'type': pose_kwargs.get("visual_type", P.Visualizepose.POSE),
        'visualize': pose_kwargs.get("visual_visualize", True),
        'draw_skeleton': pose_kwargs.get("visual_draw_skeleton", True),
        'print_score': pose_kwargs.get("visual_print_score", False),
        'part_id': pose_kwargs.get("visual_part_id", 0),
        'from_part': pose_kwargs.get("visual_from_part", 0),
        'vec_id': pose_kwargs.get("visual_vec_id", 0),
        'from_vec': pose_kwargs.get("visual_from_vec", 0),
        'pose_threshold': pose_kwargs.get("visual_pose_threshold", 0.05),
        'write_frames': pose_kwargs.get("visual_write_frames", False),
        'output_directory': pose_kwargs.get("visual_output_directory", ""),
    }
    net.finished = L.Visualizepose(net[frame_layer], net.resized_map, net.limbs, visualize_pose_param=visual_kwargs)

    return net
예제 #3
0
def mPoseNet_Decomp_3S_Test(net,
                            from_layer="data",
                            frame_layer="orig_data",
                            use_bn=True,
                            **kwargs):
    """Attach the decomposed three-stage pose network used for testing.

    ``YoloNetPart_Decomp`` builds the base network on ``from_layer``,
    ``UnifiedMultiScaleLayers`` merges ``conv4_3_c`` and ``conv5_5_c`` into
    ``convf``, and three ``mPose_StageX_decomp_Test`` stages are chained on
    top of it.  The final heat/vec convolutions are concatenated and passed
    through the ImResize, Nms, Connectlimb and Visualizepose layers.

    Args:
        net: network specification the layers are attached to.
        from_layer: name of the input blob for the base network.
        frame_layer: name of the blob in ``net`` passed to the Visualizepose
            layer as its first bottom.
        use_bn: forwarded to ``YoloNetPart_Decomp``.
        **kwargs: forwarded unchanged to ``YoloNetPart_Decomp`` and every
            stage builder, and also read for optional ``resize_``, ``nms_``,
            ``conn_`` and ``visual_`` parameter overrides.

    Returns:
        The network with the new layers attached.
    """
    # Base network.
    net = YoloNetPart_Decomp(net, from_layer=from_layer, use_bn=use_bn, use_layers=5,
                             use_sub_layers=5, final_pool=False, lr=1, decay=1, **kwargs)
    # Merge conv4_3 & conv5_5 into a single feature layer.
    net = UnifiedMultiScaleLayers(net, layers=["conv4_3_c", "conv5_5_c"], tags=["Ref", "Up"],
                                  unifiedlayer="convf", upsampleMethod="Reorg")

    # Chain the stages; every stage but the last requests the short-cut.
    base_name = "convf"
    num_stages = 3
    n_3x3 = 5
    n_1x1 = 0
    stage_input = base_name
    for stage_idx in range(1, num_stages + 1):
        stage_output = "concat_stage{}".format(stage_idx)
        net = mPose_StageX_decomp_Test(net, from_layer=stage_input, out_layer=stage_output,
                                       stage=stage_idx, use_3_layers=n_3x3, use_1_layers=n_1x1,
                                       short_cut=stage_idx < num_stages,
                                       base_layer=base_name, **kwargs)
        stage_input = stage_output

    # Stack the last stage's outputs heat-first along axis 1.
    last_conv = n_3x3 + n_1x1
    heat_name = "stage{}_conv{}_heat_c".format(num_stages, last_conv)
    vec_name = "stage{}_conv{}_vec_c".format(num_stages, last_conv)
    merged_name = "concat_stage{}".format(num_stages)
    net[merged_name] = L.Concat(net[heat_name], net[vec_name], axis=1)

    option = kwargs.get

    # Resize
    resize_cfg = dict(
        factor=option("resize_factor", 8),
        scale_gap=option("resize_scale_gap", 0.3),
        start_scale=option("resize_start_scale", 1.0),
    )
    net.resized_map = L.ImResize(net[merged_name], name="resize", imresize_param=resize_cfg)

    # Nms
    nms_cfg = dict(
        threshold=option("nms_threshold", 0.05),
        max_peaks=option("nms_max_peaks", 64),
        num_parts=option("nms_num_parts", 18),
    )
    net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_cfg)

    # ConnectLimbs
    connect_cfg = dict(
        is_type_coco=option("conn_is_type_coco", True),
        max_person=option("conn_max_person", 20),
        max_peaks_use=option("conn_max_peaks_use", 32),
        iters_pa_cal=option("conn_iters_pa_cal", 10),
        connect_inter_threshold=option("conn_connect_inter_threshold", 0.05),
        connect_inter_min_nums=option("conn_connect_inter_min_nums", 8),
        connect_min_subset_cnt=option("conn_connect_min_subset_cnt", 3),
        connect_min_subset_score=option("conn_connect_min_subset_score", 0.3),
    )
    net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_cfg)

    # VisualizePose ('is_type_coco' reuses the "conn_is_type_coco" key).
    # Built with a dict literal because 'type' is awkward as a keyword name.
    visual_cfg = {
        'is_type_coco': option("conn_is_type_coco", True),
        'type': option("visual_type", P.Visualizepose.POSE),
        'visualize': option("visual_visualize", True),
        'draw_skeleton': option("visual_draw_skeleton", True),
        'print_score': option("visual_print_score", False),
        'part_id': option("visual_part_id", 0),
        'from_part': option("visual_from_part", 0),
        'vec_id': option("visual_vec_id", 0),
        'from_vec': option("visual_from_vec", 0),
        'pose_threshold': option("visual_pose_threshold", 0.05),
        'write_frames': option("visual_write_frames", False),
        'output_directory': option("visual_output_directory", ""),
    }
    net.finished = L.Visualizepose(net[frame_layer], net.resized_map, net.limbs,
                                   visualize_pose_param=visual_cfg)
    return net
예제 #4
0
def DAPPoseNet(net, train=True, data_layer="data", gt_label="label",net_width=512, net_height=288):
    # BaseNet
    channels = ((32,), (32,), (64, 32, 128), (128, 64, 128, 64, 256), (256, 128, 256, 128, 256))
    strides = (True, True, True, False, False)
    kernels = ((3,), (3,), (3, 1, 3), (3, 1, 3, 1, 3), (3, 1, 3, 1, 3))
    pool_last = (False,False,False,True,True)
    net = VGG16_BaseNet_ChangeChannel(net, from_layer=data_layer, channels=channels, strides=strides,
                                          kernels=kernels,freeze_layers=[], pool_last=pool_last,flag_withparamname=True,)
    net = VGG16_BaseNet_ChangeChannel(net, from_layer=data_layer + pose_string, channels=channels, strides=strides,
                                      kernels=kernels, freeze_layers=[], pool_last=pool_last, flag_withparamname=True, pose_string = pose_string)
    conv6_output = Conv6_Param.get('conv6_output', [])
    conv6_kernal_size = Conv6_Param.get('conv6_kernal_size', [])
    out_layer = "pool5"
    net = addconv6(net, from_layer=out_layer, use_bn=True, conv6_output=conv6_output, \
                   conv6_kernal_size=conv6_kernal_size, pre_name="conv6", start_pool=False, lr_mult=1, decay_mult=1,
                   n_group=1)
    # Create SSD Header for SSD1
    lr_mult = 1
    decay_mult = 1.0
    mbox_1_layers = SsdDetectorHeaders(net, \
         net_width=net_width, net_height=net_height, data_layer=data_layer, \
         from_layers=ssd_Param_1.get('feature_layers',[]), \
         num_classes=ssd_Param_1.get("num_classes",2), \
         boxsizes=ssd_Param_1.get("anchor_boxsizes", []), \
         aspect_ratios=ssd_Param_1.get("anchor_aspect_ratios",[]), \
         prior_variance = ssd_Param_1.get("anchor_prior_variance",[0.1,0.1,0.2,0.2]), \
         flip=ssd_Param_1.get("anchor_flip",True), \
         clip=ssd_Param_1.get("anchor_clip",True), \
         normalizations=ssd_Param_1.get("interlayers_normalizations",[]), \
         use_batchnorm=ssd_Param_1.get("interlayers_use_batchnorm",True), \
         inter_layer_channels=ssd_Param_1.get("interlayers_channels_kernels",[]), \
         use_focus_loss=ssd_Param_1.get("bboxloss_using_focus_loss",False), \
         use_dense_boxes=ssd_Param_1.get('bboxloss_use_dense_boxes',False), \
         stage=1,lr_mult=lr_mult, decay_mult=decay_mult)
    # make Loss or Detout for SSD1
    if train:
        loss_param = get_loss_param(normalization=ssd_Param_1.get("bboxloss_normalization",P.Loss.VALID))
        mbox_1_layers.append(net[gt_label])
        use_dense_boxes = ssd_Param_1.get('bboxloss_use_dense_boxes',False)
        if use_dense_boxes:
            bboxloss_param = {
                'gt_labels': ssd_Param_1.get('gt_labels',[]),
                'target_labels': ssd_Param_1.get('target_labels',[]),
                'num_classes':ssd_Param_1.get("num_classes",2),
                'alias_id':ssd_Param_1.get("alias_id",0),
                'loc_loss_type':ssd_Param_1.get("bboxloss_loc_loss_type",P.MultiBoxLoss.SMOOTH_L1),
                'conf_loss_type':ssd_Param_1.get("bboxloss_conf_loss_type",P.MultiBoxLoss.LOGISTIC),
                'loc_weight':ssd_Param_1.get("bboxloss_loc_weight",1),
                'conf_weight':ssd_Param_1.get("bboxloss_conf_weight",1),
                'overlap_threshold':ssd_Param_1.get("bboxloss_overlap_threshold",0.5),
                'neg_overlap':ssd_Param_1.get("bboxloss_neg_overlap",0.5),
                'size_threshold':ssd_Param_1.get("bboxloss_size_threshold",0.0001),
                'do_neg_mining':ssd_Param_1.get("bboxloss_do_neg_mining",True),
                'neg_pos_ratio':ssd_Param_1.get("bboxloss_neg_pos_ratio",3),
                'using_focus_loss':ssd_Param_1.get("bboxloss_using_focus_loss",False),
                'gama':ssd_Param_1.get("bboxloss_focus_gama",2),
                'use_difficult_gt':ssd_Param_1.get("bboxloss_use_difficult_gt",False),
                'code_type':ssd_Param_1.get("bboxloss_code_type",P.PriorBox.CENTER_SIZE),
                'use_prior_for_matching':True,
                'encode_variance_in_target': False,
                'flag_noperson':ssd_Param_1.get('flag_noperson',False),
            }
            net["mbox_1_loss"] = L.DenseBBoxLoss(*mbox_1_layers, dense_bbox_loss_param=bboxloss_param, \
                                    loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                                    propagate_down=[True, True, False, False])
        else:
            bboxloss_param = {
                'gt_labels': ssd_Param_1.get('gt_labels',[]),
                'target_labels': ssd_Param_1.get('target_labels',[]),
                'num_classes':ssd_Param_1.get("num_classes",2),
                'alias_id':ssd_Param_1.get("alias_id",0),
                'loc_loss_type':ssd_Param_1.get("bboxloss_loc_loss_type",P.MultiBoxLoss.SMOOTH_L1),
                'conf_loss_type':ssd_Param_1.get("bboxloss_conf_loss_type",P.MultiBoxLoss.SOFTMAX),
                'loc_weight':ssd_Param_1.get("bboxloss_loc_weight",1),
                'conf_weight':ssd_Param_1.get("bboxloss_conf_weight",1),
                'overlap_threshold':ssd_Param_1.get("bboxloss_overlap_threshold",0.5),
                'neg_overlap':ssd_Param_1.get("bboxloss_neg_overlap",0.5),
                'size_threshold':ssd_Param_1.get("bboxloss_size_threshold",0.0001),
                'do_neg_mining':ssd_Param_1.get("bboxloss_do_neg_mining",True),
                'neg_pos_ratio':ssd_Param_1.get("bboxloss_neg_pos_ratio",3),
                'using_focus_loss':ssd_Param_1.get("bboxloss_using_focus_loss",False),
                'gama':ssd_Param_1.get("bboxloss_focus_gama",2),
                'use_difficult_gt':ssd_Param_1.get("bboxloss_use_difficult_gt",False),
                'code_type':ssd_Param_1.get("bboxloss_code_type",P.PriorBox.CENTER_SIZE),
                'match_type':P.MultiBoxLoss.PER_PREDICTION,
                'share_location':True,
                'use_prior_for_matching':True,
                'background_label_id':0,
                'encode_variance_in_target': False,
                'map_object_to_agnostic':False,
            }
            net["mbox_1_loss"] = L.BBoxLoss(*mbox_1_layers, bbox_loss_param=bboxloss_param, \
                        loss_param=loss_param,include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                        propagate_down=[True, True, False, False])
    else:
        if ssd_Param_1.get("bboxloss_conf_loss_type",P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.SOFTMAX:
            reshape_name = "mbox_1_conf_reshape"
            net[reshape_name] = L.Reshape(mbox_1_layers[1], \
                    shape=dict(dim=[0, -1, ssd_Param_1.get("num_classes",2)]))
            softmax_name = "mbox_1_conf_softmax"
            net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
            flatten_name = "mbox_1_conf_flatten"
            net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
            mbox_1_layers[1] = net[flatten_name]
        elif ssd_Param_1.get("bboxloss_conf_loss_type",P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.LOGISTIC:
            sigmoid_name = "mbox_1_conf_sigmoid"
            net[sigmoid_name] = L.Sigmoid(mbox_1_layers[1])
            mbox_1_layers[1] = net[sigmoid_name]
        else:
            raise ValueError("Unknown conf loss type.")
        # Det-out param
        det_out_param = {
            'num_classes':ssd_Param_1.get("num_classes",2),
            'target_labels': ssd_Param_1.get('detout_target_labels',[]),
            'alias_id':ssd_Param_1.get("alias_id",0),
            'conf_threshold':ssd_Param_1.get("detout_conf_threshold",0.01),
            'nms_threshold':ssd_Param_1.get("detout_nms_threshold",0.45),
            'size_threshold':ssd_Param_1.get("detout_size_threshold",0.0001),
            'top_k':ssd_Param_1.get("detout_top_k",30),
            'share_location':True,
            'code_type':P.PriorBox.CENTER_SIZE,
            'background_label_id':0,
            'variance_encoded_in_target':False,
        }
        use_dense_boxes = ssd_Param_1.get('bboxloss_use_dense_boxes',False)
        if use_dense_boxes:
            net.detection_out_1 = L.DenseDetOut(*mbox_1_layers, \
        	  	detection_output_param=det_out_param, \
        	  	include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.detection_out_1 = L.DetOut(*mbox_1_layers, \
    	  		detection_output_param=det_out_param, \
    	  		include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    # make Loss & Detout for SSD2
    lr_mult = 1.0
    decay_mult = 1.0
    if use_ssd2_for_detection:
         mbox_2_layers = SsdDetectorHeaders(net, \
              net_width=net_width, net_height=net_height, data_layer=data_layer, \
              from_layers=ssd_Param_2.get('feature_layers',[]), \
              num_classes=ssd_Param_2.get("num_classes",2), \
              boxsizes=ssd_Param_2.get("anchor_boxsizes", []), \
              aspect_ratios=ssd_Param_2.get("anchor_aspect_ratios",[]), \
              prior_variance = ssd_Param_2.get("anchor_prior_variance",[0.1,0.1,0.2,0.2]), \
              flip=ssd_Param_2.get("anchor_flip",True), \
              clip=ssd_Param_2.get("anchor_clip",True), \
              normalizations=ssd_Param_2.get("interlayers_normalizations",[]), \
              use_batchnorm=ssd_Param_2.get("interlayers_use_batchnorm",True), \
              inter_layer_channels=ssd_Param_2.get("interlayers_channels_kernels",[]), \
              use_focus_loss=ssd_Param_2.get("bboxloss_using_focus_loss",False), \
              use_dense_boxes=ssd_Param_2.get('bboxloss_use_dense_boxes',False), \
              stage=2,lr_mult=lr_mult, decay_mult=decay_mult)
         # make Loss or Detout for SSD1
         if train:
             loss_param = get_loss_param(normalization=ssd_Param_2.get("bboxloss_normalization",P.Loss.VALID))
             mbox_2_layers.append(net[gt_label])
             use_dense_boxes = ssd_Param_2.get('bboxloss_use_dense_boxes',False)
             if use_dense_boxes:
                 bboxloss_param = {
                     'gt_labels': ssd_Param_2.get('gt_labels',[]),
                     'target_labels': ssd_Param_2.get('target_labels',[]),
                     'num_classes':ssd_Param_2.get("num_classes",2),
                     'alias_id':ssd_Param_2.get("alias_id",0),
                     'loc_loss_type':ssd_Param_2.get("bboxloss_loc_loss_type",P.MultiBoxLoss.SMOOTH_L1),
                     'conf_loss_type':ssd_Param_2.get("bboxloss_conf_loss_type",P.MultiBoxLoss.LOGISTIC),
                     'loc_weight':ssd_Param_2.get("bboxloss_loc_weight",1),
                     'conf_weight':ssd_Param_2.get("bboxloss_conf_weight",1),
                     'overlap_threshold':ssd_Param_2.get("bboxloss_overlap_threshold",0.5),
                     'neg_overlap':ssd_Param_2.get("bboxloss_neg_overlap",0.5),
                     'size_threshold':ssd_Param_2.get("bboxloss_size_threshold",0.0001),
                     'do_neg_mining':ssd_Param_2.get("bboxloss_do_neg_mining",True),
                     'neg_pos_ratio':ssd_Param_2.get("bboxloss_neg_pos_ratio",3),
                     'using_focus_loss':ssd_Param_2.get("bboxloss_using_focus_loss",False),
                     'gama':ssd_Param_2.get("bboxloss_focus_gama",2),
                     'use_difficult_gt':ssd_Param_2.get("bboxloss_use_difficult_gt",False),
                     'code_type':ssd_Param_2.get("bboxloss_code_type",P.PriorBox.CENTER_SIZE),
                     'use_prior_for_matching':True,
                     'encode_variance_in_target': False,
                     'flag_noperson': ssd_Param_2.get('flag_noperson', False),
                 }
                 net["mbox_2_loss"] = L.DenseBBoxLoss(*mbox_2_layers, dense_bbox_loss_param=bboxloss_param, \
                                         loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                                         propagate_down=[True, True, False, False])
             else:
                 bboxloss_param = {
                     'gt_labels': ssd_Param_2.get('gt_labels',[]),
                     'target_labels': ssd_Param_2.get('target_labels',[]),
                     'num_classes':ssd_Param_2.get("num_classes",2),
                     'alias_id':ssd_Param_2.get("alias_id",0),
                     'loc_loss_type':ssd_Param_2.get("bboxloss_loc_loss_type",P.MultiBoxLoss.SMOOTH_L1),
                     'conf_loss_type':ssd_Param_2.get("bboxloss_conf_loss_type",P.MultiBoxLoss.SOFTMAX),
                     'loc_weight':ssd_Param_2.get("bboxloss_loc_weight",1),
                     'conf_weight':ssd_Param_2.get("bboxloss_conf_weight",1),
                     'overlap_threshold':ssd_Param_2.get("bboxloss_overlap_threshold",0.5),
                     'neg_overlap':ssd_Param_2.get("bboxloss_neg_overlap",0.5),
                     'size_threshold':ssd_Param_2.get("bboxloss_size_threshold",0.0001),
                     'do_neg_mining':ssd_Param_2.get("bboxloss_do_neg_mining",True),
                     'neg_pos_ratio':ssd_Param_2.get("bboxloss_neg_pos_ratio",3),
                     'using_focus_loss':ssd_Param_2.get("bboxloss_using_focus_loss",False),
                     'gama':ssd_Param_2.get("bboxloss_focus_gama",2),
                     'use_difficult_gt':ssd_Param_2.get("bboxloss_use_difficult_gt",False),
                     'code_type':ssd_Param_2.get("bboxloss_code_type",P.PriorBox.CENTER_SIZE),
                     'match_type':P.MultiBoxLoss.PER_PREDICTION,
                     'share_location':True,
                     'use_prior_for_matching':True,
                     'background_label_id':0,
                     'encode_variance_in_target': False,
                     'map_object_to_agnostic':False,
                 }
                 net["mbox_2_loss"] = L.BBoxLoss(*mbox_2_layers, bbox_loss_param=bboxloss_param, \
                             loss_param=loss_param,include=dict(phase=caffe_pb2.Phase.Value('TRAIN')), \
                             propagate_down=[True, True, False, False])
         else:
             if ssd_Param_2.get("bboxloss_conf_loss_type",P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.SOFTMAX:
                 reshape_name = "mbox_2_conf_reshape"
                 net[reshape_name] = L.Reshape(mbox_2_layers[1], \
                         shape=dict(dim=[0, -1, ssd_Param_2.get("num_classes",2)]))
                 softmax_name = "mbox_2_conf_softmax"
                 net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
                 flatten_name = "mbox_2_conf_flatten"
                 net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
                 mbox_2_layers[1] = net[flatten_name]
             elif ssd_Param_2.get("bboxloss_conf_loss_type",P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.LOGISTIC:
                 sigmoid_name = "mbox_2_conf_sigmoid"
                 net[sigmoid_name] = L.Sigmoid(mbox_2_layers[1])
                 mbox_2_layers[1] = net[sigmoid_name]
             else:
                 raise ValueError("Unknown conf loss type.")
             # Det-out param
             det_out_param = {
                 'num_classes':ssd_Param_2.get("num_classes",2),
                 'target_labels': ssd_Param_2.get('detout_target_labels',[]),
                 'alias_id':ssd_Param_2.get("alias_id",0),
                 'conf_threshold':ssd_Param_2.get("detout_conf_threshold",0.01),
                 'nms_threshold':ssd_Param_2.get("detout_nms_threshold",0.45),
                 'size_threshold':ssd_Param_2.get("detout_size_threshold",0.0001),
                 'top_k':ssd_Param_2.get("detout_top_k",30),
                 'share_location':True,
                 'code_type':P.PriorBox.CENTER_SIZE,
                 'background_label_id':0,
                 'variance_encoded_in_target':False,
             }
             use_dense_boxes = ssd_Param_2.get('bboxloss_use_dense_boxes',False)
             if use_dense_boxes:
                 net.detection_out_2 = L.DenseDetOut(*mbox_2_layers, \
             	  	detection_output_param=det_out_param, \
             	  	include=dict(phase=caffe_pb2.Phase.Value('TEST')))
             else:
                 net.detection_out_2 = L.DetOut(*mbox_2_layers, \
         	  		detection_output_param=det_out_param, \
         	  		include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    # EVAL in TEST MODE
    if not train:
        det_eval_param = {
            'gt_labels': eval_Param.get('eval_gt_labels',[]),
            'num_classes':eval_Param.get("eval_num_classes",2),
            'evaluate_difficult_gt':eval_Param.get("eval_difficult_gt",False),
            'boxsize_threshold':eval_Param.get("eval_boxsize_threshold",[0,0.01,0.05,0.1,0.15,0.2,0.25]),
            'iou_threshold':eval_Param.get("eval_iou_threshold",[0.9,0.75,0.5]),
            'background_label_id':0,
        }
        if use_ssd2_for_detection:
            det_out_layers = []
            det_out_layers.append(net['detection_out_1'])
            det_out_layers.append(net['detection_out_2'])
            name = 'det_out'
            net[name] = L.Concat(*det_out_layers, axis=2)
            net.det_accu = L.DetEval(net[name], net[gt_label], \
                	  detection_evaluate_param=det_eval_param, \
                	  include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.det_accu = L.DetEval(net['detection_out_1'], net[gt_label], \
                	  detection_evaluate_param=det_eval_param, \
                	  include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    if train:
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp = \
            L.Slice(net["label"+pose_string], ntop=4, slice_param=dict(slice_point=[34, 52, 86], axis=1))
    else:
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp, net.gt = \
            L.Slice(net["label"+pose_string], ntop=5, slice_param=dict(slice_point=[34, 52, 86, 104], axis=1))
    # label
    net.vec_label = L.Eltwise(net.vec_mask, net.vec_temp, eltwise_param=dict(operation=P.Eltwise.PROD))
    net.heat_label = L.Eltwise(net.heat_mask, net.heat_temp, eltwise_param=dict(operation=P.Eltwise.PROD))
    ###pose
    pose_test_kwargs={
    # nms
    'nms_threshold': 0.05,
    'nms_max_peaks': 500,
    'nms_num_parts': 18,
    # connect
    'conn_is_type_coco': True,
    'conn_max_person': 10,
    'conn_max_peaks_use': 20,
    'conn_iters_pa_cal': 10,
    'conn_connect_inter_threshold': 0.05,
    'conn_connect_inter_min_nums': 8,
    'conn_connect_min_subset_cnt': 3,
    'conn_connect_min_subset_score': 0.4,
    # visual
    'eval_area_thre': 64*64,
    'eval_oks_thre': [0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9],
    }


    bn_kwargs = {
        'param': [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
        'eps': 0.001,
    }
    sb_kwargs = {
        'bias_term': True,
        'param': [dict(lr_mult=1, decay_mult=0), dict(lr_mult=1, decay_mult=0)],
        'filler': dict(type='constant', value=1.0),
        'bias_filler': dict(type='constant', value=0.2),
    }
    deconv_param = {
        'num_output': 128,
        'kernel_size': 2,
        'pad': 0,
        'stride': 2,
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0),
        'group': 1,
    }
    kwargs_deconv = {
        'param': [dict(lr_mult=1, decay_mult=1)],
        'convolution_param': deconv_param
    }

    from_layer = "conv5_5" + pose_string
    add_layer = from_layer + "_deconv"
    net[add_layer] = L.Deconvolution(net[from_layer], **kwargs_deconv)
    bn_name = add_layer + '_bn'
    net[bn_name] = L.BatchNorm(net[add_layer], in_place=True, **bn_kwargs)
    sb_name = add_layer + '_scale'
    net[sb_name] = L.Scale(net[add_layer], in_place=True, **sb_kwargs)
    relu_name = add_layer + '_relu'
    net[relu_name] = L.ReLU(net[add_layer], in_place=True)
    baselayer = add_layer
    use_stage = 3
    use_3_layers = 5
    use_1_layers = 0
    n_channel = 64
    lrdecay = 1.0
    kernel_size = 3
    flag_output_sigmoid = False
    for stage in xrange(use_stage):
        if stage == 0:
            from_layer = baselayer
        else:
            from_layer = "concat_stage{}".format(stage)
        outlayer = "concat_stage{}".format(stage + 1)
        if stage == use_stage - 1:
            short_cut = False
        else:
            short_cut = True
        net = mPose_StageX_Train(net, from_layer=from_layer, out_layer=outlayer, stage=stage + 1,
                                 mask_vec="vec_mask", mask_heat="heat_mask", \
                                 label_vec="vec_label", label_heat="heat_label", \
                                 use_3_layers=use_3_layers, use_1_layers=use_1_layers, short_cut=short_cut, \
                                 base_layer=baselayer, lr=0.1, decay=lrdecay, num_channels=n_channel,
                                 kernel_size=kernel_size, flag_sigmoid=flag_output_sigmoid)

    # for Test
    if not train:
        if flag_output_sigmoid:
            conv_vec = "stage{}_conv{}_vec".format(use_stage, use_3_layers + use_1_layers) + "_sig"
            conv_heat = "stage{}_conv{}_heat".format(use_stage, use_3_layers + use_1_layers) + "_sig"
        else:
            conv_vec = "stage{}_conv{}_vec".format(use_stage, use_3_layers + use_1_layers)
            conv_heat = "stage{}_conv{}_heat".format(use_stage, use_3_layers + use_1_layers)
        net.vec_out = L.Eltwise(net.vec_mask, net[conv_vec], eltwise_param=dict(operation=P.Eltwise.PROD))
        net.heat_out = L.Eltwise(net.heat_mask, net[conv_heat], eltwise_param=dict(operation=P.Eltwise.PROD))
        feaLayers = []
        feaLayers.append(net.heat_out)
        feaLayers.append(net.vec_out)
        outlayer = "concat_stage{}".format(3)
        net[outlayer] = L.Concat(*feaLayers, axis=1)
        # Resize
        resize_kwargs = {
            'factor': pose_test_kwargs.get("resize_factor", 8),
            'scale_gap': pose_test_kwargs.get("resize_scale_gap", 0.3),
            'start_scale': pose_test_kwargs.get("resize_start_scale", 1.0),
        }
        net.resized_map = L.ImResize(net[outlayer], name="resize", imresize_param=resize_kwargs)
        # Nms
        nms_kwargs = {
            'threshold': pose_test_kwargs.get("nms_threshold", 0.05),
            'max_peaks': pose_test_kwargs.get("nms_max_peaks", 100),
            'num_parts': pose_test_kwargs.get("nms_num_parts", 18),
        }
        net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_kwargs)
        # ConnectLimbs
        connect_kwargs = {
            'is_type_coco': pose_test_kwargs.get("conn_is_type_coco", True),
            'max_person': pose_test_kwargs.get("conn_max_person", 10),
            'max_peaks_use': pose_test_kwargs.get("conn_max_peaks_use", 20),
            'iters_pa_cal': pose_test_kwargs.get("conn_iters_pa_cal", 10),
            'connect_inter_threshold': pose_test_kwargs.get("conn_connect_inter_threshold", 0.05),
            'connect_inter_min_nums': pose_test_kwargs.get("conn_connect_inter_min_nums", 8),
            'connect_min_subset_cnt': pose_test_kwargs.get("conn_connect_min_subset_cnt", 3),
            'connect_min_subset_score': pose_test_kwargs.get("conn_connect_min_subset_score", 0.4),
        }
        net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_kwargs)
        # Eval
        eval_kwargs = {
            'stride': 8,
            'area_thre': pose_test_kwargs.get("eval_area_thre", 64 * 64),
            'oks_thre': pose_test_kwargs.get("eval_oks_thre", [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9]),
        }
        net.eval = L.PoseEval(net.limbs, net.gt, pose_eval_param=eval_kwargs)

    return net
예제 #5
0
def RemPoseNet_Test(net,
                    from_layer="data",
                    frame_layer="orig_data",
                    **pose_kwargs):
    """Assemble the test-time pose network with visualisation on ``net``.

    The graph is built in this order: ``RemBaseNet`` backbone, fusion of one
    stage-4 and one stage-5 feature layer into ``"convf"`` through
    ``UnifiedMultiScaleLayers``, three pose stages, a concat of the final
    heat/vec outputs, then ``ImResize`` -> ``Nms`` -> ``Connectlimb`` ->
    ``Visualizepose``.

    Args:
        net: the net specification the layers are added to.
        from_layer: name of the input layer handed to ``RemBaseNet``.
        frame_layer: name of the layer in ``net`` passed to the visualiser
            as its first bottom.
        **pose_kwargs: optional overrides, read with ``.get`` and the
            defaults shown in the body. Recognised prefixes are
            ``resize_*``, ``nms_*``, ``conn_*`` and ``visual_*``.

    Returns:
        The same ``net`` with the added layers; the last one is
        ``net.visual``.

    Note:
        ``base_use_bn``, ``conv_stage_name``, ``stage4_layers``,
        ``stage5_layers``, ``use_stride_conv``, ``baselayer`` and
        ``use_layers`` are not defined in this function; they are read from
        the enclosing (module) scope.
    """
    # BaseNet
    net = RemBaseNet(net,
                     from_layer=from_layer,
                     use_bn=base_use_bn,
                     use_conv6=False,
                     lr=1,
                     decay=1)

    # Pick the feature layer of stage 5 and stage 4: the last layer of the
    # stage, or the one before it when the stage's use_stride_conv flag is set.
    stage5_idx = len(stage5_layers) - 1 if use_stride_conv[4] else len(stage5_layers)
    stage4_idx = len(stage4_layers) - 1 if use_stride_conv[3] else len(stage4_layers)
    stage_5 = "{}_{}".format(conv_stage_name[4], stage5_idx)
    stage_4 = "{}_{}".format(conv_stage_name[3], stage4_idx)
    net = UnifiedMultiScaleLayers(net,
                                  layers=[stage_4, stage_5],
                                  tags=["Ref", "Up"],
                                  unifiedlayer="convf",
                                  upsampleMethod="Reorg")

    # Pose stages. Stages 2 and 3 previously called RemPoseStage_Train while
    # stage 1 called RemPoseStage_Test with the very same keyword set; a test
    # graph should use the test-stage builder throughout.
    # NOTE(review): assumes RemPoseStage_Test accepts these keywords for every
    # stage, as it already does for stage 1 - confirm against its definition.
    # STG#1
    net = RemPoseStage_Test(net, from_layer=baselayer, out_layer="concat_stage1", stage=1,
                            short_cut=True, base_layer=baselayer, lr=1, decay=1)
    # STG#2
    net = RemPoseStage_Test(net, from_layer="concat_stage1", out_layer="concat_stage2", stage=2,
                            short_cut=True, base_layer=baselayer, lr=1, decay=1)
    # STG#3 (no short cut: its outputs are concatenated explicitly below)
    net = RemPoseStage_Test(net, from_layer="concat_stage2", out_layer="concat_stage3", stage=3,
                            short_cut=False, base_layer=baselayer, lr=1, decay=1)

    # Concatenate the final-stage outputs, heat first and vec second.
    final_stage = 3
    conv_vec = "stage{}_conv{}_vec".format(final_stage, use_layers)
    conv_heat = "stage{}_conv{}_heat".format(final_stage, use_layers)
    outlayer = "concat_stage{}".format(final_stage)
    net[outlayer] = L.Concat(net[conv_heat], net[conv_vec], axis=1)

    # Resize
    resize_kwargs = {
        'factor': pose_kwargs.get("resize_factor", 2),
        'scale_gap': pose_kwargs.get("resize_scale_gap", 0.3),
        'start_scale': pose_kwargs.get("resize_start_scale", 1.0),
    }
    net.resized_map = L.ImResize(net[outlayer],
                                 name="resize",
                                 imresize_param=resize_kwargs)

    # Nms
    nms_kwargs = {
        'threshold': pose_kwargs.get("nms_threshold", 0.05),
        'max_peaks': pose_kwargs.get("nms_max_peaks", 100),
        'num_parts': pose_kwargs.get("nms_num_parts", 18),
    }
    net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_kwargs)

    # ConnectLimbs
    connect_kwargs = {
        'is_type_coco': pose_kwargs.get("conn_is_type_coco", True),
        'max_person': pose_kwargs.get("conn_max_person", 10),
        'max_peaks_use': pose_kwargs.get("conn_max_peaks_use", 20),
        'iters_pa_cal': pose_kwargs.get("conn_iters_pa_cal", 10),
        'connect_inter_threshold': pose_kwargs.get("conn_connect_inter_threshold", 0.05),
        'connect_inter_min_nums': pose_kwargs.get("conn_connect_inter_min_nums", 8),
        'connect_min_subset_cnt': pose_kwargs.get("conn_connect_min_subset_cnt", 3),
        'connect_min_subset_score': pose_kwargs.get("conn_connect_min_subset_score", 0.4),
    }
    net.limbs = L.Connectlimb(net.resized_map,
                              net.joints,
                              connect_limb_param=connect_kwargs)

    # VisualizePose ('is_type_coco' deliberately reuses the conn_* key so the
    # visualiser and the limb connector always agree).
    visual_kwargs = {
        'is_type_coco': pose_kwargs.get("conn_is_type_coco", True),
        'type': pose_kwargs.get("visual_type", P.Visualizepose.POSE),
        'visualize': pose_kwargs.get("visual_visualize", True),
        'draw_skeleton': pose_kwargs.get("visual_draw_skeleton", True),
        'print_score': pose_kwargs.get("visual_print_score", False),
        'part_id': pose_kwargs.get("visual_part_id", 0),
        'from_part': pose_kwargs.get("visual_from_part", 0),
        'vec_id': pose_kwargs.get("visual_vec_id", 0),
        'from_vec': pose_kwargs.get("visual_from_vec", 0),
        'pose_threshold': pose_kwargs.get("visual_pose_threshold", 0.05),
        'write_frames': pose_kwargs.get("visual_write_frames", False),
        'output_directory': pose_kwargs.get("visual_output_directory", ""),
    }
    net.visual = L.Visualizepose(net[frame_layer],
                                 net.resized_map,
                                 net.limbs,
                                 visualize_pose_param=visual_kwargs)
    return net