def VGG19_PoseNet_COCO_3S_Train(net, data_layer="data", label_layer="label", train=True, **pose_test_kwargs):
    """Assemble the three-stage VGG19 pose network with masked labels.

    The label blob is sliced along axis 1 into ``vec_mask``, ``heat_mask``,
    ``vec_temp`` and ``heat_temp`` (plus ``gt`` when ``train`` is False).
    The masked labels supervise three pose stages built on top of the
    VGG19 front-end.  When ``train`` is False the stage-3 outputs are masked,
    concatenated and passed through resize / NMS / limb connection / pose
    evaluation layers.

    Args:
        net: Net specification the layers are added to.
        data_layer: Name of the input image layer fed to the base network.
        label_layer: Name of the label blob that is sliced into masks/targets.
        train: Build the training graph when True, the evaluation graph otherwise.
        **pose_test_kwargs: Optional overrides for the resize / nms / connect /
            eval parameters (keys prefixed ``resize_``, ``nms_``, ``conn_``,
            ``eval_``); only read when ``train`` is False.

    Returns:
        The net specification with all layers attached.
    """
    def masked(mask, blob):
        # Element-wise product of a mask with a blob.
        return L.Eltwise(mask, blob, eltwise_param=dict(operation=P.Eltwise.PROD))

    # Slice for label and mask; the evaluation graph carries one extra slice (gt).
    top_names = ["vec_mask", "heat_mask", "vec_temp", "heat_temp"]
    slice_points = [38, 57, 95]
    if not train:
        top_names.append("gt")
        slice_points.append(114)
    sliced_tops = L.Slice(net[label_layer], ntop=len(top_names),
                          slice_param=dict(slice_point=slice_points, axis=1))
    for top_name, top in zip(top_names, sliced_tops):
        net[top_name] = top

    # Label
    net.vec_label = masked(net.vec_mask, net.vec_temp)
    net.heat_label = masked(net.heat_mask, net.heat_temp)

    # baseNet-VGG19
    net = VGG19Net_Pre10(net, from_layer=data_layer)

    # conv4_3_CPM & conv4_4_CPM share the same learning-rate and filler setup.
    cpm_conv_kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0),
    }
    net.conv4_3_CPM = L.Convolution(net.relu4_2, num_output=256, pad=1, kernel_size=3, **cpm_conv_kwargs)
    net.relu4_3_CPM = L.ReLU(net.conv4_3_CPM, in_place=True)
    net.conv4_4_CPM = L.Convolution(net.relu4_3_CPM, num_output=128, pad=1, kernel_size=3, **cpm_conv_kwargs)
    net.relu4_4_CPM = L.ReLU(net.conv4_4_CPM, in_place=True)

    # Stage1
    net = Pose_Stage1_COCO_train(
        net, from_layer="relu4_4_CPM", out_layer="concat_stage2",
        mask_L1="vec_mask", mask_L2="heat_mask",
        label_L1="vec_label", label_L2="heat_label", lr=1, decay=1)
    # Stage2-3
    net = Pose_StageX_COCO_train(
        net, from_layer="concat_stage2", out_layer="concat_stage3", stage=2,
        mask_L1="vec_mask", mask_L2="heat_mask",
        label_L1="vec_label", label_L2="heat_label",
        short_cut=True, base_layer="relu4_4_CPM", lr=4, decay=1)
    net = Pose_StageX_COCO_train(
        net, from_layer="concat_stage3", out_layer="concat_stage4", stage=3,
        mask_L1="vec_mask", mask_L2="heat_mask",
        label_L1="vec_label", label_L2="heat_label",
        short_cut=False, lr=4, decay=1)

    # The training graph stops here; everything below is evaluation-only.
    if train:
        return net

    option = pose_test_kwargs.get

    # Mask the final stage outputs and stack them (heat first, then vec).
    net.vec_out = masked(net.vec_mask, net.Mconv7_stage3_L1)
    net.heat_out = masked(net.heat_mask, net.Mconv7_stage3_L2)
    net["concat_stage4"] = L.Concat(net.heat_out, net.vec_out, axis=1)

    # Resize
    resize_kwargs = {
        'factor': option("resize_factor", 8),
        'scale_gap': option("resize_scale_gap", 0.3),
        'start_scale': option("resize_start_scale", 1.0),
    }
    net.resized_map = L.ImResize(net.concat_stage4, name="resize", imresize_param=resize_kwargs)

    # Nms
    nms_kwargs = {
        'threshold': option("nms_threshold", 0.05),
        'max_peaks': option("nms_max_peaks", 64),
        'num_parts': option("nms_num_parts", 18),
    }
    net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_kwargs)

    # ConnectLimbs
    connect_kwargs = {
        'is_type_coco': option("conn_is_type_coco", True),
        'max_person': option("conn_max_person", 20),
        'max_peaks_use': option("conn_max_peaks_use", 32),
        'iters_pa_cal': option("conn_iters_pa_cal", 10),
        'connect_inter_threshold': option("conn_connect_inter_threshold", 0.05),
        'connect_inter_min_nums': option("conn_connect_inter_min_nums", 8),
        'connect_min_subset_cnt': option("conn_connect_min_subset_cnt", 3),
        'connect_min_subset_score': option("conn_connect_min_subset_score", 0.3),
    }
    net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_kwargs)

    # Eval
    eval_kwargs = {
        'stride': 8,
        'area_thre': option("eval_area_thre", 96*96),
        'oks_thre': option("eval_oks_thre", [0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9]),
    }
    net.eval = L.PoseEval(net.limbs, net.gt, pose_eval_param=eval_kwargs)
    return net
def VGG19_PoseNet_Stage3_COCO_Test(net, from_layer="data", frame_layer="orig_data", **pose_kwargs):
    """Build the three-stage VGG19 pose network for inference/visualisation.

    The base network and three pose stages are followed by resize, NMS,
    limb-connection and pose-visualisation layers.

    Args:
        net: Net specification the layers are added to.
        from_layer: Name of the input image layer; must already exist in ``net``.
        frame_layer: Name of the layer holding the frame passed to the
            visualisation layer.
        **pose_kwargs: Optional overrides for the resize / nms / connect /
            visual parameters (keys prefixed ``resize_``, ``nms_``, ``conn_``,
            ``visual_``).

    Returns:
        The net specification with all layers attached.
    """
    # baseNet-VGG19
    assert from_layer in net.keys()
    # Feed the base net from the requested layer.  The previous code validated
    # ``from_layer`` but then always used the hard-coded name "data".
    net = VGG19Net_Pre10(net, from_layer=from_layer)

    # conv4_3_CPM & conv4_4_CPM
    kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0),
    }
    net.conv4_3_CPM = L.Convolution(net.relu4_2, num_output=256, pad=1, kernel_size=3, **kwargs)
    net.relu4_3_CPM = L.ReLU(net.conv4_3_CPM, in_place=True)
    net.conv4_4_CPM = L.Convolution(net.relu4_3_CPM, num_output=128, pad=1, kernel_size=3, **kwargs)
    net.relu4_4_CPM = L.ReLU(net.conv4_4_CPM, in_place=True)

    # Stage1
    net = Pose_Stage1_COCO(net, from_layer="relu4_4_CPM", out_layer="concat_stage2", lr=1, decay=1)
    # Stage2-3
    net = Pose_StageX_COCO(net, from_layer="concat_stage2", out_layer="concat_stage3", stage=2,
                           short_cut=True, base_layer="relu4_4_CPM", lr=4, decay=1)
    net = Pose_StageX_COCO(net, from_layer="concat_stage3", out_layer="concat_stage4", stage=3,
                           short_cut=False, lr=4, decay=1)

    # concat the output layers (L2 output first, then L1)
    feaLayers = [net["Mconv7_stage3_L2"], net["Mconv7_stage3_L1"]]
    net["concat_stage4"] = L.Concat(*feaLayers, axis=1)

    # Resize
    resize_kwargs = {
        'factor': pose_kwargs.get("resize_factor", 8),
        'scale_gap': pose_kwargs.get("resize_scale_gap", 0.3),
        'start_scale': pose_kwargs.get("resize_start_scale", 1.0),
    }
    net.resized_map = L.ImResize(net.concat_stage4, name="resize", imresize_param=resize_kwargs)

    # Nms
    nms_kwargs = {
        'threshold': pose_kwargs.get("nms_threshold", 0.05),
        'max_peaks': pose_kwargs.get("nms_max_peaks", 64),
        'num_parts': pose_kwargs.get("nms_num_parts", 18),
    }
    net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_kwargs)

    # ConnectLimbs
    connect_kwargs = {
        'is_type_coco': pose_kwargs.get("conn_is_type_coco", True),
        'max_person': pose_kwargs.get("conn_max_person", 20),
        'max_peaks_use': pose_kwargs.get("conn_max_peaks_use", 32),
        'iters_pa_cal': pose_kwargs.get("conn_iters_pa_cal", 10),
        'connect_inter_threshold': pose_kwargs.get("conn_connect_inter_threshold", 0.05),
        'connect_inter_min_nums': pose_kwargs.get("conn_connect_inter_min_nums", 8),
        'connect_min_subset_cnt': pose_kwargs.get("conn_connect_min_subset_cnt", 3),
        'connect_min_subset_score': pose_kwargs.get("conn_connect_min_subset_score", 0.3),
    }
    net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_kwargs)

    # VisualizePose ('is_type_coco' deliberately reuses the connect option key)
    visual_kwargs = {
        'is_type_coco': pose_kwargs.get("conn_is_type_coco", True),
        'type': pose_kwargs.get("visual_type", P.Visualizepose.POSE),
        'visualize': pose_kwargs.get("visual_visualize", True),
        'draw_skeleton': pose_kwargs.get("visual_draw_skeleton", True),
        'print_score': pose_kwargs.get("visual_print_score", False),
        'part_id': pose_kwargs.get("visual_part_id", 0),
        'from_part': pose_kwargs.get("visual_from_part", 0),
        'vec_id': pose_kwargs.get("visual_vec_id", 0),
        'from_vec': pose_kwargs.get("visual_from_vec", 0),
        'pose_threshold': pose_kwargs.get("visual_pose_threshold", 0.05),
        'write_frames': pose_kwargs.get("visual_write_frames", False),
        'output_directory': pose_kwargs.get("visual_output_directory", ""),
    }
    net.finished = L.Visualizepose(net[frame_layer], net.resized_map, net.limbs,
                                   visualize_pose_param=visual_kwargs)
    return net
def mPoseNet_Decomp_3S_Test(net, from_layer="data", frame_layer="orig_data", use_bn=True, **kwargs):
    """Build the decomposed three-stage pose network for inference/visualisation.

    A base network (``YoloNetPart_Decomp``) feeds a unified multi-scale layer
    ``convf``; three pose stages follow, then resize, NMS, limb-connection and
    pose-visualisation layers.

    Args:
        net: Net specification the layers are added to.
        from_layer: Name of the input image layer.
        frame_layer: Name of the layer holding the frame passed to the
            visualisation layer.
        use_bn: Forwarded to the base network builder.
        **kwargs: Forwarded unchanged to the base network and stage builders,
            and also read for the optional ``resize_`` / ``nms_`` / ``conn_`` /
            ``visual_`` overrides.

    Returns:
        The net specification with all layers attached.
    """
    # Darknet19
    net = YoloNetPart_Decomp(net, from_layer=from_layer, use_bn=use_bn, use_layers=5,
                             use_sub_layers=5, final_pool=False, lr=1, decay=1, **kwargs)
    # concat conv4_3 & conv5_5
    net = UnifiedMultiScaleLayers(net, layers=["conv4_3_c", "conv5_5_c"], tags=["Ref", "Up"],
                                  unifiedlayer="convf", upsampleMethod="Reorg")

    # Stages: (stage index, input layer, output layer, short_cut)
    baselayer = "convf"
    use_3_layers = 5
    use_1_layers = 0
    stage_plan = (
        (1, baselayer, "concat_stage1", True),
        (2, "concat_stage1", "concat_stage2", True),
        (3, "concat_stage2", "concat_stage3", False),
    )
    for stage_idx, stage_input, stage_output, with_short_cut in stage_plan:
        net = mPose_StageX_decomp_Test(
            net, from_layer=stage_input, out_layer=stage_output, stage=stage_idx,
            use_3_layers=use_3_layers, use_1_layers=use_1_layers,
            short_cut=with_short_cut, base_layer=baselayer, **kwargs)

    # Stack the last stage's outputs (heat first, then vec).
    last_conv = use_3_layers + use_1_layers
    conv_vec = "stage{}_conv{}_vec_c".format(3, last_conv)
    conv_heat = "stage{}_conv{}_heat_c".format(3, last_conv)
    outlayer = "concat_stage{}".format(3)
    net[outlayer] = L.Concat(net[conv_heat], net[conv_vec], axis=1)

    option = kwargs.get

    # Resize
    resize_kwargs = {
        'factor': option("resize_factor", 8),
        'scale_gap': option("resize_scale_gap", 0.3),
        'start_scale': option("resize_start_scale", 1.0),
    }
    net.resized_map = L.ImResize(net[outlayer], name="resize", imresize_param=resize_kwargs)

    # Nms
    nms_kwargs = {
        'threshold': option("nms_threshold", 0.05),
        'max_peaks': option("nms_max_peaks", 64),
        'num_parts': option("nms_num_parts", 18),
    }
    net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_kwargs)

    # ConnectLimbs
    connect_kwargs = {
        'is_type_coco': option("conn_is_type_coco", True),
        'max_person': option("conn_max_person", 20),
        'max_peaks_use': option("conn_max_peaks_use", 32),
        'iters_pa_cal': option("conn_iters_pa_cal", 10),
        'connect_inter_threshold': option("conn_connect_inter_threshold", 0.05),
        'connect_inter_min_nums': option("conn_connect_inter_min_nums", 8),
        'connect_min_subset_cnt': option("conn_connect_min_subset_cnt", 3),
        'connect_min_subset_score': option("conn_connect_min_subset_score", 0.3),
    }
    net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_kwargs)

    # VisualizePose
    visual_kwargs = {
        'is_type_coco': option("conn_is_type_coco", True),
        'type': option("visual_type", P.Visualizepose.POSE),
        'visualize': option("visual_visualize", True),
        'draw_skeleton': option("visual_draw_skeleton", True),
        'print_score': option("visual_print_score", False),
        'part_id': option("visual_part_id", 0),
        'from_part': option("visual_from_part", 0),
        'vec_id': option("visual_vec_id", 0),
        'from_vec': option("visual_from_vec", 0),
        'pose_threshold': option("visual_pose_threshold", 0.05),
        'write_frames': option("visual_write_frames", False),
        'output_directory': option("visual_output_directory", ""),
    }
    net.finished = L.Visualizepose(net[frame_layer], net.resized_map, net.limbs,
                                   visualize_pose_param=visual_kwargs)
    return net
def _dap_bbox_loss_param(ssd_param, use_dense_boxes):
    """Assemble the bounding-box loss parameter dict for one SSD stage.

    Args:
        ssd_param: Configuration dict of the SSD stage.
        use_dense_boxes: Selects the dense-box variant (different default
            confidence loss and a different set of extra keys).

    Returns:
        Dict suitable for ``dense_bbox_loss_param`` / ``bbox_loss_param``.
    """
    if use_dense_boxes:
        default_conf_loss = P.MultiBoxLoss.LOGISTIC
    else:
        default_conf_loss = P.MultiBoxLoss.SOFTMAX
    # Keys shared by the dense and the regular loss.
    param = {
        'gt_labels': ssd_param.get('gt_labels', []),
        'target_labels': ssd_param.get('target_labels', []),
        'num_classes': ssd_param.get("num_classes", 2),
        'alias_id': ssd_param.get("alias_id", 0),
        'loc_loss_type': ssd_param.get("bboxloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
        'conf_loss_type': ssd_param.get("bboxloss_conf_loss_type", default_conf_loss),
        'loc_weight': ssd_param.get("bboxloss_loc_weight", 1),
        'conf_weight': ssd_param.get("bboxloss_conf_weight", 1),
        'overlap_threshold': ssd_param.get("bboxloss_overlap_threshold", 0.5),
        'neg_overlap': ssd_param.get("bboxloss_neg_overlap", 0.5),
        'size_threshold': ssd_param.get("bboxloss_size_threshold", 0.0001),
        'do_neg_mining': ssd_param.get("bboxloss_do_neg_mining", True),
        'neg_pos_ratio': ssd_param.get("bboxloss_neg_pos_ratio", 3),
        'using_focus_loss': ssd_param.get("bboxloss_using_focus_loss", False),
        'gama': ssd_param.get("bboxloss_focus_gama", 2),
        'use_difficult_gt': ssd_param.get("bboxloss_use_difficult_gt", False),
        'code_type': ssd_param.get("bboxloss_code_type", P.PriorBox.CENTER_SIZE),
        'use_prior_for_matching': True,
        'encode_variance_in_target': False,
    }
    if use_dense_boxes:
        param['flag_noperson'] = ssd_param.get('flag_noperson', False)
    else:
        param.update({
            'match_type': P.MultiBoxLoss.PER_PREDICTION,
            'share_location': True,
            'background_label_id': 0,
            'map_object_to_agnostic': False,
        })
    return param


def _dap_ssd_stage(net, ssd_param, stage, train, data_layer, gt_label,
                   net_width, net_height, lr_mult, decay_mult):
    """Add one SSD stage to ``net``: detector heads plus loss (train) or det-out (test).

    Layers are named ``mbox_<stage>_*`` and ``detection_out_<stage>``.
    ``net`` is modified in place.
    """
    mbox_layers = SsdDetectorHeaders(
        net,
        net_width=net_width, net_height=net_height, data_layer=data_layer,
        from_layers=ssd_param.get('feature_layers', []),
        num_classes=ssd_param.get("num_classes", 2),
        boxsizes=ssd_param.get("anchor_boxsizes", []),
        aspect_ratios=ssd_param.get("anchor_aspect_ratios", []),
        prior_variance=ssd_param.get("anchor_prior_variance", [0.1, 0.1, 0.2, 0.2]),
        flip=ssd_param.get("anchor_flip", True),
        clip=ssd_param.get("anchor_clip", True),
        normalizations=ssd_param.get("interlayers_normalizations", []),
        use_batchnorm=ssd_param.get("interlayers_use_batchnorm", True),
        inter_layer_channels=ssd_param.get("interlayers_channels_kernels", []),
        use_focus_loss=ssd_param.get("bboxloss_using_focus_loss", False),
        use_dense_boxes=ssd_param.get('bboxloss_use_dense_boxes', False),
        stage=stage, lr_mult=lr_mult, decay_mult=decay_mult)
    prefix = "mbox_{}".format(stage)
    use_dense_boxes = ssd_param.get('bboxloss_use_dense_boxes', False)

    if train:
        # Loss layer: the heads plus the ground-truth blob as last bottom.
        loss_param = get_loss_param(
            normalization=ssd_param.get("bboxloss_normalization", P.Loss.VALID))
        mbox_layers.append(net[gt_label])
        bboxloss_param = _dap_bbox_loss_param(ssd_param, use_dense_boxes)
        loss_name = prefix + "_loss"
        if use_dense_boxes:
            net[loss_name] = L.DenseBBoxLoss(
                *mbox_layers, dense_bbox_loss_param=bboxloss_param,
                loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        else:
            net[loss_name] = L.BBoxLoss(
                *mbox_layers, bbox_loss_param=bboxloss_param,
                loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        return

    # Test graph: turn raw confidences into scores before the det-out layer.
    # NOTE(review): the default here is SOFTMAX even for dense boxes, whose
    # loss defaults to LOGISTIC -- confirm this asymmetry is intended.
    conf_loss_type = ssd_param.get("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
    if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
        reshape_name = prefix + "_conf_reshape"
        net[reshape_name] = L.Reshape(
            mbox_layers[1], shape=dict(dim=[0, -1, ssd_param.get("num_classes", 2)]))
        softmax_name = prefix + "_conf_softmax"
        net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
        flatten_name = prefix + "_conf_flatten"
        net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
        mbox_layers[1] = net[flatten_name]
    elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
        sigmoid_name = prefix + "_conf_sigmoid"
        net[sigmoid_name] = L.Sigmoid(mbox_layers[1])
        mbox_layers[1] = net[sigmoid_name]
    else:
        raise ValueError("Unknown conf loss type.")

    # Det-out param
    det_out_param = {
        'num_classes': ssd_param.get("num_classes", 2),
        'target_labels': ssd_param.get('detout_target_labels', []),
        'alias_id': ssd_param.get("alias_id", 0),
        'conf_threshold': ssd_param.get("detout_conf_threshold", 0.01),
        'nms_threshold': ssd_param.get("detout_nms_threshold", 0.45),
        'size_threshold': ssd_param.get("detout_size_threshold", 0.0001),
        'top_k': ssd_param.get("detout_top_k", 30),
        'share_location': True,
        'code_type': P.PriorBox.CENTER_SIZE,
        'background_label_id': 0,
        'variance_encoded_in_target': False,
    }
    det_out_name = "detection_out_{}".format(stage)
    if use_dense_boxes:
        net[det_out_name] = L.DenseDetOut(
            *mbox_layers, detection_output_param=det_out_param,
            include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    else:
        net[det_out_name] = L.DetOut(
            *mbox_layers, detection_output_param=det_out_param,
            include=dict(phase=caffe_pb2.Phase.Value('TEST')))


def DAPPoseNet(net, train=True, data_layer="data", gt_label="label",net_width=512, net_height=288):
    """Build the combined detection + pose network.

    Two base networks are created (one fed by ``data_layer``, one by
    ``data_layer + pose_string``), followed by conv6 layers, one or two SSD
    stages and a three-stage pose branch on top of a deconvolved
    ``conv5_5 + pose_string`` feature map.  In test mode, detection and pose
    evaluation layers are appended.

    Relies on module-level configuration defined outside this function:
    ``pose_string``, ``Conv6_Param``, ``ssd_Param_1``, ``ssd_Param_2``,
    ``use_ssd2_for_detection`` and ``eval_Param``.

    Args:
        net: Net specification the layers are added to.
        train: Build the training graph when True, the test graph otherwise.
        data_layer: Name of the detection input image layer.
        gt_label: Name of the detection ground-truth layer.
        net_width: Network input width forwarded to the SSD heads.
        net_height: Network input height forwarded to the SSD heads.

    Returns:
        The net specification with all layers attached.

    Raises:
        ValueError: In test mode, if an SSD stage's configured confidence loss
            type is neither SOFTMAX nor LOGISTIC.
    """
    # BaseNet
    channels = ((32,), (32,), (64, 32, 128), (128, 64, 128, 64, 256), (256, 128, 256, 128, 256))
    strides = (True, True, True, False, False)
    kernels = ((3,), (3,), (3, 1, 3), (3, 1, 3, 1, 3), (3, 1, 3, 1, 3))
    pool_last = (False, False, False, True, True)
    net = VGG16_BaseNet_ChangeChannel(net, from_layer=data_layer, channels=channels, strides=strides,
                                      kernels=kernels, freeze_layers=[], pool_last=pool_last,
                                      flag_withparamname=True)
    net = VGG16_BaseNet_ChangeChannel(net, from_layer=data_layer + pose_string, channels=channels,
                                      strides=strides, kernels=kernels, freeze_layers=[],
                                      pool_last=pool_last, flag_withparamname=True,
                                      pose_string=pose_string)
    conv6_output = Conv6_Param.get('conv6_output', [])
    conv6_kernal_size = Conv6_Param.get('conv6_kernal_size', [])
    out_layer = "pool5"
    net = addconv6(net, from_layer=out_layer, use_bn=True, conv6_output=conv6_output,
                   conv6_kernal_size=conv6_kernal_size, pre_name="conv6", start_pool=False,
                   lr_mult=1, decay_mult=1, n_group=1)

    # SSD1: heads + loss (train) or det-out (test)
    _dap_ssd_stage(net, ssd_Param_1, 1, train, data_layer, gt_label,
                   net_width, net_height, lr_mult=1, decay_mult=1.0)
    # SSD2 (optional): heads + loss (train) or det-out (test)
    if use_ssd2_for_detection:
        _dap_ssd_stage(net, ssd_Param_2, 2, train, data_layer, gt_label,
                       net_width, net_height, lr_mult=1.0, decay_mult=1.0)

    # EVAL in TEST MODE
    if not train:
        det_eval_param = {
            'gt_labels': eval_Param.get('eval_gt_labels', []),
            'num_classes': eval_Param.get("eval_num_classes", 2),
            'evaluate_difficult_gt': eval_Param.get("eval_difficult_gt", False),
            'boxsize_threshold': eval_Param.get("eval_boxsize_threshold", [0,0.01,0.05,0.1,0.15,0.2,0.25]),
            'iou_threshold': eval_Param.get("eval_iou_threshold", [0.9,0.75,0.5]),
            'background_label_id': 0,
        }
        if use_ssd2_for_detection:
            # Evaluate the detections of both SSD stages together.
            name = 'det_out'
            net[name] = L.Concat(net['detection_out_1'], net['detection_out_2'], axis=2)
            det_source = net[name]
        else:
            det_source = net['detection_out_1']
        net.det_accu = L.DetEval(det_source, net[gt_label],
                                 detection_evaluate_param=det_eval_param,
                                 include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    # Slice the pose label into masks / targets (plus gt in test mode).
    if train:
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp = \
            L.Slice(net["label" + pose_string], ntop=4,
                    slice_param=dict(slice_point=[34, 52, 86], axis=1))
    else:
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp, net.gt = \
            L.Slice(net["label" + pose_string], ntop=5,
                    slice_param=dict(slice_point=[34, 52, 86, 104], axis=1))
    # label
    net.vec_label = L.Eltwise(net.vec_mask, net.vec_temp, eltwise_param=dict(operation=P.Eltwise.PROD))
    net.heat_label = L.Eltwise(net.heat_mask, net.heat_temp, eltwise_param=dict(operation=P.Eltwise.PROD))

    # Pose test options (only read when building the test graph below).
    pose_test_kwargs = {
        # nms
        'nms_threshold': 0.05,
        'nms_max_peaks': 500,
        'nms_num_parts': 18,
        # connect
        'conn_is_type_coco': True,
        'conn_max_person': 10,
        'conn_max_peaks_use': 20,
        'conn_iters_pa_cal': 10,
        'conn_connect_inter_threshold': 0.05,
        'conn_connect_inter_min_nums': 8,
        'conn_connect_min_subset_cnt': 3,
        'conn_connect_min_subset_score': 0.4,
        # eval
        'eval_area_thre': 64*64,
        'eval_oks_thre': [0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9],
    }

    # Upsample conv5_5 of the pose base net by 2 (deconv + BN + scale + ReLU).
    bn_kwargs = {
        'param': [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
        'eps': 0.001,
    }
    sb_kwargs = {
        'bias_term': True,
        'param': [dict(lr_mult=1, decay_mult=0), dict(lr_mult=1, decay_mult=0)],
        'filler': dict(type='constant', value=1.0),
        'bias_filler': dict(type='constant', value=0.2),
    }
    deconv_param = {
        'num_output': 128,
        'kernel_size': 2,
        'pad': 0,
        'stride': 2,
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0),
        'group': 1,
    }
    kwargs_deconv = {
        'param': [dict(lr_mult=1, decay_mult=1)],
        'convolution_param': deconv_param,
    }
    from_layer = "conv5_5" + pose_string
    add_layer = from_layer + "_deconv"
    net[add_layer] = L.Deconvolution(net[from_layer], **kwargs_deconv)
    bn_name = add_layer + '_bn'
    net[bn_name] = L.BatchNorm(net[add_layer], in_place=True, **bn_kwargs)
    sb_name = add_layer + '_scale'
    net[sb_name] = L.Scale(net[add_layer], in_place=True, **sb_kwargs)
    relu_name = add_layer + '_relu'
    net[relu_name] = L.ReLU(net[add_layer], in_place=True)

    # Pose stages
    baselayer = add_layer
    use_stage = 3
    use_3_layers = 5
    use_1_layers = 0
    n_channel = 64
    lrdecay = 1.0
    kernel_size = 3
    flag_output_sigmoid = False
    # range() instead of the Python-2-only xrange(); identical iteration.
    for stage in range(use_stage):
        if stage == 0:
            from_layer = baselayer
        else:
            from_layer = "concat_stage{}".format(stage)
        outlayer = "concat_stage{}".format(stage + 1)
        # Every stage except the last one uses the short cut.
        short_cut = stage != use_stage - 1
        net = mPose_StageX_Train(
            net, from_layer=from_layer, out_layer=outlayer, stage=stage + 1,
            mask_vec="vec_mask", mask_heat="heat_mask",
            label_vec="vec_label", label_heat="heat_label",
            use_3_layers=use_3_layers, use_1_layers=use_1_layers, short_cut=short_cut,
            base_layer=baselayer, lr=0.1, decay=lrdecay, num_channels=n_channel,
            kernel_size=kernel_size, flag_sigmoid=flag_output_sigmoid)

    # for Test
    if not train:
        suffix = "_sig" if flag_output_sigmoid else ""
        conv_vec = "stage{}_conv{}_vec".format(use_stage, use_3_layers + use_1_layers) + suffix
        conv_heat = "stage{}_conv{}_heat".format(use_stage, use_3_layers + use_1_layers) + suffix
        net.vec_out = L.Eltwise(net.vec_mask, net[conv_vec], eltwise_param=dict(operation=P.Eltwise.PROD))
        net.heat_out = L.Eltwise(net.heat_mask, net[conv_heat], eltwise_param=dict(operation=P.Eltwise.PROD))
        # Stack the masked outputs (heat first, then vec) under the last
        # stage's concat name; derived from use_stage, not a literal 3.
        outlayer = "concat_stage{}".format(use_stage)
        net[outlayer] = L.Concat(net.heat_out, net.vec_out, axis=1)

        # Resize
        resize_kwargs = {
            'factor': pose_test_kwargs.get("resize_factor", 8),
            'scale_gap': pose_test_kwargs.get("resize_scale_gap", 0.3),
            'start_scale': pose_test_kwargs.get("resize_start_scale", 1.0),
        }
        net.resized_map = L.ImResize(net[outlayer], name="resize", imresize_param=resize_kwargs)

        # Nms
        nms_kwargs = {
            'threshold': pose_test_kwargs.get("nms_threshold", 0.05),
            'max_peaks': pose_test_kwargs.get("nms_max_peaks", 100),
            'num_parts': pose_test_kwargs.get("nms_num_parts", 18),
        }
        net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_kwargs)

        # ConnectLimbs
        connect_kwargs = {
            'is_type_coco': pose_test_kwargs.get("conn_is_type_coco", True),
            'max_person': pose_test_kwargs.get("conn_max_person", 10),
            'max_peaks_use': pose_test_kwargs.get("conn_max_peaks_use", 20),
            'iters_pa_cal': pose_test_kwargs.get("conn_iters_pa_cal", 10),
            'connect_inter_threshold': pose_test_kwargs.get("conn_connect_inter_threshold", 0.05),
            'connect_inter_min_nums': pose_test_kwargs.get("conn_connect_inter_min_nums", 8),
            'connect_min_subset_cnt': pose_test_kwargs.get("conn_connect_min_subset_cnt", 3),
            'connect_min_subset_score': pose_test_kwargs.get("conn_connect_min_subset_score", 0.4),
        }
        net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_kwargs)

        # Eval
        eval_kwargs = {
            'stride': 8,
            'area_thre': pose_test_kwargs.get("eval_area_thre", 64 * 64),
            'oks_thre': pose_test_kwargs.get("eval_oks_thre", [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9]),
        }
        net.eval = L.PoseEval(net.limbs, net.gt, pose_eval_param=eval_kwargs)
    return net
def RemPoseNet_Test(net, from_layer="data", frame_layer="orig_data", **pose_kwargs):
    """Build the Rem pose network for inference/visualisation.

    The base network's stage-4 and stage-5 feature layers are fused into
    ``convf``; three pose stages follow, then resize, NMS, limb-connection and
    pose-visualisation layers.

    Relies on module-level configuration defined outside this function:
    ``base_use_bn``, ``conv_stage_name``, ``stage4_layers``, ``stage5_layers``,
    ``use_stride_conv``, ``baselayer`` and ``use_layers``.

    Args:
        net: Net specification the layers are added to.
        from_layer: Name of the input image layer.
        frame_layer: Name of the layer holding the frame passed to the
            visualisation layer.
        **pose_kwargs: Optional overrides for the resize / nms / connect /
            visual parameters (keys prefixed ``resize_``, ``nms_``, ``conn_``,
            ``visual_``).

    Returns:
        The net specification with all layers attached.
    """
    # BaseNet
    net = RemBaseNet(net, from_layer=from_layer, use_bn=base_use_bn, use_conv6=False, lr=1, decay=1)

    # Stage-5: with a strided conv the layer one before the last is used.
    stage_5 = "{}_{}".format(conv_stage_name[4], len(stage5_layers))
    if use_stride_conv[4]:
        stage_5 = "{}_{}".format(conv_stage_name[4], len(stage5_layers) - 1)
    # Stage-4: same rule as stage 5.
    stage_4 = "{}_{}".format(conv_stage_name[3], len(stage4_layers))
    if use_stride_conv[3]:
        stage_4 = "{}_{}".format(conv_stage_name[3], len(stage4_layers) - 1)
    net = UnifiedMultiScaleLayers(net, layers=[stage_4, stage_5], tags=["Ref", "Up"],
                                  unifiedlayer="convf", upsampleMethod="Reorg")

    # NOTE(review): `baselayer` and `use_layers` are not defined in this
    # function; they are presumably module-level settings (with `baselayer`
    # expected to name the unified layer "convf") -- confirm.
    # STG#1
    net = RemPoseStage_Test(net, from_layer=baselayer, out_layer="concat_stage1", stage=1,
                            short_cut=True, base_layer=baselayer, lr=1, decay=1)
    # STG#2 -- uses the Test builder like stage 1.  The previous code called
    # RemPoseStage_Train for stages 2 and 3 inside this test graph.
    net = RemPoseStage_Test(net, from_layer="concat_stage1", out_layer="concat_stage2", stage=2,
                            short_cut=True, base_layer=baselayer, lr=1, decay=1)
    # STG#3
    net = RemPoseStage_Test(net, from_layer="concat_stage2", out_layer="concat_stage3", stage=3,
                            short_cut=False, base_layer=baselayer, lr=1, decay=1)

    # Stack the last stage's outputs (heat first, then vec).
    conv_vec = "stage{}_conv{}_vec".format(3, use_layers)
    conv_heat = "stage{}_conv{}_heat".format(3, use_layers)
    outlayer = "concat_stage{}".format(3)
    net[outlayer] = L.Concat(net[conv_heat], net[conv_vec], axis=1)

    # Resize
    resize_kwargs = {
        'factor': pose_kwargs.get("resize_factor", 2),
        'scale_gap': pose_kwargs.get("resize_scale_gap", 0.3),
        'start_scale': pose_kwargs.get("resize_start_scale", 1.0),
    }
    net.resized_map = L.ImResize(net[outlayer], name="resize", imresize_param=resize_kwargs)

    # Nms
    nms_kwargs = {
        'threshold': pose_kwargs.get("nms_threshold", 0.05),
        'max_peaks': pose_kwargs.get("nms_max_peaks", 100),
        'num_parts': pose_kwargs.get("nms_num_parts", 18),
    }
    net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_kwargs)

    # ConnectLimbs
    connect_kwargs = {
        'is_type_coco': pose_kwargs.get("conn_is_type_coco", True),
        'max_person': pose_kwargs.get("conn_max_person", 10),
        'max_peaks_use': pose_kwargs.get("conn_max_peaks_use", 20),
        'iters_pa_cal': pose_kwargs.get("conn_iters_pa_cal", 10),
        'connect_inter_threshold': pose_kwargs.get("conn_connect_inter_threshold", 0.05),
        'connect_inter_min_nums': pose_kwargs.get("conn_connect_inter_min_nums", 8),
        'connect_min_subset_cnt': pose_kwargs.get("conn_connect_min_subset_cnt", 3),
        'connect_min_subset_score': pose_kwargs.get("conn_connect_min_subset_score", 0.4),
    }
    net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_kwargs)

    # VisualizePose ('is_type_coco' deliberately reuses the connect option key)
    visual_kwargs = {
        'is_type_coco': pose_kwargs.get("conn_is_type_coco", True),
        'type': pose_kwargs.get("visual_type", P.Visualizepose.POSE),
        'visualize': pose_kwargs.get("visual_visualize", True),
        'draw_skeleton': pose_kwargs.get("visual_draw_skeleton", True),
        'print_score': pose_kwargs.get("visual_print_score", False),
        'part_id': pose_kwargs.get("visual_part_id", 0),
        'from_part': pose_kwargs.get("visual_from_part", 0),
        'vec_id': pose_kwargs.get("visual_vec_id", 0),
        'from_vec': pose_kwargs.get("visual_from_vec", 0),
        'pose_threshold': pose_kwargs.get("visual_pose_threshold", 0.05),
        'write_frames': pose_kwargs.get("visual_write_frames", False),
        'output_directory': pose_kwargs.get("visual_output_directory", ""),
    }
    net.visual = L.Visualizepose(net[frame_layer], net.resized_map, net.limbs,
                                 visualize_pose_param=visual_kwargs)
    return net