Example #1
def getKptsFromImage(human_model, pose_model, image, smooth=None):

    bboxs, scores = yolo_det(image, human_model)
    # bboxs are the detected person bounding boxes
    inputs, origin_img, center, scale = PreProcess(image, bboxs, scores, cfg)

    with torch.no_grad():
        # compute output heatmap
        inputs = inputs[:, [2, 1, 0]]
        output = pose_model(inputs.cuda())
        # compute coordinate
        preds, maxvals = get_final_preds(cfg,
                                         output.clone().cpu().numpy(),
                                         np.asarray(center), np.asarray(scale))

    # select the person with the largest mean y coordinate
    # max_index = 0
    # max_y = np.mean(preds[0, :, 1])

    # for k in range(len(preds)):
    #     tmp_y = np.mean(preds[k, :, 1])
    #     if tmp_y > max_y:
    #         max_index = k
    #         max_y = tmp_y
    # result = np.concatenate((preds[max_index], maxvals[max_index]), 1)

    # 3D video pose (only support single human)
    result = np.concatenate((preds[0], maxvals[0]), 1)

    return result
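# A minimal usage sketch (hypothetical wiring; yolo_model and model_load are
# assumed to be the same loaders used in the later examples):
#
#   human_model = yolo_model()
#   pose_model = model_load(cfg).cuda()
#   image = cv2.imread('person.jpg')
#   kpts = getKptsFromImage(human_model, pose_model, image)  # (17, 3): x, y, confidence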
Example #2
def detect_pose(pa, c):

  frame = cv2.imread(pa, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
  bboxs, scores = yolo_det(frame, human_model)
  src_dir = pa  # source path for the commented-out copy below
  if len(bboxs) > 0:
    inputs, origin_img, center, scale = PreProcess(frame, bboxs, scores, cfg)

    with torch.no_grad():
      # compute output heatmap
      inputs = inputs[:,[2,1,0]]
      output = pose_model(inputs.cuda())
      
      # compute coordinate
      preds, maxvals = get_final_preds(
          cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
      thresh = 0.7
      # full shot: the head (0-4) and leg (13-16) keypoints must all be confident
      key_ids = [0, 1, 2, 3, 4, 13, 14, 15, 16]
      if all(maxvals[0][j] > thresh for j in key_ids):
        # dst_dir = '/content/full_shot' + '/' + f'image_{c}' + '.jpg' -- want to move all full shots to a single folder
        # shutil.copy(src_dir, dst_dir)
        return 1
      else:
        return 0
        
  else:
    return 0
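# A usage sketch (hypothetical paths; detect_pose returns 1 for a "full shot"
# where the head and leg keypoints are all confident):
#
#   import glob, os, shutil
#   for c, path in enumerate(sorted(glob.glob('/content/images/*.jpg'))):
#       if detect_pose(path, c):
#           shutil.copy(path, os.path.join('/content/full_shot', f'image_{c}.jpg'))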
Example #3
def generate_kpts(video_name, smooth=None, no_nan=True):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    cam_w = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
    cam_h = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoints coordinate
    kpts_result = []
    for i in tqdm(range(video_length-1)):

        ret_val, input_image = cam.read()

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bboxs are the detected person bounding boxes
            inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
        except Exception as e:
            if not no_nan:
                # append NaN so we can interpolate later
                kpts_result.append(np.full((17, 2), np.nan, dtype=np.float32))
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:,[2,1,0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # 3D video pose (only support single human)
        kpts_result.append(preds[0])

    result = np.array(kpts_result)
    return result, input_fps, cam_w, cam_h
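# A minimal sketch of the interpolation the no_nan=False path enables: the NaN
# frames appended above can be filled per joint and axis with numpy's linear
# interpolation. This helper is an illustration, not part of the repo.
import numpy as np

def interpolate_nan_frames(kpts):
    # kpts: (T, 17, 2) with NaN rows for frames where detection failed
    kpts = kpts.copy()
    t = np.arange(kpts.shape[0])
    for j in range(kpts.shape[1]):
        for a in range(kpts.shape[2]):
            col = kpts[:, j, a]
            good = ~np.isnan(col)
            if good.any():
                kpts[:, j, a] = np.interp(t, t[good], col[good])
    return kpts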
Example #4
def main():
    args = parse_args()
    update_config(cfg, args)
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED


    ########## load human detector model
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()

    from lib.detector.yolo.human_detector import main as yolo_det
    bboxs, scores = yolo_det(args.img_input, human_model)

    # bboxs are the detected person bounding boxes
    inputs, origin_img, center, scale = PreProcess(args.img_input, bboxs, scores, cfg)

    # load MODEL
    model = model_load(cfg)

    with torch.no_grad():
        # compute output heatmap
        inputs = inputs[:,[2,1,0]]
        output = model(inputs)
        # compute coordinate
        preds, maxvals = get_final_preds(
            cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

    image = plot_keypoint(origin_img, preds, maxvals, 0.3)
    cv2.imwrite(args.img_output, image)
Example #5
def generate_kpts(video_name):
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoints
    kpts_result = []
    for i in tqdm(range(video_length-1)):

        ret_val, input_image = cam.read()

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bboxs are the detected person bounding boxes
            inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:,[2,1,0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        # smooth the keypoints
        preds = smooth_filter(preds)

        # 3D video pose (only supports a single person)
        kpts_result.append(preds[0])

    result = np.array(kpts_result)
    return result
Example #6
def getKptsFromImage(human_model, pose_model, image, smooth=None):
    args = get_args()
    update_config(cfg, args)

    bboxs, scores = yolo_det(image, human_model)
    # bboxs are the detected person bounding boxes
    inputs, origin_img, center, scale = PreProcess(image, bboxs, scores,
                                                   cfg)

    with torch.no_grad():
        # compute output heatmap
        inputs = inputs[:, [2, 1, 0]]
        output = pose_model(inputs.cuda())
        # compute coordinate
        preds, maxvals = get_final_preds(cfg,
                                         output.clone().cpu().numpy(),
                                         np.asarray(center), np.asarray(scale))

    # 3D video pose (only support single human)
    return preds[0]
Example #7
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    resize_W = 640
    resize_H = 384
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    first_frame = 1

    flow_boxs = 0  # int sentinel until the first optical-flow result arrives
    flow_kpts = 0

    item = 0
    for i in tqdm(range(video_length - 1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        if first_frame == 0:
            try:
                t0 = ckpt_time()
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxs, flow_kpts = flow_propagation(keypoints, flow_result)
                _, t1 = ckpt_time(t0, 1)
            except Exception as e:
                print(e)
                continue

        pre_image = input_image
        first_frame = 0

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bboxs are the detected person bounding boxes
            if isinstance(flow_boxs, int):  # no optical-flow result yet
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
            else:
                #  flow_boxs = (flow_boxs + bboxs) /2
                inputs, origin_img, center, scale = PreProcess(
                    input_image, flow_boxs, scores, cfg)
        except Exception:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 1080, 720)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

        if not isinstance(flow_boxs, int):
            preds = (preds + flow_kpts) / 2

        origin_img = np.zeros(origin_img.shape, np.uint8)
        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        out.write(image)
        keypoints = np.concatenate((preds, maxvals), 2)

        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 1920, 1080)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
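# A sketch of the idea behind flow_propagation (an illustration, not the
# repo's implementation): sample the optical flow at each previous keypoint
# and shift it, so the propagated pose can be averaged with the fresh HRNet
# prediction as done above.
import numpy as np

def propagate_kpts(kpts, flow):
    # kpts: (N, 17, 2) previous keypoints; flow: (H, W, 2) forward flow
    out = kpts.astype(np.float64)
    xs = np.clip(kpts[..., 0].astype(int), 0, flow.shape[1] - 1)
    ys = np.clip(kpts[..., 1].astype(int), 0, flow.shape[0] - 1)
    out[..., 0] += flow[ys, xs, 0]
    out[..., 1] += flow[ys, xs, 1]
    return out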
Example #8
def generate_kpts(video_name, smooth=None):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoints coordinate
    kpts_result = []
    for i in tqdm(range(video_length - 1)):

        ret_val, input_image = cam.read()
        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bboxs are the detected person bounding boxes
            inputs, origin_img, center, scale = PreProcess(
                input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # print("input shape: ", inputs.shape)  # 1 3 256 192
            # print("output shape: ", output.shape) # 1 17 64 48
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # 3D video pose (only support single human)
        kpts_result.append(preds[0])  # preds[0] (17, 2)

        # # select the person with the largest mean y coordinate
        # max_index = 0
        # max_y = np.mean(preds[0, :, 1])
        #
        # for k in range(len(preds)):
        #     tmp_y = np.mean(preds[k, :, 1])
        #     if tmp_y > max_y:
        #         max_index = k
        #         max_y = tmp_y
        # kpts_result.append(preds[max_index])
        # # print("maxvals[max_index]:", np.mean(maxvals[max_index]))

    result = np.array(kpts_result)
    return result
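Example #9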
def main():
    tick = 0
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))


    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    item = 0
    for i in tqdm(range(video_length-1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()


        if args.camera:
            # to keep real-time speed, predict on every other frame
            if item == 0:
                item = 1
                continue

        item = 0
        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bboxs are the detected person bounding boxes
            inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
        except Exception:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:,[2,1,0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))


        # smooth the keypoints
        preds = smooth_filter(preds)
        #  preds = np.expand_dims(preds, 0)
        origin_img = np.zeros(origin_img.shape, np.uint8)
        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        if i >= 14:
            out.write(image)
        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
Example #10
def main():
    global max_id
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    # keep width and height multiples of 64, as required by flownet2
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int(
        (input_image.shape[0] / input_image.shape[1] * resize_W) / 64) * 64
    image_resolution = (resize_W, resize_H)
    print(resize_W, resize_H)
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        try:
            if i > 0:
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxes, flow_kpts = flow_propagation(
                    prev_kpts, flow_result)
                flow_boxes = np.concatenate(
                    (flow_boxes, np.expand_dims(prev_boxes[..., 4], -1)),
                    -1)  # flow_boxes + previous boxes scores
                flow_kpts = np.concatenate((flow_kpts, prev_kpts_scores), -1)

            # boxes_threshold is 0.9
            detected_boxes, detected_scores = yolo_det(
                input_image, human_model)  # bbox is coordinate location
            detected_scores = np.expand_dims(detected_scores.flatten(), -1)
            detected_boxes = np.concatenate((detected_boxes, detected_scores),
                                            -1)  # (N, 5): box corners + score

            if i == 0:
                inputs, origin_img, center, scale = PreProcess(
                    input_image, detected_boxes[..., :4],
                    detected_boxes[..., 4], cfg)
                #  ploted_image = plot_boxes(input_image, detected_boxes, [i for i in range(len(detected_boxes))])
                #  cv2.imshow('image', ploted_image)
                #  cv2.waitKey(100)
            else:
                # trickiest step: re-orders the pose-net inputs and assigns the matching ids
                print('before mapping: ', previous_ids)
                new_boxes, new_ids = boxes_nms_test(flow_boxes, detected_boxes,
                                                    previous_ids,
                                                    image_resolution)
                print('after mapping: ', new_ids)
                print(flow_boxes[:, 1], detected_boxes[:, 1])
                #  ploted_image = plot_boxes(input_image, new_boxes, new_ids)
                #  cv2.imshow('image', ploted_image)
                #  cv2.waitKey(100)
                inputs, origin_img, center, scale = PreProcess(
                    input_image, new_boxes[..., :4], new_boxes[..., 4], cfg)

        except Exception as e:
            print(e)
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        # pose estimation
        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            detected_kpts, detected_kpts_scores = get_final_preds(
                cfg,
                output.clone().cpu().numpy(), np.asarray(center),
                np.asarray(scale))
            detected_kpts = np.concatenate(
                (detected_kpts, detected_kpts_scores), 2)

        # TRACK Assign IDs. flow_boxes; detected_boxes, new_ids
        if i > 0:
            pose_similarity_matrix = compute_pairwise_oks(
                flow_kpts, flow_boxes[..., :4], detected_kpts)
            box_similarity_matrix = boxes_similarity(flow_boxes[..., :4],
                                                     detected_boxes[..., :4])
            ratio = 0.5
            similarity_matrix = pose_similarity_matrix * ratio + box_similarity_matrix * (
                1 - ratio)
            prev_filter_ids, cur_ids = bipartite_matching_greedy(
                similarity_matrix)

            print('previous frame boxes: ', prev_pose_ids)
            cur_len = len(detected_kpts)
            new_pose_ids = pose_match_ids(prev_pose_ids, prev_filter_ids,
                                          cur_ids, cur_len)

            #  detected_kpts = detected_kpts[ [i-1 for i in new_ids],:]
            #  detected_kpts_scores = detected_kpts_scores[[i-1 for i in new_ids],:]
            print(prev_filter_ids, cur_ids)
            print('after map: ', new_pose_ids)

        # prepare for the next frame
        pre_image = input_image.copy()
        prev_kpts = detected_kpts
        prev_kpts_scores = detected_kpts_scores
        if i == 0:
            prev_boxes = detected_boxes
            previous_ids = [j for j in range(len(detected_boxes))]
            prev_pose_ids = previous_ids

        else:
            previous_ids = new_ids
            prev_boxes = new_boxes
            prev_pose_ids = new_pose_ids
        if i > 1:
            image = plot_keypoint_track(origin_img, detected_kpts,
                                        detected_kpts_scores,
                                        new_boxes[..., :4], new_pose_ids, 0.1)
        else:
            image = plot_keypoint_track(origin_img, detected_kpts,
                                        detected_kpts_scores,
                                        detected_boxes[..., :4], previous_ids,
                                        0.1)

        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
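# A sketch of what bipartite_matching_greedy plausibly does (an assumption,
# since the repo's implementation is not shown here): repeatedly take the
# highest remaining similarity and match that (previous, current) pair.
import numpy as np

def bipartite_matching_greedy(sim):
    # sim: (n_prev, n_cur) similarity matrix; higher means more similar
    sim = sim.astype(np.float64)
    prev_ids, cur_ids = [], []
    for _ in range(min(sim.shape)):
        r, c = np.unravel_index(np.argmax(sim), sim.shape)
        if sim[r, c] <= 0:
            break  # nothing similar enough left to match
        prev_ids.append(int(r))
        cur_ids.append(int(c))
        sim[r, :] = -np.inf  # consume the matched row and column
        sim[:, c] = -np.inf
    return prev_ids, cur_ids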
Example #11
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    # keep width and height multiples of 64
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int(
        (input_image.shape[0] / input_image.shape[1] * resize_W) / 64) * 64
    print(resize_W, resize_H)
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    flow_boxs = 0
    flow_kpts = 0

    previous_ids = 0
    prev_max_id = 0  # assumed init: read at the first i > 0 iteration below
    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        if i > 0:
            try:
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxs, flow_kpts = flow_propagation(
                    pre_flow_keypoints, flow_result)  # assumed fix: matches the name set at the end of the loop
                flow_kpts = np.concatenate((flow_kpts, pre_flow_pkt_scores), -1)
            except Exception as e:
                print(e)
                continue

        pre_image = input_image

        try:
            # boxes_threshold is 0.6
            bboxs, scores = yolo_det(
                input_image, human_model)  # detected person bounding boxes

            # first frame
            if i == 0:
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
                # initial IDs and scores map
                previous_ids = [i for i in range(len(bboxs))]
                #  id_scores_map = {}
                #  for i in range(len(bboxs)): id_scores_map.update({previous_ids[i]: scores[i]})
            else:
                # NMS between this frame's and the previous frame's box confidences
                #  new_boxs, new_ids = boxes_nms(flow_boxs, bboxs, previous_ids)
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)

        except Exception as e:
            print(e)
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))
            keypoints = np.concatenate((preds, maxvals), 2)

        # this frame's box confidences become the flow-box confidences for the next frame
        #  flow_bbox_scores = scores.copy()

        #  if i != 1:
        #  preds = (preds + flow_kpts) / 2

        # shift-kpts, shift-boxes, cur_kpts ------> TRACK

        if i > 0:
            kps_b = keypoints.copy()
            box_b = bboxs[:preds.shape[0]]
            kps_a = flow_kpts  # (N, 17, 3)
            box_a = flow_boxs

            pose_similarity_matrix = compute_pairwise_oks(kps_a, box_a, kps_b)
            box_similarity_matrix = boxs_similarity(box_a, box_b)
            ratio = 0.5
            similarity_matrix = pose_similarity_matrix * ratio + box_similarity_matrix * (
                1 - ratio)
            prev_filter_ids, cur_ids = bipartite_matching_greedy(
                similarity_matrix)

            print('previous frame boxes: ', previous_ids)
            print(prev_filter_ids, cur_ids)

            cur_len = len(box_b) + len(box_a) - len(cur_ids)
            cur_maps = -np.ones(shape=(cur_len, ))

            new_boxes = []
            new_kpts = []

            for pos, num in enumerate(cur_ids):
                cur_maps[num] = previous_ids[prev_filter_ids[pos]]  # assumed fix: index by current id 'num', as in Example #12
                new_boxes.append(box_b[num])  # assumed fix for the original's truncated 'bo'

            prev_max_id = max(max(previous_ids), prev_max_id)

            for i in range(cur_len):
                if cur_maps[i] == -1.:
                    prev_max_id += 1
                    cur_maps[i] = prev_max_id

            previous_ids = cur_maps.astype(np.uint8).tolist()
            print('after map: ', previous_ids)

        # organize the keypoints and ids handed to the next frame's flownet
        if i == 0:
            pre_flow_keypoints = keypoints
            pre_flow_pkt_scores = maxvals  # assumed fix: per-keypoint scores, as in Example #12
            previous_ids = list(range(len(keypoints)))  # assumed init, mirroring Example #12
        # based on the mapping result
        else:
            pre_flow_keypoints = keypoints  # assumed: this snippet never builds re-ordered 'tracked' arrays
            pre_flow_pkt_scores = maxvals

        if i > 1:
            image = plot_keypoint_track(origin_img, preds, maxvals, box_b,
                                        previous_ids, 0.1)

        if args.display and i > 1:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
Example #12
def main():
    previous_ids = 0
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int((input_image.shape[0] / input_image.shape[1] * resize_W) / 64 ) * 64
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    image_resolution = (resize_W, resize_H)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))


    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()


    #### load optical flow model
    flow_model = load_model()

    item = 0
    prev_max_id = 0
    for i in tqdm(range(video_length-1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        item = 0
        try:
            bboxs, scores = yolo_det(input_image, human_model)
            bboxs = np.concatenate((bboxs, scores.transpose(1,0)), -1)


            # add the flownet module
            if i>0:
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxes, flow_kpts = flow_propagation(prev_kpts, flow_result)
                flow_boxes = np.concatenate((flow_boxes, np.expand_dims(prev_boxes[...,4], -1)), -1)
                flow_kpts = np.concatenate((flow_kpts,prev_kpts_scores), -1)
                detected_boxes = bboxs.copy()
                #  plot_boxes(input_image.copy(), flow_boxes, [i for i in range(len(flow_boxes))], '{}_flow.png'.format(1000+i))
                #  plot_boxes(input_image.copy(), detected_boxes, [i for i in range(len(detected_boxes))], '{}_detected.png'.format(1000+i))
                bboxs = boxes_nms_test(flow_boxes, bboxs, image_resolution)
                #  plot_boxes(input_image.copy(), bboxs, [i for i in range(len(bboxs))], 'nms_{}.png'.format(100+i))

            inputs, origin_img, center, scale = PreProcess(input_image, bboxs[..., :4], bboxs[...,4], cfg)


        except Exception as e:
            print(e)
            continue

        try:
            with torch.no_grad():
                # compute output heatmap
                inputs = inputs[:,[2,1,0]]
                output = pose_model(inputs.cuda())
                # compute coordinate
                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
        except Exception as e:
            print(e)
            continue


        kps_b = np.concatenate((preds, maxvals), 2)
        prev_kpts = kps_b.copy()
        box_b = bboxs[:preds.shape[0]]

        if previous_ids == 0:
            previous_ids = [j for j in range(len(preds))]

        if i>0:
            # kps_a is from the previous frame; kps_b is the current HRNet detection
            kps_a = flow_kpts
            box_a = flow_boxes
            pose_similarity_matrix = compute_pairwise_oks(kps_a, box_a, kps_b)
            box_similarity_matrix = boxes_similarity(box_a, box_b)
            # pose similarity ratio
            ratio = 0.5
            similarity_matrix = pose_similarity_matrix*ratio + box_similarity_matrix*(1-ratio)
            # prev_filter_ids: the filtered previous-frame id sequence
            prev_filter_ids, cur_ids = bipartite_matching_greedy(similarity_matrix)

            print('previous frame boxes: ',previous_ids)
            print(prev_filter_ids, cur_ids)

            cur_len = len(box_b)
            cur_maps = -np.ones(shape=(cur_len,))

            for pos, num in enumerate(cur_ids):
                cur_maps[num] = previous_ids[prev_filter_ids[pos]]

            prev_max_id = max(max(previous_ids), prev_max_id)

            for j in range(cur_len):
                if cur_maps[j] == -1.:
                    prev_max_id += 1
                    cur_maps[j] = prev_max_id

            # becomes the previous-frame id sequence for the next iteration
            previous_ids = cur_maps.astype(np.uint8).tolist()
            print('after map: ', previous_ids)


        # carry over as the previous frame for the next iteration
        kps_a = kps_b.copy()
        box_a = box_b.copy()
        prev_kpts = kps_b
        prev_kpts_scores = maxvals
        pre_image = input_image
        prev_boxes = bboxs

        if i>0:
            image = plot_keypoint_track(origin_img, preds, maxvals, box_a, previous_ids, 0.1)
            out.write(image)
        if args.display and i>0:
            ########### set the display window size
            winname = 'image'
            cv2.namedWindow(winname)  # create a named window
            cv2.moveWindow(winname, 1000, 850)  # move it to (1000, 850)
            cv2.imshow(winname, image)
            cv2.waitKey(100)
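# A compact sketch of the single-pair OKS (object keypoint similarity) that
# compute_pairwise_oks plausibly builds its matrix from (an assumption; the
# per-joint sigmas are the standard COCO constants):
import numpy as np

COCO_SIGMAS = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72,
                        .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0

def oks(kpts_a, kpts_b, box_a):
    # kpts_*: (17, 3) as (x, y, score); box_a: (x1, y1, x2, y2)
    area = max((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]), 1.0)
    d2 = np.sum((kpts_a[:, :2] - kpts_b[:, :2]) ** 2, axis=1)
    e = d2 / (2.0 * area * (2.0 * COCO_SIGMAS) ** 2)
    vis = kpts_a[:, 2] > 0
    return float(np.mean(np.exp(-e[vis]))) if vis.any() else 0.0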
Example #13
def main():
    args = parse_args()
    update_config(cfg, args)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    #  pose_model = torch.nn.DataParallel(pose_model, device_ids=[0,1]).cuda()
    pose_model.cuda()

    from pycocotools.coco import COCO
    annFile = '/ssd/xyliu/data/coco/annotations/instances_val2017.json'
    im_root = '/ssd/xyliu/data/coco/images/val2017/'
    coco = COCO(annFile)
    catIds = coco.getCatIds(catNms=['person'])
    # ids of all images that contain people
    imgIds = coco.getImgIds(catIds=catIds)
    kpts_result = []
    detected_image_num = 0
    box_num = 0
    for imgId in tqdm(imgIds[:]):
        img = coco.loadImgs(imgId)[0]
        im_name = img['file_name']
        img = im_root + im_name
        img_input = plt.imread(img)

        try:
            bboxs, scores = mm_det(human_model, img_input, 0.3)
            inputs, origin_img, center, scale = PreProcess(
                img_input, bboxs, scores, cfg)

        except Exception as e:
            print(e)
            continue

        detected_image_num += 1
        with torch.no_grad():
            output = pose_model(inputs.cuda())
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

            #  vis = np.ones(shape=maxvals.shape,)
            vis = maxvals
            preds = preds.astype(np.float16)
            keypoints = np.concatenate((preds, vis), -1)
            for k, s in zip(keypoints, scores.tolist()):
                box_num += 1
                k = k.flatten().tolist()
                item = {
                    "image_id": imgId,
                    "category_id": 1,
                    "keypoints": k,
                    "score": s
                }
                kpts_result.append(item)

    num_joints = 17
    in_vis_thre = 0.2
    oks_thre = 0.5
    oks_nmsed_kpts = []
    for i in range(len(kpts_result)):
        img_kpts = kpts_result[i]['keypoints']
        kpt = np.array(img_kpts).reshape(17, 3)
        box_score = kpts_result[i]['score']
        kpt_score = 0
        valid_num = 0
        # each joint for bbox
        for n_jt in range(0, num_joints):
            # score
            t_s = kpt[n_jt][2]
            if t_s > in_vis_thre:
                kpt_score = kpt_score + t_s
                valid_num = valid_num + 1
        if valid_num != 0:
            kpt_score = kpt_score / valid_num

        # rescore: multiply the mean keypoint confidence by the box confidence
        kpts_result[i]['score'] = kpt_score * box_score

    import json
    data = json.dumps(kpts_result)
    print(
        'image num is {} \tdetected_image num is {}\t person num is {}'.format(
            len(imgIds), detected_image_num, box_num))
    #  data = json.dumps(str(kpts_result))
    with open('person_keypoints.json', 'wt') as f:
        #  pass
        f.write(data)
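# The rescoring above, written compactly for a single entry (equivalent
# logic, shown only for illustration):
#
#   k = np.array(item['keypoints']).reshape(17, 3)
#   valid = k[:, 2] > in_vis_thre
#   item['score'] *= k[valid, 2].mean() if valid.any() else 0.0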
Example #14
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    #  pose_model = torch.nn.DataParallel(pose_model, device_ids=[0,1]).cuda()
    pose_model.cuda()

    item = 0
    for i in tqdm(range(video_length - 1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()

        #  if args.camera:
        #  #  to keep real-time speed, predict on every other frame
        #  if item == 0:
        #  item = 1
        #  continue

        item = 0
        try:
            bboxs, scores = mm_det(human_model, input_image)
            # bboxs are the detected person bounding boxes
            inputs, origin_img, center, scale = PreProcess(
                input_image, bboxs, scores, cfg)
        except Exception:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        out.write(image)
        if args.display:
            ######### fullscreen
            #  out_win = "output_style_full_screen"
            #  cv2.namedWindow(out_win, cv2.WINDOW_NORMAL)
            #  cv2.setWindowProperty(out_win, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
            #  cv2.imshow(out_win, image)

            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)