Example #1
def detect_pose(pa, c):

  image = pa
  frame = cv2.imread(image, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
  bboxs, scores = yolo_det(frame, human_model)
  src_dir = pa 
  if len(bboxs)>0:
    inputs, origin_img, center, scale = PreProcess(frame, bboxs, scores, cfg)

    with torch.no_grad():
      # compute output heatmap
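      # the next line reorders channels BGR -> RGB for the pose model
      # (assuming PreProcess keeps OpenCV's BGR channel order)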
      inputs = inputs[:,[2,1,0]]
      output = pose_model(inputs.cuda())
      
      # compute coordinate
      preds, maxvals = get_final_preds(
          cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
      thresh = 0.7
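      # joint indices below assume the COCO 17-keypoint order:
      # 0-4 are nose/eyes/ears, 13-14 knees, 15-16 ankles, so a "full shot"
      # requires both the head and the lower legs to be confidently detected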
      if all(maxvals[0][k] > thresh for k in (0, 1, 2, 3, 4, 13, 14, 15, 16)):
        #dst_dir = '/content/full_shot' + '/' + f'image_{c}' + '.jpg' -- want to move all full shot to single folder
        #shutil.copy(src_dir,dst_dir)
        return 1
      else:
        return 0
        
  else:
    return 0
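A minimal usage sketch for detect_pose above (the folder path is hypothetical, and human_model, pose_model and cfg are assumed to be loaded the same way the other examples do):

import glob

full_shot_count = 0
for c, path in enumerate(sorted(glob.glob('/content/images/*.jpg'))):  # hypothetical input folder
    full_shot_count += detect_pose(path, c)
print('full shots found:', full_shot_count)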
Example #2
def main():
    args = parse_args()
    update_config(cfg, args)
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED


    ########## load human detector model
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()

    from lib.detector.yolo.human_detector import main as yolo_det
    bboxs, scores = yolo_det(args.img_input, human_model)

    # bbox is coordinate location
    inputs, origin_img, center, scale = PreProcess(args.img_input, bboxs, scores, cfg)

    # load MODEL
    model = model_load(cfg)

    with torch.no_grad():
        # compute output heatmap
        inputs = inputs[:,[2,1,0]]
        output = model(inputs)
        # compute coordinate
        preds, maxvals = get_final_preds(
            cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

    image = plot_keypoint(origin_img, preds, maxvals, 0.3)
    cv2.imwrite(args.img_output, image)
Example #3
def main():
    args = parse_args()
    update_config(cfg, args)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    ########## load human detector model
    from lib.detector.mmdetection.high_api import load_model
    human_model = load_model()
    from lib.detector.mmdetection.high_api import human_boxes_get as mmd_detector
    bboxs, scores = mmd_detector(human_model, args.img_input) # bboxes (N, 4) [x0, y0, x1, y1]
    # bbox is coordinate location
    inputs, origin_img, center, scale = PreProcess(args.img_input, bboxs, scores, cfg)

    # load HRNET MODEL
    model = model_load(cfg)
    with torch.no_grad():
        # compute output heatmap
        #  inputs = inputs[:,[2,1,0]]
        #  inputs = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB)
        output = model(inputs)
        # compute coordinate
        preds, maxvals = get_final_preds(
            cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

    image = plot_keypoint(origin_img, preds, maxvals, 0.3)
    cv2.imwrite(args.img_output, image)
    if args.display:
        cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
        cv2.resizeWindow("enhanced", 960, 480)
        cv2.imshow('enhanced', image)
        cv2.waitKey(5000)
Example #4
def getKptsFromImage(human_model, pose_model, image, smooth=None):

    bboxs, scores = yolo_det(image, human_model)
    # bbox is coordinate location
    inputs, origin_img, center, scale = PreProcess(image, bboxs, scores, cfg)

    with torch.no_grad():
        # compute output heatmap
        inputs = inputs[:, [2, 1, 0]]
        output = pose_model(inputs.cuda())
        # compute coordinate
        preds, maxvals = get_final_preds(cfg,
                                         output.clone().cpu().numpy(),
                                         np.asarray(center), np.asarray(scale))

    # pick the person with the largest mean y coordinate
    # max_index = 0
    # max_y = np.mean(preds[0, :, 1])

    # for k in range(len(preds)):
    #     tmp_y = np.mean(preds[k, :, 1])
    #     if tmp_y > max_y:
    #         max_index = k
    #         max_y = tmp_y
    # result = np.concatenate((preds[max_index], maxvals[max_index]), 1)

    # 3D video pose (only supports a single person)
    result = np.concatenate((preds[0], maxvals[0]), 1)

    return result
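For reference, a hedged sketch of calling getKptsFromImage on a single frame (models and cfg assumed loaded as in the surrounding examples); get_final_preds returns preds of shape (N, 17, 2) and maxvals of shape (N, 17, 1), so the concatenated result is (17, 3):

frame = cv2.imread('demo.jpg')  # hypothetical image path
kpts = getKptsFromImage(human_model, pose_model, frame)
print(kpts.shape)  # (17, 3): x, y, confidence per joint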
Example #5
def main():
    args = parse_args()
    update_config(cfg, args)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    ########## load human detector model
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()

    from lib.detector.yolo.human_detector import human_bbox_get as yolo_det
    print(args.img_input)
    img = cv2.imread(args.img_input)
    # print(type(img))
    # cv2.imshow("test", img)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    bboxs, scores = yolo_det(args.img_input, human_model,
                             confidence=0.5)  # bboxes (N, 4) [x0, y0, x1, y1]
    # print("bboxs = ", bboxs)
    # print("scores = ", scores)
    # ipdb.set_trace()

    # bbox is coordinate location
    inputs, origin_img, center, scale = preprocess(args.img_input, bboxs,
                                                   scores, cfg)

    # load MODEL
    model = model_load(cfg)

    with torch.no_grad():
        # compute output heatmap
        #  inputs = inputs[:,[2,1,0]]
        #  inputs = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB)
        output = model(inputs)
        # compute coordinate
        preds, maxvals = get_final_preds(cfg,
                                         output.clone().cpu().numpy(),
                                         np.asarray(center), np.asarray(scale))
        print("preds = ", preds)
        print("maxvals = ", maxvals)

    image = plot_keypoint(origin_img, preds, maxvals, 0.5)
    cv2.imwrite(args.img_output, image)
    #if args.display:
    #cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL);
    #cv2.resizeWindow("enhanced", 960, 480);
    cv2.imshow('enhanced', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def generate_kpts(video_name, smooth=False):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    # # ret_val, input_image = cam.read()
    # # Video writer
    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # input_fps = cam.get(cv2.CAP_PROP_FPS)

    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoints coordinate
    kpts_result = []
    for i in tqdm(range(video_length)):

        ret_val, input_image = cam.read()

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bbox is coordinate location
            inputs, origin_img, center, scale = preprocess(
                input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

            # if len(preds) != 1:
            #     print('here')

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # 3D video pose (only supports a single person)
        kpts_result.append(preds[0])

    result = np.array(kpts_result)
    return result
Example #7
def generate_kpts(video_name, smooth=None, no_nan=True):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    cam_w = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
    cam_h = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoints coordinate
    kpts_result = []
    for i in tqdm(range(video_length-1)):

        ret_val, input_image = cam.read()

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
        except Exception as e:
            if not no_nan:
                # append NaN so we can interpolate later
                kpts_result.append(np.full((17, 2), np.nan, dtype=np.float32))
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:,[2,1,0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # 3D video pose (only supports a single person)
        kpts_result.append(preds[0])

    result = np.array(kpts_result)
    return result, input_fps, cam_w, cam_h
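When generate_kpts above is called with no_nan=False, frames where detection failed come back as all-NaN rows; a small sketch (my own, not from the repo) that fills those frames by per-joint linear interpolation:

import numpy as np

def interpolate_nan_frames(kpts):
    # kpts: (T, 17, 2) keypoint array with NaN rows for frames where detection failed
    kpts = kpts.copy()
    t = np.arange(kpts.shape[0])
    for j in range(kpts.shape[1]):
        for c in range(kpts.shape[2]):
            col = kpts[:, j, c]
            bad = np.isnan(col)
            if bad.any() and (~bad).any():
                # linearly interpolate missing values from the surrounding valid frames
                col[bad] = np.interp(t[bad], t[~bad], col[~bad])
    return kpts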
Example #8
def generate_kpts(video_name):
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoints
    kpts_result = []
    for i in tqdm(range(video_length-1)):

        ret_val, input_image = cam.read()

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:,[2,1,0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        # smooth the keypoints
        preds = smooth_filter(preds)

        # 3D video pose (only supports a single person)
        kpts_result.append(preds[0])

    result = np.array(kpts_result)
    return result
Example #9
def getKptsFromImage(human_model, pose_model, image, smooth=None):
    args = get_args()
    update_config(cfg, args)

    bboxs, scores = yolo_det(image, human_model)
    # bbox is coordinate location
    inputs, origin_img, center, scale = preprocess(image, bboxs, scores, cfg)

    with torch.no_grad():
        # compute output heatmap
        inputs = inputs[:, [2, 1, 0]]
        output = pose_model(inputs.cuda())
        # compute coordinate
        preds, maxvals = get_final_preds(
            cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

    # 3D video pose (only supports a single person)
    return preds[0]
Example #10
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    #  pose_model = torch.nn.DataParallel(pose_model, device_ids=[0,1]).cuda()
    pose_model.cuda()

    item = 0
    for i in tqdm(range(video_length - 1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()

        #  if args.camera:
        #  #  to keep real-time speed, only predict on every other frame
        #  if item == 0:
        #  item = 1
        #  continue

        item = 0
        try:
            bboxs, scores = mm_det(human_model, input_image)
            # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(
                input_image, bboxs, scores, cfg)
        except:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        out.write(image)
        if args.display:
            ######### full screen
            #  out_win = "output_style_full_screen"
            #  cv2.namedWindow(out_win, cv2.WINDOW_NORMAL)
            #  cv2.setWindowProperty(out_win, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
            #  cv2.imshow(out_win, image)

            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
Example #11
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    # keep width and height as multiples of 64
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int((input_image.shape[0] / input_image.shape[1] * resize_W) / 64 ) * 64
    print(resize_W, resize_H)
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output,fourcc, input_fps, (input_image.shape[1],input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    first_frame = 1

    flow_boxs = 0
    flow_kpts = 0

    item = 0
    for i in tqdm(range(video_length-1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        if first_frame == 0:
            try:
                t0 = ckpt_time()
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxs, flow_kpts = flow_propagation(keypoints, flow_result)
                print('flownet time per frame: {:0.3f}'.format(time.time() - t0))
            except Exception as e:
                print(e)
                continue

        pre_image = input_image
        first_frame = 0


        try:
            bboxs, scores = yolo_det(input_image, human_model)

            # first frame
            if i == 0:
                inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
            else:
                # NMS on box confidences between the current and previous frame
                if not (flow_bbox_scores>scores).tolist()[0][0]:
                    flow_boxs = bboxs
                inputs, origin_img, center, scale = PreProcess(input_image, flow_boxs, scores, cfg)

        except:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:,[2,1,0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        # the current frame's box confidence, reused as the flow-box confidence for the next frame
        flow_bbox_scores = scores.copy()

        if i != 0:
            preds = (preds + flow_kpts) / 2

        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        out.write(image)
        keypoints = np.concatenate((preds, maxvals), 2)


        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
Example #12
def validate(config,
             val_loader,
             val_dataset,
             model,
             criterion,
             output_dir,
             tb_log_dir,
             writer_dict=None,
             gamma=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    # acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    all_preds = []
    all_gts = []

    with torch.no_grad():
        end = time.time()
        for i, (input, target, h_target, v_target, _) in enumerate(val_loader):
            if len(input.shape) > 4:
                input = input.view(input.shape[0] * input.shape[1],
                                   input.shape[2], input.shape[3],
                                   input.shape[4])
                target = target.view(target.shape[0] * target.shape[1],
                                     target.shape[2], target.shape[3])

            # compute output
            if config.MODEL.LEARN_PAIRWISE_TERMS:
                # outputs, h_output, v_output, disagreement = model(input, config.TRAIN.NE_GAMMA_L if gamma is None else gamma)
                # h_output = h_output[0]
                # v_output = v_output[0]
                outputs, disagreement, _ = model(
                    input, config.TRAIN.NE_GAMMA_L if gamma is None else gamma)
                output = outputs[-1]
            else:
                output = model(input)

            target = target.cuda(non_blocking=True)
            # if config.MODEL.LEARN_PAIRWISE_TERMS:
            #     loss = torch.stack(
            #         [criterion(output, target) for output in outputs],
            #         dim = 0
            #     )
            #     loss = loss.mean()
            # else:
            loss = criterion(output, target)

            output = torch.nn.functional.interpolate(output,
                                                     size=(target.size(1),
                                                           target.size(2)),
                                                     mode="bilinear",
                                                     align_corners=False)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            # if config.MODEL.LEARN_PAIRWISE_TERMS:
            #     preds = get_final_preds(outputs[-2].detach().cpu().numpy(), outputs[-1].detach().cpu().numpy())
            # else:
            preds = get_final_preds(output.detach().cpu().numpy())

            all_preds.extend(preds)
            all_gts.extend(target.detach().cpu().numpy())

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses)
                logger.info(msg)

                if writer_dict:
                    writer = writer_dict['writer']
                    global_steps = writer_dict['vis_global_steps']

                    idx = np.random.randint(0, num_images)
                    # idx = 0

                    input_image = input.detach().cpu().numpy()[idx]
                    input_image = input_image * val_dataset.std.squeeze(
                        0) + val_dataset.mean.squeeze(0)
                    input_image[input_image > 1.0] = 1.0
                    input_image[input_image < 0.0] = 0.0

                    target_image = target.detach().cpu().numpy()[idx].astype(
                        np.int64)
                    target_image = val_dataset.decode_segmap(target_image)

                    # if config.MODEL.LEARN_PAIRWISE_TERMS:
                    #     output = (torch.nn.functional.softmax(outputs[-2], dim=1) +
                    #         torch.nn.functional.softmax(outputs[-1], dim=1)) / 2.0
                    #     labels = torch.argmax(output, dim=1, keepdim=False)
                    # else:
                    output = torch.nn.functional.softmax(output, dim=1)
                    labels = torch.argmax(output, dim=1, keepdim=False)

                    labels = labels.detach().cpu().numpy()[idx]
                    output_vis = vis_segments(labels,
                                              config.MODEL.EXTRA.NUM_CLASSES)

                    writer.add_image('input_image',
                                     input_image,
                                     global_steps,
                                     dataformats='CHW')
                    writer.add_image('result_vis',
                                     output_vis,
                                     global_steps,
                                     dataformats='HWC')
                    writer.add_image('gt_mask',
                                     target_image,
                                     global_steps,
                                     dataformats='HWC')

                    if config.MODEL.LEARN_PAIRWISE_TERMS:
                        disagreement = disagreement.detach().cpu().numpy(
                        )[idx].astype(np.uint8)
                        disagreement = np.expand_dims(disagreement, axis=-1)
                        disagreement = np.repeat(disagreement, 3, axis=-1)
                        disagreement = np.transpose(disagreement,
                                                    (2, 0, 1)) * 255

                        writer.add_image('disagreement',
                                         disagreement,
                                         global_steps,
                                         dataformats='CHW')

                    # if config.MODEL.LEARN_PAIRWISE_TERMS:
                    #     h_output = h_output.view(h_output.size(0), -1, h_output.size(3), h_output.size(4))
                    #     h_output = torch.nn.functional.softmax(h_output, dim=1)
                    #     h_labels = torch.argmax(h_output, dim=1, keepdim=False)
                    #     h_labels = h_labels.detach().cpu().numpy()[idx].astype(np.int64)
                    #     h_labels = h_labels[:-1, :]

                    #     v_output = v_output.view(v_output.size(0), -1, v_output.size(3), v_output.size(4))
                    #     v_output = torch.nn.functional.softmax(v_output, dim=1)
                    #     v_labels = torch.argmax(v_output, dim=1, keepdim=False)
                    #     v_labels = v_labels.detach().cpu().numpy()[idx].astype(np.int64)
                    #     v_labels = v_labels[:, :-1]
                    #     # h_labels = h_target.detach().cpu().numpy()[idx].astype(np.int64)
                    #     # h_labels = h_labels[:-1, :]

                    #     # v_labels = v_target.detach().cpu().numpy()[idx].astype(np.int64)
                    #     # v_labels = v_labels[:, :-1]

                    #     n_classes = config.MODEL.EXTRA.NUM_CLASSES
                    #     off_diag = np.logical_or(
                    #         (h_labels % n_classes != h_labels // n_classes),
                    #         (v_labels % n_classes != v_labels // n_classes)
                    #     )
                    #     off_diag = off_diag.astype(np.uint8) * 255
                    #     writer.add_image('edges', off_diag, global_steps,
                    #         dataformats='HW')

                    writer_dict['vis_global_steps'] = global_steps + 1

                # prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)

        # Calculate IoU score for entire validation set
        if config.DATASET.DATASET.find('mnist') >= 0:
            all_preds = np.concatenate(all_preds, axis=0)
            all_gts = np.concatenate(all_gts, axis=0)

        avg_iou_score = calc_IoU(all_preds, all_gts,
                                 config.MODEL.EXTRA.NUM_CLASSES)

        perf_indicator = avg_iou_score

        logger.info('Mean IoU score: {:.3f}'.format(avg_iou_score))

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']

            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_iou_score', avg_iou_score, global_steps)

            writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator
Example #13
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    resize_W = 640
    resize_H = 384
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    first_frame = 1

    flow_boxs = 0
    flow_kpts = 0

    item = 0
    for i in tqdm(range(video_length - 1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        if first_frame == 0:
            try:
                t0 = ckpt_time()
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxs, flow_kpts = flow_propagation(keypoints, flow_result)
                _, t1 = ckpt_time(t0, 1)
            except Exception as e:
                print(e)
                continue

        pre_image = input_image
        first_frame = 0

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bbox is coordinate location
            if type(flow_boxs) == int:
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
            else:
                #  flow_boxs = (flow_boxs + bboxs) /2
                inputs, origin_img, center, scale = PreProcess(
                    input_image, flow_boxs, scores, cfg)
        except:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 1080, 720)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

        if type(flow_boxs) != int:
            preds = (preds + flow_kpts) / 2

        origin_img = np.zeros(origin_img.shape, np.uint8)
        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        out.write(image)
        keypoints = np.concatenate((preds, maxvals), 2)

        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 1920, 1080)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
Example #14
0
def generate_kpts(video_name, smooth=None):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoints coordinate
    kpts_result = []
    for i in tqdm(range(video_length - 1)):

        ret_val, input_image = cam.read()
        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(
                input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # print("input shape: ", inputs.shape)  # 1 3 256 192
            # print("output shape: ", output.shape) # 1 17 64 48
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # 3D video pose (only supports a single person)
        kpts_result.append(preds[0])  # preds[0] (17, 2)

        # # pick the person with the largest mean y coordinate
        # max_index = 0
        # max_y = np.mean(preds[0, :, 1])
        #
        # for k in range(len(preds)):
        #     tmp_y = np.mean(preds[k, :, 1])
        #     if tmp_y > max_y:
        #         max_index = k
        #         max_y = tmp_y
        # kpts_result.append(preds[max_index])
        # # print("maxvals[max_index]:", np.mean(maxvals[max_index]))

    result = np.array(kpts_result)
    return result
Example #15
def test(config, test_loader, model, output_dir, tb_log_dir, writer_dict=None):
    batch_time = AverageMeter()

    # switch to evaluate mode
    model.eval()

    all_preds = []

    with torch.no_grad():
        end = time.time()
        for i, input in enumerate(test_loader):
            # compute output
            output = model(input)

            num_images = input.size(0)

            preds = get_final_preds(output.detach().cpu().numpy())
            all_preds.extend(preds)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                          i, len(test_loader), batch_time=batch_time)
                logger.info(msg)

            if writer_dict:
                writer = writer_dict['writer']
                global_steps = writer_dict['vis_global_steps']

                # idx = np.random.randint(0, num_images)
                idx = 0

                input_image = input.detach().cpu().numpy()[idx]
                min_val = input_image.min()
                max_val = input_image.max()
                input_image = (input_image - min_val) / (max_val - min_val)
                heatmap_pred = output.detach().cpu().numpy()[idx]
                heatmap_pred[heatmap_pred < 0.0] = 0
                heatmap_pred[heatmap_pred > 1.0] = 1.0

                input_image = (input_image * 255).astype(np.uint8)
                input_image = np.transpose(input_image, (1, 2, 0))
                pred = preds[idx]
                tp = np.ones(pred.shape[0], dtype=bool)

                writer.add_image('final_preds',
                                 final_preds,
                                 global_steps,
                                 dataformats='HWC')
                writer.add_image('input_recording',
                                 input_image,
                                 global_steps,
                                 dataformats='HWC')
                writer.add_image('heatmap_pred',
                                 heatmap_pred,
                                 global_steps,
                                 dataformats='CHW')

                writer_dict['vis_global_steps'] = global_steps + 1
def main():
    args = parse_args()
    update_config(cfg, args)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    ########## load human detector model
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()

    from lib.detector.yolo.human_detector import human_bbox_get as yolo_det
    print(args.img_input)
    img = cv2.imread(args.img_input)
    print(type(img))
    cv2.imshow("test", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    bboxs, scores = yolo_det(args.img_input, human_model,
                             confidence=0.5)  # bboxes (N, 4) [x0, y0, x1, y1]
    print("bboxs = ", bboxs)
    print("scores = ", scores)

    # bbox is coordinate location
    inputs, origin_img, center, scale = preprocess(args.img_input, bboxs,
                                                   scores, cfg)
    print("inputs type = ", type(inputs))
    print("inputs shape after preprocess = ", inputs.shape)

    import onnxruntime
    import numpy as np
    sess = onnxruntime.InferenceSession("onnx_hrnet_human.onnx")

    input_name = sess.get_inputs()[0].name
    print("input name", input_name)
    input_shape = sess.get_inputs()[0].shape
    print("input shape", input_shape)
    input_type = sess.get_inputs()[0].type
    print("input type", input_type)

    output_name = sess.get_outputs()[0].name
    print("output name", output_name)
    output_shape = sess.get_outputs()[0].shape
    print("output shape", output_shape)
    output_type = sess.get_outputs()[0].type
    print("output type", output_type)

    # inference
    import numpy.random
    x = inputs.numpy()
    x = x.astype(numpy.float32)
    res = sess.run([output_name], {input_name: x})
    print("np.array(res[0]) shape = ", np.array(res[0]).shape)

    #ipdb.set_trace()

    preds, maxvals = get_final_preds(cfg, np.array(res[0]), np.asarray(center),
                                     np.asarray(scale))
    print("preds = ", preds)
    print("maxvals = ", maxvals)

    image = plot_keypoint(origin_img, preds, maxvals, 0.5)
    cv2.imwrite(args.img_output, image)
    #if args.display:
    #cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL);
    #cv2.resizeWindow("enhanced", 960, 480);
    cv2.imshow('enhanced', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    # switch to evaluate mode
    model.eval()
    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            input, target = input.cuda(), target.cuda()
            # compute output
            output = model(input)[-1]
            if config.TEST.FLIP_TEST:
                # this part is ugly because pytorch does not support negative strides:
                #   input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                output_flipped = model(input_flipped)[-1]
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :,
                                   1:] = output_flipped.clone()[:, :, :, 0:-1]
                    # output_flipped[:, :, :, 0] = 0

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())

            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            preds, maxvals = get_final_preds(config,
                                             output.clone().cpu().numpy(), c,
                                             s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
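            # scale is stored in units of pixel_std (200 px in HRNet), so s * 200 recovers the
            # box size in pixels and its product approximates the box area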
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])
            if config.DATASET.DATASET == 'posetrack':
                filenames.extend(meta['filename'])
                imgnums.extend(meta['imgnum'].numpy())

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(i, len(val_loader), batch_time=batch_time, loss=losses, acc=acc)
                logger.info(msg)

                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums)

        _, full_arch_name = get_model_name(config)
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, full_arch_name)
        else:
            _print_name_value(name_values, full_arch_name)

    return perf_indicator
Example #18
def main():
    tick = 0
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output,fourcc, input_fps, (input_image.shape[1],input_image.shape[0]))


    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    item = 0
    for i in tqdm(range(video_length-1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()


        if args.camera:
            # to keep real-time speed, only predict on every other frame
            if item == 0:
                item = 1
                continue

        item = 0
        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
        except:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:,[2,1,0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))


        # smooth the keypoints
        preds = smooth_filter(preds)
        #  preds = np.expand_dims(preds, 0)
        origin_img = np.zeros(origin_img.shape, np.uint8)
        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        if i >= 14:
            out.write(image)
        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
Example #19
def main():
    args = parse_args()
    update_config(cfg, args)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    #  pose_model = torch.nn.DataParallel(pose_model, device_ids=[0,1]).cuda()
    pose_model.cuda()

    from pycocotools.coco import COCO
    annFile = '/ssd/xyliu/data/coco/annotations/instances_val2017.json'
    im_root = '/ssd/xyliu/data/coco/images/val2017/'
    coco = COCO(annFile)
    catIds = coco.getCatIds(catNms=['person'])
    # ids of all images that contain people
    imgIds = coco.getImgIds(catIds=catIds)
    kpts_result = []
    detected_image_num = 0
    box_num = 0
    for imgId in tqdm(imgIds[:]):
        img = coco.loadImgs(imgId)[0]
        im_name = img['file_name']
        img = im_root + im_name
        img_input = plt.imread(img)

        try:
            bboxs, scores = mm_det(human_model, img_input, 0.3)
            inputs, origin_img, center, scale = PreProcess(
                img_input, bboxs, scores, cfg)

        except Exception as e:
            print(e)
            continue

        detected_image_num += 1
        with torch.no_grad():
            output = pose_model(inputs.cuda())
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

            #  vis = np.ones(shape=maxvals.shape,)
            vis = maxvals
            preds = preds.astype(np.float16)
            keypoints = np.concatenate((preds, vis), -1)
            for k, s in zip(keypoints, scores.tolist()):
                box_num += 1
                k = k.flatten().tolist()
                item = {
                    "image_id": imgId,
                    "category_id": 1,
                    "keypoints": k,
                    "score": s
                }
                kpts_result.append(item)

    num_joints = 17
    in_vis_thre = 0.2
    oks_thre = 0.5
    oks_nmsed_kpts = []
    for i in range(len(kpts_result)):
        img_kpts = kpts_result[i]['keypoints']
        kpt = np.array(img_kpts).reshape(17, 3)
        box_score = kpts_result[i]['score']
        kpt_score = 0
        valid_num = 0
        # each joint for bbox
        for n_jt in range(0, num_joints):
            # score
            t_s = kpt[n_jt][2]
            if t_s > in_vis_thre:
                kpt_score = kpt_score + t_s
                valid_num = valid_num + 1
        if valid_num != 0:
            kpt_score = kpt_score / valid_num

        # rescoring: multiply the keypoint confidence by the box confidence
        kpts_result[i]['score'] = kpt_score * box_score

    import json
    data = json.dumps(kpts_result)
    print(
        'image num is {} \tdetected_image num is {}\t person num is {}'.format(
            len(imgIds), detected_image_num, box_num))
    #  data = json.dumps(str(kpts_result))
    with open('person_keypoints.json', 'wt') as f:
        #  pass
        f.write(data)
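To score the dumped person_keypoints.json against COCO ground truth, a hedged sketch using pycocotools' standard keypoint evaluation (it assumes the person_keypoints_val2017.json annotation file is available next to the instances file used above):

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

gt_file = '/ssd/xyliu/data/coco/annotations/person_keypoints_val2017.json'  # assumed path
coco_gt = COCO(gt_file)
coco_dt = coco_gt.loadRes('person_keypoints.json')
coco_eval = COCOeval(coco_gt, coco_dt, 'keypoints')
coco_eval.params.imgIds = imgIds
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()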
Example #20
def main():
    previous_ids = 0
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    item = 0
    prev_max_id = 0
    for i in tqdm(range(video_length - 1)):
        x0 = ckpt_time()
        ret_val, input_image = cam.read()
        item = 0
        try:
            bboxs, scores = yolo_det(input_image, human_model, 1, 0.9)
            # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(
                input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        try:
            with torch.no_grad():
                # compute output heatmap
                inputs = inputs[:, [2, 1, 0]]
                output = pose_model(inputs.cuda())
                # compute coordinate
                preds, maxvals = get_final_preds(cfg,
                                                 output.clone().cpu().numpy(),
                                                 np.asarray(center),
                                                 np.asarray(scale))
        except Exception as e:
            print(e)
            continue

        kps_b = np.concatenate((preds, maxvals), 2)
        box_b = bboxs[:preds.shape[0]]

        if previous_ids == 0:
            previous_ids = [j for j in range(len(preds))]

        if i > 0:
            pose_similarity_matrix = compute_pairwise_oks(kps_a, box_a, kps_b)
            box_similarity_matrix = boxes_similarity(box_a, box_b)
            # pose similarity ratio
            ratio = 0.8
            similarity_matrix = pose_similarity_matrix * ratio + box_similarity_matrix * (
                1 - ratio)
            # prev_filter_ids: the previous-frame ids that survived matching
            prev_filter_ids, cur_ids = bipartite_matching_greedy(
                similarity_matrix)

            print('previous frame boxes: ', previous_ids)
            print(prev_filter_ids, cur_ids)

            cur_len = len(box_b)
            cur_maps = -np.ones(shape=(cur_len, ))

            for pos, num in enumerate(cur_ids):
                cur_maps[num] = previous_ids[prev_filter_ids[pos]]

            prev_max_id = max(max(previous_ids), prev_max_id)

            for j in range(cur_len):
                if cur_maps[j] == -1.:
                    prev_max_id += 1
                    cur_maps[j] = prev_max_id

            # carried over as the previous-frame ids for the next iteration
            previous_ids = cur_maps.astype(np.uint8).tolist()
            print('after map: ', previous_ids)

        # carried over as the previous frame for the next iteration
        kps_a = kps_b.copy()
        box_a = box_b.copy()

        if i > 0:
            image = plot_keypoint_track(origin_img, preds, maxvals, box_a,
                                        previous_ids, 0.1)
            out.write(image)
        if args.display and i > 0:
            winname = 'image'
            cv2.namedWindow(winname)  # Create a named window
            cv2.moveWindow(winname, 1000, 850)  # Move it to (1000, 850)
            cv2.imshow(winname, image)
            cv2.waitKey(100)
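bipartite_matching_greedy is not shown in these snippets; a generic greedy matching sketch over the similarity matrix (my own, and possibly different from the repo's implementation):

import numpy as np

def bipartite_matching_greedy_sketch(sim):
    # sim: (num_prev, num_cur) similarity matrix; repeatedly pick the best remaining pair
    sim = sim.astype(float).copy()
    prev_ids, cur_ids = [], []
    for _ in range(min(sim.shape)):
        r, c = np.unravel_index(np.argmax(sim), sim.shape)
        if not np.isfinite(sim[r, c]):
            break
        prev_ids.append(int(r))
        cur_ids.append(int(c))
        sim[r, :] = -np.inf  # a matched row/column cannot be reused
        sim[:, c] = -np.inf
    return prev_ids, cur_ids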
Example #21
def main():
    previous_ids = 0
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int((input_image.shape[0] / input_image.shape[1] * resize_W) / 64 ) * 64
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    image_resolution = (resize_W, resize_H)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output,fourcc, input_fps, (input_image.shape[1],input_image.shape[0]))


    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()


    #### load optical flow model
    flow_model = load_model()

    item = 0
    prev_max_id = 0
    for i in tqdm(range(video_length-1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        item = 0
        try:
            bboxs, scores = yolo_det(input_image, human_model)
            bboxs = np.concatenate((bboxs, scores.transpose(1,0)), -1)


            # add the flownet module
            if i>0:
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxes, flow_kpts = flow_propagation(prev_kpts, flow_result)
                flow_boxes = np.concatenate((flow_boxes, np.expand_dims(prev_boxes[...,4], -1)), -1)
                flow_kpts = np.concatenate((flow_kpts,prev_kpts_scores), -1)
                detected_boxes = bboxs.copy()
                #  plot_boxes(input_image.copy(), flow_boxes, [i for i in range(len(flow_boxes))], '{}_flow.png'.format(1000+i))
                #  plot_boxes(input_image.copy(), detected_boxes, [i for i in range(len(detected_boxes))], '{}_detected.png'.format(1000+i))
                bboxs = boxes_nms_test(flow_boxes, bboxs, image_resolution)
                #  plot_boxes(input_image.copy(), bboxs, [i for i in range(len(bboxs))], 'nms_{}.png'.format(100+i))

            inputs, origin_img, center, scale = PreProcess(input_image, bboxs[..., :4], bboxs[...,4], cfg)


        except Exception as e:
            print(e)
            pdb()
            continue

        try:
            with torch.no_grad():
                # compute output heatmap
                inputs = inputs[:,[2,1,0]]
                output = pose_model(inputs.cuda())
                # compute coordinate
                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
        except Exception as e:
            print(e)
            continue


        kps_b = np.concatenate((preds, maxvals), 2)
        prev_kpts = kps_b.copy()
        box_b = bboxs[:preds.shape[0]]

        if previous_ids == 0:
            previous_ids = [j for j in range(len(preds))]

        if i>0:
            # kps_a comes from the previous frame (flow-propagated); kps_b is the current HRNet detection
            kps_a = flow_kpts
            box_a = flow_boxes
            pose_similarity_matrix = compute_pairwise_oks(kps_a, box_a, kps_b)
            box_similarity_matrix = boxes_similarity(box_a, box_b)
            # pose similarity ratio
            ratio = 0.5
            similarity_matrix = pose_similarity_matrix*ratio + box_similarity_matrix*(1-ratio)
            # prev_filter_ids: the previous-frame ids that survived matching
            prev_filter_ids, cur_ids = bipartite_matching_greedy(similarity_matrix)

            print('previous frame boxes: ',previous_ids)
            print(prev_filter_ids, cur_ids)

            cur_len = len(box_b)
            cur_maps = -np.ones(shape=(cur_len,))

            for pos, num in enumerate(cur_ids):
                cur_maps[num] = previous_ids[prev_filter_ids[pos]]

            prev_max_id = max(max(previous_ids), prev_max_id)

            for j in range(cur_len):
                if cur_maps[j] == -1.:
                    prev_max_id += 1
                    cur_maps[j] = prev_max_id

            # carried over as the previous-frame ids for the next iteration
            previous_ids = cur_maps.astype(np.uint8).tolist()
            print('after map: ', previous_ids)


        # carried over as the previous frame for the next iteration
        kps_a = kps_b.copy()
        box_a = box_b.copy()
        prev_kpts = kps_b
        prev_kpts_scores = maxvals
        pre_image = input_image
        prev_boxes = bboxs

        if i>0:
            image = plot_keypoint_track(origin_img, preds, maxvals, box_a, previous_ids, 0.1)
            out.write(image)
        if args.display and i>0:
            ########### set the display window size
            winname = 'image'
            cv2.namedWindow(winname)        # Create a named window
            cv2.moveWindow(winname, 1000, 850)  # Move it to (1000, 850)
            cv2.imshow(winname, image)
            cv2.waitKey(100)
Example #22
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    # keep width and height as multiples of 64
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int(
        (input_image.shape[0] / input_image.shape[1] * resize_W) / 64) * 64
    print(resize_W, resize_H)
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    flow_boxs = 0
    flow_kpts = 0

    previous_ids = 0
    prev_max_id = 0
    pdb()
    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        if i > 0:
            try:
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxs, flow_kpts = flow_propagation(
                    pre_keypoints, flow_result)
                flow_kpts = np.concatenate((flow_kpts, flow_pose_scores), -1)
            except Exception as e:
                print(e)
                continue

        pre_image = input_image

        try:
            # box threshold is 0.6
            bboxs, scores = yolo_det(
                input_image, human_model)  # bbox is coordinate location

            # first frame
            if i == 0:
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
                # initial IDs and scores map
                previous_ids = [i for i in range(len(bboxs))]
                #  id_scores_map = {}
                #  for i in range(len(bboxs)): id_scores_map.update({previous_ids[i]: scores[i]})
            else:
                # NMS on box confidences between the current and previous frame
                #  new_boxs, new_ids = boxes_nms(flow_boxs, bboxs, previous_ids)
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)

        except Exception as e:
            print(e)
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))
            keypoints = np.concatenate((preds, maxvals), 2)

        # current-frame box confidences, used as the flow-box confidences for the next frame
        #  flow_bbox_scores = scores.copy()

        #  if i != 1:
        #  preds = (preds + flow_kpts) / 2

        # shift-kpts, shift-boxes, cur_kpts ------> TRACK

        if i > 0:
            kps_b = keypoints.copy()
            box_b = bboxs[:preds.shape[0]]
            kps_a = flow_kpts  # (N, 17, 3)
            box_a = flow_boxs

            pose_similarity_matrix = compute_pairwise_oks(kps_a, box_a, kps_b)
            box_similarity_matrix = boxs_similarity(box_a, box_b)
            ratio = 0.5
            similarity_matrix = pose_similarity_matrix * ratio + box_similarity_matrix * (
                1 - ratio)
            prev_filter_ids, cur_ids = bipartite_matching_greedy(
                similarity_matrix)

            print('previous frame boxes: ', previous_ids)
            print(prev_filter_ids, cur_ids)

            cur_len = len(box_b) + len(box_a) - len(cur_ids)
            cur_maps = -np.ones(shape=(cur_len, ))

            new_boxes = []
            new_kpts = []

            # matched current boxes inherit the IDs of their previous-frame matches
            for pos, num in enumerate(cur_ids):
                cur_maps[num] = previous_ids[prev_filter_ids[pos]]
                new_boxes.append(box_b[num])

            prev_max_id = max(max(previous_ids), prev_max_id)

            # unmatched boxes start new tracks
            for j in range(cur_len):
                if cur_maps[j] == -1.:
                    prev_max_id += 1
                    cur_maps[j] = prev_max_id

            previous_ids = cur_maps.astype(np.uint8).tolist()
            print('after map: ', previous_ids)

        # keypoints and per-joint scores handed to the flow propagation on the next frame
        pre_flow_keypoints = keypoints
        pre_flow_pkt_scores = maxvals.copy()

        if i > 1:
            image = plot_keypoint_track(origin_img, preds, maxvals, box_b,
                                        previous_ids, 0.1)

        if args.display and i > 1:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
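boxs_similarity above is imported from the tracking utilities; assuming it returns a pairwise IoU matrix between the flow-propagated boxes and the detected boxes, a minimal sketch could look like this (an illustration under that assumption, not the original implementation):

import numpy as np

def pairwise_box_iou(boxes_a, boxes_b):
    """boxes_a: (N, 4), boxes_b: (M, 4) as [x0, y0, x1, y1]; returns an (N, M) IoU matrix."""
    ious = np.zeros((len(boxes_a), len(boxes_b)))
    for i, a in enumerate(boxes_a):
        for j, b in enumerate(boxes_b):
            # intersection rectangle
            ix0, iy0 = max(a[0], b[0]), max(a[1], b[1])
            ix1, iy1 = min(a[2], b[2]), min(a[3], b[3])
            inter = max(0.0, ix1 - ix0) * max(0.0, iy1 - iy0)
            union = ((a[2] - a[0]) * (a[3] - a[1]) +
                     (b[2] - b[0]) * (b[3] - b[1]) - inter)
            ious[i, j] = inter / union if union > 0 else 0.0
    return ious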
Example #23
0
def main():
    args = parse_args()
    reset_config(config, args)
    image_path = '/home/xyliu/Pictures/pose/soccer.png'

    ########## load the human detector model
    #  from lib.detector.yolo.human_detector2 import load_model as yolo_model
    #  human_model = yolo_model()

    from lib.detector.mmdetection.high_api import load_model as mm_model
    human_model = mm_model()

    #  from lib.detector.yolo.human_detector2 import human_bbox_get as yolo_det
    from lib.detector.mmdetection.high_api import human_boxes_get as mm_det

    # load MODEL
    pose_model = model_load(config)
    pose_model.eval()
    pose_model.cuda()

    from pycocotools.coco import COCO
    annFile = '/ssd/xyliu/data/coco/annotations/instances_val2017.json'
    im_root = '/ssd/xyliu/data/coco/images/val2017/'
    coco = COCO(annFile)
    catIds = coco.getCatIds(catNms=['person'])
    # ids of all images that contain people
    imgIds = coco.getImgIds(catIds=catIds)
    kpts_result = []
    detected_image_num = 0
    box_num = 0
    for imgId in tqdm(imgIds):
        img = coco.loadImgs(imgId)[0]
        im_name = img['file_name']
        img = im_root + im_name
        img_input = cv2.imread(
            img, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        try:
            #  bboxs, scores = yolo_det(img_input, human_model)
            bboxs, scores = mm_det(human_model, img_input, 0.1)
            inputs, origin_img, center, scale = PreProcess(
                img_input, bboxs, scores, config)

        except Exception as e:
            print(e)
            continue

        detected_image_num += 1
        with torch.no_grad():
            output = pose_model(inputs.cuda())
            preds, maxvals = get_final_preds(config,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

            #  vis = np.ones(shape=maxvals.shape,)
            vis = maxvals
            preds = preds.astype(np.float16)
            keypoints = np.concatenate((preds, vis), -1)
            for k, s in zip(keypoints, scores.tolist()):
                box_num += 1
                k = k.flatten().tolist()
                item = {
                    "image_id": imgId,
                    "category_id": 1,
                    "keypoints": k,
                    "score": s
                }
                kpts_result.append(item)

    num_joints = 17
    in_vis_thre = 0.2
    oks_thre = 0.5
    oks_nmsed_kpts = []
    for i in range(len(kpts_result)):
        img_kpts = kpts_result[i]['keypoints']
        kpt = np.array(img_kpts).reshape(17, 3)
        box_score = kpts_result[i]['score']
        kpt_score = 0
        valid_num = 0
        # each joint for bbox
        for n_jt in range(0, num_joints):
            # score
            t_s = kpt[n_jt][2]
            if t_s > in_vis_thre:
                kpt_score = kpt_score + t_s
                valid_num = valid_num + 1
        if valid_num != 0:
            kpt_score = kpt_score / valid_num

        # rescoring: multiply the keypoint confidence by the box confidence
        kpts_result[i]['score'] = kpt_score * box_score

    import json
    data = json.dumps(kpts_result)
    print(
        'image num is {} \tdetected_image num is {}\t person num is {}'.format(
            len(imgIds), detected_image_num, box_num))
    #  data = json.dumps(str(kpts_result))
    with open('person_keypoints.json', 'wt') as f:
        #  pass
        f.write(data)
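The rescoring loop above boils down to one formula: the average confidence of the joints above in_vis_thre, multiplied by the detector's box score. A numpy-only sketch (the helper name is hypothetical):

import numpy as np

def rescore(keypoints, box_score, in_vis_thre=0.2):
    """keypoints: (17, 3) array of [x, y, confidence]; returns the rescored value."""
    conf = keypoints[:, 2]
    valid = conf > in_vis_thre
    # mean confidence over visible joints, zero if no joint passes the threshold
    kpt_score = conf[valid].mean() if valid.any() else 0.0
    return float(kpt_score * box_score)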
Example #24
0
def main():
    global max_id
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    # keep both width and height multiples of 64, as required by flownet2
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int(
        (input_image.shape[0] / input_image.shape[1] * resize_W) / 64) * 64
    image_resolution = (resize_W, resize_H)
    print(resize_W, resize_H)
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        try:
            if i > 0:
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxes, flow_kpts = flow_propagation(
                    prev_kpts, flow_result)
                flow_boxes = np.concatenate(
                    (flow_boxes, np.expand_dims(prev_boxes[..., 4], -1)),
                    -1)  # flow_boxes + previous boxes scores
                flow_kpts = np.concatenate((flow_kpts, prev_kpts_scores), -1)

            # boxes_threshold is 0.9
            detected_boxes, detected_scores = yolo_det(
                input_image, human_model)  # bbox is coordinate location
            detected_scores = np.expand_dims(detected_scores.flatten(), -1)
            detected_boxes = np.concatenate((detected_boxes, detected_scores),
                                            -1)  # (N, 5): [x0, y0, x1, y1, score]

            if i == 0:
                inputs, origin_img, center, scale = PreProcess(
                    input_image, detected_boxes[..., :4],
                    detected_boxes[..., 4], cfg)
                #  ploted_image = plot_boxes(input_image, detected_boxes, [i for i in range(len(detected_boxes))])
                #  cv2.imshow('image', ploted_image)
                #  cv2.waitKey(100)
            else:
                # the trickiest step: re-orders the boxes fed to the pose net and returns the matching ids
                print('before mapping: ', previous_ids)
                new_boxes, new_ids = boxes_nms_test(flow_boxes, detected_boxes,
                                                    previous_ids,
                                                    image_resolution)
                print('after mapping: ', new_ids)
                print(flow_boxes[:, 1], detected_boxes[:, 1])
                #  ploted_image = plot_boxes(input_image, new_boxes, new_ids)
                #  cv2.imshow('image', ploted_image)
                #  cv2.waitKey(100)
                inputs, origin_img, center, scale = PreProcess(
                    input_image, new_boxes[..., :4], new_boxes[..., 4], cfg)

        except Exception as e:
            print(e)
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        # pose estimation
        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            detected_kpts, detected_kpts_scores = get_final_preds(
                cfg,
                output.clone().cpu().numpy(), np.asarray(center),
                np.asarray(scale))
            detected_kpts = np.concatenate(
                (detected_kpts, detected_kpts_scores), 2)

        # TRACK Assign IDs. flow_boxes; detected_boxes, new_ids
        if i > 0:
            pose_similarity_matrix = compute_pairwise_oks(
                flow_kpts, flow_boxes[..., :4], detected_kpts)
            box_similarity_matrix = boxes_similarity(flow_boxes[..., :4],
                                                     detected_boxes[..., :4])
            ratio = 0.5
            similarity_matrix = pose_similarity_matrix * ratio + box_similarity_matrix * (
                1 - ratio)
            prev_filter_ids, cur_ids = bipartite_matching_greedy(
                similarity_matrix)

            print('previous frame boxes: ', prev_pose_ids)
            cur_len = len(detected_kpts)
            new_pose_ids = pose_match_ids(prev_pose_ids, prev_filter_ids,
                                          cur_ids, cur_len)

            #  detected_kpts = detected_kpts[ [i-1 for i in new_ids],:]
            #  detected_kpts_scores = detected_kpts_scores[[i-1 for i in new_ids],:]
            print(prev_filter_ids, cur_ids)
            print('after map: ', new_pose_ids)

        # prepare inputs for the next frame
        pre_image = input_image.copy()
        prev_kpts = detected_kpts
        prev_kpts_scores = detected_kpts_scores
        if i == 0:
            prev_boxes = detected_boxes
            previous_ids = [j for j in range(len(detected_boxes))]
            prev_pose_ids = previous_ids

        else:
            previous_ids = new_ids
            prev_boxes = new_boxes
            prev_pose_ids = new_pose_ids
        if i > 1:
            image = plot_keypoint_track(origin_img, detected_kpts,
                                        detected_kpts_scores,
                                        new_boxes[..., :4], new_pose_ids, 0.1)
        else:
            image = plot_keypoint_track(origin_img, detected_kpts,
                                        detected_kpts_scores,
                                        detected_boxes[..., :4], previous_ids,
                                        0.1)

        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
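bipartite_matching_greedy above is imported from the tracking utilities; based on how it is called, it appears to return matched row indices (previous-frame instances) and column indices (current-frame detections). A plausible greedy sketch under that assumption, not the original implementation:

import numpy as np

def bipartite_matching_greedy_sketch(similarity):
    """Greedy one-to-one matching on a (num_prev, num_cur) similarity matrix."""
    sim = np.array(similarity, dtype=float, copy=True)
    prev_ids, cur_ids = [], []
    for _ in range(min(sim.shape)):
        # pick the currently highest-similarity pair
        r, c = np.unravel_index(np.argmax(sim), sim.shape)
        if not np.isfinite(sim[r, c]):
            break
        prev_ids.append(int(r))
        cur_ids.append(int(c))
        sim[r, :] = -np.inf   # each previous instance is matched at most once
        sim[:, c] = -np.inf   # each current detection is matched at most once
    return prev_ids, cur_ids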