def Demo(opt):
    """Launch the demo pipeline selected by ``opt.mode`` and ``opt.modality``.

    ``opt.mode`` is compared against 'video' and 'camera'; ``opt.modality``
    against 'R' and 'DR'.  Any other combination does nothing.  In 'video'
    mode a writer / processor is only created when ``opt.save_video`` is
    truthy.  ``classes``, ``detect_classes`` and ``recognition_classes`` are
    not defined here and are resolved from the enclosing module.
    """
    run_mode = opt.mode

    if run_mode == 'video':
        modality = opt.modality
        if modality == 'R':
            frames = VideoLoader(opt.video_path).start()
            _, fps, w, h = frames.videoinfo()

            # Recognition loader consumes the frames produced above.
            print('Loading model...')
            sys.stdout.flush()
            recognizer = RecognitionLoader(
                frames,
                recognition_classes,
                opt.temporal_sample_length,
                step=opt.recognize_sample_step,
            ).start()

            # The data writer is only built when a video has to be saved.
            if opt.save_video:
                if not os.path.exists(opt.out_path):
                    os.makedirs(opt.out_path)
                save_path = os.path.join(opt.out_path, 'VideoDemo.avi')
                video_writer = DataWriter(
                    recognizer,
                    savepath=save_path,
                    save_video=opt.save_video,
                    vis=opt.show,
                    fps=fps,
                    frameSize=(w, h),
                ).start()
                print('output video has been saved in %s' % save_path)
                video_writer.stop()
        elif modality == 'DR' and opt.save_video:
            if not os.path.exists(opt.out_path):
                os.makedirs(opt.out_path)
            result_file = os.path.join(opt.out_path, 'result.avi')
            processor = Video_Detect_Recognition_Processor(
                opt.video_path, result_file, detect_classes,
                recognition_classes).start()
            processor.read_detect()
    elif run_mode == 'camera':
        modality = opt.modality
        if modality == 'R':
            camera_recognizer = CameraRecognition(classes).start()
        elif modality == 'DR':
            processor = Read_Detect_Recognition_Processor(
                detect_classes, recognition_classes).start()
            processor.read_detect()
 def __init__(self, videofile, mode='normal'):
     """Wire up the loader, detector and writer objects for ``videofile``.

     Each stage is constructed and immediately started through its
     ``.start()`` method, in the order: video loader, detection loader,
     detection processor, data writer.  The output file is named
     ``AlphaPose_<stem>.mp4`` inside ``args.outputpath``, where ``<stem>``
     is the part of the video's base name before its first dot.
     """
     self.videofile = videofile
     self.mode = mode

     # Frame source; videoinfo() is unpacked as (fourcc, fps, frameSize).
     loader = VideoLoader(self.videofile, batchSize=args.detbatch).start()
     self.data_loader = loader
     self.fourcc, self.fps, self.frameSize = loader.videoinfo()

     # Detection stages are chained: loader -> detection loader -> processor.
     detections = DetectionLoader(loader, batchSize=args.detbatch).start()
     self.det_loader = detections
     self.det_processor = DetectionProcessor(detections).start()
     self.pose_dataset = Mscoco()

     out_name = ('AlphaPose_' +
                 ntpath.basename(self.videofile).split('.')[0] + '.mp4')
     out_file = os.path.join(args.outputpath, out_name)
     codec = cv2.VideoWriter_fourcc(*'DIVX')
     self.writer = DataWriter(args.save_video, out_file, codec, self.fps,
                              self.frameSize).start()
     self.results = []
Exemple #3
0

# Script entry point of a webcam demo snippet.
# NOTE(review): this snippet appears to be truncated/spliced -- it uses
# `data_loader` and `det_processor` below without defining them anywhere in
# the visible code, and it ends in the middle of the per-frame loop.
if __name__ == "__main__":
    webcam = args.webcam
    mode = args.mode
    if not os.path.exists(args.outputpath):
        os.mkdir(args.outputpath)

    # Load input video
    fvs = WebcamLoader(webcam).start()
    (fourcc, fps, frameSize) = fvs.videoinfo()
    # Data writer
    # `webcam` is concatenated into the file name, so it must be a string here.
    save_path = os.path.join(args.outputpath,
                             'AlphaPose_webcam' + webcam + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps,
                        frameSize).start()

    # Load YOLO model
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_model = Darknet("yolo/cfg/yolov3.cfg")
    det_model.load_weights('models/yolo/yolov3.weights')
    # The detector input size is taken from the command line and must be a
    # multiple of 32 that is larger than 32 (enforced by the asserts below).
    det_model.net_info['height'] = args.inp_dim
    det_inp_dim = int(det_model.net_info['height'])
    assert det_inp_dim % 32 == 0
    assert det_inp_dim > 32
    det_model.cuda()
    det_model.eval()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    # Timing samples; only 'dt' is appended to in the visible part of the loop.
    runtime_profile = {
        'dt': [],
        'pt': [],
        'pn': []
    }

    # Init data writer
    # NOTE(review): this rebinds `writer`, discarding the reference to the
    # writer started above (which is never stopped here) -- confirm intent.
    writer = DataWriter(args.save_video).start()

    # NOTE(review): `data_loader` is not defined in the visible code.
    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))

    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            # NOTE(review): `det_processor` is not defined in the visible code.
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            # No detections for this item: forward the image alone and move on.
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img, im_name.split('/')[-1])
                continue

            # getTime(t) is unpacked as (new checkpoint, elapsed value).
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)
Exemple #5
0
def handle_video(videofile):
    """Run detection + pose estimation on a video and return one person's keypoints.

    Frames are read through ``VideoLoader``, detections come from the
    ``DetectionLoader`` / ``DetectionProcessor`` pair, and the pose network's
    heat maps are handed to a ``DataWriter`` that accumulates the results.
    From every entry of ``writer.results()`` a single person is kept: the
    last one that is neither the left-most nor the right-most by mean
    keypoint x coordinate, falling back to the first person when there is no
    such candidate (e.g. one or two people).

    Args:
        videofile: Path of the input video; it is also stored in
            ``args.video``.

    Returns:
        ``np.float32`` array stacking the selected person's keypoints for
        every result entry that could be processed.  Entries that fail are
        reported with ``'error...'`` and skipped.

    Raises:
        IOError: If ``videofile`` is empty.
    """
    import time  # local import: only used by the wait loop below

    args.video = videofile
    if not videofile:
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    _, fps, frameSize = data_loader.videoinfo()

    print('the video is {} f/s'.format(fps))

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps,
                        frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for _ in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if orig_img is None:
                break
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: pass the bare image through to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose estimation, at most `batchSize` crops per forward pass.
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[lo:lo + batchSize].cuda())
                for lo in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Sleep while waiting so this loop does not spin a core against the
    # writer it is waiting for.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()

    kpts = []
    for frame_result in final_result:
        try:
            preds = frame_result['result']
            # Mean x coordinate of each person's keypoints (column 0),
            # computed once instead of once per comparison loop.
            x_means = [
                np.mean(person['keypoints'].data.numpy()[:, 0])
                for person in preds
            ]
            indices = range(len(x_means))
            # min()/max() keep the first index on ties, like the strict
            # `<` / `>` scans they replace; an empty `preds` raises here.
            min_index = min(indices, key=x_means.__getitem__)
            max_index = max(indices, key=x_means.__getitem__)

            # Keep the last person that is neither extreme; default to 0.
            mid_index = 0
            for k in indices:
                if k != min_index and k != max_index:
                    mid_index = k
            kpts.append(preds[mid_index]['keypoints'].data.numpy())
        except Exception:
            # Best effort: a malformed or empty entry is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print('error...')

    return np.array(kpts).astype(np.float32)
Exemple #6
0
def call_alphapose(input_dir, output_dir, format='open', batchSize=1):
    """Run detection + pose estimation on the images directly inside ``input_dir``.

    Results collected by the ``DataWriter`` are written with ``write_json``
    into ``output_dir`` and then post-processed by ``correct_json_save``.

    Args:
        input_dir: Directory whose top-level files are used as image names.
        output_dir: Directory for the JSON output; created if missing.
        format: Forwarded to ``write_json`` as ``_format``.  The image loader
            itself is always created with ``format='yolo'``.
        batchSize: Batch size for the loaders and for the pose network.

    Raises:
        IOError: If ``input_dir`` cannot be listed.
    """
    import time  # local import: only used by the wait loop below

    os.makedirs(output_dir, exist_ok=True)

    # Only the files directly inside input_dir are used.  Iterating the whole
    # os.walk() and keeping the last `files` picked the listing of whichever
    # sub-directory happened to be visited last, although images are loaded
    # relative to input_dir.
    top_level = next(os.walk(input_dir), None)
    if top_level is None:
        raise IOError('Error: cannot list input directory {}'.format(input_dir))
    im_names = top_level[2]
    print(im_names)

    data_loader = ImageLoader(im_names,
                              batchSize=batchSize,
                              format='yolo',
                              dir_path=input_dir).start()
    det_loader = DetectionLoader(data_loader, batchSize=batchSize).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()
    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(False).start()
    for _ in tqdm(range(data_loader.length())):
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: pass the bare image through to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose estimation, at most `batchSize` crops per forward pass.
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[lo:lo + batchSize].cuda())
                for lo in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)
            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

    # Sleep while waiting so this loop does not spin a core against the
    # writer it is waiting for.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, output_dir, _format=format)
    correct_json_save(output_dir)
    print('Over')
Exemple #7
0
def test():
    """Run detection + pose estimation on the images in ``args.inputpath``.

    Image names are the files directly inside ``args.inputpath``, sorted by
    the integer value of their stem (so every file name must be numeric
    before its extension).  Results are written with ``write_json`` to
    ``args.outputpath`` and also returned.

    ``pose_model`` is not created here; it is resolved from the enclosing
    module scope.

    Returns:
        The list returned by ``writer.results()``.

    Raises:
        IOError: If ``args.inputpath`` cannot be listed.
    """
    import time  # local import: only used by the wait loop below

    inputpath = args.inputpath

    # Only the files directly inside inputpath are used.  Looping over the
    # whole os.walk() kept the listing of the last directory visited and left
    # `im_names` unbound when the directory could not be listed.
    top_level = next(os.walk(inputpath), None)
    if top_level is None:
        raise IOError('Error: must contain either --indir/--list')
    im_names = sorted(top_level[2],
                      key=lambda x: int(os.path.splitext(x)[0]))
    print(im_names)

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=1, format='yolo').start()

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=1).start()
    det_processor = DetectionProcessor(det_loader).start()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(args.save_video).start()

    im_names_desc = tqdm(range(data_loader.length()))

    batchSize = args.posebatch
    for _ in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: pass the bare image through to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose estimation, at most `batchSize` crops per forward pass.
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[lo:lo + batchSize].cuda())
                for lo in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)
            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('===========================> Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Sleep while waiting so this loop does not spin a core against the
    # writer it is waiting for.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)
    return final_result
Exemple #8
0
def handle_video(video_file):
    """Run detection + pose estimation on a video and write the results as JSON.

    The output directory is ``outputs/alpha_pose_<video_name>`` (stored in
    ``args.outputpath``), where ``<video_name>`` is the video's base name
    without its extension.  If that directory already exists its ``vis``
    sub-directory is removed; otherwise the directory is created together
    with any missing parents.

    The pose model and its inputs are not moved to the GPU in this variant
    (the ``.cuda()`` calls were disabled by the original author).

    Args:
        video_file: Path of the input video; also stored in ``args.video``.

    Returns:
        Tuple ``(final_result, video_name)`` where ``final_result`` is the
        list returned by ``writer.results()``.

    Raises:
        IOError: If ``video_file`` is empty.
    """
    import time  # local import: only used by the wait loop below

    args.video = video_file
    # Validate before touching the file system so an empty path does not
    # leave a stray output directory behind.
    if not video_file:
        raise IOError('Error: must contain --video')

    # splitext keeps names without an extension intact; slicing at
    # rfind('.') == -1 used to drop their last character.
    video_name = os.path.splitext(os.path.basename(video_file))[0]

    args.outputpath = f'outputs/alpha_pose_{video_name}'
    if os.path.exists(args.outputpath):
        shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True)
    else:
        # makedirs also creates the parent 'outputs' directory when missing.
        os.makedirs(args.outputpath)

    # Load input video
    data_loader = VideoLoader(video_file, batchSize=args.detbatch).start()
    _, fps, _ = data_loader.videoinfo()
    print('the video is {} f/s'.format(fps))

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model (left on its default device, see docstring)
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.eval()
    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    writer = DataWriter(args.save_video).start()
    print('Start pose estimation...')
    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if orig_img is None:
                print(f'{i}-th image read None: handle_video')
                break
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: pass the bare image through to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose estimation, at most `batchSize` crops per forward pass.
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[lo:lo + batchSize])
                for lo in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))
    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Sleep while waiting so this loop does not spin a core against the
    # writer it is waiting for.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)

    return final_result, video_name
    # NOTE(review): orphaned fragment.  These statements follow the `return`
    # of the preceding function at the same indentation, so they can never
    # run, and the fragment stops at an `if` that has no body.  It reads
    # `det_loader`, `data_loader`, `obj_id`, `opt.kpdWeights`, `cam_K` and
    # `kp_model_vertices`, none of which are defined in the visible code.
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model here
    pose_dataset = Mscoco()  # is_train, res, joints, rot_factor
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, obj_id, pose_dataset,
                                     opt.kpdWeights)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    # Timing samples keyed 'dt' / 'pt' / 'pn'; nothing is appended in the
    # visible part of this fragment.
    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer for writing data and post
    writer = DataWriter(cam_K, 50, kp_model_vertices,
                        args.save_video).start()  # save_video default: False

    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))

    batchSize = args.posebatch
    for i in im_names_desc:
        # for i in range(data_len):
        # if i>10: break # for debugging
        start_time = getTime()
        with torch.no_grad():
            # Detection is handling here
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()

            # NOTE(review): the body of this `if` is missing; the source
            # text is cut off here.
            if boxes is None or boxes.nelement() == 0:
    # NOTE(review): orphaned fragment of a two-webcam setup; its enclosing
    # definition is not visible.  `url_1`, `url_2` and `webcam` are read but
    # not defined in the visible code, and `fvs_1` is not used again here.
    # Load input video
    fvs_0 = WebcamLoader(url_1).start()
    fvs_1 = WebcamLoader(url_2).start()

    # Only the first stream is queried for codec, frame rate and frame size.
    (fourcc, fps, frameSize) = fvs_0.videoinfo()

    # read the camera parameter of this dataset
    # with open ( opt.camera_parameter_path,'rb' ) as f:
    #     camera_parameter = pickle.load (f)

    # Data writer
    save_path = os.path.join(args.outputpath,
                             'AlphaPose_webcam' + webcam + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps,
                        frameSize).start()

    # detection module
    print('Loading detection model ')
    sys.stdout.flush()
    det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
    det_model.load_weights('models/yolo/yolov3-spp.weights')
    # The detector input size comes from the command line and must be a
    # multiple of 32 that is larger than 32 (enforced by the asserts below).
    det_model.net_info['height'] = args.inp_dim
    det_inp_dim = int(det_model.net_info['height'])
    assert det_inp_dim % 32 == 0
    assert det_inp_dim > 32
    det_model.cuda()
    det_model.eval()

    # pose module
def main(file_name):
    """Run detection + pose estimation on a video and write the results as JSON.

    The rendered video (when enabled through ``args.save_video``) is named
    ``AlphaPose_<stem>.avi`` inside ``args.outputpath``, and the collected
    results are written there with ``write_json``.

    Args:
        file_name: Path of the input video.

    Raises:
        IOError: If ``file_name`` is empty.
    """
    import time  # local import: only used by the wait loop below

    videofile = file_name
    if not videofile:
        raise IOError('Error: must contain --video')

    # makedirs also handles a nested output path and an already existing one.
    os.makedirs(args.outputpath, exist_ok=True)

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    _, fps, frameSize = data_loader.videoinfo()

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps,
                        frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for _ in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if orig_img is None:
                break
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: pass the bare image through to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose estimation, at most `batchSize` crops per forward pass.
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[lo:lo + batchSize].cuda())
                for lo in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            # The ipdb breakpoint that used to sit here stopped the run on
            # every frame and required a debugger package; it was removed.
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('===========================> Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Sleep while waiting so this loop does not spin a core against the
    # writer it is waiting for.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)
Exemple #12
0
def handle_video(videofile, no_nan=True):
    """Run detection + pose estimation on a video and return first-person keypoints.

    For every entry of ``writer.results()`` the keypoints of the first
    listed person are kept.

    Args:
        videofile: Path of the input video; also stored in ``args.video``.
        no_nan: When true (default) the returned array only contains the
            entries that produced keypoints, in order.  When false the array
            has one ``(17, 2)`` slot per frame read, pre-filled with NaN so
            missing frames can be interpolated later; slots are filled at
            the indices of the frames that had detections.

    Returns:
        Tuple ``(kpts, fps, cam_w, cam_h)``: the ``np.float32`` keypoint
        array, the frame rate reported by the loader, and the first two
        components of the reported frame size.

    Raises:
        IOError: If ``videofile`` is empty.
    """
    import time  # local import: only used by the wait loop below

    args.video = videofile
    if not videofile:
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    _, fps, frameSize = data_loader.videoinfo()
    cam_w, cam_h = frameSize[0], frameSize[1]

    print('the video is {} f/s'.format(fps))

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps,
                        frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    frames_w_pose = []  # indices of the frames that had detections
    frame_cnt = 0       # number of frames actually read
    for _ in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if orig_img is None:
                break

            frame_cnt += 1
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: pass the bare image through to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            frames_w_pose.append(frame_cnt - 1)
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose estimation, at most `batchSize` crops per forward pass.
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[lo:lo + batchSize].cuda())
                for lo in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Sleep while waiting so this loop does not spin a core against the
    # writer it is waiting for.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()

    if no_nan:
        kpts = []
    else:
        # One NaN-filled slot per frame read, so gaps can be interpolated later.
        kpts = [
            np.full((17, 2), np.nan, dtype=np.float32)
            for _ in range(frame_cnt)
        ]

    for i, frame_result in enumerate(final_result):
        try:
            kpt = frame_result['result'][0]['keypoints'].data.numpy()
            if no_nan:
                kpts.append(kpt)
            else:
                # Assumes results line up one-to-one with frames_w_pose
                # -- TODO confirm against the writer implementation.
                kpts[frames_w_pose[i]] = kpt
        except Exception:
            # Best effort: a malformed or empty entry is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print('error...')

    kpts = np.array(kpts).astype(np.float32)

    return kpts, fps, cam_w, cam_h
        # NOTE(review): orphaned fragment.  It starts inside the body of a
        # branch whose header is not visible and stops in the middle of the
        # per-image loop.  `pose_dataset`, `im_names` and `det_model` are
        # read but not defined in the visible code.
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    # Timing samples; nothing is appended in the visible part of the loop.
    runtime_profile = {'dt': [], 'dn': [], 'pt': [], 'pn': []}

    # Load input images
    dataset = Image_loader(im_names, format='yolo')
    # One image per batch, original order, loaded by 20 worker processes.
    test_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=20,
                                              pin_memory=True)

    # Init data writer
    writer = DataWriter(args.save_video).start()

    im_names_desc = tqdm(test_loader)
    # NOTE(review): "bbb" and the per-image print below look like leftover
    # debug output -- confirm before keeping.
    print("bbb")
    for i, (img, inp, orig_img, im_name,
            im_dim_list) in enumerate(im_names_desc):
        print(im_name)
        start_time = getTime()
        with torch.no_grad():
            # Sizes of dimensions 2 and 3 of `inp`; not used again in the
            # visible part of the loop.
            ht = inp.size(2)
            wd = inp.size(3)
            # Human Detection
            # Element 0 is taken because the loader uses batch_size=1.
            img = Variable(img[0]).cuda()
            im_dim_list = im_dim_list[0].cuda()

            prediction = det_model(img, CUDA=True)
Exemple #14
0
def Alphapose(
    im_names,
    pose_model,
):
    """Detect people in ``im_names`` and run ``pose_model`` on the detections.

    The images go through ``ImageLoader`` -> ``DetectionLoader`` ->
    ``DetectionProcessor``. For every item read from the detection processor
    the cropped inputs are pushed through ``pose_model`` in chunks of
    ``args.posebatch`` and the concatenated output is handed to a
    ``DataWriter`` together with the boxes, scores and crop corners.

    Args:
        im_names: Image names passed straight to ``ImageLoader``.
        pose_model: Callable applied to each chunk of inputs after the chunk
            has been moved to the selected device (CUDA when available,
            otherwise CPU).

    Returns:
        ``final_result[0]['result'][0]['keypoints']`` where ``final_result``
        is what the writer returns from ``results()``, or ``None`` when the
        writer has no results at all or the first entry's ``'result'`` is
        empty.
    """
    # Function-scope import so this block does not depend on the module
    # already importing ``time``.
    import time

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=args.detbatch,
                              format='yolo').start()

    # Load detection loader
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()
    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(args.save_video).start()

    im_names_desc = tqdm(range(data_loader.length()))

    batchSize = args.posebatch
    for _ in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: still forward the frame so the writer
                # sees every image, then skip pose estimation and profiling.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation: walk the detections in strides of batchSize;
            # the final slice is simply shorter when datalen is not a
            # multiple of batchSize.
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[start:start + batchSize].to(device))
                for start in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)
            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Poll with a short sleep instead of spinning: a tight ``pass`` loop
    # burns a full core while waiting for the writer to drain.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    # write_json(final_result, args.outputpath)
    # Guard against an empty result list (e.g. no input images) before
    # indexing the first entry.
    if not final_result or not final_result[0]['result']:
        return None
    return final_result[0]['result'][0]['keypoints']
class AlphaPose():
    def __init__(self, videofile, mode='normal'):
        """Build the loader, detector and writer objects for one video.

        Args:
            videofile: Path of the input video; handed to ``VideoLoader`` and
                used to derive the output file name.
            mode: Stored on the instance and later copied to ``args.mode`` by
                ``run``.

        The constructor calls ``.start()`` on every pipeline stage it creates
        and keeps whatever ``.start()`` returns.
        """
        self.videofile = videofile
        self.mode = mode

        # Frame source and its basic video properties.
        self.data_loader = VideoLoader(
            self.videofile, batchSize=args.detbatch).start()
        self.fourcc, self.fps, self.frameSize = self.data_loader.videoinfo()

        # Detection stages chained on top of the frame source.
        self.det_loader = DetectionLoader(
            self.data_loader, batchSize=args.detbatch).start()
        self.det_processor = DetectionProcessor(self.det_loader).start()
        self.pose_dataset = Mscoco()

        # Output goes to <outputpath>/AlphaPose_<name up to first dot>.mp4.
        out_name = ('AlphaPose_' +
                    ntpath.basename(self.videofile).split('.')[0] + '.mp4')
        save_path = os.path.join(args.outputpath, out_name)

        # The writer is given a DIVX fourcc rather than the one read from the
        # input video (which is only stored on self.fourcc).
        self.writer = DataWriter(
            args.save_video,
            save_path,
            cv2.VideoWriter_fourcc(*'DIVX'),
            self.fps,
            self.frameSize,
        ).start()
        self.results = []

    def pose_estimation(self, pose_model):
        """Run ``pose_model`` on every detection batch and queue the output.

        Reads up to ``self.data_loader.length()`` items from
        ``self.det_processor``. Stops early when an item has no image. Items
        without detections are forwarded to the writer with ``None``
        placeholders; otherwise the inputs are processed in chunks of
        ``args.posebatch`` on the GPU and the concatenated output is saved
        through ``self.writer``.

        Args:
            pose_model: Callable applied to each CUDA chunk of inputs.
        """
        chunk_size = args.posebatch
        for _ in range(self.data_loader.length()):
            # Gradients are not computed inside no_grad, which reduces the
            # amount of work; meant for model evaluation, so the inference
            # code is placed inside it.
            with torch.no_grad():
                (inps, orig_img, im_name, boxes, scores, pt1,
                 pt2) = self.det_processor.read()
                if orig_img is None:
                    break

                has_detections = boxes is not None and boxes.nelement() != 0
                if not has_detections:
                    self.writer.save(None, None, None, None, None, orig_img,
                                     im_name.split('/')[-1])
                    continue

                # Pose Estimation
                total = inps.size(0)
                # Ceiling division: one extra chunk when total is not a
                # multiple of chunk_size.
                chunk_count = -(-total // chunk_size)
                heatmaps = torch.cat([
                    pose_model(
                        inps[k * chunk_size:min((k + 1) * chunk_size,
                                                total)].cuda())
                    for k in range(chunk_count)
                ])

                heatmaps = heatmaps.cpu().data
                self.writer.save(boxes, scores, heatmaps, pt1, pt2, orig_img,
                                 im_name.split('/')[-1])

    def run(self):
        """Build the pose model, process the video and collect the results.

        Copies ``self.mode`` into ``args.mode``, instantiates
        ``InferenNet_fast`` or ``InferenNet`` depending on
        ``args.fast_inference``, moves it to CUDA in eval mode and runs
        ``pose_estimation``. It then waits until the writer reports it is no
        longer running, stops it, and stores a copy of the writer's results
        in ``self.results``.
        """
        # Function-scope import so this method does not depend on the module
        # already importing ``time``.
        import time

        args.mode = self.mode
        model_cls = InferenNet_fast if args.fast_inference else InferenNet
        pose_model = model_cls(4 * 1 + 1, self.pose_dataset)
        pose_model.cuda()
        pose_model.eval()

        #pose estimation
        print('Start Pose Estimating...')
        self.pose_estimation(pose_model)

        print('Finish Model Running.')
        if (args.save_img or args.save_video) and not args.vis_fast:
            print('Rendering remaining images in the queue...')
        # Poll with a short sleep instead of spinning: a tight ``pass`` loop
        # burns a full core while waiting for the writer to drain.
        while self.writer.running():
            time.sleep(0.01)
        self.writer.stop()
        self.results = self.writer.results().copy()

    def arm_pos(self):
        arms = []
        for frame in self.results:
            #body是每個人的骨架的dictionary
            jpg = frame['imgname'].split('.')
            for body in frame['result']:
                arm = [
                ]  #[int, tuple, tuple, tuple, tuple] -> [frame_number, left_x1y1, left_x2y2, right_x1y1, right_x2y2]
                arm.append(int(jpg[0]))
                joint_list = body['keypoints'].tolist()
                arm.append((int(joint_list[7][0]), int(joint_list[7][1])))
                arm.append((int(joint_list[9][0]), int(joint_list[9][1])))
                arm.append((int(joint_list[8][0]), int(joint_list[8][1])))
                arm.append((int(joint_list[10][0]), int(joint_list[10][1])))
                arms.append(arm)

        return arms

    def arm_pos_json(self, json_path):
        with open(json_path, 'r', encoding='utf-8') as f:
            result = json.load(f)
        #json_format
        arms = []
        l_index = [21, 27]
        r_index = [24, 30]
        for body in result:
            #body是每個人的骨架的dictionary
            arm = [
            ]  #[int, tuple, tuple, tuple, tuple] -> [frame_number, left_x1y1, left_x2y2, right_x1y1, right_x2y2]
            jpg = body['image_id'].split('.')
            arm.append(int(jpg[0]))
            for i in l_index:
                arm.append(
                    (int(body['keypoints'][i]), int(body['keypoints'][i + 1])))
            for i in r_index:
                arm.append(
                    (int(body['keypoints'][i]), int(body['keypoints'][i + 1])))
            arms.append(arm)

        return arms