Example #1
def process_video(model,
                  config,
                  im_paths,
                  kps,
                  pred_dir,
                  min_frame=0,
                  max_frame=None):
    bbox_params_smooth, s, e = get_smooth_bbox_params(kps, vis_thresh=0.1)
    min_f = max(s, min_frame)
    if max_frame is not None:
        max_f = min(e, max_frame)
    else:
        max_f = min(e, len(kps))

    images = []
    images_orig = []
    for i in tqdm(range(min_f, max_f)):
        proc_params = process_image(
            im_path=im_paths[i],
            bbox_param=bbox_params_smooth[i],
        )
        images.append(proc_params.pop('image'))
        images_orig.append(proc_params)

    preds = model.predict_all_images(images)
    render_preds(
        output_path=pred_dir,
        config=config,
        preds=preds,
        images=images,
        images_orig=images_orig,
    )
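A minimal invocation sketch for the function above. The model, config, and paths are placeholders, and get_labels_poseflow is the track-loading helper used in the later examples:

im_paths = sorted(glob(osp.join('video_frames', '*.png')))  # hypothetical frame dir
all_kps = get_labels_poseflow('poseflow_output.json', len(im_paths))
process_video(
    model=model,    # a loaded prediction model (assumed)
    config=config,  # run configuration (assumed)
    im_paths=im_paths,
    kps=all_kps[0],  # first PoseFlow track
    pred_dir='rendered_output',
)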
Example #2
def predict_on_tracks(model, img_dir, poseflow_path, output_path, track_id,
                      trim_length):
    # Get all the images
    im_paths = sorted(glob(osp.join(img_dir, '*.png')))
    all_kps = get_labels_poseflow(poseflow_path, len(im_paths))

    # Here we set which track to use.
    track_id = min(track_id, len(all_kps) - 1)
    print('Total number of PoseFlow tracks:', len(all_kps))
    print('Processing track_id:', track_id)
    kps = all_kps[track_id]

    bbox_params_smooth, s, e = get_smooth_bbox_params(kps, vis_thresh=0.1)

    images = []
    images_orig = []
    min_f = max(s, 0)
    max_f = min(e, len(kps))

    print('----------')
    print('Preprocessing frames.')
    print('----------')

    for i in range(min_f, max_f):
        proc_params = process_image(
            im_path=im_paths[i],
            bbox_param=bbox_params_smooth[i],
        )
        images.append(proc_params.pop('image'))
        images_orig.append(proc_params)

    if track_id > 0:
        output_path += '_{}'.format(track_id)

    mkdir(output_path)
    pred_path = osp.join(output_path, 'hmmr_output.pkl')
    if osp.exists(pred_path):
        print('----------')
        print('Loading pre-computed prediction.')
        print('----------')

        with open(pred_path, 'rb') as f:
            preds = pickle.load(f)
    else:
        print('----------')
        print('Running prediction.')
        print('----------')

        preds = model.predict_all_images(images)

        with open(pred_path, 'wb') as f:
            print('Saving prediction results to', pred_path)
            pickle.dump(preds, f)
Example #3
def main(model):
    # Keypoints are only used to compute the bounding box around human tracks.
    # They are not fed into the model. Keypoint format is [x, y, vis]. Keypoint
    # order doesn't matter.
    if config.dataset == '':
        im_paths, kps = load_poseflow_video(config.vid_path, config.out_dir)
        vis_thresh = 0.1
    elif config.dataset == 'penn_action':
        im_paths, kps = load_penn_video(config.penn_dir, config.vid_id)
        vis_thresh = 0.5
    else:
        raise Exception('Dataset {} not recognized'.format(config.dataset))
    bbox_params_smooth, s, e = get_smooth_bbox_params(kps, vis_thresh)
    images = []
    min_f = max(s, 0)
    max_f = min(e, len(kps))
    for i in range(min_f, max_f):
        images.append(
            process_image(im_path=im_paths[i],
                          bbox_param=bbox_params_smooth[i]))
    all_images, vid_paths = process_videos(
        config=config,
        images=images,
        T=(NUM_CONDITION + config.ar_length),
        suffix='AR{}'.format(config.ar_length),
    )
    if not osp.exists(config.out_dir):
        os.mkdir(config.out_dir)
    renderer = VisRenderer(img_size=224)
    for i in range(0, len(all_images), config.batch_size):
        run_predictions(
            config=config,
            renderer=renderer,
            model=model,
            images=all_images[i:i + config.batch_size],
            vid_paths=vid_paths[i:i + config.batch_size],
            num_condition=NUM_CONDITION,
        )
Example #4
def add_to_tfrecord(image_paths,
                    gt2ds,
                    coder,
                    writer,
                    feature_extractor=None,
                    augmentor=None,
                    DRAW=False,
                    vis_thresh=0,
                    sigma=3):
    """
    Adds all information from a subject-sequence-camera tuple to a tfrecord.
    """
    image_datas, image_shapes, labels, centers = [], [], [], []
    scale_factors, start_pts = [], []

    images = []  # Used to compute phis if needed.

    bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
        gt2ds, vis_thresh, sigma=sigma)
    
    for im_path, gt2d, bbox_param in list(
            zip(image_paths, gt2ds, bbox_params))[time_pt1:time_pt2]:

        # This can be improved a lot.
        ret_dict = process_image(im_path, gt2d, coder, bbox_param, DRAW)

        image_datas.append(ret_dict['image_data'])
        image_shapes.append(ret_dict['image_shape'])
        labels.append(ret_dict['label'])
        centers.append(ret_dict['center'])
        scale_factors.append(ret_dict['scale_factors'])
        start_pts.append(ret_dict['start_pt'])
        if feature_extractor is not None:
            # AJ: Make sure the images you send to the augmentor are in [0, 1]!
            images.append(ret_dict['image'] / 255.)

    # Apply Data Augmentation & Feature Extraction
    if feature_extractor is not None:
        pose_dummy = np.zeros((len(labels), 72))
        gt3d_dummy = np.zeros((len(labels), 14, 3))
        ret_dict = augmentor(
            images=images,
            image_sizes=image_shapes,
            labels=labels,
            centers=centers,
            poses=pose_dummy,
            gt3ds=gt3d_dummy)
        augmented_imgs = ret_dict['images']
        labels = ret_dict['labels']
        centers = ret_dict['centers']
        image_shapes = [list(img.shape[:2]) for img in augmented_imgs]

        phis = feature_extractor.compute_all_phis(augmented_imgs)
        del images  # Clear the memory.
        # Map augmented images from [-1, 1] back to [0, 255] before encoding.
        image_datas = [
            coder.encode_jpeg(((img + 1) * 0.5) * 255.)
            for img in augmented_imgs
        ]
    else:
        phis = None

    if not FLAGS.save_img:
        image_datas = None

    example = convert_to_example_temporal(
        image_datas=image_datas,
        image_paths=image_paths,
        image_shapes=image_shapes,
        labels=labels,
        centers=centers,
        gt3ds=None,
        poses=None,
        shape=None,
        scale_factors=scale_factors,
        start_pts=start_pts,
        cams=None,
        phis=phis,
    )
    writer.write(example.SerializeToString())
Example #5
def add_to_tfrecord(image_paths,
                    gt2ds,
                    gt3ds,
                    poses,
                    shape,
                    coder,
                    writer,
                    visualize=False,
                    vis_thresh=0.1,
                    img_size=224,
                    sigma=8):
    """

    Args:
        image_paths (N).
        gt2ds (Nx19x3).
        gt3ds (Nx14x3).
        poses (Nx72).
        shape (10).
        coder (ImageCoder).
        writer (TFRecordWriter).
        visualize (bool).
        vis_thresh (float).
        img_size (int).
        sigma (int).
    """
    results = {
        'image_data_scaled': [],
        'im_path': [],
        'im_shape': [],
        'kps': [],
        'center': [],
        'scale': [],
        'start_pt': [],
    }

    bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
        gt2ds,
        vis_thresh,
        sigma=sigma,
    )
    if time_pt1 != 0 or time_pt2 != len(image_paths):
        print('Start: {}, End: {}'.format(time_pt1, time_pt2))
    for im_path, kps, bbox in tqdm(
            list(zip(image_paths, gt2ds, bbox_params))[time_pt1:time_pt2]):
        ret_dict = process_image(im_path=im_path,
                                 gt2d=kps,
                                 coder=coder,
                                 bbox_param=bbox,
                                 visualize=visualize,
                                 vis_thresh=vis_thresh,
                                 img_size=img_size)

        for key in ret_dict:
            results[key].append(ret_dict[key])

    # Adjust to start and end time if they exist.
    if gt3ds is not None:
        gt3ds = gt3ds[time_pt1:time_pt2]
    if poses is not None:
        poses = poses[time_pt1:time_pt2]

    example = convert_to_example_temporal(
        cams=[],
        centers=results['center'],  # N x 2
        gt3ds=gt3ds,
        image_datas=results['image_data_scaled'],  # N
        image_paths=results['im_path'],  # N
        image_shapes=results['im_shape'],  # N x 2
        labels=results['kps'],  # N x 3 x 19
        scale_factors=results['scale'],  # N
        start_pts=results['start_pt'],  # N x 2
        time_pts=(time_pt1, time_pt2),  # 2
        poses=poses,  # N x 72
        shape=shape,  # 10
    )
    writer.write(example.SerializeToString())
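A sketch of how this function might be driven, assuming the sequence's annotations are already loaded and that ImageCoder is the repo's JPEG helper named in the docstring; the writer below uses TensorFlow's standard TFRecordWriter (tf.python_io.TFRecordWriter in TF1):

import tensorflow as tf

coder = ImageCoder()  # repo's image encode/decode helper (assumed available)
with tf.io.TFRecordWriter('/tmp/seq_0000.tfrecord') as writer:
    add_to_tfrecord(
        image_paths=image_paths,  # N frame paths
        gt2ds=gt2ds,              # (N, 19, 3) 2D keypoints with visibility
        gt3ds=gt3ds,              # (N, 14, 3) 3D joints, or None
        poses=poses,              # (N, 72) SMPL poses, or None
        shape=shape,              # (10,) SMPL betas
        coder=coder,
        writer=writer,
    )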
Example #6
def predict_on_tracks(model, img_dir, poseflow_path, output_path, track_id,
                      trim_length):
    # Get all the images
    im_paths = sorted(glob(osp.join(img_dir, '*.png')))
    all_kps = get_labels_poseflow(poseflow_path, len(im_paths))

    # Here we set which track to use.
    track_id = min(track_id, len(all_kps) - 1)
    print('Total number of PoseFlow tracks:', len(all_kps))
    print('Processing track_id:', track_id)
    kps = all_kps[track_id]

    bbox_params_smooth, s, e = get_smooth_bbox_params(kps, vis_thresh=0.1)

    images = []
    images_orig = []
    min_f = max(s, 0)
    max_f = min(e, len(kps))

    print('----------')
    print('Preprocessing frames.')
    print('----------')

    for i in range(min_f, max_f):
        proc_params = process_image(
            im_path=im_paths[i],
            bbox_param=bbox_params_smooth[i],
        )
        images.append(proc_params.pop('image'))
        images_orig.append(proc_params)

    if track_id > 0:
        output_path += '_{}'.format(track_id)

    mkdir(output_path)
    pred_path = osp.join(output_path, 'hmmr_output.pkl')
    if osp.exists(pred_path):
        print('----------')
        print('Loading pre-computed prediction.')
        print('----------')

        with open(pred_path, 'rb') as f:
            preds = pickle.load(f)
    else:
        print('----------')
        print('Running prediction.')
        print('----------')

        preds = model.predict_all_images(images)

        with open(pred_path, 'wb') as f:
            print('Saving prediction results to', pred_path)
            pickle.dump(preds, f)

    human_body_info = {}
    human_body_info['anim_len'] = len(preds['cams'])
    # Copy so the in-place averaging below doesn't mutate preds['cams'].
    human_body_info['cam_array'] = preds['cams'].copy()
    # Per-frame SMPL pose in axis-angle form: (num_frames, 24 joints, 3).
    human_body_info['smpl_array'] = np.zeros((len(preds['cams']), 24, 3),
                                             dtype='float32')
    # Convert each frame's rotation matrices to axis-angle, and average the
    # camera parameters over all frames.
    cams = np.zeros(3)
    for i in range(len(preds['cams'])):
        temp = rot_mat_to_axis_angle(preds['poses'][i])
        human_body_info['smpl_array'][i] = np.reshape(temp, (24, 3))
        cams += preds['cams'][i]
    cams /= len(preds['cams'])
    human_body_info['cam_array'][:] = cams
    with open('../demo/transfer_data/human_body_info.pkl', 'wb') as f:
        print('Saving prediction results to',
              '../demo/transfer_data/human_body_info.pkl')
        pickle.dump(human_body_info, f)
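rot_mat_to_axis_angle comes from the surrounding codebase. A minimal stand-in with the behavior the loop above expects (per-frame rotation matrices in, 72 axis-angle values out, assuming preds['poses'][i] is a (24, 3, 3) stack) could be built on OpenCV's Rodrigues conversion:

import cv2
import numpy as np

def rot_mat_to_axis_angle(rotmats):
    # Treat the input as a stack of 3x3 rotation matrices, convert each to a
    # 3-vector axis-angle via Rodrigues, and concatenate into shape (72,).
    rotmats = np.asarray(rotmats, dtype=np.float64).reshape(-1, 3, 3)
    return np.concatenate([cv2.Rodrigues(R)[0].ravel() for R in rotmats])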
Example #7
def predict_on_tracks(model, img_dir, poseflow_path, output_path, track_id,
                      trim_length):
    # Get all the images
    im_paths = sorted(glob(osp.join(img_dir, '*.png')))
    all_kps = get_labels_poseflow(poseflow_path, len(im_paths))

    # Here we set which track to use.
    track_id = min(track_id, len(all_kps) - 1)
    print('Total number of PoseFlow tracks:', len(all_kps))
    print('Processing track_id:', track_id)
    kps = all_kps[track_id]

    bbox_params_smooth, s, e = get_smooth_bbox_params(kps, vis_thresh=0.1)

    images = []
    images_orig = []
    min_f = max(s, 0)
    max_f = min(e, len(kps))

    print('----------')
    print('Preprocessing frames.')
    print('----------')

    for i in range(min_f, max_f):
        proc_params = process_image(
            im_path=im_paths[i],
            bbox_param=bbox_params_smooth[i],
        )
        images.append(proc_params.pop('image'))
        images_orig.append(proc_params)

    if track_id > 0:
        output_path += '_{}'.format(track_id)

    mkdir(output_path)

    # Make a directory next to output_path to save the joint rotation
    # matrices and keypoints as JSON.
    parent_dir = osp.dirname(output_path)
    myjson_dir = osp.join(parent_dir, 'rot_output')
    myrot_path = osp.join(myjson_dir, 'rot_output.json')
    mykps_path = osp.join(myjson_dir, 'kps_output.json')
    mkdir(myjson_dir)

    # George's revision (start): dump keypoints and rotations to JSON.

    pred_path = osp.join(output_path, 'hmmr_output.pkl')

    if osp.exists(pred_path):
        print('----------')
        print('Loading pre-computed prediction.')
        print('----------')

        with open(pred_path, 'rb') as f:
            preds = pickle.load(f)
    else:
        print('----------')
        print('Running prediction.')
        print('----------')

        preds = model.predict_all_images(images)

        with open(pred_path, 'wb') as f:
            print('Saving prediction results to', pred_path)
            pickle.dump(preds, f)
    # Dump the predicted keypoints, one dict per frame.
    mykps = preds['kps']
    totalkpsdict = {}
    totalkpsdict['frame_Count'] = mykps.shape[0]
    for i in range(mykps.shape[0]):
        framedict = {}
        for j in range(mykps.shape[1]):
            # JSON needs Python floats, not numpy scalars.
            framedict['kps_%02d' % j] = [float(v) for v in mykps[i][j]]
        totalkpsdict['frame_%04d' % i] = framedict
    print('Saving kps results to', mykps_path)
    with open(mykps_path, 'w') as jf:
        json.dump(totalkpsdict, jf, sort_keys=True)
    # Dump the per-joint rotation matrices, one dict per frame.
    myposes = preds['poses']
    totaldict = {}
    totaldict['frame_Count'] = myposes.shape[0]
    print('There are {} frames in total.'.format(myposes.shape[0]))
    print('----------')
    for i in range(myposes.shape[0]):
        frame_index = 'frame_%04d' % i
        framedict = {}
        print('Processing frame: {}'.format(frame_index))
        for j in range(myposes.shape[1]):
            # Flatten each 3x3 rotation matrix to nine JSON-friendly floats.
            rotlist = [float(v) for v in np.reshape(myposes[i][j], (-1,))]
            framedict['rot_%02d' % j] = rotlist
        totaldict[frame_index] = framedict
        print('----------')
    print('Saving rot results to', myrot_path)

    with open(myrot_path, 'w') as jf:
        json.dump(totaldict, jf, sort_keys=True)
    # George's revision (end).

    if trim_length > 0:
        output_path += '_trim'
    print('----------')
    print('Rendering results to {}.'.format(output_path))
    print('----------')
    # 'preds' is short for predictions; next, figure out how to render the SMPL model.
    render_preds(
        output_path=output_path,
        config=config,
        preds=preds,
        images=images,
        images_orig=images_orig,
        trim_length=trim_length,
    )
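To consume the JSON files written above, the layout is plain nested dicts whose key names mirror the loops (a small sketch, assuming the run produced at least one frame):

with open(myrot_path) as jf:
    rots = json.load(jf)
print('frames:', rots['frame_Count'])
# Recover the first joint's rotation matrix of frame 0.
rot0 = np.reshape(rots['frame_0000']['rot_00'], (3, 3))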