def process_video_frame(video_frame):
    # drop any alpha channel and resize to the network's expected 240x320 input
    video_frame = video_frame[:, :, :3]
    video_frame = scipy.misc.imresize(video_frame, (240, 320))
    # normalize pixel values to [-0.5, 0.5] and add a batch dimension
    image_v = np.expand_dims((video_frame.astype('float') / 255.0) - 0.5, 0)

    keypoint_coord3d_tf, scale_tf, center_tf, keypoints_scoremap_tf = network_elements
    keypoint_coord3d_v, scale_v, center_v, keypoints_scoremap_v = sess.run(
        [keypoint_coord3d_tf, scale_tf, center_tf, keypoints_scoremap_tf],
        feed_dict={image_tf: image_v})

    keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
    keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v)

    # post processing
    coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
    coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

    plot_hand_2d(coord_hw, video_frame)

    score_label = process_keypoints(keypoint_coord3d_v)
    if score_label is not None:
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(video_frame, score_label, (10, 200), font, 1.0,
                    (255, 0, 0), 2, cv2.LINE_AA)

    return video_frame
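This function relies on module-level globals (sess, image_tf, network_elements) created elsewhere. A minimal driver loop, sketched under the assumption that those globals are built the way Example 2 builds its graph and session, could look like this:

import cv2

cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # the network expects RGB input; OpenCV captures BGR
    result = process_video_frame(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    cv2.imshow('hand pose', cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()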
Example 2
def main(args):
    webcamId = 0
    try:
        if len(args) > 1:
            webcamId = int(args[1])
    except ValueError:
        print("Invalid webcam id. Falling back to default value '" + str(webcamId) + "'.")

    # stream creation
    inputStream = cv2.VideoCapture(webcamId)
    if not inputStream.isOpened():
        print("Can not use camera with id " + str(webcamId) + ".")
        return 1

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
    keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # initialize network
    net.init(session)

    while True:
        ret, image_raw = inputStream.read()
        if not ret:
            break
        image_resized = cv2.resize(image_raw, (320, 240))
        image_rgb = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)

        image_v = np.expand_dims((image_rgb.astype('float') / 255.0) - 0.5, 0)

        start_time = time.time()
        hand_scoremap_v, image_crop_v, scale_v, center_v,\
        keypoints_scoremap_v, keypoint_coord3d_v = session.run([hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
                                                             keypoints_scoremap_tf, keypoint_coord3d_tf],
                                                            feed_dict={image_tf: image_v})
        delta_time = time.time() - start_time
        print("Inference time: " + str(delta_time))

        # post processing
        keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
        coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
        coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

        plot_hand_cv2(image_resized, coord_hw)
        image_fullsize = cv2.resize(image_resized, (1600, 1200))

        cv2.imshow('result', image_fullsize)
        # exit on 'q' so the cleanup below is reachable
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    inputStream.release()
    cv2.destroyAllWindows()
    return 0
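The original snippet stops at the function body; a conventional entry point for it would be (an assumption, not shown in the source):

import sys

if __name__ == '__main__':
    sys.exit(main(sys.argv))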
Example 3
def doHandPoseEstimate(image_cv, sess, args):

    image_tf = args['image_tf']
    hand_side_tf = args['hand_side_tf']
    evaluation = args['evaluation']
    net = args['net']
    hand_scoremap_tf = args['hand_scoremap_tf']
    image_crop_tf = args['image_crop_tf']
    scale_tf = args['scale_tf']
    center_tf = args['center_tf']
    keypoints_scoremap_tf = args['keypoints_scoremap_tf']
    keypoint_coord3d_tf = args['keypoint_coord3d_tf']

    # resize to the network input size, then convert OpenCV's BGR to RGB
    image_raw = cv2.resize(image_cv, (320, 240))
    image_raw = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
    image_v = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)

    hand_scoremap_v, image_crop_v, scale_v, center_v,\
    keypoints_scoremap_v, keypoint_coord3d_v = sess.run([hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
                                                         keypoints_scoremap_tf, keypoint_coord3d_tf],
                                                        feed_dict={image_tf: image_v})

    hand_scoremap_v = np.squeeze(hand_scoremap_v)
    image_crop_v = np.squeeze(image_crop_v)
    keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
    keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v)

    # post processing
    image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8')
    coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
    coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

    # visualize
    fig = plt.figure(1)
    plt.ion()
    plt.clf()
    ax1 = fig.add_subplot(221)
    ax2 = fig.add_subplot(222)
    ax3 = fig.add_subplot(223)
    ax4 = fig.add_subplot(224, projection='3d')
    ax1.imshow(image_raw)
    plot_hand(coord_hw, ax1)
    ax2.imshow(image_crop_v)
    plot_hand(coord_hw_crop, ax2)
    ax3.imshow(np.argmax(hand_scoremap_v, 2))
    plot_hand_3d(keypoint_coord3d_v, ax4)
    ax4.view_init(azim=-90.0,
                  elev=-90.0)  # aligns the 3d coord with the camera view
    ax4.set_xlim([-3, 3])
    ax4.set_ylim([-3, 1])
    ax4.set_zlim([-3, 3])
    plt.show()
    plt.pause(0.0001)
Example 4
def get_feature(img, sess, sess_args):
    image_v = np.expand_dims((img.astype('float') / 255.0) - 0.5, 0)

    hand_scoremap_v, image_crop_v, scale_v, center_v,\
    keypoints_scoremap_v, keypoint_coord3d_v = sess.run(sess_args,
                                                        feed_dict={image_tf: image_v})
    coord_hw_crop = detect_keypoints(np.squeeze(keypoints_scoremap_v))
    # override center/scale so trafo_coords leaves the crop coordinates unchanged
    center_v = np.array([0., 0.])
    scale_v = np.array([1.])
    coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)
    return coord_hw
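With center (0, 0) and scale 1, trafo_coords leaves the crop coordinates unchanged, so the returned feature lives in the 256x256 crop frame. For reference, the crop-to-image mapping that trafo_coords applies is the one Example 14 writes out inline; a sketch under that assumption:

import numpy as np

def crop_to_global(coord_hw_crop, center, scale, crop_size=256):
    # same mapping Example 14 applies inline:
    # coord_global = (coord_crop - crop_size / 2) / scale + center
    return (np.asarray(coord_hw_crop, dtype=float) - crop_size / 2.0) / scale + center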
Example 5
def get_coords_and_figure_from_name(img_name):
    image_raw = scipy.misc.imread(img_name)

    hand_scoremap_v, image_crop_v, scale_v, center_v, \
    keypoints_scoremap_v, keypoint_coord3d_v = run_model_on_image(image_raw)

    # from here on: saving stuff
    basename = os.path.splitext(os.path.basename(img_name))[0]

    # post processing
    hand_scoremap_v = np.squeeze(hand_scoremap_v)
    image_crop_v = np.squeeze(image_crop_v)
    keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
    image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8')
    coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
    coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)
    # save keypoint coordinates
    keypoint_save_filename = "{:s}/{:s}_coords.pkl".format(
        output_coords_dir, basename)
    with open(keypoint_save_filename, 'wb') as f:
        pickle.dump((keypoint_coord3d_v, scale_v, center_v), f, protocol=-1)
    print("Saved keypoint coordinates to {:s}".format(keypoint_save_filename))

    # save image
    image_save_filename = "{:s}/{:s}_figures.png".format(
        output_figures_dir, basename)

    fig = plt.figure(1)
    ax1 = fig.add_subplot(221)
    ax2 = fig.add_subplot(222)
    ax3 = fig.add_subplot(223)
    ax4 = fig.add_subplot(224, projection='3d')
    ax1.imshow(image_raw)
    plot_hand(coord_hw, ax1)
    ax2.imshow(image_crop_v)
    plot_hand(coord_hw_crop, ax2)
    ax3.imshow(np.argmax(hand_scoremap_v, 2))
    plot_hand_3d(keypoint_coord3d_v, ax4)
    ax4.view_init(azim=-90.0,
                  elev=-90.0)  # aligns the 3d coord with the camera view
    ax4.set_xlim([-3, 3])
    ax4.set_ylim([-3, 1])
    ax4.set_zlim([-3, 3])
    plt.savefig(image_save_filename)
    plt.close()

    print("Saved figure to {:s}".format(image_save_filename))
Example 6
def process_img(known_finger_poses, img_name):
    image_raw = scipy.misc.imread(img_name)[:, :, :3]
    image_raw = np.array(Image.fromarray(image_raw).resize((320, 240)))
    image_v = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)

    scale_v, center_v, keypoints_scoremap_v, keypoint_coord3d_v = sess.run(
        [scale_tf, center_tf, keypoints_scoremap_tf, keypoint_coord3d_tf],
        feed_dict={image_tf: image_v})

    keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
    keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v)

    # post processing
    coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
    coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

    plot_hand_2d(coord_hw, image_raw)

    # Classifying based on Geometry
    #if args.solve_by == 0:
    score_label = predict_by_geometry(keypoint_coord3d_v, known_finger_poses,
                                      0.55)
    # Classifying based on Neural networks
    # elif args.solve_by == 1:
    # 	score_label = predict_by_neural_network(keypoint_coord3d_v, known_finger_poses,
    # 											args.pb_file, args.threshold)
    # Classifying based on SVM
    # elif args.solve_by == 2:
    # 	score_label = predict_by_svm(keypoint_coord3d_v, known_finger_poses, args.svc_file)

    # save processed image
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(image_raw, score_label, (10, 200), font, 1.0, (255, 0, 0), 2,
                cv2.LINE_AA)

    file_save_path = os.path.join('../images_out',
                                  "{}.png".format(int(time.time())))
    mpimg.imsave(file_save_path, image_raw)

    return score_label
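A hypothetical call site for this function; create_known_finger_poses and the image path are assumptions, not part of the original snippet:

known_finger_poses = create_known_finger_poses()  # hypothetical helper
label = process_img(known_finger_poses, '../images_in/hand.jpg')
print(label)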
Example 7
             ],
             exclude_var_list=['PosePrior', 'ViewpointNet'])

util = EvalUtil()
# iterate dataset
for i in range(dataset.num_samples):
    # get prediction
    keypoints_scoremap_v, scale_crop_v, center_v, kp_uv21_gt, kp_vis = sess.run(
        [keypoints_scoremap, scale_crop, center,
         data['keypoint_uv21'], data['keypoint_vis21']])

    keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
    kp_uv21_gt = np.squeeze(kp_uv21_gt)
    kp_vis = np.squeeze(kp_vis)

    # detect keypoints
    coord_hw_pred_crop = detect_keypoints(keypoints_scoremap_v)
    coord_hw_pred = trafo_coords(coord_hw_pred_crop, center_v, scale_crop_v,
                                 256)
    coord_uv_pred = np.stack([coord_hw_pred[:, 1], coord_hw_pred[:, 0]], 1)

    # scale pred to image size of the dataset (to match with stored coordinates)
    coord_uv_pred[:, 1] /= scale[0]
    coord_uv_pred[:, 0] /= scale[1]

    # some datasets are already stored with downsampled resolution
    scale2orig_res = 1.0
    if hasattr(dataset, 'resolution'):
        scale2orig_res = dataset.resolution

    util.feed(kp_uv21_gt / scale2orig_res, kp_vis,
              coord_uv_pred / scale2orig_res)
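EvalUtil itself is not shown in this snippet. A minimal stand-in that accumulates the mean 2D end-point error over visible keypoints, offered only as an illustrative sketch of what such a utility typically does:

import numpy as np

class SimpleEvalUtil:
    # illustrative stand-in, not the original EvalUtil
    def __init__(self):
        self.errors = []

    def feed(self, kp_gt, kp_vis, kp_pred):
        # keep only keypoints marked visible, then record euclidean errors
        vis = np.asarray(kp_vis).astype(bool)
        dists = np.linalg.norm(np.asarray(kp_gt)[vis] - np.asarray(kp_pred)[vis], axis=-1)
        self.errors.extend(dists.tolist())

    def mean_epe(self):
        return float(np.mean(self.errors)) if self.errors else float('nan')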
Example 8
    net = ColorHandPose3DNetwork()

    for image_name in image_list:
        image = cv2.imread(image_name) if isinstance(image_name, str) else image_name
        image = cv2.resize(image, (320, 240))
        image_v = np.expand_dims((image.astype(np.float32) / 255.) - .5,
                                 axis=0)

        inference = net.inference(image_v)
        hand_score_map, image_crop, scale, center, keypoint_score_map, keypoint_coord3d = tuple(
            inference)

        hand_score_map = np.squeeze(
            hand_score_map, axis=0)  # (1, 256, 256, 2) -> (256, 256, 2)
        image_crop = np.squeeze(
            image_crop)  # (1, 256, 256, 3) -> (256, 256, 3)
        keypoint_score_map = np.squeeze(
            keypoint_score_map)  # (1, 256, 256, 21) -> (256, 256, 21)
        keypoint_coord3d = np.squeeze(
            keypoint_coord3d)  # (1, 21, 3) -> (21, 3)

        image_crop = ((image_crop + .5) * 255).astype(np.uint8)
        coord_hw_crop = detect_keypoints(keypoint_score_map)
        coord_hw = trafo_coords(coord_hw_crop, center, scale, 256)

        # visualize
        plot_inference(image, image_crop, coord_hw, coord_hw_crop,
                       hand_score_map, keypoint_coord3d)
Example 9
    def process(self, image_list):
        """
        Args:
            image_list: list of tuples, first item being the name/path of the
                image, and the second item being a RGB matrix.
        """
        results = []
        print('Extracting masks...')
        for image_name, image_raw in tqdm(image_list):
            
            # cache key: assumes a 4-character extension like .jpg on the image name
            save_name = os.path.join(self.cache_loc,
                    ('#'.join(image_name.split('/')[-3:])[:-self.extension_length])+'.pkl')
            
            if os.path.exists(save_name) and not self.overwrite:
                # loading directly from cache
                with open(save_name, 'rb') as f:
                    results.append(pickle.load(f))

            else:
                image_raw_shape = image_raw.shape[:2] 
                image_raw = imresize(image_raw, (240, 320))
                image_v = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)
                
                hand_scoremap_v, image_crop_v, scale_v, center_v,\
                keypoints_scoremap_v, keypoint_coord3d_v = \
                self.sess.run([self.hand_scoremap_tf, self.image_crop_tf, self.scale_tf, 
                        self.center_tf, self.keypoints_scoremap_tf, self.keypoint_coord3d_tf],
                                        feed_dict={self.image_tf: image_v})

                hand_scoremap_v = np.squeeze(hand_scoremap_v)
                image_crop_v = np.squeeze(image_crop_v)
                keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
                keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v)
                
                # post processing
                image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8')
                coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
                coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

                # TODO: these coordinates are all normalized with respect to
                # the rescaled image; parameters would have to be scaled back.
                image_result = {'image_name': image_name,
                                'original_shape': image_raw_shape,
                                'confidence': hand_scoremap_v,
                                'binary_mask': np.argmax(hand_scoremap_v, 2),
                                'hand_joints_2d': coord_hw_crop,
                                'hand_joints_3d': keypoint_coord3d_v}
                
                with open(save_name, 'wb') as f:
                    pickle.dump(image_result, f)

                results.append(image_result)
                # if self.visualize:
                #     # visualize
                #     fig = plt.figure(1, figsize=(10,10))
                #     ax1 = fig.add_subplot(221)
                #     ax2 = fig.add_subplot(222)
                #     ax3 = fig.add_subplot(223)
                #     ax4 = fig.add_subplot(224, projection='3d')
                #     ax1.imshow(image_raw)
                #     plot_hand(coord_hw, ax1)
                #     ax2.imshow(image_crop_v)
                #     plot_hand(coord_hw_crop, ax2)
                #     ax3.imshow(np.argmax(hand_scoremap_v, 2))
                #     plot_hand_3d(keypoint_coord3d_v, ax4)
                #     ax4.view_init(azim=-90.0, elev=-90.0)  # aligns the 3d coord with the camera view
                #     ax4.set_xlim([-3, 3])
                #     ax4.set_ylim([-3, 1])
                #     ax4.set_zlim([-3, 3])
                #     
                #     image_save_name = os.path.join(self.visualize_save_loc, 
                #             os.path.basename(image_name))
                #     plt.savefig(image_save_name)
        return results
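Per the docstring, process takes (name, RGB image) tuples and returns one result dict per image. A hypothetical call (the extractor instance and file names are assumptions; the surrounding class definition is not shown here):

import imageio

image_list = [(p, imageio.imread(p)[:, :, :3]) for p in ('a.jpg', 'b.jpg')]
results = extractor.process(image_list)  # extractor: instance of the class above
print(results[0]['hand_joints_3d'].shape)  # (21, 3)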
Example 10
# iterate dataset
for i in range(dataset.num_samples):
    # get prediction
    keypoint_xyz21, keypoint_vis21, keypoint_scale, keypoint_uv21_v, image_crop_v, scoremap_v = \
        sess.run([data['keypoint_xyz21'], data['keypoint_vis21'], data['keypoint_scale'], data['keypoint_uv21'], image_crop, scoremap])

    keypoint_xyz21 = np.squeeze(keypoint_xyz21)
    keypoint_vis21 = np.squeeze(keypoint_vis21)
    keypoint_scale = np.squeeze(keypoint_scale)
    keypoint_uv21_v = np.squeeze(keypoint_uv21_v)
    image_crop_v = np.squeeze((image_crop_v + 0.5) * 256).astype(np.uint8)
    scoremap_v = np.squeeze(scoremap_v)
    # reverse the four scoremap channels of each finger (fingers start at
    # channel indices 1, 5, 9, 13, 17) to flip the per-finger keypoint order
    for ik in (1, 5, 9, 13, 17):
        scoremap_v[:, :, ik:ik + 4] = scoremap_v[:, :, ik + 3:ik - 1:-1]

    # scoremaps are predicted at 1/8 resolution; scale coordinates back up
    coord2d_v = detect_keypoints(scoremap_v) * 8

    # center gt
    keypoint_xyz21 -= keypoint_xyz21[0, :]

    if (i % 100) == 0:
        print('%d / %d images done: %.3f percent' %
              (i, dataset.num_samples, i * 100.0 / dataset.num_samples))

        if args.visualize:
            fig = plt.figure(1)
            ax1 = fig.add_subplot(121, projection='3d')
            plot_hand_3d(keypoint_xyz21,
                         ax1,
                         color_fixed=np.array([1.0, 0.0, 0.0]))
            ax1.view_init(
Example 11
    from mpl_toolkits.mplot3d import Axes3D

    for i in range(50):

        scoremap_3d, keypoint_xyz21_normed, image_crop, keypoint_uv21, img_dir, scoremap, hand_side \
            = sess.run([data['scoremap_3d'], data['keypoint_xyz21_normed'], data['image_crop'], data['keypoint_uv21'], data['img_dir'], data['scoremap'], data['hand_side']])
        print(img_dir[0].decode())
        print(hand_side)
        scoremap_3d = np.squeeze(scoremap_3d)
        keypoint_xyz21_normed = np.squeeze(keypoint_xyz21_normed)
        image_crop = np.squeeze((image_crop + 0.5) * 255).astype(np.uint8)
        keypoint_uv21 = np.squeeze(keypoint_uv21)
        scoremap = np.squeeze(scoremap)

        keypoints = detect_keypoints_3d(scoremap_3d)
        keypoints2d = detect_keypoints(scoremap)

        fig = plt.figure()
        ax = fig.add_subplot(221, projection='3d')
        plot_hand_3d(keypoints, ax)
        ax.invert_yaxis()
        ax.invert_zaxis()
        ax = fig.add_subplot(222, projection='3d')
        plot_hand_3d(keypoint_xyz21_normed, ax)
        ax.invert_yaxis()
        ax.invert_zaxis()
        ax = fig.add_subplot(223)
        ax.imshow(image_crop)
        plot_hand(keypoint_uv21[:, ::-1], ax)
        ax = fig.add_subplot(224)
        ax.imshow(image_crop)
Example 12
                         color_fixed=np.array([1.0, 0.0, 0.0]))
            ax1.view_init(
                azim=-90.0,
                elev=-90.0)  # aligns the 3d coord with the camera view
            plt.xlabel('x')
            plt.ylabel('y')

            ax2 = fig.add_subplot(122)
            plt.imshow(image_scaled_v)

            plt.show()
            # pdb.set_trace()

    if args.save:
        fig = plt.figure(figsize=(12, 6))
        keypoints2d = detect_keypoints(keypoints_scoremap_v)
        coord_hw = trafo_coords(keypoints2d, center_v, scale_v, 256)
        coord_uv21 = keypoint_uv21_v[:, ::-1] / 2
        ax1 = fig.add_subplot(121)
        plt.imshow(image_scaled_v)
        plot_hand(coord_hw, ax1, color_fixed=np.array((0., 0., 1.0)))
        plot_hand(coord_uv21, ax1, color_fixed=np.array((1., 0., 0.0)))

        ax2 = fig.add_subplot(122, projection='3d')
        plot_hand_3d(coord3d_pred_v,
                     ax2,
                     color_fixed=np.array([0.0, 0.0, 1.0]))
        plot_hand_3d(keypoint_xyz21,
                     ax2,
                     color_fixed=np.array([1.0, 0.0, 0.0]))
        ax2.set_xlabel('x')
Example 13
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    sess.run(tf.global_variables_initializer())
    tf.train.start_queue_runners(sess=sess)

    from utils.general import detect_keypoints, plot_hand
    import matplotlib.pyplot as plt
    # from mpl_toolkits.mplot3d import Axes3D

    for i in range(50):
        scoremap, keypoint_uv21, image_crop, resized, img_dir, keypoint_vis21 \
            = sess.run([data['scoremap'], data['keypoint_uv21'], data['image_crop'], resized_, data['img_dir'], data['keypoint_vis21']])
        scoremap = np.squeeze(scoremap)
        resized = np.squeeze(resized)
        image_crop = np.squeeze((image_crop+0.5) * 255).astype(np.uint8)
        keypoint_uv21 = np.squeeze(keypoint_uv21)
        keypoint_vis21 = np.squeeze(keypoint_vis21)

        keypoints = detect_keypoints(scoremap)
        resized_keypoints = detect_keypoints(resized)
        print(img_dir[0].decode())
        # print(keypoint_vis21)
        # print(keypoint_uv21)

        fig = plt.figure()
        ax = fig.add_subplot(121)
        plot_hand(keypoints, ax)
        plt.imshow(image_crop)
        ax = fig.add_subplot(122)
        plot_hand(keypoint_uv21[:, ::-1], ax)
        plt.imshow(image_crop)
        plt.show()
Example 14
    if i >= 0:
        image_crop_v = np.squeeze((value_dict['image_crop']+0.5)*255).astype(np.uint8)
        image_v = np.squeeze((value_dict['image']+0.5)*255).astype(np.uint8)

        if lifting_dict['method'] == 'direct':
            coord3d_pred_v = np.squeeze(value_dict['coord_xyz_norm'])
        elif lifting_dict['method'] == 'heatmap':
            heatmap_3d_v = np.squeeze(value_dict['heatmap_3d'])
            coord3d_pred_v = detect_keypoints_3d(heatmap_3d_v)
            coord3d_pred_v = coord3d_pred_v[:21, :]

        # center on the wrist joint, normalize by hand size, scale to a nominal length
        coord3d_pred_v -= coord3d_pred_v[0, :]
        coord3d_pred_v /= hand_size(coord3d_pred_v)
        coord3d_pred_v *= 0.7

        coord2d_hw_v = detect_keypoints(np.squeeze(value_dict['heatmap_2d']))[:21, :]
        crop_scale = np.squeeze(value_dict['crop_scale'])
        crop_center = np.squeeze(value_dict['crop_center'])
        K = np.squeeze(value_dict['K'])

        wrapper.reset_value()

        # shift the hand away from the origin and rescale before fitting
        coord3d_rev = (coord3d_pred_v + np.array([[0.0, 0.0, 10.0]])) * 100
        # flip the per-finger keypoint order to the fitting wrapper's convention
        for ij in (1, 5, 9, 13, 17):
            coord3d_rev[ij:ij+4] = coord3d_rev[ij+3:ij-1:-1]
        wrapper.fit3d(coord3d_rev)

        # map crop-space coords back to the full image: undo crop centering and scaling
        coord2d_hw_global = (coord2d_hw_v - dataset.crop_size/2) / crop_scale + crop_center
        coord2d_uv_global = np.copy(coord2d_hw_global[:, ::-1])
        for ij in (1, 5, 9, 13, 17):
            coord2d_uv_global[ij:ij+4] = coord2d_uv_global[ij+3:ij-1:-1]
Example 15
def get_pic(image_list):
    # images to be shown
    # image_list = list()
    # image_list.append('./data/img30.jpg')
    #image_list.append('./data/img31.jpg')
    #image_list.append('./data/img32.jpg')
    #image_list.append('./data/img33.jpg')

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))  # (batch, height, width, channels), as in the other examples
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,\
    keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session()  # config=tf.ConfigProto(gpu_options=gpu_options)

    # initialize network
    net.init(sess)

    # Feed image list through network
    final = list()
    for i, image in tqdm(enumerate(image_list)):
        
        image_raw = image
        # image_raw = cv2.resize(image_raw, dsize=(240, 320))  # , interpolation=cv2.INTER_CUBIC
        image_raw = make_it_small.small(image_raw)

        # swap BGR -> RGB (vectorized; equivalent to the original per-pixel loop)
        image_raw = image_raw[:, :, ::-1].copy()

        image_v = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)

        hand_scoremap_v, image_crop_v, scale_v, center_v,\
        keypoints_scoremap_v, keypoint_coord3d_v = sess.run([hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
                                                             keypoints_scoremap_tf, keypoint_coord3d_tf],
                                                            feed_dict={image_tf: image_v})

        img_angle = description_of_hand_position(keypoint_coord3d_v)
        hand_scoremap_v = np.squeeze(hand_scoremap_v)
        image_crop_v = np.squeeze(image_crop_v)
        keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
        keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v)

        # post processing
        image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8')
        coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
        coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

        # visualize
        fig = plt.figure(1)
        ax1 = fig.add_subplot(221)
        ax2 = fig.add_subplot(222)
        ax3 = fig.add_subplot(223)
        ax4 = fig.add_subplot(224, projection='3d')
        ax1.imshow(image_raw)
        plot_hand(coord_hw, ax1)
        ax2.imshow(image_crop_v)
        plot_hand(coord_hw_crop, ax2)
        ax3.imshow(np.argmax(hand_scoremap_v, 2))
        plot_hand_3d(keypoint_coord3d_v, ax4)
        ax4.view_init(azim=-90.0, elev=-90.0)  # aligns the 3d coord with the camera view
        ax4.set_xlim([-3, 3])
        ax4.set_ylim([-3, 1])
        ax4.set_zlim([-3, 3])
        fig.suptitle(img_angle, fontsize=10)
        plt.savefig("imgs/{}.png".format(i))
        plt.close(fig)
        img = imageio.imread("imgs/{}.png".format(i))
        final.append(img)
    return final
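get_pic returns the rendered frames as a list of images; a hedged follow-up that stitches them into an animation (the output file name is an assumption):

import imageio

frames = get_pic(image_list)
imageio.mimsave('hand_pose.gif', frames)  # assemble the rendered frames into a GIF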