def worker(input_q, output_q):
    fps = FPS().start()

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    hand_side_tf = tf.constant([[1.0, 1.0]])  # both left and right hands included
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
        keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # start TF and initialize the network
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    net.init(sess)

    while True:
        fps.update()
        frame = input_q.get()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image_raw = scipy.misc.imresize(frame_rgb, (240, 320))  # resize the converted RGB frame
        image_v = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)
        keypoint_coord3d_v = sess.run(keypoint_coord3d_tf, feed_dict={image_tf: image_v})
        output_q.put(predict_by_geometry(keypoint_coord3d_v, known_finger_poses, 0.45))

    fps.stop()
    sess.close()
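# A minimal sketch of how worker() could be driven, assuming the multiprocessing
# Queue pattern its signature implies; the webcam source, queue sizes, and exit
# condition here are illustrative, not from the original project.
if __name__ == '__main__':
    from multiprocessing import Process, Queue

    input_q = Queue(maxsize=5)
    output_q = Queue(maxsize=5)
    Process(target=worker, args=(input_q, output_q), daemon=True).start()

    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        input_q.put(frame)
        print(output_q.get())  # geometry-based pose prediction for this frame
    cap.release()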
def main(args):
    webcamId = 0
    try:
        if len(args) > 1:
            webcamId = int(args[1])
    except ValueError:
        print("Invalid webcam id. Falling back to default value '" + str(webcamId) + "'.")

    # stream creation
    inputStream = cv2.VideoCapture(webcamId)
    if not inputStream.isOpened():
        print("Cannot use camera with id " + str(webcamId) + ".")
        return 1

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
        keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # initialize network
    net.init(session)

    while True:
        _, image_raw = inputStream.read()
        image_resized = cv2.resize(image_raw, (320, 240))
        image_rgb = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)
        image_v = np.expand_dims((image_rgb.astype('float') / 255.0) - 0.5, 0)

        start_time = time.time()
        hand_scoremap_v, image_crop_v, scale_v, center_v, \
            keypoints_scoremap_v, keypoint_coord3d_v = session.run(
                [hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
                 keypoints_scoremap_tf, keypoint_coord3d_tf],
                feed_dict={image_tf: image_v})
        delta_time = time.time() - start_time
        print("Inference time: " + str(delta_time))

        # post processing
        keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
        coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
        coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

        plot_hand_cv2(image_resized, coord_hw)
        image_fullsize = cv2.resize(image_resized, (1600, 1200))
        cv2.imshow('result', image_fullsize)
        if cv2.waitKey(1) == 27:  # exit on ESC so the cleanup below is reachable
            break

    cv2.destroyAllWindows()
    return 0
def prepare_network():
    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    hand_side_tf = tf.constant([[1.0, 1.0]])  # both left and right hands included
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
        keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # initialize network
    net.init(sess)

    return sess, image_tf, keypoint_coord3d_tf, scale_tf, center_tf, keypoints_scoremap_tf
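# A minimal usage sketch for prepare_network(), assuming a BGR frame read with
# OpenCV and the same preprocessing as the snippets above; 'hand.jpg' is a
# placeholder path.
sess, image_tf, keypoint_coord3d_tf, scale_tf, center_tf, keypoints_scoremap_tf = prepare_network()
frame = cv2.cvtColor(cv2.resize(cv2.imread('hand.jpg'), (320, 240)), cv2.COLOR_BGR2RGB)
image_v = np.expand_dims((frame.astype('float') / 255.0) - 0.5, 0)
keypoint_coord3d_v, scale_v, center_v, keypoints_scoremap_v = sess.run(
    [keypoint_coord3d_tf, scale_tf, center_tf, keypoints_scoremap_tf],
    feed_dict={image_tf: image_v})
coord_hw_crop = detect_keypoints(np.squeeze(keypoints_scoremap_v))
coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)  # keypoints in full-image coordinates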
def prepare_network():
    # CNN network input
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    hand_side_tf = tf.constant([[1.0, 1.0]])  # include both right and left hands
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
        keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # initialize network
    net.init(sess)

    return sess, image_tf, keypoint_coord3d_tf, scale_tf, center_tf, keypoints_scoremap_tf
def __init__(self,
             model_weight_files=['./utilities/hand3d/weights/handsegnet-rhd.pickle',
                                 './utilities/hand3d/weights/posenet3d-rhd-stb-slr-finetuned.pickle'],
             visualize=False,
             visualize_save_loc='visualize/handpose_estimation',
             cache_loc='cache/handpose_estimation',
             image_extension='.jpg',
             overwrite=False):
    self.extension_length = len(image_extension)
    self.model_weight_files = model_weight_files
    self.visualize = visualize
    self.visualize_save_loc = visualize_save_loc
    if self.visualize:
        os.makedirs(self.visualize_save_loc, exist_ok=True)

    self.cache_loc = cache_loc
    os.makedirs(self.cache_loc, exist_ok=True)
    self.overwrite = overwrite

    # input placeholders
    self.image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    self.hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    self.evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    self.net = ColorHandPose3DNetwork()
    self.hand_scoremap_tf, self.image_crop_tf, self.scale_tf, self.center_tf, \
        self.keypoints_scoremap_tf, self.keypoint_coord3d_tf = \
        self.net.inference(self.image_tf, self.hand_side_tf, self.evaluation)

    # Start TF
    self.gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=self.gpu_options))
    self.net.init(self.sess, weight_files=self.model_weight_files)
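# A usage sketch for the estimator above; the class name HandPoseEstimator is
# hypothetical (only __init__ is shown), and the call pattern is an assumption
# based on the tensors the constructor builds. 'frame.jpg' is a placeholder path.
estimator = HandPoseEstimator(visualize=False, overwrite=True)
image = cv2.cvtColor(cv2.resize(cv2.imread('frame.jpg'), (320, 240)), cv2.COLOR_BGR2RGB)
image_v = np.expand_dims((image.astype('float') / 255.0) - 0.5, 0)
coord3d_v = estimator.sess.run(estimator.keypoint_coord3d_tf,
                               feed_dict={estimator.image_tf: image_v})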
    'snapshot_dir': 'snapshots_handsegnet'
}

# get dataset
dataset = BinaryDbReader(mode='training', batch_size=4, shuffle=True,
                         hue_aug=True, random_crop_to_size=True)

# build network graph
data = dataset.get()

# build network
evaluation = tf.placeholder_with_default(True, shape=())
net = ColorHandPose3DNetwork()
hand_mask_pred = net.inference_detection(data['image'], train=True)

# Start TF
gpu_options = tf.GPUOptions(allow_growth=True)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
tf.train.start_queue_runners(sess=sess)

# Loss
loss = 0.0
s = data['hand_mask'].get_shape().as_list()
gt = tf.reshape(data['hand_mask'], [s[0] * s[1] * s[2], -1])
for pred_item in hand_mask_pred:
    pred = tf.reshape(pred_item, [s[0] * s[1] * s[2], -1])  # reshape the per-scale prediction, not the whole list
    loss += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=gt))
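# A hedged sketch of how a training step could be attached to the loss above,
# following the lr/lr_iter schedule convention of the posenet script below;
# the choice of AdamOptimizer here is an assumption, not the original code.
global_step = tf.Variable(0, trainable=False, name='global_step')
lr = tf.train.piecewise_constant(global_step, [10000, 20000], [1e-4, 1e-5, 1e-6])
train_op = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)

sess.run(tf.global_variables_initializer())
for step in range(30000):
    _, loss_v = sess.run([train_op, loss])
    if step % 1000 == 0:
        print('step %d, loss %.5f' % (step, loss_v))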
def main():
    if not os.path.isdir(save_roi_path):
        os.mkdir(save_roi_path)

    if os.path.isdir(frames_path):
        image_list = [
            os.path.join(frames_path, file) for file in os.listdir(frames_path)
            if file.endswith('.jpg')
        ]
        print("Recorded {} images from {}".format(len(image_list), frames_path))
    else:
        raise NotADirectoryError(frames_path + " isn't a directory.")

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, input_size, input_size, channel))  # h, w, c
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
        keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # initialize network
    net.init(sess)

    start = time.time()
    value_list = []
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    dist = (width - height) // 2
    for num, image_name in enumerate(image_list, 1):
        filename = os.path.basename(image_name)  # extract the filename
        name, ext = os.path.splitext(filename)
        label = which_label(name)
        image_bgr = cv2.imread(image_name)
        # feed the image through the network
        # image_bgr = cv2.resize(image_raw, (width, height), interpolation=cv2.INTER_AREA)
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        if num % 100 == 1:
            print("Processing image: " + name + " ... {}/{}".format(num, len(image_list)))

        # center-crop the image for labeling
        x1, x2, y1, y2 = width // 2 - dist, width // 2 + dist, 0, height
        image_v = image_rgb[y1:y2, x1:x2]
        image_v = cv2.resize(image_v, (input_size, input_size), interpolation=cv2.INTER_AREA)
        image_v = np.expand_dims((np.array(image_v).astype('float') / 255.0) - 0.5, 0)

        _, _, scale_v, center_v, _, _ = sess.run(
            [hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
             keypoints_scoremap_tf, keypoint_coord3d_tf],
            feed_dict={image_tf: image_v})

        # left, top, right, bottom
        y_c, x_c = np.squeeze(center_v)
        half_side_len = 128.0 / scale_v  # half the side length of the crop
        x, y = max(x1 + int(x_c - half_side_len), 0), max(y1 + int(y_c - half_side_len), 0)
        xmax, ymax = min(x1 + int(x_c + half_side_len), width), min(y1 + int(y_c + half_side_len), height)
        image_crop = image_bgr[y:ymax, x:xmax]
        res_path = os.path.join(save_roi_path, filename)
        cv2.imwrite(res_path, image_crop)
        value = (filename, width, height, label, x, y, xmax, ymax)
        value_list.append(value)

    xml_df = pd.DataFrame(value_list, columns=column_name)
    xml_df.to_csv(os.path.join(frames_path, 'test_images.csv'), index=None)
    endt = time.time()
    print("Handled all images; elapsed time: {:.2f}s.".format(endt - start))
def main(argv=None):
    train_para = {
        'lr': [1e-4, 1e-5, 1e-6],
        'lr_iter': [10000, 20000],
        'max_iter': 30000,
        'show_loss_freq': 1000,
        'snapshot_freq': 5000,
        'snapshot_dir': 'snapshots_posenet'
    }

    # Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    tf.train.start_queue_runners(sess=sess)

    # get dataset
    dataset_GANerate = GANerate(batchnum=32)
    image_crop_eval, keypoint_uv21_eval, keypoint_uv_heatmap_eval, keypoint_xyz21_normed_eval = \
        dataset_GANerate.get_batch_data_eval

    # build network
    evaluation = tf.placeholder_with_default(True, shape=())
    net = ColorHandPose3DNetwork()
    image_crop_eval = tf.add(image_crop_eval, 0, name='input_node_representations')
    keypoints_scoremap_eval = net.inference_pose2d(image_crop_eval, train=True)
    s = keypoint_uv_heatmap_eval.get_shape().as_list()
    keypoints_scoremap_eval = [tf.image.resize_images(x, (s[1], s[2])) for x in keypoints_scoremap_eval]

    # Loss
    loss_eval = 0.0
    for i, pred_item in enumerate(keypoints_scoremap_eval):
        loss_eval += tf.reduce_sum(
            tf.sqrt(tf.reduce_mean(tf.square(pred_item - keypoint_uv_heatmap_eval), [1, 2])))
    keypoints_scoremap_eval = keypoints_scoremap_eval[-1]
    keypoints_scoremap_eval = tf.add(keypoints_scoremap_eval, 0, name='final_output_node_representations')

    init = tf.global_variables_initializer()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # occupy GPU memory gracefully
    with tf.Session(config=config) as sess:
        init.run()
        checkpoint_path = './snapshots_posenet'
        model_name = 'model-42'
        if checkpoint_path:
            saver = tf.train.Saver(max_to_keep=10)
            saver.restore(sess, checkpoint_path + '/' + model_name)
            print("restore from " + checkpoint_path + '/' + model_name)

        create_pb = True
        if create_pb:
            input_graph_def = sess.graph.as_graph_def()
            variable_names = [v.name for v in input_graph_def.node]
            print('================== Model Analysis Report: variable names ==================')
            print(variable_names)
            print('================== Model Analysis Report: operations ==================')
            for op in sess.graph.get_operations():
                print(str(op.name))
            stats_graph(sess.graph)
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess,             # the session
                input_graph_def,  # graph definition used to retrieve the nodes
                'final_output_node_representations'.split(","))
            with tf.gfile.FastGFile(checkpoint_path + '/' + model_name + ".pb", "wb") as f:
                f.write(output_graph_def.SerializeToString())

        print("Start testing...")
        path = './snapshots_posenet/baseline'
        loss_eval_v = 0.0
        loss_pixel_save = 0.0
        for one_epoch in tqdm(range(100)):
            image, heatmap, heatmap_pre, keypoint_uv21, loss_eval_tmp = sess.run([
                image_crop_eval, keypoint_uv_heatmap_eval, keypoints_scoremap_eval,
                keypoint_uv21_eval, loss_eval
            ])
            loss_eval_v += loss_eval_tmp  # accumulate so the division by 100 below yields a mean
            image = (image + 0.5) * 255
            image = image.astype(np.int16)

            # recover keypoint locations as the argmax of each predicted heatmap
            keypoint_uv21_pre = np.zeros_like(keypoint_uv21)
            for i in range(heatmap_pre.shape[0]):
                for j in range(heatmap_pre.shape[-1]):
                    heatmap_pre_tmp = heatmap_pre[i, :, :, j]
                    cor_tmp = unravel_index(heatmap_pre_tmp.argmax(), heatmap_pre_tmp.shape)
                    keypoint_uv21_pre[i, j, 0] = cor_tmp[1]
                    keypoint_uv21_pre[i, j, 1] = cor_tmp[0]

            # per-keypoint Euclidean error in pixels
            loss_pixel = keypoint_uv21_pre - keypoint_uv21
            loss_pixel = np.sqrt(np.square(loss_pixel[:, :, 0]) + np.square(loss_pixel[:, :, 1]))
            loss_pixel_save = loss_pixel_save + np.mean(loss_pixel)

            # visualize
            fig = plt.figure(1)
            plt.clf()
            ax1 = fig.add_subplot(221)
            ax1.imshow(image[0])
            plot_hand(keypoint_uv21[0], ax1)
            ax3 = fig.add_subplot(223)
            ax3.imshow(image[0])
            ax3.set_title(str(loss_pixel[0, :].astype(np.int32)), fontsize=5)
            plot_hand(keypoint_uv21_pre[0], ax3)
            plot_hand(keypoint_uv21[0], ax3)
            ax2 = fig.add_subplot(222)
            ax4 = fig.add_subplot(224)
            ax2.imshow(np.sum(heatmap[0], axis=-1))  # first element of the batch: hand1(0~31), back1(32~63)
            ax2.scatter(keypoint_uv21[0, :, 0], keypoint_uv21[0, :, 1], s=10, c='k', marker='.')
            ax4.imshow(np.sum(heatmap_pre[0], axis=-1))  # first element of the batch: hand1(0~31), back1(32~63)
            ax4.scatter(keypoint_uv21_pre[0, :, 0], keypoint_uv21_pre[0, :, 1], s=10, c='k', marker='.')
            plt.savefig(path + '/image/' + str(one_epoch).zfill(5) + '.png')

        loss_eval_v = loss_eval_v / 100
        loss_pixel_save = loss_pixel_save / 100
        print(loss_pixel_save)  # 4.472415127649567
def cashbox():
    # initialize variables
    arr = [True, False, False, False]
    LOWERB = np.array([0, 0, 0])
    UPPERB = np.array([35, 35, 35])
    LOWERB_HAND = np.array([40, 60, 100])
    UPPERB_HAND = np.array([80, 100, 140])
    cap = cv2.VideoCapture('http://192.168.1.38:15490/videostream.cgi?user=admin&pwd=A2345678901')
    t = time.time()
    boxOpen = False
    handUp = False
    threshold = 85000
    thresholdHand = 2000
    startCount = False
    start = time.time()

    # network input for handpose usage
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
        keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # arguments passed to the pose estimator
    args = {
        'image_tf': image_tf,
        'hand_side_tf': hand_side_tf,
        'evaluation': evaluation,
        'net': net,
        'hand_scoremap_tf': hand_scoremap_tf,
        'image_crop_tf': image_crop_tf,
        'scale_tf': scale_tf,
        'center_tf': center_tf,
        'keypoints_scoremap_tf': keypoints_scoremap_tf,
        'keypoint_coord3d_tf': keypoint_coord3d_tf
    }

    # initialize network
    net.init(sess)

    # main loop
    while True:
        ret, frame = cap.read()
        frame_cpy = frame.copy()
        frame = frame[90:190, 90:270]
        mask = cv2.inRange(frame, LOWERB, UPPERB)
        maskHand = cv2.inRange(frame, LOWERB_HAND, UPPERB_HAND)
        cv2.imshow('mask', mask)
        cv2.imshow('mask_hand', maskHand)
        cv2.imshow('large frame', frame_cpy)
        cv2.imshow('small frame', frame_cpy[90:190, 90:270])

        if time.time() - t > 2:
            t = time.time()
            print('box: ', np.count_nonzero(mask) * 10)
            print('hand: ', np.count_nonzero(maskHand) * 10)

        if boxOpen and handUp and (np.count_nonzero(mask) * 10 < threshold):
            start = time.time()
            timenow = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(timenow, 'CASH BOX CLOSED!', '*ref: ', np.count_nonzero(mask) * 10)
            print('-----------------------------------------')
            print('-----------------------------------------')
            print('----------TRANSACTION ENDED--------------')
            print('-----------------------------------------')
            print('-----------------------------------------')
            arr[3] = True
            startCount = False
        else:
            arr[3] = False

        if boxOpen and (np.count_nonzero(maskHand) * 10 > thresholdHand):
            timenow = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(timenow, 'HAND DETECTED.', '*ref: ', np.count_nonzero(maskHand))
            doHandPoseEstimate(frame, sess, args)
            arr[2] = True
            handUp = True
            startCount = False
        else:
            arr[2] = False
            handUp = False

        if np.count_nonzero(mask) * 10 > threshold:
            if not startCount:
                start = time.time()
                startCount = True
                timenow = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                print(timenow, 'CASH BOX OPENED!', '*ref: ', np.count_nonzero(mask) * 10)
            arr[1] = True
            boxOpen = True
        else:
            arr[1] = False
            boxOpen = False

        print(time.time() - start, startCount)
        if (time.time() - start > 10) and startCount:
            print('Warning: cash box opened for more than 10 secs!')
            print('Current Status: ' + str([x * 1 for x in arr]))
            print('--------------------------------------------')

        if cv2.waitKey(1) == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
def main():
    if not os.path.isdir(save_path):
        os.mkdir(save_path)

    # images to be read
    image_list = list()
    dirname = image_path.split(os.sep)[-2]
    if os.path.isdir(image_path):
        for file in os.listdir(image_path):
            if file.endswith('.jpg'):
                image_list.append(os.path.join(image_path, file))
        print("Recorded all images from " + image_path)
    else:
        raise FileNotFoundError(image_path + " doesn't exist.")

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, height, width, channel))
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
        keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # initialize network
    net.init(sess)

    start = time.time()
    value_list = []
    for num, image_name in enumerate(image_list, 1):
        filename = image_name.split(os.sep)[-1]  # extract the filename
        name, ext = os.path.splitext(filename)
        if num % 200 == 1:
            print("Processing image: " + name + " ... {}/{}".format(num, len(image_list)))

        # feed the image through the network
        image_raw = cv2.imread(image_name)
        image_raw = cv2.resize(image_raw, (width, height), interpolation=cv2.INTER_AREA)
        image_raw = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        image_v = np.expand_dims((np.array(image_raw).astype('float') / 255.0) - 0.5, 0)
        _, _, scale_v, center_v, _, _ = sess.run(
            [hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
             keypoints_scoremap_tf, keypoint_coord3d_tf],
            feed_dict={image_tf: image_v})

        # left, top, right, bottom
        y_c, x_c = np.squeeze(center_v)
        half_side_len = 128.0 / scale_v  # half the side length of the crop
        x, y = int(x_c - half_side_len * 0.9), int(y_c - half_side_len)
        xmax, ymax = int(x_c + half_side_len * 0.9), int(y_c + half_side_len)
        x, y, xmax, ymax = max(x, 0), max(y, 0), min(xmax, width), min(ymax, height)
        cv2.rectangle(image_raw, (x, y), (xmax, ymax), (77, 255, 9), 1, 1)
        res_img = name + '_v' + ext
        res_roi = cv2.cvtColor(image_raw, cv2.COLOR_RGB2BGR)
        cv2.imwrite(os.path.join(save_path, res_img),
                    cv2.resize(res_roi, (width // 2, height // 2), interpolation=cv2.INTER_AREA))
        value = (filename, width, height, 'hand', x, y, xmax, ymax)
        value_list.append(value)

    endt = time.time()
    print("Handled all images; elapsed time: {:.2f}s.".format(endt - start))
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(value_list, columns=column_name)
    xml_df.to_csv(os.path.join(root_path, dirname + '.csv'), index=None)
    print('Successfully converted images to csv. --- ' + dirname)
def __init__(self):
    self.num_kp = 21
    self.n_fully_connected_layers = 2
    self.n_classes = 5
    self.fully_connected_layers_size = 32
    self.color_hand_pose_net = ColorHandPose3DNetwork()
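# A hedged sketch of the small classifier these fields imply: two fully-connected
# layers of width 32 mapping the 21 3D keypoints to 5 classes. The method name
# and wiring are assumptions, not the original implementation.
def classify(self, keypoints_3d):
    # keypoints_3d: (batch, num_kp, 3) coordinates from color_hand_pose_net
    x = tf.reshape(keypoints_3d, [-1, self.num_kp * 3])
    for _ in range(self.n_fully_connected_layers):
        x = tf.layers.dense(x, self.fully_connected_layers_size, activation=tf.nn.relu)
    return tf.layers.dense(x, self.n_classes)  # class logits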
def get_pic(image_list):
    # images to be shown
    # image_list = list()
    # image_list.append('./data/img30.jpg')
    # image_list.append('./data/img31.jpg')
    # image_list.append('./data/img32.jpg')
    # image_list.append('./data/img33.jpg')

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, 320, 240, 3))
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
        keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session()  # config=tf.ConfigProto(gpu_options=gpu_options)

    # initialize network
    net.init(sess)

    # feed image list through network
    final = list()
    for i, image in tqdm(enumerate(image_list)):
        image_raw = image
        # image_raw = cv2.resize(image_raw, dsize=(240, 320))  # , interpolation=cv2.INTER_CUBIC
        image_raw = make_it_small.small(image_raw)
        image_raw = image_raw[:, :, ::-1].copy()  # BGR -> RGB without mutating the caller's image
        image_v = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)

        hand_scoremap_v, image_crop_v, scale_v, center_v, \
            keypoints_scoremap_v, keypoint_coord3d_v = sess.run(
                [hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
                 keypoints_scoremap_tf, keypoint_coord3d_tf],
                feed_dict={image_tf: image_v})

        img_angle = description_of_hand_position(keypoint_coord3d_v)
        hand_scoremap_v = np.squeeze(hand_scoremap_v)
        image_crop_v = np.squeeze(image_crop_v)
        keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
        keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v)

        # post processing
        image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8')
        coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
        coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

        # visualize
        fig = plt.figure(1)
        ax1 = fig.add_subplot(221)
        ax2 = fig.add_subplot(222)
        ax3 = fig.add_subplot(223)
        ax4 = fig.add_subplot(224, projection='3d')
        ax1.imshow(image_raw)
        plot_hand(coord_hw, ax1)
        ax2.imshow(image_crop_v)
        plot_hand(coord_hw_crop, ax2)
        ax3.imshow(np.argmax(hand_scoremap_v, 2))
        plot_hand_3d(keypoint_coord3d_v, ax4)
        ax4.view_init(azim=-90.0, elev=-90.0)  # align the 3D coords with the camera view
        ax4.set_xlim([-3, 3])
        ax4.set_ylim([-3, 1])
        ax4.set_zlim([-3, 3])
        fig.suptitle(img_angle, fontsize=10)
        plt.savefig("imgs/{}.png".format(str(i)))
        plt.close(fig)

        img = imageio.imread("imgs/{}.png".format(str(i)))
        final.append(img)
    return final