# Evaluation-script excerpt: finish argument parsing, build the input pipeline and the
# CPM graph, then open a session and load the network weights.
# `parser` is defined outside this excerpt.
parser.add_argument('--save', '-s', action='store_true')
args = parser.parse_args()

# get dataset
# Alternative reader, left disabled:
# dataset = BinaryDbReader(mode='evaluation', shuffle=False, use_wrist_coord=False)
dataset = BinaryDbReaderSTB(mode='evaluation', shuffle=False, use_wrist_coord=False, hand_crop=True)

# build network graph
data = dataset.get()
image_crop = (data['image_crop'])
# convert to BGR
# NOTE(review): this slice reverses the last TWO axes, not only the last one. If the tensor
# is laid out as (batch, height, width, channel) the image is also mirrored horizontally --
# confirm that the extra flip is intended.
image_crop = image_crop[:, :, ::-1, ::-1]

# build network
net = CPM(out_chan=22)

# feed through network
# NOTE(review): `[-1]` is applied to the value returned by inference() and only that last
# element is unpacked into two names -- confirm this matches the return structure of the
# CPM class used here.
scoremap, _ = net.inference(image_crop)[-1]

# Start TF
# The session is limited to a fraction (0.3) of the GPU memory.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
sess.run(tf.global_variables_initializer())
# The queue runners are started before anything is fetched from the graph.
tf.train.start_queue_runners(sess=sess)

# net.init runs after the global initializer above; presumably it overwrites the freshly
# initialized variables with the values stored in weight_path -- verify against CPM.init.
weight_path = './weights/pose_model.npy'
net.init(weight_path, sess)

util = EvalUtil()

# iterate dataset
global_step = tf.Variable(0, trainable=False, name="global_step") lr_scheduler = utils.general.LearningRateScheduler(values=train_para['lr'], steps=train_para['lr_iter']) lr = lr_scheduler.get_lr(global_step) opt = tf.train.AdamOptimizer(lr) tower_grads = [] tower_losses = [] tower_losses_PAF = [] tower_losses_2d = [] with tf.variable_scope(tf.get_variable_scope()): for ig in range(num_gpu): with tf.device('/gpu:%d' % ig): # build network net = CPM(out_chan=22, numPAF=20, crop_size=368, withPAF=True, PAFdim=3) predicted_scoremaps, _, predicted_PAFs = net.inference(data['image_crop'][ig], train=True) # Loss assert len(predicted_scoremaps) == 6 s = data['scoremap2d'][ig].get_shape().as_list() valid = tf.concat([data['hand_valid'][ig], tf.ones((s[0], 1), dtype=tf.bool)], axis=1) valid = tf.cast(valid, tf.float32) mask_scoremap = tf.tile(tf.expand_dims(data['mask_crop'][ig], axis=3), [1, 1, 1, s[3]]) loss_2d = 0.0 # multiply mask_scoremap to mask out the invalid areas for ip, predicted_scoremap in enumerate(predicted_scoremaps): resized_scoremap = tf.image.resize_images(predicted_scoremap, (s[1], s[2])) mean_over_pixel = tf.reduce_sum(tf.square((resized_scoremap - data['scoremap2d'][ig]) * mask_scoremap), [1, 2]) / (tf.reduce_sum(mask_scoremap, [1, 2]) + 1e-6) loss_2d_ig = tf.reduce_sum(valid * mean_over_pixel) / (tf.reduce_sum(valid) + 1e-6) loss_2d += loss_2d_ig
global_step = tf.Variable(already_trained + 1, trainable=False, name="global_step") else: global_step = tf.Variable(0, trainable=False, name="global_step") lr_scheduler = LearningRateScheduler(values=train_para['lr'], steps=train_para['lr_iter']) lr = lr_scheduler.get_lr(global_step) opt = tf.train.AdamOptimizer(lr) with tf.variable_scope(tf.get_variable_scope()): for ig in range(num_gpu): with tf.device('/gpu:%d' % ig): # build network net = CPM(crop_size=368, out_chan=22) predicted_scoremaps, _ = net.inference(data['image_crop'][ig], train=True) # Loss s = data['scoremap'][ig].get_shape().as_list() ext_vis = tf.concat([ data['keypoint_vis21'][ig], tf.ones([s[0], 1], dtype=tf.bool) ], axis=1) vis = tf.cast(tf.reshape(ext_vis, [s[0], s[3]]), tf.float32) losses = [] loss = 0.0 for ip, predicted_scoremap in enumerate(predicted_scoremaps): resized_scoremap = tf.image.resize_images(
class E2ENet(object):
    """ Network performing 3D pose estimation of a human hand from a single color image.

        A CPM backbone produces 2D heatmaps and an encoding. Both are concatenated and passed to
        one of two lifting heads, selected by lifting_dict['method']:
            'direct':  regresses canonical 3D coordinates plus a viewpoint rotation
            'heatmap': predicts volumetric (3D) heatmaps in three stages
    """

    def __init__(self, lifting_dict, out_chan=21, crop_size=256):
        """ Stores the configuration and instantiates the CPM backbone.

            Inputs:
                lifting_dict: dict, Its 'method' entry has to be 'direct' or 'heatmap'
                out_chan: int, Passed to CPM; also the channel count of each 3D heatmap
                crop_size: int, Passed to CPM; the 'heatmap' head expects features of size crop_size / 8
        """
        self.crop_size = crop_size
        self.out_chan = out_chan
        self.cpm = CPM(self.crop_size, self.out_chan)
        self.lifting_dict = lifting_dict
        assert lifting_dict['method'] in ['direct', 'heatmap']

    def init(self, session, weight_files=None, exclude_var_list=None, cpm_init_vgg=False):
        """ Initializes weights from pickled python dictionaries.

            Inputs:
                session: tf.Session, Tensorflow session object containing the network graph
                weight_files: list of str, Paths to the pickle files that are used to initialize network weights
                exclude_var_list: list of str, Weights that should not be loaded. A variable is skipped
                    when any entry of this list is a substring of its name.
                cpm_init_vgg: bool, If True self.cpm.init_vgg(session) is called before any file is loaded

            Raises:
                AssertionError: if one of the weight files does not exist
        """
        import pickle

        if exclude_var_list is None:
            exclude_var_list = list()

        if cpm_init_vgg:
            self.cpm.init_vgg(session)

        if weight_files is None:
            return

        # Initialize with weights
        for file_name in weight_files:
            assert os.path.exists(file_name), "File not found: %s" % file_name

            # SECURITY: unpickling executes arbitrary code embedded in the file.
            # Only pass weight files that come from a trusted source.
            with open(file_name, 'rb') as fi:
                weight_dict = pickle.load(fi)

            weight_dict = {k: v for k, v in weight_dict.items()
                           if not any(x in k for x in exclude_var_list)}

            if len(weight_dict) > 0:
                init_op, init_feed = tf.contrib.framework.assign_from_values(weight_dict)
                session.run(init_op, init_feed)
                print('Loaded %d variables from %s' % (len(weight_dict), file_name))

    def inference(self, input_image, evaluation, train=False):
        """ Builds the graph: CPM backbone followed by the configured lifting head.

            Inputs:
                input_image: Image tensor that is handed to the CPM backbone
                evaluation: Forwarded to ops.dropout (only used by the 'direct' head)
                train: bool, Forwarded to the CPM and used as `trainable` flag of every layer

            Returns:
                dict, for 'direct' with the keys 'coord_xyz_norm', 'coord_xyz_can', 'rot_mat', 'heatmap_2d'
                      for 'heatmap' with the keys 'heatmap_3d' (list with one entry per stage), 'heatmap_2d'

            Raises:
                ValueError: if lifting_dict['method'] is neither 'direct' nor 'heatmap'
        """
        heatmap_2d, encoding = self.cpm.inference(input_image, train)

        # NOTE(review): the nesting "E2ENet" / <method> / <head> determines the variable names.
        # It was reconstructed from a source without indentation -- confirm against existing snapshots.
        with tf.variable_scope("E2ENet"):
            scoremap = [encoding] + heatmap_2d
            method = self.lifting_dict['method']
            with tf.variable_scope(method):
                scoremap = tf.concat(scoremap, axis=3)

                if method == 'direct':
                    return self._lift_direct(scoremap, heatmap_2d, evaluation, train)
                elif method == 'heatmap':
                    return self._lift_heatmap(scoremap, heatmap_2d, train)

                # The check in __init__ is an assert and can be stripped or bypassed;
                # fail loudly instead of handing None to the caller.
                raise ValueError("Unsupported lifting method: %r" % (method,))

    def _lift_direct(self, scoremap, heatmap_2d, evaluation, train):
        """ 'direct' head: canonical coordinates (PosePrior) rotated by an estimated viewpoint. """
        batch_size = scoremap.get_shape().as_list()[0]

        with tf.variable_scope('PosePrior'):
            # some conv layers, every second one has stride 2
            x = scoremap
            for i, out_chan in enumerate([32, 64, 128]):
                x = ops.conv_relu(x, 'conv_pose_%d_1' % i, kernel_size=3, stride=1,
                                  out_chan=out_chan, trainable=train)
                x = ops.conv_relu(x, 'conv_pose_%d_2' % i, kernel_size=3, stride=2,
                                  out_chan=out_chan, trainable=train)

            # reshape and some fc layers
            x = tf.reshape(x, [batch_size, -1])
            for i, out_chan in enumerate([512, 512]):
                x = ops.fully_connected_relu(x, 'fc_pose_%d' % i, out_chan=out_chan, trainable=train)
                x = ops.dropout(x, 0.8, evaluation)

            # 63 outputs are interpreted as 21 points with 3 coordinates each
            coord_xyz_can = ops.fully_connected(x, 'fc_xyz', out_chan=63, trainable=train)
            coord_xyz_can = tf.reshape(coord_xyz_can, [batch_size, 21, 3])

        with tf.variable_scope('ViewPoint'):
            x = scoremap
            for i, out_chan in enumerate([64, 128, 256]):
                x = ops.conv_relu(x, 'conv_vp_%d_1' % i, kernel_size=3, stride=1,
                                  out_chan=out_chan, trainable=train)
                x = ops.conv_relu(x, 'conv_vp_%d_2' % i, kernel_size=3, stride=2,
                                  out_chan=out_chan, trainable=train)

            # flatten
            x = tf.reshape(x, [batch_size, -1])

            # Estimate Viewpoint --> 3 params
            for i, out_chan in enumerate([256, 128]):
                x = ops.fully_connected_relu(x, 'fc_vp_%d' % i, out_chan=out_chan, trainable=train)
                x = ops.dropout(x, 0.75, evaluation)

            ux = ops.fully_connected(x, 'fc_vp_ux', out_chan=1, trainable=train)
            uy = ops.fully_connected(x, 'fc_vp_uy', out_chan=1, trainable=train)
            uz = ops.fully_connected(x, 'fc_vp_uz', out_chan=1, trainable=train)

        rot_mat = self._get_rot_mat(ux, uy, uz)
        coord_xyz_norm = tf.matmul(coord_xyz_can, rot_mat)

        return {
            'coord_xyz_norm': coord_xyz_norm,
            'coord_xyz_can': coord_xyz_can,
            'rot_mat': rot_mat,
            'heatmap_2d': heatmap_2d
        }

    def _get_rot_mat(self, ux_b, uy_b, uz_b):
        """ Returns a rotation matrix from a vector whose direction is the axis and whose norm is the angle. """
        with tf.name_scope('get_rot_mat'):
            # 1e-8 keeps the square root and the division below finite for a zero vector
            u_norm = tf.sqrt(tf.square(ux_b) + tf.square(uy_b) + tf.square(uz_b) + 1e-8)
            theta = u_norm

            # some tmp vars; the cosine is computed once and reused
            st = tf.sin(theta)[:, 0]
            ct = tf.cos(theta)[:, 0]
            one_ct = 1.0 - ct

            norm_fac = 1.0 / u_norm[:, 0]
            ux = ux_b[:, 0] * norm_fac
            uy = uy_b[:, 0] * norm_fac
            uz = uz_b[:, 0] * norm_fac

            return self._stitch_mat_from_vecs([
                ct + ux * ux * one_ct, ux * uy * one_ct - uz * st, ux * uz * one_ct + uy * st,
                uy * ux * one_ct + uz * st, ct + uy * uy * one_ct, uy * uz * one_ct - ux * st,
                uz * ux * one_ct - uy * st, uz * uy * one_ct + ux * st, ct + uz * uz * one_ct
            ])

    def _lift_heatmap(self, scoremap, heatmap_2d, train):
        """ 'heatmap' head: lifts the 2D features into a volume and refines 3D heatmaps in three stages. """
        s = scoremap.get_shape().as_list()

        with tf.variable_scope('heatmap'):
            # True division on purpose: a crop_size that is not a multiple of 8 can never match.
            assert s[1] == self.crop_size / 8 and s[2] == self.crop_size / 8
            s3d = s[1]

            # A 1x1 conv creates s3d channels, which are then moved to axis 1 and a new
            # trailing channel axis of size 1 is appended for the 3D convolutions.
            x = ops.conv_relu(scoremap, 'lifting', kernel_size=1, stride=1, out_chan=s3d, trainable=train)
            x = tf.transpose(x, perm=[0, 3, 1, 2])
            encoding_3d = tf.expand_dims(x, -1)

            x = self._heatmap_stage(encoding_3d, 1, train)
            scoremap_3d = [x]
            for stage_id in range(2, 4):
                # later stages see the previous prediction together with the encoding
                x = tf.concat([x, encoding_3d], axis=4)
                x = self._heatmap_stage(x, stage_id, train)
                scoremap_3d.append(x)

        return {
            'heatmap_3d': scoremap_3d,
            'heatmap_2d': heatmap_2d
        }

    def _heatmap_stage(self, x, stage_id, train):
        """ Applies the five 3D conv layers 'conv3d1_stage<id>' ... 'conv3d5_stage<id>' of one stage. """
        for layer_id, kernel_size in enumerate([5, 5, 5, 1], start=1):
            x = ops.conv3d_relu(x, 'conv3d%d_stage%d' % (layer_id, stage_id), kernel_size=kernel_size,
                                stride=1, out_chan=128, leaky=False, trainable=train)
        return ops.conv3d(x, 'conv3d5_stage%d' % stage_id, kernel_size=1, stride=1,
                          out_chan=self.out_chan, trainable=train)

    @staticmethod
    def _stitch_mat_from_vecs(vector_list):
        """ Stitches a given list of vectors into a 3x3 matrix.

            Input:
                vector_list: list of 9 tensors, which will be stitched into a matrix. list contains matrix elements
                    in a row-first fashion (m11, m12, m13, m21, m22, m23, m31, m32, m33). Length of the vectors has
                    to be the same, because it is interpreted as batch dimension.

            Returns:
                Tensor of shape [batch, 3, 3]. The batch size does not have to be known statically.
        """
        assert len(vector_list) == 9, "There have to be exactly 9 tensors in vector_list."

        flat_vectors = [tf.reshape(x, [-1]) for x in vector_list]
        trafo_matrix = tf.stack(flat_vectors, axis=1)  # batch x 9, row-first element order
        return tf.reshape(trafo_matrix, [-1, 3, 3])
def __init__(self, lifting_dict, out_chan=21, crop_size=256):
    """ Keeps the configuration on the instance and instantiates the CPM backbone.

        Inputs:
            lifting_dict: dict, Its 'method' entry has to be 'direct' or 'heatmap'
            out_chan: int, Second positional argument handed to CPM
            crop_size: int, First positional argument handed to CPM
    """
    self.out_chan = out_chan
    self.crop_size = crop_size

    # The backbone is created before the configuration is checked.
    self.cpm = CPM(crop_size, out_chan)
    self.lifting_dict = lifting_dict

    lifting_method = lifting_dict['method']
    assert lifting_method in ('direct', 'heatmap')
i, utils.keypoint_conversion.a4_to_main['openpose_lhand_score'], 2].astype(np.float32) openpose_lhand_valid = (openpose_lhand_score > 0.01) if openpose_lhand_valid.any(): min_coord = np.amin(openpose_lhand[openpose_lhand_valid], axis=0) max_coord = np.amax(openpose_lhand[openpose_lhand_valid], axis=0) lfit_size = np.amax(max_coord - min_coord) / 2 if lfit_size > max_lsize: max_lsize = lfit_size lhand_ref_frame = i assert max_rsize > 0 assert max_lsize > 0 rscale2d_ref = float(s[1]) / (2 * max_rsize * hand_zoom) lscale2d_ref = float(s[1]) / (2 * max_lsize * hand_zoom) bodynet = CPM(out_chan=21, crop_size=368, withPAF=True, PAFdim=3, numPAF=23) handnet = CPM(out_chan=22, numPAF=20, crop_size=368, withPAF=True, PAFdim=3) with tf.variable_scope('body'): # feed through network bheatmap_2d, _, bPAF = bodynet.inference(data['bimage_crop'], train=False) with tf.variable_scope('hand', reuse=tf.AUTO_REUSE): lheatmap_2d, _, lPAF = handnet.inference(data['limage_crop'], train=False) # rheatmap_2d, _, rPAF = handnet.inference(data['rimage_crop'], train=False) rheatmap_2d, _, rPAF = handnet.inference( data['rimage_crop'][:, :, ::-1, :], train=False) # flip right to left s = data['bimage_crop'].get_shape().as_list() data['bheatmap_2d'] = tf.image.resize_images(bheatmap_2d[-1], (s[1], s[2]), tf.image.ResizeMethod.BICUBIC) data['bPAF'] = tf.image.resize_images(bPAF[-1], (s[1], s[2]),
# flag that allows to load a retrained snapshot(original weights used in the paper are used otherwise) USE_RETRAINED = False FREIBURG_ORDER = False PATH_TO_SNAPSHOTS = './snapshots_cpm_rotate_s10_wrist_dome_simon/' # only used when USE_RETRAINED is true # get dataset # dataset = ManualDBReader(mode='evaluation', shuffle=False, hand_crop=True, use_wrist_coord=True, crop_size=368, crop_size_zoom=2.0) dataset = DomeReader(mode='evaluation', shuffle=False, hand_crop=True, use_wrist_coord=True, crop_size=368, crop_size_zoom=2.0) # build network graph data = dataset.get(read_image=True) # data = dataset.get() # build network evaluation = tf.placeholder_with_default(True, shape=()) net = CPM(crop_size=368, out_chan=22) data['image_crop'] = data['image_crop'][:, :, :, ::-1] # convert to BGR (for tomas model) keypoints_scoremap, _ = net.inference(data['image_crop']) keypoints_scoremap = keypoints_scoremap[-1] # upscale to original size s = data['image_crop'].get_shape().as_list() keypoints_scoremap = tf.image.resize_images(keypoints_scoremap, (s[1], s[2])) # Start TF gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) sess.run(tf.global_variables_initializer()) tf.train.start_queue_runners(sess=sess) # initialize network weights
lr = lr_scheduler.get_lr(global_step) opt = tf.train.AdamOptimizer(lr) tower_grads = [] tower_losses = [] tower_losses_PAF = [] tower_losses_2d = [] with tf.variable_scope(tf.get_variable_scope()): for ig in range(num_gpu): with tf.device('/gpu:%d' % ig): # build network net = CPM(out_chan=21, crop_size=368, withPAF=True, PAFdim=3, numPAF=23, numStage=numStage) predicted_scoremaps, _, predicted_PAFs = net.inference( data['image_crop'][ig], train=True) # with tf.variable_scope('hourglass'): # net = Hourglass(num_output_channel=20, PAF_dim=3, num_PAF=20, num_hourglass=numStage) # predicted_scoremaps, predicted_PAFs = net.inference(data['image_crop'][ig]) # Loss s = data['scoremap2d'][ig].get_shape().as_list() valid = tf.concat([ data['body_valid'][ig], tf.zeros((s[0], 1), dtype=tf.bool) ], axis=1)