def predictor(enc_flat,
              action,
              lstm_states,
              pred_depth,
              reuse=False,
              scope_prefix='',
              hparams=None):
    """LSTM predictor network.

    Concatenates `enc_flat` and `action`, feeds them through a dense layer
    (optionally adding a dense projection of random noise) and a ReLU, then
    through a stack of LSTM cells with additive skip connections, and finally
    projects the result back to the width of `enc_flat`.

    Args:
      enc_flat: Tensor whose size along axis 1 is read from its static shape
        and whose size along axis 0 is read dynamically as the batch size.
      action: Tensor concatenated to `enc_flat` along axis 1; its size along
        axis 1 is read from its static shape.
      lstm_states: Mutable, index-addressable collection of LSTM states. It is
        modified in place: index 0 and, for every `i` in
        `range(1, pred_depth, 2)`, indices `i` and `i + 1` are overwritten
        with the new states. `lstm_states[pred_depth - 2]` is read before
        any update and fed to the first cell as a back connection; when it
        is None a `back_connect_init` variable tiled over the batch is used
        instead.
      pred_depth: Integer controlling the depth of the stack; two LSTM cells
        are added for every `i` in `range(1, pred_depth, 2)`.
      reuse: Forwarded to `tf.variable_scope`.
      scope_prefix: String prepended to 'predict' to form the scope name.
      hparams: Object exposing `pred_noise_std` and `enc_pred_use_l2norm`.
        Required in practice even though the signature defaults it to None.

    Returns:
      The output of the final dense layer (units equal to the axis-1 size of
      `enc_flat`), L2-normalized along axis 1 when
      `hparams.enc_pred_use_l2norm` is truthy.

    Raises:
      ValueError: If `hparams` is None.
    """
    # hparams is dereferenced unconditionally below, so reject the None
    # default up front instead of failing with an AttributeError after part
    # of the graph has already been built.
    if hparams is None:
        raise ValueError(
            'predictor() requires hparams providing pred_noise_std and '
            'enc_pred_use_l2norm; got None.')

    with tf.variable_scope(scope_prefix + 'predict', reuse=reuse):
        enc_final_size = enc_flat.get_shape().as_list()[1]
        action_size = action.get_shape().as_list()[1]
        initial_size = enc_final_size + action_size
        batch_size = tf.shape(enc_flat)[0]

        init_stddev = 1e-2

        pre_pred = tf.concat([enc_flat, action], 1)
        pre_pred = tf.layers.dense(
            pre_pred,
            initial_size,
            kernel_initializer=tf.truncated_normal_initializer(
                stddev=init_stddev))

        # This is only needed for the GAN version.
        if hparams.pred_noise_std > 0:
            # Add the noise like this so a pretrained model can be used.
            pred_noise = tf.random_normal(
                shape=[batch_size, 100], stddev=hparams.pred_noise_std)
            pre_pred += tf.layers.dense(
                pred_noise,
                initial_size,
                kernel_initializer=tf.truncated_normal_initializer(
                    stddev=init_stddev),
                name='noise_dense')

        pre_pred = tf.nn.relu(pre_pred)

        # NOTE(review): the width initial_size * 2 presumably matches the
        # state produced by common_video.lstm_cell -- confirm there.
        if lstm_states[pred_depth - 2] is None:
            back_connect = tf.tile(
                tf.get_variable(
                    'back_connect_init',
                    shape=[1, initial_size * 2],
                    initializer=tf.truncated_normal_initializer(
                        stddev=init_stddev)),
                (batch_size, 1))
        else:
            back_connect = lstm_states[pred_depth - 2]

        lstm_init_stddev = 1e-4

        def lstm_layer(inputs, index):
            """Runs one LSTM cell and stores its state at lstm_states[index]."""
            output, lstm_states[index] = common_video.lstm_cell(
                inputs,
                lstm_states[index],
                initial_size,
                use_peepholes=True,
                initializer=tf.truncated_normal_initializer(
                    stddev=lstm_init_stddev),
                num_proj=initial_size)
            return output

        # The layers below must be created in this exact order: the
        # auto-generated layer/variable names depend on creation order.
        part_pred = lstm_layer(tf.concat([pre_pred, back_connect], 1), 0)
        part_pred = contrib.layers().layer_norm(part_pred)
        pred = part_pred

        for pred_layer_num in range(1, pred_depth, 2):
            part_pred = lstm_layer(pred, pred_layer_num)
            pred += part_pred

            # The second cell of each pair also sees the pre-LSTM features.
            part_pred = lstm_layer(
                tf.concat([pred, pre_pred], 1), pred_layer_num + 1)
            part_pred = contrib.layers().layer_norm(part_pred)
            pred += part_pred

        pred = tf.layers.dense(
            pred,
            enc_final_size,
            kernel_initializer=tf.truncated_normal_initializer(
                stddev=init_stddev))

        if hparams.enc_pred_use_l2norm:
            pred = tf.nn.l2_normalize(pred, 1)

        return pred
def predictor(enc_flat,
              action,
              lstm_states,
              pred_depth,
              reuse=False,
              scope_prefix='',
              hparams=None):
    """LSTM predictor network.

    Concatenates `enc_flat` and `action`, feeds them through a dense layer
    (optionally adding a dense projection of random noise) and a ReLU, then
    through a stack of LSTM cells with additive skip connections, and finally
    projects the result back to the width of `enc_flat`.

    Args:
      enc_flat: Tensor whose size along axis 1 is read from its static shape
        and whose size along axis 0 is read dynamically as the batch size.
      action: Tensor concatenated to `enc_flat` along axis 1; its size along
        axis 1 is read from its static shape.
      lstm_states: Mutable, index-addressable collection of LSTM states. It is
        modified in place: index 0 and, for every `i` in
        `range(1, pred_depth, 2)`, indices `i` and `i + 1` are overwritten
        with the new states. `lstm_states[pred_depth - 2]` is read before
        any update and fed to the first cell as a back connection; when it
        is None a `back_connect_init` variable tiled over the batch is used
        instead.
      pred_depth: Integer controlling the depth of the stack; two LSTM cells
        are added for every `i` in `range(1, pred_depth, 2)`.
      reuse: Forwarded to `tf.variable_scope`.
      scope_prefix: String prepended to 'predict' to form the scope name.
      hparams: Object exposing `pred_noise_std` and `enc_pred_use_l2norm`.
        Required in practice even though the signature defaults it to None.

    Returns:
      The output of the final dense layer (units equal to the axis-1 size of
      `enc_flat`), L2-normalized along axis 1 when
      `hparams.enc_pred_use_l2norm` is truthy.

    Raises:
      ValueError: If `hparams` is None.
    """
    # hparams is dereferenced unconditionally below, so reject the None
    # default up front instead of failing with an AttributeError after part
    # of the graph has already been built.
    if hparams is None:
        raise ValueError(
            'predictor() requires hparams providing pred_noise_std and '
            'enc_pred_use_l2norm; got None.')

    with tf.variable_scope(scope_prefix + 'predict', reuse=reuse):
        enc_final_size = enc_flat.get_shape().as_list()[1]
        action_size = action.get_shape().as_list()[1]
        initial_size = enc_final_size + action_size
        batch_size = tf.shape(enc_flat)[0]

        init_stddev = 1e-2

        pre_pred = tf.concat([enc_flat, action], 1)
        pre_pred = tf.layers.dense(
            pre_pred,
            initial_size,
            kernel_initializer=tf.truncated_normal_initializer(
                stddev=init_stddev))

        # This is only needed for the GAN version.
        if hparams.pred_noise_std > 0:
            # Add the noise like this so a pretrained model can be used.
            pred_noise = tf.random_normal(
                shape=[batch_size, 100], stddev=hparams.pred_noise_std)
            pre_pred += tf.layers.dense(
                pred_noise,
                initial_size,
                kernel_initializer=tf.truncated_normal_initializer(
                    stddev=init_stddev),
                name='noise_dense')

        pre_pred = tf.nn.relu(pre_pred)

        # NOTE(review): the width initial_size * 2 presumably matches the
        # state produced by common_video.lstm_cell -- confirm there.
        if lstm_states[pred_depth - 2] is None:
            back_connect = tf.tile(
                tf.get_variable(
                    'back_connect_init',
                    shape=[1, initial_size * 2],
                    initializer=tf.truncated_normal_initializer(
                        stddev=init_stddev)),
                (batch_size, 1))
        else:
            back_connect = lstm_states[pred_depth - 2]

        lstm_init_stddev = 1e-4

        def lstm_layer(inputs, index):
            """Runs one LSTM cell and stores its state at lstm_states[index]."""
            output, lstm_states[index] = common_video.lstm_cell(
                inputs,
                lstm_states[index],
                initial_size,
                use_peepholes=True,
                initializer=tf.truncated_normal_initializer(
                    stddev=lstm_init_stddev),
                num_proj=initial_size)
            return output

        # The layers below must be created in this exact order: the
        # auto-generated layer/variable names depend on creation order.
        part_pred = lstm_layer(tf.concat([pre_pred, back_connect], 1), 0)
        part_pred = tf.contrib.layers.layer_norm(part_pred)
        pred = part_pred

        for pred_layer_num in range(1, pred_depth, 2):
            part_pred = lstm_layer(pred, pred_layer_num)
            pred += part_pred

            # The second cell of each pair also sees the pre-LSTM features.
            part_pred = lstm_layer(
                tf.concat([pred, pre_pred], 1), pred_layer_num + 1)
            part_pred = tf.contrib.layers.layer_norm(part_pred)
            pred += part_pred

        pred = tf.layers.dense(
            pred,
            enc_final_size,
            kernel_initializer=tf.truncated_normal_initializer(
                stddev=init_stddev))

        if hparams.enc_pred_use_l2norm:
            pred = tf.nn.l2_normalize(pred, 1)

        return pred