def update_parameters(self, loss):
    """Build the training op for ``loss`` and store it in ``self.step``.

    When ``self.regularization_constant`` is non-zero, the sum of the L2
    norms of all trainable variables, scaled by that constant, is added to
    ``loss``. Gradients are clipped element-wise to
    ``[-self.grad_clip, self.grad_clip]`` and applied while incrementing
    ``self.global_step``. The apply op is created under a control
    dependency on every op in the ``UPDATE_OPS`` collection. When
    ``self.enable_parameter_averaging`` is set, ``self.step`` additionally
    triggers ``self.ema.apply`` on the trainable variables.

    Args:
        loss: loss tensor to minimise.
    """
    if self.regularization_constant != 0:
        # Penalty is the sum of per-variable (unsquared) L2 norms.
        # NOTE(review): the derivative of sqrt at 0 is not finite, so an
        # all-zero variable may yield NaN gradients here -- confirm.
        l2_norm = tf.reduce_sum([
            tf.sqrt(tf.reduce_sum(tf.square(param)))
            for param in tf.trainable_variables()
        ])
        loss = loss + self.regularization_constant * l2_norm

    optimizer = self.get_optimizer(self.learning_rate_var, self.beta1_decay_var)
    grads = optimizer.compute_gradients(loss)
    # compute_gradients returns (None, var) for variables that do not affect
    # the loss; clip_by_value cannot take None, so pass those through as-is
    # and let apply_gradients ignore them.
    clipped = [
        (g if g is None
         else tf.clip_by_value(g, -self.grad_clip, self.grad_clip), v_)
        for g, v_ in grads
    ]

    # Ops registered in UPDATE_OPS must run before the optimizer step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        step = optimizer.apply_gradients(clipped, global_step=self.global_step)

    if self.enable_parameter_averaging:
        maintain_averages_op = self.ema.apply(tf.trainable_variables())
        # The grouped op carries a control dependency on the optimizer step,
        # so running self.step triggers both.
        with tf.control_dependencies([step]):
            self.step = tf.group(maintain_averages_op)
    else:
        self.step = step

    trainable = tf.trainable_variables()
    logging.info('all parameters:')
    logging.info(pp.pformat([(var.name, shape(var)) for var in tf.global_variables()]))

    logging.info('trainable parameters:')
    logging.info(pp.pformat([(var.name, shape(var)) for var in trainable]))

    logging.info('trainable parameter count:')
    # Builtin sum: np.sum over a generator is deprecated in NumPy.
    logging.info(str(sum(np.prod(shape(var)) for var in trainable)))
def update_parameters(self, loss):
    """Create the step counter, learning-rate variable and training op.

    Creates the non-trainable variables ``self.global_step`` (initial value
    0) and ``self.learning_rate_var`` (initial value 0.0), optionally adds
    an L2-norm penalty to ``loss``, clips gradients element-wise to
    ``[-self.grad_clip, self.grad_clip]`` and stores the resulting training
    op in ``self.step``. When ``self.enable_parameter_averaging`` is set,
    ``self.step`` additionally triggers ``self.ema.apply`` on the trainable
    variables.

    Args:
        loss: loss tensor to minimise.
    """
    self.global_step = tf.Variable(0, trainable=False)
    self.learning_rate_var = tf.Variable(0.0, trainable=False)

    if self.regularization_constant != 0:
        # Penalty is the sum of per-variable (unsquared) L2 norms.
        # NOTE(review): the derivative of sqrt at 0 is not finite, so an
        # all-zero variable may yield NaN gradients here -- confirm.
        l2_norm = tf.reduce_sum([
            tf.sqrt(tf.reduce_sum(tf.square(param)))
            for param in tf.trainable_variables()
        ])
        loss = loss + self.regularization_constant * l2_norm

    optimizer = self.get_optimizer(self.learning_rate_var)
    grads = optimizer.compute_gradients(loss)
    # Variables that do not affect the loss come back with a None gradient;
    # clip_by_value cannot take None, so leave those pairs untouched.
    clipped = [
        (g if g is None
         else tf.clip_by_value(g, -self.grad_clip, self.grad_clip), v_)
        for g, v_ in grads
    ]
    step = optimizer.apply_gradients(clipped, global_step=self.global_step)

    if self.enable_parameter_averaging:
        maintain_averages_op = self.ema.apply(tf.trainable_variables())
        # The grouped op carries a control dependency on the optimizer step.
        with tf.control_dependencies([step]):
            self.step = tf.group(maintain_averages_op)
    else:
        self.step = step

    trainable = tf.trainable_variables()
    logging.info('all parameters:')
    logging.info(pp.pformat([(var.name, shape(var)) for var in tf.global_variables()]))

    logging.info('trainable parameters:')
    logging.info(pp.pformat([(var.name, shape(var)) for var in trainable]))

    logging.info('trainable parameter count:')
    # Builtin sum: np.sum over a generator is deprecated in NumPy.
    logging.info(str(sum(np.prod(shape(var)) for var in trainable)))
def __init__(self, config, environment):
    """Set up the replay memory, the DRQN network and bookkeeping state.

    Builds the network, registers its summary tags, initialises logging in
    the network's log directory and then logs every global and trainable
    variable together with the total trainable parameter count.

    Args:
        config: configuration object; ``forecast_window`` is read here, and
            ``self.config.mem_size`` sizes the queue.
        environment: forwarded to the base-class constructor.
    """
    super(DRQNAgent, self).__init__(config, environment)
    self.replay_memory = DRQNReplayMemory(config)
    self.net = DRQN(len(self.env.n_actions), config)
    self.net.build()
    self.net.add_summary(
        [
            "average_reward",
            "average_loss",
            "average_q",
            "ep_max_reward",
            "ep_min_reward",
            "ep_num_game",
            "learning_rate",
        ],
        ["ep_rewards", "ep_actions"],
    )
    self.init_logging(self.net.dir_log)

    self.queue = deque(maxlen=self.config.mem_size)
    self.account_profit_loss = 0.
    self.forecast_window = config.forecast_window
    self.close_attempts = 0

    trainable = tf.trainable_variables()
    logging.info('all parameters:')
    logging.info(
        pp.pformat([(var.name, shape(var)) for var in tf.global_variables()]))

    logging.info('trainable parameters:')
    logging.info(pp.pformat([(var.name, shape(var)) for var in trainable]))

    logging.info('trainable parameter count:')
    # Builtin sum: np.sum over a generator is deprecated in NumPy.
    logging.info(str(sum(np.prod(shape(var)) for var in trainable)))
def update_parameters(self, loss):
    """Build the decayed learning rate and the training op ``self.optim``.

    Optionally adds an L2-norm penalty to ``loss``, creates the
    ``learning_rate_step`` placeholder and a staircase exponentially
    decayed learning rate floored at ``self.learning_rate_minimum``, clips
    gradients element-wise to ``[-self.grad_clip, self.grad_clip]`` and
    stores the resulting op in ``self.optim``. When
    ``self.enable_parameter_averaging`` is set, ``self.optim`` additionally
    triggers ``self.ema.apply`` on the trainable variables.

    Args:
        loss: loss tensor to minimise.
    """
    if self.regularization_constant != 0:
        # Penalty is the sum of per-variable (unsquared) L2 norms.
        # NOTE(review): the derivative of sqrt at 0 is not finite, so an
        # all-zero variable may yield NaN gradients here -- confirm.
        l2_norm = tf.reduce_sum([
            tf.sqrt(tf.reduce_sum(tf.square(param)))
            for param in tf.trainable_variables()
        ])
        loss = loss + self.regularization_constant * l2_norm

    # The decay step is fed at run time through this placeholder.
    self.learning_rate_step = tf.placeholder(
        'int64', None, name='learning_rate_step')
    decayed_rate = tf.train.exponential_decay(
        self.learning_rate,
        self.learning_rate_step,
        self.learning_rate_decay_step,
        self.learning_rate_decay,
        staircase=True)
    self.learning_rate_op = tf.maximum(self.learning_rate_minimum, decayed_rate)

    optimizer = self.get_optimizer(self.learning_rate_op)
    grads = optimizer.compute_gradients(loss)
    # Variables that do not affect the loss come back with a None gradient;
    # clip_by_value cannot take None, so leave those pairs untouched.
    clipped = [
        (g if g is None
         else tf.clip_by_value(g, -self.grad_clip, self.grad_clip), v_)
        for g, v_ in grads
    ]
    optim = optimizer.apply_gradients(clipped, global_step=self.global_step)

    if self.enable_parameter_averaging:
        maintain_averages_op = self.ema.apply(tf.trainable_variables())
        # The grouped op carries a control dependency on the optimizer step.
        with tf.control_dependencies([optim]):
            self.optim = tf.group(maintain_averages_op)
    else:
        self.optim = optim

    trainable = tf.trainable_variables()
    logging.info('all parameters:')
    logging.info(
        pp.pformat([(var.name, shape(var)) for var in tf.global_variables()]))

    logging.info('trainable parameters:')
    logging.info(pp.pformat([(var.name, shape(var)) for var in trainable]))

    logging.info('trainable parameter count:')
    # Builtin sum: np.sum over a generator is deprecated in NumPy.
    logging.info(str(sum(np.prod(shape(var)) for var in trainable)))
def decrypter(image, is_training):
    """Build the steganography decrypter network.

    ``image`` must have per-example dimensions
    ``[MNROWS, MNCOLS, MNCNLS]`` (checked with an assertion). It is
    transposed to channels-first, passed through six ``standard_block_s2c``
    blocks and two ``standard_block_c2s`` blocks, and transposed back; the
    output tensor is named ``dcpt_image``.

    Args:
        image: stego image tensor, channels-last.
        is_training: forwarded to every block.

    Returns:
        The channels-last output tensor of the last block.
    """
    expected_shape = [
        params.MNROWS.value, params.MNCOLS.value, params.MNCNLS.value
    ]
    out_channels = expected_shape[2]

    actual_shape = tf_utils.shape(image)[1:4]
    assert actual_shape == expected_shape, \
        'Stegged Image Dimension Error, Actual({}) != Expected({})'.format(
            actual_shape, expected_shape)

    layout = 'channels_first'

    with tf.variable_scope('decrypter'), \
            ts.arg_scope([conv2d], **conv2d_params), \
            ts.arg_scope([sep_conv2d], **sep_conv2d_params), \
            ts.arg_scope([batch_norm], **batch_norm_params):
        # The blocks run channels-first: move channels to axis 1.
        net = tf.transpose(image, [0, 3, 1, 2])

        for width in (32, 32, 64, 64, 128, 128):
            net = standard_block_s2c(net, width, is_training, 1, layout)
        for width in (32, out_channels):
            net = standard_block_c2s(net, width, is_training, 1, layout)

        # Back to channels-last for the caller.
        net = tf.transpose(net, [0, 2, 3, 1], name='dcpt_image')

    return net
def update_parameters(self, loss):
    """Build the training op for ``loss`` and store it in ``self.step``.

    When ``self.regularization_constant`` is non-zero, the sum of the L2
    norms of all trainable variables, scaled by that constant, is added to
    ``loss``. Gradients are clipped element-wise to
    ``[-self.grad_clip, self.grad_clip]`` and applied while incrementing
    ``self.global_step``. When ``self.enable_parameter_averaging`` is set,
    ``self.step`` additionally triggers ``self.ema.apply`` on the trainable
    variables.

    Args:
        loss: loss tensor to minimise.
    """
    if self.regularization_constant != 0:
        # For every trainable variable take sqrt(sum of squares), then sum
        # those norms; the penalty pushes each variable's L2 norm down.
        # NOTE(review): the derivative of sqrt at 0 is not finite, so an
        # all-zero variable may yield NaN gradients here -- confirm.
        l2_norm = tf.reduce_sum([
            tf.sqrt(tf.reduce_sum(tf.square(param)))
            for param in tf.trainable_variables()
        ])
        loss = loss + self.regularization_constant * l2_norm

    optimizer = self.get_optimizer(self.learning_rate_var)
    # List of (gradient, variable) pairs.
    grads = optimizer.compute_gradients(loss)
    # Values outside [-grad_clip, grad_clip] are replaced by the bound.
    # Variables that do not affect the loss come back with a None gradient;
    # clip_by_value cannot take None, so leave those pairs untouched.
    clipped = [
        (g if g is None
         else tf.clip_by_value(g, -self.grad_clip, self.grad_clip), v_)
        for g, v_ in grads
    ]
    # Apply the clipped gradients.
    step = optimizer.apply_gradients(clipped, global_step=self.global_step)

    if self.enable_parameter_averaging:
        maintain_averages_op = self.ema.apply(tf.trainable_variables())
        # The grouped op carries a control dependency on the optimizer step,
        # so running self.step triggers both.
        with tf.control_dependencies([step]):
            self.step = tf.group(maintain_averages_op)
    else:
        self.step = step

    trainable = tf.trainable_variables()
    logging.info('all parameters:')
    logging.info(
        pp.pformat([(var.name, shape(var)) for var in tf.global_variables()]))

    logging.info('trainable parameters:')
    logging.info(pp.pformat([(var.name, shape(var)) for var in trainable]))

    logging.info('trainable parameter count:')
    # Total element count: product of each variable's dimensions, summed.
    # Builtin sum: np.sum over a generator is deprecated in NumPy.
    logging.info(str(sum(np.prod(shape(var)) for var in trainable)))
def encrypter(orig_image, hide_image, is_training):
    """Build the steganography encrypter network.

    Both inputs must have per-example dimensions
    ``[MNROWS, MNCOLS, MNCNLS]`` (checked with assertions). They are
    transposed to channels-first, concatenated along the channel axis,
    passed through six ``standard_block_s2c`` blocks and two
    ``standard_block_c2s`` blocks, and transposed back; the output tensor
    is named ``steg_image``.

    Args:
        orig_image: cover image tensor, channels-last.
        hide_image: image tensor to hide, channels-last.
        is_training: forwarded to every block.

    Returns:
        The channels-last output tensor of the last block.
    """
    expected_shape = [
        params.MNROWS.value, params.MNCOLS.value, params.MNCNLS.value
    ]
    out_channels = expected_shape[2]

    cover_shape = tf_utils.shape(orig_image)[1:4]
    secret_shape = tf_utils.shape(hide_image)[1:4]

    assert cover_shape == expected_shape, \
        'Cover Image Dimension Error, Actual({}) != Expected({})'.format(
            cover_shape, expected_shape)
    assert secret_shape == expected_shape, \
        'Hidden Image Dimension Error, Actual({}) != Expected({})'.format(
            secret_shape, expected_shape)

    layout = 'channels_first'

    with tf.variable_scope('encrypter'), \
            ts.arg_scope([conv2d], **conv2d_params), \
            ts.arg_scope([sep_conv2d], **sep_conv2d_params), \
            ts.arg_scope([batch_norm], **batch_norm_params):
        # The blocks run channels-first: move channels to axis 1.
        cover = tf.transpose(orig_image, [0, 3, 1, 2])
        secret = tf.transpose(hide_image, [0, 3, 1, 2])
        # Stack cover and secret along the channel axis.
        net = tf.concat([cover, secret], axis=1)

        for width in (32, 32, 64, 64, 128, 128):
            net = standard_block_s2c(net, width, is_training, 1, layout)
        for width in (32, out_channels):
            net = standard_block_c2s(net, width, is_training, 1, layout)

        # Back to channels-last for the caller.
        net = tf.transpose(net, [0, 2, 3, 1], name='steg_image')

    return net
def __init__(
    self,
    lstm_size,
    num_attn_mixture_components,
    attention_values,
    attention_values_lengths,
    num_output_mixture_components,
    bias,
    reuse=None,
):
    """Store the cell configuration and derive sizes from the inputs.

    ``window_size`` is axis 2 of ``attention_values`` as returned by the
    ``shape`` helper; ``char_len`` and ``batch_size`` are dynamic
    ``tf.shape`` lookups of axes 1 and 0. ``output_units`` is
    ``6 * num_output_mixture_components + 1``.
    """
    # Plain configuration values.
    self.reuse = reuse
    self.lstm_size = lstm_size
    self.bias = bias
    self.num_attn_mixture_components = num_attn_mixture_components
    self.num_output_mixture_components = num_output_mixture_components
    self.output_units = 1 + 6 * num_output_mixture_components

    # Attention inputs and the sizes derived from them.
    self.attention_values = attention_values
    self.attention_values_lengths = attention_values_lengths
    self.window_size = shape(attention_values, 2)
    self.char_len = tf.shape(attention_values)[1]
    self.batch_size = tf.shape(attention_values)[0]
def __init__(
    self,
    lstm_size,
    num_attn_mixture_components,
    attention_values,
    attention_values_lengths,
    num_output_mixture_components,
    bias,
    reuse=None,
):
    """Record constructor arguments and sizes derived from them.

    Derived attributes:
        window_size: axis 2 of ``attention_values`` via the ``shape`` helper.
        char_len: dynamic size of axis 1 of ``attention_values``.
        batch_size: dynamic size of axis 0 of ``attention_values``.
        output_units: ``6 * num_output_mixture_components + 1``.
    """
    # Attention inputs and the sizes derived from them.
    self.attention_values = attention_values
    self.attention_values_lengths = attention_values_lengths
    self.window_size = shape(attention_values, 2)
    self.char_len = tf.shape(attention_values)[1]
    self.batch_size = tf.shape(attention_values)[0]

    # Plain configuration values.
    self.lstm_size = lstm_size
    self.num_attn_mixture_components = num_attn_mixture_components
    self.num_output_mixture_components = num_output_mixture_components
    self.output_units = 1 + 6 * num_output_mixture_components
    self.bias = bias
    self.reuse = reuse
def update_parameters(self, loss):
    """Build the training op for ``loss`` and store it in ``self.step``.

    When ``self.regularization_constant`` is non-zero, the sum of
    ``tf.nn.l2_loss`` over all trainable variables, scaled by that
    constant, is added to ``loss``. The optimizer is created through
    ``self.get_optimizer`` unless ``self.optimizer_initialized_flag`` is
    set, in which case ``self.optimizer`` is reused. Gradients are clipped
    element-wise to ``[-self.grad_clip, self.grad_clip]`` unless
    ``self.grad_clip == -1``, which disables clipping. When
    ``self.enable_parameter_averaging`` is set, ``self.step`` additionally
    triggers ``self.ema.apply`` on the trainable variables.

    Args:
        loss: loss tensor to minimise.
    """
    if self.regularization_constant != 0:
        # tf.nn.l2_loss already returns a scalar per variable, so no inner
        # reduction is needed before summing across variables.
        l2_norm = tf.reduce_sum(
            [tf.nn.l2_loss(param) for param in tf.trainable_variables()])
        loss = loss + self.regularization_constant * l2_norm

    if self.optimizer_initialized_flag:
        optimizer = self.optimizer
    else:
        optimizer = self.get_optimizer(self.learning_rate_var)

    grads = optimizer.compute_gradients(loss)

    if self.grad_clip == -1:
        # -1 is the "no clipping" sentinel.
        clipped = grads
    else:
        # Variables that do not affect the loss have a None gradient; those
        # pairs are passed through untouched.
        clipped = [
            (g if g is None
             else tf.clip_by_value(g, -self.grad_clip, self.grad_clip), v_)
            for g, v_ in grads
        ]

    step = optimizer.apply_gradients(clipped, global_step=self.global_step)

    if self.enable_parameter_averaging:
        maintain_averages_op = self.ema.apply(tf.trainable_variables())
        # The grouped op carries a control dependency on the optimizer step.
        with tf.control_dependencies([step]):
            self.step = tf.group(maintain_averages_op)
    else:
        self.step = step

    logging.info('trainable parameter count:')
    # Builtin sum: np.sum over a generator is deprecated in NumPy.
    logging.info(
        str(sum(np.prod(shape(var)) for var in tf.trainable_variables())))
def decode(self, x, conv_inputs, features):
    """Run the decoder step by step inside a ``tf.while_loop``.

    Each dilated layer gets a TensorArray queue seeded with the last
    ``dilation`` encoder activations preceding the final encoded step. At
    every decode step the layer reads its queue at ``time`` and writes its
    new activation at ``time + dilation``.

    Args:
        x: encoder output indexed as ``[batch, time, ...]``; the value at
            ``encode_len - 1`` of each example is the decoder input
            (assumes last dimension 1 -- TODO confirm).
        conv_inputs: one encoder activation tensor per dilation, indexed as
            ``[batch, time, channels]``.
        features: decode-time features, batch-major; transposed to
            time-major here.

    Returns:
        Batch-major stack of the per-step outputs.
    """
    batch_size = tf.shape(x)[0]

    # initialize state tensor arrays
    state_queues = []
    for conv_input, dilation in zip(conv_inputs, self.dilations):
        # Each batch index repeated `dilation` times, flattened.
        batch_idx = tf.range(batch_size)
        batch_idx = tf.tile(tf.expand_dims(batch_idx, 1), (1, dilation))
        batch_idx = tf.reshape(batch_idx, [-1])

        # Time steps encode_len - dilation - 1 .. encode_len - 2 per example.
        queue_begin_time = self.encode_len - dilation - 1
        temporal_idx = (tf.expand_dims(queue_begin_time, 1)
                        + tf.expand_dims(tf.range(dilation), 0))
        temporal_idx = tf.reshape(temporal_idx, [-1])

        # (batch * dilation, 2) index pairs for gather_nd.
        idx = tf.stack([batch_idx, temporal_idx], axis=1)
        slices = tf.reshape(
            tf.gather_nd(conv_input, idx),
            (batch_size, dilation, shape(conv_input, 2)))

        # The first `dilation` slots come from the encoder; the remaining
        # slots are written during decoding.
        layer_ta = tf.TensorArray(
            dtype=tf.float32, size=dilation + self.num_decode_steps)
        layer_ta = layer_ta.unstack(tf.transpose(slices, (1, 0, 2)))
        state_queues.append(layer_ta)

    # initialize feature tensor array (time-major)
    features_ta = tf.TensorArray(dtype=tf.float32, size=self.num_decode_steps)
    features_ta = features_ta.unstack(tf.transpose(features, (1, 0, 2)))

    # initialize output tensor array
    emit_ta = tf.TensorArray(size=self.num_decode_steps, dtype=tf.float32)

    # initialize other loop vars
    elements_finished = 0 >= self.decode_len
    time = tf.constant(0, dtype=tf.int32)

    # get initial x input: x at the last encoded step of every example
    current_idx = tf.stack(
        [tf.range(tf.shape(self.encode_len)[0]), self.encode_len - 1], axis=1)
    initial_input = tf.gather_nd(x, current_idx)

    def loop_fn(time, current_input, queues):
        """Compute one decode step and the updated layer queues."""
        current_features = features_ta.read(time)
        current_input = tf.concat([current_input, current_features], axis=1)

        with tf.variable_scope('x-proj-decode', reuse=tf.AUTO_REUSE):
            w_x_proj = tf.get_variable('weights')
            b_x_proj = tf.get_variable('biases')
            x_proj = tf.nn.tanh(tf.matmul(current_input, w_x_proj) + b_x_proj)

        skip_outputs, updated_queues = [], []
        for i, (queue, dilation) in enumerate(zip(queues, self.dilations)):
            state = queue.read(time)
            with tf.variable_scope('dilated-conv-decode-{}'.format(i),
                                   reuse=tf.AUTO_REUSE):
                w_conv = tf.get_variable('weights')
                b_conv = tf.get_variable('biases')
                # Width-2 convolution written as two matmuls: tap 0 on the
                # queued state, tap 1 on the current projection.
                dilated_conv = (tf.matmul(state, w_conv[0, :, :])
                                + tf.matmul(x_proj, w_conv[1, :, :])
                                + b_conv)
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=1)
            dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            with tf.variable_scope('dilated-conv-proj-decode-{}'.format(i),
                                   reuse=tf.AUTO_REUSE):
                w_proj = tf.get_variable('weights')
                b_proj = tf.get_variable('biases')
                concat_outputs = tf.matmul(dilated_conv, w_proj) + b_proj
            skips, residuals = tf.split(
                concat_outputs,
                [self.skip_channels, self.residual_channels],
                axis=1)

            x_proj += residuals
            skip_outputs.append(skips)
            # Becomes this layer's state `dilation` steps from now.
            updated_queues.append(queue.write(time + dilation, x_proj))

        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=1))
        with tf.variable_scope('dense-decode-1', reuse=tf.AUTO_REUSE):
            w_h = tf.get_variable('weights')
            b_h = tf.get_variable('biases')
            h = tf.nn.relu(tf.matmul(skip_outputs, w_h) + b_h)

        with tf.variable_scope('dense-decode-2', reuse=tf.AUTO_REUSE):
            w_y = tf.get_variable('weights')
            b_y = tf.get_variable('biases')
            y_hat = tf.matmul(h, w_y) + b_y

        elements_finished = (time >= self.decode_len)
        finished = tf.reduce_all(elements_finished)

        next_input = tf.cond(
            finished,
            lambda: tf.zeros([batch_size, 1], dtype=tf.float32),
            lambda: y_hat)
        next_elements_finished = (time >= self.decode_len - 1)

        return (next_elements_finished, next_input, updated_queues)

    def condition(unused_time, elements_finished, *_):
        # Keep looping until every element is finished.
        return tf.logical_not(tf.reduce_all(elements_finished))

    def body(time, elements_finished, emit_ta, *state_queues):
        # NOTE(review): initial_input is passed on every step, so the value
        # returned by loop_fn is only emitted, never fed back as the next
        # input -- confirm this is intended.
        (next_finished, emit_output, state_queues) = loop_fn(
            time, initial_input, state_queues)

        # Emit zeros for elements that had already finished.
        emit = tf.where(
            elements_finished, tf.zeros_like(emit_output), emit_output)
        emit_ta = emit_ta.write(time, emit)

        elements_finished = tf.logical_or(elements_finished, next_finished)
        return [time + 1, elements_finished, emit_ta] + list(state_queues)

    returned = tf.while_loop(
        cond=condition,
        body=body,
        loop_vars=[time, elements_finished, emit_ta] + state_queues)

    outputs_ta = returned[2]
    y_hat = tf.transpose(outputs_ta.stack(), (1, 0, 2))
    return y_hat
def decode(self, x, conv_inputs, features):
    """Run the decoder step by step inside a ``tf.while_loop``.

    Args:
        x: encoder output (``y_hat_encode`` per the original author's
            note); the value at ``encode_len - 1`` of each example is the
            decoder input.
        conv_inputs: per-layer encoder activations, one per dilation
            (original note: the per-layer inputs, excluding the last
            output).
        features: decode-time features (``self.decode_features`` per the
            original note), batch-major.

    Returns:
        Batch-major stack of the per-step outputs.
    """
    batch_size = tf.shape(x)[0]

    # initialize state tensor arrays, one queue per dilation
    state_queues = []
    for conv_input, dilation in zip(conv_inputs, self.dilations):
        # (batch_size, dilation): each batch index repeated, then flattened
        # to batch_size * dilation entries.
        batch_idx = tf.range(batch_size)
        batch_idx = tf.tile(tf.expand_dims(batch_idx, 1), (1, dilation))
        batch_idx = tf.reshape(batch_idx, [-1])

        # (batch, dilation) window of the last `dilation` encoder steps,
        # not including the final encoded step; flattened to 1-D.
        queue_begin_time = self.encode_len - dilation - 1
        temporal_idx = (tf.expand_dims(queue_begin_time, 1)
                        + tf.expand_dims(tf.range(dilation), 0))
        temporal_idx = tf.reshape(temporal_idx, [-1])

        # (batch * dilation, 2) index pairs; gather them from conv_input
        # and reshape to (batch, dilation, channels).
        idx = tf.stack([batch_idx, temporal_idx], axis=1)
        slices = tf.reshape(
            tf.gather_nd(conv_input, idx),
            (batch_size, dilation, shape(conv_input, 2)))

        # TensorArray of length dilation + num_decode_steps; the first
        # `dilation` slots are filled from the time-major slices.
        layer_ta = tf.TensorArray(
            dtype=tf.float32, size=dilation + self.num_decode_steps)
        layer_ta = layer_ta.unstack(tf.transpose(slices, (1, 0, 2)))
        state_queues.append(layer_ta)

    # initialize feature tensor array (time-major)
    features_ta = tf.TensorArray(dtype=tf.float32, size=self.num_decode_steps)
    features_ta = features_ta.unstack(tf.transpose(features, (1, 0, 2)))

    # initialize output tensor array
    emit_ta = tf.TensorArray(size=self.num_decode_steps, dtype=tf.float32)

    # initialize other loop vars
    elements_finished = 0 >= self.decode_len
    time = tf.constant(0, dtype=tf.int32)

    # get initial x input: (batch index, encode_len - 1) pairs select the
    # last encoded step of every example
    current_idx = tf.stack(
        [tf.range(tf.shape(self.encode_len)[0]), self.encode_len - 1], axis=1)
    initial_input = tf.gather_nd(x, current_idx)

    def loop_fn(time, current_input, queues):
        """Compute one decode step and the updated layer queues."""
        # Read this step's decode features and append them to the input.
        current_features = features_ta.read(time)
        current_input = tf.concat([current_input, current_features], axis=1)

        with tf.variable_scope('x-proj-decode', reuse=True):
            w_x_proj = tf.get_variable('weights')
            b_x_proj = tf.get_variable('biases')
            x_proj = tf.nn.tanh(tf.matmul(current_input, w_x_proj) + b_x_proj)

        skip_outputs, updated_queues = [], []
        for i, (queue, dilation) in enumerate(zip(queues, self.dilations)):
            # History for this layer at the current step.
            state = queue.read(time)
            with tf.variable_scope('dilated-conv-decode-{}'.format(i),
                                   reuse=True):
                # Convolution kernel: tap 0 on the history, tap 1 on x_proj.
                w_conv = tf.get_variable('weights')
                b_conv = tf.get_variable('biases')
                dilated_conv = (tf.matmul(state, w_conv[0, :, :])
                                + tf.matmul(x_proj, w_conv[1, :, :])
                                + b_conv)
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=1)
            dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            with tf.variable_scope('dilated-conv-proj-decode-{}'.format(i),
                                   reuse=True):
                w_proj = tf.get_variable('weights')
                b_proj = tf.get_variable('biases')
                concat_outputs = tf.matmul(dilated_conv, w_proj) + b_proj
            skips, residuals = tf.split(
                concat_outputs,
                [self.skip_channels, self.residual_channels],
                axis=1)

            x_proj += residuals
            skip_outputs.append(skips)
            # Becomes this layer's history `dilation` steps from now.
            updated_queues.append(queue.write(time + dilation, x_proj))

        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=1))
        with tf.variable_scope('dense-decode-1', reuse=True):
            w_h = tf.get_variable('weights')
            b_h = tf.get_variable('biases')
            h = tf.nn.relu(tf.matmul(skip_outputs, w_h) + b_h)

        with tf.variable_scope('dense-decode-2', reuse=True):
            w_y = tf.get_variable('weights')
            b_y = tf.get_variable('biases')
            y_hat = tf.matmul(h, w_y) + b_y

        elements_finished = (time >= self.decode_len)
        finished = tf.reduce_all(elements_finished)

        next_input = tf.cond(
            finished,
            lambda: tf.zeros([batch_size, 1], dtype=tf.float32),
            lambda: y_hat)
        next_elements_finished = (time >= self.decode_len - 1)

        return (next_elements_finished, next_input, updated_queues)

    def condition(unused_time, elements_finished, *_):
        # Stop once every entry of elements_finished is True.
        return tf.logical_not(tf.reduce_all(elements_finished))

    def body(time, elements_finished, emit_ta, *state_queues):
        # NOTE(review): initial_input is passed on every step, so the value
        # returned by loop_fn is only emitted, never fed back as the next
        # input -- confirm this is intended.
        (next_finished, emit_output, state_queues) = loop_fn(
            time, initial_input, state_queues)

        # Emit zeros for elements that had already finished.
        emit = tf.where(
            elements_finished, tf.zeros_like(emit_output), emit_output)
        emit_ta = emit_ta.write(time, emit)

        elements_finished = tf.logical_or(elements_finished, next_finished)
        return [time + 1, elements_finished, emit_ta] + list(state_queues)

    returned = tf.while_loop(
        cond=condition,
        body=body,
        loop_vars=[time, elements_finished, emit_ta] + state_queues)

    outputs_ta = returned[2]
    y_hat = tf.transpose(outputs_ta.stack(), (1, 0, 2))
    return y_hat
def decode(self, x, conv_inputs, features):
    """Run the decoder step by step inside a ``tf.while_loop``.

    Parameters
    ----------
    x: projected skip outputs from encoder
        shape = [batch_size, seq_len, 1]
    conv_inputs: conv outputs from encoder
        length = len(dilations)
        each element has shape [batch_size, seq_len, residual_channels]
    features: features
        shape = [batch_size, seq_len, num_features]

    Returns
    -------
    y_hat: per-step decoder outputs stacked and transposed to batch-major.
    """
    batch_size = tf.shape(x)[0]

    # initialize state tensor arrays
    state_queues = []
    for conv_input, dilation in zip(conv_inputs, self.dilations):
        # conv_input.shape = [batch_size, seq_len, residual_channels]

        # batch_idx.shape = (dilation * batch_size,)
        # Before flattening: batch_idx.shape = [batch_size, dilation] and
        # batch_idx[n, :] = [n] * dilation
        batch_idx = tf.range(batch_size)
        batch_idx = tf.tile(tf.expand_dims(batch_idx, 1), (1, dilation))
        batch_idx = tf.reshape(batch_idx, [-1])

        # For each example, temporal_idx holds the last `dilation` time
        # steps before the final encoded step.
        # temporal_idx.shape = (dilation * batch_size,)
        # Before flattening: temporal_idx.shape = [batch_size, dilation] and
        # temporal_idx[n, :] = queue_begin_time[n] + np.arange(dilation)
        queue_begin_time = self.encode_len - dilation - 1
        temporal_idx = (tf.expand_dims(queue_begin_time, 1)
                        + tf.expand_dims(tf.range(dilation), 0))
        temporal_idx = tf.reshape(temporal_idx, [-1])

        # idx pairs every batch index with each of its temporal indices.
        # idx.shape = [batch_size * dilation, 2]
        # slices holds, per example, the chunk of conv_input that seeds the
        # decoder's convolution for this layer.
        # slices.shape = [batch_size, dilation, shape(conv_input, 2)]
        idx = tf.stack([batch_idx, temporal_idx], axis=1)
        slices = tf.reshape(
            tf.gather_nd(conv_input, idx),
            (batch_size, dilation, shape(conv_input, 2)))

        # layer_ta is a time series of convolution outputs/inputs.
        # layer_ta.read(0).shape = [batch_size, shape(conv_input, 2)]
        layer_ta = tf.TensorArray(
            dtype=tf.float32, size=dilation + self.num_decode_steps)
        layer_ta = layer_ta.unstack(tf.transpose(slices, (1, 0, 2)))

        # state_queues holds one such queue per layer.
        state_queues.append(layer_ta)

    # initialize feature tensor array
    # features_ta.read(0).shape = [batch_size, num_features]
    features_ta = tf.TensorArray(dtype=tf.float32, size=self.num_decode_steps)
    features_ta = features_ta.unstack(tf.transpose(features, (1, 0, 2)))

    # initialize output tensor array
    emit_ta = tf.TensorArray(size=self.num_decode_steps, dtype=tf.float32)

    # initialize other loop vars
    elements_finished = 0 >= self.decode_len
    time = tf.constant(0, dtype=tf.int32)

    # get initial x input
    current_idx = tf.stack(
        [tf.range(tf.shape(self.encode_len)[0]), self.encode_len - 1], axis=1)
    # initial_input.shape = [batch_size, 1]
    initial_input = tf.gather_nd(x, current_idx)

    def loop_fn(time, current_input, queues):
        """Compute one decode step.

        Parameters
        ----------
        time: scalar int32 tensor, the current decode step
        current_input: decoder input for this step
            shape = [batch_size, 1]
        queues: [layer_ta]
            per-layer convolution states

        Returns
        -------
        next_elements_finished
        next_input
        updated_queues
        """
        # current_features.shape = [batch_size, num_features]
        # current_input.shape = [batch_size, num_features + 1] after concat
        current_features = features_ta.read(time)
        current_input = tf.concat([current_input, current_features], axis=1)

        # x_proj.shape = [batch_size, residual_channels]
        with tf.variable_scope('x-proj-decode', reuse=True):
            w_x_proj = tf.get_variable('weights')
            b_x_proj = tf.get_variable('biases')
            x_proj = tf.nn.tanh(tf.matmul(current_input, w_x_proj) + b_x_proj)

        skip_outputs, updated_queues = [], []
        for i, (queue, dilation) in enumerate(zip(queues, self.dilations)):
            # Queue entries are conv_input slices or earlier x_proj values,
            # so state.shape = [batch_size, residual_channels].
            state = queue.read(time)
            with tf.variable_scope('dilated-conv-decode-{}'.format(i),
                                   reuse=True):
                # The convolution width is 2. For decode step t:
                #   conv[t] = state[t] * filter[0] + x_proj[t] * filter[1]
                # state[t] comes from the encoder for t < dilation and from
                # earlier decode steps afterwards.
                # w_conv is indexed [convolution_width, :, :]; per the
                # original notes the pre-gate output is
                # [batch_size, 2 * residual_channels] -- TODO confirm.
                w_conv = tf.get_variable('weights')
                b_conv = tf.get_variable('biases')
                dilated_conv = (tf.matmul(state, w_conv[0, :, :])
                                + tf.matmul(x_proj, w_conv[1, :, :])
                                + b_conv)
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=1)
            dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            with tf.variable_scope('dilated-conv-proj-decode-{}'.format(i),
                                   reuse=True):
                w_proj = tf.get_variable('weights')
                b_proj = tf.get_variable('biases')
                concat_outputs = tf.matmul(dilated_conv, w_proj) + b_proj
            skips, residuals = tf.split(
                concat_outputs,
                [self.skip_channels, self.residual_channels],
                axis=1)

            x_proj += residuals
            skip_outputs.append(skips)

            # Queue slots 0 .. dilation - 1 hold the encoder seed. The
            # loop reads slot `time` (starting at 0), so the value computed
            # now is needed `dilation` steps later and is written to slot
            # time + dilation.
            updated_queues.append(queue.write(time + dilation, x_proj))

        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=1))
        with tf.variable_scope('dense-decode-1', reuse=True):
            w_h = tf.get_variable('weights')
            b_h = tf.get_variable('biases')
            h = tf.nn.relu(tf.matmul(skip_outputs, w_h) + b_h)

        with tf.variable_scope('dense-decode-2', reuse=True):
            w_y = tf.get_variable('weights')
            b_y = tf.get_variable('biases')
            y_hat = tf.matmul(h, w_y) + b_y

        elements_finished = (time >= self.decode_len)
        finished = tf.reduce_all(elements_finished)

        next_input = tf.cond(
            finished,
            lambda: tf.zeros([batch_size, 1], dtype=tf.float32),
            lambda: y_hat)
        next_elements_finished = (time >= self.decode_len - 1)

        return (next_elements_finished, next_input, updated_queues)

    def condition(unused_time, elements_finished, *_):
        # Keep looping until every element is finished.
        return tf.logical_not(tf.reduce_all(elements_finished))

    def body(time, elements_finished, emit_ta, *state_queues):
        # NOTE(review): initial_input is passed on every step, so the value
        # returned by loop_fn is only emitted, never fed back as the next
        # input -- confirm this is intended.
        (next_finished, emit_output, state_queues) = loop_fn(
            time, initial_input, state_queues)

        # Emit zeros for elements that had already finished.
        emit = tf.where(
            elements_finished, tf.zeros_like(emit_output), emit_output)
        emit_ta = emit_ta.write(time, emit)

        elements_finished = tf.logical_or(elements_finished, next_finished)
        return [time + 1, elements_finished, emit_ta] + list(state_queues)

    returned = tf.while_loop(
        cond=condition,
        body=body,
        loop_vars=[time, elements_finished, emit_ta] + state_queues)

    outputs_ta = returned[2]
    y_hat = tf.transpose(outputs_ta.stack(), (1, 0, 2))
    return y_hat
def decode(self, x, conv_inputs, features):
    """Run the decoder step by step inside a ``tf.while_loop``.

    Args:
        x: output of ``encode`` (per the original notes, shape
            ``(batch_size, seq_len, 1)``); the value at ``encode_len - 1``
            of each example is the decoder input.
        conv_inputs: per-layer encoder activations, one per dilation.
        features: decode-time features, batch-major (per the original
            notes, ``(batch_size, num_decode_steps, num_features)``).

    Returns:
        Batch-major stack of per-step outputs (per the original notes,
        ``(batch_size, num_decode_steps, 1)``).
    """
    batch_size = tf.shape(x)[0]

    # initialize state tensor arrays
    state_queues = []
    for conv_input, dilation in zip(conv_inputs, self.dilations):
        batch_idx = tf.range(batch_size)
        # e.g. batch_size=5, dilation=3 gives
        # [[0,0,0],[1,1,1],[2,2,2],[3,3,3],[4,4,4]]
        batch_idx = tf.tile(tf.expand_dims(batch_idx, 1), (1, dilation))
        # flattened: [0,0,0,1,1,1,2,2,2,3,3,3,4,4,4]
        batch_idx = tf.reshape(batch_idx, [-1])

        # First time step of the state queue for each example.
        queue_begin_time = self.encode_len - dilation - 1
        # e.g. if every begin time is 7 and dilation=3, each row of
        # temporal_idx is [7, 8, 9]; flattened it becomes
        # [7,8,9,7,8,9,...].
        temporal_idx = (tf.expand_dims(queue_begin_time, 1)
                        + tf.expand_dims(tf.range(dilation), 0))
        temporal_idx = tf.reshape(temporal_idx, [-1])

        # (batch_size * dilation, 2) index pairs for tf.gather_nd.
        idx = tf.stack([batch_idx, temporal_idx], axis=1)
        # Per-example window of conv_input:
        # (batch_size, dilation, feature_size).
        slices = tf.reshape(
            tf.gather_nd(conv_input, idx),
            (batch_size, dilation, shape(conv_input, 2)))

        layer_ta = tf.TensorArray(
            dtype=tf.float32, size=dilation + self.num_decode_steps)
        # Stored time-major: `dilation` entries of (batch_size, feature_size).
        layer_ta = layer_ta.unstack(tf.transpose(slices, (1, 0, 2)))
        state_queues.append(layer_ta)

    # initialize feature tensor array: one (batch_size, num_features) entry
    # per decode step
    features_ta = tf.TensorArray(dtype=tf.float32, size=self.num_decode_steps)
    features_ta = features_ta.unstack(tf.transpose(features, (1, 0, 2)))

    # initialize output tensor array
    emit_ta = tf.TensorArray(size=self.num_decode_steps, dtype=tf.float32)

    # initialize other loop vars
    elements_finished = 0 >= self.decode_len
    time = tf.constant(0, dtype=tf.int32)

    # get initial x input
    # current_idx pairs each batch index with encode_len - 1, e.g.
    # [[0,365],[1,365],...,[31,365]] for batch_size=32, encode_len=366.
    current_idx = tf.stack(
        [tf.range(tf.shape(self.encode_len)[0]), self.encode_len - 1], axis=1)
    initial_input = tf.gather_nd(x, current_idx)  # shape (batch_size, 1)

    def loop_fn(time, current_input, queues):
        """Compute one decode step and the updated layer queues.

        ``queues`` are the per-layer state queues built above.
        """
        # Decode features for this step, concatenated onto the input.
        current_features = features_ta.read(time)
        current_input = tf.concat([current_input, current_features], axis=1)

        # Reuses the variables created in scope x-proj-decode.
        with tf.variable_scope('x-proj-decode', reuse=True):
            w_x_proj = tf.get_variable('weights')
            b_x_proj = tf.get_variable('biases')
            x_proj = tf.nn.tanh(tf.matmul(current_input, w_x_proj) + b_x_proj)

        skip_outputs, updated_queues = [], []
        for i, (queue, dilation) in enumerate(zip(queues, self.dilations)):
            # Queued state for this layer at the current step.
            state = queue.read(time)

            with tf.variable_scope('dilated-conv-decode-{}'.format(i),
                                   reuse=True):
                # w_conv is indexed [conv_width=2, :, :].
                w_conv = tf.get_variable('weights')
                b_conv = tf.get_variable('biases')
                dilated_conv = (tf.matmul(state, w_conv[0, :, :])
                                + tf.matmul(x_proj, w_conv[1, :, :])
                                + b_conv)
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=1)
            dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            with tf.variable_scope('dilated-conv-proj-decode-{}'.format(i),
                                   reuse=True):
                w_proj = tf.get_variable('weights')
                b_proj = tf.get_variable('biases')
                concat_outputs = tf.matmul(dilated_conv, w_proj) + b_proj
            skips, residuals = tf.split(
                concat_outputs,
                [self.skip_channels, self.residual_channels],
                axis=1)

            x_proj += residuals  # shape (batch_size, residual_channels)
            skip_outputs.append(skips)
            # Becomes this layer's state `dilation` steps from now.
            updated_queues.append(queue.write(time + dilation, x_proj))

        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=1))
        with tf.variable_scope('dense-decode-1', reuse=True):
            w_h = tf.get_variable('weights')
            b_h = tf.get_variable('biases')
            h = tf.nn.relu(tf.matmul(skip_outputs, w_h) + b_h)

        with tf.variable_scope('dense-decode-2', reuse=True):
            w_y = tf.get_variable('weights')
            b_y = tf.get_variable('biases')
            y_hat = tf.matmul(h, w_y) + b_y

        elements_finished = (time >= self.decode_len)
        finished = tf.reduce_all(elements_finished)

        next_input = tf.cond(
            finished,
            lambda: tf.zeros([batch_size, 1], dtype=tf.float32),
            lambda: y_hat)
        next_elements_finished = (time >= self.decode_len - 1)

        return (next_elements_finished, next_input, updated_queues)

    def condition(unused_time, elements_finished, *_):
        # Continue while at least one element is unfinished.
        return tf.logical_not(tf.reduce_all(elements_finished))

    def body(time, elements_finished, emit_ta, *state_queues):
        # NOTE(review): initial_input is passed on every step, so the value
        # returned by loop_fn is only emitted, never fed back as the next
        # input -- confirm this is intended.
        (next_finished, emit_output, state_queues) = loop_fn(
            time, initial_input, state_queues)

        # Elements already marked finished emit zeros instead of the
        # network output.
        emit = tf.where(
            elements_finished, tf.zeros_like(emit_output), emit_output)
        # Store the emitted value for this step.
        emit_ta = emit_ta.write(time, emit)

        elements_finished = tf.logical_or(elements_finished, next_finished)
        return [time + 1, elements_finished, emit_ta] + list(state_queues)

    # Graph-level loop.
    returned = tf.while_loop(
        cond=condition,
        body=body,
        loop_vars=[time, elements_finished, emit_ta] + state_queues)

    # Index 2 of the loop vars is the final emit_ta.
    outputs_ta = returned[2]
    y_hat = tf.transpose(outputs_ta.stack(), (1, 0, 2))
    return y_hat
def decode(self, x, convolutionInputs, features):
    """Generate decoder outputs one time step at a time in a ``tf.while_loop``.

    One TensorArray "queue" per dilated layer is seeded from a slice of the
    corresponding tensor in ``convolutionInputs`` and then advanced by one
    step per loop iteration. All weights are fetched with ``reuse=True`` from
    the ``*-decode`` variable scopes, so those variables must already exist.

    Args:
        x: input series, indexed as ``x[batch, time]`` to pick the value at
            ``self.lengthOfencode - 1`` for every batch element
            (presumably shaped (batch, time, channels) -- TODO confirm).
        convolutionInputs: one tensor per dilated layer, paired positionally
            with ``self.dilations``.
        features: rank-3 tensor of decoder features; it is transposed with
            ``(1, 0, 2)`` and unstacked so that axis 1 is the decode step.

    Returns:
        The per-step outputs stacked and transposed with ``(1, 0, 2)``,
        i.e. decode step on axis 1.
    """
    batchSize = tf.shape(x)[0]

    # Seed the state queue of every layer with that layer's activations at
    # time steps lengthOfencode - dilation - 1 .. lengthOfencode - 2.
    state_queues = []
    for conv_input, dilation in zip(convolutionInputs, self.dilations):
        # Batch index repeated `dilation` times: [0, 0, ..., 1, 1, ...].
        batch_idx = tf.range(batchSize)
        batch_idx = tf.tile(tf.expand_dims(batch_idx, 1), (1, dilation))
        batch_idx = tf.reshape(batch_idx, [-1])

        # Matching time index for every (batch element, queue slot) pair.
        queue_begin_time = self.lengthOfencode - dilation - 1
        time_idx = tf.expand_dims(queue_begin_time, 1) + tf.expand_dims(tf.range(dilation), 0)
        time_idx = tf.reshape(time_idx, [-1])

        # Gather the selected (batch, time) rows and restore the
        # (batch, dilation, channels) layout.
        idx = tf.stack([batch_idx, time_idx], axis=1)
        slices = tf.reshape(
            tf.gather_nd(conv_input, idx),
            (batchSize, dilation, shape(conv_input, 2)),
        )

        # Slots 0..dilation-1 hold the gathered history; the remaining
        # decodeCount slots are written while decoding.
        layer_queue = tf.TensorArray(dtype=tf.float32, size=dilation + self.decodeCount)
        layer_queue = layer_queue.unstack(tf.transpose(slices, (1, 0, 2)))
        state_queues.append(layer_queue)

    # Per-step decoder features, readable by time index.
    featuresTensorArray = tf.TensorArray(dtype=tf.float32, size=self.decodeCount)
    featuresTensorArray = featuresTensorArray.unstack(tf.transpose(features, (1, 0, 2)))

    # Collects the emitted output of every decode step.
    FinalemittedArray = tf.TensorArray(size=self.decodeCount, dtype=tf.float32)

    # Remaining loop variables.
    finishedElements = 0 >= self.lengthOfDecode
    time = tf.constant(0, dtype=tf.int32)

    # Value of x at index lengthOfencode - 1 for each batch element.
    current_idx = tf.stack(
        [tf.range(tf.shape(self.lengthOfencode)[0]), self.lengthOfencode - 1],
        axis=1,
    )
    initial_input = tf.gather_nd(x, current_idx)

    def loopfunction(time, current_input, queues):
        """Run one decode step; return (next finished mask, output, queues)."""
        current_features = featuresTensorArray.read(time)
        current_input = tf.concat([current_input, current_features], axis=1)

        # Input projection: dense layer followed by tanh.
        with tf.variable_scope('x-proj-decode', reuse=True):
            w_xProjection = tf.get_variable('weights')
            b_xProjection = tf.get_variable('biases')
            xProjection = tf.nn.tanh(tf.matmul(current_input, w_xProjection) + b_xProjection)

        skipChannels, updated_queues = [], []
        # Use the same dilation sequence that sized and seeded the queues, so
        # reads at `time` and writes at `time + dilation` stay aligned.
        for i, (queue, dilation) in enumerate(zip(queues, self.dilations)):
            state = queue.read(time)

            with tf.variable_scope('dilated-conv-decode-{}'.format(i), reuse=True):
                w_conv = tf.get_variable('weights')
                b_conv = tf.get_variable('biases')
                # Width-2 dilated convolution written as two matmuls: tap 0
                # sees the queued state, tap 1 sees the current projection.
                dilatedConvolution = (
                    tf.matmul(state, w_conv[0, :, :])
                    + tf.matmul(xProjection, w_conv[1, :, :])
                    + b_conv
                )
            # Gated activation: tanh(filter half) * sigmoid(gate half).
            filterConvolution, gateConvolution = tf.split(dilatedConvolution, 2, axis=1)
            dilatedConvolution = tf.nn.tanh(filterConvolution) * tf.nn.sigmoid(gateConvolution)

            with tf.variable_scope('dilated-conv-proj-decode-{}'.format(i), reuse=True):
                wProjection = tf.get_variable('weights')
                bProjection = tf.get_variable('biases')
                # Dense projection producing skip and residual channels.
                concat_outputs = tf.matmul(dilatedConvolution, wProjection) + bProjection
            skips, residuals = tf.split(
                concat_outputs,
                [self.num_of_skip_channels, self.num_of_residual_channels],
                axis=1,
            )

            # Residual connection feeds the next layer; the updated value is
            # queued for this layer to read `dilation` steps later.
            xProjection += residuals
            skipChannels.append(skips)
            updated_queues.append(queue.write(time + dilation, xProjection))

        # Output head: ReLU over concatenated skips, then two dense layers.
        skipChannels = tf.nn.relu(tf.concat(skipChannels, axis=1))
        with tf.variable_scope('dense-decode-1', reuse=True):
            w_h = tf.get_variable('weights')
            b_h = tf.get_variable('biases')
            h = tf.nn.relu(tf.matmul(skipChannels, w_h) + b_h)

        with tf.variable_scope('dense-decode-2', reuse=True):
            w_y = tf.get_variable('weights')
            b_y = tf.get_variable('biases')
            y_hat = tf.matmul(h, w_y) + b_y

        # Emit zeros once every element has passed its decode length.
        step_finished = (time >= self.lengthOfDecode)
        finished = tf.reduce_all(step_finished)

        next_input = tf.cond(
            finished,
            lambda: tf.zeros([batchSize, 1], dtype=tf.float32),
            lambda: y_hat
        )
        next_finishedElements = (time >= self.lengthOfDecode - 1)

        return (next_finishedElements, next_input, updated_queues)

    def condition(unused_time, finishedElements, *_):
        # Keep looping while at least one element is unfinished.
        return tf.logical_not(tf.reduce_all(finishedElements))

    def body(time, finishedElements, FinalemittedArray, *state_queues):
        # NOTE(review): every step is fed `initial_input`, not the previous
        # step's prediction -- confirm this is intended before changing it.
        (next_finished, FinalemittedOutput, state_queues) = loopfunction(
            time, initial_input, state_queues
        )

        # Zero the output rows of elements that had already finished.
        emit = tf.where(finishedElements, tf.zeros_like(FinalemittedOutput), FinalemittedOutput)
        FinalemittedArray = FinalemittedArray.write(time, emit)

        finishedElements = tf.logical_or(finishedElements, next_finished)
        return [time + 1, finishedElements, FinalemittedArray] + list(state_queues)

    returned = tf.while_loop(
        cond=condition,
        body=body,
        loop_vars=[time, finishedElements, FinalemittedArray] + state_queues
    )

    # Index 2 of the loop variables is the output TensorArray.
    outputsTensorArray = returned[2]
    y_hat = tf.transpose(outputsTensorArray.stack(), (1, 0, 2))
    return y_hat