# Common imports assumed by the snippets below (numpy, TF 1.x with contrib).
import numpy as np
import tensorflow as tf
from tensorflow.contrib import layers, rnn


def conv_LSTM(state, scope):
    with tf.variable_scope(scope):
        state_shape = state.shape.as_list()
        channels = state_shape[-1]
        conv_LSTM_cell = rnn.ConvLSTMCell(
            conv_ndims=2,
            input_shape=state_shape[1:],  # drop the leading rollout/time axis; per-step input is [width, height, channels]
            output_channels=channels,
            kernel_shape=[1, 1],
        )
        # State will have the following shape:
        # [timesteps, batch size, width, height, channels]
        # timesteps = rollout size
        # batch size = 1 (online learning)
        # width = height = size_s = size_m
        # channels = depends on preprocessing, number of features, etc...
        state = tf.expand_dims(state, 1)
        conv_LSTM_out, _ = tf.nn.dynamic_rnn(
            conv_LSTM_cell,
            state,
            time_major=True,
            dtype=state.dtype
        )
        return conv_LSTM_out
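
# A sketch of calling conv_LSTM on a rollout of feature maps; the shapes,
# placeholder name, and session boilerplate are illustrative assumptions.
state_ph = tf.placeholder(tf.float32, [16, 32, 32, 24], name='rollout_state')
out = conv_LSTM(state_ph, scope='conv_lstm_demo')
# out has shape [16, 1, 32, 32, 24]: time-major, with the batch axis of 1
# added by tf.expand_dims inside conv_LSTM.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    result = sess.run(out, {state_ph: np.zeros([16, 32, 32, 24], np.float32)})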
def __init__(self, config, initial_state):
    self.config = config
    with tf.variable_scope("trans") as scope:
        self.inputs = tf.placeholder(tf.float32, [
            self.config.trans_in_l, None, self.config.trans_seq_l,
            self.config.trans_in_w, self.config.trans_in_h
        ], name="trans_inputs")
        self.targets = tf.placeholder(tf.float32, [
            self.config.trans_in_l, None, self.config.trans_seq_l,
            self.config.trans_in_w, self.config.trans_in_h
        ], name="trans_target")
        # ConvLSTMCell keeps its state as an LSTMStateTuple(c, h), and both
        # parts carry `trans_output_channel` channels, so a single placeholder
        # cannot be fed as the initial state; build the tuple from two
        # placeholders instead.
        self.initial_c = tf.placeholder(tf.float32, [
            None, self.config.trans_in_w, self.config.trans_in_h,
            self.config.trans_output_channel
        ], name="trans_initial_c")
        self.initial_h = tf.placeholder(tf.float32, [
            None, self.config.trans_in_w, self.config.trans_in_h,
            self.config.trans_output_channel
        ], name="trans_initial_h")
        self.initial_state_input = rnn.LSTMStateTuple(self.initial_c,
                                                      self.initial_h)
        self.rnn = rnn.ConvLSTMCell(self.config.trans_conv_ndims, [
            self.config.trans_in_w, self.config.trans_in_h,
            self.config.trans_in_l
        ], self.config.trans_output_channel, self.config.trans_kernel_shape,
            name="trans_conv_lstm")
        self.trans_output, self.trans_output_state = tf.nn.dynamic_rnn(
            self.rnn, inputs=self.inputs,
            initial_state=self.initial_state_input)
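
# With the LSTMStateTuple above, the first chunk of a sequence can be fed an
# all-zero state. A sketch; `model`, `sess`, `input_batch`, and the batch size
# are illustrative assumptions.
batch = 4
zeros = np.zeros([batch, config.trans_in_w, config.trans_in_h,
                  config.trans_output_channel], np.float32)
output, state = sess.run(
    [model.trans_output, model.trans_output_state],
    feed_dict={model.inputs: input_batch,
               model.initial_c: zeros,
               model.initial_h: zeros})
# Feed state.c / state.h back in on the next chunk to carry the memory forward.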
def build(self, features):
    # `layers` is tf.contrib.layers; `actions` is pysc2.lib.actions.
    # `cnn_block` is a project helper (a sketch follows this function).
    # Extract features while preserving the dimensions
    # Minimap convolutions
    m_conv = cnn_block(features["minimap"], scope="m")
    s_conv = cnn_block(features["screen"], scope="s")
    # Create the state representation by concatenating on the channel axis
    state_representation = tf.concat(
        [m_conv, s_conv, tf.transpose(features["info"], [0, 2, 3, 1])],
        axis=3,
        name="state_representation")
    state_shape = state_representation.shape.as_list()
    # dynamic_rnn (batch-major) treats axis 1 (height) as time here, so each
    # step sees a [width, channels] slice; that per-step shape is what the
    # 1-D cell expects as input_shape.
    conv_LSTM_cell = rnn.ConvLSTMCell(
        conv_ndims=1,
        input_shape=[state_shape[2], state_shape[3]],
        output_channels=state_shape[3],
        kernel_shape=[1])
    conv_LSTM_out, state = tf.nn.dynamic_rnn(
        conv_LSTM_cell,
        state_representation,
        dtype=state_representation.dtype)
    fc = layers.fully_connected(
        layers.flatten(conv_LSTM_out),
        num_outputs=256,
        activation_fn=tf.nn.relu,
        scope='fully_conv_lstm_features',
    )
    spatial_action = tf.nn.softmax(
        layers.flatten(
            layers.conv2d(state_representation,
                          num_outputs=1,
                          kernel_size=1,
                          stride=1,
                          activation_fn=None,
                          scope='spatial_policy')))
    non_spatial_action = layers.fully_connected(
        fc,
        num_outputs=len(actions.FUNCTIONS),
        activation_fn=tf.nn.softmax,
        scope='non_spatial_policy')
    value = layers.fully_connected(fc,
                                   num_outputs=1,
                                   activation_fn=None,
                                   scope='value')
    return spatial_action, non_spatial_action, value
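
# cnn_block is not shown in the snippet above. A minimal sketch of what such a
# feature extractor might look like, assuming SAME-padded convolutions so the
# spatial dimensions are preserved; the layer sizes are assumptions, not the
# original code.
def cnn_block(inputs, scope):
    with tf.variable_scope(scope):
        conv1 = layers.conv2d(inputs, num_outputs=16, kernel_size=5,
                              padding='SAME', scope='conv1')
        conv2 = layers.conv2d(conv1, num_outputs=32, kernel_size=3,
                              padding='SAME', scope='conv2')
        return conv2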
def __init__(self, state, size, step_size, filters, scope):
    state_shape = state.shape.as_list()
    lstm = rnn.ConvLSTMCell(
        conv_ndims=2,
        input_shape=state_shape[1:],  # drop the leading rollout/time axis; per-step input is [width, height, channels]
        output_channels=filters,
        kernel_shape=[3, 3]
    )
    c_size = [int(c) for c in lstm.state_size.c]
    h_size = [int(h) for h in lstm.state_size.h]
    c_init = np.zeros([1] + c_size, np.float32)
    h_init = np.zeros([1] + h_size, np.float32)
    self.state_init = [c_init, h_init]
    c_in = tf.placeholder(
        tf.float32, shape=[1] + c_size, name='c_in_{}'.format(scope)
    )
    h_in = tf.placeholder(
        tf.float32, shape=[1] + h_size, name='h_in_{}'.format(scope)
    )
    self.state_in = [c_in, h_in]
    state_in = rnn.LSTMStateTuple(c_in, h_in)
    # State will have the following shape:
    # [timesteps, batch size, width, height, channels]
    # timesteps = rollout size
    # batch size = 1 (online learning)
    # width = height = size_s = size_m
    # channels = depends on preprocessing, number of features, etc...
    state = tf.expand_dims(state, 1)
    lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
        lstm,
        state,
        time_major=True,
        initial_state=state_in,
        sequence_length=step_size
    )
    lstm_outputs = tf.reshape(lstm_outputs, [-1, size, size, filters])
    lstm_c, lstm_h = lstm_state
    self.state_out = [lstm_c[:1, :], lstm_h[:1, :]]
    self.output = lstm_outputs
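
# A sketch of how such a layer is typically driven in an A3C-style loop,
# carrying the c/h state across rollouts. The class name ConvLSTMLayer, the
# placeholders, and the rollout objects are illustrative assumptions.
layer = ConvLSTMLayer(state=state_ph, size=32, step_size=step_size_ph,
                      filters=32, scope='worker_0')
rnn_state = layer.state_init  # zero c/h at the start of an episode
for rollout in rollouts:      # assumed: iterable of observation batches
    fetched = sess.run(
        [layer.output] + layer.state_out,
        feed_dict={
            state_ph: rollout,            # [timesteps, 32, 32, channels]
            step_size_ph: [len(rollout)],
            layer.state_in[0]: rnn_state[0],
            layer.state_in[1]: rnn_state[1],
        })
    output, rnn_state = fetched[0], fetched[1:]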
# `common.shape_list` is a tensor2tensor-style helper that returns the tensor
# shape as a list (static where known); `contrib_rnn` is tf.contrib.rnn.
def conv_lstm_2d(inputs, state, output_channels,
                 kernel_size=5, name=None, spatial_dims=None):
    """2D Convolutional LSTM."""
    input_shape = common.shape_list(inputs)
    batch_size, input_channels = input_shape[0], input_shape[-1]
    if spatial_dims is None:
        input_shape = input_shape[1:]
    else:
        input_shape = spatial_dims + [input_channels]

    cell = contrib_rnn.ConvLSTMCell(2, input_shape, output_channels,
                                    [kernel_size, kernel_size], name=name)
    if state is None:
        state = cell.zero_state(batch_size, tf.float32)
    outputs, new_state = cell(inputs, state)
    return outputs, new_state
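
# conv_lstm_2d runs the cell for a single step, so callers feed one frame at a
# time and thread the returned state through. A minimal sketch; the frame
# shape and channel counts are assumptions.
frame = tf.placeholder(tf.float32, [8, 64, 64, 3], name='frame')
out, state = conv_lstm_2d(frame, state=None, output_channels=16)
# On the next frame, pass `state` (an LSTMStateTuple) back in to carry memory.
# Across a whole video this is usually done in a loop under a variable scope
# with reuse=tf.AUTO_REUSE so every step shares the same kernel.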
def __init__(self, model, channel_num, batch_size, seq_len, learning_rate,
             ws, wg, wt, phase, sum_dir):
    if phase == 'train' or phase == 'test':
        self.inputNoiseList = [
            tf.placeholder(tf.float32, [batch_size, 128, 128, channel_num])
            for _ in range(seq_len)
        ]
        self.inputCleanList = [
            tf.placeholder(tf.float32, [batch_size, 128, 128, 3])
            for _ in range(seq_len)
        ]
    else:
        self.inputNoiseList = [
            tf.placeholder(tf.float32, [batch_size, 416, 800, channel_num])
            for _ in range(seq_len)
        ]
        self.inputCleanList = [
            tf.placeholder(tf.float32, [batch_size, 416, 800, 3])
            for _ in range(seq_len)
        ]

    # `arg_scope` comes from tf.contrib.framework.
    with arg_scope(
            [layers.conv2d],
            activation_fn=tf.nn.leaky_relu,
            #normalizer_fn=layers.batch_norm,
            normalizer_params={'scale': True},
            padding='SAME'):
        with tf.variable_scope("model") as scope:
            # Full VAEGAN structure
            if phase == 'train' or phase == 'test':
                inpH, inpW = 128, 128
            else:
                inpH, inpW = 416, 800
            if model == 'RAE':
                with tf.name_scope("initalize_RNN_cell"):
                    # Integer division: ConvLSTMCell needs integer spatial dims.
                    cell1 = rnn.ConvLSTMCell(2, [inpH, inpW, 32], 32, [3, 3], name='rnn1')
                    cell2 = rnn.ConvLSTMCell(2, [inpH // 2, inpW // 2, 43], 43, [3, 3], name='rnn2')
                    cell3 = rnn.ConvLSTMCell(2, [inpH // 4, inpW // 4, 57], 57, [3, 3], name='rnn3')
                    cell4 = rnn.ConvLSTMCell(2, [inpH // 8, inpW // 8, 76], 76, [3, 3], name='rnn4')
                    cell5 = rnn.ConvLSTMCell(2, [inpH // 16, inpW // 16, 101], 101, [3, 3], name='rnn5')
                    cell6 = rnn.ConvLSTMCell(2, [inpH // 32, inpW // 32, 101], 101, [3, 3], name='rnn6')
                # Encoder
                l1, l2, l3, l4, l5, out = encoderRNN(
                    self.inputNoiseList, batch_size, cell1, cell2, cell3,
                    cell4, cell5, cell6, (inpH, inpW), reuse_vars=False)
            elif model == "AE":
                l1, l2, l3, l4, l5, out = encoder(
                    self.inputNoiseList, batch_size, reuse_vars=False)

            Enc_params_num = len(tf.trainable_variables())

            # Decoder / Generator
            self.denoised_imgList = decoder(
                l1, l2, l3, l4, l5, out, (inpH, inpW), reuse_vars=False)
            Enc_n_Dec_params_num = len(tf.trainable_variables())

    self.params = tf.trainable_variables()
    self.Enc_params = self.params[:Enc_params_num]
    self.Dec_params = self.params[Enc_params_num:Enc_n_Dec_params_num]
    print(len(self.params))
    for var in self.params:
        print(var.name)

    self.Spatial_loss = self.__get_L1_loss(self.denoised_imgList,
                                           self.inputCleanList)
    Spatial_loss_sum = tf.summary.scalar('Spatial_loss', self.Spatial_loss)
    self.Gradient_loss = self.__get_grad_L1_loss(self.denoised_imgList,
                                                 self.inputCleanList)
    Gradient_loss_sum = tf.summary.scalar('Gradient_loss', self.Gradient_loss)

    if model == 'RAE':
        self.Temporal_loss = self.__get_tem_L1_loss(
            self.denoised_imgList, self.inputCleanList)
        Temporal_loss_sum = tf.summary.scalar('Temporal_loss',
                                              self.Temporal_loss)
        # merge summary for Tensorboard
        self.detached_loss_summary_merged = tf.summary.merge(
            [Spatial_loss_sum, Gradient_loss_sum, Temporal_loss_sum])
        # loss function
        total_loss = ws * self.Spatial_loss + wg * self.Gradient_loss \
            + wt * self.Temporal_loss
    elif model == 'AE':
        self.detached_loss_summary_merged = tf.summary.merge(
            [Spatial_loss_sum, Gradient_loss_sum])
        # loss function
        total_loss = ws * self.Spatial_loss + wg * self.Gradient_loss

    # self.train = layers.optimize_loss(total_loss, tf.train.get_or_create_global_step(
    #     ), learning_rate=learning_rate, variables=self.params,
    #     optimizer='RMSProp', update_ops=[])
    self.train = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                        beta1=0.9,
                                        beta2=0.99,
                                        epsilon=1e-08,
                                        name='Adam').minimize(
                                            total_loss, var_list=self.params)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    self.sess.run(tf.global_variables_initializer())
    # sum_dir may need .replace('\\', '/') on Windows paths.
    self.train_writer = tf.summary.FileWriter(sum_dir, self.sess.graph)
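
# A sketch of one training step against the graph above; the numpy frame
# lists (noisy_frames, clean_frames), `net`, and `global_step` are
# illustrative assumptions.
feed_dict = {}
for ph, frame in zip(net.inputNoiseList, noisy_frames):  # seq_len frames
    feed_dict[ph] = frame                                # [batch, 128, 128, channel_num]
for ph, frame in zip(net.inputCleanList, clean_frames):
    feed_dict[ph] = frame                                # [batch, 128, 128, 3]
_, summary = net.sess.run(
    [net.train, net.detached_loss_summary_merged], feed_dict)
net.train_writer.add_summary(summary, global_step)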
with graph.as_default():
    # placeholders
    encoder_inputs = tf.placeholder(
        tf.float32, [TIME_STEPS, None, GRID_SIZE, GRID_SIZE, CHANNEL])
    decoder_target_outputs = tf.placeholder(
        tf.float32, [TIME_STEPS, None, GRID_SIZE, GRID_SIZE, CHANNEL])
    _, batch_size, _, _, _ = tf.unstack(tf.shape(encoder_inputs))

    #------------------------------------Encoder------------------------------------------#
    # ConvLSTM instances
    encoder_cell_1 = rnn.ConvLSTMCell(
        conv_ndims=2,
        input_shape=[GRID_SIZE, GRID_SIZE, CHANNEL],
        output_channels=16,
        kernel_shape=[16, 16],
        use_bias=True,
        skip_connection=False,
        forget_bias=1.0,
        initializers=None,
        name='encode')
    encoder_cell_2 = rnn.ConvLSTMCell(
        conv_ndims=2,
        input_shape=[GRID_SIZE, GRID_SIZE, CHANNEL],
        output_channels=16,
        kernel_shape=[16, 16],
        use_bias=True,
        skip_connection=False,
        forget_bias=1.0,
        initializers=None,
        name='encoder')
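    # The snippet stops after defining the cells. A minimal sketch of running
    # the first cell time-major over the inputs (an assumption; the original
    # wiring is not shown). With matching channel counts the two cells could
    # also be stacked via rnn.MultiRNNCell.
    encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
        encoder_cell_1,
        encoder_inputs,  # [TIME_STEPS, batch, GRID_SIZE, GRID_SIZE, CHANNEL]
        time_major=True,
        dtype=tf.float32)
    # encoder_final_state can then seed a decoder that is trained against
    # decoder_target_outputs.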
def get_model(input, is_training):
    # input: bx10x227x227
    with tf.variable_scope('spatial_encoder_1'):
        input = tf.expand_dims(input, axis=-1)
        print(input)
        encoded = tf.layers.conv3d(input, 128,
                                   kernel_size=[1, 11, 11],
                                   strides=[1, 4, 4],
                                   padding='valid',
                                   name='conv1')
        encoded = tf.layers.batch_normalization(
            encoded, training=is_training)  # bx10x55x55x128
        encoded = tf.nn.tanh(encoded, name='tanh_1')
    with tf.variable_scope('spatial_encoder_2'):
        encoded = tf.layers.conv3d(encoded, 64,
                                   kernel_size=[1, 5, 5],
                                   strides=[1, 2, 2],
                                   padding='valid',
                                   name='conv2')
        encoded = tf.layers.batch_normalization(
            encoded, training=is_training)  # bx10x26x26x64
        encoded = tf.nn.tanh(encoded, name='tanh_2')
    with tf.variable_scope('temporal_encoder_1'):
        lstm_cell = rnn.ConvLSTMCell(2,
                                     input_shape=[26, 26, 64],
                                     output_channels=64,
                                     kernel_shape=[3, 3])
        output, _ = tf.nn.dynamic_rnn(lstm_cell, encoded,
                                      initial_state=None, dtype='float32')
    with tf.variable_scope('temporal_encoder_2'):
        lstm_cell = rnn.ConvLSTMCell(2,
                                     input_shape=[26, 26, 64],
                                     output_channels=32,
                                     kernel_shape=[2, 2])
        output, _ = tf.nn.dynamic_rnn(lstm_cell, output,
                                      initial_state=None, dtype='float32')
        # print(output)
    with tf.variable_scope('temporal_decoder_1'):
        lstm_cell = rnn.ConvLSTMCell(2,
                                     input_shape=[26, 26, 32],
                                     output_channels=64,
                                     kernel_shape=[3, 3])
        output, _ = tf.nn.dynamic_rnn(lstm_cell, output,
                                      initial_state=None, dtype='float32')
        print(output)
    with tf.variable_scope('spatial_decoder_1'):
        decoded = tf.layers.conv3d_transpose(output, 128,
                                             kernel_size=[1, 5, 5],
                                             padding='valid',
                                             strides=[1, 2, 2],
                                             name='deconv1')
        decoded = tf.layers.batch_normalization(decoded, training=is_training)
        decoded = tf.nn.tanh(decoded, name='tanh_3')
        # print(decoded)
    with tf.variable_scope('spatial_decoder_2'):
        decoded = tf.layers.conv3d_transpose(decoded, 1,
                                             kernel_size=[1, 11, 11],
                                             padding='valid',
                                             strides=[1, 4, 4],
                                             name='deconv2')
        # print(decoded)
        decoded = tf.squeeze(decoded, axis=-1)
    print(decoded)
    return decoded
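
# get_model reconstructs its input clip (227 -> 55 -> 26 -> 55 -> 227), so a
# typical objective is per-pixel reconstruction error. A minimal training
# sketch; the loss choice, learning rate, and placeholder shape are
# assumptions, not the original training code.
clips = tf.placeholder(tf.float32, [None, 10, 227, 227], name='clips')
reconstruction = get_model(clips, is_training=True)
loss = tf.reduce_mean(tf.square(reconstruction - clips))

# batch_normalization registers update ops that must run with the train op.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)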