def single_cell_fn(unit_type, num_units, dropout, mode, forget_bias=1.0):
    """Build one recurrent cell of the requested kind.

    Args:
        unit_type: ``"lstm"`` or ``"gru"``.
        num_units: Number of units handed to the cell constructor.
        dropout: Input dropout rate; only honoured when ``mode is True``.
        mode: Dropout is enabled only when this is literally ``True``.
        forget_bias: Forget-gate bias forwarded to the LSTM cell.

    Returns:
        The cell, wrapped in a ``DropoutWrapper`` when the effective
        dropout rate is greater than zero.

    Raises:
        ValueError: If ``unit_type`` is neither ``"lstm"`` nor ``"gru"``.
    """
    # Any value of ``mode`` other than the singleton True disables dropout.
    drop_rate = 0.0
    if mode is True:
        drop_rate = dropout

    if unit_type == "gru":
        cell = rnn_cell.GRUCell(num_units)
    elif unit_type == "lstm":
        cell = rnn_cell.LSTMCell(
            num_units, forget_bias=forget_bias, state_is_tuple=False)
    else:
        raise ValueError("Unknown unit type %s!" % unit_type)

    if not drop_rate > 0.0:
        return cell
    return rnn_cell.DropoutWrapper(
        cell=cell, input_keep_prob=(1.0 - drop_rate))
def inference(x, y, n_batch, is_training, input_digits=None,
              output_digits=None, n_hidden=None, n_out=None):
    """Build a bidirectional-GRU encoder / attention-GRU decoder graph.

    The encoder is a static bidirectional GRU over the normalised input.
    The decoder is a GRU wrapped in Bahdanau attention over all encoder
    outputs, initialised from the final forward encoder state.

    Besides its arguments, this function reads ``n_in``, ``num_units``,
    ``attention_layer_size`` and ``tchr_frcng_thr`` from an enclosing scope
    that is not part of this block.

    Args:
        x: Encoder input. It is transposed with ``[1, 0, 2]`` and split into
            ``input_digits`` steps, i.e. it is treated as
            ``[batch, input_digits, n_in]``.
        y: Decoder targets. ``y[:, t - 1, :]`` (normalised) is the
            teacher-forced decoder input for step ``t`` during training.
        n_batch: Static batch size used for zero states and reshapes.
        is_training: The training graph is built only when this is literally
            ``True``; any other value builds the free-running graph.
        input_digits: Number of encoder time steps.
        output_digits: Number of decoder time steps in the result.
        n_hidden: Number of GRU units per cell.
        n_out: Width of the output projection.

    Returns:
        A tensor of shape ``[batch, output_digits, n_out]``.
    """

    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial)

    def batch_normalization(shape, x):
        # `shape` is unused; it is kept so the call sites stay unchanged.
        # Normalises with moments taken over axes 0 and 1, no scale/offset.
        with tf.name_scope('batch_normalization'):
            eps = 1e-8
            mean, var = tf.nn.moments(x, [0, 1])
            nom_batch = (x - mean) / tf.sqrt(var + eps)
        return nom_batch

    encoder_forward = rnn_cell.GRUCell(n_hidden, reuse=tf.AUTO_REUSE)
    encoder_backward = rnn_cell.GRUCell(n_hidden, reuse=tf.AUTO_REUSE)

    # Turn [batch, input_digits, n_in] into a list of `input_digits`
    # tensors of shape [batch, n_in], as static_bidirectional_rnn expects.
    x = tf.transpose(batch_normalization(input_digits, x), [1, 0, 2])
    x = tf.reshape(x, [-1, n_in])
    x = tf.split(x, input_digits, 0)

    # encoder_outputs: list over time of [batch, 2 * n_hidden] tensors;
    # encoder_states_fw / encoder_states_bw are the final states.
    encoder_outputs, encoder_states_fw, encoder_states_bw = \
        tf.nn.static_bidirectional_rnn(
            encoder_forward, encoder_backward, x, dtype=tf.float32)

    # The attention memory must be batch-major. Stacking the per-step
    # outputs along axis 1 keeps every batch row intact; the previous plain
    # reshape of the time-major data mixed batch and time entries. The
    # reshape below only pins the static shape.
    attention_memory = tf.reshape(
        tf.stack(encoder_outputs, axis=1),
        [n_batch, input_digits, n_hidden * 2])
    AttentionMechanism = seq2seq.BahdanauAttention(
        num_units=num_units, memory=attention_memory)

    decoder_1 = rnn_cell.GRUCell(n_hidden, reuse=tf.AUTO_REUSE)
    decoder_1 = seq2seq.AttentionWrapper(
        decoder_1,
        attention_mechanism=AttentionMechanism,
        attention_layer_size=attention_layer_size,
        output_attention=False)
    state_1 = decoder_1.zero_state(n_batch, tf.float32) \
        .clone(cell_state=encoder_states_fw)

    # Seed the decoder output history with the forward half of an encoder
    # output.
    # NOTE(review): this takes step `input_digits - 2`, i.e. the
    # second-to-last encoder step, not the last one -- confirm intent.
    seed_output = tf.slice(encoder_outputs, [input_digits - 2, 0, 0],
                           [1, n_batch, n_hidden])
    decoder_1_outputs = [tf.reshape(seed_output, [n_batch, n_hidden])]

    # Output-layer weights and biases are defined up front.
    V_hid_1 = weight_variable([n_hidden, n_out])
    c_hid_1 = bias_variable([n_out])
    # V_hid_2 / c_hid_2 are not used below; they are still created so the
    # set and naming of graph variables stays as before.
    V_hid_2 = weight_variable([n_hidden, n_out])
    c_hid_2 = bias_variable([n_out])
    V_out = weight_variable([n_hidden, n_out])
    c_out = bias_variable([n_out])

    fc_outputs = []

    # Teacher-forcing switch: draw index 0 with probability tchr_frcng_thr
    # (tf.multinomial takes log-probabilities) and map it through `elems`,
    # so a value of 1 means "feed the ground truth".
    elems = tf.convert_to_tensor([1, 0])
    samples = tf.multinomial(
        tf.log([[tchr_frcng_thr, 1 - tchr_frcng_thr]]), 1)

    with tf.variable_scope('Decoder'):
        if is_training is True:
            # Loop-invariant pieces are built once instead of per step.
            y_norm = batch_normalization(output_digits, y)
            use_teacher = tf.equal(
                elems[tf.cast(samples[0][0], tf.int32)], 1)

        for t in range(1, output_digits):
            if t > 1:
                tf.get_variable_scope().reuse_variables()

            if is_training is True:
                # `t` is a Python int, so the "first step" decision is made
                # here rather than inside the graph. Building a branch that
                # refers to `output_1` at t == 1 would fail, because no
                # decoder step has produced it yet.
                if t == 1:
                    def free_running_input(prev=decoder_1_outputs[-1]):
                        return tf.matmul(prev, V_hid_1) + c_hid_1
                else:
                    # NOTE(review): feeds the raw previous cell output
                    # (width n_hidden), whereas the other branches produce
                    # n_out / target-width inputs -- confirm widths agree.
                    def free_running_input(prev=output_1):
                        return prev

                cell_input = tf.cond(
                    use_teacher,
                    lambda step=t: y_norm[:, step - 1, :],
                    free_running_input)
                (output_1, state_1) = decoder_1(cell_input, state_1)
            else:
                # Project the previous output and feed it back in.
                out_1 = tf.matmul(decoder_1_outputs[-1], V_hid_1) + c_hid_1
                fc_outputs.append(out_1)
                (output_1, state_1) = decoder_1(out_1, state_1)

            decoder_1_outputs.append(
                tf.reshape(output_1, [n_batch, n_hidden]))

    if is_training is True:
        # Stack along axis 1 to get [batch, output_digits, n_hidden]
        # without mixing batch rows.
        output = tf.stack(decoder_1_outputs, axis=1)
        with tf.name_scope('check'):
            linear = tf.einsum('ijk,kl->ijl', output, V_out) + c_out
        return linear

    # Free-running mode: project the last decoder output as the final step.
    fc_out = tf.matmul(decoder_1_outputs[-1], V_out) + c_out
    fc_outputs.append(fc_out)
    output = tf.reshape(tf.concat(fc_outputs, axis=1),
                        [-1, output_digits, n_out])
    return output
def __init__(self, pd, generator=None):
    """Build the encoder/decoder prediction graph and its training ops.

    Everything is created inside a fresh ``tf.Graph`` stored on
    ``self.graph``.

    Args:
        pd: Parameter dictionary. Keys read here include ``batch_size``,
            ``input_samples``, ``pred_samples``, ``fft``, ``linear_reshape``,
            ``cell_type``, ``num_units``, ``num_proj``, ``use_residuals``,
            ``init_learning_rate``, ``decay_steps``, ``decay_rate`` and
            ``stiefel``; further keys are read depending on the selected
            mode (e.g. ``window_function``, ``window_size``, ``overlap``,
            ``fft_compression_rate``, ``freq_loss``, ``epsilon``,
            ``step_size``, ``downsampling``, ``sample_prob``,
            ``power_handler``).
        generator: Optional callable returning the already normalised input
            tensor. When omitted, a placeholder is created and normalised
            with the mean and std of ``pd['power_handler']``.

    Raises:
        ValueError: If ``pd['cell_type']`` is not ``'cgRNN'`` or ``'gru'``,
            or if ``pd['fft']`` is set and ``pd['freq_loss']`` names a loss
            for which this block defines no frequency-domain term.
        NotImplementedError: If the STFT of the input has more than one
            channel.
    """
    self.pd = pd
    self.graph = tf.Graph()
    with self.graph.as_default():
        global_step = tf.Variable(0, name='global_step', trainable=False)

        if generator:
            print('Running synthetic experiment')
            data_nd_norm = generator()
            data_nd = data_nd_norm
        else:
            data_mean = tf.constant(pd['power_handler'].mean, tf.float32,
                                    name='data_mean')
            data_std = tf.constant(pd['power_handler'].std, tf.float32,
                                   name='data_std')
            data_nd = tf.placeholder(
                tf.float32, [pd['batch_size'], pd['input_samples'], 1])
            self.data_nd = data_nd
            data_nd_norm = (data_nd - data_mean) / data_std

        print('data_nd_shape', data_nd_norm.shape)
        dtype = tf.float32

        # The leading part of the sequence is encoded; the trailing
        # pred_samples are the prediction target.
        data_encoder_time, data_decoder_time = tf.split(
            data_nd_norm,
            [pd['input_samples'] - pd['pred_samples'], pd['pred_samples']],
            axis=1)

        if pd['fft']:
            dtype = tf.complex64
            if pd['window_function'] == 'learned_gaussian':
                window = wl.gaussian_window(pd['window_size'])
            elif pd['window_function'] == 'learned_plank':
                window = wl.plank_taper(pd['window_size'])
            elif pd['window_function'] == 'learned_tukey':
                window = wl.tukey_window(pd['window_size'])
            elif pd['window_function'] == 'learned_gauss_plank':
                window = wl.gauss_plank_window(pd['window_size'])
            else:
                window = scisig.get_window(window=pd['window_function'],
                                           Nx=pd['window_size'])
                window = tf.constant(window, tf.float32)

            def transpose_stft_squeeze(in_data, window, pd):
                '''
                Compute a windowed stft and do low pass filtering if
                necessary.

                Returns the (possibly truncated) stft, the shape of the
                untruncated stft as a list, and its number of frequencies.
                '''
                tmp_in_data = tf.transpose(in_data, [0, 2, 1])
                in_data_fft = eagerSTFT.stft(tmp_in_data, window,
                                             pd['window_size'], pd['overlap'])
                freqs = int(in_data_fft.shape[-1])
                idft_shape = in_data_fft.shape.as_list()
                if idft_shape[1] == 1:
                    # in the one dimensional case squeeze the dim away.
                    in_data_fft = tf.squeeze(in_data_fft, axis=1)
                    if pd['fft_compression_rate']:
                        compressed_freqs = int(
                            freqs / pd['fft_compression_rate'])
                        print('fft_compression_rate',
                              pd['fft_compression_rate'],
                              'freqs', freqs,
                              'compressed_freqs', compressed_freqs)
                        # remove frequencies from the last dimension.
                        return (in_data_fft[..., :compressed_freqs],
                                idft_shape, freqs)
                    else:
                        return in_data_fft, idft_shape, freqs
                else:
                    # arrange as batch time freq dim
                    in_data_fft = tf.transpose(in_data_fft, [0, 2, 3, 1])
                    raise NotImplementedError

            data_encoder_freq, _, enc_freqs = \
                transpose_stft_squeeze(data_encoder_time, window, pd)
            data_decoder_freq, dec_shape, dec_freqs = \
                transpose_stft_squeeze(data_decoder_time, window, pd)
            assert enc_freqs == dec_freqs, \
                'encoder-decoder frequencies must agree'
            fft_pred_samples = data_decoder_freq.shape[1].value
        elif pd['linear_reshape']:
            # Group step_size consecutive samples into one RNN time step.
            encoder_time_steps = \
                data_encoder_time.shape[1].value // pd['step_size']
            data_encoder_time = tf.reshape(
                data_encoder_time,
                [pd['batch_size'], encoder_time_steps, pd['step_size']])
            if pd['downsampling'] > 1:
                data_encoder_time = \
                    data_encoder_time[:, :, ::pd['downsampling']]
            decoder_time_steps = \
                data_decoder_time.shape[1].value // pd['step_size']

        if pd['cell_type'] == 'cgRNN':
            # The two configurations differ only in the activation.
            if pd['stiefel']:
                activation = ccell.mod_relu
            else:
                activation = ccell.hirose
            cell = ccell.StiefelGatedRecurrentUnit(
                pd['num_units'], num_proj=pd['num_proj'],
                complex_input=pd['fft'], complex_output=pd['fft'],
                activation=activation, stiefel=pd['stiefel'])
            cell = RnnInputWrapper(1.0, cell)
            if pd['use_residuals']:
                cell = ResidualWrapper(cell=cell)
        elif pd['cell_type'] == 'gru':
            gru = rnn_cell.GRUCell(pd['num_units'])
            if pd['fft'] is True:
                dtype = tf.float32
                # concatenate real and imaginary parts.
                data_encoder_freq = tf.concat([
                    tf.real(data_encoder_freq),
                    tf.imag(data_encoder_freq)
                ], axis=-1)
                cell = LinearProjWrapper(pd['num_proj'] * 2, cell=gru,
                                         sample_prob=pd['sample_prob'])
            else:
                cell = LinearProjWrapper(pd['num_proj'], cell=gru,
                                         sample_prob=pd['sample_prob'])
            cell = RnnInputWrapper(1.0, cell)
            if pd['use_residuals']:
                cell = ResidualWrapper(cell=cell)
        else:
            # Previously this only printed a message and then failed further
            # down with a NameError on `cell`; fail here with a clear error.
            raise ValueError(
                'cell type not supported: %r' % (pd['cell_type'],))

        if pd['fft']:
            encoder_in = data_encoder_freq
        else:
            encoder_in = data_encoder_time

        with tf.variable_scope("encoder_decoder") as scope:
            # The first element of the state tuple is seeded with the first
            # encoder input instead of zeros.
            zero_state = cell.zero_state(pd['batch_size'], dtype=dtype)
            zero_state = LSTMStateTuple(encoder_in[:, 0, :], zero_state[1])
            encoder_out, encoder_state = tf.nn.dynamic_rnn(
                cell, encoder_in, initial_state=zero_state, dtype=dtype)

            # The decoder is fed zeros; its state starts from the last
            # encoder input paired with the final encoder state element.
            if not pd['fft']:
                if pd['linear_reshape']:
                    decoder_in = tf.zeros(
                        [pd['batch_size'], decoder_time_steps, 1])
                else:
                    decoder_in = tf.zeros(
                        [pd['batch_size'], pd['pred_samples'], 1])
                encoder_state = LSTMStateTuple(data_encoder_time[:, -1, :],
                                               encoder_state[-1])
            else:
                freqs = encoder_in.shape[-1].value
                decoder_in = tf.zeros(
                    [pd['batch_size'], fft_pred_samples, freqs], dtype=dtype)
                encoder_state = LSTMStateTuple(data_encoder_freq[:, -1, :],
                                               encoder_state[-1])
            cell.close()
            scope.reuse_variables()
            decoder_out, _ = tf.nn.dynamic_rnn(cell, decoder_in,
                                               initial_state=encoder_state,
                                               dtype=dtype)

        if pd['fft'] and pd['cell_type'] == 'gru':
            # assemble complex output: first half real, second half imag.
            decoder_freqs_t2 = decoder_out.shape[-1].value
            half = int(decoder_freqs_t2 / 2)
            decoder_out = tf.complex(decoder_out[:, :, :half],
                                     decoder_out[:, :, half:])
            encoder_out = tf.complex(encoder_out[:, :, :half],
                                     encoder_out[:, :, half:])
            encoder_in = tf.complex(encoder_in[:, :, :half],
                                    encoder_in[:, :, half:])

        if pd['fft']:
            prd_loss = None
            if (pd['freq_loss'] == 'complex_abs') \
               or (pd['freq_loss'] == 'complex_abs_time'):
                diff = data_decoder_freq - decoder_out
                prd_loss = tf.abs(tf.real(diff)) + tf.abs(tf.imag(diff))
                prd_loss = tf.reduce_mean(prd_loss)
                tf.summary.scalar('f_complex_abs', prd_loss)
            if (pd['freq_loss'] == 'complex_square') \
               or (pd['freq_loss'] == 'complex_square_time'):
                diff = data_decoder_freq - decoder_out
                prd_loss = tf.real(diff) * tf.real(diff) \
                    + tf.imag(diff) * tf.imag(diff)
                prd_loss = tf.reduce_mean(prd_loss)
                tf.summary.scalar('f_complex_square', prd_loss)
            if prd_loss is None and pd['freq_loss'] is not None:
                # Only the complex_abs* and complex_square* losses are
                # defined here. Any other name used to surface later as a
                # NameError on `prd_loss`.
                raise ValueError(
                    'no frequency domain loss defined for freq_loss %r'
                    % (pd['freq_loss'],))

            def expand_dims_and_transpose(input_tensor, pd, freqs):
                # Restore the squeezed channel axis and zero-pad the
                # frequencies dropped by the compression.
                output = tf.expand_dims(input_tensor, 1)
                if pd['fft_compression_rate']:
                    zero_coeffs = freqs - int(input_tensor.shape[-1])
                    zero_stack = tf.zeros(
                        output.shape[:-1].as_list() + [zero_coeffs],
                        tf.complex64)
                    output = tf.concat([output, zero_stack], -1)
                return output

            decoder_out = expand_dims_and_transpose(
                decoder_out, pd, dec_freqs)
            decoder_out = eagerSTFT.istft(decoder_out, window,
                                          nperseg=pd['window_size'],
                                          noverlap=pd['overlap'],
                                          epsilon=pd['epsilon'])
            decoder_out = tf.transpose(decoder_out, [0, 2, 1])
        elif pd['linear_reshape']:
            if pd['downsampling'] > 1:
                decoder_out_t = tf.transpose(decoder_out, [0, 2, 1])
                decoder_out_t = eagerSTFT.interpolate(
                    decoder_out_t, pd['step_size'])
                decoder_out = tf.transpose(decoder_out_t, [0, 2, 1])
            decoder_out = tf.reshape(
                decoder_out, [pd['batch_size'], pd['pred_samples'], 1])

        # The prediction may be longer than the target; compare only the
        # first pred_samples steps.
        time_loss = tf.losses.mean_squared_error(
            tf.real(data_decoder_time),
            tf.real(decoder_out[:, :pd['pred_samples'], :]))

        if not pd['fft']:
            loss = time_loss
        else:
            if pd['freq_loss'] in ('ad_time', 'log_mse_time', 'mse_time',
                                   'log_mse_mse_time',
                                   'complex_square_time',
                                   'complex_abs_time'):
                print('using freq and time based loss.')
                lambda_t = 1
                loss = prd_loss * lambda_t + time_loss
                tf.summary.scalar('lambda_t', lambda_t)
            elif (pd['freq_loss'] is None):
                print('time loss only')
                loss = time_loss
            else:
                loss = prd_loss

        learning_rate = tf.train.exponential_decay(
            pd['init_learning_rate'], global_step,
            pd['decay_steps'], pd['decay_rate'], staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        if (pd['cell_type'] == 'orthogonal' or pd['cell_type'] == 'cgRNN') \
           and (pd['stiefel'] is True):
            optimizer = co.RMSpropNatGrad(learning_rate,
                                          global_step=global_step)
        else:
            optimizer = tf.train.RMSPropOptimizer(learning_rate)

        gvs = optimizer.compute_gradients(loss)
        with tf.variable_scope("clip_grads"):
            # Element-wise clipping of every gradient to [-1, 1].
            capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var)
                          for grad, var in gvs]
        self.training_op = optimizer.apply_gradients(
            capped_gvs, global_step=global_step)

        tf.summary.scalar('time_loss', time_loss)
        tf.summary.scalar('training_loss', loss)

        self.init_op = tf.global_variables_initializer()
        self.summary_sum = tf.summary.merge_all()
        self.total_parameters = compute_parameter_total(
            tf.trainable_variables())
        self.saver = tf.train.Saver()
        self.loss = loss
        self.global_step = global_step
        self.decoder_out = decoder_out
        self.data_nd = data_nd
        self.data_encoder_time = data_encoder_time
        self.data_decoder_time = data_decoder_time
        if pd['fft']:
            self.window = window
def inference(x, y, n_batch, is_training, input_digits=None,
              output_digits=None, n_hidden=None, n_out=None):
    """Build a GRU encoder / attention-GRU decoder graph.

    The encoder is a single GRU unrolled step by step over the normalised
    input. The decoder is a GRU wrapped in Bahdanau attention over all
    encoder outputs, initialised from the final encoder state.

    Args:
        x: Encoder input; step ``t`` is read as ``x[:, t, :]``.
        y: Decoder targets. ``y[:, t - 1, :]`` (normalised) is the decoder
            input for step ``t`` during training.
        n_batch: Static batch size used for zero states and reshapes.
        is_training: The teacher-forced graph is built only when this is
            literally ``True``; any other value builds the free-running
            graph.
        input_digits: Number of encoder time steps.
        output_digits: Number of decoder time steps in the result.
        n_hidden: Number of GRU units per cell.
        n_out: Width of the output projection.

    Returns:
        A tensor of shape ``[batch, output_digits, n_out]``.
    """

    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial)

    def batch_normalization(shape, x):
        # `shape` is unused; it is kept so the call sites stay unchanged.
        # Normalises with moments taken over axes 0 and 1, no scale/offset.
        with tf.name_scope('batch_normalization'):
            eps = 1e-8
            mean, var = tf.nn.moments(x, [0, 1])
            nom_batch = (x - mean) / tf.sqrt(var + eps)
        return nom_batch

    encoder = rnn_cell.GRUCell(n_hidden)
    encoder_outputs = []

    # Encode
    encoder_state = encoder.zero_state(n_batch, tf.float32)
    # The normalised input does not depend on the step, so build it once.
    x_norm = batch_normalization(input_digits, x)
    with tf.variable_scope('Encoder'):
        for t in range(input_digits):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            (output, encoder_state) = encoder(x_norm[:, t, :], encoder_state)
            encoder_outputs.append(output)

    # Decode
    # The attention memory must be batch-major. Stacking the per-step
    # outputs along axis 1 keeps every batch row intact; the previous plain
    # reshape of the time-major data mixed batch and time entries. The
    # reshape below only pins the static shape. With a bidirectional encoder
    # the last dimension would be n_hidden * 2.
    attention_memory = tf.reshape(
        tf.stack(encoder_outputs, axis=1),
        [n_batch, input_digits, n_hidden * 1])
    AttentionMechanism = seq2seq.BahdanauAttention(
        num_units=100, memory=attention_memory)

    decoder = rnn_cell.GRUCell(n_hidden)
    decoder = seq2seq.AttentionWrapper(
        decoder,
        attention_mechanism=AttentionMechanism,
        attention_layer_size=50,
        output_attention=False)
    state = decoder.zero_state(n_batch, tf.float32) \
        .clone(cell_state=tf.reshape(encoder_state, [n_batch, n_hidden]))

    # The output history starts with the last encoder output.
    decoder_outputs = [encoder_outputs[-1]]

    # Output-layer weights and bias are defined up front.
    V = weight_variable([n_hidden, n_out])
    c = bias_variable([n_out])
    outputs = []

    with tf.variable_scope('Decoder'):
        if is_training is True:
            # Loop-invariant: normalise the targets once, not per step.
            y_norm = batch_normalization(output_digits, y)

        for t in range(1, output_digits):
            if t > 1:
                tf.get_variable_scope().reuse_variables()

            if is_training is True:
                (output, state) = decoder(y_norm[:, t - 1, :], state)
            else:
                # Project the previous output and feed it back in.
                out = tf.matmul(decoder_outputs[-1], V) + c
                outputs.append(out)
                (output, state) = decoder(out, state)

            decoder_outputs.append(tf.reshape(output, [n_batch, n_hidden]))

    if is_training is True:
        # Stack along axis 1 to get [batch, output_digits, n_hidden]
        # without mixing batch rows.
        output = tf.stack(decoder_outputs, axis=1)
        with tf.name_scope('check'):
            linear = tf.einsum('ijk,kl->ijl', output, V) + c
        return linear

    # Free-running mode: project the last decoder output as the final step.
    linear = tf.matmul(decoder_outputs[-1], V) + c
    outputs.append(linear)
    output = tf.reshape(tf.concat(outputs, axis=1),
                        [-1, output_digits, n_out])
    return output
def __init__(self, args, infer=False):
    """
    Initialisation function for the class Model.

    Builds placeholders, the recurrent cell, the per-pedestrian unrolled
    graph, the cost and the training ops.

    Params:
    args: Contains arguments required for the Model creation. Attributes
        read here: obs_length, maxNumPeds, num_layers, model, rnn_size,
        keep_prob, decay_rate, lambda_param, optimizer, grad_clip.
    infer: When true, batch_size, obs_length and pred_length on ``args``
        are overwritten with 1 and no dropout wrapper is added.

    Raises:
        ValueError: If ``args.model`` is not "lstm"/"gru" or
            ``args.optimizer`` is not "RMSprop"/"AdamOpt".
    """
    # If sampling new trajectories, then infer mode
    if infer:
        # Infer one position at a time
        args.batch_size = 1
        args.obs_length = 1
        args.pred_length = 1

    # Store the arguments
    self.args = args

    # placeholders for the input data and the target data
    # A sequence contains an ordered set of consecutive frames
    # Each frame can contain a maximum of 'args.maxNumPeds' number of peds
    # For each ped we have their (pedID, x, y) positions as input
    self.input_data = tf.placeholder(
        tf.float32, [args.obs_length, args.maxNumPeds, 3], name="input_data")
    # target data would be the same format as input_data except with one
    # time-step ahead
    self.target_data = tf.placeholder(
        tf.float32, [args.obs_length, args.maxNumPeds, 3],
        name="target_data")

    # Learning rate
    self.lr = tf.placeholder(tf.float32, shape=None, name="learning_rate")
    # NOTE: this placeholder stays in the graph, but the attribute is
    # rebound below to the decayed learning-rate tensor.
    self.final_lr = tf.placeholder(tf.float32, shape=None,
                                   name="final_learning_rate")
    self.training_epoch = tf.placeholder(tf.float32, shape=None,
                                         name="training_epoch")
    # keep prob
    self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    cells = []
    for _ in range(args.num_layers):
        # args.rnn_size contains the dimension of the hidden state
        if args.model == "lstm":
            with tf.name_scope("LSTM_cell"):
                cell = rnn_cell.LSTMCell(args.rnn_size, state_is_tuple=False)
        elif args.model == "gru":
            with tf.name_scope("GRU_cell"):
                # GRUCell takes no `state_is_tuple` argument; passing it
                # raised a TypeError.
                cell = rnn_cell.GRUCell(args.rnn_size)
        else:
            # Previously an unknown model fell through to a NameError.
            raise ValueError("Unknown model type %s!" % args.model)

        if not infer and args.keep_prob < 1:
            cell = rnn_cell.DropoutWrapper(
                cell, output_keep_prob=self.keep_prob)
        cells.append(cell)

    # Multi-layer RNN construction, if more than one layer
    cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=False)

    # Store the recurrent unit
    self.cell = cell

    # Output size is the set of parameters (mu, sigma, corr)
    self.output_size = 5  # 2 mu, 2 sigma and 1 corr

    with tf.name_scope("learning_rate"):
        self.final_lr = self.lr * (
            self.args.decay_rate ** self.training_epoch)

    self.define_embedding_and_output_layers(args)

    # Define LSTM states for each pedestrian
    with tf.variable_scope("LSTM_states"):
        self.LSTM_states = tf.zeros(
            [args.maxNumPeds, self.cell.state_size], name="LSTM_states")
        # https://stackoverflow.com/a/41384913/2049763
        self.initial_states = tf.split(self.LSTM_states, args.maxNumPeds, 0)

    # Define hidden output states for each pedestrian
    with tf.variable_scope("Hidden_states"):
        self.output_states = tf.split(
            tf.zeros([args.maxNumPeds, self.cell.output_size]),
            args.maxNumPeds, 0)

    # List of tensors each of shape args.maxNumPeds x 3 corresponding to
    # each frame in the sequence
    with tf.name_scope("frame_data_tensors"):
        frame_data = [
            tf.squeeze(input_, [0])
            for input_ in tf.split(self.input_data, args.obs_length, 0)
        ]

    with tf.name_scope("frame_target_data_tensors"):
        frame_target_data = [
            tf.squeeze(target_, [0])
            for target_ in tf.split(self.target_data, args.obs_length, 0)
        ]

    # Cost
    with tf.name_scope("Cost_related_stuff"):
        self.cost = tf.constant(0.0, name="cost")
        self.counter = tf.constant(0.0, name="counter")
        self.increment = tf.constant(1.0, name="increment")

    # Containers to store output distribution parameters
    with tf.name_scope("Distribution_parameters_stuff"):
        self.initial_output = tf.split(
            tf.zeros([args.maxNumPeds, self.output_size]),
            args.maxNumPeds, 0)

    # Tensor to represent non-existent ped
    with tf.name_scope("Non_existent_ped_stuff"):
        nonexistent_ped = tf.constant(0.0, name="zero_ped")

    self.final_result = []
    # Iterate over each frame in the sequence
    for seq, frame in enumerate(frame_data):
        final_result_ped = []
        current_frame_data = frame  # MNP x 3 tensor
        for ped in range(args.maxNumPeds):
            # pedID of the current pedestrian
            pedID = current_frame_data[ped, 0]

            with tf.name_scope("extract_input_ped"):
                # Extract x and y positions of the current ped
                self.spatial_input = tf.slice(
                    current_frame_data, [ped, 1],
                    [1, 2])  # Tensor of shape (1,2)

            with tf.name_scope("embeddings_operations"):
                # Embed the spatial input
                embedded_spatial_input = tf.nn.relu(
                    tf.nn.xw_plus_b(self.spatial_input, self.embedding_w,
                                    self.embedding_b))

            # One step of LSTM; variables are shared across all peds and
            # frames after the very first call.
            with tf.variable_scope("LSTM") as scope:
                if seq > 0 or ped > 0:
                    scope.reuse_variables()
                self.output_states[ped], self.initial_states[ped] = \
                    self.cell(embedded_spatial_input,
                              self.initial_states[ped])

            # Apply the linear layer. Output would be a tensor of shape
            # 1 x output_size
            with tf.name_scope("output_linear_layer"):
                self.initial_output[ped] = tf.nn.xw_plus_b(
                    self.output_states[ped], self.output_w, self.output_b)

            with tf.name_scope("extract_target_ped"):
                # Extract x and y coordinates of the target data
                # x_data and y_data would be tensors of shape 1 x 1
                [x_data, y_data] = tf.split(
                    tf.slice(frame_target_data[seq], [ped, 1], [1, 2]), 2, 1)
                target_pedID = frame_target_data[seq][ped, 0]

            with tf.name_scope("get_coef"):
                # Extract coef from output of the linear output layer
                [o_mux, o_muy, o_sx, o_sy, o_corr] = \
                    self.get_coef(self.initial_output[ped])
                final_result_ped.append([o_mux, o_muy, o_sx, o_sy, o_corr])

            # Calculate loss for the current ped
            with tf.name_scope("calculate_loss"):
                lossfunc = self.get_lossfunc(o_mux, o_muy, o_sx, o_sy,
                                             o_corr, x_data, y_data)

            # If it is a non-existent ped, it should not contribute to cost
            # If the ped doesn't exist in the next frame, he/she should not
            # contribute to cost as well
            with tf.name_scope("increment_cost"):
                self.cost = tf.where(
                    tf.logical_or(tf.equal(pedID, nonexistent_ped),
                                  tf.equal(target_pedID, nonexistent_ped)),
                    self.cost, tf.add(self.cost, lossfunc))
                self.counter = tf.where(
                    tf.logical_or(tf.equal(pedID, nonexistent_ped),
                                  tf.equal(target_pedID, nonexistent_ped)),
                    self.counter, tf.add(self.counter, self.increment))

        self.final_result.append(tf.stack(final_result_ped))

    # Compute the cost
    with tf.name_scope("mean_cost"):
        # Mean of the cost
        # NOTE(review): counter stays 0 when no ped contributes, which makes
        # this a division by zero -- confirm callers never feed such data.
        self.cost = tf.div(self.cost, self.counter)

    # Get trainable_variables
    tvars = tf.trainable_variables()

    # L2 loss
    l2 = args.lambda_param * sum(tf.nn.l2_loss(tvar) for tvar in tvars)
    self.cost = self.cost + l2

    # Get the final LSTM states
    self.final_states = tf.concat(self.initial_states, 0)
    # Get the final distribution parameters
    self.final_output = self.initial_output

    # initialize the optimizer with the given learning rate
    if args.optimizer == "RMSprop":
        # RMSprop is what Social LSTM suggests (Graves (2013) uses Adam).
        optimizer = tf.train.RMSPropOptimizer(learning_rate=self.final_lr,
                                              momentum=0.9)
    elif args.optimizer == "AdamOpt":
        optimizer = tf.train.AdamOptimizer(self.final_lr)
    else:
        # Previously an unknown optimizer fell through to a NameError.
        raise ValueError("Unknown optimizer %s!" % args.optimizer)

    # How to apply gradient clipping in TensorFlow?
    # https://stackoverflow.com/a/43486487/2049763
    # https://stackoverflow.com/a/40540396/2049763
    # TODO: (resolve) We are clipping the gradients as is usually done in
    # LSTM implementations. Social LSTM paper doesn't mention about this at
    # all
    # Calculate gradients of the cost w.r.t all the trainable variables
    self.gradients = tf.gradients(self.cost, tvars)

    # Clip the gradients if they are larger than the value given in args
    self.clipped_gradients, _ = tf.clip_by_global_norm(
        self.gradients, args.grad_clip)

    # Train operator
    self.train_op = optimizer.apply_gradients(
        zip(self.clipped_gradients, tvars))

    # Second train operator that applies externally supplied gradients, fed
    # through one placeholder per trainable variable.
    self.grad_placeholders = []
    for var in tvars:
        self.grad_placeholders.append(tf.placeholder(var.dtype, var.shape))
    self.train_op_2 = optimizer.apply_gradients(
        zip(self.grad_placeholders, tvars))