def test_without_std_share_network_output_values(self, mock_normal,
                                                 output_dim, hidden_dim,
                                                 init_std):
    """Compare step outputs of a non-shared-std model with a NumPy LSTM.

    A GaussianLSTMModel is built with ``std_share_network=False`` and no
    nonlinearities, then stepped ``self.time_step`` times. After every
    step the mean, log std, hidden state and cell state returned by the
    TensorFlow graph are checked against a reference computed with
    ``recurrent_step_lstm`` and a constant 0.1 output projection.

    Args:
        mock_normal (object): Mock whose ``return_value`` is pinned to
            0.5. NOTE(review): presumably a patched random-normal
            sampler; the patch decorator is outside this view.
        output_dim (int): Output dimension passed to the model.
        hidden_dim (int): Hidden dimension passed to the model.
        init_std (float): Initial std passed to the model; the expected
            log std is ``np.log(init_std)``.

    """
    mock_normal.return_value = 0.5

    model = GaussianLSTMModel(
        output_dim=output_dim,
        hidden_dim=hidden_dim,
        std_share_network=False,
        hidden_nonlinearity=None,
        recurrent_nonlinearity=None,
        hidden_w_init=self.default_initializer,
        recurrent_w_init=self.default_initializer,
        output_w_init=self.default_initializer,
        init_std=init_std,
    )

    state_shape = (self.batch_size, hidden_dim)

    def _state_placeholder(name):
        # Placeholder used to feed the previous recurrent state.
        return tf.compat.v1.placeholder(shape=state_shape,
                                        name=name,
                                        dtype=tf.float32)

    step_hidden_var = _state_placeholder('step_hidden')
    step_cell_var = _state_placeholder('step_cell')

    build_outputs = model.build(self.input_var, self.step_input_var,
                                step_hidden_var, step_cell_var)
    (_, step_mean_var, step_log_std_var, step_hidden, step_cell,
     hidden_init_var, cell_init_var) = build_outputs

    # Both the TensorFlow path and the NumPy reference start from the
    # model's initial hidden/cell values.
    tf_hidden = np.full(state_shape, hidden_init_var.eval())
    ref_hidden = tf_hidden
    tf_cell = np.full(state_shape, cell_init_var.eval())
    ref_cell = tf_cell

    fetches = [step_mean_var, step_log_std_var, step_hidden, step_cell]

    for _ in range(self.time_step):
        feed = {
            self.step_input_var: self.obs_input,
            step_hidden_var: tf_hidden,
            step_cell_var: tf_cell,
        }
        tf_mean, tf_log_std, tf_hidden, tf_cell = self.sess.run(
            fetches, feed_dict=feed)

        # Reference step: constant 0.1 weights, zero bias, no
        # nonlinearities.
        ref_hidden, ref_cell = recurrent_step_lstm(
            input_val=self.obs_input,
            num_units=hidden_dim,
            step_hidden=ref_hidden,
            step_cell=ref_cell,
            w_x_init=0.1,
            w_h_init=0.1,
            b_init=0.,
            nonlinearity=None,
            gate_nonlinearity=None)

        projection_rows = np.prod(ref_hidden.shape[1:])
        projection = np.full((projection_rows, output_dim), 0.1)
        ref_mean = np.matmul(ref_hidden, projection)
        assert np.allclose(tf_mean, ref_mean)

        ref_log_std = np.full((self.batch_size, output_dim),
                              np.log(init_std))
        assert np.allclose(tf_log_std, ref_log_std)

        assert np.allclose(tf_hidden, ref_hidden)
        assert np.allclose(tf_cell, ref_cell)
def test_output_value_trainable_hidden_and_cell(self, time_step, input_dim,
                                                output_dim):
    """Check LSTM outputs when the initial hidden and cell are trainable.

    The LSTM is built with ``hidden_state_init_trainable=True`` and
    ``cell_state_init_trainable=True``. The test runs a single step
    through the step ops, asserts that the ``initial_hidden`` and
    ``initial_cell`` variables are registered as trainable, and compares
    the full-sequence output with a NumPy reference that unrolls
    ``recurrent_step_lstm`` for ``time_step`` steps.

    Args:
        time_step (int): Number of time steps in the full input.
        input_dim (int): Dimension of each input vector.
        output_dim (int): Number of units in the dense output layer.

    """
    state_shape = (self.batch_size, self.hidden_dim)
    obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
    obs_input = np.full((self.batch_size, input_dim), 1.)

    _input_var = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, None, input_dim),
                                          name='input')
    _step_input_var = tf.compat.v1.placeholder(tf.float32,
                                               shape=(None, input_dim),
                                               name='input')
    _output_nonlinearity = tf.keras.layers.Dense(
        units=output_dim,
        activation=None,
        kernel_initializer=tf.constant_initializer(1))

    with tf.compat.v1.variable_scope('LSTM'):
        self.lstm = lstm(all_input_var=_input_var,
                         name='lstm',
                         lstm_cell=self.lstm_cell,
                         step_input_var=_step_input_var,
                         step_hidden_var=self._step_hidden_var,
                         step_cell_var=self._step_cell_var,
                         hidden_state_init_trainable=True,
                         cell_state_init_trainable=True,
                         output_nonlinearity_layer=_output_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())

    # Run one step through the step ops, starting from the initial state.
    outputs_t, _, h_t, c_t, hidden_init, cell_init = self.lstm
    hidden = np.full(state_shape, hidden_init.eval())
    cell = np.full(state_shape, cell_init.eval())
    step_feed = {
        _step_input_var: obs_input,
        self._step_hidden_var: hidden,
        self._step_cell_var: cell,
    }
    hidden, cell = self.sess.run([h_t, c_t], feed_dict=step_feed)

    # The initial-state variables must be part of the trainable set.
    with tf.compat.v1.variable_scope('LSTM/lstm', reuse=True):
        hidden_init_var = tf.compat.v1.get_variable(name='initial_hidden')
        cell_init_var = tf.compat.v1.get_variable(name='initial_cell')
        trainable_vars = tf.compat.v1.trainable_variables()
        assert hidden_init_var in trainable_vars
        assert cell_init_var in trainable_vars

    full_output1 = self.sess.run(outputs_t,
                                 feed_dict={_input_var: obs_inputs})

    def _sigmoid(x):
        # Logistic function used as the gate nonlinearity of the
        # reference implementation.
        return 1. / (1. + np.exp(-x))

    # NumPy reference: unroll the LSTM and stack the hidden state of
    # every step along a new axis 1.
    hidden2 = np.full(state_shape, hidden_init.eval())
    cell2 = np.full(state_shape, cell_init.eval())
    stack_hidden = None
    for step in range(time_step):
        hidden2, cell2 = recurrent_step_lstm(
            input_val=obs_inputs[:, step, :],
            num_units=self.hidden_dim,
            step_hidden=hidden2,
            step_cell=cell2,
            w_x_init=1.,
            w_h_init=1.,
            b_init=0.,
            nonlinearity=np.tanh,
            gate_nonlinearity=_sigmoid)
        hidden_slice = hidden2[:, np.newaxis, :]
        if stack_hidden is not None:
            stack_hidden = np.concatenate((stack_hidden, hidden_slice),
                                          axis=1)
        else:
            stack_hidden = hidden_slice

    projection_rows = np.prod(hidden2.shape[1:])
    output_nonlinearity = np.full((projection_rows, output_dim), 1.)
    full_output2 = np.matmul(stack_hidden, output_nonlinearity)
    assert np.allclose(full_output1, full_output2)