def _build(self, state_input, step_input, hidden_input, name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Place holder for entire time-series
            inputs.
        step_input (tf.Tensor): Place holder for step inputs.
        hidden_input (tf.Tensor): Place holder for step hidden state.
        name (str): Inner model name, also the variable scope of the
            inner model, if exist. One example is
            garage.tf.models.Sequential. Unused here.

    Return:
        tf.Tensor: Entire time-series means.
        tf.Tensor: Step mean.
        tf.Tensor: Entire time-series std_log.
        tf.Tensor: Step std_log.
        tf.Tensor: Step hidden state.
        tf.Tensor: Initial hidden state.
        garage.tf.distributions.DiagonalGaussian: Policy distribution.
    """
    action_dim = self._output_dim
    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an MLP
            (outputs, step_outputs, step_hidden, hidden_init_var) = gru(
                name='mean_std_network',
                gru_cell=self._mean_std_gru_cell,
                all_input_var=state_input,
                step_input_var=step_input,
                step_hidden_var=hidden_input,
                hidden_state_init=self._hidden_state_init,
                hidden_state_init_trainable=self.
                _hidden_state_init_trainable,
                output_nonlinearity_layer=self.
                _mean_std_output_nonlinearity_layer)
            # The shared network emits 2 * action_dim units; the first
            # half is the mean, the second half the log std.
            with tf.compat.v1.variable_scope('mean_network'):
                mean_var = outputs[..., :action_dim]
                step_mean_var = step_outputs[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_var = outputs[..., action_dim:]
                step_log_std_var = step_outputs[..., action_dim:]
        else:  # separate MLPs for mean and std networks
            # mean network
            (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru(
                name='mean_network',
                gru_cell=self._mean_gru_cell,
                all_input_var=state_input,
                step_input_var=step_input,
                step_hidden_var=hidden_input,
                hidden_state_init=self._hidden_state_init,
                hidden_state_init_trainable=self.
                _hidden_state_init_trainable,
                output_nonlinearity_layer=self.
                _mean_output_nonlinearity_layer)
            # log std is a (possibly trainable) parameter broadcast over
            # the time series, not an output of the recurrent network.
            log_std_var, step_log_std_var = recurrent_parameter(
                input_var=state_input,
                step_input_var=step_input,
                length=action_dim,
                initializer=tf.constant_initializer(self._init_std_param),
                trainable=self._learn_std,
                name='log_std_param')
    dist = DiagonalGaussian(self._output_dim)
    return (mean_var, step_mean_var, log_std_var, step_log_std_var,
            step_hidden, hidden_init_var, dist)
def _build(self, all_input_var, step_input_var, step_hidden_var, name=None):
    """Build model given input placeholder(s).

    Args:
        all_input_var (tf.Tensor): Place holder for entire time-series
            inputs.
        step_input_var (tf.Tensor): Place holder for step inputs.
        step_hidden_var (tf.Tensor): Place holder for step hidden state.
        name (str): Inner model name, also the variable scope of the
            inner model, if exist. One example is
            garage.tf.models.Sequential.

    Return:
        tf.Tensor: Entire time-series outputs.
        tf.Tensor: Step output.
        tf.Tensor: Step hidden state.
        tf.Tensor: Initial hidden state.
    """
    del name  # The inner scope name is not used by this model.
    # Collect the configuration once, then delegate graph construction
    # to the shared gru() network builder.
    gru_kwargs = dict(
        name='gru',
        gru_cell=self._gru_cell,
        all_input_var=all_input_var,
        step_input_var=step_input_var,
        step_hidden_var=step_hidden_var,
        hidden_state_init=self._hidden_state_init,
        hidden_state_init_trainable=self._hidden_state_init_trainable,
        output_nonlinearity_layer=self._output_nonlinearity_layer,
    )
    return gru(**gru_kwargs)
def _build(self, all_input_var, step_input_var, step_hidden_var, name=None):
    """Build the GRU network graph.

    Args:
        all_input_var (tf.Tensor): Place holder for entire time-series
            inputs.
        step_input_var (tf.Tensor): Place holder for step inputs.
        step_hidden_var (tf.Tensor): Place holder for step hidden state.
        name (str): Inner model name; accepted for interface
            compatibility but not used here.

    Return:
        tf.Tensor: Entire time-series outputs.
        tf.Tensor: Step output.
        tf.Tensor: Step hidden state.
        tf.Tensor: Initial hidden state.
    """
    # Hoist the stored configuration into locals for readability.
    init = self._hidden_state_init
    init_trainable = self._hidden_state_init_trainable
    out_layer = self._output_nonlinearity_layer
    return gru(name='gru',
               gru_cell=self._gru_cell,
               all_input_var=all_input_var,
               step_input_var=step_input_var,
               step_hidden_var=step_hidden_var,
               hidden_state_init=init,
               hidden_state_init_trainable=init_trainable,
               output_nonlinearity_layer=out_layer)
def test_output_shapes(self, time_step, input_dim, output_dim, hidden_init,
                       cell_init):
    """Step-wise and full-sequence GRU outputs have the expected shapes.

    Fix: the original unpacked the network's initial-hidden *variable*
    into a name that shadowed the ``hidden_init`` parameter; the
    variable now gets a distinct name (``hidden_init_var``) so each
    value's origin is unambiguous. Behavior is unchanged — the shadowed
    parameter was never read after the unpack.

    Args:
        time_step (int): Number of steps to unroll.
        input_dim (int): Dimension of each observation.
        output_dim (int): Dimension of the dense output layer.
        hidden_init (float): Constant used to initialize the hidden
            state.
        cell_init (float): Unused — GRU cells have no separate cell
            state (kept for signature parity with LSTM tests).
    """
    del cell_init  # GRU has no cell state; parameter kept for parity.
    obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
    obs_input = np.full((self.batch_size, input_dim), 1.)
    input_var = tf.compat.v1.placeholder(tf.float32,
                                         shape=(None, None, input_dim),
                                         name='input')
    step_input_var = tf.compat.v1.placeholder(tf.float32,
                                              shape=(None, input_dim),
                                              name='step_input')
    output_nonlinearity = tf.keras.layers.Dense(
        units=output_dim,
        activation=None,
        kernel_initializer=tf.constant_initializer(1))
    with tf.compat.v1.variable_scope('GRU'):
        self.gru = gru(
            all_input_var=input_var,
            name='gru',
            gru_cell=self.gru_cell,
            step_input_var=step_input_var,
            step_hidden_var=self.step_hidden_var,
            hidden_state_init=tf.constant_initializer(hidden_init),
            output_nonlinearity_layer=output_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())

    # Compute output by doing t step() on the gru cell.  Note the
    # initial-hidden variable is unpacked under its own name so it does
    # not shadow the hidden_init parameter.
    outputs_t, output_t, h_t, hidden_init_var = self.gru
    hidden = np.full((self.batch_size, self.hidden_dim),
                     hidden_init_var.eval())

    for _ in range(time_step):
        output, hidden = self.sess.run(
            [output_t, h_t],
            feed_dict={
                step_input_var: obs_input,
                self.step_hidden_var: hidden,
            })  # noqa: E126
        assert output.shape == (self.batch_size, output_dim)
        assert hidden.shape == (self.batch_size, self.hidden_dim)

    full_output = self.sess.run(outputs_t,
                                feed_dict={input_var: obs_inputs})
    assert full_output.shape == (self.batch_size, time_step, output_dim)
def _build(self, state_input, step_input, step_hidden, name=None):
    """Build model.

    Args:
        state_input (tf.Tensor): Entire time-series observation input,
            with shape :math:`(N, T, S^*)`.
        step_input (tf.Tensor): Single timestep observation input,
            with shape :math:`(N, S^*)`.
        step_hidden (tf.Tensor): Hidden state for step, with shape
            :math:`(N, S^*)`.
        name (str): Inner model name, also the variable scope of the
            inner model, if exist. One example is
            garage.tf.models.Sequential.

    Returns:
        tfp.distributions.MultivariateNormalDiag: Policy distribution.
        tf.Tensor: Step means, with shape :math:`(N, S^*)`.
        tf.Tensor: Step log std, with shape :math:`(N, S^*)`.
        tf.Tensor: Step hidden state, with shape :math:`(N, S^*)`.
        tf.Tensor: Initial hidden state, with shape :math:`(S^*)`.
    """
    del name
    action_dim = self._output_dim
    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an MLP
            # NOTE: `step_hidden` is rebound here to the stepped hidden
            # state returned by gru(); the placeholder argument is only
            # needed as the gru() input.
            (outputs, step_outputs, step_hidden, hidden_init_var) = gru(
                name='mean_std_network',
                gru_cell=self._mean_std_gru_cell,
                all_input_var=state_input,
                step_input_var=step_input,
                step_hidden_var=step_hidden,
                hidden_state_init=self._hidden_state_init,
                hidden_state_init_trainable=self.
                _hidden_state_init_trainable,
                output_nonlinearity_layer=self.
                _mean_std_output_nonlinearity_layer)
            # Shared head emits 2 * action_dim units: first half mean,
            # second half log std.
            with tf.compat.v1.variable_scope('mean_network'):
                mean_var = outputs[..., :action_dim]
                step_mean_var = step_outputs[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_var = outputs[..., action_dim:]
                step_log_std_var = step_outputs[..., action_dim:]
        else:  # separate MLPs for mean and std networks
            # mean network
            (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru(
                name='mean_network',
                gru_cell=self._mean_gru_cell,
                all_input_var=state_input,
                step_input_var=step_input,
                step_hidden_var=step_hidden,
                hidden_state_init=self._hidden_state_init,
                hidden_state_init_trainable=self.
                _hidden_state_init_trainable,
                output_nonlinearity_layer=self.
                _mean_output_nonlinearity_layer)
            # log std is a (possibly trainable) parameter broadcast over
            # the sequence rather than a recurrent-network output.
            log_std_var, step_log_std_var = recurrent_parameter(
                input_var=state_input,
                step_input_var=step_input,
                length=action_dim,
                initializer=tf.constant_initializer(self._init_std_param),
                trainable=self._learn_std,
                name='log_std_param')
    # exp() converts the unconstrained log std into a positive scale.
    dist = tfp.distributions.MultivariateNormalDiag(
        loc=mean_var, scale_diag=tf.exp(log_std_var))
    return (dist, step_mean_var, step_log_std_var, step_hidden,
            hidden_init_var)
def _build(self, state_input, step_input, hidden_input, name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Place holder for entire time-series
            inputs.
        step_input (tf.Tensor): Place holder for step inputs.
        hidden_input (tf.Tensor): Place holder for step hidden state.
        name (str): Inner model name, also the variable scope of the
            inner model, if exist. One example is
            garage.tf.models.Sequential.

    Return:
        tf.Tensor: Entire time-series means.
        tf.Tensor: Step mean.
        tf.Tensor: Entire time-series std_log.
        tf.Tensor: Step std_log.
        tf.Tensor: Step hidden state.
        tf.Tensor: Initial hidden state.
        garage.tf.distributions.DiagonalGaussian: Policy distribution.
    """
    del name
    action_dim = self._output_dim
    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an MLP
            (outputs, step_outputs, step_hidden, hidden_init_var) = gru(
                name='mean_std_network',
                gru_cell=self._mean_std_gru_cell,
                all_input_var=state_input,
                step_input_var=step_input,
                step_hidden_var=hidden_input,
                hidden_state_init=self._hidden_state_init,
                hidden_state_init_trainable=self.
                _hidden_state_init_trainable,
                output_nonlinearity_layer=self.
                _mean_std_output_nonlinearity_layer)
            # Shared head emits 2 * action_dim units: first half mean,
            # second half log std.
            with tf.compat.v1.variable_scope('mean_network'):
                mean_var = outputs[..., :action_dim]
                step_mean_var = step_outputs[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_var = outputs[..., action_dim:]
                step_log_std_var = step_outputs[..., action_dim:]
        else:  # separate MLPs for mean and std networks
            # mean network
            (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru(
                name='mean_network',
                gru_cell=self._mean_gru_cell,
                all_input_var=state_input,
                step_input_var=step_input,
                step_hidden_var=hidden_input,
                hidden_state_init=self._hidden_state_init,
                hidden_state_init_trainable=self.
                _hidden_state_init_trainable,
                output_nonlinearity_layer=self.
                _mean_output_nonlinearity_layer)
            # log std is a (possibly trainable) parameter broadcast over
            # the sequence rather than a recurrent-network output.
            log_std_var, step_log_std_var = recurrent_parameter(
                input_var=state_input,
                step_input_var=step_input,
                length=action_dim,
                initializer=tf.constant_initializer(self._init_std_param),
                trainable=self._learn_std,
                name='log_std_param')
    dist = DiagonalGaussian(self._output_dim)
    return (mean_var, step_mean_var, log_std_var, step_log_std_var,
            step_hidden, hidden_init_var, dist)
def test_output_same_as_rnn(self, time_step, input_dim, output_dim,
                            hidden_init, cell_init):
    """Stepwise gru() outputs match an unrolled tf.keras.layers.RNN.

    Builds the same GRU twice — once through the project's gru()
    network builder and once through a Keras RNN layer sharing the same
    cell — and checks that per-step outputs, hidden states, and the
    full-sequence output agree.

    Args:
        time_step (int): Number of steps to unroll.
        input_dim (int): Dimension of each observation.
        output_dim (int): Dimension of the dense output layer.
        hidden_init (float): Constant used to initialize the hidden
            state in both networks.
        cell_init (float): Unused here — a GRU has no separate cell
            state.
    """
    obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
    obs_input = np.full((self.batch_size, input_dim), 1.)
    input_var = tf.compat.v1.placeholder(tf.float32,
                                         shape=(None, None, input_dim),
                                         name='input')
    step_input_var = tf.compat.v1.placeholder(tf.float32,
                                              shape=(None, input_dim),
                                              name='step_input')
    output_nonlinearity = tf.keras.layers.Dense(
        units=output_dim,
        activation=None,
        kernel_initializer=tf.constant_initializer(1))
    with tf.compat.v1.variable_scope('GRU'):
        self.gru = gru(
            all_input_var=input_var,
            name='gru',
            gru_cell=self.gru_cell,
            step_input_var=step_input_var,
            step_hidden_var=self.step_hidden_var,
            hidden_state_init=tf.constant_initializer(hidden_init),
            output_nonlinearity_layer=output_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())

    # Create a RNN and compute the entire outputs
    rnn_layer = tf.keras.layers.RNN(cell=self.gru_cell,
                                    return_sequences=True,
                                    return_state=True)

    # Initialize the reference RNN's hidden state with the same
    # constant hidden_init used for gru() above (non-trainable so the
    # comparison is exact).
    hidden_var = tf.compat.v1.get_variable(
        name='initial_hidden',
        shape=(self.batch_size, self.hidden_dim),
        initializer=tf.constant_initializer(hidden_init),
        trainable=False,
        dtype=tf.float32)

    outputs, hiddens = rnn_layer(input_var, initial_state=[hidden_var])
    outputs = output_nonlinearity(outputs)

    self.sess.run(tf.compat.v1.global_variables_initializer())

    outputs, hiddens = self.sess.run([outputs, hiddens],
                                     feed_dict={input_var: obs_inputs})

    # Compute output by doing t step() on the gru cell
    hidden = np.full((self.batch_size, self.hidden_dim), hidden_init)
    _, output_t, hidden_t, _ = self.gru
    for i in range(time_step):
        output, hidden = self.sess.run([output_t, hidden_t],
                                       feed_dict={
                                           step_input_var: obs_input,
                                           self.step_hidden_var: hidden,
                                       })  # noqa: E126
        # The output from i-th timestep
        assert np.array_equal(output, outputs[:, i, :])
    # After time_step steps the hidden state must equal the RNN's final
    # hidden state.
    assert np.array_equal(hidden, hiddens)

    # Also check the full time-series output of the gru network.
    full_outputs = self.sess.run(self.gru[0],
                                 feed_dict={input_var: obs_inputs})
    assert np.array_equal(outputs, full_outputs)
def test_gradient_paths(self):
    """Gradients flow along valid paths and are absent along invalid ones.

    The step tensors (output_t, h_t) depend on the step placeholders,
    and the full-sequence tensor (outputs_t) depends on the time-series
    placeholder — but not vice versa.  Where no path exists,
    tf.gradients returns [None], and sess.run(None) raises TypeError;
    that is what the pytest.raises blocks assert.
    """
    time_step = 3
    input_dim = 2
    output_dim = 4
    obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
    obs_input = np.full((self.batch_size, input_dim), 1.)
    input_var = tf.compat.v1.placeholder(tf.float32,
                                         shape=(None, None, input_dim),
                                         name='input')
    step_input_var = tf.compat.v1.placeholder(tf.float32,
                                              shape=(None, input_dim),
                                              name='step_input')
    output_nonlinearity = tf.keras.layers.Dense(
        units=output_dim,
        activation=None,
        kernel_initializer=tf.constant_initializer(1))
    with tf.compat.v1.variable_scope('GRU'):
        self.gru = gru(all_input_var=input_var,
                       name='gru',
                       gru_cell=self.gru_cell,
                       step_input_var=step_input_var,
                       step_hidden_var=self.step_hidden_var,
                       output_nonlinearity_layer=output_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())

    # Compute output by doing t step() on the gru cell
    outputs_t, output_t, h_t, hidden_init = self.gru
    hidden = np.full((self.batch_size, self.hidden_dim),
                     hidden_init.eval())

    # Valid paths: step output/hidden w.r.t. step inputs.
    grads_step_o_i = tf.gradients(output_t, step_input_var)
    grads_step_o_h = tf.gradients(output_t, self.step_hidden_var)
    grads_step_h = tf.gradients(h_t, step_input_var)

    self.sess.run([grads_step_o_i, grads_step_o_h, grads_step_h],
                  feed_dict={
                      step_input_var: obs_input,
                      self.step_hidden_var: hidden,
                  })  # noqa: E126

    # Valid path: full-sequence output w.r.t. the time-series input.
    grads_full = tf.gradients(outputs_t, input_var)
    self.sess.run(grads_full, feed_dict={input_var: obs_inputs})

    # Invalid paths: these tf.gradients calls yield [None], so running
    # them must fail.
    grads_step_o_i = tf.gradients(outputs_t, step_input_var)
    grads_step_o_h = tf.gradients(outputs_t, self.step_hidden_var)
    grads_step_h = tf.gradients(h_t, input_var)

    # No gradient flow
    with pytest.raises(TypeError):
        self.sess.run(grads_step_o_i,
                      feed_dict={
                          step_input_var: obs_input,
                          self.step_hidden_var: hidden,
                      })
    with pytest.raises(TypeError):
        self.sess.run(grads_step_o_h,
                      feed_dict={
                          step_input_var: obs_input,
                          self.step_hidden_var: hidden,
                      })
    with pytest.raises(TypeError):
        self.sess.run(grads_step_h, feed_dict={input_var: obs_inputs})
def test_output_value_trainable_hidden_and_cell(self, time_step, input_dim,
                                                output_dim):
    """Trainable initial hidden state is registered and outputs match NumPy.

    Builds a gru() network with hidden_state_init_trainable=True,
    checks that the 'initial_hidden' variable appears among trainable
    variables, and compares the full-sequence TF output against a
    step-by-step NumPy reference (recurrent_step_gru) followed by an
    all-ones dense layer.
    """
    obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
    obs_input = np.full((self.batch_size, input_dim), 1.)
    input_var = tf.compat.v1.placeholder(tf.float32,
                                         shape=(None, None, input_dim),
                                         name='input')
    step_input_var = tf.compat.v1.placeholder(tf.float32,
                                              shape=(None, input_dim),
                                              name='step_input')
    output_nonlinearity = tf.keras.layers.Dense(
        units=output_dim,
        activation=None,
        kernel_initializer=tf.constant_initializer(1))
    with tf.compat.v1.variable_scope('GRU'):
        self.gru = gru(all_input_var=input_var,
                       name='gru',
                       gru_cell=self.gru_cell,
                       step_input_var=step_input_var,
                       step_hidden_var=self.step_hidden_var,
                       hidden_state_init_trainable=True,
                       output_nonlinearity_layer=output_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())

    # Compute output by doing t step() on the gru cell
    outputs_t, output_t, h_t, hidden_init = self.gru
    hidden = np.full((self.batch_size, self.hidden_dim),
                     hidden_init.eval())

    _, hidden = self.sess.run([output_t, h_t],
                              feed_dict={
                                  step_input_var: obs_input,
                                  self.step_hidden_var: hidden,
                              })  # noqa: E126

    # The initial hidden state must be a trainable variable when
    # hidden_state_init_trainable=True.
    with tf.compat.v1.variable_scope('GRU/gru', reuse=True):
        hidden_init_var = tf.compat.v1.get_variable(name='initial_hidden')
        assert hidden_init_var in tf.compat.v1.trainable_variables()

    full_output1 = self.sess.run(outputs_t,
                                 feed_dict={input_var: obs_inputs})

    # NumPy reference: unroll the GRU one step at a time with the
    # project helper recurrent_step_gru, stacking hidden states along
    # the time axis.
    hidden2 = np.full((self.batch_size, self.hidden_dim),
                      hidden_init.eval())
    stack_hidden = None
    for i in range(time_step):
        hidden2 = recurrent_step_gru(
            input_val=obs_inputs[:, i, :],
            num_units=self.hidden_dim,
            step_hidden=hidden2,
            w_x_init=1.,
            w_h_init=1.,
            b_init=0.,
            nonlinearity=np.tanh,
            gate_nonlinearity=lambda x: 1. / (1. + np.exp(-x)))
        if stack_hidden is None:
            stack_hidden = hidden2[:, np.newaxis, :]
        else:
            stack_hidden = np.concatenate(
                (stack_hidden, hidden2[:, np.newaxis, :]), axis=1)
    # NOTE: rebinds the `output_nonlinearity` name from the Dense layer
    # above to its NumPy weight-matrix equivalent (all ones).
    output_nonlinearity = np.full(
        (np.prod(hidden2.shape[1:]), output_dim), 1.)
    full_output2 = np.matmul(stack_hidden, output_nonlinearity)
    assert np.allclose(full_output1, full_output2)