def _build(self, state_input, step_input, hidden_input, name=None):
    action_dim = self._output_dim

    with tf.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an MLP
            (outputs, step_outputs, step_hidden, hidden_init_var) = gru(
                name='mean_std_network',
                gru_cell=self._mean_std_gru_cell,
                all_input_var=state_input,
                step_input_var=step_input,
                step_hidden_var=hidden_input,
                hidden_state_init=self._hidden_state_init,
                hidden_state_init_trainable=self.
                _hidden_state_init_trainable,
                output_nonlinearity_layer=self.
                _mean_std_output_nonlinearity_layer)
            with tf.variable_scope('mean_network'):
                mean_var = outputs[..., :action_dim]
                step_mean_var = step_outputs[..., :action_dim]
            with tf.variable_scope('log_std_network'):
                log_std_var = outputs[..., action_dim:]
                step_log_std_var = step_outputs[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru(
                name='mean_network',
                gru_cell=self._mean_gru_cell,
                all_input_var=state_input,
                step_input_var=step_input,
                step_hidden_var=hidden_input,
                hidden_state_init=self._hidden_state_init,
                hidden_state_init_trainable=self.
                _hidden_state_init_trainable,
                output_nonlinearity_layer=self.
                _mean_output_nonlinearity_layer)

            # std network
            log_std_var = parameter(
                state_input,
                length=action_dim,
                initializer=tf.constant_initializer(self._init_std_param),
                trainable=self._learn_std,
                name='log_std_param')
            step_log_std_var = parameter(
                step_input,
                length=action_dim,
                initializer=tf.constant_initializer(self._init_std_param),
                trainable=self._learn_std,
                name='step_log_std_param')

    dist = DiagonalGaussian(self._output_dim)

    # The noise shape omits the batch dimension, so the same noise vector
    # is broadcast across the whole batch when sampling.
    rnd = tf.random.normal(shape=step_mean_var.get_shape().as_list()[1:])
    action_var = rnd * tf.exp(step_log_std_var) + step_mean_var

    return (action_var, mean_var, step_mean_var, log_std_var,
            step_log_std_var, step_hidden, hidden_init_var, dist)
def _build(self, obs_input, name=None):
    return_var = tf.compat.v1.get_variable(
        'return_var', (), initializer=tf.constant_initializer(0.5))
    mean = tf.fill((tf.shape(obs_input)[0], self.output_dim), return_var)
    log_std = tf.fill((tf.shape(obs_input)[0], self.output_dim),
                      np.log(0.5))
    action = mean + tf.exp(log_std) * 0.5
    dist = DiagonalGaussian(self.output_dim)
    # action will be 0.5 + 0.5 * 0.5 = 0.75
    return action, mean, log_std, log_std, dist
def _build(self, obs_input, step_obs_input, step_hidden, step_cell,
           name=None):
    """Build model given input placeholder(s).

    Args:
        obs_input (tf.Tensor): Placeholder for entire time-series inputs.
        step_obs_input (tf.Tensor): Placeholder for step inputs.
        step_hidden (tf.Tensor): Placeholder for step hidden state.
        step_cell (tf.Tensor): Placeholder for step cell state.
        name (str): Inner model name, also the variable scope of the
            inner model, if exist. One example is
            garage.tf.models.Sequential.

    Returns:
        tf.Tensor: Entire time-series means.
        tf.Tensor: Step mean.
        tf.Tensor: Entire time-series log std.
        tf.Tensor: Step log std.
        tf.Tensor: Step hidden state.
        tf.Tensor: Step cell state.
        tf.Tensor: Initial hidden state.
        tf.Tensor: Initial cell state.
        garage.distributions.DiagonalGaussian: Distribution.

    """
    del name
    return_var = tf.compat.v1.get_variable(
        'return_var', (), initializer=tf.constant_initializer(0.5))
    mean = log_std = tf.fill(
        (tf.shape(obs_input)[0], tf.shape(obs_input)[1], self.output_dim),
        return_var)
    step_mean = step_log_std = tf.fill(
        (tf.shape(step_obs_input)[0], self.output_dim), return_var)
    hidden_init_var = tf.compat.v1.get_variable(
        name='initial_hidden',
        shape=(self.hidden_dim, ),
        initializer=tf.zeros_initializer(),
        trainable=False,
        dtype=tf.float32)
    cell_init_var = tf.compat.v1.get_variable(
        name='initial_cell',
        shape=(self.hidden_dim, ),
        initializer=tf.zeros_initializer(),
        trainable=False,
        dtype=tf.float32)
    dist = DiagonalGaussian(self.output_dim)
    # sample = 0.5 * 0.5 + 0.5 = 0.75
    return (mean, step_mean, log_std, step_log_std, step_hidden, step_cell,
            hidden_init_var, cell_init_var, dist)
def _build(self, obs_input, name=None): """Build model. Args: obs_input (tf.Tensor): Entire time-series observation input. name (str): Inner model name, also the variable scope of the inner model, if exist. One example is garage.tf.models.Sequential. Returns: tf.tensor: Mean. tf.Tensor: Log of standard deviation. garage.distributions.DiagonalGaussian: Distribution. """ del name return_var = tf.compat.v1.get_variable( 'return_var', (), initializer=tf.constant_initializer(0.5)) mean = tf.fill((tf.shape(obs_input)[0], self.output_dim), return_var) log_std = tf.fill((tf.shape(obs_input)[0], self.output_dim), np.log(0.5)) dist = DiagonalGaussian(self.output_dim) # action will be 0.5 + 0.5 * 0.5 = 0.75 return mean, log_std, dist
def __init__(self,
             input_shape,
             output_dim,
             name="GaussianMLPRegressor",
             mean_network=None,
             hidden_sizes=(32, 32),
             hidden_nonlinearity=tf.nn.tanh,
             optimizer=None,
             optimizer_args=None,
             use_trust_region=True,
             max_kl_step=0.01,
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_hidden_sizes=(32, 32),
             std_nonlinearity=None,
             normalize_inputs=True,
             normalize_outputs=True,
             subsample_factor=1.0):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean
     network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the
     mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use trust region constraint.
    :param max_kl_step: KL divergence constraint for each iteration.
    :param learn_std: Whether to learn the standard deviations. Only
     effective if adaptive_std is False. If adaptive_std is True, this
     parameter is ignored, and the weights for the std network are always
     learned.
    :param adaptive_std: Whether to make the std a function of the states.
    :param std_share_network: Whether to use the same network as the mean.
    :param std_hidden_sizes: Number of hidden units of each layer of the
     std network. Only used if `std_share_network` is False. It defaults to
     the same architecture as the mean.
    :param std_nonlinearity: Non-linearity used for each layer of the std
     network. Only used if `std_share_network` is False. It defaults to the
     same non-linearity as the mean.
    """
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())

    self._mean_network_name = "mean_network"
    self._std_network_name = "std_network"

    with tf.variable_scope(name):
        if optimizer_args is None:
            optimizer_args = dict()
        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
            else:
                optimizer = LbfgsOptimizer(**optimizer_args)
        else:
            optimizer = optimizer(**optimizer_args)
        self._optimizer = optimizer
        self._subsample_factor = subsample_factor

        if mean_network is None:
            if std_share_network:
                mean_network = MLP(
                    name="mean_network",
                    input_shape=input_shape,
                    output_dim=2 * output_dim,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=None,
                )
                l_mean = L.SliceLayer(
                    mean_network.output_layer,
                    slice(output_dim),
                    name="mean_slice",
                )
            else:
                mean_network = MLP(
                    name="mean_network",
                    input_shape=input_shape,
                    output_dim=output_dim,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=None,
                )
                l_mean = mean_network.output_layer

        if adaptive_std:
            l_log_std = MLP(
                name="log_std_network",
                input_shape=input_shape,
                input_var=mean_network.input_layer.input_var,
                output_dim=output_dim,
                hidden_sizes=std_hidden_sizes,
                hidden_nonlinearity=std_nonlinearity,
                output_nonlinearity=None,
            ).output_layer
        elif std_share_network:
            l_log_std = L.SliceLayer(
                mean_network.output_layer,
                slice(output_dim, 2 * output_dim),
                name="log_std_slice",
            )
        else:
            l_log_std = L.ParamLayer(
                mean_network.input_layer,
                num_units=output_dim,
                param=tf.constant_initializer(np.log(init_std)),
                name="output_log_std",
                trainable=learn_std,
            )

        LayersPowered.__init__(self, [l_mean, l_log_std])

        xs_var = mean_network.input_layer.input_var
        ys_var = tf.placeholder(dtype=tf.float32,
                                name="ys",
                                shape=(None, output_dim))
        old_means_var = tf.placeholder(dtype=tf.float32,
                                       name="old_means",
                                       shape=(None, output_dim))
        old_log_stds_var = tf.placeholder(dtype=tf.float32,
                                          name="old_log_stds",
                                          shape=(None, output_dim))
        x_mean_var = tf.Variable(
            np.zeros((1, ) + input_shape, dtype=np.float32),
            name="x_mean",
        )
        x_std_var = tf.Variable(
            np.ones((1, ) + input_shape, dtype=np.float32),
            name="x_std",
        )
        y_mean_var = tf.Variable(
            np.zeros((1, output_dim), dtype=np.float32),
            name="y_mean",
        )
        y_std_var = tf.Variable(
            np.ones((1, output_dim), dtype=np.float32),
            name="y_std",
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var
        normalized_ys_var = (ys_var - y_mean_var) / y_std_var

        with tf.name_scope(self._mean_network_name,
                           values=[normalized_xs_var]):
            normalized_means_var = L.get_output(
                l_mean, {mean_network.input_layer: normalized_xs_var})
        with tf.name_scope(self._std_network_name,
                           values=[normalized_xs_var]):
            normalized_log_stds_var = L.get_output(
                l_log_std, {mean_network.input_layer: normalized_xs_var})

        means_var = normalized_means_var * y_std_var + y_mean_var
        log_stds_var = normalized_log_stds_var + tf.log(y_std_var)

        normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
        normalized_old_log_stds_var = old_log_stds_var - tf.log(y_std_var)

        dist = self._dist = DiagonalGaussian(output_dim)

        normalized_dist_info_vars = dict(mean=normalized_means_var,
                                         log_std=normalized_log_stds_var)

        mean_kl = tf.reduce_mean(
            dist.kl_sym(
                dict(mean=normalized_old_means_var,
                     log_std=normalized_old_log_stds_var),
                normalized_dist_info_vars,
            ))

        loss = -tf.reduce_mean(
            dist.log_likelihood_sym(normalized_ys_var,
                                    normalized_dist_info_vars))

        self._f_predict = tensor_utils.compile_function([xs_var], means_var)
        self._f_pdists = tensor_utils.compile_function(
            [xs_var], [means_var, log_stds_var])
        self._l_mean = l_mean
        self._l_log_std = l_log_std

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[normalized_means_var, normalized_log_stds_var],
        )

        if use_trust_region:
            optimizer_args["leq_constraint"] = (mean_kl, max_kl_step)
            optimizer_args["inputs"] = [
                xs_var, ys_var, old_means_var, old_log_stds_var
            ]
        else:
            optimizer_args["inputs"] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._normalize_outputs = normalize_outputs
        self._mean_network = mean_network
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
        self._y_mean_var = y_mean_var
        self._y_std_var = y_std_var

        # Optionally create assign operations for normalization
        if self._normalize_inputs:
            self._x_mean_var_ph = tf.placeholder(
                shape=(1, ) + input_shape,
                dtype=tf.float32,
            )
            self._x_std_var_ph = tf.placeholder(
                shape=(1, ) + input_shape,
                dtype=tf.float32,
            )
            self._assign_x_mean = tf.assign(self._x_mean_var,
                                            self._x_mean_var_ph)
            self._assign_x_std = tf.assign(self._x_std_var,
                                           self._x_std_var_ph)
        if self._normalize_outputs:
            self._y_mean_var_ph = tf.placeholder(
                shape=(1, output_dim),
                dtype=tf.float32,
            )
            self._y_std_var_ph = tf.placeholder(
                shape=(1, output_dim),
                dtype=tf.float32,
            )
            self._assign_y_mean = tf.assign(self._y_mean_var,
                                            self._y_mean_var_ph)
            self._assign_y_std = tf.assign(self._y_std_var,
                                           self._y_std_var_ph)
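# A NumPy statement (not from the source) of the diagonal-Gaussian KL that
# dist.kl_sym computes for the trust-region constraint above, written in
# terms of means and log stds. The checks confirm the KL of a distribution
# with itself is zero and that shifting the mean makes it positive.
import numpy as np

def diag_gaussian_kl(mean0, log_std0, mean1, log_std1):
    # KL(N0 || N1) summed over independent dimensions
    var0, var1 = np.exp(2 * log_std0), np.exp(2 * log_std1)
    return np.sum(log_std1 - log_std0 +
                  (var0 + (mean0 - mean1) ** 2) / (2 * var1) - 0.5)

m, s = np.array([0.1, -0.2]), np.array([0.0, 0.3])
assert np.isclose(diag_gaussian_kl(m, s, m, s), 0.0)
assert diag_gaussian_kl(m, s, m + 1.0, s) > 0.0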
def _build(self, state_input):
    action_dim = self._output_dim

    with tf.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an MLP
            b = np.concatenate([
                np.zeros(action_dim),
                np.full(action_dim, self._init_std_param)
            ], axis=0)  # yapf: disable

            mean_std_network = mlp(
                state_input,
                output_dim=action_dim * 2,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=tf.constant_initializer(b),
                name='mean_std_network',
                layer_normalization=self._layer_normalization)
            with tf.variable_scope('mean_network'):
                mean_network = mean_std_network[..., :action_dim]
            with tf.variable_scope('log_std_network'):
                log_std_network = mean_std_network[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            mean_network = mlp(
                state_input,
                output_dim=action_dim,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=self._output_b_init,
                name='mean_network',
                layer_normalization=self._layer_normalization)

            # std network
            if self._adaptive_std:
                log_std_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._std_hidden_sizes,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    output_nonlinearity=self._std_output_nonlinearity,
                    output_w_init=self._std_output_w_init,
                    output_b_init=tf.constant_initializer(
                        self._init_std_param),
                    name='log_std_network',
                    layer_normalization=self._layer_normalization)
            else:
                log_std_network = parameter(
                    state_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(
                        self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_network')

    mean_var = mean_network
    std_param = log_std_network

    with tf.variable_scope('std_parameterization'):
        # build std_var with std parameterization
        if self._std_parameterization == 'exp':
            log_std_var = std_param
        else:  # we know it must be softplus here
            # std = log(1 + exp(param)), so log_std = log(log(1 + exp(param)))
            log_std_var = tf.log(tf.log(1. + tf.exp(std_param)))

    with tf.variable_scope('std_limits'):
        if self._min_std_param is not None:
            log_std_var = tf.maximum(log_std_var, self._min_std_param)
        if self._max_std_param is not None:
            log_std_var = tf.minimum(log_std_var, self._max_std_param)

    dist = DiagonalGaussian(self._output_dim)

    rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:],
                           seed=deterministic.get_seed())
    action_var = rnd * tf.exp(log_std_var) + mean_var

    return action_var, mean_var, log_std_var, std_param, dist
def __init__(self,
             env_spec,
             name=None,
             hidden_sizes=(32, 32),
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_hidden_sizes=(32, 32),
             min_std=1e-6,
             std_hidden_nonlinearity=tf.nn.tanh,
             hidden_nonlinearity=tf.nn.tanh,
             output_nonlinearity=None,
             mean_network=None,
             std_network=None,
             std_parametrization='exp'):
    """
    :param env_spec:
    :param hidden_sizes: list of sizes for the fully-connected hidden
     layers
    :param learn_std: Is std trainable
    :param init_std: Initial std
    :param adaptive_std:
    :param std_share_network:
    :param std_hidden_sizes: list of sizes for the fully-connected layers
     for std
    :param min_std: whether to make sure that the std is at least some
     threshold value, to avoid numerical issues
    :param std_hidden_nonlinearity:
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param output_nonlinearity: nonlinearity for the output layer
    :param mean_network: custom network for the output mean
    :param std_network: custom network for the output log std
    :param std_parametrization: how the std should be parametrized. There
     are a few options:
        - exp: the logarithm of the std will be stored, and an exponential
          transformation applied
        - softplus: the std will be computed as log(1+exp(x))
    :return:
    """
    Serializable.quick_init(self, locals())
    assert isinstance(env_spec.action_space, Box)

    self.name = name
    self._mean_network_name = "mean_network"
    self._std_network_name = "std_network"

    with tf.variable_scope(name, "GaussianMLPPolicy"):
        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim

        # create network
        if mean_network is None:
            if std_share_network:
                if std_parametrization == "exp":
                    init_std_param = np.log(init_std)
                elif std_parametrization == "softplus":
                    init_std_param = np.log(np.exp(init_std) - 1)
                else:
                    raise NotImplementedError
                init_b = tf.constant_initializer(init_std_param)
                with tf.variable_scope(self._mean_network_name):
                    mean_network = MLP(
                        name="mlp",
                        input_shape=(obs_dim, ),
                        output_dim=2 * action_dim,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=output_nonlinearity,
                        output_b_init=init_b,
                    )
                    l_mean = L.SliceLayer(
                        mean_network.output_layer,
                        slice(action_dim),
                        name="mean_slice",
                    )
            else:
                mean_network = MLP(
                    name=self._mean_network_name,
                    input_shape=(obs_dim, ),
                    output_dim=action_dim,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity,
                )
                l_mean = mean_network.output_layer
        self._mean_network = mean_network

        obs_var = mean_network.input_layer.input_var

        if std_network is not None:
            l_std_param = std_network.output_layer
        else:
            if adaptive_std:
                std_network = MLP(
                    name=self._std_network_name,
                    input_shape=(obs_dim, ),
                    input_layer=mean_network.input_layer,
                    output_dim=action_dim,
                    hidden_sizes=std_hidden_sizes,
                    hidden_nonlinearity=std_hidden_nonlinearity,
                    output_nonlinearity=None,
                )
                l_std_param = std_network.output_layer
            elif std_share_network:
                with tf.variable_scope(self._std_network_name):
                    l_std_param = L.SliceLayer(
                        mean_network.output_layer,
                        slice(action_dim, 2 * action_dim),
                        name="std_slice",
                    )
            else:
                if std_parametrization == 'exp':
                    init_std_param = np.log(init_std)
                elif std_parametrization == 'softplus':
                    init_std_param = np.log(np.exp(init_std) - 1)
                else:
                    raise NotImplementedError
                with tf.variable_scope(self._std_network_name):
                    l_std_param = L.ParamLayer(
                        mean_network.input_layer,
                        num_units=action_dim,
                        param=tf.constant_initializer(init_std_param),
                        name="output_std_param",
                        trainable=learn_std,
                    )

        self.std_parametrization = std_parametrization

        if std_parametrization == 'exp':
            min_std_param = np.log(min_std)
        elif std_parametrization == 'softplus':
            min_std_param = np.log(np.exp(min_std) - 1)
        else:
            raise NotImplementedError

        self.min_std_param = min_std_param

        # mean_var, log_std_var = L.get_output([l_mean, l_std_param])
        #
        # if self.min_std_param is not None:
        #     log_std_var = tf.maximum(log_std_var, np.log(min_std))
        #
        # self._mean_var, self._log_std_var = mean_var, log_std_var

        self._l_mean = l_mean
        self._l_std_param = l_std_param

        self._dist = DiagonalGaussian(action_dim)

        LayersPowered.__init__(self, [l_mean, l_std_param])
        super(GaussianMLPPolicy, self).__init__(env_spec)

        dist_info_sym = self.dist_info_sym(
            mean_network.input_layer.input_var, dict())
        mean_var = tf.identity(dist_info_sym["mean"], name="mean")
        log_std_var = tf.identity(dist_info_sym["log_std"],
                                  name="standard_dev")

        self._f_dist = tensor_utils.compile_function(
            inputs=[obs_var],
            outputs=[mean_var, log_std_var],
        )
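# A minimal NumPy sketch (not from the source) of the two std
# parametrizations handled above. Each branch stores an unconstrained
# parameter whose forward transform recovers init_std, so the two
# initializations below are exactly the inverse transforms applied in
# __init__.
import numpy as np

init_std = 1.0

# 'exp': std = exp(param), so param = log(std)
param_exp = np.log(init_std)
assert np.isclose(np.exp(param_exp), init_std)

# 'softplus': std = log(1 + exp(param)), so param = log(exp(std) - 1)
param_softplus = np.log(np.exp(init_std) - 1)
assert np.isclose(np.log1p(np.exp(param_softplus)), init_std)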
def __init__(self,
             env_spec,
             embedding,
             task_space,
             name="GaussianMLPMultitaskPolicy",
             hidden_sizes=(32, 32),
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_hidden_sizes=(32, 32),
             min_std=1e-6,
             max_std=None,
             std_hidden_nonlinearity=tf.nn.tanh,
             hidden_nonlinearity=tf.nn.tanh,
             output_nonlinearity=None,
             mean_network=None,
             std_network=None,
             std_parameterization='exp'):
    """
    :param env_spec: observation space is a concatenation of task space
     and vanilla env observation space
    :param hidden_sizes: list of sizes for the fully-connected hidden
     layers
    :param learn_std: Is std trainable
    :param init_std: Initial std
    :param adaptive_std:
    :param std_share_network:
    :param std_hidden_sizes: list of sizes for the fully-connected layers
     for std
    :param min_std: whether to make sure that the std is at least some
     threshold value, to avoid numerical issues
    :param std_hidden_nonlinearity:
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param output_nonlinearity: nonlinearity for the output layer
    :param mean_network: custom network for the output mean
    :param std_network: custom network for the output log std
    :param std_parameterization: how the std should be parameterized.
     There are a few options:
        - exp: the logarithm of the std will be stored, and an exponential
          transformation applied
        - softplus: the std will be computed as log(1+exp(x))
    :return:
    """
    assert isinstance(env_spec.action_space, Box)
    StochasticMultitaskPolicy.__init__(self, env_spec, embedding,
                                       task_space)
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())

    if mean_network or std_network:
        raise NotImplementedError

    self.name = name
    self._variable_scope = tf.variable_scope(self.name,
                                             reuse=tf.AUTO_REUSE)
    self._name_scope = tf.name_scope(self.name)

    # TODO: eliminate
    self._dist = DiagonalGaussian(self.action_space.flat_dim)

    # Network parameters
    self._hidden_sizes = hidden_sizes
    self._learn_std = learn_std
    self._init_std = init_std
    self._adaptive_std = adaptive_std
    self._std_share_network = std_share_network
    self._std_hidden_sizes = std_hidden_sizes
    self._min_std = min_std
    self._max_std = max_std
    self._std_hidden_nonlinearity = std_hidden_nonlinearity
    self._hidden_nonlinearity = hidden_nonlinearity
    self._output_nonlinearity = output_nonlinearity
    self._mean_network = mean_network
    self._std_network = std_network
    self._std_parameterization = std_parameterization

    # Transform std arguments to parameterized space
    self._init_std_param = None
    self._min_std_param = None
    self._max_std_param = None
    if self._std_parameterization == 'exp':
        self._init_std_param = np.log(init_std)
        if min_std:
            self._min_std_param = np.log(min_std)
        if max_std:
            self._max_std_param = np.log(max_std)
    elif self._std_parameterization == 'softplus':
        self._init_std_param = np.log(np.exp(init_std) - 1)
        if min_std:
            self._min_std_param = np.log(np.exp(min_std) - 1)
        if max_std:
            self._max_std_param = np.log(np.exp(max_std) - 1)
    else:
        raise NotImplementedError

    # Build default graph
    with self._name_scope:
        # inputs
        self._task_input = self._embedding._input
        self._latent_input = self.latent_space.new_tensor_variable(
            name="latent_input", extra_dims=1)
        self._obs_input = self.observation_space.new_tensor_variable(
            name="obs_input", extra_dims=1)

        with tf.name_scope("default",
                           values=[self._task_input, self._obs_input]):
            # network (connect with embedding)
            latent = self._embedding.latent
            latent_mean = self._embedding.latent_mean
            latent_std_param = self._embedding.latent_std_param
            action_var, mean_var, std_param_var, dist = self._build_graph(
                latent, self._obs_input)

            # outputs
            self._action = tf.identity(action_var, name="action")
            self._action_mean = tf.identity(mean_var, name="action_mean")
            self._action_std_param = tf.identity(std_param_var,
                                                 "action_std_param")
            self._action_distribution = dist

        # special auxiliary graph for feedforward using only latents
        with tf.name_scope("from_latent",
                           values=[self._latent_input, self._obs_input]):
            action_var, mean_var, std_param_var, dist = self._build_graph(
                self._latent_input, self._obs_input)

            # auxiliary outputs
            self._action_from_latent = action_var
            self._action_mean_from_latent = mean_var
            self._action_std_param_from_latent = std_param_var
            self._action_distribution_from_latent = dist

        # compiled functions
        with tf.variable_scope("f_dist_task_obs"):
            self.f_dist_task_obs = tensor_utils.compile_function(
                inputs=[self._task_input, self._obs_input],
                outputs=[
                    self._action, self._action_mean,
                    self._action_std_param, latent, latent_mean,
                    latent_std_param
                ],
            )

        with tf.variable_scope("f_dist_latent_obs"):
            self.f_dist_latent_obs = tensor_utils.compile_function(
                inputs=[self._latent_input, self._obs_input],
                outputs=[
                    self._action_from_latent,
                    self._action_mean_from_latent,
                    self._action_std_param_from_latent
                ],
            )
def _build(self, state_input, step_input, hidden_input, name=None): """Build model given input placeholder(s). Args: state_input (tf.Tensor): Place holder for entire time-series inputs. step_input (tf.Tensor): Place holder for step inputs. hidden_input (tf.Tensor): Place holder for step hidden state. name (str): Inner model name, also the variable scope of the inner model, if exist. One example is garage.tf.models.Sequential. Return: tf.Tensor: Entire time-series means. tf.Tensor: Step mean. tf.Tensor: Entire time-series std_log. tf.Tensor: Step std_log. tf.Tensor: Step hidden state. tf.Tensor: Initial hidden state. garage.tf.distributions.DiagonalGaussian: Policy distribution. """ del name action_dim = self._output_dim with tf.compat.v1.variable_scope('dist_params'): if self._std_share_network: # mean and std networks share an MLP (outputs, step_outputs, step_hidden, hidden_init_var) = gru( name='mean_std_network', gru_cell=self._mean_std_gru_cell, all_input_var=state_input, step_input_var=step_input, step_hidden_var=hidden_input, hidden_state_init=self._hidden_state_init, hidden_state_init_trainable=self. _hidden_state_init_trainable, output_nonlinearity_layer=self. _mean_std_output_nonlinearity_layer) with tf.compat.v1.variable_scope('mean_network'): mean_var = outputs[..., :action_dim] step_mean_var = step_outputs[..., :action_dim] with tf.compat.v1.variable_scope('log_std_network'): log_std_var = outputs[..., action_dim:] step_log_std_var = step_outputs[..., action_dim:] else: # separate MLPs for mean and std networks # mean network (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru( name='mean_network', gru_cell=self._mean_gru_cell, all_input_var=state_input, step_input_var=step_input, step_hidden_var=hidden_input, hidden_state_init=self._hidden_state_init, hidden_state_init_trainable=self. _hidden_state_init_trainable, output_nonlinearity_layer=self. _mean_output_nonlinearity_layer) log_std_var, step_log_std_var = recurrent_parameter( input_var=state_input, step_input_var=step_input, length=action_dim, initializer=tf.constant_initializer(self._init_std_param), trainable=self._learn_std, name='log_std_param') dist = DiagonalGaussian(self._output_dim) return (mean_var, step_mean_var, log_std_var, step_log_std_var, step_hidden, hidden_init_var, dist)
class GaussianMLPEmbedding(StochasticEmbedding, Parameterized, Serializable):
    def __init__(self,
                 embedding_spec,
                 name="GaussianMLPEmbedding",
                 hidden_sizes=(32, 32),
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_hidden_sizes=(32, 32),
                 min_std=1e-6,
                 max_std=None,
                 std_hidden_nonlinearity=tf.nn.tanh,
                 hidden_nonlinearity=tf.nn.tanh,
                 mean_scale=1.,
                 output_nonlinearity=None,
                 mean_network=None,
                 std_network=None,
                 std_parameterization='exp',
                 normalize=False,
                 mean_output_nonlinearity=None):
        """
        :param embedding_spec:
        :param hidden_sizes: list of sizes for the fully-connected hidden
         layers
        :param learn_std: Is std trainable?
        :param init_std: Initial std
        :param adaptive_std:
        :param std_share_network:
        :param std_hidden_sizes: list of sizes for the fully-connected
         layers for std
        :param min_std: whether to make sure that the std is at least some
         threshold value, to avoid numerical issues
        :param std_hidden_nonlinearity:
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param output_nonlinearity: nonlinearity for the output layer
        :param mean_network: custom network for the output mean
        :param std_network: custom network for the output log std
        :param std_parameterization: how the std should be parameterized.
         There are a few options:
            - exp: the logarithm of the std will be stored, and an
              exponential transformation applied
            - softplus: the std will be computed as log(1+exp(x))
        :return:
        """
        assert isinstance(embedding_spec.latent_space, Box)
        StochasticEmbedding.__init__(self, embedding_spec)
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        if mean_network or std_network:
            raise NotImplementedError

        self.name = name
        self._variable_scope = tf.variable_scope(self.name,
                                                 reuse=tf.AUTO_REUSE)
        self._name_scope = tf.name_scope(self.name)

        # TODO: eliminate
        self._dist = DiagonalGaussian(self.latent_space.flat_dim)

        # Network parameters
        self._hidden_sizes = hidden_sizes
        self._learn_std = learn_std
        self._init_std = init_std
        self._adaptive_std = adaptive_std
        self._std_share_network = std_share_network
        self._std_hidden_sizes = std_hidden_sizes
        self._min_std = min_std
        self._max_std = max_std
        self._std_hidden_nonlinearity = std_hidden_nonlinearity
        self._hidden_nonlinearity = hidden_nonlinearity
        self._output_nonlinearity = output_nonlinearity
        self._mean_network = mean_network
        self._std_network = std_network
        self._std_parameterization = std_parameterization
        self._normalize = normalize
        self._mean_output_nonlinearity = mean_output_nonlinearity

        if self._normalize:
            latent_dim = self.latent_space.flat_dim
            self._max_std = np.sqrt(1.0 / latent_dim)
            self._init_std = self._max_std / 2.0

        # Transform std arguments to parameterized space
        self._init_std_param = None
        self._min_std_param = None
        self._max_std_param = None
        if self._std_parameterization == 'exp':
            self._init_std_param = np.log(self._init_std)
            if self._min_std:
                self._min_std_param = np.log(self._min_std)
            if self._max_std:
                self._max_std_param = np.log(self._max_std)
        elif self._std_parameterization == 'softplus':
            self._init_std_param = np.log(np.exp(self._init_std) - 1)
            if self._min_std:
                self._min_std_param = np.log(np.exp(self._min_std) - 1)
            if self._max_std:
                self._max_std_param = np.log(np.exp(self._max_std) - 1)
        else:
            raise NotImplementedError

        self._mean_scale = mean_scale

        # Build default graph
        with self._name_scope:
            # inputs
            self._input = self.input_space.new_tensor_variable(
                name="input", extra_dims=1)

            with tf.name_scope("default", values=[self._input]):
                # network
                latent_var, mean_var, std_param_var, dist = \
                    self._build_graph(self._input)

                # outputs
                self._latent = tf.identity(latent_var, name="latent")
                self._latent_mean = tf.identity(mean_var,
                                                name="latent_mean")
                self._latent_std_param = tf.identity(
                    std_param_var, "latent_std_param")
                self._latent_distribution = dist

            # compiled functions
            with tf.variable_scope("f_dist"):
                self._f_dist = tensor_utils.compile_function(
                    inputs=[self._input],
                    outputs=[
                        self._latent, self._latent_mean,
                        self._latent_std_param
                    ],
                )

    @property
    def input(self):
        return self._input

    @property
    def latent(self):
        return self._latent

    @property
    def latent_mean(self):
        return self._latent_mean

    @property
    def latent_std_param(self):
        return self._latent_std_param

    @property
    def inputs(self):
        return self._input

    @property
    def outputs(self):
        return (self._latent, self._latent_mean, self._latent_std_param,
                self._latent_distribution)

    def _build_graph(self, from_input):
        latent_dim = self.latent_space.flat_dim
        small = 1e-5

        with self._variable_scope:
            with tf.variable_scope("dist_params"):
                if self._std_share_network:
                    # mean and std networks share an MLP
                    b = np.concatenate([
                        np.zeros(latent_dim),
                        np.full(latent_dim, self._init_std_param)
                    ], axis=0)
                    b = tf.constant_initializer(b)
                    mean_std_network = mlp(
                        with_input=from_input,
                        output_dim=latent_dim * 2,
                        hidden_sizes=self._hidden_sizes,
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        # hidden_w_init=tf.orthogonal_initializer(1.0),
                        # output_w_init=tf.orthogonal_initializer(1.0),
                        output_b_init=b,
                        name="mean_std_network")
                    with tf.variable_scope("mean_network"):
                        mean_network = mean_std_network[..., :latent_dim]
                    with tf.variable_scope("std_network"):
                        std_network = mean_std_network[..., latent_dim:]
                else:
                    # separate MLPs for mean and std networks
                    # mean network
                    mean_network = mlp(
                        with_input=from_input,
                        output_dim=latent_dim,
                        hidden_sizes=self._hidden_sizes,
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        name="mean_network")

                    # std network
                    if self._adaptive_std:
                        b = tf.constant_initializer(self._init_std_param)
                        std_network = mlp(
                            with_input=from_input,
                            output_dim=latent_dim,
                            hidden_sizes=self._std_hidden_sizes,
                            hidden_nonlinearity=self.
                            _std_hidden_nonlinearity,
                            output_nonlinearity=self._output_nonlinearity,
                            output_b_init=b,
                            name="std_network")
                    else:
                        p = tf.constant_initializer(self._init_std_param)
                        std_network = parameter(
                            with_input=from_input,
                            length=latent_dim,
                            initializer=p,
                            trainable=self._learn_std,
                            name="std_network")

            if self._mean_scale != 1.:
                mean_var = tf.identity(mean_network * self._mean_scale,
                                       "mean_scale")
            else:
                mean_var = mean_network

            if self._mean_output_nonlinearity is not None:
                mean_var = self._mean_output_nonlinearity(mean_var)

            std_param_var = std_network

            with tf.variable_scope("std_limits"):
                if self._min_std_param:
                    std_param_var = tf.maximum(std_param_var,
                                               self._min_std_param)
                if self._max_std_param:
                    std_param_var = tf.minimum(std_param_var,
                                               self._max_std_param)

            with tf.variable_scope("std_parameterization"):
                # build std_var with std parameterization
                if self._std_parameterization == "exp":
                    std_var = tf.exp(std_param_var)
                elif self._std_parameterization == "softplus":
                    std_var = tf.log(1. + tf.exp(std_param_var))
                else:
                    raise NotImplementedError

            if self._normalize:
                mean_var = tf.nn.l2_normalize(mean_var)
                # std_var = tf.nn.l2_normalize(std_var)

            dist = tf.contrib.distributions.MultivariateNormalDiag(
                mean_var, std_var)

            latent_var = dist.sample(seed=ext.get_seed())

            return latent_var, mean_var, std_param_var, dist

    @overrides
    def get_params_internal(self, **tags):
        if tags.get("trainable"):
            params = [v for v in tf.trainable_variables(scope=self.name)]
        else:
            params = [v for v in tf.global_variables(scope=self.name)]
        return params

    @property
    def vectorized(self):
        return True

    def dist_info_sym(self, input_var, state_info_vars=None, name=None):
        with tf.name_scope(name, "dist_info_sym",
                           [input_var, state_info_vars]):
            _, mean, log_std, _ = self._build_graph(input_var)
            return dict(mean=mean, log_std=log_std)

    def latent_sym(self, input_var, name=None):
        with tf.name_scope(name, "latent_sym", [input_var]):
            latent, _, _, _ = self._build_graph(input_var)
            return latent

    @overrides
    def get_latent(self, an_input):
        flat_in = self.input_space.flatten(an_input)
        latent, mean, log_std = [x[0] for x in self._f_dist([flat_in])]
        return latent, dict(mean=mean, log_std=log_std)

    def get_latents(self, inputs):
        flat_in = self.input_space.flatten_n(inputs)
        latents, means, log_stds = self._f_dist(flat_in)
        return latents, dict(mean=means, log_std=log_stds)

    def get_reparam_latent_sym(self,
                               input_var,
                               latent_var,
                               old_dist_info_vars,
                               name=None):
        """
        Given inputs, old latent outputs, and a distribution of old latent
        outputs, return a symbolically reparameterized representation of
        the inputs in terms of the embedding parameters.

        :param input_var:
        :param latent_var:
        :param old_dist_info_vars:
        :return:
        """
        with tf.name_scope(name, "get_reparam_latent_sym",
                           [input_var, latent_var, old_dist_info_vars]):
            new_dist_info_vars = self.dist_info_sym(input_var, latent_var)
            new_mean_var, new_log_std_var = new_dist_info_vars[
                "mean"], new_dist_info_vars["log_std"]
            old_mean_var, old_log_std_var = old_dist_info_vars[
                "mean"], old_dist_info_vars["log_std"]
            epsilon_var = (latent_var - old_mean_var) / (
                tf.exp(old_log_std_var) + 1e-8)
            new_latent_var = new_mean_var + epsilon_var * tf.exp(
                new_log_std_var)
        return new_latent_var

    def log_likelihood(self, an_input, latent):
        flat_in = self.input_space.flatten(an_input)
        _, mean, log_std = [x[0] for x in self._f_dist([flat_in])]
        return self._dist.log_likelihood(latent,
                                         dict(mean=mean, log_std=log_std))

    def log_likelihoods(self, inputs, latents):
        flat_in = self.input_space.flatten_n(inputs)
        _, means, log_stds = self._f_dist(flat_in)
        return self._dist.log_likelihood(
            latents, dict(mean=means, log_std=log_stds))

    def log_likelihood_sym(self, input_var, latent_var, name=None):
        with tf.name_scope(name, "log_likelihood_sym",
                           [input_var, latent_var]):
            _, _, _, dist = self._build_graph(input_var)
            return dist.log_prob(latent_var)

    def entropy_sym(self, input_var, name=None):
        with tf.name_scope(name, "entropy_sym", [input_var]):
            _, _, _, dist = self._build_graph(input_var)
            return dist.entropy()

    def entropy_sym_sampled(self, dist_info_vars, name=None):
        with tf.name_scope(name, "entropy_sym_sampled", [dist_info_vars]):
            return self._dist.entropy_sym(dist_info_vars)

    def log_diagnostics(self, paths):
        log_stds = np.vstack(
            [path["agent_infos"]["log_std"] for path in paths])
        logger.record_tabular('AverageEmbeddingStd',
                              np.mean(np.exp(log_stds)))
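# A NumPy rendering (not from the source) of the reparameterization used by
# get_reparam_latent_sym above: recover the noise epsilon that produced a
# latent under the old distribution, then replay it through the new
# distribution, so gradients can flow through the new mean and log std.
import numpy as np

rng = np.random.default_rng(0)
old_mean, old_log_std = np.array([0.0, 1.0]), np.array([0.0, 0.0])
new_mean, new_log_std = np.array([0.5, 1.5]), np.array([-0.5, 0.2])

latent = old_mean + rng.standard_normal(2) * np.exp(old_log_std)
epsilon = (latent - old_mean) / (np.exp(old_log_std) + 1e-8)
new_latent = new_mean + epsilon * np.exp(new_log_std)

# The recovered epsilon reproduces the original latent under the old
# distribution (up to the 1e-8 stabilizer).
assert np.allclose(old_mean + epsilon * np.exp(old_log_std), latent)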
def __init__(self,
             input_shape,
             output_dim,
             conv_filters,
             conv_filter_sizes,
             conv_strides,
             conv_pads,
             hidden_sizes,
             hidden_nonlinearity=tf.nn.tanh,
             output_nonlinearity=None,
             name='GaussianConvRegressor',
             mean_network=None,
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_conv_filters=[],
             std_conv_filter_sizes=[],
             std_conv_strides=[],
             std_conv_pads=[],
             std_hidden_sizes=[],
             std_hidden_nonlinearity=None,
             std_output_nonlinearity=None,
             normalize_inputs=True,
             normalize_outputs=True,
             subsample_factor=1.,
             optimizer=None,
             optimizer_args=dict(),
             use_trust_region=True,
             max_kl_step=0.01):
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())
    self._mean_network_name = 'mean_network'
    self._std_network_name = 'std_network'

    with tf.compat.v1.variable_scope(name):
        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
            else:
                optimizer = LbfgsOptimizer(**optimizer_args)
        else:
            optimizer = optimizer(**optimizer_args)

        self._optimizer = optimizer
        self._subsample_factor = subsample_factor

        if mean_network is None:
            if std_share_network:
                b = np.concatenate([
                    np.zeros(output_dim),
                    np.full(output_dim, np.log(init_std))
                ], axis=0)  # yapf: disable
                b = tf.constant_initializer(b)
                mean_network = ConvNetwork(
                    name=self._mean_network_name,
                    input_shape=input_shape,
                    output_dim=2 * output_dim,
                    conv_filters=conv_filters,
                    conv_filter_sizes=conv_filter_sizes,
                    conv_strides=conv_strides,
                    conv_pads=conv_pads,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity,
                    output_b_init=b)
                l_mean = layers.SliceLayer(
                    mean_network.output_layer,
                    slice(output_dim),
                    name='mean_slice',
                )
            else:
                mean_network = ConvNetwork(
                    name=self._mean_network_name,
                    input_shape=input_shape,
                    output_dim=output_dim,
                    conv_filters=conv_filters,
                    conv_filter_sizes=conv_filter_sizes,
                    conv_strides=conv_strides,
                    conv_pads=conv_pads,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity)
                l_mean = mean_network.output_layer

        if adaptive_std:
            l_log_std = ConvNetwork(
                name=self._std_network_name,
                input_shape=input_shape,
                output_dim=output_dim,
                conv_filters=std_conv_filters,
                conv_filter_sizes=std_conv_filter_sizes,
                conv_strides=std_conv_strides,
                conv_pads=std_conv_pads,
                hidden_sizes=std_hidden_sizes,
                hidden_nonlinearity=std_hidden_nonlinearity,
                output_nonlinearity=std_output_nonlinearity,
                output_b_init=tf.constant_initializer(np.log(init_std)),
            ).output_layer
        elif std_share_network:
            l_log_std = layers.SliceLayer(
                mean_network.output_layer,
                slice(output_dim, 2 * output_dim),
                name='log_std_slice',
            )
        else:
            l_log_std = layers.ParamLayer(
                mean_network.input_layer,
                num_units=output_dim,
                param=tf.constant_initializer(np.log(init_std)),
                trainable=learn_std,
                name=self._std_network_name,
            )

        LayersPowered.__init__(self, [l_mean, l_log_std])

        xs_var = mean_network.input_layer.input_var
        ys_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                          name='ys',
                                          shape=(None, output_dim))
        old_means_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                                 name='old_means',
                                                 shape=(None, output_dim))
        old_log_stds_var = tf.compat.v1.placeholder(
            dtype=tf.float32, name='old_log_stds',
            shape=(None, output_dim))

        x_mean_var = tf.Variable(
            np.zeros((1, np.prod(input_shape)), dtype=np.float32),
            name='x_mean',
        )
        x_std_var = tf.Variable(
            np.ones((1, np.prod(input_shape)), dtype=np.float32),
            name='x_std',
        )
        y_mean_var = tf.Variable(
            np.zeros((1, output_dim), dtype=np.float32),
            name='y_mean',
        )
        y_std_var = tf.Variable(
            np.ones((1, output_dim), dtype=np.float32),
            name='y_std',
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var
        normalized_ys_var = (ys_var - y_mean_var) / y_std_var

        with tf.name_scope(self._mean_network_name,
                           values=[normalized_xs_var]):
            normalized_means_var = layers.get_output(
                l_mean, {mean_network.input_layer: normalized_xs_var})
        with tf.name_scope(self._std_network_name,
                           values=[normalized_xs_var]):
            normalized_log_stds_var = layers.get_output(
                l_log_std, {mean_network.input_layer: normalized_xs_var})

        means_var = normalized_means_var * y_std_var + y_mean_var
        log_stds_var = normalized_log_stds_var + tf.math.log(y_std_var)

        normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
        normalized_old_log_stds_var = (old_log_stds_var -
                                       tf.math.log(y_std_var))

        dist = self._dist = DiagonalGaussian(output_dim)

        normalized_dist_info_vars = dict(mean=normalized_means_var,
                                         log_std=normalized_log_stds_var)

        mean_kl = tf.reduce_mean(
            dist.kl_sym(
                dict(mean=normalized_old_means_var,
                     log_std=normalized_old_log_stds_var),
                normalized_dist_info_vars,
            ))

        loss = -tf.reduce_mean(
            dist.log_likelihood_sym(normalized_ys_var,
                                    normalized_dist_info_vars))

        self._f_predict = tensor_utils.compile_function([xs_var],
                                                        means_var)
        self._f_pdists = tensor_utils.compile_function(
            [xs_var], [means_var, log_stds_var])
        self._l_mean = l_mean
        self._l_log_std = l_log_std

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[
                normalized_means_var, normalized_log_stds_var
            ],
        )

        if use_trust_region:
            optimizer_args['leq_constraint'] = (mean_kl, max_kl_step)
            optimizer_args['inputs'] = [
                xs_var, ys_var, old_means_var, old_log_stds_var
            ]
        else:
            optimizer_args['inputs'] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._normalize_outputs = normalize_outputs
        self._mean_network = mean_network
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
        self._y_mean_var = y_mean_var
        self._y_std_var = y_std_var
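# A NumPy check (not from the source) of the output denormalization used by
# both regressors above: if the network predicts a Gaussian over normalized
# targets, rescaling by y_std and shifting by y_mean maps the mean back,
# while the log std only needs the additive log(y_std) shift.
import numpy as np

y_mean, y_std = np.array([2.0]), np.array([3.0])
norm_mean, norm_log_std = np.array([0.4]), np.array([-0.1])

mean = norm_mean * y_std + y_mean
log_std = norm_log_std + np.log(y_std)

# A normalized sample mapped back equals a sample drawn directly from the
# denormalized distribution (with the same noise).
eps = 0.7
sample_via_norm = (norm_mean + eps * np.exp(norm_log_std)) * y_std + y_mean
sample_direct = mean + eps * np.exp(log_std)
assert np.allclose(sample_via_norm, sample_direct)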
def _build(self, state_input, step_input, hidden_input, cell_input,
           name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Placeholder for entire time-series
            inputs.
        step_input (tf.Tensor): Placeholder for step inputs.
        hidden_input (tf.Tensor): Placeholder for step hidden state.
        cell_input (tf.Tensor): Placeholder for step cell state.
        name (str): Inner model name, also the variable scope of the
            inner model, if exist. One example is
            garage.tf.models.Sequential.

    Returns:
        tf.Tensor: Entire time-series means.
        tf.Tensor: Step mean.
        tf.Tensor: Entire time-series log std.
        tf.Tensor: Step log std.
        tf.Tensor: Step hidden state.
        tf.Tensor: Step cell state.
        tf.Tensor: Initial hidden state.
        tf.Tensor: Initial cell state.
        garage.tf.distributions.DiagonalGaussian: Policy distribution.

    """
    del name
    action_dim = self._output_dim

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an MLP
            (outputs, step_outputs, step_hidden, step_cell,
             hidden_init_var, cell_init_var) = lstm(
                 name='mean_std_network',
                 lstm_cell=self._mean_std_lstm_cell,
                 all_input_var=state_input,
                 step_input_var=step_input,
                 step_hidden_var=hidden_input,
                 step_cell_var=cell_input,
                 hidden_state_init=self._hidden_state_init,
                 hidden_state_init_trainable=self.
                 _hidden_state_init_trainable,
                 cell_state_init=self._cell_state_init,
                 cell_state_init_trainable=self._cell_state_init_trainable,
                 output_nonlinearity_layer=self.
                 _mean_std_output_nonlinearity_layer)
            with tf.compat.v1.variable_scope('mean_network'):
                mean_var = outputs[..., :action_dim]
                step_mean_var = step_outputs[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_var = outputs[..., action_dim:]
                step_log_std_var = step_outputs[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            (mean_var, step_mean_var, step_hidden, step_cell,
             hidden_init_var, cell_init_var) = lstm(
                 name='mean_network',
                 lstm_cell=self._mean_lstm_cell,
                 all_input_var=state_input,
                 step_input_var=step_input,
                 step_hidden_var=hidden_input,
                 step_cell_var=cell_input,
                 hidden_state_init=self._hidden_state_init,
                 hidden_state_init_trainable=self.
                 _hidden_state_init_trainable,
                 cell_state_init=self._cell_state_init,
                 cell_state_init_trainable=self._cell_state_init_trainable,
                 output_nonlinearity_layer=self.
                 _mean_output_nonlinearity_layer)

            log_std_var, step_log_std_var = recurrent_parameter(
                input_var=state_input,
                step_input_var=step_input,
                length=action_dim,
                initializer=tf.constant_initializer(self._init_std_param),
                trainable=self._learn_std,
                name='log_std_param')

    dist = DiagonalGaussian(self._output_dim)

    return (mean_var, step_mean_var, log_std_var, step_log_std_var,
            step_hidden, step_cell, hidden_init_var, cell_init_var, dist)
def _build(self, state_input):
    action_dim = self._output_dim

    with tf.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an MLP
            b = np.concatenate([
                np.zeros(action_dim),
                np.full(action_dim, self._init_std_param)
            ], axis=0)  # yapf: disable
            b = tf.constant_initializer(b)
            mean_std_network = mlp(
                state_input,
                output_dim=action_dim * 2,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                output_nonlinearity=self._output_nonlinearity,
                output_b_init=b,
                name='mean_std_network')
            with tf.variable_scope('mean_network'):
                mean_network = mean_std_network[..., :action_dim]
            with tf.variable_scope('std_network'):
                std_network = mean_std_network[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            mean_network = mlp(
                state_input,
                output_dim=action_dim,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                output_nonlinearity=self._output_nonlinearity,
                name='mean_network')

            # std network
            if self._adaptive_std:
                b = tf.constant_initializer(self._init_std_param)
                std_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._std_hidden_sizes,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    output_nonlinearity=self._std_output_nonlinearity,
                    output_b_init=b,
                    name='std_network')
            else:
                p = tf.constant_initializer(self._init_std_param)
                std_network = parameter(state_input,
                                        length=action_dim,
                                        initializer=p,
                                        trainable=self._learn_std,
                                        name='std_network')

    mean_var = mean_network
    std_param_var = std_network

    with tf.variable_scope('std_parameterization'):
        # build the log std from the chosen std parameterization
        if self._std_parameterization == 'exp':
            std_var = std_param_var
        elif self._std_parameterization == 'softplus':
            std_var = tf.log(1. + tf.exp(std_param_var))
        else:
            raise NotImplementedError

    with tf.variable_scope('std_limits'):
        # clamp the log std; std_var stays defined even when no limits
        # are set
        if self._min_std_param is not None:
            std_var = tf.maximum(std_var, self._min_std_param)
        if self._max_std_param is not None:
            std_var = tf.minimum(std_var, self._max_std_param)

    dist = DiagonalGaussian(action_dim)

    rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:],
                           seed=deterministic.get_seed())
    action_var = rnd * tf.exp(std_var) + mean_var

    return action_var, mean_var, std_var, std_param_var, dist
def test_sample():
    gaussian = DiagonalGaussian(dim=2)
    dist = dict(mean=np.array([1, 1]), log_std=np.array([0, 0]))
    samples = [gaussian.sample(dist) for _ in range(10000)]
    assert np.isclose(np.mean(samples), 1, atol=0.1)
    assert np.isclose(np.var(samples), 1, atol=0.1)
def _build(self, state_input, name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Placeholder for state input.
        name (str): Inner model name, also the variable scope of the
            inner model, if exist. One example is
            garage.tf.models.Sequential.

    Returns:
        tf.Tensor: Mean.
        tf.Tensor: Parameterized log_std.
        tf.Tensor: log_std.
        garage.tf.distributions.DiagonalGaussian: Policy distribution.

    """
    del name
    action_dim = self._output_dim

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an MLP
            b = np.concatenate([
                np.zeros(action_dim),
                np.full(action_dim, self._init_std_param)
            ], axis=0)  # yapf: disable

            mean_std_network = mlp(
                state_input,
                output_dim=action_dim * 2,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=tf.constant_initializer(b),
                name='mean_std_network',
                layer_normalization=self._layer_normalization)
            with tf.compat.v1.variable_scope('mean_network'):
                mean_network = mean_std_network[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_network = mean_std_network[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            mean_network = mlp(
                state_input,
                output_dim=action_dim,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=self._output_b_init,
                name='mean_network',
                layer_normalization=self._layer_normalization)

            # std network
            if self._adaptive_std:
                log_std_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._std_hidden_sizes,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    output_nonlinearity=self._std_output_nonlinearity,
                    output_w_init=self._std_output_w_init,
                    output_b_init=tf.constant_initializer(
                        self._init_std_param),
                    name='log_std_network',
                    layer_normalization=self._layer_normalization)
            else:
                log_std_network = parameter(
                    input_var=state_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(
                        self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_network')

    mean_var = mean_network
    std_param = log_std_network

    with tf.compat.v1.variable_scope('std_limits'):
        if self._min_std_param is not None:
            std_param = tf.maximum(std_param, self._min_std_param)
        if self._max_std_param is not None:
            std_param = tf.minimum(std_param, self._max_std_param)

    with tf.compat.v1.variable_scope('std_parameterization'):
        # build std_var with std parameterization
        if self._std_parameterization == 'exp':
            log_std_var = std_param
        else:  # we know it must be softplus here
            # std = log(1 + exp(param)), so log_std = log(log(1 + exp(param)))
            log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

    dist = DiagonalGaussian(self._output_dim)

    return mean_var, log_std_var, std_param, dist
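# A worked NumPy check (not from the source) of the softplus branch above:
# the stored parameter maps to the std via softplus, so the log std handed
# to DiagonalGaussian is log(softplus(param)) -- hence the nested logs.
import numpy as np

std_param = 0.3
std = np.log1p(np.exp(std_param))  # softplus(std_param)
log_std = np.log(np.log(1. + np.exp(std_param)))

assert np.isclose(np.exp(log_std), std)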