def __init__(self, input_shape, output_dim, name='BernoulliMLPRegressorWithModel', hidden_sizes=(32, 32), hidden_nonlinearity=tf.nn.relu, hidden_w_init=tf.glorot_uniform_initializer(), hidden_b_init=tf.zeros_initializer(), output_nonlinearity=tf.nn.sigmoid, output_w_init=tf.glorot_uniform_initializer(), output_b_init=tf.zeros_initializer(), optimizer=None, optimizer_args=None, tr_optimizer=None, tr_optimizer_args=None, use_trust_region=True, max_kl_step=0.01, normalize_inputs=True, layer_normalization=False): super().__init__(input_shape, output_dim, name) self._use_trust_region = use_trust_region self._max_kl_step = max_kl_step self._normalize_inputs = normalize_inputs with tf.variable_scope(self._name, reuse=False) as vs: self._variable_scope = vs if optimizer_args is None: optimizer_args = dict() if tr_optimizer_args is None: tr_optimizer_args = dict() if optimizer is None: optimizer = LbfgsOptimizer(**optimizer_args) else: optimizer = optimizer(**optimizer_args) if tr_optimizer is None: tr_optimizer = ConjugateGradientOptimizer(**tr_optimizer_args) else: tr_optimizer = tr_optimizer(**tr_optimizer_args) self._optimizer = optimizer self._tr_optimizer = tr_optimizer self.model = NormalizedInputMLPModel( input_shape, output_dim, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, hidden_w_init=hidden_w_init, hidden_b_init=hidden_b_init, output_nonlinearity=output_nonlinearity, output_w_init=output_w_init, output_b_init=output_b_init, layer_normalization=layer_normalization) self._dist = Bernoulli(output_dim) self._initialize()
def __init__( self, input_shape, output_dim, name="BernoulliMLPRegressor", hidden_sizes=(32, 32), hidden_nonlinearity=tf.nn.relu, optimizer=None, tr_optimizer=None, use_trust_region=True, step_size=0.01, normalize_inputs=True, no_initial_trust_region=True, ): """ :param input_shape: Shape of the input data. :param output_dim: Dimension of output. :param hidden_sizes: Number of hidden units of each layer of the mean network. :param hidden_nonlinearity: Non-linearity used for each layer of the mean network. :param optimizer: Optimizer for minimizing the negative log-likelihood. :param use_trust_region: Whether to use trust region constraint. :param step_size: KL divergence constraint for each iteration """ Serializable.quick_init(self, locals()) with tf.variable_scope(name): if optimizer is None: optimizer = LbfgsOptimizer(name="optimizer") if tr_optimizer is None: tr_optimizer = ConjugateGradientOptimizer() self.output_dim = output_dim self.optimizer = optimizer self.tr_optimizer = tr_optimizer p_network = MLP(input_shape=input_shape, output_dim=output_dim, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, output_nonlinearity=tf.nn.sigmoid, name="p_network") l_p = p_network.output_layer LayersPowered.__init__(self, [l_p]) xs_var = p_network.input_layer.input_var ys_var = tf.placeholder(dtype=tf.float32, shape=(None, output_dim), name="ys") old_p_var = tf.placeholder(dtype=tf.float32, shape=(None, output_dim), name="old_p") x_mean_var = tf.get_variable(name="x_mean", initializer=tf.zeros_initializer(), shape=(1, ) + input_shape) x_std_var = tf.get_variable(name="x_std", initializer=tf.ones_initializer(), shape=(1, ) + input_shape) normalized_xs_var = (xs_var - x_mean_var) / x_std_var p_var = L.get_output(l_p, {p_network.input_layer: normalized_xs_var}) old_info_vars = dict(p=old_p_var) info_vars = dict(p=p_var) dist = self._dist = Bernoulli(output_dim) mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars)) loss = -tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars)) predicted = p_var >= 0.5 self.f_predict = tensor_utils.compile_function([xs_var], predicted) self.f_p = tensor_utils.compile_function([xs_var], p_var) self.l_p = l_p self.optimizer.update_opt(loss=loss, target=self, network_outputs=[p_var], inputs=[xs_var, ys_var]) self.tr_optimizer.update_opt(loss=loss, target=self, network_outputs=[p_var], inputs=[xs_var, ys_var, old_p_var], leq_constraint=(mean_kl, step_size)) self.use_trust_region = use_trust_region self.name = name self.normalize_inputs = normalize_inputs self.x_mean_var = x_mean_var self.x_std_var = x_std_var self.first_optimized = not no_initial_trust_region
def __init__( self, input_shape, output_dim, name='CategoricalMLPRegressor', prob_network=None, hidden_sizes=(32, 32), hidden_nonlinearity=tf.nn.tanh, optimizer=None, tr_optimizer=None, use_trust_region=True, max_kl_step=0.01, normalize_inputs=True, no_initial_trust_region=True, ): """ :param input_shape: Shape of the input data. :param output_dim: Dimension of output. :param hidden_sizes: Number of hidden units of each layer of the mean network. :param hidden_nonlinearity: Non-linearity used for each layer of the mean network. :param optimizer: Optimizer for minimizing the negative log-likelihood. :param use_trust_region: Whether to use trust region constraint. :param max_kl_step: KL divergence constraint for each iteration """ Parameterized.__init__(self) Serializable.quick_init(self, locals()) with tf.compat.v1.variable_scope(name, 'CategoricalMLPRegressor'): if optimizer is None: optimizer = LbfgsOptimizer() if tr_optimizer is None: tr_optimizer = ConjugateGradientOptimizer() self.output_dim = output_dim self.optimizer = optimizer self.tr_optimizer = tr_optimizer self._prob_network_name = 'prob_network' if prob_network is None: prob_network = MLP(input_shape=input_shape, output_dim=output_dim, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, output_nonlinearity=tf.nn.softmax, name=self._prob_network_name) l_prob = prob_network.output_layer LayersPowered.__init__(self, [l_prob]) xs_var = prob_network.input_layer.input_var ys_var = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, output_dim], name='ys') old_prob_var = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, output_dim], name='old_prob') x_mean_var = tf.compat.v1.get_variable( name='x_mean', shape=(1, ) + input_shape, initializer=tf.constant_initializer(0., dtype=tf.float32)) x_std_var = tf.compat.v1.get_variable( name='x_std', shape=(1, ) + input_shape, initializer=tf.constant_initializer(1., dtype=tf.float32)) normalized_xs_var = (xs_var - x_mean_var) / x_std_var with tf.name_scope(self._prob_network_name, values=[normalized_xs_var]): prob_var = L.get_output( l_prob, {prob_network.input_layer: normalized_xs_var}) old_info_vars = dict(prob=old_prob_var) info_vars = dict(prob=prob_var) dist = self._dist = Categorical(output_dim) mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars)) loss = -tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars)) predicted = tf.one_hot(tf.argmax(prob_var, axis=1), depth=output_dim) self.prob_network = prob_network self.f_predict = tensor_utils.compile_function([xs_var], predicted) self.f_prob = tensor_utils.compile_function([xs_var], prob_var) self.l_prob = l_prob self.optimizer.update_opt(loss=loss, target=self, network_outputs=[prob_var], inputs=[xs_var, ys_var]) self.tr_optimizer.update_opt(loss=loss, target=self, network_outputs=[prob_var], inputs=[xs_var, ys_var, old_prob_var], leq_constraint=(mean_kl, max_kl_step)) self.use_trust_region = use_trust_region self.name = name self.normalize_inputs = normalize_inputs self.x_mean_var = x_mean_var self.x_std_var = x_std_var self.first_optimized = not no_initial_trust_region
from garage.envs.box2d import CartpoleEnv from garage.tf.algos import TRPO import garage.tf.core.layers as L from garage.tf.envs import TfEnv from garage.tf.optimizers import ConjugateGradientOptimizer, FiniteDifferenceHvp from garage.tf.policies import GaussianLSTMPolicy env = TfEnv(normalize(CartpoleEnv())) policy = GaussianLSTMPolicy( name="policy", env_spec=env.spec, lstm_layer_cls=L.TfBasicLSTMLayer, # gru_layer_cls=L.GRULayer, ) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = TRPO( env=env, policy=policy, baseline=baseline, batch_size=4000, max_path_length=100, n_itr=10, discount=0.99, step_size=0.01, optimizer=ConjugateGradientOptimizer( hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))) algo.train()
def __init__(self, optimizer=None, optimizer_args=None, **kwargs): if optimizer is None: if optimizer_args is None: optimizer_args = dict() optimizer = ConjugateGradientOptimizer(**optimizer_args) super(TRPO, self).__init__(optimizer=optimizer, **kwargs)