Esempio n. 1
0
    def _initialize(self):
        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, ) +
                                             self._input_shape)

        with tf.compat.v1.variable_scope(self._variable_scope):
            self.model.build(input_var)

            ys_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                              name='ys',
                                              shape=(None, self._output_dim))

            old_prob_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                                    name='old_prob',
                                                    shape=(None,
                                                           self._output_dim))

            y_hat = self.model.networks['default'].y_hat

            old_info_vars = dict(prob=old_prob_var)
            info_vars = dict(prob=y_hat)

            self._dist = Categorical(self._output_dim)
            mean_kl = tf.reduce_mean(
                self._dist.kl_sym(old_info_vars, info_vars))

            loss = -tf.reduce_mean(
                self._dist.log_likelihood_sym(ys_var, info_vars))

            predicted = tf.one_hot(tf.argmax(y_hat, axis=1),
                                   depth=self._output_dim)

            self._f_predict = tensor_utils.compile_function([input_var],
                                                            predicted)
            self._f_prob = tensor_utils.compile_function([input_var], y_hat)

            self._optimizer.update_opt(loss=loss,
                                       target=self,
                                       network_output=[y_hat],
                                       inputs=[input_var, ys_var])
            self._tr_optimizer.update_opt(
                loss=loss,
                target=self,
                network_output=[y_hat],
                inputs=[input_var, ys_var, old_prob_var],
                leq_constraint=(mean_kl, self._max_kl_step))
Esempio n. 2
0
    def distribution(self):
        """Policy distribution.

        Returns:
            garage.tf.distributions.Categorical: Policy distribution.

        """
        return Categorical(self._action_dim)
Esempio n. 3
0
    def __init__(
        self,
        env_spec,
        conv_filters,
        conv_filter_sizes,
        conv_strides,
        conv_pads,
        hidden_sizes=[],
        hidden_nonlinearity=tf.nn.relu,
        output_nonlinearity=tf.nn.softmax,
        prob_network=None,
        name="CategoricalConvPolicy",
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected
        hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other
         network params
        are ignored
        :return:
        """
        assert isinstance(env_spec.action_space, Discrete)

        Serializable.quick_init(self, locals())

        self._name = name
        self._env_spec = env_spec
        self._prob_network_name = "prob_network"

        with tf.variable_scope(name, "CategoricalConvPolicy"):
            if prob_network is None:
                prob_network = ConvNetwork(
                    input_shape=env_spec.observation_space.shape,
                    output_dim=env_spec.action_space.n,
                    conv_filters=conv_filters,
                    conv_filter_sizes=conv_filter_sizes,
                    conv_strides=conv_strides,
                    conv_pads=conv_pads,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity,
                    name="conv_prob_network",
                )

            with tf.name_scope(self._prob_network_name):
                out_prob = L.get_output(prob_network.output_layer)

            self._l_prob = prob_network.output_layer
            self._l_obs = prob_network.input_layer
            self._f_prob = tensor_utils.compile_function(
                [prob_network.input_layer.input_var], [out_prob])

            self._dist = Categorical(env_spec.action_space.n)

            super(CategoricalConvPolicy, self).__init__(env_spec)
            LayersPowered.__init__(self, [prob_network.output_layer])
Esempio n. 4
0
    def __init__(
        self,
        env_spec,
        name='CategoricalMLPPolicy',
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        prob_network=None,
    ):
        """
        CategoricalMLPPolicy.

        A policy that uses a MLP to estimate a categorical distribution.

        Args:
            env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
            hidden_sizes (list[int]): Output dimension of dense layer(s).
                For example, (32, 32) means the MLP of this policy consists
                of two hidden layers, each with 32 hidden units.
            hidden_nonlinearity: Activation function for
                intermediate dense layer(s).
            prob_network (tf.Tensor): manually specified network for this
                policy. If None, a MLP with the network parameters will be
                created. If not None, other network params are ignored.
        """
        assert isinstance(env_spec.action_space, akro.Discrete)

        Serializable.quick_init(self, locals())

        self.name = name
        self._prob_network_name = 'prob_network'
        with tf.variable_scope(name, 'CategoricalMLPPolicy'):
            if prob_network is None:
                prob_network = MLP(
                    input_shape=(env_spec.observation_space.flat_dim, ),
                    output_dim=env_spec.action_space.n,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=tf.nn.softmax,
                    name=self._prob_network_name,
                )

            self._l_prob = prob_network.output_layer
            self._l_obs = prob_network.input_layer
            with tf.name_scope(self._prob_network_name):
                prob_network_outputs = L.get_output(prob_network.output_layer)
            self._f_prob = tensor_utils.compile_function(
                [prob_network.input_layer.input_var], prob_network_outputs)

            self._dist = Categorical(env_spec.action_space.n)

            super(CategoricalMLPPolicy, self).__init__(env_spec)
            LayersPowered.__init__(self, [prob_network.output_layer])
Esempio n. 5
0
    def __init__(
            self,
            env_spec,
            name="CategoricalMLPPolicy",
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.tanh,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected
        hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other
         network params
        are ignored
        :return:
        """
        assert isinstance(env_spec.action_space, Discrete)

        Serializable.quick_init(self, locals())

        self.name = name
        self._prob_network_name = "prob_network"
        with tf.variable_scope(name, "CategoricalMLPPolicy"):
            if prob_network is None:
                prob_network = MLP(
                    input_shape=(env_spec.observation_space.flat_dim, ),
                    output_dim=env_spec.action_space.n,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=tf.nn.softmax,
                    name=self._prob_network_name,
                )

            self._l_prob = prob_network.output_layer
            self._l_obs = prob_network.input_layer
            with tf.name_scope(self._prob_network_name):
                prob_network_outputs = L.get_output(prob_network.output_layer)
            self._f_prob = tensor_utils.compile_function(
                [prob_network.input_layer.input_var], prob_network_outputs)

            self._dist = Categorical(env_spec.action_space.n)

            super(CategoricalMLPPolicy, self).__init__(env_spec)
            LayersPowered.__init__(self, [prob_network.output_layer])
 def distribution(self):
     """Policy distribution."""
     return Categorical(self.action_dim)
Esempio n. 7
0
class CategoricalMLPRegressorWithModel(StochasticRegressor2):
    """
    CategoricalMLPRegressor with garage.tf.models.NormalizedInputMLPModel.

    A class for performing regression (or classification, really) by fitting
    a Categorical distribution to the outputs. Assumes that the output will
    always be a one hot vector

    Args:
        input_shape (tuple[int]): Input shape of the training data. Since an
            MLP model is used, implementation assumes flattened inputs. The
            input shape of each data point should thus be of shape (x, ).
        output_dim (int): Output dimension of the model.
        name (str): Model name, also the variable scope.
        hidden_sizes (list[int]): Output dimension of dense layer(s) for
            the MLP for the network. For example, (32, 32) means the MLP
            consists of two hidden layers, each with 32 hidden units.
        hidden_nonlinearity (callable): Activation function for intermediate
            dense layer(s). It should return a tf.Tensor. Set it to
            None to maintain a tanh activation.
        hidden_w_init (callable): Initializer function for the weight
            of intermediate dense layer(s). The function should return a
            tf.Tensor. Default is Glorot uniform initializer.
        hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s). The function should return a
            tf.Tensor. Default is zero initializer.
        output_nonlinearity (callable): Activation function for output dense
            layer. It should return a tf.Tensor. Set it to None to
            maintain a softmax activation.
        output_w_init (callable): Initializer function for the weight
            of output dense layer(s). The function should return a
            tf.Tensor. Default is Glorot uniform initializer.
        output_b_init (callable): Initializer function for the bias
            of output dense layer(s). The function should return a
            tf.Tensor. Default is zero initializer.
        optimizer (garage.tf.Optimizer): Optimizer for minimizing the negative
            log-likelihood. Defaults to LbsgsOptimizer
        optimizer_args (dict): Arguments for the optimizer. Default is None,
            which means no arguments.
        tr_optimizer (garage.tf.Optimizer): Optimizer for trust region
            approximation. Defaults to ConjugateGradientOptimizer.
        tr_optimizer_args (dict): Arguments for the trust region optimizer.
            Default is None, which means no arguments.
        use_trust_region (bool): Whether to use trust region constraint.
        max_kl_step (float): KL divergence constraint for each iteration.
        normalize_inputs (bool): Bool for normalizing inputs or not.
        layer_normalization (bool): Bool for using layer normalization or not.
    """
    def __init__(self,
                 input_shape,
                 output_dim,
                 name='CategoricalMLPRegressorWithModel',
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.tanh,
                 hidden_w_init=tf.glorot_uniform_initializer(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_nonlinearity=tf.nn.softmax,
                 output_w_init=tf.glorot_uniform_initializer(),
                 output_b_init=tf.zeros_initializer(),
                 optimizer=None,
                 optimizer_args=None,
                 tr_optimizer=None,
                 tr_optimizer_args=None,
                 use_trust_region=True,
                 max_kl_step=0.01,
                 normalize_inputs=True,
                 layer_normalization=False):

        super().__init__(input_shape, output_dim, name)
        self._use_trust_region = use_trust_region
        self._max_kl_step = max_kl_step
        self._normalize_inputs = normalize_inputs

        with tf.compat.v1.variable_scope(self._name, reuse=False) as vs:
            self._variable_scope = vs
            if optimizer_args is None:
                optimizer_args = dict()
            if tr_optimizer_args is None:
                tr_optimizer_args = dict()

            if optimizer is None:
                optimizer = LbfgsOptimizer(**optimizer_args)
            else:
                optimizer = optimizer(**optimizer_args)

            if tr_optimizer is None:
                tr_optimizer = ConjugateGradientOptimizer(**tr_optimizer_args)
            else:
                tr_optimizer = tr_optimizer(**tr_optimizer_args)

            self._optimizer = optimizer
            self._tr_optimizer = tr_optimizer

        self.model = NormalizedInputMLPModel(
            input_shape,
            output_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            output_nonlinearity=output_nonlinearity,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            layer_normalization=layer_normalization)

        self._initialize()

    def _initialize(self):
        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, ) +
                                             self._input_shape)

        with tf.compat.v1.variable_scope(self._variable_scope):
            self.model.build(input_var)

            ys_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                              name='ys',
                                              shape=(None, self._output_dim))

            old_prob_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                                    name='old_prob',
                                                    shape=(None,
                                                           self._output_dim))

            y_hat = self.model.networks['default'].y_hat

            old_info_vars = dict(prob=old_prob_var)
            info_vars = dict(prob=y_hat)

            self._dist = Categorical(self._output_dim)
            mean_kl = tf.reduce_mean(
                self._dist.kl_sym(old_info_vars, info_vars))

            loss = -tf.reduce_mean(
                self._dist.log_likelihood_sym(ys_var, info_vars))

            predicted = tf.one_hot(tf.argmax(y_hat, axis=1),
                                   depth=self._output_dim)

            self._f_predict = tensor_utils.compile_function([input_var],
                                                            predicted)
            self._f_prob = tensor_utils.compile_function([input_var], y_hat)

            self._optimizer.update_opt(loss=loss,
                                       target=self,
                                       network_output=[y_hat],
                                       inputs=[input_var, ys_var])
            self._tr_optimizer.update_opt(
                loss=loss,
                target=self,
                network_output=[y_hat],
                inputs=[input_var, ys_var, old_prob_var],
                leq_constraint=(mean_kl, self._max_kl_step))

    def fit(self, xs, ys):
        """
        Fit with input data xs and label ys.

        Args:
            xs (numpy.ndarray): Input data.
            ys (numpy.ndarray): Label of input data.
        """
        if self._normalize_inputs:
            # recompute normalizing constants for inputs
            self.model.networks['default'].x_mean.load(
                np.mean(xs, axis=0, keepdims=True))
            self.model.networks['default'].x_std.load(
                np.std(xs, axis=0, keepdims=True))

        if self._use_trust_region:
            # To use trust region constraint and optimizer
            old_prob = self._f_prob(xs)
            inputs = [xs, ys, old_prob]
            optimizer = self._tr_optimizer
        else:
            inputs = [xs, ys]
            optimizer = self._optimizer
        loss_before = optimizer.loss(inputs)
        tabular.record('{}/LossBefore'.format(self._name), loss_before)
        optimizer.optimize(inputs)
        loss_after = optimizer.loss(inputs)
        tabular.record('{}/LossAfter'.format(self._name), loss_after)
        tabular.record('{}/dLoss'.format(self._name), loss_before - loss_after)
        self.first_optimized = True

    def predict(self, xs):
        """
        Predict ys based on input xs.

        Args:
            xs (numpy.ndarray): Input data.

        Return:
            The predicted ys (one hot vectors).
        """
        return self._f_predict(xs)

    def predict_log_likelihood(self, xs, ys):
        """
        Predict log likelihood of output based on input xs and labels ys.

        Args:
            xs (numpy.ndarray): Input data.
            ys (numpy.ndarray): Input labels in one hot representation.

        Return:
            The predicted log likelihoods.

        """
        prob = self._f_prob(xs)
        return self._dist.log_likelihood(ys, dict(prob=prob))

    def dist_info_sym(self, x_var, name=None):
        """
        Symbolic graph of the distribution.

        Args:
            x_var (tf.Tensor): Input tf.Tensor for the input data.
            name (str): Name of the new graph.

        Return:
            tf.Tensor output of the symbolic graph of the distribution.
        """
        with tf.compat.v1.variable_scope(self._variable_scope):
            prob, _, _ = self.model.build(x_var, name=name)

        return dict(prob=prob)

    def log_likelihood_sym(self, x_var, y_var, name=None):
        """
        Symbolic graph of the log likelihood.

        Args:
            x_var (tf.Tensor): Input tf.Tensor for the input data.
            y_var (tf.Tensor): Input tf.Tensor for the one hot label of data.
            name (str): Name of the new graph.

        Return:
            tf.Tensor output of the symbolic log likelihood.
        """
        with tf.compat.v1.variable_scope(self._variable_scope):
            prob, _, _ = self.model.build(x_var, name=name)

        return self._dist.log_likelihood_sym(y_var, dict(prob=prob))

    def get_params_internal(self, **args):
        """Get the params, which are the trainable variables."""
        return self._variable_scope.trainable_variables()

    def __getstate__(self):
        """Object.__getstate__."""
        new_dict = super().__getstate__()
        del new_dict['_f_predict']
        del new_dict['_f_prob']
        del new_dict['_dist']
        return new_dict

    def __setstate__(self, state):
        """Object.__setstate__."""
        super().__setstate__(state)
        self._initialize()
    def __init__(
        self,
        input_shape,
        output_dim,
        name='CategoricalMLPRegressor',
        prob_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        optimizer=None,
        tr_optimizer=None,
        use_trust_region=True,
        max_kl_step=0.01,
        normalize_inputs=True,
        no_initial_trust_region=True,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the mean
        network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the
        mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param max_kl_step: KL divergence constraint for each iteration
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        with tf.compat.v1.variable_scope(name, 'CategoricalMLPRegressor'):
            if optimizer is None:
                optimizer = LbfgsOptimizer()
            if tr_optimizer is None:
                tr_optimizer = ConjugateGradientOptimizer()

            self.output_dim = output_dim
            self.optimizer = optimizer
            self.tr_optimizer = tr_optimizer

            self._prob_network_name = 'prob_network'
            if prob_network is None:
                prob_network = MLP(input_shape=input_shape,
                                   output_dim=output_dim,
                                   hidden_sizes=hidden_sizes,
                                   hidden_nonlinearity=hidden_nonlinearity,
                                   output_nonlinearity=tf.nn.softmax,
                                   name=self._prob_network_name)

            l_prob = prob_network.output_layer

            LayersPowered.__init__(self, [l_prob])

            xs_var = prob_network.input_layer.input_var
            ys_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                              shape=[None, output_dim],
                                              name='ys')
            old_prob_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                                    shape=[None, output_dim],
                                                    name='old_prob')

            x_mean_var = tf.compat.v1.get_variable(
                name='x_mean',
                shape=(1, ) + input_shape,
                initializer=tf.constant_initializer(0., dtype=tf.float32))
            x_std_var = tf.compat.v1.get_variable(
                name='x_std',
                shape=(1, ) + input_shape,
                initializer=tf.constant_initializer(1., dtype=tf.float32))

            normalized_xs_var = (xs_var - x_mean_var) / x_std_var

            with tf.name_scope(self._prob_network_name,
                               values=[normalized_xs_var]):
                prob_var = L.get_output(
                    l_prob, {prob_network.input_layer: normalized_xs_var})

            old_info_vars = dict(prob=old_prob_var)
            info_vars = dict(prob=prob_var)

            dist = self._dist = Categorical(output_dim)

            mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars))

            loss = -tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars))

            predicted = tf.one_hot(tf.argmax(prob_var, axis=1),
                                   depth=output_dim)

            self.prob_network = prob_network
            self.f_predict = tensor_utils.compile_function([xs_var], predicted)
            self.f_prob = tensor_utils.compile_function([xs_var], prob_var)
            self.l_prob = l_prob

            self.optimizer.update_opt(loss=loss,
                                      target=self,
                                      network_outputs=[prob_var],
                                      inputs=[xs_var, ys_var])
            self.tr_optimizer.update_opt(loss=loss,
                                         target=self,
                                         network_outputs=[prob_var],
                                         inputs=[xs_var, ys_var, old_prob_var],
                                         leq_constraint=(mean_kl, max_kl_step))

            self.use_trust_region = use_trust_region
            self.name = name

            self.normalize_inputs = normalize_inputs
            self.x_mean_var = x_mean_var
            self.x_std_var = x_std_var
            self.first_optimized = not no_initial_trust_region
Esempio n. 9
0
class RecurrentCategorical(Distribution):
    def __init__(self, dim, name="RecurrentCategorical"):
        self._cat = Categorical(dim, name)
        self._dim = dim
        self._name = name

    @property
    def dim(self):
        return self._dim

    def kl_sym(self, old_dist_info_vars, new_dist_info_vars, name=None):
        """
        Compute the symbolic KL divergence of two categorical distributions
        """
        with tf.name_scope(name, "kl_sym",
                           [old_dist_info_vars, new_dist_info_vars]):
            old_prob_var = old_dist_info_vars["prob"]
            new_prob_var = new_dist_info_vars["prob"]
            # Assume layout is N * T * A
            return tf.reduce_sum(
                old_prob_var *
                (tf.log(old_prob_var + TINY) - tf.log(new_prob_var + TINY)),
                axis=2)

    def kl(self, old_dist_info, new_dist_info):
        """
        Compute the KL divergence of two categorical distributions
        """
        old_prob = old_dist_info["prob"]
        new_prob = new_dist_info["prob"]
        return np.sum(old_prob *
                      (np.log(old_prob + TINY) - np.log(new_prob + TINY)),
                      axis=2)

    def likelihood_ratio_sym(self,
                             x_var,
                             old_dist_info_vars,
                             new_dist_info_vars,
                             name=None):
        with tf.name_scope(name, "likelihood_ratio_sym",
                           [x_var, old_dist_info_vars, new_dist_info_vars]):
            old_prob_var = old_dist_info_vars["prob"]
            new_prob_var = new_dist_info_vars["prob"]
            # Assume layout is N * T * A
            a_dim = tf.shape(x_var)[2]
            flat_ratios = self._cat.likelihood_ratio_sym(
                tf.reshape(x_var, tf.stack([-1, a_dim])),
                dict(prob=tf.reshape(old_prob_var, tf.stack([-1, a_dim]))),
                dict(prob=tf.reshape(new_prob_var, tf.stack([-1, a_dim]))))
            return tf.reshape(flat_ratios, tf.shape(old_prob_var)[:2])

    def entropy(self, dist_info):
        probs = dist_info["prob"]
        return -np.sum(probs * np.log(probs + TINY), axis=2)

    def entropy_sym(self, dist_info_vars, name=None):
        with tf.name_scope(name, "entropy_sym", [dist_info_vars]):
            probs = dist_info_vars["prob"]
            return -tf.reduce_sum(probs * tf.log(probs + TINY), 2)

    def log_likelihood_sym(self, xs, dist_info_vars, name=None):
        with tf.name_scope(name, "log_likelihood_sym", [xs, dist_info_vars]):
            probs = dist_info_vars["prob"]
            # Assume layout is N * T * A
            a_dim = tf.shape(probs)[2]
            flat_logli = self._cat.log_likelihood_sym(
                tf.reshape(xs, tf.stack([-1, a_dim])),
                dict(prob=tf.reshape(probs, tf.stack((-1, a_dim)))))
            return tf.reshape(flat_logli, tf.shape(probs)[:2])

    def log_likelihood(self, xs, dist_info):
        probs = dist_info["prob"]
        # Assume layout is N * T * A
        a_dim = tf.shape(probs)[2]
        flat_logli = self._cat.log_likelihood_sym(
            xs.reshape((-1, a_dim)), dict(prob=probs.reshape((-1, a_dim))))
        return flat_logli.reshape(probs.shape[:2])

    @property
    def dist_info_specs(self):
        return [("prob", (self.dim, ))]
Esempio n. 10
0
 def __init__(self, dim, name="RecurrentCategorical"):
     self._cat = Categorical(dim, name)
     self._dim = dim
     self._name = name