Example #1
def mlp_gaussian_policy(x, a, hidden_sizes, activation, output_activation,
                        action_space):
    """
    Builds symbols to sample actions and compute log-probs of actions.

    Special instructions: Make log_std a tf variable with the same shape as
    the action vector, independent of x, initialized to [-0.5, -0.5, ..., -0.5].

    Args:
        x: Input tensor of states. Shape [batch, obs_dim].

        a: Input tensor of actions. Shape [batch, act_dim].

        hidden_sizes: Sizes of hidden layers for action network MLP.

        activation: Activation function for all layers except last.

        output_activation: Activation function for last layer (action layer).

        action_space: A gym.spaces object describing the action space of the
            environment this agent will interact with.

    Returns:
        pi: A symbol for sampling stochastic actions from a Gaussian 
            distribution.

        logp: A symbol for computing log-likelihoods of actions from a Gaussian 
            distribution.

        logp_pi: A symbol for computing log-likelihoods of actions in pi from a 
            Gaussian distribution.

    """
    #######################
    #                     #
    #   YOUR CODE HERE    #
    #                     #
    #######################
    # Dimension of the action vector.
    act_dim = a.shape.as_list()[-1]
    # The policy mean is an MLP over the state, with one output per action dim.
    mu = mlp(x, list(hidden_sizes) + [act_dim], activation, output_activation)
    # log_std is a trainable variable, independent of x, initialized to -0.5.
    log_std = tf.get_variable(name="log_std",
                              initializer=-0.5 *
                              np.ones(shape=act_dim, dtype=np.float32))
    std = tf.exp(log_std)
    # Sample actions by shifting and scaling standard normal noise.
    pi = mu + tf.random_normal(tf.shape(mu)) * std

    logp = exercise1_1.gaussian_likelihood(a, mu, log_std)
    logp_pi = exercise1_1.gaussian_likelihood(pi, mu, log_std)
    return pi, logp, logp_pi
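
All three snippets assume module-level imports of numpy as np and tensorflow as tf (TF1-style API), plus an mlp helper and exercise1_1.gaussian_likelihood from the surrounding exercise files. As a minimal sketch, an mlp helper of the kind these examples assume could be a plain stack of dense layers, with the final layer sized and activated separately; the exact helper in the source module may differ:

import tensorflow as tf

def mlp(x, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
    # Hidden layers share one activation; the final layer gets its own,
    # so the caller can leave the output linear for a policy mean.
    for h in hidden_sizes[:-1]:
        x = tf.layers.dense(x, units=h, activation=activation)
    return tf.layers.dense(x, units=hidden_sizes[-1],
                           activation=output_activation)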
Example #2
def mlp_gaussian_policy(x, a, hidden_sizes, activation, output_activation,
                        action_space):
    """
    Builds symbols to sample actions and compute log-probs of actions.

    Special instructions: Make log_std a tf variable with the same shape as
    the action vector, independent of x, initialized to [-0.5, -0.5, ..., -0.5].

    Args:
        x: Input tensor of states. Shape [batch, obs_dim].

        a: Input tensor of actions. Shape [batch, act_dim].

        hidden_sizes: Sizes of hidden layers for action network MLP.

        activation: Activation function for all layers except last.

        output_activation: Activation function for last layer (action layer).

        action_space: A gym.spaces object describing the action space of the
            environment this agent will interact with.

    Returns:
        pi: A symbol for sampling stochastic actions from a Gaussian 
            distribution.

        logp: A symbol for computing log-likelihoods of actions from a Gaussian 
            distribution.

        logp_pi: A symbol for computing log-likelihoods of actions in pi from a 
            Gaussian distribution.

    """
    action_space_dim = a.shape.as_list()[-1]
    # Append a final layer sized to the action dimension to output the mean.
    hidden = list(hidden_sizes) + [action_space_dim]
    mu = mlp(x,
             hidden_sizes=hidden,
             activation=activation,
             output_activation=output_activation)
    log_std = tf.get_variable(name='log_std',
                              initializer=-0.5 *
                              np.ones(action_space_dim, dtype=np.float32))
    std = tf.exp(log_std)
    # sample actions from the current estimated policy
    pi = mu + tf.random_normal(tf.shape(mu)) * std
    # Compute log-likelihoods of given actions and of the sampled actions.
    logp = exercise1_1.gaussian_likelihood(a, mu, log_std)
    logp_pi = exercise1_1.gaussian_likelihood(pi, mu, log_std)
    return pi, logp, logp_pi
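
exercise1_1.gaussian_likelihood itself is not shown in these snippets. A sketch of a diagonal-Gaussian log-likelihood consistent with how the examples call it (summing over action dimensions, returning shape [batch]) follows; the EPS guard against division by zero is an assumption here:

import numpy as np
import tensorflow as tf

EPS = 1e-8  # assumed small constant to keep the division numerically safe

def gaussian_likelihood(x, mu, log_std):
    # Per-dimension log N(x | mu, exp(log_std)^2), summed over action dims.
    pre_sum = -0.5 * (((x - mu) / (tf.exp(log_std) + EPS)) ** 2
                      + 2 * log_std + np.log(2 * np.pi))
    return tf.reduce_sum(pre_sum, axis=1)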
Example #3
def mlp_gaussian_policy(x, a, hidden_sizes, activation, output_activation,
                        action_space):
    """
    Builds symbols to sample actions and compute log-probs of actions.

    Special instructions: Make log_std a tf variable with the same shape as
    the action vector, independent of x, initialized to [-0.5, -0.5, ..., -0.5].

    Args:
        x: Input tensor of states. Shape [batch, obs_dim].

        a: Input tensor of actions. Shape [batch, act_dim].

        hidden_sizes: Sizes of hidden layers for action network MLP.

        activation: Activation function for all layers except last.

        output_activation: Activation function for last layer (action layer).

        action_space: A gym.spaces object describing the action space of the
            environment this agent will interact with.

    Returns:
        pi: A symbol for sampling stochastic actions from a Gaussian 
            distribution.

        logp: A symbol for computing log-likelihoods of actions from a Gaussian 
            distribution.

        logp_pi: A symbol for computing log-likelihoods of actions in pi from a 
            Gaussian distribution.

    """
    #######################
    #                     #
    #   YOUR CODE HERE    #
    #                     #
    #######################
    # Dimension of the action vector.
    act_dim = a.shape.as_list()[-1]
    # Mean action from an MLP over the state; shape [batch, act_dim].
    mu = mlp(x, list(hidden_sizes) + [act_dim], activation, output_activation)
    # log_std is a trainable variable of shape [act_dim], independent of x.
    log_std = tf.get_variable(name='log_std',
                              initializer=-0.5 *
                              np.ones(act_dim, dtype=np.float32))
    std = tf.exp(log_std)
    # Sample stochastic actions around the mean.
    pi = mu + tf.random_normal(tf.shape(mu)) * std
    logp = exercise1_1.gaussian_likelihood(a, mu, log_std)
    logp_pi = exercise1_1.gaussian_likelihood(pi, mu, log_std)
    return pi, logp, logp_pi
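
For context, a hypothetical usage sketch shows how one of these builders plugs into a TF1-style graph; the dimensions and placeholder names are illustrative assumptions, and action_space=None works only because the implementations above never read it:

import numpy as np
import tensorflow as tf

obs_dim, act_dim = 3, 2  # hypothetical environment dimensions

x_ph = tf.placeholder(tf.float32, shape=(None, obs_dim))
a_ph = tf.placeholder(tf.float32, shape=(None, act_dim))

pi, logp, logp_pi = mlp_gaussian_policy(x_ph, a_ph, hidden_sizes=(64, 64),
                                        activation=tf.tanh,
                                        output_activation=None,
                                        action_space=None)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    obs = np.zeros((1, obs_dim), dtype=np.float32)
    action, action_logp = sess.run([pi, logp_pi], feed_dict={x_ph: obs})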