Exemple #1
0
def policy_network(state, obs_dim, act_dim):
    """Build the policy head and return its softmax output.

    Everything is created under the ``policy_network`` variable scope:
    ``state`` is fed to an ``fclayer`` with 10 outputs, whose result is fed
    to a second ``fclayer`` with ``act_dim`` outputs and
    ``activation_fn=None``; ``tf.nn.softmax`` is applied to that result.

    Args:
        state: Input handed to the first layer (presumably a batch of
            observations -- confirm against the caller).
        obs_dim: Not used by this function; kept in the signature.
        act_dim: Number of outputs of the final layer.

    Returns:
        The value of ``tf.nn.softmax`` applied to the final layer's output.
    """
    with tf.variable_scope("policy_network"):
        # NOTE(review): fclayer is defined outside this block; it looks like
        # an alias for a fully-connected layer helper -- confirm.
        hidden = fclayer(state, num_outputs=10)
        # activation_fn=None is passed explicitly so the softmax below is
        # the only transformation written out here for the final output.
        logits = fclayer(hidden, num_outputs=act_dim, activation_fn=None)
        return tf.nn.softmax(logits)
Exemple #2
0
def value_network(state, obs_dim):
    """Build the value head and return its single-output layer result.

    Everything is created under the ``value_network`` variable scope:
    ``state`` is fed to an ``fclayer`` with 10 outputs, whose result is fed
    to a second ``fclayer`` with 1 output and ``activation_fn=None``.

    Args:
        state: Input handed to the first layer (presumably a batch of
            observations -- confirm against the caller).
        obs_dim: Not used by this function; kept in the signature.

    Returns:
        Whatever the final ``fclayer`` call (``num_outputs=1``) returns.
    """
    with tf.variable_scope("value_network"):
        # NOTE(review): fclayer is defined outside this block; it looks like
        # an alias for a fully-connected layer helper -- confirm.
        hidden = fclayer(state, num_outputs=10)
        return fclayer(hidden, num_outputs=1, activation_fn=None)