def policy_network(state, obs_dim, act_dim, hidden_units=10):
    """Build the policy graph that maps ``state`` to a softmax output.

    All layers are created inside the ``"policy_network"`` variable scope:
    one hidden ``fclayer`` of ``hidden_units`` outputs, then an ``fclayer``
    with ``act_dim`` outputs and ``activation_fn=None``, whose result is
    passed through ``tf.nn.softmax``.

    Args:
        state: Input handed to the first layer.
            NOTE(review): presumably a (batch, obs_dim) tensor or
            placeholder -- confirm against the caller.
        obs_dim: Not used by this function; retained so existing call
            sites keep working.
        act_dim: Number of outputs of the final layer.
        hidden_units: Number of outputs of the hidden layer. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        The result of ``tf.nn.softmax`` applied to the final layer's output.
    """
    # NOTE(review): fclayer is defined/imported elsewhere in this file; it
    # looks like an alias for tf.contrib.layers.fully_connected -- confirm.
    # The hidden layer uses whatever default activation fclayer applies.
    with tf.variable_scope("policy_network"):
        hidden = fclayer(state, num_outputs=hidden_units)
        # No activation is requested on the output layer, so softmax
        # receives that layer's raw outputs.
        logits = fclayer(hidden, num_outputs=act_dim, activation_fn=None)
        return tf.nn.softmax(logits)
def value_network(state, obs_dim, hidden_units=10):
    """Build the value graph that maps ``state`` to a single output.

    All layers are created inside the ``"value_network"`` variable scope:
    one hidden ``fclayer`` of ``hidden_units`` outputs, then an ``fclayer``
    with one output and ``activation_fn=None``.

    Args:
        state: Input handed to the first layer.
            NOTE(review): presumably a (batch, obs_dim) tensor or
            placeholder -- confirm against the caller.
        obs_dim: Not used by this function; retained so existing call
            sites keep working.
        hidden_units: Number of outputs of the hidden layer. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        The output of the final single-output layer.
    """
    # NOTE(review): fclayer is defined/imported elsewhere in this file; it
    # looks like an alias for tf.contrib.layers.fully_connected -- confirm.
    # The hidden layer uses whatever default activation fclayer applies.
    with tf.variable_scope("value_network"):
        hidden = fclayer(state, num_outputs=hidden_units)
        # No activation is requested, so the output is left unsquashed.
        value = fclayer(hidden, num_outputs=1, activation_fn=None)
        return value