Exemple #1
0
def build_actor_critic(sess, env):
    """Construct the training actor and critic networks and initialize them.

    Both networks are created inside the ``"model"`` variable scope, which
    uses tflearn's Xavier initializer as its default initializer. Each
    network gets its own name scope (``"actor"`` / ``"critic"``).

    ``config`` is not a parameter; it is read from the enclosing scope.

    Args:
        sess: Session handed to both networks and used to run the global
            variables initializer.
        env: Environment object handed to both network constructors.

    Returns:
        A ``(actor, critic)`` tuple of the constructed networks.
    """
    model_scope = tf.variable_scope(
        "model",
        reuse=None,
        initializer=tflearn.initializations.xavier_initializer(),
    )

    with model_scope:
        with tf.name_scope("actor"):
            actor_net = ActorNetwork(sess, env, config, is_training=True)

        with tf.name_scope("critic"):
            critic_net = CriticNetwork(sess, env, config, is_training=True)

    # All variables must exist before the initializer op is built and run.
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # Initialize target network weights (actor first, then critic).
    for network in (actor_net, critic_net):
        network.update_target_network()

    return actor_net, critic_net
Exemple #2
0
def build_actor_critic(sess, env, env_eval):
    """Construct training and evaluation actor/critic networks.

    The training networks are created first inside the ``"model"`` variable
    scope, which uses tflearn's Xavier initializer as its default
    initializer. The evaluation networks are then created by re-entering the
    same scope with ``reuse=True`` and ``is_training=False``; this is
    presumably meant to make them share the training networks' variables
    (depends on how the network classes create variables -- confirm there).

    ``config`` is not a parameter; it is read from the enclosing scope.

    Args:
        sess: Session handed to every network and used to run the global
            variables initializer.
        env: Environment object handed to the training networks.
        env_eval: Environment object handed to the evaluation networks.

    Returns:
        A ``(actor, critic, actor_eval, critic_eval)`` tuple.
    """
    xavier = tflearn.initializations.xavier_initializer()

    # First pass: training networks.
    with tf.variable_scope("model", reuse=None, initializer=xavier):
        with tf.name_scope("actor"):
            train_actor = ActorNetwork(sess, env, config, is_training=True)

        with tf.name_scope("critic"):
            train_critic = CriticNetwork(sess, env, config, is_training=True)

        # NOTE: action noise (an OrnsteinUhlenbeckActionNoise configured from
        # config.noise_std / config.noise_dec) used to be created here; it is
        # currently disabled and therefore not part of the return value.

    # Second pass: evaluation networks, built under the same scope name with
    # reuse enabled.
    with tf.variable_scope("model", reuse=True):
        with tf.name_scope("actor"):
            eval_actor = ActorNetwork(
                sess, env_eval, config, is_training=False
            )

        with tf.name_scope("critic"):
            eval_critic = CriticNetwork(
                sess, env_eval, config, is_training=False
            )

    # All variables must exist before the initializer op is built and run.
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # Initialize target network weights (training networks only).
    for network in (train_actor, train_critic):
        network.update_target_network()

    return train_actor, train_critic, eval_actor, eval_critic