def build_actor_critic(sess, env):
    """Construct the training actor and critic networks and initialize them.

    Both networks are created inside the ``"model"`` variable scope, whose
    default initializer is tflearn's Xavier initializer. Once they exist,
    every global variable is initialized through ``sess`` and each network's
    ``update_target_network()`` is called once.

    ``config`` is not a parameter; it is read from the enclosing scope.

    NOTE(review): a second ``build_actor_critic`` taking an extra
    ``env_eval`` argument is defined further down in this source. If both
    live in the same module, the later definition replaces this one --
    confirm which is intended.

    Args:
        sess: Session object; used here to run the variable initializer and
            passed on to both network constructors.
        env: Environment object passed on to both network constructors.

    Returns:
        The tuple ``(actor, critic)``.
    """
    xavier_init = tflearn.initializations.xavier_initializer()

    # A single "model" variable scope wraps both networks; the name scopes
    # only distinguish the actor and critic sub-graphs.
    with tf.variable_scope("model", reuse=None, initializer=xavier_init):
        with tf.name_scope("actor"):
            actor = ActorNetwork(sess, env, config, is_training=True)
        with tf.name_scope("critic"):
            critic = CriticNetwork(sess, env, config, is_training=True)

    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # Initialize target network weights (done after the variables above
    # have been given their initial values).
    actor.update_target_network()
    critic.update_target_network()

    return actor, critic
def build_actor_critic(sess, env, env_eval):
    """Construct training and evaluation actor/critic networks.

    The training pair is created first, inside the ``"model"`` variable
    scope with tflearn's Xavier initializer as the scope default. The
    evaluation pair is then created by re-entering ``"model"`` with
    ``reuse=True`` (presumably so both pairs share the same variables --
    this depends on how the network classes create their variables; verify
    there). Afterwards every global variable is initialized through
    ``sess`` and ``update_target_network()`` is called on the training
    actor and critic only.

    ``config`` is not a parameter; it is read from the enclosing scope.

    NOTE(review): an earlier ``build_actor_critic(sess, env)`` is defined
    above in this source. If both live in the same module, this definition
    replaces it -- confirm that is intended.

    Args:
        sess: Session object; used here to run the variable initializer and
            passed on to all four network constructors.
        env: Environment object for the training networks.
        env_eval: Environment object for the evaluation networks.

    Returns:
        The tuple ``(actor, critic, actor_eval, critic_eval)``.
    """
    xavier_init = tflearn.initializations.xavier_initializer()

    # Training networks: first entry into "model" (reuse=None).
    with tf.variable_scope("model", reuse=None, initializer=xavier_init):
        with tf.name_scope("actor"):
            actor = ActorNetwork(sess, env, config, is_training=True)
        with tf.name_scope("critic"):
            critic = CriticNetwork(sess, env, config, is_training=True)

    # NOTE: exploration noise (an OrnsteinUhlenbeckActionNoise configured
    # from config.noise_std / config.noise_dec) used to be built here and
    # returned as an extra value; that code path is currently disabled.

    # Evaluation networks: same scope name, entered again with reuse=True
    # and built with is_training=False.
    with tf.variable_scope("model", reuse=True):
        with tf.name_scope("actor"):
            actor_eval = ActorNetwork(sess, env_eval, config, is_training=False)
        with tf.name_scope("critic"):
            critic_eval = CriticNetwork(sess, env_eval, config, is_training=False)

    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # Initialize target network weights (training networks only).
    actor.update_target_network()
    critic.update_target_network()

    return actor, critic, actor_eval, critic_eval