Example #1
    def __init__(self, actor, critic, num_action, random_process, gamma,
                 L2_reg, actor_model_dir, critic_model_dir):
        self._num_action = num_action
        self._gamma = gamma
        self._L2_reg = L2_reg

        self.global_ts = tf.compat.v1.train.get_or_create_global_step()
        self.eval_flg = False
        self.actor = actor(num_action)
        self.critic = critic(1)
        # Target networks: fresh instances rather than deep copies of the online nets.
        # self.target_actor = deepcopy(self.actor)
        # self.target_critic = deepcopy(self.critic)
        self.target_actor = actor(num_action)
        self.target_critic = critic(1)
        self.actor_optimizer = tf.compat.v1.train.AdamOptimizer(
            learning_rate=1e-4)
        self.critic_optimizer = tf.compat.v1.train.AdamOptimizer(
            learning_rate=1e-3)
        self.random_process = random_process
        self.actor_manager = create_checkpoint(model=self.actor,
                                               optimizer=self.actor_optimizer,
                                               model_dir=actor_model_dir)
        self.critic_manager = create_checkpoint(
            model=self.critic,
            optimizer=self.critic_optimizer,
            model_dir=critic_model_dir)
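
The constructors in these examples call a create_checkpoint helper that is not part of the snippets. As a point of reference, here is a minimal sketch of such a helper, assuming it simply wraps tf.train.Checkpoint / tf.train.CheckpointManager and resumes from the latest checkpoint when one exists (the max_to_keep value is an illustrative choice, not taken from the examples):

import tensorflow as tf

def create_checkpoint(model, optimizer, model_dir):
    # Bundle the model and its optimizer into a single trackable checkpoint.
    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
    # The manager rotates checkpoint files inside model_dir.
    manager = tf.train.CheckpointManager(checkpoint, directory=model_dir,
                                         max_to_keep=3)
    # Restore the most recent checkpoint if available; a no-op on a fresh run.
    checkpoint.restore(manager.latest_checkpoint)
    return manager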
Example #2
    def __init__(self, ggnn, critic, node_info, num_action, params):
        self.params = params
        self.num_action = num_action
        self.eval_flg = False
        self.index_timestep = 0
        self.actor = ggnn(state_dim=params.num_node_features,
                          node_info=node_info,
                          rec_hidden_unit=params.rec_hidden_unit,
                          rec_output_unit=params.rec_output_unit,
                          recurrent_step=params.recurrent_step)
        self.critic = critic(1)
        self.target_actor = deepcopy(self.actor)
        self.target_critic = deepcopy(self.critic)
        self.actor_optimizer = tf.compat.v1.train.AdamOptimizer(
            learning_rate=1e-4)
        self.critic_optimizer = tf.compat.v1.train.AdamOptimizer(
            learning_rate=1e-3)
        self.actor_manager = create_checkpoint(
            model=self.actor,
            optimizer=self.actor_optimizer,
            model_dir=params.actor_model_dir)
        self.critic_manager = create_checkpoint(
            model=self.critic,
            optimizer=self.critic_optimizer,
            model_dir=params.critic_model_dir)
Example #3
    def __init__(self, actor, critic, num_action, params):
        self.params = params
        self.num_action = num_action
        self.eval_flg = False
        self.index_timestep = 0
        self.actor = actor(num_action)
        self.critic = critic(1)
        self.target_actor = deepcopy(self.actor)
        self.target_critic = deepcopy(self.critic)
        self.actor_optimizer = tf.train.AdamOptimizer(learning_rate=3e-4)  # learning rate as used in the paper
        self.critic_optimizer = tf.train.AdamOptimizer(learning_rate=3e-4)  # learning rate as used in the paper
        self.actor_manager = create_checkpoint(model=self.actor,
                                               optimizer=self.actor_optimizer,
                                               model_dir=params.actor_model_dir)
        self.critic_manager = create_checkpoint(model=self.critic,
                                                optimizer=self.critic_optimizer,
                                                model_dir=params.critic_model_dir)
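
Examples #2 and #3 (and #4 below) build their target networks with deepcopy; how those targets are later kept in sync is outside these snippets. A minimal sketch of the usual soft (Polyak) target update for such actor-critic agents, assuming Keras-style models whose weights are exposed via .variables and an illustrative tau value:

def soft_update(target_model, main_model, tau=0.005):
    # Polyak averaging: target <- tau * main + (1 - tau) * target.
    for target_var, main_var in zip(target_model.variables,
                                    main_model.variables):
        target_var.assign(tau * main_var + (1.0 - tau) * target_var)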
Example #4
    def __init__(self, actor, critic, num_action, random_process, params):
        self.params = params
        self.num_action = num_action
        self.eval_flg = False
        self.index_timestep = 0
        self.actor = actor(num_action)
        self.critic = critic(1)
        self.target_actor = deepcopy(self.actor)
        self.target_critic = deepcopy(self.critic)
        self.actor_optimizer = tf.compat.v1.train.AdamOptimizer(
            learning_rate=1e-4)
        self.critic_optimizer = tf.compat.v1.train.AdamOptimizer(
            learning_rate=1e-3)
        self.random_process = random_process
        self.actor_manager = create_checkpoint(
            model=self.actor,
            optimizer=self.actor_optimizer,
            model_dir=params.actor_model_dir)
        self.critic_manager = create_checkpoint(
            model=self.critic,
            optimizer=self.critic_optimizer,
            model_dir=params.critic_model_dir)
Example #5
    def __init__(self, model, optimizer, loss_fn, grad_clip_fn, num_action,
                 params):
        self.params = params
        self.num_action = num_action
        self.grad_clip_fn = grad_clip_fn
        self.loss_fn = loss_fn
        self.eval_flg = False
        self.index_timestep = 0
        self.main_model = model(num_action)
        self.target_model = model(num_action)
        self.optimizer = optimizer
        self.manager = create_checkpoint(model=self.main_model,
                                         optimizer=self.optimizer,
                                         model_dir=params.model_dir)
Example #6
    def __init__(self, model, policy, optimizer, loss_fn, grad_clip_fn,
                 obs_prc_fn, num_action, model_dir, gamma):
        self._gamma = gamma
        self._grad_clip_fn = grad_clip_fn
        self._loss_fn = loss_fn
        self._timestep = 0
        self._optimizer = optimizer
        self._obs_prc_fn = obs_prc_fn

        # === Supposed to access from outside ===
        self.policy = policy
        self.eval_flg = False
        self.num_action = num_action
        self.main_model = model(num_action)
        self.target_model = model(num_action)
        self.manager = create_checkpoint(model=self.main_model,
                                         optimizer=self._optimizer,
                                         model_dir=model_dir)
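
If create_checkpoint returns a tf.train.CheckpointManager as sketched after Example #1, saving during training reduces to calling save() on the stored manager. A hedged usage sketch, with agent standing in for one of the objects constructed in Examples #5 or #6:

# Persist the online network and optimizer state, e.g. every N updates.
save_path = agent.manager.save()
print("Saved checkpoint to", save_path)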