def __init__(self, actor, critic, num_action, random_process, gamma, L2_reg, actor_model_dir, critic_model_dir):
    """Build an actor-critic agent: main/target networks, optimizers, checkpoints.

    :param actor: factory callable; ``actor(num_action)`` returns an actor network
    :param critic: factory callable; ``critic(1)`` returns a scalar-output critic
    :param num_action: size of the action space
    :param random_process: exploration-noise process, stored for later use
    :param gamma: discount factor
    :param L2_reg: L2 regularisation coefficient
    :param actor_model_dir: checkpoint directory for the actor
    :param critic_model_dir: checkpoint directory for the critic
    """
    self._num_action = num_action
    self._gamma = gamma
    self._L2_reg = L2_reg
    self.global_ts = tf.compat.v1.train.get_or_create_global_step()
    self.eval_flg = False
    self.actor = actor(num_action)
    self.critic = critic(1)
    # Target networks are constructed fresh from the same factories
    # (not deep-copied), so they start with independent weights.
    self.target_actor = actor(num_action)
    self.target_critic = critic(1)
    # Learning rates 1e-4 (actor) / 1e-3 (critic) match the sibling agents here.
    self.actor_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=1e-4)
    self.critic_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=1e-3)
    self.random_process = random_process
    self.actor_manager = create_checkpoint(model=self.actor,
                                           optimizer=self.actor_optimizer,
                                           model_dir=actor_model_dir)
    self.critic_manager = create_checkpoint(model=self.critic,
                                            optimizer=self.critic_optimizer,
                                            model_dir=critic_model_dir)
def __init__(self, ggnn, critic, node_info, num_action, params):
    """Set up a GGNN-based actor, a critic, their target copies, and checkpoints.

    :param ggnn: factory for the graph-network actor (keyword-constructed below)
    :param critic: factory callable; ``critic(1)`` returns a scalar-output critic
    :param node_info: graph/node metadata forwarded to the GGNN constructor
    :param num_action: size of the action space
    :param params: hyper-parameter namespace (network sizes, checkpoint dirs)
    """
    # Bookkeeping state exposed to the training loop.
    self.params = params
    self.num_action = num_action
    self.eval_flg = False
    self.index_timestep = 0

    # Main networks; the actor is a recurrent graph net configured from params.
    self.actor = ggnn(state_dim=params.num_node_features,
                      node_info=node_info,
                      rec_hidden_unit=params.rec_hidden_unit,
                      rec_output_unit=params.rec_output_unit,
                      recurrent_step=params.recurrent_step)
    self.critic = critic(1)

    # Target networks start as exact copies of the mains.
    self.target_actor = deepcopy(self.actor)
    self.target_critic = deepcopy(self.critic)

    # Standard DDPG learning rates: 1e-4 for the actor, 1e-3 for the critic.
    self.actor_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=1e-4)
    self.critic_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=1e-3)

    # Checkpoint managers so both nets can be saved/restored independently.
    self.actor_manager = create_checkpoint(model=self.actor,
                                           optimizer=self.actor_optimizer,
                                           model_dir=params.actor_model_dir)
    self.critic_manager = create_checkpoint(model=self.critic,
                                            optimizer=self.critic_optimizer,
                                            model_dir=params.critic_model_dir)
def __init__(self, actor, critic, num_action, params):
    """Build an actor-critic agent with a shared 3e-4 learning rate.

    :param actor: factory callable; ``actor(num_action)`` returns an actor network
    :param critic: factory callable; ``critic(1)`` returns a scalar-output critic
    :param num_action: size of the action space
    :param params: hyper-parameter namespace providing the checkpoint dirs
    """
    self.params = params
    self.num_action = num_action
    self.eval_flg = False
    self.index_timestep = 0
    self.actor = actor(num_action)
    self.critic = critic(1)
    # Target networks start as exact copies of the main networks.
    self.target_actor = deepcopy(self.actor)
    self.target_critic = deepcopy(self.critic)
    # NOTE: switched tf.train.AdamOptimizer -> tf.compat.v1.train.AdamOptimizer
    # for consistency with the sibling agents (tf.train.AdamOptimizer is gone
    # in TF2; compat.v1 is the portable spelling). Learning rate 3e-4 as in paper.
    self.actor_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=3e-4)  # used as in paper
    self.critic_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=3e-4)  # used as in paper
    self.actor_manager = create_checkpoint(model=self.actor,
                                           optimizer=self.actor_optimizer,
                                           model_dir=params.actor_model_dir)
    self.critic_manager = create_checkpoint(model=self.critic,
                                            optimizer=self.critic_optimizer,
                                            model_dir=params.critic_model_dir)
def __init__(self, actor, critic, num_action, random_process, params):
    """Construct a DDPG-style agent with an exploration-noise process.

    :param actor: factory callable; ``actor(num_action)`` returns an actor network
    :param critic: factory callable; ``critic(1)`` returns a scalar-output critic
    :param num_action: size of the action space
    :param random_process: exploration-noise process, stored for later use
    :param params: hyper-parameter namespace providing the checkpoint dirs
    """
    # Loop/bookkeeping state.
    self.params = params
    self.num_action = num_action
    self.eval_flg = False
    self.index_timestep = 0
    self.random_process = random_process

    # Main networks and their target copies (targets start identical).
    self.actor = actor(num_action)
    self.critic = critic(1)
    self.target_actor = deepcopy(self.actor)
    self.target_critic = deepcopy(self.critic)

    # Standard DDPG learning rates: 1e-4 actor, 1e-3 critic.
    self.actor_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=1e-4)
    self.critic_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=1e-3)

    # Independent checkpoint managers for actor and critic.
    self.actor_manager = create_checkpoint(model=self.actor,
                                           optimizer=self.actor_optimizer,
                                           model_dir=params.actor_model_dir)
    self.critic_manager = create_checkpoint(model=self.critic,
                                            optimizer=self.critic_optimizer,
                                            model_dir=params.critic_model_dir)
def __init__(self, model, optimizer, loss_fn, grad_clip_fn, num_action, params):
    """Set up a value-based agent: main/target networks plus one checkpoint.

    :param model: factory callable; ``model(num_action)`` returns a Q-network
    :param optimizer: pre-built optimizer instance used for the main network
    :param loss_fn: loss function applied during training
    :param grad_clip_fn: gradient-clipping function applied before updates
    :param num_action: size of the action space
    :param params: hyper-parameter namespace providing ``model_dir``
    """
    # Training-time callables and bookkeeping.
    self.params = params
    self.num_action = num_action
    self.grad_clip_fn = grad_clip_fn
    self.loss_fn = loss_fn
    self.eval_flg = False
    self.index_timestep = 0
    self.optimizer = optimizer

    # Main and target networks are built fresh from the same factory,
    # so their initial weights are independent.
    self.main_model = model(num_action)
    self.target_model = model(num_action)

    # Single checkpoint manager covering the main model and its optimizer.
    self.manager = create_checkpoint(model=self.main_model,
                                     optimizer=self.optimizer,
                                     model_dir=params.model_dir)
def __init__(self, model, policy, optimizer, loss_fn, grad_clip_fn, obs_prc_fn, num_action, model_dir, gamma):
    """Set up a value-based agent with a pluggable policy and observation prep.

    :param model: factory callable; ``model(num_action)`` returns a Q-network
    :param policy: action-selection policy object (public attribute)
    :param optimizer: pre-built optimizer instance for the main network
    :param loss_fn: loss function applied during training
    :param grad_clip_fn: gradient-clipping function applied before updates
    :param obs_prc_fn: observation pre-processing function
    :param num_action: size of the action space
    :param model_dir: checkpoint directory for the main model
    :param gamma: discount factor
    """
    # Internal (underscored) state used only by this agent.
    self._gamma = gamma
    self._grad_clip_fn = grad_clip_fn
    self._loss_fn = loss_fn
    self._timestep = 0
    self._optimizer = optimizer
    self._obs_prc_fn = obs_prc_fn

    # === Supposed to access from outside ===
    self.policy = policy
    self.eval_flg = False
    self.num_action = num_action
    # Main and target networks built fresh from the same factory.
    self.main_model = model(num_action)
    self.target_model = model(num_action)
    # Checkpoint manager for the main model and its optimizer.
    self.manager = create_checkpoint(model=self.main_model,
                                     optimizer=self._optimizer,
                                     model_dir=model_dir)