コード例 #1
0
ファイル: agent.py プロジェクト: LONG-9621/Stackedcapsule
  def __init__(
      self,
      units=16,
      objective='mapo',
      learning_rate=1e-3,
      entropy_reg_coeff=0.0,
      max_grad_norm=1.0,
      use_critic=False,
      log_summaries=False,
      **kwargs):
    """Builds the policy network, the optional critic and the optimizer.

    Args:
      units: Value forwarded as `units` to every `nn_model.EncoderDecoder`
        created here.
      objective: Identifier of the training objective; stored as
        `self._objective`.
      learning_rate: Learning rate handed to the Adam optimizer; also kept
        as a constant tensor in `self.learning_rate`.
      entropy_reg_coeff: Entropy regularization coefficient; stored as
        `self._entropy_reg_coeff`.
      max_grad_norm: Stored as `self.max_grad_norm`.
      use_critic: When true, an additional single-output `EncoderDecoder`
        is created as `self.value_fn`.
      log_summaries: Stored as `self.log_summaries`.
      **kwargs: Forwarded unchanged to the parent constructor.
    """
    super(RLAgent, self).__init__(num_actions=4, **kwargs)

    # Plain hyper-parameters and flags.
    self._use_critic = use_critic
    self.max_grad_norm = max_grad_norm
    self._objective = objective
    self._entropy_reg_coeff = entropy_reg_coeff
    self.log_summaries = log_summaries

    # The policy and the optional critic differ only in their output size.
    encoder_args = dict(embedding_dim=4, units=units)
    self.pi = nn_model.EncoderDecoder(
        num_outputs=self._num_actions, **encoder_args)
    if use_critic:
      self.value_fn = nn_model.EncoderDecoder(num_outputs=1, **encoder_args)

    self.global_step = tf.train.get_or_create_global_step()
    self.optimizer = contrib_optimizer_v2.AdamOptimizer(
        learning_rate=learning_rate)
    # Kept as a tensor so that multiplying it with an IndexedSlices object
    # is defined.
    self.learning_rate = tf.constant(learning_rate)

    self._init_models()
コード例 #2
0
ファイル: agent.py プロジェクト: LONG-9621/Stackedcapsule
 def __init__(self, meta_lr, score_fn, **kwargs):
   """Sets up the score function, its optimizer and gradient clipping.

   Args:
     meta_lr: Learning rate of the Adam optimizer stored in
       `self.score_optimizer`.
     score_fn: Name of the score function to build. 'simple_linear' selects
       `nn_model.SimpleLinearNN`; 'linear' selects `nn_model.LinearNN`.
     **kwargs: Forwarded unchanged to the parent constructor.

   Raises:
     NotImplementedError: If `score_fn` is not one of the supported names.
   """
   super(MetaRLAgent, self).__init__(**kwargs)
   if score_fn == 'simple_linear':
     tf.logging.info('Using simple linear score function.')
     self.score_fn = nn_model.SimpleLinearNN()
   elif score_fn == 'linear':
     tf.logging.info('Using linear score function with priors.')
     self.score_fn = nn_model.LinearNN()
   else:
     # Name the rejected value and the accepted ones so that a typo in the
     # configuration is easy to diagnose.
     raise NotImplementedError(
         "Unsupported score_fn {!r}; expected 'simple_linear' or "
         "'linear'.".format(score_fn))
   self._init_score_fn()
   self.score_optimizer = contrib_optimizer_v2.AdamOptimizer(
       learning_rate=meta_lr)
   self._meta_train = True
   # Adaptive gradient clipping for the score function. The static cap is
   # half of `self.max_grad_norm`; `log_summaries`, `max_grad_norm` and
   # `global_step` are read from `self` and are expected to have been set
   # by the parent constructor.
   self._score_grad_clipping = optimizers_lib.adaptive_clipping_fn(
       decay=0.9,
       report_summary=self.log_summaries,
       static_max_norm=self.max_grad_norm / 2.0,
       global_step=self.global_step)