def __init__(self, units=16, objective='mapo', learning_rate=1e-3, entropy_reg_coeff=0.0, max_grad_norm=1.0, use_critic=False, log_summaries=False, **kwargs):
    """Set up the RL agent: policy (and optional critic) networks plus optimizer state.

    Args:
      units: Hidden size passed to the encoder-decoder network(s).
      objective: Training objective identifier (e.g. 'mapo') stored for later use.
      learning_rate: Adam learning rate; also kept as a tf.constant for
        gradient scaling (so products with IndexedSlices are well-defined).
      entropy_reg_coeff: Coefficient for entropy regularization.
      max_grad_norm: Gradient clipping norm.
      use_critic: If True, also build a value-function network.
      log_summaries: Whether summary logging is enabled.
      **kwargs: Forwarded to the parent constructor.
    """
    super(RLAgent, self).__init__(num_actions=4, **kwargs)
    self._use_critic = use_critic
    self.max_grad_norm = max_grad_norm
    # Policy network: maps encoded inputs to one logit per action.
    self.pi = nn_model.EncoderDecoder(
        embedding_dim=4,
        units=units,
        num_outputs=self._num_actions)
    if self._use_critic:
        # Critic network: same architecture, scalar value output.
        self.value_fn = nn_model.EncoderDecoder(
            embedding_dim=4,
            units=units,
            num_outputs=1)
    self._objective = objective
    self._entropy_reg_coeff = entropy_reg_coeff  # Entropy regularization
    self.global_step = tf.train.get_or_create_global_step()
    self.optimizer = contrib_optimizer_v2.AdamOptimizer(
        learning_rate=learning_rate)
    # Kept as a tensor so multiplying it with an IndexedSlices object
    # (sparse gradients) is defined.
    self.learning_rate = tf.constant(learning_rate)
    self.log_summaries = log_summaries
    self._init_models()
def __init__(self, meta_lr, score_fn, **kwargs):
    """Set up the meta-RL agent: score function, its optimizer, and clipping.

    Args:
      meta_lr: Learning rate for the score-function (meta) optimizer.
      score_fn: Which score network to build; one of 'simple_linear' or
        'linear'. Anything else raises NotImplementedError.
      **kwargs: Forwarded to the parent constructor.

    Raises:
      NotImplementedError: If `score_fn` is not a recognized identifier.
    """
    super(MetaRLAgent, self).__init__(**kwargs)
    # Dispatch table: score_fn name -> (log message, network factory).
    score_fn_builders = {
        'simple_linear': ('Using simple linear score function.',
                          nn_model.SimpleLinearNN),
        'linear': ('Using linear score function with priors.',
                   nn_model.LinearNN),
    }
    if score_fn not in score_fn_builders:
        raise NotImplementedError
    log_msg, builder = score_fn_builders[score_fn]
    tf.logging.info(log_msg)
    self.score_fn = builder()
    self._init_score_fn()
    self.score_optimizer = contrib_optimizer_v2.AdamOptimizer(
        learning_rate=meta_lr)
    self._meta_train = True
    # Adaptive gradient clipping for the score-function gradients; note the
    # static cap is half of the agent's max_grad_norm.
    self._score_grad_clipping = optimizers_lib.adaptive_clipping_fn(
        decay=0.9,
        report_summary=self.log_summaries,
        static_max_norm=self.max_grad_norm / 2.0,
        global_step=self.global_step)