Example #1
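Constructor of a world model: a convolutional image encoder, an RSSM latent dynamics model, decoder heads for image, reward, and (optionally) discount, a single model optimizer, and per-head loss scales.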
 def __init__(self, step, config):
   self._step = step
   self._config = config
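   # Convolutional image encoder producing the embedding fed to the dynamics model.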
   self.encoder = networks.ConvEncoder(
       config.cnn_depth, config.act, config.encoder_kernels)
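   # Recurrent state-space model (RSSM) serving as the latent dynamics model.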
   self.dynamics = networks.RSSM(
       config.dyn_stoch, config.dyn_deter, config.dyn_hidden,
       config.dyn_input_layers, config.dyn_output_layers, config.dyn_shared,
       config.dyn_discrete, config.act, config.dyn_mean_act,
       config.dyn_std_act, config.dyn_min_std, config.dyn_cell)
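   # Prediction heads that decode the latent state: image reconstruction, reward,
   # and optionally the episode-continuation discount.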
   self.heads = {}
   channels = (1 if config.atari_grayscale else 3)
   shape = config.size + (channels,)
   self.heads['image'] = networks.ConvDecoder(
       config.cnn_depth, config.act, shape, config.decoder_kernels,
       config.decoder_thin)
   self.heads['reward'] = networks.DenseHead(
       [], config.reward_layers, config.units, config.act)
   if config.pred_discount:
     self.heads['discount'] = networks.DenseHead(
         [], config.discount_layers, config.units, config.act, dist='binary')
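   # Every head listed in grad_heads must actually exist.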
   for name in config.grad_heads:
     assert name in self.heads, name
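   # One optimizer ('model') for the world-model parameters.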
   self._model_opt = tools.Optimizer(
       'model', config.model_lr, config.opt_eps, config.grad_clip,
       config.weight_decay, opt=config.opt)
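   # Per-head scales weighting the reward and discount terms of the model loss.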
   self._scales = dict(
       reward=config.reward_scale, discount=config.discount_scale)
Example #2
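Constructor of an ensemble-disagreement exploration module: it builds an imagination behavior on top of the world model, picks the width of the prediction target from disag_target, and creates an ensemble of dense heads with its own optimizer.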
 def __init__(self, config, world_model, reward=None):
     self._config = config
     self._reward = reward
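     # Imagination-based actor-critic used as the exploration behavior; its actor is exposed directly.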
     self._behavior = models.ImagBehavior(config, world_model)
     self.actor = self._behavior.actor
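     # Width of the stochastic latent; discrete latents are flattened to dyn_stoch * dyn_discrete units.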
     stoch_size = config.dyn_stoch
     if config.dyn_discrete:
         stoch_size *= config.dyn_discrete
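     # Width of the quantity the ensemble predicts, chosen via disag_target.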
     size = {
         'embed': 32 * config.cnn_depth,
         'stoch': stoch_size,
         'deter': config.dyn_deter,
         'feat': config.dyn_stoch + config.dyn_deter,
     }[self._config.disag_target]
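     # Ensemble of identically configured dense heads; their disagreement provides the exploration signal.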
     kw = dict(shape=size,
               layers=config.disag_layers,
               units=config.disag_units,
               act=config.act)
     self._networks = [
         networks.DenseHead(**kw) for _ in range(config.disag_models)
     ]
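     # One optimizer ('ensemble') shared by all ensemble members.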
     self._opt = tools.Optimizer('ensemble',
                                 config.model_lr,
                                 config.opt_eps,
                                 config.grad_clip,
                                 config.weight_decay,
                                 opt=config.opt)
Example #3
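Constructor of an imagination actor-critic: actor and value heads, an optional slow value target, and separate optimizers for actor and critic.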
 def __init__(self, config, world_model, stop_grad_actor=True, reward=None):
   self._config = config
   self._world_model = world_model
   self._stop_grad_actor = stop_grad_actor
   self._reward = reward
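   # Actor head mapping latent features to a distribution over config.num_actions actions.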
   self.actor = networks.ActionHead(
       config.num_actions, config.actor_layers, config.units, config.act,
       config.actor_dist, config.actor_init_std, config.actor_min_std,
       config.actor_dist, config.actor_temp, config.actor_outscale)
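   # Critic head producing a scalar value estimate.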
   self.value = networks.DenseHead(
       [], config.value_layers, config.units, config.act,
       config.value_head)
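   # Optional slowly updated copy of the value head (a target network) and a counter used to schedule its updates.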
   if config.slow_value_target or config.slow_actor_target:
     self._slow_value = networks.DenseHead(
         [], config.value_layers, config.units, config.act)
     self._updates = tf.Variable(0, dtype=tf.int64)
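   # Separate optimizers and gradient clips for actor and critic; weight decay and optimizer type are shared via kw.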
   kw = dict(wd=config.weight_decay, opt=config.opt)
   self._actor_opt = tools.Optimizer(
       'actor', config.actor_lr, config.opt_eps, config.actor_grad_clip, **kw)
   self._value_opt = tools.Optimizer(
       'value', config.value_lr, config.opt_eps, config.value_grad_clip, **kw)