Exemple #1
0
def next_frame_epva():
  """EPVA hparams.

  Starts from `next_frame_basic_deterministic()`, overrides a handful of its
  existing values (frame counts, modalities, learning-rate settings, batch
  size, gradient clipping) and registers the additional EPVA hyperparameters.

  Returns:
    The hparams object obtained from `next_frame_basic_deterministic()` with
    the overrides and additions below applied.
  """
  hparams = basic_deterministic_params.next_frame_basic_deterministic()

  # Overrides of hyperparameters that the base set already defines.
  hparams.video_num_input_frames = 4
  hparams.video_num_target_frames = 4
  hparams.modality = {
      "inputs": modalities.VideoModalityL2Raw,
      "targets": modalities.VideoModalityL2Raw,
  }
  hparams.learning_rate_schedule = "constant"
  hparams.learning_rate_constant = 1e-05
  hparams.batch_size = 2
  hparams.clip_grad_norm = 0.01

  # TODO(msaffar): disentangle EPVA from SV2P
  hparams.add_hparam("reward_prediction", False)
  hparams.add_hparam("clip_pixel_values", True)
  hparams.add_hparam("context_frames", 5)

  # Encoder / predictor settings.
  hparams.add_hparam("enc_learning_rate", 1e-5)
  hparams.add_hparam("enc_pred_loss_scale", 0.1)
  hparams.add_hparam("enc_pred_loss_scale_delay", 6e5)
  hparams.add_hparam("enc_size", 64)
  hparams.add_hparam("enc_keep_prob", .65)
  hparams.add_hparam("enc_pred_use_l1_loss", False)
  hparams.add_hparam("enc_pred_use_l2norm", False)

  # VAN settings.
  hparams.add_hparam("van_learning_rate", 3e-5)
  hparams.add_hparam("van_keep_prob", .9)

  # The key used to be "sequence_length " (with a trailing space), which made
  # the value unreachable as `hparams.sequence_length`.
  hparams.add_hparam("sequence_length", 64)
  hparams.add_hparam("skip_num", 2)
  hparams.add_hparam("pred_noise_std", 0)
  hparams.add_hparam("lstm_state_noise_stddev", 0)
  return hparams
Exemple #2
0
def next_frame_epva():
    """EPVA hparams.

    Starts from `next_frame_basic_deterministic()`, overrides a handful of its
    existing values (frame counts, modality strings, learning rate constant,
    batch size, gradient clipping) and registers the additional EPVA
    hyperparameters.

    Returns:
      The hparams object obtained from `next_frame_basic_deterministic()` with
      the overrides and additions below applied.
    """
    hparams = basic_deterministic_params.next_frame_basic_deterministic()

    # Overrides of hyperparameters that the base set already defines.
    hparams.video_num_input_frames = 4
    hparams.video_num_target_frames = 4
    hparams.target_modality = "video:l2raw"
    hparams.input_modalities = "inputs:video:l2raw"
    hparams.learning_rate_constant = 1e-05
    hparams.batch_size = 2
    hparams.clip_grad_norm = 0.01

    hparams.add_hparam("context_frames", 5)

    # Encoder / predictor settings.
    hparams.add_hparam("enc_learning_rate", 1e-5)
    hparams.add_hparam("enc_pred_loss_scale", 0.1)
    hparams.add_hparam("enc_pred_loss_scale_delay", 6e5)
    hparams.add_hparam("enc_size", 64)
    hparams.add_hparam("enc_keep_prob", .65)
    hparams.add_hparam("enc_pred_use_l1_loss", False)
    hparams.add_hparam("enc_pred_use_l2norm", False)

    # VAN settings.
    hparams.add_hparam("van_learning_rate", 3e-5)
    hparams.add_hparam("van_keep_prob", .9)

    # The key used to be "sequence_length " (with a trailing space), which
    # made the value unreachable as `hparams.sequence_length`.
    hparams.add_hparam("sequence_length", 64)
    hparams.add_hparam("skip_num", 2)
    hparams.add_hparam("pred_noise_std", 0)
    hparams.add_hparam("lstm_state_noise_stddev", 0)
    return hparams
Exemple #3
0
def next_frame_epva():
  """EPVA hparams.

  Starts from `next_frame_basic_deterministic()`, overrides a handful of its
  existing values (frame counts, modalities, learning-rate settings, batch
  size, gradient clipping) and registers the additional EPVA hyperparameters.

  Returns:
    The hparams object obtained from `next_frame_basic_deterministic()` with
    the overrides and additions below applied.
  """
  hparams = basic_deterministic_params.next_frame_basic_deterministic()

  # Overrides of hyperparameters that the base set already defines.
  hparams.video_num_input_frames = 4
  hparams.video_num_target_frames = 4
  hparams.modality = {
      "inputs": modalities.ModalityType.VIDEO_L2_RAW,
      "targets": modalities.ModalityType.VIDEO_L2_RAW,
  }
  hparams.learning_rate_schedule = "constant"
  hparams.learning_rate_constant = 1e-05
  hparams.batch_size = 2
  hparams.clip_grad_norm = 0.01

  # TODO(msaffar): disentangle EPVA from SV2P
  hparams.add_hparam("reward_prediction", False)
  hparams.add_hparam("clip_pixel_values", True)
  hparams.add_hparam("context_frames", 5)

  # Encoder / predictor settings.
  hparams.add_hparam("enc_learning_rate", 1e-5)
  hparams.add_hparam("enc_pred_loss_scale", 0.1)
  hparams.add_hparam("enc_pred_loss_scale_delay", 6e5)
  hparams.add_hparam("enc_size", 64)
  hparams.add_hparam("enc_keep_prob", .65)
  hparams.add_hparam("enc_pred_use_l1_loss", False)
  hparams.add_hparam("enc_pred_use_l2norm", False)

  # VAN settings.
  hparams.add_hparam("van_learning_rate", 3e-5)
  hparams.add_hparam("van_keep_prob", .9)

  # The key used to be "sequence_length " (with a trailing space), which made
  # the value unreachable as `hparams.sequence_length`.
  hparams.add_hparam("sequence_length", 64)
  hparams.add_hparam("skip_num", 2)
  hparams.add_hparam("pred_noise_std", 0)
  hparams.add_hparam("lstm_state_noise_stddev", 0)
  return hparams
Exemple #4
0
def next_frame_basic_stochastic_discrete():
    """Hparams for the basic 2-frame conv model with a stochastic discrete latent.

    Builds on `next_frame_basic_deterministic()`, changes two of its existing
    values and registers the bottleneck hyperparameters.

    Returns:
      The adjusted hparams object.
    """
    hparams = basic_deterministic_params.next_frame_basic_deterministic()

    # Overrides of values already present in the base set.
    hparams.num_compress_steps = 8
    hparams.filter_double_steps = 3

    # Newly registered hyperparameters, added in the listed order.
    bottleneck_defaults = (
        ("bottleneck_bits", 32),
        ("bottleneck_noise", 0.05),
    )
    for key, default in bottleneck_defaults:
        hparams.add_hparam(key, default)
    return hparams
Exemple #5
0
def ppo_original_world_model():
    """Atari PPO hparams that use the world model as the policy network.

    Takes `ppo_original_params()`, sets `policy_network` to
    "next_frame_basic_deterministic" and then copies every hyperparameter of
    `next_frame_basic_deterministic()` into it: names that already exist are
    overwritten with `set_hparam`, unknown names are registered with
    `add_hparam`.

    Returns:
      The merged hparams object.
    """
    hparams = ppo_original_params()
    hparams.policy_network = "next_frame_basic_deterministic"

    # Names known to the PPO hparams before any video hparams are merged in.
    known_names = hparams.values().keys()
    world_model_hparams = (
        basic_deterministic_params.next_frame_basic_deterministic())

    for key, val in six.iteritems(world_model_hparams.values()):
        # Existing entries are updated in place; everything else is added.
        if name_is_known := key in known_names:
            hparams.set_hparam(key, val)
        if not name_is_known:
            hparams.add_hparam(key, val)
    return hparams
Exemple #6
0
def ppo_original_world_model():
  """Atari PPO hparams that use the world model as the policy network.

  Takes `ppo_original_params()`, sets `policy_network` to
  "next_frame_basic_deterministic" and then copies every hyperparameter of
  `next_frame_basic_deterministic()` into it: names that already exist are
  overwritten with `set_hparam`, unknown names are registered with
  `add_hparam`. Finally `weight_decay` is reset to 0.

  Returns:
    The merged hparams object.
  """
  hparams = ppo_original_params()
  hparams.policy_network = "next_frame_basic_deterministic"

  # Names known to the PPO hparams before any video hparams are merged in.
  known_names = hparams.values().keys()
  world_model_hparams = (
      basic_deterministic_params.next_frame_basic_deterministic())

  for key, val in six.iteritems(world_model_hparams.values()):
    # Existing entries are updated in place; everything else is added.
    merge = hparams.set_hparam if key in known_names else hparams.add_hparam
    merge(key, val)

  # Mostly to avoid decaying WM params when training the policy.
  hparams.weight_decay = 0
  return hparams
Exemple #7
0
def next_frame_basic_stochastic():
    """Hparams for the basic 2-frame conv model with a stochastic tower.

    Builds on `next_frame_basic_deterministic()`, switches `stochastic_model`
    on and registers the latent-related hyperparameters.

    Returns:
      The adjusted hparams object.
    """
    hparams = basic_deterministic_params.next_frame_basic_deterministic()
    hparams.stochastic_model = True

    # Newly registered hyperparameters, added in the listed order.
    stochastic_defaults = (
        ("latent_channels", 1),
        ("latent_std_min", -5.0),
        ("num_iterations_1st_stage", 25000),
        ("num_iterations_2nd_stage", 25000),
        ("latent_loss_multiplier", 1e-3),
        ("latent_loss_multiplier_schedule", "constant"),
        ("latent_num_frames", 0),  # 0 means use all frames.
        ("anneal_end", 100000),
        ("information_capacity", 0.0),
    )
    for key, default in stochastic_defaults:
        hparams.add_hparam(key, default)
    return hparams
def next_frame_basic_stochastic():
  """Hparams for the basic 2-frame conv model with a stochastic tower.

  Builds on `next_frame_basic_deterministic()`, switches `stochastic_model`
  on and registers the latent-related hyperparameters.

  Returns:
    The adjusted hparams object.
  """
  hparams = basic_deterministic_params.next_frame_basic_deterministic()
  hparams.stochastic_model = True

  # Newly registered hyperparameters, added in the listed order.
  stochastic_defaults = (
      ("latent_channels", 1),
      ("latent_std_min", -5.0),
      ("num_iterations_1st_stage", 15000),
      ("num_iterations_2nd_stage", 15000),
      ("latent_loss_multiplier", 1e-3),
      ("latent_loss_multiplier_dynamic", False),
      ("latent_loss_multiplier_alpha", 1e-5),
      ("latent_loss_multiplier_epsilon", 1.0),
      ("latent_loss_multiplier_schedule", "constant"),
      ("latent_num_frames", 0),  # 0 means use all frames.
      ("anneal_end", 50000),
      ("information_capacity", 0.0),
  )
  for key, default in stochastic_defaults:
    hparams.add_hparam(key, default)
  return hparams
Exemple #9
0
 def testBasicDeterministic(self):
     # Runs the shared size-sweep helper for the basic deterministic model
     # with its default hparams. The meaning of the trailing `256, False`
     # arguments is defined by TestOnVariousInputOutputSizes (not shown here).
     check_sizes = self.TestOnVariousInputOutputSizes
     hparams = basic_deterministic_params.next_frame_basic_deterministic()
     model_cls = basic_deterministic.NextFrameBasicDeterministic
     check_sizes(hparams, model_cls, 256, False)
 def testBasicDeterministic(self):
   # Runs the shared size-sweep helper for the basic deterministic model
   # with its default hparams. The meaning of the trailing `256, False`
   # arguments is defined by TestOnVariousInputOutputSizes (not shown here).
   check_sizes = self.TestOnVariousInputOutputSizes
   hparams = basic_deterministic_params.next_frame_basic_deterministic()
   model_cls = basic_deterministic.NextFrameBasicDeterministic
   check_sizes(hparams, model_cls, 256, False)