def next_frame_epva(): """EPVA hparams.""" hparams = basic_deterministic_params.next_frame_basic_deterministic() hparams.video_num_input_frames = 4 hparams.video_num_target_frames = 4 hparams.modality = { "inputs": modalities.VideoModalityL2Raw, "targets": modalities.VideoModalityL2Raw, } hparams.learning_rate_schedule = "constant" hparams.learning_rate_constant = 1e-05 hparams.batch_size = 2 hparams.clip_grad_norm = 0.01 # TODO(msaffar): disentangle EPVA from SV2P hparams.add_hparam("reward_prediction", False) hparams.add_hparam("clip_pixel_values", True) hparams.add_hparam("context_frames", 5) hparams.add_hparam("enc_learning_rate", 1e-5) hparams.add_hparam("enc_pred_loss_scale", 0.1) hparams.add_hparam("enc_pred_loss_scale_delay", 6e5) hparams.add_hparam("enc_size", 64) hparams.add_hparam("enc_keep_prob", .65) hparams.add_hparam("enc_pred_use_l1_loss", False) hparams.add_hparam("enc_pred_use_l2norm", False) hparams.add_hparam("van_learning_rate", 3e-5) hparams.add_hparam("van_keep_prob", .9) hparams.add_hparam("sequence_length ", 64) hparams.add_hparam("skip_num", 2) hparams.add_hparam("pred_noise_std", 0) hparams.add_hparam("lstm_state_noise_stddev", 0) return hparams
def next_frame_epva(): """EPVA hparams.""" hparams = basic_deterministic_params.next_frame_basic_deterministic() hparams.video_num_input_frames = 4 hparams.video_num_target_frames = 4 hparams.target_modality = "video:l2raw" hparams.input_modalities = "inputs:video:l2raw" hparams.learning_rate_constant = 1e-05 hparams.batch_size = 2 hparams.clip_grad_norm = 0.01 hparams.add_hparam("context_frames", 5) hparams.add_hparam("enc_learning_rate", 1e-5) hparams.add_hparam("enc_pred_loss_scale", 0.1) hparams.add_hparam("enc_pred_loss_scale_delay", 6e5) hparams.add_hparam("enc_size", 64) hparams.add_hparam("enc_keep_prob", .65) hparams.add_hparam("enc_pred_use_l1_loss", False) hparams.add_hparam("enc_pred_use_l2norm", False) hparams.add_hparam("van_learning_rate", 3e-5) hparams.add_hparam("van_keep_prob", .9) hparams.add_hparam("sequence_length ", 64) hparams.add_hparam("skip_num", 2) hparams.add_hparam("pred_noise_std", 0) hparams.add_hparam("lstm_state_noise_stddev", 0) return hparams
def next_frame_epva(): """EPVA hparams.""" hparams = basic_deterministic_params.next_frame_basic_deterministic() hparams.video_num_input_frames = 4 hparams.video_num_target_frames = 4 hparams.modality = { "inputs": modalities.ModalityType.VIDEO_L2_RAW, "targets": modalities.ModalityType.VIDEO_L2_RAW, } hparams.learning_rate_schedule = "constant" hparams.learning_rate_constant = 1e-05 hparams.batch_size = 2 hparams.clip_grad_norm = 0.01 # TODO(msaffar): disentangle EPVA from SV2P hparams.add_hparam("reward_prediction", False) hparams.add_hparam("clip_pixel_values", True) hparams.add_hparam("context_frames", 5) hparams.add_hparam("enc_learning_rate", 1e-5) hparams.add_hparam("enc_pred_loss_scale", 0.1) hparams.add_hparam("enc_pred_loss_scale_delay", 6e5) hparams.add_hparam("enc_size", 64) hparams.add_hparam("enc_keep_prob", .65) hparams.add_hparam("enc_pred_use_l1_loss", False) hparams.add_hparam("enc_pred_use_l2norm", False) hparams.add_hparam("van_learning_rate", 3e-5) hparams.add_hparam("van_keep_prob", .9) hparams.add_hparam("sequence_length ", 64) hparams.add_hparam("skip_num", 2) hparams.add_hparam("pred_noise_std", 0) hparams.add_hparam("lstm_state_noise_stddev", 0) return hparams
def next_frame_basic_stochastic_discrete(): """Basic 2-frame conv model with stochastic discrete latent.""" hparams = basic_deterministic_params.next_frame_basic_deterministic() hparams.num_compress_steps = 8 hparams.filter_double_steps = 3 hparams.add_hparam("bottleneck_bits", 32) hparams.add_hparam("bottleneck_noise", 0.05) return hparams
def ppo_original_world_model(): """Atari parameters with world model as policy.""" hparams = ppo_original_params() hparams.policy_network = "next_frame_basic_deterministic" hparams_keys = hparams.values().keys() video_hparams = basic_deterministic_params.next_frame_basic_deterministic() for (name, value) in six.iteritems(video_hparams.values()): if name in hparams_keys: hparams.set_hparam(name, value) else: hparams.add_hparam(name, value) return hparams
def ppo_original_world_model(): """Atari parameters with world model as policy.""" hparams = ppo_original_params() hparams.policy_network = "next_frame_basic_deterministic" hparams_keys = hparams.values().keys() video_hparams = basic_deterministic_params.next_frame_basic_deterministic() for (name, value) in six.iteritems(video_hparams.values()): if name in hparams_keys: hparams.set_hparam(name, value) else: hparams.add_hparam(name, value) # Mostly to avoid decaying WM params when training the policy. hparams.weight_decay = 0 return hparams
def next_frame_basic_stochastic(): """Basic 2-frame conv model with stochastic tower.""" hparams = basic_deterministic_params.next_frame_basic_deterministic() hparams.stochastic_model = True hparams.add_hparam("latent_channels", 1) hparams.add_hparam("latent_std_min", -5.0) hparams.add_hparam("num_iterations_1st_stage", 25000) hparams.add_hparam("num_iterations_2nd_stage", 25000) hparams.add_hparam("latent_loss_multiplier", 1e-3) hparams.add_hparam("latent_loss_multiplier_schedule", "constant") hparams.add_hparam("latent_num_frames", 0) # 0 means use all frames. hparams.add_hparam("anneal_end", 100000) hparams.add_hparam("information_capacity", 0.0) return hparams
def next_frame_basic_stochastic(): """Basic 2-frame conv model with stochastic tower.""" hparams = basic_deterministic_params.next_frame_basic_deterministic() hparams.stochastic_model = True hparams.add_hparam("latent_channels", 1) hparams.add_hparam("latent_std_min", -5.0) hparams.add_hparam("num_iterations_1st_stage", 15000) hparams.add_hparam("num_iterations_2nd_stage", 15000) hparams.add_hparam("latent_loss_multiplier", 1e-3) hparams.add_hparam("latent_loss_multiplier_dynamic", False) hparams.add_hparam("latent_loss_multiplier_alpha", 1e-5) hparams.add_hparam("latent_loss_multiplier_epsilon", 1.0) hparams.add_hparam("latent_loss_multiplier_schedule", "constant") hparams.add_hparam("latent_num_frames", 0) # 0 means use all frames. hparams.add_hparam("anneal_end", 50000) hparams.add_hparam("information_capacity", 0.0) return hparams
def testBasicDeterministic(self): self.TestOnVariousInputOutputSizes( basic_deterministic_params.next_frame_basic_deterministic(), basic_deterministic.NextFrameBasicDeterministic, 256, False)
def testBasicDeterministic(self): self.TestOnVariousInputOutputSizes( basic_deterministic_params.next_frame_basic_deterministic(), basic_deterministic.NextFrameBasicDeterministic, 256, False)