def test_bcmodule_defaults():
    """Check BC module defaults, then check that explicit settings override them."""
    behavior_specs = mb.create_mock_3dball_behavior_specs()

    # See if default values match
    default_settings = BehavioralCloningSettings(demo_path=CONTINUOUS_DEMO_PATH)
    default_module = create_bc_module(behavior_specs, default_settings, False, False)
    assert default_module.num_epoch == 3
    # The batch size is expected to equal the trainer-level default.
    assert default_module.batch_size == TrainerSettings().hyperparameters.batch_size

    # Assign strange values and see if it overrides properly
    custom_settings = BehavioralCloningSettings(
        demo_path=CONTINUOUS_DEMO_PATH, num_epoch=100, batch_size=10000
    )
    custom_module = create_bc_module(behavior_specs, custom_settings, False, False)
    assert custom_module.num_epoch == 100
    assert custom_module.batch_size == 10000
def test_gail_cc(trainer_config, gail_dummy_config):
    """Run the GAIL reward-signal evaluate and update checks on a mock optimizer.

    Behavioral cloning is attached to the trainer config (pointing at
    ``CONTINUOUS_PATH``) before the optimizer mock is built.
    """
    bc_settings = BehavioralCloningSettings(demo_path=CONTINUOUS_PATH)
    trainer_config.behavioral_cloning = bc_settings
    optimizer = create_optimizer_mock(
        trainer_config, gail_dummy_config, False, False, False
    )
    signal_name = "gail"
    reward_signal_eval(optimizer, signal_name)
    reward_signal_update(optimizer, signal_name)
def test_gail_visual_sac(simple_record, use_discrete):
    """Train SAC with GAIL and behavioral cloning on a visual-only environment.

    A demonstration is recorded with one visual and zero vector observations,
    and the environment is built with the same observation layout. Training
    must reach a success threshold of 0.9.
    """
    # The recorded demo and the environment share the same observation layout.
    observation_kwargs = {"num_visual": 1, "num_vector": 0}
    demo_path = simple_record(use_discrete, **observation_kwargs)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        **observation_kwargs,
        use_discrete=use_discrete,
        step_size=0.2,
    )

    cloning = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    gail = GAILSettings(encoding_size=32, demo_path=demo_path)
    sac_hyperparameters = attr.evolve(
        SAC_TF_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16
    )
    trainer_settings = attr.evolve(
        SAC_TF_CONFIG,
        reward_signals={RewardSignalType.GAIL: gail},
        hyperparameters=sac_hyperparameters,
        behavioral_cloning=cloning,
        max_steps=500,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(
        env, {BRAIN_NAME: trainer_settings}, success_threshold=0.9
    )
def test_bcmodule_dc_visual_update(is_sac):
    """Run one BC update on the mock banana specs; every stat must be np.float32."""
    behavior_specs = mb.create_mock_banana_behavior_specs()
    settings = BehavioralCloningSettings(demo_path=DISCRETE_DEMO_PATH)
    module = create_bc_module(behavior_specs, settings, False, is_sac)
    update_stats = module.update()
    # Only the values are checked; the stat names are not inspected.
    for stat_value in update_stats.values():
        assert isinstance(stat_value, np.float32)
def test_bcmodule_rnn_update(is_sac):
    """Run one BC update on the mock 3DBall specs; every stat must be np.float32."""
    behavior_specs = mb.create_mock_3dball_behavior_specs()
    settings = BehavioralCloningSettings(demo_path=CONTINUOUS_DEMO_PATH)
    # Third positional argument is True here (presumably the RNN toggle --
    # confirm against create_bc_module).
    module = create_bc_module(behavior_specs, settings, True, is_sac)
    update_stats = module.update()
    # Only the values are checked; the stat names are not inspected.
    for stat_value in update_stats.values():
        assert isinstance(stat_value, np.float32)
def test_bcmodule_update(is_sac):
    """Run one BC update on the mock 3DBall specs and validate the returned stats."""
    behavior_specs = mb.create_mock_3dball_behavior_specs()
    # The demo file sits in the same directory as this test module.
    demo_file = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
    settings = BehavioralCloningSettings(demo_path=demo_file)
    module = create_bc_module(behavior_specs, settings, False, is_sac)
    update_stats = module.update()
    assert_stats_are_float(update_stats)
def test_bcmodule_rnn_update(is_sac):
    """Run one BC update on the mock 3DBall specs; every stat must be np.float32."""
    behavior_specs = mb.create_mock_3dball_behavior_specs()
    # The demo file sits in the same directory as this test module.
    demo_file = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
    settings = BehavioralCloningSettings(demo_path=demo_file)
    # Third positional argument is True here (presumably the RNN toggle --
    # confirm against create_bc_module).
    module = create_bc_module(behavior_specs, settings, True, is_sac)
    update_stats = module.update()
    # Only the values are checked; the stat names are not inspected.
    for stat_value in update_stats.values():
        assert isinstance(stat_value, np.float32)
def test_bcmodule_dc_visual_update(is_sac):
    """Run one BC update on the mock banana brain; every stat must be np.float32."""
    banana_brain = mb.create_mock_banana_brain()
    # The demo file sits in the same directory as this test module.
    demo_file = os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
    settings = BehavioralCloningSettings(demo_path=demo_file)
    module = create_bc_module(banana_brain, settings, False, is_sac)
    update_stats = module.update()
    # Only the values are checked; the stat names are not inspected.
    for stat_value in update_stats.values():
        assert isinstance(stat_value, np.float32)
def test_bcmodule_defaults():
    """Check BC module defaults, then check that explicit settings override them."""
    behavior_specs = mb.create_mock_3dball_behavior_specs()
    # Both settings objects point at the demo file next to this test module.
    demo_file = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"

    # See if default values match
    default_settings = BehavioralCloningSettings(demo_path=demo_file)
    default_module = create_bc_module(behavior_specs, default_settings, False, False)
    assert default_module.num_epoch == 3
    # The batch size is expected to equal the trainer-level default.
    assert default_module.batch_size == TrainerSettings().hyperparameters.batch_size

    # Assign strange values and see if it overrides properly
    custom_settings = BehavioralCloningSettings(
        demo_path=demo_file,
        num_epoch=100,
        batch_size=10000,
    )
    custom_module = create_bc_module(behavior_specs, custom_settings, False, False)
    assert custom_module.num_epoch == 100
    assert custom_module.batch_size == 10000
def test_bcmodule_constant_lr_update(is_sac):
    """Check that with ``steps=0`` a further BC update leaves ``current_lr`` unchanged."""
    behavior_specs = mb.create_mock_3dball_behavior_specs()
    settings = BehavioralCloningSettings(demo_path=CONTINUOUS_DEMO_PATH, steps=0)
    module = create_bc_module(behavior_specs, settings, False, is_sac)

    first_stats = module.update()
    # Only the values are checked; the stat names are not inspected.
    for stat_value in first_stats.values():
        assert isinstance(stat_value, np.float32)

    # Snapshot the rate, update once more, and require it to be unchanged.
    lr_before = module.current_lr
    module.update()
    assert lr_before == module.current_lr
def test_bcmodule_linear_lr_update(is_sac):
    """Check the BC learning rate after one update with ``steps=100`` at mocked step 10."""
    behavior_specs = mb.create_mock_3dball_behavior_specs()
    # The demo file sits in the same directory as this test module.
    demo_file = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
    settings = BehavioralCloningSettings(demo_path=demo_file, steps=100)
    module = create_bc_module(behavior_specs, settings, False, is_sac)

    # Should decay by 10/100 * 0.0003 = 0.00003
    module.policy.get_current_step = MagicMock(return_value=10)
    lr_before = module.current_lr
    module.update()
    expected_lr = lr_before - 0.00003
    # NOTE(review): abs=0.01 is far larger than the expected 0.00003 change, so
    # this comparison is very loose -- confirm whether a tighter tolerance is
    # intended.
    assert expected_lr == pytest.approx(module.current_lr, abs=0.01)
def test_gail(simple_record, use_discrete, trainer_config):
    """Train with GAIL and behavioral cloning on a SimpleEnvironment.

    Both GAIL and behavioral cloning use the same recorded demonstration.
    Training must reach a success threshold of 0.9.
    """
    demo_path = simple_record(use_discrete)
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)

    cloning = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    gail = GAILSettings(encoding_size=32, demo_path=demo_path)
    trainer_settings = attr.evolve(
        trainer_config,
        reward_signals={RewardSignalType.GAIL: gail},
        behavioral_cloning=cloning,
        max_steps=500,
    )
    _check_environment_trains(
        env, {BRAIN_NAME: trainer_settings}, success_threshold=0.9
    )
def test_bcmodule_constant_lr_update(is_sac):
    """Check that with ``steps=0`` a further BC update leaves ``current_lr`` unchanged."""
    behavior_specs = mb.create_mock_3dball_behavior_specs()
    # The demo file sits in the same directory as this test module.
    demo_file = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
    settings = BehavioralCloningSettings(demo_path=demo_file, steps=0)
    module = create_bc_module(behavior_specs, settings, False, is_sac)

    first_stats = module.update()
    # Only the values are checked; the stat names are not inspected.
    for stat_value in first_stats.values():
        assert isinstance(stat_value, np.float32)

    # Snapshot the rate, update once more, and require it to be unchanged.
    lr_before = module.current_lr
    module.update()
    assert lr_before == module.current_lr
def test_gail(simple_record, action_sizes, trainer_config):
    """Train with GAIL and behavioral cloning on a SimpleEnvironment (TensorFlow).

    Both GAIL and behavioral cloning use the same recorded demonstration.
    Training must reach a success threshold of 0.9.
    """
    demo_path = simple_record(action_sizes)
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)

    cloning = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    gail = GAILSettings(encoding_size=32, demo_path=demo_path)
    trainer_settings = attr.evolve(
        trainer_config,
        reward_signals={RewardSignalType.GAIL: gail},
        behavioral_cloning=cloning,
        max_steps=500,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(
        env, {BRAIN_NAME: trainer_settings}, success_threshold=0.9
    )
def test_gail_visual_ppo(simple_record, action_sizes):
    """Train PPO with GAIL and behavioral cloning on a visual-only environment.

    A demonstration is recorded with one visual and zero vector observations,
    and the environment is built with the same observation layout. Training
    must reach a success threshold of 0.9.
    """
    # The recorded demo and the environment share the same observation layout.
    observation_kwargs = {"num_visual": 1, "num_vector": 0}
    demo_path = simple_record(action_sizes, **observation_kwargs)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        **observation_kwargs,
        action_sizes=action_sizes,
        step_size=0.2,
    )

    cloning = BehavioralCloningSettings(demo_path=demo_path, steps=1500)
    gail = GAILSettings(encoding_size=32, demo_path=demo_path)
    ppo_hyperparameters = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, learning_rate=5e-3
    )
    trainer_settings = attr.evolve(
        PPO_TORCH_CONFIG,
        reward_signals={RewardSignalType.GAIL: gail},
        hyperparameters=ppo_hyperparameters,
        behavioral_cloning=cloning,
        max_steps=1000,
    )
    check_environment_trains(
        env, {BRAIN_NAME: trainer_settings}, success_threshold=0.9
    )