def test_bcmodule_update(is_sac):
    """Run one BC module update and check every reported stat is np.float32.

    ``is_sac`` is forwarded unchanged to ``create_bc_module`` (presumably it
    selects a SAC rather than PPO policy -- confirm against that helper).
    """
    brain = mb.create_mock_3dball_brain()
    config = ppo_dummy_config()
    module = create_bc_module(brain, config, False, "test.demo", is_sac)

    update_stats = module.update()
    for value in update_stats.values():
        assert isinstance(value, np.float32)
def create_bc_trainer(dummy_config, is_discrete=False):
    """Build a BCTrainer backed by a mocked Unity environment.

    The PushBlock mock brain (70 vector observations) is used when
    ``is_discrete`` is true, otherwise the 3DBall mock brain (8 vector
    observations). Both variants simulate 12 agents.

    NOTE: ``dummy_config`` is modified in place -- the ``summary_path``,
    ``model_path`` and ``demo_path`` keys are written onto the caller's dict.

    Returns:
        A ``(trainer, env)`` tuple; the trainer's demonstration buffer is
        filled from a 100-step simulated rollout.
    """
    env_factory = mock.Mock()
    if is_discrete:
        brain, vector_obs_size = mb.create_mock_pushblock_brain(), 70
    else:
        brain, vector_obs_size = mb.create_mock_3dball_brain(), 8
    brain_info = mb.create_mock_braininfo(
        num_agents=12, num_vector_observations=vector_obs_size
    )
    mb.setup_mock_unityenvironment(env_factory, brain, brain_info)
    env = env_factory()

    dummy_config["summary_path"] = "tmp"
    dummy_config["model_path"] = "tmp"
    # The demo file sits next to this test module.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    dummy_config["demo_path"] = test_dir + "/test.demo"

    trainer = BCTrainer(
        brain, dummy_config, training=True, load=False, seed=0, run_id=0
    )
    trainer.demonstration_buffer = mb.simulate_rollout(env, trainer.policy, 100)
    return trainer, env
def test_bcmodule_rnn_update(trainer_config):
    """Update the BC module of a policy built with the third helper flag set
    to True (presumably the recurrent option -- confirm against
    ``create_policy_with_bc_mock``) and check all stats are np.float32.
    """
    brain = mb.create_mock_3dball_brain()
    bc_policy = create_policy_with_bc_mock(brain, trainer_config, True, "test.demo")

    update_stats = bc_policy.bc_module.update()
    for value in update_stats.values():
        assert isinstance(value, np.float32)
def test_bcmodule_update(mock_env, trainer_config):
    """Run one BC module update and check every reported stat is np.float32.

    The environment returned by ``create_policy_with_bc_mock`` is closed in a
    ``finally`` clause so that a failing assertion does not leave it open.
    """
    mock_brain = mb.create_mock_3dball_brain()
    env, policy = create_policy_with_bc_mock(
        mock_env, mock_brain, trainer_config, False, "test.demo"
    )
    try:
        stats = policy.bc_module.update()
        for item in stats.values():
            assert isinstance(item, np.float32)
    finally:
        # Always release the environment, even when an assertion above fails.
        env.close()
def test_bcmodule_rnn_update(is_sac):
    """Update a BC module created with the third helper flag set to True
    (presumably the recurrent option -- confirm against ``create_bc_module``)
    and check all stats are np.float32.

    ``is_sac`` is forwarded unchanged to ``create_bc_module``.
    """
    brain = mb.create_mock_3dball_brain()
    # The demo file sits next to this test module.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    settings = BehavioralCloningSettings(demo_path=test_dir + "/" + "test.demo")
    module = create_bc_module(brain, settings, True, is_sac)

    update_stats = module.update()
    for value in update_stats.values():
        assert isinstance(value, np.float32)
def test_bcmodule_constant_lr_update(trainer_config):
    """Check that the BC learning rate does not change between updates when
    ``behavioral_cloning.steps`` is 0.

    NOTE: ``trainer_config`` is modified in place (``steps`` is set to 0).
    """
    brain = mb.create_mock_3dball_brain()
    trainer_config["behavioral_cloning"]["steps"] = 0
    bc_policy = create_policy_with_bc_mock(brain, trainer_config, False, "test.demo")
    bc = bc_policy.bc_module

    first_stats = bc.update()
    for value in first_stats.values():
        assert isinstance(value, np.float32)

    # A second update must leave the learning rate untouched.
    lr_before = bc.current_lr
    bc.update()
    assert lr_before == bc.current_lr
def test_bcmodule_constant_lr_update(is_sac):
    """Check that the BC learning rate does not change between updates when
    ``behavioral_cloning.steps`` is 0 in a fresh ``ppo_dummy_config()``.

    ``is_sac`` is forwarded unchanged to ``create_bc_module``.
    """
    config = ppo_dummy_config()
    brain = mb.create_mock_3dball_brain()
    config["behavioral_cloning"]["steps"] = 0
    module = create_bc_module(brain, config, False, "test.demo", is_sac)

    first_stats = module.update()
    for value in first_stats.values():
        assert isinstance(value, np.float32)

    # A second update must leave the learning rate untouched.
    lr_before = module.current_lr
    module.update()
    assert lr_before == module.current_lr
def test_bcmodule_defaults():
    """Check the BC module's default and overridden ``num_epoch`` / ``batch_size``.

    Without overrides ``num_epoch`` is expected to be 3 and ``batch_size`` to
    equal the trainer config's top-level ``batch_size``; values set under
    ``behavioral_cloning`` are expected to take precedence.
    """
    brain = mb.create_mock_3dball_brain()
    config = ppo_dummy_config()

    # See if default values match
    default_policy = create_policy_with_bc_mock(brain, config, False, "test.demo")
    assert default_policy.bc_module.num_epoch == 3
    assert default_policy.bc_module.batch_size == config["batch_size"]

    # Assign strange values and see if it overrides properly
    config["behavioral_cloning"]["num_epoch"] = 100
    config["behavioral_cloning"]["batch_size"] = 10000
    custom_policy = create_policy_with_bc_mock(brain, config, False, "test.demo")
    assert custom_policy.bc_module.num_epoch == 100
    assert custom_policy.bc_module.batch_size == 10000
def test_bcmodule_constant_lr_update(is_sac):
    """Check that the BC learning rate does not change between updates when
    the settings are built with ``steps=0``.

    ``is_sac`` is forwarded unchanged to ``create_bc_module``.
    """
    brain = mb.create_mock_3dball_brain()
    # The demo file sits next to this test module.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    settings = BehavioralCloningSettings(
        demo_path=test_dir + "/" + "test.demo",
        steps=0,
    )
    module = create_bc_module(brain, settings, False, is_sac)

    first_stats = module.update()
    for value in first_stats.values():
        assert isinstance(value, np.float32)

    # A second update must leave the learning rate untouched.
    lr_before = module.current_lr
    module.update()
    assert lr_before == module.current_lr
def test_bcmodule_defaults(mock_env, dummy_config):
    """Check the BC module's default and overridden ``num_epoch`` / ``batch_size``.

    Without overrides both values are expected to match the top-level entries
    of ``dummy_config``; values set under ``pretraining`` are expected to take
    precedence. Each environment is closed in a ``finally`` clause so that a
    failing assertion does not leave it open.

    NOTE: ``dummy_config["pretraining"]`` is modified in place.
    """
    mock_brain = mb.create_mock_3dball_brain()

    # See if default values match
    env, policy = create_ppo_policy_with_bc_mock(
        mock_env, mock_brain, dummy_config, False, "test.demo"
    )
    try:
        assert policy.bc_module.num_epoch == dummy_config["num_epoch"]
        assert policy.bc_module.batch_size == dummy_config["batch_size"]
    finally:
        # Always release the environment, even when an assertion above fails.
        env.close()

    # Assign strange values and see if it overrides properly
    dummy_config["pretraining"]["num_epoch"] = 100
    dummy_config["pretraining"]["batch_size"] = 10000
    env, policy = create_ppo_policy_with_bc_mock(
        mock_env, mock_brain, dummy_config, False, "test.demo"
    )
    try:
        assert policy.bc_module.num_epoch == 100
        assert policy.bc_module.batch_size == 10000
    finally:
        env.close()
def test_bcmodule_defaults():
    """Check the BC module's default and overridden ``num_epoch`` / ``batch_size``.

    With default settings ``num_epoch`` is expected to be 3 and ``batch_size``
    to equal ``TrainerSettings().hyperparameters.batch_size``; explicit values
    passed to ``BehavioralCloningSettings`` are expected to take precedence.
    """
    brain = mb.create_mock_3dball_brain()
    # The demo file sits next to this test module.
    demo_file = os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"

    # See if default values match
    default_settings = BehavioralCloningSettings(demo_path=demo_file)
    default_module = create_bc_module(brain, default_settings, False, False)
    assert default_module.num_epoch == 3
    assert default_module.batch_size == TrainerSettings().hyperparameters.batch_size

    # Assign strange values and see if it overrides properly
    custom_settings = BehavioralCloningSettings(
        demo_path=demo_file,
        num_epoch=100,
        batch_size=10000,
    )
    custom_module = create_bc_module(brain, custom_settings, False, False)
    assert custom_module.num_epoch == 100
    assert custom_module.batch_size == 10000
def test_bc_trainer(mock_env, dummy_config):
    """Smoke-test BCTrainer: one policy update records a cloning loss and
    ``increment_step(1)`` advances ``step`` to 1.

    NOTE: ``dummy_config`` is modified in place -- the ``summary_path``,
    ``model_path`` and ``demo_path`` keys are written onto the fixture dict.
    """
    brain = mb.create_mock_3dball_brain()
    brain_info = mb.create_mock_braininfo(num_agents=12, num_vector_observations=8)
    mb.setup_mock_unityenvironment(mock_env, brain, brain_info)
    env = mock_env()

    dummy_config["summary_path"] = "tmp"
    dummy_config["model_path"] = "tmp"
    # The demo file sits next to this test module.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    dummy_config["demo_path"] = test_dir + "/test.demo"

    trainer = BCTrainer(
        brain, dummy_config, training=True, load=False, seed=0, run_id=0
    )
    trainer.demonstration_buffer = mb.simulate_rollout(env, trainer.policy, 100)

    trainer.update_policy()
    assert len(trainer.stats["Losses/Cloning Loss"]) > 0

    trainer.increment_step(1)
    assert trainer.step == 1