def test_sac_save_load_buffer(tmpdir, dummy_config):
    """
    Check that the SAC update buffer survives a save / re-create cycle.

    A first trainer gets a simulated rollout as its update buffer and is saved
    with ``save_replay_buffer`` enabled. A second trainer is then built with
    the same settings and the same output location, and must report the same
    number of experiences in its update buffer.

    :param tmpdir: pytest temporary directory. Used as the trainers' output
        location so the test leaves nothing behind in the working directory
        and cannot pick up files from an earlier run.
    :param dummy_config: trainer settings fixture; modified in place to turn
        on replay buffer saving.
    """
    # Last SACTrainer argument was a hard-coded "testdir"; presumably the
    # artifact/output path -- both trainers must share it for the reload.
    artifact_path = str(tmpdir)

    mock_specs = mb.setup_test_behavior_specs(
        False,
        False,
        vector_action_space=VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
    )
    trainer_params = dummy_config
    trainer_params.hyperparameters.save_replay_buffer = True
    trainer = SACTrainer("test", 1, trainer_params, True, False, 0, artifact_path)
    behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
    policy = trainer.create_policy(behavior_id, mock_specs)
    trainer.add_policy(behavior_id, policy)

    trainer.update_buffer = mb.simulate_rollout(
        BUFFER_INIT_SAMPLES, policy.behavior_spec
    )
    buffer_len = trainer.update_buffer.num_experiences
    trainer.save_model()

    # Wipe Trainer and try to load
    # (fifth positional argument flips from False to True here -- presumably
    # the "load" flag; confirm against the SACTrainer signature).
    trainer2 = SACTrainer("test", 1, trainer_params, True, True, 0, artifact_path)
    policy = trainer2.create_policy(behavior_id, mock_specs)
    trainer2.add_policy(behavior_id, policy)
    assert trainer2.update_buffer.num_experiences == buffer_len
def test_sac_save_load_buffer(tmpdir, dummy_config):
    """
    Check that the SAC update buffer survives a save / re-create cycle.

    A first trainer gets a simulated rollout as its update buffer and is saved
    with ``save_replay_buffer`` enabled. A second trainer is then built with
    the same settings and the same output location, and must report the same
    number of experiences in its update buffer.

    :param tmpdir: pytest temporary directory. Used as the trainers' output
        location so the test leaves nothing behind in the working directory
        and cannot pick up files from an earlier run.
    :param dummy_config: trainer settings fixture; modified in place to turn
        on replay buffer saving.
    """
    # Last SACTrainer argument was a hard-coded "testdir"; presumably the
    # artifact/output path -- both trainers must share it for the reload.
    artifact_path = str(tmpdir)

    mock_brain = mb.setup_mock_brain(
        False,
        False,
        vector_action_space=VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
        discrete_action_space=DISCRETE_ACTION_SPACE,
    )
    trainer_params = dummy_config
    trainer_params.hyperparameters.save_replay_buffer = True
    trainer = SACTrainer(
        mock_brain.brain_name, 1, trainer_params, True, False, 0, artifact_path
    )
    policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
    trainer.add_policy(mock_brain.brain_name, policy)

    trainer.update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, policy.brain)
    buffer_len = trainer.update_buffer.num_experiences
    trainer.save_model(mock_brain.brain_name)

    # Wipe Trainer and try to load
    # (fifth positional argument flips from False to True here -- presumably
    # the "load" flag; confirm against the SACTrainer signature).
    trainer2 = SACTrainer(
        mock_brain.brain_name, 1, trainer_params, True, True, 0, artifact_path
    )
    policy = trainer2.create_policy(mock_brain.brain_name, mock_brain)
    trainer2.add_policy(mock_brain.brain_name, policy)
    assert trainer2.update_buffer.num_experiences == buffer_len
def test_sac_save_load_buffer(tmpdir, dummy_config):
    """
    Verify that a second SAC trainer ends up with the same number of buffered
    experiences as the first one had when it was saved.

    :param tmpdir: pytest temporary directory; its string form is written into
        the config as both ``summary_path`` and ``model_path``.
    :param dummy_config: trainer config fixture, indexed like a dict; modified
        in place (paths and ``save_replay_buffer``).
    """
    mock_env, brain, _ = mb.setup_mock_env_and_brains(
        mock.Mock(),
        False,
        False,
        num_agents=NUM_AGENTS,
        vector_action_space=VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
        discrete_action_space=DISCRETE_ACTION_SPACE,
    )
    brain_name = brain.brain_name

    # Route all trainer output into the temporary directory and turn on
    # replay buffer saving.
    output_dir = str(tmpdir)
    config = dummy_config
    config["summary_path"] = output_dir
    config["model_path"] = output_dir
    config["save_replay_buffer"] = True

    # First trainer: fill the update buffer from a simulated rollout and save.
    saving_trainer = SACTrainer(brain_name, 1, config, True, False, 0, 0)
    saving_trainer.add_policy(brain_name, saving_trainer.create_policy(brain))
    saving_trainer.update_buffer = mb.simulate_rollout(
        mock_env, saving_trainer.policy, BUFFER_INIT_SAMPLES
    )
    expected_experiences = saving_trainer.update_buffer.num_experiences
    saving_trainer.save_model(brain_name)

    # Wipe Trainer and try to load
    loading_trainer = SACTrainer(brain_name, 1, config, True, True, 0, 0)
    loading_trainer.add_policy(brain_name, loading_trainer.create_policy(brain))
    assert loading_trainer.update_buffer.num_experiences == expected_experiences