def test_add_get_policy(sac_optimizer, mock_create_saver, dummy_config):
    mock_optimizer = mock.Mock()
    mock_optimizer.reward_signals = {}
    sac_optimizer.return_value = mock_optimizer

    trainer = SACTrainer("test", 0, dummy_config, True, False, 0, "0")
    policy = mock.Mock(spec=TFPolicy)
    policy.get_current_step.return_value = 2000

    behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
    trainer.add_policy(behavior_id, policy)
    assert trainer.get_policy(behavior_id.behavior_id) == policy

    # Make sure the summary steps were loaded properly
    assert trainer.get_step == 2000

def test_add_get_policy(sac_optimizer, dummy_config):
    brain_params = make_brain_parameters(
        discrete_action=False, visual_inputs=0, vec_obs_size=6
    )
    mock_optimizer = mock.Mock()
    mock_optimizer.reward_signals = {}
    sac_optimizer.return_value = mock_optimizer

    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
    policy = mock.Mock(spec=NNPolicy)
    policy.get_current_step.return_value = 2000

    trainer.add_policy(brain_params.brain_name, policy)
    assert trainer.get_policy(brain_params.brain_name) == policy

    # Make sure the summary steps were loaded properly
    assert trainer.get_step == 2000
    assert trainer.next_summary_step > 2000

    # Test incorrect class of policy
    policy = mock.Mock()
    with pytest.raises(RuntimeError):
        trainer.add_policy(brain_params.brain_name, policy)

def test_bad_config(dummy_config):
    brain_params = make_brain_parameters(
        discrete_action=False, visual_inputs=0, vec_obs_size=6
    )
    # Test that we throw an error if we have sequence length greater than batch size
    dummy_config["sequence_length"] = 64
    dummy_config["batch_size"] = 32
    dummy_config["use_recurrent"] = True
    dummy_config["output_path"] = "./results/test_trainer_models/TestModel"
    with pytest.raises(UnityTrainerException):
        _ = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0")

def test_process_trajectory(dummy_config):
    brain_params = make_brain_parameters(
        discrete_action=False, visual_inputs=0, vec_obs_size=6
    )
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
    policy = trainer.create_policy(brain_params)
    trainer.add_policy(brain_params.brain_name, policy)

    trajectory = make_fake_trajectory(
        length=15,
        max_step_complete=True,
        vec_obs_size=6,
        num_vis_obs=0,
        action_space=2,
    )
    trainer.process_trajectory(trajectory)

    # Check that trainer put trajectory in update buffer
    assert trainer.update_buffer.num_experiences == 15

    # Check that the stats are being collected as episode isn't complete
    for reward in trainer.collected_rewards.values():
        for agent in reward.values():
            assert agent > 0

    # Add a terminal trajectory
    trajectory = make_fake_trajectory(
        length=15,
        max_step_complete=False,
        vec_obs_size=6,
        num_vis_obs=0,
        action_space=2,
    )
    trainer.process_trajectory(trajectory)

    # Check that the stats are reset as episode is finished
    for reward in trainer.collected_rewards.values():
        for agent in reward.values():
            assert agent == 0
    assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0

def test_add_get_policy(sac_optimizer, dummy_config):
    mock_optimizer = mock.Mock()
    mock_optimizer.reward_signals = {}
    sac_optimizer.return_value = mock_optimizer

    trainer = SACTrainer("test", 0, dummy_config, True, False, 0, "0")
    policy = mock.Mock(spec=NNPolicy)
    policy.get_current_step.return_value = 2000

    trainer.add_policy("test", policy)
    assert trainer.get_policy("test") == policy

    # Make sure the summary steps were loaded properly
    assert trainer.get_step == 2000

    # Test incorrect class of policy
    policy = mock.Mock()
    with pytest.raises(RuntimeError):
        trainer.add_policy("test", policy)

def test_sac_save_load_buffer(tmpdir):
    env, mock_brain, _ = mb.setup_mock_env_and_brains(
        mock.Mock(),
        False,
        False,
        num_agents=NUM_AGENTS,
        vector_action_space=VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
        discrete_action_space=DISCRETE_ACTION_SPACE,
    )
    trainer_params = dummy_config()
    trainer_params["summary_path"] = str(tmpdir)
    trainer_params["model_path"] = str(tmpdir)
    trainer_params["save_replay_buffer"] = True
    trainer = SACTrainer(mock_brain, 1, trainer_params, True, False, 0, 0)
    trainer.update_buffer = mb.simulate_rollout(
        env, trainer.policy, BUFFER_INIT_SAMPLES
    )
    buffer_len = trainer.update_buffer.num_experiences
    trainer.save_model()

    # Wipe Trainer and try to load
    trainer2 = SACTrainer(mock_brain, 1, trainer_params, True, True, 0, 0)
    assert trainer2.update_buffer.num_experiences == buffer_len

def initialize_trainer(
    trainer_config: Any,
    brain_parameters: BrainParameters,
    summaries_dir: str,
    run_id: str,
    model_path: str,
    keep_checkpoints: int,
    train_model: bool,
    load_model: bool,
    seed: int,
    meta_curriculum: MetaCurriculum = None,
    multi_gpu: bool = False,
) -> Trainer:
    """
    Initializes a trainer given a provided trainer configuration and brain parameters,
    as well as some general training session options.

    :param trainer_config: Original trainer configuration loaded from YAML
    :param brain_parameters: BrainParameters provided by the Unity environment
    :param summaries_dir: Directory to store trainer summary statistics
    :param run_id: Run ID to associate with this training run
    :param model_path: Path to save the model
    :param keep_checkpoints: How many model checkpoints to keep
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param seed: The random seed to use
    :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
    :param multi_gpu: Whether to use multi-GPU training
    :return: The initialized trainer for the given brain
    """
    brain_name = brain_parameters.brain_name
    if "default" not in trainer_config and brain_name not in trainer_config:
        raise TrainerConfigError(
            f'Trainer config must have either a "default" section, or a section for the brain name ({brain_name}). '
            "See config/trainer_config.yaml for an example."
        )

    trainer_parameters = trainer_config.get("default", {}).copy()
    trainer_parameters["summary_path"] = str(run_id) + "_" + brain_name
    trainer_parameters["model_path"] = "{basedir}/{name}".format(
        basedir=model_path, name=brain_name
    )
    trainer_parameters["keep_checkpoints"] = keep_checkpoints
    if brain_name in trainer_config:
        _brain_key: Any = brain_name
        while not isinstance(trainer_config[_brain_key], dict):
            _brain_key = trainer_config[_brain_key]
        trainer_parameters.update(trainer_config[_brain_key])

    min_lesson_length = 1
    if meta_curriculum:
        if brain_name in meta_curriculum.brains_to_curriculums:
            min_lesson_length = meta_curriculum.brains_to_curriculums[
                brain_name
            ].min_lesson_length
        else:
            logger.warning(
                f"Metacurriculum enabled, but no curriculum for brain {brain_name}. "
                f"Brains with curricula: {meta_curriculum.brains_to_curriculums.keys()}. "
            )

    trainer: Trainer = None  # type: ignore  # will be set to one of these, or raise
    if "trainer" not in trainer_parameters:
        raise TrainerConfigError(
            f'The "trainer" key must be set in your trainer config for brain {brain_name} (or the default brain).'
        )
    trainer_type = trainer_parameters["trainer"]

    if trainer_type == "offline_bc":
        raise UnityTrainerException(
            "The offline_bc trainer has been removed. To train with demonstrations, "
            "please use a PPO or SAC trainer with the GAIL Reward Signal and/or the "
            "Behavioral Cloning feature enabled."
        )
    elif trainer_type == "ppo":
        trainer = PPOTrainer(
            brain_parameters,
            min_lesson_length,
            trainer_parameters,
            train_model,
            load_model,
            seed,
            run_id,
            multi_gpu,
        )
    elif trainer_type == "sac":
        trainer = SACTrainer(
            brain_parameters,
            min_lesson_length,
            trainer_parameters,
            train_model,
            load_model,
            seed,
            run_id,
        )
    else:
        raise TrainerConfigError(
            f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}'
        )
    return trainer

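# --- Hedged usage sketch (not from the source): a minimal invocation of the
# initialize_trainer above. The config values, paths, run_id, and the mocked
# BrainParameters are illustrative assumptions, not the library's own fixtures.
from unittest import mock

example_trainer_config = {"default": {"trainer": "ppo", "batch_size": 1024}}
example_brain = mock.Mock()
example_brain.brain_name = "TestBrain"

example_trainer = initialize_trainer(
    trainer_config=example_trainer_config,
    brain_parameters=example_brain,
    summaries_dir="./summaries",
    run_id="run-0",
    model_path="./models/run-0",
    keep_checkpoints=5,
    train_model=True,
    load_model=False,
    seed=0,
)
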
def initialize_trainers(
    trainer_config: Dict[str, Any],
    external_brains: Dict[str, BrainParameters],
    summaries_dir: str,
    run_id: str,
    model_path: str,
    keep_checkpoints: int,
    train_model: bool,
    load_model: bool,
    seed: int,
    meta_curriculum: MetaCurriculum = None,
    multi_gpu: bool = False,
) -> Dict[str, Trainer]:
    """
    Initializes trainers given a provided trainer configuration and set of brains
    from the environment, as well as some general training session options.

    :param trainer_config: Original trainer configuration loaded from YAML
    :param external_brains: BrainParameters provided by the Unity environment
    :param summaries_dir: Directory to store trainer summary statistics
    :param run_id: Run ID to associate with this training run
    :param model_path: Path to save the model
    :param keep_checkpoints: How many model checkpoints to keep
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param seed: The random seed to use
    :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
    :param multi_gpu: Whether to use multi-GPU training
    :return: A dict mapping brain names to initialized trainers
    """
    trainers = {}
    trainer_parameters_dict = {}
    for brain_name in external_brains:
        trainer_parameters = trainer_config["default"].copy()
        trainer_parameters["summary_path"] = "{basedir}/{name}".format(
            basedir=summaries_dir, name=str(run_id) + "_" + brain_name
        )
        trainer_parameters["model_path"] = "{basedir}/{name}".format(
            basedir=model_path, name=brain_name
        )
        trainer_parameters["keep_checkpoints"] = keep_checkpoints
        if brain_name in trainer_config:
            _brain_key: Any = brain_name
            while not isinstance(trainer_config[_brain_key], dict):
                _brain_key = trainer_config[_brain_key]
            trainer_parameters.update(trainer_config[_brain_key])
        trainer_parameters_dict[brain_name] = trainer_parameters.copy()
    for brain_name in external_brains:
        if trainer_parameters_dict[brain_name]["trainer"] == "offline_bc":
            trainers[brain_name] = OfflineBCTrainer(
                external_brains[brain_name],
                trainer_parameters_dict[brain_name],
                train_model,
                load_model,
                seed,
                run_id,
            )
        elif trainer_parameters_dict[brain_name]["trainer"] == "online_bc":
            trainers[brain_name] = OnlineBCTrainer(
                external_brains[brain_name],
                trainer_parameters_dict[brain_name],
                train_model,
                load_model,
                seed,
                run_id,
            )
        elif trainer_parameters_dict[brain_name]["trainer"] == "ppo":
            trainers[brain_name] = PPOTrainer(
                external_brains[brain_name],
                meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
                if meta_curriculum
                else 1,
                trainer_parameters_dict[brain_name],
                train_model,
                load_model,
                seed,
                run_id,
                multi_gpu,
            )
        elif trainer_parameters_dict[brain_name]["trainer"] == "sac":
            trainers[brain_name] = SACTrainer(
                external_brains[brain_name],
                meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
                if meta_curriculum
                else 1,
                trainer_parameters_dict[brain_name],
                train_model,
                load_model,
                seed,
                run_id,
            )
        else:
            raise UnityEnvironmentException(
                "The trainer config contains an unknown trainer type for "
                "brain {}".format(brain_name)
            )
    return trainers

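# --- Hedged sketch (not from the source): the `while not isinstance(...)` loop
# in the functions above follows string-valued config entries as aliases until
# it reaches a dict, so one brain's section can point at another's. The brain
# names and values below are hypothetical.
example_config = {
    "default": {"trainer": "ppo"},
    "StrikerLearning": {"trainer": "ppo", "batch_size": 2048},
    # A string value acts as an alias: lookups for "GoalieLearning" resolve to
    # StrikerLearning's dict, so both brains share one settings section.
    "GoalieLearning": "StrikerLearning",
}
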
def test_sac_save_load_buffer(tmpdir, dummy_config):
    mock_brain = mb.setup_mock_brain(
        False,
        False,
        vector_action_space=VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
        discrete_action_space=DISCRETE_ACTION_SPACE,
    )
    trainer_params = dummy_config
    trainer_params.hyperparameters.save_replay_buffer = True
    trainer = SACTrainer(
        mock_brain.brain_name, 1, trainer_params, True, False, 0, "testdir"
    )
    policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
    trainer.add_policy(mock_brain.brain_name, policy)
    trainer.update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, policy.brain)
    buffer_len = trainer.update_buffer.num_experiences
    trainer.save_model(mock_brain.brain_name)

    # Wipe Trainer and try to load
    trainer2 = SACTrainer(
        mock_brain.brain_name, 1, trainer_params, True, True, 0, "testdir"
    )
    policy = trainer2.create_policy(mock_brain.brain_name, mock_brain)
    trainer2.add_policy(mock_brain.brain_name, policy)
    assert trainer2.update_buffer.num_experiences == buffer_len

def test_advance(dummy_config):
    brain_params = make_brain_parameters(
        discrete_action=False, visual_inputs=0, vec_obs_size=6
    )
    dummy_config.hyperparameters.steps_per_update = 20
    dummy_config.hyperparameters.reward_signal_steps_per_update = 20
    dummy_config.hyperparameters.buffer_init_steps = 0
    trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
    policy = trainer.create_policy(brain_params.brain_name, brain_params)
    trainer.add_policy(brain_params.brain_name, policy)

    trajectory_queue = AgentManagerQueue("testbrain")
    policy_queue = AgentManagerQueue("testbrain")
    trainer.subscribe_trajectory_queue(trajectory_queue)
    trainer.publish_policy_queue(policy_queue)

    trajectory = make_fake_trajectory(
        length=15,
        max_step_complete=True,
        vec_obs_size=6,
        num_vis_obs=0,
        action_space=[2],
        is_discrete=False,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

    # Check that trainer put trajectory in update buffer
    assert trainer.update_buffer.num_experiences == 15

    # Check that the stats are being collected as episode isn't complete
    for reward in trainer.collected_rewards.values():
        for agent in reward.values():
            assert agent > 0

    # Add a terminal trajectory
    trajectory = make_fake_trajectory(
        length=6,
        max_step_complete=False,
        vec_obs_size=6,
        num_vis_obs=0,
        action_space=[2],
        is_discrete=False,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

    # Check that the stats are reset as episode is finished
    for reward in trainer.collected_rewards.values():
        for agent in reward.values():
            assert agent == 0
    assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
    # Assert we're not just using the default values
    assert (
        trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").mean > 0
    )

    # Make sure there is a policy on the queue
    policy_queue.get_nowait()

    # Add another trajectory. Since fewer than 20 steps (steps_per_update) have
    # accumulated since the last update, there should NOT be a policy on the queue.
    trajectory = make_fake_trajectory(
        length=5,
        max_step_complete=False,
        vec_obs_size=6,
        num_vis_obs=0,
        action_space=[2],
        is_discrete=False,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()
    with pytest.raises(AgentManagerQueue.Empty):
        policy_queue.get_nowait()

    # Call add_policy and check that we update the correct number of times.
    # This is to emulate a load from checkpoint.
    policy = trainer.create_policy(brain_params.brain_name, brain_params)
    policy.get_current_step = lambda: 200
    trainer.add_policy(brain_params.brain_name, policy)
    trainer.optimizer.update = mock.Mock()
    trainer.optimizer.update_reward_signals = mock.Mock()
    trainer.optimizer.update_reward_signals.return_value = {}
    trainer.optimizer.update.return_value = {}
    trajectory_queue.put(trajectory)
    trainer.advance()

    # Make sure we did exactly 1 update
    assert trainer.optimizer.update.call_count == 1
    assert trainer.optimizer.update_reward_signals.call_count == 1

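# --- Hedged sketch (not from the source) of the step arithmetic behind the
# policy-queue assertions in test_advance above, assuming roughly one update
# (and one queued policy) per full steps_per_update interval.
steps_per_update = 20
steps_seen = 15 + 6  # 21 >= 20 -> one update runs, one policy lands on the queue
steps_seen += 5      # 26 < 2 * 20 -> no second update, so the queue stays empty
assert steps_seen // steps_per_update == 1
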
def _initialize_trainer(
    trainer_settings: TrainerSettings,
    brain_name: str,
    output_path: str,
    train_model: bool,
    load_model: bool,
    ghost_controller: GhostController,
    seed: int,
    param_manager: EnvironmentParameterManager,
    init_path: str = None,
    multi_gpu: bool = False,
) -> Trainer:
    """
    Initializes a trainer given a provided trainer configuration and brain parameters,
    as well as some general training session options.

    :param trainer_settings: Original trainer configuration loaded from YAML
    :param brain_name: Name of the brain to be associated with trainer
    :param output_path: Path to save the model and summary statistics
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param ghost_controller: The object that coordinates ghost trainers
    :param seed: The random seed to use
    :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer
    :param init_path: Path from which to load model, if different from model_path.
    :param multi_gpu: Whether to use multi-GPU training
    :return: The initialized trainer for the given brain
    """
    trainer_artifact_path = os.path.join(output_path, brain_name)
    if init_path is not None:
        trainer_settings.init_path = os.path.join(init_path, brain_name)

    min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name)

    trainer: Trainer = None  # type: ignore  # will be set to one of these, or raise
    trainer_type = trainer_settings.trainer_type

    if trainer_type == TrainerType.PPO:
        trainer = PPOTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            trainer_artifact_path,
        )
    elif trainer_type == TrainerType.SAC:
        trainer = SACTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            trainer_artifact_path,
        )
    else:
        raise TrainerConfigError(
            f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}'
        )

    if trainer_settings.self_play is not None:
        trainer = GhostTrainer(
            trainer,
            brain_name,
            ghost_controller,
            min_lesson_length,
            trainer_settings,
            train_model,
            trainer_artifact_path,
        )
    return trainer

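# --- Hedged usage sketch (not from the source): calling the _initialize_trainer
# above. The default TrainerSettings (assumed to resolve to PPO), the mocked
# EnvironmentParameterManager, the no-argument GhostController(), and the paths
# are all assumptions for illustration.
from unittest import mock

example_settings = TrainerSettings()  # assumes default trainer_type is PPO
example_param_manager = mock.Mock()
example_param_manager.get_minimum_reward_buffer_size.return_value = 1

example_trainer = _initialize_trainer(
    trainer_settings=example_settings,
    brain_name="TestBrain",
    output_path="./results/run-0",
    train_model=True,
    load_model=False,
    ghost_controller=GhostController(),
    seed=0,
    param_manager=example_param_manager,
)
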
def test_sac_save_load_buffer(tmpdir, dummy_config):
    mock_specs = mb.setup_test_behavior_specs(
        False,
        False,
        vector_action_space=VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
    )
    trainer_params = dummy_config
    trainer_params.hyperparameters.save_replay_buffer = True
    trainer = SACTrainer("test", 1, trainer_params, True, False, 0, "testdir")
    behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
    policy = trainer.create_policy(behavior_id, mock_specs)
    trainer.add_policy(behavior_id, policy)
    trainer.update_buffer = mb.simulate_rollout(
        BUFFER_INIT_SAMPLES, policy.behavior_spec
    )
    buffer_len = trainer.update_buffer.num_experiences
    trainer.save_model()

    # Wipe Trainer and try to load
    trainer2 = SACTrainer("test", 1, trainer_params, True, True, 0, "testdir")
    policy = trainer2.create_policy(behavior_id, mock_specs)
    trainer2.add_policy(behavior_id, policy)
    assert trainer2.update_buffer.num_experiences == buffer_len

def initialize_trainer(
    trainer_config: Any,
    brain_name: str,
    run_id: str,
    output_path: str,
    keep_checkpoints: int,
    train_model: bool,
    load_model: bool,
    ghost_controller: GhostController,
    seed: int,
    init_path: str = None,
    meta_curriculum: MetaCurriculum = None,
    multi_gpu: bool = False,
) -> Trainer:
    """
    Initializes a trainer given a provided trainer configuration and brain parameters,
    as well as some general training session options.

    :param trainer_config: Original trainer configuration loaded from YAML
    :param brain_name: Name of the brain to be associated with trainer
    :param run_id: Run ID to associate with this training run
    :param output_path: Path to save the model and summary statistics
    :param keep_checkpoints: How many model checkpoints to keep
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param ghost_controller: The object that coordinates ghost trainers
    :param seed: The random seed to use
    :param init_path: Path from which to load model, if different from model_path.
    :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
    :param multi_gpu: Whether to use multi-GPU training
    :return: The initialized trainer for the given brain
    """
    if "default" not in trainer_config and brain_name not in trainer_config:
        raise TrainerConfigError(
            f'Trainer config must have either a "default" section, or a section for the brain name {brain_name}. '
            "See the config/ directory for examples."
        )

    trainer_parameters = trainer_config.get("default", {}).copy()
    trainer_parameters["output_path"] = os.path.join(output_path, brain_name)
    if init_path is not None:
        trainer_parameters["init_path"] = os.path.join(init_path, brain_name)
    trainer_parameters["keep_checkpoints"] = keep_checkpoints
    if brain_name in trainer_config:
        _brain_key: Any = brain_name
        while not isinstance(trainer_config[_brain_key], dict):
            _brain_key = trainer_config[_brain_key]
        trainer_parameters.update(trainer_config[_brain_key])

    min_lesson_length = 1
    if meta_curriculum:
        if brain_name in meta_curriculum.brains_to_curricula:
            min_lesson_length = meta_curriculum.brains_to_curricula[
                brain_name
            ].min_lesson_length
        else:
            logger.warning(
                f"Metacurriculum enabled, but no curriculum for brain {brain_name}. "
                f"Brains with curricula: {meta_curriculum.brains_to_curricula.keys()}. "
            )

    trainer: Trainer = None  # type: ignore  # will be set to one of these, or raise
    if "trainer" not in trainer_parameters:
        raise TrainerConfigError(
            f'The "trainer" key must be set in your trainer config for brain {brain_name} (or the default brain).'
        )
    trainer_type = trainer_parameters["trainer"]

    if trainer_type == "offline_bc":
        raise UnityTrainerException(
            "The offline_bc trainer has been removed. To train with demonstrations, "
            "please use a PPO or SAC trainer with the GAIL Reward Signal and/or the "
            "Behavioral Cloning feature enabled."
        )
    elif trainer_type == "ppo":
        trainer = PPOTrainer(
            brain_name,
            min_lesson_length,
            trainer_parameters,
            train_model,
            load_model,
            seed,
            run_id,
        )
    elif trainer_type == "sac":
        trainer = SACTrainer(
            brain_name,
            min_lesson_length,
            trainer_parameters,
            train_model,
            load_model,
            seed,
            run_id,
        )
    else:
        raise TrainerConfigError(
            f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}'
        )

    if "self_play" in trainer_parameters:
        trainer = GhostTrainer(
            trainer,
            brain_name,
            ghost_controller,
            min_lesson_length,
            trainer_parameters,
            train_model,
            run_id,
        )
    return trainer

def initialize_trainer(
    trainer_settings: TrainerSettings,
    brain_name: str,
    run_id: str,
    output_path: str,
    train_model: bool,
    load_model: bool,
    ghost_controller: GhostController,
    seed: int,
    init_path: str = None,
    meta_curriculum: MetaCurriculum = None,
    multi_gpu: bool = False,
) -> Trainer:
    """
    Initializes a trainer given a provided trainer configuration and brain parameters,
    as well as some general training session options.

    :param trainer_settings: Original trainer configuration loaded from YAML
    :param brain_name: Name of the brain to be associated with trainer
    :param run_id: Run ID to associate with this training run
    :param output_path: Path to save the model and summary statistics
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param ghost_controller: The object that coordinates ghost trainers
    :param seed: The random seed to use
    :param init_path: Path from which to load model, if different from model_path.
    :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
    :param multi_gpu: Whether to use multi-GPU training
    :return: The initialized trainer for the given brain
    """
    trainer_settings.output_path = os.path.join(output_path, brain_name)
    if init_path is not None:
        trainer_settings.init_path = os.path.join(init_path, brain_name)

    min_lesson_length = 1
    if meta_curriculum:
        if brain_name in meta_curriculum.brains_to_curricula:
            min_lesson_length = meta_curriculum.brains_to_curricula[
                brain_name
            ].min_lesson_length
        else:
            logger.warning(
                f"Metacurriculum enabled, but no curriculum for brain {brain_name}. "
                f"Brains with curricula: {meta_curriculum.brains_to_curricula.keys()}. "
            )

    trainer: Trainer = None  # type: ignore  # will be set to one of these, or raise
    trainer_type = trainer_settings.trainer_type

    if trainer_type == TrainerType.PPO:
        trainer = PPOTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            run_id,
        )
    elif trainer_type == TrainerType.SAC:
        trainer = SACTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            run_id,
        )
    else:
        raise TrainerConfigError(
            f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}'
        )

    if trainer_settings.self_play is not None:
        trainer = GhostTrainer(
            trainer,
            brain_name,
            ghost_controller,
            min_lesson_length,
            trainer_settings,
            train_model,
            run_id,
        )
    return trainer

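# --- Hedged sketch (not from the source): the self_play branch at the end of
# the function above wraps whichever base PPO/SAC trainer was built in a
# GhostTrainer. Assumes SelfPlaySettings and GhostController construct with
# defaults; names and paths are illustrative.
example_settings = TrainerSettings(self_play=SelfPlaySettings())
example_trainer = initialize_trainer(
    trainer_settings=example_settings,
    brain_name="TestBrain",
    run_id="run-0",
    output_path="./results/run-0",
    train_model=True,
    load_model=False,
    ghost_controller=GhostController(),
    seed=0,
)
assert isinstance(example_trainer, GhostTrainer)
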
def test_advance(dummy_config):
    brain_params = make_brain_parameters(
        discrete_action=False, visual_inputs=0, vec_obs_size=6
    )
    dummy_config["output_path"] = "./results/test_trainer_models/TestModel"
    dummy_config["steps_per_update"] = 20
    trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
    policy = trainer.create_policy(brain_params.brain_name, brain_params)
    trainer.add_policy(brain_params.brain_name, policy)

    trajectory_queue = AgentManagerQueue("testbrain")
    policy_queue = AgentManagerQueue("testbrain")
    trainer.subscribe_trajectory_queue(trajectory_queue)
    trainer.publish_policy_queue(policy_queue)

    trajectory = make_fake_trajectory(
        length=15,
        max_step_complete=True,
        vec_obs_size=6,
        num_vis_obs=0,
        action_space=[2],
        is_discrete=False,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

    # Check that trainer put trajectory in update buffer
    assert trainer.update_buffer.num_experiences == 15

    # Check that the stats are being collected as episode isn't complete
    for reward in trainer.collected_rewards.values():
        for agent in reward.values():
            assert agent > 0

    # Add a terminal trajectory
    trajectory = make_fake_trajectory(
        length=6,
        max_step_complete=False,
        vec_obs_size=6,
        num_vis_obs=0,
        action_space=[2],
        is_discrete=False,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

    # Check that the stats are reset as episode is finished
    for reward in trainer.collected_rewards.values():
        for agent in reward.values():
            assert agent == 0
    assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
    # Assert we're not just using the default values
    assert (
        trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").mean > 0
    )

    # Make sure there is a policy on the queue
    policy_queue.get_nowait()

    # Add another trajectory. Since fewer than 20 steps (steps_per_update) have
    # accumulated since the last update, there should NOT be a policy on the queue.
    trajectory = make_fake_trajectory(
        length=5,
        max_step_complete=False,
        vec_obs_size=6,
        num_vis_obs=0,
        action_space=[2],
        is_discrete=False,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()
    with pytest.raises(AgentManagerQueue.Empty):
        policy_queue.get_nowait()