def test_agent_manager_stats():
    """
    Check that environment stats recorded through an AgentManager reach the
    writers of its StatsReporter, aggregated per their aggregation method.

    Two batches of stats are recorded: the "averaged" key is expected to be
    summarized over both values, while "most_recent" is expected to keep only
    the last value.
    """
    policy = mock.Mock()
    stats_reporter = StatsReporter("FakeCategory")
    writer = mock.Mock()
    stats_reporter.add_writer(writer)
    try:
        manager = AgentManager(policy, "MyBehavior", stats_reporter)

        all_env_stats = [
            {
                "averaged": [(1.0, StatsAggregationMethod.AVERAGE)],
                "most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)],
            },
            {
                "averaged": [(3.0, StatsAggregationMethod.AVERAGE)],
                "most_recent": [(4.0, StatsAggregationMethod.MOST_RECENT)],
            },
        ]
        for env_stats in all_env_stats:
            manager.record_environment_stats(env_stats, worker_id=0)

        expected_stats = {
            "averaged": StatsSummary(mean=2.0, std=mock.ANY, num=2),
            "most_recent": StatsSummary(mean=4.0, std=0.0, num=1),
        }
        stats_reporter.write_stats(123)
        writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123)
    finally:
        # The writer list lives on the StatsReporter class (global state), so
        # remove our Mock even when an assertion above fails; otherwise it
        # would stay registered for every test that runs afterwards.
        StatsReporter.writers.remove(writer)
def _create_trainer_and_manager(
    self, env_manager: EnvManager, name_behavior_id: str
) -> None:
    """
    Set up the trainer, policy and AgentManager for a single behavior id.

    The trainer is looked up in ``self.trainers`` by the brain name parsed
    from ``name_behavior_id``. If none is registered, one is generated by
    ``self.trainer_factory``, stored, logged, and (when ``self.train_model``
    is truthy) its parameters are passed to ``write_tensorboard_text``.

    :param env_manager: Environment manager that supplies the brain for this
        behavior id and receives the new policy and AgentManager.
    :param name_behavior_id: Behavior id string, parsed with BehaviorIdentifiers.
    """
    behavior_ids = BehaviorIdentifiers.from_name_behavior_id(name_behavior_id)
    brain_name = behavior_ids.brain_name

    try:
        trainer = self.trainers[brain_name]
    except KeyError:
        # First time this brain name is seen: build and register its trainer.
        trainer = self.trainer_factory.generate(brain_name)
        self.trainers[brain_name] = trainer
        self.logger.info(trainer)
        if self.train_model:
            trainer.write_tensorboard_text("Hyperparameters", trainer.parameters)

    brain_parameters = env_manager.external_brains[name_behavior_id]
    policy = trainer.create_policy(brain_parameters)
    trainer.add_policy(name_behavior_id, policy)

    # Fall back to sys.maxsize when the trainer config has no "time_horizon".
    time_horizon = trainer.parameters.get("time_horizon", sys.maxsize)
    agent_manager = AgentManager(
        policy, name_behavior_id, trainer.stats_reporter, time_horizon
    )

    env_manager.set_agent_manager(name_behavior_id, agent_manager)
    env_manager.set_policy(name_behavior_id, policy)
    self.brain_name_to_identifier[brain_name].add(name_behavior_id)

    # Connect the trainer to the manager's policy and trajectory queues.
    trainer.publish_policy_queue(agent_manager.policy_queue)
    trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)
def _create_trainer_and_manager(
    self, env_manager: EnvManager, name_behavior_id: str
) -> None:
    """
    Set up the trainer, policy and AgentManager for a single behavior id.

    The trainer registered under the parsed brain name is reused if present;
    otherwise one is generated by ``self.trainer_factory`` and stored in
    ``self.trainers``.

    :param env_manager: Environment manager that supplies the brain for this
        behavior id and receives the new policy and AgentManager.
    :param name_behavior_id: Behavior id string, parsed with BehaviorIdentifiers.
    """
    behavior_ids = BehaviorIdentifiers.from_name_behavior_id(name_behavior_id)
    brain_name = behavior_ids.brain_name

    try:
        trainer = self.trainers[brain_name]
    except KeyError:
        # First time this brain name is seen: build and register its trainer.
        trainer = self.trainer_factory.generate(brain_name)
        self.trainers[brain_name] = trainer

    brain_parameters = env_manager.external_brains[name_behavior_id]
    policy = trainer.create_policy(behavior_ids, brain_parameters)
    trainer.add_policy(behavior_ids, policy)

    # Fall back to sys.maxsize when the trainer config has no "time_horizon".
    time_horizon = trainer.parameters.get("time_horizon", sys.maxsize)
    agent_manager = AgentManager(
        policy, name_behavior_id, trainer.stats_reporter, time_horizon
    )

    env_manager.set_agent_manager(name_behavior_id, agent_manager)
    env_manager.set_policy(name_behavior_id, policy)
    self.brain_name_to_identifier[brain_name].add(name_behavior_id)

    # Connect the trainer to the manager's policy and trajectory queues.
    trainer.publish_policy_queue(agent_manager.policy_queue)
    trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)
def _create_trainer_and_manager(
    self, env_manager: EnvManager, name_behavior_id: str
) -> None:
    """
    Set up the trainer, policy and AgentManager for a single behavior id, and
    start an update thread for the trainer when it is new and threaded.

    The trainer registered under the parsed brain name is reused if present;
    otherwise one is generated by ``self.trainer_factory`` and stored in
    ``self.trainers``.

    :param env_manager: Environment manager that supplies the brain for this
        behavior id and receives the new policy and AgentManager.
    :param name_behavior_id: Behavior id string, parsed with BehaviorIdentifiers.
    """
    parsed_behavior_id = BehaviorIdentifiers.from_name_behavior_id(name_behavior_id)
    brain_name = parsed_behavior_id.brain_name
    try:
        trainer = self.trainers[brain_name]
    except KeyError:
        trainer = self.trainer_factory.generate(brain_name)
        self.trainers[brain_name] = trainer
        is_new_trainer = True
    else:
        is_new_trainer = False

    policy = trainer.create_policy(
        parsed_behavior_id, env_manager.external_brains[name_behavior_id]
    )
    trainer.add_policy(parsed_behavior_id, policy)

    agent_manager = AgentManager(
        policy,
        name_behavior_id,
        trainer.stats_reporter,
        # Fall back to sys.maxsize when the config has no "time_horizon".
        trainer.parameters.get("time_horizon", sys.maxsize),
    )
    env_manager.set_agent_manager(name_behavior_id, agent_manager)
    env_manager.set_policy(name_behavior_id, policy)
    self.brain_name_to_identifier[brain_name].add(name_behavior_id)

    trainer.publish_policy_queue(agent_manager.policy_queue)
    trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)

    # Only spawn an update thread for a trainer created in this call. An
    # existing trainer already got its thread when it was created; starting
    # another one for each additional behavior id would run several
    # trainer_update_func loops over the same trainer concurrently.
    if is_new_trainer and trainer.threaded:
        trainer_thread = threading.Thread(
            target=self.trainer_update_func, args=(trainer,), daemon=True
        )
        trainer_thread.start()
        self.trainer_threads.append(trainer_thread)
def test_agent_manager():
    """Check that a newly built AgentManager holds exactly one AgentManagerQueue for trajectories."""
    mock_policy = create_mock_policy()
    agent_manager = AgentManager(
        mock_policy,
        "test_brain_name",
        stats_reporter=StatsReporter("testcat"),
        max_trajectory_length=5,
    )

    queues = agent_manager.trajectory_queues
    assert len(queues) == 1
    assert isinstance(queues[0], AgentManagerQueue)
def _create_trainer_and_manager(
    self, env_manager: EnvManager, name_behavior_id: str
) -> None:
    """
    Set up the trainer, policy and AgentManager for a single behavior id.

    When no trainer is registered for the parsed brain name, one is generated
    by ``self.trainer_factory``, the environment manager is told that training
    started, and (for threaded trainers) an update thread is created. That
    thread is only started at the very end, after the policy and the queues
    have been connected.

    :param env_manager: Environment manager that supplies the training
        behavior for this id and receives the new policy and AgentManager.
    :param name_behavior_id: Behavior id string, parsed with BehaviorIdentifiers.
    """
    behavior_ids = BehaviorIdentifiers.from_name_behavior_id(name_behavior_id)
    brain_name = behavior_ids.brain_name

    # Stays None unless a new, threaded trainer is created below.
    update_thread = None
    if brain_name not in self.trainers:
        trainer = self.trainer_factory.generate(brain_name)
        self.trainers[brain_name] = trainer
        if trainer.threaded:
            # Created here but deliberately not started until setup is done.
            update_thread = threading.Thread(
                target=self.trainer_update_func, args=(trainer,), daemon=True
            )
            self.trainer_threads.append(update_thread)
        env_manager.on_training_started(
            brain_name, self.trainer_factory.trainer_config[brain_name]
        )
    else:
        trainer = self.trainers[brain_name]

    behavior_spec = env_manager.training_behaviors[name_behavior_id]
    policy = trainer.create_policy(behavior_ids, behavior_spec, create_graph=True)
    trainer.add_policy(behavior_ids, policy)

    agent_manager = AgentManager(
        policy,
        name_behavior_id,
        trainer.stats_reporter,
        trainer.parameters.time_horizon,
        threaded=trainer.threaded,
    )
    env_manager.set_agent_manager(name_behavior_id, agent_manager)
    env_manager.set_policy(name_behavior_id, policy)
    self.brain_name_to_identifier[brain_name].add(name_behavior_id)

    # Connect the trainer to the manager's policy and trajectory queues.
    trainer.publish_policy_queue(agent_manager.policy_queue)
    trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)

    # Only a trainer created in this call has a thread to start.
    if update_thread is not None:
        update_thread.start()