def evaluator(
    self,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
    trainer: Optional[training.MADQNTrainer] = None,
) -> Any:
    """System evaluator (an executor process not connected to a dataset)

    Args:
        variable_source (acme.VariableSource): variable server for updating
            network variables.
        counter (counting.Counter): step counter object.
        trainer (Optional[training.MADQNTrainer], optional): system trainer.
            Defaults to None.

    Returns:
        Any: environment-executor evaluation loop instance for evaluating
            the performance of a system.
    """

    # Create the behavior policy.
    networks = self._network_factory(  # type: ignore
        environment_spec=self._environment_spec,
        shared_weights=self._shared_weights,
    )

    # Create system architecture with target networks.
    architecture = self._architecture(
        environment_spec=self._environment_spec,
        value_networks=networks["q_networks"],
        shared_weights=self._shared_weights,
    )

    # Optionally wrap the architecture with replay stabilisation
    # (e.g. fingerprints), mirroring the executor process.
    if self._builder._replay_stabiliser_fn is not None:
        architecture = self._builder._replay_stabiliser_fn(  # type: ignore
            architecture
        )

    communication_module = None
    if self._communication_module_fn is not None:
        # NOTE(review): channel_size/channel_noise are hard-coded here —
        # confirm these match the values used during training.
        communication_module = self._communication_module_fn(
            architecture=architecture,
            shared=True,
            channel_size=1,
            channel_noise=0,
        )
        system_networks = communication_module.create_system()
    else:
        system_networks = architecture.create_system()

    # Create the agent.
    executor = self._builder.make_executor(
        q_networks=system_networks["values"],
        action_selectors=networks["action_selectors"],
        variable_source=variable_source,
        communication_module=communication_module,
        trainer=trainer,
        evaluator=True,
    )

    # Make the environment.
    environment = self._environment_factory(evaluation=True)  # type: ignore

    # Create logger and counter.
    counter = counting.Counter(counter, "evaluator")
    evaluator_logger_config = {}
    if self._logger_config and "evaluator" in self._logger_config:
        evaluator_logger_config = self._logger_config["evaluator"]
    eval_logger = self._logger_factory(  # type: ignore
        "evaluator", **evaluator_logger_config
    )

    # Create the run loop and return it.
    # Create the loop to connect environment and executor.
    eval_loop = self._eval_loop_fn(
        environment,
        executor,
        counter=counter,
        logger=eval_logger,
        **self._eval_loop_fn_kwargs,
    )

    eval_loop = DetailedPerAgentStatistics(eval_loop)

    return eval_loop
def evaluator(
    self,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
    logger: Optional[loggers.Logger] = None,
) -> Any:
    """System evaluator (an executor process not connected to a dataset)

    Args:
        variable_source (acme.VariableSource): variable server for updating
            network variables.
        counter (counting.Counter): step counter object.
        logger (Optional[loggers.Logger], optional): logger object.
            Defaults to None.
            NOTE(review): this parameter is currently unused — the evaluator
            logger is created via ``self._logger_factory`` below. Kept for
            interface compatibility; confirm whether it should be honoured.

    Returns:
        Any: environment-executor evaluation loop instance for evaluating
            the performance of a system.
    """

    # Create the behavior policy.
    networks = self._network_factory(  # type: ignore
        environment_spec=self._environment_spec,
        shared_weights=self._shared_weights,
    )

    # Create system architecture with target networks.
    system = self._architecture(
        environment_spec=self._environment_spec,
        observation_networks=networks["observations"],
        policy_networks=networks["policies"],
        critic_networks=networks["critics"],
        shared_weights=self._shared_weights,
    )

    # Create variables (the full system networks are not needed here).
    _ = system.create_system()

    # Behaviour policy networks (obs net + policy head).
    behaviour_policy_networks = system.create_behaviour_policy()

    # Create the agent.
    executor = self._builder.make_executor(
        policy_networks=behaviour_policy_networks,
        variable_source=variable_source,
    )

    # Make the environment.
    environment = self._environment_factory(evaluation=True)  # type: ignore

    # Create logger and counter.
    counter = counting.Counter(counter, "evaluator")
    evaluator_logger_config = {}
    if self._logger_config and "evaluator" in self._logger_config:
        evaluator_logger_config = self._logger_config["evaluator"]
    eval_logger = self._logger_factory(  # type: ignore
        "evaluator", **evaluator_logger_config
    )

    # Create the run loop and return it.
    # Create the loop to connect environment and executor.
    eval_loop = self._eval_loop_fn(
        environment,
        executor,
        counter=counter,
        logger=eval_logger,
        **self._eval_loop_fn_kwargs,
    )

    eval_loop = DetailedPerAgentStatistics(eval_loop)

    return eval_loop
def executor(
    self,
    executor_id: str,
    replay: reverb.Client,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
    trainer: Optional[training.MADQNTrainer] = None,
) -> mava.ParallelEnvironmentLoop:
    """System executor

    Args:
        executor_id (str): id to identify the executor process for logging
            purposes.
        replay (reverb.Client): replay data table to push data to.
        variable_source (acme.VariableSource): variable server for updating
            network variables.
        counter (counting.Counter): step counter object.
        trainer (Optional[training.MADQNTrainer], optional): system trainer.
            Defaults to None.

    Returns:
        mava.ParallelEnvironmentLoop: environment-executor loop instance.
    """

    # Create the behavior policy.
    networks = self._network_factory(  # type: ignore
        environment_spec=self._environment_spec,
        shared_weights=self._shared_weights,
    )

    # Create system architecture with target networks.
    architecture = self._architecture(
        environment_spec=self._environment_spec,
        value_networks=networks["q_networks"],
        shared_weights=self._shared_weights,
    )

    # Optionally wrap the architecture with replay stabilisation
    # (e.g. fingerprints).
    if self._builder._replay_stabiliser_fn is not None:
        architecture = self._builder._replay_stabiliser_fn(  # type: ignore
            architecture
        )

    communication_module = None
    if self._communication_module_fn is not None:
        # NOTE(review): channel_size/channel_noise are hard-coded here —
        # confirm these match the values used by the trainer process.
        communication_module = self._communication_module_fn(
            architecture=architecture,
            shared=True,
            channel_size=1,
            channel_noise=0,
        )
        system_networks = communication_module.create_system()
    else:
        system_networks = architecture.create_system()

    # Create the executor.
    executor = self._builder.make_executor(
        q_networks=system_networks["values"],
        action_selectors=networks["action_selectors"],
        communication_module=communication_module,
        adder=self._builder.make_adder(replay),
        variable_source=variable_source,
        trainer=trainer,
    )

    # TODO (Arnu): figure out why factory function are giving type errors
    # Create the environment.
    environment = self._environment_factory(evaluation=False)  # type: ignore

    # Create logger and counter; actors will not spam bigtable.
    counter = counting.Counter(counter, "executor")

    # Create executor logger
    executor_logger_config = {}
    if self._logger_config and "executor" in self._logger_config:
        executor_logger_config = self._logger_config["executor"]
    exec_logger = self._logger_factory(  # type: ignore
        f"executor_{executor_id}", **executor_logger_config
    )

    # Create the loop to connect environment and executor.
    train_loop = self._train_loop_fn(
        environment,
        executor,
        counter=counter,
        logger=exec_logger,
        **self._train_loop_fn_kwargs,
    )

    train_loop = DetailedPerAgentStatistics(train_loop)

    return train_loop
def executor(
    self,
    executor_id: str,
    replay: reverb.Client,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
) -> mava.ParallelEnvironmentLoop:
    """System executor

    Args:
        executor_id (str): id to identify the executor process for logging
            purposes.
        replay (reverb.Client): replay data table to push data to.
        variable_source (acme.VariableSource): variable server for updating
            network variables.
        counter (counting.Counter): step counter object.

    Returns:
        mava.ParallelEnvironmentLoop: environment-executor loop instance.
    """

    # Build the networks that back the behaviour policy.
    nets = self._network_factory(  # type: ignore
        environment_spec=self._environment_spec,
        shared_weights=self._shared_weights,
    )

    # Assemble the system architecture (with target networks).
    arch = self._architecture(
        environment_spec=self._environment_spec,
        observation_networks=nets["observations"],
        policy_networks=nets["policies"],
        critic_networks=nets["critics"],
        shared_weights=self._shared_weights,
    )

    # Instantiate all variables; the full network dict is not needed here.
    _ = arch.create_system()

    # Behaviour policy networks (observation network + policy head).
    behaviour_nets = arch.create_behaviour_policy()

    # Wire the executor up to the replay adder and the variable server.
    exec_ = self._builder.make_executor(
        policy_networks=behaviour_nets,
        adder=self._builder.make_adder(replay),
        variable_source=variable_source,
    )

    # TODO (Arnu): figure out why factory function are giving type errors
    # Build the (training) environment.
    env = self._environment_factory(evaluation=False)  # type: ignore

    # Create logger and counter; actors will not spam bigtable.
    counter = counting.Counter(counter, "executor")

    # Pull any per-executor logger configuration that was supplied.
    logger_kwargs = {}
    if self._logger_config and "executor" in self._logger_config:
        logger_kwargs = self._logger_config["executor"]
    exec_logger = self._logger_factory(  # type: ignore
        f"executor_{executor_id}", **logger_kwargs
    )

    # Connect environment and executor in a run loop, wrapped with
    # detailed per-agent statistics tracking.
    loop = self._train_loop_fn(
        env,
        exec_,
        counter=counter,
        logger=exec_logger,
        **self._train_loop_fn_kwargs,
    )
    return DetailedPerAgentStatistics(loop)