def add_policy(
    self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
) -> None:
    """
    Adds policy to trainer.
    :param parsed_behavior_id: Behavior identifiers that the policy should belong to.
    :param policy: Policy to associate with name_behavior_id.
    """
    if self.policy:
        logger.warning(
            "Your environment contains multiple teams, but {} doesn't support "
            "adversarial games. Enable self-play to train adversarial "
            "games.".format(self.__class__.__name__)
        )
    self.policy = policy
    self.policies[parsed_behavior_id.behavior_id] = policy
    self.optimizer = self.create_ppo_optimizer()
    for _reward_signal in self.optimizer.reward_signals.keys():
        self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)

    self.model_saver.register(self.policy)
    self.model_saver.register(self.optimizer)
    self.model_saver.initialize_or_load()

    # Needed to resume loads properly
    self.step = policy.get_current_step()

def add_policy(
    self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
) -> None:
    """
    Adds policy to trainer.
    :param parsed_behavior_id: Behavior identifiers that the policy should belong to.
    :param policy: Policy to associate with name_behavior_id.
    """
    if self.policy:
        logger.warning(
            "Your environment contains multiple teams, but {} doesn't support "
            "adversarial games. Enable self-play to train adversarial "
            "games.".format(self.__class__.__name__)
        )
    self.policy = policy
    self.policies[parsed_behavior_id.behavior_id] = policy
    self.optimizer = self.create_sac_optimizer()
    for _reward_signal in self.optimizer.reward_signals.keys():
        self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)

    self.model_saver.register(self.policy)
    self.model_saver.register(self.optimizer)
    self.model_saver.initialize_or_load()

    # Needed to resume loads properly
    self._step = policy.get_current_step()
    # Assume steps were updated at the correct ratio before
    self.update_steps = int(max(1, self._step / self.steps_per_update))
    self.reward_signal_update_steps = int(
        max(1, self._step / self.reward_signal_steps_per_update)
    )

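# Worked example (illustrative, not part of the trainer above): on resume,
# the SAC override back-computes its update counters from the restored step
# count, assuming the configured update ratios were respected before the
# checkpoint. The helper below is a hypothetical stand-in whose parameters
# mirror self.steps_per_update and self.reward_signal_steps_per_update.
def _resumed_update_counts(
    current_step: int,
    steps_per_update: float,
    reward_signal_steps_per_update: float,
) -> tuple:
    update_steps = int(max(1, current_step / steps_per_update))
    reward_signal_update_steps = int(
        max(1, current_step / reward_signal_steps_per_update)
    )
    return update_steps, reward_signal_update_steps


# Resuming at step 50_000 with one policy update per 10 environment steps and
# one reward-signal update per 20 steps restores counters of 5_000 and 2_500.
assert _resumed_update_counts(50_000, 10.0, 20.0) == (5_000, 2_500)
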
def add_policy(
    self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
) -> None:
    """
    Adds policy to trainer.
    :param parsed_behavior_id: Behavior identifiers that the policy should belong to.
    :param policy: Policy to associate with name_behavior_id.
    """
    if not isinstance(policy, TorchPolicy):
        raise RuntimeError(f"policy {policy} must be an instance of TorchPolicy.")
    self.policy = policy
    self.policies[parsed_behavior_id.behavior_id] = policy
    self.optimizer = self.create_poca_optimizer()
    for _reward_signal in self.optimizer.reward_signals.keys():
        self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)

    self.model_saver.register(self.policy)
    self.model_saver.register(self.optimizer)
    self.model_saver.initialize_or_load()

    # Needed to resume loads properly
    self.step = policy.get_current_step()

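# Minimal usage sketch, assuming `trainer` and `policy` were constructed
# elsewhere (e.g. by the ML-Agents trainer factory); only add_policy itself
# comes from the code above. BehaviorIdentifiers.from_name_behavior_id is the
# ML-Agents helper that parses a "BehaviorName?team=N" string into the
# identifiers add_policy expects.
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers

parsed_id = BehaviorIdentifiers.from_name_behavior_id("Striker?team=0")
trainer.add_policy(parsed_id, policy)

# After registration, the policy is retrievable by its behavior id, and the
# trainer's step counter reflects any checkpoint being resumed.
assert trainer.policies[parsed_id.behavior_id] is policy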