def add_policy(self, name_behavior_id: str, policy: TFPolicy) -> None:
    """
    Registers the policy that this trainer will train.

    If a policy is already set, a warning is logged and the new policy
    replaces the previous one.
    :param name_behavior_id: Behavior ID for the policy (not read by this method).
    :param policy: Policy to attach; must be a SACPolicy instance.
    :raises RuntimeError: If policy is not a SACPolicy.
    """
    existing_policy = self.policy
    if existing_policy:
        # Only a single policy is tracked, so a second call overwrites the first.
        warning_template = (
            "add_policy has been called twice. {} is not a multi-agent trainer"
        )
        trainer_name = self.__class__.__name__
        logger.warning(warning_template.format(trainer_name))

    policy_is_valid = isinstance(policy, SACPolicy)
    if not policy_is_valid:
        raise RuntimeError("Non-SACPolicy passed to SACTrainer.add_policy()")

    self.policy = policy
    # Start the trainer's step counter from whatever step the policy reports.
    self.step = policy.get_current_step()
def add_policy(self, name_behavior_id: str, policy: TFPolicy) -> None:
    """
    Adds policy to trainer and builds the SAC optimizer for it.

    If a policy is already set, a warning is logged and the new policy
    replaces the previous one.
    :param name_behavior_id: Behavior ID that the policy should belong to
        (not read by this method).
    :param policy: Policy to associate with this trainer; must be an NNPolicy.
    :raises RuntimeError: If policy is not an NNPolicy.
    """
    if self.policy:
        logger.warning(
            "add_policy has been called twice. {} is not a multi-agent trainer".format(
                self.__class__.__name__
            )
        )
    if not isinstance(policy, NNPolicy):
        # The message names the type that is actually checked (NNPolicy).
        raise RuntimeError("Non-NNPolicy passed to SACTrainer.add_policy()")
    self.policy = policy
    self.optimizer = SACOptimizer(self.policy, self.trainer_parameters)
    # Give every reward signal exposed by the optimizer its own tally whose
    # missing entries default to 0.
    for _reward_signal in self.optimizer.reward_signals.keys():
        self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
    # Needed to resume loads properly
    self.step = policy.get_current_step()
    # Refreshed after self.step is set; presumably the helper reads the step
    # count - confirm against _get_next_summary_step.
    self.next_summary_step = self._get_next_summary_step()
def add_policy(self, name_behavior_id: str, policy: TFPolicy) -> None:
    """
    Registers the policy that this trainer will train and creates its
    PPO optimizer.

    If a policy is already set, a warning is logged and the new policy
    replaces the previous one.
    :param name_behavior_id: Behavior ID that the policy should belong to
        (not read by this method).
    :param policy: Policy to associate with name_behavior_id; must be an NNPolicy.
    :raises RuntimeError: If policy is not an NNPolicy.
    """
    existing_policy = self.policy
    if existing_policy:
        # Only a single policy is tracked, so a second call overwrites the first.
        warning_template = (
            "add_policy has been called twice. {} is not a multi-agent trainer"
        )
        trainer_name = self.__class__.__name__
        logger.warning(warning_template.format(trainer_name))

    policy_is_valid = isinstance(policy, NNPolicy)
    if not policy_is_valid:
        raise RuntimeError("Non-NNPolicy passed to PPOTrainer.add_policy()")

    self.policy = policy
    self.optimizer = PPOOptimizer(self.policy, self.trainer_parameters)

    # One tally per reward signal exposed by the optimizer; missing entries
    # default to 0.
    signal_names = self.optimizer.reward_signals.keys()
    for signal_name in signal_names:
        self.collected_rewards[signal_name] = defaultdict(lambda: 0)

    # Needed to resume loads properly
    self.step = policy.get_current_step()
    self.next_summary_step = self._get_next_summary_step()