Example #1
def act(self, step_wrappers):
    # """""""""""""""""""""
    # Act
    # """""""""""""""""""""
    SeatActorBase.act_mixed(step_wrappers=step_wrappers,
                            owner=self.seat_id,
                            br_learner=self.br_learner,
                            avg_learner=self.avg_learner,
                            current_policy_tags=self._current_policy_tags,
                            random_prob=self.br_learner.eps)

    # """""""""""""""""""""
    # Add to memories
    # """""""""""""""""""""
    for sw in step_wrappers:
        e_i = sw.env_idx

        # Only steps played by the BR policy feed the average-policy buffer;
        # random exploration steps are excluded unless the training profile
        # explicitly allows them.
        if (self._current_policy_tags[e_i] == SeatActorBase.BR
                and (self._t_prof.add_random_actions_to_buffer
                     or not sw.action_was_random)):
            self._avg_buf_savers[e_i].add_step(
                pub_obs=sw.obs,
                a=sw.action,
                legal_actions_mask=rl_util.get_legal_action_mask_np(
                    n_actions=self._env_bldr.N_ACTIONS,
                    legal_actions_list=sw.legal_actions_list))

        # Every step, whichever policy produced it, goes into the BR
        # replay memory.
        self._br_memory_savers[e_i].add_experience(
            obs_t_before_acted=sw.obs,
            a_selected_t=sw.action,
            legal_actions_list_t=sw.legal_actions_list)
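act_mixed itself is not shown on this page, but the call sites constrain it: per environment it must either explore uniformly at random, take a greedy best-response action, or sample from the average policy, depending on that environment's tag. Below is a minimal, self-contained sketch of that dispatch, assuming hypothetical select_br_action / select_avg_action learner methods and the StepWrapper fields used above; it is not the library's actual implementation.

import numpy as np

BR, AVG = 0, 1  # stand-ins for the real tag constants on SeatActorBase


def act_mixed_sketch(step_wrappers, owner, br_learner, avg_learner,
                     current_policy_tags, random_prob=0.0, explore=False):
    """Sketch: write sw.action / sw.action_was_random for every env."""
    # owner (the acting seat index) is unused in this simplified sketch.
    for sw in step_wrappers:
        legal = sw.legal_actions_list
        if explore:
            # Pure uniform-random play over legal actions.
            sw.action = int(np.random.choice(legal))
            sw.action_was_random = True
        elif current_policy_tags[sw.env_idx] == BR:
            if np.random.random() < random_prob:
                # Epsilon-greedy exploration on the BR policy.
                sw.action = int(np.random.choice(legal))
                sw.action_was_random = True
            else:
                sw.action = br_learner.select_br_action(sw.obs, legal)  # hypothetical
                sw.action_was_random = False
        else:
            # Sample from the average-policy network.
            sw.action = avg_learner.select_avg_action(sw.obs, legal)  # hypothetical
            sw.action_was_random = False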
Example #2
def act_for_br_opp(self, step_wrappers):
    """ Anticipatory mixing: greedy BR or AVG per environment tag,
    with no extra exploration. """
    SeatActorBase.act_mixed(
        step_wrappers=step_wrappers,
        br_learner=self.br_learner,
        owner=self.owner,
        avg_learner=self.avg_learner,
        current_policy_tags=self._current_policy_tags_OPP_BR,
        random_prob=0)
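random_prob=0 makes the BR branch purely greedy here; the "anticipatory" element lives in how the tags are assigned. As in NFSP, each environment plausibly commits at the start of a hand to playing either BR or AVG for the whole episode. A sketch of that assignment, with a hypothetical anticipatory parameter eta (the value 0.1 is an assumption, not taken from this code):

import numpy as np

BR, AVG = 0, 1


def pick_policy_tags(n_envs, eta=0.1):
    """Sketch: tag each env BR with probability eta, else AVG."""
    return [BR if np.random.random() < eta else AVG for _ in range(n_envs)]

# e.g., refreshed whenever the parallel envs are reset:
# self._current_policy_tags_OPP_BR = pick_policy_tags(n_envs=len(envs))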
Example #3
def act_for_avg_opp(self, step_wrappers):
    """
    Acts purely at random; this is required for the reach probabilities
    to be computed correctly.
    """
    SeatActorBase.act_mixed(
        step_wrappers=step_wrappers,
        br_learner=self.br_learner,
        owner=self.owner,
        avg_learner=self.avg_learner,
        current_policy_tags=self._current_policy_tags_O_AVG,
        explore=True)
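With explore=True the learners are never consulted: every legal action is equally likely, so the probability with which any trajectory was sampled is known exactly and reach weights can be corrected for. A sketch of that uniform draw over a legal-actions list (a hypothetical helper, not part of the library):

import numpy as np


def uniform_random_action(legal_actions_list):
    """Sketch: uniform draw over legal actions, returning the action and
    the exact probability with which it was sampled."""
    a = int(np.random.choice(legal_actions_list))
    return a, 1.0 / len(legal_actions_list)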
Example #4
def act_for_br_trav(self, step_wrappers):
    # Act: epsilon-greedy mix with a constant exploration rate.
    SeatActorBase.act_mixed(
        step_wrappers=step_wrappers,
        br_learner=self.br_learner,
        owner=self.owner,
        avg_learner=self.avg_learner,
        current_policy_tags=self._current_policy_tags_T_BR,
        random_prob=self._constant_eps)

    # Add to memories: every traverser step goes into the BR replay memory.
    for sw in step_wrappers:
        e_i = sw.env_idx
        self._br_memory_savers[e_i].add_experience(
            obs_t_before_acted=sw.obs,
            a_selected_t=sw.action,
            legal_actions_list_t=sw.legal_actions_list)
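The BR memory savers above receive one transition per step; replay memories backing Q-learning-style BR networks are commonly fixed-capacity FIFO (circular) buffers. A sketch of such a buffer with the same add_experience signature (a hypothetical class, not the saver actually used here):

import random


class CircularBRMemory:
    """Sketch: fixed-capacity circular replay memory for BR training data."""

    def __init__(self, capacity):
        self._capacity = capacity
        self._data = []
        self._next = 0  # index of the oldest entry once the buffer is full

    def add_experience(self, obs_t_before_acted, a_selected_t,
                       legal_actions_list_t):
        entry = (obs_t_before_acted, a_selected_t, legal_actions_list_t)
        if len(self._data) < self._capacity:
            self._data.append(entry)
        else:
            self._data[self._next] = entry  # overwrite the oldest entry
        self._next = (self._next + 1) % self._capacity

    def sample(self, batch_size):
        return random.sample(self._data, min(batch_size, len(self._data)))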