예제 #1
0
    def __init__(self, t_prof, seat_id, chief_handle):
        """Set up the networks, optimizers and logging experiments for one seat.

        Args:
            t_prof: Training profile. Its ``module_args`` mapping must contain
                the ``"ddqn"`` and ``"avg"`` entries, and its ``name`` is used
                as the prefix of the experiment titles.
            seat_id: Identifier of the seat this instance belongs to; stored
                on the instance and appended to the experiment titles.
            chief_handle: Handle whose ``create_experiment`` is invoked
                through ``self._ray.remote``.
        """
        # The argument bundles are stored before the base initializer runs,
        # exactly as the base class may rely on them being present.
        module_args = t_prof.module_args
        self.ddqn_args = module_args["ddqn"]
        self.avg_args = module_args["avg"]
        super().__init__(t_prof=t_prof, chief_handle=chief_handle)

        self.seat_id = seat_id
        self.global_iter_id = 0

        # Starting values for the exploration and anticipatory parameters.
        # NOTE(review): self._t_prof is presumably set by the base
        # initializer -- confirm.
        self.eps = self.ddqn_args.eps_start
        self.antic = self._t_prof.antic_start

        # Q-network and average-strategy network share env builder and device.
        self.q_net = DuelingQNet(
            q_args=self.ddqn_args.q_args,
            env_bldr=self._env_bldr,
            device=self._device,
        )
        self.avg_net = AvrgStrategyNet(
            avrg_net_args=self.avg_args.avg_net_args,
            env_bldr=self._env_bldr,
            device=self._device,
        )

        # Resolve each optimizer class from its configured name, then bind it
        # to the matching network's parameters and learning rate.
        br_optim_cls = rl_util.str_to_optim_cls(self.ddqn_args.optim_str)
        self.br_optim = br_optim_cls(
            self.q_net.parameters(), lr=self.ddqn_args.lr)
        avg_optim_cls = rl_util.str_to_optim_cls(self.avg_args.optim_str)
        self.avg_optim = avg_optim_cls(
            self.avg_net.parameters(), lr=self.avg_args.lr)

        # One experiment per tracked quantity, titled with profile name + seat.
        eps_title = t_prof.name + ": epsilon Plyr" + str(seat_id)
        self.eps_exp = self._ray.remote(
            self._chief_handle.create_experiment, eps_title)
        antic_title = t_prof.name + ": anticipatory Plyr" + str(seat_id)
        self.antic_exp = self._ray.remote(
            self._chief_handle.create_experiment, antic_title)

        # Log the initial values right away.
        self._log_eps()
        self._log_antic()
예제 #2
0
 def __init__(self, owner, env_bldr, avrg_training_args, device):
     """Create the average-strategy network and hand it to the base class.

     Args:
         owner: Forwarded unchanged to the base initializer.
         env_bldr: Environment builder; used to construct the network and
             forwarded to the base initializer.
         avrg_training_args: Training arguments; its ``avrg_net_args``
             configures the network, and the whole object is forwarded as
             ``args``.
         device: Device passed to both the network and the base initializer.
     """
     strategy_net = AvrgStrategyNet(
         avrg_net_args=avrg_training_args.avrg_net_args,
         env_bldr=env_bldr,
         device=device,
     )
     super().__init__(
         net=strategy_net,
         env_bldr=env_bldr,
         args=avrg_training_args,
         owner=owner,
         device=device,
     )

     # Index tensor 0..RANGE_SIZE-1, created once on this object's device.
     # NOTE(review): self._env_bldr and self.device are presumably set by
     # the base initializer -- confirm.
     range_size = self._env_bldr.rules.RANGE_SIZE
     self._all_range_idxs = torch.arange(
         range_size, device=self.device, dtype=torch.long)
예제 #3
0
 def _get_new_avrg_net(self):
     """Construct and return a new ``AvrgStrategyNet``.

     The network is built from this object's stored average-net arguments,
     environment builder and device.
     """
     net_kwargs = {
         "avrg_net_args": self._avrg_args.avrg_net_args,
         "env_bldr": self._env_bldr,
         "device": self._device,
     }
     return AvrgStrategyNet(**net_kwargs)