def __init__(self, t_prof, seat_id, chief_handle):
    self.ddqn_args = t_prof.module_args["ddqn"]
    self.avg_args = t_prof.module_args["avg"]
    super().__init__(t_prof=t_prof, chief_handle=chief_handle)

    self.seat_id = seat_id
    self.global_iter_id = 0

    # Exploration (epsilon) and anticipatory-parameter schedules start at their configured values.
    self.eps = self.ddqn_args.eps_start
    self.antic = self._t_prof.antic_start

    # Best-response network (Dueling DQN) and average-strategy network.
    self.q_net = DuelingQNet(q_args=self.ddqn_args.q_args,
                             env_bldr=self._env_bldr,
                             device=self._device)
    self.avg_net = AvrgStrategyNet(avrg_net_args=self.avg_args.avg_net_args,
                                   env_bldr=self._env_bldr,
                                   device=self._device)

    # One optimizer per network, built from the optimizer name given in the respective args.
    self.br_optim = rl_util.str_to_optim_cls(self.ddqn_args.optim_str)(self.q_net.parameters(),
                                                                       lr=self.ddqn_args.lr)
    self.avg_optim = rl_util.str_to_optim_cls(self.avg_args.optim_str)(self.avg_net.parameters(),
                                                                       lr=self.avg_args.lr)

    # Experiments on the chief for logging this seat's epsilon and anticipatory schedules.
    self.eps_exp = self._ray.remote(self._chief_handle.create_experiment,
                                    t_prof.name + ": epsilon Plyr" + str(seat_id))
    self.antic_exp = self._ray.remote(self._chief_handle.create_experiment,
                                      t_prof.name + ": anticipatory Plyr" + str(seat_id))

    self._log_eps()
    self._log_antic()
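# Illustrative sketch (not part of the original module): how the `eps` and `antic` values
# initialized above are typically combined in an NFSP-style agent. With probability `antic`
# the agent acts via the epsilon-greedy best response (Q-net); otherwise it samples from the
# average-strategy net. The names `q_values` and `avg_strategy_probs` are hypothetical
# stand-ins for the outputs of `self.q_net` / `self.avg_net`.
def _illustrative_nfsp_action(q_values, avg_strategy_probs, eps, antic):
    import random
    import torch

    if random.random() < antic:
        # Best-response branch: epsilon-greedy over the Q-values.
        if random.random() < eps:
            return random.randrange(q_values.shape[-1])
        return int(torch.argmax(q_values, dim=-1).item())
    # Average-policy branch: sample an action from the average strategy distribution.
    return int(torch.multinomial(avg_strategy_probs, num_samples=1).item())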
def __init__(self, owner, env_bldr, avrg_training_args, device):
    super().__init__(net=AvrgStrategyNet(avrg_net_args=avrg_training_args.avrg_net_args,
                                         env_bldr=env_bldr,
                                         device=device),
                     env_bldr=env_bldr,
                     args=avrg_training_args,
                     owner=owner,
                     device=device)

    # Index tensor over all private-hand range indices; reused when querying the net for a full range.
    self._all_range_idxs = torch.arange(self._env_bldr.rules.RANGE_SIZE,
                                        device=self.device,
                                        dtype=torch.long)
def _get_new_avrg_net(self):
    # Builds a freshly initialized average-strategy network with the configured architecture.
    return AvrgStrategyNet(avrg_net_args=self._avrg_args.avrg_net_args,
                           env_bldr=self._env_bldr,
                           device=self._device)
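# Illustrative sketch (assumption, not from the original module): one common use of a
# fresh-net factory like `_get_new_avrg_net` is to duplicate the average-strategy network,
# e.g. copying the current weights into a new instance used only for acting/evaluation.
# The generic torch snippet below shows the state_dict-based copy this pattern relies on;
# `make_new_net` is a hypothetical stand-in for `self._get_new_avrg_net`.
def _copy_into_fresh_net(src_net, make_new_net):
    new_net = make_new_net()
    new_net.load_state_dict(src_net.state_dict())
    new_net.eval()  # evaluation copies are typically switched to eval mode
    return new_net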