def __init__(
    self,
    rank: int,
    args: argparse.Namespace,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    state_dict: OrderedDict,
    device: str,
    loss_type: ConfigDict,
):
    """Set up loss, network configs, and the epsilon-greedy schedule for this worker."""
    DistributedWorker.__init__(self, rank, args, env_info, hyper_params, device)

    # Network-architecture configuration; the head is sized from the
    # environment's observation/action spaces.
    self.backbone_cfg = backbone
    self.head_cfg = head
    self.head_cfg.configs.state_size = self.env_info.observation_space.shape
    self.head_cfg.configs.output_size = self.env_info.action_space.n

    self.loss_fn = build_loss(loss_type)

    # Exploration schedule: epsilon starts at its maximum value.
    hp = self.hyper_params
    self.use_n_step = hp.n_step > 1  # n-step returns only when accumulating >1 step
    self.max_epsilon = hp.max_epsilon
    self.min_epsilon = hp.min_epsilon
    self.epsilon = hp.max_epsilon

    self._init_networks(state_dict)
def __init__(
    self,
    rank: int,
    device: str,
    hyper_params: ConfigDict,
    env_name: str,
    is_atari: bool,
    max_episode_steps: int,
    loss_type: ConfigDict,
    state_dict: OrderedDict,
    backbone: ConfigDict,
    head: ConfigDict,
    state_size: int,
    output_size: int,
):
    """Set up loss, network configs, and the epsilon-greedy schedule for this worker."""
    DistributedWorker.__init__(
        self, rank, device, hyper_params, env_name, is_atari, max_episode_steps
    )

    # Network-architecture configuration; here the head is sized from
    # explicitly passed dimensions rather than an env_info object.
    self.backbone_cfg = backbone
    self.head_cfg = head
    self.head_cfg.configs.state_size = state_size
    self.head_cfg.configs.output_size = output_size

    self.loss_fn = build_loss(loss_type)

    # Exploration schedule: epsilon starts at its maximum value.
    hp = self.hyper_params
    self.use_n_step = hp.n_step > 1  # n-step returns only when accumulating >1 step
    self.max_epsilon = hp.max_epsilon
    self.min_epsilon = hp.min_epsilon
    self.epsilon = hp.max_epsilon

    self._init_networks(state_dict)
def _init_network(self):
    """Create the online/target Q-networks, the loss, and the Adam optimizer.

    NOTE(review): the sibling ``__init__`` methods in this file call
    ``self._init_networks(state_dict)`` (plural, with an argument) — confirm
    whether this method belongs to a different class or is misnamed.
    """
    # Online network plus a target copy that starts from identical weights.
    self.dqn = Brain(self.backbone_cfg, self.head_cfg).to(self.device)
    self.dqn_target = Brain(self.backbone_cfg, self.head_cfg).to(self.device)
    self.dqn_target.load_state_dict(self.dqn.state_dict())

    # Assumes self.loss_type was stored by the owning class — TODO confirm.
    self.loss_fn = build_loss(self.loss_type)

    # Single optimizer over the online network only; the target network is
    # updated by copying, not by gradient steps.
    self.dqn_optim = optim.Adam(
        self.dqn.parameters(),
        lr=self.optim_cfg.lr_dqn,
        weight_decay=self.optim_cfg.weight_decay,
        eps=self.optim_cfg.adam_eps,
    )

    # Optionally resume model and optimizer state from a checkpoint path.
    if self.args.load_from is not None:
        self.load_params(self.args.load_from)