Beispiel #1
0
    def __init__(
        self,
        rank: int,
        args: argparse.Namespace,
        env_info: ConfigDict,
        hyper_params: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
        state_dict: OrderedDict,
        device: str,
        loss_type: ConfigDict,
    ):
        """Set up the loss, network configs and epsilon, then build networks.

        Args:
            rank: forwarded to ``DistributedWorker.__init__``.
            args: forwarded to ``DistributedWorker.__init__``.
            env_info: forwarded to ``DistributedWorker.__init__``; the
                observation shape and action count are then read from
                ``self.env_info`` (presumably set by the base initializer).
            hyper_params: forwarded to ``DistributedWorker.__init__``;
                ``n_step``, ``max_epsilon`` and ``min_epsilon`` are then read
                from ``self.hyper_params`` (presumably set by the base
                initializer).
            backbone: backbone config, kept as ``self.backbone_cfg``.
            head: head config, kept as ``self.head_cfg``. It is modified in
                place: ``configs.state_size`` and ``configs.output_size`` are
                overwritten.
            state_dict: handed to ``self._init_networks``.
            device: forwarded to ``DistributedWorker.__init__``.
            loss_type: config passed to ``build_loss``.
        """
        DistributedWorker.__init__(
            self, rank, args, env_info, hyper_params, device
        )

        self.loss_fn = build_loss(loss_type)
        self.backbone_cfg, self.head_cfg = backbone, head

        # Size the head from the environment description.
        head_settings = self.head_cfg.configs
        head_settings.state_size = self.env_info.observation_space.shape
        head_settings.output_size = self.env_info.action_space.n

        params = self.hyper_params
        self.use_n_step = params.n_step > 1

        # Epsilon starts at its configured maximum.
        self.max_epsilon = params.max_epsilon
        self.min_epsilon = params.min_epsilon
        self.epsilon = params.max_epsilon

        self._init_networks(state_dict)
    def __init__(
        self,
        rank: int,
        device: str,
        hyper_params: ConfigDict,
        env_name: str,
        is_atari: bool,
        max_episode_steps: int,
        loss_type: ConfigDict,
        state_dict: OrderedDict,
        backbone: ConfigDict,
        head: ConfigDict,
        state_size: int,
        output_size: int,
    ):
        """Set up the loss, network configs and epsilon, then build networks.

        Args:
            rank: forwarded to ``DistributedWorker.__init__``.
            device: forwarded to ``DistributedWorker.__init__``.
            hyper_params: forwarded to ``DistributedWorker.__init__``;
                ``n_step``, ``max_epsilon`` and ``min_epsilon`` are then read
                from ``self.hyper_params`` (presumably set by the base
                initializer).
            env_name: forwarded to ``DistributedWorker.__init__``.
            is_atari: forwarded to ``DistributedWorker.__init__``.
            max_episode_steps: forwarded to ``DistributedWorker.__init__``.
            loss_type: config passed to ``build_loss``.
            state_dict: handed to ``self._init_networks``.
            backbone: backbone config, kept as ``self.backbone_cfg``.
            head: head config, kept as ``self.head_cfg``. It is modified in
                place: ``configs.state_size`` and ``configs.output_size`` are
                overwritten.
            state_size: value written to ``head.configs.state_size``.
            output_size: value written to ``head.configs.output_size``.
        """
        DistributedWorker.__init__(
            self,
            rank,
            device,
            hyper_params,
            env_name,
            is_atari,
            max_episode_steps,
        )

        self.loss_fn = build_loss(loss_type)
        self.backbone_cfg, self.head_cfg = backbone, head

        # Record the caller-supplied input/output sizes on the head config.
        head_settings = self.head_cfg.configs
        head_settings.state_size = state_size
        head_settings.output_size = output_size

        params = self.hyper_params
        self.use_n_step = params.n_step > 1

        # Epsilon starts at its configured maximum.
        self.max_epsilon = params.max_epsilon
        self.min_epsilon = params.min_epsilon
        self.epsilon = params.max_epsilon

        self._init_networks(state_dict)
Beispiel #3
0
    def _init_network(self):
        """Create the two DQN networks, the loss function and the optimizer.

        Builds ``self.dqn`` and ``self.dqn_target`` from the stored backbone
        and head configs, copies the weights of ``self.dqn`` into
        ``self.dqn_target``, creates an Adam optimizer over ``self.dqn``'s
        parameters, and calls ``self.load_params`` when
        ``self.args.load_from`` is not ``None``.
        """
        device = self.device
        self.dqn = Brain(self.backbone_cfg, self.head_cfg).to(device)
        self.dqn_target = Brain(self.backbone_cfg, self.head_cfg).to(device)
        self.loss_fn = build_loss(self.loss_type)

        # The target network starts with the same weights as self.dqn.
        self.dqn_target.load_state_dict(self.dqn.state_dict())

        # Adam over self.dqn's parameters only.
        optim_cfg = self.optim_cfg
        self.dqn_optim = optim.Adam(
            self.dqn.parameters(),
            lr=optim_cfg.lr_dqn,
            weight_decay=optim_cfg.weight_decay,
            eps=optim_cfg.adam_eps,
        )

        # Load optimizer and model parameters when a path was given.
        checkpoint = self.args.load_from
        if checkpoint is None:
            return
        self.load_params(checkpoint)