コード例 #1
0
ファイル: learner.py プロジェクト: zivzone/rl_algorithms
    def __init__(
        self,
        args: argparse.Namespace,
        env_info: ConfigDict,
        hyper_params: ConfigDict,
        log_cfg: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
        optim_cfg: ConfigDict,
    ):
        """Record the configs, size the heads from the env, build networks.

        Args:
            args: passed through to ``Learner.__init__``.
            env_info: passed through to ``Learner.__init__``; the spaces are
                then read from ``self.env_info``.
            hyper_params: passed through to ``Learner.__init__``;
                ``self.hyper_params.auto_entropy_tuning`` is read afterwards.
            log_cfg: passed through to ``Learner.__init__``.
            backbone: stored as ``self.backbone_cfg``.
            head: stored as ``self.head_cfg``; its ``actor``, ``critic_vf``
                and ``critic_qf`` sub-configs are modified in place.
            optim_cfg: stored as ``self.optim_cfg``; ``lr_entropy`` is the
                learning rate of the ``log_alpha`` optimizer.
        """
        Learner.__init__(self, args, env_info, hyper_params, log_cfg)

        obs_shape = self.env_info.observation_space.shape
        action_dim = self.env_info.action_space.shape[0]

        self.backbone_cfg = backbone
        self.optim_cfg = optim_cfg
        self.head_cfg = head

        # Actor and critic_vf take the raw observation shape as input.
        self.head_cfg.actor.configs.state_size = obs_shape
        self.head_cfg.critic_vf.configs.state_size = obs_shape
        # critic_qf takes a flat vector of observation size + action size.
        self.head_cfg.critic_qf.configs.state_size = (
            self.env_info.observation_space.shape[0] + action_dim, )
        self.head_cfg.actor.configs.output_size = action_dim

        self.update_step = 0
        if self.hyper_params.auto_entropy_tuning:
            # Target entropy is the negated action dimension.
            self.target_entropy = -np.prod((action_dim, )).item()
            self.log_alpha = torch.zeros(
                1, requires_grad=True, device=self.device)
            self.alpha_optim = optim.Adam(
                [self.log_alpha], lr=optim_cfg.lr_entropy)

        self._init_network()
コード例 #2
0
    def __init__(
        self,
        loss_type: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
        gru: ConfigDict,
        optim_cfg: ConfigDict,
        hyper_params: ConfigDict,
        log_cfg: ConfigDict,
        env_name: str,
        state_size: tuple,
        output_size: int,
        is_test: bool,
        load_from: str,
    ):
        """Store the configs on the instance and build the networks.

        Args:
            loss_type: stored as ``self.loss_type``.
            backbone: stored as ``self.backbone_cfg``.
            head: stored as ``self.head_cfg``; ``configs.state_size`` and
                ``configs.output_size`` are overwritten in place.
            gru: stored as ``self.gru_cfg``.
            optim_cfg: stored as ``self.optim_cfg``.
            hyper_params: passed through to ``Learner.__init__``;
                ``self.hyper_params.n_step`` is read afterwards.
            log_cfg: passed through to ``Learner.__init__``.
            env_name: passed through to ``Learner.__init__``.
            state_size: written to ``head.configs.state_size``.
            output_size: written to ``head.configs.output_size``.
            is_test: passed through to ``Learner.__init__``.
            load_from: stored as ``self.load_from``.
        """
        Learner.__init__(self, hyper_params, log_cfg, env_name, is_test)

        # Plain bookkeeping of the configuration objects.
        self.loss_type = loss_type
        self.load_from = load_from
        self.optim_cfg = optim_cfg
        self.backbone_cfg = backbone
        self.gru_cfg = gru

        # The head's input/output sizes come from the caller.
        self.head_cfg = head
        self.head_cfg.configs.state_size = state_size
        self.head_cfg.configs.output_size = output_size

        # True when more than one step is configured.
        self.use_n_step = self.hyper_params.n_step > 1

        self._init_network()
コード例 #3
0
    def __init__(
        self,
        args: argparse.Namespace,
        env_info: ConfigDict,
        hyper_params: ConfigDict,
        log_cfg: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
        optim_cfg: ConfigDict,
        noise_cfg: ConfigDict,
        device: torch.device,
    ):
        """Store the configs, size actor/critic heads, and build networks.

        Args:
            args: passed through to ``Learner.__init__``.
            env_info: passed through to ``Learner.__init__``; the spaces are
                then read from ``self.env_info``.
            hyper_params: passed through to ``Learner.__init__``.
            log_cfg: passed through to ``Learner.__init__``.
            backbone: stored as ``self.backbone_cfg``.
            head: stored as ``self.head_cfg``; its ``actor`` and ``critic``
                sub-configs are modified in place.
            optim_cfg: stored as ``self.optim_cfg``.
            noise_cfg: stored as ``self.noise_cfg``.
            device: passed through to ``Learner.__init__``.
        """
        Learner.__init__(self, args, env_info, hyper_params, log_cfg, device)

        obs_shape = self.env_info.observation_space.shape
        action_dim = self.env_info.action_space.shape[0]

        self.backbone_cfg = backbone
        self.optim_cfg = optim_cfg
        self.noise_cfg = noise_cfg
        self.head_cfg = head

        # The critic takes a flat vector of observation size + action size.
        self.head_cfg.critic.configs.state_size = (
            self.env_info.observation_space.shape[0] + action_dim, )
        # The actor takes the observation shape and emits one value per
        # action dimension.
        self.head_cfg.actor.configs.state_size = obs_shape
        self.head_cfg.actor.configs.output_size = action_dim

        self._init_network()
コード例 #4
0
    def __init__(
        self,
        args: argparse.Namespace,
        env_info: ConfigDict,
        hyper_params: ConfigDict,
        log_cfg: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
        optim_cfg: ConfigDict,
        device: str,
    ):
        """Store the configs, derive the head sizes, and build the networks.

        Args:
            args: passed through to ``Learner.__init__``; when it carries a
                ``stack_size`` attribute the state dimension is multiplied
                by it.
            env_info: passed through to ``Learner.__init__``; the spaces are
                then read from ``self.env_info``.
            hyper_params: passed through to ``Learner.__init__``;
                ``self.hyper_params.n_step`` is read afterwards.
            log_cfg: passed through to ``Learner.__init__``.
            backbone: stored as ``self.backbone_cfg``.
            head: stored as ``self.head_cfg``; ``configs.state_size`` and
                ``configs.output_size`` are overwritten in place.
            optim_cfg: stored as ``self.optim_cfg``.
            device: passed through to ``Learner.__init__``.
        """
        Learner.__init__(self, args, env_info, hyper_params, log_cfg, device)

        # First observation dimension, scaled by the stack size if one was
        # supplied on the command-line namespace.
        obs_dim = self.env_info.observation_space.shape[0]
        state_dim = obs_dim * args.stack_size if "stack_size" in args else obs_dim

        self.backbone_cfg = backbone
        self.optim_cfg = optim_cfg

        self.head_cfg = head
        self.head_cfg.configs.state_size = (state_dim, )
        self.head_cfg.configs.output_size = self.env_info.action_space.n

        # True when more than one step is configured.
        self.use_n_step = self.hyper_params.n_step > 1

        self._init_network()
コード例 #5
0
    def __init__(
        self,
        args: argparse.Namespace,
        env_info: ConfigDict,
        hyper_params: ConfigDict,
        log_cfg: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
        optim_cfg: ConfigDict,
        noise_cfg: ConfigDict,
    ):
        """Store the configs, size the heads, create noise, build networks.

        Args:
            args: passed through to ``Learner.__init__``.
            env_info: passed through to ``Learner.__init__``; the spaces are
                then read from ``self.env_info``.
            hyper_params: passed through to ``Learner.__init__``.
            log_cfg: passed through to ``Learner.__init__``.
            backbone: stored as ``self.backbone_cfg``.
            head: stored as ``self.head_cfg``; its ``actor`` and ``critic``
                sub-configs are modified in place.
            optim_cfg: stored as ``self.optim_cfg``.
            noise_cfg: stored as ``self.noise_cfg``; its
                ``target_policy_noise`` value parameterizes the noise object.
        """
        Learner.__init__(self, args, env_info, hyper_params, log_cfg)

        obs_shape = self.env_info.observation_space.shape
        action_dim = self.env_info.action_space.shape[0]

        self.backbone_cfg = backbone
        self.optim_cfg = optim_cfg
        self.noise_cfg = noise_cfg
        self.head_cfg = head

        # The critic takes a flat vector of observation size + action size.
        self.head_cfg.critic.configs.state_size = (
            self.env_info.observation_space.shape[0] + action_dim, )
        self.head_cfg.actor.configs.state_size = obs_shape
        self.head_cfg.actor.configs.output_size = action_dim

        # The same noise value is passed for both trailing arguments.
        policy_noise = self.noise_cfg.target_policy_noise
        self.target_policy_noise = GaussianNoise(
            self.head_cfg.actor.configs.output_size,
            policy_noise,
            policy_noise,
        )

        self.update_step = 0

        self._init_network()
コード例 #6
0
ファイル: learner.py プロジェクト: medipixel/rl_algorithms
    def __init__(
        self,
        backbone: ConfigDict,
        head: ConfigDict,
        optim_cfg: ConfigDict,
        trust_region: ConfigDict,
        hyper_params: ConfigDict,
        log_cfg: ConfigDict,
        env_info: ConfigDict,
        is_test: bool,
        load_from: str,
    ):
        """Store the configs, size actor/critic heads, and build networks.

        Args:
            backbone: stored as ``self.backbone_cfg``.
            head: stored as ``self.head_cfg``; its ``actor`` and ``critic``
                sub-configs are modified in place.
            optim_cfg: stored as ``self.optim_cfg``.
            trust_region: stored as ``self.trust_region``.
            hyper_params: passed through to ``Learner.__init__``; its
                ``gradient_clip`` value is copied to ``self.gradient_clip``.
            log_cfg: passed through to ``Learner.__init__``.
            env_info: supplies ``name`` for ``Learner.__init__`` plus the
                observation shape and ``action_space.n``.
            is_test: passed through to ``Learner.__init__``.
            load_from: stored as ``self.load_from``.
        """
        Learner.__init__(self, hyper_params, log_cfg, env_info.name, is_test)

        obs_shape = env_info.observation_space.shape

        self.backbone_cfg = backbone
        self.load_from = load_from
        self.optim_cfg = optim_cfg
        self.trust_region = trust_region
        self.gradient_clip = hyper_params.gradient_clip

        # Actor and critic share the same input shape and output size.
        self.head_cfg = head
        self.head_cfg.actor.configs.state_size = obs_shape
        self.head_cfg.critic.configs.state_size = obs_shape
        self.head_cfg.actor.configs.output_size = env_info.action_space.n
        self.head_cfg.critic.configs.output_size = env_info.action_space.n

        self._init_network()
コード例 #7
0
ファイル: learner.py プロジェクト: singmeasong/rl_algorithms
    def __init__(
        self,
        hyper_params: ConfigDict,
        log_cfg: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
        optim_cfg: ConfigDict,
        noise_cfg: ConfigDict,
        env_name: str,
        state_size: tuple,
        output_size: int,
        is_test: bool,
        load_from: str,
    ):
        """Store the configs, size the heads, create noise, build networks.

        Args:
            hyper_params: passed through to ``Learner.__init__``.
            log_cfg: passed through to ``Learner.__init__``.
            backbone: stored as ``self.backbone_cfg``.
            head: stored as ``self.head_cfg``; its ``actor`` and ``critic``
                sub-configs are modified in place.
            optim_cfg: stored as ``self.optim_cfg``.
            noise_cfg: stored as ``self.noise_cfg``; its
                ``target_policy_noise`` value parameterizes the noise object.
            env_name: passed through to ``Learner.__init__``.
            state_size: actor input size; its first entry plus
                ``output_size`` forms the critic input size.
            output_size: actor output size.
            is_test: passed through to ``Learner.__init__``.
            load_from: stored as ``self.load_from``.
        """
        Learner.__init__(self, hyper_params, log_cfg, env_name, is_test)

        self.backbone_cfg = backbone
        self.optim_cfg = optim_cfg
        self.noise_cfg = noise_cfg
        self.load_from = load_from

        self.head_cfg = head
        # The critic input is a 1-tuple: first state dimension + output size.
        critic_input_dim = state_size[0] + output_size
        self.head_cfg.critic.configs.state_size = (critic_input_dim, )
        self.head_cfg.actor.configs.state_size = state_size
        self.head_cfg.actor.configs.output_size = output_size

        # The same noise value is passed for both trailing arguments.
        policy_noise = self.noise_cfg.target_policy_noise
        self.target_policy_noise = GaussianNoise(
            self.head_cfg.actor.configs.output_size,
            policy_noise,
            policy_noise,
        )

        self.update_step = 0

        self._init_network()
コード例 #8
0
ファイル: learner.py プロジェクト: singmeasong/rl_algorithms
    def __init__(
        self,
        hyper_params: ConfigDict,
        log_cfg: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
        optim_cfg: ConfigDict,
        env_name: str,
        state_size: tuple,
        output_size: int,
        is_test: bool,
        load_from: str,
    ):
        """Store the configs, size actor/critic heads, and build networks.

        Args:
            hyper_params: passed through to ``Learner.__init__``.
            log_cfg: passed through to ``Learner.__init__``.
            backbone: stored as ``self.backbone_cfg``.
            head: stored as ``self.head_cfg``; its ``actor`` and ``critic``
                sub-configs are modified in place.
            optim_cfg: stored as ``self.optim_cfg``.
            env_name: passed through to ``Learner.__init__``.
            state_size: input size written to both actor and critic configs.
            output_size: written to the actor config only.
            is_test: passed through to ``Learner.__init__``.
            load_from: stored as ``self.load_from``.
        """
        Learner.__init__(self, hyper_params, log_cfg, env_name, is_test)

        self.backbone_cfg = backbone
        self.optim_cfg = optim_cfg
        self.load_from = load_from

        self.head_cfg = head
        # Actor and critic share the same input size; only the actor's
        # output size is set here.
        self.head_cfg.actor.configs.state_size = state_size
        self.head_cfg.critic.configs.state_size = state_size
        self.head_cfg.actor.configs.output_size = output_size

        self._init_network()
コード例 #9
0
ファイル: learner.py プロジェクト: medipixel/rl_algorithms
 def save_params(self, n_episode: int):
     """Gather actor/critic network and optimizer state dicts and save them.

     Args:
         n_episode: forwarded to ``Learner._save_params`` together with the
             collected state dicts.
     """
     holders = (
         ("actor_state_dict", self.actor),
         ("actor_optim_state_dict", self.actor_optim),
         ("critic_state_dict", self.critic),
         ("critic_optim_state_dict", self.critic_optim),
     )
     params = {key: holder.state_dict() for key, holder in holders}
     Learner._save_params(self, params, n_episode)
コード例 #10
0
ファイル: learner.py プロジェクト: zivzone/rl_algorithms
    def load_params(self, path: str):
        """Restore the dqn, dqn_target and dqn_optim state from a checkpoint.

        ``Learner.load_params`` is invoked first with the same path.

        NOTE(review): ``torch.load`` deserializes the file (pickle-based in
        PyTorch), so only load checkpoints from trusted sources.

        Args:
            path: checkpoint file handed to ``torch.load``.
        """
        Learner.load_params(self, path)

        checkpoint = torch.load(path)
        # (attribute on self, key in the checkpoint), restored in this order.
        for attr, key in (
            ("dqn", "dqn_state_dict"),
            ("dqn_target", "dqn_target_state_dict"),
            ("dqn_optim", "dqn_optim_state_dict"),
        ):
            getattr(self, attr).load_state_dict(checkpoint[key])
        print("[INFO] loaded the model and optimizer from", path)
コード例 #11
0
ファイル: learner.py プロジェクト: zivzone/rl_algorithms
 def save_params(self, n_episode: int):
     """Gather actor/critic network and optimizer state dicts and save them.

     Args:
         n_episode: forwarded to ``Learner._save_params`` together with the
             collected state dicts.
     """
     params = {}
     # Networks first, then their optimizers.
     params["actor_state_dict"] = self.actor.state_dict()
     params["critic_state_dict"] = self.critic.state_dict()
     params["actor_optim_state_dict"] = self.actor_optim.state_dict()
     params["critic_optim_state_dict"] = self.critic_optim.state_dict()
     Learner._save_params(self, params, n_episode)
コード例 #12
0
ファイル: learner.py プロジェクト: zivzone/rl_algorithms
    def save_params(self, n_episode: int):
        """Gather the dqn, dqn_target and dqn_optim state dicts and save them.

        Args:
            n_episode: forwarded to ``Learner._save_params`` together with
                the collected state dicts.
        """
        holders = (
            ("dqn_state_dict", self.dqn),
            ("dqn_target_state_dict", self.dqn_target),
            ("dqn_optim_state_dict", self.dqn_optim),
        )
        params = {key: holder.state_dict() for key, holder in holders}

        Learner._save_params(self, params, n_episode)
コード例 #13
0
ファイル: learner.py プロジェクト: medipixel/rl_algorithms
    def load_params(self, path: str):
        """Restore actor/critic networks and optimizers from a checkpoint.

        ``Learner.load_params`` is invoked first with the same path.

        NOTE(review): ``torch.load`` deserializes the file (pickle-based in
        PyTorch), so only load checkpoints from trusted sources.

        Args:
            path: checkpoint file handed to ``torch.load``.
        """
        Learner.load_params(self, path)

        checkpoint = torch.load(path)
        # (attribute on self, key in the checkpoint), restored in this order.
        for attr, key in (
            ("actor", "actor_state_dict"),
            ("critic", "critic_state_dict"),
            ("actor_optim", "actor_optim_state_dict"),
            ("critic_optim", "critic_optim_state_dict"),
        ):
            getattr(self, attr).load_state_dict(checkpoint[key])
        print("[INFO] Loaded the model and optimizer from", path)
コード例 #14
0
ファイル: learner.py プロジェクト: singmeasong/rl_algorithms
    def load_params(self, path: str):
        """Restore critics, actor, their targets and optimizers from a file.

        ``Learner.load_params`` is invoked first with the same path. Each
        checkpoint key has the same name as the attribute it restores.

        NOTE(review): ``torch.load`` deserializes the file (pickle-based in
        PyTorch), so only load checkpoints from trusted sources.

        Args:
            path: checkpoint file handed to ``torch.load``.
        """
        Learner.load_params(self, path)

        checkpoint = torch.load(path)
        # Restored in this order; key and attribute names coincide.
        for name in (
            "critic1",
            "critic2",
            "critic_target1",
            "critic_target2",
            "critic_optim",
            "actor",
            "actor_target",
            "actor_optim",
        ):
            getattr(self, name).load_state_dict(checkpoint[name])
        print("[INFO] loaded the model and optimizer from", path)
コード例 #15
0
ファイル: learner.py プロジェクト: singmeasong/rl_algorithms
    def save_params(self, n_episode: int):
        """Gather actor, critic, target and optimizer state dicts and save.

        Each entry is stored under the name of the attribute it came from.

        Args:
            n_episode: forwarded to ``Learner._save_params`` together with
                the collected state dicts.
        """
        names = (
            "actor",
            "actor_target",
            "actor_optim",
            "critic1",
            "critic2",
            "critic_target1",
            "critic_target2",
            "critic_optim",
        )
        params = {name: getattr(self, name).state_dict() for name in names}

        Learner._save_params(self, params, n_episode)
コード例 #16
0
ファイル: learner.py プロジェクト: zivzone/rl_algorithms
    def save_params(self, n_episode: int):
        """Gather actor, qf, vf and optimizer state dicts and save them.

        Each entry is stored under the name of the attribute it came from.
        When ``hyper_params.auto_entropy_tuning`` is truthy the
        ``alpha_optim`` state is added as well.

        NOTE(review): only the alpha optimizer state is stored; if the class
        also keeps a learnable entropy coefficient tensor, it is not part of
        the saved params -- confirm this is intended.

        Args:
            n_episode: forwarded to ``Learner._save_params`` together with
                the collected state dicts.
        """
        names = (
            "actor",
            "qf_1",
            "qf_2",
            "vf",
            "vf_target",
            "actor_optim",
            "qf_1_optim",
            "qf_2_optim",
            "vf_optim",
        )
        params = {name: getattr(self, name).state_dict() for name in names}

        if self.hyper_params.auto_entropy_tuning:
            params["alpha_optim"] = self.alpha_optim.state_dict()

        Learner._save_params(self, params, n_episode)
コード例 #17
0
ファイル: learner.py プロジェクト: zivzone/rl_algorithms
    def load_params(self, path: str):
        """Restore actor, qf, vf networks and optimizers from a checkpoint.

        ``Learner.load_params`` is invoked first with the same path. Each
        checkpoint key has the same name as the attribute it restores. When
        ``hyper_params.auto_entropy_tuning`` is truthy the ``alpha_optim``
        state is restored as well.

        NOTE(review): ``torch.load`` deserializes the file (pickle-based in
        PyTorch), so only load checkpoints from trusted sources.

        Args:
            path: checkpoint file handed to ``torch.load``.
        """
        Learner.load_params(self, path)

        checkpoint = torch.load(path)
        # Restored in this order; key and attribute names coincide.
        for name in (
            "actor",
            "qf_1",
            "qf_2",
            "vf",
            "vf_target",
            "actor_optim",
            "qf_1_optim",
            "qf_2_optim",
            "vf_optim",
        ):
            getattr(self, name).load_state_dict(checkpoint[name])

        if self.hyper_params.auto_entropy_tuning:
            self.alpha_optim.load_state_dict(checkpoint["alpha_optim"])

        print("[INFO] loaded the model and optimizer from", path)
コード例 #18
0
ファイル: learner.py プロジェクト: zivzone/rl_algorithms
 def __init__(
     self,
     args: argparse.Namespace,
     env_info: ConfigDict,
     hyper_params: ConfigDict,
     log_cfg: ConfigDict,
     backbone: ConfigDict,
     head: ConfigDict,
     optim_cfg: ConfigDict,
     loss_type: ConfigDict,
 ):
     """Store the configs, size the head from the env, and build networks.

     Args:
         args: passed through to ``Learner.__init__``.
         env_info: passed through to ``Learner.__init__``; the spaces are
             then read from ``self.env_info``.
         hyper_params: passed through to ``Learner.__init__``;
             ``self.hyper_params.n_step`` is read afterwards.
         log_cfg: passed through to ``Learner.__init__``.
         backbone: stored as ``self.backbone_cfg``.
         head: stored as ``self.head_cfg``; ``configs.state_size`` and
             ``configs.output_size`` are overwritten in place.
         optim_cfg: stored as ``self.optim_cfg``.
         loss_type: stored as ``self.loss_type``.
     """
     Learner.__init__(self, args, env_info, hyper_params, log_cfg)

     obs_shape = self.env_info.observation_space.shape
     n_actions = self.env_info.action_space.n

     self.backbone_cfg = backbone
     self.optim_cfg = optim_cfg
     self.loss_type = loss_type

     self.head_cfg = head
     self.head_cfg.configs.state_size = obs_shape
     self.head_cfg.configs.output_size = n_actions

     # True when more than one step is configured.
     self.use_n_step = self.hyper_params.n_step > 1

     self._init_network()
コード例 #19
0
ファイル: learner.py プロジェクト: singmeasong/rl_algorithms
    def __init__(
        self,
        hyper_params: ConfigDict,
        log_cfg: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
        optim_cfg: ConfigDict,
        env_name: str,
        state_size: tuple,
        output_size: int,
        is_test: bool,
        load_from: str,
    ):
        """Store the configs, size the heads, and build the networks.

        Args:
            hyper_params: passed through to ``Learner.__init__``;
                ``self.hyper_params.auto_entropy_tuning`` is read afterwards.
            log_cfg: passed through to ``Learner.__init__``.
            backbone: stored as ``self.backbone_cfg``.
            head: stored as ``self.head_cfg``; its ``actor``, ``critic_vf``
                and ``critic_qf`` sub-configs are modified in place.
            optim_cfg: stored as ``self.optim_cfg``; ``lr_entropy`` is the
                learning rate of the ``log_alpha`` optimizer.
            env_name: passed through to ``Learner.__init__``.
            state_size: input size for the actor and critic_vf heads; its
                first entry plus ``output_size`` forms the critic_qf input.
            output_size: actor output size.
            is_test: passed through to ``Learner.__init__``.
            load_from: stored as ``self.load_from``.
        """
        Learner.__init__(self, hyper_params, log_cfg, env_name, is_test)

        self.backbone_cfg = backbone
        self.head_cfg = head
        # Actor and critic_vf share the same input size. The actor's
        # state_size is set only here; a second, identical assignment that
        # used to follow the critic_qf line was redundant and is removed.
        self.head_cfg.actor.configs.state_size = (
            self.head_cfg.critic_vf.configs.state_size) = state_size
        # critic_qf takes a 1-tuple: first state dimension + output size.
        self.head_cfg.critic_qf.configs.state_size = (state_size[0] +
                                                      output_size, )
        self.head_cfg.actor.configs.output_size = output_size
        self.optim_cfg = optim_cfg
        self.load_from = load_from

        self.update_step = 0
        if self.hyper_params.auto_entropy_tuning:
            # Target entropy is the negated output size.
            self.target_entropy = -np.prod((output_size, )).item()
            self.log_alpha = torch.zeros(1,
                                         requires_grad=True,
                                         device=self.device)
            self.alpha_optim = optim.Adam([self.log_alpha],
                                          lr=optim_cfg.lr_entropy)

        self._init_network()