Exemple #1
0
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
    ):
        """Set up an actor-critic agent and optionally restore a checkpoint.

        Args:
            env (gym.Env): environment forwarded to ``Agent.__init__``
            args (argparse.Namespace): run settings; ``args.load_from`` is read
                here as an optional checkpoint path
            hyper_params (dict): hyper-parameters, stored as given
            models (tuple): pair unpacked as (actor, critic)
            optims (tuple): pair unpacked as (actor optimizer, critic optimizer)

        """
        Agent.__init__(self, env, args)

        # networks and their optimizers
        actor, critic = models
        self.actor = actor
        self.critic = critic
        actor_optimizer, critic_optimizer = optims
        self.actor_optimizer = actor_optimizer
        self.critic_optimizer = critic_optimizer

        self.hyper_params = hyper_params

        # initial per-step values and counters
        self.log_prob = torch.zeros(1)
        self.predicted_value = torch.zeros(1)
        self.transition: list = []
        self.episode_step = self.i_episode = 0

        # restore saved parameters only when a path was given and it exists
        checkpoint_path = args.load_from
        if checkpoint_path is not None and os.path.exists(checkpoint_path):
            self.load_params(checkpoint_path)
Exemple #2
0
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optim: torch.optim.Adam,
    ):
        """Set up a DQN agent, optionally restore a checkpoint, then initialize.

        Args:
            env (gym.Env): environment forwarded to ``Agent.__init__``
            args (argparse.Namespace): run settings; ``args.load_from`` is read
                here as an optional checkpoint path
            hyper_params (dict): hyper-parameters; ``"N_STEP"`` and
                ``"MAX_EPSILON"`` are read here
            models (tuple): pair unpacked as (dqn, dqn_target)
            optim (torch.optim.Adam): optimizer stored as ``dqn_optimizer``

        """
        Agent.__init__(self, env, args)

        # n-step mode is enabled whenever more than one step is configured
        n_step = hyper_params["N_STEP"]
        self.use_n_step = n_step > 1
        self.epsilon = hyper_params["MAX_EPSILON"]

        main_net, target_net = models
        self.dqn = main_net
        self.dqn_target = target_net
        self.hyper_params = hyper_params
        self.curr_state = np.zeros((1,))
        self.dqn_optimizer = optim

        # step / episode counters all start at zero
        self.episode_step = self.total_step = self.i_episode = 0

        # restore optimizer and model parameters if a valid path was given
        checkpoint_path = args.load_from
        if checkpoint_path is not None and os.path.exists(checkpoint_path):
            self.load_params(checkpoint_path)

        self._initialize()
Exemple #3
0
    def save_params(self, n_episode: int):
        """Collect DQN network and optimizer state dicts and hand them to Agent.

        Args:
            n_episode (int): episode number forwarded to ``Agent.save_params``

        """
        # insertion order: main network, target network, optimizer
        checkpoint = {}
        checkpoint["dqn_state_dict"] = self.dqn.state_dict()
        checkpoint["dqn_target_state_dict"] = self.dqn_target.state_dict()
        checkpoint["dqn_optim_state_dict"] = self.dqn_optimizer.state_dict()

        Agent.save_params(self, checkpoint, n_episode)
Exemple #4
0
 def save_params(self, n_episode: int):
     """Collect actor/critic and optimizer state dicts and hand them to Agent.

     Args:
         n_episode (int): episode number forwarded to ``Agent.save_params``

     """
     network_states = {
         "actor_state_dict": self.actor.state_dict(),
         "critic_state_dict": self.critic.state_dict(),
     }
     optimizer_states = {
         "actor_optim_state_dict": self.actor_optimizer.state_dict(),
         "critic_optim_state_dict": self.critic_optimizer.state_dict(),
     }
     # networks first, then optimizers, in one flat dict
     checkpoint = {**network_states, **optimizer_states}
     Agent.save_params(self, checkpoint, n_episode)
Exemple #5
0
    def save_params(self, n_episode: int):
        """Collect every network and optimizer state dict and hand them to Agent.

        Args:
            n_episode (int): episode number forwarded to ``Agent.save_params``

        """
        checkpoint = {}

        # actor side
        checkpoint["actor"] = self.actor.state_dict()
        checkpoint["actor_target"] = self.actor_target.state_dict()
        checkpoint["actor_optim"] = self.actor_optim.state_dict()

        # critic side (two critics, two targets, one shared optimizer)
        checkpoint["critic1"] = self.critic1.state_dict()
        checkpoint["critic2"] = self.critic2.state_dict()
        checkpoint["critic_target1"] = self.critic_target1.state_dict()
        checkpoint["critic_target2"] = self.critic_target2.state_dict()
        checkpoint["critic_optim"] = self.critic_optim.state_dict()

        Agent.save_params(self, checkpoint, n_episode)
Exemple #6
0
    def __init__(self, env: gym.Env, args: argparse.Namespace):
        """Initialize the base Agent and start with an empty transition list.

        Args:
            env (gym.Env): environment forwarded to ``Agent.__init__``
            args (argparse.Namespace): run settings forwarded to ``Agent.__init__``

        """
        Agent.__init__(self, env, args)
        self.transition = list()
Exemple #7
0
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
        exploration_noise: GaussianNoise,
        target_policy_noise: GaussianNoise,
    ):
        """Set up a twin-critic agent, optionally restore it, and build memory.

        Args:
            env (gym.Env): environment forwarded to ``Agent.__init__``
            args (argparse.Namespace): run settings; ``args.load_from`` is read
                here as an optional checkpoint path
            hyper_params (dict): hyper-parameters; ``"BUFFER_SIZE"`` and
                ``"BATCH_SIZE"`` are read when the replay buffer is created
            models (tuple): sliced in pairs as (actor, actor_target),
                (critic1, critic2), (critic_target1, critic_target2)
            optims (tuple): index 0 is the actor optimizer, index 1 the
                critic optimizer
            exploration_noise (GaussianNoise): stored as ``exploration_noise``
            target_policy_noise (GaussianNoise): stored as ``target_policy_noise``

        """
        Agent.__init__(self, env, args)

        # networks, taken from `models` two at a time
        self.actor, self.actor_target = models[:2]
        self.critic1, self.critic2 = models[2:4]
        self.critic_target1, self.critic_target2 = models[4:6]
        self.actor_optim, self.critic_optim = optims[0], optims[1]

        self.hyper_params = hyper_params
        self.curr_state = np.zeros(1)
        self.exploration_noise = exploration_noise
        self.target_policy_noise = target_policy_noise

        # counters all start at zero
        self.total_step = self.episode_step = 0
        self.update_step = self.i_episode = 0

        # restore optimizer and model parameters if a valid path was given
        checkpoint_path = args.load_from
        if checkpoint_path is not None and os.path.exists(checkpoint_path):
            self.load_params(checkpoint_path)

        # the replay memory is only created outside of test mode
        if not self.args.test:
            buffer_size = hyper_params["BUFFER_SIZE"]
            batch_size = hyper_params["BATCH_SIZE"]
            self.memory = ReplayBuffer(buffer_size, batch_size)
Exemple #8
0
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
        target_entropy: float,
    ):
        """Set up the agent, optional entropy tuning, and restore a checkpoint.

        Args:
            env (gym.Env): environment forwarded to ``Agent.__init__``
            args (argparse.Namespace): run settings; ``args.load_from`` is read
                here as an optional checkpoint path
            hyper_params (dict): hyper-parameters; ``"AUTO_ENTROPY_TUNING"`` is
                read here, and ``"LR_ENTROPY"`` when tuning is enabled
            models (tuple): five items unpacked as
                (actor, vf, vf_target, qf_1, qf_2)
            optims (tuple): sliced as (actor, vf) and (qf_1, qf_2) optimizers
            target_entropy (float): stored only when entropy tuning is enabled

        """
        Agent.__init__(self, env, args)

        actor, vf, vf_target, qf_1, qf_2 = models
        self.actor = actor
        self.vf = vf
        self.vf_target = vf_target
        self.qf_1 = qf_1
        self.qf_2 = qf_2

        self.actor_optimizer, self.vf_optimizer = optims[:2]
        self.qf_1_optimizer, self.qf_2_optimizer = optims[2:4]

        self.hyper_params = hyper_params
        self.curr_state = np.zeros(1)

        # counters all start at zero
        self.total_step = self.episode_step = 0
        self.update_step = self.i_episode = 0

        # automatic entropy tuning: a learnable log-alpha with its own optimizer
        if hyper_params["AUTO_ENTROPY_TUNING"]:
            self.target_entropy = target_entropy
            self.log_alpha = torch.zeros(1, requires_grad=True, device=device)
            entropy_lr = hyper_params["LR_ENTROPY"]
            self.alpha_optimizer = optim.Adam([self.log_alpha], lr=entropy_lr)

        # restore optimizer and model parameters if a valid path was given
        checkpoint_path = args.load_from
        if checkpoint_path is not None and os.path.exists(checkpoint_path):
            self.load_params(checkpoint_path)

        self._initialize()
Exemple #9
0
    def save_params(self, n_episode: int):
        """Collect network and optimizer state dicts and hand them to Agent.

        The alpha optimizer state is added only when
        ``hyper_params["AUTO_ENTROPY_TUNING"]`` is truthy.

        Args:
            n_episode (int): episode number forwarded to ``Agent.save_params``

        """
        network_states = {
            "actor": self.actor.state_dict(),
            "qf_1": self.qf_1.state_dict(),
            "qf_2": self.qf_2.state_dict(),
            "vf": self.vf.state_dict(),
            "vf_target": self.vf_target.state_dict(),
        }
        optimizer_states = {
            "actor_optim": self.actor_optimizer.state_dict(),
            "qf_1_optim": self.qf_1_optimizer.state_dict(),
            "qf_2_optim": self.qf_2_optimizer.state_dict(),
            "vf_optim": self.vf_optimizer.state_dict(),
        }
        # networks first, then optimizers, in one flat dict
        checkpoint = {**network_states, **optimizer_states}

        auto_tuning = self.hyper_params["AUTO_ENTROPY_TUNING"]
        if auto_tuning:
            checkpoint["alpha_optim"] = self.alpha_optimizer.state_dict()

        Agent.save_params(self, checkpoint, n_episode)
Exemple #10
0
    def __init__(
        self,
        env_single: gym.Env,  # for testing
        env_multi: SubprocVecEnv,  # for training
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
    ):
        """Initialization.

        Args:
            env_single (gym.Env): openAI Gym environment for testing; passed to
                ``Agent.__init__``
            env_multi (SubprocVecEnv): Gym env with multiprocessing for training;
                replaces ``self.env`` unless ``self.args.test`` is set
            args (argparse.Namespace): arguments including hyperparameters and training settings
            hyper_params (dict): hyper-parameters; ``"EPSILON"`` and
                ``"N_WORKERS"`` are read here
            models (tuple): pair unpacked as (actor, critic)
            optims (tuple): pair unpacked as (actor optimizer, critic optimizer)

        """
        Agent.__init__(self, env_single, args)

        # outside of test mode the vectorized environment is used instead
        if not self.args.test:
            self.env = env_multi
        self.actor, self.critic = models
        self.actor_optimizer, self.critic_optimizer = optims
        self.epsilon = hyper_params["EPSILON"]
        self.hyper_params = hyper_params
        # one integer step counter per worker. Use the builtin `int`: the
        # `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24.
        self.episode_steps = np.zeros(hyper_params["N_WORKERS"], dtype=int)
        # per-rollout buffers, all start empty
        self.states: list = []
        self.actions: list = []
        self.rewards: list = []
        self.values: list = []
        self.masks: list = []
        self.log_probs: list = []
        self.i_episode = 0

        # load model parameters if a path was given and it exists
        if self.args.load_from is not None and os.path.exists(
                self.args.load_from):
            self.load_params(self.args.load_from)
Exemple #11
0
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
        noise: OUNoise,
    ):
        """Set up the agent, optionally restore a checkpoint, then initialize.

        Args:
            env (gym.Env): environment forwarded to ``Agent.__init__``
            args (argparse.Namespace): run settings; ``args.load_from`` is read
                here as an optional checkpoint path
            hyper_params (dict): hyper-parameters, stored as given
            models (tuple): four items unpacked as
                (actor, actor_target, critic, critic_target)
            optims (tuple): pair unpacked as (actor optimizer, critic optimizer)
            noise (OUNoise): stored as ``noise``

        """
        Agent.__init__(self, env, args)

        actor, actor_target, critic, critic_target = models
        self.actor = actor
        self.actor_target = actor_target
        self.critic = critic
        self.critic_target = critic_target

        actor_optimizer, critic_optimizer = optims
        self.actor_optimizer = actor_optimizer
        self.critic_optimizer = critic_optimizer

        self.hyper_params = hyper_params
        self.curr_state = np.zeros(1)
        self.noise = noise

        # counters all start at zero
        self.total_step = self.episode_step = self.i_episode = 0

        # restore optimizer and model parameters if a valid path was given
        checkpoint_path = args.load_from
        if checkpoint_path is not None and os.path.exists(checkpoint_path):
            self.load_params(checkpoint_path)

        self._initialize()