def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optim: torch.optim.Adam,
):
    """Set up networks, optimizer and bookkeeping state.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters
            and training settings; ``args.load_from`` may name a checkpoint
        hyper_params (dict): hyper-parameters (``N_STEP`` and
            ``MAX_EPSILON`` are read here)
        models (tuple): pair of (main network, target network)
        optim (torch.optim.Adam): optimizer for dqn

    """
    Agent.__init__(self, env, args)

    # n-step mode is switched on only for N_STEP greater than one
    n_step = hyper_params["N_STEP"]
    self.use_n_step = n_step > 1
    self.epsilon = hyper_params["MAX_EPSILON"]

    main_net, target_net = models
    self.dqn = main_net
    self.dqn_target = target_net

    self.hyper_params = hyper_params
    self.curr_state = np.zeros((1,))
    self.dqn_optimizer = optim

    # step / episode counters all start from zero
    self.episode_step = self.total_step = self.i_episode = 0

    # restore the optimizer and model parameters when a checkpoint exists
    checkpoint = args.load_from
    if checkpoint is not None and os.path.exists(checkpoint):
        self.load_params(checkpoint)

    self._initialize()
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
):
    """Set up the actor/critic pair, their optimizers and episode state.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters
            and training settings; ``args.load_from`` may name a checkpoint
        hyper_params (dict): hyper-parameters
        models (tuple): pair of (actor, critic)
        optims (tuple): pair of (actor optimizer, critic optimizer)

    """
    Agent.__init__(self, env, args)

    actor, critic = models
    self.actor = actor
    self.critic = critic

    actor_optimizer, critic_optimizer = optims
    self.actor_optimizer = actor_optimizer
    self.critic_optimizer = critic_optimizer

    self.hyper_params = hyper_params

    # one-element placeholder tensors and an empty transition record
    self.log_prob = torch.zeros(1)
    self.predicted_value = torch.zeros(1)
    self.transition: list = []

    self.episode_step = self.i_episode = 0

    # restore saved parameters only when the checkpoint path exists
    checkpoint = args.load_from
    if checkpoint is not None and os.path.exists(checkpoint):
        self.load_params(checkpoint)
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
):
    """Run the base Agent setup and start with an empty transition.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters
            and training settings

    """
    Agent.__init__(self, env, args)

    # fresh, empty transition record
    self.transition = list()
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
    exploration_noise: GaussianNoise,
    target_policy_noise: GaussianNoise,
):
    """Set up actor, twin critics, their targets, noise and replay memory.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters
            and training settings; ``args.load_from`` may name a checkpoint
        hyper_params (dict): hyper-parameters (``BUFFER_SIZE`` and
            ``BATCH_SIZE`` are read when not in test mode)
        models (tuple): (actor, actor target, critic 1, critic 2,
            critic target 1, critic target 2), in this order
        optims (tuple): (actor optimizer, critic optimizer)
        exploration_noise (GaussianNoise): random noise for exploration
        target_policy_noise (GaussianNoise): random noise for target values

    """
    Agent.__init__(self, env, args)

    # models come in consecutive pairs
    self.actor, self.actor_target = models[:2]
    self.critic1, self.critic2 = models[2:4]
    self.critic_target1, self.critic_target2 = models[4:6]

    self.actor_optim = optims[0]
    self.critic_optim = optims[1]

    self.hyper_params = hyper_params
    self.curr_state = np.zeros(1)
    self.exploration_noise = exploration_noise
    self.target_policy_noise = target_policy_noise

    # all counters start from zero
    self.total_step = self.episode_step = 0
    self.update_step = self.i_episode = 0

    # restore the optimizer and model parameters when a checkpoint exists
    checkpoint = args.load_from
    if checkpoint is not None and os.path.exists(checkpoint):
        self.load_params(checkpoint)

    # the replay memory is only created outside of test mode
    if self.args.test:
        return

    self.memory = ReplayBuffer(
        hyper_params["BUFFER_SIZE"], hyper_params["BATCH_SIZE"]
    )
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
    target_entropy: float,
):
    """Set up actor, value/Q networks, optimizers and entropy tuning.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters
            and training settings; ``args.load_from`` may name a checkpoint
        hyper_params (dict): hyper-parameters (``AUTO_ENTROPY_TUNING`` and,
            when it is truthy, ``LR_ENTROPY`` are read here)
        models (tuple): (actor, vf, vf target, qf 1, qf 2), in this order
        optims (tuple): (actor, vf, qf 1, qf 2) optimizers, in this order
        target_entropy (float): target entropy for the inequality constraint

    """
    Agent.__init__(self, env, args)

    (
        self.actor,
        self.vf,
        self.vf_target,
        self.qf_1,
        self.qf_2,
    ) = models
    self.actor_optimizer, self.vf_optimizer = optims[:2]
    self.qf_1_optimizer, self.qf_2_optimizer = optims[2:4]

    self.hyper_params = hyper_params
    self.curr_state = np.zeros(1)

    # all counters start from zero
    self.total_step = self.episode_step = 0
    self.update_step = self.i_episode = 0

    # automatic entropy tuning: a learnable log(alpha) with its own Adam
    auto_tuning = self.hyper_params["AUTO_ENTROPY_TUNING"]
    if auto_tuning:
        self.target_entropy = target_entropy
        self.log_alpha = torch.zeros(1, requires_grad=True, device=device)
        self.alpha_optimizer = optim.Adam(
            [self.log_alpha], lr=self.hyper_params["LR_ENTROPY"]
        )

    # restore the optimizer and model parameters; kept after the entropy
    # state above, exactly as in the original ordering
    checkpoint = args.load_from
    if checkpoint is not None and os.path.exists(checkpoint):
        self.load_params(checkpoint)

    self._initialize()
def __init__(
    self,
    env_single: gym.Env,  # for testing
    env_multi: SubprocVecEnv,  # for training
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
):
    """Initialization.

    Args:
        env_single (gym.Env): openAI Gym environment for testing
        env_multi (SubprocVecEnv): Gym env with multiprocessing for training
        args (argparse.Namespace): arguments including hyperparameters
            and training settings; ``args.load_from`` may name a checkpoint
        hyper_params (dict): hyper-parameters (``EPSILON`` and
            ``N_WORKERS`` are read here)
        models (tuple): models including actor and critic
        optims (tuple): optimizers for actor and critic

    """
    Agent.__init__(self, env_single, args)

    # the base class is given the single env; outside of test mode the
    # multiprocessing env replaces it
    if not self.args.test:
        self.env = env_multi

    self.actor, self.critic = models
    self.actor_optimizer, self.critic_optimizer = optims
    self.epsilon = hyper_params["EPSILON"]
    self.hyper_params = hyper_params

    # One integer step counter per worker.  The builtin ``int`` is used
    # instead of ``np.int``: the latter was only an alias of the builtin,
    # was deprecated in NumPy 1.20 and removed in NumPy 1.24.
    self.episode_steps = np.zeros(hyper_params["N_WORKERS"], dtype=int)

    # rollout storage, filled elsewhere
    self.states: list = []
    self.actions: list = []
    self.rewards: list = []
    self.values: list = []
    self.masks: list = []
    self.log_probs: list = []
    self.i_episode = 0

    # load model parameters
    if self.args.load_from is not None and os.path.exists(self.args.load_from):
        self.load_params(self.args.load_from)
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
    noise: OUNoise,
):
    """Set up actor/critic networks with targets, optimizers and noise.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters
            and training settings; ``args.load_from`` may name a checkpoint
        hyper_params (dict): hyper-parameters
        models (tuple): (actor, actor target, critic, critic target),
            in this order
        optims (tuple): (actor optimizer, critic optimizer)
        noise (OUNoise): random noise for exploration

    """
    Agent.__init__(self, env, args)

    (
        self.actor,
        self.actor_target,
        self.critic,
        self.critic_target,
    ) = models
    self.actor_optimizer, self.critic_optimizer = optims

    self.hyper_params = hyper_params
    self.curr_state = np.zeros(1)
    self.noise = noise

    # step / episode counters all start from zero
    self.total_step = self.episode_step = self.i_episode = 0

    # restore the optimizer and model parameters when a checkpoint exists
    checkpoint = args.load_from
    if checkpoint is not None and os.path.exists(checkpoint):
        self.load_params(checkpoint)

    self._initialize()