def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optim: torch.optim.Adam,
):
    """Initialization.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and training settings
        hyper_params (dict): hyper-parameters
        models (tuple): models including main network and target
        optim (torch.optim.Adam): optimizer for dqn

    """
    AbstractAgent.__init__(self, env, args)

    self.use_n_step = hyper_params["N_STEP"] > 1
    self.epsilon = hyper_params["MAX_EPSILON"]
    self.dqn, self.dqn_target = models
    self.hyper_params = hyper_params
    self.curr_state = np.zeros(1)
    self.dqn_optimizer = optim
    self.episode_step = 0
    self.total_step = 0
    self.i_episode = 0

    # load the optimizer and model parameters
    if args.load_from is not None and os.path.exists(args.load_from):
        self.load_params(args.load_from)

    self._initialize()
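# The N_STEP flag above switches the agent to n-step returns. A minimal,
# illustrative sketch of the target it enables (not the repo's own helper):
# rewards from n consecutive transitions are folded into one discounted sum,
# R = r_0 + gamma * r_1 + ... + gamma^(n-1) * r_(n-1).
def n_step_return_sketch(rewards: list, gamma: float) -> float:
    """Discounted sum of up to N_STEP rewards (hypothetical helper)."""
    ret = 0.0
    for reward in reversed(rewards):
        ret = reward + gamma * ret
    return ret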
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
    noises: tuple,
):
    """Initialization.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including hyperparameters and training settings
        hyper_params (dict): hyper-parameters
        models (tuple): models including actor and critics
        optims (tuple): optimizers for actor and critics
        noises (tuple): noises for exploration and regularization

    """
    AbstractAgent.__init__(self, env, args)

    self.actor, self.actor_target = models[:2]
    self.critic1, self.critic1_target = models[2:4]
    self.critic2, self.critic2_target = models[4:]
    self.actor_optim, self.critic_optim = optims
    self.hyper_params = hyper_params
    self.exploration_noise, self.target_policy_noise = noises
    self.curr_state = np.zeros((1,))
    self.total_steps = 0
    self.episode_steps = 0

    # load the optimizer and model parameters
    if args.load_from is not None and os.path.exists(args.load_from):
        self.load_params(args.load_from)

    self._initialize()
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
    noise: OUNoise,
):
    """Initialization.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including hyperparameters and training settings
        hyper_params (dict): hyper-parameters
        models (tuple): models including actor and critic
        optims (tuple): optimizers for actor and critic
        noise (OUNoise): random noise for exploration

    """
    AbstractAgent.__init__(self, env, args)

    self.actor, self.actor_target, self.critic, self.critic_target = models
    self.actor_optimizer, self.critic_optimizer = optims
    self.hyper_params = hyper_params
    self.curr_state = np.zeros((1,))
    self.noise = noise

    # load the optimizer and model parameters
    if args.load_from is not None and os.path.exists(args.load_from):
        self.load_params(args.load_from)

    # replay memory
    self.memory = ReplayBuffer(
        hyper_params["BUFFER_SIZE"], hyper_params["BATCH_SIZE"], self.args.seed
    )
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
):
    """Initialization.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and training settings
        hyper_params (dict): hyper-parameters
        models (tuple): models including actor and critic
        optims (tuple): optimizers for actor and critic

    """
    AbstractAgent.__init__(self, env, args)

    self.actor, self.critic = models
    self.actor_optimizer, self.critic_optimizer = optims
    self.hyper_params = hyper_params
    self.log_prob = torch.zeros((1,))
    self.predicted_value = torch.zeros((1,))
    self.transition: list = list()
    self.episode_step = 0
    self.i_episode = 0

    if args.load_from is not None and os.path.exists(args.load_from):
        self.load_params(args.load_from)
def save_params(self, n_episode: int): """Save model and optimizer parameters.""" params = { "dqn_state_dict": self.dqn.state_dict(), "dqn_target_state_dict": self.dqn_target.state_dict(), "dqn_optim_state_dict": self.dqn_optimizer.state_dict(), } AbstractAgent.save_params(self, params, n_episode)
def save_params(self, n_episode: int): """Save model and optimizer parameters.""" params = { "actor_state_dict": self.actor.state_dict(), "critic_state_dict": self.critic.state_dict(), "actor_optim_state_dict": self.actor_optimizer.state_dict(), "critic_optim_state_dict": self.critic_optimizer.state_dict(), } AbstractAgent.save_params(self, params, n_episode)
def save_params(self, n_episode): """Save model and optimizer parameters.""" params = { "actor_state_dict": self.actor.state_dict(), "actor_target_state_dict": self.actor_target.state_dict(), "critic1_state_dict": self.critic1.state_dict(), "critic2_state_dict": self.critic2.state_dict(), "critic1_target_state_dict": self.critic1_target.state_dict(), "critic2_target_state_dict": self.critic2_target.state_dict(), "actor_optim_state_dict": self.actor_optim.state_dict(), "critic_optim_state_dict": self.critic_optim.state_dict(), } AbstractAgent.save_params(self, params, n_episode)
def save_params(self, n_episode: int): """Save model and optimizer parameters.""" params = { "actor": self.actor.state_dict(), "actor_target": self.actor_target.state_dict(), "actor_optim": self.actor_optim.state_dict(), "critic1": self.critic1.state_dict(), "critic2": self.critic2.state_dict(), "critic_target1": self.critic_target1.state_dict(), "critic_target2": self.critic_target2.state_dict(), "critic_optim": self.critic_optim.state_dict(), } AbstractAgent.save_params(self, params, n_episode)
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
    target_entropy: float,
):
    """Initialization.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and training settings
        hyper_params (dict): hyper-parameters
        models (tuple): models including actor, value function, and Q-functions
        optims (tuple): optimizers for actor, value function, and Q-functions
        target_entropy (float): target entropy for the inequality constraint

    """
    AbstractAgent.__init__(self, env, args)

    self.actor, self.vf, self.vf_target, self.qf_1, self.qf_2 = models
    self.actor_optimizer, self.vf_optimizer = optims[0:2]
    self.qf_1_optimizer, self.qf_2_optimizer = optims[2:4]
    self.hyper_params = hyper_params
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0
    self.hook_transition = False
    self.hooked_transition: Tuple = tuple()

    # automatic entropy tuning
    if self.hyper_params["AUTO_ENTROPY_TUNING"]:
        self.target_entropy = target_entropy
        self.log_alpha = torch.zeros(1, requires_grad=True, device=device)
        self.alpha_optimizer = optim.Adam(
            [self.log_alpha], lr=self.hyper_params["LR_ENTROPY"]
        )

    # load the optimizer and model parameters
    if args.load_from is not None and os.path.exists(args.load_from):
        self.load_params(args.load_from)

    self._initialize()
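# A hedged sketch of how the entropy-tuning attributes initialized above are
# typically used in SAC's update step (illustrative, not the repo's code).
# `log_prob` is assumed to be the log-probability of the actor's
# reparameterized action sample for the current batch.
def update_alpha_sketch(log_alpha, alpha_optimizer, log_prob, target_entropy):
    # Push the policy entropy toward target_entropy via the temperature alpha.
    alpha_loss = (-log_alpha * (log_prob + target_entropy).detach()).mean()
    alpha_optimizer.zero_grad()
    alpha_loss.backward()
    alpha_optimizer.step()
    # Temperature actually used to scale the entropy bonus in the actor/critic losses.
    return log_alpha.exp()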
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
    exploration_noise: GaussianNoise,
    target_policy_noise: GaussianNoise,
):
    """Initialization.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and training settings
        hyper_params (dict): hyper-parameters
        models (tuple): models including actor and critic
        optims (tuple): optimizers for actor and critic
        exploration_noise (GaussianNoise): random noise for exploration
        target_policy_noise (GaussianNoise): random noise for target values

    """
    AbstractAgent.__init__(self, env, args)

    self.actor, self.actor_target = models[0:2]
    self.critic1, self.critic2 = models[2:4]
    self.critic_target1, self.critic_target2 = models[4:6]
    self.actor_optim = optims[0]
    self.critic_optim = optims[1]
    self.hyper_params = hyper_params
    self.curr_state = np.zeros((1,))
    self.exploration_noise = exploration_noise
    self.target_policy_noise = target_policy_noise
    self.total_steps = 0
    self.episode_steps = 0
    self.update_steps = 0
    self.i_episode = 0

    # load the optimizer and model parameters
    if args.load_from is not None and os.path.exists(args.load_from):
        self.load_params(args.load_from)

    if not self.args.test:
        # replay memory
        self.memory = ReplayBuffer(
            hyper_params["BUFFER_SIZE"], hyper_params["BATCH_SIZE"]
        )
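# A hedged sketch of TD3's target policy smoothing, which is what
# target_policy_noise above is for: clipped noise is added to the target
# actor's action before the target critics are evaluated. The sample() call
# and the [-1, 1] action bound are assumptions, not the repo's exact API.
def smoothed_target_action_sketch(actor_target, next_states, target_policy_noise, noise_clip=0.5):
    noise = torch.as_tensor(target_policy_noise.sample(), dtype=torch.float32)
    noise = torch.clamp(noise, -noise_clip, noise_clip)
    next_actions = (actor_target(next_states) + noise).clamp(-1.0, 1.0)
    return next_actions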
def save_params(self, n_episode: int): """Save model and optimizer parameters.""" params = { "actor": self.actor.state_dict(), "qf_1": self.qf_1.state_dict(), "qf_2": self.qf_2.state_dict(), "vf": self.vf.state_dict(), "vf_target": self.vf_target.state_dict(), "actor_optim": self.actor_optimizer.state_dict(), "qf_1_optim": self.qf_1_optimizer.state_dict(), "qf_2_optim": self.qf_2_optimizer.state_dict(), "vf_optim": self.vf_optimizer.state_dict(), } if self.hyper_params["AUTO_ENTROPY_TUNING"]: params["alpha_optim"] = self.alpha_optimizer.state_dict() AbstractAgent.save_params(self, params, n_episode)
def __init__(
    self,
    env_single: gym.Env,  # for testing
    env_multi: SubprocVecEnv,  # for training
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
):
    """Initialization.

    Args:
        env_single (gym.Env): openAI Gym environment for testing
        env_multi (SubprocVecEnv): Gym env with multiprocessing for training
        args (argparse.Namespace): arguments including hyperparameters and training settings
        hyper_params (dict): hyper-parameters
        models (tuple): models including actor and critic
        optims (tuple): optimizers for actor and critic

    """
    AbstractAgent.__init__(self, env_single, args)

    if not self.args.test:
        self.env = env_multi

    self.actor, self.critic = models
    self.actor_optimizer, self.critic_optimizer = optims
    self.epsilon = hyper_params["EPSILON"]
    self.hyper_params = hyper_params
    self.episode_steps = np.zeros(hyper_params["N_WORKERS"], dtype=np.int64)
    self.states: list = []
    self.actions: list = []
    self.rewards: list = []
    self.values: list = []
    self.masks: list = []
    self.log_probs: list = []
    self.i_episode = 0

    # load model parameters
    if self.args.load_from is not None and os.path.exists(self.args.load_from):
        self.load_params(self.args.load_from)
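# A hedged usage sketch of how env_single / env_multi might be constructed.
# SubprocVecEnv (as in OpenAI baselines) takes a list of callables, each
# returning a fresh environment. The environment id and worker count are
# placeholders, not values taken from this repo.
def make_envs_sketch(env_id: str = "CartPole-v1", n_workers: int = 4):
    env_single = gym.make(env_id)  # for testing
    env_multi = SubprocVecEnv([lambda: gym.make(env_id) for _ in range(n_workers)])  # for training
    return env_single, env_multi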
def __init__(
    self,
    env_single: gym.Env,
    env_multi: SubprocVecEnv,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optim: torch.optim.Adam,
):
    """Initialization.

    Args:
        env_single (gym.Env): openAI Gym environment for testing
        env_multi (SubprocVecEnv): Gym env with multiprocessing for training
        args (argparse.Namespace): arguments including hyperparameters and training settings
        hyper_params (dict): hyper-parameters
        models (tuple): models including main network and target
        optim (torch.optim.Adam): optimizer for dqn

    """
    AbstractAgent.__init__(self, env_single, args)

    if not self.args.test:
        self.env = env_multi

    self.dqn, self.dqn_target = models
    self.dqn_optimizer = optim
    self.hyper_params = hyper_params
    self.curr_state = np.zeros((1,))
    self.total_steps = np.zeros(hyper_params["N_WORKERS"], dtype=np.int64)
    self.episode_steps = np.zeros(hyper_params["N_WORKERS"], dtype=np.int64)
    self.epsilon = self.hyper_params["MAX_EPSILON"]
    self.i_episode = 0

    # load the optimizer and model parameters
    if args.load_from is not None and os.path.exists(args.load_from):
        self.load_params(args.load_from)

    self._initialize()
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
    noise: OUNoise,
):
    """Initialization.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and training settings
        hyper_params (dict): hyper-parameters
        models (tuple): models including actor and critic
        optims (tuple): optimizers for actor and critic
        noise (OUNoise): random noise for exploration

    """
    AbstractAgent.__init__(self, env, args)

    self.actor, self.actor_target, self.critic, self.critic_target = models
    self.actor_optimizer, self.critic_optimizer = optims
    self.hyper_params = hyper_params
    self.curr_state = np.zeros((1,))
    self.noise = noise
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0
    self.hook_transition = False
    self.hooked_transition: Tuple = tuple()

    # load the optimizer and model parameters
    if args.load_from is not None and os.path.exists(args.load_from):
        self.load_params(args.load_from)

    self._initialize()
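# A minimal sketch of an Ornstein-Uhlenbeck process like the OUNoise used for
# exploration above (class name, parameter names, and defaults are assumptions,
# not the repo's implementation): x_{t+1} = x_t + theta * (mu - x_t) + sigma * N(0, 1).
class OUNoiseSketch:
    def __init__(self, size: int, mu: float = 0.0, theta: float = 0.15, sigma: float = 0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.state = np.copy(self.mu)

    def reset(self):
        self.state = np.copy(self.mu)

    def sample(self) -> np.ndarray:
        # Mean-reverting drift plus Gaussian diffusion.
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state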
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    hyper_params: dict,
    models: tuple,
    optims: tuple,
    noise: OUNoise,
):
    """Initialization.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including hyperparameters and training settings
        hyper_params (dict): hyper-parameters
        models (tuple): models including actor and critic
        optims (tuple): optimizers for actor and critic
        noise (OUNoise): random noise for exploration

    Crucial attributes:
        reward_ftn (method): the environment's goal-conditioned reward function.
            Example:

            def goal_distance(goal_a, goal_b):
                assert goal_a.shape == goal_b.shape
                return np.linalg.norm(goal_a - goal_b, axis=-1)

            def compute_reward(self, achieved_goal, goal, info):
                # Compute distance between goal and the achieved goal.
                d = goal_distance(achieved_goal, goal)
                if self.reward_type == 'sparse':
                    return -(d > self.distance_threshold).astype(np.float32)
                else:
                    return -d

    """
    AbstractAgent.__init__(self, env, args)

    self.actor, self.actor_target, self.critic, self.critic_target = models
    self.actor_optimizer, self.critic_optimizer = optims
    self.hyper_params = hyper_params
    self.curr_state = np.zeros((1,))
    self.noise = noise

    # get the environment's reward function: sparse / dense
    self.reward_ftn = env.reward_ftn(reward_type='sparse')

    # load the optimizer and model parameters
    if args.load_from is not None and os.path.exists(args.load_from):
        self.load_params(args.load_from)

    # observation / goal normalizers
    self.obs_norm = RunningMeanStd(shape=(1,) + env.obs_shape)
    self.goal_norm = RunningMeanStd(shape=(1,) + env.goal_shape)

    # HER
    self.her_sampler = Her_sampler(reward_func=self.reward_ftn)

    # replay memory
    self.memory = HER_ReplayBuffer(
        hyper_params["BUFFER_SIZE"],
        hyper_params["BATCH_SIZE"],
        self.args.seed,
        normalizer=[self.obs_norm, self.goal_norm],
        her_sampler=self.her_sampler,
        reward_ftn=self.reward_ftn,
    )

    # per-episode transition buffers
    self.ep_obs, self.ep_obs_1, self.ep_ag, self.ep_ag_1 = [], [], [], []
    self.ep_g, self.ep_act, self.ep_rew, self.ep_dn = [], [], [], []
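# A hedged sketch of the "future" goal-relabeling strategy that a HER sampler
# such as Her_sampler typically implements: a stored transition's goal is
# replaced by an achieved goal from a later step of the same episode, and the
# reward is recomputed with the environment's reward function. All names below
# are illustrative, not the repo's API.
def her_future_relabel_sketch(obs, action, achieved_goal_next, future_achieved_goals, reward_func):
    # Pick an achieved goal from a later timestep of the same episode.
    new_goal = future_achieved_goals[np.random.randint(len(future_achieved_goals))]
    # Recompute the (sparse or dense) reward against the substituted goal.
    new_reward = reward_func(achieved_goal_next, new_goal, None)
    return obs, action, new_reward, new_goal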