def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    log_cfg: ConfigDict,
    hyper_params: ConfigDict,
    network_cfg: ConfigDict,
    optim_cfg: ConfigDict,
    noise_cfg: ConfigDict,
):
    """Initialize."""
    Agent.__init__(self, env, args, log_cfg)
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params
    self.network_cfg = network_cfg
    self.optim_cfg = optim_cfg

    self.state_dim = self.env.observation_space.shape[0]
    self.action_dim = self.env.action_space.shape[0]

    # set noise
    self.noise = OUNoise(
        self.action_dim,
        theta=noise_cfg.ou_noise_theta,
        sigma=noise_cfg.ou_noise_sigma,
    )

    self._initialize()
    self._init_network()
def __init__( self, env: gym.Env, env_info: ConfigDict, args: argparse.Namespace, hyper_params: ConfigDict, learner_cfg: ConfigDict, log_cfg: ConfigDict, ): """Initialize. Args: env (gym.Env): openAI Gym environment args (argparse.Namespace): arguments including hyperparameters and training settings """ Agent.__init__(self, env, env_info, args, log_cfg) self.curr_state = np.zeros((1, )) self.total_step = 0 self.episode_step = 0 self.i_episode = 0 self.hyper_params = hyper_params self.learner_cfg = learner_cfg self.learner_cfg.args = self.args self.learner_cfg.env_info = self.env_info self.learner_cfg.hyper_params = self.hyper_params self.learner_cfg.log_cfg = self.log_cfg self.learner_cfg.device = device self._initialize()
def __init__( self, env: gym.Env, env_info: ConfigDict, args: argparse.Namespace, hyper_params: ConfigDict, learner_cfg: ConfigDict, log_cfg: ConfigDict, ): """Initialize.""" Agent.__init__(self, env, env_info, args, log_cfg) self.transition: list = list() self.episode_step = 0 self.i_episode = 0 self.hyper_params = hyper_params self.learner_cfg = learner_cfg self.learner_cfg.args = self.args self.learner_cfg.env_info = self.env_info self.learner_cfg.hyper_params = self.hyper_params self.learner_cfg.log_cfg = self.log_cfg self.learner_cfg.device = device self.learner = build_learner(self.learner_cfg)
def __init__( self, env: gym.Env, args: argparse.Namespace, log_cfg: ConfigDict, hyper_params: ConfigDict, backbone: ConfigDict, head: ConfigDict, optim_cfg: ConfigDict, ): """Initialize.""" Agent.__init__(self, env, args, log_cfg) self.transition: list = list() self.episode_step = 0 self.i_episode = 0 self.hyper_params = hyper_params self.backbone_cfg = backbone self.head_cfg = head self.optim_cfg = optim_cfg self.state_dim = self.env.observation_space.shape self.action_dim = self.env.action_space.shape[0] self._init_network()
def __init__( self, env: gym.Env, env_info: ConfigDict, args: argparse.Namespace, hyper_params: ConfigDict, learner_cfg: ConfigDict, noise_cfg: ConfigDict, log_cfg: ConfigDict, ): """Initialize.""" Agent.__init__(self, env, env_info, args, log_cfg) self.curr_state = np.zeros((1,)) self.total_step = 0 self.episode_step = 0 self.i_episode = 0 self.hyper_params = hyper_params self.learner_cfg = learner_cfg self.learner_cfg.args = self.args self.learner_cfg.env_info = self.env_info self.learner_cfg.hyper_params = self.hyper_params self.learner_cfg.log_cfg = self.log_cfg self.learner_cfg.noise_cfg = noise_cfg self.learner_cfg.device = device # set noise self.noise = OUNoise( env_info.action_space.shape[0], theta=noise_cfg.ou_noise_theta, sigma=noise_cfg.ou_noise_sigma, ) self._initialize()
def load_params(self, path: str): """Load model and optimizer parameters.""" Agent.load_params(self, path) params = torch.load(path) self.dqn.load_state_dict(params["dqn_state_dict"]) self.dqn_target.load_state_dict(params["dqn_target_state_dict"]) self.dqn_optim.load_state_dict(params["dqn_optim_state_dict"]) print("[INFO] loaded the model and optimizer from", path)
def save_params(self, n_episode: int): # type: ignore """Save model and optimizer parameters.""" params = { "actor_state_dict": self.actor.state_dict(), "critic_state_dict": self.critic.state_dict(), "actor_optim_state_dict": self.actor_optim.state_dict(), "critic_optim_state_dict": self.critic_optim.state_dict(), } Agent.save_params(self, params, n_episode)
def save_params(self, n_episode: int): # type: ignore """Save model and optimizer parameters.""" params = { "dqn_state_dict": self.dqn.state_dict(), "dqn_target_state_dict": self.dqn_target.state_dict(), "dqn_optim_state_dict": self.dqn_optim.state_dict(), } Agent.save_params(self, params, n_episode)
def load_params(self, path: str): """Load model and optimizer parameters.""" Agent.load_params(self, path) params = torch.load(path) self.actor.load_state_dict(params["actor_state_dict"]) self.critic.load_state_dict(params["critic_state_dict"]) self.actor_optim.load_state_dict(params["actor_optim_state_dict"]) self.critic_optim.load_state_dict(params["critic_optim_state_dict"]) print("[INFO] Loaded the model and optimizer from", path)
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )
    self.curr_state = np.zeros(1)
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg
    self.per_beta = hyper_params.per_beta
    self.use_n_step = hyper_params.n_step > 1

    if self.learner_cfg.head.configs.use_noisy_net:
        self.max_epsilon = 0.0
        self.min_epsilon = 0.0
        self.epsilon = 0.0
    else:
        self.max_epsilon = hyper_params.max_epsilon
        self.min_epsilon = hyper_params.min_epsilon
        self.epsilon = hyper_params.max_epsilon

    self._initialize()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    noise_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg
    self.noise_cfg = noise_cfg

    # set noise
    self.noise = OUNoise(
        env_info.action_space.shape[0],
        theta=noise_cfg.ou_noise_theta,
        sigma=noise_cfg.ou_noise_sigma,
    )

    self._initialize()
def __init__( self, env: gym.Env, args: argparse.Namespace, log_cfg: ConfigDict, hyper_params: ConfigDict, backbone: ConfigDict, head: ConfigDict, optim_cfg: ConfigDict, noise_cfg: ConfigDict, ): """Initialize. Args: env (gym.Env): openAI Gym environment args (argparse.Namespace): arguments including hyperparameters and training settings """ Agent.__init__(self, env, args, log_cfg) self.curr_state = np.zeros((1, )) self.total_step = 0 self.episode_step = 0 self.update_step = 0 self.i_episode = 0 self.hyper_params = hyper_params self.noise_cfg = noise_cfg self.backbone_cfg = backbone self.head_cfg = head self.optim_cfg = optim_cfg self.state_dim = self.env.observation_space.shape self.action_dim = self.env.action_space.shape[0] # noise instance to make randomness of action self.exploration_noise = GaussianNoise(self.action_dim, noise_cfg.exploration_noise, noise_cfg.exploration_noise) self.target_policy_noise = GaussianNoise( self.action_dim, noise_cfg.target_policy_noise, noise_cfg.target_policy_noise, ) if not self.args.test: # replay memory self.memory = ReplayBuffer(self.hyper_params.buffer_size, self.hyper_params.batch_size) self._init_network()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )
    self.transition: list = list()
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg

    build_args = dict(
        hyper_params=self.hyper_params,
        log_cfg=self.log_cfg,
        env_name=self.env_info.name,
        state_size=self.env_info.observation_space.shape,
        output_size=self.env_info.action_space.shape[0],
        is_test=self.is_test,
        load_from=self.load_from,
    )
    self.learner = build_learner(self.learner_cfg, build_args)
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )
    self.episode_step = 0
    self.i_episode = 0
    self.episode_num = episode_num

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg

    build_args = dict(
        hyper_params=hyper_params,
        log_cfg=log_cfg,
        env_info=env_info,
        is_test=is_test,
        load_from=load_from,
    )
    self.learner = build_learner(self.learner_cfg, build_args)

    self.memory = ReplayMemory(
        self.hyper_params.buffer_size, self.hyper_params.n_rollout
    )
def load_params(self, path: str): """Load model and optimizer parameters.""" Agent.load_params(self, path) params = torch.load(path) self.critic1.load_state_dict(params["critic1"]) self.critic2.load_state_dict(params["critic2"]) self.critic_target1.load_state_dict(params["critic_target1"]) self.critic_target2.load_state_dict(params["critic_target2"]) self.critic_optim.load_state_dict(params["critic_optim"]) self.actor.load_state_dict(params["actor"]) self.actor_target.load_state_dict(params["actor_target"]) self.actor_optim.load_state_dict(params["actor_optim"]) print("[INFO] loaded the model and optimizer from", path)
def save_params(self, n_episode: int): # type: ignore """Save model and optimizer parameters.""" params = { "actor": self.actor.state_dict(), "actor_target": self.actor_target.state_dict(), "actor_optim": self.actor_optim.state_dict(), "critic1": self.critic1.state_dict(), "critic2": self.critic2.state_dict(), "critic_target1": self.critic_target1.state_dict(), "critic_target2": self.critic_target2.state_dict(), "critic_optim": self.critic_optim.state_dict(), } Agent.save_params(self, params, n_episode)
def __init__( self, env: gym.Env, env_info: ConfigDict, args: argparse.Namespace, hyper_params: ConfigDict, learner_cfg: ConfigDict, noise_cfg: ConfigDict, log_cfg: ConfigDict, ): """Initialize. Args: env (gym.Env): openAI Gym environment args (argparse.Namespace): arguments including hyperparameters and training settings """ Agent.__init__(self, env, env_info, args, log_cfg) self.curr_state = np.zeros((1,)) self.total_step = 0 self.episode_step = 0 self.update_step = 0 self.i_episode = 0 self.hyper_params = hyper_params self.learner_cfg = learner_cfg self.learner_cfg.args = self.args self.learner_cfg.env_info = self.env_info self.learner_cfg.hyper_params = self.hyper_params self.learner_cfg.log_cfg = self.log_cfg self.learner_cfg.noise_cfg = noise_cfg self.learner_cfg.device = device # noise instance to make randomness of action self.exploration_noise = GaussianNoise( self.env_info.action_space.shape[0], noise_cfg.exploration_noise, noise_cfg.exploration_noise, ) if not self.args.test: # replay memory self.memory = ReplayBuffer( self.hyper_params.buffer_size, self.hyper_params.batch_size ) self.learner = build_learner(self.learner_cfg)
def __init__(
    self,
    env: gym.Env,  # for testing
    args: argparse.Namespace,
    log_cfg: ConfigDict,
    hyper_params: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
):
    """Initialize.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters
            and training settings
    """
    env_gen = env_generator(env.spec.id, args)
    env_multi = make_envs(env_gen, n_envs=hyper_params.n_workers)

    Agent.__init__(self, env, args, log_cfg)

    # np.int is a deprecated alias (removed in NumPy 1.24); use a concrete dtype
    self.episode_steps = np.zeros(hyper_params.n_workers, dtype=np.int64)
    self.states: list = []
    self.actions: list = []
    self.rewards: list = []
    self.values: list = []
    self.masks: list = []
    self.log_probs: list = []
    self.i_episode = 0
    self.next_state = np.zeros((1,))

    self.hyper_params = hyper_params
    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg

    if not self.args.test:
        self.env = env_multi

    self.state_dim = self.env.observation_space.shape
    self.action_dim = self.env.action_space.shape[0]

    self.epsilon = hyper_params.max_epsilon

    self._init_network()
def __init__( self, env: gym.Env, env_info: ConfigDict, args: argparse.Namespace, hyper_params: ConfigDict, learner_cfg: ConfigDict, log_cfg: ConfigDict, ): """Initialize. Args: env (gym.Env): openAI Gym environment args (argparse.Namespace): arguments including hyperparameters and training settings """ env_gen = env_generator(env.spec.id, args) env_multi = make_envs(env_gen, n_envs=hyper_params.n_workers) Agent.__init__(self, env, env_info, args, log_cfg) self.episode_steps = np.zeros(hyper_params.n_workers, dtype=np.int) self.states: list = [] self.actions: list = [] self.rewards: list = [] self.values: list = [] self.masks: list = [] self.log_probs: list = [] self.i_episode = 0 self.next_state = np.zeros((1, )) self.hyper_params = hyper_params self.learner_cfg = learner_cfg self.learner_cfg.args = self.args self.learner_cfg.env_info = self.env_info self.learner_cfg.hyper_params = self.hyper_params self.learner_cfg.log_cfg = self.log_cfg self.learner_cfg.device = device if not self.args.test: self.env = env_multi self.epsilon = hyper_params.max_epsilon self.learner = build_learner(self.learner_cfg)
def save_params(self, n_episode: int): # type: ignore """Save model and optimizer parameters.""" params = { "actor": self.actor.state_dict(), "qf_1": self.qf_1.state_dict(), "qf_2": self.qf_2.state_dict(), "vf": self.vf.state_dict(), "vf_target": self.vf_target.state_dict(), "actor_optim": self.actor_optim.state_dict(), "qf_1_optim": self.qf_1_optim.state_dict(), "qf_2_optim": self.qf_2_optim.state_dict(), "vf_optim": self.vf_optim.state_dict(), } if self.hyper_params.auto_entropy_tuning: params["alpha_optim"] = self.alpha_optim.state_dict() Agent.save_params(self, params, n_episode)
def __init__( self, env: gym.Env, args: argparse.Namespace, log_cfg: ConfigDict, hyper_params: ConfigDict, backbone: ConfigDict, head: ConfigDict, optim_cfg: ConfigDict, ): """Initialize. Args: env (gym.Env): openAI Gym environment args (argparse.Namespace): arguments including hyperparameters and training settings """ Agent.__init__(self, env, args, log_cfg) self.curr_state = np.zeros((1, )) self.total_step = 0 self.episode_step = 0 self.update_step = 0 self.i_episode = 0 self.hyper_params = hyper_params self.backbone_cfg = backbone self.head_cfg = head self.optim_cfg = optim_cfg self.state_dim = self.env.observation_space.shape self.action_dim = self.env.action_space.shape[0] # target entropy target_entropy = -np.prod((self.action_dim, )).item() # heuristic # automatic entropy tuning if hyper_params.auto_entropy_tuning: self.target_entropy = target_entropy self.log_alpha = torch.zeros(1, requires_grad=True, device=device) self.alpha_optim = optim.Adam([self.log_alpha], lr=optim_cfg.lr_entropy) self._initialize() self._init_network()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg

    self._initialize()
def load_params(self, path: str): """Load model and optimizer parameters.""" Agent.load_params(self, path) params = torch.load(path) self.actor.load_state_dict(params["actor"]) self.qf_1.load_state_dict(params["qf_1"]) self.qf_2.load_state_dict(params["qf_2"]) self.vf.load_state_dict(params["vf"]) self.vf_target.load_state_dict(params["vf_target"]) self.actor_optim.load_state_dict(params["actor_optim"]) self.qf_1_optim.load_state_dict(params["qf_1_optim"]) self.qf_2_optim.load_state_dict(params["qf_2_optim"]) self.vf_optim.load_state_dict(params["vf_optim"]) if self.hyper_params.auto_entropy_tuning: self.alpha_optim.load_state_dict(params["alpha_optim"]) print("[INFO] loaded the model and optimizer from", path)
def __init__( self, env: gym.Env, args: argparse.Namespace, log_cfg: ConfigDict, hyper_params: ConfigDict, backbone: ConfigDict, head: ConfigDict, optim_cfg: ConfigDict, ): """Initialize.""" Agent.__init__(self, env, args, log_cfg) self.curr_state = np.zeros(1) self.episode_step = 0 self.total_step = 0 self.i_episode = 0 self.hyper_params = hyper_params self.optim_cfg = optim_cfg self.backbone_cfg = backbone self.head_cfg = head self.state_dim = self.env.observation_space.shape self.action_dim = self.env.action_space.n self.per_beta = hyper_params.per_beta self.use_conv = len(self.state_dim) > 1 self.use_n_step = hyper_params.n_step > 1 if head.configs.use_noisy_net: self.max_epsilon = 0.0 self.min_epsilon = 0.0 self.epsilon = 0.0 else: self.max_epsilon = hyper_params.max_epsilon self.min_epsilon = hyper_params.min_epsilon self.epsilon = hyper_params.max_epsilon self._initialize() self._init_network()
def __init__( self, env: gym.Env, env_info: ConfigDict, args: argparse.Namespace, hyper_params: ConfigDict, learner_cfg: ConfigDict, log_cfg: ConfigDict, ): """Initialize.""" Agent.__init__(self, env, env_info, args, log_cfg) self.curr_state = np.zeros(1) self.episode_step = 0 self.i_episode = 0 self.hyper_params = hyper_params self.learner_cfg = learner_cfg self.learner_cfg.args = self.args self.learner_cfg.env_info = self.env_info self.learner_cfg.hyper_params = self.hyper_params self.learner_cfg.log_cfg = self.log_cfg self.learner_cfg.device = device self.per_beta = hyper_params.per_beta self.use_n_step = hyper_params.n_step > 1 if self.learner_cfg.head.configs.use_noisy_net: self.max_epsilon = 0.0 self.min_epsilon = 0.0 self.epsilon = 0.0 else: self.max_epsilon = hyper_params.max_epsilon self.min_epsilon = hyper_params.min_epsilon self.epsilon = hyper_params.max_epsilon self._initialize()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )
    env_multi = (
        env
        if is_test
        else self.make_parallel_env(max_episode_steps, hyper_params.n_workers)
    )

    # np.int is a deprecated alias (removed in NumPy 1.24); use a concrete dtype
    self.episode_steps = np.zeros(hyper_params.n_workers, dtype=np.int64)
    self.states: list = []
    self.actions: list = []
    self.rewards: list = []
    self.values: list = []
    self.masks: list = []
    self.log_probs: list = []
    self.i_episode = 0
    self.next_state = np.zeros((1,))

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg

    if not self.is_test:
        self.env = env_multi

    self.epsilon = hyper_params.max_epsilon

    output_size = (
        self.env_info.action_space.n
        if self.is_discrete
        else self.env_info.action_space.shape[0]
    )
    build_args = dict(
        hyper_params=self.hyper_params,
        log_cfg=self.log_cfg,
        env_name=self.env_info.name,
        state_size=self.env_info.observation_space.shape,
        output_size=output_size,
        is_test=self.is_test,
        load_from=self.load_from,
    )
    self.learner = build_learner(self.learner_cfg, build_args)
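# --------------------------------------------------------------------------
# Hedged example: the discrete/continuous output_size switch above, in
# isolation. The environment name is illustrative.
import gym

env = gym.make("CartPole-v1")  # Discrete(2) action space
is_discrete = isinstance(env.action_space, gym.spaces.Discrete)
output_size = env.action_space.n if is_discrete else env.action_space.shape[0]
assert output_size == 2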