def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    log_cfg: ConfigDict,
    hyper_params: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
):
    """Initialize: store configs, reset episode counters, and build networks."""
    Agent.__init__(self, env, args, log_cfg)

    # Per-episode bookkeeping.
    self.transition: list = list()
    self.episode_step = 0
    self.i_episode = 0

    # Configuration objects kept for network / optimizer construction.
    self.hyper_params = hyper_params
    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg

    # Environment dimensions used to size the networks.
    self.state_dim = self.env.observation_space.shape
    self.action_dim = self.env.action_space.shape[0]

    self._init_network()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    args: argparse.Namespace,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
):
    """Initialize.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and
            training settings
    """
    Agent.__init__(self, env, env_info, args, log_cfg)

    # Step / episode counters and last observed state.
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params

    # Propagate shared settings into the learner config so the learner
    # can later be built from a single ConfigDict.
    self.learner_cfg = learner_cfg
    self.learner_cfg.args = self.args
    self.learner_cfg.env_info = self.env_info
    self.learner_cfg.hyper_params = self.hyper_params
    self.learner_cfg.log_cfg = self.log_cfg
    self.learner_cfg.device = device

    self._initialize()
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    log_cfg: ConfigDict,
    hyper_params: ConfigDict,
    network_cfg: ConfigDict,
    optim_cfg: ConfigDict,
    noise_cfg: ConfigDict,
):
    """Initialize: store configs, set up OU exploration noise, build networks."""
    Agent.__init__(self, env, args, log_cfg)

    # Step / episode counters and last observed state.
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0

    # Configuration objects.
    self.hyper_params = hyper_params
    self.network_cfg = network_cfg
    self.optim_cfg = optim_cfg

    # Environment dimensions used to size the networks.
    self.state_dim = self.env.observation_space.shape[0]
    self.action_dim = self.env.action_space.shape[0]

    # Ornstein-Uhlenbeck process supplies temporally correlated
    # exploration noise for the continuous action space.
    self.noise = OUNoise(
        self.action_dim,
        theta=noise_cfg.ou_noise_theta,
        sigma=noise_cfg.ou_noise_sigma,
    )

    self._initialize()
    self._init_network()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    args: argparse.Namespace,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    noise_cfg: ConfigDict,
    log_cfg: ConfigDict,
):
    """Initialize: store configs and set up OU exploration noise."""
    Agent.__init__(self, env, env_info, args, log_cfg)

    # Step / episode counters and last observed state.
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params

    # Propagate shared settings into the learner config so the learner
    # can later be built from a single ConfigDict.
    self.learner_cfg = learner_cfg
    self.learner_cfg.args = self.args
    self.learner_cfg.env_info = self.env_info
    self.learner_cfg.hyper_params = self.hyper_params
    self.learner_cfg.log_cfg = self.log_cfg
    self.learner_cfg.noise_cfg = noise_cfg
    self.learner_cfg.device = device

    # Ornstein-Uhlenbeck process supplies temporally correlated
    # exploration noise for the continuous action space.
    self.noise = OUNoise(
        env_info.action_space.shape[0],
        theta=noise_cfg.ou_noise_theta,
        sigma=noise_cfg.ou_noise_sigma,
    )

    self._initialize()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    args: argparse.Namespace,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
):
    """Initialize: store configs and build the learner."""
    Agent.__init__(self, env, env_info, args, log_cfg)

    # Per-episode bookkeeping.
    self.transition: list = list()
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params

    # Propagate shared settings into the learner config before building it.
    self.learner_cfg = learner_cfg
    self.learner_cfg.args = self.args
    self.learner_cfg.env_info = self.env_info
    self.learner_cfg.hyper_params = self.hyper_params
    self.learner_cfg.log_cfg = self.log_cfg
    self.learner_cfg.device = device

    self.learner = build_learner(self.learner_cfg)
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize: store configs, set epsilon schedule, and build internals."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )

    # Episode bookkeeping and last observed state.
    self.curr_state = np.zeros(1)
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg

    # PER importance-sampling exponent and n-step flag.
    self.per_beta = hyper_params.per_beta
    self.use_n_step = hyper_params.n_step > 1

    # With a noisy network, exploration comes from parameter noise,
    # so epsilon-greedy exploration is disabled entirely.
    use_noisy_net = self.learner_cfg.head.configs.use_noisy_net
    self.max_epsilon = 0.0 if use_noisy_net else hyper_params.max_epsilon
    self.min_epsilon = 0.0 if use_noisy_net else hyper_params.min_epsilon
    self.epsilon = self.max_epsilon

    self._initialize()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    noise_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize: store configs and set up OU exploration noise."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )

    # Step / episode counters and last observed state.
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg
    self.noise_cfg = noise_cfg

    # Ornstein-Uhlenbeck process supplies temporally correlated
    # exploration noise for the continuous action space.
    self.noise = OUNoise(
        env_info.action_space.shape[0],
        theta=noise_cfg.ou_noise_theta,
        sigma=noise_cfg.ou_noise_sigma,
    )

    self._initialize()
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    log_cfg: ConfigDict,
    hyper_params: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
    noise_cfg: ConfigDict,
):
    """Initialize.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and
            training settings
    """
    Agent.__init__(self, env, args, log_cfg)

    # Step / episode counters and last observed state.
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.update_step = 0
    self.i_episode = 0

    # Configuration objects.
    self.hyper_params = hyper_params
    self.noise_cfg = noise_cfg
    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg

    # Environment dimensions used to size the networks.
    self.state_dim = self.env.observation_space.shape
    self.action_dim = self.env.action_space.shape[0]

    # Gaussian noise randomizes the behavior action; a separate noise is
    # used to smooth the target policy. NOTE(review): the same value is
    # passed for both sigma arguments, so the noise scale stays constant.
    self.exploration_noise = GaussianNoise(
        self.action_dim,
        noise_cfg.exploration_noise,
        noise_cfg.exploration_noise,
    )
    self.target_policy_noise = GaussianNoise(
        self.action_dim,
        noise_cfg.target_policy_noise,
        noise_cfg.target_policy_noise,
    )

    # Replay memory is only needed when training.
    if not self.args.test:
        self.memory = ReplayBuffer(
            self.hyper_params.buffer_size, self.hyper_params.batch_size
        )

    self._init_network()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize: store configs and build the learner."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )

    # Per-episode bookkeeping.
    self.transition: list = list()
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg

    # Everything the learner needs to build its networks and restore state.
    build_args = dict(
        hyper_params=self.hyper_params,
        log_cfg=self.log_cfg,
        env_name=self.env_info.name,
        state_size=self.env_info.observation_space.shape,
        output_size=self.env_info.action_space.shape[0],
        is_test=self.is_test,
        load_from=self.load_from,
    )
    self.learner = build_learner(self.learner_cfg, build_args)
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize: store configs, build the learner and the rollout memory."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )

    # Episode bookkeeping.
    self.episode_step = 0
    self.i_episode = 0
    self.episode_num = episode_num

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg

    # Everything the learner needs to build its networks and restore state.
    build_args = dict(
        hyper_params=hyper_params,
        log_cfg=log_cfg,
        env_info=env_info,
        is_test=is_test,
        load_from=load_from,
    )
    self.learner = build_learner(self.learner_cfg, build_args)

    self.memory = ReplayMemory(
        self.hyper_params.buffer_size, self.hyper_params.n_rollout
    )
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    args: argparse.Namespace,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    noise_cfg: ConfigDict,
    log_cfg: ConfigDict,
):
    """Initialize.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and
            training settings
    """
    Agent.__init__(self, env, env_info, args, log_cfg)

    # Step / episode counters and last observed state.
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.update_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params

    # Propagate shared settings into the learner config before building it.
    self.learner_cfg = learner_cfg
    self.learner_cfg.args = self.args
    self.learner_cfg.env_info = self.env_info
    self.learner_cfg.hyper_params = self.hyper_params
    self.learner_cfg.log_cfg = self.log_cfg
    self.learner_cfg.noise_cfg = noise_cfg
    self.learner_cfg.device = device

    # Gaussian noise randomizes the behavior action. NOTE(review): the same
    # value is passed for both sigma arguments, so the scale stays constant.
    self.exploration_noise = GaussianNoise(
        self.env_info.action_space.shape[0],
        noise_cfg.exploration_noise,
        noise_cfg.exploration_noise,
    )

    # Replay memory is only needed when training.
    if not self.args.test:
        self.memory = ReplayBuffer(
            self.hyper_params.buffer_size, self.hyper_params.batch_size
        )

    self.learner = build_learner(self.learner_cfg)
def __init__(
    self,
    env: gym.Env,  # single env kept for testing
    args: argparse.Namespace,
    log_cfg: ConfigDict,
    hyper_params: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
):
    """Initialize.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and
            training settings
    """
    env_gen = env_generator(env.spec.id, args)
    env_multi = make_envs(env_gen, n_envs=hyper_params.n_workers)

    Agent.__init__(self, env, args, log_cfg)

    # BUGFIX: `np.int` was deprecated in NumPy 1.20 and removed in 1.24;
    # it raises AttributeError on modern NumPy. Use an explicit dtype.
    self.episode_steps = np.zeros(hyper_params.n_workers, dtype=np.int64)

    # Rollout buffers, one entry per step across the parallel workers.
    self.states: list = []
    self.actions: list = []
    self.rewards: list = []
    self.values: list = []
    self.masks: list = []
    self.log_probs: list = []
    self.i_episode = 0
    self.next_state = np.zeros((1,))

    # Configuration objects.
    self.hyper_params = hyper_params
    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg

    # Train on the vectorized envs; keep the single env for evaluation.
    if not self.args.test:
        self.env = env_multi

    # Environment dimensions used to size the networks.
    self.state_dim = self.env.observation_space.shape
    self.action_dim = self.env.action_space.shape[0]

    self.epsilon = hyper_params.max_epsilon

    self._init_network()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    args: argparse.Namespace,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
):
    """Initialize.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and
            training settings
    """
    env_gen = env_generator(env.spec.id, args)
    env_multi = make_envs(env_gen, n_envs=hyper_params.n_workers)

    Agent.__init__(self, env, env_info, args, log_cfg)

    # BUGFIX: `np.int` was deprecated in NumPy 1.20 and removed in 1.24;
    # it raises AttributeError on modern NumPy. Use an explicit dtype.
    self.episode_steps = np.zeros(hyper_params.n_workers, dtype=np.int64)

    # Rollout buffers, one entry per step across the parallel workers.
    self.states: list = []
    self.actions: list = []
    self.rewards: list = []
    self.values: list = []
    self.masks: list = []
    self.log_probs: list = []
    self.i_episode = 0
    self.next_state = np.zeros((1,))

    self.hyper_params = hyper_params

    # Propagate shared settings into the learner config before building it.
    self.learner_cfg = learner_cfg
    self.learner_cfg.args = self.args
    self.learner_cfg.env_info = self.env_info
    self.learner_cfg.hyper_params = self.hyper_params
    self.learner_cfg.log_cfg = self.log_cfg
    self.learner_cfg.device = device

    # Train on the vectorized envs; keep the single env for evaluation.
    if not self.args.test:
        self.env = env_multi

    self.epsilon = hyper_params.max_epsilon

    self.learner = build_learner(self.learner_cfg)
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    log_cfg: ConfigDict,
    hyper_params: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
):
    """Initialize.

    Args:
        env (gym.Env): openAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters and
            training settings
    """
    Agent.__init__(self, env, args, log_cfg)

    # Step / episode counters and last observed state.
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.update_step = 0
    self.i_episode = 0

    # Configuration objects.
    self.hyper_params = hyper_params
    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg

    # Environment dimensions used to size the networks.
    self.state_dim = self.env.observation_space.shape
    self.action_dim = self.env.action_space.shape[0]

    # Heuristic target entropy: minus the action dimensionality.
    target_entropy = -np.prod((self.action_dim,)).item()

    # Automatic entropy-temperature tuning: learn log(alpha) with its own
    # optimizer so the policy entropy is driven toward the target.
    if hyper_params.auto_entropy_tuning:
        self.target_entropy = target_entropy
        self.log_alpha = torch.zeros(1, requires_grad=True, device=device)
        self.alpha_optim = optim.Adam([self.log_alpha], lr=optim_cfg.lr_entropy)

    self._initialize()
    self._init_network()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize: store configs, reset counters, and build internals."""
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )

    # Step / episode counters and last observed state.
    self.curr_state = np.zeros((1,))
    self.total_step = 0
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg

    self._initialize()
def __init__(
    self,
    env: gym.Env,
    args: argparse.Namespace,
    log_cfg: ConfigDict,
    hyper_params: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
):
    """Initialize: store configs, set epsilon schedule, and build networks."""
    Agent.__init__(self, env, args, log_cfg)

    # Step / episode counters and last observed state.
    self.curr_state = np.zeros(1)
    self.episode_step = 0
    self.total_step = 0
    self.i_episode = 0

    # Configuration objects.
    self.hyper_params = hyper_params
    self.optim_cfg = optim_cfg
    self.backbone_cfg = backbone
    self.head_cfg = head

    # Environment dimensions; discrete action space exposes `n`.
    self.state_dim = self.env.observation_space.shape
    self.action_dim = self.env.action_space.n

    # PER importance-sampling exponent.
    self.per_beta = hyper_params.per_beta
    # Multi-dimensional observations select the conv backbone
    # (presumably image input — confirm against backbone configs).
    self.use_conv = len(self.state_dim) > 1
    self.use_n_step = hyper_params.n_step > 1

    # With a noisy network, exploration comes from parameter noise,
    # so epsilon-greedy exploration is disabled entirely.
    use_noisy_net = head.configs.use_noisy_net
    self.max_epsilon = 0.0 if use_noisy_net else hyper_params.max_epsilon
    self.min_epsilon = 0.0 if use_noisy_net else hyper_params.min_epsilon
    self.epsilon = self.max_epsilon

    self._initialize()
    self._init_network()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    args: argparse.Namespace,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
):
    """Initialize: store configs, set epsilon schedule, and build internals."""
    Agent.__init__(self, env, env_info, args, log_cfg)

    # Episode bookkeeping and last observed state.
    self.curr_state = np.zeros(1)
    self.episode_step = 0
    self.i_episode = 0

    self.hyper_params = hyper_params

    # Propagate shared settings into the learner config before building.
    self.learner_cfg = learner_cfg
    self.learner_cfg.args = self.args
    self.learner_cfg.env_info = self.env_info
    self.learner_cfg.hyper_params = self.hyper_params
    self.learner_cfg.log_cfg = self.log_cfg
    self.learner_cfg.device = device

    # PER importance-sampling exponent and n-step flag.
    self.per_beta = hyper_params.per_beta
    self.use_n_step = hyper_params.n_step > 1

    # With a noisy network, exploration comes from parameter noise,
    # so epsilon-greedy exploration is disabled entirely.
    use_noisy_net = self.learner_cfg.head.configs.use_noisy_net
    self.max_epsilon = 0.0 if use_noisy_net else hyper_params.max_epsilon
    self.min_epsilon = 0.0 if use_noisy_net else hyper_params.min_epsilon
    self.epsilon = self.max_epsilon

    self._initialize()
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize.

    Builds parallel training environments (unless testing), the rollout
    buffers, and the learner.
    """
    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )

    # Single env for testing; parallel workers for training.
    env_multi = (
        env
        if is_test
        else self.make_parallel_env(max_episode_steps, hyper_params.n_workers)
    )

    # BUGFIX: `np.int` was deprecated in NumPy 1.20 and removed in 1.24;
    # it raises AttributeError on modern NumPy. Use an explicit dtype.
    self.episode_steps = np.zeros(hyper_params.n_workers, dtype=np.int64)

    # Rollout buffers, one entry per step across the parallel workers.
    self.states: list = []
    self.actions: list = []
    self.rewards: list = []
    self.values: list = []
    self.masks: list = []
    self.log_probs: list = []
    self.i_episode = 0
    self.next_state = np.zeros((1,))

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg

    if not self.is_test:
        self.env = env_multi

    self.epsilon = hyper_params.max_epsilon

    # Discrete action spaces expose `n`; continuous ones expose a shape.
    output_size = (
        self.env_info.action_space.n
        if self.is_discrete
        else self.env_info.action_space.shape[0]
    )
    build_args = dict(
        hyper_params=self.hyper_params,
        log_cfg=self.log_cfg,
        env_name=self.env_info.name,
        state_size=self.env_info.observation_space.shape,
        output_size=output_size,
        is_test=self.is_test,
        load_from=self.load_from,
    )
    self.learner = build_learner(self.learner_cfg, build_args)