def make_parallel_env(self, max_episode_steps, n_workers):
    """Create a vectorized set of training environments."""
    if "env_generator" in self.env_info.keys():
        env_gen = self.env_info.env_generator
    else:
        env_gen = env_generator(self.env.spec.id, max_episode_steps)
    env_multi = make_envs(env_gen, n_envs=n_workers)
    return env_multi
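# What the env_generator/make_envs pair provides, sketched with plain gym
# (>=0.15) under assumptions: a picklable per-worker factory plus one batched,
# subprocess-backed interface. The underscore-prefixed names are illustrative
# stand-ins, not the repo's implementation.
import gym
from gym.vector import AsyncVectorEnv


def _env_generator(env_id: str, max_episode_steps: int):
    """Return a zero-argument factory that builds one time-limited env."""

    def _make() -> gym.Env:
        env = gym.make(env_id)
        return gym.wrappers.TimeLimit(env, max_episode_steps=max_episode_steps)

    return _make


def _make_envs(env_gen, n_envs: int):
    """Step n_envs copies in lockstep behind a single vectorized API."""
    return AsyncVectorEnv([env_gen for _ in range(n_envs)])


# Usage: four workers whose reset() returns a (4, obs_dim) batch.
envs = _make_envs(_env_generator("CartPole-v1", 300), n_envs=4)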
def __init__(
    self,
    env: gym.Env,  # for testing
    args: argparse.Namespace,
    log_cfg: ConfigDict,
    hyper_params: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    optim_cfg: ConfigDict,
):
    """Initialize.

    Args:
        env (gym.Env): OpenAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters
            and training settings

    """
    env_gen = env_generator(env.spec.id, args)
    env_multi = make_envs(env_gen, n_envs=hyper_params.n_workers)

    Agent.__init__(self, env, args, log_cfg)

    self.episode_steps = np.zeros(hyper_params.n_workers, dtype=np.int64)
    self.states: list = []
    self.actions: list = []
    self.rewards: list = []
    self.values: list = []
    self.masks: list = []
    self.log_probs: list = []
    self.i_episode = 0
    self.next_state = np.zeros((1,))

    self.hyper_params = hyper_params
    self.backbone_cfg = backbone
    self.head_cfg = head
    self.optim_cfg = optim_cfg

    if not self.args.test:
        self.env = env_multi

    self.state_dim = self.env.observation_space.shape
    self.action_dim = self.env.action_space.shape[0]
    self.epsilon = hyper_params.max_epsilon

    self._init_network()
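# A hedged sketch of what the backbone/head/optim_cfg trio and _init_network
# imply: a feature backbone composed with a task head, and an optimizer built
# from optim_cfg. All class and parameter names here are assumptions for
# illustration, not the repo's actual modules.
import torch
import torch.nn as nn


class _Brain(nn.Module):
    """Backbone -> head composition mirroring the two-part network config."""

    def __init__(self, in_dim: int, hidden_dim: int, out_dim: int):
        super().__init__()
        self.backbone = nn.Sequential(nn.Linear(in_dim, hidden_dim), nn.ReLU())
        self.head = nn.Linear(hidden_dim, out_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.head(self.backbone(x))


# _init_network would build actor/critic along these lines, with the learning
# rate coming from optim_cfg:
actor = _Brain(in_dim=4, hidden_dim=64, out_dim=2)
actor_optim = torch.optim.Adam(actor.parameters(), lr=3e-4)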
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    args: argparse.Namespace,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
):
    """Initialize.

    Args:
        env (gym.Env): OpenAI Gym environment
        args (argparse.Namespace): arguments including hyperparameters
            and training settings

    """
    env_gen = env_generator(env.spec.id, args)
    env_multi = make_envs(env_gen, n_envs=hyper_params.n_workers)

    Agent.__init__(self, env, env_info, args, log_cfg)

    self.episode_steps = np.zeros(hyper_params.n_workers, dtype=np.int64)
    self.states: list = []
    self.actions: list = []
    self.rewards: list = []
    self.values: list = []
    self.masks: list = []
    self.log_probs: list = []
    self.i_episode = 0
    self.next_state = np.zeros((1,))

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg
    self.learner_cfg.args = self.args
    self.learner_cfg.env_info = self.env_info
    self.learner_cfg.hyper_params = self.hyper_params
    self.learner_cfg.log_cfg = self.log_cfg
    self.learner_cfg.device = device

    if not self.args.test:
        self.env = env_multi

    self.epsilon = hyper_params.max_epsilon

    self.learner = build_learner(self.learner_cfg)
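# The version above wires runtime state (args, env_info, device, ...) directly
# onto learner_cfg before calling build_learner. A minimal, assumption-laden
# sketch of that registry-backed builder pattern follows; LEARNERS and the
# SimpleNamespace stand-in for ConfigDict are illustrative only.
from types import SimpleNamespace

LEARNERS = {}  # hypothetical name -> class registry


def _build_learner(cfg):
    """Look the learner class up by cfg.type and construct it from cfg."""
    return LEARNERS[cfg.type](cfg)


class _A2CLearner:
    def __init__(self, cfg):
        self.cfg = cfg  # args, env_info, hyper_params, log_cfg, device ride along


LEARNERS["A2CLearner"] = _A2CLearner
cfg = SimpleNamespace(type="A2CLearner")
cfg.device = "cpu"  # attached field by field, as the __init__ above does
learner = _build_learner(cfg)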
def __init__(
    self,
    env: gym.Env,
    env_info: ConfigDict,
    hyper_params: ConfigDict,
    learner_cfg: ConfigDict,
    log_cfg: ConfigDict,
    is_test: bool,
    load_from: str,
    is_render: bool,
    render_after: int,
    is_log: bool,
    save_period: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
):
    """Initialize."""
    env_gen = env_generator(env.spec.id, max_episode_steps)
    env_multi = make_envs(env_gen, n_envs=hyper_params.n_workers)

    Agent.__init__(
        self,
        env,
        env_info,
        log_cfg,
        is_test,
        load_from,
        is_render,
        render_after,
        is_log,
        save_period,
        episode_num,
        max_episode_steps,
        interim_test_num,
    )

    self.episode_steps = np.zeros(hyper_params.n_workers, dtype=np.int64)
    self.states: list = []
    self.actions: list = []
    self.rewards: list = []
    self.values: list = []
    self.masks: list = []
    self.log_probs: list = []
    self.i_episode = 0
    self.next_state = np.zeros((1,))

    self.hyper_params = hyper_params
    self.learner_cfg = learner_cfg

    if not self.is_test:
        self.env = env_multi

    self.epsilon = hyper_params.max_epsilon

    build_args = dict(
        hyper_params=self.hyper_params,
        log_cfg=self.log_cfg,
        env_name=self.env_info.name,
        state_size=self.env_info.observation_space.shape,
        output_size=self.env_info.action_space.shape[0],
        is_test=self.is_test,
        load_from=self.load_from,
    )
    self.learner = build_learner(self.learner_cfg, build_args)
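# Usage sketch for this final signature. The surrounding class is unnamed in
# the excerpt, so `A2CAgent` and every config value below are placeholders,
# with SimpleNamespace standing in for ConfigDict. Note the design shift from
# the previous version: learner_cfg stays untouched and runtime values travel
# in the separate build_args dict instead of being mutated onto the config.
from types import SimpleNamespace
import gym

env = gym.make("Pendulum-v1")  # continuous actions, since action_space.shape[0] is used
env_info = SimpleNamespace(
    name=env.spec.id,
    observation_space=env.observation_space,
    action_space=env.action_space,
)

agent = A2CAgent(  # placeholder class name
    env=env,
    env_info=env_info,
    hyper_params=SimpleNamespace(n_workers=4, max_epsilon=1.0),
    learner_cfg=SimpleNamespace(type="A2CLearner"),
    log_cfg=SimpleNamespace(agent="A2C"),
    is_test=False,
    load_from="",  # checkpoint path when resuming, empty otherwise
    is_render=False,
    render_after=0,
    is_log=True,
    save_period=100,
    episode_num=1000,
    max_episode_steps=300,
    interim_test_num=5,
)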