def __init__(self, params):
    """Set up logging, seeding, the train/eval environments and the agent.

    Args:
        params: Mutable dict of run settings. Keys read here: ``logdir``,
            ``seed``, ``no_gpu``, ``which_gpu``, ``env_name``, ``ep_len``,
            ``agent_class`` and ``agent_params``. Optional keys:
            ``env_wrappers`` (a callable applied to each env) and
            ``non_atari_colab_env`` (which additionally requires
            ``video_log_freq``). ``ep_len`` and the ``discrete``,
            ``ac_dim`` and ``ob_dim`` entries of ``agent_params`` are
            written back into the dict.

    Side effects:
        Seeds NumPy and PyTorch, calls ``ptu.init_gpu``, switches
        matplotlib to the "Agg" backend when ``"pointmass"`` is not a
        substring of the env name, and sets the module-level
        ``MAX_VIDEO_LEN``.
    """
    #############
    ## INIT
    #############

    # Get params, create logger
    self.params = params
    self.logger = Logger(self.params["logdir"])

    # Set random seeds
    seed = self.params["seed"]
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu(use_gpu=not self.params["no_gpu"],
                 gpu_id=self.params["which_gpu"])

    #############
    ## ENV
    #############

    # Make the gym environments (one for exploration, one for evaluation)
    register_custom_envs()
    self.env = gym.make(self.params["env_name"])
    self.eval_env = gym.make(self.params["env_name"])
    if "pointmass" not in self.params["env_name"]:
        # NOTE(review): the set_logdir calls are grouped under this
        # condition based on statement order in the flattened source;
        # confirm against the intended layout.
        import matplotlib
        matplotlib.use("Agg")
        self.env.set_logdir(self.params["logdir"] + "/expl_")
        self.eval_env.set_logdir(self.params["logdir"] + "/eval_")

    if "env_wrappers" in self.params:
        # These operations are currently only for Atari envs
        self.env = wrappers.Monitor(
            self.env,
            os.path.join(self.params["logdir"], "gym"),
            force=True,
        )
        self.eval_env = wrappers.Monitor(
            self.eval_env,
            os.path.join(self.params["logdir"], "gym"),
            force=True,
        )
        self.env = params["env_wrappers"](self.env)
        self.eval_env = params["env_wrappers"](self.eval_env)
        # Reward trackers start at NaN / -inf.
        self.mean_episode_reward = float("nan")
        self.best_mean_episode_reward = -float("inf")

    if ("non_atari_colab_env" in self.params
            and self.params["video_log_freq"] > 0):
        self.env = wrappers.Monitor(
            self.env,
            os.path.join(self.params["logdir"], "gym"),
            write_upon_reset=True,
        )
        self.eval_env = wrappers.Monitor(
            self.eval_env,
            os.path.join(self.params["logdir"], "gym"),
            write_upon_reset=True,
        )
        self.mean_episode_reward = float("nan")
        self.best_mean_episode_reward = -float("inf")

    self.env.seed(seed)
    self.eval_env.seed(seed)

    # Maximum length for episodes: a falsy ep_len falls back to the env spec
    self.params["ep_len"] = (self.params["ep_len"]
                             or self.env.spec.max_episode_steps)
    global MAX_VIDEO_LEN
    MAX_VIDEO_LEN = self.params["ep_len"]

    # Is the action space discrete or continuous?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    # Are the observations images? (more than two shape dimensions)
    img = len(self.env.observation_space.shape) > 2

    self.params["agent_params"]["discrete"] = discrete

    # Observation and action sizes
    if img:
        ob_dim = self.env.observation_space.shape
    else:
        ob_dim = self.env.observation_space.shape[0]
    if discrete:
        ac_dim = self.env.action_space.n
    else:
        ac_dim = self.env.action_space.shape[0]
    self.params["agent_params"]["ac_dim"] = ac_dim
    self.params["agent_params"]["ob_dim"] = ob_dim

    # Simulation timestep, will be used for video saving.
    # dir() membership is kept deliberately: it can differ from hasattr()
    # for objects that resolve attributes dynamically.
    if "model" in dir(self.env):
        self.fps = 1 / self.env.model.opt.timestep
    elif "env_wrappers" in self.params:
        self.fps = 30  # This is not actually used when using the Monitor wrapper
    elif "video.frames_per_second" in self.env.env.metadata:
        self.fps = self.env.env.metadata["video.frames_per_second"]
    else:
        self.fps = 10

    #############
    ## AGENT
    #############

    agent_class = self.params["agent_class"]
    self.agent = agent_class(self.env, self.params["agent_params"])
def __init__(self, params):
    """Set up logging, seeding, the train/eval environments and the agent.

    Args:
        params: Mutable dict of run settings. Keys read here: ``logdir``,
            ``seed``, ``env_name``, ``ep_len``, ``agent_class`` and
            ``agent_params``. Optional keys: ``env_wrappers`` (a callable
            applied to each env) and ``non_atari_colab_env`` (which
            additionally requires ``video_log_freq``). ``ep_len`` and the
            ``discrete``, ``ac_dim`` and ``ob_dim`` entries of
            ``agent_params`` are written back into the dict.

    Side effects:
        Seeds NumPy and calls ``tf.random.set_seed``, switches matplotlib
        to the 'Agg' backend when ``'pointmass'`` is not a substring of the
        env name, and sets the module-level ``MAX_VIDEO_LEN``.
    """
    #############
    ## INIT
    #############

    # Get params, create logger
    self.params = params
    self.logger = Logger(self.params['logdir'])

    # Set random seeds
    seed = self.params['seed']
    np.random.seed(seed)
    tf.random.set_seed(seed)

    #############
    ## ENV
    #############

    # Make the gym environments (one for exploration, one for evaluation)
    register_custom_envs()
    self.env = gym.make(self.params['env_name'])
    self.eval_env = gym.make(self.params['env_name'])
    if 'pointmass' not in self.params['env_name']:
        # NOTE(review): the set_logdir calls are grouped under this
        # condition based on statement order in the flattened source;
        # confirm against the intended layout.
        import matplotlib
        matplotlib.use('Agg')
        self.env.set_logdir(self.params['logdir'] + '/expl_')
        self.eval_env.set_logdir(self.params['logdir'] + '/eval_')

    if 'env_wrappers' in self.params:
        # These operations are currently only for Atari envs
        self.env = wrappers.Monitor(
            self.env,
            os.path.join(self.params['logdir'], "gym"),
            force=True,
        )
        self.eval_env = wrappers.Monitor(
            self.eval_env,
            os.path.join(self.params['logdir'], "gym"),
            force=True,
        )
        self.env = params['env_wrappers'](self.env)
        self.eval_env = params['env_wrappers'](self.eval_env)
        # Reward trackers start at NaN / -inf.
        self.mean_episode_reward = float('nan')
        self.best_mean_episode_reward = -float('inf')

    if ('non_atari_colab_env' in self.params
            and self.params['video_log_freq'] > 0):
        self.env = wrappers.Monitor(
            self.env,
            os.path.join(self.params['logdir'], "gym"),
            write_upon_reset=True,
        )
        self.eval_env = wrappers.Monitor(
            self.eval_env,
            os.path.join(self.params['logdir'], "gym"),
            write_upon_reset=True,
        )
        self.mean_episode_reward = float('nan')
        self.best_mean_episode_reward = -float('inf')

    self.env.seed(seed)
    self.eval_env.seed(seed)

    # Maximum length for episodes: a falsy ep_len falls back to the env spec
    self.params['ep_len'] = (self.params['ep_len']
                             or self.env.spec.max_episode_steps)
    global MAX_VIDEO_LEN
    MAX_VIDEO_LEN = self.params['ep_len']

    # Is the action space discrete or continuous?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    # Are the observations images? (more than two shape dimensions)
    img = len(self.env.observation_space.shape) > 2

    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes
    if img:
        ob_dim = self.env.observation_space.shape
    else:
        ob_dim = self.env.observation_space.shape[0]
    if discrete:
        ac_dim = self.env.action_space.n
    else:
        ac_dim = self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    # Simulation timestep, will be used for video saving.
    # dir() membership is kept deliberately: it can differ from hasattr()
    # for objects that resolve attributes dynamically.
    if 'model' in dir(self.env):
        self.fps = 1 / self.env.model.opt.timestep
    elif 'env_wrappers' in self.params:
        self.fps = 30  # This is not actually used when using the Monitor wrapper
    elif 'video.frames_per_second' in self.env.env.metadata:
        self.fps = self.env.env.metadata['video.frames_per_second']
    else:
        self.fps = 10

    #############
    ## AGENT
    #############

    agent_class = self.params['agent_class']
    self.agent = agent_class(self.env, self.params['agent_params'])
def __init__(self, params):
    """Set up logging, seeding, the environment and the agent.

    Args:
        params: Mutable dict of run settings. Keys read here: ``logdir``,
            ``seed``, ``no_gpu``, ``which_gpu``, ``env_name``, ``ep_len``,
            ``exp_name``, ``agent_class`` and ``agent_params``. Optional
            keys: ``env_wrappers`` (a callable applied to the env; also
            requires ``video_log_freq``) and ``non_atari_colab_env``
            (also requires ``video_log_freq``). ``lr`` is read only when
            ``exp_name`` contains ``'hparam'``. ``ep_len`` and the
            ``discrete``, ``ac_dim``, ``ob_dim`` (and, for hparam runs,
            ``optimizer_spec``) entries of ``agent_params`` are written
            back into the dict.

    Side effects:
        Seeds NumPy and PyTorch, calls ``ptu.init_gpu``, switches
        matplotlib to the 'Agg' backend unless the env is
        ``'obstacles-cs285-v0'``, sets the module-level ``MAX_VIDEO_LEN``
        and prints two status lines for hparam runs.
    """
    #############
    ## INIT
    #############

    # Get params, create logger
    self.params = params
    self.logger = Logger(self.params['logdir'])

    # Set random seeds
    seed = self.params['seed']
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu(
        use_gpu=not self.params['no_gpu'],
        gpu_id=self.params['which_gpu'],
    )

    #############
    ## ENV
    #############

    # Make the gym environment
    register_custom_envs()
    self.env = gym.make(self.params['env_name'])
    if 'env_wrappers' in self.params:
        # These operations are currently only for Atari envs
        self.env = wrappers.Monitor(
            self.env,
            os.path.join(self.params['logdir'], "gym"),
            force=True,
            video_callable=(None if self.params['video_log_freq'] > 0 else False),
        )
        self.env = params['env_wrappers'](self.env)
        # Reward trackers start at NaN / -inf.
        self.mean_episode_reward = float('nan')
        self.best_mean_episode_reward = -float('inf')

    if 'non_atari_colab_env' in self.params and self.params['video_log_freq'] > 0:
        # video_log_freq > 0 is guaranteed by the condition above, so the
        # original `None if ... > 0 else False` always evaluated to None.
        self.env = wrappers.Monitor(
            self.env,
            os.path.join(self.params['logdir'], "gym"),
            force=True,
            video_callable=None,
        )
        self.mean_episode_reward = float('nan')
        self.best_mean_episode_reward = -float('inf')

    self.env.seed(seed)

    # import plotting (locally if 'obstacles' env)
    if self.params['env_name'] != 'obstacles-cs285-v0':
        import matplotlib
        matplotlib.use('Agg')

    # Maximum length for episodes: a falsy ep_len falls back to the env spec
    self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
    global MAX_VIDEO_LEN
    MAX_VIDEO_LEN = self.params['ep_len']

    # Is the action space discrete or continuous?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    # Are the observations images? (more than two shape dimensions)
    img = len(self.env.observation_space.shape) > 2

    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes
    if img:
        ob_dim = self.env.observation_space.shape
    else:
        ob_dim = self.env.observation_space.shape[0]
    if discrete:
        ac_dim = self.env.action_space.n
    else:
        ac_dim = self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    # Simulation timestep, will be used for video saving.
    # dir() membership is kept deliberately: it can differ from hasattr()
    # for objects that resolve attributes dynamically.
    if 'model' in dir(self.env):
        self.fps = 1 / self.env.model.opt.timestep
    elif 'env_wrappers' in self.params:
        self.fps = 30  # This is not actually used when using the Monitor wrapper
    elif 'video.frames_per_second' in self.env.env.metadata:
        self.fps = self.env.env.metadata['video.frames_per_second']
    else:
        self.fps = 10

    #############
    ## AGENT
    #############

    agent_class = self.params['agent_class']
    if 'hparam' in self.params['exp_name']:
        # Hyperparameter-sweep runs override the optimizer before the
        # agent is built.
        print('changing optimizer')
        self.params['agent_params']['optimizer_spec'] = lander_optimizer(self.params['lr'])
        print('using lr = ', self.params['lr'])
    # Both arms of the original if/else built the agent identically.
    self.agent = agent_class(self.env, self.params['agent_params'])