def make_env():
    """Build the preprocessed MsPacman environment.

    Returns:
        A PreprocessImage-wrapped env: 4-frame action repeat, resized to
        80x80, converted to grayscale.
    """
    # NOTE: the original code reassigned env_spec.id to the identical value
    # 'MsPacman-v0'; that was a no-op and has been removed.
    env = gym.spec('MsPacman-v0').make()
    return PreprocessImage(
        SkipWrapper(4)(env), width=80, height=80, grayscale=True)
def test_get_info(self):
    """get_info() is None until the first step, then an empty dict."""
    wrapped = alf_gym_wrapper.AlfGymWrapper(gym.spec('CartPole-v1').make())
    # Before reset and after reset (but before stepping) there is no info.
    self.assertEqual(None, wrapped.get_info())
    wrapped.reset()
    self.assertEqual(None, wrapped.get_info())
    # After one step, gym's (empty) info dict is exposed.
    wrapped.step(np.array(0, dtype=np.int64))
    self.assertEqual({}, wrapped.get_info())
def WrappedGymCoreSyncEnv(gym_core_id, fps=60, rewarder_observation=False):
    """Build a synchronous gym-core VNC env for `gym_core_id`.

    Args:
        gym_core_id: id of the underlying gym-core environment.
        fps: frame rate for the VNC environment.
        rewarder_observation: if True, observations come from the rewarder
            channel instead of VNC pixels.
    """
    spec = gym.spec(gym_core_id)
    env = gym_core_sync.GymCoreSync(BlockingReset(wrap(envs.VNCEnv(fps=fps))))
    if rewarder_observation:
        return GymCoreObservation(env, gym_core_id=gym_core_id)
    # Atari frames served over VNC carry a border; crop it off.
    if spec._entry_point.startswith('gym.envs.atari:'):
        env = CropAtari(env)
    return env
def make_env():
    """Build the preprocessed DoomBasic environment (discrete actions, 4-frame skip, 80x80 grayscale)."""
    spec = gym.spec('ppaquette/DoomBasic-v0')
    # Shorten the registered id before instantiating.
    spec.id = 'DoomBasic-v0'
    doom = spec.make()
    discrete = ToDiscrete("minimal")(doom)
    return PreprocessImage(
        SkipWrapper(4)(discrete), width=80, height=80, grayscale=True)
def make_env(arg_env_spec, arg_env_spec_id):
    """Build a preprocessed env from a registered spec, overriding its id.

    Args:
        arg_env_spec: the registered spec name to look up.
        arg_env_spec_id: id to assign to the spec before make().
    """
    spec = gym.spec(arg_env_spec)
    spec.id = arg_env_spec_id
    base = spec.make()
    wrapped = SkipWrapper(4)(ToDiscrete("minimal")(base))
    return PreprocessImage(wrapped, width=80, height=80, grayscale=True)
def query_environment(name):
    """Print the key properties of the registered gym environment `name`."""
    env = gym.make(name)
    spec = gym.spec(name)
    # Table-driven printing keeps label/value pairs in one place.
    for label, value in (
            ("Action Space", env.action_space),
            ("Observation Space", env.observation_space),
            ("Max Episode Steps", spec.max_episode_steps),
            ("Nondeterministic", spec.nondeterministic),
            ("Reward Range", env.reward_range),
            ("Reward Threshold", spec.reward_threshold)):
        print(f"{label}: {value}")
def env_factory(env_name):
    """Create a TFPyEnvironment for `env_name`.

    Specs registered without a step limit (0 or None) get a default
    1000-step TimeLimit wrapper.
    """
    gym_env = gym.make(env_name)
    spec = gym.spec(env_name)
    if spec.max_episode_steps in (0, None):
        # Add TimeLimit wrapper.
        gym_env = time_limit.TimeLimit(gym_env, max_episode_steps=1000)
    return tf_py_environment.TFPyEnvironment(gym_wrapper.GymWrapper(gym_env))
def _thunk():
    """Build one seeded, preprocessed DoomBasic env (closure over `seed` and `rank`)."""
    spec = gym.spec('ppaquette/DoomBasic-v0')
    spec.id = 'DoomBasic-v0'
    env = spec.make()
    env.seed(seed + rank)
    env = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)))
    log_dir = logger.get_dir()
    if log_dir:
        # One monitor file per worker rank.
        monitor_file = os.path.join(log_dir, "{}.monitor.json".format(rank))
        env = bench.Monitor(env, monitor_file)
    gym.logger.setLevel(logging.WARN)
    return ScaleRewardEnv(env)
def testActionSpec(self):
    """OneHotActionWrapper exposes CartPole's 2 actions as a one-hot spec."""
    base_env = gym_wrapper.GymWrapper(gym.spec('CartPole-v1').make())
    one_hot_action_wrapper = wrappers.OneHotActionWrapper(base_env)
    want = array_spec.BoundedArraySpec(
        shape=(2, ),
        dtype=np.int64,
        minimum=0,
        maximum=1,
        name='one_hot_action_spec')
    self.assertEqual(one_hot_action_wrapper.action_spec(), want)
def test_wrapped_cartpole_reset(self):
    """reset() yields a FIRST step with zero reward, full discount, float32 obs."""
    env = gym_wrapper.GymWrapper(gym.spec('CartPole-v1').make())
    ts = env.reset()
    self.assertTrue(ts.is_first())
    self.assertEqual(0.0, ts.reward)
    self.assertEqual(1.0, ts.discount)
    # CartPole observations are 4-dim float32 vectors.
    self.assertEqual((4, ), ts.observation.shape)
    self.assertEqual(np.float32, ts.observation.dtype)
def test_wrapped_cartpole_transition(self):
    """A step after reset() is a MID transition with a reward and full discount."""
    env = gym_wrapper.GymWrapper(gym.spec('CartPole-v1').make())
    env.reset()
    ts = env.step(0)
    self.assertTrue(ts.is_mid())
    self.assertNotEqual(None, ts.reward)
    self.assertEqual(1.0, ts.discount)
    self.assertEqual((4, ), ts.observation.shape)
def __init__(self, tuning_parameters):
    """Initialize the gym-backed environment wrapper.

    Builds the env from ``self.env_id``, probes an initial observation to
    derive state shape, and caches action-space metadata. Note the order of
    operations matters: reset/render interplay below works around backend
    quirks (pybullet vs. classic gym).
    """
    EnvironmentWrapper.__init__(self, tuning_parameters)
    ports = (5200, 15200)  # NOTE(review): unused here — presumably for a remote backend; confirm
    # env parameters
    self.env = gym.make(self.env_id)
    self.env_id = self.env_id  # no-op self-assignment kept as-is
    if self.seed is not None:
        self.env.seed(self.seed)
    self.env_spec = gym.spec(self.env_id)
    self.none_counter = 0
    # Discrete iff the action space is not a Box (continuous) space.
    self.discrete_controls = type(
        self.env.action_space) != gym.spaces.box.Box
    # pybullet requires rendering before resetting the environment, but other gym environments (Pendulum) will crash
    try:
        if self.is_rendered:
            self.render()
    except:
        pass
    # Probe reset to obtain an observation for shape inference.
    o = self.reset(True)['observation']
    # render
    if self.is_rendered:
        self.render()
        # self.env.render()
    # Image states have rank > 1; vectors have a single dimension.
    self.is_state_type_image = len(o.shape) > 1
    if self.is_state_type_image:
        self.width = o.shape[1]
        self.height = o.shape[0]
    else:
        self.width = o.shape[0]
    self.actions_description = {}
    if self.discrete_controls:
        self.action_space_size = self.env.action_space.n
        self.action_space_abs_range = 0
    else:
        self.action_space_size = self.env.action_space.shape[0]
        self.action_space_high = self.env.action_space.high
        self.action_space_low = self.env.action_space.low
        # Per-dimension max absolute bound, used for action scaling.
        self.action_space_abs_range = np.maximum(
            np.abs(self.action_space_low), np.abs(self.action_space_high))
    # Identity action mapping: index i -> action i.
    self.actions = {i: i for i in range(self.action_space_size)}
    self.timestep_limit = self.env.spec.timestep_limit
    self.current_ale_lives = 0
    # Probe one step to count the info-dict measurement entries.
    self.measurements_size = len(self.step(0)['info'].keys())
    # env initialization
    self.observation = o
    self.reward = 0
    self.done = False
    self.last_action = self.actions[0]
def create_expansionai_env(env_id, video=False, params=None):
    """Configure and build an ExpansionAI environment.

    Args:
        env_id: registered environment id to configure and make.
        video: if True, wrap with a Monitor recording to 'test'.
        params: board configuration; a fresh ``Params()`` is used when None.
            (Previously the default was ``Params()`` evaluated once at
            definition time — a shared mutable default; fixed to a sentinel.)

    Returns:
        The configured (and possibly Monitor-wrapped) gym environment.
    """
    if params is None:
        params = Params()
    # Inject the board configuration through the registered spec's kwargs
    # so gym.make() passes them to the env constructor.
    gym.spec(env_id)._kwargs = {
        'armies': params.armies,
        'board_size': params.board_size,
        'offset_x': 0,
        'offset_y': 0
    }
    env = gym.make(env_id)
    if video:
        env = wrappers.Monitor(env, 'test', force=True, mode='training')
    return env
def _create_gym_with_wrapper(self, discount):
    """Make the configured gym env and wrap it with GymWrapper.

    Args:
        discount: discount factor passed through to the wrapper.
    """
    spec = gym.spec(self.model_config.gym_env_name)
    raw_env = spec.make()
    # simplify_box_bounds=True crashes environments whose Box boundaries
    # contain identical values, so it is explicitly disabled here.
    return gym_wrapper.GymWrapper(
        raw_env, discount=discount, simplify_box_bounds=False)
def test_observation_spec_changed_with_action(self):
    """HistoryWrapper(include_actions=True) stacks 3 frames of obs AND actions."""
    env = gym_wrapper.GymWrapper(gym.spec('CartPole-v1').make())
    base_obs_shape = env.observation_spec().shape
    base_action_shape = env.action_spec().shape
    history_env = wrappers.HistoryWrapper(env, 3, include_actions=True)
    spec = history_env.observation_spec()
    self.assertEqual((3,) + base_obs_shape, spec['observation'].shape)
    self.assertEqual((3,) + base_action_shape, spec['action'].shape)
def create_env(env_id, client_id, remotes, **kwargs):
    """Dispatch to the right env factory based on the spec's tags."""
    spec = gym.spec(env_id)
    if spec.tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)
    if spec.tags.get('atari', False) and spec.tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)
    # Assume atari.
    assert "." not in env_id  # universe environments have dots in names.
    return create_atari_env(env_id)
def create_env(env_id, client_id, remotes, **kwargs):
    """Dispatch on the spec's tags to pick the matching env factory."""
    spec = gym.spec(env_id)
    # spec.tags is a plain dict; .get returns False when a tag is absent.
    if spec.tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)
    if spec.tags.get('atari', False) and spec.tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)
    # Assume atari.
    assert "." not in env_id  # universe environments have dots in names.
    return create_atari_env(env_id)
def load(environment_name, env_id=None, max_instruction_length=80, one_token_per_step=False, discount=1.0, max_episode_steps=None, gym_env_wrappers=(), alf_env_wrappers=()):
    """Load a BabyAI environment and wrap it for ALF.

    A TimeLimit wrapper caps episodes at the registered default unless
    overridden.

    Args:
        environment_name (str): name of the environment to load.
        env_id (int): optional ID of the environment.
        max_instruction_length (int): maximum number of instruction words.
        one_token_per_step (bool): if True, word IDs are observed one per
            step (zeros after the instruction is exhausted); if False the
            whole word-ID array is observed every step.
        discount (float): discount for the environment.
        max_episode_steps (int): None -> use the spec's default limit;
            0 (or a spec without a limit) -> no limit.
        gym_env_wrappers (Iterable): gym_wrappers applied to the gym env.
        alf_env_wrappers (Iterable): alf_wrappers applied to the ALF env.

    Returns:
        An AlfEnvironment instance.
    """
    spec = gym.spec(environment_name)
    env = spec.make()
    if max_episode_steps is None:
        # Fall back to the spec's limit; 0 means "no limit".
        max_episode_steps = (spec.max_episode_steps
                             if spec.max_episode_steps is not None else 0)
    env = BabyAIWrapper(env, max_instruction_length, one_token_per_step)
    return wrap_env(
        env,
        env_id=env_id,
        discount=discount,
        max_episode_steps=max_episode_steps,
        gym_env_wrappers=gym_env_wrappers,
        alf_env_wrappers=alf_env_wrappers,
        image_channel_first=False)
def make_env(scenario, grayscale, input_shape):
    """Build a preprocessed ppaquette Doom env for `scenario`.

    Args:
        scenario: scenario name (becomes the spec id, without the
            'ppaquette/' prefix).
        grayscale: whether to convert frames to grayscale.
        input_shape: (width, height) to resize frames to.
    """
    width, height = input_shape
    spec = gym.spec('ppaquette/' + scenario)
    spec.id = scenario
    base = spec.make()
    wrapped = SkipWrapper(4)(ToDiscrete("minimal")(base))
    return PreprocessImage(
        wrapped, width=width, height=height, grayscale=grayscale)
def test_automatic_reset_after_done_not_using_reset_directly(self):
    """Stepping without an explicit reset() still auto-resets after done."""
    env = gym_wrapper.GymWrapper(gym.spec('CartPole-v1').make())
    # Drive the episode to termination without ever calling reset().
    ts = env.step(1)
    while not ts.is_last():
        ts = env.step(1)
    self.assertTrue(ts.is_last())
    # The next step must start a fresh episode.
    restart = env.step(0)
    self.assertTrue(restart.is_first())
def test_automatic_reset_after_done_not_using_reset_directly(self):
    """Stepping without reset() still auto-resets once the episode ends."""
    env = gym_wrapper.GymWrapper(gym.spec('CartPole-v1').make())
    ts = env.step(1)  # pytype: disable=wrong-arg-types
    while not ts.is_last():
        ts = env.step(np.array(1, dtype=np.int32))
    self.assertTrue(ts.is_last())
    restart = env.step(0)  # pytype: disable=wrong-arg-types
    self.assertTrue(restart.is_first())
def test_automatic_reset_after_done(self):
    """After a terminal step, the next step() starts a new episode."""
    env = gym_wrapper.GymWrapper(gym.spec('CartPole-v1').make())
    ts = env.reset()
    while not ts.is_last():
        ts = env.step(np.array(1, dtype=np.int32))
    self.assertTrue(ts.is_last())
    restart = env.step(0)
    self.assertTrue(restart.is_first())
def _init():
    """Create one seeded, wrapped bullet env for the closed-over body/template.

    Registers the env id on first use, optionally enables rendering for
    rank 0, applies the closure's wrapper list (BodyinfoWrapper gets the
    body index), and seeds env + action space per rank.
    """
    env_names = {
        "ant": "MyAntBulletEnv",
        "walker2d": "MyWalker2DBulletEnv",
        "hopper": "MyHopperBulletEnv",
        "halfcheetah": "MyHalfCheetahBulletEnv",
    }
    env_name = env_names[_template]
    env_id = f'{env_name}-v{robot_body}'
    try:
        gym.spec(env_id)
    except Exception:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit. Register the env only when the spec lookup fails.
        gym.envs.registration.register(
            id=env_id,
            entry_point=f'gym_envs.{_template}:{env_name}',
            max_episode_steps=1000,
            reward_threshold=2500.0,
            kwargs={
                "xml": f"{str(common.input_data_folder.resolve())}/bodies/{robot_body}.xml"
            })
    # Only worker rank 0 renders.
    _render = False
    if render:
        _render = rank in [0]
    env = gym.make(env_id, render=_render)
    if len(wrappers) > 0:
        for _wrapper in wrappers:
            if isinstance(_wrapper, BodyinfoWrapper):
                # Negative body_info means "use the robot body index".
                if body_info < 0:
                    _body_info = robot_body
                else:
                    _body_info = body_info
                env = _wrapper(env, _body_info)
            else:
                env = _wrapper(env)
    if seed is not None:
        env.seed(seed * 100 + rank)
        env.action_space.seed(seed * 100 + rank)
    return env
def __init__(self, gym_core_id, fps=60, vnc_pixels=True):
    """Synchronous gym-core env; optionally keeps a local core env for observations.

    Args:
        gym_core_id: id of the underlying gym-core environment.
        fps: frame rate passed to the base class.
        vnc_pixels: if True, observations come from VNC pixels and no local
            core env is instantiated.
    """
    super(GymCoreSyncEnv, self).__init__(gym_core_id, fps=fps)
    # Metadata has already been cloned
    self.metadata['semantics.async'] = False
    self.gym_core_id = gym_core_id
    self.vnc_pixels = vnc_pixels
    # A local core env is only needed when not observing VNC pixels.
    self._core_env = None if vnc_pixels else gym.spec(gym_core_id).make()
def load_d4rl(env_name, default_time_limit=1000):
    """Loads the python environment from D4RL.

    Args:
        env_name: D4RL environment name to build.
        default_time_limit: step cap used when the spec declares none.
    """
    env = gym.make(env_name)
    spec = gym.spec(env_name)
    # Respect the spec's own limit; only cap when it is missing (0/None).
    if spec.max_episode_steps in (0, None):
        env = TimeLimit(env, max_episode_steps=default_time_limit)
    # Wrap for TF-Agents consumption.
    return gym_wrapper.GymWrapper(env)
def test_wrapped_cartpole_final(self):
    """The terminal step has a reward, zero discount, and the usual obs shape."""
    env = gym_wrapper.GymWrapper(gym.spec('CartPole-v1').make())
    ts = env.reset()
    # Push right until the pole falls.
    while not ts.is_last():
        ts = env.step(1)
    self.assertTrue(ts.is_last())
    self.assertNotEqual(None, ts.reward)
    self.assertEqual(0.0, ts.discount)
    self.assertEqual((4, ), ts.observation.shape)
def test_method_propagation(self):
    """render/seed/close on the wrapper are forwarded to the gym env."""
    base_env = gym.spec('CartPole-v1').make()
    # Replace each forwarded method with a mock so calls can be counted.
    for forwarded in ('render', 'seed', 'close'):
        setattr(base_env, forwarded, mock.MagicMock())
    env = gym_wrapper.GymWrapper(base_env)
    env.render()
    self.assertEqual(1, base_env.render.call_count)
    env.seed(0)
    self.assertEqual(1, base_env.seed.call_count)
    base_env.seed.assert_called_with(0)
    env.close()
    self.assertEqual(1, base_env.close.call_count)
def score_from_remote(url):
    """Fetch evaluation results as JSON from `url` and score them."""
    parsed = requests.get(url).json()
    episode_lengths = parsed['episode_lengths']
    episode_rewards = parsed['episode_rewards']
    timestamps = parsed['timestamps']
    # Handle legacy entries where initial_reset_timestamp wasn't set
    initial_reset_timestamp = parsed.get('initial_reset_timestamp', timestamps[0])
    spec = gym.spec(parsed['env_id'])
    return score_from_merged(
        episode_lengths, episode_rewards, timestamps,
        initial_reset_timestamp, spec.trials, spec.reward_threshold)
def create_env(env_id, client_id, n=1, **kwargs):
    """Create `env_id`, allocating `n` remotes from the allocator service."""
    spec = gym.spec(env_id)
    remotes = "http://allocator.sci.openai-tech.com?n={}".format(n)
    if spec.tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)
    if spec.tags.get('atari', False) and spec.tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)
    # Assume atari.
    assert "." not in env_id  # universe environments have dots in names.
    return create_atari_env(env_id)
def load(
    environment_name: Text,
    discount: types.Float = 1.0,
    max_episode_steps: Optional[types.Int] = None,
    gym_env_wrappers: Sequence[types.GymEnvWrapper] = (),
    env_wrappers: Sequence[types.PyEnvWrapper] = (),
    spec_dtype_map: Optional[Dict[gym.Space, np.dtype]] = None,
    gym_kwargs: Optional[Dict[str, Any]] = None,
    render_kwargs: Optional[Dict[str, Any]] = None,
) -> py_environment.PyEnvironment:
    """Loads the selected environment and wraps it with the specified wrappers.

    Note that by default a TimeLimit wrapper is used to limit episode lengths
    to the default benchmarks defined by the registered environments.

    Args:
        environment_name: Name for the environment to load.
        discount: Discount to use for the environment.
        max_episode_steps: If None the max_episode_steps will be set to the
            default step limit defined in the environment's spec. No limit is
            applied if set to 0 or if there is no max_episode_steps set in the
            environment's spec.
        gym_env_wrappers: Iterable with references to wrapper classes to use
            directly on the gym environment.
        env_wrappers: Iterable with references to wrapper classes to use on
            the gym_wrapped environment.
        spec_dtype_map: A dict that maps gym spaces to np dtypes to use as the
            default dtype for the arrays. Configure via Gin by pointing
            `suite_gym.load.spec_dtype_map` at a gin-configurable function
            returning the desired mapping.
        gym_kwargs: Optional kwargs to pass to the Gym environment class.
        render_kwargs: Optional kwargs for rendering to pass to `render()` of
            the gym_wrapped environment.

    Returns:
        A PyEnvironment instance.
    """
    spec = gym.spec(environment_name)
    env = spec.make(**(gym_kwargs if gym_kwargs else {}))
    # Only fall back to the spec's limit when the caller didn't set one.
    if max_episode_steps is None and spec.max_episode_steps is not None:
        max_episode_steps = spec.max_episode_steps
    return wrap_env(
        env,
        discount=discount,
        max_episode_steps=max_episode_steps,
        gym_env_wrappers=gym_env_wrappers,
        env_wrappers=env_wrappers,
        spec_dtype_map=spec_dtype_map,
        render_kwargs=render_kwargs)
def score_from_local(directory):
    """Calculate score from a local results directory"""
    results = gym.monitoring.monitor.load_results(directory)
    if results is None:
        # No scores yet saved.
        return None
    spec = gym.spec(results['env_info']['env_id'])
    return score_from_merged(
        results['episode_lengths'],
        results['episode_rewards'],
        results['timestamps'],
        results['initial_reset_timestamp'],
        spec.trials,
        spec.reward_threshold)
def __init__(self, env_id):
    """Capture spaces and metadata from a throwaway instance of `env_id`."""
    self.worker_n = None
    # Pull the relevant info from a transient env instance
    self.spec = gym.spec(env_id)
    probe = self.spec.make()
    # Layer our own metadata on top of the probe env's copy.
    own_metadata = self.metadata
    self.metadata = probe.metadata.copy()
    self.metadata.update(own_metadata)
    self.action_space = probe.action_space
    self.observation_space = probe.observation_space
    self.reward_range = probe.reward_range
def __init__(self, env, gym_core_id=None):
    """Wrapper that observes via a local gym-core env.

    Args:
        env: the env to wrap.
        gym_core_id: id of the core env; when None it is read from
            self.spec's kwargs (self.spec is None while inside make(), so
            hand instantiation must rely on the fallback).
    """
    super(GymCoreObservation, self).__init__(env)
    if gym_core_id is None:
        gym_core_id = self.spec._kwargs['gym_core_id']
    self._reward_n = None
    self._done_n = None
    self._info_n = None
    self._gym_core_env = gym.spec(gym_core_id).make()
def score_from_file(json_file):
    """Calculate score from an episode_batch.json file"""
    with open(json_file) as f:
        results = json.load(f)
    if results is None:
        # No scores yet saved.
        return None
    spec = gym.spec(results['env_id'])
    return score_from_merged(
        results['episode_lengths'],
        results['episode_rewards'],
        results['episode_types'],
        results['timestamps'],
        results['initial_reset_timestamp'],
        spec.trials,
        spec.reward_threshold)
def __init__(self, algorithm_id, training_callable, complete_callable, base_dir=None, video_callable=None, processes=None, env_ids=None):
    """Benchmark runner over a set of env specs with a shared process pool.

    Args:
        algorithm_id: id of the algorithm being benchmarked.
        training_callable: callable that trains on one env.
        complete_callable: callable invoked on completion.
        base_dir: output directory; a temp dir is created when None.
        video_callable: optional video-recording predicate.
        processes: pool size; defaults to cpu_count - 1 (min 1).
        env_ids: explicit env ids to run; None means the whole registry.
    """
    global pool
    self.base_dir = base_dir or tempfile.mkdtemp()
    self.training_callable = training_callable
    self.complete_callable = complete_callable
    self.algorithm_id = algorithm_id
    self.video_callable = video_callable
    if env_ids is None:
        self.specs = gym.envs.registry.all()
    else:
        self.specs = [gym.spec(env_id) for env_id in env_ids]
    self.selected_specs = None
    # Lazily create the shared module-level pool exactly once.
    processes = processes or max(1, multiprocessing.cpu_count() - 1)
    if not pool:
        pool = multiprocessing.Pool(processes)
def __init__(self, env, gym_core_id=None):
    """Translate the wrapped env's actions to a discrete gym-core action set.

    Args:
        env: the env to wrap.
        gym_core_id: id of the core env; when None it is read from
            self.spec's kwargs (self.spec is None while inside make(), so
            hand instantiation must rely on the fallback).
    """
    super(GymCoreAction, self).__init__(env)
    if gym_core_id is None:
        gym_core_id = self.spec._kwargs['gym_core_id']
    spec = gym.spec(gym_core_id)
    raw_action_space = action_space.gym_core_action_space(gym_core_id)
    self._actions = raw_action_space.actions
    self.action_space = spaces.Discrete(len(self._actions))
    # Atari envs additionally need a key-state translator.
    if spec._entry_point.startswith('gym.envs.atari:'):
        self.key_state = translator.AtariKeyState(gym.make(gym_core_id))
    else:
        self.key_state = None
def gym_core_action_space(gym_core_id):
    """Return the hardcoded VNC action space for a gym-core env id.

    Raises:
        error.Error: when the env is neither CartPole-v0 nor an Atari env.
    """
    spec = gym.spec(gym_core_id)
    if spec.id == 'CartPole-v0':
        # Two actions: press and release the left key.
        return spaces.Hardcoded([
            [spaces.KeyEvent.by_name('left', down=True)],
            [spaces.KeyEvent.by_name('left', down=False)],
        ])
    if spec._entry_point.startswith('gym.envs.atari:'):
        env = spec.make()
        # Map each ALE action meaning to the equivalent VNC key combo.
        translated_actions = []
        for meaning in env.unwrapped.get_action_meanings():
            translated_actions.append(atari_vnc(
                up='UP' in meaning,
                down='DOWN' in meaning,
                left='LEFT' in meaning,
                right='RIGHT' in meaning,
                z='FIRE' in meaning))
        return spaces.Hardcoded(translated_actions)
    raise error.Error('Unsupported env type: {}'.format(spec.id))
def score_from_local(path):
    """Score a local monitor directory, returning a defaulted dict when empty."""
    parsed = gym.monitoring.monitor.load_results(path)
    if parsed is None:
        # Nothing recorded yet (only the initial env.reset() has run).
        return {
            'episode_t_value': None,
            'timestep_t_value': None,
            'mean': None,
            'error': None,
            'number_episodes': 0,
            'number_timesteps': 0,
            'seconds_to_solve': None,
            'seconds_in_total': 0,
        }
    episode_lengths = parsed['episode_lengths']
    episode_rewards = parsed['episode_rewards']
    timestamps = parsed['timestamps']
    # Older results may lack initial_reset_timestamp; fall back to the first stamp.
    initial_reset_timestamp = parsed.get('initial_reset_timestamp', timestamps[0])
    spec = gym.spec(parsed['env_info']['env_id'])
    return score_from_merged(
        episode_lengths, episode_rewards, timestamps,
        initial_reset_timestamp, spec.trials, spec.reward_threshold)
# Account for remote auto-reset obs = env.reset() vnc_obs, vnc_reward, vnc_done, vnc_info = vnc_env.step(action) assert reward == vnc_reward assert done == vnc_done assert vnc_info['stats.reward.count'] == 1 matcher.assert_match(obs, vnc_obs, {'reward': reward, 'done': done}, stage=stage) count += 1 if done or (timestep_limit is not None and count >= timestep_limit): break # TODO: we should have auto-env spinup specs = [ (gym.spec('gym-core.PongDeterministicSync-v3'), AtariMatcher(), atari_vnc_wrapper), (gym.spec('gym-core.PitfallDeterministicSync-v3'), AtariMatcher(), atari_vnc_wrapper), # This test is still broken. Looks like we're not piping the seed # to the CartPole env behind VNC # (gym.spec('gym-core.CartPoleLowDSync-v0'), CartPoleLowDMatcher()) ] @pytest.mark.parametrize("spec,matcher,wrapper", specs) def test_nice_vnc_semantics_match(spec, matcher, wrapper): # Check that when running over VNC or using the raw environment, # semantics match exactly. gym.undo_logger_setup() logging.getLogger().setLevel(logging.INFO) spaces.seed(0)
def __init__(self, file_name):
    """Open a gzipped trace file and build the env named in its JSON header line."""
    self.file = gzip.GzipFile(file_name, 'rb')
    # The first line is a JSON metadata record containing the env id.
    header = json.loads(self.file.readline())
    self.env = gym.spec(header['env_id']).make()