Example #1
def make_env():
    env_spec = gym.spec('MsPacman-v0')
    env_spec.id = 'MsPacman-v0'
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(env),
                        width=80,
                        height=80,
                        grayscale=True)
    return e
Example #2
 def test_get_info(self):
     cartpole_env = gym.spec('CartPole-v1').make()
     env = alf_gym_wrapper.AlfGymWrapper(cartpole_env)
     self.assertEqual(None, env.get_info())
     env.reset()
     self.assertEqual(None, env.get_info())
     action = np.array(0, dtype=np.int64)
     env.step(action)
     self.assertEqual({}, env.get_info())
Example #3
def WrappedGymCoreSyncEnv(gym_core_id, fps=60, rewarder_observation=False):
    spec = gym.spec(gym_core_id)
    env = gym_core_sync.GymCoreSync(BlockingReset(wrap(envs.VNCEnv(fps=fps))))
    if rewarder_observation:
        env = GymCoreObservation(env, gym_core_id=gym_core_id)
    elif spec._entry_point.startswith('gym.envs.atari:'):
        env = CropAtari(env)

    return env
Example #4
def make_env():
    env_spec = gym.spec('ppaquette/DoomBasic-v0')
    env_spec.id = 'DoomBasic-v0'
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)),
                        width=80,
                        height=80,
                        grayscale=True)
    return e
Example #5
def make_env(arg_env_spec, arg_env_spec_id):
    env_spec = gym.spec(arg_env_spec)
    env_spec.id = arg_env_spec_id
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)),
                        width=80,
                        height=80,
                        grayscale=True)
    return e
Example #6
def WrappedGymCoreSyncEnv(gym_core_id, fps=60, rewarder_observation=False):
    spec = gym.spec(gym_core_id)
    env = gym_core_sync.GymCoreSync(BlockingReset(wrap(envs.VNCEnv(fps=fps))))
    if rewarder_observation:
        env = GymCoreObservation(env, gym_core_id=gym_core_id)
    elif spec._entry_point.startswith('gym.envs.atari:'):
        env = CropAtari(env)

    return env
Example #7
def query_environment(name):
    env = gym.make(name)
    spec = gym.spec(name)
    print(f"Action Space: {env.action_space}")
    print(f"Observation Space: {env.observation_space}")
    print(f"Max Episode Steps: {spec.max_episode_steps}")
    print(f"Nondeterministic: {spec.nondeterministic}")
    print(f"Reward Range: {env.reward_range}")
    print(f"Reward Threshold: {spec.reward_threshold}")
Example #8
def env_factory(env_name):
  gym_env = gym.make(env_name)
  gym_spec = gym.spec(env_name)
  if gym_spec.max_episode_steps in [0, None]:  # Add TimeLimit wrapper.
    gym_env = time_limit.TimeLimit(gym_env, max_episode_steps=1000)

  tf_env = tf_py_environment.TFPyEnvironment(
      gym_wrapper.GymWrapper(gym_env))
  return tf_env
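A minimal sketch of the max_episode_steps check used in env_factory above, assuming a standard gym registry where 'CartPole-v1' carries a 500-step limit:

import gym

spec = gym.spec('CartPole-v1')
print(spec.max_episode_steps)               # 500, so no TimeLimit wrapper would be added
print(spec.max_episode_steps in [0, None])  # False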
Example #9
 def _thunk():
     env_spec = gym.spec('ppaquette/DoomBasic-v0')
     env_spec.id = 'DoomBasic-v0'
     env = env_spec.make()
     env.seed(seed + rank)
     env = PreprocessImage((SkipWrapper(4)(ToDiscrete("minimal")(env))))
     if logger.get_dir():
         env = bench.Monitor(env, os.path.join(logger.get_dir(), "{}.monitor.json".format(rank)))
     gym.logger.setLevel(logging.WARN)
     return ScaleRewardEnv(env)
Example #10
 def testActionSpec(self):
     cartpole_env = gym.spec('CartPole-v1').make()
     env = gym_wrapper.GymWrapper(cartpole_env)
     one_hot_action_wrapper = wrappers.OneHotActionWrapper(env)
     expected_spec = array_spec.BoundedArraySpec(shape=(2, ),
                                                 dtype=np.int64,
                                                 minimum=0,
                                                 maximum=1,
                                                 name='one_hot_action_spec')
     self.assertEqual(one_hot_action_wrapper.action_spec(), expected_spec)
Example #11
    def test_wrapped_cartpole_reset(self):
        cartpole_env = gym.spec('CartPole-v1').make()
        env = gym_wrapper.GymWrapper(cartpole_env)

        first_time_step = env.reset()
        self.assertTrue(first_time_step.is_first())
        self.assertEqual(0.0, first_time_step.reward)
        self.assertEqual(1.0, first_time_step.discount)
        self.assertEqual((4, ), first_time_step.observation.shape)
        self.assertEqual(np.float32, first_time_step.observation.dtype)
Example #12
    def test_wrapped_cartpole_transition(self):
        cartpole_env = gym.spec('CartPole-v1').make()
        env = gym_wrapper.GymWrapper(cartpole_env)
        env.reset()
        transition_time_step = env.step(0)

        self.assertTrue(transition_time_step.is_mid())
        self.assertNotEqual(None, transition_time_step.reward)
        self.assertEqual(1.0, transition_time_step.discount)
        self.assertEqual((4, ), transition_time_step.observation.shape)
Example #13
    def __init__(self, tuning_parameters):
        EnvironmentWrapper.__init__(self, tuning_parameters)
        ports = (5200, 15200)
        # env parameters
        self.env = gym.make(self.env_id)
        self.env_id = self.env_id
        if self.seed is not None:
            self.env.seed(self.seed)

        self.env_spec = gym.spec(self.env_id)
        self.none_counter = 0
        self.discrete_controls = type(
            self.env.action_space) != gym.spaces.box.Box

        # pybullet requires rendering before resetting the environment, but other gym environments (e.g. Pendulum) will crash if rendered before reset
        try:
            if self.is_rendered:
                self.render()
        except:
            pass

        o = self.reset(True)['observation']

        # render
        if self.is_rendered:
            self.render()

        # self.env.render()
        self.is_state_type_image = len(o.shape) > 1
        if self.is_state_type_image:
            self.width = o.shape[1]
            self.height = o.shape[0]
        else:
            self.width = o.shape[0]

        self.actions_description = {}
        if self.discrete_controls:
            self.action_space_size = self.env.action_space.n
            self.action_space_abs_range = 0
        else:
            self.action_space_size = self.env.action_space.shape[0]
            self.action_space_high = self.env.action_space.high
            self.action_space_low = self.env.action_space.low
            self.action_space_abs_range = np.maximum(
                np.abs(self.action_space_low), np.abs(self.action_space_high))
        self.actions = {i: i for i in range(self.action_space_size)}
        self.timestep_limit = self.env.spec.timestep_limit
        self.current_ale_lives = 0
        self.measurements_size = len(self.step(0)['info'].keys())

        # env initialization
        self.observation = o
        self.reward = 0
        self.done = False
        self.last_action = self.actions[0]
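A standalone sketch of the discrete-versus-continuous check used in the constructor above; isinstance is the more idiomatic form, and the environment IDs (including Pendulum's version suffix) are only examples that depend on the installed gym version.

import gym

discrete_env = gym.make('CartPole-v1')
continuous_env = gym.make('Pendulum-v0')
print(isinstance(discrete_env.action_space, gym.spaces.Box))    # False -> discrete controls
print(isinstance(continuous_env.action_space, gym.spaces.Box))  # True  -> continuous controls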
Example #14
def create_expansionai_env(env_id, video=False, params=Params()):
    gym.spec(env_id)._kwargs = {
        'armies': params.armies,
        'board_size': params.board_size,
        'offset_x': 0,
        'offset_y': 0
    }
    env = gym.make(env_id)
    if video:
        env = wrappers.Monitor(env, 'test', force=True, mode='training')
    return env
Example #15
    def _create_gym_with_wrapper(self, discount):
        gym_spec = gym.spec(self.model_config.gym_env_name)
        gym_env = gym_spec.make()

        # simplify_box_bounds: whether to replace Box-space bounds that are arrays of
        #  identical values with a single number and rely on broadcasting.
        # Important: simplify_box_bounds=True crashes environments whose bounds contain identical values.
        env = gym_wrapper.GymWrapper(gym_env,
                                     discount=discount,
                                     simplify_box_bounds=False)
        return env
Example #16
  def test_observation_spec_changed_with_action(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    obs_shape = env.observation_spec().shape
    action_shape = env.action_spec().shape

    history_env = wrappers.HistoryWrapper(env, 3, include_actions=True)
    self.assertEqual((3,) + obs_shape,
                     history_env.observation_spec()['observation'].shape)
    self.assertEqual((3,) + action_shape,
                     history_env.observation_spec()['action'].shape)
Example #17
def create_env(env_id, client_id, remotes, **kwargs):
    spec = gym.spec(env_id)

    if spec.tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)
    elif spec.tags.get('atari', False) and spec.tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)
    else:
        # Assume atari.
        assert "." not in env_id  # universe environments have dots in names.
        return create_atari_env(env_id)
Example #18
def create_env(env_id, client_id, remotes, **kwargs):
    spec = gym.spec(env_id)
    # spec.tags is a Python dict; .get() looks up the key and returns the default (False here) if the key is absent.
    if spec.tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)
    elif spec.tags.get('atari', False) and spec.tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)
    else:
        # Assume atari.
        assert "." not in env_id  # universe environments have dots in names.
        return create_atari_env(env_id)
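A small illustration of the dict.get() behaviour described in the comment above; the tag values here are made up for the example.

tags = {'atari': True, 'vnc': True}
print(tags.get('flashgames', False))  # key absent -> the default False is returned
print(tags.get('atari', False))       # True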
Example #19
def load(environment_name,
         env_id=None,
         max_instruction_length=80,
         one_token_per_step=False,
         discount=1.0,
         max_episode_steps=None,
         gym_env_wrappers=(),
         alf_env_wrappers=()):
    """Loads the selected environment and wraps it with the specified wrappers.

    Note that by default a TimeLimit wrapper is used to limit episode lengths
    to the default benchmarks defined by the registered environments.

    Args:
        environment_name (str): Name for the environment to load.
        env_id (int): (optional) ID of the environment.
        max_instruction_length (int): the maximum number of words of an instruction.
        one_token_per_step (bool): If False, the whole instruction (word ID array)
            is given in the observation at every step. If True, the word IDs are
            given in the observation sequentially, one word ID per step, and a
            zero is given for every step after all the word IDs have been given.
        discount (float): Discount to use for the environment.
        max_episode_steps (int): If None the max_episode_steps will be set to the
            default step limit defined in the environment's spec. No limit is applied
            if set to 0 or if there is no max_episode_steps set in the environment's
            spec.
        gym_env_wrappers (Iterable): Iterable with references to gym_wrappers
            classes to use directly on the gym environment.
        alf_env_wrappers (Iterable): Iterable with references to alf_wrappers
            classes to use on the ALF environment.

    Returns:
        An AlfEnvironment instance.
    """
    gym_spec = gym.spec(environment_name)
    gym_env = gym_spec.make()

    if max_episode_steps is None:
        if gym_spec.max_episode_steps is not None:
            max_episode_steps = gym_spec.max_episode_steps
        else:
            max_episode_steps = 0

    gym_env = BabyAIWrapper(gym_env, max_instruction_length,
                            one_token_per_step)

    return wrap_env(gym_env,
                    env_id=env_id,
                    discount=discount,
                    max_episode_steps=max_episode_steps,
                    gym_env_wrappers=gym_env_wrappers,
                    alf_env_wrappers=alf_env_wrappers,
                    image_channel_first=False)
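A minimal usage sketch of the load() helper above; the BabyAI environment ID is only an example, and the call assumes BabyAIWrapper and wrap_env are importable from the surrounding module.

env = load('BabyAI-GoToRedBall-v0', max_instruction_length=40, discount=0.99)
time_step = env.reset()
print(env.action_spec())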
Example #20
def make_env(scenario, grayscale, input_shape):

    width, height = input_shape
    env_spec = gym.spec('ppaquette/' + scenario)
    env_spec.id = scenario  #'DoomBasic-v0'
    env = env_spec.make()
    e = PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(env)),
                        width=width,
                        height=height,
                        grayscale=grayscale)
    return e
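A usage sketch for the parameterised make_env above; it assumes the ppaquette Doom environments are installed and registered, as in the earlier Doom examples.

e = make_env('DoomBasic-v0', grayscale=True, input_shape=(80, 80))
obs = e.reset()
print(obs.shape)  # preprocessed to the requested 80x80 resolution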
Example #21
    def test_automatic_reset_after_done_not_using_reset_directly(self):
        cartpole_env = gym.spec('CartPole-v1').make()
        env = gym_wrapper.GymWrapper(cartpole_env)
        time_step = env.step(1)

        while not time_step.is_last():
            time_step = env.step(1)

        self.assertTrue(time_step.is_last())
        first_time_step = env.step(0)
        self.assertTrue(first_time_step.is_first())
Example #22
def create_env(env_id, client_id, remotes, **kwargs):
    spec = gym.spec(env_id)

    if spec.tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)
    elif spec.tags.get('atari', False) and spec.tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)
    else:
        # Assume atari.
        assert "." not in env_id  # universe environments have dots in names.
        return create_atari_env(env_id)
Example #23
  def test_automatic_reset_after_done_not_using_reset_directly(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    time_step = env.step(1)  # pytype: disable=wrong-arg-types

    while not time_step.is_last():
      time_step = env.step(np.array(1, dtype=np.int32))

    self.assertTrue(time_step.is_last())
    first_time_step = env.step(0)  # pytype: disable=wrong-arg-types
    self.assertTrue(first_time_step.is_first())
Example #24
    def test_automatic_reset_after_done(self):
        cartpole_env = gym.spec('CartPole-v1').make()
        env = gym_wrapper.GymWrapper(cartpole_env)
        time_step = env.reset()

        while not time_step.is_last():
            time_step = env.step(np.array(1, dtype=np.int32))

        self.assertTrue(time_step.is_last())
        first_time_step = env.step(0)
        self.assertTrue(first_time_step.is_first())
Example #25
    def _init():
        env_names = {
            "ant": "MyAntBulletEnv",
            "walker2d": "MyWalker2DBulletEnv",
            "hopper": "MyHopperBulletEnv",
            "halfcheetah": "MyHalfCheetahBulletEnv",
        }
        env_name = env_names[_template]
        env_id = f'{env_name}-v{robot_body}'
        try:
            gym.spec(env_id)
        except:
            gym.envs.registration.register(
                id=env_id,
                entry_point=f'gym_envs.{_template}:{env_name}',
                max_episode_steps=1000,
                reward_threshold=2500.0,
                kwargs={
                    "xml":
                    f"{str(common.input_data_folder.resolve())}/bodies/{robot_body}.xml"
                })

        _render = False
        if render:
            _render = rank in [0]
        env = gym.make(env_id, render=_render)
        if len(wrappers) > 0:
            for _wrapper in wrappers:
                if isinstance(_wrapper, BodyinfoWrapper):
                    if body_info < 0:
                        _body_info = robot_body
                    else:
                        _body_info = body_info
                    env = _wrapper(env, _body_info)
                else:
                    env = _wrapper(env)

        if seed is not None:
            env.seed(seed * 100 + rank)
            env.action_space.seed(seed * 100 + rank)
        return env
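A minimal standalone sketch of the register-if-missing pattern used in _init() above; the environment ID and entry point are hypothetical.

import gym

env_id = 'MyCustomEnv-v0'  # hypothetical ID
try:
    gym.spec(env_id)       # raises if the ID is not registered
except gym.error.Error:
    gym.envs.registration.register(
        id=env_id,
        entry_point='my_package.envs:MyCustomEnv',  # hypothetical entry point
        max_episode_steps=1000,
    )
env = gym.make(env_id)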
Example #26
    def __init__(self, gym_core_id, fps=60, vnc_pixels=True):
        super(GymCoreSyncEnv, self).__init__(gym_core_id, fps=fps)
        # Metadata has already been cloned
        self.metadata['semantics.async'] = False

        self.gym_core_id = gym_core_id
        self.vnc_pixels = vnc_pixels

        if not vnc_pixels:
            self._core_env = gym.spec(gym_core_id).make()
        else:
            self._core_env = None
Example #27
def load_d4rl(env_name, default_time_limit=1000):
    """Loads the python environment from D4RL."""
    gym_env = gym.make(env_name)
    gym_spec = gym.spec(env_name)

    # Use the env's own time limit unless it is unspecified, in which case apply the default.
    if gym_spec.max_episode_steps in [0, None]:
        gym_env = TimeLimit(gym_env, max_episode_steps=default_time_limit)

    # Wrap TF-Agents environment.
    env = gym_wrapper.GymWrapper(gym_env)
    return env
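A hedged usage sketch of load_d4rl above; it assumes the d4rl package is installed so that importing it registers the D4RL environment IDs, and the ID shown is only an example.

import d4rl  # noqa: F401  (importing registers the D4RL environment IDs)

env = load_d4rl('halfcheetah-medium-v0')
print(env.action_spec())
print(env.observation_spec())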
Example #28
    def test_wrapped_cartpole_final(self):
        cartpole_env = gym.spec('CartPole-v1').make()
        env = gym_wrapper.GymWrapper(cartpole_env)
        time_step = env.reset()

        while not time_step.is_last():
            time_step = env.step(1)

        self.assertTrue(time_step.is_last())
        self.assertNotEqual(None, time_step.reward)
        self.assertEqual(0.0, time_step.discount)
        self.assertEqual((4, ), time_step.observation.shape)
Example #29
 def test_method_propagation(self):
     cartpole_env = gym.spec('CartPole-v1').make()
     for method_name in ('render', 'seed', 'close'):
         setattr(cartpole_env, method_name, mock.MagicMock())
     env = gym_wrapper.GymWrapper(cartpole_env)
     env.render()
     self.assertEqual(1, cartpole_env.render.call_count)
     env.seed(0)
     self.assertEqual(1, cartpole_env.seed.call_count)
     cartpole_env.seed.assert_called_with(0)
     env.close()
     self.assertEqual(1, cartpole_env.close.call_count)
Example #30
def score_from_remote(url):
    result = requests.get(url)
    parsed = result.json()
    episode_lengths = parsed['episode_lengths']
    episode_rewards = parsed['episode_rewards']
    timestamps = parsed['timestamps']
    # Handle legacy entries where initial_reset_timestamp wasn't set
    initial_reset_timestamp = parsed.get('initial_reset_timestamp', timestamps[0])
    env_id = parsed['env_id']

    spec = gym.spec(env_id)
    return score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
Example #31
def score_from_remote(url):
    result = requests.get(url)
    parsed = result.json()
    episode_lengths = parsed['episode_lengths']
    episode_rewards = parsed['episode_rewards']
    timestamps = parsed['timestamps']
    # Handle legacy entries where initial_reset_timestamp wasn't set
    initial_reset_timestamp = parsed.get('initial_reset_timestamp', timestamps[0])
    env_id = parsed['env_id']

    spec = gym.spec(env_id)
    return score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
Example #32
def create_env(env_id, client_id, n=1, **kwargs):
    spec = gym.spec(env_id)
    remotes = "http://allocator.sci.openai-tech.com?n={}".format(n)

    if spec.tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)
    elif spec.tags.get('atari', False) and spec.tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)
    else:
        # Assume atari.
        assert "." not in env_id  # universe environments have dots in names.
        return create_atari_env(env_id)
Example #33
def load(
    environment_name: Text,
    discount: types.Float = 1.0,
    max_episode_steps: Optional[types.Int] = None,
    gym_env_wrappers: Sequence[types.GymEnvWrapper] = (),
    env_wrappers: Sequence[types.PyEnvWrapper] = (),
    spec_dtype_map: Optional[Dict[gym.Space, np.dtype]] = None,
    gym_kwargs: Optional[Dict[str, Any]] = None,
    render_kwargs: Optional[Dict[str, Any]] = None,
    ) -> py_environment.PyEnvironment:
  """Loads the selected environment and wraps it with the specified wrappers.

  Note that by default a TimeLimit wrapper is used to limit episode lengths
  to the default benchmarks defined by the registered environments.

  Args:
    environment_name: Name for the environment to load.
    discount: Discount to use for the environment.
    max_episode_steps: If None the max_episode_steps will be set to the default
      step limit defined in the environment's spec. No limit is applied if set
      to 0 or if there is no max_episode_steps set in the environment's spec.
    gym_env_wrappers: Iterable with references to wrapper classes to use
      directly on the gym environment.
    env_wrappers: Iterable with references to wrapper classes to use on the
      gym_wrapped environment.
    spec_dtype_map: A dict that maps gym spaces to np dtypes to use as the
      default dtype for the arrays. An easy way to configure a custom mapping
      through Gin is to define a gin-configurable function that returns the
      desired mapping and call it in your Gin config file, for example:
      `suite_gym.load.spec_dtype_map = @get_custom_mapping()`.
    gym_kwargs: Optional kwargs to pass to the Gym environment class.
    render_kwargs: Optional kwargs for rendering to pass to `render()` of the
      gym_wrapped environment.

  Returns:
    A PyEnvironment instance.
  """
  gym_kwargs = gym_kwargs if gym_kwargs else {}
  gym_spec = gym.spec(environment_name)
  gym_env = gym_spec.make(**gym_kwargs)

  if max_episode_steps is None and gym_spec.max_episode_steps is not None:
    max_episode_steps = gym_spec.max_episode_steps

  return wrap_env(
      gym_env,
      discount=discount,
      max_episode_steps=max_episode_steps,
      gym_env_wrappers=gym_env_wrappers,
      env_wrappers=env_wrappers,
      spec_dtype_map=spec_dtype_map,
      render_kwargs=render_kwargs)
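A minimal usage sketch of the load() function above, assuming it lives in a suite module alongside wrap_env; the argument values are illustrative only.

env = load('CartPole-v1', discount=0.99, max_episode_steps=200)
print(env.action_spec())
print(env.observation_spec())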
Example #34
def score_from_local(directory):
    """Calculate score from a local results directory"""
    results = gym.monitoring.monitor.load_results(directory)
    # No scores yet saved
    if results is None:
        return None

    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
    timestamps = results['timestamps']
    initial_reset_timestamp = results['initial_reset_timestamp']
    spec = gym.spec(results['env_info']['env_id'])

    return score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
Example #35
    def __init__(self, env_id):
        self.worker_n = None

        # Pull the relevant info from a transient env instance
        self.spec = gym.spec(env_id)
        env = self.spec.make()

        current_metadata = self.metadata
        self.metadata = env.metadata.copy()
        self.metadata.update(current_metadata)

        self.action_space = env.action_space
        self.observation_space = env.observation_space
        self.reward_range = env.reward_range
Example #36
    def __init__(self, env, gym_core_id=None):
        super(GymCoreObservation, self).__init__(env)

        if gym_core_id is None:
            # self.spec is None while inside of the make, so we need
            # to pass gym_core_id in explicitly there. This case will
            # be hit when instantiating by hand.
            gym_core_id = self.spec._kwargs['gym_core_id']

        self._reward_n = None
        self._done_n = None
        self._info_n = None

        self._gym_core_env = gym.spec(gym_core_id).make()
Example #37
def score_from_file(json_file):
    """Calculate score from an episode_batch.json file"""
    with open(json_file) as f:
        results = json.load(f)

    # No scores yet saved
    if results is None:
        return None

    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
    episode_types = results['episode_types']
    timestamps = results['timestamps']
    initial_reset_timestamp = results['initial_reset_timestamp']
    spec = gym.spec(results['env_id'])

    return score_from_merged(episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
Example #38
    def __init__(self, algorithm_id, training_callable, complete_callable, base_dir=None, video_callable=None, processes=None, env_ids=None):
        global pool
        self.base_dir = base_dir or tempfile.mkdtemp()
        self.training_callable = training_callable
        self.complete_callable = complete_callable
        self.algorithm_id = algorithm_id
        self.video_callable = video_callable

        if env_ids is not None:
            self.specs = [gym.spec(env_id) for env_id in env_ids]
        else:
            self.specs = gym.envs.registry.all()
        self.selected_specs = None

        processes = processes or max(1, multiprocessing.cpu_count() - 1)
        if not pool:
            pool = multiprocessing.Pool(processes)
Example #39
    def __init__(self, env, gym_core_id=None):
        super(GymCoreAction, self).__init__(env)

        if gym_core_id is None:
            # self.spec is None while inside of the make, so we need
            # to pass gym_core_id in explicitly there. This case will
            # be hit when instantiating by hand.
            gym_core_id = self.spec._kwargs['gym_core_id']

        spec = gym.spec(gym_core_id)
        raw_action_space = action_space.gym_core_action_space(gym_core_id)

        self._actions = raw_action_space.actions
        self.action_space = spaces.Discrete(len(self._actions))

        if spec._entry_point.startswith('gym.envs.atari:'):
            self.key_state = translator.AtariKeyState(gym.make(gym_core_id))
        else:
            self.key_state = None
Example #40
def gym_core_action_space(gym_core_id):
    spec = gym.spec(gym_core_id)

    if spec.id == 'CartPole-v0':
        return spaces.Hardcoded([[spaces.KeyEvent.by_name('left', down=True)],
                                 [spaces.KeyEvent.by_name('left', down=False)]])
    elif spec._entry_point.startswith('gym.envs.atari:'):
        actions = []
        env = spec.make()
        for action in env.unwrapped.get_action_meanings():
            z = 'FIRE' in action
            left = 'LEFT' in action
            right = 'RIGHT' in action
            up = 'UP' in action
            down = 'DOWN' in action
            translated = atari_vnc(up=up, down=down, left=left, right=right, z=z)
            actions.append(translated)
        return spaces.Hardcoded(actions)
    else:
        raise error.Error('Unsupported env type: {}'.format(spec.id))
Example #41
def score_from_local(path):
    parsed = gym.monitoring.monitor.load_results(path)

    # If no scores have been saved yet (e.g. only the initial env.reset() has been called), return defaults
    if parsed is None:
        return {
            'episode_t_value': None,
            'timestep_t_value': None,
            'mean': None,
            'error': None,
            'number_episodes': 0,
            'number_timesteps': 0,
            'seconds_to_solve': None,
            'seconds_in_total': 0,
        }

    episode_lengths = parsed['episode_lengths']
    episode_rewards = parsed['episode_rewards']
    timestamps = parsed['timestamps']
    initial_reset_timestamp = parsed.get('initial_reset_timestamp', timestamps[0])
    spec = gym.spec(parsed['env_info']['env_id'])

    return score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, spec.trials,
                             spec.reward_threshold)
Example #42
            # Account for remote auto-reset
            obs = env.reset()

        vnc_obs, vnc_reward, vnc_done, vnc_info = vnc_env.step(action)
        assert reward == vnc_reward
        assert done == vnc_done
        assert vnc_info['stats.reward.count'] == 1
        matcher.assert_match(obs, vnc_obs, {'reward': reward, 'done': done}, stage=stage)

        count += 1
        if done or (timestep_limit is not None and count >= timestep_limit):
            break

# TODO: we should have auto-env spinup
specs = [
    (gym.spec('gym-core.PongDeterministicSync-v3'), AtariMatcher(), atari_vnc_wrapper),
    (gym.spec('gym-core.PitfallDeterministicSync-v3'), AtariMatcher(), atari_vnc_wrapper),

    # This test is still broken. Looks like we're not piping the seed
    # to the CartPole env behind VNC
#    (gym.spec('gym-core.CartPoleLowDSync-v0'), CartPoleLowDMatcher())
]

@pytest.mark.parametrize("spec,matcher,wrapper", specs)
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    # Check that when running over VNC or using the raw environment,
    # semantics match exactly.
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    spaces.seed(0)
Example #43
 def __init__(self, file_name):
     self.file = gzip.GzipFile(file_name, 'rb')
     meta = self.file.readline()
     meta = json.loads(meta)
     self.env = gym.spec(meta['env_id']).make()