Esempio n. 1
0
    def __init__(self, desc='4x4', max_episode_length=None):
        """Build the grid world from a layout description.

        Args:
            desc (str): Key into ``MAPS`` selecting the grid layout. A
                non-string value is used directly as the layout, i.e. as an
                iterable of rows of single-character cells.
            max_episode_length (int): The maximum steps allowed for an
                episode.
        """
        layout = MAPS[desc] if isinstance(desc, str) else desc
        grid = np.array([list(row) for row in layout])
        # Rewrite the alternative cell symbols to their canonical letters.
        for alias, canonical in (('.', 'F'), ('o', 'H'), ('x', 'W')):
            grid[grid == alias] = canonical
        self._desc = grid
        self._n_row, self._n_col = grid.shape

        # The unpacking below requires exactly one 'S' cell in the grid.
        (start_row, ), (start_col, ) = np.nonzero(grid == 'S')
        # States are numbered row by row, so flatten (row, col) accordingly.
        self._start_state = start_row * self._n_col + start_col

        self._max_episode_length = max_episode_length
        self._step_cnt = None
        self._state = None
        self._domain_fig = None

        n_cells = self._n_row * self._n_col
        self._action_space = akro.Discrete(4)
        self._observation_space = akro.Discrete(n_cells)
        self._spec = EnvSpec(action_space=self.action_space,
                             observation_space=self.observation_space,
                             max_episode_length=max_episode_length)
Esempio n. 2
0
    def action_space(self):
        """Return the action space.

        Returns:
            akro.Box or akro.Discrete: ``akro.Box`` of shape ``(1, )`` with
                bounds [-5.0, 5.0] and dtype float32 when
                ``act_space_type`` equals ``'box'``; ``akro.Discrete(5)``
                otherwise.

        """
        wants_box = self.act_space_type == 'box'
        return (akro.Box(low=-5.0, high=5.0, shape=(1, ), dtype=np.float32)
                if wants_box else akro.Discrete(5))
Esempio n. 3
0
def eps_data():
    """Assemble sample data describing a batch of eight episodes.

    Observations are copies of the observation space's lower bound, every
    action is ``1`` and rewards simply count the time steps. Each episode
    starts with ``StepType.FIRST`` and ends with ``StepType.TERMINAL``.

    Returns:
        dict: Entries keyed by ``env_spec``, ``episode_infos``,
            ``observations``, ``last_observations``, ``actions``,
            ``rewards``, ``env_infos``, ``agent_infos``, ``step_types`` and
            ``lengths``.

    """
    # spaces
    observation_space = akro.Box(low=1,
                                 high=np.inf,
                                 shape=(4, 3, 2),
                                 dtype=np.float32)
    action_space = akro.Discrete(2)
    spec = EnvSpec(observation_space, action_space)

    # episode lengths and the total number of time steps across episodes
    lens = np.array([10, 20, 7, 25, 25, 40, 10, 5])
    n_episodes = len(lens)
    total_steps = lens.sum()

    # per-step and per-episode arrays
    observations = np.stack([observation_space.low] * total_steps)
    last_observations = np.stack([observation_space.low] * n_episodes)
    actions = np.stack([1] * total_steps)
    rewards = np.arange(total_steps)

    # env_infos
    env_infos = {
        'goal': np.stack([[1, 1]] * total_steps),
        'foo': np.arange(total_steps),
    }

    # agent_infos ('prev_action' deliberately shares the actions array)
    agent_infos = {
        'prev_action': actions,
        'hidden': np.arange(total_steps),
    }

    # step_types: FIRST, then MID for the interior steps, then TERMINAL
    step_types = []
    for episode_len in lens:
        step_types.append(StepType.FIRST)
        step_types.extend([StepType.MID] * (episode_len - 2))
        step_types.append(StepType.TERMINAL)
    step_types = np.array(step_types, dtype=StepType)

    # episode_infos
    episode_infos = {'task_one_hot': np.stack([[1, 1]] * n_episodes)}

    return dict(env_spec=spec,
                episode_infos=episode_infos,
                observations=observations,
                last_observations=last_observations,
                actions=actions,
                rewards=rewards,
                env_infos=env_infos,
                agent_infos=agent_infos,
                step_types=step_types,
                lengths=lens)
Esempio n. 4
0
def test_act_env_spec_mismatch_time_step(sample_data):
    """Check that TimeStep rejects an action that does not fit the env spec.

    The action in ``sample_data`` is shortened by one element and TimeStep
    construction must raise ValueError. The check is then repeated, after a
    further shortening, against a spec with a discrete action space, where a
    different error message is expected.

    Args:
        sample_data (dict): Keyword arguments for ``TimeStep``; mutated in
            place by this test.

    """
    conform_msg = 'action must conform to action_space'
    with pytest.raises(ValueError, match=conform_msg):
        sample_data['action'] = sample_data['action'][:-1]
        TimeStep(**sample_data)

    # Replace the spec with one whose action space is akro.Discrete(5).
    sample_data['env_spec'] = EnvSpec(
        akro.Box(low=1, high=10, shape=(4, 3, 2), dtype=np.float32),
        akro.Discrete(5))

    dimensionality_msg = 'action should have the same dimensionality'
    with pytest.raises(ValueError, match=dimensionality_msg):
        sample_data['action'] = sample_data['action'][:-1]
        TimeStep(**sample_data)
Esempio n. 5
0
    def __init__(self, env, name=None):
        """Create a DMControlEnv.

        Args:
            env (dm_control.suite.Task): The wrapped dm_control environment.
            name (str): Name of the environment. When falsy, the class name
                of ``env.task`` is used instead.

        """
        self._env = env
        self._name = name if name else type(env.task).__name__
        self._viewer = None
        self._step_cnt = None

        # The episode limit is taken from the wrapped environment.
        self._max_episode_length = self._env._step_limit

        # action space: a 1-D spec with any infinite bound becomes Discrete,
        # everything else becomes a float32 Box with the spec's bounds.
        action_spec = self._env.action_spec()
        use_discrete = False
        if len(action_spec.shape) == 1:
            # Bounds are only inspected for 1-D specs, as in the condition
            # this replaces.
            use_discrete = (-np.inf in action_spec.minimum
                            or np.inf in action_spec.maximum)
        if use_discrete:
            n_actions = np.prod(action_spec.shape)
            self._action_space = akro.Discrete(n_actions)
        else:
            self._action_space = akro.Box(low=action_spec.minimum,
                                          high=action_spec.maximum,
                                          dtype=np.float32)

        # observation_space: unbounded float32 Box sized by _flat_shape
        observation_spec = self._env.observation_spec()
        flat_dim = _flat_shape(observation_spec)
        self._observation_space = akro.Box(low=-np.inf,
                                           high=np.inf,
                                           shape=[flat_dim],
                                           dtype=np.float32)

        # spec
        self._spec = EnvSpec(action_space=self.action_space,
                             observation_space=self.observation_space,
                             max_episode_length=self._max_episode_length)
Esempio n. 6
0
 def action_space(self):
     """Return the action space.

     Returns:
         akro.Discrete: A newly constructed discrete space built from
             ``self._action_dim``.

     """
     n_actions = self._action_dim
     return akro.Discrete(n_actions)