# Example #1 (0)
def test_mdp_dataset(
    data_size,
    observation_size,
    action_size,
    n_episodes,
    discrete_action,
    add_actions,
):
    """Smoke-test MDPDataset end to end.

    Covers construction, computed statistics, episode export, list-like
    access, append/extend growth, discrete action-space growth, and the
    HDF5 dump/load round-trip.
    """
    # Synthetic float32 transitions: equal-length episodes, each ending
    # with a terminal flag.
    episode_length = data_size // n_episodes
    obs = np.random.random((data_size, observation_size)).astype("f4")
    rews = np.random.uniform(-10.0, 10.0, size=data_size).astype("f4")
    terms = np.array(([0] * (episode_length - 1) + [1]) * n_episodes)

    if discrete_action:
        acts = np.random.randint(action_size, size=data_size)
        expected_action_size = np.max(acts) + 1
    else:
        acts = np.random.random((data_size, action_size)).astype("f4")
        expected_action_size = action_size

    dataset = MDPDataset(
        observations=obs,
        actions=acts,
        rewards=rews,
        terminals=terms,
        discrete_action=discrete_action,
    )

    # The dataset must round-trip the raw arrays and expose correct metadata.
    assert np.all(dataset.observations == obs)
    assert np.all(dataset.actions == acts)
    assert np.all(dataset.rewards == rews)
    assert np.all(dataset.terminals == terms)
    assert dataset.size() == n_episodes
    assert dataset.get_action_size() == action_size
    assert dataset.get_observation_shape() == (observation_size, )
    assert dataset.is_action_discrete() == discrete_action

    # Reference returns: per-episode reward sum, skipping the first step
    # of each episode (same accumulation order as a manual loop).
    ref_returns = [
        sum(float(r) for r in rews[start + 1:start + episode_length])
        for start in range(0, data_size, episode_length)
    ]

    stats = dataset.compute_stats()
    return_stats = stats["return"]
    assert np.allclose(return_stats["mean"], np.mean(ref_returns))
    assert np.allclose(return_stats["std"], np.std(ref_returns))
    assert np.allclose(return_stats["min"], np.min(ref_returns))
    assert np.allclose(return_stats["max"], np.max(ref_returns))
    reward_stats = stats["reward"]
    assert np.allclose(reward_stats["mean"], np.mean(rews))
    assert np.allclose(reward_stats["std"], np.std(rews))
    assert np.allclose(reward_stats["min"], np.min(rews))
    assert np.allclose(reward_stats["max"], np.max(rews))
    observation_stats = stats["observation"]
    assert np.all(observation_stats["mean"] == np.mean(obs, axis=0))
    assert np.all(observation_stats["std"] == np.std(obs, axis=0))
    if discrete_action:
        freqs, action_ids = stats["action"]["histogram"]
        assert np.sum(freqs) == data_size
        assert list(action_ids) == list(range(action_size))
    else:
        action_stats = stats["action"]
        assert np.all(action_stats["mean"] == np.mean(acts, axis=0))
        assert np.all(action_stats["std"] == np.std(acts, axis=0))
        assert np.all(action_stats["min"] == np.min(acts, axis=0))
        assert np.all(action_stats["max"] == np.max(acts, axis=0))
        assert len(action_stats["histogram"]) == action_size
        for freqs, _ in action_stats["histogram"]:
            assert np.sum(freqs) == data_size

    # Exported episodes must slice the flat arrays per episode.
    episodes = dataset.episodes
    assert len(episodes) == n_episodes
    for index, episode in enumerate(episodes):
        assert isinstance(episode, Episode)
        assert episode.size() == episode_length - 1
        begin = index * episode_length
        end = begin + episode_length
        assert np.all(episode.observations == obs[begin:end])
        assert np.all(episode.actions == acts[begin:end])
        assert np.all(episode.rewards == rews[begin:end])
        assert episode.get_observation_shape() == (observation_size, )
        assert episode.get_action_size() == expected_action_size

    # The dataset behaves like a sequence of Episode objects.
    assert len(dataset) == n_episodes
    assert dataset[0] is dataset.episodes[0]
    for index, episode in enumerate(dataset.episodes):
        assert isinstance(episode, Episode)
        assert episode is dataset.episodes[index]

    # Appending the same transitions doubles every array.
    new_size = 2
    dataset.append(obs, acts, rews, terms)
    assert len(dataset) == new_size * n_episodes
    assert dataset.observations.shape == (
        new_size * data_size,
        observation_size,
    )
    assert dataset.rewards.shape == (new_size * data_size, )
    assert dataset.terminals.shape == (new_size * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (new_size * data_size, )
    else:
        assert dataset.actions.shape == (new_size * data_size, action_size)

    # Appending shifted discrete actions must grow the action space.
    if discrete_action:
        old_action_size = dataset.get_action_size()
        new_size += 1
        dataset.append(obs, acts + add_actions, rews, terms)
        assert dataset.get_action_size() == old_action_size + add_actions

    # Extending with another dataset concatenates it onto this one.
    new_size += 1
    another_dataset = MDPDataset(
        obs,
        acts,
        rews,
        terms,
        discrete_action=discrete_action,
    )
    dataset.extend(another_dataset)
    assert len(dataset) == new_size * n_episodes
    assert dataset.observations.shape == (
        new_size * data_size,
        observation_size,
    )
    assert dataset.rewards.shape == (new_size * data_size, )
    assert dataset.terminals.shape == (new_size * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (new_size * data_size, )
    else:
        assert dataset.actions.shape == (new_size * data_size, action_size)

    # Dump to HDF5 and reload; everything must survive the round-trip.
    save_path = os.path.join("test_data", "dataset.h5")
    dataset.dump(save_path)
    new_dataset = MDPDataset.load(save_path)
    assert np.all(dataset.observations == new_dataset.observations)
    assert np.all(dataset.actions == new_dataset.actions)
    assert np.all(dataset.rewards == new_dataset.rewards)
    assert np.all(dataset.terminals == new_dataset.terminals)
    assert dataset.discrete_action == new_dataset.discrete_action
    assert len(dataset) == len(new_dataset)
    assert len(dataset) == len(new_dataset)
# Example #2 (0)
def test_mdp_dataset(data_size, observation_size, action_size, n_episodes,
                     discrete_action):
    """Smoke-test MDPDataset end to end.

    Covers construction, computed statistics, episode export, list-like
    access, append/extend growth, reward clipping, and the HDF5
    dump/load round-trip.
    """
    # Synthetic transitions: equal-length episodes, each ending with a
    # terminal flag.
    episode_length = data_size // n_episodes
    obs = np.random.random((data_size, observation_size))
    rews = np.random.uniform(-10.0, 10.0, size=data_size)
    terms = np.array(([0] * (episode_length - 1) + [1]) * n_episodes)

    if discrete_action:
        acts = np.random.randint(action_size, size=data_size)
        expected_action_size = np.max(acts) + 1
    else:
        acts = np.random.random((data_size, action_size))
        expected_action_size = action_size

    dataset = MDPDataset(obs, acts, rews, terms, discrete_action)

    # The dataset must round-trip the raw arrays and expose correct metadata.
    assert np.all(dataset.observations == obs)
    assert np.all(dataset.actions == acts)
    assert np.all(dataset.rewards == rews)
    assert np.all(dataset.terminals == terms)
    assert dataset.size() == n_episodes
    assert dataset.get_action_size() == action_size
    assert dataset.get_observation_shape() == (observation_size, )
    assert dataset.is_action_discrete() == discrete_action

    # Reference returns: per-episode reward sum, skipping the first step
    # of each episode (same accumulation order as a manual loop).
    ref_returns = [
        sum(float(r) for r in rews[start + 1:start + episode_length])
        for start in range(0, data_size, episode_length)
    ]

    stats = dataset.compute_stats()
    return_stats = stats['return']
    assert np.allclose(return_stats['mean'], np.mean(ref_returns))
    assert np.allclose(return_stats['std'], np.std(ref_returns))
    assert np.allclose(return_stats['min'], np.min(ref_returns))
    assert np.allclose(return_stats['max'], np.max(ref_returns))
    reward_stats = stats['reward']
    assert np.allclose(reward_stats['mean'], np.mean(rews))
    assert np.allclose(reward_stats['std'], np.std(rews))
    assert np.allclose(reward_stats['min'], np.min(rews))
    assert np.allclose(reward_stats['max'], np.max(rews))
    observation_stats = stats['observation']
    assert np.all(observation_stats['mean'] == np.mean(obs, axis=0))
    assert np.all(observation_stats['std'] == np.std(obs, axis=0))
    if discrete_action:
        freqs, action_ids = stats['action']['histogram']
        assert np.sum(freqs) == data_size
        assert list(action_ids) == list(range(action_size))
    else:
        action_stats = stats['action']
        assert np.all(action_stats['mean'] == np.mean(acts, axis=0))
        assert np.all(action_stats['std'] == np.std(acts, axis=0))
        assert np.all(action_stats['min'] == np.min(acts, axis=0))
        assert np.all(action_stats['max'] == np.max(acts, axis=0))
        assert len(action_stats['histogram']) == action_size
        for freqs, _ in action_stats['histogram']:
            assert np.sum(freqs) == data_size

    # Exported episodes must slice the flat arrays per episode.
    episodes = dataset.episodes
    assert len(episodes) == n_episodes
    for index, episode in enumerate(episodes):
        assert isinstance(episode, Episode)
        assert episode.size() == episode_length - 1
        begin = index * episode_length
        end = begin + episode_length
        assert np.all(episode.observations == obs[begin:end])
        assert np.all(episode.actions == acts[begin:end])
        assert np.all(episode.rewards == rews[begin:end])
        assert episode.get_observation_shape() == (observation_size, )
        assert episode.get_action_size() == expected_action_size

    # The dataset behaves like a sequence of Episode objects.
    assert len(dataset) == n_episodes
    assert dataset[0] is dataset.episodes[0]
    for index, episode in enumerate(dataset.episodes):
        assert isinstance(episode, Episode)
        assert episode is dataset.episodes[index]

    # Appending the same transitions doubles every array.
    dataset.append(obs, acts, rews, terms)
    assert len(dataset) == 2 * n_episodes
    assert dataset.observations.shape == (2 * data_size, observation_size)
    assert dataset.rewards.shape == (2 * data_size, )
    assert dataset.terminals.shape == (2 * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (2 * data_size, )
    else:
        assert dataset.actions.shape == (2 * data_size, action_size)

    # Extending with another dataset concatenates it onto this one.
    another_dataset = MDPDataset(obs, acts, rews, terms, discrete_action)
    dataset.extend(another_dataset)
    assert len(dataset) == 3 * n_episodes
    assert dataset.observations.shape == (3 * data_size, observation_size)
    assert dataset.rewards.shape == (3 * data_size, )
    assert dataset.terminals.shape == (3 * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (3 * data_size, )
    else:
        assert dataset.actions.shape == (3 * data_size, action_size)

    # clip_reward bounds the dataset's rewards without touching the
    # original reward array.
    dataset.clip_reward(-1.0, 1.0)
    assert rews[rews > 1.0].sum() != 0
    assert rews[rews < -1.0].sum() != 0
    assert dataset.rewards[dataset.rewards > 1.0].sum() == 0
    assert dataset.rewards[dataset.rewards < -1.0].sum() == 0

    # Dump to HDF5 and reload; everything must survive the round-trip.
    save_path = os.path.join('test_data', 'dataset.h5')
    dataset.dump(save_path)
    new_dataset = MDPDataset.load(save_path)
    assert np.all(dataset.observations == new_dataset.observations)
    assert np.all(dataset.actions == new_dataset.actions)
    assert np.all(dataset.rewards == new_dataset.rewards)
    assert np.all(dataset.terminals == new_dataset.terminals)
    assert dataset.discrete_action == new_dataset.discrete_action
    assert len(dataset) == len(new_dataset)