Example #1
def test_episode_terminals(data_size, observation_size, action_size):
    observations = np.random.random((data_size, observation_size)).astype("f4")
    actions = np.random.random((data_size, action_size)).astype("f4")
    rewards = np.random.random(data_size).astype("f4")

    # check default
    terminals = np.zeros(data_size, dtype=np.float32)
    terminals[49] = 1.0
    terminals[-1] = 1.0
    dataset1 = MDPDataset(observations, actions, rewards, terminals)
    assert len(dataset1.episodes) == 2
    assert np.all(dataset1.terminals == dataset1.episode_terminals)
    assert dataset1.episodes[0].terminal
    assert dataset1.episodes[0][-1].terminal

    # check non-terminal episode
    terminals = np.zeros(data_size, dtype=np.float32)
    terminals[-1] = 1.0
    episode_terminals = np.zeros(data_size, dtype=np.float32)
    episode_terminals[49] = 1.0
    episode_terminals[-1] = 1.0
    dataset2 = MDPDataset(observations, actions, rewards, terminals,
                          episode_terminals)
    assert len(dataset2.episodes) == 2
    assert not np.all(dataset2.terminals == dataset2.episode_terminals)
    assert not dataset2.episodes[0].terminal
    assert not dataset2.episodes[0][-1].terminal

    # check extend
    dataset1.extend(dataset2)
    assert len(dataset1) == 4
    assert not dataset1.episodes[2].terminal
    assert dataset1.episodes[3].terminal
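The test separates terminals (the environment actually terminated) from episode_terminals (an episode boundary that is not necessarily a termination, e.g. a timeout), which is why dataset2 splits into two episodes even though only its last step is terminal. A plausible pytest parametrization for driving the test (a sketch; the concrete sizes are assumptions, and data_size must exceed 50 so the terminal at index 49 falls inside the data):

import numpy as np
import pytest
from d3rlpy.dataset import MDPDataset

@pytest.mark.parametrize("data_size", [100])
@pytest.mark.parametrize("observation_size", [4])
@pytest.mark.parametrize("action_size", [2])
def test_episode_terminals(data_size, observation_size, action_size):
    ...  # body as above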
Example #2
def test_image_dataset(discrete_action, n_channels, action_size, data_size):
    shape = (data_size, n_channels, 84, 84)
    observations = np.random.randint(256, size=shape, dtype=np.uint8)
    if discrete_action:
        actions = np.random.randint(action_size, size=(data_size, ))
    else:
        actions = np.random.random((data_size, action_size))
    rewards = np.random.random((data_size, 1))
    terminals = (np.arange(data_size) % 9) == 0

    ref = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )

    # save as CSV (the exporter also archives the image observations
    # as test_data/test.zip)
    export_mdp_dataset_as_csv(ref, "test_data/test.csv")

    # extract zip file
    with zipfile.ZipFile("test_data/test.zip", "r") as zip_fd:
        zip_fd.extractall("test_data")

    # load from csv
    dataset = import_csv_as_mdp_dataset("test_data/test.csv", image=True)

    assert dataset.get_observation_shape() == ref.get_observation_shape()
    assert dataset.get_action_size() == ref.get_action_size()
    assert np.all(dataset.observations == ref.observations)
    assert np.allclose(dataset.actions, ref.actions)
    assert np.allclose(dataset.rewards, ref.rewards)
    assert np.all(dataset.terminals == ref.terminals)
    assert dataset.is_action_discrete() == discrete_action
Example #3
def train(algo_name,
          params,
          dataset_path,
          model_save_path,
          experiment_name=None,
          with_timestamp=True,
          logdir='d3rlpy_logs',
          prev_model_path=None,
          test_size=0.2):
    dataset = MDPDataset.load(dataset_path)
    train_data, test_data = train_test_split(dataset, test_size=test_size)

    # train
    algo = create_algo(algo_name, dataset.is_action_discrete(), **params)
    algo.fit(train_data,
             experiment_name=experiment_name,
             with_timestamp=with_timestamp,
             logdir=logdir,
             save_interval=1000000)  # never save models for now

    # save final model
    algo.save_model(model_save_path)

    # evaluate
    scores = _evaluate(algo, test_data, dataset.is_action_discrete())

    # compare with the previous model
    if prev_model_path:
        base_algo = create_algo(algo_name, dataset.is_action_discrete(),
                                **params)
        base_algo.load_model(prev_model_path)
        score = _comapre(algo, base_algo, test_data,
                         dataset.is_action_discrete())
        scores['algo_action_diff'] = score

    return scores
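A hypothetical invocation of this entry point (the algorithm name, hyperparameters, and paths are illustrative only; params is forwarded to create_algo as keyword arguments):

scores = train(
    algo_name="cql",
    params={"batch_size": 256},
    dataset_path="dataset.h5",
    model_save_path="model.pt",
)
print(scores)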
Example #4
def test_standard_scaler_with_episode(observation_shape, batch_size):
    shape = (batch_size, ) + observation_shape
    observations = np.random.random(shape).astype("f4")
    actions = np.random.random((batch_size, 1)).astype("f4")
    rewards = np.random.random(batch_size).astype("f4")
    terminals = np.random.randint(2, size=batch_size)
    terminals[-1] = 1.0

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )

    mean = observations.mean(axis=0)
    std = observations.std(axis=0)

    scaler = StandardScaler()
    scaler.fit(dataset.episodes)

    x = torch.rand((batch_size, ) + observation_shape)

    y = scaler.transform(x)

    ref_y = (x.numpy() - mean.reshape((1, -1))) / std.reshape((1, -1))

    assert np.allclose(y.numpy(), ref_y, atol=1e-6)
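In these tests the scaler is fitted manually to verify the math; outside of tests, d3rlpy algorithms accept a scaler argument and fit it on the training episodes themselves. A sketch (assuming d3rlpy's CQL and reusing the dataset built above; the step counts are illustrative):

from d3rlpy.algos import CQL

algo = CQL(scaler="standard")
algo.fit(dataset.episodes, n_steps=1000, n_steps_per_epoch=1000)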
Example #5
def train(algo_name,
          params,
          dataset_path,
          experiment_name=None,
          logdir='d3rlpy_logs'):
    # prepare dataset
    dataset = MDPDataset.load(dataset_path)
    train_data, test_data = train_test_split(dataset, test_size=0.2)

    # evaluate
    scorers = _get_scorers(dataset.is_action_discrete())

    # train
    algo = create_algo(algo_name, dataset.is_action_discrete(), **params)
    algo.fit(train_data,
             eval_episodes=test_data,
             scorers=scorers,
             experiment_name=experiment_name,
             with_timestamp=False,
             logdir=logdir,
             save_interval=1,
             show_progress=False,
             tensorboard=False)

    return True
Example #6
def train(algo_name,
          params,
          dataset_path,
          experiment_name=None,
          logdir="d3rlpy_logs"):
    # prepare dataset
    dataset = MDPDataset.load(dataset_path)
    train_data, test_data = train_test_split(dataset, test_size=0.2)

    # get dataset statistics
    stats = dataset.compute_stats()

    # evaluate
    scorers = _get_scorers(dataset.is_action_discrete(), stats)

    # add action scaler if continuous action-space
    if not dataset.is_action_discrete():
        params["action_scaler"] = "min_max"

    # train
    algo = create_algo(algo_name, dataset.is_action_discrete(), **params)
    algo.fit(
        train_data,
        n_steps=params["n_epochs"] * params["n_steps_per_epoch"],
        n_steps_per_epoch=params["n_steps_per_epoch"],
        eval_episodes=test_data,
        scorers=scorers,
        experiment_name=experiment_name,
        with_timestamp=False,
        logdir=logdir,
        save_interval=1,
        show_progress=False,
    )

    return True
Example #7
def test_min_max_scaler_with_episode(observation_shape, batch_size):
    shape = (batch_size, ) + observation_shape
    observations = np.random.random(shape).astype("f4")
    actions = np.random.random((batch_size, 1))
    rewards = np.random.random(batch_size)
    terminals = np.random.randint(2, size=batch_size)
    terminals[-1] = 1.0

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )

    max = observations.max(axis=0)
    min = observations.min(axis=0)

    scaler = MinMaxScaler()
    scaler.fit(dataset.episodes)

    x = torch.rand((batch_size, ) + observation_shape)

    y = scaler.transform(x)
    ref_y = (x.numpy() - min.reshape((1, -1))) / (max - min).reshape((1, -1))

    assert np.allclose(y.numpy(), ref_y)
Example #8
def import_csv_as_image_observation_dataset(fname, discrete_action):
    with open(fname, 'r') as file:
        reader = csv.reader(file)
        rows = [row for row in reader]

        # check header
        header = rows[0]
        _validate_csv_header(header)

        # get action size
        action_size = _get_action_size_from_header(header)

        data_size = len(rows) - 1

        observations = []
        actions = []
        rewards = []
        terminals = []
        for i, row in enumerate(rows[1:]):
            episode_id = row[0]

            # load image
            image = _load_image(os.path.join(os.path.dirname(fname), row[1]))

            # convert PIL.Image to ndarray
            array = convert_image_to_ndarray(image)

            observations.append(array)

            # get action columns
            action = list(map(float, row[2:2 + action_size]))
            if discrete_action:
                actions.append(int(action[0]))
            else:
                actions.append(action)

            # get reward column
            rewards.append(float(row[-1]))

            # rows[i + 2] is the next data row (rows[0] is the header row)
            if i == data_size - 1 or episode_id != rows[i + 2][0]:
                terminals.append(1)
            else:
                terminals.append(0)

        # convert list to ndarray
        observations = np.array(observations, dtype=np.uint8)
        actions = np.array(actions)
        rewards = np.array(rewards, dtype=np.float32)
        terminals = np.array(terminals, dtype=np.float32)

        dataset = MDPDataset(observations=observations,
                             actions=actions,
                             rewards=rewards,
                             terminals=terminals,
                             discrete_action=discrete_action)

    return dataset
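From the parsing logic above, each data row is laid out as: episode id, image file path (resolved relative to the CSV's directory), action_0 through action_{action_size-1}, and the reward in the last column. A hedged illustration of two rows for a 2-dimensional continuous action (the file names are assumptions, and the exact header format checked by _validate_csv_header is not reproduced here):

0,images/observation_0.png,0.12,-0.53,1.0
0,images/observation_1.png,0.40,0.77,0.0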
Example #9
def test_dataset_with_sklearn(data_size, observation_size, action_size,
                              n_episodes, test_size):
    observations = np.random.random((data_size, observation_size)).astype("f4")
    actions = np.random.random((data_size, action_size))
    rewards = np.random.random(data_size)
    n_steps = data_size // n_episodes
    terminals = np.array(([0] * (n_steps - 1) + [1]) * n_episodes)

    dataset = MDPDataset(observations, actions, rewards, terminals)

    # check compatibility with train_test_split
    train_episodes, test_episodes = train_test_split(dataset,
                                                     test_size=test_size)
    assert len(train_episodes) == int(n_episodes * (1.0 - test_size))
    assert len(test_episodes) == int(n_episodes * test_size)
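sklearn's train_test_split accepts the MDPDataset directly because the dataset implements the sequence protocol over its episodes (len(dataset) and dataset[i], exercised in Example #21). The import the snippet relies on (a sketch; splitting on episodes rather than transitions keeps trajectories intact):

from sklearn.model_selection import train_test_split

train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)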
Example #10
def _upload_image_dataset(client):
    # prepare dummy data
    shape = (100, 3, 84, 84)
    observations = np.random.randint(255, size=shape, dtype=np.uint8)
    actions = np.random.random((100, 2)).astype('f4')
    rewards = np.random.random((100, 1)).astype('f4')
    terminals = (np.arange(100) % 9) == 0

    # prepare dataset
    mdp_dataset = MDPDataset(observations, actions, rewards, terminals)
    csv_path = os.path.join('test_data', 'dataset.csv')
    export_image_observation_dataset_as_csv(mdp_dataset,
                                            csv_path,
                                            relative_path=False)

    # prepare upload request
    with open(csv_path, 'rb') as f:
        data = {'is_image': 'true', 'is_discrete': 'true'}
        file = FileStorage(stream=f,
                           filename='dataset.csv',
                           content_type='text/csv')
        data['dataset'] = file

        # add images
        image_dir_path = os.path.join('test_data', 'dataset_images')
        image_fds = []
        for i in range(100):
            file_name = 'observation_%d.png' % i
            file_path = os.path.join(image_dir_path, file_name)
            fd = open(file_path, 'rb')
            file = FileStorage(stream=fd,
                               filename=file_name,
                               content_type='image/png')
            data['image_%d' % i] = file
            image_fds.append(fd)
        data['total_images'] = 100

        # upload
        res = client.post('/api/datasets/upload',
                          data=data,
                          content_type='multipart/form-data')

        for fd in image_fds:
            fd.close()

    return res, mdp_dataset
Example #11
def to_mdp_dataset(replay_buffer):
    """ Returns d3rlpy's MDPDataset from SB3's ReplayBuffer

    Args:
        replay_buffer (stable_baselines3.common.buffers.ReplayBuffer):
            SB3's replay buffer.

    Returns:
        d3rlpy.dataset.MDPDataset: d3rlpy's MDPDataset.

    """
    pos = replay_buffer.size()
    discrete_action = isinstance(replay_buffer.action_space, Discrete)
    dataset = MDPDataset(observations=replay_buffer.observations[:pos, 0],
                         actions=replay_buffer.actions[:pos, 0],
                         rewards=replay_buffer.rewards[:pos, 0],
                         terminals=replay_buffer.dones[:pos, 0],
                         discrete_action=discrete_action)
    return dataset
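A brief usage sketch for this converter (assuming stable_baselines3 is installed; the environment and step count are illustrative). Note that the [:pos, 0] indexing takes the first of SB3's vectorized environments, so the buffer is assumed to come from a single-environment setup:

from stable_baselines3 import DQN

model = DQN("MlpPolicy", "CartPole-v1", buffer_size=10000)
model.learn(total_timesteps=1000)

dataset = to_mdp_dataset(model.replay_buffer)
dataset.dump("cartpole.h5")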
Example #12
def test_min_max_scaler_with_dataset(observation_shape, batch_size):
    observations = np.random.random((batch_size, ) + observation_shape)
    actions = np.random.random((batch_size, 1))
    rewards = np.random.random(batch_size)
    terminals = np.random.randint(2, size=batch_size)

    dataset = MDPDataset(observations, actions, rewards, terminals)

    max = observations.max(axis=0)
    min = observations.min(axis=0)

    scaler = MinMaxScaler(dataset)

    x = torch.rand((batch_size, ) + observation_shape)

    y = scaler.transform(x)

    ref_y = (x.numpy() - min.reshape((1, -1))) / (max - min).reshape((1, -1))

    assert np.allclose(y.numpy(), ref_y)
Example #13
def test_check_discrete_action_with_mdp_dataset(data_size, observation_size,
                                                action_size):
    observations = np.random.random((data_size, observation_size)).astype("f4")
    rewards = np.random.random(data_size)
    terminals = np.random.randint(2, size=data_size)

    # check discrete_action
    discrete_actions = np.random.randint(action_size, size=data_size)
    dataset = MDPDataset(observations, discrete_actions, rewards, terminals)
    assert dataset.is_action_discrete()

    # check continuous action
    continuous_actions = np.random.random((data_size, action_size))
    dataset = MDPDataset(observations, continuous_actions, rewards, terminals)
    assert not dataset.is_action_discrete()
Example #14
def import_csv_as_vector_observation_dataset(fname, discrete_action):
    with open(fname, 'r') as file:
        reader = csv.reader(file)
        rows = [row for row in reader]

        # get observation shape
        header = rows[0]
        _validate_csv_header(header)

        # retrieve data section
        csv_data = np.array(rows[1:], dtype=np.float32)

        # get observation columns
        observation_size = _get_observation_size_from_header(header)
        observation_last_index = observation_size + 1
        observations = csv_data[:, 1:observation_last_index]

        # get action columns
        action_size = _get_action_size_from_header(header)
        action_last_index = observation_last_index + action_size
        actions = csv_data[:, observation_last_index:action_last_index]
        if discrete_action:
            actions = np.array(actions.reshape(-1), dtype=np.int32)

        # get reward column
        rewards = csv_data[:, -1]

        # make terminal flags
        episode_ids = csv_data[:, 0]
        terminals = np.zeros_like(episode_ids)
        for i, episode_id in enumerate(episode_ids):
            if i + 1 == len(episode_ids) or episode_id != episode_ids[i + 1]:
                terminals[i] = 1.0

        dataset = MDPDataset(observations=observations,
                             actions=actions,
                             rewards=rewards,
                             terminals=terminals,
                             discrete_action=discrete_action)

    return dataset
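A worked pass through the column arithmetic above, assuming observation_size = 3 and action_size = 2:

# assumed row layout: episode_id, obs_0, obs_1, obs_2, act_0, act_1, reward
# observation_last_index = 3 + 1 = 4
observations = csv_data[:, 1:4]
# action_last_index = 4 + 2 = 6
actions = csv_data[:, 4:6]
rewards = csv_data[:, -1]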
Example #15
def test_standard_scaler_with_dataset(observation_shape, batch_size):
    shape = (batch_size, ) + observation_shape
    observations = np.random.random(shape).astype('f')
    actions = np.random.random((batch_size, 1)).astype('f')
    rewards = np.random.random(batch_size).astype('f')
    terminals = np.random.randint(2, size=batch_size)

    dataset = MDPDataset(observations, actions, rewards, terminals)

    mean = observations.mean(axis=0)
    std = observations.std(axis=0)

    scaler = StandardScaler(dataset)

    x = torch.rand((batch_size, ) + observation_shape)

    y = scaler.transform(x)

    ref_y = (x.numpy() - mean.reshape((1, -1))) / std.reshape((1, -1))

    assert np.allclose(y.numpy(), ref_y)
Example #16
def test_mdp_dataset_with_mask(data_size, observation_size, action_size,
                               n_episodes, create_mask, mask_size):
    observations = np.random.random((data_size, observation_size))
    actions = np.random.random((data_size, action_size))
    rewards = np.random.uniform(-10.0, 10.0, size=data_size)
    n_steps = data_size // n_episodes
    terminals = np.array(([0] * (n_steps - 1) + [1]) * n_episodes)

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
        create_mask=create_mask,
        mask_size=mask_size,
    )

    for episode in dataset.episodes:
        for transition in episode.transitions:
            if create_mask:
                assert transition.mask.shape == (mask_size, )
            else:
                assert transition.mask is None
Example #17
def test_standard_reward_scaler_with_episode(
    observation_shape, action_size, batch_size, eps
):
    shape = (batch_size,) + observation_shape
    observations = np.random.random(shape)
    actions = np.random.random((batch_size, action_size))
    rewards = np.random.random(batch_size).astype("f4")
    terminals = np.random.randint(2, size=batch_size)
    terminals[-1] = 1.0

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )

    rewards_without_first = []
    for episode in dataset:
        rewards_without_first += episode.rewards[1:].tolist()
    rewards_without_first = np.array(rewards_without_first)

    mean = np.mean(rewards_without_first)
    std = np.std(rewards_without_first)

    scaler = StandardRewardScaler(eps=eps)
    scaler.fit(dataset.episodes)

    x = torch.rand(batch_size)
    y = scaler.transform(x)
    ref_y = (x.numpy() - mean) / (std + eps)
    assert np.allclose(y, ref_y, atol=1e-6)

    params = scaler.get_params()
    assert np.allclose(params["mean"], mean)
    assert np.allclose(params["std"], std)
Example #18
def _upload_image_dataset(client):
    # prepare dummy data
    shape = (100, 3, 84, 84)
    observations = np.random.randint(255, size=shape, dtype=np.uint8)
    actions = np.random.random((100, 2)).astype("f4")
    rewards = np.random.random((100, 1)).astype("f4")
    terminals = (np.arange(100) % 9) == 0

    # prepare dataset
    mdp_dataset = MDPDataset(observations, actions, rewards, terminals)
    csv_path = os.path.join("test_data", "dataset.csv")
    zip_path = os.path.join("test_data", "dataset.zip")
    export_image_observation_dataset_as_csv(mdp_dataset, csv_path)

    # prepare upload request
    with open(csv_path, "rb") as csv_fd, open(zip_path, "rb") as zip_fd:
        data = {"is_image": "true"}

        file = FileStorage(stream=csv_fd,
                           filename="dataset.csv",
                           content_type="text/csv")
        data["dataset"] = file

        zip_file = FileStorage(stream=zip_fd,
                               filename="dataset.zip",
                               content_type="application/zip")
        data["zip_file"] = zip_file

        # upload
        res = client.post(
            "/api/datasets/upload",
            data=data,
            content_type="multipart/form-data",
        )

    return res, mdp_dataset
Example #19
def test_min_max_reward_scaler_with_episode(
    observation_shape, action_size, batch_size
):
    shape = (batch_size,) + observation_shape
    observations = np.random.random(shape)
    actions = np.random.random((batch_size, action_size))
    rewards = np.random.random(batch_size)
    terminals = np.random.randint(2, size=batch_size)
    terminals[-1] = 1.0

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )

    rewards_without_first = []
    for episode in dataset:
        rewards_without_first += episode.rewards[1:].tolist()
    rewards_without_first = np.array(rewards_without_first)

    maximum = rewards_without_first.max()
    minimum = rewards_without_first.min()

    scaler = MinMaxRewardScaler()
    scaler.fit(dataset.episodes)

    x = torch.rand(batch_size)
    y = scaler.transform(x)
    ref_y = (x.numpy() - minimum) / (maximum - minimum)
    assert np.allclose(y.numpy(), ref_y)

    params = scaler.get_params()
    assert np.allclose(params["minimum"], minimum)
    assert np.allclose(params["maximum"], maximum)
Example #20
# take the first 30 episodes to keep the dataset small
episodes = dataset.episodes[:30]

observations = []
actions = []
rewards = []
terminals = []

for episode in episodes:
    observations.append(episode.observations)
    actions.append(episode.actions.reshape(-1))
    rewards.append(episode.rewards.reshape(-1))
    flag = np.zeros(episode.observations.shape[0])
    flag[-1] = 1.0
    terminals.append(flag)

observations = np.vstack(observations)
actions = np.hstack(actions)
rewards = np.hstack(rewards)
terminals = np.hstack(terminals)

dataset = MDPDataset(observations=observations,
                     actions=actions,
                     rewards=rewards,
                     terminals=terminals,
                     discrete_action=True)

# save as CSV and images
export_mdp_dataset_as_csv(dataset, 'breakout.csv')
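The exported file can presumably be loaded back with the importer used in Example #2 (a sketch, assuming the same helper module is importable):

dataset2 = import_csv_as_mdp_dataset('breakout.csv', image=True)
assert dataset2.get_observation_shape() == dataset.get_observation_shape()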
Example #21
def test_mdp_dataset(
    data_size,
    observation_size,
    action_size,
    n_episodes,
    discrete_action,
    add_actions,
):
    observations = np.random.random((data_size, observation_size)).astype("f4")
    rewards = np.random.uniform(-10.0, 10.0, size=data_size).astype("f4")
    n_steps = data_size // n_episodes
    terminals = np.array(([0] * (n_steps - 1) + [1]) * n_episodes)

    if discrete_action:
        actions = np.random.randint(action_size, size=data_size)
        ref_action_size = np.max(actions) + 1
    else:
        actions = np.random.random((data_size, action_size)).astype("f4")
        ref_action_size = action_size

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
        discrete_action=discrete_action,
    )

    # check MDPDataset methods
    assert np.all(dataset.observations == observations)
    assert np.all(dataset.actions == actions)
    assert np.all(dataset.rewards == rewards)
    assert np.all(dataset.terminals == terminals)
    assert dataset.size() == n_episodes
    assert dataset.get_action_size() == action_size
    assert dataset.get_observation_shape() == (observation_size, )
    assert dataset.is_action_discrete() == discrete_action

    # check stats
    ref_returns = []
    for i in range(n_episodes):
        episode_return = 0.0
        for j in range(1, n_steps):
            episode_return += rewards[j + i * n_steps]
        ref_returns.append(episode_return)

    stats = dataset.compute_stats()
    return_stats = stats["return"]
    assert np.allclose(return_stats["mean"], np.mean(ref_returns))
    assert np.allclose(return_stats["std"], np.std(ref_returns))
    assert np.allclose(return_stats["min"], np.min(ref_returns))
    assert np.allclose(return_stats["max"], np.max(ref_returns))
    reward_stats = stats["reward"]
    assert np.allclose(reward_stats["mean"], np.mean(rewards))
    assert np.allclose(reward_stats["std"], np.std(rewards))
    assert np.allclose(reward_stats["min"], np.min(rewards))
    assert np.allclose(reward_stats["max"], np.max(rewards))
    observation_stats = stats["observation"]
    assert np.all(observation_stats["mean"] == np.mean(observations, axis=0))
    assert np.all(observation_stats["std"] == np.std(observations, axis=0))
    if discrete_action:
        freqs, action_ids = stats["action"]["histogram"]
        assert np.sum(freqs) == data_size
        assert list(action_ids) == [i for i in range(action_size)]
    else:
        action_stats = stats["action"]
        assert np.all(action_stats["mean"] == np.mean(actions, axis=0))
        assert np.all(action_stats["std"] == np.std(actions, axis=0))
        assert np.all(action_stats["min"] == np.min(actions, axis=0))
        assert np.all(action_stats["max"] == np.max(actions, axis=0))
        assert len(action_stats["histogram"]) == action_size
        for freqs, _ in action_stats["histogram"]:
            assert np.sum(freqs) == data_size

    # check episodes exported from dataset
    episodes = dataset.episodes
    assert len(episodes) == n_episodes
    for i, e in enumerate(dataset.episodes):
        assert isinstance(e, Episode)
        assert e.size() == n_steps - 1
        head = i * n_steps
        tail = head + n_steps
        assert np.all(e.observations == observations[head:tail])
        assert np.all(e.actions == actions[head:tail])
        assert np.all(e.rewards == rewards[head:tail])
        assert e.get_observation_shape() == (observation_size, )
        assert e.get_action_size() == ref_action_size

    # check list-like behaviors
    assert len(dataset) == n_episodes
    assert dataset[0] is dataset.episodes[0]
    for i, episode in enumerate(dataset.episodes):
        assert isinstance(episode, Episode)
        assert episode is dataset.episodes[i]

    # check append
    new_size = 2
    dataset.append(observations, actions, rewards, terminals)
    assert len(dataset) == new_size * n_episodes
    assert dataset.observations.shape == (
        new_size * data_size,
        observation_size,
    )
    assert dataset.rewards.shape == (new_size * data_size, )
    assert dataset.terminals.shape == (new_size * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (new_size * data_size, )
    else:
        assert dataset.actions.shape == (new_size * data_size, action_size)

    # check append if discrete action and number of actions grow
    if discrete_action:
        old_action_size = dataset.get_action_size()
        new_size += 1
        dataset.append(observations, actions + add_actions, rewards, terminals)
        assert dataset.get_action_size() == old_action_size + add_actions

    # check extend
    new_size += 1
    another_dataset = MDPDataset(
        observations,
        actions,
        rewards,
        terminals,
        discrete_action=discrete_action,
    )
    dataset.extend(another_dataset)
    assert len(dataset) == new_size * n_episodes
    assert dataset.observations.shape == (
        new_size * data_size,
        observation_size,
    )
    assert dataset.rewards.shape == (new_size * data_size, )
    assert dataset.terminals.shape == (new_size * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (new_size * data_size, )
    else:
        assert dataset.actions.shape == (new_size * data_size, action_size)

    # check dump and load
    dataset.dump(os.path.join("test_data", "dataset.h5"))
    new_dataset = MDPDataset.load(os.path.join("test_data", "dataset.h5"))
    assert np.all(dataset.observations == new_dataset.observations)
    assert np.all(dataset.actions == new_dataset.actions)
    assert np.all(dataset.rewards == new_dataset.rewards)
    assert np.all(dataset.terminals == new_dataset.terminals)
    assert dataset.discrete_action == new_dataset.discrete_action
    assert len(dataset) == len(new_dataset)
Example #22
def load_mdp_dataset(self):
    path = self.get_dataset_path()
    mdp_dataset = MDPDataset.load(path)
    return mdp_dataset
Example #23
def base_tester(model, impl, observation_shape, action_size=2):
    # dummy impl object
    model._impl = impl

    # check save model
    impl.save_model = Mock()
    model.save_model("model.pt")
    impl.save_model.assert_called_with("model.pt")

    # check load model
    impl.load_model = Mock()
    model.load_model("mock.pt")
    impl.load_model.assert_called_with("mock.pt")

    # check get_params
    params = model.get_params(deep=False)
    clone = model.__class__(**params)
    for key, val in clone.get_params(deep=False).items():
        assert params[key] is val

    # check deep flag
    deep_params = model.get_params(deep=True)
    assert deep_params["impl"] is not impl

    # check set_params
    clone = model.__class__()
    for key, val in params.items():
        if np.isscalar(val) and not isinstance(val, str):
            params[key] = val + np.random.random()
    # set_params returns itself
    assert clone.set_params(**params) is clone
    for key, val in clone.get_params(deep=False).items():
        assert params[key] is val

    # check fit and fitter
    update_backup = model.update
    model.update = Mock(return_value={"loss": np.random.random()})
    n_episodes = 4
    episode_length = 25
    n_batch = 32
    n_steps = 10
    n_steps_per_epoch = 5
    n_epochs = n_steps // n_steps_per_epoch
    data_size = n_episodes * episode_length
    model._batch_size = n_batch
    shape = (data_size, ) + observation_shape
    if len(observation_shape) == 3:
        observations = np.random.randint(256, size=shape, dtype=np.uint8)
    else:
        observations = np.random.random(shape).astype("f4")
    if model.get_action_type() == ActionSpace.CONTINUOUS:
        actions = np.random.random((data_size, action_size))
    else:
        actions = np.random.randint(action_size, size=data_size)
    rewards = np.random.random(data_size)
    terminals = np.zeros(data_size)
    for i in range(n_episodes):
        terminals[(i + 1) * episode_length - 1] = 1.0
    dataset = MDPDataset(observations, actions, rewards, terminals)

    # check fit
    results = model.fit(
        dataset.episodes,
        n_steps=n_steps,
        n_steps_per_epoch=n_steps_per_epoch,
        logdir="test_data",
        verbose=False,
        show_progress=False,
    )

    assert isinstance(results, list)
    assert len(results) == n_epochs

    # check that the correct number of iterations is performed
    assert len(model.update.call_args_list) == n_steps

    # check arguments at each iteration
    for i, call in enumerate(model.update.call_args_list):
        epoch = i // n_steps_per_epoch
        assert isinstance(call[0][0], TransitionMiniBatch)
        assert len(call[0][0]) == n_batch

    # check fitter
    fitter = model.fitter(
        dataset.episodes,
        n_steps=n_steps,
        n_steps_per_epoch=n_steps_per_epoch,
        logdir="test_data",
        verbose=False,
        show_progress=False,
    )

    for epoch, metrics in fitter:
        assert isinstance(epoch, int)
        assert isinstance(metrics, dict)

    assert epoch == n_epochs

    # save params.json
    logger = D3RLPyLogger("test", root_dir="test_data", verbose=False)
    # save parameters to test_data/test/params.json
    model.save_params(logger)
    # load params.json
    json_path = os.path.join(logger.logdir, "params.json")
    new_model = model.__class__.from_json(json_path)
    assert new_model.impl is not None
    assert new_model.impl.observation_shape == observation_shape
    assert new_model.impl.action_size == action_size
    assert type(model.scaler) == type(new_model.scaler)

    # check __setattr__ override
    prev_batch_size = model.impl.batch_size
    model.batch_size = prev_batch_size + 1
    assert model.impl.batch_size == model.batch_size

    # check build_with_dataset
    model._impl = None
    model.build_with_dataset(dataset)
    assert model.impl.observation_shape == dataset.get_observation_shape()
    assert model.impl.action_size == dataset.get_action_size()

    # reset impl and restore the backed-up update method
    model._impl = None
    model.update = update_backup

    return dataset
Example #24
def base_tester(model, impl, observation_shape, action_size=2):
    # dummy impl object
    model.impl = impl

    # check save model
    impl.save_model = Mock()
    model.save_model('model.pt')
    impl.save_model.assert_called_with('model.pt')

    # check load model
    impl.load_model = Mock()
    model.load_model('mock.pt')
    impl.load_model.assert_called_with('mock.pt')

    # check get_params
    params = model.get_params(deep=False)
    clone = model.__class__(**params)
    for key, val in clone.get_params(deep=False).items():
        assert params[key] is val

    # check deep flag
    deep_params = model.get_params(deep=True)
    assert deep_params['impl'] is not impl

    # check set_params
    clone = model.__class__()
    for key, val in params.items():
        if np.isscalar(val) and not isinstance(val, str):
            params[key] = val + np.random.random()
    # set_params returns itself
    assert clone.set_params(**params) is clone
    for key, val in clone.get_params(deep=False).items():
        assert params[key] is val

    # check fit
    update_backup = model.update
    model.update = Mock(return_value=range(len(model._get_loss_labels())))
    n_episodes = 4
    episode_length = 25
    n_batch = 32
    n_epochs = 3
    data_size = n_episodes * episode_length
    model.batch_size = n_batch
    model.n_epochs = n_epochs
    shape = (data_size, ) + observation_shape
    if len(observation_shape) == 3:
        observations = np.random.randint(256, size=shape, dtype=np.uint8)
    else:
        observations = np.random.random(shape).astype('f4')
    actions = np.random.random((data_size, action_size))
    rewards = np.random.random(data_size)
    terminals = np.zeros(data_size)
    for i in range(n_episodes):
        terminals[(i + 1) * episode_length - 1] = 1.0
    dataset = MDPDataset(observations, actions, rewards, terminals)

    model.fit(dataset.episodes,
              logdir='test_data',
              verbose=False,
              show_progress=False,
              tensorboard=False)

    # check that the correct number of iterations is performed
    assert len(model.update.call_args_list) == data_size // n_batch * n_epochs

    # check arguments at each iteration
    for i, call in enumerate(model.update.call_args_list):
        epoch = i // (data_size // n_batch)
        total_step = i
        assert call[0][0] == epoch
        assert call[0][1] == total_step
        assert isinstance(call[0][2], TransitionMiniBatch)
        assert len(call[0][2]) == n_batch

    # save params.json
    logger = D3RLPyLogger('test',
                          root_dir='test_data',
                          verbose=False,
                          tensorboard=False)
    # save parameters to test_data/test/params.json
    model._save_params(logger)
    # load params.json
    json_path = os.path.join(logger.logdir, 'params.json')
    new_model = model.__class__.from_json(json_path)
    assert new_model.impl is not None
    assert new_model.impl.observation_shape == observation_shape
    assert new_model.impl.action_size == action_size
    assert type(model.scaler) == type(new_model.scaler)

    # check __setattr__ override
    prev_batch_size = model.impl.batch_size
    model.batch_size = prev_batch_size + 1
    assert model.impl.batch_size == model.batch_size

    # reset impl and restore the backed-up update method
    model.impl = None
    model.update = update_backup

    return dataset
Example #25
def test_mdp_dataset(data_size, observation_size, action_size, n_episodes,
                     discrete_action):
    observations = np.random.random((data_size, observation_size))
    rewards = np.random.uniform(-10.0, 10.0, size=data_size)
    n_steps = data_size // n_episodes
    terminals = np.array(([0] * (n_steps - 1) + [1]) * n_episodes)

    if discrete_action:
        actions = np.random.randint(action_size, size=data_size)
        ref_action_size = np.max(actions) + 1
    else:
        actions = np.random.random((data_size, action_size))
        ref_action_size = action_size

    dataset = MDPDataset(observations, actions, rewards, terminals,
                         discrete_action)

    # check MDPDataset methods
    assert np.all(dataset.observations == observations)
    assert np.all(dataset.actions == actions)
    assert np.all(dataset.rewards == rewards)
    assert np.all(dataset.terminals == terminals)
    assert dataset.size() == n_episodes
    assert dataset.get_action_size() == action_size
    assert dataset.get_observation_shape() == (observation_size, )
    assert dataset.is_action_discrete() == discrete_action

    # check stats
    ref_returns = []
    for i in range(n_episodes):
        episode_return = 0.0
        for j in range(1, n_steps):
            episode_return += rewards[j + i * n_steps]
        ref_returns.append(episode_return)

    stats = dataset.compute_stats()
    return_stats = stats['return']
    assert np.allclose(return_stats['mean'], np.mean(ref_returns))
    assert np.allclose(return_stats['std'], np.std(ref_returns))
    assert np.allclose(return_stats['min'], np.min(ref_returns))
    assert np.allclose(return_stats['max'], np.max(ref_returns))
    reward_stats = stats['reward']
    assert np.allclose(reward_stats['mean'], np.mean(rewards))
    assert np.allclose(reward_stats['std'], np.std(rewards))
    assert np.allclose(reward_stats['min'], np.min(rewards))
    assert np.allclose(reward_stats['max'], np.max(rewards))
    observation_stats = stats['observation']
    assert np.all(observation_stats['mean'] == np.mean(observations, axis=0))
    assert np.all(observation_stats['std'] == np.std(observations, axis=0))
    if discrete_action:
        freqs, action_ids = stats['action']['histogram']
        assert np.sum(freqs) == data_size
        assert list(action_ids) == [i for i in range(action_size)]
    else:
        action_stats = stats['action']
        assert np.all(action_stats['mean'] == np.mean(actions, axis=0))
        assert np.all(action_stats['std'] == np.std(actions, axis=0))
        assert np.all(action_stats['min'] == np.min(actions, axis=0))
        assert np.all(action_stats['max'] == np.max(actions, axis=0))
        assert len(action_stats['histogram']) == action_size
        for freqs, _ in action_stats['histogram']:
            assert np.sum(freqs) == data_size

    # check episodes exported from dataset
    episodes = dataset.episodes
    assert len(episodes) == n_episodes
    for i, e in enumerate(dataset.episodes):
        assert isinstance(e, Episode)
        assert e.size() == n_steps - 1
        head = i * n_steps
        tail = head + n_steps
        assert np.all(e.observations == observations[head:tail])
        assert np.all(e.actions == actions[head:tail])
        assert np.all(e.rewards == rewards[head:tail])
        assert e.get_observation_shape() == (observation_size, )
        assert e.get_action_size() == ref_action_size

    # check list-like behaviors
    assert len(dataset) == n_episodes
    assert dataset[0] is dataset.episodes[0]
    for i, episode in enumerate(dataset.episodes):
        assert isinstance(episode, Episode)
        assert episode is dataset.episodes[i]

    # check append
    dataset.append(observations, actions, rewards, terminals)
    assert len(dataset) == 2 * n_episodes
    assert dataset.observations.shape == (2 * data_size, observation_size)
    assert dataset.rewards.shape == (2 * data_size, )
    assert dataset.terminals.shape == (2 * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (2 * data_size, )
    else:
        assert dataset.actions.shape == (2 * data_size, action_size)

    # check extend
    another_dataset = MDPDataset(observations, actions, rewards, terminals,
                                 discrete_action)
    dataset.extend(another_dataset)
    assert len(dataset) == 3 * n_episodes
    assert dataset.observations.shape == (3 * data_size, observation_size)
    assert dataset.rewards.shape == (3 * data_size, )
    assert dataset.terminals.shape == (3 * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (3 * data_size, )
    else:
        assert dataset.actions.shape == (3 * data_size, action_size)

    # check clip_reward
    dataset.clip_reward(-1.0, 1.0)
    assert rewards[rewards > 1.0].sum() != 0
    assert rewards[rewards < -1.0].sum() != 0
    assert dataset.rewards[dataset.rewards > 1.0].sum() == 0
    assert dataset.rewards[dataset.rewards < -1.0].sum() == 0

    # check dump and load
    dataset.dump(os.path.join('test_data', 'dataset.h5'))
    new_dataset = MDPDataset.load(os.path.join('test_data', 'dataset.h5'))
    assert np.all(dataset.observations == new_dataset.observations)
    assert np.all(dataset.actions == new_dataset.actions)
    assert np.all(dataset.rewards == new_dataset.rewards)
    assert np.all(dataset.terminals == new_dataset.terminals)
    assert dataset.discrete_action == new_dataset.discrete_action
    assert len(dataset) == len(new_dataset)