def test_episode_terminals(data_size, observation_size, action_size):
    observations = np.random.random((data_size, observation_size)).astype("f4")
    actions = np.random.random((data_size, action_size)).astype("f4")
    rewards = np.random.random(data_size).astype("f4")

    # check default
    terminals = np.zeros(data_size, dtype=np.float32)
    terminals[49] = 1.0
    terminals[-1] = 1.0
    dataset1 = MDPDataset(observations, actions, rewards, terminals)
    assert len(dataset1.episodes) == 2
    assert np.all(dataset1.terminals == dataset1.episode_terminals)
    assert dataset1.episodes[0].terminal
    assert dataset1.episodes[0][-1].terminal

    # check non-terminal episode
    terminals = np.zeros(data_size, dtype=np.float32)
    terminals[-1] = 1.0
    episode_terminals = np.zeros(data_size, dtype=np.float32)
    episode_terminals[49] = 1.0
    episode_terminals[-1] = 1.0
    dataset2 = MDPDataset(observations, actions, rewards, terminals,
                          episode_terminals)
    assert len(dataset2.episodes) == 2
    assert not np.all(dataset2.terminals == dataset2.episode_terminals)
    assert not dataset2.episodes[0].terminal
    assert not dataset2.episodes[0][-1].terminal

    # check extend
    dataset1.extend(dataset2)
    assert len(dataset1) == 4
    assert not dataset1.episodes[2].terminal
    assert dataset1.episodes[3].terminal
def test_image_dataset(discrete_action, n_channels, action_size, data_size):
    shape = (data_size, n_channels, 84, 84)
    observations = np.random.randint(256, size=shape, dtype=np.uint8)
    if discrete_action:
        actions = np.random.randint(action_size, size=(data_size, ))
    else:
        actions = np.random.random((data_size, action_size))
    rewards = np.random.random((data_size, 1))
    terminals = (np.arange(data_size) % 9) == 0

    ref = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )

    # save as csv
    export_mdp_dataset_as_csv(ref, "test_data/test.csv")

    # extract zip file
    with zipfile.ZipFile("test_data/test.zip", "r") as zip_fd:
        zip_fd.extractall("test_data")

    # load from csv
    dataset = import_csv_as_mdp_dataset("test_data/test.csv", image=True)

    assert dataset.get_observation_shape() == ref.get_observation_shape()
    assert dataset.get_action_size() == ref.get_action_size()
    assert np.all(dataset.observations == ref.observations)
    assert np.allclose(dataset.actions, ref.actions)
    assert np.allclose(dataset.rewards, ref.rewards)
    assert np.all(dataset.terminals == ref.terminals)
    assert dataset.is_action_discrete() == discrete_action
def train(algo_name,
          params,
          dataset_path,
          model_save_path,
          experiment_name=None,
          with_timestamp=True,
          logdir='d3rlpy_logs',
          prev_model_path=None,
          test_size=0.2):
    dataset = MDPDataset.load(dataset_path)
    train_data, test_data = train_test_split(dataset, test_size=test_size)

    # train
    algo = create_algo(algo_name, dataset.is_action_discrete(), **params)
    algo.fit(train_data,
             experiment_name=experiment_name,
             with_timestamp=with_timestamp,
             logdir=logdir,
             save_interval=1000000)  # never save intermediate models for now

    # save final model
    algo.save_model(model_save_path)

    # evaluate
    scores = _evaluate(algo, test_data, dataset.is_action_discrete())

    # compare with the previous model
    if prev_model_path:
        base_algo = create_algo(algo_name, dataset.is_action_discrete(),
                                **params)
        base_algo.load_model(prev_model_path)
        score = _compare(algo, base_algo, test_data,
                         dataset.is_action_discrete())
        scores['algo_action_diff'] = score

    return scores
def test_standard_scaler_with_episode(observation_shape, batch_size):
    shape = (batch_size, ) + observation_shape
    observations = np.random.random(shape).astype("f4")
    actions = np.random.random((batch_size, 1)).astype("f4")
    rewards = np.random.random(batch_size).astype("f4")
    terminals = np.random.randint(2, size=batch_size)
    terminals[-1] = 1.0

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )

    mean = observations.mean(axis=0)
    std = observations.std(axis=0)

    scaler = StandardScaler()
    scaler.fit(dataset.episodes)

    x = torch.rand((batch_size, ) + observation_shape)
    y = scaler.transform(x)
    ref_y = (x.numpy() - mean.reshape((1, -1))) / std.reshape((1, -1))

    assert np.allclose(y.numpy(), ref_y, atol=1e-6)
def train(algo_name,
          params,
          dataset_path,
          experiment_name=None,
          logdir='d3rlpy_logs'):
    # prepare dataset
    dataset = MDPDataset.load(dataset_path)
    train_data, test_data = train_test_split(dataset, test_size=0.2)

    # prepare scorers for evaluation
    scorers = _get_scorers(dataset.is_action_discrete())

    # train
    algo = create_algo(algo_name, dataset.is_action_discrete(), **params)
    algo.fit(train_data,
             eval_episodes=test_data,
             scorers=scorers,
             experiment_name=experiment_name,
             with_timestamp=False,
             logdir=logdir,
             save_interval=1,
             show_progress=False,
             tensorboard=False)

    return True
def train(algo_name, params, dataset_path, experiment_name=None, logdir="d3rlpy_logs"): # prepare dataset dataset = MDPDataset.load(dataset_path) train_data, test_data = train_test_split(dataset, test_size=0.2) # get dataset statistics stats = dataset.compute_stats() # evaluate scorers = _get_scorers(dataset.is_action_discrete(), stats) # add action scaler if continuous action-space if not dataset.is_action_discrete(): params["action_scaler"] = "min_max" # train algo = create_algo(algo_name, dataset.is_action_discrete(), **params) algo.fit( train_data, n_steps=params["n_epochs"] * params["n_steps_per_epoch"], n_steps_per_epoch=params["n_steps_per_epoch"], eval_episodes=test_data, scorers=scorers, experiment_name=experiment_name, with_timestamp=False, logdir=logdir, save_interval=1, show_progress=False, ) return True
def test_min_max_scaler_with_episode(observation_shape, batch_size):
    shape = (batch_size, ) + observation_shape
    observations = np.random.random(shape).astype("f4")
    actions = np.random.random((batch_size, 1))
    rewards = np.random.random(batch_size)
    terminals = np.random.randint(2, size=batch_size)
    terminals[-1] = 1.0

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )

    # avoid shadowing the max/min builtins
    maximum = observations.max(axis=0)
    minimum = observations.min(axis=0)

    scaler = MinMaxScaler()
    scaler.fit(dataset.episodes)

    x = torch.rand((batch_size, ) + observation_shape)
    y = scaler.transform(x)
    ref_y = (x.numpy() - minimum.reshape((1, -1))) \
        / (maximum - minimum).reshape((1, -1))

    assert np.allclose(y.numpy(), ref_y)
def import_csv_as_image_observation_dataset(fname, discrete_action):
    with open(fname, 'r') as file:
        reader = csv.reader(file)
        rows = [row for row in reader]

    # check header
    header = rows[0]
    _validate_csv_header(header)

    # get action size
    action_size = _get_action_size_from_header(header)

    data_size = len(rows) - 1
    observations = []
    actions = []
    rewards = []
    terminals = []
    for i, row in enumerate(rows[1:]):
        episode_id = row[0]

        # load image
        image = _load_image(os.path.join(os.path.dirname(fname), row[1]))

        # convert PIL.Image to ndarray
        array = convert_image_to_ndarray(image)
        observations.append(array)

        # get action columns
        action = list(map(float, row[2:2 + action_size]))
        if discrete_action:
            actions.append(int(action[0]))
        else:
            actions.append(action)

        # get reward column
        rewards.append(float(row[-1]))

        # set terminal flag at the last step of each episode
        if i == data_size - 1 or episode_id != rows[i + 2][0]:
            terminals.append(1)
        else:
            terminals.append(0)

    # convert list to ndarray
    observations = np.array(observations, dtype=np.uint8)
    actions = np.array(actions)
    rewards = np.array(rewards, dtype=np.float32)
    terminals = np.array(terminals, dtype=np.float32)

    dataset = MDPDataset(observations=observations,
                         actions=actions,
                         rewards=rewards,
                         terminals=terminals,
                         discrete_action=discrete_action)

    return dataset
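# A minimal sketch of the row layout the importer above expects, derived from
# its parsing logic: row[0] is the episode id, row[1] an image path resolved
# relative to the CSV file, then the action columns, and the reward last.
# The header names below are assumptions; whatever _validate_csv_header and
# _get_action_size_from_header actually accept takes precedence.
import csv

def write_example_image_csv(path):
    # one 2-step episode with a single continuous action dimension;
    # the referenced PNG paths are hypothetical
    rows = [
        ["episode_id", "image_path", "action_0", "reward"],
        ["0", "images/observation_0.png", "0.5", "1.0"],
        ["0", "images/observation_1.png", "-0.2", "0.0"],
    ]
    with open(path, "w", newline="") as f:
        csv.writer(f).writerows(rows)

# write_example_image_csv("example.csv")
# dataset = import_csv_as_image_observation_dataset("example.csv",
#                                                   discrete_action=False)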
def test_dataset_with_sklearn(data_size, observation_size, action_size,
                              n_episodes, test_size):
    observations = np.random.random((data_size, observation_size)).astype("f4")
    actions = np.random.random((data_size, action_size))
    rewards = np.random.random(data_size)
    n_steps = data_size // n_episodes
    terminals = np.array(([0] * (n_steps - 1) + [1]) * n_episodes)

    dataset = MDPDataset(observations, actions, rewards, terminals)

    # check compatibility with train_test_split
    train_episodes, test_episodes = train_test_split(dataset,
                                                     test_size=test_size)
    assert len(train_episodes) == int(n_episodes * (1.0 - test_size))
    assert len(test_episodes) == int(n_episodes * test_size)
def _upload_image_dataset(client):
    # prepare dummy data
    shape = (100, 3, 84, 84)
    observations = np.random.randint(255, size=shape, dtype=np.uint8)
    actions = np.random.random((100, 2)).astype('f4')
    rewards = np.random.random((100, 1)).astype('f4')
    terminals = (np.arange(100) % 9) == 0

    # prepare dataset
    mdp_dataset = MDPDataset(observations, actions, rewards, terminals)
    csv_path = os.path.join('test_data', 'dataset.csv')
    export_image_observation_dataset_as_csv(mdp_dataset,
                                            csv_path,
                                            relative_path=False)

    # prepare upload request
    with open(csv_path, 'rb') as f:
        data = {'is_image': 'true', 'is_discrete': 'true'}
        file = FileStorage(stream=f,
                           filename='dataset.csv',
                           content_type='text/csv')
        data['dataset'] = file

        # add images
        image_dir_path = os.path.join('test_data', 'dataset_images')
        image_fds = []
        for i in range(100):
            file_name = 'observation_%d.png' % i
            file_path = os.path.join(image_dir_path, file_name)
            fd = open(file_path, 'rb')
            file = FileStorage(stream=fd,
                               filename=file_name,
                               content_type='image/png')
            data['image_%d' % i] = file
            image_fds.append(fd)
        data['total_images'] = 100

        # upload
        res = client.post('/api/datasets/upload',
                          data=data,
                          content_type='multipart/form-data')

    for fd in image_fds:
        fd.close()

    return res, mdp_dataset
def to_mdp_dataset(replay_buffer):
    """ Returns d3rlpy's MDPDataset from SB3's ReplayBuffer

    Args:
        replay_buffer (stable_baselines3.common.buffers.ReplayBuffer):
            SB3's replay buffer.

    Returns:
        d3rlpy.dataset.MDPDataset: d3rlpy's MDPDataset.

    """
    pos = replay_buffer.size()
    discrete_action = isinstance(replay_buffer.action_space, Discrete)
    dataset = MDPDataset(observations=replay_buffer.observations[:pos, 0],
                         actions=replay_buffer.actions[:pos, 0],
                         rewards=replay_buffer.rewards[:pos, 0],
                         terminals=replay_buffer.dones[:pos, 0],
                         discrete_action=discrete_action)
    return dataset
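# A minimal usage sketch for to_mdp_dataset, assuming a standard SB3 setup;
# the env name, hyperparameters, and output path are illustrative only.
import gym
from stable_baselines3 import SAC

env = gym.make("Pendulum-v1")
model = SAC("MlpPolicy", env, buffer_size=10000)
model.learn(total_timesteps=1000)

# convert the filled replay buffer into an offline RL dataset
dataset = to_mdp_dataset(model.replay_buffer)
dataset.dump("pendulum.h5")  # reload later with MDPDataset.load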
def test_min_max_scaler_with_dataset(observation_shape, batch_size):
    observations = np.random.random((batch_size, ) + observation_shape)
    actions = np.random.random((batch_size, 1))
    rewards = np.random.random(batch_size)
    terminals = np.random.randint(2, size=batch_size)

    dataset = MDPDataset(observations, actions, rewards, terminals)

    # avoid shadowing the max/min builtins
    maximum = observations.max(axis=0)
    minimum = observations.min(axis=0)

    scaler = MinMaxScaler(dataset)

    x = torch.rand((batch_size, ) + observation_shape)
    y = scaler.transform(x)
    ref_y = (x.numpy() - minimum.reshape((1, -1))) \
        / (maximum - minimum).reshape((1, -1))

    assert np.allclose(y.numpy(), ref_y)
def test_check_discrete_action_with_mdp_dataset(data_size, observation_size,
                                                action_size):
    observations = np.random.random((data_size, observation_size)).astype("f4")
    rewards = np.random.random(data_size)
    terminals = np.random.randint(2, size=data_size)

    # check discrete action
    discrete_actions = np.random.randint(action_size, size=data_size)
    dataset = MDPDataset(observations, discrete_actions, rewards, terminals)
    assert dataset.is_action_discrete()

    # check continuous action
    continuous_actions = np.random.random((data_size, action_size))
    dataset = MDPDataset(observations, continuous_actions, rewards, terminals)
    assert not dataset.is_action_discrete()
def import_csv_as_vector_observation_dataset(fname, discrete_action):
    with open(fname, 'r') as file:
        reader = csv.reader(file)
        rows = [row for row in reader]

    # check header
    header = rows[0]
    _validate_csv_header(header)

    # retrieve data section
    csv_data = np.array(rows[1:], dtype=np.float32)

    # get observation columns
    observation_size = _get_observation_size_from_header(header)
    observation_last_index = observation_size + 1
    observations = csv_data[:, 1:observation_last_index]

    # get action columns
    action_size = _get_action_size_from_header(header)
    action_last_index = observation_last_index + action_size
    actions = csv_data[:, observation_last_index:action_last_index]
    if discrete_action:
        actions = np.array(actions.reshape(-1), dtype=np.int32)

    # get reward column
    rewards = csv_data[:, -1]

    # make terminal flags at episode boundaries
    episode_ids = csv_data[:, 0]
    terminals = np.zeros_like(episode_ids)
    for i, episode_id in enumerate(episode_ids):
        if i + 1 == len(episode_ids) or episode_id != episode_ids[i + 1]:
            terminals[i] = 1.0

    dataset = MDPDataset(observations=observations,
                         actions=actions,
                         rewards=rewards,
                         terminals=terminals,
                         discrete_action=discrete_action)

    return dataset
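# A minimal usage sketch for the vector-observation importer above. The column
# layout follows its parsing logic (episode id, observations, actions, reward);
# the header names are assumptions about what _validate_csv_header and the
# *_from_header helpers accept, so treat them as hypothetical.
import csv

def write_example_vector_csv(path):
    # two 2-step episodes, 2-dim observations, 1-dim continuous actions
    rows = [
        ["episode_id", "observation_0", "observation_1", "action_0", "reward"],
        ["0", "0.1", "0.2", "0.5", "1.0"],
        ["0", "0.3", "0.4", "-0.5", "0.0"],
        ["1", "0.5", "0.6", "0.2", "1.0"],
        ["1", "0.7", "0.8", "0.1", "0.0"],
    ]
    with open(path, "w", newline="") as f:
        csv.writer(f).writerows(rows)

# write_example_vector_csv("example.csv")
# dataset = import_csv_as_vector_observation_dataset("example.csv",
#                                                    discrete_action=False)
# terminal flags are inferred at each episode boundary (rows 1 and 3 here)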
def test_standard_scaler_with_dataset(observation_shape, batch_size):
    shape = (batch_size, ) + observation_shape
    observations = np.random.random(shape).astype('f')
    actions = np.random.random((batch_size, 1)).astype('f')
    rewards = np.random.random(batch_size).astype('f')
    terminals = np.random.randint(2, size=batch_size)

    dataset = MDPDataset(observations, actions, rewards, terminals)

    mean = observations.mean(axis=0)
    std = observations.std(axis=0)

    scaler = StandardScaler(dataset)

    x = torch.rand((batch_size, ) + observation_shape)
    y = scaler.transform(x)
    ref_y = (x.numpy() - mean.reshape((1, -1))) / std.reshape((1, -1))

    assert np.allclose(y.numpy(), ref_y)
def test_mdp_dataset_with_mask(data_size, observation_size, action_size,
                               n_episodes, create_mask, mask_size):
    observations = np.random.random((data_size, observation_size))
    actions = np.random.random((data_size, action_size))
    rewards = np.random.uniform(-10.0, 10.0, size=data_size)
    n_steps = data_size // n_episodes
    terminals = np.array(([0] * (n_steps - 1) + [1]) * n_episodes)

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
        create_mask=create_mask,
        mask_size=mask_size,
    )

    for episode in dataset.episodes:
        for transition in episode.transitions:
            if create_mask:
                assert transition.mask.shape == (mask_size, )
            else:
                assert transition.mask is None
def test_standard_reward_scaler_with_episode(
    observation_shape, action_size, batch_size, eps
):
    shape = (batch_size,) + observation_shape
    observations = np.random.random(shape)
    actions = np.random.random((batch_size, action_size))
    rewards = np.random.random(batch_size).astype("f4")
    terminals = np.random.randint(2, size=batch_size)
    terminals[-1] = 1.0

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )

    rewards_without_first = []
    for episode in dataset:
        rewards_without_first += episode.rewards[1:].tolist()
    rewards_without_first = np.array(rewards_without_first)

    mean = np.mean(rewards_without_first)
    std = np.std(rewards_without_first)

    scaler = StandardRewardScaler(eps=eps)
    scaler.fit(dataset.episodes)

    x = torch.rand(batch_size)
    y = scaler.transform(x)
    ref_y = (x.numpy() - mean) / (std + eps)

    assert np.allclose(y, ref_y, atol=1e-6)

    params = scaler.get_params()
    assert np.allclose(params["mean"], mean)
    assert np.allclose(params["std"], std)
def _upload_image_dataset(client):
    # prepare dummy data
    shape = (100, 3, 84, 84)
    observations = np.random.randint(255, size=shape, dtype=np.uint8)
    actions = np.random.random((100, 2)).astype("f4")
    rewards = np.random.random((100, 1)).astype("f4")
    terminals = (np.arange(100) % 9) == 0

    # prepare dataset
    mdp_dataset = MDPDataset(observations, actions, rewards, terminals)
    csv_path = os.path.join("test_data", "dataset.csv")
    zip_path = os.path.join("test_data", "dataset.zip")
    export_image_observation_dataset_as_csv(mdp_dataset, csv_path)

    # prepare upload request
    with open(csv_path, "rb") as csv_fd, open(zip_path, "rb") as zip_fd:
        data = {"is_image": "true"}
        file = FileStorage(stream=csv_fd,
                           filename="dataset.csv",
                           content_type="text/csv")
        data["dataset"] = file
        zip_file = FileStorage(stream=zip_fd,
                               filename="dataset.zip",
                               content_type="application/zip")
        data["zip_file"] = zip_file

        # upload
        res = client.post(
            "/api/datasets/upload",
            data=data,
            content_type="multipart/form-data",
        )

    return res, mdp_dataset
def test_min_max_reward_scaler_with_episode(
    observation_shape, action_size, batch_size
):
    shape = (batch_size,) + observation_shape
    observations = np.random.random(shape)
    actions = np.random.random((batch_size, action_size))
    rewards = np.random.random(batch_size)
    terminals = np.random.randint(2, size=batch_size)
    terminals[-1] = 1.0

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )

    rewards_without_first = []
    for episode in dataset:
        rewards_without_first += episode.rewards[1:].tolist()
    rewards_without_first = np.array(rewards_without_first)

    maximum = rewards_without_first.max()
    minimum = rewards_without_first.min()

    scaler = MinMaxRewardScaler()
    scaler.fit(dataset.episodes)

    x = torch.rand(batch_size)
    y = scaler.transform(x)
    ref_y = (x.numpy() - minimum) / (maximum - minimum)

    assert np.allclose(y.numpy(), ref_y)

    params = scaler.get_params()
    assert np.allclose(params["minimum"], minimum)
    assert np.allclose(params["maximum"], maximum)
# take the first 30 episodes due to the dataset size
episodes = dataset.episodes[:30]

observations = []
actions = []
rewards = []
terminals = []
for episode in episodes:
    observations.append(episode.observations)
    actions.append(episode.actions.reshape(-1))
    rewards.append(episode.rewards.reshape(-1))
    flag = np.zeros(episode.observations.shape[0])
    flag[-1] = 1.0
    terminals.append(flag)

observations = np.vstack(observations)
actions = np.hstack(actions)
rewards = np.hstack(rewards)
terminals = np.hstack(terminals)

dataset = MDPDataset(observations=observations,
                     actions=actions,
                     rewards=rewards,
                     terminals=terminals,
                     discrete_action=True)

# save as CSV and images
export_mdp_dataset_as_csv(dataset, 'breakout.csv')
def test_mdp_dataset(
    data_size,
    observation_size,
    action_size,
    n_episodes,
    discrete_action,
    add_actions,
):
    observations = np.random.random((data_size, observation_size)).astype("f4")
    rewards = np.random.uniform(-10.0, 10.0, size=data_size).astype("f4")
    n_steps = data_size // n_episodes
    terminals = np.array(([0] * (n_steps - 1) + [1]) * n_episodes)

    if discrete_action:
        actions = np.random.randint(action_size, size=data_size)
        ref_action_size = np.max(actions) + 1
    else:
        actions = np.random.random((data_size, action_size)).astype("f4")
        ref_action_size = action_size

    dataset = MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
        discrete_action=discrete_action,
    )

    # check MDPDataset methods
    assert np.all(dataset.observations == observations)
    assert np.all(dataset.actions == actions)
    assert np.all(dataset.rewards == rewards)
    assert np.all(dataset.terminals == terminals)
    assert dataset.size() == n_episodes
    assert dataset.get_action_size() == action_size
    assert dataset.get_observation_shape() == (observation_size, )
    assert dataset.is_action_discrete() == discrete_action

    # check stats
    ref_returns = []
    for i in range(n_episodes):
        episode_return = 0.0
        for j in range(1, n_steps):
            episode_return += rewards[j + i * n_steps]
        ref_returns.append(episode_return)

    stats = dataset.compute_stats()
    return_stats = stats["return"]
    assert np.allclose(return_stats["mean"], np.mean(ref_returns))
    assert np.allclose(return_stats["std"], np.std(ref_returns))
    assert np.allclose(return_stats["min"], np.min(ref_returns))
    assert np.allclose(return_stats["max"], np.max(ref_returns))
    reward_stats = stats["reward"]
    assert np.allclose(reward_stats["mean"], np.mean(rewards))
    assert np.allclose(reward_stats["std"], np.std(rewards))
    assert np.allclose(reward_stats["min"], np.min(rewards))
    assert np.allclose(reward_stats["max"], np.max(rewards))
    observation_stats = stats["observation"]
    assert np.all(observation_stats["mean"] == np.mean(observations, axis=0))
    assert np.all(observation_stats["std"] == np.std(observations, axis=0))
    if discrete_action:
        freqs, action_ids = stats["action"]["histogram"]
        assert np.sum(freqs) == data_size
        assert list(action_ids) == [i for i in range(action_size)]
    else:
        action_stats = stats["action"]
        assert np.all(action_stats["mean"] == np.mean(actions, axis=0))
        assert np.all(action_stats["std"] == np.std(actions, axis=0))
        assert np.all(action_stats["min"] == np.min(actions, axis=0))
        assert np.all(action_stats["max"] == np.max(actions, axis=0))
        assert len(action_stats["histogram"]) == action_size
        for freqs, _ in action_stats["histogram"]:
            assert np.sum(freqs) == data_size

    # check episodes exported from dataset
    episodes = dataset.episodes
    assert len(episodes) == n_episodes
    for i, e in enumerate(dataset.episodes):
        assert isinstance(e, Episode)
        assert e.size() == n_steps - 1
        head = i * n_steps
        tail = head + n_steps
        assert np.all(e.observations == observations[head:tail])
        assert np.all(e.actions == actions[head:tail])
        assert np.all(e.rewards == rewards[head:tail])
        assert e.get_observation_shape() == (observation_size, )
        assert e.get_action_size() == ref_action_size

    # check list-like behaviors
    assert len(dataset) == n_episodes
    assert dataset[0] is dataset.episodes[0]
    for i, episode in enumerate(dataset.episodes):
        assert isinstance(episode, Episode)
        assert episode is dataset.episodes[i]

    # check append
    new_size = 2
    dataset.append(observations, actions, rewards, terminals)
    assert len(dataset) == new_size * n_episodes
    assert dataset.observations.shape == (
        new_size * data_size,
        observation_size,
    )
    assert dataset.rewards.shape == (new_size * data_size, )
    assert dataset.terminals.shape == (new_size * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (new_size * data_size, )
    else:
        assert dataset.actions.shape == (new_size * data_size, action_size)

    # check append when actions are discrete and the number of actions grows
    if discrete_action:
        old_action_size = dataset.get_action_size()
        new_size += 1
        dataset.append(observations, actions + add_actions, rewards,
                       terminals)
        assert dataset.get_action_size() == old_action_size + add_actions

    # check extend
    new_size += 1
    another_dataset = MDPDataset(
        observations,
        actions,
        rewards,
        terminals,
        discrete_action=discrete_action,
    )
    dataset.extend(another_dataset)
    assert len(dataset) == new_size * n_episodes
    assert dataset.observations.shape == (
        new_size * data_size,
        observation_size,
    )
    assert dataset.rewards.shape == (new_size * data_size, )
    assert dataset.terminals.shape == (new_size * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (new_size * data_size, )
    else:
        assert dataset.actions.shape == (new_size * data_size, action_size)

    # check dump and load
    dataset.dump(os.path.join("test_data", "dataset.h5"))
    new_dataset = MDPDataset.load(os.path.join("test_data", "dataset.h5"))
    assert np.all(dataset.observations == new_dataset.observations)
    assert np.all(dataset.actions == new_dataset.actions)
    assert np.all(dataset.rewards == new_dataset.rewards)
    assert np.all(dataset.terminals == new_dataset.terminals)
    assert dataset.discrete_action == new_dataset.discrete_action
    assert len(dataset) == len(new_dataset)
def load_mdp_dataset(self):
    path = self.get_dataset_path()
    mdp_dataset = MDPDataset.load(path)
    return mdp_dataset
def base_tester(model, impl, observation_shape, action_size=2):
    # dummy impl object
    model._impl = impl

    # check save model
    impl.save_model = Mock()
    model.save_model("model.pt")
    impl.save_model.assert_called_with("model.pt")

    # check load model
    impl.load_model = Mock()
    model.load_model("mock.pt")
    impl.load_model.assert_called_with("mock.pt")

    # check get_params
    params = model.get_params(deep=False)
    clone = model.__class__(**params)
    for key, val in clone.get_params(deep=False).items():
        assert params[key] is val

    # check deep flag
    deep_params = model.get_params(deep=True)
    assert deep_params["impl"] is not impl

    # check set_params
    clone = model.__class__()
    for key, val in params.items():
        if np.isscalar(val) and not isinstance(val, str):
            params[key] = val + np.random.random()
    # set_params returns itself
    assert clone.set_params(**params) is clone
    for key, val in clone.get_params(deep=False).items():
        assert params[key] is val

    # check fit and fitter
    update_backup = model.update
    model.update = Mock(return_value={"loss": np.random.random()})
    n_episodes = 4
    episode_length = 25
    n_batch = 32
    n_steps = 10
    n_steps_per_epoch = 5
    n_epochs = n_steps // n_steps_per_epoch
    data_size = n_episodes * episode_length
    model._batch_size = n_batch
    shape = (data_size, ) + observation_shape
    if len(observation_shape) == 3:
        observations = np.random.randint(256, size=shape, dtype=np.uint8)
    else:
        observations = np.random.random(shape).astype("f4")
    if model.get_action_type() == ActionSpace.CONTINUOUS:
        actions = np.random.random((data_size, action_size))
    else:
        actions = np.random.randint(action_size, size=data_size)
    rewards = np.random.random(data_size)
    terminals = np.zeros(data_size)
    for i in range(n_episodes):
        terminals[(i + 1) * episode_length - 1] = 1.0
    dataset = MDPDataset(observations, actions, rewards, terminals)

    # check fit
    results = model.fit(
        dataset.episodes,
        n_steps=n_steps,
        n_steps_per_epoch=n_steps_per_epoch,
        logdir="test_data",
        verbose=False,
        show_progress=False,
    )
    assert isinstance(results, list)
    assert len(results) == n_epochs

    # check if the correct number of iterations are performed
    assert len(model.update.call_args_list) == n_steps

    # check arguments at each iteration
    for call in model.update.call_args_list:
        assert isinstance(call[0][0], TransitionMiniBatch)
        assert len(call[0][0]) == n_batch

    # check fitter
    fitter = model.fitter(
        dataset.episodes,
        n_steps=n_steps,
        n_steps_per_epoch=n_steps_per_epoch,
        logdir="test_data",
        verbose=False,
        show_progress=False,
    )
    for epoch, metrics in fitter:
        assert isinstance(epoch, int)
        assert isinstance(metrics, dict)
    assert epoch == n_epochs

    # save params.json
    logger = D3RLPyLogger("test", root_dir="test_data", verbose=False)
    # save parameters to test_data/test/params.json
    model.save_params(logger)
    # load params.json
    json_path = os.path.join(logger.logdir, "params.json")
    new_model = model.__class__.from_json(json_path)
    assert new_model.impl is not None
    assert new_model.impl.observation_shape == observation_shape
    assert new_model.impl.action_size == action_size
    assert type(model.scaler) == type(new_model.scaler)

    # check __setattr__ override
    prev_batch_size = model.impl.batch_size
    model.batch_size = prev_batch_size + 1
    assert model.impl.batch_size == model.batch_size

    # check builds
    model._impl = None
    model.build_with_dataset(dataset)
    assert model.impl.observation_shape == dataset.get_observation_shape()
    assert model.impl.action_size == dataset.get_action_size()

    # set backed up methods
    model._impl = None
    model.update = update_backup

    return dataset
def base_tester(model, impl, observation_shape, action_size=2):
    # dummy impl object
    model.impl = impl

    # check save model
    impl.save_model = Mock()
    model.save_model('model.pt')
    impl.save_model.assert_called_with('model.pt')

    # check load model
    impl.load_model = Mock()
    model.load_model('mock.pt')
    impl.load_model.assert_called_with('mock.pt')

    # check get_params
    params = model.get_params(deep=False)
    clone = model.__class__(**params)
    for key, val in clone.get_params(deep=False).items():
        assert params[key] is val

    # check deep flag
    deep_params = model.get_params(deep=True)
    assert deep_params['impl'] is not impl

    # check set_params
    clone = model.__class__()
    for key, val in params.items():
        if np.isscalar(val) and not isinstance(val, str):
            params[key] = val + np.random.random()
    # set_params returns itself
    assert clone.set_params(**params) is clone
    for key, val in clone.get_params(deep=False).items():
        assert params[key] is val

    # check fit
    update_backup = model.update
    model.update = Mock(return_value=range(len(model._get_loss_labels())))
    n_episodes = 4
    episode_length = 25
    n_batch = 32
    n_epochs = 3
    data_size = n_episodes * episode_length
    model.batch_size = n_batch
    model.n_epochs = n_epochs
    shape = (data_size, ) + observation_shape
    if len(observation_shape) == 3:
        observations = np.random.randint(256, size=shape, dtype=np.uint8)
    else:
        observations = np.random.random(shape).astype('f4')
    actions = np.random.random((data_size, action_size))
    rewards = np.random.random(data_size)
    terminals = np.zeros(data_size)
    for i in range(n_episodes):
        terminals[(i + 1) * episode_length - 1] = 1.0
    dataset = MDPDataset(observations, actions, rewards, terminals)

    model.fit(dataset.episodes,
              logdir='test_data',
              verbose=False,
              show_progress=False,
              tensorboard=False)

    # check if the correct number of iterations are performed
    assert len(model.update.call_args_list) == data_size // n_batch * n_epochs

    # check arguments at each iteration
    for i, call in enumerate(model.update.call_args_list):
        epoch = i // (data_size // n_batch)
        total_step = i
        assert call[0][0] == epoch
        assert call[0][1] == total_step
        assert isinstance(call[0][2], TransitionMiniBatch)
        assert len(call[0][2]) == n_batch

    # save params.json
    logger = D3RLPyLogger('test',
                          root_dir='test_data',
                          verbose=False,
                          tensorboard=False)
    # save parameters to test_data/test/params.json
    model._save_params(logger)
    # load params.json
    json_path = os.path.join(logger.logdir, 'params.json')
    new_model = model.__class__.from_json(json_path)
    assert new_model.impl is not None
    assert new_model.impl.observation_shape == observation_shape
    assert new_model.impl.action_size == action_size
    assert type(model.scaler) == type(new_model.scaler)

    # check __setattr__ override
    prev_batch_size = model.impl.batch_size
    model.batch_size = prev_batch_size + 1
    assert model.impl.batch_size == model.batch_size

    # set backed up methods
    model.impl = None
    model.update = update_backup

    return dataset
def test_mdp_dataset(data_size, observation_size, action_size, n_episodes,
                     discrete_action):
    observations = np.random.random((data_size, observation_size))
    rewards = np.random.uniform(-10.0, 10.0, size=data_size)
    n_steps = data_size // n_episodes
    terminals = np.array(([0] * (n_steps - 1) + [1]) * n_episodes)

    if discrete_action:
        actions = np.random.randint(action_size, size=data_size)
        ref_action_size = np.max(actions) + 1
    else:
        actions = np.random.random((data_size, action_size))
        ref_action_size = action_size

    dataset = MDPDataset(observations, actions, rewards, terminals,
                         discrete_action)

    # check MDPDataset methods
    assert np.all(dataset.observations == observations)
    assert np.all(dataset.actions == actions)
    assert np.all(dataset.rewards == rewards)
    assert np.all(dataset.terminals == terminals)
    assert dataset.size() == n_episodes
    assert dataset.get_action_size() == action_size
    assert dataset.get_observation_shape() == (observation_size, )
    assert dataset.is_action_discrete() == discrete_action

    # check stats
    ref_returns = []
    for i in range(n_episodes):
        episode_return = 0.0
        for j in range(1, n_steps):
            episode_return += rewards[j + i * n_steps]
        ref_returns.append(episode_return)

    stats = dataset.compute_stats()
    return_stats = stats['return']
    assert np.allclose(return_stats['mean'], np.mean(ref_returns))
    assert np.allclose(return_stats['std'], np.std(ref_returns))
    assert np.allclose(return_stats['min'], np.min(ref_returns))
    assert np.allclose(return_stats['max'], np.max(ref_returns))
    reward_stats = stats['reward']
    assert np.allclose(reward_stats['mean'], np.mean(rewards))
    assert np.allclose(reward_stats['std'], np.std(rewards))
    assert np.allclose(reward_stats['min'], np.min(rewards))
    assert np.allclose(reward_stats['max'], np.max(rewards))
    observation_stats = stats['observation']
    assert np.all(observation_stats['mean'] == np.mean(observations, axis=0))
    assert np.all(observation_stats['std'] == np.std(observations, axis=0))
    if discrete_action:
        freqs, action_ids = stats['action']['histogram']
        assert np.sum(freqs) == data_size
        assert list(action_ids) == [i for i in range(action_size)]
    else:
        action_stats = stats['action']
        assert np.all(action_stats['mean'] == np.mean(actions, axis=0))
        assert np.all(action_stats['std'] == np.std(actions, axis=0))
        assert np.all(action_stats['min'] == np.min(actions, axis=0))
        assert np.all(action_stats['max'] == np.max(actions, axis=0))
        assert len(action_stats['histogram']) == action_size
        for freqs, _ in action_stats['histogram']:
            assert np.sum(freqs) == data_size

    # check episodes exported from dataset
    episodes = dataset.episodes
    assert len(episodes) == n_episodes
    for i, e in enumerate(dataset.episodes):
        assert isinstance(e, Episode)
        assert e.size() == n_steps - 1
        head = i * n_steps
        tail = head + n_steps
        assert np.all(e.observations == observations[head:tail])
        assert np.all(e.actions == actions[head:tail])
        assert np.all(e.rewards == rewards[head:tail])
        assert e.get_observation_shape() == (observation_size, )
        assert e.get_action_size() == ref_action_size

    # check list-like behaviors
    assert len(dataset) == n_episodes
    assert dataset[0] is dataset.episodes[0]
    for i, episode in enumerate(dataset.episodes):
        assert isinstance(episode, Episode)
        assert episode is dataset.episodes[i]

    # check append
    dataset.append(observations, actions, rewards, terminals)
    assert len(dataset) == 2 * n_episodes
    assert dataset.observations.shape == (2 * data_size, observation_size)
    assert dataset.rewards.shape == (2 * data_size, )
    assert dataset.terminals.shape == (2 * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (2 * data_size, )
    else:
        assert dataset.actions.shape == (2 * data_size, action_size)

    # check extend
    another_dataset = MDPDataset(observations, actions, rewards, terminals,
                                 discrete_action)
    dataset.extend(another_dataset)
    assert len(dataset) == 3 * n_episodes
    assert dataset.observations.shape == (3 * data_size, observation_size)
    assert dataset.rewards.shape == (3 * data_size, )
    assert dataset.terminals.shape == (3 * data_size, )
    if discrete_action:
        assert dataset.actions.shape == (3 * data_size, )
    else:
        assert dataset.actions.shape == (3 * data_size, action_size)

    # check clip_reward
    dataset.clip_reward(-1.0, 1.0)
    assert rewards[rewards > 1.0].sum() != 0
    assert rewards[rewards < -1.0].sum() != 0
    assert dataset.rewards[dataset.rewards > 1.0].sum() == 0
    assert dataset.rewards[dataset.rewards < -1.0].sum() == 0

    # check dump and load
    dataset.dump(os.path.join('test_data', 'dataset.h5'))
    new_dataset = MDPDataset.load(os.path.join('test_data', 'dataset.h5'))
    assert np.all(dataset.observations == new_dataset.observations)
    assert np.all(dataset.actions == new_dataset.actions)
    assert np.all(dataset.rewards == new_dataset.rewards)
    assert np.all(dataset.terminals == new_dataset.terminals)
    assert dataset.discrete_action == new_dataset.discrete_action
    assert len(dataset) == len(new_dataset)