def load_environments(trajectory_file, simulation_dir, simulator_variant="bpr",
                      train_file=None, valid_env=False):
    max_state_len = 10
    num_parallel_envs = 1
    test_max_len = 100
    # Test environment runs against the trained simulator (default: BPR).
    test_env = ParallelSimulator(trajectory_file,
                                 simulation_type=simulation_dir,
                                 num_replicas=num_parallel_envs,
                                 max_state_len=max_state_len,
                                 max_traj_len=test_max_len,
                                 variant=simulator_variant,
                                 iteration_level=bpr_iteration_version.get(
                                     trajectory_file.parent.name),
                                 **simulator_values)
    # Train and (optional) validation environments replay logged trajectories.
    train_env = ParallelSimulator(train_file or trajectory_file,
                                  simulation_type=simulation_dir,
                                  num_replicas=num_parallel_envs,
                                  max_state_len=max_state_len,
                                  max_traj_len=test_max_len,
                                  variant="logs")
    valid_env = ParallelSimulator(trajectory_file.parent / "valid_split.csv",
                                  simulation_type=simulation_dir,
                                  num_replicas=num_parallel_envs,
                                  max_state_len=max_state_len,
                                  max_traj_len=test_max_len,
                                  variant="logs") if valid_env else None
    return train_env, test_env, valid_env
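# Usage sketch (an assumption, not from the source): drive the returned test
# environment with a random policy. `random_rollout`, `num_items`, and
# `slate_size` are hypothetical names; the (obs, rewards, done, info) step
# protocol matches the "bpr" variant exercised in the TestSimulator class below.
def random_rollout(test_env, num_items=3000, slate_size=10):
    obs = test_env.reset()
    total_reward, done = 0.0, False
    while not done:
        action = np.random.randint(1, num_items, size=(len(obs), slate_size))
        obs, rewards, done, info = test_env.step(action)
        total_reward += rewards.mean()
    return total_reward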
num_parallel_envs = 1
test_max_len = 100
num_eval_seeds = 5
train_env = RecommendEnv().initialize(
    data_dir / "train_split.csv",
    num_repeats=1,
    sample_k=top_k_actions,
    max_episode_len=max_episode_len,
    max_state_len=max_state_len,
)
env = ParallelEnvWrapper(train_env, num_parallel_envs)
# For log-data only
valid_env = ParallelSimulator(data_dir / "valid_split.csv",
                              num_replicas=num_parallel_envs,
                              max_state_len=max_state_len,
                              max_traj_len=test_max_len,
                              variant="logs")
# For simulator
test_env = ParallelSimulator(data_dir / "test.csv",
                             num_replicas=num_parallel_envs,
                             max_state_len=max_state_len,
                             max_traj_len=test_max_len,
                             variant=simulator_type,
                             iteration_level=bpr_iteration_version.get(
                                 log_dir.name),
                             **simulator_values)
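# Assumption: bpr_iteration_version and simulator_values, referenced above and
# in load_environments, are module-level configuration. The shapes below are
# guesses from usage only — a dict mapping a dataset directory name to the
# simulator checkpoint iteration, and a dict of extra ParallelSimulator kwargs.
bpr_iteration_version = {"ml-1m": 0}
simulator_values = {}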
class TestUserLogSimulator(TestCase):
    def setUp(self) -> None:
        data_dir = Path("/home/test/")
        self.batch_size = 32
        self.simulator = ParallelSimulator(data_dir,
                                           num_replicas=self.batch_size,
                                           max_state_len=5,
                                           num_start_items=1,
                                           max_traj_len=20,
                                           logs_only=True)
        self.simulator.seed(1)

    def get_sim(self):
        # Tests mutate simulator state, so each one works on its own copy.
        return deepcopy(self.simulator)

    def test_reset(self):
        sim = self.get_sim()
        obs = sim.reset()
        self.assertIsInstance(obs[0], ReturnStateTuple)
        self.assertEqual(len(obs), self.batch_size)

    def test_step(self):
        sim = self.get_sim()
        obs = sim.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        next_state, done = sim.step(action)
        self.assertEqual(len(next_state), self.batch_size)

    def test_multiple_steps(self):
        sim = self.get_sim()
        sim.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        for _ in range(10):
            sim.step(action)

    def test_run_to_end(self):
        sim = self.get_sim()
        obs = sim.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        # Step budget implied by the logged trajectories (computed but unused).
        total = sum(e.trajectories.map(len).sum() - 1 for e in sim.envs)
        num_users = len(sim)
        counter = 0
        total_dones = 0
        while obs:
            counter += len(obs)
            obs, num_dones = sim.step(action)
            total_dones += num_dones
        # Every logged user should terminate exactly once.
        self.assertEqual(total_dones, num_users)
        metrics = sim.get_metrics()
        print(metrics)

    def test_metrics(self):
        sim = self.get_sim()
        total_dones = 0
        sim.seed(1)
        obs = sim.reset()
        while obs:
            action = np.random.randint(1, 100, size=(self.batch_size, 5))
            obs, num_dones = sim.step(action)
            total_dones += num_dones
        self.assertEqual(total_dones, len(sim))
        metrics = sim.get_metrics()
        print(metrics)

        # A second run with the same seed must reproduce the same metrics.
        sim.hard_reset()
        sim.seed(1)
        obs = sim.reset()
        while obs:
            action = np.random.randint(1, 100, size=(self.batch_size, 5))
            obs, num_dones = sim.step(action)
        metrics_1 = sim.get_metrics()
        print(metrics_1)
        np.testing.assert_almost_equal(np.array(metrics), np.array(metrics_1))

        # All-zero actions should also run to completion.
        sim.hard_reset()
        sim.seed(1)
        obs = sim.reset()
        while obs:
            action = np.zeros((self.batch_size, 5))
            obs, num_dones = sim.step(action)
        metrics = sim.get_metrics()
        print(metrics)
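# Minimal sketch (assumption): the logs-only loop protocol as the tests above
# exercise it — reset() yields a batch of ReturnStateTuple states, step()
# returns (next_obs, num_dones), and iteration stops once every logged user
# has finished (obs becomes empty). `drain_log_simulator` is a hypothetical
# helper name.
def drain_log_simulator(sim, batch_size, num_items=100, slate_size=5):
    total_dones = 0
    obs = sim.reset()
    while obs:
        action = np.random.randint(1, num_items, size=(batch_size, slate_size))
        obs, num_dones = sim.step(action)
        total_dones += num_dones
    return total_dones, sim.get_metrics()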
class TestSimulator(TestCase):
    def setUp(self) -> None:
        traj_file = Path("/home/test/test.csv")
        self.batch_size = 32
        self.simulator = ParallelSimulator(traj_file,
                                           num_replicas=self.batch_size,
                                           max_state_len=5,
                                           max_traj_len=20)
        self.simulator.seed(1)

    def test_reset(self):
        obs = self.simulator.reset()
        self.assertIsInstance(obs[0], ReturnStateTuple)
        self.assertEqual(len(obs), self.batch_size)

    def test_step(self):
        self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        next_state, done = self.simulator.step(action)
        self.assertEqual(len(next_state), self.batch_size)

    def test_multiple_steps(self):
        self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        for _ in range(6):
            self.simulator.step(action)

    def test_run_to_end(self):
        obs = self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        while len(obs) > 0:
            obs, _ = self.simulator.step(action)
class TestSimulator(TestCase):
    def setUp(self) -> None:
        data_dir = Path("/home/alex/workspace/datasets/ml/ml-1m")
        self.batch_size = 1
        self.simulator = ParallelSimulator(data_dir / "test.csv",
                                           simulation_type=data_dir / "simulator/bpr/batch-rl-test/0",
                                           num_replicas=self.batch_size,
                                           max_state_len=10,
                                           variant="bpr",
                                           reward_type="item")
        self.simulator.seed(1)

    def test_reset(self):
        obs = self.simulator.reset()
        self.assertEqual(len(obs), self.batch_size)

    def test_step(self):
        self.simulator.reset()
        action = np.random.randint(1, 3000, size=(self.batch_size, 10))
        next_state, rewards, done, info = self.simulator.step(action)
        self.assertEqual(len(next_state), self.batch_size)

    def test_multiple_steps(self):
        self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        done = False
        while not done:
            next_state, rewards, done, info = self.simulator.step(action)

    def test_break_soon(self):
        # One fixed low-index slate per episode, repeated until the episode ends.
        episode_lens = []
        rewardsep = []
        while True:
            obs = self.simulator.reset()
            if obs is None:
                break
            done = False
            e = 0
            action = np.random.randint(1, 30, size=(len(obs), 10))
            r = 0
            while not done:
                next_state, rewards, done, info = self.simulator.step(action)
                r += rewards
                e += 1
            episode_lens.append(e)
            rewardsep.append(r)
        print("repeated")
        print(np.mean(episode_lens), np.std(episode_lens))
        print(np.mean(rewardsep), np.std(rewardsep))

    def test_run_to_end_with_much_random(self):
        # Fresh uniformly random slate at every step.
        episode_lens = []
        rewardsep = []
        while True:
            obs = self.simulator.reset()
            if obs is None:
                break
            done = False
            e = 0
            while not done:
                action = np.random.randint(1, 3000, size=(len(obs), 10))
                obs, rewards, done, info = self.simulator.step(action)
                rewardsep.append(rewards.mean())
                e += 1
            episode_lens.append(e)
        print("random")
        print(np.mean(episode_lens), np.std(episode_lens))
        print(np.mean(rewardsep), np.std(rewardsep))

    def test_run_to_end_tricky(self):
        # Cycle deterministically through ten fixed slates covering items 1..100.
        episode_lens = []
        rewardsep = []
        action = np.arange(1, 101).reshape(10, 10)
        while True:
            obs = self.simulator.reset()
            if obs is None:
                break
            done = False
            e = 0
            while not done:
                a = np.expand_dims(action[e % 10], 0)
                a = np.repeat(a, len(obs), axis=0)
                obs, rewards, done, info = self.simulator.step(a)
                rewardsep.append(rewards.mean())
                e += 1
            episode_lens.append(e)
        print("tricky")
        print(np.mean(episode_lens), np.std(episode_lens))
        print(np.mean(rewardsep), np.std(rewardsep))
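# Sketch (assumption): the evaluation loop repeated across the tests above,
# factored into a hypothetical helper. reset() returning None signals that all
# users have been consumed; each episode is driven by a caller-supplied
# action_fn(obs, step) so the three policies above become one-liners.
def evaluate(sim, action_fn):
    episode_lens, episode_rewards = [], []
    while True:
        obs = sim.reset()
        if obs is None:
            break
        done, steps, total = False, 0, 0.0
        while not done:
            obs, rewards, done, info = sim.step(action_fn(obs, steps))
            total += rewards.mean()
            steps += 1
        episode_lens.append(steps)
        episode_rewards.append(total)
    return (np.mean(episode_lens), np.std(episode_lens),
            np.mean(episode_rewards), np.std(episode_rewards))

# e.g. the fully random policy from test_run_to_end_with_much_random:
# evaluate(sim, lambda obs, t: np.random.randint(1, 3000, size=(len(obs), 10)))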
max_episode_len = 0
max_state_len = 10
num_parallel_envs = 1
test_max_len = 100
train_env = RecommendEnv().initialize(
    data_dir / "train_split.csv",
    num_repeats=1,
    sample_k=top_k_actions,
    max_episode_len=max_episode_len,
    max_state_len=max_state_len,
)
env = ParallelEnvWrapper(train_env, num_parallel_envs)
# For log-data only
valid_env = ParallelSimulator(data_dir / "valid_split.csv",
                              num_replicas=num_parallel_envs,
                              max_state_len=max_state_len,
                              max_traj_len=test_max_len,
                              variant="logs")
# For simulator
test_env = ParallelSimulator(data_dir / "test.csv",
                             num_replicas=num_parallel_envs,
                             max_state_len=max_state_len,
                             max_traj_len=test_max_len,
                             variant="logs")
for ba in baselines:
    log_path = log_dir / d / ba.__name__  # renamed from `logging` to avoid shadowing the stdlib module
    log_path.mkdir(exist_ok=True, parents=True)
    b = ba(env, top_k_actions, tf_idf=True, log_dir=log_path)
if not args.simulator:
    train_env = RecommendEnv(debug=args.debug).initialize(
        data_dir / "train_split.csv",
        num_repeats=num_iterations,
        sample_k=top_k_actions,
        max_episode_len=max_episode_len,
        max_state_len=max_state_len,
    )
    env = ParallelEnvWrapper(train_env, num_parallel_envs)
    sim_type = "logs"
else:
    env = ParallelSimulator(data_dir / "train_split.csv",
                            num_replicas=num_parallel_envs,
                            max_state_len=max_state_len,
                            max_traj_len=max_episode_len_test,
                            variant="bpr",
                            simulation_type="train",
                            reward_type=reward_type)
    sim_type = "bpr"
valid_env = ParallelSimulator(data_dir / "valid_split.csv",
                              num_replicas=num_parallel_envs,
                              max_state_len=max_state_len,
                              max_traj_len=max_episode_len_test,
                              variant=sim_type,
                              simulation_type="valid",
                              iteration_level=0)
test_env = ParallelSimulator(data_dir / "test.csv",
                             num_replicas=num_parallel_envs,
                             # NOTE: the source was cut off here; the remaining
                             # arguments are assumed to mirror valid_env above.
                             max_state_len=max_state_len,
                             max_traj_len=max_episode_len_test,
                             variant=sim_type,
                             simulation_type="test",
                             iteration_level=0)
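# Sketch (assumption): the CLI flags the snippet above relies on. Only
# args.simulator and args.debug appear in the source; everything else about
# this parser is hypothetical.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--simulator", action="store_true",
                    help="train against the BPR simulator instead of logged data")
parser.add_argument("--debug", action="store_true")
args = parser.parse_args()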