def load_environments(trajectory_file,
                      simulation_dir,
                      simulator_variant="bpr",
                      train_file=None,
                      valid_env=False):
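    """Build the train and test environments, plus an optional validation one.

    The test environment replays ``trajectory_file`` through the trained
    simulator variant; the train environment replays the raw logs. When
    ``valid_env`` is true, a log-replay environment over ``valid_split.csv``
    is returned as well, otherwise None.
    """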
    max_state_len = 10
    num_parallel_envs = 1
    test_max_len = 100

    test_env = ParallelSimulator(trajectory_file,
                                 simulation_type=simulation_dir,
                                 num_replicas=num_parallel_envs,
                                 max_state_len=max_state_len,
                                 max_traj_len=test_max_len,
                                 variant=simulator_variant,
                                 iteration_level=bpr_iteration_version.get(
                                     trajectory_file.parent.name),
                                 **simulator_values)
    train_env = ParallelSimulator(train_file or trajectory_file,
                                  simulation_type=simulation_dir,
                                  num_replicas=num_parallel_envs,
                                  max_state_len=max_state_len,
                                  max_traj_len=test_max_len,
                                  variant="logs")
    valid_env = ParallelSimulator(trajectory_file.parent / "valid_split.csv",
                                  simulation_type=simulation_dir,
                                  num_replicas=num_parallel_envs,
                                  max_state_len=max_state_len,
                                  max_traj_len=test_max_len,
                                  variant="logs") if valid_env else None
    return train_env, test_env, valid_env
Example #2
def setUp(self) -> None:
    traj_file = Path("/home/test/test.csv")
    self.batch_size = 32
    self.simulator = ParallelSimulator(traj_file,
                                       num_replicas=self.batch_size,
                                       max_state_len=5,
                                       max_traj_len=20)
    self.simulator.seed(1)
Example #3
def setUp(self) -> None:
    data_dir = Path("/home/test/")
    self.batch_size = 32
    self.simulator = ParallelSimulator(data_dir,
                                       num_replicas=self.batch_size,
                                       max_state_len=5,
                                       num_start_items=1,
                                       max_traj_len=20,
                                       logs_only=True)
    self.simulator.seed(1)
Example #4
def setUp(self) -> None:
    data_dir = Path("/home/alex/workspace/datasets/ml/ml-1m")
    self.batch_size = 1
    self.simulator = ParallelSimulator(data_dir / "test.csv",
                                       simulation_type=data_dir /
                                       "simulator/bpr/batch-rl-test/0",
                                       num_replicas=self.batch_size,
                                       max_state_len=10,
                                       variant="bpr",
                                       reward_type="item")
    self.simulator.seed(1)
Example #5
    num_parallel_envs = 1
    test_max_len = 100

    num_eval_seeds = 5

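    # Log-replay training environment, wrapped for parallel rollouts;
    # validation also replays the held-out logs.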
    train_env = RecommendEnv().initialize(
        data_dir / "train_split.csv",
        num_repeats=1,
        sample_k=top_k_actions,
        max_episode_len=max_episode_len,
        max_state_len=max_state_len,
    )
    env = ParallelEnvWrapper(train_env, num_parallel_envs)
    valid_env = ParallelSimulator(data_dir / "valid_split.csv",
                                  num_replicas=num_parallel_envs,
                                  max_state_len=max_state_len,
                                  max_traj_len=test_max_len,
                                  variant="logs")

    # Test environment: replays test.csv through the configured simulator variant.

    test_env = ParallelSimulator(data_dir / "test.csv",
                                 num_replicas=num_parallel_envs,
                                 max_state_len=max_state_len,
                                 max_traj_len=test_max_len,
                                 variant=simulator_type,
                                 iteration_level=bpr_iteration_version.get(
                                     log_dir.name),
                                 **simulator_values)
Example #6
from copy import deepcopy
from pathlib import Path
from unittest import TestCase

import numpy as np

# ParallelSimulator and ReturnStateTuple come from the project under test;
# the import path below is an assumption.
from simulator import ParallelSimulator, ReturnStateTuple


class TestUserLogSimulator(TestCase):
    def setUp(self) -> None:
        data_dir = Path("/home/test/")
        self.batch_size = 32
        self.simulator = ParallelSimulator(data_dir,
                                           num_replicas=self.batch_size,
                                           max_state_len=5,
                                           num_start_items=1,
                                           max_traj_len=20,
                                           logs_only=True)
        self.simulator.seed(1)

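    # Hand each test a fresh copy so simulator state does not leak
    # between test cases.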
    def get_sim(self):
        return deepcopy(self.simulator)

    def test_reset(self):
        sim = self.get_sim()
        obs = sim.reset()
        self.assertIsInstance(obs[0], ReturnStateTuple)
        self.assertEqual(len(obs), self.batch_size)

    def test_step(self):
        sim = self.get_sim()
        obs = sim.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        next_state, done = sim.step(action)
        self.assertEqual(len(next_state), self.batch_size)

    def test_multiple_steps(self):
        sim = self.get_sim()
        sim.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        for _ in range(10):
            sim.step(action)

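    # Stepping until reset() is exhausted should terminate every user's
    # episode exactly once.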
    def test_run_to_end(self):
        sim = self.get_sim()
        obs = sim.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))

        total = sum(e.trajectories.map(len).sum() - 1 for e in sim.envs)
        num_users = len(sim)
        counter = 0
        total_dones = 0
        while obs:
            counter += len(obs)
            obs, num_dones = sim.step(action)
            total_dones += num_dones

        self.assertEqual(total_dones, num_users)
        metrics = sim.get_metrics()
        print(metrics)

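    # With the same seed, a hard reset should reproduce identical metrics.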
    def test_metrics(self):
        sim = self.get_sim()
        total_dones = 0
        sim.seed(1)
        obs = sim.reset()
        while obs:
            action = np.random.randint(1, 100, size=(self.batch_size, 5))
            obs, num_dones = sim.step(action)
            total_dones += num_dones
        self.assertEqual(total_dones, len(sim))
        metrics = sim.get_metrics()
        print(metrics)

        sim.hard_reset()
        sim.seed(1)
        obs = sim.reset()
        while obs:
            action = np.random.randint(1, 100, size=(self.batch_size, 5))
            obs, num_dones = sim.step(action)
        metrics_1 = sim.get_metrics()
        print(metrics_1)
        np.testing.assert_almost_equal(np.array(metrics), np.array(metrics_1))

        sim.hard_reset()
        sim.seed(1)
        obs = sim.reset()
        while obs:
            action = np.zeros((self.batch_size, 5))
            obs, num_dones = sim.step(action)
        metrics = sim.get_metrics()
        print(metrics)
Example #7
from pathlib import Path
from unittest import TestCase

import numpy as np

# Import path assumed, as in the example above.
from simulator import ParallelSimulator, ReturnStateTuple


class TestSimulator(TestCase):
    def setUp(self) -> None:
        traj_file = Path("/home/test/test.csv")
        self.batch_size = 32
        self.simulator = ParallelSimulator(traj_file,
                                           num_replicas=self.batch_size,
                                           max_state_len=5,
                                           max_traj_len=20)
        self.simulator.seed(1)

    def test_reset(self):
        obs = self.simulator.reset()
        self.assertIsInstance(obs[0], ReturnStateTuple)
        self.assertEqual(len(obs), self.batch_size)

    def test_step(self):
        self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        next_state, done = self.simulator.step(action)
        self.assertEqual(len(next_state), self.batch_size)

    def test_multiple_steps(self):
        self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        # Repeating the same slate for several steps should not raise.
        for _ in range(6):
            self.simulator.step(action)

    def test_run_to_end(self):
        obs = self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        while len(obs) > 0:
            obs, _ = self.simulator.step(action)
Example #8
class TestSimulator(TestCase):
    def setUp(self) -> None:
        data_dir = Path("/home/alex/workspace/datasets/ml/ml-1m")
        self.batch_size = 1
        self.simulator = ParallelSimulator(data_dir / "test.csv",
                                           simulation_type=data_dir /
                                           "simulator/bpr/batch-rl-test/0",
                                           num_replicas=self.batch_size,
                                           max_state_len=10,
                                           variant="bpr",
                                           reward_type="item")
        self.simulator.seed(1)

    def test_reset(self):
        obs = self.simulator.reset()
        self.assertEqual(len(obs), self.batch_size)

    def test_step(self):
        self.simulator.reset()
        action = np.random.randint(1, 3000, size=(self.batch_size, 10))
        next_state, rewards, done, info = self.simulator.step(action)
        self.assertEqual(len(next_state), self.batch_size)

    def test_multiple_steps(self):
        self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        # Step the same slate until the episode terminates.
        done = False
        while not done:
            next_state, rewards, done, info = self.simulator.step(action)

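    # Replay one fixed random slate for a whole episode and report
    # episode-length and reward statistics across all users.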
    def test_break_soon(self):
        episode_lens = []
        rewardsep = []
        while True:
            obs = self.simulator.reset()
            if obs is None:
                break
            done = False
            e = 0
            action = np.random.randint(1, 30, size=(len(obs), 10))
            r = 0
            while not done:
                next_state, rewards, done, info = self.simulator.step(action)
                r += rewards
                e += 1
            episode_lens.append(e)
            rewardsep.append(r)
        print("repeated")
        print(np.mean(episode_lens), np.std(episode_lens))
        print(np.mean(rewardsep), np.std(rewardsep))

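    # Draw a fresh random slate over the full item range at every step.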
    def test_run_to_end_with_much_random(self):
        episode_lens = []
        rewardsep = []
        while True:
            obs = self.simulator.reset()
            if obs is None:
                break
            done = False
            e = 0
            while not done:
                action = np.random.randint(1, 3000, size=(len(obs), 10))
                obs, rewards, done, info = self.simulator.step(action)
                rewardsep.append(rewards.mean())
                e += 1
            episode_lens.append(e)
        print("random")
        print(np.mean(episode_lens), np.std(episode_lens))
        print(np.mean(rewardsep), np.std(rewardsep))

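    # Cycle deterministically through ten fixed slates (items 1-100)
    # instead of sampling at random.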
    def test_run_to_end_tricky(self):
        episode_lens = []
        rewardsep = []

        action = np.arange(1, 101).reshape(10, 10)

        while True:
            obs = self.simulator.reset()
            if obs is None:
                break
            done = False
            e = 0
            while not done:
                a = np.expand_dims(action[e % 10], 0)
                a = np.repeat(a, len(obs), axis=0)
                obs, rewards, done, info = self.simulator.step(a)
                rewardsep.append(rewards.mean())
                e += 1
            episode_lens.append(e)
        print("tricky")
        print(np.mean(episode_lens), np.std(episode_lens))
        print(np.mean(rewardsep), np.std(rewardsep))
Example #9
    max_episode_len = 0
    max_state_len = 10
    num_parallel_envs = 1
    test_max_len = 100

    train_env = RecommendEnv().initialize(
        data_dir / "train_split.csv",
        num_repeats=1,
        sample_k=top_k_actions,
        max_episode_len=max_episode_len,
        max_state_len=max_state_len,
    )
    env = ParallelEnvWrapper(train_env, num_parallel_envs)
    valid_env = ParallelSimulator(data_dir / "valid_split.csv",
                                  num_replicas=num_parallel_envs,
                                  max_state_len=max_state_len,
                                  max_traj_len=test_max_len,
                                  variant="logs")

    # Test environment: log replay only.
    test_env = ParallelSimulator(data_dir / "test.csv",
                                 num_replicas=num_parallel_envs,
                                 max_state_len=max_state_len,
                                 max_traj_len=test_max_len,
                                 variant="logs")

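    # Run each baseline, logging into its own subdirectory.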
    for ba in baselines:
        baseline_log_dir = log_dir / d / ba.__name__
        baseline_log_dir.mkdir(exist_ok=True, parents=True)
        b = ba(env, top_k_actions, tf_idf=True, log_dir=baseline_log_dir)
Example #10
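# Train either on log replay (RecommendEnv) or on the learned BPR
# simulator, depending on args.simulator.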
if not args.simulator:
    train_env = RecommendEnv(debug=args.debug).initialize(data_dir / "train_split.csv",
                                                          num_repeats=num_iterations,
                                                          sample_k=top_k_actions,
                                                          max_episode_len=max_episode_len,
                                                          max_state_len=max_state_len,
                                                          )
    env = ParallelEnvWrapper(train_env, num_parallel_envs)
    sim_type = "logs"
else:
    env = ParallelSimulator(data_dir / "train_split.csv",
                            num_replicas=num_parallel_envs,
                            max_state_len=max_state_len,
                            max_traj_len=max_episode_len_test,
                            variant="bpr",
                            simulation_type="train",
                            reward_type=reward_type)
    sim_type = "bpr"

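# Validation mirrors the chosen training variant; evaluation always goes
# through ParallelSimulator.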
valid_env = ParallelSimulator(data_dir / "valid_split.csv",
                              num_replicas=num_parallel_envs,
                              max_state_len=max_state_len,
                              max_traj_len=max_episode_len_test,
                              variant=sim_type,
                              simulation_type="valid",
                              iteration_level=0)

test_env = ParallelSimulator(data_dir / "test.csv",
                             num_replicas=num_parallel_envs,