# ===== Example #1 (label: 0) =====
class TestSimulator(TestCase):
    """Smoke tests for ParallelSimulator built from a single trajectory CSV."""

    def setUp(self) -> None:
        # Fixture path is environment-specific; assumes the CSV exists there.
        traj_file = Path("/home/test/test.csv")
        self.batch_size = 32
        self.simulator = ParallelSimulator(traj_file,
                                           num_replicas=self.batch_size,
                                           max_state_len=5,
                                           max_traj_len=20)
        # Fixed seed so every test run is reproducible.
        self.simulator.seed(1)

    def test_reset(self):
        """reset() returns one ReturnStateTuple observation per replica."""
        obs = self.simulator.reset()
        self.assertIsInstance(obs[0], ReturnStateTuple)
        self.assertEqual(len(obs), self.batch_size)

    def test_step(self):
        """step() preserves the batch dimension of the observations."""
        self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        next_state, done = self.simulator.step(action)
        self.assertEqual(len(next_state), self.batch_size)

    def test_multiple_steps(self):
        """Several consecutive steps with the same action must not raise."""
        self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        # Was six copy-pasted step() calls ending in an unused assignment;
        # a loop over the same count is equivalent and clearer.
        for _ in range(6):
            self.simulator.step(action)

    def test_run_to_end(self):
        """Stepping until no observations remain must terminate."""
        obs = self.simulator.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        while len(obs) > 0:
            obs, _ = self.simulator.step(action)
# ===== Example #2 (label: 0) =====
class TestUserLogSimulator(TestCase):
    """Tests for ParallelSimulator in logs-only (user-log replay) mode."""

    def setUp(self) -> None:
        # Fixture directory is environment-specific.
        data_dir = Path("/home/test/")
        self.batch_size = 32
        self.simulator = ParallelSimulator(data_dir,
                                           num_replicas=self.batch_size,
                                           max_state_len=5,
                                           num_start_items=1,
                                           max_traj_len=20,
                                           logs_only=True)
        self.simulator.seed(1)

    def get_sim(self):
        # Fresh deep copy per test so state mutated by one test never leaks
        # into another (the shared instance from setUp stays pristine).
        return deepcopy(self.simulator)

    def test_reset(self):
        """reset() returns one ReturnStateTuple observation per replica."""
        sim = self.get_sim()
        obs = sim.reset()
        self.assertIsInstance(obs[0], ReturnStateTuple)
        self.assertEqual(len(obs), self.batch_size)

    def test_step(self):
        """step() preserves the batch dimension of the observations."""
        sim = self.get_sim()
        obs = sim.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        next_state, done = sim.step(action)
        self.assertEqual(len(next_state), self.batch_size)

    def test_multiple_steps(self):
        """Repeated steps with the same action must not raise."""
        sim = self.get_sim()
        sim.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))
        for _ in range(10):
            sim.step(action)

    def test_run_to_end(self):
        """Running to exhaustion finishes every user exactly once."""
        sim = self.get_sim()
        obs = sim.reset()
        action = np.random.randint(1, 100, size=(self.batch_size, 5))

        num_users = len(sim)
        total_dones = 0
        while obs:
            obs, num_dones = sim.step(action)
            total_dones += num_dones

        self.assertEqual(total_dones, num_users)
        metrics = sim.get_metrics()
        print(metrics)

    def test_metrics(self):
        """Metrics are reproducible under identical seeds and actions."""
        sim = self.get_sim()
        total_dones = 0
        sim.seed(1)
        # BUG FIX: the per-step actions below come from NumPy's *global* RNG;
        # seeding only the simulator left the two runs with different action
        # sequences, so the metric-equality assertion compared unlike runs.
        np.random.seed(1)
        obs = sim.reset()
        while obs:
            action = np.random.randint(1, 100, size=(self.batch_size, 5))
            obs, num_dones = sim.step(action)
            total_dones += num_dones
        self.assertEqual(total_dones, len(sim))
        metrics = sim.get_metrics()
        print(metrics)

        # Replay with identical simulator AND action seeds: metrics must match.
        sim.hard_reset()
        sim.seed(1)
        np.random.seed(1)
        obs = sim.reset()
        while obs:
            action = np.random.randint(1, 100, size=(self.batch_size, 5))
            obs, num_dones = sim.step(action)
        metrics_1 = sim.get_metrics()
        print(metrics_1)
        np.testing.assert_almost_equal(np.array(metrics), np.array(metrics_1))

        # All-zero actions exercise the degenerate-action code path;
        # result is only printed, not asserted.
        sim.hard_reset()
        sim.seed(1)
        obs = sim.reset()
        while obs:
            action = np.zeros((self.batch_size, 5))
            obs, num_dones = sim.step(action)
        metrics = sim.get_metrics()
        print(metrics)
class TestSimulator(TestCase):
    """Tests for ParallelSimulator in BPR item-reward simulation mode."""

    def setUp(self) -> None:
        # Fixture paths are environment-specific (local ML-1M checkout).
        data_dir = Path("/home/alex/workspace/datasets/ml/ml-1m")
        self.batch_size = 1
        self.simulator = ParallelSimulator(data_dir / "test.csv",
                                           simulation_type=data_dir /
                                           "simulator/bpr/batch-rl-test/0",
                                           num_replicas=self.batch_size,
                                           max_state_len=10,
                                           variant="bpr",
                                           reward_type="item")
        # Fixed seed so every test run is reproducible.
        self.simulator.seed(1)

    def test_reset(self):
        """reset() returns one observation per replica."""
        obs = self.simulator.reset()
        self.assertEqual(len(obs), self.batch_size)

    def test_step(self):
        """step() preserves the batch dimension of the observations."""
        self.simulator.reset()
        action = np.random.randint(1, 3000, size=(self.batch_size, 10))
        next_state, rewards, done, info = self.simulator.step(action)
        self.assertEqual(len(next_state), self.batch_size)

    def test_multiple_steps(self):
        """Stepping a whole episode to completion must not raise."""
        self.simulator.reset()
        # Match test_step: this fixture uses max_state_len=10, and every other
        # test here issues width-10 actions (the original width-5 action was
        # never exercised because the loop below was dead).
        action = np.random.randint(1, 3000, size=(self.batch_size, 10))
        # BUG FIX: was `done = True`, which made `while not done:` a dead
        # loop — the test silently stepped zero times.
        done = False
        while not done:
            next_state, rewards, done, info = self.simulator.step(action)

    def test_break_soon(self):
        """Replay all episodes with one fixed low-id action; print stats."""
        episode_lens = []
        rewardsep = []
        while True:
            obs = self.simulator.reset()
            # reset() returns None once every trajectory has been consumed.
            if obs is None:
                break
            done = False
            e = 0
            action = np.random.randint(1, 30, size=(len(obs), 10))
            r = 0
            while not done:
                next_state, rewards, done, info = self.simulator.step(action)
                r += rewards
                e += 1
            episode_lens.append(e)
            rewardsep.append(r)
        print("repeated")
        print(np.mean(episode_lens), np.std(episode_lens))
        print(np.mean(rewardsep), np.std(rewardsep))

    def test_run_to_end_with_much_random(self):
        """Replay all episodes with a fresh random action per step."""
        episode_lens = []
        rewardsep = []
        while True:
            obs = self.simulator.reset()
            if obs is None:
                break
            done = False
            e = 0
            while not done:
                action = np.random.randint(1, 3000, size=(len(obs), 10))
                obs, rewards, done, info = self.simulator.step(action)
                rewardsep.append(rewards.mean())
                e += 1
            episode_lens.append(e)
        print("random")
        print(np.mean(episode_lens), np.std(episode_lens))
        print(np.mean(rewardsep), np.std(rewardsep))

    def test_run_to_end_tricky(self):
        """Replay all episodes cycling through ten fixed action slates."""
        episode_lens = []
        rewardsep = []

        # Ten pre-built slates of ten item ids each (1..100); step e uses
        # slate e % 10, broadcast across the current batch.
        action = np.arange(1, 101).reshape(10, 10)

        while True:
            obs = self.simulator.reset()
            if obs is None:
                break
            done = False
            e = 0
            while not done:
                a = np.expand_dims(action[e % 10], 0)
                a = np.repeat(a, len(obs), axis=0)
                obs, rewards, done, info = self.simulator.step(a)
                rewardsep.append(rewards.mean())
                e += 1
            episode_lens.append(e)
        print("tricky")
        print(np.mean(episode_lens), np.std(episode_lens))
        print(np.mean(rewardsep), np.std(rewardsep))