Example #1
    def test_step(self):
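        # Default and normal-distribution noise should change the reward;
        # a custom zero-noise function leaves it in the usual range (0 or -1).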
        bench = LubyBenchmark()
        bench.config.reward_range = (-10, 10)
        env = bench.get_environment()
        env.reset()
        _, raw_reward, _, _ = env.step(1)

        wrapped = RewardNoiseWrapper(env)
        wrapped.reset()
        _, reward, _, _ = wrapped.step(1)
        self.assertTrue(reward != raw_reward)

        wrapped = RewardNoiseWrapper(env,
                                     noise_dist="normal",
                                     dist_args=[0, 0.3])
        wrapped.reset()
        env.reset()
        _, raw_reward, _, _ = env.step(1)
        _, reward, _, _ = wrapped.step(1)
        self.assertTrue(reward != raw_reward)

        def dummy():
            return 0

        wrapped = RewardNoiseWrapper(env, noise_function=dummy)
        wrapped.reset()
        _, reward, _, _ = wrapped.step(1)
        self.assertTrue(reward == 0 or reward == -1)
Example #2
    def test_step(self):
        bench = LubyBenchmark()
        bench.config.instance_set = {
            0: [0, 0],
            1: [1, 1],
            2: [3, 4],
            3: [5, 6]
        }
        env = bench.get_environment()
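        # Interval size 2: episode performances are additionally grouped in pairs.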
        wrapped = PerformanceTrackingWrapper(env, 2)

        state = wrapped.reset()
        self.assertTrue(len(state) > 1)

        state, reward, done, _ = wrapped.step(1)
        self.assertTrue(len(state) > 1)
        self.assertTrue(reward <= 0)
        self.assertFalse(done)

        while not done:
            _, _, done, _ = wrapped.step(1)

        self.assertTrue(len(wrapped.overall_performance) == 1)
        self.assertTrue(len(wrapped.performance_intervals) == 0)
        self.assertTrue(len(wrapped.current_performance) == 1)
        self.assertTrue(len(wrapped.instance_performances.keys()) == 1)

        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)

        self.assertTrue(len(wrapped.performance_intervals) == 1)
        self.assertTrue(len(wrapped.current_performance) == 1)

        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        self.assertTrue(len(wrapped.instance_performances.keys()) == 3)

        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        self.assertTrue(len(wrapped.instance_performances.keys()) == 4)
Example #3
    def test_read_instances(self):
        bench = LubyBenchmark()
        bench.read_instance_set()
        self.assertTrue(len(bench.config.instance_set) == 1)
        self.assertTrue(len(bench.config.instance_set[0]) == 2)
        self.assertTrue(bench.config.instance_set[0] == [0, 0])
        bench2 = LubyBenchmark()
        env = bench2.get_environment()
        self.assertTrue(len(env.instance_set[0]) == 2)
        self.assertTrue(env.instance_set[0] == [0, 0])
        self.assertTrue(len(env.instance_set) == 1)
Example #4
    def test_rendering(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = EpisodeTimeWrapper(env, 10)
        wrapped.reset()
        for _ in range(30):
            wrapped.step(1)
        img = wrapped.render_step_time()
        self.assertTrue(img.shape[-1] == 3)
        img = wrapped.render_episode_time()
        self.assertTrue(img.shape[-1] == 3)
Example #5
    def test_init(self):
        bench = LubyBenchmark()
        env = bench.get_environment()

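        # Constructing the wrapper without a sampling function or instances should fail.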
        with pytest.raises(Exception):
            wrapped = InstanceSamplingWrapper(env)

        def sample():
            return [0, 0]

        wrapped = InstanceSamplingWrapper(env, sampling_function=sample)
        self.assertFalse(wrapped.sampling_function is None)
Example #6
    def test_scenarios(self):
        scenarios = [
            "luby_hard.json", "luby_harder.json", "luby_very_hard.json"
        ]
        for s in scenarios:
            path = os.path.join("dacbench/scenarios/luby/", s)
            bench = LubyBenchmark(path)
            self.assertTrue(bench.config is not None)
            env = bench.get_environment()
            state = env.reset()
            self.assertTrue(state is not None)
            state, _, _, _ = env.step(0)
            self.assertTrue(state is not None)
Example #7
    def test_render(self, mock_plt):
        bench = LubyBenchmark()
        env = bench.get_environment()
        env = PerformanceTrackingWrapper(env)
        for _ in range(10):
            done = False
            env.reset()
            while not done:
                _, _, done, _ = env.step(1)
        env.render_performance()
        self.assertTrue(mock_plt.show.called)
        env.render_instance_performance()
        self.assertTrue(mock_plt.show.called)
Example #8
    def test_init(self):
        bench = LubyBenchmark()
        bench.config.instance_update_func = "none"
        env = bench.get_environment()

        with self.assertRaises(Exception):
            wrapped = InstanceSamplingWrapper(env)

        def sample():
            return [0, 0]

        wrapped = InstanceSamplingWrapper(env, sampling_function=sample)
        self.assertFalse(wrapped.sampling_function is None)
Example #9
    def test_init(self):
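        # Without an interval argument only the overall action history is tracked.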
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = ActionFrequencyWrapper(env)
        self.assertTrue(len(wrapped.overall_actions) == 0)
        self.assertTrue(wrapped.action_interval is None)
        wrapped.instance = [0]
        self.assertTrue(wrapped.instance[0] == 0)

        wrapped2 = ActionFrequencyWrapper(env, 10)
        self.assertTrue(len(wrapped2.overall_actions) == 0)
        self.assertTrue(wrapped2.action_interval == 10)
        self.assertTrue(len(wrapped2.action_intervals) == 0)
        self.assertTrue(len(wrapped2.current_actions) == 0)
Example #10
    def test_init(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = EpisodeTimeWrapper(env)
        self.assertTrue(len(wrapped.overall_times) == 0)
        self.assertTrue(wrapped.time_interval is None)
        wrapped.instance = [0]
        self.assertTrue(wrapped.instance[0] == 0)

        wrapped2 = EpisodeTimeWrapper(env, 10)
        self.assertTrue(len(wrapped2.overall_times) == 0)
        self.assertTrue(wrapped2.time_interval == 10)
        self.assertTrue(len(wrapped2.time_intervals) == 0)
        self.assertTrue(len(wrapped2.current_times) == 0)
Example #11
    def test_init(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = PerformanceTrackingWrapper(env)
        self.assertTrue(len(wrapped.overall_performance) == 0)
        self.assertTrue(wrapped.performance_interval is None)
        wrapped.instance = [0]
        self.assertTrue(wrapped.instance[0] == 0)

        wrapped2 = PerformanceTrackingWrapper(env, 10)
        self.assertTrue(len(wrapped2.overall_performance) == 0)
        self.assertTrue(wrapped2.performance_interval == 10)
        self.assertTrue(len(wrapped2.performance_intervals) == 0)
        self.assertTrue(len(wrapped2.current_performance) == 0)
Example #12
    def test_get_performance(self):
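        # get_performance() returns one to three values depending on whether
        # interval and per-instance tracking are enabled.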
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = PerformanceTrackingWrapper(env)
        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        wrapped2 = PerformanceTrackingWrapper(env, 2)
        wrapped2.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped2.step(1)
        wrapped3 = PerformanceTrackingWrapper(env,
                                              2,
                                              track_instance_performance=False)
        wrapped3.reset()
        for i in range(5):
            wrapped3.step(i)
        wrapped4 = PerformanceTrackingWrapper(env,
                                              track_instance_performance=False)
        wrapped4.reset()
        for i in range(5):
            wrapped4.step(i)

        overall, instance_performance = wrapped.get_performance()
        overall_perf, interval_perf, instance_perf = wrapped2.get_performance()
        overall_performance_only = wrapped4.get_performance()
        overall_performance, intervals = wrapped3.get_performance()
        self.assertTrue(
            np.array_equal(
                np.round(overall_performance, decimals=2),
                np.round(overall_performance_only, decimals=2),
            ))

        self.assertTrue(
            np.array_equal(np.round(overall_perf, decimals=2),
                           np.round(overall, decimals=2)))

        self.assertTrue(len(instance_performance.keys()) == 1)
        self.assertTrue(len(list(instance_performance.values())[0]) == 1)
        self.assertTrue(len(instance_perf.keys()) == 1)
        self.assertTrue(len(list(instance_perf.values())[0]) == 1)

        self.assertTrue(len(intervals) == 1)
        self.assertTrue(len(intervals[0]) == 0)
        self.assertTrue(len(interval_perf) == 1)
        self.assertTrue(len(interval_perf[0]) == 1)
Example #13
    def test_getters_and_setters(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = RewardNoiseWrapper(env)

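        # Attribute lookups on the wrapper should fall through to the wrapped env.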
        self.assertTrue(wrapped.noise_function == getattr(wrapped, "noise_function"))
        self.assertTrue(wrapped.env == getattr(wrapped, "env"))

        print(wrapped.action_space)
        print(wrapped.env.action_space)
        print(getattr(wrapped.env, "action_space"))
        self.assertTrue(wrapped.action_space == getattr(wrapped.env, "action_space"))
        self.assertTrue(
            wrapped.observation_space == getattr(wrapped.env, "observation_space")
        )
        self.assertTrue(wrapped.reward_range == getattr(wrapped.env, "reward_range"))
Example #14
    def test_reset(self):
        bench = LubyBenchmark()
        bench.config.instance_update_func = "none"
        env = bench.get_environment()

        def sample():
            return [1, 1]

        wrapped = InstanceSamplingWrapper(env, sampling_function=sample)

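        # The original instance stays active until reset() draws a new one from sample().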
        self.assertFalse(np.array_equal(wrapped.instance, sample()))
        self.assertFalse(
            np.array_equal(list(wrapped.instance_set.values())[0], sample()))

        wrapped.reset()
        self.assertTrue(np.array_equal(wrapped.instance, sample()))
Example #15
    def test_reset(self):
        bench = LubyBenchmark()
        env = bench.get_environment()

        def sample():
            return [0, 0]

        wrapped = InstanceSamplingWrapper(env, sampling_function=sample)

        self.assertFalse(np.array_equal(wrapped.instance, sample()))
        self.assertFalse(np.array_equal(wrapped.instance_set, [sample()]))
        self.assertTrue(wrapped.inst_id == 0)

        wrapped.reset()
        self.assertTrue(np.array_equal(wrapped.instance, sample()))
        self.assertTrue(np.array_equal(wrapped.instance_set, [sample()]))
        self.assertTrue(wrapped.inst_id == 0)
Example #16
    def test_step(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = ActionFrequencyWrapper(env, 10)

        state = wrapped.reset()
        self.assertTrue(len(state) > 1)

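        # A single step records the action both overall and in the current interval.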
        state, reward, done, _ = wrapped.step(1)
        self.assertTrue(len(state) > 1)
        self.assertTrue(reward <= 0)
        self.assertFalse(done)

        self.assertTrue(len(wrapped.overall_actions) == 1)
        self.assertTrue(wrapped.overall_actions[0] == 1)
        self.assertTrue(len(wrapped.current_actions) == 1)
        self.assertTrue(wrapped.current_actions[0] == 1)
        self.assertTrue(len(wrapped.action_intervals) == 0)
Example #17
    def test_fit(self):
        bench = LubyBenchmark()
        bench.config.instance_set_path = "../instance_sets/luby/luby_train.csv"
        bench.read_instance_set()
        instances = bench.config.instance_set
        env = bench.get_environment()

        wrapped = InstanceSamplingWrapper(env, instances=instances)
        samples = []
        for _ in range(100):
            samples.append(wrapped.sampling_function())
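        # High mutual information indicates the fitted sampling distribution
        # closely reproduces the instance set.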
        mi1 = mutual_info_score(np.array(instances)[:, 0], np.array(samples)[:, 0])
        mi2 = mutual_info_score(np.array(instances)[:, 1], np.array(samples)[:, 1])

        self.assertTrue(mi1 > 0.99)
        self.assertTrue(mi1 != 1)
        self.assertTrue(mi2 > 0.99)
        self.assertTrue(mi2 != 1)
Example #18
    def test_init(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = RewardNoiseWrapper(env)
        self.assertFalse(wrapped.noise_function is None)

        with self.assertRaises(Exception):
            wrapped = RewardNoiseWrapper(env, noise_dist=None)
        with self.assertRaises(Exception):
            wrapped = RewardNoiseWrapper(env, noise_dist="norm")

        wrapped = RewardNoiseWrapper(env, noise_dist="normal", dist_args=[0, 0.3])
        self.assertFalse(wrapped.noise_function is None)

        def dummy():
            return 0

        wrapped = RewardNoiseWrapper(env, noise_function=dummy)
        self.assertFalse(wrapped.noise_function is None)
Example #19
    def test_box_logging(self):
        temp_dir = tempfile.TemporaryDirectory()

        seed = 0
        episodes = 10
        logger = Logger(
            output_path=Path(temp_dir.name),
            experiment_name="test_box_logging",
            step_write_frequency=None,
            episode_write_frequency=1,
        )

        bench = LubyBenchmark()
        bench.set_seed(seed)
        env = bench.get_environment()
        state_logger = logger.add_module(StateTrackingWrapper)
        wrapped = StateTrackingWrapper(env, logger=state_logger)
        agent = StaticAgent(env, 1)
        logger.set_env(env)

        run_benchmark(wrapped, agent, episodes, logger)
        state_logger.close()

        logs = load_logs(state_logger.get_logfile())
        dataframe = log2dataframe(logs, wide=True)

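        # Expected Luby state features as wide-format dataframe columns.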
        state_columns = [
            "state_Action t (current)",
            "state_Step t (current)",
            "state_Action t-1",
            "state_Action t-2",
            "state_Step t-1",
            "state_Step t-2",
        ]

        for state_column in state_columns:
            self.assertTrue(state_column in dataframe.columns)
            self.assertTrue((~dataframe[state_column].isna()).all())

        temp_dir.cleanup()
Example #20
    def test_step_reset(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env, 2)

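        # reset() already records the initial state, so tracking starts at one.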
        state = wrapped.reset()
        self.assertTrue(len(state) > 1)
        self.assertTrue(len(wrapped.overall_states) == 1)

        state, reward, done, _ = wrapped.step(1)
        self.assertTrue(len(state) > 1)
        self.assertTrue(reward <= 0)
        self.assertFalse(done)

        self.assertTrue(len(wrapped.overall_states) == 2)
        self.assertTrue(len(wrapped.current_states) == 2)
        self.assertTrue(len(wrapped.state_intervals) == 0)

        state = wrapped.reset()
        self.assertTrue(len(wrapped.overall_states) == 3)
        self.assertTrue(len(wrapped.current_states) == 1)
        self.assertTrue(len(wrapped.state_intervals) == 1)
Example #21
    def test_get_states(self):
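        # Reset plus four steps yields five recorded states, split into intervals of two.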
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        for i in range(4):
            wrapped.step(i)
        wrapped2 = StateTrackingWrapper(env, 2)
        wrapped2.reset()
        for i in range(4):
            wrapped2.step(i)

        overall_states_only = wrapped.get_states()
        overall_states, intervals = wrapped2.get_states()
        self.assertTrue(np.array_equal(overall_states, overall_states_only))
        self.assertTrue(len(overall_states_only) == 5)
        self.assertTrue(len(overall_states_only[4]) == 6)

        self.assertTrue(len(intervals) == 3)
        self.assertTrue(len(intervals[0]) == 2)
        self.assertTrue(len(intervals[1]) == 2)
        self.assertTrue(len(intervals[2]) == 1)
Example #22
    def test_step(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = EpisodeTimeWrapper(env, 10)

        state = wrapped.reset()
        self.assertTrue(len(state) > 1)

        state, reward, done, _ = wrapped.step(1)
        self.assertTrue(len(state) > 1)
        self.assertTrue(reward <= 0)
        self.assertFalse(done)

        self.assertTrue(len(wrapped.all_steps) == 1)
        self.assertTrue(len(wrapped.current_step_interval) == 1)
        self.assertTrue(len(wrapped.step_intervals) == 0)

        for _ in range(20):
            wrapped.step(1)

        self.assertTrue(len(wrapped.overall_times) > 2)
        self.assertTrue(len(wrapped.time_intervals) == 1)
Example #23
    def test_get_times(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
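        # Compare overall episode times with and without interval tracking.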
        wrapped = EpisodeTimeWrapper(env)
        wrapped.reset()
        for i in range(5):
            wrapped.step(i)
        wrapped2 = EpisodeTimeWrapper(env, 2)
        wrapped2.reset()
        for i in range(5):
            wrapped2.step(i)

        overall_times_only, steps_only = wrapped.get_times()
        overall_times, steps, intervals, step_intervals = wrapped2.get_times()
        self.assertTrue(
            np.array_equal(
                np.round(overall_times, decimals=2),
                np.round(overall_times_only, decimals=2),
            ))
        self.assertTrue(len(step_intervals) == 3)
        self.assertTrue(len(step_intervals[0]) == 2)
        self.assertTrue(len(step_intervals[1]) == 2)
        self.assertTrue(len(step_intervals[2]) == 1)
Example #24
    def test_get_actions(self):
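        # Five actions with interval size 2 split into [0, 1], [2, 3] and [4].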
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = ActionFrequencyWrapper(env)
        wrapped.reset()
        for i in range(5):
            wrapped.step(i)
        wrapped2 = ActionFrequencyWrapper(env, 2)
        wrapped2.reset()
        for i in range(5):
            wrapped2.step(i)

        overall_actions_only = wrapped.get_actions()
        overall_actions, intervals = wrapped2.get_actions()
        self.assertTrue(np.array_equal(overall_actions, overall_actions_only))
        self.assertTrue(overall_actions_only == [0, 1, 2, 3, 4])

        self.assertTrue(len(intervals) == 3)
        self.assertTrue(len(intervals[0]) == 2)
        self.assertTrue(intervals[0] == [0, 1])
        self.assertTrue(len(intervals[1]) == 2)
        self.assertTrue(intervals[1] == [2, 3])
        self.assertTrue(len(intervals[2]) == 1)
        self.assertTrue(intervals[2] == [4])
Example #25
    def test_logging(self):
        temp_dir = tempfile.TemporaryDirectory()

        episodes = 5
        logger = Logger(
            output_path=Path(temp_dir.name),
            experiment_name="test_logging",
        )
        bench = LubyBenchmark()
        env = bench.get_environment()
        time_logger = logger.add_module(EpisodeTimeWrapper)
        wrapped = EpisodeTimeWrapper(env, logger=time_logger)
        agent = StaticAgent(env=env, action=1)
        run_benchmark(wrapped, agent, episodes, logger)

        logger.close()

        logs = load_logs(time_logger.get_logfile())
        dataframe = log2dataframe(logs, wide=True)

        # all steps must have logged time
        self.assertTrue((~dataframe.step_duration.isna()).all())

        # each episode has a recorded time
        episodes = dataframe.groupby("episode")
        last_steps_per_episode = dataframe.iloc[episodes.step.idxmax()]
        self.assertTrue(
            (~last_steps_per_episode.episode_duration.isna()).all())

        # episode time equals the sum of the steps in episode
        calculated_episode_times = episodes.step_duration.sum()
        recorded_episode_times = last_steps_per_episode.episode_duration
        self.assertListEqual(calculated_episode_times.tolist(),
                             recorded_episode_times.tolist())

        temp_dir.cleanup()
Example #26
        # The policy we're following
        policy = make_tabular_policy(Q, epsilon, environment.action_space.n)
        # Print out which episode we're on, useful for debugging.
        if (i_episode + 1) % 100 == 0:
            if verbose:
                print("\rEpisode {:>5d}/{}.".format(i_episode + 1,
                                                    num_episodes))
            else:
                print("\rEpisode {:>5d}/{}.".format(i_episode + 1,
                                                    num_episodes),
                      end="")
                sys.stdout.flush()
        Q, rs, exp_rew, ep_len = update(Q, environment, policy, alpha,
                                        discount_factor)
        train_stats.episode_rewards[i_episode] = rs
        train_stats.expected_rewards[i_episode] = exp_rew
        train_stats.episode_lengths[i_episode] = ep_len
    if not verbose:
        print("\rEpisode {:>5d}/{}.".format(i_episode + 1, num_episodes))

    return Q, (test_stats, train_stats)


bench = LubyBenchmark()
env = bench.get_environment()

# Execute 10 episodes of tabular Q-Learning
q_func, test_train_stats = q_learning(env, 10)
print(f"Rewards: {test_train_stats[1].episode_rewards}")
print(f"Episode Lenghts: {test_train_stats[1].episode_lengths}")
Example #27
    def test_get_env(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        self.assertTrue(issubclass(type(env), LubyEnv))
Example #28
    def test_logging_discrete(self):
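        # With fixed seeds, the random agent's logged action sequence is deterministic.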

        temp_dir = tempfile.TemporaryDirectory()

        seed = 0
        logger = Logger(
            output_path=Path(temp_dir.name),
            experiment_name="test_discrete_logging",
            step_write_frequency=None,
            episode_write_frequency=1,
        )

        bench = LubyBenchmark()
        bench.set_seed(seed)
        env = bench.get_environment()
        env.seed_action_space(seed)

        action_logger = logger.add_module(ActionFrequencyWrapper)
        wrapped = ActionFrequencyWrapper(env, logger=action_logger)
        agent = RandomAgent(env)
        logger.set_env(env)

        run_benchmark(wrapped, agent, 10, logger)
        action_logger.close()

        logs = load_logs(action_logger.get_logfile())
        dataframe = log2dataframe(logs, wide=True)

        expected_actions = [
            0, 3, 5, 4, 3, 5, 5, 5, 3, 3,
            2, 1, 0, 1, 2, 0, 1, 1, 0, 1,
            2, 4, 3, 0, 1, 3, 0, 3, 3, 3,
            4, 4, 4, 5, 4, 0, 4, 2, 1, 3,
            4, 2, 1, 3, 3, 2, 0, 5, 2, 5,
            2, 1, 5, 3, 2, 5, 1, 0, 2, 3,
            1, 3, 2, 3, 2, 4, 3, 4, 0, 5,
            5, 1, 5, 0, 1, 5, 5, 3, 3, 2,
        ]

        self.assertListEqual(dataframe.action.to_list(), expected_actions)

        temp_dir.cleanup()