def test_step(self):
    """Reward noise must perturb raw rewards; a custom noise function is honored."""
    bench = LubyBenchmark()
    bench.config.reward_range = (-10, 10)
    env = bench.get_environment()

    # Default noise: the wrapped reward must differ from the raw one
    env.reset()
    _, raw_reward, _, _ = env.step(1)
    wrapped = RewardNoiseWrapper(env)
    wrapped.reset()
    _, reward, _, _ = wrapped.step(1)
    self.assertNotEqual(reward, raw_reward)

    # Normal-distribution noise with explicit parameters
    wrapped = RewardNoiseWrapper(env, noise_dist="normal", dist_args=[0, 0.3])
    wrapped.reset()
    env.reset()
    _, raw_reward, _, _ = env.step(1)
    _, reward, _, _ = wrapped.step(1)
    self.assertNotEqual(reward, raw_reward)

    # A zero-noise function leaves the reward at one of the raw values
    def dummy():
        return 0

    wrapped = RewardNoiseWrapper(env, noise_function=dummy)
    wrapped.reset()
    _, reward, _, _ = wrapped.step(1)
    self.assertIn(reward, (0, -1))
def test_benchmark_env(self):
    """get_benchmark wraps the env in RewardNoiseWrapper and shifts rewards off {0, -1}."""
    bench = LubyBenchmark()
    env = bench.get_benchmark()
    self.assertIsInstance(env, RewardNoiseWrapper)
    env.reset()
    _, r, _, _ = env.step(1)
    # Raw Luby rewards are 0 or -1; noise must move the reward off both
    self.assertNotIn(r, (0, -1))
def test_save_conf(self):
    """Saving the config must produce a JSON file containing every config key.

    Fix: the temporary file is now removed in a ``finally`` block, so a
    failing assertion no longer leaves a stale ``test_conf.json`` behind
    to confuse later runs.
    """
    bench = LubyBenchmark()
    bench.save_config("test_conf.json")
    try:
        with open("test_conf.json", "r") as fp:
            recovered = json.load(fp)
        for k in bench.config.keys():
            self.assertIn(k, recovered.keys())
    finally:
        # Clean up regardless of test outcome
        os.remove("test_conf.json")
def test_step(self):
    """Performance is aggregated per interval (of 2) and per instance over episodes."""
    bench = LubyBenchmark()
    bench.config.instance_set = {0: [0, 0], 1: [1, 1], 2: [3, 4], 3: [5, 6]}
    env = bench.get_environment()
    wrapped = PerformanceTrackingWrapper(env, 2)

    def run_to_episode_end():
        # Step with the constant action 1 until the episode terminates
        finished = False
        while not finished:
            _, _, finished, _ = wrapped.step(1)

    state = wrapped.reset()
    self.assertGreater(len(state), 1)
    state, reward, done, _ = wrapped.step(1)
    self.assertGreater(len(state), 1)
    self.assertLessEqual(reward, 0)
    self.assertFalse(done)
    # Finish the first episode
    while not done:
        _, _, done, _ = wrapped.step(1)
    self.assertEqual(len(wrapped.overall_performance), 1)
    self.assertEqual(len(wrapped.performance_intervals), 0)
    self.assertEqual(len(wrapped.current_performance), 1)
    self.assertEqual(len(wrapped.instance_performances.keys()), 1)
    # Two more episodes without resetting: the interval of 2 completes
    run_to_episode_end()
    run_to_episode_end()
    self.assertEqual(len(wrapped.performance_intervals), 1)
    self.assertEqual(len(wrapped.current_performance), 1)
    # Episodes separated by resets advance through the instance set
    wrapped.reset()
    run_to_episode_end()
    wrapped.reset()
    run_to_episode_end()
    self.assertEqual(len(wrapped.instance_performances.keys()), 3)
    wrapped.reset()
    run_to_episode_end()
    wrapped.reset()
    run_to_episode_end()
    self.assertEqual(len(wrapped.instance_performances.keys()), 4)
def test_rendering(self):
    """Both time-rendering helpers must return RGB images (last dim == 3)."""
    bench = LubyBenchmark()
    env = bench.get_environment()
    wrapped = EpisodeTimeWrapper(env, 10)
    wrapped.reset()
    # Accumulate enough steps/episodes for both plots to have data
    for _ in range(30):
        wrapped.step(1)
    step_img = wrapped.render_step_time()
    self.assertEqual(step_img.shape[-1], 3)
    episode_img = wrapped.render_episode_time()
    self.assertEqual(episode_img.shape[-1], 3)
def test_init(self):
    """Constructing without a sampling function must fail; a given one is stored.

    Consistency fix: use unittest's ``self.assertRaises`` instead of
    ``pytest.raises`` — this is a ``unittest.TestCase`` method, and the
    sibling test for this wrapper already uses ``assertRaises``.
    """
    bench = LubyBenchmark()
    env = bench.get_environment()
    with self.assertRaises(Exception):
        wrapped = InstanceSamplingWrapper(env)

    def sample():
        return [0, 0]

    wrapped = InstanceSamplingWrapper(env, sampling_function=sample)
    self.assertIsNotNone(wrapped.sampling_function)
def test_render(self, mock_plt):
    """Both performance-rendering methods must call the (mocked) plt.show."""
    bench = LubyBenchmark()
    env = PerformanceTrackingWrapper(bench.get_environment())
    # Collect ten full episodes of performance data
    for _ in range(10):
        env.reset()
        done = False
        while not done:
            _, _, done, _ = env.step(1)
    env.render_performance()
    self.assertTrue(mock_plt.show.called)
    env.render_instance_performance()
    self.assertTrue(mock_plt.show.called)
def test_init(self):
    """Without a sampling function construction fails; a given function is kept."""
    bench = LubyBenchmark()
    bench.config.instance_update_func = "none"
    env = bench.get_environment()
    with self.assertRaises(Exception):
        InstanceSamplingWrapper(env)

    def sample():
        return [0, 0]

    wrapped = InstanceSamplingWrapper(env, sampling_function=sample)
    self.assertIsNotNone(wrapped.sampling_function)
def test_scenarios(self):
    """Each shipped Luby scenario file must load and allow reset + step."""
    for scenario in ["luby_hard.json", "luby_harder.json", "luby_very_hard.json"]:
        bench = LubyBenchmark(os.path.join("dacbench/scenarios/luby/", scenario))
        self.assertIsNotNone(bench.config)
        env = bench.get_environment()
        state = env.reset()
        self.assertIsNotNone(state)
        state, _, _, _ = env.step(0)
        self.assertIsNotNone(state)
def test_init(self):
    """Fresh wrapper: empty history, no interval; interval argument is stored."""
    env = LubyBenchmark().get_environment()
    wrapped = PerformanceTrackingWrapper(env)
    self.assertEqual(len(wrapped.overall_performance), 0)
    self.assertIsNone(wrapped.performance_interval)
    wrapped.instance = [0]
    self.assertEqual(wrapped.instance[0], 0)
    # With an explicit interval the interval bookkeeping starts empty
    wrapped2 = PerformanceTrackingWrapper(env, 10)
    self.assertEqual(len(wrapped2.overall_performance), 0)
    self.assertEqual(wrapped2.performance_interval, 10)
    self.assertEqual(len(wrapped2.performance_intervals), 0)
    self.assertEqual(len(wrapped2.current_performance), 0)
def test_init(self):
    """Fresh wrapper: no actions tracked, no interval; interval argument is stored."""
    env = LubyBenchmark().get_environment()
    wrapped = ActionFrequencyWrapper(env)
    self.assertEqual(len(wrapped.overall_actions), 0)
    self.assertIsNone(wrapped.action_interval)
    wrapped.instance = [0]
    self.assertEqual(wrapped.instance[0], 0)
    # With an explicit interval the interval bookkeeping starts empty
    wrapped2 = ActionFrequencyWrapper(env, 10)
    self.assertEqual(len(wrapped2.overall_actions), 0)
    self.assertEqual(wrapped2.action_interval, 10)
    self.assertEqual(len(wrapped2.action_intervals), 0)
    self.assertEqual(len(wrapped2.current_actions), 0)
def test_init(self):
    """Fresh wrapper: no times tracked, no interval; interval argument is stored."""
    env = LubyBenchmark().get_environment()
    wrapped = EpisodeTimeWrapper(env)
    self.assertEqual(len(wrapped.overall_times), 0)
    self.assertIsNone(wrapped.time_interval)
    wrapped.instance = [0]
    self.assertEqual(wrapped.instance[0], 0)
    # With an explicit interval the interval bookkeeping starts empty
    wrapped2 = EpisodeTimeWrapper(env, 10)
    self.assertEqual(len(wrapped2.overall_times), 0)
    self.assertEqual(wrapped2.time_interval, 10)
    self.assertEqual(len(wrapped2.time_intervals), 0)
    self.assertEqual(len(wrapped2.current_times), 0)
def test_get_performance(self):
    """get_performance returns the right tuple shape for each configuration.

    Four wrappers are exercised: no interval + instance tracking, interval
    of 2 + instance tracking, interval of 2 without instance tracking, and
    neither. NOTE(review): all four wrap the SAME env instance, so the
    episode/step order below is load-bearing and kept exactly as is.
    """
    bench = LubyBenchmark()
    env = bench.get_environment()
    # Interval-free wrapper with instance tracking: one full episode
    wrapped = PerformanceTrackingWrapper(env)
    wrapped.reset()
    done = False
    while not done:
        _, _, done, _ = wrapped.step(1)
    # Interval of 2 with instance tracking: one full episode
    wrapped2 = PerformanceTrackingWrapper(env, 2)
    wrapped2.reset()
    done = False
    while not done:
        _, _, done, _ = wrapped2.step(1)
    # Interval of 2 without instance tracking: five fixed steps
    wrapped3 = PerformanceTrackingWrapper(env, 2, track_instance_performance=False)
    wrapped3.reset()
    for i in range(5):
        wrapped3.step(i)
    # No interval, no instance tracking: five fixed steps
    wrapped4 = PerformanceTrackingWrapper(env, track_instance_performance=False)
    wrapped4.reset()
    for i in range(5):
        wrapped4.step(i)
    # Return shapes depend on the configuration: 2-, 3-, 1- or 2-tuples
    overall, instance_performance = wrapped.get_performance()
    overall_perf, interval_perf, instance_perf = wrapped2.get_performance()
    overall_performance_only = wrapped4.get_performance()
    overall_performance, intervals = wrapped3.get_performance()
    # Interval tracking must not change the reported overall performance
    self.assertTrue(
        np.array_equal(
            np.round(overall_performance, decimals=2),
            np.round(overall_performance_only, decimals=2),
        ))
    self.assertTrue(
        np.array_equal(np.round(overall_perf, decimals=2), np.round(overall, decimals=2)))
    # One instance seen, one episode recorded for it
    self.assertTrue(len(instance_performance.keys()) == 1)
    self.assertTrue(len(list(instance_performance.values())[0]) == 1)
    self.assertTrue(len(instance_perf.keys()) == 1)
    self.assertTrue(len(list(instance_perf.values())[0]) == 1)
    # wrapped3 saw no completed episode, so its single interval is empty
    self.assertTrue(len(intervals) == 1)
    self.assertTrue(len(intervals[0]) == 0)
    self.assertTrue(len(interval_perf) == 1)
    self.assertTrue(len(interval_perf[0]) == 1)
def test_getters_and_setters(self):
    """Wrapper attribute access must delegate to the wrapped environment.

    Fix: dropped three leftover debug ``print`` statements that only
    cluttered the test output and asserted nothing.
    """
    bench = LubyBenchmark()
    env = bench.get_environment()
    wrapped = RewardNoiseWrapper(env)
    # Attributes on the wrapper itself
    self.assertTrue(wrapped.noise_function == getattr(wrapped, "noise_function"))
    self.assertTrue(wrapped.env == getattr(wrapped, "env"))
    # Attributes delegated to the inner env
    self.assertTrue(wrapped.action_space == getattr(wrapped.env, "action_space"))
    self.assertTrue(
        wrapped.observation_space == getattr(wrapped.env, "observation_space")
    )
    self.assertTrue(wrapped.reward_range == getattr(wrapped.env, "reward_range"))
def test_reset(self):
    """reset draws a fresh instance from the provided sampling function."""
    bench = LubyBenchmark()
    bench.config.instance_update_func = "none"
    env = bench.get_environment()

    def sample():
        return [1, 1]

    wrapped = InstanceSamplingWrapper(env, sampling_function=sample)
    # Before reset neither the current instance nor the set matches the sample
    self.assertFalse(np.array_equal(wrapped.instance, sample()))
    self.assertFalse(
        np.array_equal(list(wrapped.instance_set.values())[0], sample()))
    wrapped.reset()
    # Afterwards the current instance is the sampled one
    self.assertTrue(np.array_equal(wrapped.instance, sample()))
def test_reset(self):
    """reset replaces both the current instance and the instance set with samples."""
    env = LubyBenchmark().get_environment()

    def sample():
        return [0, 0]

    wrapped = InstanceSamplingWrapper(env, sampling_function=sample)
    # Before reset nothing matches the sampled instance yet
    self.assertFalse(np.array_equal(wrapped.instance, sample()))
    self.assertFalse(np.array_equal(wrapped.instance_set, [sample()]))
    self.assertEqual(wrapped.inst_id, 0)
    wrapped.reset()
    # Afterwards instance and set are replaced; the id stays at zero
    self.assertTrue(np.array_equal(wrapped.instance, sample()))
    self.assertTrue(np.array_equal(wrapped.instance_set, [sample()]))
    self.assertEqual(wrapped.inst_id, 0)
def test_fit(self):
    """A wrapper fitted on an instance set samples with near-perfect mutual information."""
    bench = LubyBenchmark()
    bench.config.instance_set_path = "../instance_sets/luby/luby_train.csv"
    bench.read_instance_set()
    instances = bench.config.instance_set
    env = bench.get_environment()
    wrapped = InstanceSamplingWrapper(env, instances=instances)
    samples = [wrapped.sampling_function() for _ in range(100)]
    reference = np.array(instances)
    drawn = np.array(samples)
    # Per feature column: high but not exactly perfect dependence
    for column in (0, 1):
        mi = mutual_info_score(reference[:, column], drawn[:, column])
        self.assertTrue(mi > 0.99)
        self.assertTrue(mi != 1)
def test_step(self):
    """A single step records the action both overall and in the current interval."""
    env = LubyBenchmark().get_environment()
    wrapped = ActionFrequencyWrapper(env, 10)
    state = wrapped.reset()
    self.assertGreater(len(state), 1)
    state, reward, done, _ = wrapped.step(1)
    self.assertGreater(len(state), 1)
    self.assertLessEqual(reward, 0)
    self.assertFalse(done)
    # Exactly one action tracked so far, and no interval completed yet
    self.assertEqual(len(wrapped.overall_actions), 1)
    self.assertEqual(wrapped.overall_actions[0], 1)
    self.assertEqual(len(wrapped.current_actions), 1)
    self.assertEqual(wrapped.current_actions[0], 1)
    self.assertEqual(len(wrapped.action_intervals), 0)
def test_init(self):
    """Valid noise configs yield a noise function; unknown distributions raise."""
    env = LubyBenchmark().get_environment()
    self.assertIsNotNone(RewardNoiseWrapper(env).noise_function)
    # None and an unknown distribution name are both rejected
    with self.assertRaises(Exception):
        RewardNoiseWrapper(env, noise_dist=None)
    with self.assertRaises(Exception):
        RewardNoiseWrapper(env, noise_dist="norm")
    wrapped = RewardNoiseWrapper(env, noise_dist="normal", dist_args=[0, 0.3])
    self.assertIsNotNone(wrapped.noise_function)

    def dummy():
        return 0

    wrapped = RewardNoiseWrapper(env, noise_function=dummy)
    self.assertIsNotNone(wrapped.noise_function)
def test_box_logging(self):
    """States logged for a Box observation space must appear complete in the log.

    Fix: local variable typo ``sate_columns`` -> ``state_columns``.
    """
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    episodes = 10
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_box_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )
    bench = LubyBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()
    state_logger = logger.add_module(StateTrackingWrapper)
    wrapped = StateTrackingWrapper(env, logger=state_logger)
    agent = StaticAgent(env, 1)
    logger.set_env(env)
    run_benchmark(wrapped, agent, episodes, logger)
    state_logger.close()
    logs = load_logs(state_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)
    state_columns = [
        "state_Action t (current)",
        "state_Step t (current)",
        "state_Action t-1",
        "state_Action t-2",
        "state_Step t-1",
        "state_Step t-2",
    ]
    for state_column in state_columns:
        self.assertTrue(state_column in dataframe.columns)
        # every logged step must carry a value for this state dimension
        self.assertTrue((~dataframe[state_column].isna()).all())
    temp_dir.cleanup()
def test_read_instances(self):
    """The default Luby instance set holds exactly one instance: [0, 0]."""
    bench = LubyBenchmark()
    bench.read_instance_set()
    instance_set = bench.config.instance_set
    self.assertEqual(len(instance_set), 1)
    self.assertEqual(len(instance_set[0]), 2)
    self.assertEqual(instance_set[0], [0, 0])
    # A freshly built environment carries the same instance set
    env = LubyBenchmark().get_environment()
    self.assertEqual(len(env.instance_set[0]), 2)
    self.assertEqual(env.instance_set[0], [0, 0])
    self.assertEqual(len(env.instance_set), 1)
def test_get_states(self):
    """get_states returns all states and, given an interval, chunks of that size."""
    env = LubyBenchmark().get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    for i in range(4):
        wrapped.step(i)
    wrapped2 = StateTrackingWrapper(env, 2)
    wrapped2.reset()
    for i in range(4):
        wrapped2.step(i)
    overall_states_only = wrapped.get_states()
    overall_states, intervals = wrapped2.get_states()
    # Interval tracking must not alter the overall record
    self.assertTrue(np.array_equal(overall_states, overall_states_only))
    self.assertEqual(len(overall_states_only), 5)
    self.assertEqual(len(overall_states_only[4]), 6)
    # Five states split into chunks of two: 2 + 2 + 1
    self.assertEqual(len(intervals), 3)
    for interval, expected_length in zip(intervals, (2, 2, 1)):
        self.assertEqual(len(interval), expected_length)
def test_step_reset(self):
    """Steps append states; reset closes the current interval and starts anew."""
    env = LubyBenchmark().get_environment()
    wrapped = StateTrackingWrapper(env, 2)
    state = wrapped.reset()
    self.assertGreater(len(state), 1)
    self.assertEqual(len(wrapped.overall_states), 1)
    state, reward, done, _ = wrapped.step(1)
    self.assertGreater(len(state), 1)
    self.assertLessEqual(reward, 0)
    self.assertFalse(done)
    self.assertEqual(len(wrapped.overall_states), 2)
    self.assertEqual(len(wrapped.current_states), 2)
    self.assertEqual(len(wrapped.state_intervals), 0)
    # Resetting finishes the interval and begins tracking a fresh episode
    state = wrapped.reset()
    self.assertEqual(len(wrapped.overall_states), 3)
    self.assertEqual(len(wrapped.current_states), 1)
    self.assertEqual(len(wrapped.state_intervals), 1)
def test_step(self):
    """Step durations accumulate and roll over into intervals of ten."""
    env = LubyBenchmark().get_environment()
    wrapped = EpisodeTimeWrapper(env, 10)
    state = wrapped.reset()
    self.assertGreater(len(state), 1)
    state, reward, done, _ = wrapped.step(1)
    self.assertGreater(len(state), 1)
    self.assertLessEqual(reward, 0)
    self.assertFalse(done)
    self.assertEqual(len(wrapped.all_steps), 1)
    self.assertEqual(len(wrapped.current_step_interval), 1)
    self.assertEqual(len(wrapped.step_intervals), 0)
    # Twenty more steps cross several episodes and complete one interval
    for _ in range(20):
        wrapped.step(1)
    self.assertGreater(len(wrapped.overall_times), 2)
    self.assertEqual(len(wrapped.time_intervals), 1)
def test_get_times(self):
    """get_times returns overall/per-step data, chunked when an interval is set."""
    env = LubyBenchmark().get_environment()
    plain = EpisodeTimeWrapper(env)
    plain.reset()
    for i in range(5):
        plain.step(i)
    chunked = EpisodeTimeWrapper(env, 2)
    chunked.reset()
    for i in range(5):
        chunked.step(i)
    overall_times_only, steps_only = plain.get_times()
    overall_times, steps, intervals, step_intervals = chunked.get_times()
    # Interval tracking must not alter the overall episode times (rounded)
    self.assertTrue(
        np.array_equal(
            np.round(overall_times, decimals=2),
            np.round(overall_times_only, decimals=2),
        ))
    # Five step times split into chunks of two: 2 + 2 + 1
    self.assertEqual(len(step_intervals), 3)
    for interval, expected_length in zip(step_intervals, (2, 2, 1)):
        self.assertEqual(len(interval), expected_length)
def test_get_actions(self):
    """get_actions returns every action taken and, with an interval, chunks of two."""
    env = LubyBenchmark().get_environment()
    plain = ActionFrequencyWrapper(env)
    plain.reset()
    for i in range(5):
        plain.step(i)
    chunked = ActionFrequencyWrapper(env, 2)
    chunked.reset()
    for i in range(5):
        chunked.step(i)
    overall_actions_only = plain.get_actions()
    overall_actions, intervals = chunked.get_actions()
    self.assertTrue(np.array_equal(overall_actions, overall_actions_only))
    self.assertEqual(overall_actions_only, [0, 1, 2, 3, 4])
    # Five actions split into chunks of two: [0, 1], [2, 3], [4]
    self.assertEqual(len(intervals), 3)
    self.assertEqual(len(intervals[0]), 2)
    self.assertEqual(intervals[0], [0, 1])
    self.assertEqual(len(intervals[1]), 2)
    self.assertEqual(intervals[1], [2, 3])
    self.assertEqual(len(intervals[2]), 1)
    self.assertEqual(intervals[2], [4])
def test_logging(self):
    """EpisodeTimeWrapper must log per-step and per-episode durations consistently.

    Fix: the ``episodes`` name was reused for both the episode count and the
    pandas groupby object, shadowing the former; the groupby is now named
    ``episode_groups``. Also fixed the "recored" comment typo.
    """
    temp_dir = tempfile.TemporaryDirectory()
    episodes = 5
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_logging",
    )
    bench = LubyBenchmark()
    env = bench.get_environment()
    time_logger = logger.add_module(EpisodeTimeWrapper)
    wrapped = EpisodeTimeWrapper(env, logger=time_logger)
    agent = StaticAgent(env=env, action=1)
    run_benchmark(wrapped, agent, episodes, logger)
    logger.close()
    logs = load_logs(time_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)
    # all steps must have logged time
    self.assertTrue((~dataframe.step_duration.isna()).all())
    # each episode has a recorded time
    episode_groups = dataframe.groupby("episode")
    last_steps_per_episode = dataframe.iloc[episode_groups.step.idxmax()]
    self.assertTrue(
        (~last_steps_per_episode.episode_duration.isna()).all())
    # episode time equals the sum of the steps in episode
    calculated_episode_times = episode_groups.step_duration.sum()
    recorded_episode_times = last_steps_per_episode.episode_duration
    self.assertListEqual(calculated_episode_times.tolist(),
                         recorded_episode_times.tolist())
    temp_dir.cleanup()
# The policy we're following policy = make_tabular_policy(Q, epsilon, environment.action_space.n) # Print out which episode we're on, useful for debugging. if (i_episode + 1) % 100 == 0: if verbose: print("\rEpisode {:>5d}/{}.".format(i_episode + 1, num_episodes)) else: print("\rEpisode {:>5d}/{}.".format(i_episode + 1, num_episodes), end="") sys.stdout.flush() Q, rs, exp_rew, ep_len = update(Q, environment, policy, alpha, discount_factor) train_stats.episode_rewards[i_episode] = rs train_stats.expected_rewards[i_episode] = exp_rew train_stats.episode_lengths[i_episode] = ep_len if not verbose: print("\rEpisode {:>5d}/{}.".format(i_episode + 1, num_episodes)) return Q, (test_stats, train_stats) bench = LubyBenchmark() env = bench.get_environment() # Execute 10 episodes of tabular Q-Learning q_func, test_train_stats = q_learning(env, 10) print(f"Rewards: {test_train_stats[1].episode_rewards}") print(f"Episode Lenghts: {test_train_stats[1].episode_lengths}")
def test_logging_discrete(self):
    """With a fixed seed, the logged actions must match the recorded sequence.

    Regression test: both the benchmark and the action space are seeded, so
    the RandomAgent's 10-episode action trace is fully deterministic.
    """
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_discrete_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )
    bench = LubyBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()
    # Seed the action space too, otherwise the random agent is not reproducible
    env.seed_action_space(seed)
    action_logger = logger.add_module(ActionFrequencyWrapper)
    wrapped = ActionFrequencyWrapper(env, logger=action_logger)
    agent = RandomAgent(env)
    logger.set_env(env)
    run_benchmark(wrapped, agent, 10, logger)
    action_logger.close()
    logs = load_logs(action_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)
    # Recorded action trace for seed 0 — do not edit without re-recording
    expected_actions = [
        0, 3, 5, 4, 3, 5, 5, 5, 3, 3, 2, 1, 0, 1, 2, 0, 1, 1, 0, 1,
        2, 4, 3, 0, 1, 3, 0, 3, 3, 3, 4, 4, 4, 5, 4, 0, 4, 2, 1, 3,
        4, 2, 1, 3, 3, 2, 0, 5, 2, 5, 2, 1, 5, 3, 2, 5, 1, 0, 2, 3,
        1, 3, 2, 3, 2, 4, 3, 4, 0, 5, 5, 1, 5, 0, 1, 5, 5, 3, 3, 2,
    ]
    self.assertListEqual(dataframe.action.to_list(), expected_actions)
    temp_dir.cleanup()
def test_rendering(self):
    """render_state_tracking supports Box states and rejects unsupported spaces.

    Covers: the default CMA-ES state (NotImplementedError), a Tuple space
    (NotImplementedError), 1-d Box states (CMA-ES and Luby), and minimal
    dummy envs with Discrete, MultiDiscrete and MultiBinary observation
    spaces (all using the old gym 4-tuple step API).
    """
    # Default CMA-ES state: rendering is not implemented for this space
    bench = CMAESBenchmark()
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    with pytest.raises(NotImplementedError):
        wrapped.render_state_tracking()
    # Tuple observation space: also unsupported
    bench = CMAESBenchmark()

    def dummy():
        return [1, [2, 3]]

    bench.config.state_method = dummy
    bench.config.observation_space = gym.spaces.Tuple(
        (
            gym.spaces.Discrete(2),
            gym.spaces.Box(low=np.array([-1, 1]), high=np.array([5, 5])),
        )
    )
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    with pytest.raises(NotImplementedError):
        wrapped.render_state_tracking()

    # One-dimensional Box state: rendering returns an RGB image
    def dummy2():
        return [0.5]

    bench.config.state_method = dummy2
    bench.config.observation_space = gym.spaces.Box(
        low=np.array([0]), high=np.array([1])
    )
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    wrapped.step(1)
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)
    # Luby Box state with an interval of 2
    bench = LubyBenchmark()
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env, 2)
    wrapped.reset()
    wrapped.step(1)
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    # Minimal stand-in env with a Discrete observation space
    class discrete_obs_env:
        def __init__(self):
            self.observation_space = gym.spaces.Discrete(2)
            self.action_space = gym.spaces.Discrete(2)
            self.reward_range = (1, 2)
            self.metadata = {}

        def reset(self):
            return 1

        def step(self, action):
            return 1, 1, 1, 1

    env = discrete_obs_env()
    wrapped = StateTrackingWrapper(env, 2)
    wrapped.reset()
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    # Stand-in env with a MultiDiscrete observation space
    class multi_discrete_obs_env:
        def __init__(self):
            self.observation_space = gym.spaces.MultiDiscrete([2, 3])
            self.action_space = gym.spaces.Discrete(2)
            self.reward_range = (1, 2)
            self.metadata = {}

        def reset(self):
            return [1, 2]

        def step(self, action):
            return [1, 2], 1, 1, 1

    env = multi_discrete_obs_env()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    # Stand-in env with a MultiBinary observation space
    class multi_binary_obs_env:
        def __init__(self):
            self.observation_space = gym.spaces.MultiBinary(2)
            self.action_space = gym.spaces.Discrete(2)
            self.reward_range = (1, 2)
            self.metadata = {}

        def reset(self):
            return [1, 1]

        def step(self, action):
            return [1, 1], 1, 1, 1

    env = multi_binary_obs_env()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)