Example #1
0
    def test_step(self):
        """Step a wrapped Luby environment and check the tracked performance data.

        Runs several episodes on a four-instance set and checks how many
        entries ``overall_performance``, ``performance_intervals``,
        ``current_performance`` and ``instance_performances`` hold afterwards.
        """
        bench = LubyBenchmark()
        bench.config.instance_set = {
            0: [0, 0],
            1: [1, 1],
            2: [3, 4],
            3: [5, 6],
        }
        env = bench.get_environment()
        # NOTE(review): the second positional argument is presumably the
        # performance interval in episodes -- confirm against the wrapper.
        wrapped = PerformanceTrackingWrapper(env, 2)

        def run_episode():
            """Repeat action 1 until the wrapper reports ``done``."""
            done = False
            while not done:
                _, _, done, _ = wrapped.step(1)

        state = wrapped.reset()
        self.assertGreater(len(state), 1)

        state, reward, done, _ = wrapped.step(1)
        self.assertGreater(len(state), 1)
        self.assertLessEqual(reward, 0)
        self.assertFalse(done)

        # Finish the episode started by the single step above.
        run_episode()

        self.assertEqual(len(wrapped.overall_performance), 1)
        self.assertEqual(len(wrapped.performance_intervals), 0)
        self.assertEqual(len(wrapped.current_performance), 1)
        self.assertEqual(len(wrapped.instance_performances), 1)

        # Two further run-until-done loops, deliberately without calling
        # reset() in between (kept exactly as in the original test).
        run_episode()
        run_episode()

        self.assertEqual(len(wrapped.performance_intervals), 1)
        self.assertEqual(len(wrapped.current_performance), 1)

        # Each reset() is followed by one full episode; the number of
        # instances with recorded performance is checked after every pair.
        for expected_instances in (3, 4):
            for _ in range(2):
                wrapped.reset()
                run_episode()
            self.assertEqual(
                len(wrapped.instance_performances), expected_instances
            )
Example #2
0
 def test_render(self, mock_plt):
     """Check that each render method of the wrapper triggers ``mock_plt.show``.

     NOTE(review): ``mock_plt`` is presumably injected by a ``mock.patch``
     decorator that sits outside the visible source -- confirm.
     """
     bench = LubyBenchmark()
     env = bench.get_environment()
     env = PerformanceTrackingWrapper(env)
     # Collect some performance data: ten episodes, always taking action 1.
     for _ in range(10):
         done = False
         env.reset()
         while not done:
             _, _, done, _ = env.step(1)

     env.render_performance()
     self.assertTrue(mock_plt.show.called)

     # ``called`` stays True once set, so without this reset the assertion
     # below would pass even if render_instance_performance() never showed
     # a plot.
     mock_plt.show.reset_mock()
     env.render_instance_performance()
     self.assertTrue(mock_plt.show.called)
Example #3
0
    def test_get_performance(self):
        """Check what ``get_performance()`` returns for four wrapper setups.

        The wrappers differ in whether a second positional argument is given
        and whether ``track_instance_performance`` is disabled; the test
        unpacks one, two or three return values accordingly and checks their
        lengths. All four wrappers share the same underlying environment.
        """
        bench = LubyBenchmark()
        env = bench.get_environment()

        def run_episode(tracker):
            """Reset ``tracker`` and repeat action 1 until it reports ``done``."""
            tracker.reset()
            done = False
            while not done:
                _, _, done, _ = tracker.step(1)

        # NOTE(review): the positional ``2`` is presumably the performance
        # interval -- confirm against the wrapper's signature.
        wrapped = PerformanceTrackingWrapper(env)
        run_episode(wrapped)

        wrapped2 = PerformanceTrackingWrapper(env, 2)
        run_episode(wrapped2)

        # The two wrappers without instance tracking only take five steps
        # (actions 0..4) instead of running until done.
        wrapped3 = PerformanceTrackingWrapper(
            env, 2, track_instance_performance=False
        )
        wrapped3.reset()
        for action in range(5):
            wrapped3.step(action)

        wrapped4 = PerformanceTrackingWrapper(
            env, track_instance_performance=False
        )
        wrapped4.reset()
        for action in range(5):
            wrapped4.step(action)

        overall, instance_performance = wrapped.get_performance()
        overall_perf, interval_perf, instance_perf = wrapped2.get_performance()
        overall_performance_only = wrapped4.get_performance()
        overall_performance, intervals = wrapped3.get_performance()

        # Overall performance must agree (to two decimals) between wrappers
        # that were driven the same way.
        self.assertTrue(
            np.array_equal(
                np.round(overall_performance, decimals=2),
                np.round(overall_performance_only, decimals=2),
            )
        )
        self.assertTrue(
            np.array_equal(
                np.round(overall_perf, decimals=2),
                np.round(overall, decimals=2),
            )
        )

        # One instance with one recorded value for each tracking wrapper.
        self.assertEqual(len(instance_performance), 1)
        self.assertEqual(len(list(instance_performance.values())[0]), 1)
        self.assertEqual(len(instance_perf), 1)
        self.assertEqual(len(list(instance_perf.values())[0]), 1)

        # One interval each: empty for wrapped3 (no finished episode), one
        # entry for wrapped2 (one finished episode).
        self.assertEqual(len(intervals), 1)
        self.assertEqual(len(intervals[0]), 0)
        self.assertEqual(len(interval_perf), 1)
        self.assertEqual(len(interval_perf[0]), 1)
Example #4
0
# Build the chainer A3C agent from the flattened observation/action sizes.
obs_size = env.observation_space.low.size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

# Training: run a fixed number of episodes, logging every step and episode.
num_episodes = 3
for episode in range(1, num_episodes + 1):
    # Fresh episode: initial state, no reward received yet.
    state = env.reset()
    reward = 0
    episode_return = 0

    while True:
        # The agent picks the next action from the latest state and reward.
        action = agent.act_and_train(state, reward)
        # Apply it and accumulate the reward.
        state, reward, done, _ = env.step(action)
        episode_return += reward
        logger.next_step()
        if done:
            break

    logger.next_episode()
    # Give the agent the final transition now that the episode is over.
    agent.stop_episode_and_train(state, reward, done=done)
    # Report the episode's accumulated reward.
    print(
        f"Episode {episode}/{num_episodes}...........................................Reward: {episode_return}"
    )