예제 #1
0
def main():
    """Run a SimpleQAgent against the 'CartPole-v0' Gym environment.

    Builds the environment, an agent configuration (constant exploration
    with argument 0.1 and a single 16-unit linear layer), the agent and a
    Runner, then runs for up to 10000 episodes of at most 1000 timesteps
    each. Progress is logged from a per-episode callback.
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG)

    episode_limit = 10000
    timestep_limit = 1000

    environment = OpenAIGym('CartPole-v0', monitor=False, monitor_video=False)

    network_layers = [{"type": "linear", "size": 16}]
    agent_config = Configuration(
        repeat_actions=1,
        actions=environment.actions,
        states=environment.states,
        exploration='constant',
        exploration_args=[0.1],
        network=network_layers,
    )

    q_agent = create_agent(SimpleQAgent, agent_config)
    episode_runner = Runner(q_agent, environment)

    def on_episode_end(r):
        """Log progress whenever r.episode is a multiple of 10.

        Always returns True.
        NOTE(review): presumably a truthy return tells the runner to keep
        going -- confirm against the Runner implementation.
        """
        if r.episode % 10 != 0:
            return True

        summary = "Finished episode {ep} after {ts} timesteps".format(
            ep=r.episode + 1, ts=r.timestep + 1)
        log.info(summary)

        latest_reward = r.episode_rewards[-1]
        log.info("Episode reward: {}".format(latest_reward))

        # Mean over (at most) the ten most recent episode rewards.
        recent_mean = np.mean(r.episode_rewards[-10:])
        log.info("Average of last 10 rewards: {}".format(recent_mean))
        return True

    start_message = "Starting {agent} for Environment '{env}'".format(
        agent=q_agent, env=environment)
    log.info(start_message)

    episode_runner.run(episode_limit, timestep_limit,
                       episode_finished=on_episode_end)

    total_episodes = episode_runner.episode + 1
    log.info("Learning finished. Total episodes: {ep}".format(
        ep=total_episodes))
예제 #2
0
 def test_no_defaults_raises(self):
     """default() must raise TensorForceError when allow_defaults is False."""
     strict_config = Configuration(allow_defaults=False, a=1, b=2)
     extra_defaults = {'c': 3}
     with self.assertRaises(TensorForceError):
         strict_config.default(extra_defaults)
예제 #3
0
 def test_defaults_allowed(self):
     """With allow_defaults=True, default() makes the new key readable as an attribute."""
     permissive_config = Configuration(allow_defaults=True, a=1, b=2)
     extra_defaults = {'c': 3}
     permissive_config.default(extra_defaults)
     # 'c' was not passed to the constructor; it comes only from default().
     self.assertEqual(permissive_config.c, 3)