def main():
    """Train a SimpleQAgent on the CartPole-v0 gym environment and log progress."""
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    gym_id = 'CartPole-v0'
    episode_limit = 10000
    timestep_limit = 1000

    environment = OpenAIGym(gym_id, monitor=False, monitor_video=False)

    # Single linear layer of 16 units; constant-epsilon (0.1) exploration.
    agent_config = Configuration(
        repeat_actions=1,
        actions=environment.actions,
        states=environment.states,
        exploration='constant',
        exploration_args=[0.1],
        network=[{
            "type": "linear",
            "size": 16
        }]
    )
    agent = create_agent(SimpleQAgent, agent_config)
    runner = Runner(agent, environment)

    def episode_finished(r):
        # Report every 10th episode; returning True keeps the run going.
        if r.episode % 10 == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 10 rewards: {}".format(
                np.mean(r.episode_rewards[-10:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(episode_limit, timestep_limit, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))
def main():
    """Run a TensorForce agent (default VPGAgent) on a DeepMind Lab level.

    All settings come from the command line; agent and network configurations
    are loaded from JSON files. Raises TensorForceError if either config path
    is missing, and OSError if a load/save directory does not exist.
    """
    parser = argparse.ArgumentParser()
    # N.b. if ran from within lab, the working directory is something like
    # lab/bazel-out/../../tensorforce. Hence, relative paths will not work
    # without first fetching the path of this run file.
    parser.add_argument('-id', '--level-id', default='tests/demo_map', help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='VPGAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=1000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=200, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    # NOTE(review): default=True makes this store_true flag impossible to turn
    # off from the command line; kept as-is for backward compatibility.
    parser.add_argument('-D', '--debug', action='store_true', default=True, help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    path = os.path.dirname(__file__)
    if args.agent_config:
        # Use absolute path. NOTE(review): plain concatenation — the argument
        # must start with a path separator for this to resolve; confirm.
        agent_config = Configuration.from_json(path + args.agent_config, True)
    else:
        raise TensorForceError("No agent configuration provided.")

    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=from_json(path + args.network_config, True)))

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # configurable!!!

    agent = agents[args.agent](config=agent_config)

    # Fix: the load/debug handling previously appeared twice, so the model was
    # loaded twice and the configuration logged twice. Do it exactly once.
    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    save_path=args.save,
                    save_episodes=args.save_episodes)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError as e:
                # Chain the original cause so the real mkdir failure is visible.
                raise OSError(
                    "Cannot save agent to dir {} ()".format(save_dir)) from e

    # Fix: clamp to at least 1 — for --episodes < 1000 the floor division
    # yielded 0 and the modulo below raised ZeroDivisionError.
    report_episodes = max(1, args.episodes // 1000)

    def episode_finished(r):
        # Periodic progress report; returning True keeps the runner going.
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(
                np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps,
               episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))

    environment.close()
def test_conflicting_desires_raises(self):
    """Loading JSON that itself sets the defaults flag while passing
    allow_defaults=False is contradictory and must raise."""
    self.assertRaises(
        TensorForceError,
        Configuration.from_json_string,
        test_config_with_flag,
        allow_defaults=False
    )
def test_default_to_provided_param_is_ok(self):
    """default() must not overwrite a parameter that was explicitly provided."""
    cfg = Configuration.from_json_string(test_config, allow_defaults=False)
    cfg.default({'a': 'boo!'})
    # The explicitly-loaded value wins over the attempted default.
    self.assertEqual(cfg.a, 1)
def test_defaults_disallowed_with_json_load_specifying_param_default(self):
    """With allow_defaults=False, defaulting a brand-new parameter raises."""
    cfg = Configuration.from_json_string(test_config, allow_defaults=False)
    self.assertRaises(TensorForceError, cfg.default, {'c': 3})
def test_defaults_allowed_with_json_load_specifying_param_default(self):
    """With allow_defaults=True, default() may introduce a new parameter."""
    cfg = Configuration.from_json_string(test_config, allow_defaults=True)
    cfg.default({'c': 3})
    self.assertEqual(cfg.c, 3)
def test_defaults_allowed_with_json_load_relying_upon_param_default(self):
    """Omitting allow_defaults falls back to the permissive default, so
    default() may add a new parameter."""
    cfg = Configuration.from_json_string(test_config)
    cfg.default({'c': 3})
    self.assertEqual(cfg.c, 3)
def test_no_defaults_raises(self):
    """A directly-constructed config with allow_defaults=False rejects
    default() for an unknown parameter."""
    cfg = Configuration(allow_defaults=False, a=1, b=2)
    self.assertRaises(TensorForceError, cfg.default, {'c': 3})
def test_defaults_allowed(self):
    """A directly-constructed config with allow_defaults=True accepts
    default() for a new parameter."""
    cfg = Configuration(allow_defaults=True, a=1, b=2)
    cfg.default({'c': 3})
    self.assertEqual(cfg.c, 3)
def main():
    """Run a TensorForce agent (default VPGAgent) on a DeepMind Lab level.

    All settings come from the command line; agent and network configurations
    are loaded from JSON files. Raises TensorForceError if either config path
    is missing, and OSError if a load/save directory does not exist.
    """
    parser = argparse.ArgumentParser()
    # N.b. if ran from within lab, the working directory is something like
    # lab/bazel-out/../../tensorforce. Hence, relative paths will not work
    # without first fetching the path of this run file.
    parser.add_argument('-id', '--level-id', default='tests/demo_map', help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='VPGAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=1000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=200, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    # NOTE(review): default=True makes this store_true flag impossible to turn
    # off from the command line; kept as-is for backward compatibility.
    parser.add_argument('-D', '--debug', action='store_true', default=True, help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    path = os.path.dirname(__file__)
    if args.agent_config:
        # Use absolute path. NOTE(review): plain concatenation — the argument
        # must start with a path separator for this to resolve; confirm.
        agent_config = Configuration.from_json(path + args.agent_config, True)
    else:
        raise TensorForceError("No agent configuration provided.")

    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(dict(states=environment.states,
                              actions=environment.actions,
                              network=from_json(path + args.network_config, True)))

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # configurable!!!

    agent = agents[args.agent](config=agent_config)

    # Fix: the load/debug handling previously appeared twice, so the model was
    # loaded twice and the configuration logged twice. Do it exactly once.
    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        save_path=args.save,
        save_episodes=args.save_episodes
    )

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError as e:
                # Chain the original cause so the real mkdir failure is visible.
                raise OSError("Cannot save agent to dir {} ()".format(save_dir)) from e

    # Fix: clamp to at least 1 — for --episodes < 1000 the floor division
    # yielded 0 and the modulo below raised ZeroDivisionError.
    report_episodes = max(1, args.episodes // 1000)

    def episode_finished(r):
        # Periodic progress report; returning True keeps the runner going.
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))

    environment.close()
def main():
    """Run a TensorForce agent (default DQNAgent) on a DeepMind Lab level,
    with optional state preprocessing.

    All settings come from the command line; agent and network configurations
    are loaded from JSON files. Raises TensorForceError if either config path
    is missing, and OSError if a load/save directory does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-id', '--level-id', default='tests/demo_map', help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file", default='/configs/dqn_agent.json')
    parser.add_argument('-n', '--network-config', help="Network configuration file", default='/configs/dqn_network.json')
    parser.add_argument('-e', '--episodes', type=int, default=1000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=200, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    # NOTE(review): default=True makes this store_true flag impossible to turn
    # off from the command line; kept as-is for backward compatibility.
    parser.add_argument('-D', '--debug', action='store_true', default=True, help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        raise TensorForceError("No agent configuration provided.")

    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=from_json(args.network_config)))

    # This is necessary to give bazel the correct path
    # NOTE(review): `path` is never used below — confirm whether it can go.
    path = os.path.dirname(__file__)

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[agent_config['loglevel']])

    preprocessing_config = agent_config['preprocessing']
    if preprocessing_config:
        preprocessor = build_preprocessing_stack(preprocessing_config)
        # Preprocessing may change the observed state shape (e.g. resizing).
        agent_config.states['shape'] = preprocessor.shape(
            agent_config.states['shape'])
    else:
        preprocessor = None

    agent = agents[args.agent](config=agent_config)

    # Fix: the load/debug handling previously appeared twice, so the model was
    # loaded twice and the configuration logged twice. Do it exactly once.
    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Agent configuration:")
        logger.info(agent.config)
        if agent.model:
            logger.info("Model configuration:")
            logger.info(agent.model.config)

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    preprocessor=preprocessor,
                    save_path=args.save,
                    save_episodes=args.save_episodes)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError as e:
                # Chain the original cause so the real mkdir failure is visible.
                raise OSError(
                    "Cannot save agent to dir {} ()".format(save_dir)) from e

    # Fix: clamp to at least 1 — for --episodes < 1000 the floor division
    # yielded 0 and the modulo below raised ZeroDivisionError.
    report_episodes = max(1, args.episodes // 1000)

    def episode_finished(r):
        # Periodic progress report; returning True keeps the runner going.
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(
                np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps,
               episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))

    environment.close()