def main():
    """Launch a distributed (async) TensorForce agent on an OpenAI Gym env.

    Parent mode spawns one parameter-server plus N worker child processes
    (as tmux windows or plain background children); child mode (--is-child)
    joins the TF cluster on localhost ports 12222+ and runs the training loop.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-w', '--num-workers', type=int, default=1, help="Number of worker agents")
    parser.add_argument('-m', '--monitor', help="Save results to this file")
    parser.add_argument('-M', '--mode', choices=['tmux', 'child'], default='tmux', help="Starter mode")
    parser.add_argument('-L', '--logdir', default='logs_async', help="Log directory")
    parser.add_argument('-C', '--is-child', action='store_true')
    parser.add_argument('-i', '--task-index', type=int, default=0, help="Task index")
    parser.add_argument('-K', '--kill', action='store_true', default=False, help="Kill runners")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    args = parser.parse_args()

    session_name = 'openai_async'
    shell = '/bin/bash'

    # Commands that tear down any previously started cluster (ports + tmux session).
    kill_cmds = [
        "kill $( lsof -i:12222-{} -t ) > /dev/null 2>&1".format(12222 + args.num_workers),
        "tmux kill-session -t {}".format(session_name),
    ]
    if args.kill:
        os.system("\n".join(kill_cmds))
        return 0

    if not args.is_child:
        # Parent process: re-invoke this very script once per cluster member.
        target_script = os.path.abspath(inspect.stack()[0][1])

        def wrap_cmd(session, name, cmd):
            # Turn an argv list into a shell-safe string, then wrap it for the
            # chosen starter mode (tmux window vs. backgrounded child with a
            # kill entry appended to kill.sh).
            if isinstance(cmd, list):
                cmd = ' '.join(shlex_quote(str(arg)) for arg in cmd)
            if args.mode == 'tmux':
                return 'tmux send-keys -t {}:{} {} Enter'.format(session, name, shlex_quote(cmd))
            elif args.mode == 'child':
                return '{} > {}/{}.{}.out 2>&1 & echo kill $! >> {}/kill.sh'.format(
                    cmd, args.logdir, session, name, args.logdir)

        def build_cmd(index):
            # Child argv; index -1 denotes the parameter server.
            cmd_args = [
                'CUDA_VISIBLE_DEVICES=', sys.executable, target_script, args.gym_id,
                '--is-child',
                '--agent', args.agent,
                '--agent-config', os.path.join(os.getcwd(), args.agent_config),
                '--network-config', os.path.join(os.getcwd(), args.network_config),
                '--num-workers', args.num_workers,
                '--task-index', index
            ]
            if args.debug:
                cmd_args.append('--debug')
            return cmd_args

        if args.mode == 'tmux':
            cmds = kill_cmds + ['tmux new-session -d -s {} -n ps'.format(session_name)]
        elif args.mode == 'child':
            cmds = [
                'mkdir -p {}'.format(args.logdir),
                'rm -f {}/kill.sh'.format(args.logdir),
                # BUG FIX: shebang was written as "#/bin/bash" (missing '!').
                'echo "#!/bin/bash" > {}/kill.sh'.format(args.logdir),
                'chmod +x {}/kill.sh'.format(args.logdir)
            ]
        cmds.append(wrap_cmd(session_name, 'ps', build_cmd(-1)))

        # BUG FIX: xrange is Python 2 only; range works on both.
        for i in range(args.num_workers):
            name = 'w_{}'.format(i)
            if args.mode == 'tmux':
                cmds.append('tmux new-window -t {} -n {} -d {}'.format(session_name, name, shell))
            cmds.append(wrap_cmd(session_name, name, build_cmd(i)))

        print("\n".join(cmds))
        os.system("\n".join(cmds))
        return 0

    # Child process: build the cluster spec (ps on 12222, workers from 12223 up).
    ps_hosts = ['127.0.0.1:{}'.format(12222)]
    worker_hosts = ['127.0.0.1:{}'.format(12223 + i) for i in range(args.num_workers)]
    cluster = {'ps': ps_hosts, 'worker': worker_hosts}
    cluster_spec = tf.train.ClusterSpec(cluster)

    environment = OpenAIGym(args.gym_id)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")

    agent_config.default(dict(states=environment.states, actions=environment.actions,
                              network=from_json(args.network_config)))
    # task_index == -1 marks the global (parameter-server) model.
    agent_config.default(dict(
        distributed=True,
        cluster_spec=cluster_spec,
        global_model=(args.task_index == -1),
        device=('/job:ps' if args.task_index == -1
                else '/job:worker/task:{}/cpu:0'.format(args.task_index))))

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[agent_config.loglevel])

    agent = agents[args.agent](config=agent_config)

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(agent_config)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1,
                    cluster_spec=cluster_spec, task_index=args.task_index)

    # BUG FIX: guard against modulo-by-zero when fewer than 1000 episodes requested.
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            # BUG FIX: divide by the actual window size instead of a fixed
            # 500/100, which understated averages early in training.
            recent = r.episode_rewards[-500:]
            logger.info("Average of last 500 rewards: {}".format(sum(recent) / len(recent)))
            recent = r.episode_rewards[-100:]
            logger.info("Average of last 100 rewards: {}".format(sum(recent) / len(recent)))
        return True

    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
def main():
    """Run a TensorForce agent on a DeepMind Lab level and report progress."""
    parser = argparse.ArgumentParser()
    # N.b. if ran from within lab, the working directory is something like
    # lab/bazel-out/../../tensorforce. Hence, relative paths will not work
    # without first fetching the path of this run file.
    parser.add_argument('-id', '--level-id', default='tests/demo_map', help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='VPGAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=1000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=200, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=True, help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    path = os.path.dirname(__file__)
    if args.agent_config:
        # Use absolute path
        agent_config = Configuration.from_json(path + args.agent_config, True)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")

    agent_config.default(dict(states=environment.states, actions=environment.actions,
                              network=from_json(path + args.network_config, True)))

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # configurable!!!

    agent = agents[args.agent](config=agent_config)

    # BUG FIX: the load/debug sections were duplicated verbatim before and
    # after the Runner was constructed; load the model and log config once.
    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1,
                    save_path=args.save, save_episodes=args.save_episodes)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    # BUG FIX: guard against modulo-by-zero for episode counts below 1000.
    report_episodes = max(1, args.episodes // 1000)

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            # np.mean already divides by the actual window length.
            logger.info("Average of last 500 rewards: {}".format(np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))

    environment.close()
def main():
    """Run N ALE (Atari) emulators in worker threads sharing one agent model."""
    parser = argparse.ArgumentParser()
    parser.add_argument('rom', help="File path of the rom")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-w', '--workers', help="Number of threads to run where the model is shared", type=int, default=16)
    parser.add_argument('-fs', '--frame-skip', help="Number of frames to repeat action", type=int, default=1)
    parser.add_argument('-rap', '--repeat-action-probability', help="Repeat action probability", type=float, default=0.0)
    parser.add_argument('-lolt', '--loss-of-life-termination', help="Loss of life counts as terminal state", action='store_true')
    parser.add_argument('-lolr', '--loss-of-life-reward', help="Loss of life reward/penalty. EX: -1 to penalize", type=float, default=0.0)
    parser.add_argument('-ea', '--epsilon-annealing', help='Create separate epislon annealing schedules per thread', action='store_true')
    parser.add_argument('-ds', '--display-screen', action='store_true', default=False, help="Display emulator screen")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # configurable!!!
    logger.addHandler(logging.StreamHandler(sys.stdout))

    # One emulator instance per worker thread.
    environments = [
        ALE(args.rom,
            frame_skip=args.frame_skip,
            repeat_action_probability=args.repeat_action_probability,
            loss_of_life_termination=args.loss_of_life_termination,
            loss_of_life_reward=args.loss_of_life_reward,
            display_screen=args.display_screen)
        for t in range(args.workers)
    ]

    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")

    agent_configs = []
    for i in range(args.workers):
        agent_config = Configuration.from_json(args.agent_config)
        # optionally overwrite epsilon final values
        if "exploration" in agent_config and "epsilon" in agent_config.exploration.type:
            # epsilon annealing is based on the global step so divide by the total workers
            epsilon_timesteps = agent_config.exploration.epsilon_timesteps // args.workers
            agent_config.exploration.epsilon_timesteps = epsilon_timesteps
            if args.epsilon_annealing:
                # epsilon final values are [0.5, 0.1, 0.01] with probabilities [0.3, 0.4, 0.3]
                epsilon_final = np.random.choice([0.5, 0.1, 0.01], p=[0.3, 0.4, 0.3])
                agent_config.exploration.epsilon_final = epsilon_final
        agent_config.default(dict(states=environments[0].states,
                                  actions=environments[0].actions,
                                  network=network))
        agent_configs.append(agent_config)

    # let the first agent create the model
    agent = AgentsDictionary[args.agent](config=agent_configs[-1])
    # then create agents with a shared model
    agents = [
        AgentsDictionary[args.agent](config=agent_configs[t], model=agent.model)
        for t in range(args.workers - 1)
    ] + [agent]

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    def episode_finished(stats):
        # Per-thread progress line; only shown in debug mode.
        if args.debug:
            logger.info(
                "Thread {t}. Finished episode {ep} after {ts} timesteps. Reward {r}"
                .format(t=stats['thread_id'], ep=stats['episode'],
                        ts=stats['timestep'], r=stats['episode_reward']))
        return True

    def summary_report(r):
        et = time.time()
        logger.info('=' * 40)
        logger.info('Current Step/Episode: {}/{}'.format(r.global_step, r.global_episode))
        logger.info('SPS: {}'.format(r.global_step / (et - r.start_time)))
        reward_list = r.episode_rewards
        if len(reward_list) > 0:
            logger.info('Max Reward: {}'.format(np.max(reward_list)))
            # BUG FIX: divide by the actual window size instead of a fixed
            # 500/100, which understated averages early in training.
            recent = reward_list[-500:]
            logger.info("Average of last 500 rewards: {}".format(sum(recent) / len(recent)))
            recent = reward_list[-100:]
            logger.info("Average of last 100 rewards: {}".format(sum(recent) / len(recent)))
        logger.info('=' * 40)

    # create runners
    threaded_runner = ThreadedRunner(agents, environments, repeat_actions=1,
                                     save_path=args.save, save_episodes=args.save_episodes)

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environments[0]))
    threaded_runner.run(summary_interval=100, episode_finished=episode_finished,
                        summary_report=summary_report)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=threaded_runner.global_episode))

    # Close each emulator explicitly (the original built a throwaway list
    # comprehension purely for its side effects).
    for env in environments:
        env.close()
def main():
    """Run a single TensorForce agent on one ALE (Atari) emulator."""
    parser = argparse.ArgumentParser()
    parser.add_argument('rom', help="File path of the rom")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-fs', '--frame-skip', help="Number of frames to repeat action", type=int, default=1)
    parser.add_argument('-rap', '--repeat-action-probability', help="Repeat action probability", type=float, default=0.0)
    parser.add_argument('-lolt', '--loss-of-life-termination', help="Loss of life counts as terminal state", action='store_true')
    parser.add_argument('-lolr', '--loss-of-life-reward', help="Loss of life reward/penalty. EX: -1 to penalize", type=float, default=0.0)
    parser.add_argument('-ds', '--display-screen', action='store_true', default=False, help="Display emulator screen")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # configurable!!!
    logger.addHandler(logging.StreamHandler(sys.stdout))

    environment = ALE(args.rom,
                      frame_skip=args.frame_skip,
                      repeat_action_probability=args.repeat_action_probability,
                      loss_of_life_termination=args.loss_of_life_termination,
                      loss_of_life_reward=args.loss_of_life_reward,
                      display_screen=args.display_screen)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")

    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")

    agent_config.default(dict(states=environment.states, actions=environment.actions, network=network))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    runner = Runner(agent=agent, environment=environment, repeat_actions=1,
                    save_path=args.save, save_episodes=args.save_episodes)

    # BUG FIX: guard against modulo-by-zero when fewer than 1000 episodes requested.
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            sps = r.total_timesteps / (time.time() - r.start_time)
            logger.info(
                "Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}"
                .format(ep=r.episode, ts=r.timestep, sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            # BUG FIX: divide by the actual window size instead of a fixed
            # 500/100, which understated averages early in training.
            recent = r.episode_rewards[-500:]
            logger.info("Average of last 500 rewards: {}".format(sum(recent) / len(recent)))
            recent = r.episode_rewards[-100:]
            logger.info("Average of last 100 rewards: {}".format(sum(recent) / len(recent)))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    environment.close()
def main():
    """Run a TensorForce agent on a DeepMind Lab level and report progress."""
    parser = argparse.ArgumentParser()
    # N.b. if ran from within lab, the working directory is something like
    # lab/bazel-out/../../tensorforce. Hence, relative paths will not work
    # without first fetching the path of this run file.
    parser.add_argument('-id', '--level-id', default='tests/demo_map', help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='VPGAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=1000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=200, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=True, help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    path = os.path.dirname(__file__)
    if args.agent_config:
        # Use absolute path
        agent_config = Configuration.from_json(path + args.agent_config, True)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")

    agent_config.default(dict(states=environment.states, actions=environment.actions,
                              network=from_json(path + args.network_config, True)))

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # configurable!!!

    agent = agents[args.agent](config=agent_config)

    # BUG FIX: the load/debug sections were duplicated verbatim before and
    # after the Runner was constructed; load the model and log config once.
    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        save_path=args.save,
        save_episodes=args.save_episodes
    )

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    # BUG FIX: guard against modulo-by-zero for episode counts below 1000.
    report_episodes = max(1, args.episodes // 1000)

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            # np.mean already divides by the actual window length.
            logger.info("Average of last 500 rewards: {}".format(np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))

    environment.close()
def main():
    """Run a single TensorForce agent on an OpenAI Gym environment."""
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # configurable!!!

    environment = OpenAIGym(args.gym_id, monitor=args.monitor,
                            monitor_safe=args.monitor_safe, monitor_video=args.monitor_video)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")

    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")

    agent_config.default(dict(states=environment.states, actions=environment.actions, network=network))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        save_path=args.save,
        save_episodes=args.save_episodes
    )

    # BUG FIX: guard against modulo-by-zero when fewer than 1000 episodes requested.
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            # BUG FIX: divide by the actual window size instead of a fixed
            # 500/100, which understated averages early in training.
            recent = r.episode_rewards[-500:]
            logger.info("Average of last 500 rewards: {}".format(sum(recent) / len(recent)))
            recent = r.episode_rewards[-100:]
            logger.info("Average of last 100 rewards: {}".format(sum(recent) / len(recent)))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    if args.monitor:
        environment.gym.monitor.close()
    environment.close()
def main():
    """Run a single TensorForce agent on an OpenAI Gym environment."""
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # configurable!!!

    environment = OpenAIGym(args.gym_id, monitor=args.monitor,
                            monitor_safe=args.monitor_safe, monitor_video=args.monitor_video)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")

    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")

    agent_config.default(dict(states=environment.states, actions=environment.actions, network=network))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    runner = Runner(agent=agent, environment=environment, repeat_actions=1,
                    save_path=args.save, save_episodes=args.save_episodes)

    # BUG FIX: guard against modulo-by-zero when fewer than 1000 episodes requested.
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            # BUG FIX: divide by the actual window size instead of a fixed
            # 500/100, which understated averages early in training.
            recent = r.episode_rewards[-500:]
            logger.info("Average of last 500 rewards: {}".format(sum(recent) / len(recent)))
            recent = r.episode_rewards[-100:]
            logger.info("Average of last 100 rewards: {}".format(sum(recent) / len(recent)))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    if args.monitor:
        environment.gym.monitor.close()
    environment.close()
def main():
    """Run a TensorForce agent (with optional preprocessing) on a DeepMind Lab level."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-id', '--level-id', default='tests/demo_map', help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file", default='/configs/dqn_agent.json')
    parser.add_argument('-n', '--network-config', help="Network configuration file", default='/configs/dqn_network.json')
    parser.add_argument('-e', '--episodes', type=int, default=1000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=200, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=True, help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")

    agent_config.default(dict(states=environment.states, actions=environment.actions,
                              network=from_json(args.network_config)))

    # This is necessary to give bazel the correct path
    # NOTE(review): 'path' is never used below — confirm whether it was meant
    # to be prepended to the config paths as in the other Lab runner.
    path = os.path.dirname(__file__)

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[agent_config['loglevel']])

    # Optional preprocessing stack reshapes the state before the agent sees it.
    preprocessing_config = agent_config['preprocessing']
    if preprocessing_config:
        preprocessor = build_preprocessing_stack(preprocessing_config)
        agent_config.states['shape'] = preprocessor.shape(agent_config.states['shape'])
    else:
        preprocessor = None

    agent = agents[args.agent](config=agent_config)

    # BUG FIX: the load/debug sections were duplicated verbatim before and
    # after the Runner was constructed; load the model and log config once.
    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Agent configuration:")
        logger.info(agent.config)
        if agent.model:
            logger.info("Model configuration:")
            logger.info(agent.model.config)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1,
                    preprocessor=preprocessor, save_path=args.save,
                    save_episodes=args.save_episodes)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    # BUG FIX: guard against modulo-by-zero for episode counts below 1000.
    report_episodes = max(1, args.episodes // 1000)

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            # np.mean already divides by the actual window length.
            logger.info("Average of last 500 rewards: {}".format(np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))

    environment.close()
def main():
    """Launch distributed (parameter-server + workers) training on an OpenAI Gym env.

    Runs in two roles:
      * parent (default): spawns one PS process and ``--num-workers`` worker
        processes, either inside a tmux session or as background children
        whose PIDs are collected into ``<logdir>/kill.sh``; then returns.
      * child (``--is-child``): builds a ``tf.train.ClusterSpec``, creates the
        agent/environment and runs the distributed ``Runner`` for this task.

    Returns 0 on the parent/kill paths; the child path runs until training ends.
    Raises TensorForceError when agent or network configuration is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-w', '--num-workers', type=int, default=1, help="Number of worker agents")
    parser.add_argument('-m', '--monitor', help="Save results to this file")
    parser.add_argument('-M', '--mode', choices=['tmux', 'child'], default='tmux',
                        help="Starter mode")
    parser.add_argument('-L', '--logdir', default='logs_async', help="Log directory")
    parser.add_argument('-C', '--is-child', action='store_true')
    parser.add_argument('-i', '--task-index', type=int, default=0, help="Task index")
    parser.add_argument('-K', '--kill', action='store_true', default=False, help="Kill runners")
    parser.add_argument('-D', '--debug', action='store_true', default=False,
                        help="Show debug outputs")
    args = parser.parse_args()

    session_name = 'openai_async'
    shell = '/bin/bash'

    # Commands that tear down any previous run: kill processes bound to the
    # cluster port range and drop the tmux session.
    kill_cmds = [
        "kill $( lsof -i:12222-{} -t ) > /dev/null 2>&1".format(12222 + args.num_workers),
        "tmux kill-session -t {}".format(session_name),
    ]
    if args.kill:
        os.system("\n".join(kill_cmds))
        return 0

    if not args.is_child:
        # Parent role: start up child processes, then exit.
        target_script = os.path.abspath(inspect.stack()[0][1])

        def wrap_cmd(session, name, cmd):
            """Wrap a command list/string for the chosen starter mode."""
            if isinstance(cmd, list):
                cmd = ' '.join(shlex_quote(str(arg)) for arg in cmd)
            if args.mode == 'tmux':
                return 'tmux send-keys -t {}:{} {} Enter'.format(session, name, shlex_quote(cmd))
            elif args.mode == 'child':
                # Run in background, log to <logdir>/<session>.<name>.out and
                # append a kill line for the child's PID to kill.sh.
                # (Fixed: the template previously contained a stray line break
                # between "$!" and ">>", corrupting the generated shell command.)
                return '{} > {}/{}.{}.out 2>&1 & echo kill $! >> {}/kill.sh'.format(
                    cmd, args.logdir, session, name, args.logdir)

        def build_cmd(index):
            """Build the child invocation for task `index` (-1 = parameter server)."""
            cmd_args = [
                'CUDA_VISIBLE_DEVICES=',  # children run CPU-only
                sys.executable,
                target_script,
                args.gym_id,
                '--is-child',
                '--agent', args.agent,
                '--agent-config', os.path.join(os.getcwd(), args.agent_config),
                '--network-config', os.path.join(os.getcwd(), args.network_config),
                '--num-workers', args.num_workers,
                '--task-index', index
            ]
            if args.debug:
                cmd_args.append('--debug')
            return cmd_args

        if args.mode == 'tmux':
            cmds = kill_cmds + ['tmux new-session -d -s {} -n ps'.format(session_name)]
        elif args.mode == 'child':
            cmds = [
                'mkdir -p {}'.format(args.logdir),
                'rm -f {}/kill.sh'.format(args.logdir),
                # Fixed shebang: was "#/bin/bash" (missing "!").
                'echo "#!/bin/bash" > {}/kill.sh'.format(args.logdir),
                'chmod +x {}/kill.sh'.format(args.logdir),
            ]

        # Task -1 is the parameter server; tasks 0..num_workers-1 are workers.
        cmds.append(wrap_cmd(session_name, 'ps', build_cmd(-1)))
        for i in range(args.num_workers):  # was xrange: Python 2 only
            name = 'w_{}'.format(i)
            if args.mode == 'tmux':
                cmds.append('tmux new-window -t {} -n {} -d {}'.format(session_name, name, shell))
            cmds.append(wrap_cmd(session_name, name, build_cmd(i)))

        print("\n".join(cmds))
        os.system("\n".join(cmds))
        return 0

    # Child role: build the TF cluster spec (PS on 12222, workers from 12223 up).
    ps_hosts = ['127.0.0.1:{}'.format(12222)]
    worker_hosts = ['127.0.0.1:{}'.format(12223 + i) for i in range(args.num_workers)]
    cluster = {'ps': ps_hosts, 'worker': worker_hosts}
    cluster_spec = tf.train.ClusterSpec(cluster)

    environment = OpenAIGym(args.gym_id)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")

    agent_config.default(dict(
        states=environment.states,
        actions=environment.actions,
        network=from_json(args.network_config)))
    agent_config.default(dict(
        distributed=True,
        cluster_spec=cluster_spec,
        global_model=(args.task_index == -1),
        device=('/job:ps' if args.task_index == -1
                else '/job:worker/task:{}/cpu:0'.format(args.task_index))))

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[agent_config.loglevel])

    agent = agents[args.agent](config=agent_config)

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(agent_config)

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        cluster_spec=cluster_spec,
        task_index=args.task_index
    )

    # Guard against modulo-by-zero when episodes < 1000 (// 1000 would give 0).
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        """Periodic progress logging; always returns True to continue training."""
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            # Average over the episodes actually available, not a fixed 500/100
            # (the old code divided by 500/100 even early in training).
            recent_500 = r.episode_rewards[-500:]
            recent_100 = r.episode_rewards[-100:]
            logger.info("Average of last 500 rewards: {}".format(
                sum(recent_500) / len(recent_500)))
            logger.info("Average of last 100 rewards: {}".format(
                sum(recent_100) / len(recent_100)))
        return True

    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)