Example #1
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=50000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-w',
                        '--num-workers',
                        type=int,
                        default=1,
                        help="Number of worker agents")
    parser.add_argument('-m', '--monitor', help="Save results to this file")
    parser.add_argument('-M',
                        '--mode',
                        choices=['tmux', 'child'],
                        default='tmux',
                        help="Starter mode")
    parser.add_argument('-L',
                        '--logdir',
                        default='logs_async',
                        help="Log directory")
    parser.add_argument('-C', '--is-child', action='store_true')
    parser.add_argument('-i',
                        '--task-index',
                        type=int,
                        default=0,
                        help="Task index")
    parser.add_argument('-K',
                        '--kill',
                        action='store_true',
                        default=False,
                        help="Kill runners")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    session_name = 'openai_async'
    shell = '/bin/bash'

    kill_cmds = [
        "kill $( lsof -i:12222-{} -t ) > /dev/null 2>&1".format(
            12222 + args.num_workers),
        "tmux kill-session -t {}".format(session_name),
    ]
    if args.kill:
        os.system("\n".join(kill_cmds))
        return 0

    if not args.is_child:
        # start up child processes
        target_script = os.path.abspath(inspect.stack()[0][1])

        def wrap_cmd(session, name, cmd):
            if isinstance(cmd, list):
                cmd = ' '.join(shlex_quote(str(arg)) for arg in cmd)
            if args.mode == 'tmux':
                return 'tmux send-keys -t {}:{} {} Enter'.format(
                    session, name, shlex_quote(cmd))
            elif args.mode == 'child':
                return '{} > {}/{}.{}.out 2>&1 & echo kill $! >> {}/kill.sh'.format(
                    cmd, args.logdir, session, name, args.logdir)

        def build_cmd(index):
            cmd_args = [
                'CUDA_VISIBLE_DEVICES=', sys.executable, target_script,
                args.gym_id, '--is-child', '--agent', args.agent,
                '--agent-config',
                os.path.join(os.getcwd(),
                             args.agent_config), '--network-config',
                os.path.join(os.getcwd(), args.network_config),
                '--num-workers', args.num_workers, '--task-index', index
            ]
            if args.debug:
                cmd_args.append('--debug')
            return cmd_args

        if args.mode == 'tmux':
            cmds = kill_cmds + [
                'tmux new-session -d -s {} -n ps'.format(session_name)
            ]
        elif args.mode == 'child':
            cmds = [
                'mkdir -p {}'.format(args.logdir),
                'rm -f {}/kill.sh'.format(args.logdir),
                'echo "#/bin/bash" > {}/kill.sh'.format(args.logdir),
                'chmod +x {}/kill.sh'.format(args.logdir)
            ]
        cmds.append(wrap_cmd(session_name, 'ps', build_cmd(-1)))

        for i in range(args.num_workers):
            name = 'w_{}'.format(i)
            if args.mode == 'tmux':
                cmds.append('tmux new-window -t {} -n {} -d {}'.format(
                    session_name, name, shell))
            cmds.append(wrap_cmd(session_name, name, build_cmd(i)))

        # add one PS call
        # cmds.append('tmux new-window -t {} -n ps -d {}'.format(session_name, shell))

        print("\n".join(cmds))

        os.system("\n".join(cmds))

        return 0

    ps_hosts = ['127.0.0.1:{}'.format(12222)]
    worker_hosts = []
    port = 12223
    for _ in range(args.num_workers):
        worker_hosts.append('127.0.0.1:{}'.format(port))
        port += 1
    cluster = {'ps': ps_hosts, 'worker': worker_hosts}
    cluster_spec = tf.train.ClusterSpec(cluster)

    environment = OpenAIGym(args.gym_id)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=from_json(args.network_config)))

    agent_config.default(
        dict(distributed=True,
             cluster_spec=cluster_spec,
             global_model=(args.task_index == -1),
             device=('/job:ps' if args.task_index == -1 else
                     '/job:worker/task:{}/cpu:0'.format(args.task_index))))

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[agent_config.loglevel])

    agent = agents[args.agent](config=agent_config)

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(
        gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(agent_config)

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    cluster_spec=cluster_spec,
                    task_index=args.task_index)

    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / 100))
        return True

    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
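
For reference, a minimal sketch of the cluster layout this launcher assembles, isolated from the argument parsing above (two workers assumed; the ports are the ones hard-coded in the script):

import tensorflow as tf

# Parameter server listens on 12222; workers take consecutive ports from 12223.
ps_hosts = ['127.0.0.1:12222']
worker_hosts = ['127.0.0.1:12223', '127.0.0.1:12224']
cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts, 'worker': worker_hosts})
print(cluster_spec.as_dict())  # {'ps': ['127.0.0.1:12222'], 'worker': [...]}
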
Example #2
def main():
    parser = argparse.ArgumentParser()

    # N.B. if run from within lab, the working directory is something like lab/bazel-out/../../tensorforce
    # Hence, relative paths will not work without first fetching the path of this run file
    parser.add_argument('-id',
                        '--level-id',
                        default='tests/demo_map',
                        help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='VPGAgent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=1000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=200,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-m',
                        '--monitor',
                        help="Save results to this directory")
    parser.add_argument('-ms',
                        '--monitor-safe',
                        action='store_true',
                        default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('-mv',
                        '--monitor-video',
                        type=int,
                        default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=True,
                        help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    path = os.path.dirname(__file__)
    if args.agent_config:
        # Use absolute path
        agent_config = Configuration.from_json(
            os.path.join(path, args.agent_config), True)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=from_json(
                 os.path.join(path, args.network_config), True)))

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # TODO: make the log level configurable

    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    save_path=args.save,
                    save_episodes=args.save_episodes)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {}: Failed to create directory."
                    .format(save_dir))

    report_episodes = max(1, args.episodes // 1000)

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(
                np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))

    environment.close()
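
The episode_finished callback above always returns True; in this Runner API, returning False stops training early. A sketch of an early-stopping variant (the window size and reward threshold here are illustrative, not from the original):

import numpy as np

def episode_finished(r):
    # Stop once the last 100 episodes average at or above a target reward.
    if len(r.episode_rewards) >= 100 and np.mean(r.episode_rewards[-100:]) >= 1.0:
        return False  # returning False ends runner.run early
    return True
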
Example #3
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('rom', help="File path of the rom")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file")
    parser.add_argument(
        '-w',
        '--workers',
        help="Number of threads to run where the model is shared",
        type=int,
        default=16)
    parser.add_argument('-fs',
                        '--frame-skip',
                        help="Number of frames to repeat action",
                        type=int,
                        default=1)
    parser.add_argument('-rap',
                        '--repeat-action-probability',
                        help="Repeat action probability",
                        type=float,
                        default=0.0)
    parser.add_argument('-lolt',
                        '--loss-of-life-termination',
                        help="Loss of life counts as terminal state",
                        action='store_true')
    parser.add_argument('-lolr',
                        '--loss-of-life-reward',
                        help="Loss of life reward/penalty. EX: -1 to penalize",
                        type=float,
                        default=0.0)
    parser.add_argument(
        '-ea',
        '--epsilon-annealing',
        help='Create separate epsilon annealing schedules per thread',
        action='store_true')
    parser.add_argument('-ds',
                        '--display-screen',
                        action='store_true',
                        default=False,
                        help="Display emulator screen")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=50000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # TODO: make the log level configurable
    logger.addHandler(logging.StreamHandler(sys.stdout))

    environments = [
        ALE(args.rom,
            frame_skip=args.frame_skip,
            repeat_action_probability=args.repeat_action_probability,
            loss_of_life_termination=args.loss_of_life_termination,
            loss_of_life_reward=args.loss_of_life_reward,
            display_screen=args.display_screen) for t in range(args.workers)
    ]

    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")

    agent_configs = []
    for i in range(args.workers):
        agent_config = Configuration.from_json(args.agent_config)

        # optionally overwrite epsilon final values
        if "exploration" in agent_config and "epsilon" in agent_config.exploration.type:
            # epsilon annealing is based on the global step so divide by the total workers
            epsilon_timesteps = agent_config.exploration.epsilon_timesteps // args.workers
            agent_config.exploration.epsilon_timesteps = epsilon_timesteps
            if args.epsilon_annealing:
                # epsilon final values are [0.5, 0.1, 0.01] with probabilities [0.3, 0.4, 0.3]
                epsilon_final = np.random.choice([0.5, 0.1, 0.01],
                                                 p=[0.3, 0.4, 0.3])
                agent_config.exploration.epsilon_final = epsilon_final

        agent_config.default(
            dict(states=environments[0].states,
                 actions=environments[0].actions,
                 network=network))
        agent_configs.append(agent_config)

    # let the first agent create the model
    agent = AgentsDictionary[args.agent](config=agent_configs[-1])
    # then create agents with a shared model
    agents = [
        AgentsDictionary[args.agent](config=agent_configs[t],
                                     model=agent.model)
        for t in range(args.workers - 1)
    ] + [agent]

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {}: Failed to create directory."
                    .format(save_dir))

    def episode_finished(stats):
        if args.debug:
            logger.info(
                "Thread {t}. Finished episode {ep} after {ts} timesteps. Reward {r}"
                .format(t=stats['thread_id'],
                        ep=stats['episode'],
                        ts=stats['timestep'],
                        r=stats['episode_reward']))
        return True

    def summary_report(r):
        et = time.time()
        logger.info('=' * 40)
        logger.info('Current Step/Episode: {}/{}'.format(
            r.global_step, r.global_episode))
        logger.info('SPS: {}'.format(r.global_step / (et - r.start_time)))
        reward_list = r.episode_rewards
        if len(reward_list) > 0:
            logger.info('Max Reward: {}'.format(np.max(reward_list)))
            logger.info("Average of last 500 rewards: {}".format(
                sum(reward_list[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(
                sum(reward_list[-100:]) / 100))
        logger.info('=' * 40)

    # create runners
    threaded_runner = ThreadedRunner(agents,
                                     environments,
                                     repeat_actions=1,
                                     save_path=args.save,
                                     save_episodes=args.save_episodes)

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environments[0]))
    threaded_runner.run(summary_interval=100,
                        episode_finished=episode_finished,
                        summary_report=summary_report)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=threaded_runner.global_episode))

    for environment in environments:
        environment.close()
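
The per-worker exploration setup above samples a final epsilon per thread, in the spirit of the asynchronous deep RL recipe. A standalone sketch of just that sampling step (the 16-worker count mirrors the script's default):

import numpy as np

# Each worker draws its own final epsilon from [0.5, 0.1, 0.01]
# with probabilities [0.3, 0.4, 0.3], as in the loop above.
epsilon_finals = [np.random.choice([0.5, 0.1, 0.01], p=[0.3, 0.4, 0.3])
                  for _ in range(16)]
print(epsilon_finals)
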
Example #4
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('rom', help="File path of the rom")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file")
    parser.add_argument('-fs',
                        '--frame-skip',
                        help="Number of frames to repeat action",
                        type=int,
                        default=1)
    parser.add_argument('-rap',
                        '--repeat-action-probability',
                        help="Repeat action probability",
                        type=float,
                        default=0.0)
    parser.add_argument('-lolt',
                        '--loss-of-life-termination',
                        help="Loss of life counts as terminal state",
                        action='store_true')
    parser.add_argument('-lolr',
                        '--loss-of-life-reward',
                        help="Loss of life reward/penalty. EX: -1 to penalize",
                        type=float,
                        default=0.0)
    parser.add_argument('-ds',
                        '--display-screen',
                        action='store_true',
                        default=False,
                        help="Display emulator screen")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=50000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # TODO: make the log level configurable
    logger.addHandler(logging.StreamHandler(sys.stdout))

    environment = ALE(args.rom,
                      frame_skip=args.frame_skip,
                      repeat_action_probability=args.repeat_action_probability,
                      loss_of_life_termination=args.loss_of_life_termination,
                      loss_of_life_reward=args.loss_of_life_reward,
                      display_screen=args.display_screen)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")
    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=network))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {}: Failed to create directory."
                    .format(save_dir))

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    save_path=args.save,
                    save_episodes=args.save_episodes)

    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            sps = r.total_timesteps / (time.time() - r.start_time)
            logger.info(
                "Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}"
                .format(ep=r.episode, ts=r.timestep, sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / 100))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
    logger.info(
        "Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    environment.close()
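
The save-directory block above checks isdir and then calls os.mkdir, which races with concurrent creation and fails when intermediate directories are missing. A sketch of a more defensive variant (Python 3, since it relies on exist_ok):

import os

def ensure_save_dir(save_path):
    # Create the parent directory of the save path, tolerating it already existing.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, mode=0o755, exist_ok=True)
    return save_dir
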
Example #5
def main():
    parser = argparse.ArgumentParser()

    # N.B. if run from within lab, the working directory is something like lab/bazel-out/../../tensorforce
    # Hence, relative paths will not work without first fetching the path of this run file
    parser.add_argument('-id', '--level-id', default='tests/demo_map', help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='VPGAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=1000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=200, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=True, help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    path = os.path.dirname(__file__)
    if args.agent_config:
        # Use absolute path
        agent_config = Configuration.from_json(os.path.join(path, args.agent_config), True)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(dict(states=environment.states, actions=environment.actions,
                              network=from_json(os.path.join(path, args.network_config), True)))

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # TODO: make the log level configurable

    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        save_path=args.save,
        save_episodes=args.save_episodes
    )

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    report_episodes = max(1, args.episodes // 1000)

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))

    environment.close()
Example #6
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # TODO: make the log level configurable

    environment = OpenAIGym(args.gym_id, monitor=args.monitor, monitor_safe=args.monitor_safe, monitor_video=args.monitor_video)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")
    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")
    agent_config.default(dict(states=environment.states, actions=environment.actions, network=network))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        save_path=args.save,
        save_episodes=args.save_episodes
    )

    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(sum(r.episode_rewards[-100:]) / 100))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    if args.monitor:
        environment.gym.monitor.close()
    environment.close()
Example #7
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=50000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-m',
                        '--monitor',
                        help="Save results to this directory")
    parser.add_argument('-ms',
                        '--monitor-safe',
                        action='store_true',
                        default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('-mv',
                        '--monitor-video',
                        type=int,
                        default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # TODO: make the log level configurable

    environment = OpenAIGym(args.gym_id,
                            monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")
    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=network))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {}: Failed to create directory."
                    .format(save_dir))

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    save_path=args.save,
                    save_episodes=args.save_episodes)

    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / 100))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
    logger.info(
        "Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    if args.monitor:
        environment.gym.monitor.close()
    environment.close()
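
The report cadence above ties log frequency to run length: with the default 50000 episodes, stats appear every 50th episode, and every episode under --debug. A small check of that arithmetic (values taken from the script's defaults):

episodes = 50000
report_episodes = max(1, episodes // 1000)
assert report_episodes == 50
# The max(1, ...) guard keeps the modulo in episode_finished from
# raising ZeroDivisionError when fewer than 1000 episodes are requested.
assert max(1, 500 // 1000) == 1
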
Example #8
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-id',
                        '--level-id',
                        default='tests/demo_map',
                        help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file",
                        default='/configs/dqn_agent.json')
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file",
                        default='/configs/dqn_network.json')
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=1000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=200,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-m',
                        '--monitor',
                        help="Save results to this directory")
    parser.add_argument('-ms',
                        '--monitor-safe',
                        action='store_true',
                        default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('-mv',
                        '--monitor-video',
                        type=int,
                        default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=True,
                        help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=from_json(args.network_config)))

    # This is necessary to give bazel the correct path
    path = os.path.dirname(__file__)

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[agent_config['loglevel']])

    preprocessing_config = agent_config['preprocessing']
    if preprocessing_config:
        preprocessor = build_preprocessing_stack(preprocessing_config)
        agent_config.states['shape'] = preprocessor.shape(
            agent_config.states['shape'])
    else:
        preprocessor = None

    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Agent configuration:")
        logger.info(agent.config)
        if agent.model:
            logger.info("Model configuration:")
            logger.info(agent.model.config)

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    preprocessor=preprocessor,
                    save_path=args.save,
                    save_episodes=args.save_episodes)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {}: Failed to create directory."
                    .format(save_dir))

    report_episodes = max(1, args.episodes // 1000)

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(
                np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))

    environment.close()
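
The log_levels lookup used above (and in Examples #1 and #9) maps the config's loglevel string onto constants from the logging module. A sketch of the assumed mapping, since the helper itself is not part of this listing (the exact keys in the real module may differ):

import logging

log_levels = {
    'debug': logging.DEBUG,
    'info': logging.INFO,
    'warning': logging.WARNING,
    'error': logging.ERROR,
    'critical': logging.CRITICAL,
}
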
Example #9
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-w', '--num-workers', type=int, default=1, help="Number of worker agents")
    parser.add_argument('-m', '--monitor', help="Save results to this file")
    parser.add_argument('-M', '--mode', choices=['tmux', 'child'], default='tmux', help="Starter mode")
    parser.add_argument('-L', '--logdir', default='logs_async', help="Log directory")
    parser.add_argument('-C', '--is-child', action='store_true')
    parser.add_argument('-i', '--task-index', type=int, default=0, help="Task index")
    parser.add_argument('-K', '--kill', action='store_true', default=False, help="Kill runners")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")

    args = parser.parse_args()

    session_name = 'openai_async'
    shell = '/bin/bash'

    kill_cmds = [
        "kill $( lsof -i:12222-{} -t ) > /dev/null 2>&1".format(12222 + args.num_workers),
        "tmux kill-session -t {}".format(session_name),
    ]
    if args.kill:
        os.system("\n".join(kill_cmds))
        return 0

    if not args.is_child:
        # start up child processes
        target_script = os.path.abspath(inspect.stack()[0][1])

        def wrap_cmd(session, name, cmd):
            if isinstance(cmd, list):
                cmd = ' '.join(shlex_quote(str(arg)) for arg in cmd)
            if args.mode == 'tmux':
                return 'tmux send-keys -t {}:{} {} Enter'.format(session, name, shlex_quote(cmd))
            elif args.mode == 'child':
                return '{} > {}/{}.{}.out 2>&1 & echo kill $! >> {}/kill.sh'.format(
                    cmd, args.logdir, session, name, args.logdir
                )

        def build_cmd(index):
            cmd_args = [
                'CUDA_VISIBLE_DEVICES=',
                sys.executable, target_script,
                args.gym_id,
                '--is-child',
                '--agent', args.agent,
                '--agent-config', os.path.join(os.getcwd(), args.agent_config),
                '--network-config', os.path.join(os.getcwd(), args.network_config),
                '--num-workers', args.num_workers,
                '--task-index', index
            ]
            if args.debug:
                cmd_args.append('--debug')
            return cmd_args

        if args.mode == 'tmux':
            cmds = kill_cmds + ['tmux new-session -d -s {} -n ps'.format(session_name)]
        elif args.mode == 'child':
            cmds = ['mkdir -p {}'.format(args.logdir),
                    'rm -f {}/kill.sh'.format(args.logdir),
                    'echo "#/bin/bash" > {}/kill.sh'.format(args.logdir),
                    'chmod +x {}/kill.sh'.format(args.logdir)]
        cmds.append(wrap_cmd(session_name, 'ps', build_cmd(-1)))

        for i in range(args.num_workers):
            name = 'w_{}'.format(i)
            if args.mode == 'tmux':
                cmds.append('tmux new-window -t {} -n {} -d {}'.format(session_name, name, shell))
            cmds.append(wrap_cmd(session_name, name, build_cmd(i)))

        # add one PS call
        # cmds.append('tmux new-window -t {} -n ps -d {}'.format(session_name, shell))

        print("\n".join(cmds))

        os.system("\n".join(cmds))

        return 0

    ps_hosts = ['127.0.0.1:{}'.format(12222)]
    worker_hosts = []
    port = 12223
    for _ in range(args.num_workers):
        worker_hosts.append('127.0.0.1:{}'.format(port))
        port += 1
    cluster = {'ps': ps_hosts, 'worker': worker_hosts}
    cluster_spec = tf.train.ClusterSpec(cluster)

    environment = OpenAIGym(args.gym_id)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(dict(states=environment.states, actions=environment.actions, network=from_json(args.network_config)))

    agent_config.default(dict(distributed=True, cluster_spec=cluster_spec, global_model=(args.task_index == -1), device=('/job:ps' if args.task_index == -1 else '/job:worker/task:{}/cpu:0'.format(args.task_index))))

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[agent_config.loglevel])

    agent = agents[args.agent](config=agent_config)

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(agent_config)

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        cluster_spec=cluster_spec,
        task_index=args.task_index
    )

    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(sum(r.episode_rewards[-100:]) / 100))
        return True

    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)