# NOTE: these examples target an early TensorForce API; Configuration,
# create_agent, Runner, OpenAIGym, OpenAIUniverse, TensorForceError, and
# log_levels are assumed to come from that library, and SimpleQAgent is
# defined alongside this main() in the original example file.
import argparse
import logging
import os

import numpy as np


def main():
    gym_id = 'CartPole-v0'
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    max_episodes = 10000
    max_timesteps = 1000

    env = OpenAIGym(gym_id, monitor=False, monitor_video=False)

    config = Configuration(repeat_actions=1,
                           actions=env.actions,
                           states=env.states,
                           exploration='constant',
                           exploration_args=[0.1],
                           network=[{
                               "type": "linear",
                               "size": 16
                           }])
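    # The configuration above uses constant exploration (fixed at 0.1) and a
    # single 16-unit 'linear' layer, in this early TensorForce config style.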

    agent = create_agent(SimpleQAgent, config)

    runner = Runner(agent, env)

    def episode_finished(r):
        # Report every 10th episode; r.episode already counts finished
        # episodes, so no off-by-one adjustment is needed here.
        if r.episode % 10 == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 10 rewards: {}".format(
                np.mean(r.episode_rewards[-10:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent,
                                                                  env=env))
    runner.run(max_episodes, max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))


# Second example: run an agent on an OpenAI Universe environment,
# configured entirely from the command line.
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=50000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000 * 60,
                        help="Maximum number of timesteps per episode")
    # parser.add_argument('-m', '--monitor', help="Save results to this directory")
    # parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    # parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    env = OpenAIUniverse(args.gym_id)
    env.configure(remotes=1)

    default = dict(repeat_actions=1,
                   actions=env.actions,
                   states=env.states,
                   max_episode_length=args.max_timesteps)

    if args.agent_config:
        config = Configuration.from_json(args.agent_config)
    else:
        config = Configuration()

    config.default(default)
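    # default() presumably fills in only the keys missing from the loaded
    # configuration, so these environment-derived values act as fallbacks.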

    if args.network_config:
        network_config = Configuration.from_json(
            args.network_config).network_layers
    else:
        if config.network_layers:
            network_config = config.network_layers
        else:
            raise TensorForceError("No network configuration provided.")

    if args.debug:
        print("Configuration:")
        print(config)

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[config.loglevel])

    # preprocessing_config = config['preprocessing']
    # if preprocessing_config:
    #     stack = build_preprocessing_stack(preprocessing_config)
    #     config.states['shape'] = stack.shape(config.states['shape'])
    # else:
    stack = None

    agent = create_agent(args.agent, config, network_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(config)

    runner = Runner(agent,
                    env,
                    preprocessor=stack,
                    repeat_actions=config.repeat_actions)
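    # preprocessor is None because the preprocessing stack above is commented
    # out; repeat_actions mirrors the agent configuration.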

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError as e:
                raise OSError(
                    "Cannot save agent to dir {} ({})".format(save_dir, e))
        runner.save_model(args.save, args.save_episodes)

    report_episodes = max(1, args.episodes // 1000)  # avoid modulo by zero below
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(
                np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent,
                                                                  env=env))
    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))

    if args.monitor:
        env.gym.monitor.close()
    env.close()
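

# Standard entry point (not in the original listing) so the file runs the
# second, command-line-driven example as a script.
if __name__ == '__main__':
    main()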