Code example #1
def main():
    gym_id = 'CartPole-v0'
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    max_episodes = 10000
    max_timesteps = 1000

    env = OpenAIGym(gym_id, monitor=False, monitor_video=False)

    config = Configuration(repeat_actions=1,
                           actions=env.actions,
                           states=env.states,
                           exploration='constant',
                           exploration_args=[0.1],
                           network=[{
                               "type": "linear",
                               "size": 16
                           }])

    agent = create_agent(SimpleQAgent, config)

    runner = Runner(agent, env)

    def episode_finished(r):
        if r.episode % 10 == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 10 rewards: {}".format(
                np.mean(r.episode_rewards[-10:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent,
                                                                  env=env))
    runner.run(max_episodes, max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))
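
The snippet above is shown without its imports or logging setup. A minimal header for it, assuming the tensorforce 0.x module layout of the time (the exact module paths moved between releases, so treat them as placeholders), might be:

import logging
import numpy as np
from tensorforce import Configuration
from tensorforce.execution import Runner
# The locations below are assumptions; OpenAIGym, SimpleQAgent and create_agent
# lived in different modules across early tensorforce releases:
from tensorforce.environments.openai_gym import OpenAIGym
from tensorforce.examples.simple_q_agent import SimpleQAgent
from tensorforce.util.agent_util import create_agent

Note also that the logger is given a level but no handler; without a logging.basicConfig() call (or an explicit handler) its output will not appear on the console.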
Code example #2
def main():
    parser = argparse.ArgumentParser()

    # N.B.: if run from within lab, the working directory is something like
    # lab/bazel-out/../../tensorforce, so relative paths will not work without
    # first fetching the path of this run file
    parser.add_argument('-id',
                        '--level-id',
                        default='tests/demo_map',
                        help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='VPGAgent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=1000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=200,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-m',
                        '--monitor',
                        help="Save results to this directory")
    parser.add_argument('-ms',
                        '--monitor-safe',
                        action='store_true',
                        default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('-mv',
                        '--monitor-video',
                        type=int,
                        default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    # A store_true flag should default to False; with default=True, -D could never change anything
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    path = os.path.dirname(__file__)
    if args.agent_config:
        # Resolve the file relative to this script's directory; strip any
        # leading '/' so os.path.join does not treat the argument as absolute
        agent_config = Configuration.from_json(
            os.path.join(path, args.agent_config.lstrip('/')), True)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=from_json(
                 os.path.join(path, args.network_config.lstrip('/')), True)))

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # TODO: make the log level configurable

    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    save_path=args.save,
                    save_episodes=args.save_episodes)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {} (failed to create directory).".format(save_dir))

    report_episodes = max(1, args.episodes // 1000)  # avoid modulo by zero for runs under 1000 episodes

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(
                np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))

    environment.close()
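
As with the first example, the lab_main.py listing omits its imports. A plausible header, again assuming the tensorforce 0.x layout (module paths are guesses and may differ in the actual repositories), would be:

import argparse
import logging
import os
import sys
import numpy as np
from tensorforce import Configuration, TensorForceError
from tensorforce.agents import agents            # assumed: name-to-class registry
from tensorforce.core.networks import from_json  # assumed: network-from-JSON loader
from tensorforce.execution import Runner
from tensorforce.contrib.deepmind_lab import DeepMindLab  # assumed location

The variant in the last example additionally references log_levels and build_preprocessing_stack, whose locations in the package are likewise version-dependent. A typical invocation (paths hypothetical) could look like:

python lab_main.py -c configs/vpg_agent.json -n configs/vpg_network.json -e 1000 -t 200 -s saved/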
Code example #3
File: test_config.py Project: yukunix/tensorforce
 def test_conflicting_desires_raises(self):
     with self.assertRaises(TensorForceError):
         Configuration.from_json_string(test_config_with_flag, allow_defaults=False)
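
The fixtures test_config and test_config_with_flag are module-level JSON strings that these excerpts do not show. Judging from the assertions in this and the following tests, they plausibly look something like this sketch (not the actual fixtures):

test_config = '{"a": 1, "b": 2}'
# A variant whose JSON itself requests allow_defaults, so that passing an
# explicit allow_defaults=False to from_json_string conflicts with it:
test_config_with_flag = '{"allow_defaults": true, "a": 1, "b": 2}'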
Code example #4
File: test_config.py Project: yukunix/tensorforce
 def test_default_to_provided_param_is_ok(self):
     config = Configuration.from_json_string(test_config, allow_defaults=False)
     config.default({'a': 'boo!'})
     self.assertEqual(config.a, 1)
Code example #5
File: test_config.py Project: yukunix/tensorforce
 def test_defaults_disallowed_with_json_load_specifying_param_default(self):
     config = Configuration.from_json_string(test_config, allow_defaults=False)
     with self.assertRaises(TensorForceError):
         config.default({'c': 3})
Code example #6
File: test_config.py Project: yukunix/tensorforce
 def test_defaults_allowed_with_json_load_specifying_param_default(self):
     config = Configuration.from_json_string(test_config, allow_defaults=True)
     config.default({'c': 3})
     self.assertEqual(config.c, 3)
Code example #7
File: test_config.py Project: yukunix/tensorforce
 def test_defaults_allowed_with_json_load_relying_upon_param_default(self):
     config = Configuration.from_json_string(test_config)
     config.default({'c': 3})
     self.assertEqual(config.c, 3)
Code example #8
File: test_config.py Project: yukunix/tensorforce
 def test_no_defaults_raises(self):
     config = Configuration(allow_defaults=False, a=1, b=2)
     with self.assertRaises(TensorForceError):
         config.default({'c': 3})
Code example #9
File: test_config.py Project: yukunix/tensorforce
 def test_defaults_allowed(self):
     config = Configuration(allow_defaults=True, a=1, b=2)
     config.default({'c': 3})
     self.assertEqual(config.c, 3)
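
Taken together, examples #3 to #9 pin down the contract of Configuration.default(): it fills in only keys that are missing, leaves provided values untouched, and raises TensorForceError for genuinely new keys when allow_defaults is False. A condensed illustration (same API, hypothetical values):

config = Configuration(allow_defaults=True, a=1)
config.default({'a': 99, 'b': 2})  # 'a' stays 1; 'b' is newly defaulted to 2
assert config.a == 1 and config.b == 2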
Code example #10
File: lab_main.py Project: liyanonline/tensorforce
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-id',
                        '--level-id',
                        default='tests/demo_map',
                        help="DeepMind Lab level id")
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file",
                        default='/configs/dqn_agent.json')
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file",
                        default='/configs/dqn_network.json')
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=1000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=200,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-m',
                        '--monitor',
                        help="Save results to this directory")
    parser.add_argument('-ms',
                        '--monitor-safe',
                        action='store_true',
                        default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('-mv',
                        '--monitor-video',
                        type=int,
                        default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    # A store_true flag should default to False; with default=True, -D could never change anything
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    # Redirect output to file
    sys.stdout = open('lab_output.txt', 'w')

    args = parser.parse_args()

    environment = DeepMindLab(args.level_id)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=from_json(args.network_config)))

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[agent_config['loglevel']])

    preprocessing_config = agent_config['preprocessing']
    if preprocessing_config:
        preprocessor = build_preprocessing_stack(preprocessing_config)
        agent_config.states['shape'] = preprocessor.shape(
            agent_config.states['shape'])
    else:
        preprocessor = None

    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Agent configuration:")
        logger.info(agent.config)
        if agent.model:
            logger.info("Model configuration:")
            logger.info(agent.model.config)

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    preprocessor=preprocessor,
                    save_path=args.save,
                    save_episodes=args.save_episodes)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {} (failed to create directory).".format(save_dir))

    report_episodes = max(1, args.episodes // 1000)  # avoid modulo by zero for runs under 1000 episodes

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(
                np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Lab environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))

    environment.close()
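
This variant differs from the earlier lab_main.py chiefly in reading a preprocessing stack out of the agent config and letting it reshape the state space before the agent is built. The 'preprocessing' entry it expects might, under the old list-of-operations format, look roughly like the following sketch (the operation names are guesses, not taken from these repositories):

agent_config['preprocessing'] = [
    {"type": "grayscale"},                  # collapse RGB frames to one channel
    {"type": "imresize", "args": [84, 84]}  # downscale observations to 84x84
]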