Example #1
def test_fluid_film(multiprocessing=False):
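    """Exercise base_test on a set of FilmEnv instances.

    n_env, n_step, FilmEnv, ProcessWrapper and base_test are assumed to be
    defined at module level in the originating project; with
    multiprocessing=True each environment is wrapped for parallel execution.
    """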
    # Instantiate the environment
    list_envs = []

    if multiprocessing:
        for _ in range(n_env):
            list_envs.append(
                ProcessWrapper(
                    OpenAIGym(FilmEnv(render=False),
                              max_episode_timesteps=n_step)))
    else:
        for _ in range(n_env):
            list_envs.append(
                OpenAIGym(FilmEnv(render=False), max_episode_timesteps=n_step))

    base_test(list_envs)
def conv_action(action):
    if not isinstance(action, dict):
        return action
    elif all(name.startswith('gymmdc') for name in action) or \
            all(name.startswith('gymbox') for name in action) or \
            all(name.startswith('gymtpl') for name in action):
        space_type = next(iter(action))[:6]
        actions = list()
        n = 0
        while True:
            if any(
                    name.startswith(space_type + str(n) + '-')
                    for name in action):
                inner_action = {
                    name[name.index('-') + 1:]: inner_action
                    for name, inner_action in action.items()
                    if name.startswith(space_type + str(n))
                }
                actions.append(
                    OpenAIGym.unflatten_action(action=inner_action))
            elif any(name == space_type + str(n) for name in action):
                actions.append(action[space_type + str(n)])
            else:
                break
            n += 1
        return tuple(actions)
    else:
        actions = dict()
        for name, action in action.items():
            if '-' in name:
                name, inner_name = name.split('-', 1)
                if name not in actions:
                    actions[name] = dict()
                actions[name][inner_name] = action
            else:
                actions[name] = action
        for name, action in actions.items():
            if isinstance(action, dict):
                actions[name] = OpenAIGym.unflatten_action(action=action)
        return actions
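
conv_action reverses the name-based flattening that tensorforce's OpenAIGym wrapper applies to composite (tuple or dict) action spaces. A minimal round-trip sketch, assuming the 'gymtpl<n>' naming scheme handled above (the values are illustrative):

# Hypothetical flattened action from a Tuple space with two sub-actions
flat = {'gymtpl0': 1, 'gymtpl1': 0.5}
assert conv_action(flat) == (1, 0.5)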
    def test_example(self):
        passed = 0

        for _ in range(3):
            # Create an OpenAI Gym environment
            env = OpenAIGym('CartPole-v0')

            # Create a Trust Region Policy Optimization agent
            agent = TRPOAgent(config=Configuration(
                loglevel='info',
                batch_size=100,
                baseline=dict(
                    type='mlp',
                    size=32,
                    repeat_update=100
                ),
                override_line_search=False,
                generalized_advantage_estimation=True,
                normalize_advantage=False,
                gae_lambda=0.97,
                cg_iterations=20,
                cg_damping=0.01,
                line_search_steps=20,
                max_kl_divergence=0.005,
                states=env.states,
                actions=env.actions,
                network=layered_network_builder([
                    dict(type='dense', size=32, activation='tanh'),
                    dict(type='dense', size=32, activation='tanh')
                ])
            ))
            runner = Runner(agent=agent, environment=env)

            def episode_finished(r):
                # Continue until at least 100 episodes have run and the mean
                # reward over the last 50 episodes reaches 50 (learning has taken off)
                avg_reward = np.mean(r.episode_rewards[-50:])
                return r.episode < 100 or avg_reward < 50.0

            runner.run(episodes=2000, max_timesteps=200, episode_finished=episode_finished)

            if runner.episode < 2000:
                passed += 1

        print('Quick start example passed = {}'.format(passed))
        self.assertTrue(passed >= 2)
def main():
    gym_id = 'CartPole-v0'
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    max_episodes = 10000
    max_timesteps = 1000

    env = OpenAIGym(gym_id, monitor=False, monitor_video=False)

    config = Configuration(repeat_actions=1,
                           actions=env.actions,
                           states=env.states,
                           exploration='constant',
                           exploration_args=[0.1],
                           network=[{
                               "type": "linear",
                               "size": 16
                           }])

    agent = create_agent(SimpleQAgent, config)
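    # create_agent and SimpleQAgent are defined in the originating example script (not shown here)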

    runner = Runner(agent, env)

    def episode_finished(r):
        if r.episode % 10 == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 10 rewards: {}".format(
                np.mean(r.episode_rewards[-10:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent,
                                                                  env=env))
    runner.run(max_episodes, max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))
Example #5
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=50000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-w',
                        '--num-workers',
                        type=int,
                        default=1,
                        help="Number of worker agents")
    parser.add_argument('-m', '--monitor', help="Save results to this file")
    parser.add_argument('-M',
                        '--mode',
                        choices=['tmux', 'child'],
                        default='tmux',
                        help="Starter mode")
    parser.add_argument('-L',
                        '--logdir',
                        default='logs_async',
                        help="Log directory")
    parser.add_argument('-C', '--is-child', action='store_true')
    parser.add_argument('-i',
                        '--task-index',
                        type=int,
                        default=0,
                        help="Task index")
    parser.add_argument('-K',
                        '--kill',
                        action='store_true',
                        default=False,
                        help="Kill runners")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    session_name = 'openai_async'
    shell = '/bin/bash'

    kill_cmds = [
        "kill $( lsof -i:12222-{} -t ) > /dev/null 2>&1".format(
            12222 + args.num_workers),
        "tmux kill-session -t {}".format(session_name),
    ]
    if args.kill:
        os.system("\n".join(kill_cmds))
        return 0

    if not args.is_child:
        # start up child processes
        target_script = os.path.abspath(inspect.stack()[0][1])
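        # Path to this very script; it is re-invoked below as the parameter
        # server and as each worker child process.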

        def wrap_cmd(session, name, cmd):
            if isinstance(cmd, list):
                cmd = ' '.join(shlex_quote(str(arg)) for arg in cmd)
            if args.mode == 'tmux':
                return 'tmux send-keys -t {}:{} {} Enter'.format(
                    session, name, shlex_quote(cmd))
            elif args.mode == 'child':
                return '{} > {}/{}.{}.out 2>&1 & echo kill $! >> {}/kill.sh'.format(
                    cmd, args.logdir, session, name, args.logdir)

        def build_cmd(index):
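            # Each child re-runs this script with --is-child, CPU-only
            # (CUDA_VISIBLE_DEVICES=) and a task index: -1 for the parameter
            # server, 0..num_workers-1 for the workers.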
            cmd_args = [
                'CUDA_VISIBLE_DEVICES=', sys.executable, target_script,
                args.gym_id, '--is-child', '--agent', args.agent,
                '--agent-config',
                os.path.join(os.getcwd(),
                             args.agent_config), '--network-config',
                os.path.join(os.getcwd(), args.network_config),
                '--num-workers', args.num_workers, '--task-index', index
            ]
            if args.debug:
                cmd_args.append('--debug')
            return cmd_args

        if args.mode == 'tmux':
            cmds = kill_cmds + [
                'tmux new-session -d -s {} -n ps'.format(session_name)
            ]
        elif args.mode == 'child':
            cmds = [
                'mkdir -p {}'.format(args.logdir),
                'rm -f {}/kill.sh'.format(args.logdir),
                'echo "#/bin/bash" > {}/kill.sh'.format(args.logdir),
                'chmod +x {}/kill.sh'.format(args.logdir)
            ]
        cmds.append(wrap_cmd(session_name, 'ps', build_cmd(-1)))

        for i in range(args.num_workers):
            name = 'w_{}'.format(i)
            if args.mode == 'tmux':
                cmds.append('tmux new-window -t {} -n {} -d {}'.format(
                    session_name, name, shell))
            cmds.append(wrap_cmd(session_name, name, build_cmd(i)))

        # add one PS call
        # cmds.append('tmux new-window -t {} -n ps -d {}'.format(session_name, shell))

        print("\n".join(cmds))

        os.system("\n".join(cmds))

        return 0
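
    # Only child processes reach this point: assemble the TensorFlow cluster
    # spec with one parameter server on port 12222 and one worker per
    # --num-workers starting at port 12223.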

    ps_hosts = ['127.0.0.1:{}'.format(12222)]
    worker_hosts = []
    port = 12223
    for _ in range(args.num_workers):
        worker_hosts.append('127.0.0.1:{}'.format(port))
        port += 1
    cluster = {'ps': ps_hosts, 'worker': worker_hosts}
    cluster_spec = tf.train.ClusterSpec(cluster)

    environment = OpenAIGym(args.gym_id)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        raise TensorForceError("No agent configuration provided.")
    if not args.network_config:
        raise TensorForceError("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=from_json(args.network_config)))

    agent_config.default(
        dict(distributed=True,
             cluster_spec=cluster_spec,
             global_model=(args.task_index == -1),
             device=('/job:ps' if args.task_index == -1 else
                     '/job:worker/task:{}/cpu:0'.format(args.task_index))))

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[agent_config.loglevel])

    agent = agents[args.agent](config=agent_config)

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(
        gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(agent_config)

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    cluster_spec=cluster_spec,
                    task_index=args.task_index)

    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / 100))
        return True

    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
Example #6
"""
Quick start example.
"""

from tensorforce import Configuration
from tensorforce.agents import TRPOAgent
from tensorforce.environments.openai_gym import OpenAIGym
from tensorforce.execution import Runner
from tensorforce.core.networks import layered_network_builder

import numpy as np

# Create an OpenAI Gym environment
env = OpenAIGym('CartPole-v0')

# Create a Trust Region Policy Optimization agent
agent = TRPOAgent(config=Configuration(batch_size=200,
                                       states=env.states,
                                       actions=env.actions,
                                       network=layered_network_builder(
                                           [dict(type='dense', size=10)])))

# Create the runner
runner = Runner(agent=agent, environment=env)


# Callback function printing episode statistics
def episode_finished(r):
    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(
        ep=r.episode, ts=r.timestep, reward=r.episode_rewards[-1]))
    return True
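
A plausible continuation of this truncated quickstart, mirroring the Runner usage in the other examples on this page (episode and timestep limits are illustrative):

# Start learning
runner.run(episodes=3000, max_timesteps=200, episode_finished=episode_finished)

# Print final statistics
print("Learning finished. Total episodes: {ep}".format(ep=runner.episode))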
Example #7
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # configurable!!!

    environment = OpenAIGym(args.gym_id, monitor=args.monitor, monitor_safe=args.monitor_safe, monitor_video=args.monitor_video)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")
    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")
    agent_config.default(dict(states=environment.states, actions=environment.actions, network=network))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        save_path=args.save,
        save_episodes=args.save_episodes
    )

    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(sum(r.episode_rewards[-100:]) / 100))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    if args.monitor:
        environment.gym.monitor.close()
    environment.close()
Example #8
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=50000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-m',
                        '--monitor',
                        help="Save results to this directory")
    parser.add_argument('-ms',
                        '--monitor-safe',
                        action='store_true',
                        default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('-mv',
                        '--monitor-video',
                        type=int,
                        default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # configurable!!!

    environment = OpenAIGym(args.gym_id,
                            monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")
    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")
    agent_config.default(
        dict(states=environment.states,
             actions=environment.actions,
             network=network))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to directory {}.".format(save_dir))

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=1,
                    save_path=args.save,
                    save_episodes=args.save_episodes)

    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / 100))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
    logger.info(
        "Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    if args.monitor:
        environment.gym.monitor.close()
    environment.close()
Example #9
from tensorforce.environments.openai_gym import OpenAIGym
from env_gym import SimplePendulumEnv
from gym.envs.classic_control import CartPoleEnv
from tensorforce.execution import Runner
from tensorforce.agents import Agent
import os

batch_size = 10
n_step = 2000

# Instantiate the environment
n_env = 12

list_envs = []

# env = OpenAIGym(SimplePendulumEnv())
env = OpenAIGym(CartPoleEnv())

actor_network = [
    dict(type='dense', size=128, activation='relu'),
    dict(type='dense', size=64, activation='relu'),
    dict(type='dense', size=64, activation='relu')
]

critic_network = [
    dict(type='dense', size=128, activation='relu'),
    dict(type='dense', size=64, activation='relu'),
    dict(type='dense', size=64, activation='relu')
]
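
# Both networks are plain layer-list specifications of the kind that
# Agent.create accepts directly in Tensorforce 2.x.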

agent = Agent.create(agent='ppo',
                     batch_size=batch_size,
Example #10
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="Id of the Gym environment")
    parser.add_argument('-a', '--agent', help="Agent configuration file")
    parser.add_argument('-n',
                        '--network',
                        default=None,
                        help="Network specification file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=None,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--timesteps',
                        type=int,
                        default=None,
                        help="Number of timesteps")
    parser.add_argument('-m',
                        '--max-episode-timesteps',
                        type=int,
                        default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d',
                        '--deterministic',
                        action='store_true',
                        default=False,
                        help="Choose actions deterministically")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('--monitor', help="Save results to this directory")
    parser.add_argument('--monitor-safe',
                        action='store_true',
                        default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('--monitor-video',
                        type=int,
                        default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('--visualize',
                        action='store_true',
                        default=False,
                        help="Enable OpenAI Gym's visualization")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")
    parser.add_argument('-te',
                        '--test',
                        action='store_true',
                        default=False,
                        help="Test agent without learning.")
    parser.add_argument(
        '--job',
        type=str,
        default=None,
        help="For distributed mode: The job type of this agent.")
    parser.add_argument(
        '--task',
        type=int,
        default=0,
        help="For distributed mode: The task index of this agent.")
    parser.add_argument('--sleep',
                        type=float,
                        default=None,
                        help='To make the simulation slower for analysis.')

    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    logger = logging.getLogger(__file__)
    logger.setLevel(logging.INFO)

    environment = OpenAIGym(gym_id=args.gym_id,
                            monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video,
                            visualize=args.visualize)

    # initialize visualization
    if args.visualize:
        environment.gym.render(
            mode="human")  # HACK to get the visualizer started

    if args.agent is not None:
        with open(args.agent, 'r') as fp:
            agent = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network is not None:
        with open(args.network, 'r') as fp:
            network = json.load(fp=fp)
    else:
        network = None
        logger.info("No network configuration provided.")

    # TEST
    agent["execution"] = dict(
        type="distributed",
        distributed_spec=dict(
            job=args.job,
            task_index=args.task,
            # parameter_server=(args.job == "ps"),
            cluster_spec=dict(ps=["192.168.2.107:22222"],
                              worker=["192.168.2.107:22223"
                                      ]))) if args.job else None
    # END: TEST
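    # The ps/worker addresses above are hard-coded placeholders for a local
    # test cluster; replace them with real endpoints for distributed runs.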

    agent = Agent.from_spec(spec=agent,
                            kwargs=dict(
                                states=environment.states,
                                actions=environment.actions,
                                network=network,
                            ))
    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.restore_model(args.load)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to directory {}.".format(save_dir))

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    if args.debug:  # TODO: Timestep-based reporting
        report_episodes = 1
    else:
        report_episodes = 100

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environment))

    def episode_finished(r, id_):
        if r.episode % report_episodes == 0:
            steps_per_second = r.timestep / (time.time() - r.start_time)
            logger.info(
                "Finished episode {:d} after {:d} timesteps. Steps Per Second {:0.2f}"
                .format(r.agent.episode, r.episode_timestep, steps_per_second))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {:0.2f}".format(
                sum(r.episode_rewards[-500:]) /
                min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {:0.2f}".format(
                sum(r.episode_rewards[-100:]) /
                min(100, len(r.episode_rewards))))
        if args.save and args.save_episodes is not None and not r.episode % args.save_episodes:
            logger.info("Saving agent to {}".format(args.save))
            r.agent.save_model(args.save)

        return True

    runner.run(num_timesteps=args.timesteps,
               num_episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               episode_finished=episode_finished,
               testing=args.test,
               sleep=args.sleep)
    runner.close()

    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.agent.episode))