Example #1
import argparse
import logging

import gym
import universe  # registers the universe environments with gym
from universe import wrappers

logger = logging.getLogger()


def main():
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    args = parser.parse_args()

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    env = gym.make('flashgames.NeonRace-v0')
    # Restrict the valid random actions. (Try removing this and see
    # what happens when the agent is given full control of the
    # keyboard/mouse.)
    env = wrappers.SafeActionSpace(env)
    observation_n = env.reset()

    while True:
        # your agent here
        #
        # Try sending this instead of a random action: ('KeyEvent', 'ArrowUp', True)
        action_n = [env.action_space.sample() for ob in observation_n]
        observation_n, reward_n, done_n, info = env.step(action_n)
        env.render()

    return 0


if __name__ == '__main__':
    main()
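
For reference, the fixed action suggested in the comment above would replace the random sampling inside the loop; a minimal sketch reusing Example #1's env and observation_n:

    # Hold the up arrow in every sub-environment instead of sampling randomly.
    # In universe, each action is a list of VNC events.
    action_n = [[('KeyEvent', 'ArrowUp', True)] for ob in observation_n]
    observation_n, reward_n, done_n, info = env.step(action_n)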
Example #2
import logging

import gym
from universe import wrappers

# dqn, GenericActionSpace, _preprocess and _make_generic_frame are assumed to
# be project-local helpers from the snippet's repository.
import dqn

logger = logging.getLogger()


def _play_game(args):
    env = wrappers.SafeActionSpace(gym.make(args.game))
    env.configure()
    env.reset()
    action_space = GenericActionSpace(env.action_space)

    agent = dqn.DQN(action_space.n, checkpointpath=args.checkpoints, summarypath=args.summary)

    action = 0
    while True:
        # For now, assume a single-instance environment (one frame/action per observation).
        # zip(*X) transposes step()'s parallel lists into per-environment tuples.
        for frame, reward, terminal, _ in zip(*env.step([action_space[action]])):
            if frame is not None:
                action = agent.get_action(_preprocess(_make_generic_frame(frame)), action, reward, terminal)
                env.render()
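
The zip(*X) transpose in the comment above can be seen in isolation with dummy values; this standalone sketch assumes nothing from the snippet beyond the shape step() returns:

    # step() returns four parallel lists, one entry per sub-environment;
    # zip(*result) regroups them into per-environment (obs, reward, done, info) tuples.
    step_result = (['frame0'], [0.5], [False], [{}])
    for frame, reward, terminal, info in zip(*step_result):
        print(frame, reward, terminal)  # -> frame0 0.5 False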
Example #3
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    if args.env_id is not None:
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    # env = wrappers.BlockingReset(env)
    if not isinstance(env, wrappers.GymCoreAction):
        # The GymCoreSyncEnvs try to mimic their core counterparts,
        # and thus come pre-wrapped with an action space
        # translator. Everything else probably wants a SafeActionSpace
        # wrapper to shield them from random-agent clicking around
        # everywhere.
        env = wrappers.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)

    env.configure(
        fps=args.fps,
        # print_frequency=None,
        # ignore_clock_skew=True,
        remotes=args.remote,
        client_id=args.client_id,

        # remotes=remote, docker_image=args.docker_image, reuse=args.reuse, ignore_clock_skew=True,
        # vnc_session_driver='go', vnc_session_kwargs={
        #     'compress_level': 0,
        # },
    )
Example #4
import argparse
import logging

import gym
import universe  # registers the universe environments with gym
from universe import pyprofile, wrappers

# DeepDriver, DeepDriverTF and GTASetting are assumed to be project-local
# helpers from the snippet's repository.

logger = logging.getLogger()


def main():
    # You can optionally set up the logger. Also fine to set the level
    # to logging.DEBUG or logging.WARN if you want to change the
    # amount of output.
    logger.setLevel(logging.INFO)
    universe.configure_logging()

    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-e', '--env_id', default='gtav.SaneDriving-v0', help='Which environment to run on.')
    parser.add_argument('-m', '--monitor', action='store_false', help='Monitoring is on by default; pass -m to disable it.')
    parser.add_argument('-r', '--remote', help='The number of environments to create (e.g. -r 20), or the address of pre-existing VNC servers and rewarders to use (e.g. -r vnc://localhost:5900+15900,localhost:5901+15901), or a query to the allocator (e.g. -r http://allocator.sci.openai-tech.com?n=2)')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-R', '--no-render', action='store_true', help='Do not render the environment locally.')
    parser.add_argument('-f', '--fps', default=8., type=float, help='Desired frames per second')
    parser.add_argument('-N', '--max-steps', type=int, default=10**7, help='Maximum number of steps to take')
    parser.add_argument('-d', '--driver', default='DeepDriver', help='Choose your driver')
    parser.add_argument('-c', '--custom_camera',  action='store_true', help='Customize the GTA camera')

    args = parser.parse_args()

    logging.getLogger('gym').setLevel(logging.NOTSET)
    logging.getLogger('universe').setLevel(logging.NOTSET)
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    if args.env_id is not None:
        # N.B. This does not set the actual environment mode yet, which
        # is currently driven by the environment itself.
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    if not isinstance(env, wrappers.GymCoreAction):
        # The GymCoreSyncEnvs try to mimic their core counterparts,
        # and thus come pre-wrapped with an action space
        # translator. Everything else probably wants a SafeActionSpace
        # wrapper to shield them from random-agent clicking around
        # everywhere.
        env = wrappers.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)

    env.configure(
        fps=args.fps,
        # print_frequency=None,
        # ignore_clock_skew=True,
        remotes=args.remote,
        vnc_driver='go', vnc_kwargs={
            'encoding': 'tight', 'compress_level': 0, 'fine_quality_level': 50, 'subsample_level': 0, 'quality_level': 5,
        },
    )

    if args.driver == 'DeepDriver':
        driver = DeepDriver()
    elif args.driver == 'DeepDriverTF':
        driver = DeepDriverTF()
    else:
        raise ValueError('Driver %s is not available' % args.driver)

    driver.setup()

    if args.monitor:
        # env.monitor.start('/tmp/vnc_random_agent', force=True, video_callable=lambda i: True)
        env = wrappers.Monitor(env, '/tmp/vnc_random_agent', video_callable=False, force=True)

    render = not args.no_render
    observation_n = env.reset()
    reward_n = [0] * env.n
    done_n = [False] * env.n
    info = None

    for i in range(args.max_steps):
        if render:
            # Note the first time you call render, it'll be relatively
            # slow and you'll have some aggregated rewards. We could
            # open the render() window before `reset()`, but that's
            # confusing since it pops up a black window for the
            # duration of the reset.
            env.render()

        action_n = driver.step(observation_n, reward_n, done_n, info)

        try:
            if info is not None:
                distance = info['n'][0]['distance_from_destination']
                logger.info('distance %s', distance)
        except KeyError as e:
            logger.debug('distance not available %s', str(e))

        if args.custom_camera:
            # Sending this every step is probably overkill
            for action in action_n:
                action.append(GTASetting('use_custom_camera', True))

        # Take an action
        with pyprofile.push('env.step'):
            _step = env.step(action_n)
            observation_n, reward_n, done_n, info = _step

        if any(done_n) and info and not any(info_n.get('env_status.artificial.done', False) for info_n in info['n']):
            logger.info('end of episode at step %d: done_n=%s', i, done_n)
            env.reset()

    # We're done! clean up
    env.close()
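
Given the flags Example #4 defines, a typical invocation might look like the following; the filename gta_agent.py is hypothetical:

    # One remote environment, debug logging, no local rendering:
    python gta_agent.py -e gtav.SaneDriving-v0 -r 1 -v -R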