Beispiel #1
0
def create_dd_env(remotes=None, client_id=None):
    """Build and configure the ``flashgames.DuskDrive-v0`` environment.

    The freshly made environment must advertise itself as vectorized; it
    is then wrapped, innermost first, with Logger, BlockingReset, Vision,
    EpisodeID and Unvectorize before being configured.

    Args:
        remotes: Forwarded unchanged to ``env.configure``.
        client_id: Forwarded unchanged to ``env.configure``.

    Returns:
        The wrapped environment, configured with ``fps=10`` and
        ``observer=True``.
    """
    base_env = gym.make('flashgames.DuskDrive-v0')
    assert base_env.metadata['runtime.vectorized']

    # Order matters: each wrapper receives the result of the previous one,
    # so Unvectorize ends up outermost.
    wrapper_chain = (
        wrappers.Logger,
        wrappers.BlockingReset,
        wrappers.Vision,
        wrappers.EpisodeID,
        wrappers.Unvectorize,
    )
    wrapped = base_env
    for wrap in wrapper_chain:
        wrapped = wrap(wrapped)

    wrapped.configure(
        remotes=remotes,
        fps=10,
        observer=True,
        client_id=client_id,
    )
    return wrapped
Beispiel #2
0
    if args.env_id is not None:
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    # env = wrappers.BlockingReset(env)
    if not isinstance(env, wrappers.GymCoreAction):
        # GymCoreSyncEnvs try to mimic their core counterparts, and
        # thus come pre-wrapped with an action space translator.
        # Everything else probably wants a SafeActionSpace wrapper to
        # shield it from a random agent clicking around everywhere.
        env = wrappers.experimental.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)

    if args.monitor:
        env = wrappers.Monitor(env, '/tmp/vnc_random_agent', force=True)

    if args.actions == 'random':
        action_space = env.action_space
    elif args.actions == 'noop':
        action_space = NoopSpace()
    elif args.actions == 'forward':
        action_space = ForwardSpace()
    elif args.actions == 'click':
        spec = universe.runtime_spec('flashgames').server_registry[args.env_id]
        height = spec["height"]
        width = spec["width"]
        noclick_regions = [
Beispiel #3
0
def main():
    """Run a driver agent against a Universe environment.

    Parses the command-line options, builds, wraps and configures the
    environment, instantiates the driver selected with ``--driver`` and
    then steps the environment for at most ``--max-steps`` iterations,
    rendering locally unless ``--no-render`` is given. The environment is
    closed once the step budget is exhausted.

    Raises:
        Exception: if ``--driver`` is neither ``DeepDriver`` nor
            ``DeepDriverTF``.
    """
    # You can optionally set up the logger. Also fine to set the level
    # to logging.DEBUG or logging.WARN if you want to change the
    # amount of output.
    logger.setLevel(logging.INFO)
    universe.configure_logging()

    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-e', '--env_id', default='gtav.SaneDriving-v0', help='Which environment to run on.')
    # store_true so that the flag matches its help text: monitoring is off
    # unless -m is passed. (It used to be store_false, i.e. on by default
    # and *disabled* by -m.)
    parser.add_argument('-m', '--monitor', action='store_true', help='Whether to activate the monitor.')
    parser.add_argument('-r', '--remote', help='The number of environments to create (e.g. -r 20), or the address of pre-existing VNC servers and rewarders to use (e.g. -r vnc://localhost:5900+15900,localhost:5901+15901), or a query to the allocator (e.g. -r http://allocator.sci.openai-tech.com?n=2)')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-R', '--no-render', action='store_true', help='Do not render the environment locally.')
    parser.add_argument('-f', '--fps', default=8., type=float, help='Desired frames per second')
    parser.add_argument('-N', '--max-steps', type=int, default=10**7, help='Maximum number of steps to take')
    parser.add_argument('-d', '--driver', default='DeepDriver', help='Choose your driver')
    parser.add_argument('-c', '--custom_camera',  action='store_true', help='Customize the GTA camera')

    args = parser.parse_args()

    # Let the library loggers inherit their level; only our own logger's
    # level is driven by -v.
    logging.getLogger('gym').setLevel(logging.NOTSET)
    logging.getLogger('universe').setLevel(logging.NOTSET)
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    if args.env_id is not None:
        # N.B. This does not set the actual environment mode yet, which
        # is currently driven by environment itself.
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    if not isinstance(env, wrappers.GymCoreAction):
        # GymCoreSyncEnvs try to mimic their core counterparts, and
        # thus come pre-wrapped with an action space translator.
        # Everything else probably wants a SafeActionSpace wrapper to
        # shield it from a random agent clicking around everywhere.
        env = wrappers.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)

    if args.monitor:
        # Keep the wrapped env: the wrapper's return value used to be
        # discarded, so monitoring never actually took effect. Wrapping
        # happens before configure() so the monitor is part of the chain
        # that gets configured, reset and stepped.
        env = wrappers.Monitor(env, '/tmp/vnc_random_agent', video_callable=False, force=True)

    env.configure(
        fps=args.fps,
        # print_frequency=None,
        # ignore_clock_skew=True,
        remotes=args.remote,
        vnc_driver='go', vnc_kwargs={
            'encoding': 'tight', 'compress_level': 0, 'fine_quality_level': 50, 'subsample_level': 0, 'quality_level': 5,
        },
    )

    if args.driver == 'DeepDriver':
        driver = DeepDriver()
    elif args.driver == 'DeepDriverTF':
        driver = DeepDriverTF()
    else:
        raise Exception('That driver is not available')

    driver.setup()

    render = not args.no_render
    observation_n = env.reset()
    reward_n = [0] * env.n
    done_n = [False] * env.n
    info = None

    for i in range(args.max_steps):
        if render:
            # Note the first time you call render, it'll be relatively
            # slow and you'll have some aggregated rewards. We could
            # open the render() window before `reset()`, but that's
            # confusing since it pops up a black window for the
            # duration of the reset.
            env.render()

        action_n = driver.step(observation_n, reward_n, done_n, info)

        # Report the distance from the previous step's info, if present.
        if info is not None:
            try:
                distance = info['n'][0]['distance_from_destination']
            except (KeyError, IndexError) as e:
                # Missing key or empty per-env list: nothing to report.
                logger.debug('distance not available %s', str(e))
            else:
                logger.info('distance %s', distance)

        if args.custom_camera:
            # Sending this every step is probably overkill
            for action in action_n:
                action.append(GTASetting('use_custom_camera', True))

        # Take an action
        with pyprofile.push('env.step'):
            observation_n, reward_n, done_n, info = env.step(action_n)

        # Reset only when some env is done and no env reports the done as
        # artificial via 'env_status.artificial.done'.
        if any(done_n) and info and not any(info_n.get('env_status.artificial.done', False) for info_n in info['n']):
            print('done_n', done_n, 'i', i)
            logger.info('end of episode')
            env.reset()

    # We're done! clean up
    env.close()
Beispiel #4
0
def main():
    """Entry point for the control-plane process.

    Parses the command-line options, optionally launches a demonstration
    agent as a subprocess, starts the agent connection listener, the
    environment controller and (unless ``--no-rewarder`` is given) the
    rewarder, then blocks on the error buffer.

    Returns:
        0 once ``error_buffer.blocking_check`` raises ``remote.Exit``.
        Any other exception raised by the check propagates to the caller.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    parser.add_argument('-r',
                        '--remotes',
                        default='vnc://127.0.0.1:5900',
                        help='Which VNC address to connect to.')
    parser.add_argument(
        '-e',
        '--env-id',
        default=None,
        help=
        'An env ID to optionally run upon startup (e.g. flashgames.DuskDrive-v0).'
    )
    # The --no-* flags are store_true switches that turn a feature OFF;
    # their help texts say so explicitly.
    parser.add_argument('-V',
                        '--no-vexpect',
                        action='store_true',
                        help='Disable vexpect.')
    parser.add_argument('-S',
                        '--no-scorer',
                        action='store_true',
                        help='Disable the scorer.')
    parser.add_argument('-E',
                        '--no-env',
                        action='store_true',
                        help='Run without an environment (reward and gameover parsing will be disabled).')
    parser.add_argument('-I',
                        '--integrator-mode',
                        action='store_true',
                        help='Run in integrator mode (implies --no-vexpect).')
    parser.add_argument('-R',
                        '--no-rewarder',
                        action='store_true',
                        help='Do not start the rewarder thread.')
    parser.add_argument('--rewarder-port',
                        type=int,
                        default=15900,
                        help='Which port to start the agent_conn thread')
    parser.add_argument('--rewarder-fps',
                        default=60,
                        type=float,
                        help='The frame rate for the rewarder.')
    parser.add_argument(
        '-i',
        '--idle-timeout',
        type=float,
        help=
        'How long to keep the environment around when it has no active connections'
    )
    parser.add_argument('--demonstration',
                        action='store_true',
                        help='Run a demonstration agent.')
    parser.add_argument(
        '--bot-demonstration',
        action='store_true',
        help=
        'Run a demonstration agent that connects to the vnc_recorder port, to record complete demos with no human playing'
    )

    args = parser.parse_args()

    # TODO: only activate in dev
    # Exit immediately with status 10 on Ctrl-C. The handler's first
    # parameter is named signum so it does not shadow the signal module.
    signal.signal(signal.SIGINT, lambda signum, frame: os._exit(10))

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    # Launch demonstration agent if requested. Bot mode takes precedence
    # over plain demonstration mode; both require an env ID. The env ID is
    # shell-quoted because the command runs through a shell pipeline.

    if args.bot_demonstration and args.env_id is not None:
        cmd = "/app/universe-envs/controlplane/bin/demonstration_agent.py -e {} -r vnc://localhost:5899+15899 2>&1 | sed -e 's/^/[demonstration_agent] /'".format(
            pipes.quote(args.env_id))
        logger.info('Launching demonstration agent in bot mode: %s', cmd)
        subprocess.Popen(cmd, shell=True)

    elif args.demonstration and args.env_id is not None:
        cmd = "/app/universe-envs/controlplane/bin/demonstration_agent.py -e {} 2>&1 | sed -e 's/^/[demonstration_agent] /'".format(
            pipes.quote(args.env_id))
        logger.info('Launching demonstration agent: %s', cmd)
        subprocess.Popen(cmd, shell=True)

    # NOTE(review): this logs the full process environment, which may
    # include secrets -- confirm that is acceptable for these logs.
    logger.info(
        "Starting play_controlplane.py with the following: command=%s args=%s env=%s",
        sys.argv, args, os.environ)

    error_buffer = universe.utils.ErrorBuffer()

    env_status = universe.rewarder.EnvStatus()
    env_status.set_env_info(env_id=args.env_id, fps=args.rewarder_fps)

    # The condition variable is shared by the control buffer and the
    # agent connection.
    cv = threading.Condition()
    control_buffer = remote.ControlBuffer(cv)
    agent_conn = remote.AgentConn(env_status,
                                  cv,
                                  control_buffer,
                                  error_buffer=error_buffer,
                                  idle_timeout=args.idle_timeout)
    agent_conn.listen(port=args.rewarder_port)

    # Logger gives us the diagnostics printing
    if not args.no_env:
        env = wrappers.Unvectorize(
            wrappers.Vision(wrappers.Logger(vnc_env.VNCEnv())))
        # Assert when given self-referential rewarder connection
        # This shows up as a '+15900' or similar port number in the remotes string
        assert '+' not in args.remotes, "Remotes may not have rewarder ports"
        env.configure(
            remotes=args.remotes,
            ignore_clock_skew=True,
            disable_action_probes=True,
            vnc_driver='go',
            vnc_kwargs={
                'encoding': 'zrle',
                'compress_level': 9
            },
            observer=True,
        )
    else:
        logger.info(
            'Running without environment, meaning reward and gameover parsing will be disabled'
        )
        env = None

    # Integrator mode implies running without vexpect.
    no_vexpect = args.no_vexpect or args.integrator_mode

    env_controller = EnvController(
        env,
        args.remotes,
        env_status,
        agent_conn,
        error_buffer=error_buffer,
        control_buffer=control_buffer,
        no_vexpect=no_vexpect,
        integrator_mode=args.integrator_mode,
    )
    env_controller.start()

    if not args.no_rewarder:
        rewarder = Rewarder(
            env,
            args.remotes,
            agent_conn,
            env_status=env_controller.env_status,
            trigger_reset=env_controller.trigger_reset,
            error_buffer=error_buffer,
            no_vexpect=no_vexpect,
            no_scorer=args.no_scorer,
        )
        rewarder.start()
    else:
        rewarder = None

    # Hand the rewarder and controller to manhole as its locals.
    manhole.install(locals={
        'rewarder': rewarder,
        'env_controller': env_controller
    })

    # TODO: clean up this API, but good enough for now
    # Poll the error buffer forever; the only normal way out is a
    # remote.Exit raised by the check. (The former trailing `return 1`
    # could never be reached and has been removed.)
    while True:
        try:
            error_buffer.blocking_check(timeout=60)
        except remote.Exit as e:
            logger.info('%s', e)
            return 0