Esempio n. 1
0
    def _configure(
        self,
        remotes=None,
        client_id=None,
        start_timeout=None,
        docker_image=None,
        ignore_clock_skew=False,
        disable_action_probes=False,
        vnc_driver=None,
        vnc_kwargs=None,
        rewarder_driver=None,
        replace_on_crash=False,
        allocate_sync=True,
        observer=False,
        api_key=None,
        record=False,
    ):
        """Standard Gym hook to configure the environment.

        Builds the remote manager, chooses the VNC / rewarder session
        classes, sets up diagnostics, marks the env as started and kicks off
        allocation of ``self.n`` remotes.

        Args:

          remotes: Remotes specification handed to ``remotes_module.build``.
            When ``None``, the ``GYM_VNC_REMOTES`` environment variable is
            used, falling back to ``'1'``.

          client_id: Identifier handed to ``remotes_module.build``.  When
            ``None``, ``default_client_id()`` is used.

          start_timeout: Forwarded to ``remotes_module.build``.

          docker_image: If truthy and the env has a spec, overrides
            ``runtime.image`` on the resolved runtime spec.

          ignore_clock_skew (bool): Assume remotes are on the same machine as us,
            for the purposes of diagnostics measurement.

            If true, we skip measuring the clock skew over the network,
            and skip generating diagnostics which rely on it.

            True when used by the rewarder to measure latency between
            the VNC frame and its calculation of reward for that
            frame.  In this case we share a common clock with the env
            generating the VNC frame, so we don't need to send/receive
            probes.  Clock skew is zero in this case.

            False when remotes are potentially different machines
            (such as an agent, or a demonstrator), and we will be
            sending probe keys and measuring network ping roundtrip
            times to calculate clock skew.

          disable_action_probes: Forwarded to ``diagnostics.Diagnostics``.

          vnc_driver: Passed to ``vnc_session()`` to pick the session class.

          vnc_kwargs (dict or None): Extra VNC session arguments.  The
            mapping is copied, so the caller's object is never modified.
            Missing ``start_timeout`` / encoding options get defaults,
            entries whose value is ``None`` are dropped, and the result is
            stored on ``self.vnc_kwargs``.

          rewarder_driver: Passed to ``rewarder_session()`` to pick the
            session class.

          replace_on_crash: Reconnection is allowed only when this is truthy
            and the remote manager reports ``supports_reconnect``.

          allocate_sync: When true, wait in ``_handle_connect`` for all
            ``self.n`` environments; otherwise only handle connections that
            are already available.

          observer: Stored on ``self._observer``.

          api_key: Forwarded to ``remotes_module.build``.

          record: Forwarded to ``remotes_module.build`` as
            ``use_recorder_ports``.

        Raises:
          error.Error: If the environment has already been started.
        """
        if self._started:
            raise error.Error(
                '{} has already been started; cannot change configuration now.'
                .format(self))

        universe.configure_logging()

        twisty.start_once()

        if self.spec is not None:
            runtime = registration.runtime_spec(self.spec.tags['runtime'])
            # Let the user manually set the docker_image version
            if docker_image:
                # TODO: don't support this option?
                runtime.image = docker_image
        else:
            runtime = None

        if remotes is None:
            remotes = os.environ.get('GYM_VNC_REMOTES', '1')

        if client_id is None:
            client_id = default_client_id()

        # Work on a private copy: a shared ``{}`` default (or the caller's
        # own dict) must not accumulate the setdefault() values below, or
        # they would leak into later calls and other instances.
        vnc_kwargs = {} if vnc_kwargs is None else dict(vnc_kwargs)

        self.remote_manager, self.n = remotes_module.build(
            client_id=client_id,
            remotes=remotes,
            runtime=runtime,
            start_timeout=start_timeout,
            api_key=api_key,
            use_recorder_ports=record,
        )
        self.connection_names = [None] * self.n
        self.connection_labels = [None] * self.n
        self.crashed = {}

        self.allow_reconnect = replace_on_crash and self.remote_manager.supports_reconnect
        if self.remote_manager.connect_vnc:
            cls = vnc_session(vnc_driver)
            vnc_kwargs.setdefault('start_timeout',
                                  self.remote_manager.start_timeout)
            # NOTE(review): ``runtime`` is whatever registration.runtime_spec
            # returned (or None); confirm that it can actually compare equal
            # to the string 'gym-core', otherwise this branch is dead.
            if runtime == 'gym-core':
                vnc_kwargs.setdefault('encoding', 'zrle')
            else:
                vnc_kwargs.setdefault('encoding', 'tight')
                vnc_kwargs.setdefault('fine_quality_level', 50)
                vnc_kwargs.setdefault('subsample_level', 2)
            # Filter out None values, since some drivers may not handle them correctly
            vnc_kwargs = {k: v for k, v in vnc_kwargs.items() if v is not None}
            logger.info(
                'Using VNCSession arguments: %s. (Customize by running "env.configure(vnc_kwargs={...})"',
                vnc_kwargs)
            self.vnc_kwargs = vnc_kwargs
            self.vnc_session = cls()
        else:
            self.vnc_session = None

        self._observer = observer
        if self.remote_manager.connect_rewarder:
            cls = rewarder_session(rewarder_driver)
            self.rewarder_session = cls()
        else:
            self.rewarder_session = None

        if ignore_clock_skew:
            logger.info(
                'Printed stats will ignore clock skew. (This usually makes sense only when the environment and agent are on the same machine.)'
            )

        if self.rewarder_session or ignore_clock_skew:
            # Don't need rewarder session if we're ignoring clock skew
            if self.spec is not None:
                metadata_encoding = self.spec.tags.get('metadata_encoding')
            else:
                metadata_encoding = None
            self.diagnostics = diagnostics.Diagnostics(
                self.n,
                self._probe_key,
                ignore_clock_skew,
                metadata_encoding=metadata_encoding,
                disable_action_probes=disable_action_probes)
        else:
            self.diagnostics = None

        self._reset_mask()
        # From here on, a second _configure() call raises error.Error.
        self._started = True

        self.remote_manager.allocate([str(i) for i in range(self.n)],
                                     initial=True)
        if allocate_sync:
            # Block until we've fulfilled n environments
            self._handle_connect(n=self.n)
        else:
            # Handle any backends which synchronously fulfill their
            # allocation.
            self._handle_connect()
Esempio n. 2
0
from mpl_toolkits.mplot3d import Axes3D
import scipy
import scipy.cluster.hierarchy as sch
from scipy.cluster.vq import vq, kmeans, whiten

import subprocess
import urllib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from vrplayer import get_view
from move_view_lib import move_view

# Module-level logger for this file, with its level pinned to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Runs at import time: set up universe's logging with its default arguments.
universe.configure_logging()


class env_f():
    # NOTE(review): only the beginning of this class is visible in this
    # excerpt; the constructor may continue beyond the last line shown.
    def __init__(self, env_id, task, subject=None, summary_writer=None):

        # Running totals, both starting at zero.
        self._episode_reward = 0
        self._episode_length = 0

        # Minimal stand-in object that exposes only an ``n`` attribute.
        class nnn():
            def __init__(self, n):
                self.n = n

        # Imported locally, inside the constructor.
        import config
        # The action space exposes ``n == config.direction_num``.
        self.action_space = nnn(config.direction_num)
Esempio n. 3
0
import os
from gym.spaces.box import Box
import numpy as np
import gym
from gym import spaces
import logging
import universe
from universe import vectorized
from universe.wrappers import BlockingReset, GymCoreAction, EpisodeID, Unvectorize, Vectorize, Vision, Logger
from universe import spaces as vnc_spaces
from universe.spaces.vnc_event import keycode
import time

# Module-level logger for this file, with its level pinned to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Runs at import time: point universe's logging at a per-process file whose
# name embeds the current PID.
# NOTE(review): the directory is a hard-coded absolute path; confirm it
# exists wherever this module is imported.
universe.configure_logging(
    path='/vol/bitbucket/jh1016/logfiles/universe-{}.log'.format(os.getpid()))
# Previous call with default arguments, kept disabled:
#universe.configure_logging()


def create_env(env_id, client_id, remotes, **kwargs):
    """Build the environment for ``env_id``, dispatching on its gym spec tags.

    * spec tagged ``flashgames`` -> ``create_flash_env``
    * spec tagged both ``atari`` and ``vnc`` -> ``create_vncatari_env``
    * anything else is assumed to be plain Atari -> ``create_atari_env``

    The first two builders receive ``client_id``, ``remotes`` and any extra
    keyword arguments; the Atari builder receives only ``env_id``.
    """
    tags = gym.spec(env_id).tags

    # A tag that is absent from the dict counts the same as a falsy one.
    if tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)

    is_vnc_atari = tags.get('atari', False) and tags.get('vnc', False)
    if is_vnc_atari:
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)

    # Assume atari.  Universe environments have dots in their names, so a
    # dotted id should never reach this point.
    assert "." not in env_id
    return create_atari_env(env_id)
Esempio n. 4
0
    def _configure(self, remotes=None,
                   client_id=None,
                   start_timeout=None, docker_image=None,
                   ignore_clock_skew=False, disable_action_probes=False,
                   vnc_driver=None, vnc_kwargs=None,
                   rewarder_driver=None,
                   replace_on_crash=False, allocate_sync=True,
                   observer=False, api_key=None,
                   record=False,
    ):
        """Standard Gym hook to configure the environment.

        Builds the remote manager, chooses the VNC / rewarder session
        classes, sets up diagnostics, marks the env as started and kicks off
        allocation of ``self.n`` remotes.

        Args:

          remotes: Remotes specification handed to ``remotes_module.build``.
            When ``None``, the ``GYM_VNC_REMOTES`` environment variable is
            used, falling back to ``'1'``.

          client_id: Identifier handed to ``remotes_module.build``.  When
            ``None``, ``default_client_id()`` is used.

          start_timeout: Forwarded to ``remotes_module.build``.

          docker_image: If truthy and the env has a spec, overrides
            ``runtime.image`` on the resolved runtime spec.

          ignore_clock_skew (bool): Assume remotes are on the same machine as us,
            for the purposes of diagnostics measurement.

            If true, we skip measuring the clock skew over the network,
            and skip generating diagnostics which rely on it.

            True when used by the rewarder to measure latency between
            the VNC frame and its calculation of reward for that
            frame.  In this case we share a common clock with the env
            generating the VNC frame, so we don't need to send/receive
            probes.  Clock skew is zero in this case.

            False when remotes are potentially different machines
            (such as an agent, or a demonstrator), and we will be
            sending probe keys and measuring network ping roundtrip
            times to calculate clock skew.

          disable_action_probes: Forwarded to ``diagnostics.Diagnostics``.

          vnc_driver: Passed to ``vnc_session()`` to pick the session class.

          vnc_kwargs (dict or None): Extra VNC session arguments.  The
            mapping is copied, so the caller's object is never modified.
            Missing ``start_timeout`` / encoding options get defaults,
            entries whose value is ``None`` are dropped, and the result is
            stored on ``self.vnc_kwargs``.

          rewarder_driver: Passed to ``rewarder_session()`` to pick the
            session class.

          replace_on_crash: Reconnection is allowed only when this is truthy
            and the remote manager reports ``supports_reconnect``.

          allocate_sync: When true, wait in ``_handle_connect`` for all
            ``self.n`` environments; otherwise only handle connections that
            are already available.

          observer: Stored on ``self._observer``.

          api_key: Forwarded to ``remotes_module.build``.

          record: Forwarded to ``remotes_module.build`` as
            ``use_recorder_ports``.

        Raises:
          error.Error: If the environment has already been started.
        """
        if self._started:
            raise error.Error('{} has already been started; cannot change configuration now.'.format(self))

        universe.configure_logging()

        twisty.start_once()

        if self.spec is not None:
            runtime = registration.runtime_spec(self.spec.tags['runtime'])
            # Let the user manually set the docker_image version
            if docker_image:
                # TODO: don't support this option?
                runtime.image = docker_image
        else:
            runtime = None

        if remotes is None:
            remotes = os.environ.get('GYM_VNC_REMOTES', '1')

        if client_id is None:
            client_id = default_client_id()

        # Copy instead of using the caller's dict directly: the setdefault()
        # calls below would otherwise write our defaults into an object the
        # caller may reuse for another environment.
        vnc_kwargs = {} if vnc_kwargs is None else dict(vnc_kwargs)

        self.remote_manager, self.n = remotes_module.build(
            client_id=client_id,
            remotes=remotes, runtime=runtime, start_timeout=start_timeout,
            api_key=api_key,
            use_recorder_ports=record,
        )
        self.connection_names = [None] * self.n
        self.connection_labels = [None] * self.n
        self.crashed = {}

        self.allow_reconnect = replace_on_crash and self.remote_manager.supports_reconnect
        if self.remote_manager.connect_vnc:
            cls = vnc_session(vnc_driver)
            vnc_kwargs.setdefault('start_timeout', self.remote_manager.start_timeout)
            # NOTE(review): ``runtime`` is whatever registration.runtime_spec
            # returned (or None); confirm that it can actually compare equal
            # to the string 'gym-core', otherwise this branch is dead.
            if runtime == 'gym-core':
                vnc_kwargs.setdefault('encoding', 'zrle')
            else:
                vnc_kwargs.setdefault('encoding', 'tight')
                vnc_kwargs.setdefault('fine_quality_level', 50)
                vnc_kwargs.setdefault('subsample_level', 2)
            # Filter out None values, since some drivers may not handle them correctly
            vnc_kwargs = {k: v for k, v in vnc_kwargs.items() if v is not None}
            logger.info('Using VNCSession arguments: %s. (Customize by running "env.configure(vnc_kwargs={...})"', vnc_kwargs)
            self.vnc_kwargs = vnc_kwargs
            self.vnc_session = cls()
        else:
            self.vnc_session = None

        self._observer = observer
        if self.remote_manager.connect_rewarder:
            cls = rewarder_session(rewarder_driver)
            self.rewarder_session = cls()
        else:
            self.rewarder_session = None

        if ignore_clock_skew:
            logger.info('Printed stats will ignore clock skew. (This usually makes sense only when the environment and agent are on the same machine.)')

        if self.rewarder_session or ignore_clock_skew:
            # Don't need rewarder session if we're ignoring clock skew
            if self.spec is not None:
                metadata_encoding = self.spec.tags.get('metadata_encoding')
            else:
                metadata_encoding = None
            self.diagnostics = diagnostics.Diagnostics(self.n, self._probe_key, ignore_clock_skew, metadata_encoding=metadata_encoding, disable_action_probes=disable_action_probes)
        else:
            self.diagnostics = None

        self._reset_mask()
        # From here on, a second _configure() call raises error.Error.
        self._started = True

        self.remote_manager.allocate([str(i) for i in range(self.n)], initial=True)
        if allocate_sync:
            # Block until we've fulfilled n environments
            self._handle_connect(n=self.n)
        else:
            # Handle any backends which synchronously fulfill their
            # allocation.
            self._handle_connect()
Esempio n. 5
0
def setup_module(module):
    """Configure universe logging for this module.

    Named like the pytest/nose per-module setup hook; ``module`` is accepted
    but not used.  The ``'-'`` argument is passed straight through to
    ``universe.configure_logging`` -- presumably it selects console output
    rather than a file; confirm against universe's implementation.
    """
    universe.configure_logging('-')
Esempio n. 6
0
import cv2
from gym.spaces.box import Box
import numpy as np
import gym
from gym import spaces
import logging
import universe
from universe import vectorized
from universe.wrappers import BlockingReset, GymCoreAction, EpisodeID, Unvectorize, Vectorize, Vision, Logger
from universe import spaces as vnc_spaces
from universe.spaces.vnc_event import keycode
import time
# Module-level logger for this file, with its level pinned to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Runs at import time: set up universe's logging with its default arguments.
universe.configure_logging()

def create_env(env_id, client_id, remotes, **kwargs):
    """Create the environment for ``env_id`` based on the tags of its gym spec.

    Flash-game specs and VNC Atari specs are built by their dedicated
    factories, which get ``client_id``, ``remotes`` and any extra keyword
    arguments.  Every other id is treated as a plain Atari environment and
    built from ``env_id`` alone.
    """
    spec_tags = gym.spec(env_id).tags

    if spec_tags.get('flashgames', False):
        factory = create_flash_env
    elif spec_tags.get('atari', False) and spec_tags.get('vnc', False):
        factory = create_vncatari_env
    else:
        # Assume atari.
        assert "." not in env_id  # universe environments have dots in names.
        return create_atari_env(env_id)

    return factory(env_id, client_id, remotes, **kwargs)

def create_flash_env(env_id, client_id, remotes, **_):
    # NOTE(review): only the first statements of this function are visible
    # in this excerpt; the rest of the body is not shown.
    # Extra keyword arguments are collected into ``_``.
    env = gym.make(env_id)
    # Wrap the freshly made env in the Vision wrapper.
    env = Vision(env)
Esempio n. 7
0
def main():
    """Run a driver agent against a gym/universe environment from the CLI.

    Parses command-line options, builds and wraps the environment,
    configures it (remotes, VNC driver and encoding options), instantiates
    the selected driver, then steps the environment for up to
    ``--max-steps`` iterations before closing it.

    Raises:
        Exception: If ``--driver`` is neither ``DeepDriver`` nor
            ``DeepDriverTF``.
    """
    # You can optionally set up the logger. Also fine to set the level
    # to logging.DEBUG or logging.WARN if you want to change the
    # amount of output.
    logger.setLevel(logging.INFO)
    universe.configure_logging()

    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-e', '--env_id', default='gtav.SaneDriving-v0', help='Which environment to run on.')
    # NOTE(review): action='store_false' means args.monitor is True unless
    # -m is given, i.e. passing the flag turns the monitor branch OFF --
    # confirm this matches the help text's intent.
    parser.add_argument('-m', '--monitor', action='store_false', help='Whether to activate the monitor.')
    parser.add_argument('-r', '--remote', help='The number of environments to create (e.g. -r 20), or the address of pre-existing VNC servers and rewarders to use (e.g. -r vnc://localhost:5900+15900,localhost:5901+15901), or a query to the allocator (e.g. -r http://allocator.sci.openai-tech.com?n=2)')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-R', '--no-render', action='store_true', help='Do not render the environment locally.')
    parser.add_argument('-f', '--fps', default=8., type=float, help='Desired frames per second')
    parser.add_argument('-N', '--max-steps', type=int, default=10**7, help='Maximum number of steps to take')
    parser.add_argument('-d', '--driver', default='DeepDriver', help='Choose your driver')
    parser.add_argument('-c', '--custom_camera',  action='store_true', help='Customize the GTA camera')

    args = parser.parse_args()

    # Clear any explicit level on the library loggers; this script's own
    # logger level follows the -v count.
    logging.getLogger('gym').setLevel(logging.NOTSET)
    logging.getLogger('universe').setLevel(logging.NOTSET)
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    if args.env_id is not None:
        # N.B. This does not set the actual environment mode yet, which
        # is currently driven by environment itself.
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    if not isinstance(env, wrappers.GymCoreAction):
        # The GymCoreSyncEnv's try to mimic their core counterparts,
        # and thus came pre-wrapped with an action space
        # translator. Everything else probably wants a SafeActionSpace
        # wrapper to shield them from random-agent clicking around
        # everywhere.
        env = wrappers.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)

    env.configure(
        fps=args.fps,
        # print_frequency=None,
        # ignore_clock_skew=True,
        remotes=args.remote,
        vnc_driver='go', vnc_kwargs={
            'encoding': 'tight', 'compress_level': 0, 'fine_quality_level': 50, 'subsample_level': 0, 'quality_level': 5,
        },
    )

    # Pick the driver implementation by name; unknown names are rejected.
    if args.driver == 'DeepDriver':
        driver = DeepDriver()
    elif args.driver == 'DeepDriverTF':
        driver = DeepDriverTF()
    else:
        raise Exception('That driver is not available')

    driver.setup()

    if args.monitor:
        # env.monitor.start('/tmp/vnc_random_agent', force=True, video_callable=lambda i: True)
        # NOTE(review): the Monitor wrapper is constructed but not assigned
        # back to ``env``, so the loop below steps the unwrapped env --
        # confirm whether ``env = wrappers.Monitor(...)`` was intended.
        wrappers.Monitor(env, '/tmp/vnc_random_agent', video_callable=False, force=True)
         
    render = not args.no_render
    observation_n = env.reset()
    # One reward / done slot per environment (env.n of them).
    reward_n = [0] * env.n
    done_n = [False] * env.n
    info = None

    for i in range(args.max_steps):
        if render:
            # Note the first time you call render, it'll be relatively
            # slow and you'll have some aggregated rewards. We could
            # open the render() window before `reset()`, but that's
            # confusing since it pops up a black window for the
            # duration of the reset.
            env.render()

        action_n = driver.step(observation_n, reward_n, done_n, info)

        # Best-effort logging of the first env's distance to destination;
        # a missing key is only reported at debug level.
        try:
            if info is not None:
                distance = info['n'][0]['distance_from_destination']
                logger.info('distance %s', distance)
        except KeyError as e:
            logger.debug('distance not available %s', str(e))

        if args.custom_camera:
            # Sending this every step is probably overkill
            for action in action_n:
                action.append(GTASetting('use_custom_camera', True))

        # Take an action
        with pyprofile.push('env.step'):
            _step = env.step(action_n)
            observation_n, reward_n, done_n, info = _step

        # Reset only when some env reports done and none of the per-env info
        # dicts flags the done as artificial.
        if any(done_n) and info and not any(info_n.get('env_status.artificial.done', False) for info_n in info['n']):
            print('done_n', done_n, 'i', i)
            logger.info('end of episode')
            # NOTE(review): the observation returned by reset() is discarded;
            # the next driver.step() still sees the pre-reset observation_n.
            env.reset()

    # We're done! clean up
    env.close()
Esempio n. 8
0
def setup_module(module):
    """Configure universe logging for this module.

    Named like the pytest/nose per-module setup hook; ``module`` is accepted
    but not used.  The ``'-'`` argument is passed straight through to
    ``universe.configure_logging`` -- presumably it selects console output
    rather than a file; confirm against universe's implementation.
    """
    universe.configure_logging('-')
Esempio n. 9
0
import cv2
from gym.spaces.box import Box
import numpy as np
import gym
from gym import spaces
import logging
import universe
from universe import vectorized
from universe.wrappers import BlockingReset, GymCoreAction, EpisodeID, Unvectorize, Vectorize, Vision, Logger
from universe import spaces as vnc_spaces
from universe.spaces.vnc_event import keycode
import time
# Module-level logger for this file, with its level pinned to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Previous call with default arguments, kept disabled:
#universe.configure_logging()
# Runs at import time.  NOTE(review): ``False`` is passed as the first
# positional argument -- presumably to suppress file logging; confirm
# against universe.configure_logging's signature.
universe.configure_logging(False)

from skimage.color import rgb2gray

# NOTE(review): ``os`` is not among the imports visible in this excerpt;
# confirm it is imported earlier in the file.
# Sets OPENAI_REMOTE_VERBOSE to '0' for this process at import time.
os.environ['OPENAI_REMOTE_VERBOSE'] = '0'


def create_env(env_id, client_id, remotes, **kwargs):
    # NOTE(review): only the first part of this function is visible in this
    # excerpt; the dispatch that follows ``gym.spec`` is not shown.
    # Ids containing "Catcher-v0" are routed to create_ple_env, which gets
    # only ``env_id`` and the extra keyword arguments.
    if env_id.find("Catcher-v0") != -1:
        # Imported for its side effects only (the name is not used below);
        # presumably this registers the PLE environments with gym -- confirm.
        import gym_ple
        # NOTE(review): this function-local import makes ``os`` a local name
        # for the whole function; any use of ``os`` outside this branch
        # would raise UnboundLocalError.
        import os
        # SDL_VIDEODRIVER is set twice; os.environ ends up holding "dummy".
        # NOTE(review): the 'fbcon' putenv looks redundant -- confirm.
        os.putenv('SDL_VIDEODRIVER', 'fbcon')
        os.environ["SDL_VIDEODRIVER"] = "dummy"
        return create_ple_env(env_id, **kwargs)

    spec = gym.spec(env_id)
Esempio n. 10
0
def run(args, server):
    """Run one A3C worker: set up logging, build the env and trainer, train.

    Args:
        args: Parsed options.  The visible code reads ``log_dir``, ``task``,
            ``test``, ``env_id`` and ``visualize``, and overwrites
            ``args.log_dir`` with its expanded form.
        server: Object whose ``target`` is handed to
            ``Supervisor.managed_session``.

    The function also depends on the module-level names ``POLICY`` and
    ``ELASTIC``, which are defined outside this excerpt.
    """
    # configure logging
    args.log_dir = f_expand(args.log_dir)
    logging_dir = f_join(args.log_dir, 'log')
    video_dir = f_join(args.log_dir, 'video')
    info_dir = f_join(args.log_dir,
                      'info')  # other diagnostics, such as screenshot
    f_mkdir(logging_dir)
    f_mkdir(video_dir)
    f_mkdir(info_dir)
    # One log file per task index, zero-padded to two characters.
    universe.configure_logging('{}/{:0>2}.txt'.format(logging_dir, args.task))

    # ``mode`` is 'train', or 'test-<args.test>' when args.test is truthy.
    if args.test:
        mode = 'test-' + args.test
        logger.info('TEST MODE: ' +
                    ('stochastic' if args.test == 's' else 'deterministic'))
    else:
        mode = 'train'

    # create env


#     env = create_env(args.env_id, client_id=str(args.task), remotes=args.remotes)
    env = create_atari_env(args.env_id, mode=mode, use_stack=(POLICY == 'cnn'))
    #     env = record_video_wrap(env, video_dir=video_dir)
    trainer = A3C(env, args.task, visualize=args.visualize, mode=mode)

    # Variable names that start with "local" are not saved in checkpoints.
    variables_to_save = [
        v for v in tf.global_variables() if not v.name.startswith("local")
    ]
    variables_local = [
        v for v in tf.global_variables() if v.name.startswith("local")
    ]
    # DEBUG
    for v in tf.global_variables():
        print(v.name, v.get_shape())
    print('=' * 80)
    # Three initializers: saved (non-"local") variables, all global
    # variables, and the "local"-prefixed variables only.
    init_op = tf.variables_initializer(variables_to_save)
    init_all_op = tf.global_variables_initializer()
    init_local_op = tf.variables_initializer(variables_local)
    saver = FastSaver(variables_to_save)

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())

    # Handed to the Supervisor as ``init_fn``; runs the all-variables
    # initializer in the given session.
    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)

    # Restrict visible devices to the parameter-server job and this
    # worker task's CPU.
    config = tf.ConfigProto(device_filters=[
        "/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)
    ])
    event_dir = f_join(args.log_dir, mode)

    # In train mode each task writes summaries to its own suffixed directory.
    event_suffix = '_{}'.format(args.task) if mode == 'train' else ''
    summary_writer = tf.summary.FileWriter(event_dir + event_suffix)

    # Task 0 acts as chief.
    sv = tf.train.Supervisor(
        is_chief=(args.task == 0),
        logdir=event_dir,
        saver=saver,
        summary_op=None,
        init_op=init_op,
        init_fn=init_fn,
        summary_writer=summary_writer,
        ready_op=tf.report_uninitialized_variables(variables_to_save),
        global_step=trainer.global_step,
        save_model_secs=30,
        save_summaries_secs=30)

    # Hard-coded upper bound on the global step for the training loop below.
    num_global_steps = 1000000000
    logger.info(
        "Starting session. If this hangs, we're mostly likely waiting to connect to the parameter server. "
        +
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified."
    )
    with sv.managed_session(server.target,
                            config=config) as sess, sess.as_default():
        if ELASTIC:
            sess.run(init_local_op)
        sess.run(trainer.global_sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("Starting training at step=%d", global_step)
        # Loop until the supervisor asks to stop or the step bound is hit.
        while not sv.should_stop() and (not num_global_steps
                                        or global_step < num_global_steps):
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    # NOTE: ``global_step`` here is the value last fetched inside the
    # session block above.
    logger.info('reached %s steps. worker stopped.', global_step)
Esempio n. 11
0
from gym.spaces.box import Box
import numpy as np
import gym
from gym import spaces
import logging
import universe
from universe import vectorized
from universe.wrappers import BlockingReset, GymCoreAction, EpisodeID, Unvectorize, Vectorize, Vision, Logger
from universe import spaces as vnc_spaces
from universe.spaces.vnc_event import keycode
import time
import os
# Module-level logger for this file, with its level pinned to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Runs at import time: send universe's logging to a per-process file under
# /dev/shm whose name embeds the current PID.
pid = os.getpid()
universe.configure_logging('/dev/shm/universe-{}.log'.format(pid))

def create_env(env_id, client_id, remotes, **kwargs):
    """Return an environment for ``env_id``, chosen by the tags on its gym spec.

    ``flashgames`` specs go to ``create_flash_env`` and specs carrying both
    ``atari`` and ``vnc`` go to ``create_vncatari_env``; each is called with
    ``client_id``, ``remotes`` and the extra keyword arguments.  Any other id
    is assumed to be plain Atari and is passed on its own to
    ``create_atari_env``.
    """
    tags = gym.spec(env_id).tags
    wants_flash = tags.get('flashgames', False)

    if wants_flash:
        return create_flash_env(env_id, client_id, remotes, **kwargs)

    # Only consulted once the flash-game case has been ruled out.
    if tags.get('atari', False) and tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)

    # Assume atari: universe environments have dots in names, plain ones
    # do not.
    assert "." not in env_id
    return create_atari_env(env_id)

def create_flash_env(env_id, client_id, remotes, **_):
    env = gym.make(env_id)