def _configure(
        self,
        remotes=None,
        client_id=None,
        start_timeout=None,
        docker_image=None,
        ignore_clock_skew=False,
        disable_action_probes=False,
        vnc_driver=None,
        vnc_kwargs=None,
        rewarder_driver=None,
        replace_on_crash=False,
        allocate_sync=True,
        observer=False,
        api_key=None,
        record=False,
):
    """Standard Gym hook to configure the environment.

    May only be called before the environment has been started. Builds the
    remote manager, creates the VNC / rewarder sessions the manager asks
    for, sets up diagnostics, marks the environment as started and requests
    allocation of ``self.n`` remotes.

    Args:
        remotes: Remotes specification handed to ``remotes_module.build``.
            When None, the ``GYM_VNC_REMOTES`` environment variable is
            used, falling back to ``'1'``.
        client_id: Client identifier handed to ``remotes_module.build``.
            When None, ``default_client_id()`` is used.
        start_timeout: Forwarded to ``remotes_module.build``.
        docker_image: If truthy (and ``self.spec`` is set), overrides the
            ``image`` attribute of the runtime spec.
        ignore_clock_skew (bool): Assume remotes are on the same machine as
            us, for the purposes of diagnostics measurement.

            If true, we skip measuring the clock skew over the network,
            and skip generating diagnostics which rely on it.

            True when used by the rewarder to measure latency between the
            VNC frame and its calculation of reward for that frame. In
            this case we share a common clock with the env generating the
            VNC frame, so we don't need to send/receive probes. Clock skew
            is zero in this case.

            False when remotes are potentially different machines (such as
            an agent, or a demonstrator), and we will be sending probe
            keys and measuring network ping round-trip times to calculate
            clock skew.
        disable_action_probes: Forwarded to ``diagnostics.Diagnostics``.
        vnc_driver: Passed to ``vnc_session`` to select the VNC session
            class.
        vnc_kwargs (dict): Optional VNC session arguments; only used when
            the remote manager reports ``connect_vnc``. Missing keys are
            filled with defaults (``start_timeout``, ``encoding`` and, in
            the non-``'gym-core'`` case, ``fine_quality_level`` and
            ``subsample_level``) and entries whose value is None are
            dropped before the result is stored on ``self.vnc_kwargs``.
            A dict supplied by the caller receives the defaults in place.
            When omitted, a fresh dict is created for this call.
        rewarder_driver: Passed to ``rewarder_session`` to select the
            rewarder session class.
        replace_on_crash: Reconnecting is allowed only when this is truthy
            and the remote manager reports ``supports_reconnect``.
        allocate_sync: If truthy, block until ``self.n`` environments have
            been fulfilled; otherwise only handle connections that are
            already available.
        observer: Stored as ``self._observer``.
        api_key: Forwarded to ``remotes_module.build``.
        record: Forwarded to ``remotes_module.build`` as
            ``use_recorder_ports``.

    Raises:
        error.Error: If the environment has already been started.
    """
    if self._started:
        raise error.Error(
            '{} has already been started; cannot change configuration now.'
            .format(self))

    universe.configure_logging()

    twisty.start_once()

    if self.spec is not None:
        runtime = registration.runtime_spec(self.spec.tags['runtime'])
        # Let the user manually set the docker_image version
        if docker_image:
            # TODO: don't support this option?
            runtime.image = docker_image
    else:
        runtime = None

    if remotes is None:
        remotes = os.environ.get('GYM_VNC_REMOTES', '1')

    if client_id is None:
        client_id = default_client_id()

    # A `{}` default would be created once and shared by every call; the
    # setdefault() calls below would then leak one environment's settings
    # (e.g. start_timeout) into the next. Build a fresh dict per call.
    if vnc_kwargs is None:
        vnc_kwargs = {}

    self.remote_manager, self.n = remotes_module.build(
        client_id=client_id,
        remotes=remotes,
        runtime=runtime,
        start_timeout=start_timeout,
        api_key=api_key,
        use_recorder_ports=record,
    )
    self.connection_names = [None] * self.n
    self.connection_labels = [None] * self.n
    self.crashed = {}

    self.allow_reconnect = replace_on_crash and self.remote_manager.supports_reconnect

    if self.remote_manager.connect_vnc:
        cls = vnc_session(vnc_driver)
        vnc_kwargs.setdefault('start_timeout', self.remote_manager.start_timeout)
        if runtime == 'gym-core':
            vnc_kwargs.setdefault('encoding', 'zrle')
        else:
            vnc_kwargs.setdefault('encoding', 'tight')
            vnc_kwargs.setdefault('fine_quality_level', 50)
            vnc_kwargs.setdefault('subsample_level', 2)
        # Filter out None values, since some drivers may not handle them correctly
        vnc_kwargs = {k: v for k, v in vnc_kwargs.items() if v is not None}
        logger.info(
            'Using VNCSession arguments: %s. (Customize by running "env.configure(vnc_kwargs={...})"',
            vnc_kwargs)
        self.vnc_kwargs = vnc_kwargs
        self.vnc_session = cls()
    else:
        self.vnc_session = None

    self._observer = observer
    if self.remote_manager.connect_rewarder:
        cls = rewarder_session(rewarder_driver)
        self.rewarder_session = cls()
    else:
        self.rewarder_session = None

    if ignore_clock_skew:
        logger.info(
            'Printed stats will ignore clock skew. (This usually makes sense only when the environment and agent are on the same machine.)'
        )

    if self.rewarder_session or ignore_clock_skew:
        # Don't need rewarder session if we're ignoring clock skew
        if self.spec is not None:
            metadata_encoding = self.spec.tags.get('metadata_encoding')
        else:
            metadata_encoding = None
        self.diagnostics = diagnostics.Diagnostics(
            self.n,
            self._probe_key,
            ignore_clock_skew,
            metadata_encoding=metadata_encoding,
            disable_action_probes=disable_action_probes)
    else:
        self.diagnostics = None

    self._reset_mask()
    self._started = True

    self.remote_manager.allocate([str(i) for i in range(self.n)], initial=True)
    if allocate_sync:
        # Block until we've fulfilled n environments
        self._handle_connect(n=self.n)
    else:
        # Handle any backends which synchronously fulfill their
        # allocation.
        self._handle_connect()
from mpl_toolkits.mplot3d import Axes3D import scipy import scipy.cluster.hierarchy as sch from scipy.cluster.vq import vq, kmeans, whiten import subprocess import urllib from mpl_toolkits.mplot3d import Axes3D from matplotlib import cm from matplotlib.ticker import LinearLocator, FormatStrFormatter from vrplayer import get_view from move_view_lib import move_view logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) universe.configure_logging() class env_f(): def __init__(self, env_id, task, subject=None, summary_writer=None): self._episode_reward = 0 self._episode_length = 0 class nnn(): def __init__(self, n): self.n = n import config self.action_space = nnn(config.direction_num)
import os
from gym.spaces.box import Box
import numpy as np
import gym
from gym import spaces
import logging
import universe
from universe import vectorized
from universe.wrappers import BlockingReset, GymCoreAction, EpisodeID, Unvectorize, Vectorize, Vision, Logger
from universe import spaces as vnc_spaces
from universe.spaces.vnc_event import keycode
import time
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
universe.configure_logging(
    path='/vol/bitbucket/jh1016/logfiles/universe-{}.log'.format(os.getpid()))
#universe.configure_logging()


def create_env(env_id, client_id, remotes, **kwargs):
    """Build the environment registered under ``env_id``.

    The tags of the gym spec decide which constructor is used:

    * a truthy ``flashgames`` tag -> ``create_flash_env``
    * truthy ``atari`` and ``vnc`` tags -> ``create_vncatari_env``
    * anything else is assumed to be a plain Atari environment and goes
      to ``create_atari_env``; ``client_id``, ``remotes`` and ``kwargs``
      are not forwarded on that path.
    """
    # tags is looked up with .get(..., False): a missing tag counts as False.
    tags = gym.spec(env_id).tags

    if tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)

    if tags.get('atari', False) and tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)

    # Assume atari.
    assert "." not in env_id  # universe environments have dots in names.
    return create_atari_env(env_id)
def _configure(self, remotes=None, client_id=None, start_timeout=None, docker_image=None,
               ignore_clock_skew=False, disable_action_probes=False, vnc_driver=None, vnc_kwargs=None,
               rewarder_driver=None, replace_on_crash=False, allocate_sync=True, observer=False,
               api_key=None, record=False,
               ):
    """Standard Gym hook to configure the environment.

    Configuration is a one-shot operation: once the environment has been
    started an ``error.Error`` is raised. Otherwise the remote manager is
    built, the VNC and rewarder sessions it asks for are created,
    diagnostics are set up, and allocation of ``self.n`` remotes is
    requested.

    Args:
        remotes: Passed to ``remotes_module.build``; defaults to the
            ``GYM_VNC_REMOTES`` environment variable, or ``'1'``.
        client_id: Passed to ``remotes_module.build``; defaults to
            ``default_client_id()``.
        start_timeout: Passed to ``remotes_module.build``.
        docker_image: When truthy and ``self.spec`` is set, replaces the
            runtime spec's ``image``.
        ignore_clock_skew (bool): Assume remotes are on the same machine as
            us, for the purposes of diagnostics measurement.

            If true, we skip measuring the clock skew over the network,
            and skip generating diagnostics which rely on it.

            True when used by the rewarder to measure latency between the
            VNC frame and its calculation of reward for that frame. In
            this case we share a common clock with the env generating the
            VNC frame, so we don't need to send/receive probes. Clock skew
            is zero in this case.

            False when remotes are potentially different machines (such as
            an agent, or a demonstrator), and we will be sending probe
            keys and measuring network ping round-trip times to calculate
            clock skew.
        disable_action_probes: Passed to ``diagnostics.Diagnostics``.
        vnc_driver: Passed to ``vnc_session`` to pick the session class.
        vnc_kwargs (dict): VNC session arguments; defaults are filled in
            (in place) and None-valued entries are dropped before the
            result is stored on ``self.vnc_kwargs``. Only used when the
            remote manager reports ``connect_vnc``.
        rewarder_driver: Passed to ``rewarder_session`` to pick the
            session class.
        replace_on_crash: Combined with the manager's
            ``supports_reconnect`` to decide ``self.allow_reconnect``.
        allocate_sync: If truthy, block until ``self.n`` environments are
            fulfilled.
        observer: Stored as ``self._observer``.
        api_key: Passed to ``remotes_module.build``.
        record: Passed to ``remotes_module.build`` as
            ``use_recorder_ports``.

    Raises:
        error.Error: If the environment has already been started.
    """
    if self._started:
        message = '{} has already been started; cannot change configuration now.'
        raise error.Error(message.format(self))

    universe.configure_logging()
    twisty.start_once()

    runtime = None
    if self.spec is not None:
        runtime = registration.runtime_spec(self.spec.tags['runtime'])
        # Let the user manually set the docker_image version
        if docker_image:
            # TODO: don't support this option?
            runtime.image = docker_image

    # Resolve the arguments the caller left unspecified.
    remotes = os.environ.get('GYM_VNC_REMOTES', '1') if remotes is None else remotes
    client_id = default_client_id() if client_id is None else client_id
    vnc_kwargs = {} if vnc_kwargs is None else vnc_kwargs

    self.remote_manager, self.n = remotes_module.build(
        client_id=client_id,
        remotes=remotes,
        runtime=runtime,
        start_timeout=start_timeout,
        api_key=api_key,
        use_recorder_ports=record,
    )
    self.connection_names = [None] * self.n
    self.connection_labels = [None] * self.n
    self.crashed = {}
    self.allow_reconnect = replace_on_crash and self.remote_manager.supports_reconnect

    if not self.remote_manager.connect_vnc:
        self.vnc_session = None
    else:
        session_cls = vnc_session(vnc_driver)
        vnc_kwargs.setdefault('start_timeout', self.remote_manager.start_timeout)
        if runtime == 'gym-core':
            fallbacks = (('encoding', 'zrle'),)
        else:
            fallbacks = (
                ('encoding', 'tight'),
                ('fine_quality_level', 50),
                ('subsample_level', 2),
            )
        for option, value in fallbacks:
            vnc_kwargs.setdefault(option, value)
        # Filter out None values, since some drivers may not handle them correctly
        vnc_kwargs = dict(
            (option, value) for option, value in vnc_kwargs.items() if value is not None
        )
        logger.info('Using VNCSession arguments: %s. (Customize by running "env.configure(vnc_kwargs={...})"', vnc_kwargs)
        self.vnc_kwargs = vnc_kwargs
        self.vnc_session = session_cls()

    self._observer = observer

    if self.remote_manager.connect_rewarder:
        self.rewarder_session = rewarder_session(rewarder_driver)()
    else:
        self.rewarder_session = None

    if ignore_clock_skew:
        logger.info('Printed stats will ignore clock skew. (This usually makes sense only when the environment and agent are on the same machine.)')

    if not (self.rewarder_session or ignore_clock_skew):
        self.diagnostics = None
    else:
        # Don't need rewarder session if we're ignoring clock skew
        metadata_encoding = (
            self.spec.tags.get('metadata_encoding') if self.spec is not None else None
        )
        self.diagnostics = diagnostics.Diagnostics(
            self.n,
            self._probe_key,
            ignore_clock_skew,
            metadata_encoding=metadata_encoding,
            disable_action_probes=disable_action_probes,
        )

    self._reset_mask()
    self._started = True

    self.remote_manager.allocate(list(map(str, range(self.n))), initial=True)
    if allocate_sync:
        # Block until we've fulfilled n environments
        self._handle_connect(n=self.n)
    else:
        # Handle any backends which synchronously fulfill their
        # allocation.
        self._handle_connect()
def setup_module(module):
    """Configure universe logging with the ``'-'`` target.

    ``module`` is accepted but not used. The name follows the
    module-level setup hook convention of pytest/nose, so this is
    presumably invoked by the test runner before the module's tests run.
    """
    log_target = '-'
    universe.configure_logging(log_target)
import cv2
from gym.spaces.box import Box
import numpy as np
import gym
from gym import spaces
import logging
import universe
from universe import vectorized
from universe.wrappers import BlockingReset, GymCoreAction, EpisodeID, Unvectorize, Vectorize, Vision, Logger
from universe import spaces as vnc_spaces
from universe.spaces.vnc_event import keycode
import time
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
universe.configure_logging()


def create_env(env_id, client_id, remotes, **kwargs):
    """Create the environment for ``env_id`` according to its spec tags.

    Flash-game specs are built by ``create_flash_env`` and specs tagged
    both ``atari`` and ``vnc`` by ``create_vncatari_env``. Every other id
    is treated as a plain Atari environment and built by
    ``create_atari_env``, which receives only ``env_id``.
    """
    spec = gym.spec(env_id)

    if spec.tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)

    if spec.tags.get('atari', False):
        if spec.tags.get('vnc', False):
            return create_vncatari_env(env_id, client_id, remotes, **kwargs)

    # Assume atari.
    assert "." not in env_id  # universe environments have dots in names.
    return create_atari_env(env_id)


def create_flash_env(env_id, client_id, remotes, **_):
    env = gym.make(env_id)
    env = Vision(env)
def main():
    """Run a driver agent against a universe environment from the CLI.

    Parses the command-line options, creates and configures the
    environment (default id ``gtav.SaneDriving-v0``), instantiates the
    requested driver (``DeepDriver`` or ``DeepDriverTF``) and then steps
    the environment for at most ``--max-steps`` iterations, feeding each
    observation to the driver and sending its actions back.

    Once the environment has been configured it is always closed on the
    way out, including when the driver or the step loop raises.

    Raises:
        Exception: If ``--driver`` names an unknown driver.
    """
    # You can optionally set up the logger. Also fine to set the level
    # to logging.DEBUG or logging.WARN if you want to change the
    # amount of output.
    logger.setLevel(logging.INFO)
    universe.configure_logging()

    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-e', '--env_id', default='gtav.SaneDriving-v0', help='Which environment to run on.')
    # NOTE(review): with action='store_false', args.monitor defaults to
    # True and passing -m turns it OFF, which contradicts the help text.
    # Left unchanged so existing invocations keep working; confirm intent.
    parser.add_argument('-m', '--monitor', action='store_false', help='Whether to activate the monitor.')
    parser.add_argument('-r', '--remote', help='The number of environments to create (e.g. -r 20), or the address of pre-existing VNC servers and rewarders to use (e.g. -r vnc://localhost:5900+15900,localhost:5901+15901), or a query to the allocator (e.g. -r http://allocator.sci.openai-tech.com?n=2)')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-R', '--no-render', action='store_true', help='Do not render the environment locally.')
    parser.add_argument('-f', '--fps', default=8., type=float, help='Desired frames per second')
    parser.add_argument('-N', '--max-steps', type=int, default=10**7, help='Maximum number of steps to take')
    parser.add_argument('-d', '--driver', default='DeepDriver', help='Choose your driver')
    parser.add_argument('-c', '--custom_camera', action='store_true', help='Customize the GTA camera')
    args = parser.parse_args()

    logging.getLogger('gym').setLevel(logging.NOTSET)
    logging.getLogger('universe').setLevel(logging.NOTSET)
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    if args.env_id is not None:
        # N.B. This does not set the actual environment mode yet, which
        # is currently driven by environment itself.
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    if not isinstance(env, wrappers.GymCoreAction):
        # The GymCoreSyncEnv's try to mimic their core counterparts,
        # and thus came pre-wrapped with an action space
        # translator. Everything else probably wants a SafeActionSpace
        # wrapper to shield them from random-agent clicking around
        # everywhere.
        env = wrappers.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)

    env.configure(
        fps=args.fps,
        # print_frequency=None,
        # ignore_clock_skew=True,
        remotes=args.remote,
        vnc_driver='go',
        vnc_kwargs={
            'encoding': 'tight',
            'compress_level': 0,
            'fine_quality_level': 50,
            'subsample_level': 0,
            'quality_level': 5,
        },
    )

    # From here on the environment is configured, so make sure it is
    # closed even if driver selection, setup, or the step loop raises.
    try:
        if args.driver == 'DeepDriver':
            driver = DeepDriver()
        elif args.driver == 'DeepDriverTF':
            driver = DeepDriverTF()
        else:
            raise Exception('That driver is not available')

        driver.setup()

        if args.monitor:
            # env.monitor.start('/tmp/vnc_random_agent', force=True, video_callable=lambda i: True)
            # NOTE(review): the wrapper returned here is discarded, so the
            # loop below steps the un-monitored env. Confirm whether
            # `env = wrappers.Monitor(...)` was intended.
            wrappers.Monitor(env, '/tmp/vnc_random_agent', video_callable=False, force=True)

        render = not args.no_render
        observation_n = env.reset()
        reward_n = [0] * env.n
        done_n = [False] * env.n
        info = None

        for i in range(args.max_steps):
            if render:
                # Note the first time you call render, it'll be relatively
                # slow and you'll have some aggregated rewards. We could
                # open the render() window before `reset()`, but that's
                # confusing since it pops up a black window for the
                # duration of the reset.
                env.render()

            action_n = driver.step(observation_n, reward_n, done_n, info)

            try:
                if info is not None:
                    distance = info['n'][0]['distance_from_destination']
                    logger.info('distance %s', distance)
            except KeyError as e:
                logger.debug('distance not available %s', str(e))

            if args.custom_camera:
                # Sending this every step is probably overkill
                for action in action_n:
                    action.append(GTASetting('use_custom_camera', True))

            # Take an action
            with pyprofile.push('env.step'):
                _step = env.step(action_n)
                observation_n, reward_n, done_n, info = _step

            if any(done_n) and info and not any(info_n.get('env_status.artificial.done', False) for info_n in info['n']):
                print('done_n', done_n, 'i', i)
                logger.info('end of episode')
                # NOTE(review): the observation returned by reset() is not
                # kept; the next driver.step() sees the last step's
                # observation. Left as in the original.
                env.reset()
    finally:
        # We're done! clean up
        env.close()
import cv2 from gym.spaces.box import Box import numpy as np import gym from gym import spaces import logging import universe from universe import vectorized from universe.wrappers import BlockingReset, GymCoreAction, EpisodeID, Unvectorize, Vectorize, Vision, Logger from universe import spaces as vnc_spaces from universe.spaces.vnc_event import keycode import time logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) #universe.configure_logging() universe.configure_logging(False) from skimage.color import rgb2gray os.environ['OPENAI_REMOTE_VERBOSE'] = '0' def create_env(env_id, client_id, remotes, **kwargs): if env_id.find("Catcher-v0") != -1: import gym_ple import os os.putenv('SDL_VIDEODRIVER', 'fbcon') os.environ["SDL_VIDEODRIVER"] = "dummy" return create_ple_env(env_id, **kwargs) spec = gym.spec(env_id)
def run(args, server):
    """Run one A3C worker against ``server`` until training should stop.

    Prepares the ``log``/``video``/``info`` directories under
    ``args.log_dir``, configures logging, builds the Atari environment and
    the ``A3C`` trainer, then drives ``trainer.process`` inside a
    ``tf.train.Supervisor`` managed session until the supervisor asks to
    stop or the global step limit is reached.

    Args:
        args: Parsed options; this function reads ``log_dir`` (rewritten
            in place with its expanded form), ``task``, ``test``,
            ``env_id`` and ``visualize``.
        server: Object whose ``target`` is handed to
            ``sv.managed_session``.
    """
    # configure logging
    args.log_dir = f_expand(args.log_dir)
    logging_dir, video_dir, info_dir = (
        f_join(args.log_dir, sub_dir)
        for sub_dir in ('log', 'video', 'info')  # 'info': other diagnostics, such as screenshot
    )
    for directory in (logging_dir, video_dir, info_dir):
        f_mkdir(directory)
    universe.configure_logging('{}/{:0>2}.txt'.format(logging_dir, args.task))

    mode = 'train'
    if args.test:
        mode = 'test-' + args.test
        sampling = 'stochastic' if args.test == 's' else 'deterministic'
        logger.info('TEST MODE: ' + sampling)

    # create env
    # env = create_env(args.env_id, client_id=str(args.task), remotes=args.remotes)
    env = create_atari_env(args.env_id, mode=mode, use_stack=(POLICY == 'cnn'))
    # env = record_video_wrap(env, video_dir=video_dir)
    trainer = A3C(env, args.task, visualize=args.visualize, mode=mode)

    def is_local(variable):
        # Variable names that start with "local" are not saved in checkpoints.
        return variable.name.startswith("local")

    variables_to_save = [var for var in tf.global_variables() if not is_local(var)]
    variables_local = [var for var in tf.global_variables() if is_local(var)]

    # DEBUG
    for var in tf.global_variables():
        print(var.name, var.get_shape())
    print('=' * 80)

    init_op = tf.variables_initializer(variables_to_save)
    init_all_op = tf.global_variables_initializer()
    init_local_op = tf.variables_initializer(variables_local)
    saver = FastSaver(variables_to_save)

    trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for var in trainable:
        logger.info(' %s %s', var.name, var.get_shape())

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)

    worker_device = "/job:worker/task:{}/cpu:0".format(args.task)
    config = tf.ConfigProto(device_filters=["/job:ps", worker_device])

    event_dir = f_join(args.log_dir, mode)
    # Only training runs get a per-task suffix on the event directory.
    event_suffix = '_{}'.format(args.task) if mode == 'train' else ''
    summary_writer = tf.summary.FileWriter(event_dir + event_suffix)

    sv = tf.train.Supervisor(
        is_chief=(args.task == 0),
        logdir=event_dir,
        saver=saver,
        summary_op=None,
        init_op=init_op,
        init_fn=init_fn,
        summary_writer=summary_writer,
        ready_op=tf.report_uninitialized_variables(variables_to_save),
        global_step=trainer.global_step,
        save_model_secs=30,
        save_summaries_secs=30,
    )

    num_global_steps = 1000000000

    logger.info(
        "Starting session. If this hangs, we're mostly likely waiting to connect to the parameter server. " +
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified."
    )
    with sv.managed_session(server.target, config=config) as sess, sess.as_default():
        if ELASTIC:
            sess.run(init_local_op)
        sess.run(trainer.global_sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info("Starting training at step=%d", global_step)
        while True:
            if sv.should_stop():
                break
            if num_global_steps and global_step >= num_global_steps:
                break
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached %s steps. worker stopped.', global_step)
from gym.spaces.box import Box
import numpy as np
import gym
from gym import spaces
import logging
import universe
from universe import vectorized
from universe.wrappers import BlockingReset, GymCoreAction, EpisodeID, Unvectorize, Vectorize, Vision, Logger
from universe import spaces as vnc_spaces
from universe.spaces.vnc_event import keycode
import time
import os
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
pid = os.getpid()
universe.configure_logging('/dev/shm/universe-{}.log'.format(pid))


def create_env(env_id, client_id, remotes, **kwargs):
    """Pick and call the environment constructor matching ``env_id``.

    The choice is driven by the gym spec's tags: ``flashgames`` selects
    ``create_flash_env``, ``atari`` together with ``vnc`` selects
    ``create_vncatari_env``, and anything else falls back to
    ``create_atari_env`` (called with ``env_id`` only).
    """
    tags = gym.spec(env_id).tags

    if tags.get('flashgames', False):
        factory = create_flash_env
    elif tags.get('atari', False) and tags.get('vnc', False):
        factory = create_vncatari_env
    else:
        # Assume atari.
        assert "." not in env_id  # universe environments have dots in names.
        return create_atari_env(env_id)

    return factory(env_id, client_id, remotes, **kwargs)


def create_flash_env(env_id, client_id, remotes, **_):
    env = gym.make(env_id)