def _decode(self, observation, start, available_at):
    """Read the QR code in ``observation`` and decode its timing metadata.

    This method gets wrapped by AsyncDecode.__call__.

    The payload is expected to be ``v1:`` followed by two 12-digit
    hexadecimal fields; each field is divided by 1000 before being returned
    (presumably milliseconds converted to seconds -- confirm with the
    encoder).

    Args:
        observation: object exposing ``tobytes()`` with the raw frame data.
        start: passed through unchanged as ``processing_start``.
        available_at: passed through unchanged as ``available_at``.

    Returns:
        None when the scanner found nothing, otherwise a dict with the keys
        ``now``, ``probe_received_at``, ``processing_start``,
        ``processing_end`` and ``available_at``.

    Raises:
        error.Error: the payload has an unknown version prefix or the wrong
            length.
    """
    prefix = 'v1:'

    with pyprofile.push(
            'vnc_env.diagnostics.QRCodeMetadataDecoder.qr_code_scanner'):
        payload = fastzbarlight.qr_code_scanner(
            observation.tobytes(), self.width, self.height)

    if payload is None:
        # Failed to parse!
        return None

    if not payload.startswith(b'v1:'):
        raise error.Error(
            'Bad version string for metadata from environment: {}'.format(
                payload))

    text = payload.decode('utf-8')
    if len(text) != len(prefix) + 12 + 12:
        raise error.Error(
            'Bad length for metadata from enviroment: {}'.format(text))

    fields = text[len(prefix):]
    last_update = int(fields[:12], 16) / 1000.0
    last_action = int(fields[12:24], 16) / 1000.

    result = {}
    # Timestamp on the image
    result['now'] = last_update
    # When the last probe was received
    result['probe_received_at'] = last_action
    result['processing_start'] = start
    result['processing_end'] = time.time()
    result['available_at'] = available_at
    return result
def build(client_id, remotes, runtime=None, start_timeout=None, **kwargs):
    """Build the remote manager selected by the form of ``remotes``.

    Supported forms:

    * an integer or all-digit string (like ``-r 20``): a ``DockerManager``
      for that many containers;
    * ``vnc://...``: delegated to ``HardcodedAddresses.build``;
    * ``http://...`` or ``https://...``: an ``AllocatorManager`` built by
      ``AllocatorManager.from_remotes``; it is started before being returned.

    Args:
        client_id: forwarded to ``AllocatorManager.from_remotes``.
        remotes: int or str describing the remotes (see above).
        runtime: runtime object; required for the http(s) form, where its
            ``id`` and ``image`` attributes are read.
        start_timeout: forwarded to whichever manager is built.
        **kwargs: ``reuse``, ``api_key`` and ``use_recorder_ports`` are read;
            anything else is ignored.

    Returns:
        A ``(manager, n)`` tuple for the integer and http(s) forms; for
        ``vnc://`` whatever ``HardcodedAddresses.build`` returns.

    Raises:
        error.Error: ``remotes`` has the wrong type or an unsupported form,
            or ``runtime`` is missing for the http(s) form.
    """
    if isinstance(remotes, int):
        remotes = str(remotes)
    elif not isinstance(remotes, str):
        raise error.Error('remotes argument must be a string, got {} which is of type {}'.format(remotes, type(remotes)))

    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    if re.search(r'^\d+$', remotes):  # an integer, like -r 20
        n = int(remotes)
        manager = DockerManager(
            runtime=runtime,
            start_timeout=start_timeout,
            reuse=kwargs.get('reuse', False),
            n=n,
        )
        return manager, n

    if remotes.startswith('vnc://'):
        return HardcodedAddresses.build(
            remotes,
            start_timeout=start_timeout)

    if remotes.startswith(('http://', 'https://')):
        if runtime is None:
            raise error.Error('Must provide a runtime. HINT: try creating your env instance via lab.make("flashgames.DuskDrive-v0")')

        manager, n = AllocatorManager.from_remotes(
            client_id,
            remotes,
            runtime_id=runtime.id,
            runtime_tag=runtime.image.split(':')[-1],
            start_timeout=start_timeout,
            api_key=kwargs.get('api_key'),
            use_recorder_ports=kwargs.get('use_recorder_ports', False),
        )
        manager.start()
        return manager, n

    # The message now lists http:// as well, since that scheme is accepted above.
    raise error.Error('Invalid remotes: {!r}. Must be an integer or must start with vnc://, http:// or https://'.format(remotes))
def allocate_ports(self, num):
    """Hand out the next ``(vnc_port, rewarder_port, container_id)`` triple.

    The rewarder port is always the VNC port plus 10000. ``self._ports`` is
    consulted as a mapping from port number to the id of whatever exposes it.

    * With ``self.reuse`` set and the next VNC port already present in
      ``self._ports``, the existing id is returned so the caller can reuse it.
    * Without ``self.reuse``, ``self._next_port`` is advanced to the lowest
      pair of ports that are both free. This doesn't work for the reuse case
      since on restart we won't remember where we spun up our containers.
    * With ``self.reuse`` set but the port absent, the current
      ``self._next_port`` is handed out as-is.

    Args:
        num: unused by this implementation.

    Returns:
        ``(vnc_port, rewarder_port, container_id)``; ``container_id`` is
        ``None`` when nothing is being reused.

    Raises:
        error.Error: in reuse mode, when the rewarder port is missing or is
            exposed by a different id than the VNC port.
    """
    vnc_port = self._next_port
    rewarder_port = vnc_port + 10000

    if self.reuse and vnc_port in self._ports:
        # Reuse an existing docker container if it exists
        vnc_id = self._ports[vnc_port]
        if rewarder_port not in self._ports:
            # Report the port that is actually missing (was: vnc_port + 1).
            raise error.Error(
                "Port {} was allocated but {} was not. This indicates unexpected state with spun-up VNC docker instances."
                .format(vnc_port, rewarder_port))

        rewarder_id = self._ports[rewarder_port]
        if vnc_id != rewarder_id:
            # Arguments are ordered port-then-id to match the sentence.
            raise error.Error(
                "Port {} is exposed from {} while {} is exposed from {}. Both should come from a single Docker instance running your environment."
                .format(vnc_port, vnc_id, rewarder_port, rewarder_id))

        self._next_port += 1
        return vnc_port, rewarder_port, vnc_id

    if not self.reuse:
        # Otherwise, find the lowest free pair of ports.
        while (self._next_port in self._ports
               or (self._next_port + 10000) in self._ports):
            self._next_port += 1

    base = self._next_port
    self._next_port += 1

    # And get started!
    return base, base + 10000, None
def processExited(self, reason):
    """Resolve ``self.deferred`` from the finished process.

    When ``reason.value`` is a ``ProcessDone``, the collected stdout chunks
    in ``self.out`` are joined, decoded as UTF-8 and searched for
    ``offset <number> sec``; the number is delivered through
    ``self.deferred.callback``. Every failure is delivered through
    ``self.deferred.errback`` wrapped in ``error.Error``.

    Args:
        reason: object whose ``value`` describes how the process ended;
            ``value.exitCode`` is read on failure.
    """
    if not isinstance(reason.value, twisted.internet.error.ProcessDone):
        err = b''.join(self.err)
        self.deferred.errback(error.Error('{} failed with status {}: stderr={!r}'.format(self._cmd, reason.value.exitCode, err)))
        return

    out = b''.join(self.out).decode('utf-8')
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    match = re.search(r'offset ([\d.-]+) sec', out)
    offset = None
    if match is not None:
        try:
            offset = float(match.group(1))
        except ValueError:
            # The character class also admits strings such as '-' or '1.2.3';
            # report those as a parse failure so the deferred still fires.
            offset = None

    if offset is None:
        # error.Error does not interpolate %-style arguments the way logger
        # calls do, so the message has to be formatted here.
        self.deferred.errback(error.Error('Could not parse offset: {}'.format(out)))
    else:
        self.deferred.callback(offset)
def _register_vnc(self, address, start_time=None):
    """Connect to the VNC server at ``address`` and track the socket.

    Retries once per second while the connection fails for an expected
    reason, then stores the connected socket in ``self.sockets`` tagged as
    ``('vnc', address)``.

    Args:
        address: ``host[:port]`` string; the port defaults to 5900.
        start_time: timestamp the timeout is measured from; defaults to now.

    Raises:
        error.Error: the server did not come up within ``self.start_timeout``
            seconds.
        Whatever ``reraise`` raises when the connection error is unexpected
        or ``self.start_timeout`` is None.
    """
    # Local import: the original reached errno through the undocumented
    # ``socket.errno`` attribute.
    import errno

    if start_time is None:
        start_time = time.time()

    host, port = host_port(address, default_port=5900)

    while True:
        # In VNC, the server sends bytes upon connection
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((host, port))
        except (socket.error, socket.gaierror) as e:
            # This socket never connected; close it so retries do not leak
            # one file descriptor per attempt.
            sock.close()

            # ECONNREFUSED: VNC env hasn't come up yet
            # ETIMEDOUT: the packets can't be delivered yet, such as can happen on kubernetes
            # gaierror: can't resolve the address yet, which can also happen on kubernetes
            expected = (e.errno in (errno.ECONNREFUSED, errno.ETIMEDOUT)
                        or isinstance(e, socket.gaierror))
            if self.start_timeout is None or not expected:
                reraise(suffix='while connecting to VNC server {}'.format(
                    address))
            logger.info(
                'VNC server %s did not come up yet (error: %s). Sleeping for 1s.',
                address, e)
            time.sleep(1)
        else:
            break

        if time.time() - start_time > self.start_timeout:
            raise error.Error(
                'VNC server {} did not come up within {}s'.format(
                    address, self.start_timeout))

    self.sockets[sock] = ('vnc', address)
def keycode(key):
    """Translate ``key`` into a keycode.

    Keys present in ``constants.KEYMAP`` use the mapped value; any other
    single-character key uses ``ord(key)``.

    Raises:
        error.Error: ``key`` is neither mapped nor a single character.
    """
    keymap = constants.KEYMAP
    if key in keymap:
        return keymap.get(key)

    if len(key) != 1:
        raise error.Error('Not sure how to translate to keycode: {!r}'.format(key))

    return ord(key)
def env(self):
    """Return the object produced by calling ``self._env_ref``.

    Returns None while ``_env_ref`` has not been set yet (this accessor is
    called upon instantiation).

    Raises:
        error.Error: ``self._env_ref()`` returned None, i.e. the env is gone.
    """
    # Called upon instantiation
    if hasattr(self, '_env_ref'):
        target = self._env_ref()
        if target is not None:
            return target
        raise error.Error(
            "env has been garbage collected. To keep using WeakUnvectorize, you must keep around a reference to the env object. (HINT: try assigning the env to a variable in your code.)"
        )
    return None
def build(cls, metadata_encoding, pool, qr_pool, label):
    """Instantiate the metadata decoder named by ``metadata_encoding['type']``.

    The input dict is copied before ``type`` is removed, so the caller's dict
    is left untouched. For ``qrcode`` the remaining entries are forwarded as
    keyword arguments.

    Raises:
        KeyError: ``metadata_encoding`` has no ``type`` entry.
        error.Error: the type is neither ``qrcode`` nor ``pixels``.
    """
    options = metadata_encoding.copy()
    kind = options.pop('type')

    if kind == 'qrcode':
        return QRCodeMetadataDecoder(label=label,
                                     pool=pool,
                                     qr_pool=qr_pool,
                                     **options)
    if kind == 'pixels':
        return PixelsMetadataDecoder(label=label)

    raise error.Error('Invalid encoding: {}'.format(kind))
def __init__(self,
             remotes,
             error_buffer,
             encoding=None,
             compress_level=None,
             fine_quality_level=None,
             subsample_level=None):
    """Open a libvncdriver session for ``remotes``.

    compress_level: 0-9 [9 is highest compression]
    fine_quality_level: 0-100 [100 is best quality]
    subsample_level: 0-3 [0 is best quality]

    Any of the four tuning arguments left as None falls back to the
    matching ``LIBVNC_*`` environment variable, then to a built-in default
    ('tight', 0, 100 and 0 respectively).

    Lots of references for this, but
    https://github.com/TurboVNC/turbovnc/blob/master/doc/performance.txt
    is decent.

    Raises:
        error.Error: the imported ``libvncdriver`` module has no
            ``VNCSession`` attribute.
    """
    load_pygame()
    import libvncdriver

    def _int_setting(value, variable, default):
        # Explicit arguments win; otherwise read the environment.
        if value is not None:
            return value
        return int(os.environ.get(variable, default))

    if encoding is None:
        encoding = os.environ.get('LIBVNC_ENCODING', 'tight')
    compress_level = _int_setting(
        compress_level, 'LIBVNC_COMPRESS_LEVEL', '0')
    fine_quality_level = _int_setting(
        fine_quality_level, 'LIBVNC_FINE_QUALITY_LEVEL', '100')
    subsample_level = _int_setting(
        subsample_level, 'LIBVNC_SUBSAMPLE_LEVEL', '0')

    if not hasattr(libvncdriver, 'VNCSession'):
        raise error.Error(''' *=================================================* || libvncdriver is not installed || || Try installing with "pip install libvncdriver" || || or use the go or python driver by setting || || INSTITUTE_VNCDRIVER=go || || INSTITUTE_VNCDRIVER=py || *=================================================*''')

    logger.info("Using libvncdriver's %s encoding" % encoding)

    session_options = dict(
        remotes=remotes,
        error_buffer=error_buffer,
        encoding=encoding,
        compress_level=compress_level,
        fine_quality_level=fine_quality_level,
        subsample_level=subsample_level,
    )
    self.driver = libvncdriver.VNCSession(**session_options)
    self.screen = None
    self.render_called_once = False

    if PYGAME_INSTALLED:
        pygame.init()
def __init__(self, env):
    """Wrap ``env``, requiring (or creating) a vectorized environment.

    An env whose ``metadata['runtime.vectorized']`` is truthy is stored
    as-is. Otherwise it is wrapped in ``wrappers.Vectorize`` when
    ``self.autovectorize`` is set.

    Raises:
        error.Error: ``env`` is not vectorized and ``self.autovectorize`` is
            falsy.
    """
    super(Wrapper, self).__init__(env)

    if env.metadata.get('runtime.vectorized'):
        self.env = env
        return

    if not self.autovectorize:
        raise error.Error(
            'This wrapper can only wrap vectorized envs (i.e. where env.metadata["runtime.vectorized"] = True), not {}. Set "self.autovectorize = True" to automatically add a Vectorize wrapper.'
            .format(env))

    # Circular dependency :(
    from institute import wrappers
    self.env = wrappers.Vectorize(env)
def apply(self, framebuffer_update):
    """Paint every rectangle of ``framebuffer_update`` onto the screen.

    Only RAW, ZRLE and Zlib encoded rectangles are supported; each one is
    passed to ``self.update_rectangle``.

    Raises:
        error.Error: a rectangle uses any other encoding.
    """
    pyprofile.push('vncdriver.pyglet_screen.apply')
    try:
        for rect in framebuffer_update.rectangles:
            if isinstance(
                    rect.encoding,
                (server_messages.RAWEncoding, server_messages.ZRLEEncoding,
                 server_messages.ZlibEncoding)):
                self.update_rectangle(rect.x, rect.y, rect.width,
                                      rect.height, rect.encoding.data)
            else:
                raise error.Error('Unrecognized encoding: {}'.format(
                    rect.encoding))
    finally:
        # Always balance the push above, even when a rectangle is rejected;
        # otherwise the profiling stack is left one entry too deep.
        pyprofile.pop()
def build(cls, remotes, **kwargs):
    """Create an instance from a ``vnc://host[,host...][?password=...]`` URL.

    The comma-separated netloc entries are handed to ``parse_remotes``. The
    ``password`` query parameter, or ``utils.default_password()`` when it is
    absent, is used for both the VNC and the rewarder password.

    Args:
        remotes: the ``vnc://`` URL string.
        **kwargs: forwarded to the constructor.

    Returns:
        ``(instance, instance.available_n)``.

    Raises:
        error.Error: the URL scheme is not ``vnc``.
    """
    url = urlparse.urlparse(remotes)
    if url.scheme != 'vnc':
        raise error.Error('HardcodedAddresses must be initialized with a string starting with vnc://: {}'.format(remotes))

    hosts = url.netloc.split(',')
    options = urlparse.parse_qs(url.query)

    # We could support per-backend passwords, but no need for it
    # right now.
    candidates = options.get('password', [utils.default_password()])
    password = candidates[0]

    vnc_addresses, rewarder_addresses = parse_remotes(hosts)
    instance = cls(vnc_addresses,
                   rewarder_addresses,
                   vnc_password=password,
                   rewarder_password=password,
                   **kwargs)
    return instance, instance.available_n
def _initialize(self):
    """Create the pyglet window and texture used for rendering.

    pyglet is imported here rather than at module import time. The window
    and texture are sized ``self._width`` by ``self._height``.

    Raises:
        error.Error: running on Linux with no ``DISPLAY`` variable set.
    """
    has_display = bool(os.environ.get('DISPLAY'))
    if sys.platform.startswith('linux') and not has_display:
        raise error.Error(
            "Cannot render with mode='human' with no DISPLAY variable set."
        )

    import pyglet

    size = dict(width=self._width, height=self._height)
    self._window = pyglet.window.Window(visible=True, **size)
    self._window.dispatch_events()
    self.texture = pyglet.image.Texture.create(**size)
def _spawn(self):
    """Allocate ports and create (or reuse) the container for this remote.

    Ports come from ``self.assigner.allocate_ports``. When it returns an
    existing container id, that container is reused and marked as started.
    Otherwise a container is created; if the image is missing locally it is
    pulled, the ports are refreshed and re-allocated, and creation is tried
    once more.

    On return either ``self.reusing`` is True (with ``self.started`` set), or
    ``self._container_id`` holds the id of the newly created container.

    Raises:
        error.Error: ``self.runtime.image`` is None.
    """
    if self.runtime.image is None:
        raise error.Error('No image specified')
    assert self._container_id is None

    def _allocate_or_reuse():
        # Allocate a port pair; report whether an existing container was
        # picked up for reuse.
        (self.vnc_port, self.rewarder_port,
         self._container_id) = self.assigner.allocate_ports(2)
        if self._container_id is None:
            return False
        logger.info('[%s] Reusing container %s on ports %s and %s',
                    self.label, self._container_id[:12], self.vnc_port,
                    self.rewarder_port)
        self.reusing = True
        self.started = True
        return True

    if _allocate_or_reuse():
        return

    self.reusing = False
    logger.info(
        '[%s] Creating container: image=%s. Run the same thing by hand as: %s',
        self.label, self.runtime.image,
        pretty_command(
            self.runtime.cli_command(self.vnc_port, self.rewarder_port)))

    try:
        container = self._spawn_container()
    except docker.errors.NotFound as e:
        # Looks like we need to pull the image.
        # The explanation may arrive as bytes or as text (presumably this
        # depends on the docker client version -- confirm); only decode bytes
        # so the check itself cannot fail with AttributeError.
        explanation = e.explanation or ''
        if isinstance(explanation, bytes):
            explanation = explanation.decode('utf-8')
        assert 'No such image' in explanation, 'Expected NotFound error message message to include "No such image", but it was: {}. This is probably just a bug in this assertion and the assumption was incorrect'.format(
            e.explanation)

        logger.info('Image %s not present locally; pulling',
                    self.runtime.image)
        self._pull_image()

        # If we called pull_image from multiple processes (as we do with institute-starter-agent A3C)
        # these will all return at the same time. We probably all got the same port numbers before the pull started,
        # so wait a short random time and refresh our port numbers
        time.sleep(random.uniform(0.5, 2.5))
        self.assigner._refresh_ports()
        if _allocate_or_reuse():
            return

        # Try spawning again.
        container = self._spawn_container()

    self._container_id = container['Id']
def _step(self, action): try: for a, client in zip(action, self._clients): for event in a: if event[0] == 'KeyEvent': key, down = event[1:] client.send_KeyEvent(key, down) elif event[0] == 'PointerEvent': x, y, buttomask = event[1:] client.send_PointerEvent(x, y, buttomask) else: raise error.Error('Bad event type: {}'.format(type)) except Exception as e: self.error_buffer.record(e)
def onConnect(self, request):
    """Load the password from disk and record the connecting client.

    The password is read (stripped of surrounding whitespace) from
    ``/usr/local/synthai/privileged_state/password``. The client counts as
    an observer when its ``synthai-observer`` header equals ``'true'``.

    Raises:
        error.Error: the password file does not exist.
    """
    password_path = '/usr/local/synthai/privileged_state/password'
    if not os.path.exists(password_path):
        raise error.Error(
            'No such file: /usr/local/synthai/privileged_state/password. (HINT: did the init script run /app/institute-envs/base/synthai-setpassword?)'
        )

    with open(password_path) as handle:
        secret = handle.read().strip()

    is_observer = request.headers.get('synthai-observer') == 'true'

    self._message_id = 0
    self._request = request
    self._observer = is_observer
    self.password = secret

    logger.info('Client connecting: peer=%s observer=%s', request.peer,
                self._observer)
def wait_for_step(self, error_buffer=None, timeout=None):
    """Block until ``self.count`` becomes non-zero.

    ``self.cv`` is held for the duration and waited on in 0.5s slices.
    Between slices ``error_buffer.check()`` is called when an error buffer
    was supplied.

    Args:
        error_buffer: optional object with a ``check()`` method.
        timeout: optional limit in seconds; None waits indefinitely.

    Raises:
        error.Error: ``timeout`` elapsed while ``self.count`` was still zero.
    """
    # TODO: this might be cleaner using channels
    with self.cv:
        began = time.time()
        while self.count == 0:
            timed_out = (timeout is not None
                         and time.time() - began > timeout)
            if timed_out:
                raise error.Error(
                    'No rewards received in {}s'.format(timeout))

            if error_buffer:
                error_buffer.check()

            self.cv.wait(timeout=0.5)
def blockingCallFromThread(f, *a, **kw):
    """Run ``f(*a, **kw)`` in the reactor thread and wait for its result.

    The call is scheduled with ``reactor.callFromThread``; its outcome
    (result or failure) comes back through a local queue that the calling
    thread blocks on via ``queue_get``.

    Returns:
        The result of ``f``.

    Raises:
        error.Error: the call failed and the failure carries frames; the
            message is ``str()`` of the failure.
        The failure's original exception when it carries no frames.
    """
    outcomes = queue.Queue()

    def _run_in_reactor():
        deferred = defer.maybeDeferred(f, *a, **kw)
        deferred.addBoth(outcomes.put)

    reactor.callFromThread(_run_in_reactor)
    outcome = queue_get(outcomes)

    if not isinstance(outcome, failure.Failure):
        return outcome

    if outcome.frames:
        raise error.Error(str(outcome))
    raise outcome.value
def _allocate(self, handles, initial, params):
    """Request an allocation for ``handles`` and record the pending envs.

    Each handle gets a timestamp added to ``self._reconnect_history``;
    entries older than five minutes are dropped. The request goes through
    ``self.with_retries(self._requestor.allocation_create, ...)`` with
    ``self.params`` overlaid by ``params``. Envs reported as pending by
    ``self._handle_allocation`` are stored in ``self.pending`` keyed by name.

    Args:
        handles: iterable of handle strings; each must consist of digits.
        initial: forwarded to the allocation request.
        params: per-call parameters layered over ``self.params``; may be
            None.

    Raises:
        error.Error: a handle was reallocated more than five times within
            the past five minutes.
        AssertionError: a handle is not numeric, or more envs came back than
            were requested.
    """
    self._sleep = 1

    # self.params is None when the manager was constructed without params;
    # treat that (and params=None) as "no parameters" instead of crashing.
    _params = self.params.copy() if self.params is not None else {}
    if params is not None:
        _params.update(params)

    for handle in handles:
        history = self._reconnect_history.get(handle, [])
        history.append(time.time())
        floor = time.time() - 5 * 60
        history = [entry for entry in history if entry > floor]
        if len(history) > 5:
            raise error.Error(
                'Tried reallocating a fresh remote at index {} a total of {} times in the past 5 minutes (at {}). Please examine the logs to determine why the remotes keep failing.'
                .format(handle, len(history), history))
        self._reconnect_history[handle] = history

    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    assert all(re.search(r'^\d+$', h) for h in handles), "All handles must be numbers: {}".format(handles)

    allocation = self.with_retries(
        self._requestor.allocation_create,
        client_id=self.client_id,
        runtime_id=self.runtime_id,
        placement=self.placement,
        params=_params,
        handles=handles,
        initial=initial,
    )

    news = sum(1 for entry in allocation['info']['n'] if entry['new'])
    extra_logger.info(
        '[%s] Received allocation with %s new and %s existing envs: %s',
        self.label, news,
        len(allocation['info']['n']) - news, allocation)

    assert len(allocation['env_n']) <= len(
        handles
    ), "Received more envs than requested: allocation={} handles={}".format(
        allocation, handles)

    _, pending = self._handle_allocation(allocation)
    for env in pending:
        self.pending[env['name']] = {
            'handle': env['handle'],
            'params': params,
            'received_at': time.time(),
        }
def run(self):
    """Wait until every registered socket has produced data.

    Sockets in ``self.sockets`` are polled with ``select`` until all of them
    have become readable with at least one byte, or ``self.timeout`` seconds
    have passed. A socket that the remote closed (empty read or
    ``ECONNRESET``) is replaced with a fresh connection to the same address
    via ``_register_rewarder`` / ``_register_vnc``, followed by a one-second
    pause.

    Raises:
        error.Error: some sockets were still pending when the timeout
            expired.
        socket.error: ``recv`` failed with anything other than
            ``ECONNRESET``.
    """
    target = time.time() + self.timeout
    while self.sockets:
        remaining = target - time.time()
        if remaining < 0:
            break
        ready, _, _ = select.select(list(self.sockets), [], [], remaining)

        # Go through the readable sockets
        remote_closed = False
        for sock in ready:
            kind, address = self.sockets.pop(sock)
            try:
                # Connection was closed; try again.
                #
                # This is guaranteed not to block.
                try:
                    recv = sock.recv(1)
                except socket.error as e:
                    if e.errno != errno.ECONNRESET:
                        raise
                    recv = b''

                if recv == b'':
                    logger.info('Remote closed: address=%s', address)
                    remote_closed = True
                    if kind == 'rewarder':
                        self._register_rewarder(address)
                    else:
                        self._register_vnc(address)
                else:
                    logger.debug('Healthcheck passed for %s %s', kind,
                                 address)
            finally:
                # The probe socket has been removed from self.sockets and is
                # never used again; close it on every path, including
                # unexpected recv errors.
                sock.close()

        if remote_closed:
            sleep = 1
            logger.info(
                'At least one sockets was closed by the remote. Sleeping %ds...',
                sleep)
            time.sleep(sleep)

    if self.sockets:
        raise error.Error('Not all servers came up within {}s: {}'.format(
            self.timeout, list(self.sockets.values())))
def _register_rewarder(self, address, start_time=None):
    """Connect to the rewarder at ``address``, start a handshake, track it.

    Retries once per second while the connection fails for an expected
    reason. Once connected, a WebSocket upgrade request is written and the
    socket is stored in ``self.sockets`` tagged as ``('rewarder', address)``.

    Args:
        address: ``host[:port]`` string; the port defaults to 15900.
        start_time: timestamp the timeout is measured from; defaults to now.

    Raises:
        error.Error: the server did not come up within ``self.start_timeout``
            seconds.
        Whatever ``reraise`` raises when the connection error is unexpected
        or ``self.start_timeout`` is None.
    """
    # Local import: the original reached errno through the undocumented
    # ``socket.errno`` attribute.
    import errno

    if start_time is None:
        start_time = time.time()

    host, port = host_port(address, default_port=15900)

    while True:
        # In WebSockets, the server sends bytes once we've upgraded the protocol
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((host, port))
        except (socket.error, socket.gaierror) as e:
            # This socket never connected; close it so retries do not leak
            # one file descriptor per attempt.
            sock.close()

            # ECONNREFUSED: VNC env hasn't come up yet
            # ETIMEDOUT: the packets can't be delivered yet, such as can happen on kubernetes
            # gaierror: can't resolve the address yet, which can also happen on kubernetes
            expected = (e.errno in (errno.ECONNREFUSED, errno.ETIMEDOUT)
                        or isinstance(e, socket.gaierror))
            if self.start_timeout is None or not expected:
                reraise(suffix='while connecting to Rewarder server {}'.
                        format(address))
            logger.info(
                'Rewarder server %s did not come up yet (error: %s). Sleeping for 1s.',
                address, e)
            time.sleep(1)
        else:
            break

        if time.time() - start_time > self.start_timeout:
            raise error.Error(
                'Rewarder server {} did not come up within {}s'.format(
                    address, self.start_timeout))

    # Send a websocket handshake.
    # https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API/Writing_WebSocket_servers
    #
    # The port 10003 is an arbitrary port that we don't actually connect to, but needs to be a valid part
    # e.g Host: 127.0.0.1:GARBAGE results in the following error: (invalid port 'GARBAGE' in HTTP Host header '127.0.0.1:GARBAGE')
    handshake = (
        b'GET / HTTP/1.1\r\nHost: 127.0.0.1:10003\r\nUpgrade: WebSocket\r\nConnection:Upgrade\r\nSec-WebSocket-Key: dGhlIHNhbXBsZSBub25jZQ==\r\nSec-WebSocket-Version: 13\r\nauthorization: '
        + utils.basic_auth_encode('synthai').encode('utf-8') +
        b'\r\nsynthai-observer: true\r\n\r\n')
    # sendall keeps writing until the whole request is out; plain send() may
    # transmit only part of it.
    sock.sendall(handshake)

    self.sockets[sock] = ('rewarder', address)
def lab_core_action_space(lab_core_id):
    """Build the hardcoded key-event action space for a lab core env.

    * ``CartPole-v0`` gets two actions: 'left' pressed and 'left' released.
    * Envs whose entry point starts with ``lab.envs.atari:`` get one action
      per entry of ``get_action_meanings()``, translated with ``atari_vnc``
      according to which of FIRE/LEFT/RIGHT/UP/DOWN the meaning contains.

    Raises:
        error.Error: the env is of neither kind.
    """
    spec = lab.spec(lab_core_id)

    if spec.id == 'CartPole-v0':
        press = [spaces.KeyEvent.by_name('left', down=True)]
        release = [spaces.KeyEvent.by_name('left', down=False)]
        return spaces.Hardcoded([press, release])

    if not spec._entry_point.startswith('lab.envs.atari:'):
        raise error.Error('Unsupported env type: {}'.format(spec.id))

    env = spec.make()
    actions = [
        atari_vnc(up='UP' in meaning,
                  down='DOWN' in meaning,
                  left='LEFT' in meaning,
                  right='RIGHT' in meaning,
                  z='FIRE' in meaning)
        for meaning in env.unwrapped.get_action_meanings()
    ]
    return spaces.Hardcoded(actions)
def _apply(self, framebuffer_update): if self.paint_cursor: self._unpaint_cursor() for rect in framebuffer_update.rectangles: if isinstance( rect.encoding, (server_messages.RAWEncoding, server_messages.ZRLEEncoding, server_messages.ZlibEncoding)): self._update_rectangle(rect.x, rect.y, rect.width, rect.height, rect.encoding.data) elif isinstance(rect.encoding, server_messages.PseudoCursorEncoding): self._update_cursor_shape(rect.x, rect.y, rect.width, rect.height, rect.encoding.image, rect.encoding.mask) else: raise error.Error('Unrecognized encoding: {}'.format( rect.encoding)) if self.paint_cursor: self._paint_cursor()
def from_remotes(cls, client_id, remotes, runtime_id, runtime_tag,
                 start_timeout, api_key, use_recorder_ports):
    """Build a manager from an ``http(s)://host[/path][?query]`` URL.

    Recognised url-encoded params ("?n=2" and similar):

    * ``n``: number of envs; not added to params, just returned (default 1);
    * ``cpu``: converted to float and stored in ``params['cpu']``;
    * ``tag``: stored in ``params['tag']``; a url-encoded "?tag=" takes
      precedence over ``runtime_tag``;
    * ``address``: used as the placement (default ``'public'``).

    Anything else in the query is dropped on the floor.

    Returns:
        ``(manager, n)`` where ``manager`` is ``cls(...)`` and ``n`` an int.

    Raises:
        error.Error: the URL scheme is neither ``http`` nor ``https``.
        ValueError: ``n`` or ``cpu`` is not numeric.
    """
    parsed = urlparse.urlparse(remotes)
    if parsed.scheme not in ('http', 'https'):
        raise error.Error(
            'AllocatorManager must start with http:// or https://: {}'.
            format(remotes))

    base_url = parsed.scheme + '://' + parsed.netloc
    # parsed.path already starts with '/'; strip the surrounding slashes
    # before joining so the result never contains '//' after the host.
    path = parsed.path.strip('/')
    if path:
        base_url += '/' + path

    query = urlparse.parse_qs(parsed.query)

    # Intercept url-encoded params ("?n=2" and similar)
    n = int(query.get('n', [1])[0])  # not added to params, just returned

    params = {}
    cpu = query.get('cpu', [None])[0]
    if cpu is not None:
        params['cpu'] = float(cpu)

    tag = query.get('tag', [None])[0]
    # url-encoded "?tag=" gets precedence over runtimes.yml tag
    params['tag'] = tag if tag is not None else runtime_tag

    placement = query.get('address', ['public'])[0]

    manager = cls(client_id=client_id,
                  runtime_id=runtime_id,
                  base_url=base_url,
                  start_timeout=start_timeout,
                  params=params,
                  placement=placement,
                  api_key=api_key,
                  use_recorder_ports=use_recorder_ports)
    return manager, n
def start(self, attempts=None):
    """Spawn and start the container, retrying on failure.

    Each attempt calls ``self._spawn()`` followed by ``self._start()``; a
    None result from ``_start`` means success. After a failed attempt the
    method sleeps a random 1-5 seconds and refreshes the assigner's ports.

    Args:
        attempts: maximum number of attempts. Defaults to 1 when the
            assigner is reusing containers (we don't scan through ports for
            a free one in that case) and to 20 otherwise.

    Raises:
        error.Error: no attempt succeeded; the message carries the last
            error returned by ``_start``.
    """
    if attempts is None:
        # If we're reusing, we don't scan through ports for a free one.
        attempts = 1 if self.assigner.reuse else 20

    # Initialised up front so the final error can still be built when the
    # loop body never runs (attempts == 0).
    last_error = None
    for _ in range(attempts):
        self._spawn()
        last_error = self._start()
        if last_error is None:
            return
        time.sleep(random.uniform(1.0, 5.0))
        self.assigner._refresh_ports()

    raise error.Error(
        '[{}] Could not start container after {} attempts. Last error: {}'.
        format(self.label, attempts, last_error))
def format_error(e):
    """Convert a string, exception or Twisted Failure into ``error.Error``.

    A Failure is unwrapped to its ``value`` first. Strings are used
    verbatim; exceptions are rendered as a formatted traceback. When that
    rendering is just ``'None'``, a line from the current call stack is
    appended to ``str(e)`` instead. Finally the generic "lost in a non-clean
    fashion" text is expanded with a hint.

    Returns:
        A new ``error.Error`` carrying the resulting text.
    """
    # errback automatically wraps everything in a Twisted Failure
    if isinstance(e, failure.Failure):
        e = e.value

    if isinstance(e, str):
        text = e
    elif six.PY2:
        text = traceback.format_exc(e).rstrip()
    else:
        rendered = traceback.format_exception(type(e), e, e.__traceback__)
        text = ''.join(rendered).rstrip()

    if text == 'None':
        # Reasonable heuristic for exceptions that were created by hand.
        # The stack must be sampled in this frame: index -2 is relative to it.
        last = traceback.format_stack()[-2]
        text = '{}\n {}'.format(e, last)

    # Quick and dirty hack for now.
    generic = 'Connection to the other side was lost in a non-clean fashion'
    hinted = 'Connection to the other side was lost in a non-clean fashion (HINT: this generally actually means we got a connection refused error. Check that the remote is actually running.)'
    text = text.replace(generic, hinted)

    return error.Error(text)
def __init__(
        self,
        client_id,
        base_url=allocator_base,
        address_type=None,
        start_timeout=None,
        api_key=None,
        runtime_id=None,
        params=None,
        placement=None,
        use_recorder_ports=False,
):
    """Set up an allocator-backed manager.

    Args:
        client_id: stored as ``self.client_id``.
        base_url: allocator endpoint, passed to ``AllocatorClient``.
        address_type: one of 'public', 'pod' or 'private'; None means
            'public'.
        start_timeout: seconds; None means 20 minutes.
        api_key: None falls back to the module-level ``_api_key``.
        runtime_id, params, placement, use_recorder_ports: stored as-is.

    Raises:
        error.Error: ``address_type`` is not one of the accepted values.
    """
    super(AllocatorManager, self).__init__()

    self.label = 'AllocatorManager'
    self.supports_reconnect = True
    self.connect_vnc = True
    self.connect_rewarder = True

    address_type = 'public' if address_type is None else address_type
    if address_type not in ('public', 'pod', 'private'):
        raise error.Error(
            'Bad address type specified: {}. Must be public, pod, or private.'
            .format(address_type))

    self.client_id = client_id
    self.address_type = address_type

    self.start_timeout = 20 * 60 if start_timeout is None else start_timeout
    self.params = params
    self.placement = placement
    self.use_recorder_ports = use_recorder_ports

    # if base_url is None:
    #     base_url = scoreboard.api_base
    # if base_url is None:
    #     base_url = lab_base_url
    # if api_key is None:
    #     api_key = scoreboard.api_key
    # if api_key is None:
    #     raise lab.error.AuthenticationError("""You must provide an SynthAI Lab API key.
    # (HINT: Set your API key using "lab.scoreboard.api_key = .." or "export SYNTHAI_LAB_API_KEY=..."). You can find your API key in the SynthAI Lab web interface: https://lab.synthai.com/settings/profile.""")
    if api_key is None:
        api_key = _api_key

    self._requestor = AllocatorClient(self.label, api_key, base_url=base_url)
    self.base_url = base_url

    # These could be overridden on a per-allocation basis, if you
    # want heterogeoneous envs. We don't support those currently
    # in the higher layers, but this layer could support it
    # easily.
    self.runtime_id = runtime_id

    self.pending = {}
    self.error_buffer = utils.ErrorBuffer()
    self.requests = queue.Queue()
    self.ready = queue.Queue()

    self._reconnect_history = {}
    self._sleep = 1
def parse_remotes(remotes):
    """Split remote specs into VNC and rewarder address lists.

    Each remote has the form::

        address:vnc_port+rewarder_port   (e.g. localhost:5900+15900)

    Either vnc_port or rewarder_port can be omitted, but not both, and all
    remotes must agree on which parts they carry.

    Args:
        remotes: iterable of remote spec strings.

    Returns:
        ``(vnc_addresses, rewarder_addresses)``: lists of ``host:port``
        strings, each replaced by None when empty.

    Raises:
        error.Error: a spec is malformed, the specs are inconsistent, or no
            port of either kind was given.
    """
    # Tri-state flags: None until the first remote has been seen, then
    # True/False depending on whether remotes carry that port.
    have_rewarder = None
    have_vnc = None

    vnc_addresses = []
    rewarder_addresses = []

    for remote in remotes:
        # Parse off +, then :
        if '+' in remote:
            if have_rewarder is False:
                raise error.Error('Either all or no remotes must have rewarders: {}'.format(remotes))
            have_rewarder = True

            # Split once only: an extra '+' then fails the integer check
            # below instead of raising a bare unpacking ValueError.
            remote, rewarder_port = remote.split('+', 1)
            if not re.match(r'^[0-9]+$', rewarder_port):
                raise error.Error('Rewarder port must be an integer, not `{}`: {}'.format(rewarder_port, remotes))
            rewarder_port = int(rewarder_port)
        else:
            if have_rewarder is True:
                raise error.Error('Either all or no remotes must have rewarders: {}'.format(remotes))
            have_rewarder = False
            rewarder_port = None

        if ':' in remote:
            if have_vnc is False:
                raise error.Error('Either all or no remotes must have a VNC port: {}'.format(remotes))
            have_vnc = True

            # Same single split as above, so an extra ':' is reported as a
            # bad port.
            remote, vnc_port = remote.split(':', 1)
            if not re.match(r'^[0-9]+$', vnc_port):
                raise error.Error('VNC port must be an integer, not `{}`: {}'.format(vnc_port, remotes))
            vnc_port = int(vnc_port)
        else:
            if have_vnc is True:
                raise error.Error('Either all or no remotes must have a VNC port: {}'.format(remotes))
            have_vnc = False
            vnc_port = None

        host = remote
        if not re.match(r'^[-a-zA-Z0-9\.\_]+$', host):
            raise error.Error('Invalid hostname for remote: {}'.format(remotes))

        if rewarder_port is not None:
            rewarder_addresses.append('{}:{}'.format(host, rewarder_port))
        if vnc_port is not None:
            vnc_addresses.append('{}:{}'.format(host, vnc_port))

    if not have_rewarder and not have_vnc:
        raise error.Error('You must provide either rewarder or a VNC port: {}'.format(remotes))

    return (vnc_addresses or None), (rewarder_addresses or None)
def allocate(self, handles, initial=False, params={}):
    """Record ``handles`` as the active set of envs.

    Sets ``self.n`` to the number of handles and ``self._handles`` to the
    handles themselves. ``initial`` and ``params`` are accepted but not used
    here.

    Raises:
        error.Error: more handles were requested than ``self.available_n``.
    """
    requested = len(handles)
    available = self.available_n
    if requested > available:
        raise error.Error('Requested {} handles, but only have {} envs'.format(requested, available))

    self.n = requested
    self._handles = handles
def fail(reason):
    """Report a failed connection through the deferred ``d``.

    ``reason.value`` is wrapped in an ``error.Error`` labelled with
    ``factory.label``, run through ``utils.format_error`` and passed to
    ``d.errback``. An ``AlreadyCalledError`` from ``d`` is ignored.
    """
    message = '[{}] Connection failed: {}'.format(factory.label, reason.value)
    wrapped = error.Error(message)
    try:
        d.errback(utils.format_error(wrapped))
    except defer.AlreadyCalledError:
        # Deliberately ignored: nothing more to report in that case.
        pass