async def read_one(self) -> Text:
    """Read a single message"""
    message = ""
    headers = HTTPHeaders()

    line = await convert_yielded(self._readline())

    if line:
        while line and line.strip():
            headers.parse_line(line)
            line = await convert_yielded(self._readline())

        content_length = int(headers.get("content-length", "0"))

        if content_length:
            raw = await self._read_content(length=content_length)
            if raw is not None:
                message = raw.decode("utf-8").strip()
            else:  # pragma: no cover
                self.log.warning(
                    "%s failed to read message of length %s",
                    self,
                    content_length,
                )

    return message
async def read_one(self) -> Text:
    """Read a single message"""
    message = ""
    headers = HTTPHeaders()

    line = await convert_yielded(self._readline())

    if line:
        while line and line.strip():
            headers.parse_line(line)
            line = await convert_yielded(self._readline())

        content_length = int(headers.get("content-length", "0"))

        if content_length:
            raw = None
            retries = 5
            while raw is None and retries:
                try:
                    raw = self.stream.read(content_length)
                except OSError:  # pragma: no cover
                    raw = None

                if raw is None:  # pragma: no cover
                    self.log.warning(
                        "%s failed to read message of length %s",
                        self,
                        content_length,
                    )
                    await self.sleep()
                    retries -= 1
                else:
                    message = raw.decode("utf-8").strip()
                    break

    return message
def headers_parse_simple(headers: str) -> HTTPHeaders:
    h = HTTPHeaders()
    for line in headers.split("\n"):
        if line.endswith("\r"):
            line = line[:-1]
        if line:
            h.parse_line(line)
    return h
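A minimal usage sketch of the helper above; the raw header text is an invented example, and only HTTPHeaders and parse_line come from tornado.httputil:

from tornado.httputil import HTTPHeaders

# Hypothetical raw header block: everything between the status line and the
# blank line of an HTTP response, with CRLF line endings.
raw = "Content-Type: text/html\r\nContent-Length: 42\r\nSet-Cookie: a=b\r\nSet-Cookie: c=d\r\n"

h = headers_parse_simple(raw)
print(h["Content-Type"])         # "text/html"
print(h.get_list("Set-Cookie"))  # ["a=b", "c=d"] -- repeated headers are kept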
def parse_headers(data):
    headers = HTTPHeaders()
    for line in data.splitlines():
        if line:
            try:
                headers.parse_line(line)
            except Exception:
                # Stop at the first malformed header line
                break
    return headers
def _curl_header_callback(
    self,
    headers: httputil.HTTPHeaders,
    header_callback: Callable[[str], None],
    header_line_bytes: bytes,
) -> None:
    header_line = native_str(header_line_bytes.decode("latin1"))
    if header_callback is not None:
        self.io_loop.add_callback(header_callback, header_line)
    # header_line as returned by curl includes the end-of-line characters.
    # whitespace at the start should be preserved to allow multi-line headers
    header_line = header_line.rstrip()
    if header_line.startswith("HTTP/"):
        headers.clear()
        try:
            (__, __, reason) = httputil.parse_response_start_line(header_line)
            header_line = "X-Http-Reason: %s" % reason
        except httputil.HTTPInputError:
            return
    if not header_line:
        return
    headers.parse_line(header_line)
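For reference, a small sketch of what httputil.parse_response_start_line returns for the status line handled above; the sample status line is made up:

from tornado import httputil

# Made-up status line of the kind curl hands to the header callback first.
start_line = httputil.parse_response_start_line("HTTP/1.1 301 Moved Permanently")
print(start_line.version)  # "HTTP/1.1"
print(start_line.code)     # 301
print(start_line.reason)   # "Moved Permanently"

# The reason phrase is smuggled through as a synthetic header, which
# parse_line() accepts like any other "Name: value" line.
h = httputil.HTTPHeaders()
h.parse_line("X-Http-Reason: %s" % start_line.reason)
print(h["X-Http-Reason"])  # "Moved Permanently"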
@classmethod
def send_object(cls, object_url):
    """
    Send OpenSlides objects to all connected clients.

    First, receive the object from the OpenSlides ReST API.
    """
    for waiter in cls.waiters:
        # Get the object from the ReST API
        http_client = AsyncHTTPClient()
        headers = HTTPHeaders()
        # TODO: read up on Python Morsel cookies and why "set-Cookie" does not work
        request_cookies = waiter.request_info.cookies.values()
        cookie_value = ';'.join("%s=%s" % (cookie.key, cookie.value)
                                for cookie in request_cookies)
        headers.parse_line("Cookie: %s" % cookie_value)
        request = HTTPRequest(
            url=''.join((REST_URL, object_url)),
            headers=headers,
            decompress_response=False)
        # TODO: use proxy_host as header from waiter.request_info
        http_client.fetch(request, waiter.handle_rest_request)
def read_headers(self):
    parser = HTTPHeaders()
    lastkey = None
    count = 0
    while True:
        line = yield self.stream.read_until(b'\r\n', max_bytes=self.max_header_length)
        if len(line) == 2:
            break
        self.readed += len(line)
        count += 1
        line = line.decode('utf-8')
        if self.max_headers and count > self.max_headers:
            raise iostream.UnsatisfiableReadError(
                "Too many headers " + line + ' ' + json.dumps(parser))
        parser.parse_line(line)
        if lastkey is None:
            lastkey = parser._last_key
        elif lastkey != parser._last_key:
            self.request.received_header(lastkey, parser[lastkey])
            lastkey = parser._last_key

    self.request.request_headers = parser
    self.request.received_headers()
    note('received all headers')
    raise gen.Return(parser)
class ProxyHandler(web.RequestHandler):
    def initialize(self):
        self.proxy_headers = HTTPHeaders()
        # create a new client for each request
        self.http_client = AsyncHTTPClient(max_clients=1)
        self.in_request_headers = False
        self.id = id(self)
        self.request_data = None

    def validate_request(self, request_data):
        if self.request.headers.get("X-Proxy-Agent") == X_Proxy_Agent:
            self.set_status(403, "recursion rejected")
            return False

        try:
            RequstDataValidator.validate(request_data)
        except ValidationError as err:
            self.set_status(400, "/%s: %s" % (
                "::".join(err.path), err.message
            ))
            return False
        return True

    def get_post_request_data(self):
        try:
            request_data = json.loads(self.request.body.decode("utf-8"))
        except ValueError as err:
            self.set_status(400, str(err))
            return
        return request_data

    def _set_proxy_headers(self):
        for k, v in self.proxy_headers.items():
            if k not in RESPONSE_EXCLUDE_HEADERS:
                logger.debug(
                    "[%s] write header %s: %s",
                    self.id, k, v,
                )
                self.set_header(k, v)

    def _streaming_callback(self, chunk):
        if self._finished:
            return

        if not self._headers_written:
            self._set_proxy_headers()
            self.flush()
            self.in_request_headers = False

        self.write(chunk)
        logger.debug("[%s] chunk length %s", self.id, len(chunk))

    def _header_callback(self, header_line):
        if not self.in_request_headers:
            start_line = parse_response_start_line(header_line)
            self.set_status(start_line.code, start_line.reason)
            self.in_request_headers = True
        elif not HTTPHeaderEndLineRex.match(header_line):
            self.proxy_headers.parse_line(header_line)

    def _get_request_body(self, request_data):
        post_type = request_data.get("post_type")
        data = request_data.get("data")
        if data is None:
            return None

        if post_type == "form":
            body = urlencode(data or {})
        elif post_type == "json":
            body = json.dumps(data)
        elif post_type == "string" and isinstance(data, basestring):
            body = native_str(data)
        else:
            body = None
        return body

    @gen.coroutine
    def _get_keystone_auth_headers(self, auth_info, validate_cert=True):
        try:
            response = yield self.http_client.fetch(
                auth_info.get("auth_url"),
                method="POST",
                headers={"Content-Type": "application/json"},
                validate_cert=validate_cert,
                body=json.dumps({
                    "auth": {
                        "passwordCredentials": {
                            "username": auth_info.get("user_name"),
                            "password": auth_info.get("password"),
                        },
                        "tenantName": auth_info.get("tenant_name"),
                    }
                })
            )
        except Exception as err:
            logger.info(err)
            self.set_status(503, "keystone auth error")
            raise gen.Return()

        if response.error or response.code != 200:
            logger.info("keystone auth error")
            self.set_status(407, "keystone auth error")
            raise gen.Return()

        auth_info = json.loads(response.body.decode("utf-8"))
        try:
            raise gen.Return({
                "X-AUTH-TOKEN": auth_info["access"]["token"]["id"],
            })
        except KeyError:
            logger.info("keystone auth failed")
            self.set_status(407, "keystone auth failed")
            raise gen.Return()

    def _get_proxy_request_headers(self, request_data):
        headers = {
            k: v for k, v in self.request.headers.items()
            if k.lower() in RAW_REQUEST_ACCEPT_HEADERS
        }
        cookies = request_data.get("cookies")
        if cookies:
            headers["Cookie"] = "; ".join(
                "%s=%s" % i for i in cookies.items()
            )

        post_type = request_data.get("post_type")
        if post_type == "form":
            headers.setdefault(
                "Content-Type", "application/x-www-form-urlencoded"
            )
        elif post_type == "json":
            headers.setdefault(
                "Content-Type", "application/json"
            )
        elif post_type == "string":
            headers.setdefault(
                "Content-Type", "text/plain"
            )

        request_headers = request_data.get("headers") or {}
        for k, v in request_headers.items():
            if k in REQUEST_ACCEPT_HEADERS:
                headers[k] = v
            elif k.startswith("X-"):
                headers[k] = v

        headers["X-Proxy-Agent"] = X_Proxy_Agent
        return headers

    @gen.coroutine
    def handle_request(self, request_data):
        try:
            proxy_request = yield self._make_proxy_request(request_data)
            if not proxy_request:
                raise gen.Return()
            yield self._fetch_proxy_request(proxy_request)
        except RequestParamsError as err:
            self.set_status(400, str(err))
        except Exception as err:
            logger.exception(err)
            raise gen.Return()

    @web.asynchronous
    @gen.coroutine
    def get(self):
        url = self.get_query_argument("url")
        logger.debug("[%s]agent get url: %s", self.id, url)
        self.request_data = request_data = {"url": url}
        if not self.validate_request(request_data):
            raise gen.Return()
        yield self.handle_request(request_data)

    @web.asynchronous
    @gen.coroutine
    def post(self):
        request_data = self.get_post_request_data()
        logger.debug("[%s]agent request data: %s", self.id, request_data)
        if not request_data:
            raise gen.Return()
        self.request_data = request_data
        if not self.validate_request(request_data):
            raise gen.Return()
        yield self.handle_request(request_data)

    def prepare_curl_callback(self, curl):
        import pycurl
        if (
            "insecure_connection" in self.request_data
            and bool(self.request_data.get("insecure_connection"))
        ):
            curl.setopt(pycurl.SSL_VERIFYHOST, 0)

    @gen.coroutine
    def _make_proxy_request(self, request_data):
        timeout = float(request_data.get("timeout", DEFAULT_TIMEOUT))
        validate_cert = bool(request_data.get("validate_cert") or True)
        max_redirects = request_data.get("max_http_redirects") or 0
        follow_redirects = max_redirects > 0  # 0 means do not follow redirects

        url = request_data.get("url")
        params = request_data.get("data")
        post_type = request_data.get("post_type")
        if params and post_type is None:
            url = "%s?%s" % (url, urlencode(params))

        logger.info("[%s]agent request url: %s", self.id, url)
        proxy_request = HTTPRequest(
            url,
            validate_cert=validate_cert,
            headers=self._get_proxy_request_headers(request_data),
            method=request_data.get("method", "GET"),
            allow_nonstandard_methods=True,
            connect_timeout=timeout,
            request_timeout=timeout,
            streaming_callback=self._streaming_callback,
            header_callback=self._header_callback,
            follow_redirects=follow_redirects,
            max_redirects=max_redirects,
            prepare_curl_callback=self.prepare_curl_callback,
        )

        role_name = request_data.get("role")
        if role_name:
            InterfaceRoleManager.set_curl_interface_role(
                proxy_request, role_name,
            )

        keystone_auth_info = request_data.get("keystone")
        if keystone_auth_info:
            logger.warning(
                "[%s]agent request required keystone token",
            )
            auth_headers = yield self._get_keystone_auth_headers(
                keystone_auth_info, validate_cert=validate_cert,
            )
            if not auth_headers:
                raise gen.Return()
            proxy_request.headers.update(auth_headers)

        body = self._get_request_body(request_data)
        if body:
            proxy_request.body = body
        raise gen.Return(proxy_request)

    @gen.coroutine
    def _fetch_proxy_request(self, proxy_request):
        self.in_request_headers = False
        try:
            response = yield self.http_client.fetch(proxy_request)
        except HTTPError as err:
            self.set_status(err.code, err.message)
            raise gen.Return()
        except Exception as err:
            self.set_status(503, str(err))
            raise gen.Return()

        if response.error:
            self.set_status(response.code, str(response.error))
        else:
            self.set_status(response.code, response.reason)
        logger.info(
            "[%s]agent response status: %s, reason: %s",
            self.id, response.code, response.reason,
        )
class TwitterStream(object):
    '''
    Starts a Twitter Streaming client. Sample usage::

        >>> from gramex.transforms import TwitterStream
        >>> stream = TwitterStream(
        ...     track='modi,mms',
        ...     path='save-as-file.json',
        ...     key='...',
        ...     secret='...',
        ...     access_key='...',
        ...     access_secret='...',
        ...     flush=True)

    This saves all tweets mentioning ``modi`` or ``mms`` in ``save-as-file.json``
    with each line representing a tweet in JSON format.

    If ``flush=True``, the file is flushed on every tweet. If ``flush=<number>``,
    the file is flushed every ``<number>`` seconds. If ``flush=False`` (default),
    the file is flushed only when the file or app is closed.

    This function runs forever, so run it in a separate thread.
    '''
    def __init__(self, **kwargs):
        self.params = kwargs
        self.url = 'https://stream.twitter.com/1.1/statuses/filter.json'
        self.valid_params = {
            'follow', 'track', 'locations', 'delimited', 'stall_warnings',
            'filter_level', 'language'}
        self.enabled = True
        self.delay = 0
        # Set up writers
        if 'path' in kwargs:
            self.stream = StreamWriter(kwargs['path'], flush=kwargs.get('flush', False))
            self.process_bytes = self.stream.write
        elif 'function' in kwargs:
            self.process_json = build_transform(
                kwargs, vars={'message': {}}, filename='TwitterStream:function')
        elif kwargs.get('driver') == 'sqlalchemy':
            engine = gramex.data.create_engine(kwargs['url'], **kwargs.get('parameters', {}))
            table = gramex.data.get_table(kwargs['table'])
            fields = kwargs['fields']
            for field in list(fields.keys()):
                if field not in table.columns:
                    app_log.error('TwitterStream field %s not in table' % field)
                    fields.pop(field)
            flatten = flattener(fields=fields)
            self.process_json = lambda tweet: engine.execute(table.insert(flatten(tweet)))

        self.buf = bytearray()
        self.client = tornado.httpclient.HTTPClient()
        while True:
            # Set .enabled to False to temporarily disable streamer
            if self.enabled:
                params = {key: val.encode('utf-8') for key, val in self.params.items()
                          if key in self.valid_params}
                if 'follow' not in params and 'track' not in params and 'locations' not in params:
                    self.enabled = False
                    self.delay = 5
                    app_log.error('TwitterStream needs follow, track or locations. Disabling')
                else:
                    self.fetch_tweets(params)
            # Restart after a delay determined by
            time.sleep(self.delay)

    def fetch_tweets(self, tweet_params):
        oauth = oauth1.Client(
            client_key=self.params['key'],
            client_secret=self.params['secret'],
            resource_owner_key=self.params['access_key'],
            resource_owner_secret=self.params['access_secret'])
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Gramex',
        }
        url, headers, data = oauth.sign(
            self.url, 'POST', body=urlencode(tweet_params), headers=headers)
        self.req = tornado.httpclient.HTTPRequest(
            method='POST', url=url, body=data, headers=headers,
            request_timeout=864000,             # Keep request alive for 10 days
            streaming_callback=self._stream,
            header_callback=self.header_callback)

        try:
            self.headers = None
            self.client.fetch(self.req)
            self.delay = 0
        except tornado.httpclient.HTTPError as e:
            # HTTPError is raised for non-200 HTTP status codes.
            # For rate limiting, start with 1 minute and double each attempt
            if e.code in {RATE_LIMITED, TOO_MANY_REQUESTS}:
                self.delay = self.delay * 2 if self.delay else 60
                app_log.error('TwitterStream HTTP %d (rate limited): %s. Retry: %ss',
                              e.code, e.response, self.delay)
            # For Tornado timeout errors, reconnect immediately
            elif e.code == CLIENT_TIMEOUT:
                self.delay = 0
                app_log.error('TwitterStream HTTP %d (timeout): %s. Retry: %ss',
                              e.code, e.response, self.delay)
            # For server errors, start with 5 seconds and double until 320 seconds
            elif INTERNAL_SERVER_ERROR <= e.code <= GATEWAY_TIMEOUT:
                self.delay = min(320, self.delay * 2 if self.delay else 1)      # noqa: 320 seconds
                app_log.error('TwitterStream HTTP %d: %s. Retry: %ss',
                              e.code, e.response, self.delay)
            # For client errors (e.g. wrong params), disable connection
            else:
                self.delay, self.enabled = 5, False
                app_log.error('TwitterStream HTTP %d: %s. Disabling',
                              e.code, e.response)
        except Exception as e:
            # Other errors are possible, such as IOError.
            # Increase the delay in reconnects by 250ms each attempt, up to 16 seconds.
            self.delay = min(16, self.delay + 0.25)     # noqa: 16 seconds, 0.25 seconds
            app_log.error('TwitterStream exception %s. Retry: %ss', e, self.delay)

    def header_callback(self, line):
        try:
            if self.headers is None:
                start_line = parse_response_start_line(line)
                self.http_version, self.status_code, self.http_reason = start_line
                self.headers = HTTPHeaders()
            else:
                self.headers.parse_line(line)
        except Exception:
            app_log.exception('Cannot parse header %s' % line)

    def _stream(self, data):
        buf = self.buf
        buf.extend(data)
        while len(buf):
            index = buf.find(b'\r\n')
            if index < 0:
                break
            data = bytes(buf[:index])
            del buf[:index + 2]
            # Ignore stall warnings
            if len(data) == 0:
                continue
            try:
                self.process_bytes(data)
            except Exception:
                app_log.exception('TwitterStream could not process: %s' % data)

    def process_bytes(self, data):
        try:
            text = six.text_type(data, encoding='utf-8')
            message = json.loads(text)
        except UnicodeError:
            app_log.error('TwitterStream unicode error: %s', data)
            return
        except ValueError:
            # When rate limited, text="Exceeded connection limit for user"
            app_log.error('TwitterStream non-JSON data: %s', text)
            return
        # Process the message (which is usually, but not always, a tweet)
        try:
            self.process_json(message)
        except Exception:
            app_log.exception('TwitterStream could not process message: %s' % text)

    def process_json(self, message):
        '''Subclass this to process tweets differently'''
        app_log.info(repr(message))
class Connection(object):
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Connection': 'close'
    }

    def __init__(self, endpoint, api_path, event_handler):
        self.endpoint = endpoint
        self.api_path = api_path
        self.subscription_client = AsyncHTTPClient()
        self.outbound_client = AsyncHTTPClient()
        self.event_handler = event_handler
        self.pending = {}
        self.buffer = deque()
        self.mesos_stream_id = None
        self.closing = False
        self.connection_successful = False
        self._headers = HTTPHeaders()

    def _parse_subscription_headers(self, response):
        try:
            if "200 OK" in response:
                self.connection_successful = True
            elif "400 Bad Request" in response:
                self.close()
            elif "HTTP/" not in response:
                self._headers.parse_line(response)

            if self.connection_successful and "Mesos-Stream-Id" in self._headers:
                self.mesos_stream_id = self._headers["Mesos-Stream-Id"].strip()
        except ValueError as ex:  # pragma: no cover
            log.warn("Problem parsing headers")

    @gen.coroutine
    def connect(self, request):
        payload = encode(request)
        headers = dict(self.headers)
        headers['Content-Length'] = str(len(payload))

        http_request = HTTPRequest(
            url=self.endpoint + self.api_path,
            method='POST',
            headers=headers,
            body=payload,
            streaming_callback=self._handle_chunks,
            header_callback=self._parse_subscription_headers,
            follow_redirects=False,
            request_timeout=1e15)

        self.buffer = deque()
        self._headers = HTTPHeaders()

        try:
            yield self.subscription_client.fetch(http_request)
        except HTTPError as ex:
            if ex.code == 599:
                raise_from(
                    ConnectionLost(
                        "Disconnected from endpoint, will try to reconnect"),
                    None)
            if ex.code == 400:
                raise_from(
                    BadSubscription(
                        "Got a 400 code from endpoint. Probably bad subscription request"),
                    ex)
        except ConnectionRefusedError as ex:  # pragma: no cover
            log.error("Problem subscribing: %s" % self.endpoint)
        except Exception as ex:  # pragma: no cover
            log.error("Unhandled exception")
            log.exception(ex)

    def send(self, request):
        f = concurrent.Future()
        if self.closing:  # pragma: no cover
            f.set_exception(ConnectError(self.endpoint))
            return f

        payload = encode(request)
        headers = dict(self.headers)
        headers['Content-Length'] = str(len(payload))

        http_request = HTTPRequest(
            url=self.endpoint + self.api_path,
            body=payload,
            method='POST',
            headers=headers,
        )

        if self.mesos_stream_id:
            headers['Mesos-Stream-Id'] = self.mesos_stream_id

        return self.outbound_client.fetch(http_request)

    @gen.coroutine
    def ping(self, path=None):
        request = HTTPRequest(
            url=self.endpoint + (path or self.api_path),
            method='GET',
            headers=self.headers,
            follow_redirects=False,
            request_timeout=100)
        try:
            yield self.outbound_client.fetch(request)
        except HTTPError as ex:  # pragma: no cover
            if ex.code == 307:
                raise_from(
                    MasterRedirect(
                        urlparse(ex.response.headers["location"]).netloc),
                    None)
        except ConnectionRefusedError as ex:  # pragma: no cover
            log.debug("Problem reaching: %s" % self.endpoint)
            raise ex
        except Exception as ex:  # pragma: no cover
            log.debug("Unhandled exception when connecting to %s", self.endpoint)
            raise ex

    def _handle_chunks(self, chunk):  # pragma: no cover
        """
        Handle incoming byte chunk stream
        """
        with log_errors():
            try:
                log.debug("Buffer length %s" % len(self.buffer))
                if b"Failed to" in chunk:
                    log.warn("Got error from Master: %s" % chunk.decode())
                    return
                if b"No leader elected" in chunk:
                    log.warn(chunk.decode())
                    return

                self.buffer.append(chunk)
                length = self.buffer[0].split(b'\n', 1)[0]
                number = -len(length)
                length = int(length)
                i = 0
                while i < len(self.buffer) and number < length:
                    number += len(self.buffer[i])
                    i += 1
                if number < length:
                    return

                msgs = [self.buffer.popleft().split(b'\n', 1)[1]]
                number = len(msgs[0])
                while number < length:
                    msg = self.buffer.popleft()
                    number += len(msg)
                    msgs.append(msg)

                if number > length:
                    msg = msgs[-1]
                    length, message = msg[(length - number):], msg[:(length - number)]
                    msgs[-1] = message
                    self.buffer.appendleft(length)

                msg = decode(b''.join(msgs))
                self.event_handler(msg)
                # yield self.(msg)
            except Exception as ex:
                log.warn(
                    "Problem parsing response from endpoint. Might be a subscription error",
                    ex)

    def close(self):
        if self.closing:
            return
        self.closing = True
        self.subscription_client.close()
        self.outbound_client.close()
def headers_parse_re(headers: str) -> HTTPHeaders:
    h = HTTPHeaders()
    for line in _CRLF_RE.split(headers):
        if line:
            h.parse_line(line)
    return h
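_CRLF_RE is not shown in the snippet above; a plausible definition and usage sketch, assuming it is a module-level regex that splits on CRLF or bare LF line endings:

import re
from tornado.httputil import HTTPHeaders

# Assumed definition of the module-level _CRLF_RE used by headers_parse_re.
_CRLF_RE = re.compile(r"\r?\n")

raw = "Host: example.com\r\nAccept: */*\r\n"
h = headers_parse_re(raw)
print(dict(h))  # {"Host": "example.com", "Accept": "*/*"}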
class ProxyHandler(tornado.web.StaticFileHandler):
    CHUNK_SIZE = 64 * 1024
    SUPPORTED_METHODS = ['GET', 'CONNECT']

    def initialize(self, path, default_filename=None):
        self.cache_dir = path
        self.url_transpose = self.application.url_transpose
        tornado.web.StaticFileHandler.initialize(self, str(self.cache_dir))

    def data_received(self, chunk):
        raise NotImplementedError()

    def prepare(self):
        self.cacheable_exts = ('.rpm', '.img', '.sqlite.bz2', '.sqlite.gz', '.xml', '.xml.gz',
                               '.qcow2', '.raw.xz', '.iso', 'filelist.gz', 'vmlinuz')
        self.cacheable = False
        self.cache_used = False
        self.cache_file = None
        self.cache_fd = None
        self.cache_url = False
        self.req_code = None
        self.req_path = None
        self.req_headers = None

    def is_cacheable(self, path):
        return path.endswith(self.cacheable_exts)

    @tornado.gen.coroutine
    @tornado.web.asynchronous
    def get(self, path, include_body=True):
        self.req_path = path
        app_log.info('process %s', path)

        url = urlsplit(path)
        self.cache_url = path.replace(url[0] + '://', '')
        self.cacheable = self.is_cacheable(url.path)
        app_log.debug('is cacheable %r', self.cacheable)
        if self.cacheable:
            cache_file = self.url_transpose(path)
            if not cache_file:
                netloc = [x for x in reversed(url.netloc.split('.'))]
                self.cache_file = self.cache_dir / '.'.join(netloc) / url.path[1:]
            else:
                self.cache_file = self.cache_dir / cache_file
        else:
            uri = self.request.uri.encode()
            cache_id = hashlib.sha1(uri).hexdigest()
            cache_path = self.cache_dir / '~' / cache_id[:2]
            cache_info = cache_path / (cache_id + '-url.txt')
            if not cache_info.exists():
                if not cache_info.parent.exists():
                    cache_info.parent.mkdir(parents=True)
                with cache_info.open('w') as f:
                    f.write(uri.decode())
            self.cache_file = cache_path / (cache_id + '-data.txt')

        cache_time = None
        if self.cache_file.exists():
            self.cache_file = self.cache_file.resolve()
            cache_time = self.cache_file.stat().st_mtime
            lifetime = time() - int(self.settings['cache']['lifetime']) * 60 * 60
            app_log.debug('cache time is %r lifetime is %r', cache_time, lifetime)
            if cache_time > lifetime:
                app_log.info('found %s', self.cache_file)
                cache_url = self.cache_file.relative_to(self.cache_dir).as_posix()
                return tornado.web.StaticFileHandler.get(self, cache_url)
            app_log.info('%s lifetime exceeded', self.cache_file)

        args = {k: v[0] for k, v in self.request.arguments.items()}
        app_log.info('fetch %s', self.request.uri)

        if 'Range' in self.request.headers:
            del self.request.headers['Range']

        self.client = AsyncHTTPClient()
        self.client.fetch(self.request.uri,
                          method=self.request.method,
                          body=self.request.body,
                          headers=self.request.headers,
                          follow_redirects=False,
                          if_modified_since=cache_time,
                          allow_nonstandard_methods=True,
                          connect_timeout=int(self.settings['proxy']['timeout']),
                          request_timeout=2 ** 31 - 1,
                          header_callback=self.process_header,
                          streaming_callback=self.process_body,
                          callback=self.process_finish)

    def process_header(self, line):
        header = line.strip()
        app_log.debug('response header %s', header)
        if header:
            if self.req_headers is None:
                self.req_headers = HTTPHeaders()
                _, status, _ = header.split(' ', 2)
                status = int(status)
                if status == 599:
                    # network error but cache file exists
                    if self.cache_file.exists():
                        status = 200
                elif status == 304:
                    status = 200
                elif status == 200:
                    app_log.debug('prepare temp file for %s', self.req_path)
                    self.cache_fd = NamedTemporaryFile(dir=str(self.cache_dir), delete=False)
                self.set_status(status)
            else:
                self.req_headers.parse_line(line)
            return

        for header in ('Date', 'Cache-Control', 'Server', 'Content-Type', 'Location'):
            val = self.req_headers.get(header)
            if val:
                self.set_header(header, val)

        if 'content-encoding' not in self.req_headers:
            val = self.req_headers.get('Content-Length')
            if val:
                self.set_header('Content-Length', val)

        self.flush()

    def process_body(self, chunk):
        if self._finished:
            return
        if self.cache_fd is not None:
            self.cache_fd.write(chunk)
        self.write(chunk)
        self.flush()

    def process_finish(self, response):
        app_log.debug('process finish %s', self.req_path)
        if self._finished or self.cache_used:
            app_log.debug('skip process finish')
            return

        app_log.info('code %s for %s', response.code, self.request.uri)
        if response.code in (599, 304):
            if self.cache_file.exists():
                if response.code == 304:
                    self.cache_file.touch()
                app_log.info('use %s', self.cache_file)
                self.cache_fd = self.cache_file.open('rb')
                self.process_file()
                return
        elif 200 <= response.code < 300:
            if self.cache_fd is not None:
                self.cache_fd.close()
                if self.cache_file.exists():
                    self.cache_file.unlink()
                elif not self.cache_file.parent.exists():
                    self.cache_file.parent.mkdir(parents=True)
                temp_file = Path(self.cache_dir) / self.cache_fd.name
                temp_file.rename(self.cache_file)
                app_log.info('saved %s', self.cache_file)
                self.cache_fd = None
        self.finish()

    def process_file(self):
        chunk = self.cache_fd.read(self.CHUNK_SIZE)
        if chunk:
            self.write(chunk)
            self.flush(callback=self.process_file)
            return
        self.cache_fd.close()
        self.cache_fd = None
        app_log.debug('process file %s finish', self.cache_file)
        self.finish()

    def compute_etag(self):
        if self.cache_file is None or not self.cache_file.exists():
            return None
        if not hasattr(self, 'absolute_path'):
            self.absolute_path = str(self.cache_file.absolute())
        return tornado.web.StaticFileHandler.compute_etag(self)

    def on_finish(self):
        app_log.debug('on finish')
        # sometimes, prepare is not called.
        if not hasattr(self, 'cache_fd') or self.cache_fd is None:
            return
        self.cache_fd.close()

    @tornado.web.asynchronous
    def connect(self, path):
        app_log.info('CONNECT to %s', self.request.uri)
        host, port = self.request.uri.split(':')
        client = self.request.connection.stream

        def read_from_client(data):
            # app_log.debug('read from client\n%s', data)
            upstream.write(data)

        def read_from_upstream(data):
            # app_log.debug('read from upstream\n%s', data)
            client.write(data)

        def client_close(data=None):
            # app_log.debug('client close\n%s', data)
            if upstream.closed():
                return
            if data:
                upstream.write(data)
            upstream.close()

        def upstream_close(data=None):
            # app_log.debug('upstream close\n%s', data)
            if client.closed():
                return
            if data:
                client.write(data)
            client.close()

        def start_tunnel():
            app_log.debug('start connect tunnel')
            client.read_until_close(client_close, read_from_client)
            upstream.read_until_close(upstream_close, read_from_upstream)
            client.write(b'HTTP/1.0 200 Connection established\r\n\r\n')

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        upstream = tornado.iostream.IOStream(s)
        app_log.debug('connect to upstream')
        upstream.connect((host, int(port)), start_tunnel)