Example #1
    async def read_one(self) -> Text:
        """Read a single message"""
        message = ""
        headers = HTTPHeaders()

        line = await convert_yielded(self._readline())

        if line:
            while line and line.strip():
                headers.parse_line(line)
                line = await convert_yielded(self._readline())

            content_length = int(headers.get("content-length", "0"))

            if content_length:
                raw = await self._read_content(length=content_length)
                if raw is not None:
                    message = raw.decode("utf-8").strip()
                else:  # pragma: no cover
                    self.log.warning(
                        "%s failed to read message of length %s",
                        self,
                        content_length,
                    )

        return message
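
This reader implements Content-Length framing as used by the Language Server Protocol: header lines are parsed until a blank line, then exactly Content-Length bytes of body are read. A minimal synchronous sketch of the same framing, assuming stream is a binary file-like object (the helper name is illustrative):

from tornado.httputil import HTTPHeaders

def read_framed_message(stream) -> str:
    # Parse "Name: value" header lines until the blank separator line.
    headers = HTTPHeaders()
    line = stream.readline().decode("utf-8")
    while line.strip():
        headers.parse_line(line)
        line = stream.readline().decode("utf-8")
    # Read exactly Content-Length bytes of payload.
    length = int(headers.get("Content-Length", "0"))
    return stream.read(length).decode("utf-8") if length else ""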
Example #2
    async def read_one(self) -> Text:
        """Read a single message"""
        message = ""
        headers = HTTPHeaders()

        line = await convert_yielded(self._readline())

        if line:
            while line and line.strip():
                headers.parse_line(line)
                line = await convert_yielded(self._readline())

            content_length = int(headers.get("content-length", "0"))

            if content_length:
                raw = None
                retries = 5
                while raw is None and retries:
                    try:
                        raw = self.stream.read(content_length)
                    except OSError:  # pragma: no cover
                        raw = None
                    if raw is None:  # pragma: no cover
                        self.log.warning(
                            "%s failed to read message of length %s",
                            self,
                            content_length,
                        )
                        await self.sleep()
                        retries -= 1
                    else:
                        message = raw.decode("utf-8").strip()
                        break

        return message
Example #3
def headers_parse_simple(headers: str) -> HTTPHeaders:
    h = HTTPHeaders()
    for line in headers.split("\n"):
        if line.endswith("\r"):
            line = line[:-1]
        if line:
            h.parse_line(line)
    return h
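
headers_parse_simple splits on "\n" and trims a trailing "\r", so it accepts both LF- and CRLF-terminated header blocks. A usage sketch (values are illustrative):

raw = "Content-Type: text/html\r\nContent-Length: 42\r\n"
h = headers_parse_simple(raw)
print(h["content-length"])  # HTTPHeaders keys are case-insensitive -> "42"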
Example #4
def parse_headers(data):
    headers = HTTPHeaders()

    for line in data.splitlines():
        if line:
            try:
                headers.parse_line(line)
            except Exception:
                # stop at the first malformed header line
                break
    return headers
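
parse_line also handles folded (obs-fold) continuation lines: a line starting with whitespace is appended to the previously parsed header. A short sketch, assuming tornado's HTTPHeaders:

from tornado.httputil import HTTPHeaders

h = HTTPHeaders()
h.parse_line("X-Long-Header: first part")
h.parse_line("    second part")  # leading whitespace marks a continuation
print(h["X-Long-Header"])        # -> "first part second part"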
Example #5
    def _curl_header_callback(
        self,
        headers: httputil.HTTPHeaders,
        header_callback: Callable[[str], None],
        header_line_bytes: bytes,
    ) -> None:
        header_line = native_str(header_line_bytes.decode("latin1"))
        if header_callback is not None:
            self.io_loop.add_callback(header_callback, header_line)
        # header_line as returned by curl includes the end-of-line characters.
        # whitespace at the start should be preserved to allow multi-line headers
        header_line = header_line.rstrip()
        if header_line.startswith("HTTP/"):
            headers.clear()
            try:
                (__, __, reason) = httputil.parse_response_start_line(header_line)
                header_line = "X-Http-Reason: %s" % reason
            except httputil.HTTPInputError:
                return
        if not header_line:
            return
        headers.parse_line(header_line)
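
curl delivers the response start line through the same callback as the headers, so this code converts the reason phrase into a synthetic X-Http-Reason header rather than losing it. parse_response_start_line is the public tornado helper that does the splitting; a sketch of it in isolation:

from tornado import httputil

start = httputil.parse_response_start_line("HTTP/1.1 404 Not Found")
headers = httputil.HTTPHeaders()
headers.parse_line("X-Http-Reason: %s" % start.reason)
print(start.code, headers["X-Http-Reason"])  # -> 404 Not Found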
Example #6
    def send_object(cls, object_url):
        """
        Send OpenSlides objects to all connected clients.

        First, receive the object from the OpenSlides ReST API.
        """
        for waiter in cls.waiters:
            # Get the object from the ReST API
            http_client = AsyncHTTPClient()
            headers = HTTPHeaders()
            # TODO: read up on Python Morsel cookies and why "Set-Cookie" does not work
            request_cookies = waiter.request_info.cookies.values()
            cookie_value = ';'.join("%s=%s" % (cookie.key, cookie.value)
                                    for cookie in request_cookies)
            headers.parse_line("Cookie: %s" % cookie_value)

            request = HTTPRequest(
                url=''.join((REST_URL, object_url)),
                headers=headers,
                decompress_response=False)
            # TODO: use proxy_host as header from waiter.request_info
            http_client.fetch(request, waiter.handle_rest_request)
Example #7
    def read_headers(self):
        parser = HTTPHeaders()
        lastkey = None
        count = 0
        while True:
            line = yield self.stream.read_until(b'\r\n', max_bytes=self.max_header_length)
            if len(line) == 2:
                # a bare b'\r\n' marks the end of the header block
                break
            self.readed += len(line)
            count += 1
            line = line.decode('utf-8')
            if self.max_headers and count > self.max_headers:
                raise iostream.UnsatisfiableReadError("Too many headers " + line + ' ' + json.dumps(parser))
            parser.parse_line(line)
            if lastkey is None:
                lastkey = parser._last_key
            elif lastkey != parser._last_key:
                self.request.received_header(lastkey, parser[lastkey])
                lastkey = parser._last_key
        self.request.request_headers = parser
        self.request.received_headers()
        note('received all headers')
        raise gen.Return(parser)
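
This incremental loop leans on the private _last_key attribute to notice when a header has been fully received. When incremental delivery is not needed, tornado already ships a classmethod that parses a whole header block at once:

from tornado.httputil import HTTPHeaders

block = "Host: example.com\r\nAccept: */*\r\n"
headers = HTTPHeaders.parse(block)
print(headers["Host"])  # -> "example.com"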
Example #8
class ProxyHandler(web.RequestHandler):

    def initialize(self):
        self.proxy_headers = HTTPHeaders()
        # create a new client for each request
        self.http_client = AsyncHTTPClient(max_clients=1)
        self.in_request_headers = False
        self.id = id(self)
        self.request_data = None

    def validate_request(self, request_data):
        if self.request.headers.get("X-Proxy-Agent") == X_Proxy_Agent:
            self.set_status(403, "recursion rejected")
            return False

        try:
            RequstDataValidator.validate(request_data)
        except ValidationError as err:
            self.set_status(400, "/%s: %s" % (
                "::".join(err.path), err.message
            ))
            return False
        return True

    def get_post_request_data(self):
        try:
            request_data = json.loads(self.request.body.decode("utf-8"))
        except ValueError as err:
            self.set_status(400, str(err))
            return
        return request_data

    def _set_proxy_headers(self):
        for k, v in self.proxy_headers.items():
            if k not in RESPONSE_EXCLUDE_HEADERS:
                logger.debug(
                    "[%s] write header %s: %s", self.id, k, v,
                )
                self.set_header(k, v)

    def _streaming_callback(self, chunk):
        if self._finished:
            return

        if not self._headers_written:
            self._set_proxy_headers()
            self.flush()
        self.in_request_headers = False
        self.write(chunk)
        logger.debug("[%s] chunk length %s", self.id, len(chunk))

    def _header_callback(self, header_line):
        if not self.in_request_headers:
            start_line = parse_response_start_line(header_line)
            self.set_status(start_line.code, start_line.reason)
            self.in_request_headers = True
        elif not HTTPHeaderEndLineRex.match(header_line):
            self.proxy_headers.parse_line(header_line)

    def _get_request_body(self, request_data):
        post_type = request_data.get("post_type")
        data = request_data.get("data")
        if data is None:
            return None

        if post_type == "form":
            body = urlencode(data or {})
        elif post_type == "json":
            body = json.dumps(data)
        elif post_type == "string" and isinstance(data, basestring):
            body = native_str(data)
        else:
            body = None
        return body

    @gen.coroutine
    def _get_keystone_auth_headers(self, auth_info, validate_cert=True):
        try:
            response = yield self.http_client.fetch(
                auth_info.get("auth_url"), method="POST",
                headers={"Content-Type": "application/json"},
                validate_cert=validate_cert,
                body=json.dumps({
                    "auth": {
                        "passwordCredentials": {
                            "username": auth_info.get("user_name"),
                            "password": auth_info.get("password"),
                        },
                        "tenantName": auth_info.get("tenant_name"),
                    }
                })
            )
        except Exception as err:
            logger.info(err)
            self.set_status(503, "keystone auth error")
            raise gen.Return()

        if response.error or response.code != 200:
            logger.info("keystone auth error")
            self.set_status(407, "keystone auth error")
            raise gen.Return()

        auth_info = json.loads(response.body.decode("utf-8"))
        try:
            raise gen.Return({
                "X-AUTH-TOKEN": auth_info["access"]["token"]["id"],
            })
        except KeyError:
            logger.info("keystone auth failed")
            self.set_status(407, "keystone auth failed")
        raise gen.Return()

    def _get_proxy_request_headers(self, request_data):
        headers = {
            k: v for k, v in self.request.headers.items()
            if k.lower() in RAW_REQUEST_ACCEPT_HEADERS
        }
        cookies = request_data.get("cookies")
        if cookies:
            headers["Cookie"] = "; ".join(
                "%s=%s" % i
                for i in cookies.items()
            )

        post_type = request_data.get("post_type")
        if post_type == "form":
            headers.setdefault(
                "Content-Type", "application/x-www-form-urlencoded"
            )
        elif post_type == "json":
            headers.setdefault(
                "Content-Type", "application/json"
            )
        elif post_type == "string":
            headers.setdefault(
                "Content-Type", "text/plain"
            )

        request_headers = request_data.get("headers") or {}
        for k, v in request_headers.items():
            if k in REQUEST_ACCEPT_HEADERS:
                headers[k] = v
            elif k.startswith("X-"):
                headers[k] = v
        headers["X-Proxy-Agent"] = X_Proxy_Agent
        return headers

    @gen.coroutine
    def handle_request(self, request_data):
        try:
            proxy_request = yield self._make_proxy_request(request_data)
            if not proxy_request:
                raise gen.Return()

            yield self._fetch_proxy_request(proxy_request)
        except RequestParamsError as err:
            self.set_status(400, str(err))
        except Exception as err:
            logger.exception(err)
        raise gen.Return()

    @web.asynchronous
    @gen.coroutine
    def get(self):
        url = self.get_query_argument("url")
        logger.debug("[%s]agent get url: %s", self.id, url)

        self.request_data = request_data = {"url": url}
        if not self.validate_request(request_data):
            raise gen.Return()

        yield self.handle_request(request_data)

    @web.asynchronous
    @gen.coroutine
    def post(self):
        request_data = self.get_post_request_data()
        logger.debug("[%s]agent request data: %s", self.id, request_data)
        if not request_data:
            raise gen.Return()

        self.request_data = request_data
        if not self.validate_request(request_data):
            raise gen.Return()

        yield self.handle_request(request_data)

    def prepare_curl_callback(self, curl):
        import pycurl

        if (
            "insecure_connection" in self.request_data and
            bool(self.request_data.get("insecure_connection"))
        ):
            curl.setopt(pycurl.SSL_VERIFYHOST, 0)

    @gen.coroutine
    def _make_proxy_request(self, request_data):
        timeout = float(request_data.get("timeout", DEFAULT_TIMEOUT))
        validate_cert = bool(request_data.get("validate_cert", True))
        max_redirects = request_data.get("max_http_redirects") or 0
        follow_redirects = max_redirects > 0  # 0 means do not follow redirects

        url = request_data.get("url")
        params = request_data.get("data")
        post_type = request_data.get("post_type")
        if params and post_type is None:
            url = "%s?%s" % (url, urlencode(params))

        logger.info("[%s]agent request url: %s", self.id, url)

        proxy_request = HTTPRequest(
            url, validate_cert=validate_cert,
            headers=self._get_proxy_request_headers(request_data),
            method=request_data.get("method", "GET"),
            allow_nonstandard_methods=True,
            connect_timeout=timeout,
            request_timeout=timeout,
            streaming_callback=self._streaming_callback,
            header_callback=self._header_callback,
            follow_redirects=follow_redirects,
            max_redirects=max_redirects,
            prepare_curl_callback=self.prepare_curl_callback,
        )

        role_name = request_data.get("role")
        if role_name:
            InterfaceRoleManager.set_curl_interface_role(
                proxy_request, role_name,
            )

        keystone_auth_info = request_data.get("keystone")
        if keystone_auth_info:
            logger.warning(
                "[%s]agent request requires keystone token", self.id,
            )
            auth_headers = yield self._get_keystone_auth_headers(
                keystone_auth_info, validate_cert=validate_cert,
            )
            if not auth_headers:
                raise gen.Return()
            proxy_request.headers.update(auth_headers)

        body = self._get_request_body(request_data)
        if body:
            proxy_request.body = body

        raise gen.Return(proxy_request)

    @gen.coroutine
    def _fetch_proxy_request(self, proxy_request):
        self.in_request_headers = False
        try:
            response = yield self.http_client.fetch(proxy_request)
        except HTTPError as err:
            self.set_status(err.code, err.message)
            raise gen.Return()
        except Exception as err:
            self.set_status(503, str(err))
            raise gen.Return()

        if response.error:
            self.set_status(response.code, str(response.error))
        else:
            self.set_status(response.code, response.reason)

        logger.info(
            "[%s]agent response status: %s, reason: %s",
            self.id, response.code, response.reason,
        )
Example #9
class TwitterStream(object):
    '''
    Starts a Twitter Streaming client. Sample usage::

        >>> from gramex.transforms import TwitterStream
        >>> stream = TwitterStream(
        ...     track='modi,mms',
        ...     path='save-as-file.json',
        ...     key='...',
        ...     secret='...',
        ...     access_key='...',
        ...     access_secret='...',
        ...     flush=True)

    This saves all tweets mentioning ``modi`` or ``mms`` in ``save-as-file.json``
    with each line representing a tweet in JSON format.

    If ``flush=True``, the file is flushed on every tweet. If ``flush=<number>``,
    the file is flushed every ``<number>`` seconds. If ``flush=False`` (default),
    the file is flushed only when the file or app is closed.

    This function runs forever, so run it in a separate thread.
    '''
    def __init__(self, **kwargs):
        self.params = kwargs
        self.url = 'https://stream.twitter.com/1.1/statuses/filter.json'
        self.valid_params = {
            'follow', 'track', 'locations', 'delimited', 'stall_warnings',
            'filter_level', 'language'}
        self.enabled = True
        self.delay = 0

        # Set up writers
        if 'path' in kwargs:
            self.stream = StreamWriter(kwargs['path'], flush=kwargs.get('flush', False))
            self.process_bytes = self.stream.write
        elif 'function' in kwargs:
            self.process_json = build_transform(
                kwargs, vars={'message': {}}, filename='TwitterStream:function')
        elif kwargs.get('driver') == 'sqlalchemy':
            engine = gramex.data.create_engine(kwargs['url'], **kwargs.get('parameters', {}))
            table = gramex.data.get_table(kwargs['table'])
            fields = kwargs['fields']
            for field in list(fields.keys()):
                if field not in table.columns:
                    app_log.error('TwitterStream field %s not in table' % field)
                    fields.pop(field)
            flatten = flattener(fields=fields)
            self.process_json = lambda tweet: engine.execute(table.insert(flatten(tweet)))

        self.buf = bytearray()
        self.client = tornado.httpclient.HTTPClient()
        while True:
            # Set .enabled to False to temporarily disable streamer
            if self.enabled:
                params = {key: val.encode('utf-8') for key, val in self.params.items()
                          if key in self.valid_params}
                if 'follow' not in params and 'track' not in params and 'locations' not in params:
                    self.enabled = False
                    self.delay = 5
                    app_log.error('TwitterStream needs follow, track or locations. Disabling')
                else:
                    self.fetch_tweets(params)
            # Restart after a delay determined by the error handling in fetch_tweets
            time.sleep(self.delay)

    def fetch_tweets(self, tweet_params):
        oauth = oauth1.Client(
            client_key=self.params['key'],
            client_secret=self.params['secret'],
            resource_owner_key=self.params['access_key'],
            resource_owner_secret=self.params['access_secret'])
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Gramex',
        }
        url, headers, data = oauth.sign(
            self.url, 'POST', body=urlencode(tweet_params), headers=headers)
        self.req = tornado.httpclient.HTTPRequest(
            method='POST', url=url, body=data, headers=headers,
            request_timeout=864000,      # Keep request alive for 10 days
            streaming_callback=self._stream,
            header_callback=self.header_callback)

        try:
            self.headers = None
            self.client.fetch(self.req)
            self.delay = 0
        except tornado.httpclient.HTTPError as e:
            # HTTPError is raised for non-200 HTTP status codes.
            # For rate limiting, start with 1 minute and double each attempt
            if e.code in {RATE_LIMITED, TOO_MANY_REQUESTS}:
                self.delay = self.delay * 2 if self.delay else 60
                app_log.error('TwitterStream HTTP %d (rate limited): %s. Retry: %ss',
                              e.code, e.response, self.delay)
            # For Tornado timeout errors, reconnect immediately
            elif e.code == CLIENT_TIMEOUT:
                self.delay = 0
                app_log.error('TwitterStream HTTP %d (timeout): %s. Retry: %ss',
                              e.code, e.response, self.delay)
            # For server errors, start with 5 seconds and double until 320 seconds
            elif INTERNAL_SERVER_ERROR <= e.code <= GATEWAY_TIMEOUT:
                self.delay = min(320, self.delay * 2 if self.delay else 1)      # noqa: 320 seconds
                app_log.error('TwitterStream HTTP %d: %s. Retry: %ss',
                              e.code, e.response, self.delay)
            # For client errors (e.g. wrong params), disable connection
            else:
                self.delay, self.enabled = 5, False
                app_log.error('TwitterStream HTTP %d: %s. Disabling', e.code, e.response)
        except Exception as e:
            # Other errors are possible, such as IOError.
            # Increase the delay in reconnects by 250ms each attempt, up to 16 seconds.
            self.delay = min(16, self.delay + 0.25)         # noqa: 16 seconds, 0.25 seconds
            app_log.error('TwitterStream exception %s. Retry: %ss', e, self.delay)

    def header_callback(self, line):
        try:
            if self.headers is None:
                start_line = parse_response_start_line(line)
                self.http_version, self.status_code, self.http_reason = start_line
                self.headers = HTTPHeaders()
            else:
                self.headers.parse_line(line)
        except Exception:
            app_log.exception('Cannot parse header %s' % line)

    def _stream(self, data):
        buf = self.buf
        buf.extend(data)
        while len(buf):
            index = buf.find(b'\r\n')
            if index < 0:
                break
            data = bytes(buf[:index])
            del buf[:index + 2]
            # Ignore stall warnings
            if len(data) == 0:
                continue
            try:
                self.process_bytes(data)
            except Exception:
                app_log.exception('TwitterStream could not process: %s' % data)

    def process_bytes(self, data):
        try:
            text = six.text_type(data, encoding='utf-8')
            message = json.loads(text)
        except UnicodeError:
            app_log.error('TwitterStream unicode error: %s', data)
            return
        except ValueError:
            # When rate limited, text="Exceeded connection limit for user"
            app_log.error('TwitterStream non-JSON data: %s', text)
            return
        # Process the message (which is usually, but not always, a tweet)
        try:
            self.process_json(message)
        except Exception:
            app_log.exception('TwitterStream could not process message: %s' % text)

    def process_json(self, message):
        '''Subclass this to process tweets differently'''
        app_log.info(repr(message))
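
The _stream method reassembles the chunked HTTP body into "\r\n"-delimited records before handing them to process_bytes. A standalone sketch of that buffering technique (the helper name is illustrative, not part of the class above):

def iter_records(buf: bytearray, chunk: bytes):
    # Append the new chunk, then yield every complete record in the buffer.
    buf.extend(chunk)
    while True:
        index = buf.find(b'\r\n')
        if index < 0:
            break  # an incomplete record stays buffered for the next chunk
        record = bytes(buf[:index])
        del buf[:index + 2]
        if record:  # empty records are keep-alive/stall markers
            yield record

buf = bytearray()
for rec in iter_records(buf, b'{"a": 1}\r\n\r\n{"b": 2}\r\n'):
    print(rec)  # -> b'{"a": 1}' then b'{"b": 2}'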
Example #10
class Connection(object):
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Connection': 'close'
    }

    def __init__(self, endpoint, api_path, event_handler):

        self.endpoint = endpoint
        self.api_path = api_path
        self.subscription_client = AsyncHTTPClient()
        self.outbound_client = AsyncHTTPClient()

        self.event_handler = event_handler
        self.pending = {}

        self.buffer = deque()
        self.mesos_stream_id = None

        self.closing = False

        self.connection_successful = False
        self._headers = HTTPHeaders()

    def _parse_subscription_headers(self, response):
        try:
            if "200 OK" in response:
                self.connection_successful = True
            elif "400 Bad Request" in response:
                self.close()
            elif "HTTP/" not in response:
                self._headers.parse_line(response)
            if self.connection_successful and "Mesos-Stream-Id" in self._headers:
                self.mesos_stream_id = self._headers["Mesos-Stream-Id"].strip()
        except ValueError as ex:  # pragma: no cover
            log.warn("Problem parsing headers: %s", ex)

    @gen.coroutine
    def connect(self, request):

        payload = encode(request)
        headers = dict(self.headers)
        headers['Content-Length'] = str(len(payload))

        http_request = HTTPRequest(
            url=self.endpoint + self.api_path,
            method='POST',
            headers=headers,
            body=payload,
            streaming_callback=self._handle_chunks,
            header_callback=self._parse_subscription_headers,
            follow_redirects=False,
            request_timeout=1e15)

        self.buffer = deque()
        self._headers = HTTPHeaders()

        try:
            yield self.subscription_client.fetch(http_request)
        except HTTPError as ex:
            if ex.code == 599:
                raise_from(
                    ConnectionLost(
                        "Disconnected from endpoint, will try to reconnect"),
                    None)
            if ex.code == 400:
                raise_from(
                    BadSubscription(
                        "Got a 400 code from endpoint. Probably bad subscription request"
                    ), ex)
        except ConnectionRefusedError as ex:  # pragma: no cover
            log.error("Problem subscribing: %s" % self.endpoint)
        except Exception as ex:  # pragma: no cover
            log.error("Unhandled exception")
            log.exception(ex)

    def send(self, request):
        f = concurrent.Future()

        if self.closing:  # pragma: no cover
            f.set_exception(ConnectError(self.endpoint))
            return f

        payload = encode(request)
        headers = dict(self.headers)
        headers['Content-Length'] = str(len(payload))
        # Set the stream id before constructing the request so it is
        # included in the request headers.
        if self.mesos_stream_id:
            headers['Mesos-Stream-Id'] = self.mesos_stream_id

        http_request = HTTPRequest(
            url=self.endpoint + self.api_path,
            body=payload,
            method='POST',
            headers=headers,
        )

        return self.outbound_client.fetch(http_request)

    @gen.coroutine
    def ping(self, path=None):
        request = HTTPRequest(url=self.endpoint + (path or self.api_path),
                              method='GET',
                              headers=self.headers,
                              follow_redirects=False,
                              request_timeout=100)
        try:
            yield self.outbound_client.fetch(request)
        except HTTPError as ex:  # pragma: no cover
            if ex.code == 307:
                raise_from(
                    MasterRedirect(
                        urlparse(ex.response.headers["location"]).netloc),
                    None)
        except ConnectionRefusedError as ex:  # pragma: no cover
            log.debug("Problem reaching: %s" % self.endpoint)
            raise ex
        except Exception as ex:  # pragma: no cover
            log.debug("Unhandled exception when connecting to %s",
                      self.endpoint)
            raise ex

    def _handle_chunks(self, chunk):  # pragma: no cover
        """ Handle incoming byte chunk stream """
        with log_errors():
            try:
                log.debug("Buffer length %s" % len(self.buffer))
                if b"Failed to" in chunk:
                    log.warn("Got error from Master: %s" % chunk.decode())
                    return
                if b"No leader elected" in chunk:
                    log.warn(chunk.decode())
                    return
                self.buffer.append(chunk)
                length = self.buffer[0].split(b'\n', 1)[0]
                number = -len(length) - 1  # also account for the '\n' after the length prefix
                length = int(length)
                i = 0
                while i < len(self.buffer) and number < length:
                    number += len(self.buffer[i])
                    i += 1

                if number < length:
                    return

                msgs = [self.buffer.popleft().split(b'\n', 1)[1]]
                number = len(msgs[0])

                while number < length:
                    msg = self.buffer.popleft()
                    number += len(msg)
                    msgs.append(msg)

                if number > length:
                    msg = msgs[-1]
                    length, message = msg[(length - number):], msg[:(length -
                                                                     number)]
                    msgs[-1] = message
                    self.buffer.appendleft(length)

                msg = decode(b''.join(msgs))

                self.event_handler(msg)

                # yield self.(msg)
            except Exception as ex:
                log.warn(
                    "Problem parsing response from endpoint. Might be a subscription error",
                    ex)

    def close(self):
        if self.closing:
            return

        self.closing = True
        self.subscription_client.close()
        self.outbound_client.close()
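
_handle_chunks decodes the Mesos RecordIO framing, in which every record is prefixed by its decimal length and a newline. A minimal decoding sketch for a single contiguous buffer (a hypothetical helper; the class above additionally handles records split across streaming chunks):

def decode_recordio(data: bytes):
    records = []
    while data:
        length, _, rest = data.partition(b'\n')  # "<len>\n<payload>"
        n = int(length)
        records.append(rest[:n])
        data = rest[n:]
    return records

print(decode_recordio(b'5\nhello5\nworld'))  # -> [b'hello', b'world']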
Example #11
def headers_parse_re(headers: str) -> HTTPHeaders:
    h = HTTPHeaders()
    for line in _CRLF_RE.split(headers):
        if line:
            h.parse_line(line)
    return h
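
_CRLF_RE is not shown in the snippet; a plausible definition that matches both CRLF and bare LF line endings, with a usage line:

import re

_CRLF_RE = re.compile(r"\r?\n")

h = headers_parse_re("Host: example.com\r\nAccept: */*\r\n")
print(h["Accept"])  # -> "*/*"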
Example #12
class ProxyHandler(tornado.web.StaticFileHandler):
    CHUNK_SIZE = 64 * 1024
    SUPPORTED_METHODS = ['GET', 'CONNECT']

    def initialize(self, path, default_filename=None):
        self.cache_dir = path
        self.url_transpose = self.application.url_transpose

        tornado.web.StaticFileHandler.initialize(self, str(self.cache_dir))

    def data_received(self, chunk):
        raise NotImplementedError()

    def prepare(self):
        self.cacheable_exts = ('.rpm', '.img', '.sqlite.bz2', '.sqlite.gz', '.xml', '.xml.gz', '.qcow2', '.raw.xz',
                               '.iso', 'filelist.gz', 'vmlinuz')

        self.cacheable = False
        self.cache_used = False
        self.cache_file = None
        self.cache_fd = None
        self.cache_url = False

        self.req_code = None
        self.req_path = None
        self.req_headers = None

    def is_cacheable(self, path):
        return path.endswith(self.cacheable_exts)

    @tornado.web.asynchronous
    @tornado.gen.coroutine
    def get(self, path, include_body=True):
        self.req_path = path
        app_log.info('process %s', path)

        url = urlsplit(path)

        self.cache_url = path.replace(url[0] + '://', '')
        self.cacheable = self.is_cacheable(url.path)
        app_log.debug('is cacheable %r', self.cacheable)
        if self.cacheable:
            cache_file = self.url_transpose(path)
            if not cache_file:
                netloc = [x for x in reversed(url.netloc.split('.'))]
                self.cache_file = self.cache_dir / '.'.join(netloc) / url.path[1:]
            else:
                self.cache_file = self.cache_dir / cache_file

        else:
            uri = self.request.uri.encode()
            cache_id = hashlib.sha1(uri).hexdigest()
            cache_path = self.cache_dir / '~' / cache_id[:2]

            cache_info = cache_path / (cache_id + '-url.txt')
            if not cache_info.exists():
                if not cache_info.parent.exists():
                    cache_info.parent.mkdir(parents=True)

                with cache_info.open('w') as f:
                    f.write(uri.decode())

            self.cache_file = cache_path / (cache_id + '-data.txt')

        cache_time = None
        if self.cache_file.exists():
            self.cache_file = self.cache_file.resolve()
            cache_time = self.cache_file.stat().st_mtime

            lifetime = time() - int(self.settings['cache']['lifetime']) * 60 * 60
            app_log.debug('cache time is %r lifetime is %r', cache_time, lifetime)
            if cache_time > lifetime:
                app_log.info('found %s', self.cache_file)

                cache_url = self.cache_file.relative_to(self.cache_dir).as_posix()
                return tornado.web.StaticFileHandler.get(self, cache_url)

            app_log.info('%s lifetime exceeded', self.cache_file)

        args = {k: v[0] for k, v in self.request.arguments.items()}

        app_log.info('fetch %s', self.request.uri)
        if 'Range' in self.request.headers:
            del self.request.headers['Range']

        self.client = AsyncHTTPClient()
        self.client.fetch(self.request.uri,
                          method=self.request.method,
                          body=self.request.body,
                          headers=self.request.headers,
                          follow_redirects=False,
                          if_modified_since=cache_time,
                          allow_nonstandard_methods=True,
                          connect_timeout=int(self.settings['proxy']['timeout']),
                          request_timeout=2 ** 31 - 1,
                          header_callback=self.process_header,
                          streaming_callback=self.process_body,
                          callback=self.process_finish)

    def process_header(self, line):
        header = line.strip()
        app_log.debug('response header %s', header)
        if header:
            if self.req_headers is None:
                self.req_headers = HTTPHeaders()
                _, status, _ = header.split(' ', 2)
                status = int(status)

                if status == 599:
                    # network error but cache file exists
                    if self.cache_file.exists():
                        status = 200
                elif status == 304:
                    status = 200
                elif status == 200:
                    app_log.debug('prepare temp file for %s', self.req_path)
                    self.cache_fd = NamedTemporaryFile(dir=str(self.cache_dir), delete=False)

                self.set_status(status)
            else:
                self.req_headers.parse_line(line)
            return

        for header in ('Date', 'Cache-Control', 'Server', 'Content-Type', 'Location'):
            val = self.req_headers.get(header)
            if val:
                self.set_header(header, val)

        if 'content-encoding' not in self.req_headers:
            val = self.req_headers.get('Content-Length')
            if val:
                self.set_header('Content-Length', val)

        self.flush()

    def process_body(self, chunk):
        if self._finished:
            return

        if self.cache_fd is not None:
            self.cache_fd.write(chunk)

        self.write(chunk)
        self.flush()

    def process_finish(self, response):
        app_log.debug('process finish %s', self.req_path)
        if self._finished or self.cache_used:
            app_log.debug('skip process finish')
            return

        app_log.info('code %s for %s', response.code, self.request.uri)

        if response.code in (599, 304):
            if self.cache_file.exists():
                if response.code == 304:
                    self.cache_file.touch()

                app_log.info('use %s', self.cache_file)
                self.cache_fd = self.cache_file.open('rb')
                self.process_file()
                return

        elif 200 <= response.code < 300:
            if self.cache_fd is not None:
                self.cache_fd.close()

                if self.cache_file.exists():
                    self.cache_file.unlink()
                elif not self.cache_file.parent.exists():
                    self.cache_file.parent.mkdir(parents=True)

                temp_file = Path(self.cache_dir) / self.cache_fd.name
                temp_file.rename(self.cache_file)

                app_log.info('saved %s', self.cache_file)

        self.cache_fd = None
        self.finish()

    def process_file(self):
        chunk = self.cache_fd.read(self.CHUNK_SIZE)
        if chunk:
            self.write(chunk)
            self.flush(callback=self.process_file)
            return

        self.cache_fd.close()
        self.cache_fd = None
        app_log.debug('process file %s finish', self.cache_file)
        self.finish()

    def compute_etag(self):
        if self.cache_file is None or not self.cache_file.exists():
            return None

        if not hasattr(self, 'absolute_path'):
            self.absolute_path = str(self.cache_file.absolute())

        return tornado.web.StaticFileHandler.compute_etag(self)

    def on_finish(self):
        app_log.debug('on finish')
        # sometimes, prepare is not called.
        if not hasattr(self, 'cache_fd') or self.cache_fd is None:
            return
        self.cache_fd.close()

    @tornado.web.asynchronous
    def connect(self, path):
        app_log.info('CONNECT to %s', self.request.uri)
        host, port = self.request.uri.split(':')
        client = self.request.connection.stream

        def read_from_client(data):
            # app_log.debug('read from client\n%s', data)
            upstream.write(data)

        def read_from_upstream(data):
            # app_log.debug('read from upstream\n%s', data)
            client.write(data)

        def client_close(data=None):
            # app_log.debug('client close\n%s', data)
            if upstream.closed():
                return
            if data:
                upstream.write(data)
            upstream.close()

        def upstream_close(data=None):
            # app_log.debug('upstream close\n%s', data)
            if client.closed():
                return
            if data:
                client.write(data)
            client.close()

        def start_tunnel():
            app_log.debug('start connect tunnel')
            client.read_until_close(client_close, read_from_client)
            upstream.read_until_close(upstream_close, read_from_upstream)
            client.write(b'HTTP/1.0 200 Connection established\r\n\r\n')

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        upstream = tornado.iostream.IOStream(s)
        app_log.debug('connect to upstream')
        upstream.connect((host, int(port)), start_tunnel)