Example #1
 def __init__(self, url: str, code: int, result: bytes,
              response_headers: HTTPHeaders,
              error: Optional[BaseException]) -> None:
     self._url = url
     self._code = code
     self._result: bytes = result
     self._encoding: str = "utf-8"
     self._response_headers: HTTPHeaders = response_headers
     self._etag: Optional[str] = response_headers.get("etag", None)
     self._error = error
     self._last_modified: Optional[str] = response_headers.get(
         "last-modified", None)
Example #2
class _HTTPRequest(object):
    def __init__(self, data):
        method, url, version, headers, self._body = msgpack_unpackb(data)
        if six.PY3:
            method = method.decode()
            url = url.decode()
            version = version.decode()
            headers = [(k.decode(), v.decode()) for k, v in headers]

        self._headers = HTTPHeaders(headers)
        self._meta = {
            'method': method,
            'version': version,
            'host': self._headers.get('Host', ''),
            'remote_addr': self._headers.get('X-Real-IP') or self._headers.get('X-Forwarded-For', ''),
            'query_string': urlparse.urlparse(url).query,
            'cookies': dict(),
            'parsed_cookies': http_parse_cookies(self._headers),
        }
        args = urlparse.parse_qs(urlparse.urlparse(url).query)
        self._files = dict()
        parse_body_arguments(self._headers.get("Content-Type", ""), self._body,
                             args, self._files)
        self._request = dict_list_to_single(args)

    @property
    def headers(self):
        return self._headers

    @property
    def body(self):
        """Return request body"""
        return self._body

    @property
    def meta(self):
        return self._meta

    @property
    def request(self):
        return self._request

    @property
    def files(self):
        return self._files
Example #3
class _HTTPRequest(object):
    def __init__(self, request, data):
        self._underlying_request = request
        method, url, version, headers, self._body = msgpack_unpackb(data)
        if six.PY3:
            method = method.decode()
            url = url.decode()
            version = version.decode()
            headers = [(k.decode(), v.decode()) for k, v in headers]

        self._headers = HTTPHeaders(headers)
        self._meta = {
            'method': method,
            'version': version,
            'host': self._headers.get('Host', ''),
            'remote_addr': self._headers.get('X-Real-IP') or self._headers.get('X-Forwarded-For', ''),
            'query_string': urlparse.urlparse(url).query,
            'cookies': dict(),
            'parsed_cookies': http_parse_cookies(self._headers),
        }
        args = urlparse.parse_qs(urlparse.urlparse(url).query)
        self._files = dict()
        parse_body_arguments(self._headers.get("Content-Type", ""), self._body, args, self._files)
        self._request = dict_list_to_single(args)

    @property
    def headers(self):
        return self._headers

    def hpack_headers(self):
        return self._underlying_request.headers

    @property
    def body(self):
        """Return request body"""
        return self._body

    @property
    def meta(self):
        return self._meta

    @property
    def request(self):
        return self._request

    @property
    def files(self):
        return self._files
Example #4
    async def read_one(self) -> Text:
        """Read a single message"""
        message = ""
        headers = HTTPHeaders()

        line = await convert_yielded(self._readline())

        if line:
            while line and line.strip():
                headers.parse_line(line)
                line = await convert_yielded(self._readline())

            content_length = int(headers.get("content-length", "0"))

            if content_length:
                raw = await self._read_content(length=content_length)
                if raw is not None:
                    message = raw.decode("utf-8").strip()
                else:  # pragma: no cover
                    self.log.warning(
                        "%s failed to read message of length %s",
                        self,
                        content_length,
                    )

        return message
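The loop above relies on HTTPHeaders.parse_line, which consumes one raw header line at a time until a blank line ends the header block; a small sketch, assuming Tornado's HTTPHeaders:

# Hedged sketch of the header-accumulation loop used above.
from tornado.httputil import HTTPHeaders

headers = HTTPHeaders()
for raw in ("Content-Length: 42\r\n", "Content-Type: application/vscode-jsonrpc\r\n"):
    headers.parse_line(raw)  # feed one raw header line at a time
assert int(headers.get("content-length", "0")) == 42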
Example #5
def make_post_request(url,
                      data='',
                      headers=None,
                      files=None,
                      content_type=None,
                      connect_timeout=None,
                      request_timeout=None,
                      follow_redirects=True):
    if files:
        body, content_type = make_mfd(data, files)
    else:
        body = make_body(data)

    headers = HTTPHeaders() if headers is None else HTTPHeaders(headers)
    if content_type is None:
        content_type = headers.get('Content-Type',
                                   'application/x-www-form-urlencoded')

    headers.update({
        'Content-Type': content_type,
        'Content-Length': str(len(body))
    })

    return HTTPRequest(url=_encode(url),
                       body=body,
                       method='POST',
                       headers=headers,
                       follow_redirects=follow_redirects,
                       connect_timeout=connect_timeout,
                       request_timeout=request_timeout)
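A hypothetical call to the helper above; the URL and form data are illustrative only, and make_body/make_mfd/_encode are the module's own helpers:

# Hedged usage sketch: the result is a tornado.httpclient.HTTPRequest ready for fetch().
request = make_post_request(
    'http://backend.local/submit',  # illustrative URL
    data={'login': 'alice'},
    request_timeout=2.0,
)
# Content-Type falls back to application/x-www-form-urlencoded and
# Content-Length is derived from the encoded body.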
Example #6
def read_tracking_id_headers(headers: HTTPHeaders, raise_error=True):
    if mdc.correlation_id.get():
        return

    correlation_id = headers.get(HttpHeaders.X_CORRELATION_ID, None)

    if not correlation_id:
        correlation_id = message_utilities.get_uuid()
        logger.info(
            f"Request is missing {HttpHeaders.X_CORRELATION_ID} header. Assigning new value: {correlation_id}"
        )
    else:
        if len(re.findall(UUID_PATTERN, correlation_id)) != 1:
            if raise_error:
                raise tornado.web.HTTPError(
                    status_code=400,
                    log_message=f"Invalid {HttpHeaders.X_CORRELATION_ID} header. "
                                f"Should be a UUIDv4 matching regex '{UUID_PATTERN}'"
                )
            else:
                correlation_id = message_utilities.get_uuid()
                logger.info(
                    f"Invalid {HttpHeaders.X_CORRELATION_ID} header. Assigning new value: {correlation_id}"
                )

    mdc.correlation_id.set(correlation_id)
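The validation accepts exactly one regex match; a sketch under the assumption that UUID_PATTERN is a UUIDv4 regex (the module's real constant may differ):

# Hedged sketch: UUID_PATTERN here is an assumption, not the module's value.
import re

UUID_PATTERN = (r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}'
                r'-[89ab][0-9a-f]{3}-[0-9a-f]{12}$')
assert len(re.findall(UUID_PATTERN, '6e5f2a54-9c8b-4d7e-8f3a-1b2c3d4e5f6a')) == 1
assert len(re.findall(UUID_PATTERN, 'not-a-uuid')) != 1  # would trigger the 400 path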
Example #7
    async def read_one(self) -> Text:
        """Read a single message"""
        message = ""
        headers = HTTPHeaders()

        line = await convert_yielded(self._readline())

        if line:
            while line and line.strip():
                headers.parse_line(line)
                line = await convert_yielded(self._readline())

            content_length = int(headers.get("content-length", "0"))

            if content_length:
                raw = None
                retries = 5
                while raw is None and retries:
                    try:
                        raw = self.stream.read(content_length)
                    except OSError:  # pragma: no cover
                        raw = None
                    if raw is None:  # pragma: no cover
                        self.log.warning(
                            "%s failed to read message of length %s",
                            self,
                            content_length,
                        )
                        await self.sleep()
                        retries -= 1
                    else:
                        message = raw.decode("utf-8").strip()
                        break

        return message
Example #8
 def _can_keep_alive(self, start_line: httputil.RequestStartLine,
                     headers: httputil.HTTPHeaders) -> bool:
     if self.params.no_keep_alive:
         return False
     connection_header = headers.get("Connection")
     if connection_header is not None:
         connection_header = connection_header.lower()
     if start_line.version == "HTTP/1.1":
         return connection_header != "close"
     elif ("Content-Length" in headers
           or headers.get("Transfer-Encoding", "").lower() == "chunked"
           or getattr(start_line, "method", None) in ("HEAD", "GET")):
         # start_line may be a request or response start line; only
         # the former has a method attribute.
         return connection_header == "keep-alive"
     return False
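For HTTP/1.0, the branch above demands an explicit opt-in plus a determinate body length (or a bodyless method); a quick illustration, assuming Tornado's httputil types:

# Hedged sketch of the HTTP/1.0 keep-alive negotiation above.
from tornado.httputil import HTTPHeaders, RequestStartLine

start_line = RequestStartLine(method="GET", path="/", version="HTTP/1.0")
headers = HTTPHeaders({"Connection": "Keep-Alive", "Content-Length": "0"})
# The Connection value is lower-cased before comparison, so "Keep-Alive"
# still negotiates a persistent connection.
assert headers.get("Connection").lower() == "keep-alive"
assert "Content-Length" in headers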
Example #9
def _extract_correlation_id(headers: HTTPHeaders):
    correlation_id = headers.get(HttpHeaders.CORRELATION_ID, None)
    if not correlation_id:
        correlation_id = message_utilities.get_uuid()
        logger.info(
            "Didn't receive correlation id in incoming request from supplier, so have generated a new one."
        )
    mdc.correlation_id.set(correlation_id)
Example #10
    def _read_body(
        self,
        code: int,
        headers: httputil.HTTPHeaders,
        delegate: httputil.HTTPMessageDelegate,
    ) -> Optional[Awaitable[None]]:
        if "Content-Length" in headers:
            if "Transfer-Encoding" in headers:
                # Response cannot contain both Content-Length and
                # Transfer-Encoding headers.
                # http://tools.ietf.org/html/rfc7230#section-3.3.3
                raise httputil.HTTPInputError(
                    "Response with both Transfer-Encoding and Content-Length"
                )
            if "," in headers["Content-Length"]:
                # Proxies sometimes cause Content-Length headers to get
                # duplicated.  If all the values are identical then we can
                # use them but if they differ it's an error.
                pieces = re.split(r",\s*", headers["Content-Length"])
                if any(i != pieces[0] for i in pieces):
                    raise httputil.HTTPInputError(
                        "Multiple unequal Content-Lengths: %r"
                        % headers["Content-Length"]
                    )
                headers["Content-Length"] = pieces[0]

            try:
                content_length = int(headers["Content-Length"])  # type: Optional[int]
            except ValueError:
                # Handles non-integer Content-Length value.
                raise httputil.HTTPInputError(
                    "Only integer Content-Length is allowed: %s"
                    % headers["Content-Length"]
                )

            if cast(int, content_length) > self._max_body_size:
                raise httputil.HTTPInputError("Content-Length too long")
        else:
            content_length = None

        if code == 204:
            # This response code is not allowed to have a non-empty body,
            # and has an implicit length of zero instead of read-until-close.
            # http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.3
            if "Transfer-Encoding" in headers or content_length not in (None, 0):
                raise httputil.HTTPInputError(
                    "Response with code %d should not have body" % code
                )
            content_length = 0

        if content_length is not None:
            return self._read_fixed_body(content_length, delegate)
        if headers.get("Transfer-Encoding", "").lower() == "chunked":
            return self._read_chunked_body(delegate)
        if self.is_client:
            return self._read_body_until_close(delegate)
        return None
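The comma branch above collapses proxy-duplicated Content-Length values only when they agree; in isolation:

# Hedged sketch of the duplicate Content-Length handling.
import re

pieces = re.split(r",\s*", "42, 42")
assert all(i == pieces[0] for i in pieces)  # identical values -> usable as "42"

pieces = re.split(r",\s*", "42, 17")
assert any(i != pieces[0] for i in pieces)  # unequal values -> HTTPInputError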
Example #11
    def _read_body(
        self,
        code: int,
        headers: httputil.HTTPHeaders,
        delegate: httputil.HTTPMessageDelegate,
    ) -> Optional[Awaitable[None]]:
        if "Content-Length" in headers:
            if "Transfer-Encoding" in headers:
                # Response cannot contain both Content-Length and
                # Transfer-Encoding headers.
                # http://tools.ietf.org/html/rfc7230#section-3.3.3
                raise httputil.HTTPInputError(
                    "Response with both Transfer-Encoding and Content-Length"
                )
            if "," in headers["Content-Length"]:
                # Proxies sometimes cause Content-Length headers to get
                # duplicated.  If all the values are identical then we can
                # use them but if they differ it's an error.
                pieces = re.split(r",\s*", headers["Content-Length"])
                if any(i != pieces[0] for i in pieces):
                    raise httputil.HTTPInputError(
                        "Multiple unequal Content-Lengths: %r"
                        % headers["Content-Length"]
                    )
                headers["Content-Length"] = pieces[0]

            try:
                content_length = int(headers["Content-Length"])  # type: Optional[int]
            except ValueError:
                # Handles non-integer Content-Length value.
                raise httputil.HTTPInputError(
                    "Only integer Content-Length is allowed: %s"
                    % headers["Content-Length"]
                )

            if cast(int, content_length) > self._max_body_size:
                raise httputil.HTTPInputError("Content-Length too long")
        else:
            content_length = None

        if code == 204:
            # This response code is not allowed to have a non-empty body,
            # and has an implicit length of zero instead of read-until-close.
            # http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.3
            if "Transfer-Encoding" in headers or content_length not in (None, 0):
                raise httputil.HTTPInputError(
                    "Response with code %d should not have body" % code
                )
            content_length = 0

        if content_length is not None:
            return self._read_fixed_body(content_length, delegate)
        if headers.get("Transfer-Encoding", "").lower() == "chunked":
            return self._read_chunked_body(delegate)
        if self.is_client:
            return self._read_body_until_close(delegate)
        return None
Example #12
 def _can_keep_alive(
     self, start_line: httputil.RequestStartLine, headers: httputil.HTTPHeaders
 ) -> bool:
     if self.params.no_keep_alive:
         return False
     connection_header = headers.get("Connection")
     if connection_header is not None:
         connection_header = connection_header.lower()
     if start_line.version == "HTTP/1.1":
         return connection_header != "close"
     elif (
         "Content-Length" in headers
         or headers.get("Transfer-Encoding", "").lower() == "chunked"
         or getattr(start_line, "method", None) in ("HEAD", "GET")
     ):
         # start_line may be a request or response start line; only
         # the former has a method attribute.
         return connection_header == "keep-alive"
     return False
Example #13
def test_urllib2(scheme, root_span, install_hooks):
    request = urllib2.Request('%s://localhost:9777/proxy' % scheme,
                              headers={'Remote-LOC': 'New New York',
                                       'Remote-Op': 'antiquing'})

    class Response(object):
        def __init__(self):
            self.code = 200
            self.msg = ''

        def info(self):
            return None

    if root_span:
        root_span = mock.MagicMock()
        root_span.context = mock.MagicMock()
        root_span.finish = mock.MagicMock()
        root_span.__exit__ = mock.MagicMock()
    else:
        root_span = None

    span = mock.MagicMock()
    span.set_tag = mock.MagicMock()
    span.finish = mock.MagicMock()

    def inject(span_context, format, carrier):
        carrier['TRACE-ID'] = '123'

    p_do_open = mock.patch('urllib2.AbstractHTTPHandler.do_open',
                           return_value=Response())
    p_start_span = mock.patch.object(opentracing.tracer, 'start_span',
                                     return_value=span)
    p_inject = mock.patch.object(opentracing.tracer, 'inject',
                                 side_effect=inject)
    p_current_span = span_in_context(span=root_span)

    with p_do_open, p_start_span as start_call, p_inject, p_current_span:
        resp = urllib2.urlopen(request)
        expected_references = root_span.context if root_span else None
        start_call.assert_called_once_with(
            operation_name='GET:antiquing',
            child_of=expected_references,
            tags=None,
        )
    assert resp is not None
    span.set_tag.assert_any_call('span.kind', 'client')
    assert span.__enter__.call_count == 1
    assert span.__exit__.call_count == 1, 'ensure finish() was called'
    if root_span:
        assert root_span.__exit__.call_count == 0, 'do not finish root span'

    # verify trace-id was correctly injected into headers
    norm_headers = HTTPHeaders(request.headers)
    assert norm_headers.get('trace-id') == '123'
Example #14
 def _apply_xheaders(self, headers: httputil.HTTPHeaders) -> None:
     """Rewrite the ``remote_ip`` and ``protocol`` fields."""
     # Squid uses X-Forwarded-For, others use X-Real-Ip
     ip = headers.get("X-Forwarded-For", self.remote_ip)
     # Skip trusted downstream hosts in X-Forwarded-For list
     for ip in (cand.strip() for cand in reversed(ip.split(","))):
         if ip not in self.trusted_downstream:
             break
     ip = headers.get("X-Real-Ip", ip)
     if netutil.is_valid_ip(ip):
         self.remote_ip = ip
     # AWS uses X-Forwarded-Proto
     proto_header = headers.get(
         "X-Scheme", headers.get("X-Forwarded-Proto", self.protocol))
     if proto_header:
         # use only the last proto entry if there is more than one
         # TODO: support trusting multiple layers of proxied protocol
         proto_header = proto_header.split(",")[-1].strip()
     if proto_header in ("http", "https"):
         self.protocol = proto_header
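The reversed() walk above selects the closest hop that is not a trusted proxy; for example, with a single trusted downstream host:

# Hedged sketch of the X-Forwarded-For traversal above.
ip = "203.0.113.5, 10.0.0.1"  # original client, then a trusted proxy
trusted_downstream = {"10.0.0.1"}
for ip in (cand.strip() for cand in reversed(ip.split(","))):
    if ip not in trusted_downstream:
        break
assert ip == "203.0.113.5"  # first untrusted address from the right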
Example #15
 def headers_received(self, start_line: Union[httputil.RequestStartLine,
                                              httputil.ResponseStartLine],
                      headers: httputil.HTTPHeaders) -> Optional[Awaitable[None]]:
     if headers.get("Content-Encoding") == "gzip":
         self._decompressor = GzipDecompressor()
         # Downstream delegates will only see uncompressed data,
         # so rename the content-encoding header.
         # (but note that curl_httpclient doesn't do this).
         headers.add("X-Consumed-Content-Encoding",
                     headers["Content-Encoding"])
         del headers["Content-Encoding"]
     return self._delegate.headers_received(start_line, headers)
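The rename above keeps a record of the original encoding while hiding it from downstream delegates; in isolation:

# Hedged sketch of the Content-Encoding rename above.
from tornado.httputil import HTTPHeaders

headers = HTTPHeaders({"Content-Encoding": "gzip"})
headers.add("X-Consumed-Content-Encoding", headers["Content-Encoding"])
del headers["Content-Encoding"]
assert headers.get("Content-Encoding") is None
assert headers.get("X-Consumed-Content-Encoding") == "gzip"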
Example #16
def test_urllib2(urllibver, scheme, root_span, install_hooks, tracer):

    module = install_hooks

    if module is None:
        pytest.skip('Skipping %s on Py3' % urllibver)

    class Response(object):
        def __init__(self):
            self.code = 200
            self.msg = ''

        def info(self):
            return None

    if root_span:
        root_span = tracer.start_span('root-span')
    else:
        root_span = None

    # ideally we should have started a test server and tested with real HTTP
    # request, but doing that for https is more difficult, so we mock the
    # request sending part.
    if urllibver == 'urllib2':
        p_do_open = mock.patch(
            'urllib2.AbstractHTTPHandler.do_open', return_value=Response()
        )
    else:
        cls = module.AbstractHTTPHandler
        p_do_open = mock.patch.object(
            cls, 'do_open', return_value=Response()
        )

    with p_do_open, span_in_context(span=root_span):
        request = module.Request(
            '%s://localhost:9777/proxy' % scheme,
            headers={
                'Remote-LOC': 'New New York',
                'Remote-Op': 'antiquing'
            })
        resp = module.urlopen(request)

    assert resp.code == 200
    assert len(tracer.recorder.get_spans()) == 1

    span = tracer.recorder.get_spans()[0]
    assert span.tags.get('span.kind') == 'client'

    # verify trace-id was correctly injected into headers
    # we wrap the headers to avoid having to deal with upper/lower case
    norm_headers = HTTPHeaders(request.headers)
    trace_id_header = norm_headers.get('ot-tracer-traceid')
    assert trace_id_header == '%x' % span.context.trace_id
Example #17
 def _apply_xheaders(self, headers: httputil.HTTPHeaders) -> None:
     """Rewrite the ``remote_ip`` and ``protocol`` fields."""
     # Squid uses X-Forwarded-For, others use X-Real-Ip
     ip = headers.get("X-Forwarded-For", self.remote_ip)
     # Skip trusted downstream hosts in X-Forwarded-For list
     for ip in (cand.strip() for cand in reversed(ip.split(","))):
         if ip not in self.trusted_downstream:
             break
     ip = headers.get("X-Real-Ip", ip)
     if netutil.is_valid_ip(ip):
         self.remote_ip = ip
     # AWS uses X-Forwarded-Proto
     proto_header = headers.get(
         "X-Scheme", headers.get("X-Forwarded-Proto", self.protocol)
     )
     if proto_header:
         # use only the last proto entry if there is more than one
         # TODO: support trusting multiple layers of proxied protocol
         proto_header = proto_header.split(",")[-1].strip()
     if proto_header in ("http", "https"):
         self.protocol = proto_header
Example #18
 def headers_received(
     self,
     start_line: Union[httputil.RequestStartLine, httputil.ResponseStartLine],
     headers: httputil.HTTPHeaders,
 ) -> Optional[Awaitable[None]]:
     if headers.get("Content-Encoding") == "gzip":
         self._decompressor = GzipDecompressor()
         # Downstream delegates will only see uncompressed data,
         # so rename the content-encoding header.
         # (but note that curl_httpclient doesn't do this).
         headers.add("X-Consumed-Content-Encoding", headers["Content-Encoding"])
         del headers["Content-Encoding"]
     return self._delegate.headers_received(start_line, headers)
Example #19
def get_valid_accept_type(headers: HTTPHeaders):
    accept_types = headers.get(HttpHeaders.ACCEPT,
                               APPLICATION_FHIR_JSON).lower()
    accept_types = accept_types.split(",")
    accept_types = list(map(lambda value: value.strip(), accept_types))

    if ANY in accept_types or APPLICATION_FHIR_JSON in accept_types:
        return APPLICATION_FHIR_JSON
    elif APPLICATION_JSON in accept_types:
        return APPLICATION_JSON
    else:
        logger.info("Invalid Accept header in request")
        raise tornado.web.HTTPError(
            status_code=406,
            log_message=f'Invalid Accept header in request, only: {APPLICATION_JSON} '
                        f'or {APPLICATION_FHIR_JSON} are allowed'
        )
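Assuming constants along the lines of ANY = '*/*', APPLICATION_JSON = 'application/json' and APPLICATION_FHIR_JSON = 'application/fhir+json' (assumed values, not taken from the module), the Accept parsing reduces to:

# Hedged sketch of the Accept-header normalization above.
ANY = '*/*'                            # assumed value
APPLICATION_JSON = 'application/json'  # assumed value
accept_types = [v.strip() for v in "Application/JSON, */*".lower().split(",")]
assert ANY in accept_types and APPLICATION_JSON in accept_types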
Example #20
def make_post_request(url, data='', headers=None, files=None, content_type=None,
                      connect_timeout=None, request_timeout=None, follow_redirects=True):
    if files:
        body, content_type = make_mfd(data, files)
    else:
        body = make_body(data)

    headers = HTTPHeaders() if headers is None else HTTPHeaders(headers)
    if content_type is None:
        content_type = headers.get('Content-Type', 'application/x-www-form-urlencoded')

    headers.update({'Content-Type': content_type, 'Content-Length': str(len(body))})

    return HTTPRequest(
        url=url,
        body=body,
        method='POST',
        headers=headers,
        follow_redirects=follow_redirects,
        connect_timeout=connect_timeout,
        request_timeout=request_timeout
    )
Example #21
async def _build_auth_info(headers: HTTPHeaders) -> Dict[str, Any]:
    """Construct the authentication information for a user.

    Retrieve the token from the headers, use that token to retrieve the
    metadata for the token, and use that data to build an auth info dict
    in the format expected by JupyterHub.  This is in a separate method so
    that it can be unit-tested.
    """
    token = headers.get("X-Auth-Request-Token")
    if not token:
        raise web.HTTPError(401, "No request token")

    config = NubladoConfig()
    if not config.gafaelfawr_token:
        raise web.HTTPError(500, "gafaelfawr_token not set in configuration")
    if not config.base_url:
        raise web.HTTPError(500, "base_url not set in configuration")

    # Retrieve the token metadata.
    api_url = url_path_join(config.base_url, "/auth/api/v1/user-info")
    session = await get_session()
    resp = await session.get(api_url,
                             headers={"Authorization": f"bearer {token}"})
    if resp.status != 200:
        raise web.HTTPError(500, "Cannot reach token analysis API")
    try:
        auth_state = await resp.json()
    except Exception:
        raise web.HTTPError(500, "Cannot get information for token")
    if "username" not in auth_state or "uid" not in auth_state:
        raise web.HTTPError(403, "Request token is invalid")

    auth_state["token"] = token
    if "groups" not in auth_state:
        auth_state["groups"] = []
    return {
        "name": auth_state["username"],
        "auth_state": auth_state,
    }
Example #22
class ProxyHandler(tornado.web.StaticFileHandler):
    CHUNK_SIZE = 64 * 1024
    SUPPORTED_METHODS = ['GET', 'CONNECT']

    def initialize(self, path, default_filename=None):
        self.cache_dir = path
        self.url_transpose = self.application.url_transpose

        tornado.web.StaticFileHandler.initialize(self, str(self.cache_dir))

    def data_received(self, chunk):
        raise NotImplementedError()

    def prepare(self):
        self.cacheable_exts = ('.rpm', '.img', '.sqlite.bz2', '.sqlite.gz', '.xml', '.xml.gz', '.qcow2', '.raw.xz',
                               '.iso', 'filelist.gz', 'vmlinuz')

        self.cacheable = False
        self.cache_used = False
        self.cache_file = None
        self.cache_fd = None
        self.cache_url = False

        self.req_code = None
        self.req_path = None
        self.req_headers = None

    def is_cacheable(self, path):
        return path.endswith(self.cacheable_exts)

    @tornado.gen.coroutine
    @tornado.web.asynchronous
    def get(self, path, include_body=True):
        self.req_path = path
        app_log.info('process %s', path)

        url = urlsplit(path)

        self.cache_url = path.replace(url[0] + '://', '')
        self.cacheable = self.is_cacheable(url.path)
        app_log.debug('is cacheable %r', self.cacheable)
        if self.cacheable:
            cache_file = self.url_transpose(path)
            if not cache_file:
                netloc = [x for x in reversed(url.netloc.split('.'))]
                self.cache_file = self.cache_dir / '.'.join(netloc) / url.path[1:]
            else:
                self.cache_file = self.cache_dir / cache_file

        else:
            uri = self.request.uri.encode()
            cache_id = hashlib.sha1(uri).hexdigest()
            cache_path = self.cache_dir / '~' / cache_id[:2]

            cache_info = cache_path / (cache_id + '-url.txt')
            if not cache_info.exists():
                if not cache_info.parent.exists():
                    cache_info.parent.mkdir(parents=True)

                with cache_info.open('w') as f:
                    f.write(uri.decode())

            self.cache_file = cache_path / (cache_id + '-data.txt')

        cache_time = None
        if self.cache_file.exists():
            self.cache_file = self.cache_file.resolve()
            cache_time = self.cache_file.stat().st_mtime

            lifetime = time() - int(self.settings['cache']['lifetime']) * 60 * 60
            app_log.debug('cache time is %r lifetime is %r', cache_time, lifetime)
            if cache_time > lifetime:
                app_log.info('found %s', self.cache_file)

                cache_url = self.cache_file.relative_to(self.cache_dir).as_posix()
                return tornado.web.StaticFileHandler.get(self, cache_url)

            app_log.info('%s lifetime exceeded', self.cache_file)

        args = {k: v[0] for k, v in self.request.arguments.items()}

        app_log.info('fetch %s', self.request.uri)
        if 'Range' in self.request.headers:
            del self.request.headers['Range']

        self.client = AsyncHTTPClient()
        self.client.fetch(self.request.uri,
                          method=self.request.method,
                          body=self.request.body,
                          headers=self.request.headers,
                          follow_redirects=False,
                          if_modified_since=cache_time,
                          allow_nonstandard_methods=True,
                          connect_timeout=int(self.settings['proxy']['timeout']),
                          request_timeout=2 ** 31 - 1,
                          header_callback=self.process_header,
                          streaming_callback=self.process_body,
                          callback=self.process_finish)

    def process_header(self, line):
        header = line.strip()
        app_log.debug('response header %s', header)
        if header:
            if self.req_headers is None:
                self.req_headers = HTTPHeaders()
                _, status, _ = header.split(' ', 2)
                status = int(status)

                if status == 599:
                    # network error but cache file exists
                    if self.cache_file.exists():
                        status = 200
                elif status == 304:
                    status = 200
                elif status == 200:
                    app_log.debug('prepare temp file for %s', self.req_path)
                    self.cache_fd = NamedTemporaryFile(dir=str(self.cache_dir), delete=False)

                self.set_status(status)
            else:
                self.req_headers.parse_line(line)
            return

        for header in ('Date', 'Cache-Control', 'Server', 'Content-Type', 'Location'):
            val = self.req_headers.get(header)
            if val:
                self.set_header(header, val)

        if 'content-encoding' not in self.req_headers:
            val = self.req_headers.get('Content-Length')
            if val:
                self.set_header('Content-Length', val)

        self.flush()

    def process_body(self, chunk):
        if self._finished:
            return

        if self.cache_fd is not None:
            self.cache_fd.write(chunk)

        self.write(chunk)
        self.flush()

    def process_finish(self, response):
        app_log.debug('process finish %s', self.req_path)
        if self._finished or self.cache_used:
            app_log.debug('skip process finish')
            return

        app_log.info('code %s for %s', response.code, self.request.uri)

        if response.code in (599, 304):
            if self.cache_file.exists():
                if response.code == 304:
                    self.cache_file.touch()

                app_log.info('use %s', self.cache_file)
                self.cache_fd = self.cache_file.open('rb')
                self.process_file()
                return

        elif 200 <= response.code < 300:
            if self.cache_fd is not None:
                self.cache_fd.close()

                if self.cache_file.exists():
                    self.cache_file.unlink()
                elif not self.cache_file.parent.exists():
                    self.cache_file.parent.mkdir(parents=True)

                temp_file = Path(self.cache_dir) / self.cache_fd.name
                temp_file.rename(self.cache_file)

                app_log.info('saved %s', self.cache_file)

        self.cache_fd = None
        self.finish()

    def process_file(self):
        chunk = self.cache_fd.read(self.CHUNK_SIZE)
        if chunk:
            self.write(chunk)
            self.flush(callback=self.process_file)
            return

        self.cache_fd.close()
        self.cache_fd = None
        app_log.debug('process file %s finish', self.cache_file)
        self.finish()

    def compute_etag(self):
        if self.cache_file is None or not self.cache_file.exists():
            return None

        if not hasattr(self, 'absolute_path'):
            self.absolute_path = str(self.cache_file.absolute())

        return tornado.web.StaticFileHandler.compute_etag(self)

    def on_finish(self):
        app_log.debug('on finish')
        # sometimes, prepare is not called.
        if not hasattr(self, 'cache_fd') or self.cache_fd is None:
            return
        self.cache_fd.close()

    @tornado.web.asynchronous
    def connect(self, path):
        app_log.info('CONNECT to %s', self.request.uri)
        host, port = self.request.uri.split(':')
        client = self.request.connection.stream

        def read_from_client(data):
            # app_log.debug('read from client\n%s', data)
            upstream.write(data)

        def read_from_upstream(data):
            # app_log.debug('read from upstream\n%s', data)
            client.write(data)

        def client_close(data=None):
            # app_log.debug('client close\n%s', data)
            if upstream.closed():
                return
            if data:
                upstream.write(data)
            upstream.close()

        def upstream_close(data=None):
            # app_log.debug('upstream close\n%s', data)
            if client.closed():
                return
            if data:
                client.write(data)
            client.close()

        def start_tunnel():
            app_log.debug('start connect tunnel')
            client.read_until_close(client_close, read_from_client)
            upstream.read_until_close(upstream_close, read_from_upstream)
            client.write(b'HTTP/1.0 200 Connection established\r\n\r\n')

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        upstream = tornado.iostream.IOStream(s)
        app_log.debug('connect to upstream')
        upstream.connect((host, int(port)), start_tunnel)
Example #23
class GatewayApi(tornado.websocket.WebSocketHandler):

    # For websocket requests: receive incoming messages
    async def on_message(self, message):
        if self._up_websocket is not None:
            # Forward the received message to the upstream websocket
            await self._up_websocket.send(message)

    # Handle websocket disconnects; due to tornado's implementation this method cannot be declared async
    def on_ws_connection_close(self,
                               close_code: int = None,
                               close_reason: str = None):
        if self._up_websocket is not None and self._up_websocket.closed is False:
            import tornado.ioloop
            # up_websocket.close is asynchronous, so spawn a task to run it
            io_loop = tornado.ioloop.IOLoop.current()
            io_loop.spawn_callback(self._up_websocket.close)
        super().on_ws_connection_close()

    async def get(self, *args, **kwargs):
        # Check whether the request is a websocket upgraded over HTTP
        if self.request.headers.get("Upgrade", "").lower() == "websocket":
            # Ask the app_core for the proxied upstream websocket connection, establishing the upstream link
            self._up_websocket = await g.app_core.get_up_websocket(
                self.request, self)
            # Let the parent class's get() handle the websocket request and produce the response
            await super().get(*args, **kwargs)
        else:
            # Handle other HTTP requests with the normal logic
            output = {'result': {}, 'finish': False}
            thread = threading.Thread(target=gateway_service,
                                      args=(self.request, output))
            thread.setDaemon(True)
            thread.start()

            while not output['finish']:
                await asyncio.sleep(0.01)
            result = output['result']

            if result['status'] == 'ok':
                new_access_token = result['value'].get(
                    'new_access_token')  # use get rather than [], since the key may be absent
                new_refresh_token = result['value'].get('new_refresh_token')
                response = result['value']['response']

                if new_access_token is not None and new_refresh_token is not None:
                    self.set_cookie('access_token', new_access_token)
                    self.set_cookie('refresh_token', new_refresh_token)

                self.set_status(response.status_code)
                self._headers = HTTPHeaders(response.headers)  # note: the conversion to HTTPHeaders is required

                if self._headers.get('Content-Type') == 'gzip':
                    try:
                        self._headers.pop('Content-Type')
                        self._headers.pop('Content-Length')
                    except KeyError:
                        pass

                if self.request.headers.get('Origin'):
                    self.set_header('Access-Control-Allow-Credentials', 'true')
                    self.set_header('Access-Control-Allow-Origin',
                                    self.request.headers.get('Origin'))

                if self._status_code in (
                        204, 304) or 100 <= self._status_code < 200:
                    # these statuses must not carry a body, so do not write one
                    return

                # Check whether the response uses chunked transfer
                transfer_encoding_header = response.headers.get(
                    "Transfer-Encoding")
                if transfer_encoding_header is None or transfer_encoding_header != "chunked":
                    # if not, write the body as usual
                    self.write(response.content)
                else:
                    # chunked transfer: stream it chunk by chunk
                    self._headers.pop("Transfer-Encoding")  # this header must be removed
                    raw_content = response.raw
                    chunk_size = 1024
                    while True:
                        # read the response in chunks, flushing each to the client
                        chunk = raw_content.read(chunk_size)
                        self.write(chunk)
                        await self.flush()
                        if len(chunk) < chunk_size:
                            break
            else:
                self.set_status(500)
                self.write(result['value'])

    async def post(self, *args, **kwargs):
        output = {'result': {}, 'finish': False}
        thread = threading.Thread(target=gateway_service,
                                  args=(self.request, output))
        thread.setDaemon(True)
        thread.start()

        while not output['finish']:
            await asyncio.sleep(0.01)
        result = output['result']

        if result['status'] == 'ok':
            new_access_token = result['value'].get(
                'new_access_token')  # use get rather than [], since the key may be absent
            new_refresh_token = result['value'].get('new_refresh_token')
            response = result['value']['response']

            if new_access_token is not None and new_refresh_token is not None:
                self.set_cookie('access_token', new_access_token)
                self.set_cookie('refresh_token', new_refresh_token)

            self.set_status(response.status_code)
            self._headers = HTTPHeaders(response.headers)  # note: the conversion to HTTPHeaders is required

            if self.request.headers.get('Origin'):
                self.set_header('Access-Control-Allow-Credentials', 'true')
                self.set_header('Access-Control-Allow-Origin',
                                self.request.headers.get('Origin'))

            if self._status_code in (204,
                                     304) or 100 <= self._status_code < 200:
                # these statuses must not carry a body, so do not write one
                return

            self.write(response.content)
        else:
            self.set_status(500)
            self.write(result['value'])

    async def options(self, *args, **kwargs):
        # Allow cross-origin requests
        self.set_status(204)
        self.set_header('Access-Control-Allow-Credentials', 'true')
        self.set_header('Access-Control-Allow-Origin',
                        self.request.headers.get('Origin'))
        self.set_header("Access-Control-Allow-Headers", "content-type")
        self.set_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
Example #24
def _extract_inbound_message_id(headers: HTTPHeaders):
    inbound_message_id = headers.get(HttpHeaders.INBOUND_MESSAGE_ID, None)
    if inbound_message_id:
        mdc.interaction_id.set(inbound_message_id)
        logger.info('Found inbound message id on incoming request.')
Example #25
def _extract_interaction_id(headers: HTTPHeaders):
    interaction_id = headers.get(HttpHeaders.INTERACTION_ID, None)
    if interaction_id:
        mdc.interaction_id.set(interaction_id)
        logger.info('Found interaction id on incoming request.')
Example #26
def _extract_message_id(headers: HTTPHeaders):
    message_id = headers.get(HttpHeaders.MESSAGE_ID, None)
    if message_id:
        mdc.message_id.set(message_id)
        logger.info('Found message id on incoming request.')
Example #27
class BalancedHttpRequest:
    def __init__(self,
                 host: str,
                 upstream: Upstream,
                 source_app: str,
                 uri: str,
                 name: str,
                 method='GET',
                 data=None,
                 headers=None,
                 files=None,
                 content_type=None,
                 connect_timeout=None,
                 request_timeout=None,
                 max_timeout_tries=None,
                 follow_redirects=True,
                 idempotent=True):
        self.source_app = source_app
        self.uri = uri if uri.startswith('/') else '/' + uri
        self.upstream = upstream
        self.name = name
        self.method = method
        self.connect_timeout = connect_timeout
        self.request_timeout = request_timeout
        self.follow_redirects = follow_redirects
        self.idempotent = idempotent
        self.body = None
        self.last_request = None

        if request_timeout is not None and max_timeout_tries is None:
            max_timeout_tries = options.http_client_default_max_timeout_tries

        if self.connect_timeout is None:
            self.connect_timeout = self.upstream.connect_timeout
        if self.request_timeout is None:
            self.request_timeout = self.upstream.request_timeout
        if max_timeout_tries is None:
            max_timeout_tries = self.upstream.max_timeout_tries

        self.session_required = self.upstream.session_required

        self.connect_timeout *= options.timeout_multiplier
        self.request_timeout *= options.timeout_multiplier

        self.headers = HTTPHeaders() if headers is None else HTTPHeaders(headers)
        if self.source_app and not self.headers.get(USER_AGENT_HEADER):
            self.headers[USER_AGENT_HEADER] = self.source_app
        if self.method == 'POST':
            if files:
                self.body, content_type = make_mfd(data, files)
            else:
                self.body = make_body(data)

            if content_type is None:
                content_type = self.headers.get(
                    'Content-Type', 'application/x-www-form-urlencoded')

            self.headers['Content-Length'] = str(len(self.body))
        elif self.method == 'PUT':
            self.body = make_body(data)
        else:
            self.uri = make_url(self.uri, **({} if data is None else data))

        if content_type is not None:
            self.headers['Content-Type'] = content_type
        self.request_time_left = self.request_timeout * max_timeout_tries
        self.tries = OrderedDict()
        self.current_host = host.rstrip('/')
        self.current_server_index = None
        self.current_rack = None
        self.current_datacenter = None

    def make_request(self):
        if self.upstream.balanced:
            index, host, rack, datacenter = self.upstream.borrow_server(
                self.tries.keys() if self.tries else None)

            self.current_server_index = index
            self.current_host = host
            self.current_rack = rack
            self.current_datacenter = datacenter

        request = HTTPRequest(
            url=(self.get_calling_address()) + self.uri,
            body=self.body,
            method=self.method,
            headers=self.headers,
            follow_redirects=self.follow_redirects,
            connect_timeout=self.connect_timeout,
            request_timeout=self.request_timeout,
        )

        request.upstream_name = self.upstream.name if self.upstream.balanced else self.current_host
        request.upstream_datacenter = self.current_datacenter

        if options.http_proxy_host is not None:
            request.proxy_host = options.http_proxy_host
            request.proxy_port = options.http_proxy_port

        self.last_request = request
        return self.last_request

    def backend_available(self):
        return self.current_host is not None

    def get_calling_address(self):
        return self.current_host if self.backend_available() else self.upstream.name

    def get_host(self):
        return self.upstream.name if self.upstream.balanced else self.current_host

    def check_retry(self, response):
        self.request_time_left -= response.request_time

        if self.upstream.balanced:
            do_retry = self.upstream.retry_policy.check_retry(
                response, self.idempotent)

            if self.current_server_index is not None:
                self.upstream.return_server(self.current_server_index)
        else:
            do_retry = False

        do_retry = (do_retry and self.upstream.max_tries > len(self.tries)
                    and self.request_time_left > 0)
        return do_retry

    def pop_last_request(self):
        request = self.last_request
        self.last_request = None
        return request

    def register_try(self, response):
        index = (self.current_server_index
                 if self.current_server_index is not None
                 else len(self.tries))
        self.tries[index] = ResponseData(response.code, str(response.error))

    @staticmethod
    def get_url(request):
        return f'http://{request.get_host()}{request.uri}'

    @staticmethod
    def get_trace(request):
        def _get_server_address(index):
            if request.upstream.balanced:
                if index < len(request.upstream.servers) and request.upstream.servers[index]:
                    return request.upstream.servers[index].address
                return f'no_idx_{index}_in_upstream'
            return request.get_calling_address()

        return ' -> '.join([
            f'{_get_server_address(index)}~{data.responseCode}~{data.msg}'
            for index, data in request.tries.items()
        ])
Example #28
 def _groups_from_headers(self, headers: HTTPHeaders) -> List[str]:
     """Parse the header containing the user's groups and return them as a list"""
     return [
         group.strip()
         for group in headers.get(self.user_groups_header, "").split(",")
     ]
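One edge case worth noting: str.split always returns at least one element, so a missing header yields a single empty-string "group" rather than an empty list:

# Hedged sketch of the split behavior the method above relies on.
assert "".split(",") == [""]  # absent header -> one empty group, not []
assert [g.strip() for g in "admin, dev".split(",")] == ["admin", "dev"]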