def test_stack_trace_limit_types_and_values():
    """Invalid 'stack_trace_limit' values raise TypeError/ValueError with a clear message."""
    expected = "'stack_trace_limit' must be None, or a non-negative integer"

    with pytest.raises(TypeError) as exc_info:
        ecs_logging.StdlibFormatter(stack_trace_limit="a")
    assert str(exc_info.value) == expected

    with pytest.raises(ValueError) as exc_info:
        ecs_logging.StdlibFormatter(stack_trace_limit=-1)
    assert str(exc_info.value) == expected
def test_exclude_fields_empty_json_object():
    """If every attribute of a JSON sub-object is excluded, the object itself is dropped."""
    formatter = ecs_logging.StdlibFormatter(
        exclude_fields=["process.pid", "process.name", "process.thread"]
    )
    assert "process" not in formatter.format_to_ecs(make_record())

    formatter = ecs_logging.StdlibFormatter(exclude_fields=["ecs.version"])
    assert "ecs" not in formatter.format_to_ecs(make_record())
def test_exclude_fields_type_and_values():
    """'exclude_fields' rejects non-sequences, sets, and sequences of non-strings."""
    expected = "'exclude_fields' must be a sequence of strings"
    # A bare string, a set, and a list of non-strings must all be rejected.
    for bad_value in ("a", {"a"}, [1]):
        with pytest.raises(TypeError) as exc_info:
            ecs_logging.StdlibFormatter(exclude_fields=bad_value)
        assert str(exc_info.value) == expected
def _log_ecs_reformatting_callback(dict_key, old_value, new_value, config_instance):
    """
    If ecs_logging is installed and log_ecs_reformatting is set to "override",
    we should set the ecs_logging.StdlibFormatter as the formatter for every
    handler in the root logger, and set the default processor for structlog to
    the ecs_logging.StructlogFormatter.
    """
    if new_value.lower() == "override":
        # ecs_logging is an optional dependency: silently do nothing without it.
        try:
            import ecs_logging
        except ImportError:
            return
        # Stdlib: one shared formatter instance for all root-logger handlers.
        root_logger = logging.getLogger()
        formatter = ecs_logging.StdlibFormatter()
        for handler in root_logger.handlers:
            handler.setFormatter(formatter)
        # Structlog: also optional; skip quietly if it is not installed.
        try:
            import structlog
            structlog.configure(processors=[ecs_logging.StructlogFormatter()])
        except ImportError:
            pass
def generate_logger(name: str, log_level: str = 'error',
                    handlers: List[dict] = None, metadata: dict = None):
    """Build a logger with ECS-formatted handlers wrapped in a CustomLoggerAdapter.

    :param name: logger name; must be non-empty.
    :param log_level: level name passed to ``Logger.setLevel``; must be non-empty.
    :param handlers: handler config dicts (``type`` of 'stream' or
        'rotating_file'); defaults to a single stderr stream handler.
    :param metadata: extra context for the adapter; defaults to an empty dict.
    :raises ValueError: on empty name/log_level/handlers or an unsupported
        handler type.
    """
    # Mutable defaults must not be shared across calls, so build them per call.
    if handlers is None:
        handlers = [{'type': 'stream', 'output': 'stderr'}]
    if metadata is None:
        metadata = {}

    if not name:
        raise ValueError('name must not be empty')
    if not log_level:
        raise ValueError('log_level must not be empty')
    if len(handlers) == 0:
        raise ValueError('handlers must not be empty')

    logger = logging.getLogger(name)
    for handler_config in handlers:
        if handler_config['type'] == 'stream':
            logging_handler = get_stream_handler(config=handler_config)
        elif handler_config['type'] == 'rotating_file':
            logging_handler = get_rotating_file_handler(config=handler_config)
        else:
            raise ValueError('unsupported handler type')
        # Every handler emits ECS-compatible JSON.
        logging_handler.setFormatter(ecs_logging.StdlibFormatter())
        logger.addHandler(logging_handler)
    logger.setLevel(log_level)
    return CustomLoggerAdapter(logger, metadata)
def test_stack_trace_limit_traceback(logger):
    """stack_trace_limit=2 keeps only the two outermost frames of the traceback."""

    def f():
        g()

    def g():
        h()

    def h():
        raise ValueError("error!")

    stream = StringIO()
    handler = logging.StreamHandler(stream)
    handler.setFormatter(ecs_logging.StdlibFormatter(stack_trace_limit=2))
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    try:
        f()
    except ValueError:
        logger.info("there was an error", exc_info=True)

    event = json.loads(stream.getvalue().rstrip())
    stack_trace = event["error"].pop("stack_trace")
    # Only the two outermost frames survive the limit; the innermost is cut.
    assert "f()" in stack_trace and "g()" in stack_trace
    assert "h()" not in stack_trace
    assert event["error"] == {"message": "error!", "type": "ValueError"}
    assert event["log.level"] == "info"
    assert event["message"] == "there was an error"
    assert event["log"]["original"] == "there was an error"
def test_record_formatted(spec_validator):
    """The formatted record (with "process" excluded) passes the spec validator."""
    formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"])
    expected = (
        '{"@timestamp":"2020-03-20T14:12:46.123Z","log.level":"debug","message":"1: hello","ecs":{"version":"1.6.0"},'
        '"log":{"logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},"function":"test_function"},'
        '"original":"1: hello"}}'
    )
    assert spec_validator(formatter.format(make_record())) == expected
def test_record_formatted():
    """A default-configured formatter serializes a record to the expected ECS JSON."""
    expected = (
        '{"@timestamp":"2020-03-20T14:12:46.123Z","ecs":{"version":"1.5.0"},'
        '"log":{"level":"debug","logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},'
        '"function":"test_function"},"original":"1: hello"},"message":"1: hello"}'
    )
    assert ecs_logging.StdlibFormatter().format(make_record()) == expected
def test_stack_info_excluded(logger, exclude_fields):
    """Excluding the error fields drops the "error" object even with stack_info."""
    stream = StringIO()
    handler = logging.StreamHandler(stream)
    handler.setFormatter(ecs_logging.StdlibFormatter(exclude_fields=exclude_fields))
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    logger.info("stack info!", stack_info=True)

    event = json.loads(stream.getvalue().rstrip())
    assert "error" not in event
def test_elastic_apm_stdlib_with_filter_log_correlation_ecs_fields():
    """APM span/trace/transaction ids injected by the filter land in the ECS event."""
    apm = elasticapm.Client({"SERVICE_NAME": "apm-service", "DISABLE_SEND": True})

    stream = StringIO()
    logger = logging.getLogger("apm-logger")
    handler = logging.StreamHandler(stream)
    handler.setFormatter(
        ecs_logging.StdlibFormatter(
            exclude_fields=["@timestamp", "process", "log.origin.file.line"]
        )
    )
    handler.addFilter(LoggingFilter())
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    apm.begin_transaction("test-transaction")
    try:
        with elasticapm.capture_span("test-span"):
            # Capture the correlation ids while the span is still active.
            span_id = elasticapm.get_span_id()
            trace_id = elasticapm.get_trace_id()
            transaction_id = elasticapm.get_transaction_id()
            logger.info("test message")
    finally:
        apm.end_transaction("test-transaction")

    event = json.loads(stream.getvalue().rstrip())
    assert event == {
        "ecs": {"version": "1.5.0"},
        "log": {
            "level": "info",
            "logger": "apm-logger",
            "origin": {
                "file": {"name": "test_apm.py"},
                "function": "test_elastic_apm_stdlib_with_filter_log_correlation_ecs_fields",
            },
            "original": "test message",
        },
        "message": "test message",
        "span": {"id": span_id},
        "trace": {"id": trace_id},
        "transaction": {"id": transaction_id},
    }
def test_can_be_set_on_handler():
    """StdlibFormatter works when attached directly to a logging handler."""
    stream = StringIO()
    handler = logging.StreamHandler(stream)
    handler.setFormatter(ecs_logging.StdlibFormatter())

    handler.handle(make_record())

    expected = (
        '{"@timestamp":"2020-03-20T14:12:46.123Z","ecs":{"version":"1.5.0"},'
        '"log":{"level":"debug","logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},'
        '"function":"test_function"},"original":"1: hello"},"message":"1: hello"}\n'
    )
    assert stream.getvalue() == expected
def test_stack_info(logger):
    """stack_info=True produces an "error" object containing only a stack trace."""
    stream = StringIO()
    handler = logging.StreamHandler(stream)
    handler.setFormatter(ecs_logging.StdlibFormatter())
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    logger.info("stack info!", stack_info=True)

    event = json.loads(stream.getvalue().rstrip())
    assert list(event["error"].keys()) == ["stack_trace"]
    stack_trace = event["error"].pop("stack_trace")
    # The trace should reference both this test function and this file.
    assert "test_stack_info" in stack_trace and __file__ in stack_trace
def test_exc_info_false_does_not_raise(logger):
    """exc_info=False must neither raise nor add an "error" object."""
    stream = StringIO()
    handler = logging.StreamHandler(stream)
    handler.setFormatter(ecs_logging.StdlibFormatter())
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    logger.info("there was %serror", "no ", exc_info=False)

    event = json.loads(stream.getvalue().rstrip())
    assert event["log.level"] == "info"
    assert event["message"] == "there was no error"
    assert "error" not in event
def test_extra_is_merged(time, logger):
    """Dotted keys and nested dicts in 'extra' merge, minus the excluded fields."""
    time.return_value = 1584720997.187709

    stream = StringIO()
    handler = logging.StreamHandler(stream)
    handler.setFormatter(
        ecs_logging.StdlibFormatter(exclude_fields=["process", "tls.client"])
    )
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    logger.info(
        "hey world",
        extra={
            "tls": {
                "cipher": "AES",
                "client": {"hash": {"md5": "0F76C7F2C55BFD7D8E8B8F4BFBF0C9EC"}},
            },
            "tls.established": True,
            "tls.client.certificate": "cert",
        },
    )

    event = json.loads(stream.getvalue().rstrip())
    # The source line of the log call shifts with edits; only check its type.
    assert isinstance(event["log"]["origin"]["file"].pop("line"), int)
    assert event == {
        "@timestamp": "2020-03-20T16:16:37.187Z",
        "ecs": {"version": "1.6.0"},
        "log": {
            "level": "info",
            "logger": logger.name,
            "origin": {
                "file": {"name": "test_stdlib_formatter.py"},
                "function": "test_extra_is_merged",
            },
            "original": "hey world",
        },
        "message": "hey world",
        "tls": {"cipher": "AES", "established": True},
    }
def test_exclude_fields(exclude_fields):
    """Every excluded dotted field is absent from the formatted ECS event."""
    if isinstance(exclude_fields, str):
        exclude_fields = [exclude_fields]
    formatter = ecs_logging.StdlibFormatter(exclude_fields=exclude_fields)
    ecs = formatter.format_to_ecs(make_record())

    for dotted in exclude_fields:
        parts = dotted.split(".")
        parent = ecs
        try:
            for key in parts[:-1]:
                parent = parent[key]
        except KeyError:
            # An ancestor object was removed entirely, so the leaf is gone too.
            continue
        assert parts[-1] not in parent
def _log_level_callback(dict_key, old_value, new_value, config_instance):
    # Apply the new level to the "elasticapm" logger; unknown level names fall
    # back to 100 (above CRITICAL, effectively silencing the logger).
    elasticapm_logger = logging.getLogger("elasticapm")
    elasticapm_logger.setLevel(log_levels_map.get(new_value, 100))

    # If a log file is configured, attach a rotating file handler exactly once
    # per process (guarded by the module-level logfile_set_up flag).
    global logfile_set_up
    if not logfile_set_up and config_instance.log_file:
        logfile_set_up = True
        filehandler = logging.handlers.RotatingFileHandler(
            config_instance.log_file, maxBytes=config_instance.log_file_size, backupCount=1
        )
        # ecs_logging is optional; without it the handler keeps the default format.
        try:
            import ecs_logging

            filehandler.setFormatter(ecs_logging.StdlibFormatter())
        except ImportError:
            pass
        elasticapm_logger.addHandler(filehandler)
def test_stack_trace_limit_disabled(stack_trace_limit, logger):
    """With the stack trace disabled, the "error" object carries no trace."""
    stream = StringIO()
    handler = logging.StreamHandler(stream)
    handler.setFormatter(
        ecs_logging.StdlibFormatter(stack_trace_limit=stack_trace_limit)
    )
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    try:
        raise ValueError("error!")
    except ValueError:
        logger.info("there was an error", exc_info=True)

    event = json.loads(stream.getvalue().rstrip())
    assert event["error"] == {"message": "error!", "type": "ValueError"}
    assert event["log"]["level"] == "info"
    assert event["message"] == "there was an error"
    assert event["log"]["original"] == "there was an error"
def test_stack_trace_limit_default(kwargs, logger):
    """By default every frame of the traceback appears in the stack trace."""

    def f():
        g()

    def g():
        h()

    def h():
        raise ValueError("error!")

    stream = StringIO()
    handler = logging.StreamHandler(stream)
    handler.setFormatter(ecs_logging.StdlibFormatter(**kwargs))
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    try:
        f()
    except ValueError:
        logger.info("there was an error", exc_info=True)

    event = json.loads(stream.getvalue().rstrip())
    stack_trace = event["error"].pop("stack_trace")
    for frame in ("f()", "g()", "h()"):
        assert frame in stack_trace
def test_extra_is_merged(time):
    """Dotted keys and nested dicts passed via 'extra' merge into one object."""
    time.return_value = 1584720997.187709

    stream = StringIO()
    handler = logging.StreamHandler(stream)
    handler.setFormatter(ecs_logging.StdlibFormatter())
    logger = logging.getLogger("test-logger")
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    logger.info("hey world", extra={"tls": {"cipher": "AES"}, "tls.established": True})

    event = json.loads(stream.getvalue().rstrip())
    # The original compared against a JSON string containing a hard-coded
    # source line number ("line":73), which breaks whenever this file is
    # edited. Assert on the parsed event instead and only type-check the line.
    assert isinstance(event["log"]["origin"]["file"].pop("line"), int)
    assert event == {
        "@timestamp": "2020-03-20T16:16:37.187Z",
        "ecs": {"version": "1.5.0"},
        "log": {
            "level": "info",
            "logger": "test-logger",
            "origin": {
                "file": {"name": "test_stdlib_formatter.py"},
                "function": "test_extra_is_merged",
            },
            "original": "hey world",
        },
        "message": "hey world",
        "tls": {"cipher": "AES", "established": True},
    }
async def async_main(): stdout_handler = logging.StreamHandler(sys.stdout) ecs_formatter = ecs_logging.StdlibFormatter(exclude_fields=('log.original', 'message')) stdout_handler.setFormatter(ecs_formatter) for logger_name in [ 'aiohttp.server', 'aiohttp.web', 'aiohttp.access', 'proxy' ]: logger = logging.getLogger(logger_name) logger.setLevel(logging.INFO) logger.addHandler(stdout_handler) env = normalise_environment(os.environ) port = int(env['PROXY_PORT']) admin_root = env['UPSTREAM_ROOT'] superset_root = env['SUPERSET_ROOT'] hawk_senders = env['HAWK_SENDERS'] sso_base_url = env['AUTHBROKER_URL'] sso_host = URL(sso_base_url).host sso_client_id = env['AUTHBROKER_CLIENT_ID'] sso_client_secret = env['AUTHBROKER_CLIENT_SECRET'] redis_url = env['REDIS_URL'] root_domain = env['APPLICATION_ROOT_DOMAIN'] basic_auth_user = env['METRICS_SERVICE_DISCOVERY_BASIC_AUTH_USER'] basic_auth_password = env['METRICS_SERVICE_DISCOVERY_BASIC_AUTH_PASSWORD'] x_forwarded_for_trusted_hops = int(env['X_FORWARDED_FOR_TRUSTED_HOPS']) application_ip_whitelist = env['APPLICATION_IP_WHITELIST'] ga_tracking_id = env.get('GA_TRACKING_ID') mirror_remote_root = env['MIRROR_REMOTE_ROOT'] mirror_local_root = '/__mirror/' required_admin_headers = ( 'cookie', 'host', 'x-csrftoken', 'x-data-workspace-no-modify-application-instance', 'x-scheme', 'x-forwarded-proto', 'referer', ) # Cookies on the embed path must be allowed to be SameSite=None, so they # will be sent when the site is embedded in an iframe embed_path = '/visualisations/link' root_domain_no_port, _, root_port_str = root_domain.partition(':') try: root_port = int(root_port_str) except ValueError: root_port = None csp_common = "object-src 'none';" if root_domain not in ['dataworkspace.test:8000']: csp_common += 'upgrade-insecure-requests;' # A spawning application on <my-application>.<root_domain> shows the admin-styled site, # fetching assets from <root_domain>, but also makes requests to the current domain csp_application_spawning = 
csp_common + ( f'default-src {root_domain};' f'base-uri {root_domain};' f'font-src {root_domain} data: https://fonts.gstatic.com;' f'form-action {root_domain} *.{root_domain};' f'frame-ancestors {root_domain};' f'img-src {root_domain} data: https://www.googletagmanager.com https://www.google-analytics.com https://ssl.gstatic.com https://www.gstatic.com;' # pylint: disable=line-too-long f"script-src 'unsafe-inline' {root_domain} https://www.googletagmanager.com https://www.google-analytics.com https://tagmanager.google.com;" # pylint: disable=line-too-long f"style-src 'unsafe-inline' {root_domain} https://tagmanager.google.com https://fonts.googleapis.com;" f"connect-src {root_domain} 'self';") # A running wrapped application on <my-application>.<root_domain> has an # iframe that directly routes to the app on <my-application>--8888.<root_domain> def csp_application_running_wrapped(direct_host): return csp_common + ( f"default-src 'none';" f'base-uri {root_domain};' f"form-action 'none';" f"frame-ancestors 'none';" f'frame-src {direct_host} {sso_host} https://www.googletagmanager.com;' f'img-src {root_domain} https://www.googletagmanager.com https://www.google-analytics.com https://ssl.gstatic.com https://www.gstatic.com;' # pylint: disable=line-too-long f"font-src {root_domain} data: https://fonts.gstatic.com;" f"script-src 'unsafe-inline' https://www.googletagmanager.com https://www.google-analytics.com https://tagmanager.google.com;" # pylint: disable=line-too-long f"style-src 'unsafe-inline' {root_domain} https://tagmanager.google.com https://fonts.googleapis.com;" ) # A running application should only connect to self: this is where we have the most # concern because we run the least-trusted code def csp_application_running_direct(host, public_host): return csp_common + ( "default-src 'self';" "base-uri 'self';" # Safari does not have a 'self' for WebSockets f"connect-src 'self' wss://{host};" "font-src 'self' data:;" "form-action 'self';" f"frame-ancestors 
'self' {root_domain} {public_host}.{root_domain};" "img-src 'self' data: blob:;" # Both JupyterLab and RStudio need `unsafe-eval` "script-src 'unsafe-inline' 'unsafe-eval' 'self';" "style-src 'unsafe-inline' 'self';" "worker-src 'self' blob:;") redis_pool = await aioredis.create_redis_pool(redis_url) default_http_timeout = aiohttp.ClientTimeout() # When spawning and tring to detect if the app is running, # we fail quickly and often so a connection check is quick spawning_http_timeout = aiohttp.ClientTimeout(sock_read=5, sock_connect=2) def get_random_context_logger(): return ContextAdapter( logger, {'context': ''.join(random.choices(CONTEXT_ALPHABET, k=8))}) def without_transfer_encoding(request_or_response): return tuple((key, value) for key, value in request_or_response.headers.items() if key.lower() != 'transfer-encoding') def admin_headers_request(downstream_request): # When we make a deliberate request to the admin application from the # proxy we don't want to proxy content-length or content-type return (tuple((key, value) for key, value in downstream_request.headers.items() if key.lower() in required_admin_headers) + downstream_request['sso_profile_headers']) def admin_headers_proxy(downstream_request): return (tuple((key, value) for key, value in downstream_request.headers.items() if key.lower() in required_admin_headers + ('content-length', 'content-type')) + downstream_request['sso_profile_headers']) def mirror_headers(downstream_request): return tuple((key, value) for key, value in downstream_request.headers.items() if key.lower() not in ['host', 'transfer-encoding']) def application_headers(downstream_request): return without_transfer_encoding(downstream_request) + ( (('x-scheme', downstream_request.headers['x-forwarded-proto']), ) if 'x-forwarded-proto' in downstream_request.headers else ()) def superset_headers(downstream_request): return (without_transfer_encoding(downstream_request) + downstream_request['sso_profile_headers']) def 
is_service_discovery(request): return (request.url.path == '/api/v1/application' and request.url.host == root_domain_no_port and request.method == 'GET') def is_superset_requested(request): return request.url.host == f'superset.{root_domain_no_port}' def is_data_explorer_requested(request): return (request.url.path.startswith('/data-explorer/') and request.url.host == root_domain_no_port) def is_app_requested(request): return (request.url.host.endswith(f'.{root_domain_no_port}') and not request.url.path.startswith(mirror_local_root) and not is_superset_requested(request)) def is_mirror_requested(request): return request.url.host.endswith( f'.{root_domain_no_port}') and request.url.path.startswith( mirror_local_root) def is_requesting_credentials(request): return (request.url.host == root_domain_no_port and request.url.path == '/api/v1/aws_credentials') def is_requesting_files(request): return request.url.host == root_domain_no_port and request.url.path == '/files' def is_dataset_requested(request): return (request.url.path.startswith('/api/v1/dataset/') or request.url.path.startswith('/api/v1/reference-dataset/') or request.url.path.startswith('/api/v1/eventlog/') or request.url.path.startswith('/api/v1/account/') or request.url.path.startswith('/api/v1/application-instance/') and request.url.host == root_domain_no_port) def is_hawk_auth_required(request): return is_dataset_requested(request) def is_healthcheck_requested(request): return (request.url.path == '/healthcheck' and request.method == 'GET' and not is_app_requested(request)) def is_table_requested(request): return (request.url.path.startswith('/api/v1/table/') and request.url.host == root_domain_no_port and request.method == 'POST') def is_sso_auth_required(request): return (not is_healthcheck_requested(request) and not is_service_discovery(request) and not is_table_requested(request) and not is_dataset_requested(request)) def get_peer_ip(request): peer_ip = (request.headers['x-forwarded-for'].split(',') 
[-x_forwarded_for_trusted_hops].strip()) is_private = True try: is_private = ipaddress.ip_address(peer_ip).is_private except ValueError: is_private = False return peer_ip, is_private def request_scheme(request): return request.headers.get('x-forwarded-proto', request.url.scheme) def request_url(request): return str(request.url.with_scheme(request_scheme(request))) async def handle(downstream_request): method = downstream_request.method path = downstream_request.url.path query = downstream_request.url.query app_requested = is_app_requested(downstream_request) mirror_requested = is_mirror_requested(downstream_request) superset_requested = is_superset_requested(downstream_request) # Websocket connections # - tend to close unexpectedly, both from the client and app # - don't need to show anything nice to the user on error is_websocket = (downstream_request.headers.get('connection', '').lower() == 'upgrade' and downstream_request.headers.get( 'upgrade', '').lower() == 'websocket') try: if app_requested: return await handle_application(is_websocket, downstream_request, method, path, query) if mirror_requested: return await handle_mirror(downstream_request, method, path) if superset_requested: return await handle_superset(downstream_request, method, path, query) return await handle_admin( downstream_request, method, CIMultiDict(admin_headers_proxy(downstream_request)), path, query, await get_data(downstream_request), ) except Exception as exception: # pylint: disable=broad-except user_exception = isinstance(exception, UserException) if not user_exception or (user_exception and exception.args[1] == 500): logger.exception( 'Exception during %s %s %s', downstream_request.method, downstream_request.url, type(exception), ) if is_websocket: raise params = {'message': exception.args[0]} if user_exception else {} status = exception.args[1] if user_exception else 500 return await handle_http( downstream_request, 'GET', CIMultiDict(admin_headers_request(downstream_request)), 
URL(admin_root).with_path(f'/error_{status}'), params, b'', default_http_timeout, ) async def handle_application(is_websocket, downstream_request, method, path, query): public_host, _, _ = downstream_request.url.host.partition( f'.{root_domain_no_port}') possible_public_host, _, public_host_or_port_override = public_host.rpartition( '--') try: port_override = int(public_host_or_port_override) except ValueError: port_override = None else: if 1 <= port_override <= 65535: public_host = possible_public_host else: port_override = None host_api_url = admin_root + '/api/v1/application/' + public_host host_html_path = '/tools/' + public_host async with client_session.request( 'GET', host_api_url, headers=CIMultiDict(admin_headers_request(downstream_request)), ) as response: host_exists = response.status == 200 application = await response.json() if response.status != 200 and response.status != 404: raise UserException('Unable to start the application', response.status) if host_exists and application['state'] not in ['SPAWNING', 'RUNNING']: if ('x-data-workspace-no-modify-application-instance' not in downstream_request.headers): async with client_session.request( 'DELETE', host_api_url, headers=CIMultiDict( admin_headers_request(downstream_request)), ) as delete_response: await delete_response.read() raise UserException('Application ' + application['state'], 500) if not host_exists: if ('x-data-workspace-no-modify-application-instance' not in downstream_request.headers): async with client_session.request( 'PUT', host_api_url, headers=CIMultiDict( admin_headers_request(downstream_request)), ) as response: host_exists = response.status == 200 application = await response.json() else: raise UserException('Application stopped while starting', 500) if response.status != 200: raise UserException('Unable to start the application', response.status) if application['state'] not in ['SPAWNING', 'RUNNING']: raise UserException( 'Attempted to start the application, but it ' + 
application['state'], 500, ) if not application['proxy_url']: return await handle_http( downstream_request, 'GET', CIMultiDict(admin_headers_request(downstream_request)), admin_root + host_html_path + '/spawning', {}, b'', default_http_timeout, (('content-security-policy', csp_application_spawning), ), ) if is_websocket: return await handle_application_websocket(downstream_request, application['proxy_url'], path, query, port_override) if application['state'] == 'SPAWNING': return await handle_application_http_spawning( downstream_request, method, application_upstream(application['proxy_url'], path, port_override), query, host_html_path, host_api_url, public_host, ) if (application['state'] == 'RUNNING' and application['wrap'] != 'NONE' and not port_override): return await handle_application_http_running_wrapped( downstream_request, application_upstream(application['proxy_url'], path, port_override), host_html_path, public_host, ) return await handle_application_http_running_direct( downstream_request, method, application_upstream(application['proxy_url'], path, port_override), query, public_host, ) async def handle_application_websocket(downstream_request, proxy_url, path, query, port_override): upstream_url = application_upstream(proxy_url, path, port_override).with_query(query) return await handle_websocket( downstream_request, CIMultiDict(application_headers(downstream_request)), upstream_url, ) def application_upstream(proxy_url, path, port_override): return (URL(proxy_url).with_path(path) if port_override is None else URL(proxy_url).with_path(path).with_port(port_override)) async def handle_application_http_spawning( downstream_request, method, upstream_url, query, host_html_path, host_api_url, public_host, ): host = downstream_request.headers['host'] try: logger.info('Spawning: Attempting to connect to %s', upstream_url) response = await handle_http( downstream_request, method, CIMultiDict(application_headers(downstream_request)), upstream_url, query, await 
get_data(downstream_request), spawning_http_timeout, # Although the application is spawning, if the response makes it back to the client, # we know the application is running, so we return the _running_ CSP headers ( ( 'content-security-policy', csp_application_running_direct(host, public_host), ), ), ) except Exception: # pylint: disable=broad-except logger.info('Spawning: Failed to connect to %s', upstream_url) return await handle_http( downstream_request, 'GET', CIMultiDict(admin_headers_request(downstream_request)), admin_root + host_html_path + '/spawning', {}, b'', default_http_timeout, (('content-security-policy', csp_application_spawning), ), ) else: # Once a streaming response is done, if we have not yet returned # from the handler, it looks like aiohttp can cancel the current # task. We set RUNNING in another task to avoid it being cancelled async def set_application_running(): async with client_session.request( 'PATCH', host_api_url, json={'state': 'RUNNING'}, headers=CIMultiDict( admin_headers_request(downstream_request)), timeout=default_http_timeout, ) as patch_response: await patch_response.read() asyncio.ensure_future(set_application_running()) await send_to_google_analytics(downstream_request) return response async def handle_application_http_running_wrapped(downstream_request, upstream_url, host_html_path, public_host): upstream = URL(upstream_url) direct_host = f'{public_host}--{upstream.port}.{root_domain}' return await handle_http( downstream_request, 'GET', CIMultiDict(admin_headers_request(downstream_request)), admin_root + host_html_path + '/running', {}, b'', default_http_timeout, (( 'content-security-policy', csp_application_running_wrapped(direct_host), ), ), ) async def handle_application_http_running_direct(downstream_request, method, upstream_url, query, public_host): host = downstream_request.headers['host'] await send_to_google_analytics(downstream_request) return await handle_http( downstream_request, method, 
CIMultiDict(application_headers(downstream_request)), upstream_url, query, await get_data(downstream_request), default_http_timeout, (( 'content-security-policy', csp_application_running_direct(host, public_host), ), ), ) async def handle_mirror(downstream_request, method, path): mirror_path = path[len(mirror_local_root):] upstream_url = URL(mirror_remote_root + mirror_path) return await handle_http( downstream_request, method, CIMultiDict(mirror_headers(downstream_request)), upstream_url, {}, await get_data(downstream_request), default_http_timeout, ) async def handle_superset(downstream_request, method, path, query): upstream_url = URL(superset_root).with_path(path) host = downstream_request.headers['host'] return await handle_http( downstream_request, method, CIMultiDict(superset_headers(downstream_request)), upstream_url, query, await get_data(downstream_request), default_http_timeout, (( 'content-security-policy', csp_application_running_direct(host, 'superset'), ), ), ) async def handle_admin(downstream_request, method, headers, path, query, data): upstream_url = URL(admin_root).with_path(path) return await handle_http( downstream_request, method, headers, upstream_url, query, data, default_http_timeout, ) async def handle_websocket(downstream_request, upstream_headers, upstream_url): protocol = downstream_request.headers.get('Sec-WebSocket-Protocol') protocols = (protocol, ) if protocol else () async def proxy_msg(msg, to_ws): if msg.type == aiohttp.WSMsgType.TEXT: await to_ws.send_str(msg.data) elif msg.type == aiohttp.WSMsgType.BINARY: await to_ws.send_bytes(msg.data) elif msg.type == aiohttp.WSMsgType.CLOSE: await to_ws.close() elif msg.type == aiohttp.WSMsgType.ERROR: await to_ws.close() async def upstream(): try: async with client_session.ws_connect( str(upstream_url), headers=upstream_headers, protocols=protocols) as upstream_ws: upstream_connection.set_result(upstream_ws) downstream_ws = await downstream_connection async for msg in upstream_ws: await 
proxy_msg(msg, downstream_ws) except BaseException as exception: if not upstream_connection.done(): upstream_connection.set_exception(exception) raise finally: try: await downstream_ws.close() except UnboundLocalError: # If we didn't get to the line that creates `downstream_ws` pass # This is slightly convoluted, but aiohttp documents that reading # from websockets should be done in the same task as the websocket was # created, so we read from downstream in _this_ task, and create # another task to connect to and read from the upstream socket. We # also need to make sure we wait for each connection before sending # data to it downstream_connection = asyncio.Future() upstream_connection = asyncio.Future() upstream_task = asyncio.ensure_future(upstream()) try: upstream_ws = await upstream_connection _, _, _, with_session_cookie = downstream_request[SESSION_KEY] downstream_ws = await with_session_cookie( web.WebSocketResponse(protocols=protocols, heartbeat=30)) await downstream_ws.prepare(downstream_request) downstream_connection.set_result(downstream_ws) async for msg in downstream_ws: await proxy_msg(msg, upstream_ws) finally: upstream_task.cancel() return downstream_ws async def send_to_google_analytics(downstream_request): # Not perfect, but a good enough guide for usage _, extension = os.path.splitext(downstream_request.url.path) send_to_google = ga_tracking_id and extension in { '', '.doc', '.docx', '.html', '.pdf', '.ppt', '.pptx', '.xlsx', '.xlsx', } if not send_to_google: return async def _send(): logger.info("Sending to Google Analytics %s...", downstream_request.url) peer_ip, _ = get_peer_ip(downstream_request) response = await client_session.request( 'POST', 'https://www.google-analytics.com/collect', data={ 'v': '1', 'tid': ga_tracking_id, 'cid': str(uuid.uuid4()), 't': 'pageview', 'uip': peer_ip, 'dh': downstream_request.url.host, 'dp': downstream_request.url.path_qs, 'ds': 'data-workspace-server', 'dr': downstream_request.headers.get('referer', ''), 
'ua': downstream_request.headers.get('user-agent', ''), }, timeout=default_http_timeout, ) logger.info("Sending to Google Analytics %s... %s", downstream_request.url, response) asyncio.create_task(_send()) async def get_data(downstream_request): # Avoid aiohttp treating request as chunked unnecessarily, which works # for some upstream servers, but not all. Specifically RStudio drops # GET responses half way through if the request specified a chunked # encoding. AFAIK RStudio uses a custom webserver, so this behaviour # is not documented anywhere. # fmt: off return \ b'' if ( 'content-length' not in downstream_request.headers and downstream_request.headers.get('transfer-encoding', '').lower() != 'chunked' ) else \ await downstream_request.read() if downstream_request.content.at_eof() else \ downstream_request.content # fmt: on async def handle_http( downstream_request, upstream_method, upstream_headers, upstream_url, upstream_query, upstream_data, timeout, response_headers=tuple(), ): async with client_session.request( upstream_method, str(upstream_url), params=upstream_query, headers=upstream_headers, data=upstream_data, allow_redirects=False, timeout=timeout, ) as upstream_response: _, _, _, with_session_cookie = downstream_request[SESSION_KEY] downstream_response = await with_session_cookie( web.StreamResponse( status=upstream_response.status, headers=CIMultiDict( without_transfer_encoding(upstream_response) + response_headers), )) await downstream_response.prepare(downstream_request) async for chunk in upstream_response.content.iter_any(): await downstream_response.write(chunk) return downstream_response def server_logger(): @web.middleware async def _server_logger(request, handler): request_logger = get_random_context_logger() request['logger'] = request_logger url = request_url(request) request_logger.info( 'Receiving (%s) (%s) (%s) (%s)', request.method, url, request.headers.get('User-Agent', '-'), request.headers.get('X-Forwarded-For', '-'), ) response = 
await handler(request) request_logger.info( 'Responding (%s) (%s) (%s) (%s) (%s) (%s)', request.method, url, request.headers.get('User-Agent', '-'), request.headers.get('X-Forwarded-For', '-'), response.status, response.content_length, ) return response return _server_logger def authenticate_by_staff_sso(): auth_path = 'o/authorize/' token_path = 'o/token/' me_path = 'api/v1/user/me/' grant_type = 'authorization_code' scope = 'read write' response_type = 'code' redirect_from_sso_path = '/__redirect_from_sso' session_token_key = 'staff_sso_access_token' async def get_redirect_uri_authenticate(set_session_value, redirect_uri_final): scheme = URL(redirect_uri_final).scheme sso_state = await set_redirect_uri_final(set_session_value, redirect_uri_final) redirect_uri_callback = urllib.parse.quote( get_redirect_uri_callback(scheme), safe='') return (f'{sso_base_url}{auth_path}?' f'scope={scope}&state={sso_state}&' f'redirect_uri={redirect_uri_callback}&' f'response_type={response_type}&' f'client_id={sso_client_id}') def get_redirect_uri_callback(scheme): return str( URL.build( host=root_domain_no_port, port=root_port, scheme=scheme, path=redirect_from_sso_path, )) async def set_redirect_uri_final(set_session_value, redirect_uri_final): session_key = secrets.token_hex(32) sso_state = urllib.parse.quote( f'{session_key}_{redirect_uri_final}', safe='') await set_session_value(session_key, redirect_uri_final) return sso_state async def get_redirect_uri_final(get_session_value, sso_state): session_key, _, state_redirect_url = urllib.parse.unquote( sso_state).partition('_') return state_redirect_url, await get_session_value(session_key) async def redirection_to_sso(with_new_session_cookie, set_session_value, redirect_uri_final): return await with_new_session_cookie( web.Response( status=302, headers={ 'Location': await get_redirect_uri_authenticate(set_session_value, redirect_uri_final) }, )) @web.middleware async def _authenticate_by_sso(request, handler): sso_auth_required = 
is_sso_auth_required(request) if not sso_auth_required: request.setdefault('sso_profile_headers', ()) return await handler(request) get_session_value, set_session_value, with_new_session_cookie, _ = request[ SESSION_KEY] token = await get_session_value(session_token_key) if request.path != redirect_from_sso_path and token is None: return await redirection_to_sso(with_new_session_cookie, set_session_value, request_url(request)) if request.path == redirect_from_sso_path: code = request.query['code'] sso_state = request.query['state'] ( redirect_uri_final_from_url, redirect_uri_final_from_session, ) = await get_redirect_uri_final(get_session_value, sso_state) if redirect_uri_final_from_url != redirect_uri_final_from_session: # We might have been overtaken by a parallel request initiating another auth # flow, and so another session. However, because we haven't retrieved the final # URL from the session, we can't be sure that this is the same client that # initiated this flow. However, we can redirect back to SSO return await redirection_to_sso( with_new_session_cookie, set_session_value, redirect_uri_final_from_url, ) async with client_session.post( f'{sso_base_url}{token_path}', data={ 'grant_type': grant_type, 'code': code, 'client_id': sso_client_id, 'client_secret': sso_client_secret, 'redirect_uri': get_redirect_uri_callback(request_scheme(request)), }, ) as sso_response: sso_response_json = await sso_response.json() await set_session_value(session_token_key, sso_response_json['access_token']) return await with_new_session_cookie( web.Response( status=302, headers={'Location': redirect_uri_final_from_session}, )) # Get profile from Redis cache to avoid calling SSO on every request redis_profile_key = f'{PROFILE_CACHE_PREFIX}___{session_token_key}___{token}'.encode( 'ascii') with await redis_pool as conn: me_profile_raw = await conn.execute('GET', redis_profile_key) me_profile = json.loads(me_profile_raw) if me_profile_raw else None async def 
handler_with_sso_headers(): request['sso_profile_headers'] = ( ('sso-profile-email', me_profile['email']), # The default value of '' should be able to be removed after the cached # profile in Redis without contact_email has expired, i.e. 60 seconds after # deployment of this change ('sso-profile-contact-email', me_profile.get('contact_email', '')), ( 'sso-profile-related-emails', ','.join(me_profile.get('related_emails', [])), ), ('sso-profile-user-id', me_profile['user_id']), ('sso-profile-first-name', me_profile['first_name']), ('sso-profile-last-name', me_profile['last_name']), ) request['logger'].info( 'SSO-authenticated: %s %s %s', me_profile['email'], me_profile['user_id'], request_url(request), ) set_user({ "id": me_profile['user_id'], "email": me_profile['email'] }) return await handler(request) if me_profile: return await handler_with_sso_headers() async with client_session.get( f'{sso_base_url}{me_path}', headers={'Authorization': f'Bearer {token}'}) as me_response: me_profile_full = (await me_response.json() if me_response.status == 200 else None) if not me_profile_full: return await redirection_to_sso(with_new_session_cookie, set_session_value, request_url(request)) me_profile = { 'email': me_profile_full['email'], 'related_emails': me_profile_full['related_emails'], 'contact_email': me_profile_full['contact_email'], 'user_id': me_profile_full['user_id'], 'first_name': me_profile_full['first_name'], 'last_name': me_profile_full['last_name'], } with await redis_pool as conn: await conn.execute( 'SET', redis_profile_key, json.dumps(me_profile).encode('utf-8'), 'EX', 60, ) return await handler_with_sso_headers() return _authenticate_by_sso def authenticate_by_basic_auth(): @web.middleware async def _authenticate_by_basic_auth(request, handler): basic_auth_required = is_service_discovery(request) if not basic_auth_required: return await handler(request) if 'Authorization' not in request.headers: return web.Response(status=401) basic_auth_prefix = 'Basic ' 
auth_value = (request.headers['Authorization']
              [len(basic_auth_prefix):].strip().encode('ascii'))
        required_auth_value = base64.b64encode(
            f'{basic_auth_user}:{basic_auth_password}'.encode('ascii'))

        # Length check first so compare_digest sees equal-length inputs;
        # compare_digest keeps the comparison constant-time
        if len(auth_value) != len(
                required_auth_value) or not hmac.compare_digest(
                    auth_value, required_auth_value):
            return web.Response(status=401)

        request['logger'].info('Basic-authenticated: %s', basic_auth_user)
        set_user({"id": basic_auth_user})
        return await handler(request)

    return _authenticate_by_basic_auth


def authenticate_by_hawk_auth():
    """Middleware factory: Hawk-authenticate requests to dataset API paths.

    Closes over ``hawk_senders`` and ``redis_pool`` from the enclosing scope.
    """

    async def lookup_credentials(sender_id):
        # Linear scan of the configured senders; returns None (implicitly)
        # for an unknown sender id
        for hawk_sender in hawk_senders:
            if hawk_sender['id'] == sender_id:
                return hawk_sender

    async def seen_nonce(nonce, sender_id):
        # Replay protection: SET ... NX succeeds only for a fresh nonce, so a
        # non-OK reply means this (sender, nonce) pair was already used within
        # the 60-second expiry window
        nonce_key = f'nonce-{sender_id}-{nonce}'
        with await redis_pool as conn:
            response = await conn.execute('SET', nonce_key, '1', 'EX', 60,
                                          'NX')
            seen_nonce = response != b'OK'
            return seen_nonce

    @web.middleware
    async def _authenticate_by_hawk_auth(request, handler):
        hawk_auth_required = is_hawk_auth_required(request)

        if not hawk_auth_required:
            return await handler(request)

        try:
            authorization_header = request.headers['Authorization']
        except KeyError:
            request['logger'].info('Hawk missing header')
            return web.Response(status=401)

        # The full body is needed to verify the Hawk payload hash
        content = await request.read()

        # 15 is presumably the allowed clock-skew in seconds — TODO confirm
        # against authenticate_hawk_header's signature
        error_message, creds = await authenticate_hawk_header(
            lookup_credentials,
            seen_nonce,
            15,
            authorization_header,
            request.method,
            request.url.host,
            request.url.port,
            request.url.path_qs,
            request.headers['Content-Type'],
            content,
        )
        if error_message is not None:
            request['logger'].info('Hawk unauthenticated: %s', error_message)
            return web.Response(status=401)

        request['logger'].info('Hawk authenticated: %s', creds['id'])
        set_user({"id": creds['id']})
        return await handler(request)

    return _authenticate_by_hawk_auth


def authenticate_by_ip_whitelist():
    """Middleware factory: allow-list peer IPs for app/tool/file requests."""

    @web.middleware
    async def _authenticate_by_ip_whitelist(request, handler):
        # (condition continues on the next physical line of the file)
        ip_whitelist_required = (is_app_requested(request)
                                 or is_superset_requested(request) or
is_mirror_requested(request) or is_requesting_credentials(request) or is_requesting_files(request) or is_data_explorer_requested(request)) if not ip_whitelist_required: return await handler(request) peer_ip, _ = get_peer_ip(request) peer_ip_in_whitelist = any( ipaddress.IPv4Address(peer_ip) in ipaddress.IPv4Network( address_or_subnet) for address_or_subnet in application_ip_whitelist) if not peer_ip_in_whitelist: request['logger'].info('IP-whitelist unauthenticated: %s', peer_ip) return await handle_admin( request, 'GET', CIMultiDict(admin_headers_request(request)), '/error_403', {}, b'', ) request['logger'].info('IP-whitelist authenticated: %s', peer_ip) return await handler(request) return _authenticate_by_ip_whitelist async with aiohttp.ClientSession( auto_decompress=False, cookie_jar=aiohttp.DummyCookieJar()) as client_session: app = web.Application(middlewares=[ server_logger(), redis_session_middleware(redis_pool, root_domain_no_port, embed_path), authenticate_by_staff_sso(), authenticate_by_basic_auth(), authenticate_by_hawk_auth(), authenticate_by_ip_whitelist(), ]) app.add_routes([ getattr(web, method)(r'/{path:.*}', handle) for method in [ 'delete', 'get', 'head', 'options', 'patch', 'post', 'put', ] ]) elastic_apm_url = env.get("ELASTIC_APM_URL") elastic_apm_secret_token = env.get("ELASTIC_APM_SECRET_TOKEN") elastic_apm = ({ 'SERVICE_NAME': 'data-workspace', 'SECRET_TOKEN': elastic_apm_secret_token, 'SERVER_URL': elastic_apm_url, 'ENVIRONMENT': env.get('ENVIRONMENT', 'development'), } if elastic_apm_secret_token else {}) app['ELASTIC_APM'] = elastic_apm if elastic_apm: ElasticAPM(app) runner = web.AppRunner(app) await runner.setup() site = web.TCPSite(runner, '0.0.0.0', port) await site.start() await asyncio.Future()
# --- emailService chunk: Elastic APM + gRPC health imports, ECS logging ---
from elasticapm.conf import constants
from elasticapm.conf.constants import TRACEPARENT_HEADER_NAME
from elasticapm.utils.disttracing import TracingOptions, TraceParent
from grpc_health.v1 import health_pb2
from grpc_health.v1 import health_pb2_grpc
import logging
import ecs_logging
from google.protobuf.json_format import MessageToDict
import demo_pb2
import demo_pb2_grpc

# Emit ECS-formatted JSON logs to stdout; level is taken from the environment.
# NOTE(review): `sys`, `os` and `elasticapm` must be imported earlier in the
# file — they are not visible in this chunk.
logger = logging.getLogger('emailService')
logHandler = logging.StreamHandler(stream=sys.stdout)
logHandler.setFormatter(ecs_logging.StdlibFormatter())
logger.addHandler(logHandler)
logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))

# Auto-instrument supported libraries for Elastic APM tracing
elasticapm.instrument()

# Presumably assigned later from configuration; None until then — verify
event_dataset = None


def get_methods(object, spacing=20):
    # Collect names of callable attributes of `object`; a name whose getattr
    # raises is recorded as well (the bare `except` is deliberate here, so an
    # attribute that fails introspection still appears in the listing).
    # NOTE(review): shadows the `object` builtin; `spacing` is unused in the
    # visible portion and the function's `return` lies beyond this chunk.
    methodList = []
    for method_name in dir(object):
        try:
            if callable(getattr(object, method_name)):
                methodList.append(str(method_name))
        except:
            methodList.append(str(method_name))
# --- CLAMAV-REST chunk: Flask app setup with split-stream ECS logging ---
from flask_httpauth import HTTPBasicAuth
import clamd
# NOTE(review): aliasing to `hash` shadows the builtin of the same name
from passlib.hash import pbkdf2_sha256 as hash
from raven.contrib.flask import Sentry
import clamav_versions as versions
from version import __version__

# NOTE(review): `logging`, `os`, `sys`, `ecs_logging` and `Flask` must be
# imported earlier in the file — they are not visible in this chunk.
logger = logging.getLogger("CLAMAV-REST")
logger.setLevel(os.environ.get("LOGGING_LEVEL", logging.DEBUG))

# Warnings and above log to the stderr stream
stderr_handler = logging.StreamHandler(stream=sys.stderr)
stderr_handler.setLevel(logging.WARNING)
stderr_handler.setFormatter(ecs_logging.StdlibFormatter())
logger.addHandler(stderr_handler)

# Events below Warning log to the stdout stream; the filter keeps WARNING and
# above out of stdout so no record is emitted on both streams
stdout_handler = logging.StreamHandler(stream=sys.stdout)
stdout_handler.setLevel(logging.DEBUG)
stdout_handler.addFilter(lambda record: record.levelno < logging.WARNING)
stdout_handler.setFormatter(ecs_logging.StdlibFormatter())
logger.addHandler(stdout_handler)

app = Flask("CLAMAV-REST")
# APP_CONFIG presumably names a config object/dotted path — TODO confirm
app.config.from_object(os.environ["APP_CONFIG"])

# Build the basic-auth user table; the literal continues beyond this chunk
try:
    APPLICATION_USERS = dict(
        [
async def async_main():
    """Entry point: read proxy configuration from the environment and set up
    logging (the function continues beyond this chunk with CSP policies and
    the aiohttp application wiring)."""
    env = normalise_environment(os.environ)
    stdout_handler = logging.StreamHandler(sys.stdout)
    local = "dataworkspace.test" in env["ALLOWED_HOSTS"]
    # Outside local development, log ECS JSON without the raw message fields
    if not local:
        stdout_handler.setFormatter(
            ecs_logging.StdlibFormatter(
                exclude_fields=("log.original", "message")))
    # __Secure- prefix requires HTTPS, so it is skipped for local development.
    # NOTE(review): cookie_name is not referenced in the visible portion —
    # presumably used further down; verify.
    cookie_name = ("__Secure-" if not local else "") + "data_workspace_session"
    for logger_name in [
            "aiohttp.server", "aiohttp.web", "aiohttp.access", "proxy"
    ]:
        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.INFO)
        logger.addHandler(stdout_handler)

    # Required configuration — a missing key raises KeyError at startup
    port = int(env["PROXY_PORT"])
    admin_root = env["UPSTREAM_ROOT"]
    superset_root = env["SUPERSET_ROOT"]
    flower_root = env["FLOWER_ROOT"]
    hawk_senders = env["HAWK_SENDERS"]
    sso_base_url = env["AUTHBROKER_URL"]
    sso_host = URL(sso_base_url).host
    sso_client_id = env["AUTHBROKER_CLIENT_ID"]
    sso_client_secret = env["AUTHBROKER_CLIENT_SECRET"]
    redis_url = env["REDIS_URL"]
    root_domain = env["APPLICATION_ROOT_DOMAIN"]
    basic_auth_user = env["METRICS_SERVICE_DISCOVERY_BASIC_AUTH_USER"]
    basic_auth_password = env["METRICS_SERVICE_DISCOVERY_BASIC_AUTH_PASSWORD"]
    x_forwarded_for_trusted_hops = int(env["X_FORWARDED_FOR_TRUSTED_HOPS"])

    # Optional allow-list config: flatten the grouped addresses and append the
    # legacy ungrouped list
    ip_allowlist_groups = env.get("APPLICATION_IP_ALLOWLIST_GROUPS", {})
    ip_allowlist = [
        ip_address for ip_addresses in ip_allowlist_groups.values()
        for ip_address in ip_addresses
    ] + env.get("APPLICATION_IP_WHITELIST", [])
    ga_tracking_id = env.get("GA_TRACKING_ID")
    mirror_remote_root = env["MIRROR_REMOTE_ROOT"]
    mirror_local_root = "/__mirror/"
    # Only these downstream headers are forwarded on proxy-initiated requests
    # to the admin application
    required_admin_headers = (
        "cookie",
        "host",
        "x-csrftoken",
        "x-data-workspace-no-modify-application-instance",
        "x-scheme",
        "x-forwarded-proto",
        "referer",
        "user-agent",
    )
    # Cookies on the embed path must be allowed to be SameSite=None, so they
    # will be sent when the site is embedded in an iframe
    embed_path = "/visualisations/link"
    # Split "host:port"; root_port stays None when no (numeric) port is given
    root_domain_no_port, _, root_port_str = root_domain.partition(":")
    try:
        root_port = int(root_port_str)
    except ValueError:
        root_port = None
    # (assignment continues on the next physical line of the file)
    csp_common
= "object-src 'none';" if root_domain not in ["dataworkspace.test:8000"]: csp_common += "upgrade-insecure-requests;" # A spawning application on <my-application>.<root_domain> shows the admin-styled site, # fetching assets from <root_domain>, but also makes requests to the current domain csp_application_spawning = csp_common + ( f"default-src {root_domain};" f"base-uri {root_domain};" f"font-src {root_domain} data: https://fonts.gstatic.com;" f"form-action {root_domain} *.{root_domain};" f"frame-ancestors {root_domain};" f"img-src {root_domain} data: https://www.googletagmanager.com https://www.google-analytics.com https://ssl.gstatic.com https://www.gstatic.com *.google-analytics.com *.googletagmanager.com;" # pylint: disable=line-too-long f"script-src 'unsafe-inline' {root_domain} https://www.googletagmanager.com https://www.google-analytics.com https://tagmanager.google.com *.googletagmanager.com;" # pylint: disable=line-too-long f"style-src 'unsafe-inline' {root_domain} https://tagmanager.google.com https://fonts.googleapis.com;" f"connect-src {root_domain} 'self' *.google-analytics.com *.analytics.google.com *.googletagmanager.com;" ) # A running wrapped application on <my-application>.<root_domain> has an # iframe that directly routes to the app on <my-application>--8888.<root_domain> def csp_application_running_wrapped(direct_host): return csp_common + ( f"default-src 'none';" f"base-uri {root_domain};" f"form-action 'none';" f"frame-ancestors 'none';" f"frame-src {direct_host} {sso_host} https://www.googletagmanager.com;" f"img-src {root_domain} https://www.googletagmanager.com https://www.google-analytics.com https://ssl.gstatic.com https://www.gstatic.com *.google-analytics.com *.googletagmanager.com;" # pylint: disable=line-too-long f"font-src {root_domain} data: https://fonts.gstatic.com;" f"script-src 'unsafe-inline' https://www.googletagmanager.com https://www.google-analytics.com https://tagmanager.google.com *.googletagmanager.com;" # pylint: 
disable=line-too-long f"style-src 'unsafe-inline' {root_domain} https://tagmanager.google.com https://fonts.googleapis.com;" f"connect-src *.google-analytics.com *.analytics.google.com *.googletagmanager.com;" ) # A running application should only connect to self: this is where we have the most # concern because we run the least-trusted code def csp_application_running_direct(host, public_host): return csp_common + ( "default-src 'self';" "base-uri 'self';" # Safari does not have a 'self' for WebSockets f"connect-src 'self' wss://{host};" "font-src 'self' data:;" "form-action 'self';" f"frame-ancestors 'self' {root_domain} {public_host}.{root_domain};" "img-src 'self' data: blob:;" # Both JupyterLab and RStudio need `unsafe-eval` "script-src 'unsafe-inline' 'unsafe-eval' 'self' data:;" "style-src 'unsafe-inline' 'self' data:;" "worker-src 'self' blob:;") redis_pool = await aioredis.create_redis_pool(redis_url) default_http_timeout = aiohttp.ClientTimeout() # When spawning and tring to detect if the app is running, # we fail quickly and often so a connection check is quick spawning_http_timeout = aiohttp.ClientTimeout(sock_read=5, sock_connect=2) def get_random_context_logger(): return ContextAdapter( logger, {"context": "".join(random.choices(CONTEXT_ALPHABET, k=8))}) def without_transfer_encoding(request_or_response): return tuple((key, value) for key, value in request_or_response.headers.items() if key.lower() != "transfer-encoding") def admin_headers_request(downstream_request): # When we make a deliberate request to the admin application from the # proxy we don't want to proxy content-length or content-type return (tuple((key, value) for key, value in downstream_request.headers.items() if key.lower() in required_admin_headers) + downstream_request["sso_profile_headers"]) def admin_headers_proxy(downstream_request): return (tuple((key, value) for key, value in downstream_request.headers.items() if key.lower() in required_admin_headers + ("content-length", 
"content-type")) + downstream_request["sso_profile_headers"]) def flower_headers_proxy(downstream_request): return (tuple((key, value) for key, value in downstream_request.headers.items() if key.lower() in required_admin_headers + ("content-length", "content-type", "authorization")) + downstream_request["sso_profile_headers"]) def mirror_headers(downstream_request): return tuple((key, value) for key, value in downstream_request.headers.items() if key.lower() not in ["host", "transfer-encoding"]) def application_headers(downstream_request): return (without_transfer_encoding(downstream_request) + ( (("x-scheme", downstream_request.headers["x-forwarded-proto"]), ) if "x-forwarded-proto" in downstream_request.headers else ()) + downstream_request["sso_profile_headers"]) async def superset_headers(downstream_request, path): credentials = {} dashboards = [] if not path.startswith("/static/"): host_api_url = admin_root + "/api/v1/core/get-superset-role-credentials" async with client_session.request( "GET", host_api_url, headers=CIMultiDict( admin_headers_request(downstream_request)), ) as response: if response.status == 200: response_json = await response.json() credentials = response_json["credentials"] dashboards = response_json["dashboards"] else: raise UserException( "Unable to fetch credentials for superset", response.status) def standardise_header(header): # converts 'multi_word_header' to 'Multi-Word-Header' return "-".join( [s.capitalize() for s in header.replace("_", "-").split("-")]) return CIMultiDict( without_transfer_encoding(downstream_request) + (tuple([(f"Credentials-{standardise_header(k)}", v) for k, v in credentials.items()])) + (tuple([("Dashboards", ",".join(dashboards))])) + downstream_request["sso_profile_headers"]) def is_service_discovery(request): return (request.url.path == "/api/v1/application" and request.url.host == root_domain_no_port and request.method == "GET") def is_superset_requested(request): return (request.url.host == 
f"superset.{root_domain_no_port}"
                or request.url.host == f"superset-edit.{root_domain_no_port}"
                or request.url.host == f"superset-admin.{root_domain_no_port}")

    def is_flower_requested(request):
        # Celery Flower lives on its own subdomain
        return request.url.host == f"flower.{root_domain_no_port}"

    def is_data_explorer_requested(request):
        return (request.url.path.startswith("/data-explorer/")
                and request.url.host == root_domain_no_port)

    def is_app_requested(request):
        # Any subdomain of the root domain that is not the mirror path and not
        # one of the dedicated superset/flower subdomains is a user tool/app
        return (request.url.host.endswith(f".{root_domain_no_port}")
                and not request.url.path.startswith(mirror_local_root)
                and not is_superset_requested(request)
                and not is_flower_requested(request))

    def is_mirror_requested(request):
        return request.url.path.startswith(mirror_local_root)

    def is_requesting_credentials(request):
        return (request.url.host == root_domain_no_port
                and request.url.path == "/api/v1/aws_credentials")

    def is_requesting_files(request):
        return request.url.host == root_domain_no_port and request.url.path == "/files"

    def is_dataset_requested(request):
        # API endpoints that are authenticated by Hawk rather than SSO
        return (request.url.path.startswith("/api/v1/dataset/")
                or request.url.path.startswith("/api/v1/reference-dataset/")
                or request.url.path.startswith("/api/v1/eventlog/")
                or request.url.path.startswith("/api/v1/account/")
                or request.url.path.startswith("/api/v1/application-instance/")
                or request.url.path.startswith("/api/v1/core/")
                ) and request.url.host == root_domain_no_port

    def is_hawk_auth_required(request):
        return is_dataset_requested(request)

    def is_healthcheck_requested(request):
        # is_app_requested is excluded so an app subdomain's /healthcheck is
        # still proxied to the app rather than treated as the ALB healthcheck
        return (request.url.path == "/healthcheck" and request.method == "GET"
                and not is_app_requested(request))

    def is_table_requested(request):
        return (request.url.path.startswith("/api/v1/table/")
                and request.url.host == root_domain_no_port
                and request.method == "POST")

    def is_peer_ip_required(request):
        # The healthcheck comes from the ALB, which doesn't send x-forwarded-for
        return not is_healthcheck_requested(request)

    def is_sso_auth_required(request):
        # SSO applies to everything except healthchecks, service discovery and
        # the Hawk-authenticated API (condition continues on the next line)
        return (not is_healthcheck_requested(request) and not
is_service_discovery(request) and not is_table_requested(request) and not is_dataset_requested(request)) def get_peer_ip(request): try: return (request.headers["x-forwarded-for"].split(",") [-x_forwarded_for_trusted_hops].strip()) except (KeyError, IndexError): return None def get_peer_ip_group(request): peer_ip = get_peer_ip(request) if peer_ip is None: return None for group, ip_addresses in ip_allowlist_groups.items(): for address_or_subnet in ip_addresses: if ipaddress.IPv4Address(peer_ip) in ipaddress.IPv4Network( address_or_subnet): return group return peer_ip def request_scheme(request): return request.headers.get("x-forwarded-proto", request.url.scheme) def request_url(request): return str(request.url.with_scheme(request_scheme(request))) async def handle(downstream_request): method = downstream_request.method path = downstream_request.url.path query = downstream_request.url.query app_requested = is_app_requested(downstream_request) mirror_requested = is_mirror_requested(downstream_request) superset_requested = is_superset_requested(downstream_request) flower_requested = is_flower_requested(downstream_request) # Websocket connections # - tend to close unexpectedly, both from the client and app # - don't need to show anything nice to the user on error is_websocket = (downstream_request.headers.get("connection", "").lower() == "upgrade" and downstream_request.headers.get( "upgrade", "").lower() == "websocket") try: if app_requested: return await handle_application(is_websocket, downstream_request, method, path, query) if mirror_requested: return await handle_mirror(downstream_request, method, path) if superset_requested: return await handle_superset(downstream_request, method, path, query) if flower_requested: return await handle_flower(downstream_request, method, path, query) return await handle_admin( downstream_request, method, CIMultiDict(admin_headers_proxy(downstream_request)), path, query, await get_data(downstream_request), ) except Exception as 
exception: # pylint: disable=broad-except user_exception = isinstance(exception, UserException) if not user_exception or (user_exception and exception.args[1] == 500): logger.exception( "Exception during %s %s %s", downstream_request.method, downstream_request.url, type(exception), ) if is_websocket: raise params = {"message": exception.args[0]} if user_exception else {} status = exception.args[1] if user_exception else 500 error_url = exception.args[2] if len( exception.args) > 2 else f"/error_{status}" return await handle_http( downstream_request, "GET", CIMultiDict(admin_headers_request(downstream_request)), URL(admin_root).with_path(error_url), params, b"", default_http_timeout, ) async def handle_application(is_websocket, downstream_request, method, path, query): public_host, _, _ = downstream_request.url.host.partition( f".{root_domain_no_port}") possible_public_host, _, public_host_or_port_override = public_host.rpartition( "--") try: port_override = int(public_host_or_port_override) except ValueError: port_override = None else: if 1 <= port_override <= 65535: public_host = possible_public_host else: port_override = None host_api_url = admin_root + "/api/v1/application/" + public_host host_html_path = "/tools/" + public_host async with client_session.request( "GET", host_api_url, headers=CIMultiDict(admin_headers_request(downstream_request)), ) as response: host_exists = response.status == 200 application = await response.json() if response.status != 200 and response.status != 404: raise UserException( "Unable to start the application", response.status, application.get("redirect_url", None), ) if host_exists and application["state"] not in ["SPAWNING", "RUNNING"]: if "x-data-workspace-no-modify-application-instance" not in downstream_request.headers: async with client_session.request( "DELETE", host_api_url, headers=CIMultiDict( admin_headers_request(downstream_request)), ) as delete_response: await delete_response.read() raise UserException("Application " + 
application["state"], 500) if not host_exists: if "x-data-workspace-no-modify-application-instance" not in downstream_request.headers: async with client_session.request( "PUT", host_api_url, headers=CIMultiDict( admin_headers_request(downstream_request)), ) as response: host_exists = response.status == 200 application = await response.json() else: raise UserException("Application stopped while starting", 500) if response.status != 200: raise UserException("Unable to start the application", response.status) if application["state"] not in ["SPAWNING", "RUNNING"]: raise UserException( "Attempted to start the application, but it " + application["state"], 500, ) if not application["proxy_url"]: return await handle_http( downstream_request, "GET", CIMultiDict(admin_headers_request(downstream_request)), admin_root + host_html_path + "/spawning", {}, b"", default_http_timeout, (("content-security-policy", csp_application_spawning), ), ) if is_websocket: return await handle_application_websocket(downstream_request, application["proxy_url"], path, query, port_override) if application["state"] == "SPAWNING": return await handle_application_http_spawning( downstream_request, method, application_upstream(application["proxy_url"], path, port_override), query, host_html_path, host_api_url, public_host, ) if (application["state"] == "RUNNING" and application["wrap"] != "NONE" and not port_override): return await handle_application_http_running_wrapped( downstream_request, application_upstream(application["proxy_url"], path, port_override), host_html_path, public_host, ) return await handle_application_http_running_direct( downstream_request, method, application_upstream(application["proxy_url"], path, port_override), query, public_host, ) async def handle_application_websocket(downstream_request, proxy_url, path, query, port_override): upstream_url = application_upstream(proxy_url, path, port_override).with_query(query) return await handle_websocket( downstream_request, 
CIMultiDict(application_headers(downstream_request)), upstream_url, ) def application_upstream(proxy_url, path, port_override): return (URL(proxy_url).with_path(path) if port_override is None else URL(proxy_url).with_path(path).with_port(port_override)) async def handle_application_http_spawning( downstream_request, method, upstream_url, query, host_html_path, host_api_url, public_host, ): host = downstream_request.headers["host"] try: logger.info("Spawning: Attempting to connect to %s", upstream_url) response = await handle_http( downstream_request, method, CIMultiDict(application_headers(downstream_request)), upstream_url, query, await get_data(downstream_request), spawning_http_timeout, # Although the application is spawning, if the response makes it back to the client, # we know the application is running, so we return the _running_ CSP headers ( ( "content-security-policy", csp_application_running_direct(host, public_host), ), ), ) except Exception: # pylint: disable=broad-except logger.info("Spawning: Failed to connect to %s", upstream_url) return await handle_http( downstream_request, "GET", CIMultiDict(admin_headers_request(downstream_request)), admin_root + host_html_path + "/spawning", {}, b"", default_http_timeout, (("content-security-policy", csp_application_spawning), ), ) else: # Once a streaming response is done, if we have not yet returned # from the handler, it looks like aiohttp can cancel the current # task. 
We set RUNNING in another task to avoid it being cancelled async def set_application_running(): async with client_session.request( "PATCH", host_api_url, json={"state": "RUNNING"}, headers=CIMultiDict( admin_headers_request(downstream_request)), timeout=default_http_timeout, ) as patch_response: await patch_response.read() asyncio.ensure_future(set_application_running()) return response async def handle_application_http_running_wrapped(downstream_request, upstream_url, host_html_path, public_host): upstream = URL(upstream_url) direct_host = f"{public_host}--{upstream.port}.{root_domain}" return await handle_http( downstream_request, "GET", CIMultiDict(admin_headers_request(downstream_request)), admin_root + host_html_path + "/running", {}, b"", default_http_timeout, (( "content-security-policy", csp_application_running_wrapped(direct_host), ), ), ) async def handle_application_http_running_direct(downstream_request, method, upstream_url, query, public_host): host = downstream_request.headers["host"] await send_to_google_analytics(downstream_request) return await handle_http( downstream_request, method, CIMultiDict(application_headers(downstream_request)), upstream_url, query, await get_data(downstream_request), default_http_timeout, (( "content-security-policy", csp_application_running_direct(host, public_host), ), ), ) async def handle_mirror(downstream_request, method, path): mirror_path = path[len(mirror_local_root):] upstream_url = URL(mirror_remote_root + mirror_path) return await handle_http( downstream_request, method, CIMultiDict(mirror_headers(downstream_request)), upstream_url, {}, await get_data(downstream_request), default_http_timeout, ) async def handle_superset(downstream_request, method, path, query): upstream_url = URL(superset_root).with_path(path) host = downstream_request.headers["host"] return await handle_http( downstream_request, method, await superset_headers(downstream_request, path), upstream_url, query, await get_data(downstream_request), 
default_http_timeout, (( "content-security-policy", csp_application_running_direct(host, "superset"), ), ), ) async def handle_flower(downstream_request, method, path, query): upstream_url = URL(flower_root).with_path(path) return await handle_http( downstream_request, method, CIMultiDict(flower_headers_proxy(downstream_request)), upstream_url, query, await get_data(downstream_request), default_http_timeout, ) async def handle_admin(downstream_request, method, headers, path, query, data): upstream_url = URL(admin_root).with_path(path) return await handle_http( downstream_request, method, headers, upstream_url, query, data, default_http_timeout, ) async def handle_websocket(downstream_request, upstream_headers, upstream_url): protocol = downstream_request.headers.get("Sec-WebSocket-Protocol") protocols = (protocol, ) if protocol else () async def proxy_msg(msg, to_ws): if msg.type == aiohttp.WSMsgType.TEXT: await to_ws.send_str(msg.data) elif msg.type == aiohttp.WSMsgType.BINARY: await to_ws.send_bytes(msg.data) elif msg.type == aiohttp.WSMsgType.CLOSE: await to_ws.close() elif msg.type == aiohttp.WSMsgType.ERROR: await to_ws.close() async def upstream(): try: async with client_session.ws_connect( str(upstream_url), headers=upstream_headers, protocols=protocols) as upstream_ws: upstream_connection.set_result(upstream_ws) downstream_ws = await downstream_connection async for msg in upstream_ws: await proxy_msg(msg, downstream_ws) except BaseException as exception: if not upstream_connection.done(): upstream_connection.set_exception(exception) raise finally: try: await downstream_ws.close() except UnboundLocalError: # If we didn't get to the line that creates `downstream_ws` pass # This is slightly convoluted, but aiohttp documents that reading # from websockets should be done in the same task as the websocket was # created, so we read from downstream in _this_ task, and create # another task to connect to and read from the upstream socket. 
We # also need to make sure we wait for each connection before sending # data to it downstream_connection = asyncio.Future() upstream_connection = asyncio.Future() upstream_task = asyncio.ensure_future(upstream()) try: upstream_ws = await upstream_connection _, _, _, with_session_cookie = downstream_request[SESSION_KEY] downstream_ws = await with_session_cookie( web.WebSocketResponse(protocols=protocols, heartbeat=30)) await downstream_ws.prepare(downstream_request) downstream_connection.set_result(downstream_ws) async for msg in downstream_ws: await proxy_msg(msg, upstream_ws) finally: upstream_task.cancel() return downstream_ws async def send_to_google_analytics(downstream_request): # Not perfect, but a good enough guide for usage _, extension = os.path.splitext(downstream_request.url.path) send_to_google = ga_tracking_id and extension in { "", ".doc", ".docx", ".html", ".pdf", ".ppt", ".pptx", ".xlsx", ".xlsx", } if not send_to_google: return async def _send(): logger.info("Sending to Google Analytics %s...", downstream_request.url) peer_ip = get_peer_ip(downstream_request) response = await client_session.request( "POST", "https://www.google-analytics.com/collect", data={ "v": "1", "tid": ga_tracking_id, "cid": str(uuid.uuid4()), "t": "pageview", "uip": peer_ip, "dh": downstream_request.url.host, "dp": downstream_request.url.path_qs, "ds": "data-workspace-server", "dr": downstream_request.headers.get("referer", ""), "ua": downstream_request.headers.get("user-agent", ""), }, timeout=default_http_timeout, ) logger.info("Sending to Google Analytics %s... %s", downstream_request.url, response) asyncio.create_task(_send()) async def get_data(downstream_request): # Avoid aiohttp treating request as chunked unnecessarily, which works # for some upstream servers, but not all. Specifically RStudio drops # GET responses half way through if the request specified a chunked # encoding. AFAIK RStudio uses a custom webserver, so this behaviour # is not documented anywhere. 
# fmt: off return \ b'' if ( 'content-length' not in downstream_request.headers and downstream_request.headers.get('transfer-encoding', '').lower() != 'chunked' ) else \ await downstream_request.read() if downstream_request.content.at_eof() else \ downstream_request.content # fmt: on async def handle_http( downstream_request, upstream_method, upstream_headers, upstream_url, upstream_query, upstream_data, timeout, response_headers=tuple(), ): async with client_session.request( upstream_method, str(upstream_url), params=upstream_query, headers=upstream_headers, data=upstream_data, allow_redirects=False, timeout=timeout, ) as upstream_response: _, _, _, with_session_cookie = downstream_request[SESSION_KEY] downstream_response = await with_session_cookie( web.StreamResponse( status=upstream_response.status, headers=CIMultiDict( without_transfer_encoding(upstream_response) + response_headers), )) await downstream_response.prepare(downstream_request) async for chunk in upstream_response.content.iter_any(): await downstream_response.write(chunk) return downstream_response def server_logger(): @web.middleware async def _server_logger(request, handler): request_logger = get_random_context_logger() request["logger"] = request_logger url = request_url(request) request_logger.info( "Receiving (%s) (%s) (%s) (%s)", request.method, url, request.headers.get("User-Agent", "-"), request.headers.get("X-Forwarded-For", "-"), ) response = await handler(request) request_logger.info( "Responding (%s) (%s) (%s) (%s) (%s) (%s)", request.method, url, request.headers.get("User-Agent", "-"), request.headers.get("X-Forwarded-For", "-"), response.status, response.content_length, ) return response return _server_logger def require_peer_ip(): @web.middleware async def _authenticate_by_peer_ip(request, handler): if not is_peer_ip_required(request): return await handler(request) peer_ip = get_peer_ip(request) if peer_ip is None: request["logger"].exception("No peer IP") return 
web.Response(status=500) return await handler(request) return _authenticate_by_peer_ip def authenticate_by_staff_sso(): auth_path = "o/authorize/" token_path = "o/token/" me_path = "api/v1/user/me/" grant_type = "authorization_code" scope = "read write" response_type = "code" redirect_from_sso_path = "/__redirect_from_sso" session_token_key = "staff_sso_access_token" async def get_redirect_uri_authenticate(set_session_value, redirect_uri_final): scheme = URL(redirect_uri_final).scheme sso_state = await set_redirect_uri_final(set_session_value, redirect_uri_final) redirect_uri_callback = urllib.parse.quote( get_redirect_uri_callback(scheme), safe="") return (f"{sso_base_url}{auth_path}?" f"scope={scope}&state={sso_state}&" f"redirect_uri={redirect_uri_callback}&" f"response_type={response_type}&" f"client_id={sso_client_id}") def get_redirect_uri_callback(scheme): return str( URL.build( host=root_domain_no_port, port=root_port, scheme=scheme, path=redirect_from_sso_path, )) async def set_redirect_uri_final(set_session_value, redirect_uri_final): session_key = secrets.token_hex(32) sso_state = urllib.parse.quote( f"{session_key}_{redirect_uri_final}", safe="") await set_session_value(session_key, redirect_uri_final) return sso_state async def get_redirect_uri_final(get_session_value, sso_state): session_key, _, state_redirect_url = urllib.parse.unquote( sso_state).partition("_") return state_redirect_url, await get_session_value(session_key) async def redirection_to_sso(with_new_session_cookie, set_session_value, redirect_uri_final): return await with_new_session_cookie( web.Response( status=302, headers={ "Location": await get_redirect_uri_authenticate(set_session_value, redirect_uri_final) }, )) @web.middleware async def _authenticate_by_sso(request, handler): sso_auth_required = is_sso_auth_required(request) if not sso_auth_required: request.setdefault("sso_profile_headers", ()) return await handler(request) get_session_value, set_session_value, 
with_new_session_cookie, _ = request[ SESSION_KEY] token = await get_session_value(session_token_key) if request.path != redirect_from_sso_path and token is None: return await redirection_to_sso(with_new_session_cookie, set_session_value, request_url(request)) if request.path == redirect_from_sso_path: code = request.query["code"] sso_state = request.query["state"] ( redirect_uri_final_from_url, redirect_uri_final_from_session, ) = await get_redirect_uri_final(get_session_value, sso_state) if redirect_uri_final_from_url != redirect_uri_final_from_session: # We might have been overtaken by a parallel request initiating another auth # flow, and so another session. However, because we haven't retrieved the final # URL from the session, we can't be sure that this is the same client that # initiated this flow. However, we can redirect back to SSO return await redirection_to_sso( with_new_session_cookie, set_session_value, redirect_uri_final_from_url, ) async with client_session.post( f"{sso_base_url}{token_path}", data={ "grant_type": grant_type, "code": code, "client_id": sso_client_id, "client_secret": sso_client_secret, "redirect_uri": get_redirect_uri_callback(request_scheme(request)), }, ) as sso_response: sso_response_json = await sso_response.json() await set_session_value(session_token_key, sso_response_json["access_token"]) return await with_new_session_cookie( web.Response( status=302, headers={"Location": redirect_uri_final_from_session}, )) # Get profile from Redis cache to avoid calling SSO on every request redis_profile_key = f"{PROFILE_CACHE_PREFIX}___{session_token_key}___{token}".encode( "ascii") with await redis_pool as conn: me_profile_raw = await conn.execute("GET", redis_profile_key) me_profile = json.loads(me_profile_raw) if me_profile_raw else None async def handler_with_sso_headers(): request["sso_profile_headers"] = ( ("sso-profile-email", me_profile["email"]), # The default value of '' should be able to be removed after the cached # profile in 
Redis without contact_email has expired, i.e. 60 seconds after # deployment of this change ("sso-profile-contact-email", me_profile.get("contact_email", "")), ( "sso-profile-related-emails", ",".join(me_profile.get("related_emails", [])), ), ("sso-profile-user-id", me_profile["user_id"]), ("sso-profile-first-name", me_profile["first_name"]), ("sso-profile-last-name", me_profile["last_name"]), ) request["logger"].info( "SSO-authenticated: %s %s %s", me_profile["email"], me_profile["user_id"], request_url(request), ) set_user({ "id": me_profile["user_id"], "email": me_profile["email"] }) return await handler(request) if me_profile: return await handler_with_sso_headers() async with client_session.get( f"{sso_base_url}{me_path}", headers={"Authorization": f"Bearer {token}"}, ) as me_response: me_profile_full = await me_response.json( ) if me_response.status == 200 else None if not me_profile_full: return await redirection_to_sso(with_new_session_cookie, set_session_value, request_url(request)) me_profile = { "email": me_profile_full["email"], "related_emails": me_profile_full["related_emails"], "contact_email": me_profile_full["contact_email"], "user_id": me_profile_full["user_id"], "first_name": me_profile_full["first_name"], "last_name": me_profile_full["last_name"], } with await redis_pool as conn: await conn.execute( "SET", redis_profile_key, json.dumps(me_profile).encode("utf-8"), "EX", 60, ) return await handler_with_sso_headers() return _authenticate_by_sso def authenticate_by_basic_auth(): @web.middleware async def _authenticate_by_basic_auth(request, handler): basic_auth_required = is_service_discovery(request) if not basic_auth_required: return await handler(request) if "Authorization" not in request.headers: return web.Response(status=401) basic_auth_prefix = "Basic " auth_value = (request.headers["Authorization"] [len(basic_auth_prefix):].strip().encode("ascii")) required_auth_value = base64.b64encode( 
f"{basic_auth_user}:{basic_auth_password}".encode("ascii")) if len(auth_value) != len( required_auth_value) or not hmac.compare_digest( auth_value, required_auth_value): return web.Response(status=401) request["logger"].info("Basic-authenticated: %s", basic_auth_user) set_user({"id": basic_auth_user}) return await handler(request) return _authenticate_by_basic_auth def authenticate_by_hawk_auth(): async def lookup_credentials(sender_id): for hawk_sender in hawk_senders: if hawk_sender["id"] == sender_id: return hawk_sender async def seen_nonce(nonce, sender_id): nonce_key = f"nonce-{sender_id}-{nonce}" with await redis_pool as conn: response = await conn.execute("SET", nonce_key, "1", "EX", 60, "NX") seen_nonce = response != b"OK" return seen_nonce @web.middleware async def _authenticate_by_hawk_auth(request, handler): hawk_auth_required = is_hawk_auth_required(request) if not hawk_auth_required: return await handler(request) try: authorization_header = request.headers["Authorization"] except KeyError: request["logger"].info("Hawk missing header") return web.Response(status=401) content = await request.read() error_message, creds = await authenticate_hawk_header( lookup_credentials, seen_nonce, 15, authorization_header, request.method, request.url.host, request.url.port, request.url.path_qs, request.headers["Content-Type"], content, ) if error_message is not None: request["logger"].info("Hawk unauthenticated: %s", error_message) return web.Response(status=401) request["logger"].info("Hawk authenticated: %s", creds["id"]) set_user({"id": creds["id"]}) return await handler(request) return _authenticate_by_hawk_auth def authenticate_by_ip_whitelist(): @web.middleware async def _authenticate_by_ip_whitelist(request, handler): ip_whitelist_required = (is_app_requested(request) or is_superset_requested(request) or is_mirror_requested(request) or is_requesting_credentials(request) or is_requesting_files(request) or is_data_explorer_requested(request) or 
is_flower_requested(request)) if not ip_whitelist_required: return await handler(request) peer_ip = get_peer_ip(request) peer_ip_in_whitelist = any( ipaddress.IPv4Address(peer_ip) in ipaddress.IPv4Network( address_or_subnet) for address_or_subnet in ip_allowlist) if not peer_ip_in_whitelist: request["logger"].info("IP-whitelist unauthenticated: %s", peer_ip) return await handle_admin( request, "GET", CIMultiDict(admin_headers_request(request)), "/error_403", {}, b"", ) request["logger"].info("IP-whitelist authenticated: %s", peer_ip) return await handler(request) return _authenticate_by_ip_whitelist async with aiohttp.ClientSession( auto_decompress=False, cookie_jar=aiohttp.DummyCookieJar(), skip_auto_headers=["Accept-Encoding"], ) as client_session: app = web.Application(middlewares=[ server_logger(), require_peer_ip(), redis_session_middleware(get_peer_ip_group, cookie_name, redis_pool, root_domain_no_port, embed_path), authenticate_by_staff_sso(), authenticate_by_basic_auth(), authenticate_by_hawk_auth(), authenticate_by_ip_whitelist(), ]) app.add_routes([ getattr(web, method)(r"/{path:.*}", handle) for method in [ "delete", "get", "head", "options", "patch", "post", "put", ] ]) elastic_apm_url = env.get("ELASTIC_APM_URL") elastic_apm_secret_token = env.get("ELASTIC_APM_SECRET_TOKEN") elastic_apm = ({ "SERVICE_NAME": "data-workspace", "SECRET_TOKEN": elastic_apm_secret_token, "SERVER_URL": elastic_apm_url, "ENVIRONMENT": env.get("ENVIRONMENT", "development"), } if elastic_apm_secret_token else {}) app["ELASTIC_APM"] = elastic_apm if elastic_apm: ElasticAPM(app) runner = web.AppRunner(app) await runner.setup() site = web.TCPSite(runner, "0.0.0.0", port) await site.start() await asyncio.Future()