def logged_duration_wrapper(*args, **kwargs):
    return logged_duration(
        *args,
        **{**kwargs, "condition": True},
    )
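
For reference, logged_duration is used as a context manager that logs how long its block took; the wrapper above just pins condition=True so the duration is always logged regardless of request sampling. A minimal usage sketch (the logger name and workload are illustrative, not from this codebase):

import logging

# hypothetical call site: everything is forwarded to logged_duration,
# but condition=True forces the message to be emitted unconditionally
with logged_duration_wrapper(
    logger=logging.getLogger("app"),
    message="Slow block took {duration_real}s",
):
    do_slow_work()  # hypothetical workload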
Example 2
def test_logged_duration_real_logger(
    app_with_stream_logger,
    # value to set the is_sampled flag to on the mock request
    is_sampled,
    # how long to sleep in seconds
    sleep_time,
    # message, log_level, condition - values to pass as arguments of logged_duration verbatim
    message,
    log_level,
    condition,
    # exception (class) to raise inside logged_duration, None to raise no exception
    raise_exception,
    # dict to update log_context with inside logged_duration, None to perform no update
    inject_context,
    # whether to use mocked time primitives to speed up the test
    mock_time,
    # sequence of log dicts to expect to be output as json logs
    expected_logs,
):
    app, stream = app_with_stream_logger

    with app.test_request_context("/", headers={}):
        request.is_sampled = is_sampled

        with (mock_time_functions()
              if mock_time else actual_time_functions()) as (
                  _sleep,
                  _perf_counter,
                  _process_time,
              ):
            with (null_context_manager() if raise_exception is None else
                  pytest.raises(raise_exception)):
                with timing.logged_duration(
                        logger=app.logger.getChild("foobar"),
                        message=message,
                        log_level=log_level,
                        condition=condition,
                ) as log_context:
                    _sleep(sleep_time)
                    if inject_context is not None:
                        log_context.update(inject_context)
                    if raise_exception is not None:
                        raise raise_exception("Boo")

    all_lines = tuple(
        json.loads(line) for line in stream.getvalue().splitlines())

    assert all_lines == (AnySupersetOf({
        "levelname": "INFO",
        "message": "Logging configured"
    }), ) + expected_logs
Example 3
def _perform_additional_checks(additional_checks, response_data, error_messages):
    for additional_check in additional_checks:
        try:
            with logged_duration(
                logger=current_app.logger,
                message=(
                    lambda log_context: logged_duration.default_message(log_context) + " ({check_function})"
                ),
                condition=(
                    lambda log_context: logged_duration.default_condition(log_context) or sys.exc_info()[0] is not None
                ),
            ) as log_context:
                log_context["check_function"] = str(additional_check)
                check_result = additional_check()
            response_data.update(check_result)
        except StatusError as e:
            error_messages.append(e.message)
def _get_certificate(url):
    with logged_duration(
        logger=current_app.logger,
        message=lambda _: (
            "Fetched certificate for SNS signature validation from {target_url}"
            if sys.exc_info()[0] is None else
            # need to literally format() the exception into the message as it's difficult to get it injected into extra
            "Failed to fetch certificate for SNS signature validation from {{target_url}}: {!r}"
            .format(sys.exc_info()[1])
        ),
        log_level=logging.INFO,
        condition=True,
    ) as log_context:
        log_context.update({"target_url": url})
        response = requests.get(url)
        response.raise_for_status()
        return response.content
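
The message lambdas in these examples are evaluated when logged_duration's context exits, so sys.exc_info() can select a success or failure message; doubled braces keep placeholders like {target_url} intact for the log formatter while the exception itself is format()ed straight into the string. A stripped-down sketch of the pattern, with an illustrative logger and URL:

import logging
import sys

import requests

with logged_duration(
    logger=logging.getLogger("app"),
    message=lambda _: (
        "Fetched {target_url}"
        if sys.exc_info()[0] is None else
        # escape the braces meant for the log formatter; format() in the exception
        "Failed to fetch {{target_url}}: {!r}".format(sys.exc_info()[1])
    ),
    log_level=logging.INFO,
    condition=True,
) as log_context:
    log_context["target_url"] = "https://example.com/cert.pem"  # illustrative URL
    requests.get("https://example.com/cert.pem").raise_for_status()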
Example 5
def test_logged_duration_mock_logger(
    app_with_mocked_logger,
    # value to set the is_sampled flag to on the mock request
    is_sampled,
    # how long to sleep in seconds
    sleep_time,
    # message, log_level, condition - values to pass as arguments of logged_duration verbatim
    message,
    log_level,
    condition,
    # exception (class) to raise inside logged_duration, None to raise no exception
    raise_exception,
    # dict to update log_context with inside logged_duration, None to perform no update
    inject_context,
    # whether to use mocked time primitives to speed up the test
    mock_time,
    # expected sequence of calls to the mocked logger's log method
    expected_call_args_list,
):
    with app_with_mocked_logger.test_request_context("/", headers={}):
        request.is_sampled = is_sampled

        with (mock_time_functions()
              if mock_time else actual_time_functions()) as (
                  _sleep,
                  _perf_counter,
                  _process_time,
              ):
            with (null_context_manager() if raise_exception is None else
                  pytest.raises(raise_exception)):
                with timing.logged_duration(
                        logger=app_with_mocked_logger.logger,
                        message=message,
                        log_level=log_level,
                        condition=condition,
                ) as log_context:
                    assert app_with_mocked_logger.logger.log.call_args_list == []
                    _sleep(sleep_time)
                    if inject_context is not None:
                        log_context.update(inject_context)
                    if raise_exception is not None:
                        raise raise_exception("Boo")

    assert app_with_mocked_logger.logger.log.call_args_list == expected_call_args_list
Example 6
    def after_request(self, response):
        x_compression_safe = response.headers.pop("X-Compression-Safe", None)
        known_values = {"0": False, "1": True}
        compress = known_values.get(x_compression_safe,
                                    self.compress_by_default)

        if x_compression_safe not in known_values and x_compression_safe != "" and x_compression_safe is not None:
            warnings.warn(
                f"{self.__class__.__name__} received unknown X-Compression-Safe header value {x_compression_safe!r} - "
                "falling back to default")

        # flask_gzip makes the minimum_size comparison itself, but we want to avoid outputting a misleading
        # logged_duration message if it's going to be prevented in the superclass.
        if compress and len(response.get_data()) >= self.minimum_size:
            with logged_duration(
                    message="Spent {duration_real}s compressing response"):
                return super().after_request(response)
        else:
            return response
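
A view can opt a response out of (or into) compression explicitly by setting the header this hook pops; a hypothetical sketch (route and payload are illustrative):

@app.route("/sensitive-report")
def sensitive_report():
    response = jsonify(build_report())  # hypothetical payload
    # "0" vetoes compression for this response and "1" forces it on;
    # any other non-empty value warns and falls back to compress_by_default
    response.headers["X-Compression-Safe"] = "0"
    return response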
Example 7
def get_app_status(
    data_api_client=None,
    search_api_client=None,
    ignore_dependencies=False,
    additional_checks=None,
    additional_checks_extended=None,
):
    """Generates output for `_status` endpoints on apps

    Uses the flask `current_app` global for logging and app config.

    :param data_api_client: The app's data_api_client, if used.
    :param search_api_client: The app's search_api_client, if used.
    :param ignore_dependencies: Minimal endpoint checks only (i.e. the app is routable and disk space is fine).
    :param additional_checks: A sequence of callables that return dicts of data to be injected into the final JSON
                              response or raise StatusErrors if they need to log an error that should fail the
                              check (this will cause the endpoint to return a 500). These will be called even if
                              ignore_dependencies=True and should be reserved for checks that the service requires to
                              operate, for example checks of backing services such as persistent datastores or
                              processes.
    :param additional_checks_extended: Similar to `additional_checks`, but only called when ignore_dependencies=False
    :return: A dictionary describing the current state of the app with, at least, a 'status' key with a value of 'ok'
             or 'error'.
    """
    error_messages = []
    response_data = {'status': 'ok'}

    with logged_duration(
        logger=current_app.logger,
        message=(
            lambda log_context: logged_duration.default_message(log_context) + " (get_disk_space_status)"
        ),
    ):
        disk_status = get_disk_space_status()
    response_data['disk'] = f'{disk_status[0]} ({disk_status[1]}% free)'
    if disk_status[0].lower() != 'ok':
        error_messages.append(f'Disk space low: {disk_status[1]}% remaining.')

    if additional_checks:
        _perform_additional_checks(additional_checks, response_data, error_messages)

    if not ignore_dependencies:
        response_data['version'] = current_app.config['VERSION']

        if data_api_client:
            data_api_status = data_api_client.get_status() or {'status': 'n/a'}
            response_data['api_status'] = data_api_status
            if data_api_status['status'].lower() != 'ok':
                error_messages.append('Error connecting to the Data API.')

        if search_api_client:
            search_api_status = search_api_client.get_status() or {'status': 'n/a'}
            response_data['search_api_status'] = search_api_status
            if search_api_status['status'].lower() != 'ok':
                error_messages.append('Error connecting to the Search API.')
        if additional_checks_extended:
            _perform_additional_checks(additional_checks_extended, response_data, error_messages)

    if error_messages:
        current_app.logger.error(
            "Request completed with error_messages = {error_messages}",
            extra={"error_messages": error_messages},
        )
        response_data['status'] = 'error'
        response_data['message'] = error_messages

    return jsonify(**response_data), 200 if not error_messages else 500
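
An additional_checks callable either returns a dict to merge into the JSON response or raises StatusError to fail the check; a hypothetical sketch (the redis client is an assumption, not part of this module):

def check_redis():
    # hypothetical backing-service check: raise StatusError to make the
    # endpoint report "error" (and return a 500), or return extra data
    # to be merged into the JSON response
    try:
        redis_client.ping()  # assumed pre-configured client
    except Exception as e:
        raise StatusError(f"Error connecting to redis: {e}")
    return {"redis_status": "ok"}

# e.g. from an app's _status view:
#     get_app_status(data_api_client, additional_checks=[check_redis])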
def _handle_subscription_confirmation(body_json, supported_topic_name):
    topic_arn = body_json["TopicArn"]
    topic_name = topic_arn.split(":")[-1]
    # check it's for the right topic
    if topic_name != supported_topic_name:
        current_app.logger.warning(
            "Received SubscriptionConfirmation request for unrecognized topic name {topic_name} in {topic_arn}",
            extra={
                "topic_name": topic_name,
                "topic_arn": topic_arn,
            },
        )
        abort(400, f"Unrecognized topic name {topic_name!r}")

    # send a request to provided url, ensuring sufficient logging
    try:
        with logged_duration(
            logger=current_app.logger,
            message=lambda _: (
                "Made GET request to {target_url} to confirm subscription to topic {topic_arn}"
                if sys.exc_info()[0] is None else
                # need to literally format() exception into message as it's difficult to get it injected into extra
                "Failed to make GET request to {{target_url}} to confirm subscription to {{topic_arn}}: {!r}"
                .format(sys.exc_info()[1])
            ),
            log_level=logging.INFO,
            condition=True,
        ) as log_context:
            confirmation_url = body_json["SubscribeURL"]
            log_context.update({
                "target_url": confirmation_url,
                "topic_arn": topic_arn,
            })
            confirmation_response = requests.get(confirmation_url)
            confirmation_response.raise_for_status()
    except requests.exceptions.RequestException:
        abort(
            400,
            f"SubscriptionConfirmation request to {confirmation_url!r} failed")

    # check we received what we expected from url
    subscription_arn = confirmation_request_id = None
    try:
        confirmation_etree = etree_fromstring(confirmation_response.content)
        # though the xml is (hopefully) supplied with an xmlns, we want to be quite lenient with what we accept,
        # but ElementTree path searches won't match if we omit a namespace that is present. defusedxml doesn't
        # provide an API to get the namespace of an element, but if there is one it appears in the string
        # representation of the element, so if a namespace is set we extract it from there and add it to the
        # search path.

        etree_str = str(confirmation_etree)
        if "{" in etree_str and "}" in etree_str:
            namespace = etree_str[etree_str.find("{"):etree_str.find("}") + 1]
        else:
            namespace = ""

        # Element.find() returns None if there's no match, but we want to return an empty string
        try:
            subscription_arn = confirmation_etree.find(
                f".//{namespace}ConfirmSubscriptionResult/{namespace}SubscriptionArn"
            ).text.strip()
        except AttributeError:
            subscription_arn = ""

        try:
            confirmation_request_id = confirmation_etree.find(
                f".//{namespace}ResponseMetadata/{namespace}RequestId",
            ).text.strip()
        except AttributeError:
            confirmation_request_id = ""

    except ParseError as e:
        current_app.logger.warning(e)

    if not subscription_arn:
        current_app.logger.warning(
            "SubscriptionConfirmation response parsing failed")
        abort(
            400,
            f"SubscriptionConfirmation request to {confirmation_url!r}: response parsing failed"
        )
    else:
        current_app.logger.info(
            "SubscriptionConfirmation succeeded for subscription {subscription_arn}",
            extra={
                "subscription_arn": subscription_arn,
                "confirmation_request_id": confirmation_request_id,
            },
        )

    return jsonify(status='ok'), 200
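
The parsing above targets the shape of AWS's ConfirmSubscription response; a minimal sketch of the extraction against an illustrative document (the ARN and request id are placeholders):

from defusedxml.ElementTree import fromstring as etree_fromstring

SAMPLE_BODY = b"""
<ConfirmSubscriptionResponse xmlns="http://sns.amazonaws.com/doc/2010-03-31/">
  <ConfirmSubscriptionResult>
    <SubscriptionArn>arn:aws:sns:eu-west-1:123456789012:some-topic:11111111-2222-3333-4444-555555555555</SubscriptionArn>
  </ConfirmSubscriptionResult>
  <ResponseMetadata>
    <RequestId>00000000-0000-0000-0000-000000000000</RequestId>
  </ResponseMetadata>
</ConfirmSubscriptionResponse>
"""

tree = etree_fromstring(SAMPLE_BODY)
# ElementTree renders a namespace as a "{uri}" prefix on the tag name
ns = "{http://sns.amazonaws.com/doc/2010-03-31/}"
arn = tree.find(f".//{ns}ConfirmSubscriptionResult/{ns}SubscriptionArn").text.strip()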
Example 9
def _handle_subscription_confirmation(body_json, supported_topic_name):
    topic_arn = body_json["TopicArn"]
    topic_name = topic_arn.split(":")[-1]
    # check it's for the right topic
    if topic_name != supported_topic_name:
        current_app.logger.warning(
            "Received SubscriptionConfirmation request for unrecognized topic name {topic_name} in {topic_arn}",
            extra={
                "topic_name": topic_name,
                "topic_arn": topic_arn,
            },
        )
        abort(400, f"Unrecognized topic name {topic_name!r}")

    # send a request to provided url, ensuring sufficient logging
    try:
        with logged_duration(
            logger=current_app.logger,
            message=lambda _: (
                "Made GET request to {target_url} to confirm subscription to topic {topic_arn}"
                if sys.exc_info()[0] is None else
                # need to literally format() exception into message as it's difficult to get it injected into extra
                "Failed to make GET request to {{target_url}} to confirm subscription to {{topic_arn}}: {!r}"
                .format(sys.exc_info()[1])
            ),
            log_level=logging.INFO,
            condition=True,
        ) as log_context:
            confirmation_url = body_json["SubscribeURL"]
            log_context.update({
                "target_url": confirmation_url,
                "topic_arn": topic_arn,
            })
            confirmation_response = requests.get(confirmation_url)
            confirmation_response.raise_for_status()
    except requests.exceptions.RequestException:
        abort(
            400,
            f"SubscriptionConfirmation request to {confirmation_url!r} failed")

    # check we received what we expected from url
    subscription_arn = confirmation_request_id = None
    try:
        confirmation_etree = etree_fromstring(confirmation_response.content)
        # though the xml is (hopefully) supplied with an xmlns, we want to be quite lenient with what we accept. but
        # lxml/xpath doesn't like us omitting the namespace if one is specified, so I'm simply assigning the toplevel
        # namespace (.nsmap(None)) to a short label `n` to allow us to specify xpath expressions with as much brevity
        # as possible.
        namespaces = {"n": confirmation_etree.nsmap[None]}
        subscription_arn = confirmation_etree.xpath(
            "normalize-space(string(/n:ConfirmSubscriptionResponse/n:ConfirmSubscriptionResult/n:SubscriptionArn))",
            namespaces=namespaces,
        )
        confirmation_request_id = confirmation_etree.xpath(
            "normalize-space(string(/n:ConfirmSubscriptionResponse/n:ResponseMetadata/n:RequestId))",
            namespaces=namespaces,
        )
    except ParseError as e:
        current_app.logger.warning(e)

    if not subscription_arn:
        current_app.logger.warning(
            "SubscriptionConfirmation response parsing failed")
        abort(
            400,
            f"SubscriptionConfirmation request to {confirmation_url!r}: response parsing failed"
        )
    else:
        current_app.logger.info(
            "SubscriptionConfirmation succeeded for subscription {subscription_arn}",
            extra={
                "subscription_arn": subscription_arn,
                "confirmation_request_id": confirmation_request_id,
            },
        )

    return jsonify(status='ok'), 200
def scan_and_tag_s3_object(
    s3_client,
    s3_bucket_name,
    s3_object_key,
    s3_object_version,
    sns_message_id=None,
):
    with logged_duration(
        logger=current_app.logger,
        message=lambda _: (
            "Handled bucket {s3_bucket_name} key {s3_object_key} version {s3_object_version}"
            if sys.exc_info()[0] is None else
            # need to literally format() the exception into message as it's difficult to get it injected into extra
            "Failed handling {{s3_bucket_name}} key {{s3_object_key}} version {{s3_object_version}}: {!r}"
            .format(sys.exc_info()[1])
        ),
        log_level=logging.INFO,
        condition=True,
    ) as log_context:
        base_log_context = {
            "s3_bucket_name": s3_bucket_name,
            "s3_object_key": s3_object_key,
            "s3_object_version": s3_object_version,
        }
        log_context.update(base_log_context)

        # TODO abort if file too big?

        with log_external_request(
                "S3",
                "get object tagging [{s3_bucket_name}/{s3_object_key} versionId {s3_object_version}]",
                logger=current_app.logger,
        ) as log_context_s3:
            log_context_s3.update(base_log_context)
            tagging_tag_set = s3_client.get_object_tagging(
                Bucket=s3_bucket_name,
                Key=s3_object_key,
                VersionId=s3_object_version,
            )["TagSet"]

        av_status = _prefixed_tag_values_from_tag_set(tagging_tag_set, "avStatus.")

        if av_status.get("avStatus.result") is None:
            current_app.logger.info(
                "Object version {s3_object_version} has no 'avStatus.result' tag - will scan...",
                extra={
                    **base_log_context,
                    "av_status": av_status,
                },
            )
        else:
            current_app.logger.info(
                "Object version {s3_object_version} already has 'avStatus.result' "
                "tag: {existing_av_status_result!r}",
                extra={
                    **base_log_context,
                    "existing_av_status_result": av_status["avStatus.result"],
                    "existing_av_status": av_status,
                },
            )
            return av_status, False, None

        clamd_client = get_clamd_socket()
        # first check our clamd is available - there's no point in going and fetching the object if we can't do
        # anything with it. allow a raised exception to bubble up as a 500, which seems the most appropriate thing
        # in this case
        clamd_client.ping()

        # the following two requests (to S3 for the file contents and to clamd for scanning) don't really happen
        # sequentially as we're going to attempt to stream the data received from one into the other (by passing
        # the StreamingBody file-like object from this response into .instream(...)), so these logged_duration
        # sections do NOT *directly* correspond to the file being downloaded and then the file being scanned. The
        # two activities will overlap in time, something that isn't expressible with logged_duration
        with log_external_request(
                "S3",
                "initiate object download [{s3_bucket_name}/{s3_object_key} versionId {s3_object_version}]",
                logger=current_app.logger,
        ) as log_context_s3:
            log_context_s3.update(base_log_context)
            s3_object = s3_client.get_object(
                Bucket=s3_bucket_name,
                Key=s3_object_key,
                VersionId=s3_object_version,
            )

        file_name = _filename_from_content_disposition(
            s3_object.get("ContentDisposition") or "")

        with logged_duration(
            logger=current_app.logger,
            message=lambda _: (
                "Scanned {file_length}byte file '{file_name}', result {clamd_result}"
                if sys.exc_info()[0] is None else
                # need to literally format() exception into message as it's difficult to get it injected into extra
                "Failed scanning {{file_length}}byte file '{{file_name}}': {!r}"
                .format(sys.exc_info()[1])
            ),
            log_level=logging.INFO,
            condition=True,
        ) as log_context_clamd:
            log_context_clamd.update({
                "file_length": s3_object["ContentLength"],
                "file_name": file_name or "<unknown>",
            })
            clamd_result = clamd_client.instream(s3_object["Body"])["stream"]
            log_context_clamd["clamd_result"] = clamd_result

        if clamd_result[0] == "ERROR":
            # let's hope this was a transient error and a later attempt may succeed. hard to know what else to do
            # in this case - tagging a file with "ERROR" would prevent further attempts.
            raise UnknownClamdError(
                f"clamd did not successfully scan file: {clamd_result!r}")

        with logged_duration(
            logger=current_app.logger,
            message=lambda _: (
                "Fetched clamd version string: {clamd_version}"
                if sys.exc_info()[0] is None else
                # need to literally format() exception into message as it's difficult to get it injected into extra
                "Failed fetching clamd version string: {!r}".format(sys.exc_info()[1])
            ),
            log_level=logging.DEBUG,
        ) as log_context_clamd:
            # hypothetically there is a race condition between the time of scanning the file and fetching the
            # version here when freshclam could give us a new definition file, making this information incorrect,
            # but it's a very small possibility
            clamd_version = clamd_client.version()
            log_context_clamd.update({"clamd_version": clamd_version})

        # we namespace all keys set as part of an avStatus update with an "avStatus." prefix, intending that all
        # of these keys are only ever set or removed together as they are all information about the same scanning
        # decision
        new_av_status = {
            "avStatus.result": "pass" if clamd_result[0] == "OK" else "fail",
            "avStatus.clamdVerStr": clamd_version,
            "avStatus.ts": datetime.datetime.utcnow().isoformat(),
        }

        # Now we briefly re-check the object's tags to ensure they weren't set by something else while we were
        # scanning. Note the impossibility of avoiding all possible race conditions as S3's API doesn't allow any
        # form of locking. What we *can* do is make the possible time period between check-tags and set-tags as
        # small as possible...
        with log_external_request(
                "S3",
                "get object tagging [{s3_bucket_name}/{s3_object_key} versionId {s3_object_version}]",
                logger=current_app.logger,
        ) as log_context_s3:
            log_context_s3.update(base_log_context)
            tagging_tag_set = s3_client.get_object_tagging(
                Bucket=s3_bucket_name,
                Key=s3_object_key,
                VersionId=s3_object_version,
            )["TagSet"]

        av_status = _prefixed_tag_values_from_tag_set(tagging_tag_set, "avStatus.")

        if av_status.get("avStatus.result") is not None:
            current_app.logger.warning(
                "Object was tagged with new 'avStatus.result' ({existing_av_status_result!r}) while we were "
                "scanning. Not applying our own 'avStatus' ({unapplied_av_status_result!r})",
                extra={
                    "existing_av_status_result": av_status["avStatus.result"],
                    "unapplied_av_status_result": new_av_status["avStatus.result"],
                    "existing_av_status": av_status,
                    "unapplied_av_status": new_av_status,
                },
            )
            return av_status, False, new_av_status

        tagging_tag_set = _tag_set_updated_with_dict(
            _tag_set_omitting_prefixed(tagging_tag_set, "avStatus."),
            new_av_status,
        )

        with log_external_request(
                "S3",
                "put object tagging [{s3_bucket_name}/{s3_object_key} versionId {s3_object_version}]",
                logger=current_app.logger,
        ) as log_context_s3:
            log_context_s3.update(base_log_context)
            s3_client.put_object_tagging(
                Bucket=s3_bucket_name,
                Key=s3_object_key,
                VersionId=s3_object_version,
                Tagging={"TagSet": tagging_tag_set},
            )

        if clamd_result[0] != "OK":
            # TODO? attempt to rectify the situation:
            # TODO? if this is (still) current version of object:
            # TODO?     S3: find most recent version of object which is tagged "good"
            # TODO?     if there is no such version:
            # TODO?         S3: upload fail whale?
            # TODO?     else copy that version to become new "current" ver for this key, ensuring to copy its tags
            # TODO?         note the impossibility of doing this without some race conditions

            notify_client = DMNotifyClient(
                current_app.config["DM_NOTIFY_API_KEY"])

            if len(clamd_result) >= 2 and clamd_result[1].lower() in map(
                str.lower,
                current_app.config["DM_EICAR_TEST_SIGNATURE_RESULT_STRINGS"],
            ):
                notify_kwargs = {
                    # we'll use the s3 ETag of the file as the notify ref - it will be the only piece of information
                    # that will be shared knowledge between a functional test and the application yet also allow the
                    # test to differentiate the results of its different test runs, allowing it to easily check for
                    # the message being sent
                    "reference": "eicar-found-{}-{}".format(
                        _normalize_hex(s3_object["ETag"]),
                        current_app.config["DM_ENVIRONMENT"],
                    ),
                    "to_email_address": current_app.config["DM_EICAR_TEST_SIGNATURE_VIRUS_ALERT_EMAIL"],
                }
            else:
                notify_kwargs = {
                    "to_email_address": current_app.config["DM_DEVELOPER_VIRUS_ALERT_EMAIL"],
                }

            try:
                notify_client.send_email(
                    template_name_or_id="developer_virus_alert",
                    personalisation={
                        "bucket_name": s3_bucket_name,
                        "object_key": s3_object_key,
                        "object_version": s3_object_version,
                        "file_name": file_name or "<unknown>",
                        "clamd_output": ", ".join(clamd_result),
                        "sns_message_id": sns_message_id or "<N/A>",
                        "dm_trace_id": getattr(request, "trace_id", None) or "<unknown>",
                    },
                    **notify_kwargs,
                )
            except EmailError as e:
                current_app.logger.error(
                    "Failed to send developer_virus_alert email after scanning "
                    "{s3_bucket_name}/{s3_object_key} versionId {s3_object_version}: {e}",
                    extra={
                        **base_log_context,
                        "e": str(e),
                    },
                )
                # however we probably don't want this to cause a 500 because the main task has been completed -
                # retrying it won't work, and e.g. we will want to signify to SNS that it should not attempt to
                # re-send this message

        return av_status, True, new_av_status
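
The tag-set helpers used above aren't shown in this listing; a plausible reconstruction of _prefixed_tag_values_from_tag_set, assuming boto3's TagSet shape (a list of {"Key": ..., "Value": ...} dicts), offered as a sketch rather than the actual implementation:

def _prefixed_tag_values_from_tag_set(tag_set, key_prefix):
    # collect tags whose Key starts with key_prefix (e.g. "avStatus.")
    # into a plain dict, keeping the full prefixed key
    return {
        tag["Key"]: tag["Value"]
        for tag in tag_set
        if tag["Key"].startswith(key_prefix)
    }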