Example #1
def load_data(
    platform,
    default=None,
    sample_name=None,
    timestamp=None,
    start_timestamp=None,
    trace=None,
    span_id=None,
    spans=None,
):
    # NOTE: Before editing this data, make sure you understand the context
    # in which it's being used. It is NOT only used for local development and
    # has production consequences.
    #   * bin/load-mocks to generate fake data for local testing
    #   * When a new project is created, a fake event is generated as a "starter"
    #     event so it's not an empty project.
    #   * When a user clicks Test Configuration from notification plugin settings page,
    #     a fake event is generated to go through the pipeline.
    data = None
    language = None
    platform_data = INTEGRATION_ID_TO_PLATFORM_DATA.get(platform)

    if platform_data is not None and platform_data["type"] != "language":
        language = platform_data["language"]

    samples_root = os.path.join(DATA_ROOT, "samples")
    all_samples = {f for f in os.listdir(samples_root) if f.endswith(".json")}

    for platform in (platform, language, default):
        if not platform:
            continue

        # Verify the file is explicitly within our samples folder; this
        # prevents a crafted name from traversing into other directories.
        json_path = f"{platform}.json"

        if json_path not in all_samples:
            continue

        if not sample_name:
            try:
                sample_name = INTEGRATION_ID_TO_PLATFORM_DATA[platform]["name"]
            except KeyError:
                pass

        # XXX: At this point, `json_path` is known to sit safely within
        # `samples_root` due to the check above, so it cannot traverse
        # outside it.
        with open(os.path.join(samples_root, json_path)) as fp:
            data = json.load(fp)
            break

    if data is None:
        return

    data = CanonicalKeyDict(data)
    if platform in ("csp", "hkpk", "expectct", "expectstaple"):
        return data

    # Generate a timestamp in the present.
    if timestamp is None:
        timestamp = timezone.now()
    else:
        timestamp = timestamp.replace(tzinfo=pytz.utc)
    data.setdefault("timestamp", to_timestamp(timestamp))

    if data.get("type") == "transaction":
        if start_timestamp is None:
            start_timestamp = timestamp - timedelta(seconds=3)
        else:
            start_timestamp = start_timestamp.replace(tzinfo=pytz.utc)
        data["start_timestamp"] = to_timestamp(start_timestamp)

        if trace is None:
            trace = uuid4().hex
        if span_id is None:
            span_id = uuid4().hex[:16]

        for tag in data["tags"]:
            if tag[0] == "trace":
                tag[1] = trace
            elif tag[0] == "trace.span":
                tag[1] = span_id
        data["contexts"]["trace"]["trace_id"] = trace
        data["contexts"]["trace"]["span_id"] = span_id
        if spans:
            data["spans"] = spans

        for span in data.get("spans", []):
            # Use data to generate span timestamps consistently and based
            # on event timestamp
            duration = span.get("data", {}).get("duration", 10.0)
            offset = span.get("data", {}).get("offset", 0)

            # Span doesn't have a parent, make it the transaction
            if span.get("parent_span_id") is None:
                span["parent_span_id"] = span_id
            if span.get("span_id") is None:
                span["span_id"] = uuid4().hex[:16]

            span_start = data["start_timestamp"] + offset
            span["trace_id"] = trace
            span.setdefault("start_timestamp", span_start)
            span.setdefault("timestamp", span_start + duration)

        measurements = data.get("measurements")

        if measurements:
            measurement_markers = {}
            for key, entry in measurements.items():
                if key in ["fp", "fcp", "lcp", "fid"]:
                    measurement_markers[f"mark.{key}"] = {
                        "value":
                        round(data["start_timestamp"] + entry["value"] / 1000,
                              3)
                    }
            measurements.update(measurement_markers)

    data["platform"] = platform
    # XXX: Message is a legacy alias for logentry. Do not overwrite if set.
    if "message" not in data:
        data["message"] = f"This is an example {sample_name or platform} exception"
    data.setdefault(
        "user",
        generate_user(ip_address="127.0.0.1",
                      username="******",
                      id=1,
                      email="*****@*****.**"),
    )
    data.setdefault(
        "extra",
        {
            "session": {
                "foo": "bar"
            },
            "results": [1, 2, 3, 4, 5],
            "emptyList": [],
            "emptyMap": {},
            "length": 10837790,
            "unauthorized": False,
            "url": "http://example.org/foo/bar/",
        },
    )
    data.setdefault("modules", {"my.package": "1.0.0"})
    data.setdefault(
        "request",
        {
            "cookies": "foo=bar;biz=baz",
            "url": "http://example.com/foo",
            "headers": {
                "Referer":
                "http://example.com",
                "Content-Type":
                "application/json",
                "User-Agent":
                "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
            },
            "env": {
                "ENV": "prod"
            },
            "query_string": "foo=bar",
            "data": '{"hello": "world"}',
            "method": "GET",
        },
    )

    return data
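
The traversal guard in this version is worth isolating: instead of sanitizing the requested name, it builds an allowlist of files that actually exist under the samples folder and requires set membership before opening anything. A minimal standalone sketch of the same idea (the root path and function name below are illustrative, not from the original):

import json
import os

SAMPLES_ROOT = "/path/to/samples"  # illustrative stand-in for os.path.join(DATA_ROOT, "samples")


def load_sample(name):
    # Enumerate the real files; a crafted name such as "../../etc/passwd"
    # can never be a member of this set, so a traversal attempt fails
    # closed without any string sanitization.
    allowed = {f for f in os.listdir(SAMPLES_ROOT) if f.endswith(".json")}
    candidate = f"{name}.json"
    if candidate not in allowed:
        return None
    with open(os.path.join(SAMPLES_ROOT, candidate)) as fp:
        return json.load(fp)
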
Example #2
def _do_symbolicate_event(cache_key,
                          start_time,
                          event_id,
                          symbolicate_task,
                          data=None):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "symbolicate"
                     },
                     skip_internal=False)
        error_logger.error("symbolicate.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    if killswitch_matches_context(
        "store.load-shed-symbolicate-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
        },
    ):
        return

    symbolication_function = get_symbolication_function(data)

    has_changed = False

    from_reprocessing = symbolicate_task is symbolicate_event_from_reprocessing

    symbolication_start_time = time()

    with sentry_sdk.start_span(op="tasks.store.symbolicate_event.symbolication") as span:
        span.set_data("symbolication_function", symbolication_function.__name__)
        with metrics.timer(
            "tasks.store.symbolicate_event.symbolication",
            tags={"symbolication_function": symbolication_function.__name__},
        ):
            while True:
                try:
                    with sentry_sdk.start_span(
                        op="tasks.store.symbolicate_event.%s" % symbolication_function.__name__
                    ) as span:
                        symbolicated_data = symbolication_function(data)
                        span.set_data("symbolicated_data", bool(symbolicated_data))

                    if symbolicated_data:
                        data = symbolicated_data
                        has_changed = True

                    break
                except RetrySymbolication as e:
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
                        error_logger.warning(
                            "symbolicate.slow",
                            extra={"project_id": project_id, "event_id": event_id},
                        )
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
                        # Do not drop event but actually continue with rest of pipeline
                        # (persisting unsymbolicated event)
                        metrics.incr(
                            "tasks.store.symbolicate_event.fatal",
                            tags={
                                "reason": "timeout",
                                "symbolication_function": symbolication_function.__name__,
                            },
                        )
                        error_logger.exception(
                            "symbolicate.failed.infinite_retry",
                            extra={"project_id": project_id, "event_id": event_id},
                        )
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
                        has_changed = True
                        break
                    else:
                        # sleep for `retry_after` but max 5 seconds and try again
                        metrics.incr(
                            "tasks.store.symbolicate_event.retry",
                            tags={"symbolication_function": symbolication_function.__name__},
                        )
                        sleep(min(e.retry_after, SYMBOLICATOR_MAX_RETRY_AFTER))
                        continue
                except Exception:
                    metrics.incr(
                        "tasks.store.symbolicate_event.fatal",
                        tags={
                            "reason": "error",
                            "symbolication_function": symbolication_function.__name__,
                        },
                    )
                    error_logger.exception("tasks.store.symbolicate_event.symbolication")
                    data.setdefault("_metrics", {})["flag.processing.error"] = True
                    data.setdefault("_metrics", {})["flag.processing.fatal"] = True
                    has_changed = True
                    break

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    process_task = process_event_from_reprocessing if from_reprocessing else process_event
    _do_process_event(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        process_task=process_task,
        data=data,
        data_has_changed=has_changed,
        from_symbolicate=True,
    )
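
The heart of this version is the bounded retry loop: keep calling the symbolication function while RetrySymbolication is raised, log once past a soft deadline, and give up (without dropping the event) past a hard one. A self-contained sketch of that pattern; the constants and callback names are illustrative stand-ins for the settings.* values and the real logging/metrics calls:

import time

WARN_TIMEOUT = 60.0    # illustrative stand-in for settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT
HARD_TIMEOUT = 600.0   # illustrative stand-in for settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT
MAX_RETRY_AFTER = 5.0  # illustrative stand-in for SYMBOLICATOR_MAX_RETRY_AFTER


class RetryLater(Exception):
    def __init__(self, retry_after):
        self.retry_after = retry_after


def call_with_deadlines(fn, on_warn, on_fatal):
    start = time.time()
    while True:
        try:
            return fn()
        except RetryLater as e:
            elapsed = time.time() - start
            if elapsed > WARN_TIMEOUT:
                on_warn()   # the real task logs "symbolicate.slow" here
            if elapsed > HARD_TIMEOUT:
                on_fatal()  # the real task flags the event and persists it unsymbolicated
                return None
            # Honor the backoff hint, but never sleep unboundedly.
            time.sleep(min(e.retry_after, MAX_RETRY_AFTER))
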
Example #3
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "process"
                     },
                     skip_internal=False)
        error_logger.error("process.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    if killswitch_matches_context(
        "store.load-shed-process-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
        },
    ):
        return

    with sentry_sdk.start_span(
            op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer(
            "tasks.store.process_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(
            op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer("tasks.store.process_event.stacktraces",
                           tags={"from_symbolicate": from_symbolicate}):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if has_changed and options.get("processing.can-use-scrubbers"):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer("tasks.store.datascrubbers.scrub",
                               tags={"from_symbolicate": from_symbolicate}):
                new_data = safe_execute(scrub_data,
                                        project=project,
                                        event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None:
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(
                op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                    "tasks.store.process_event.preprocessors",
                    tags={
                        "plugin": plugin.slug,
                        "from_symbolicate": from_symbolicate
                    },
            ):
                processors = safe_execute(plugin.get_event_preprocessors,
                                          data=data,
                                          _with_transaction=False)
                for processor in processors or ():
                    try:
                        result = processor(data)
                    except Exception:
                        error_logger.exception("tasks.store.preprocessors.error")
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        has_changed = True
                    else:
                        if result:
                            data = result
                            has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(remove_other=False,
                                     is_renormalize=True,
                                     **DEFAULT_STORE_NORMALIZER_ARGS)
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                    cache_key,
                    data,
                    project_id,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id,
                                 process_task, project)
            return

        cache_key = event_processing_store.store(data)

    from_reprocessing = process_task is process_event_from_reprocessing
    submit_save_event(project, from_reprocessing, cache_key, event_id,
                      start_time, data)
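
A recurring shape in this pipeline: each stage may return a replacement payload or nothing, a crashing stage flags the event rather than aborting, and the cache is rewritten only when something actually changed. A reduced sketch of that bookkeeping (all names are illustrative):

def run_stages(data, stages, store):
    # Each stage returns a replacement dict or a falsy value meaning "no
    # change"; a stage that crashes flags the event instead of aborting,
    # mirroring the error handling above.
    has_changed = False
    for stage in stages:
        try:
            result = stage(data)
        except Exception:
            data.setdefault("_metrics", {})["flag.processing.error"] = True
            has_changed = True
        else:
            if result:
                data = result
                has_changed = True
    # Only pay for a cache write when something actually moved.
    cache_key = store(data) if has_changed else None
    return data, cache_key


# e.g. one mutating stage and one no-op stage:
data, key = run_stages(
    {"project": 1},
    [lambda d: {**d, "touched": True}, lambda d: None],
    store=lambda d: "cache-key-1",
)
assert data == {"project": 1, "touched": True} and key == "cache-key-1"
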
Example #4
def load_data(platform, default=None, sample_name=None):
    # NOTE: Before editing this data, make sure you understand the context
    # in which it's being used. It is NOT only used for local development and
    # has production consequences.
    #   * bin/load-mocks to generate fake data for local testing
    #   * When a new project is created, a fake event is generated as a "starter"
    #     event so it's not an empty project.
    #   * When a user clicks Test Configuration from notification plugin settings page,
    #     a fake event is generated to go through the pipeline.
    data = None
    language = None
    platform_data = INTEGRATION_ID_TO_PLATFORM_DATA.get(platform)

    if platform_data is not None and platform_data['type'] != 'language':
        language = platform_data['language']

    for platform in (platform, language, default):
        if not platform:
            continue

        json_path = os.path.join(DATA_ROOT, 'samples', '%s.json' % (platform,))
        if not os.path.exists(json_path):
            continue

        if not sample_name:
            try:
                sample_name = INTEGRATION_ID_TO_PLATFORM_DATA[platform]['name']
            except KeyError:
                pass

        with open(json_path) as fp:
            data = json.loads(fp.read())
            break

    if data is None:
        return

    data = CanonicalKeyDict(data)
    if platform in ('csp', 'hkpk', 'expectct', 'expectstaple'):
        return data

    data['platform'] = platform
    # XXX: Message is a legacy alias for logentry. Do not overwrite if set.
    if 'message' not in data:
        data['message'] = 'This is an example %s exception' % (sample_name or platform,)
    data.setdefault(
        'user',
        generate_user(
            ip_address='127.0.0.1',
            username='******',
            id=1,
            email='*****@*****.**',
        ))
    data.setdefault(
        'extra', {
            'session': {
                'foo': 'bar',
            },
            'results': [1, 2, 3, 4, 5],
            'emptyList': [],
            'emptyMap': {},
            'length': 10837790,
            'unauthorized': False,
            'url': 'http://example.org/foo/bar/',
        })
    data.setdefault('modules', {
        'my.package': '1.0.0',
    })
    data.setdefault(
        'request', {
            "cookies": 'foo=bar;biz=baz',
            "url": "http://example.com/foo",
            "headers": {
                "Referer":
                "http://example.com",
                "Content-Type":
                "application/json",
                "User-Agent":
                "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36"
            },
            "env": {
                'ENV': 'prod',
            },
            "query_string": "foo=bar",
            "data": '{"hello": "world"}',
            "method": "GET"
        })

    return data
Example #5
def load_data(platform, default=None, sample_name=None):
    # NOTE: Before editing this data, make sure you understand the context
    # in which it's being used. It is NOT only used for local development and
    # has production consequences.
    #   * bin/load-mocks to generate fake data for local testing
    #   * When a new project is created, a fake event is generated as a "starter"
    #     event so it's not an empty project.
    #   * When a user clicks Test Configuration from notification plugin settings page,
    #     a fake event is generated to go through the pipeline.
    data = None
    language = None
    platform_data = INTEGRATION_ID_TO_PLATFORM_DATA.get(platform)

    if platform_data is not None and platform_data["type"] != "language":
        language = platform_data["language"]

    for platform in (platform, language, default):
        if not platform:
            continue

        json_path = os.path.join(DATA_ROOT, "samples", "%s.json" % (platform.encode("utf-8"),))
        if not os.path.exists(json_path):
            continue

        if not sample_name:
            try:
                sample_name = INTEGRATION_ID_TO_PLATFORM_DATA[platform]["name"]
            except KeyError:
                pass

        with open(json_path) as fp:
            data = json.loads(fp.read())
            break

    if data is None:
        return

    data = CanonicalKeyDict(data)
    if platform in ("csp", "hkpk", "expectct", "expectstaple"):
        return data

    # Transaction events need timestamp data set to something current.
    if platform == "transaction":
        now = timezone.now()
        now_time = to_timestamp(now)
        start_time = to_timestamp(now - timedelta(seconds=2))
        data.setdefault("timestamp", now_time)
        data.setdefault("start_timestamp", start_time)
        for span in data["spans"]:
            span.setdefault("timestamp", now_time)
            span.setdefault("start_timestamp", start_time)

    data["platform"] = platform
    # XXX: Message is a legacy alias for logentry. Do not overwrite if set.
    if "message" not in data:
        data["message"] = "This is an example %s exception" % (sample_name or platform,)
    data.setdefault(
        "user",
        generate_user(ip_address="127.0.0.1", username="******", id=1, email="*****@*****.**"),
    )
    data.setdefault(
        "extra",
        {
            "session": {"foo": "bar"},
            "results": [1, 2, 3, 4, 5],
            "emptyList": [],
            "emptyMap": {},
            "length": 10837790,
            "unauthorized": False,
            "url": "http://example.org/foo/bar/",
        },
    )
    data.setdefault("modules", {"my.package": "1.0.0"})
    data.setdefault(
        "request",
        {
            "cookies": "foo=bar;biz=baz",
            "url": "http://example.com/foo",
            "headers": {
                "Referer": "http://example.com",
                "Content-Type": "application/json",
                "User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
            },
            "env": {"ENV": "prod"},
            "query_string": "foo=bar",
            "data": '{"hello": "world"}',
            "method": "GET",
        },
    )

    return data
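
The transaction branch above stamps a canned event and its spans with a fresh, internally consistent time window before returning it. A standalone sketch of the same seeding, using stdlib datetime in place of Sentry's timezone and to_timestamp helpers (the function name is illustrative):

from datetime import datetime, timedelta, timezone


def seed_transaction_times(event, now=None):
    # The transaction covers the last ~2 seconds; spans default to the same
    # window unless the sample already pinned its own times (setdefault).
    now = now or datetime.now(timezone.utc)
    end = now.timestamp()
    start = (now - timedelta(seconds=2)).timestamp()
    event.setdefault("timestamp", end)
    event.setdefault("start_timestamp", start)
    for span in event.get("spans", []):
        span.setdefault("timestamp", end)
        span.setdefault("start_timestamp", start)
    return event
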
Example #6
def _do_symbolicate_event(cache_key, start_time, event_id, symbolicate_task, data=None):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "symbolicate"}, skip_internal=False
        )
        error_logger.error("symbolicate.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    symbolication_function = get_symbolication_function(data)

    has_changed = False

    from_reprocessing = symbolicate_task is symbolicate_event_from_reprocessing

    try:
        with sentry_sdk.start_span(op="tasks.store.symbolicate_event.symbolication") as span:
            span.set_data("symbolicaton_function", symbolication_function.__name__)

            with metrics.timer("tasks.store.symbolicate_event.symbolication"):
                symbolicated_data = symbolication_function(data)

            span.set_data("symbolicated_data", bool(symbolicated_data))
            if symbolicated_data:
                data = symbolicated_data
                has_changed = True

    except RetrySymbolication as e:
        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning(
                "symbolicate.slow", extra={"project_id": project_id, "event_id": event_id}
            )

        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "symbolicate.failed.infinite_retry",
                extra={"project_id": project_id, "event_id": event_id},
            )
            data.setdefault("_metrics", {})["flag.processing.error"] = True
            data.setdefault("_metrics", {})["flag.processing.fatal"] = True
            has_changed = True
        else:
            # Requeue the task in the "sleep" queue
            retry_symbolicate_event.apply_async(
                args=(),
                kwargs={
                    "symbolicate_task_name": symbolicate_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return
    except Exception:
        error_logger.exception("tasks.store.symbolicate_event.symbolication")
        data.setdefault("_metrics", {})["flag.processing.error"] = True
        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
        has_changed = True

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    process_task = process_event_from_reprocessing if from_reprocessing else process_event
    _do_process_event(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        process_task=process_task,
        data=data,
        data_has_changed=has_changed,
        from_symbolicate=True,
    )
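
Unlike Example #2, which blocks the worker with sleep() between retries, this version frees the worker by re-enqueuing the task with a broker-side delay (apply_async with countdown). A minimal sketch of that requeue pattern with Celery; the app, broker URL, task body, and error type are all hypothetical:

from celery import Celery

app = Celery("sketch", broker="memory://")  # hypothetical app and broker, for illustration


class TransientError(Exception):
    def __init__(self, retry_after):
        self.retry_after = retry_after


def do_symbolication(cache_key):
    raise TransientError(retry_after=5)  # hypothetical work that asks to be retried


@app.task
def symbolicate(cache_key, attempt=0):
    try:
        return do_symbolication(cache_key)
    except TransientError as e:
        # Re-enqueue with a broker-side delay instead of sleeping: the
        # worker slot is freed while the task waits out its backoff.
        symbolicate.apply_async(
            kwargs={"cache_key": cache_key, "attempt": attempt + 1},
            countdown=e.retry_after,
        )
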
Example #7
def load_data(platform, default=None, sample_name=None):
    # NOTE: Before editing this data, make sure you understand the context
    # in which it's being used. It is NOT only used for local development and
    # has production consequences.
    #   * bin/load-mocks to generate fake data for local testing
    #   * When a new project is created, a fake event is generated as a "starter"
    #     event so it's not an empty project.
    #   * When a user clicks Test Configuration from notification plugin settings page,
    #     a fake event is generated to go through the pipeline.
    data = None
    language = None
    platform_data = INTEGRATION_ID_TO_PLATFORM_DATA.get(platform)

    if platform_data is not None and platform_data['type'] != 'language':
        language = platform_data['language']

    for platform in (platform, language, default):
        if not platform:
            continue

        json_path = os.path.join(DATA_ROOT, 'samples', '%s.json' % (platform,))
        if not os.path.exists(json_path):
            continue

        if not sample_name:
            try:
                sample_name = INTEGRATION_ID_TO_PLATFORM_DATA[platform]['name']
            except KeyError:
                pass

        with open(json_path) as fp:
            data = json.loads(fp.read())
            break

    if data is None:
        return

    data = CanonicalKeyDict(data)
    if platform in ('csp', 'hkpk', 'expectct', 'expectstaple'):
        return data

    data['platform'] = platform
    # XXX: Message is a legacy alias for logentry. Do not overwrite if set.
    if 'message' not in data:
        data['message'] = 'This is an example %s exception' % (sample_name or platform, )
    data.setdefault('user', generate_user(
        ip_address='127.0.0.1',
        username='******',
        id=1,
        email='*****@*****.**',
    ))
    data.setdefault('extra', {
        'session': {
            'foo': 'bar',
        },
        'results': [1, 2, 3, 4, 5],
        'emptyList': [],
        'emptyMap': {},
        'length': 10837790,
        'unauthorized': False,
        'url': 'http://example.org/foo/bar/',
    })
    data.setdefault('modules', {
        'my.package': '1.0.0',
    })
    data.setdefault('request', {
        "cookies": 'foo=bar;biz=baz',
        "url": "http://example.com/foo",
        "headers": {
            "Referer":
            "http://example.com",
            "Content-Type":
            "application/json",
            "User-Agent":
            "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36"
        },
        "env": {
            'ENV': 'prod',
        },
        "query_string": "foo=bar",
        "data": '{"hello": "world"}',
        "method": "GET"
    })

    return data
Example #8
def _do_symbolicate_event(
    cache_key, start_time, event_id, symbolicate_task, data=None, queue_switches=0
):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "symbolicate"}, skip_internal=False
        )
        error_logger.error("symbolicate.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    from_reprocessing = (
        symbolicate_task is symbolicate_event_from_reprocessing
        or symbolicate_task is symbolicate_event_from_reprocessing_low_priority
    )

    # check whether the event is in the wrong queue and if so, move it to the other one.
    # we do this at most SYMBOLICATOR_MAX_QUEUE_SWITCHES times.
    if queue_switches >= SYMBOLICATOR_MAX_QUEUE_SWITCHES:
        metrics.gauge("tasks.store.symbolicate_event.low_priority.max_queue_switches", 1)
    else:
        is_low_priority = symbolicate_task in [
            symbolicate_event_low_priority,
            symbolicate_event_from_reprocessing_low_priority,
        ]
        should_be_low_priority = should_demote_symbolication(project_id)

        if is_low_priority != should_be_low_priority:
            metrics.gauge("tasks.store.symbolicate_event.low_priority.wrong_queue", 1)
            submit_symbolicate(
                should_be_low_priority,
                from_reprocessing,
                cache_key,
                event_id,
                start_time,
                data,
                queue_switches + 1,
            )
            return

    def _continue_to_process_event():
        process_task = process_event_from_reprocessing if from_reprocessing else process_event
        _do_process_event(
            cache_key=cache_key,
            start_time=start_time,
            event_id=event_id,
            process_task=process_task,
            data=data,
            data_has_changed=has_changed,
            from_symbolicate=True,
        )

    symbolication_function = get_symbolication_function(data)
    symbolication_function_name = getattr(symbolication_function, "__name__", "none")

    if killswitch_matches_context(
        "store.load-shed-symbolicate-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
            "symbolication_function": symbolication_function_name,
        },
    ):
        return _continue_to_process_event()

    has_changed = False

    symbolication_start_time = time()

    submission_ratio = options.get("symbolicate-event.low-priority.metrics.submission-rate")
    submit_realtime_metrics = not from_reprocessing and random.random() < submission_ratio

    if submit_realtime_metrics:
        with sentry_sdk.start_span(op="tasks.store.symbolicate_event.low_priority.metrics.counter"):
            timestamp = int(symbolication_start_time)
            try:
                realtime_metrics.increment_project_event_counter(project_id, timestamp)
            except Exception as e:
                sentry_sdk.capture_exception(e)

    with sentry_sdk.start_span(op="tasks.store.symbolicate_event.symbolication") as span:
        span.set_data("symbolication_function", symbolication_function_name)
        with metrics.timer(
            "tasks.store.symbolicate_event.symbolication",
            tags={"symbolication_function": symbolication_function_name},
        ):
            while True:
                try:
                    with sentry_sdk.start_span(
                        op="tasks.store.symbolicate_event.%s" % symbolication_function_name
                    ) as span:
                        symbolicated_data = symbolication_function(data)
                        span.set_data("symbolicated_data", bool(symbolicated_data))

                    if symbolicated_data:
                        data = symbolicated_data
                        has_changed = True

                    break
                except RetrySymbolication as e:
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
                        error_logger.warning(
                            "symbolicate.slow",
                            extra={"project_id": project_id, "event_id": event_id},
                        )
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
                        # Do not drop event but actually continue with rest of pipeline
                        # (persisting unsymbolicated event)
                        metrics.incr(
                            "tasks.store.symbolicate_event.fatal",
                            tags={
                                "reason": "timeout",
                                "symbolication_function": symbolication_function_name,
                            },
                        )
                        error_logger.exception(
                            "symbolicate.failed.infinite_retry",
                            extra={"project_id": project_id, "event_id": event_id},
                        )
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
                        has_changed = True
                        break
                    else:
                        # sleep for `retry_after` but max 5 seconds and try again
                        metrics.incr(
                            "tasks.store.symbolicate_event.retry",
                            tags={"symbolication_function": symbolication_function_name},
                        )
                        sleep(min(e.retry_after, SYMBOLICATOR_MAX_RETRY_AFTER))
                        continue
                except Exception:
                    metrics.incr(
                        "tasks.store.symbolicate_event.fatal",
                        tags={
                            "reason": "error",
                            "symbolication_function": symbolication_function_name,
                        },
                    )
                    error_logger.exception("tasks.store.symbolicate_event.symbolication")
                    data.setdefault("_metrics", {})["flag.processing.error"] = True
                    data.setdefault("_metrics", {})["flag.processing.fatal"] = True
                    has_changed = True
                    break

    if submit_realtime_metrics:
        with sentry_sdk.start_span(
            op="tasks.store.symbolicate_event.low_priority.metrics.histogram"
        ):
            symbolication_duration = int(time() - symbolication_start_time)
            try:
                realtime_metrics.increment_project_duration_counter(
                    project_id, timestamp, symbolication_duration
                )
            except Exception as e:
                sentry_sdk.capture_exception(e)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    return _continue_to_process_event()
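
New in this version is queue routing: before doing any work the task checks whether it sits on the right queue (realtime vs. low priority) for the project's current load, resubmits itself at most SYMBOLICATOR_MAX_QUEUE_SWITCHES times, and then stays put so an event cannot ping-pong between queues forever. The decision in isolation, as a small sketch (the cap value and names are illustrative):

MAX_QUEUE_SWITCHES = 3  # illustrative cap, in the spirit of SYMBOLICATOR_MAX_QUEUE_SWITCHES


def route(is_low_priority, should_be_low_priority, queue_switches):
    # "stay" = process where we are; "resubmit" = hop to the other queue.
    if queue_switches >= MAX_QUEUE_SWITCHES:
        # Stop moving: processing on the wrong queue beats bouncing forever.
        return "stay"
    if is_low_priority != should_be_low_priority:
        return "resubmit"
    return "stay"


# A demoted project's event arriving on the realtime queue gets moved once,
# but never more than MAX_QUEUE_SWITCHES times:
assert route(False, True, 0) == "resubmit"
assert route(False, True, MAX_QUEUE_SWITCHES) == "stay"
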