Beispiel #1
0
def _get_calculated_grouping_variants_for_event(event, config):
    """Collect the grouping components of every strategy, keyed by variant.

    All strategies of ``config`` are asked for their component variants.
    The first strategy that yields a contributing component "wins"; any
    contributing component later produced by a *different* strategy is
    switched off and annotated with a hint naming the winner.

    Returns a dict mapping each variant name to a ``GroupingComponent``
    wrapping the components collected for that variant.
    """
    winner_name = None
    winner_hint = None
    collected = {}

    for strategy in config.iter_strategies():
        variants = strategy.get_grouping_component_variants(event, config=config)
        for variant, component in six.iteritems(variants):
            collected.setdefault(variant, []).append(component)

            # Non-contributing components neither win nor need demoting.
            if not component.contributes:
                continue

            if winner_name is None:
                winner_name = strategy.name
                if variant != 'default':
                    label = '%s of %s' % (strategy.name, variant)
                else:
                    label = strategy.name
                winner_hint = '%s takes precedence' % label
            elif winner_name != strategy.name:
                component.update(contributes=False, hint=winner_hint)

    result = {}
    for variant, components in six.iteritems(collected):
        wrapper = GroupingComponent(id=variant, values=components)
        # Explain why a whole variant ended up not contributing.
        if not wrapper.contributes and winner_hint:
            wrapper.update(hint=winner_hint)
        result[variant] = wrapper

    return result
Beispiel #2
0
def get_module_component_v1(abs_path, module, platform):
    """Build the ``module`` grouping component for a frame.

    Returns an empty component when ``module`` is ``None``.  JavaScript
    modules that contain a slash and form the tail of ``abs_path`` are
    marked as non-contributing.  For Java, lambda modules are marked as
    non-contributing and reflection / codegen markers are replaced by a
    stable value.
    """
    if module is None:
        return GroupingComponent(id='module')

    component = GroupingComponent(id='module', values=[module])

    if platform == 'javascript' and '/' in module and abs_path \
            and abs_path.endswith(module):
        component.update(
            contributes=False,
            hint='ignored bad javascript module',
        )
        return component

    if platform != 'java':
        return component

    if '$$Lambda$' in module:
        component.update(contributes=False, hint='ignored java lambda')

    reflection_prefix = 'sun.reflect.GeneratedMethodAccessor'
    if module.startswith(reflection_prefix):
        component.update(
            values=[reflection_prefix],
            hint='removed reflection marker',
        )
        return component

    # Run the module through every codegen pattern in turn; only touch the
    # component if at least one of them changed something.
    cleaned = module
    for pattern in (_java_reflect_enhancer_re, _java_cglib_enhancer_re,
                    _java_assist_enhancer_re, _clojure_enhancer_re):
        cleaned = pattern.sub(r'\1<auto>', cleaned)
    if cleaned != module:
        component.update(values=[cleaned], hint='removed codegen marker')

    return component
Beispiel #3
0
def get_filename_component(abs_path, filename, platform,
                           allow_file_origin=False):
    """Build the ``filename`` grouping component for a frame.

    Only the lowercased basename of ``filename`` is used as the value.
    The component is marked as non-contributing when ``abs_path`` has a
    URL origin or when the basename is one of the special placeholder
    names.  For Java the javassist pattern is applied to the basename.
    """
    if filename is None:
        return GroupingComponent(id='filename')

    # Platform independent basename, lowercased.
    basename = _basename_re.split(filename)[-1].lower()
    component = GroupingComponent(id='filename', values=[basename])

    if has_url_origin(abs_path, allow_file_origin=allow_file_origin):
        component.update(
            contributes=False,
            hint='ignored because frame points to a URL',
        )
        return component

    # Placeholder names that never contribute, mapped to their hint.
    discard_hints = {
        '<anonymous>': 'anonymous filename discarded',
        '[native code]': 'native code indicated by filename',
    }
    discard_hint = discard_hints.get(basename)
    if discard_hint is not None:
        component.update(contributes=False, hint=discard_hint)
    elif platform == 'java':
        cleaned = _java_assist_enhancer_re.sub(r'\1<auto>', basename)
        if cleaned != basename:
            component.update(values=[cleaned], hint='cleaned javassist parts')

    return component
Beispiel #4
0
def single_exception_common(exception, config, meta, with_value):
    """Build the ``exception`` grouping component for one exception.

    The result always contains a stacktrace and a type component.  When
    ``with_value`` is true a value component built from the trimmed
    exception value is appended; it is switched off whenever the
    stacktrace already contributes.
    """
    stacktrace = exception.stacktrace
    if stacktrace is None:
        stacktrace_component = GroupingComponent(id='stacktrace')
    else:
        stacktrace_component = config.get_grouping_component(stacktrace, **meta)

    type_values = [exception.type] if exception.type else []
    type_component = GroupingComponent(id='type', values=type_values)

    mechanism = exception.mechanism
    if mechanism and mechanism.synthetic:
        type_component.update(
            contributes=False,
            hint='ignored because exception is synthetic',
        )

    children = [stacktrace_component, type_component]
    if not with_value:
        return GroupingComponent(id='exception', values=children)

    value_component = GroupingComponent(id='value')
    raw_value = exception.value
    if raw_value is not None:
        trimmed = trim_message_for_grouping(raw_value)
        if trimmed:
            if raw_value != trimmed:
                value_hint = 'stripped common values'
            else:
                value_hint = None
            value_component.update(values=[trimmed], hint=value_hint)

    if stacktrace_component.contributes and value_component.contributes:
        value_component.update(
            contributes=False,
            hint='ignored because stacktrace takes precedence',
        )
    children.append(value_component)

    return GroupingComponent(id='exception', values=children)
Beispiel #5
0
def template_v1(template, **meta):
    """Build the ``template`` grouping component.

    The component has two children, ``filename`` and ``context-line``;
    each one only receives a value when the corresponding attribute of
    ``template`` is not ``None``.
    """
    sources = (
        ('filename', template.filename),
        ('context-line', template.context_line),
    )

    children = []
    for component_id, value in sources:
        child = GroupingComponent(id=component_id)
        if value is not None:
            child.update(values=[value])
        children.append(child)

    return GroupingComponent(id='template', values=children)
Beispiel #6
0
def get_contextline_component(frame, platform):
    """Build the ``context-line`` grouping component for a frame.

    Callers are expected to only use this for platforms whose source
    context can be trusted.  The line is whitespace-normalized before it
    is used as a value.  Lines whose raw length exceeds 120 characters,
    and JavaScript-family frames with a URL origin, get a hint instead of
    a value.
    """
    component = GroupingComponent(id='context-line')

    raw_line = frame.context_line
    if not raw_line:
        return component

    # Expand tabs and collapse every whitespace run into a single space.
    normalized = ' '.join(raw_line.expandtabs(2).split())
    if not normalized:
        return component

    # The length limit applies to the raw line, not the normalized one.
    if len(raw_line) > 120:
        component.update(hint='discarded because line too long')
        return component

    from_url_origin = (
        get_behavior_family_for_platform(platform) == 'javascript'
        and has_url_origin(frame.abs_path, allow_file_origin=True)
    )
    if from_url_origin:
        component.update(hint='discarded because from URL origin')
    else:
        component.update(values=[normalized])

    return component
Beispiel #7
0
def frame(interface: Frame, event: Event, context: GroupingContext,
          **meta: Any) -> ReturnedVariants:
    """Build the ``frame`` grouping component for a single stack frame.

    The component is assembled from module, filename and function
    sub-components.  A context-line component is added when the platform is
    listed in ``context["contextline_platforms"]`` and a package component
    is added when ``context["use_package_fallback"]`` is set and the frame
    has a package.

    ``event`` is only consulted for ``event.platform`` when the frame does
    not carry a platform itself.  ``context`` is read for the keys
    ``javascript_fuzzing``, ``contextline_platforms``,
    ``discard_native_filename``, ``use_package_fallback``,
    ``hierarchical_grouping``, ``is_recursion`` and ``variant``.  ``meta``
    is accepted but not used in this function.

    Returns a one-entry dict mapping ``context["variant"]`` to the frame
    component.
    """
    # NOTE: from here on the local name ``frame`` shadows this function.
    frame = interface
    platform = frame.platform or event.platform

    # Safari throws [native code] frames in for calls like ``forEach``
    # whereas Chrome ignores these. Let's remove it from the hashing algo
    # so that they're more likely to group together
    filename_component = get_filename_component(
        frame.abs_path,
        frame.filename,
        platform,
        allow_file_origin=context["javascript_fuzzing"])

    # if we have a module we use that for grouping.  This will always
    # take precedence over the filename if it contributes
    module_component = get_module_component(frame.abs_path, frame.module,
                                            platform)
    if module_component.contributes and filename_component.contributes:
        filename_component.update(contributes=False,
                                  contributes_to_similarity=True,
                                  hint="module takes precedence")

    context_line_component = None

    # If we are allowed to use the contextline we add it now.
    if platform in context["contextline_platforms"]:
        context_line_component = get_contextline_component(
            frame,
            platform,
            function=frame.function,
            context=context,
        )

    # False both when no context-line component was built and when the one
    # that was built does not contribute.
    context_line_available = bool(context_line_component
                                  and context_line_component.contributes)

    function_component = get_function_component(
        context=context,
        function=frame.function,
        raw_function=frame.raw_function,
        platform=platform,
        # Falsy when ``frame.data`` is empty or None; otherwise True only
        # if the data has a non-None "sourcemap" entry.
        sourcemap_used=frame.data and frame.data.get("sourcemap") is not None,
        context_line_available=context_line_available,
    )

    values = [module_component, filename_component, function_component]
    if context_line_component is not None:
        # Typically we want to add whichever frame component contributes to
        # the title. In JS, frames are hashed by source context, which we
        # cannot show. In that case we want to show something else instead
        # of hiding the frame from the title as if it didn't contribute.
        context_line_component.update(tree_label=function_component.tree_label)
        values.append(context_line_component)

    if (context["discard_native_filename"]
            and get_behavior_family_for_platform(platform) == "native"
            and function_component.contributes
            and filename_component.contributes):
        # In native, function names usually describe a full namespace. Adding
        # the filename there just brings extra instability into grouping.
        filename_component.update(
            contributes=False,
            hint="discarded native filename for grouping stability")

    if context["use_package_fallback"] and frame.package:
        # If function did not symbolicate properly and we also have no filename, use package as fallback.
        package_component = get_package_component(package=frame.package,
                                                  platform=platform)
        if package_component.contributes:
            # The package is only used when none of the components
            # collected so far contributes.
            use_package_component = all(not component.contributes
                                        for component in values)

            if use_package_component:
                package_component.update(
                    hint=
                    "used as fallback because function name is not available")
            else:
                package_component.update(
                    contributes=False,
                    hint="ignored because function takes precedence")

            if package_component.values and context["hierarchical_grouping"]:
                package_component.update(
                    tree_label={"package": package_component.values[0]})

            values.append(package_component)

    rv = GroupingComponent(id="frame", values=values)

    # if we are in javascript fuzzing mode we want to disregard some
    # frames consistently.  These force common bad stacktraces together
    # to have a common hash at the cost of maybe skipping over frames that
    # would otherwise be useful.
    if context["javascript_fuzzing"] and get_behavior_family_for_platform(
            platform) == "javascript":
        func = frame.raw_function or frame.function
        if func:
            # Keep only the part after the last dot.
            func = func.rsplit(".", 1)[-1]
        # NOTE(review): ``function_component`` was already handed to ``rv``
        # above; whether ``rv`` observes the updates below depends on how
        # GroupingComponent tracks its children - confirm.
        # special case empty functions not to have a hint
        if not func:
            function_component.update(contributes=False)
        elif (func in (
                "?",
                "<anonymous function>",
                "<anonymous>",
                "Anonymous function",
        ) or func.endswith("/<")):
            function_component.update(contributes=False,
                                      hint="ignored unknown function name")
        # Independent of the function checks above: eval frames and frames
        # with one of these placeholder paths disable the whole frame.
        if (func == "eval") or frame.abs_path in (
                "[native code]",
                "native code",
                "eval code",
                "<anonymous>",
        ):
            rv.update(contributes=False,
                      hint="ignored low quality javascript frame")

    if context["is_recursion"]:
        rv.update(contributes=False, hint="ignored due to recursion")

    if rv.contributes:
        # Merge the tree labels of all contributing sub-components; on key
        # collisions later components overwrite earlier ones.
        tree_label = {}

        for value in rv.values:
            if isinstance(value, GroupingComponent
                          ) and value.contributes and value.tree_label:
                tree_label.update(value.tree_label)

        if tree_label and context["hierarchical_grouping"]:
            tree_label["datapath"] = frame.datapath
            rv.tree_label = tree_label
        else:
            # The frame contributes (somehow) but we have nothing meaningful to
            # show.
            rv.tree_label = None

    return {context["variant"]: rv}
Beispiel #8
0
def get_module_component(abs_path: Optional[str], module: Optional[str],
                         platform: Optional[str]) -> GroupingComponent:
    """Build the ``module`` grouping component for a frame.

    Returns an empty component when ``module`` is ``None``.  JavaScript
    modules that contain a slash and form the tail of ``abs_path`` are
    marked as non-contributing.  For Java, lambda modules are marked as
    non-contributing, reflection / codegen markers are replaced by a
    stable value, and the last dotted part without a ``$`` becomes the
    ``classbase`` tree label.
    """
    if module is None:
        return GroupingComponent(id="module")

    component = GroupingComponent(
        id="module",
        values=[module],
        similarity_encoder=ident_encoder,
    )

    if platform == "javascript" and "/" in module and abs_path \
            and abs_path.endswith(module):
        component.update(contributes=False,
                         hint="ignored bad javascript module")
        return component

    if platform != "java":
        return component

    if "$$Lambda$" in module:
        component.update(contributes=False, hint="ignored java lambda")

    reflection_prefixes = (
        "sun.reflect.GeneratedMethodAccessor",
        "jdk.internal.reflect.GeneratedMethodAccessor",
    )
    matched_prefix = next(
        (prefix for prefix in reflection_prefixes
         if module.startswith(prefix)),
        None,
    )

    # ``labelled_module`` is what the tree label below is derived from: the
    # raw module for reflection accessors, the cleaned one otherwise.
    labelled_module = module
    if matched_prefix is not None:
        component.update(values=[matched_prefix],
                         hint="removed reflection marker")
    else:
        for pattern in (_java_reflect_enhancer_re, _java_cglib_enhancer_re,
                        _java_assist_enhancer_re, _clojure_enhancer_re):
            labelled_module = pattern.sub(r"\1<auto>", labelled_module)
        if labelled_module != module:
            component.update(values=[labelled_module],
                             hint="removed codegen marker")

    # Walk the dotted parts right to left and label with the first one
    # that carries no ``$``.
    for part in reversed(labelled_module.split(".")):
        if "$" not in part:
            component.update(tree_label={"classbase": part})
            break

    return component
Beispiel #9
0
def single_exception_common(exception, config, meta, with_value):
    """Build the ``exception`` grouping component for one exception.

    The result always contains a stacktrace and a type component.  When
    ``with_value`` is true a value component built from the trimmed
    exception value is appended; if the stacktrace already contributes,
    the value is excluded from grouping but still flagged as contributing
    to similarity.
    """
    stacktrace = exception.stacktrace
    if stacktrace is None:
        stacktrace_component = GroupingComponent(id="stacktrace")
    else:
        stacktrace_component = config.get_grouping_component(stacktrace, **meta)

    type_values = [exception.type] if exception.type else []
    type_component = GroupingComponent(
        id="type", values=type_values, similarity_encoder=ident_encoder
    )

    mechanism = exception.mechanism
    if mechanism and mechanism.synthetic:
        type_component.update(contributes=False, hint="ignored because exception is synthetic")

    children = [stacktrace_component, type_component]
    if not with_value:
        return GroupingComponent(id="exception", values=children)

    value_component = GroupingComponent(id="value", similarity_encoder=text_shingle_encoder(5))
    raw_value = exception.value
    if raw_value is not None:
        trimmed = trim_message_for_grouping(raw_value)
        if trimmed:
            if raw_value != trimmed:
                value_hint = "stripped common values"
            else:
                value_hint = None
            value_component.update(values=[trimmed], hint=value_hint)

    if stacktrace_component.contributes and value_component.contributes:
        value_component.update(
            contributes=False,
            contributes_to_similarity=True,
            hint="ignored because stacktrace takes precedence",
        )
    children.append(value_component)

    return GroupingComponent(id="exception", values=children)
Beispiel #10
0
def get_function_component(
    function,
    platform,
    legacy_function_logic,
    prefer_raw_function_name=False,
    sourcemap_used=False,
    context_line_available=False,
    raw_function=None,
    javascript_fuzzing=False,
    php_detect_anonymous_classes=False,
):
    """
    Build the ``function`` grouping component, normalizing platform quirks.

    - Ruby block functions collapse to ``block`` and the generated erb
      suffix is removed.
    - PHP anonymous functions, Java lambdas and unknown native functions
      do not contribute.

    With `legacy_function_logic` (frame v1 behavior) or
    `prefer_raw_function_name` the raw function name is preferred over the
    function name.  Otherwise the function name is preferred, and it is
    trimmed when no raw function name is present.

    With `php_detect_anonymous_classes`, PHP functions starting with
    ``class@anonymous`` are reduced to the part after the last ``::``.
    """
    from sentry.stacktraces.functions import trim_function_name

    family = get_behavior_family_for_platform(platform)

    if legacy_function_logic or prefer_raw_function_name:
        name = raw_function or function
    elif function and not raw_function:
        name = trim_function_name(function, platform)
    else:
        name = function or raw_function

    if not name:
        return GroupingComponent(id="function")

    component = GroupingComponent(
        id="function", values=[name], similarity_encoder=ident_encoder
    )

    if platform == "ruby":
        if name.startswith("block "):
            component.update(values=["block"], hint="ruby block")
            return component
        without_suffix = _ruby_erb_func.sub("", name)
        if without_suffix != name:
            component.update(
                values=[without_suffix], hint="removed generated erb template suffix"
            )
        return component

    if platform == "php":
        if name.startswith(("[Anonymous", "class@anonymous\x00")):
            component.update(contributes=False, hint="ignored anonymous function")
        if php_detect_anonymous_classes and name.startswith("class@anonymous"):
            method_name = name.rsplit("::", 1)[-1]
            if method_name != name:
                component.update(values=[method_name], hint="anonymous class method")
        return component

    if platform == "java":
        if name.startswith("lambda$"):
            component.update(contributes=False, hint="ignored lambda function")
        return component

    if family == "native":
        if name in ("<redacted>", "<unknown>"):
            component.update(contributes=False, hint="ignored unknown function")
        elif legacy_function_logic:
            isolated = trim_function_name(name, platform, normalize_lambdas=False)
            if isolated != name:
                component.update(values=[isolated], hint="isolated function")
        return component

    if not (javascript_fuzzing and family == "javascript"):
        return component

    # Turn Object.foo or Foo.foo into foo to smooth over common cross
    # browser differences.
    short_name = name.rsplit(".", 1)[-1]
    if short_name != name:
        component.update(values=[short_name], hint="trimmed javascript function")

    # Function names coming out of sourcemaps are unreliable, so once a
    # sourcemap was used and the context line can be used instead, the
    # function name is dropped from grouping.
    if sourcemap_used and context_line_available:
        component.update(
            contributes=False,
            contributes_to_similarity=True,
            hint="ignored because sourcemap used and context line available",
        )

    return component
Beispiel #11
0
def single_exception_common(exception, config, meta, with_value):
    """Build the ``exception`` grouping component for one exception.

    A stacktrace component and a type component are always present.  If
    ``with_value`` is set, a value component holding the trimmed exception
    value is added as well; it stops contributing as soon as the
    stacktrace contributes.
    """
    if exception.stacktrace is None:
        stacktrace_component = GroupingComponent(id='stacktrace')
    else:
        stacktrace_component = config.get_grouping_component(
            exception.stacktrace, **meta)

    exception_type = exception.type
    type_component = GroupingComponent(
        id='type',
        values=[exception_type] if exception_type else [],
    )

    mechanism = exception.mechanism
    is_synthetic = mechanism and mechanism.synthetic
    if is_synthetic:
        type_component.update(
            contributes=False,
            hint='ignored because exception is synthetic',
        )

    parts = [stacktrace_component, type_component]

    if with_value:
        value_component = GroupingComponent(id='value')

        original_value = exception.value
        if original_value is not None:
            trimmed_value = trim_message_for_grouping(original_value)
            was_stripped = original_value != trimmed_value
            if trimmed_value:
                value_component.update(
                    values=[trimmed_value],
                    hint='stripped common values' if was_stripped else None,
                )

        if stacktrace_component.contributes and value_component.contributes:
            value_component.update(
                contributes=False,
                hint='ignored because stacktrace takes precedence',
            )

        parts.append(value_component)

    return GroupingComponent(id='exception', values=parts)
Beispiel #12
0
def get_function_component(function, platform, legacy_function_logic,
                           sourcemap_used=False, context_line_available=False,
                           raw_function=None, javascript_fuzzing=False):
    """
    Build the ``function`` grouping component, normalizing platform quirks.

    - Ruby block functions collapse to ``block`` and the generated erb
      suffix is removed.
    - PHP anonymous functions, Java lambdas and unknown native functions
      do not contribute.

    With `legacy_function_logic` (frame v1 behavior) the raw function name
    is preferred over the function name, and native function names are
    additionally trimmed.  Without it (frame v2 behavior) the function name
    is preferred, and it is trimmed when no raw function name is present.
    """
    from sentry.stacktraces.functions import trim_function_name
    family = get_behavior_family_for_platform(platform)

    if legacy_function_logic:
        name = raw_function or function
    elif function and not raw_function:
        name = trim_function_name(function, platform)
    else:
        name = function or raw_function

    if not name:
        return GroupingComponent(id='function')

    component = GroupingComponent(id='function', values=[name])

    if platform == 'ruby':
        if name.startswith('block '):
            component.update(values=['block'], hint='ruby block')
            return component
        without_suffix = _ruby_erb_func.sub('', name)
        if without_suffix != name:
            component.update(
                values=[without_suffix],
                hint='removed generated erb template suffix',
            )
        return component

    if platform == 'php':
        if name.startswith('[Anonymous'):
            component.update(
                contributes=False,
                hint='ignored anonymous function',
            )
        return component

    if platform == 'java':
        if name.startswith('lambda$'):
            component.update(
                contributes=False,
                hint='ignored lambda function',
            )
        return component

    if family == 'native':
        if name in ('<redacted>', '<unknown>'):
            component.update(
                contributes=False,
                hint='ignored unknown function',
            )
        elif legacy_function_logic:
            isolated = trim_function_name(name, platform,
                                          normalize_lambdas=False)
            if isolated != name:
                component.update(values=[isolated], hint='isolated function')
        return component

    if not (javascript_fuzzing and family == 'javascript'):
        return component

    # Turn Object.foo or Foo.foo into foo to smooth over common cross
    # browser differences.
    short_name = name.rsplit('.', 1)[-1]
    if short_name != name:
        component.update(
            values=[short_name],
            hint='trimmed javascript function',
        )

    # Function names coming out of sourcemaps are unreliable, so once a
    # sourcemap was used and the context line can be used instead, the
    # function name is dropped from grouping.
    if sourcemap_used and context_line_available:
        component.update(
            contributes=False,
            hint='ignored because sourcemap used and context line available',
        )

    return component
Beispiel #13
0
def get_module_component(abs_path, module, platform):
    """Build the ``module`` grouping component for a frame.

    Returns an empty component when ``module`` is ``None``.  JavaScript
    modules that contain a slash and form the tail of ``abs_path`` are
    marked as non-contributing.  For Java, lambda modules are marked as
    non-contributing and reflection / codegen markers are replaced by a
    stable value.
    """
    if module is None:
        return GroupingComponent(id="module")

    component = GroupingComponent(id="module", values=[module])

    is_bad_js_module = (
        platform == "javascript"
        and "/" in module
        and abs_path
        and abs_path.endswith(module)
    )
    if is_bad_js_module:
        component.update(contributes=False, hint="ignored bad javascript module")
        return component

    if platform != "java":
        return component

    if "$$Lambda$" in module:
        component.update(contributes=False, hint="ignored java lambda")

    # Generated reflection accessors collapse to their bare prefix.
    for prefix in (
        "sun.reflect.GeneratedMethodAccessor",
        "jdk.internal.reflect.GeneratedMethodAccessor",
    ):
        if module.startswith(prefix):
            component.update(values=[prefix], hint="removed reflection marker")
            return component

    cleaned = module
    for pattern in (
        _java_reflect_enhancer_re,
        _java_cglib_enhancer_re,
        _java_assist_enhancer_re,
        _clojure_enhancer_re,
    ):
        cleaned = pattern.sub(r"\1<auto>", cleaned)
    if cleaned != module:
        component.update(values=[cleaned], hint="removed codegen marker")

    return component
Beispiel #14
0
def get_function_component_v1(function, platform):
    """
    Build the ``function`` grouping component, normalizing platform quirks.

    - Ruby block functions collapse to ``block`` and the generated erb
      suffix is removed.
    - PHP anonymous functions, Java lambdas and unknown native functions
      do not contribute.
    - Other objc / cocoa / native function names are passed through
      ``isolate_native_function_v1``.
    """
    if not function:
        return GroupingComponent(id='function')

    component = GroupingComponent(id='function', values=[function])

    if platform == 'ruby':
        if function.startswith('block '):
            component.update(values=['block'], hint='ruby block')
            return component
        without_suffix = _ruby_erb_func.sub('', function)
        if without_suffix != function:
            component.update(
                values=[without_suffix],
                hint='removed generated erb template suffix',
            )
        return component

    if platform == 'php':
        if function.startswith('[Anonymous'):
            component.update(
                contributes=False,
                hint='ignored anonymous function',
            )
        return component

    if platform == 'java':
        if function.startswith('lambda$'):
            component.update(
                contributes=False,
                hint='ignored lambda function',
            )
        return component

    if platform not in ('objc', 'cocoa', 'native'):
        return component

    if function in ('<redacted>', '<unknown>'):
        component.update(
            contributes=False,
            hint='ignored unknown function',
        )
        return component

    isolated = isolate_native_function_v1(function)
    if isolated != function:
        component.update(values=[isolated], hint='isolated function')

    return component
def csp_v1(csp_interface, context, **meta):
    """Build the ``csp`` grouping component for a CSP report.

    The component consists of a ``salt`` (the effective directive), a
    ``violation`` and a ``uri`` child.  For local script violations the
    violation type contributes and the blocked URI is kept as a
    non-contributing value; otherwise the blocked URI contributes and the
    violation does not.

    Returns a one-entry dict mapping ``context["variant"]`` to the
    component.
    """
    violation_type = csp_interface.local_script_violation_type
    blocked_uri = csp_interface.normalized_blocked_uri

    violation_component = GroupingComponent(id="violation")
    uri_component = GroupingComponent(id="uri")

    if violation_type:
        violation_component.update(values=["'%s'" % violation_type])
        uri_component.update(
            contributes=False,
            hint="violation takes precedence",
            values=[blocked_uri],
        )
    else:
        violation_component.update(
            contributes=False, hint="not a local script violation"
        )
        uri_component.update(values=[blocked_uri])

    salt_component = GroupingComponent(
        id="salt", values=[csp_interface.effective_directive]
    )
    csp_component = GroupingComponent(
        id="csp",
        values=[salt_component, violation_component, uri_component],
    )

    return {context["variant"]: csp_component}
Beispiel #16
0
def single_exception(exception, context, **meta):
    """Build one ``exception`` grouping component per stacktrace variant.

    Each returned component contains the stacktrace component of that
    variant, the shared ``type`` component, an optional ``ns-error``
    component (when the mechanism meta carries ``ns_error``) and, if
    ``context["with_exception_value_fallback"]`` is truthy, a ``value``
    component that is demoted whenever the stacktrace or the ns-error
    information already contributes.

    Returns a dict mapping variant name to the ``exception`` component.
    """
    type_component = GroupingComponent(
        id="type",
        values=[exception.type] if exception.type else [],
        similarity_encoder=ident_encoder,
    )

    ns_error_component = None
    mechanism = exception.mechanism
    if mechanism:
        if mechanism.synthetic:
            type_component.update(contributes=False, hint="ignored because exception is synthetic")
        if mechanism.meta and "ns_error" in mechanism.meta:
            ns_error = mechanism.meta["ns_error"]
            ns_error_component = GroupingComponent(
                id="ns-error",
                values=[ns_error.get("domain"), ns_error.get("code")],
            )

    if exception.stacktrace is None:
        # No stacktrace at all: fall back to a single, empty "app" variant.
        stacktrace_variants = {"app": GroupingComponent(id="stacktrace")}
    else:
        with context:
            # Expose the exception payload to the nested stacktrace strategy.
            context["exception_data"] = exception.to_json()
            stacktrace_variants = context.get_grouping_component(exception.stacktrace, **meta)

    rv = {}
    for variant_name, stacktrace_component in stacktrace_variants.items():
        parts = [stacktrace_component, type_component]
        if ns_error_component is not None:
            parts.append(ns_error_component)

        if context["with_exception_value_fallback"]:
            value_component = GroupingComponent(
                id="value", similarity_encoder=text_shingle_encoder(5)
            )

            raw_value = exception.value
            if raw_value is not None:
                trimmed_value = trim_message_for_grouping(raw_value)
                if raw_value != trimmed_value:
                    value_hint = "stripped common values"
                else:
                    value_hint = None
                if trimmed_value:
                    value_component.update(values=[trimmed_value], hint=value_hint)

            # A contributing stacktrace outranks the exception value.
            if value_component.contributes and stacktrace_component.contributes:
                value_component.update(
                    contributes=False,
                    contributes_to_similarity=True,
                    hint="ignored because stacktrace takes precedence",
                )

            # So does contributing ns-error information.
            ns_error_contributes = (
                ns_error_component is not None and ns_error_component.contributes
            )
            if ns_error_contributes and value_component.contributes:
                value_component.update(
                    contributes=False,
                    contributes_to_similarity=True,
                    hint="ignored because ns-error info takes precedence",
                )

            parts.append(value_component)

        rv[variant_name] = GroupingComponent(id="exception", values=parts)

    return rv
Beispiel #17
0
def get_stacktrace_hierarchy(main_variant, components, frames,
                             inverted_hierarchy):
    """Derive a hierarchy of ``stacktrace`` variants from sentinel frames.

    ``frames`` and ``components`` are paired positionally.  Starting from one
    end of the stacktrace, every contributing sentinel frame (plus any run of
    prefix frames attached to it) opens a new layer that extends the previous
    one; each layer is stored under the key ``app-depth-<n>``.

    :param main_variant: the component for the entire stacktrace; its tree
        label is set here and it is used as ``app-depth-max`` when the layer
        limit is reached.
    :param components: frame components, parallel to ``frames``.
    :param frames: raw frames, parallel to ``components``.
    :param inverted_hierarchy: when falsy the pairs are walked in reverse
        order, otherwise in the given order.
    :returns: dict mapping variant key to ``GroupingComponent``.  When no
        layer could be built, the result of ``_build_fallback_tree`` is used.
    """
    main_variant.update(tree_label="<entire stacktrace>")

    frames_iter = list(zip(frames, components))
    if not inverted_hierarchy:
        # frames are sorted in a way where the crashing frame is at the end of
        # the list. In "non-inverted" mode we want to start at the crashing
        # frame, in inverted mode we want to start at the threadbase
        frames_iter = reversed(frames_iter)

    # A single iterator is shared by all loops below, so every frame is
    # consumed at most once across all layers.
    frames_iter = iter(frames_iter)

    prev_variant = GroupingComponent(id="stacktrace", values=[])
    all_variants = {}

    while len(all_variants) < MAX_LAYERS:
        depth = len(all_variants) + 1
        key = f"app-depth-{depth}"
        assert key not in all_variants

        tree_categories = set()

        # Advance to the next contributing sentinel frame.  If the iterator
        # runs dry first, the ``else`` clause ends the outer ``while`` loop.
        for frame, component in frames_iter:
            if component.contributes and component.is_sentinel_frame:
                break
        else:
            break

        # Each layer starts as a copy of the previous layer's values.
        layer = list(prev_variant.values)
        layer.append(component)
        tree_categories.add(get_path(frame, "data", "category") or None)
        prev_component = component

        if prev_component.is_prefix_frame:
            # A prefix frame pulls in the following contributing frames until
            # the first one that is not itself a prefix frame.
            for frame, component in frames_iter:
                if not component.contributes:
                    continue

                layer.append(component)
                tree_categories.add(
                    get_path(frame, "data", "category") or None)
                prev_component = component

                if not component.is_prefix_frame:
                    break
            else:
                # Frames ran out before a non-prefix frame was found: leave
                # the ``while`` loop without recording this layer.
                break

        tree_label = _compute_tree_label(prev_variant, layer)
        # ``None`` stands for frames without a category; it is not displayed.
        tree_categories.discard(None)
        if tree_categories:
            tree_label = f"{tree_label} [{'/'.join(sorted(tree_categories))}]"

        all_variants[key] = prev_variant = GroupingComponent(
            id="stacktrace", values=layer, tree_label=tree_label)
    else:
        # Only reached when the loop condition became false (MAX_LAYERS layers
        # were built), not when the loop was left through ``break``.
        all_variants["app-depth-max"] = main_variant

    if not all_variants:
        all_variants.update(
            _build_fallback_tree(main_variant, components, frames,
                                 inverted_hierarchy))
    return all_variants
Beispiel #18
0
def get_stacktrace_hierarchy(
    main_variant, components, frames, inverted_hierarchy
) -> ReturnedVariants:
    """Derive a hierarchy of ``stacktrace`` variants from sentinel frames.

    ``frames`` and ``components`` are paired positionally and walked from one
    end of the stacktrace.  Every contributing sentinel frame (plus any run of
    prefix frames attached to it) opens a new layer stored under
    ``app-depth-<n>``.  Scanning stops as soon as a contributing, non-sentinel
    in-app frame is met, when the frames run out, or when ``MAX_LAYERS``
    layers exist.

    :param main_variant: the component for the entire stacktrace; stored as
        ``app-depth-max`` when at least one layer was built, and always given
        a computed tree label before returning.
    :param components: frame components, parallel to ``frames``.
    :param frames: raw frames, parallel to ``components``.
    :param inverted_hierarchy: when falsy the pairs are walked in reverse
        order, otherwise in the given order.
    :returns: dict mapping variant key to ``GroupingComponent``; the result of
        ``_build_fallback_tree`` when no sentinel layer was found.
    """
    frames_iter = list(zip(frames, components))
    if not inverted_hierarchy:
        # frames are sorted in a way where the crashing frame is at the end of
        # the list. In "non-inverted" mode we want to start at the crashing
        # frame, in inverted mode we want to start at the threadbase
        frames_iter = reversed(frames_iter)

    # A single iterator is shared by all loops below, so every frame is
    # consumed at most once across all layers.
    frames_iter = iter(frames_iter)

    prev_variant = GroupingComponent(id="stacktrace", values=[])
    all_variants = {}

    while len(all_variants) < MAX_LAYERS:
        depth = len(all_variants) + 1
        key = f"app-depth-{depth}"
        assert key not in all_variants

        found_sentinel = False

        for frame, component in frames_iter:
            if not component.contributes:
                continue

            # We found a sentinel frame, which somebody decided was important
            # to group by. In that case we group only by sentinel frames as we
            # can't be sure that in-app is a good indicator of relevance.

            if component.is_sentinel_frame:
                found_sentinel = True
                break

            # In case we found an application frame before the first sentinel
            # frame, use the "fallback logic". Sentinel frames are mostly
            # useful to identify important frames *called by* app frames that
            # would otherwise be discarded from grouping (in case of ANR
            # grouping/inverted_hierarchy similar reasoning applies)

            if frame["in_app"]:
                break

        # Either the frames ran out or an in-app frame came first: stop
        # building layers.
        if not found_sentinel:
            break

        add_to_layer = [component]

        prev_component = component

        if prev_component.is_prefix_frame:
            # A prefix frame pulls in the following contributing frames until
            # the first one that is not itself a prefix frame.  If the frames
            # run out first, the layer is still recorded with what was found.
            for frame, component in frames_iter:
                if not component.contributes:
                    continue

                add_to_layer.append(component)
                prev_component = component

                if not component.is_prefix_frame:
                    break

        # For consistency, we always want to preserve the sort order of the
        # event frames, no matter what order we're going through.

        if not inverted_hierarchy:
            # We walked backwards, so the new frames go (reversed) in front
            # of the frames of the previous layer.
            layer = add_to_layer
            layer.reverse()
            layer.extend(prev_variant.values)

        else:
            layer = list(prev_variant.values)
            layer.extend(add_to_layer)

        tree_label = _compute_tree_label(layer)

        all_variants[key] = prev_variant = GroupingComponent(
            id="stacktrace", values=layer, tree_label=tree_label
        )

    if not all_variants:
        # In case we haven't found any sentinel frames, start grouping by
        # application frames.
        all_variants = _build_fallback_tree(main_variant, components, frames, inverted_hierarchy)
    else:
        all_variants["app-depth-max"] = main_variant

    main_variant.update(tree_label=_compute_tree_label(main_variant.values))

    return all_variants
Beispiel #19
0
def single_exception_legacy(exception, context, **meta):
    """Build the legacy ``exception`` grouping component.

    The component holds the stacktrace, type and value components in that
    order.  A contributing stacktrace lets the type contribute as well and
    only leaves a precedence hint on the value; without a contributing
    stacktrace both type and value contribute when they are set.
    """
    exc_type = exception.type
    exc_value = exception.value

    type_component = GroupingComponent(
        id="type",
        values=[exc_type] if exc_type else [],
        similarity_encoder=ident_encoder,
        contributes=False,
    )
    value_component = GroupingComponent(
        id="value",
        values=[exc_value] if exc_value else [],
        similarity_encoder=text_shingle_encoder(5),
        contributes=False,
    )

    if exception.stacktrace is None:
        stacktrace_component = GroupingComponent(id="stacktrace")
    else:
        stacktrace_component = context.get_grouping_component(exception.stacktrace, **meta)
        if stacktrace_component.contributes:
            if exc_type:
                type_component.update(contributes=True)
                precedence_hint = "stacktrace and type take precedence"
            else:
                precedence_hint = "stacktrace takes precedence"
            # The value stays non-contributing; it only gets an explanation.
            if exc_value:
                value_component.update(hint=precedence_hint)

    if not stacktrace_component.contributes:
        # Nothing usable in the stacktrace: fall back to type and value.
        if exc_type:
            type_component.update(contributes=True)
        if exc_value:
            value_component.update(contributes=True)

    parts = [stacktrace_component, type_component, value_component]
    return GroupingComponent(id="exception", values=parts)
Beispiel #20
0
def message_v1(message_interface, **meta):
    """Return the ``message`` component for a message interface.

    Uses ``message``, then ``formatted``, then an empty string, whichever is
    the first truthy one.
    """
    text = message_interface.message or message_interface.formatted or u""
    return GroupingComponent(id="message", values=[text])
Beispiel #21
0
def message_v1(message_interface, **meta):
    """Return the ``message`` component for a message interface.

    Uses ``message``, then ``formatted``, then an empty string, whichever is
    the first truthy one.  The component is given a 5-shingle text encoder
    for similarity.
    """
    text = message_interface.message or message_interface.formatted or ""
    encoder = text_shingle_encoder(5)
    return GroupingComponent(id="message", values=[text], similarity_encoder=encoder)
Beispiel #22
0
def message_v1(message_interface, **meta):
    """Return the ``message`` component for a message interface.

    Uses ``message`` when it is truthy and ``formatted`` otherwise.
    """
    text = message_interface.message or message_interface.formatted
    return GroupingComponent(id='message', values=[text])
Beispiel #23
0
def get_frame_component(frame, event, meta, legacy_function_logic=False,
                        use_contextline=False,
                        javascript_fuzzing=False):
    """Build the ``frame`` grouping component for a single stack frame.

    The component is assembled from the module, filename and function
    components plus, when ``use_contextline`` is set, the context-line
    component.  With ``javascript_fuzzing`` enabled on a platform of the
    javascript behavior family, unknown function names and low quality
    frames are additionally marked as non-contributing.
    """
    platform = frame.platform or event.platform

    filename_component = get_filename_component(
        frame.abs_path, frame.filename, platform,
        allow_file_origin=javascript_fuzzing)
    module_component = get_module_component_v1(
        frame.abs_path, frame.module, platform)

    # A contributing module always wins over a contributing filename.
    if filename_component.contributes and module_component.contributes:
        filename_component.update(
            contributes=False,
            hint='module takes precedence'
        )

    # The context line is only considered when explicitly allowed.
    context_line_component = (
        get_contextline_component(frame, platform) if use_contextline else None
    )

    function_component = get_function_component(
        function=frame.function,
        raw_function=frame.raw_function,
        platform=platform,
        sourcemap_used=frame.data and frame.data.get('sourcemap') is not None,
        context_line_available=context_line_component and context_line_component.contributes,
        legacy_function_logic=legacy_function_logic,
        javascript_fuzzing=javascript_fuzzing,
    )

    parts = [module_component, filename_component, function_component]
    if context_line_component is not None:
        parts.append(context_line_component)
    rv = GroupingComponent(id='frame', values=parts)

    # Everything below only applies to javascript fuzzing mode, which
    # consistently disregards some frames so that common bad stacktraces
    # hash together, at the cost of maybe skipping useful frames.
    if not javascript_fuzzing \
       or get_behavior_family_for_platform(platform) != 'javascript':
        return rv

    short_name = frame.raw_function or frame.function
    if short_name:
        # Only look at the part after the last dot.
        short_name = short_name.rsplit('.', 1)[-1]

    unknown_names = (None, '?', '<anonymous function>', '<anonymous>',
                     'Anonymous function')
    if short_name in unknown_names or short_name.endswith('/<'):
        function_component.update(
            contributes=False,
            hint='ignored unknown function name'
        )

    low_quality_paths = ('[native code]', 'native code', 'eval code', '<anonymous>')
    if short_name == 'eval' or frame.abs_path in low_quality_paths:
        rv.update(
            contributes=False,
            hint='ignored low quality javascript frame'
        )

    return rv
Beispiel #24
0
def message_v2(message_interface, **meta):
    """Return the ``message`` component built from the trimmed message.

    The message (``message`` if truthy, else ``formatted``) is passed through
    ``trim_message_for_grouping``; when that changes the text the component
    carries a hint saying so.
    """
    raw_message = message_interface.message or message_interface.formatted
    trimmed = trim_message_for_grouping(raw_message)
    if raw_message != trimmed:
        hint = 'stripped common values'
    else:
        hint = None
    return GroupingComponent(id='message', values=[trimmed], hint=hint)
Beispiel #25
0
def get_filename_component(abs_path, filename, platform,
                           allow_file_origin=False):
    """Attempt to normalize filenames by detecting special filenames and by
    using the basename only.

    Returns an empty ``filename`` component when ``filename`` is ``None``.
    Otherwise the lowercased basename is used; it is marked as
    non-contributing for URL origins, ``<anonymous>`` and ``[native code]``,
    and cleaned of javassist parts on the java platform.
    """
    if filename is None:
        return GroupingComponent(id='filename')

    # Group by the platform independent, lowercased basename only.
    basename = _basename_re.split(filename)[-1].lower()
    component = GroupingComponent(
        id='filename',
        values=[basename],
    )

    # Work out whether (and why) the filename has to be discarded.
    if has_url_origin(abs_path, allow_file_origin=allow_file_origin):
        discard_hint = 'ignored because frame points to a URL'
    elif basename == '<anonymous>':
        discard_hint = 'anonymous filename discarded'
    elif basename == '[native code]':
        discard_hint = 'native code indicated by filename'
    else:
        discard_hint = None

    if discard_hint is not None:
        component.update(
            contributes=False,
            hint=discard_hint,
        )
    elif platform == 'java':
        cleaned = _java_assist_enhancer_re.sub(r'\1<auto>', basename)
        if cleaned != basename:
            component.update(
                values=[cleaned],
                hint='cleaned javassist parts'
            )

    return component
Beispiel #26
0
def stacktrace_legacy(stacktrace, config, variant, **meta):
    """Build the legacy ``stacktrace`` grouping component.

    Every frame gets its own component through
    ``config.get_grouping_component``.  For the ``app`` variant frames that
    are not in-app are excluded unless no frame is in-app at all, in which
    case all frames are treated as in-app.  Recursive frames are excluded
    too.  Finally the configured enhancements may adjust the contributions
    of the frame components.
    """
    frames = stacktrace.frames
    contributes = None
    hint = None
    all_frames_considered_in_app = False
    is_app_variant = variant == 'app'

    # TODO(dcramer): this should apply only to platform=javascript
    # Browser JS often reports errors from code inlined in an HTML page with
    # a single frame, no function name and the HTML document as filename.
    # Neither the context nor the line number can be trusted then, and the
    # URL is dynamic, so such a stack is not usable for hashing.
    if len(frames) == 1 and not frames[0].function and frames[0].is_url():
        contributes = False
        hint = 'ignored single frame stack'
    elif is_app_variant:
        total_frames = len(frames)
        in_app_count = sum(1 for f in frames if f.in_app)
        if in_app_count == 0:
            in_app_count = total_frames
            all_frames_considered_in_app = True

        # Discard the hash when app frames are under 10% of the stacktrace.
        if total_frames > 0 and in_app_count / float(total_frames) < 0.10:
            contributes = False
            hint = 'less than 10% of frames are in-app'

    frame_components = []
    raw_frames = []
    previous = None
    for frame in frames:
        component = config.get_grouping_component(frame,
                                                  variant=variant,
                                                  **meta)
        outside_app = is_app_variant and not frame.in_app
        if outside_app and not all_frames_considered_in_app:
            component.update(
                contributes=False,
                hint='non app frame',
            )
        elif previous is not None and is_recursion_legacy(frame, previous):
            component.update(
                contributes=False,
                hint='ignored due to recursion',
            )
        elif outside_app:
            # Only reachable when all frames are considered in-app.
            component.update(
                hint='frame considered in-app because no frame is in-app')
        frame_components.append(component)
        raw_frames.append(frame.get_raw_data())
        previous = frame

    config.enhancements.update_frame_components_contributions(
        frame_components, raw_frames, meta['event'].platform)

    return GroupingComponent(
        id='stacktrace',
        values=frame_components,
        contributes=contributes,
        hint=hint,
    )
Beispiel #27
0
def get_frame_component(
    frame,
    event,
    meta,
    legacy_function_logic=False,
    use_contextline=False,
    javascript_fuzzing=False,
    with_context_line_file_origin_bug=False,
    php_detect_anonymous_classes=False,
    prefer_raw_function_name=False,
):
    """Build the ``frame`` grouping component for a single stack frame.

    The component is assembled from the module, filename and function
    components plus, when ``use_contextline`` is set, the context-line
    component.  With ``javascript_fuzzing`` enabled on a platform of the
    javascript behavior family, empty or unknown function names and low
    quality frames are additionally marked as non-contributing.
    """
    platform = frame.platform or event.platform

    filename_component = get_filename_component(
        frame.abs_path, frame.filename, platform, allow_file_origin=javascript_fuzzing
    )
    module_component = get_module_component(frame.abs_path, frame.module, platform)

    # A contributing module always wins over a contributing filename.
    if filename_component.contributes and module_component.contributes:
        filename_component.update(
            contributes=False, contributes_to_similarity=True, hint="module takes precedence"
        )

    # The context line is only considered when explicitly allowed.
    if use_contextline:
        context_line_component = get_contextline_component(
            frame,
            platform,
            function=frame.function,
            with_context_line_file_origin_bug=with_context_line_file_origin_bug,
        )
    else:
        context_line_component = None

    function_component = get_function_component(
        function=frame.function,
        raw_function=frame.raw_function,
        platform=platform,
        sourcemap_used=frame.data and frame.data.get("sourcemap") is not None,
        context_line_available=context_line_component and context_line_component.contributes,
        legacy_function_logic=legacy_function_logic,
        prefer_raw_function_name=prefer_raw_function_name,
        javascript_fuzzing=javascript_fuzzing,
        php_detect_anonymous_classes=php_detect_anonymous_classes,
    )

    parts = [module_component, filename_component, function_component]
    if context_line_component is not None:
        parts.append(context_line_component)
    rv = GroupingComponent(id="frame", values=parts)

    # Everything below only applies to javascript fuzzing mode, which
    # consistently disregards some frames so that common bad stacktraces
    # hash together, at the cost of maybe skipping useful frames.
    if not javascript_fuzzing or get_behavior_family_for_platform(platform) != "javascript":
        return rv

    short_name = frame.raw_function or frame.function
    if short_name:
        # Only look at the part after the last dot.
        short_name = short_name.rsplit(".", 1)[-1]

    unknown_names = ("?", "<anonymous function>", "<anonymous>", "Anonymous function")
    if not short_name:
        # Empty function names are dropped without a hint on purpose.
        function_component.update(contributes=False)
    elif short_name in unknown_names or short_name.endswith("/<"):
        function_component.update(contributes=False, hint="ignored unknown function name")

    low_quality_paths = ("[native code]", "native code", "eval code", "<anonymous>")
    if short_name == "eval" or frame.abs_path in low_quality_paths:
        rv.update(contributes=False, hint="ignored low quality javascript frame")

    return rv
Beispiel #28
0
def single_exception_legacy(interface: SingleException, event: Event,
                            context: GroupingContext,
                            **meta: Any) -> ReturnedVariants:
    """Build the legacy ``exception`` grouping component.

    The component holds the stacktrace, type and value components in that
    order and is returned in a single-entry dict keyed by
    ``context["variant"]``.  A contributing stacktrace lets the type
    contribute as well and only leaves a precedence hint on the value;
    without a contributing stacktrace both type and value contribute when
    they are set.
    """
    exc_type = interface.type
    exc_value = interface.value

    type_component = GroupingComponent(
        id="type",
        values=[exc_type] if exc_type else [],
        similarity_encoder=ident_encoder,
        contributes=False,
    )
    value_component = GroupingComponent(
        id="value",
        values=[exc_value] if exc_value else [],
        similarity_encoder=text_shingle_encoder(5),
        contributes=False,
    )

    if interface.stacktrace is None:
        stacktrace_component = GroupingComponent(id="stacktrace")
    else:
        stacktrace_component = context.get_grouping_component(
            interface.stacktrace, event=event, **meta)
        if stacktrace_component.contributes:
            if exc_type:
                type_component.update(contributes=True)
                precedence_hint = "stacktrace and type take precedence"
            else:
                precedence_hint = "stacktrace takes precedence"
            # The value stays non-contributing; it only gets an explanation.
            if exc_value:
                value_component.update(hint=precedence_hint)

    if not stacktrace_component.contributes:
        # Nothing usable in the stacktrace: fall back to type and value.
        if exc_type:
            type_component.update(contributes=True)
        if exc_value:
            value_component.update(contributes=True)

    exception_component = GroupingComponent(
        id="exception",
        values=[stacktrace_component, type_component, value_component])
    return {context["variant"]: exception_component}
Beispiel #29
0
def get_filename_component(abs_path, filename, platform, allow_file_origin=False):
    """Normalize a frame filename into a ``filename`` grouping component.

    Returns an empty component when ``filename`` is ``None``.  Otherwise the
    lowercased basename is used; it is marked as non-contributing for URL
    origins, ``<anonymous>`` and ``[native code]``, and cleaned of javassist
    parts on the java platform.
    """
    if filename is None:
        return GroupingComponent(id="filename")

    # Group by the platform independent, lowercased basename only.
    basename = _basename_re.split(filename)[-1].lower()
    component = GroupingComponent(
        id="filename", values=[basename], similarity_encoder=ident_encoder
    )

    if has_url_origin(abs_path, allow_file_origin=allow_file_origin):
        component.update(contributes=False, hint="ignored because frame points to a URL")
        return component

    if basename == "<anonymous>":
        component.update(contributes=False, hint="anonymous filename discarded")
        return component

    if basename == "[native code]":
        component.update(contributes=False, hint="native code indicated by filename")
        return component

    if platform == "java":
        cleaned = _java_assist_enhancer_re.sub(r"\1<auto>", basename)
        if cleaned != basename:
            component.update(values=[cleaned], hint="cleaned javassist parts")

    return component
Beispiel #30
0
def frame_legacy(interface: Frame, event: Event, context: GroupingContext,
                 **meta: Any) -> ReturnedVariants:
    """Build the legacy ``frame`` grouping component for a single frame.

    The frame component always contains six sub-components, in this order:
    module, filename, context-line, symbol, function and lineno.  Which of
    them contribute follows a precedence chain: the module outranks the
    filename, the context line outranks symbol/function/lineno, the symbol
    outranks function and lineno, and the function outranks the lineno.

    :param interface: the frame to build the component for.
    :param event: the event the frame belongs to; only its ``platform`` is
        read, as a fallback for ``interface.platform``.
    :param context: grouping context; ``context["variant"]`` is the key of
        the returned dict.
    :returns: single-entry dict mapping the variant to the frame component.
    """
    platform = interface.platform or event.platform

    # In certain situations we want to disregard the entire frame.
    contributes = None
    hint = None

    # This requires some explanation: older sentry versions did not have
    # raw_function but only function.  For some platforms like native
    # we now instead store a trimmed function name in frame.function and
    # the original value moved to raw_function.  This requires us to
    # prioritize raw_function over function in the legacy grouping code to
    # avoid creating new groups.
    func = interface.raw_function or interface.function

    # Safari throws [native code] frames in for calls like ``forEach``
    # whereas Chrome ignores these. Let's remove it from the hashing algo
    # so that they're more likely to group together
    filename_component = GroupingComponent(id="filename",
                                           similarity_encoder=ident_encoder)
    if interface.filename == "<anonymous>":
        filename_component.update(contributes=False,
                                  values=[interface.filename],
                                  hint="anonymous filename discarded")
    elif interface.filename == "[native code]":
        # Applies to the frame as a whole (see the returned component), not
        # just to the filename component.
        contributes = False
        hint = "native code indicated by filename"
    elif interface.filename:
        if has_url_origin(interface.abs_path):
            filename_component.update(
                contributes=False,
                values=[interface.filename],
                hint="ignored because filename is a URL",
            )
        # XXX(dcramer): don't compute hash using frames containing the 'Caused by'
        # text as it contains an exception value which may contain dynamic
        # values (see raven-java#125)
        elif interface.filename.startswith("Caused by: "):
            filename_component.update(values=[interface.filename],
                                      contributes=False,
                                      hint="ignored because invalid")
        else:
            hashable_filename, hashable_filename_hint = remove_filename_outliers_legacy(
                interface.filename, platform)
            filename_component.update(values=[hashable_filename],
                                      hint=hashable_filename_hint)

    # if we have a module we use that for grouping.  This will always
    # take precedence over the filename, even if the module is
    # considered unhashable.
    module_component = GroupingComponent(id="module",
                                         similarity_encoder=ident_encoder)
    if interface.module:
        if is_unhashable_module_legacy(interface, platform):
            # Replace the generated module name by a fixed salt value.
            module_component.update(
                values=[
                    GroupingComponent(id="salt",
                                      values=["<module>"],
                                      hint="normalized generated module name")
                ],
                hint="ignored module",
            )

            # <module> still contributes, though it should not contribute to
            # similarity
            module_component.similarity_encoder = None
        else:
            module_name, module_hint = remove_module_outliers_legacy(
                interface.module, platform)
            module_component.update(values=[module_name], hint=module_hint)
        if interface.filename:
            # This overrides whatever was decided for the filename above and
            # resets its value to the raw filename.
            filename_component.update(values=[interface.filename],
                                      contributes=False,
                                      hint="module takes precedence")

    # Context line when available is the primary contributor
    context_line_component = GroupingComponent(
        id="context-line", similarity_encoder=ident_encoder)
    if interface.context_line is not None:
        # In the two "discarded" cases only a hint is set and no values are
        # assigned to the component.
        if len(interface.context_line) > 120:
            context_line_component.update(
                hint="discarded because line too long")
        elif has_url_origin(interface.abs_path) and not func:
            context_line_component.update(
                hint="discarded because from URL origin")
        else:
            context_line_component.update(values=[interface.context_line])

    symbol_component = GroupingComponent(id="symbol",
                                         similarity_encoder=ident_encoder)
    function_component = GroupingComponent(id="function",
                                           similarity_encoder=ident_encoder)
    lineno_component = GroupingComponent(id="lineno",
                                         similarity_encoder=ident_encoder)

    # The context line grouping information is the most reliable one.
    # If we did not manage to find some information there, we want to
    # see if we can come up with some extra information.  We only want
    # to do that if we managed to get a module or filename.
    if not context_line_component.contributes and (
            module_component.contributes or filename_component.contributes):
        if interface.symbol:
            # The symbol wins; function and lineno are kept for display only.
            symbol_component.update(values=[interface.symbol])
            if func:
                function_component.update(contributes=False,
                                          values=[func],
                                          hint="symbol takes precedence")
            if interface.lineno:
                lineno_component.update(contributes=False,
                                        values=[interface.lineno],
                                        hint="symbol takes precedence")
        elif func:
            if is_unhashable_function_legacy(func):
                # Replace the function name by a fixed salt value.
                function_component.update(values=[
                    GroupingComponent(id="salt",
                                      values=["<function>"],
                                      hint="normalized lambda function name")
                ])
                # <function> still contributes, though it should not
                # contribute to similarity
                function_component.similarity_encoder = None
            else:
                function, function_hint = remove_function_outliers_legacy(func)
                function_component.update(values=[function],
                                          hint=function_hint)
            if interface.lineno:
                lineno_component.update(contributes=False,
                                        values=[interface.lineno],
                                        hint="function takes precedence")
        elif interface.lineno:
            # Last resort: neither symbol nor function is available.
            lineno_component.update(values=[interface.lineno])
    else:
        # Symbol, function and lineno are recorded but do not contribute;
        # the hint explains which precondition was not met.
        if context_line_component.contributes:
            fallback_hint = "is not used if context-line is available"
        else:
            fallback_hint = "is not used if module or filename are available"
        if interface.symbol:
            symbol_component.update(contributes=False,
                                    values=[interface.symbol],
                                    hint="symbol " + fallback_hint)
        if func:
            function_component.update(contributes=False,
                                      values=[func],
                                      hint="function name " + fallback_hint)
        if interface.lineno:
            lineno_component.update(contributes=False,
                                    values=[interface.lineno],
                                    hint="line number " + fallback_hint)

    return {
        context["variant"]:
        GroupingComponent(
            id="frame",
            values=[
                module_component,
                filename_component,
                context_line_component,
                symbol_component,
                function_component,
                lineno_component,
            ],
            contributes=contributes,
            hint=hint,
        )
    }
Beispiel #31
0
def get_function_component(
    context: GroupingContext,
    function: Optional[str],
    raw_function: Optional[str],
    platform: Optional[str],
    sourcemap_used: bool = False,
    context_line_available: bool = False,
) -> GroupingComponent:
    """
    Build the ``function`` grouping component for a frame, normalizing common
    platform outliers along the way.

    - Ruby generates (random?) integers for various anonymous style functions
      such as in erb and the active_support library.
    - Block functions have metadata that we don't care about.

    Which name is used as the starting point depends on
    ``context["legacy_function_logic"]``: when it is set, the raw function
    name is preferred over ``function`` (and for the native family the name
    is trimmed afterwards); otherwise ``function`` is preferred and it is
    trimmed only when no raw function name was supplied.  Independently of
    that setting the raw function name is preferred for the ``csharp``
    platform.

    When neither name is available an empty ``function`` component is
    returned.
    """
    from sentry.stacktraces.functions import trim_function_name

    family = get_behavior_family_for_platform(platform)

    # Function names for csharp only started being trimmed late, which
    # changed the inputs to the grouping code: `function` used to hold the raw
    # and untrimmed string and now holds the trimmed one.  To keep feeding
    # the same input into grouping, csharp keeps using the raw function name.
    # TODO: if a frame:v5 is added the raw function name should not be preferred
    # for csharp.
    raw_name_preferred = platform == "csharp"

    if context["legacy_function_logic"] or raw_name_preferred:
        name = raw_function or function
    elif function and not raw_function:
        # Only `function` is known, so it has not been trimmed yet.
        name = trim_function_name(function, platform)
    else:
        name = function or raw_function

    if not name:
        return GroupingComponent(id="function")

    component = GroupingComponent(
        id="function",
        values=[name],
        similarity_encoder=ident_encoder,
    )

    if platform == "ruby":
        if name.startswith("block "):
            component.update(values=["block"], hint="ruby block")
        else:
            without_suffix = _ruby_erb_func.sub("", name)
            if without_suffix != name:
                component.update(
                    values=[without_suffix],
                    hint="removed generated erb template suffix",
                )

    elif platform == "php":
        anonymous_prefixes = ("[Anonymous", "class@anonymous\x00")
        if name.startswith(anonymous_prefixes):
            component.update(
                contributes=False,
                hint="ignored anonymous function",
            )
        if (context["php_detect_anonymous_classes"]
                and name.startswith("class@anonymous")):
            # Keep only what follows the last "::" of the anonymous class.
            method_name = name.rsplit("::", 1)[-1]
            if method_name != name:
                component.update(
                    values=[method_name],
                    hint="anonymous class method",
                )

    elif platform == "java":
        if name.startswith("lambda$"):
            component.update(
                contributes=False,
                hint="ignored lambda function",
            )

    elif family == "native":
        if name in ("<redacted>", "<unknown>"):
            component.update(
                contributes=False,
                hint="ignored unknown function",
            )
        elif context["legacy_function_logic"]:
            isolated = trim_function_name(
                name, platform, normalize_lambdas=False)
            if isolated != name:
                component.update(values=[isolated], hint="isolated function")
                # The fuzzing step below continues from the isolated name.
                name = isolated

        if context["native_fuzzing"]:
            # Normalize macOS/llvm anonymous namespaces to
            # Windows-like/msvc
            msvc_style = name.replace(
                "(anonymous namespace)", "`anonymous namespace'")
            if msvc_style != name:
                component.update(values=[msvc_style])

    elif context["javascript_fuzzing"] and family == "javascript":
        # Turn Object.foo or Foo.foo into foo so that some common cross
        # browser differences are resolved.
        short_name = name.rsplit(".", 1)[-1]
        if short_name != name:
            component.update(
                values=[short_name],
                hint="trimmed javascript function",
            )

        # Function names coming out of sourcemaps are unreliable by the
        # nature of sourcemaps and thus a bad indicator for grouping.  If a
        # sourcemap was used for this frame and the context line can be used
        # instead, the function name is no longer used.
        if sourcemap_used and context_line_available:
            component.update(
                contributes=False,
                contributes_to_similarity=True,
                hint=
                "ignored because sourcemap used and context line available",
            )

    if component.values and context["hierarchical_grouping"]:
        component.update(tree_label={"function": component.values[0]})

    return component
Beispiel #32
0
def single_exception_legacy(exception, config, **meta):
    """Build the legacy ``exception`` grouping component.

    The component wraps, in this order, the stacktrace, type and value
    components.  Type and value start out as non-contributing.  The type is
    switched on whenever it is set; the value is switched on only when the
    stacktrace component does not contribute, otherwise it merely receives
    a hint naming what took precedence.
    """
    type_part = GroupingComponent(
        id='type',
        values=[exception.type] if exception.type else [],
        contributes=False,
    )
    value_part = GroupingComponent(
        id='value',
        values=[exception.value] if exception.value else [],
        contributes=False,
    )
    stacktrace_part = GroupingComponent(id='stacktrace')

    if exception.stacktrace is not None:
        stacktrace_part = config.get_grouping_component(
            exception.stacktrace, **meta)
        if stacktrace_part.contributes:
            # Work out what outranks the value, then annotate the value with
            # it (if there is a value at all).
            if exception.type:
                type_part.update(contributes=True)
                precedence_hint = 'stacktrace and type take precedence'
            else:
                precedence_hint = 'stacktrace takes precedence'
            if exception.value:
                value_part.update(hint=precedence_hint)

    if not stacktrace_part.contributes:
        # Without a usable stacktrace both type and value are grouped on.
        if exception.type:
            type_part.update(contributes=True)
        if exception.value:
            value_part.update(contributes=True)

    return GroupingComponent(
        id='exception',
        values=[stacktrace_part, type_part, value_part],
    )
Beispiel #33
0
def single_exception(interface: SingleException, event: Event,
                     context: GroupingContext,
                     **meta: Any) -> ReturnedVariants:
    """Build one ``exception`` grouping component per stacktrace variant.

    Each returned component contains the variant's stacktrace component and
    a type component, followed by an ``ns-error`` component when the
    mechanism metadata carries ``ns_error``, and by a ``value`` component
    when ``context["with_exception_value_fallback"]`` is set.  The value stops
    contributing as soon as the stacktrace or the ns-error information
    contributes.

    When the exception has no stacktrace a single ``app`` variant with an
    empty stacktrace component is produced.
    """
    app_type = GroupingComponent(
        id="type",
        values=[interface.type] if interface.type else [],
        similarity_encoder=ident_encoder,
    )
    # Copy taken before any update below, so the system variant keeps the
    # untouched type component.
    system_type = app_type.shallow_copy()

    ns_error = None

    mechanism = interface.mechanism
    if mechanism:
        if mechanism.synthetic:
            # Ignore synthetic exceptions as they are produced from platform
            # specific error codes.
            #
            # For example there can be crashes with EXC_ACCESS_VIOLATION_* on Windows with
            # the same exact stacktrace as a crash with EXC_BAD_ACCESS on macOS.
            #
            # Do not update type component of system variant, such that regex
            # can be continuously modified without unnecessarily creating new
            # groups.
            app_type.update(
                contributes=False,
                hint="ignored because exception is synthetic",
            )
        mechanism_meta = mechanism.meta
        if mechanism_meta and "ns_error" in mechanism_meta:
            ns_error_info = mechanism_meta["ns_error"]
            ns_error = GroupingComponent(
                id="ns-error",
                values=[
                    ns_error_info.get("domain"),
                    ns_error_info.get("code"),
                ],
            )

    if interface.stacktrace is None:
        stacktrace_variants = {
            "app": GroupingComponent(id="stacktrace"),
        }
    else:
        with context:
            context["exception_data"] = interface.to_json()
            stacktrace_variants = context.get_grouping_component(
                interface.stacktrace, event=event, **meta)

    variants = {}

    for variant, stacktrace in stacktrace_variants.items():
        type_for_variant = system_type if variant == "system" else app_type
        parts = [stacktrace, type_for_variant]

        if ns_error is not None:
            parts.append(ns_error)

        if context["with_exception_value_fallback"]:
            value = GroupingComponent(
                id="value", similarity_encoder=text_shingle_encoder(5))

            raw_value = interface.value
            if raw_value is not None:
                trimmed = trim_message_for_grouping(raw_value)
                if raw_value != trimmed:
                    trim_hint = "stripped common values"
                else:
                    trim_hint = None
                if trimmed:
                    value.update(values=[trimmed], hint=trim_hint)

            if stacktrace.contributes and value.contributes:
                value.update(
                    contributes=False,
                    contributes_to_similarity=True,
                    hint="ignored because stacktrace takes precedence",
                )

            if (ns_error is not None
                    and ns_error.contributes
                    and value.contributes):
                value.update(
                    contributes=False,
                    contributes_to_similarity=True,
                    hint="ignored because ns-error info takes precedence",
                )

            parts.append(value)

        variants[variant] = GroupingComponent(id="exception", values=parts)

    return variants
Beispiel #34
0
def frame_legacy(frame, event, **meta):
    """Build the legacy ``frame`` grouping component for a single frame.

    The returned component wraps six sub-components, in this order: module,
    filename, context-line, symbol, function and lineno.  Symbol, function
    and line number are only filled in as potential contributors when the
    context line does not contribute and a module or filename does; in that
    case symbol takes precedence over function, which in turn takes
    precedence over the line number.  In every other case they are recorded
    with ``contributes=False``.

    ``event`` is only consulted for its ``platform`` when the frame has none
    of its own.  ``meta`` is accepted but not used by this function.
    """
    platform = frame.platform or event.platform

    # In certain situations we want to disregard the entire frame.
    # Both values are handed to the outer ``frame`` component at the end.
    contributes = None
    hint = None

    # this requires some explanation: older sentry versions did not have
    # raw_function but only function.  For some platforms like native
    # we now instead store a trimmed function name in frame.function
    # and the original value moved to raw_function.  This requires us to
    # prioritize raw_function over function in the legacy grouping code to
    # avoid creating new groups.
    func = frame.raw_function or frame.function

    # Safari throws [native code] frames in for calls like ``forEach``
    # whereas Chrome ignores these. Let's remove it from the hashing algo
    # so that they're more likely to group together
    filename_component = GroupingComponent(id='filename')
    if frame.filename == '<anonymous>':
        filename_component.update(contributes=False,
                                  values=[frame.filename],
                                  hint='anonymous filename discarded')
    elif frame.filename == '[native code]':
        # Here the whole frame is discarded, not just the filename.
        contributes = False
        hint = 'native code indicated by filename'
    elif frame.filename:
        if has_url_origin(frame.abs_path):
            filename_component.update(
                contributes=False,
                values=[frame.filename],
                hint='ignored because filename is a URL',
            )
        # XXX(dcramer): dont compute hash using frames containing the 'Caused by'
        # text as it contains an exception value which may contain dynamic
        # values (see raven-java#125)
        elif frame.filename.startswith('Caused by: '):
            filename_component.update(values=[frame.filename],
                                      contributes=False,
                                      hint='ignored because invalid')
        else:
            hashable_filename, hashable_filename_hint = \
                remove_filename_outliers_legacy(frame.filename, platform)
            filename_component.update(values=[hashable_filename],
                                      hint=hashable_filename_hint)

    # if we have a module we use that for grouping.  This will always
    # take precedence over the filename, even if the module is
    # considered unhashable.
    module_component = GroupingComponent(id='module')
    if frame.module:
        if is_unhashable_module_legacy(frame, platform):
            # The module name is replaced by a fixed '<module>' salt value.
            module_component.update(
                values=[
                    GroupingComponent(id='salt',
                                      values=['<module>'],
                                      hint='normalized generated module name')
                ],
                hint='ignored module',
            )
        else:
            module_name, module_hint = \
                remove_module_outliers_legacy(frame.module, platform)
            module_component.update(values=[module_name], hint=module_hint)
        if frame.filename:
            # Overrides whatever the filename handling above decided: the
            # raw filename is stored but no longer contributes.
            filename_component.update(values=[frame.filename],
                                      contributes=False,
                                      hint='module takes precedence')

    # Context line when available is the primary contributor
    context_line_component = GroupingComponent(id='context-line')
    if frame.context_line is not None:
        if len(frame.context_line) > 120:
            context_line_component.update(
                hint='discarded because line too long')
        elif has_url_origin(frame.abs_path) and not func:
            # URL-origin frames keep their context line only when a
            # function name is known.
            context_line_component.update(
                hint='discarded because from URL origin')
        else:
            context_line_component.update(values=[frame.context_line])

    symbol_component = GroupingComponent(id='symbol')
    function_component = GroupingComponent(id='function')
    lineno_component = GroupingComponent(id='lineno')

    # The context line grouping information is the most reliable one.
    # If we did not manage to find some information there, we want to
    # see if we can come up with some extra information.  We only want
    # to do that if we managed to get a module or filename.
    if not context_line_component.contributes and \
       (module_component.contributes or filename_component.contributes):
        if frame.symbol:
            symbol_component.update(values=[frame.symbol])
            if func:
                function_component.update(contributes=False,
                                          values=[func],
                                          hint='symbol takes precedence')
            if frame.lineno:
                lineno_component.update(contributes=False,
                                        values=[frame.lineno],
                                        hint='symbol takes precedence')
        elif func:
            if is_unhashable_function_legacy(func):
                # The function name is replaced by a fixed '<function>' salt
                # value.
                function_component.update(values=[
                    GroupingComponent(id='salt',
                                      values=['<function>'],
                                      hint='normalized lambda function name')
                ])
            else:
                function, function_hint = remove_function_outliers_legacy(func)
                function_component.update(values=[function],
                                          hint=function_hint)
            if frame.lineno:
                lineno_component.update(contributes=False,
                                        values=[frame.lineno],
                                        hint='function takes precedence')
        elif frame.lineno:
            lineno_component.update(values=[frame.lineno])
    else:
        # Either the context line contributes or there is neither a
        # contributing module nor filename: record the values for display
        # but keep them out of the grouping.
        if frame.symbol:
            symbol_component.update(
                contributes=False,
                values=[frame.symbol],
                hint='symbol is used only if module or filename are available')
        if func:
            function_component.update(
                contributes=False,
                values=[func],
                hint=
                'function name is used only if module or filename are available'
            )
        if frame.lineno:
            lineno_component.update(
                contributes=False,
                values=[frame.lineno],
                hint=
                'line number is used only if module or filename are available')

    return GroupingComponent(
        id='frame',
        values=[
            module_component,
            filename_component,
            context_line_component,
            symbol_component,
            function_component,
            lineno_component,
        ],
        contributes=contributes,
        hint=hint,
    )
Beispiel #35
0
def get_function_component(function, platform, legacy_function_logic,
                           sourcemap_used=False, context_line_available=False,
                           raw_function=None, javascript_fuzzing=False):
    """
    Build the ``function`` grouping component, normalizing common platform
    outliers along the way.

    - Ruby generates (random?) integers for various anonymous style functions
      such as in erb and the active_support library.
    - Block functions have metadata that we don't care about.

    With `legacy_function_logic` set the raw function name is preferred over
    `function` and native names are trimmed afterwards.  Without it
    `function` is preferred, and it is trimmed only when no raw function name
    was supplied.

    When neither name is available an empty ``function`` component is
    returned.
    """
    from sentry.stacktraces.functions import trim_function_name
    family = get_behavior_family_for_platform(platform)

    if legacy_function_logic:
        name = raw_function or function
    elif function and not raw_function:
        # Only `function` is known, so it has not been trimmed yet.
        name = trim_function_name(function, platform)
    else:
        name = function or raw_function

    if not name:
        return GroupingComponent(id='function')

    component = GroupingComponent(id='function', values=[name])

    if platform == 'ruby':
        if name.startswith('block '):
            component.update(values=['block'], hint='ruby block')
        else:
            without_suffix = _ruby_erb_func.sub('', name)
            if without_suffix != name:
                component.update(
                    values=[without_suffix],
                    hint='removed generated erb template suffix',
                )

    elif platform == 'php':
        if name.startswith('[Anonymous'):
            component.update(
                contributes=False,
                hint='ignored anonymous function',
            )

    elif platform == 'java':
        if name.startswith('lambda$'):
            component.update(
                contributes=False,
                hint='ignored lambda function',
            )

    elif family == 'native':
        if name in ('<redacted>', '<unknown>'):
            component.update(
                contributes=False,
                hint='ignored unknown function',
            )
        elif legacy_function_logic:
            isolated = trim_function_name(name, platform)
            if isolated != name:
                component.update(values=[isolated], hint='isolated function')

    elif javascript_fuzzing and family == 'javascript':
        # Turn Object.foo or Foo.foo into foo so that some common cross
        # browser differences are resolved.
        short_name = name.rsplit('.', 1)[-1]
        if short_name != name:
            component.update(
                values=[short_name],
                hint='trimmed javascript function',
            )

        # Function names coming out of sourcemaps are unreliable by the
        # nature of sourcemaps and thus a bad indicator for grouping.  If a
        # sourcemap was used for this frame and the context line can be used
        # instead, the function name is no longer used.
        if sourcemap_used and context_line_available:
            component.update(
                contributes=False,
                hint='ignored because sourcemap used and context line available',
            )

    return component
Beispiel #36
0
def frame_legacy(frame, event, **meta):
    """Build the legacy ``frame`` grouping component for a single frame.

    The returned component wraps six sub-components, in this order: module,
    filename, context-line, symbol, function and lineno.  Symbol, function
    and line number are only filled in as potential contributors when the
    context line does not contribute and a module or filename does; in that
    case symbol takes precedence over function, which in turn takes
    precedence over the line number.  In every other case they are recorded
    with ``contributes=False``.

    ``event`` is only consulted for its ``platform`` when the frame has none
    of its own.  ``meta`` is accepted but not used by this function.
    """
    platform = frame.platform or event.platform

    # In certain situations we want to disregard the entire frame.
    # Both values are handed to the outer ``frame`` component at the end.
    contributes = None
    hint = None

    # this requires some explanation: older sentry versions did not have
    # raw_function but only function.  For some platforms like native
    # we now instead store a trimmed function name in frame.function
    # and the original value moved to raw_function.  This requires us to
    # prioritize raw_function over function in the legacy grouping code to
    # avoid creating new groups.
    func = frame.raw_function or frame.function

    # Safari throws [native code] frames in for calls like ``forEach``
    # whereas Chrome ignores these. Let's remove it from the hashing algo
    # so that they're more likely to group together
    filename_component = GroupingComponent(id='filename')
    if frame.filename == '<anonymous>':
        filename_component.update(
            contributes=False,
            values=[frame.filename],
            hint='anonymous filename discarded'
        )
    elif frame.filename == '[native code]':
        # Here the whole frame is discarded, not just the filename.
        contributes = False
        hint = 'native code indicated by filename'
    elif frame.filename:
        if has_url_origin(frame.abs_path):
            filename_component.update(
                contributes=False,
                values=[frame.filename],
                hint='ignored because filename is a URL',
            )
        # XXX(dcramer): dont compute hash using frames containing the 'Caused by'
        # text as it contains an exception value which may contain dynamic
        # values (see raven-java#125)
        elif frame.filename.startswith('Caused by: '):
            filename_component.update(
                values=[frame.filename],
                contributes=False,
                hint='ignored because invalid'
            )
        else:
            hashable_filename, hashable_filename_hint = \
                remove_filename_outliers_legacy(frame.filename, platform)
            filename_component.update(
                values=[hashable_filename],
                hint=hashable_filename_hint
            )

    # if we have a module we use that for grouping.  This will always
    # take precedence over the filename, even if the module is
    # considered unhashable.
    module_component = GroupingComponent(id='module')
    if frame.module:
        if is_unhashable_module_legacy(frame, platform):
            # The module name is replaced by a fixed '<module>' salt value.
            module_component.update(
                values=[GroupingComponent(
                    id='salt',
                    values=['<module>'],
                    hint='normalized generated module name'
                )],
                hint='ignored module',
            )
        else:
            module_name, module_hint = \
                remove_module_outliers_legacy(frame.module, platform)
            module_component.update(
                values=[module_name],
                hint=module_hint
            )
        if frame.filename:
            # Overrides whatever the filename handling above decided: the
            # raw filename is stored but no longer contributes.
            filename_component.update(
                values=[frame.filename],
                contributes=False,
                hint='module takes precedence'
            )

    # Context line when available is the primary contributor
    context_line_component = GroupingComponent(id='context-line')
    if frame.context_line is not None:
        if len(frame.context_line) > 120:
            context_line_component.update(hint='discarded because line too long')
        elif has_url_origin(frame.abs_path) and not func:
            # URL-origin frames keep their context line only when a
            # function name is known.
            context_line_component.update(hint='discarded because from URL origin')
        else:
            context_line_component.update(values=[frame.context_line])

    symbol_component = GroupingComponent(id='symbol')
    function_component = GroupingComponent(id='function')
    lineno_component = GroupingComponent(id='lineno')

    # The context line grouping information is the most reliable one.
    # If we did not manage to find some information there, we want to
    # see if we can come up with some extra information.  We only want
    # to do that if we managed to get a module or filename.
    if not context_line_component.contributes and \
       (module_component.contributes or filename_component.contributes):
        if frame.symbol:
            symbol_component.update(values=[frame.symbol])
            if func:
                function_component.update(
                    contributes=False,
                    values=[func],
                    hint='symbol takes precedence'
                )
            if frame.lineno:
                lineno_component.update(
                    contributes=False,
                    values=[frame.lineno],
                    hint='symbol takes precedence'
                )
        elif func:
            if is_unhashable_function_legacy(func):
                # The function name is replaced by a fixed '<function>' salt
                # value.
                function_component.update(values=[
                    GroupingComponent(
                        id='salt',
                        values=['<function>'],
                        hint='normalized lambda function name'
                    )
                ])
            else:
                function, function_hint = remove_function_outliers_legacy(func)
                function_component.update(
                    values=[function],
                    hint=function_hint
                )
            if frame.lineno:
                lineno_component.update(
                    contributes=False,
                    values=[frame.lineno],
                    hint='function takes precedence'
                )
        elif frame.lineno:
            lineno_component.update(values=[frame.lineno])
    else:
        # Either the context line contributes or there is neither a
        # contributing module nor filename: record the values for display
        # but keep them out of the grouping.
        if frame.symbol:
            symbol_component.update(
                contributes=False,
                values=[frame.symbol],
                hint='symbol is used only if module or filename are available'
            )
        if func:
            function_component.update(
                contributes=False,
                values=[func],
                hint='function name is used only if module or filename are available'
            )
        if frame.lineno:
            lineno_component.update(
                contributes=False,
                values=[frame.lineno],
                hint='line number is used only if module or filename are available'
            )

    return GroupingComponent(
        id='frame',
        values=[
            module_component,
            filename_component,
            context_line_component,
            symbol_component,
            function_component,
            lineno_component,
        ],
        contributes=contributes,
        hint=hint,
    )
Beispiel #37
0
def get_frame_component(frame, event, meta, legacy_function_logic=False,
                        use_contextline=False,
                        javascript_fuzzing=False):
    """Build the ``frame`` grouping component for a single frame.

    The component wraps the module, filename and function components and,
    when `use_contextline` is set, the context line component as well.  A
    contributing module switches a contributing filename off.

    With `javascript_fuzzing` enabled on a platform of the javascript
    family, frames with unknown function names have their function component
    switched off and low quality frames (``eval`` or native/eval/anonymous
    paths) are switched off entirely.

    `meta` is accepted but not used by this function.
    """
    platform = frame.platform or event.platform

    # Safari throws [native code] frames in for calls like ``forEach``
    # whereas Chrome ignores these. Let's remove it from the hashing algo
    # so that they're more likely to group together
    filename_part = get_filename_component(
        frame.abs_path,
        frame.filename,
        platform,
        allow_file_origin=javascript_fuzzing,
    )

    # A module, when present and contributing, is what gets grouped on
    # instead of the filename.
    module_part = get_module_component_v1(
        frame.abs_path, frame.module, platform)
    if module_part.contributes and filename_part.contributes:
        filename_part.update(
            contributes=False,
            hint='module takes precedence',
        )

    # The context line is only looked at when the caller allows it.
    if use_contextline:
        context_line_part = get_contextline_component(frame, platform)
    else:
        context_line_part = None

    used_sourcemap = frame.data and frame.data.get('sourcemap') is not None
    has_context_line = context_line_part and context_line_part.contributes
    function_part = get_function_component(
        function=frame.function,
        raw_function=frame.raw_function,
        platform=platform,
        sourcemap_used=used_sourcemap,
        context_line_available=has_context_line,
        legacy_function_logic=legacy_function_logic,
        javascript_fuzzing=javascript_fuzzing,
    )

    parts = [module_part, filename_part, function_part]
    if context_line_part is not None:
        parts.append(context_line_part)

    frame_part = GroupingComponent(id='frame', values=parts)

    # Everything below only applies to javascript fuzzing mode, where some
    # frames are disregarded consistently.  This forces common bad
    # stacktraces together to have a common hash at the cost of maybe
    # skipping over frames that would otherwise be useful.
    if not (javascript_fuzzing
            and get_behavior_family_for_platform(platform) == 'javascript'):
        return frame_part

    name = frame.raw_function or frame.function
    if name:
        name = name.rsplit('.', 1)[-1]

    unknown_names = (None, '?', '<anonymous function>', '<anonymous>',
                     'Anonymous function')
    if name in unknown_names or name.endswith('/<'):
        function_part.update(
            contributes=False,
            hint='ignored unknown function name',
        )

    low_quality_paths = ('[native code]', 'native code', 'eval code',
                         '<anonymous>')
    if name == 'eval' or frame.abs_path in low_quality_paths:
        frame_part.update(
            contributes=False,
            hint='ignored low quality javascript frame',
        )

    return frame_part
Beispiel #38
0
def get_module_component_v1(abs_path, module, platform):
    """Return the ``module`` grouping component for the given absolute path,
    module and platform, after some platform specific cleaning.

    Without a module an empty component is returned.  For javascript a
    path-like module that the absolute path ends with is ignored.  For java,
    lambda modules are ignored, generated method accessors are reduced to
    their common prefix and known codegen markers are replaced.
    """
    if module is None:
        return GroupingComponent(id='module')

    component = GroupingComponent(id='module', values=[module])

    if platform == 'javascript' and '/' in module \
       and abs_path and abs_path.endswith(module):
        component.update(
            contributes=False,
            hint='ignored bad javascript module',
        )
        return component

    if platform != 'java':
        return component

    if '$$Lambda$' in module:
        component.update(
            contributes=False,
            hint='ignored java lambda',
        )

    # Generated method accessors carry a suffix after a fixed prefix; only
    # the prefix is kept.
    reflection_prefixes = (
        'sun.reflect.GeneratedMethodAccessor',
        'jdk.internal.reflect.GeneratedMethodAccessor',
    )
    for prefix in reflection_prefixes:
        if module.startswith(prefix):
            component.update(
                values=[prefix],
                hint='removed reflection marker',
            )
            return component

    # Apply every codegen marker pattern in turn to the module name.
    enhancer_patterns = (
        _java_reflect_enhancer_re,
        _java_cglib_enhancer_re,
        _java_assist_enhancer_re,
        _clojure_enhancer_re,
    )
    cleaned = module
    for pattern in enhancer_patterns:
        cleaned = pattern.sub(r'\1<auto>', cleaned)
    if cleaned != module:
        component.update(
            values=[cleaned],
            hint='removed codegen marker',
        )

    return component
Beispiel #39
0
def single_exception_legacy(exception, config, **meta):
    """Assemble the ``exception`` grouping component for one exception.

    The result wraps three child components, in this order: ``stacktrace``,
    ``type`` and ``value``.  When the exception has a stacktrace, its
    component is obtained from ``config.get_grouping_component`` (``meta`` is
    forwarded as keyword arguments).

    * If that stacktrace component contributes, the type (when present) is
      made to contribute too and the value only receives a hint explaining
      what took precedence over it.
    * If the stacktrace component does not contribute (or there is no
      stacktrace), the type and the value are each made to contribute when
      present.
    """
    exc_type = exception.type
    exc_value = exception.value

    type_component = GroupingComponent(
        id='type',
        contributes=False,
        values=[exc_type] if exc_type else [],
    )
    value_component = GroupingComponent(
        id='value',
        contributes=False,
        values=[exc_value] if exc_value else [],
    )

    has_stacktrace = exception.stacktrace is not None
    if has_stacktrace:
        stacktrace_component = config.get_grouping_component(
            exception.stacktrace, **meta)
    else:
        stacktrace_component = GroupingComponent(id='stacktrace')

    if has_stacktrace and stacktrace_component.contributes:
        if exc_type:
            type_component.update(contributes=True)
        if exc_value:
            # The value never contributes here; the hint records which
            # components were preferred over it.
            if exc_type:
                precedence = 'stacktrace and type take precedence'
            else:
                precedence = 'stacktrace takes precedence'
            value_component.update(hint=precedence)

    if not stacktrace_component.contributes:
        if exc_type:
            type_component.update(contributes=True)
        if exc_value:
            value_component.update(contributes=True)

    children = [stacktrace_component, type_component, value_component]
    return GroupingComponent(id='exception', values=children)
Beispiel #40
0
def get_stacktrace_hierarchy(main_variant, components, frames,
                             inverted_hierarchy):
    """Build the layered ``app-depth-N`` stacktrace variants.

    The frame/component pairs are consumed exactly once through a single
    shared iterator.  Every time a contributing sentinel frame is reached a
    new layer is emitted; each layer contains the values of the previous
    layer plus the newly collected components, so layers grow cumulatively.
    If the sentinel component is a prefix frame, the following contributing
    components are pulled into the same layer up to and including the first
    one that is not a prefix frame.

    :param main_variant: component for the whole stacktrace.  It is stored
        under ``"app-depth-max"`` and mutated: its ``tree_label`` is
        recomputed from its own values.
    :param components: per-frame components, paired positionally with
        ``frames`` via ``zip``.
    :param frames: the event frames (crashing frame last, per the comment
        below).
    :param inverted_hierarchy: when falsy the pairs are walked from the end
        of the list, otherwise from the start.
    :returns: dict mapping ``"app-depth-1"``, ``"app-depth-2"``, ... (at most
        ``MAX_LAYERS`` of them) to ``stacktrace`` components, plus
        ``"app-depth-max"``.  If no layer could be built, the layers come
        from ``_build_fallback_tree`` instead (presumably also a dict of
        variants -- confirm against that helper).
    """
    frames_iter = list(zip(frames, components))
    if not inverted_hierarchy:
        # frames are sorted in a way where the crashing frame is at the end of
        # the list. In "non-inverted" mode we want to start at the crashing
        # frame, in inverted mode we want to start at the threadbase
        frames_iter = reversed(frames_iter)

    # Turn it into a one-shot iterator: both inner ``for`` loops below resume
    # from wherever the previous loop stopped instead of restarting.
    frames_iter = iter(frames_iter)

    # Seed with an empty layer so the first real layer has something to extend.
    prev_variant = GroupingComponent(id="stacktrace", values=[])
    all_variants = {}

    while len(all_variants) < MAX_LAYERS:
        depth = len(all_variants) + 1
        key = f"app-depth-{depth}"
        assert key not in all_variants

        # Advance to the next contributing sentinel frame; after the ``break``
        # the loop variable ``component`` still refers to it.
        for frame, component in frames_iter:
            if component.contributes and component.is_sentinel_frame:
                break
        else:
            # Iterator exhausted without finding another sentinel: no more
            # layers can be built.
            break

        add_to_layer = [component]

        prev_component = component

        if prev_component.is_prefix_frame:
            # Keep collecting contributing components until one that is not a
            # prefix frame has been added.
            for frame, component in frames_iter:
                if not component.contributes:
                    continue

                add_to_layer.append(component)
                prev_component = component

                if not component.is_prefix_frame:
                    break
            else:
                # Ran out of frames while still inside a prefix chain: leave
                # the outer loop, discarding the partially collected layer.
                break

        # For consistency, we always want to preserve the sort order of the
        # event frames, no matter what order we're going through.

        if not inverted_hierarchy:
            # ``layer`` aliases ``add_to_layer``; it was collected walking
            # backwards, so reverse it in place and put the previous layer's
            # values after it.
            layer = add_to_layer
            layer.reverse()
            layer.extend(prev_variant.values)

        else:
            # Copy the previous values so the earlier layer is not mutated,
            # then append the newly collected components.
            layer = list(prev_variant.values)
            layer.extend(add_to_layer)

        tree_label = _compute_tree_label(layer)

        # Chained assignment: the new component is both stored under ``key``
        # and becomes the base for the next layer.
        all_variants[key] = prev_variant = GroupingComponent(
            id="stacktrace", values=layer, tree_label=tree_label)

    if not all_variants:
        # No sentinel-based layer was produced; delegate to the fallback.
        all_variants = _build_fallback_tree(main_variant, components, frames,
                                            inverted_hierarchy)

    all_variants["app-depth-max"] = main_variant

    # Note: this mutates the caller's ``main_variant``.
    main_variant.update(tree_label=_compute_tree_label(main_variant.values))

    return all_variants