예제 #1
0
파일: api.py 프로젝트: getsentry/sentry
def _get_calculated_grouping_variants_for_event(event, config):
    """Collect grouping components from every strategy, keyed by variant.

    The first strategy that yields a contributing component "wins".  Any
    contributing component produced afterwards by a strategy with a
    different name is switched off and annotated with a precedence hint.
    The components gathered per variant are finally wrapped into one
    ``GroupingComponent`` each and returned as ``{variant: component}``.
    """
    winner_name = None
    hint = None
    collected = {}

    for strategy in config.iter_strategies():
        variants = strategy.get_grouping_component_variants(event, config=config)
        for variant, component in six.iteritems(variants):
            collected.setdefault(variant, []).append(component)

            if winner_name is not None:
                # A winner exists already: silence contributions coming
                # from any other strategy.
                if component.contributes and winner_name != strategy.name:
                    component.update(contributes=False, hint=hint)
                continue

            if not component.contributes:
                continue

            winner_name = strategy.name
            if variant != 'default':
                subject = '%s of %s' % (strategy.name, variant)
            else:
                subject = strategy.name
            hint = '%s takes precedence' % subject

    result = {}
    for variant, parts in six.iteritems(collected):
        wrapper = GroupingComponent(id=variant, values=parts)
        if not wrapper.contributes and hint:
            wrapper.update(hint=hint)
        result[variant] = wrapper

    return result
예제 #2
0
def _get_calculated_grouping_variants_for_event(event, config):
    """Merge the component variants of all strategies into one dict.

    The first contributing component decides the winning strategy.  The
    precedence hint names that strategy together with all contributing
    variants it returned (joined with ``/``), unless the winning variant is
    ``default``.  Contributing components of other strategies are then
    marked ``contributes=False`` / ``contributes_to_similarity=True``.
    Returns ``{variant: GroupingComponent}``.
    """
    winner = None
    hint = None
    by_variant = {}

    for strategy in config.iter_strategies():
        variants = strategy.get_grouping_component_variants(event, config=config)
        for variant, component in six.iteritems(variants):
            by_variant.setdefault(variant, []).append(component)

            if winner is not None:
                if component.contributes and winner != strategy.name:
                    component.update(
                        contributes=False,
                        contributes_to_similarity=True,
                        hint=hint,
                    )
                continue

            if not component.contributes:
                continue

            winner = strategy.name
            contributing = sorted(
                name for name, candidate in six.iteritems(variants) if candidate.contributes
            )
            variants_hint = "/".join(contributing)
            if variant != "default":
                subject = "%s of %s" % (strategy.name, variants_hint)
            else:
                subject = strategy.name
            # "frames take precedence" vs. "message takes precedence"
            verb_suffix = "" if strategy.name.endswith("s") else "s"
            hint = "%s take%s precedence" % (subject, verb_suffix)

    merged = {}
    for variant, parts in six.iteritems(by_variant):
        wrapper = GroupingComponent(id=variant, values=parts)
        if not wrapper.contributes and hint:
            wrapper.update(hint=hint)
        merged[variant] = wrapper

    return merged
예제 #3
0
def single_exception_v1(exception, config, **meta):
    """Build the ``exception`` component from stacktrace and type.

    The stacktrace component is obtained from ``config`` when the exception
    carries a stacktrace, otherwise an empty placeholder is used.  The type
    component is excluded from grouping when the exception's mechanism is
    flagged as synthetic.
    """
    stacktrace = exception.stacktrace
    if stacktrace is None:
        stacktrace_part = GroupingComponent(id='stacktrace')
    else:
        stacktrace_part = config.get_grouping_component(stacktrace, **meta)

    type_values = []
    if exception.type:
        type_values.append(exception.type)
    type_part = GroupingComponent(id='type', values=type_values)

    mechanism = exception.mechanism
    if mechanism and mechanism.synthetic:
        type_part.update(
            contributes=False,
            hint='ignored because exception is synthetic',
        )

    return GroupingComponent(id='exception', values=[stacktrace_part, type_part])
예제 #4
0
def _get_calculated_grouping_variants_for_event(event, config):
    """Run all strategies of ``config`` against ``event`` and merge the results.

    Components are bucketed by variant name.  Once one strategy has produced
    a contributing component, contributing components from strategies with a
    different name are marked as non-contributing with a hint naming the
    strategy that took precedence.  Returns ``{variant: GroupingComponent}``.
    """
    components_by_variant = {}
    first_contributor = None
    takeover_hint = None

    for strategy in config.iter_strategies():
        produced = strategy.get_grouping_component_variants(event, config=config)
        for variant, component in six.iteritems(produced):
            components_by_variant.setdefault(variant, []).append(component)

            if first_contributor is None:
                if component.contributes:
                    first_contributor = strategy.name
                    if variant == 'default':
                        who = strategy.name
                    else:
                        who = '%s of %s' % (strategy.name, variant)
                    takeover_hint = '%s takes precedence' % who
            elif component.contributes and first_contributor != strategy.name:
                component.update(contributes=False, hint=takeover_hint)

    variants = {}
    for variant, members in six.iteritems(components_by_variant):
        combined = GroupingComponent(id=variant, values=members)
        if not combined.contributes and takeover_hint:
            combined.update(hint=takeover_hint)
        variants[variant] = combined

    return variants
예제 #5
0
파일: legacy.py 프로젝트: mburgs/sentry
def single_exception_legacy(exception, context, **meta):
    """Build the legacy ``exception`` component (stacktrace, type, value).

    Type and value start out as non-contributing.  With a contributing
    stacktrace only the type is enabled and the value merely receives a
    hint explaining what took precedence; without one, both type and value
    are enabled when present.
    """
    type_part = GroupingComponent(
        id="type",
        values=[exception.type] if exception.type else [],
        similarity_encoder=ident_encoder,
        contributes=False,
    )
    value_part = GroupingComponent(
        id="value",
        values=[exception.value] if exception.value else [],
        similarity_encoder=text_shingle_encoder(5),
        contributes=False,
    )
    stacktrace_part = GroupingComponent(id="stacktrace")

    if exception.stacktrace is not None:
        stacktrace_part = context.get_grouping_component(exception.stacktrace, **meta)
        if stacktrace_part.contributes:
            if exception.type:
                type_part.update(contributes=True)
            if exception.value:
                if exception.type:
                    precedence = "stacktrace and type take precedence"
                else:
                    precedence = "stacktrace takes precedence"
                value_part.update(hint=precedence)

    if not stacktrace_part.contributes:
        # No usable stacktrace: fall back to whatever type/value we have.
        if exception.type:
            type_part.update(contributes=True)
        if exception.value:
            value_part.update(contributes=True)

    parts = [stacktrace_part, type_part, value_part]
    return GroupingComponent(id="exception", values=parts)
예제 #6
0
def single_exception_v1(exception, config, **meta):
    """Assemble an ``exception`` component out of stacktrace, type and value.

    A contributing stacktrace enables the type (if any) and leaves the value
    disabled with an explanatory hint.  When the stacktrace does not
    contribute, type and value are both enabled when they are set.
    """
    if exception.type:
        type_values = [exception.type]
    else:
        type_values = []
    type_component = GroupingComponent(id='type', values=type_values, contributes=False)

    if exception.value:
        value_values = [exception.value]
    else:
        value_values = []
    value_component = GroupingComponent(id='value', values=value_values, contributes=False)

    stacktrace_component = GroupingComponent(id='stacktrace')

    if exception.stacktrace is not None:
        stacktrace_component = config.get_grouping_component(
            exception.stacktrace, **meta)
        if stacktrace_component.contributes:
            if not exception.type:
                if exception.value:
                    value_component.update(hint='stacktrace takes precedence')
            else:
                type_component.update(contributes=True)
                if exception.value:
                    value_component.update(
                        hint='stacktrace and type take precedence')

    if not stacktrace_component.contributes:
        if exception.type:
            type_component.update(contributes=True)
        if exception.value:
            value_component.update(contributes=True)

    children = [stacktrace_component, type_component, value_component]
    return GroupingComponent(id='exception', values=children)
예제 #7
0
def get_module_component(abs_path, module, platform):
    """Return the ``module`` grouping component after platform-specific cleanup.

    JavaScript modules that contain a ``/`` and form the tail of ``abs_path``
    are ignored.  For Java, lambda modules are ignored, generated reflection
    accessors are collapsed to their common prefix, and otherwise the
    codegen regexes are applied to the module name.
    """
    if module is None:
        return GroupingComponent(id="module")

    component = GroupingComponent(id="module", values=[module], similarity_encoder=ident_encoder)

    if platform == "javascript" and "/" in module and abs_path and abs_path.endswith(module):
        component.update(contributes=False, hint="ignored bad javascript module")
        return component

    if platform != "java":
        return component

    if "$$Lambda$" in module:
        component.update(contributes=False, hint="ignored java lambda")

    # Generated accessors carry a numeric suffix; keep only the stable prefix.
    for accessor_prefix in (
        "sun.reflect.GeneratedMethodAccessor",
        "jdk.internal.reflect.GeneratedMethodAccessor",
    ):
        if module.startswith(accessor_prefix):
            component.update(values=[accessor_prefix], hint="removed reflection marker")
            return component

    cleaned = module
    for pattern in (
        _java_reflect_enhancer_re,
        _java_cglib_enhancer_re,
        _java_assist_enhancer_re,
        _clojure_enhancer_re,
    ):
        cleaned = pattern.sub(r"\1<auto>", cleaned)
    if cleaned != module:
        component.update(values=[cleaned], hint="removed codegen marker")

    return component
예제 #8
0
def template_v1(template, **meta):
    """Build the ``template`` component from filename and context line.

    Each child component only receives a value when the corresponding
    attribute on ``template`` is not ``None``.
    """
    children = []
    for component_id, raw in (
        ("filename", template.filename),
        ("context-line", template.context_line),
    ):
        child = GroupingComponent(id=component_id)
        if raw is not None:
            child.update(values=[raw])
        children.append(child)

    return GroupingComponent(id="template", values=children)
예제 #9
0
def get_filename_component_v1(abs_path, filename, platform):
    """Build the ``filename`` grouping component for a frame.

    Only the lowercased basename of ``filename`` is used.  The component is
    marked as non-contributing when ``abs_path_is_url_v1(abs_path)`` is true
    or when the name is ``<anonymous>`` / ``[native code]``.  For the
    ``java`` platform the name is passed through ``_java_assist_enhancer_re``.
    """
    if filename is None:
        return GroupingComponent(id='filename')

    # Grouping uses just the platform independent basename, lowercased.
    basename = _basename_re.split(filename)[-1].lower()
    component = GroupingComponent(id='filename', values=[basename])

    ignore_reason = None
    if abs_path_is_url_v1(abs_path):
        ignore_reason = 'ignored because frame points to a URL'
    elif basename == '<anonymous>':
        ignore_reason = 'anonymous filename discarded'
    elif basename == '[native code]':
        ignore_reason = 'native code indicated by filename'
    elif platform == 'java':
        cleaned = _java_assist_enhancer_re.sub(r'\1<auto>', basename)
        if cleaned != basename:
            component.update(values=[cleaned], hint='cleaned javassist parts')

    if ignore_reason is not None:
        component.update(contributes=False, hint=ignore_reason)

    return component
예제 #10
0
def csp_v1(csp_interface, **meta):
    """Build the ``csp`` component from salt, violation and blocked URI.

    For local script violations the violation type is what contributes and
    the URI is kept but disabled; otherwise the violation component is
    disabled and the normalized blocked URI is used.
    """
    violation_component = GroupingComponent(id='violation')
    uri_component = GroupingComponent(id='uri')

    if not csp_interface.local_script_violation_type:
        violation_component.update(
            contributes=False,
            hint='not a local script violation',
        )
        uri_component.update(values=[csp_interface.normalized_blocked_uri])
    else:
        quoted_type = "'%s'" % csp_interface.local_script_violation_type
        violation_component.update(values=[quoted_type])
        uri_component.update(
            contributes=False,
            hint='violation takes precedence',
            values=[csp_interface.normalized_blocked_uri],
        )

    salt_component = GroupingComponent(
        id='salt',
        values=[csp_interface.effective_directive],
    )
    return GroupingComponent(
        id='csp',
        values=[salt_component, violation_component, uri_component],
    )
예제 #11
0
def get_filename_component(abs_path,
                           filename,
                           platform,
                           allow_file_origin=False):
    """Return the ``filename`` component based on the lowercased basename.

    The component is disabled when ``has_url_origin`` reports a URL origin
    for ``abs_path`` (``allow_file_origin`` is forwarded to that check) or
    when the name is ``<anonymous>`` / ``[native code]``.  On ``java`` the
    name is rewritten through ``_java_assist_enhancer_re``.
    """
    if filename is None:
        return GroupingComponent(id="filename")

    # Platform independent basename, lowercased, is all we group on.
    name = _basename_re.split(filename)[-1].lower()
    component = GroupingComponent(id="filename", values=[name])

    if has_url_origin(abs_path, allow_file_origin=allow_file_origin):
        component.update(contributes=False,
                         hint="ignored because frame points to a URL")
        return component

    if name == "<anonymous>":
        component.update(contributes=False,
                         hint="anonymous filename discarded")
        return component

    if name == "[native code]":
        component.update(contributes=False,
                         hint="native code indicated by filename")
        return component

    if platform == "java":
        rewritten = _java_assist_enhancer_re.sub(r"\1<auto>", name)
        if rewritten != name:
            component.update(values=[rewritten],
                             hint="cleaned javassist parts")

    return component
예제 #12
0
def csp_v1(
    interface: Csp, event: Event, context: GroupingContext, **meta: Any
) -> ReturnedVariants:
    """Return the ``csp`` component keyed by the variant found in ``context``.

    A local script violation makes the violation type contribute and
    disables the URI; any other report disables the violation component and
    groups on the normalized blocked URI.  The effective directive is
    always included as ``salt``.
    """
    variant = context["variant"]

    violation_component = GroupingComponent(id="violation")
    uri_component = GroupingComponent(id="uri")

    local_type = interface.local_script_violation_type
    if local_type:
        violation_component.update(values=["'%s'" % local_type])
        uri_component.update(
            contributes=False,
            hint="violation takes precedence",
            values=[interface.normalized_blocked_uri],
        )
    else:
        violation_component.update(contributes=False, hint="not a local script violation")
        uri_component.update(values=[interface.normalized_blocked_uri])

    salt_component = GroupingComponent(id="salt", values=[interface.effective_directive])
    csp_component = GroupingComponent(
        id="csp",
        values=[salt_component, violation_component, uri_component],
    )
    return {variant: csp_component}
예제 #13
0
def template_v1(interface: Template, event: Event, context: GroupingContext,
                **meta: Any) -> ReturnedVariants:
    """Return the ``template`` component keyed by ``context["variant"]``.

    The filename and context-line children get a value only when the
    matching attribute of ``interface`` is not ``None``.
    """
    variant = context["variant"]

    filename_part = GroupingComponent(id="filename")
    context_line_part = GroupingComponent(id="context-line")

    if interface.filename is not None:
        filename_part.update(values=[interface.filename])
    if interface.context_line is not None:
        context_line_part.update(values=[interface.context_line])

    template_part = GroupingComponent(
        id="template",
        values=[filename_part, context_line_part],
    )
    return {variant: template_part}
예제 #14
0
def template_v1(template, **meta):
    """Combine a template's filename and context line into one component.

    A child component is left without values when the corresponding
    attribute on ``template`` is ``None``.
    """
    def build(component_id, raw):
        # Create the child and attach the raw value only when it is set.
        child = GroupingComponent(id=component_id)
        if raw is not None:
            child.update(values=[raw])
        return child

    filename_part = build('filename', template.filename)
    context_line_part = build('context-line', template.context_line)

    return GroupingComponent(id='template', values=[filename_part, context_line_part])
예제 #15
0
def message_v1(template, **meta):
    """Build a ``template`` component from filename and context line.

    NOTE(review): despite its name this function reads template attributes
    and emits a component with id ``template`` -- confirm this is intended.
    """
    filename = template.filename
    filename_part = GroupingComponent(id='filename')
    if filename is not None:
        filename_part.update(values=[filename])

    context_line = template.context_line
    context_line_part = GroupingComponent(id='context-line')
    if context_line is not None:
        context_line_part.update(values=[context_line])

    children = [filename_part, context_line_part]
    return GroupingComponent(id='template', values=children)
예제 #16
0
def get_module_component_v1(abs_path, module, platform):
    """Create the ``module`` component, applying platform-specific cleaning.

    On ``javascript`` a path-like module that ``abs_path`` ends with is
    ignored.  On ``java`` lambda modules are ignored, generated method
    accessors (``sun.reflect`` / ``jdk.internal.reflect``) are reduced to
    their prefix, and other modules are run through the codegen regexes.
    """
    if module is None:
        return GroupingComponent(id='module')

    component = GroupingComponent(id='module', values=[module])

    is_bad_js_module = (
        platform == 'javascript'
        and '/' in module
        and abs_path
        and abs_path.endswith(module)
    )
    if is_bad_js_module:
        component.update(contributes=False, hint='ignored bad javascript module')
        return component

    if platform != 'java':
        return component

    if '$$Lambda$' in module:
        component.update(contributes=False, hint='ignored java lambda')

    if module.startswith('sun.reflect.GeneratedMethodAccessor'):
        component.update(
            values=['sun.reflect.GeneratedMethodAccessor'],
            hint='removed reflection marker',
        )
    elif module.startswith('jdk.internal.reflect.GeneratedMethodAccessor'):
        component.update(
            values=['jdk.internal.reflect.GeneratedMethodAccessor'],
            hint='removed reflection marker',
        )
    else:
        stripped = _java_reflect_enhancer_re.sub(r'\1<auto>', module)
        stripped = _java_cglib_enhancer_re.sub(r'\1<auto>', stripped)
        stripped = _java_assist_enhancer_re.sub(r'\1<auto>', stripped)
        stripped = _clojure_enhancer_re.sub(r'\1<auto>', stripped)
        if stripped != module:
            component.update(values=[stripped], hint='removed codegen marker')

    return component
예제 #17
0
파일: legacy.py 프로젝트: waterdrops/sentry
def single_exception_legacy(interface: SingleException, event: Event,
                            context: GroupingContext,
                            **meta: Any) -> ReturnedVariants:
    """Return the legacy ``exception`` component keyed by ``context["variant"]``.

    Type and value are created as non-contributing.  A contributing
    stacktrace enables the type and leaves a precedence hint on the value;
    a non-contributing (or missing) stacktrace enables both type and value
    when they are set.
    """
    type_part = GroupingComponent(
        id="type",
        values=[interface.type] if interface.type else [],
        similarity_encoder=ident_encoder,
        contributes=False,
    )
    value_part = GroupingComponent(
        id="value",
        values=[interface.value] if interface.value else [],
        similarity_encoder=text_shingle_encoder(5),
        contributes=False,
    )
    stacktrace_part = GroupingComponent(id="stacktrace")

    if interface.stacktrace is not None:
        stacktrace_part = context.get_grouping_component(
            interface.stacktrace, event=event, **meta)
        if stacktrace_part.contributes:
            if interface.type:
                type_part.update(contributes=True)
            if interface.value:
                if interface.type:
                    value_hint = "stacktrace and type take precedence"
                else:
                    value_hint = "stacktrace takes precedence"
                value_part.update(hint=value_hint)

    if not stacktrace_part.contributes:
        if interface.type:
            type_part.update(contributes=True)
        if interface.value:
            value_part.update(contributes=True)

    variant = context["variant"]
    exception_part = GroupingComponent(
        id="exception",
        values=[stacktrace_part, type_part, value_part],
    )
    return {variant: exception_part}
예제 #18
0
def get_calculated_grouping_variants_for_event(event, config_name=None):
    """Return a dict of the grouping variants calculated for ``event``.

    The configuration is looked up in ``CONFIGURATIONS`` by ``config_name``
    (falling back to ``DEFAULT_CONFIG``).  Checksum and fingerprinting logic
    are not handled here; per the original note that is the job of
    ``get_grouping_variants_for_event``.
    """
    config = CONFIGURATIONS[config_name or DEFAULT_CONFIG]

    leader = None
    leader_hint = None
    buckets = {}

    for strategy in config.iter_strategies():
        strategy_variants = strategy.get_grouping_component_variants(event, config=config)
        for variant, component in six.iteritems(strategy_variants):
            buckets.setdefault(variant, []).append(component)

            if leader is not None:
                # Another strategy already won; demote competing contributors.
                if component.contributes and leader != strategy.name:
                    component.update(contributes=False, hint=leader_hint)
            elif component.contributes:
                leader = strategy.name
                if variant != 'default':
                    described = '%s of %s' % (strategy.name, variant)
                else:
                    described = strategy.name
                leader_hint = '%s takes precedence' % described

    calculated = {}
    for variant, members in six.iteritems(buckets):
        top = GroupingComponent(id=variant, values=members)
        if not top.contributes and leader_hint:
            top.update(hint=leader_hint)
        calculated[variant] = top

    return calculated
예제 #19
0
def get_filename_component(
    abs_path: str,
    filename: Optional[str],
    platform: Optional[str],
    allow_file_origin: bool = False,
) -> GroupingComponent:
    """Return the ``filename`` component built from the lowercased basename.

    The component is disabled for paths that ``has_url_origin`` flags and
    for the special names ``<anonymous>`` and ``[native code]``.  On
    ``java`` the name is rewritten through ``_java_assist_enhancer_re``.
    A ``filebase`` tree label derived from the final name is always attached.
    """
    if filename is None:
        return GroupingComponent(id="filename")

    # Special names that never contribute, mapped to the hint they receive.
    discarded_names = {
        "<anonymous>": "anonymous filename discarded",
        "[native code]": "native code indicated by filename",
    }

    # Group on the platform independent, lowercased basename only.
    filename = _basename_re.split(filename)[-1].lower()
    component = GroupingComponent(
        id="filename",
        values=[filename],
        similarity_encoder=ident_encoder,
    )

    if has_url_origin(abs_path, allow_file_origin=allow_file_origin):
        component.update(contributes=False,
                         hint="ignored because frame points to a URL")
    elif filename in discarded_names:
        component.update(contributes=False, hint=discarded_names[filename])
    elif platform == "java":
        rewritten = _java_assist_enhancer_re.sub(r"\1<auto>", filename)
        if rewritten != filename:
            component.update(values=[rewritten],
                             hint="cleaned javassist parts")
            filename = rewritten

    # Best effort short filename for the title.  Since only the basename is
    # kept, two issues differing in path can end up with the same title.
    label = {"filebase": get_basename(filename)}
    component.update(tree_label=label)

    return component
예제 #20
0
def get_contextline_component(frame, platform, function, context):
    """Return the ``context-line`` component for ``frame``.

    The caller must make sure context lines are only used on platforms
    where the source code quality is trusted.  Lines whose raw length
    exceeds 120 characters are discarded, and for the JavaScript behavior
    family some lines are discarded based on origin checks.
    """
    # Collapse all runs of whitespace (tabs expanded first) to single spaces.
    normalized = " ".join((frame.context_line or "").expandtabs(2).split())
    if not normalized:
        return GroupingComponent(id="context-line")

    component = GroupingComponent(
        id="context-line",
        values=[normalized],
        similarity_encoder=ident_encoder,
    )

    if len(frame.context_line) > 120:
        component.update(hint="discarded because line too long",
                         contributes=False)
        return component

    if get_behavior_family_for_platform(platform) != "javascript":
        return component

    if context["with_context_line_file_origin_bug"]:
        if has_url_origin(frame.abs_path, allow_file_origin=True):
            component.update(hint="discarded because from URL origin",
                             contributes=False)
    elif not function and has_url_origin(frame.abs_path):
        component.update(
            hint="discarded because from URL origin and no function",
            contributes=False)

    return component
예제 #21
0
def single_exception_common(exception, config, meta, with_value):
    """Build the ``exception`` component shared by the exception strategies.

    Always contains the stacktrace and type components.  With ``with_value``
    set, a value component (the exception value after
    ``trim_message_for_grouping``) is appended; it is disabled whenever the
    stacktrace contributes.
    """
    stacktrace = exception.stacktrace
    if stacktrace is None:
        stacktrace_part = GroupingComponent(id="stacktrace")
    else:
        stacktrace_part = config.get_grouping_component(stacktrace, **meta)

    type_part = GroupingComponent(
        id="type", values=[exception.type] if exception.type else [])

    mechanism = exception.mechanism
    if mechanism and mechanism.synthetic:
        type_part.update(contributes=False,
                         hint="ignored because exception is synthetic")

    if not with_value:
        return GroupingComponent(id="exception", values=[stacktrace_part, type_part])

    value_part = GroupingComponent(id="value")

    raw_value = exception.value
    if raw_value is not None:
        trimmed = trim_message_for_grouping(raw_value)
        if trimmed:
            stripped_hint = None
            if raw_value != trimmed:
                stripped_hint = "stripped common values"
            value_part.update(values=[trimmed], hint=stripped_hint)

    if stacktrace_part.contributes and value_part.contributes:
        value_part.update(
            contributes=False,
            hint="ignored because stacktrace takes precedence")

    return GroupingComponent(
        id="exception", values=[stacktrace_part, type_part, value_part])
예제 #22
0
파일: legacy.py 프로젝트: getsentry/sentry
def single_exception_legacy(exception, config, **meta):
    """Build the legacy ``exception`` component (stacktrace, type, value).

    Type and value are created disabled.  If the stacktrace contributes,
    only the type is enabled and the value gets a precedence hint; if it
    does not, type and value are enabled whenever they are set.
    """
    type_values = [exception.type] if exception.type else []
    value_values = [exception.value] if exception.value else []

    type_part = GroupingComponent(id='type', values=type_values, contributes=False)
    value_part = GroupingComponent(id='value', values=value_values, contributes=False)
    stacktrace_part = GroupingComponent(id='stacktrace')

    if exception.stacktrace is not None:
        stacktrace_part = config.get_grouping_component(
            exception.stacktrace, **meta)
        if stacktrace_part.contributes:
            if exception.type:
                type_part.update(contributes=True)
                if exception.value:
                    value_part.update(hint='stacktrace and type take precedence')
            elif exception.value:
                value_part.update(hint='stacktrace takes precedence')

    if not stacktrace_part.contributes:
        # Without a usable stacktrace, group on type and value instead.
        for part, raw in ((type_part, exception.type), (value_part, exception.value)):
            if raw:
                part.update(contributes=True)

    return GroupingComponent(
        id='exception',
        values=[stacktrace_part, type_part, value_part],
    )
예제 #23
0
def get_module_component_v1(abs_path, module, platform):
    """Return the ``module`` component with platform-specific cleaning applied.

    JavaScript modules containing ``/`` that ``abs_path`` ends with are
    ignored.  For Java, lambda modules are ignored, ``sun.reflect``
    generated accessors are reduced to their prefix, and all other modules
    are run through the codegen regexes.
    """
    if module is None:
        return GroupingComponent(id='module')

    component = GroupingComponent(id='module', values=[module])

    if platform == 'javascript' and '/' in module and abs_path and abs_path.endswith(module):
        component.update(contributes=False, hint='ignored bad javascript module')
        return component

    if platform != 'java':
        return component

    if '$$Lambda$' in module:
        component.update(contributes=False, hint='ignored java lambda')

    accessor_prefix = 'sun.reflect.GeneratedMethodAccessor'
    if module.startswith(accessor_prefix):
        component.update(values=[accessor_prefix], hint='removed reflection marker')
        return component

    normalized = module
    for pattern in (
        _java_reflect_enhancer_re,
        _java_cglib_enhancer_re,
        _java_assist_enhancer_re,
        _clojure_enhancer_re,
    ):
        normalized = pattern.sub(r'\1<auto>', normalized)
    if normalized != module:
        component.update(values=[normalized], hint='removed codegen marker')

    return component
예제 #24
0
def get_filename_component(abs_path, filename, platform,
                           allow_file_origin=False):
    """Return the ``filename`` component using only the lowercased basename.

    Names are disabled when ``has_url_origin`` flags ``abs_path`` or when
    they are ``<anonymous>`` / ``[native code]``.  For ``java`` the name is
    cleaned with ``_java_assist_enhancer_re``.
    """
    if filename is None:
        return GroupingComponent(id='filename')

    # Keep just the platform independent basename, lowercased.
    short_name = _basename_re.split(filename)[-1].lower()
    component = GroupingComponent(id='filename', values=[short_name])

    discard_hint = None
    if has_url_origin(abs_path, allow_file_origin=allow_file_origin):
        discard_hint = 'ignored because frame points to a URL'
    elif short_name == '<anonymous>':
        discard_hint = 'anonymous filename discarded'
    elif short_name == '[native code]':
        discard_hint = 'native code indicated by filename'
    elif platform == 'java':
        cleaned = _java_assist_enhancer_re.sub(r'\1<auto>', short_name)
        if cleaned != short_name:
            component.update(values=[cleaned], hint='cleaned javassist parts')

    if discard_hint is not None:
        component.update(contributes=False, hint=discard_hint)

    return component
예제 #25
0
def single_exception_common(exception, config, meta, with_value):
    """Assemble the ``exception`` component from stacktrace, type and value.

    The value component is only included when ``with_value`` is true.  It
    holds the exception value after ``trim_message_for_grouping`` and is
    disabled when the stacktrace component contributes.
    """
    if exception.stacktrace is None:
        stacktrace_part = GroupingComponent(id='stacktrace')
    else:
        stacktrace_part = config.get_grouping_component(
            exception.stacktrace, **meta)

    type_values = [exception.type] if exception.type else []
    type_part = GroupingComponent(id='type', values=type_values)

    if exception.mechanism and exception.mechanism.synthetic:
        type_part.update(
            contributes=False,
            hint='ignored because exception is synthetic',
        )

    children = [stacktrace_part, type_part]

    if with_value:
        value_part = GroupingComponent(id='value')

        original = exception.value
        trimmed = None
        if original is not None:
            trimmed = trim_message_for_grouping(original)
        if trimmed:
            # Only mention stripping when trimming actually changed the text.
            if original != trimmed:
                value_part.update(values=[trimmed], hint='stripped common values')
            else:
                value_part.update(values=[trimmed], hint=None)

        if stacktrace_part.contributes and value_part.contributes:
            value_part.update(
                contributes=False,
                hint='ignored because stacktrace takes precedence',
            )

        children.append(value_part)

    return GroupingComponent(id='exception', values=children)
예제 #26
0
def get_contextline_component(frame, platform):
    """Build the ``context-line`` component for a frame.

    Callers are expected to use context lines only on platforms where the
    source code quality can be trusted.  This function itself only guards
    against overly long lines and, for the javascript behavior family,
    against frames whose ``abs_path`` has a URL origin.
    """
    result = GroupingComponent(id='context-line')

    raw_line = frame.context_line
    if not raw_line:
        return result

    # Collapse every run of whitespace into a single space.
    normalized = ' '.join(raw_line.expandtabs(2).split())
    if not normalized:
        return result

    # The length limit applies to the raw line, not the normalized one.
    if len(raw_line) > 120:
        result.update(hint='discarded because line too long')
        return result

    is_js_url_origin = (
        get_behavior_family_for_platform(platform) == 'javascript'
        and has_url_origin(frame.abs_path, allow_file_origin=True)
    )
    if is_js_url_origin:
        result.update(hint='discarded because from URL origin')
    else:
        result.update(values=[normalized])

    return result
예제 #27
0
def get_contextline_component(frame, platform):
    """Return a ``context-line`` grouping component for ``frame``.

    It is up to the caller to use context lines only for platforms whose
    source code can be trusted.  The only protection applied here is
    against lines longer than 120 characters and against javascript-family
    frames whose ``abs_path`` has a URL origin.
    """
    context_component = GroupingComponent(id='context-line')

    source_line = frame.context_line
    if not source_line:
        return context_component

    # Whitespace runs are squeezed into single spaces before grouping.
    squeezed = ' '.join(source_line.expandtabs(2).split())
    if not squeezed:
        return context_component

    if len(source_line) > 120:
        # Measured on the original line, before whitespace squeezing.
        context_component.update(hint='discarded because line too long')
    else:
        url_origin = (
            get_behavior_family_for_platform(platform) == 'javascript'
            and has_url_origin(frame.abs_path, allow_file_origin=True)
        )
        if url_origin:
            context_component.update(hint='discarded because from URL origin')
        else:
            context_component.update(values=[squeezed])

    return context_component
예제 #28
0
파일: newstyle.py 프로젝트: mburgs/sentry
def single_exception(exception, context, **meta):
    """Build the ``exception`` grouping component for one exception.

    The component always contains a stacktrace and a type component.  A
    value component is appended when the
    ``with_exception_value_fallback`` context option is truthy.  Extra
    keyword arguments are forwarded to ``context.get_grouping_component``.
    """
    stacktrace = exception.stacktrace
    if stacktrace is None:
        # No stacktrace available: use an empty placeholder component.
        stacktrace_part = GroupingComponent(id="stacktrace")
    else:
        stacktrace_part = context.get_grouping_component(stacktrace, **meta)

    type_values = [exception.type] if exception.type else []
    type_part = GroupingComponent(
        id="type",
        values=type_values,
        similarity_encoder=ident_encoder,
    )

    mechanism = exception.mechanism
    if mechanism and mechanism.synthetic:
        type_part.update(
            contributes=False,
            hint="ignored because exception is synthetic",
        )

    parts = [stacktrace_part, type_part]
    if not context["with_exception_value_fallback"]:
        return GroupingComponent(id="exception", values=parts)

    value_part = GroupingComponent(
        id="value",
        similarity_encoder=text_shingle_encoder(5),
    )

    raw_value = exception.value
    if raw_value is not None:
        trimmed = trim_message_for_grouping(raw_value)
        if trimmed:
            # Only mention the stripping when trimming changed the value.
            trim_hint = None
            if raw_value != trimmed:
                trim_hint = "stripped common values"
            value_part.update(values=[trimmed], hint=trim_hint)

    # The value is a fallback for hashing only; it keeps contributing to
    # similarity even when the stacktrace wins.
    if stacktrace_part.contributes and value_part.contributes:
        value_part.update(
            contributes=False,
            contributes_to_similarity=True,
            hint="ignored because stacktrace takes precedence",
        )
    parts.append(value_part)

    return GroupingComponent(id="exception", values=parts)
예제 #29
0
def get_frame_component(frame,
                        event,
                        meta,
                        legacy_function_logic=False,
                        use_contextline=False,
                        javascript_fuzzing=False):
    """Build the ``frame`` grouping component for a single stack frame.

    The component is made of module, filename and function components,
    plus a context-line component when ``use_contextline`` is set.

    ``event`` only supplies the fallback platform and ``meta`` is not used
    in the body.  ``legacy_function_logic`` and ``javascript_fuzzing`` are
    passed on to ``get_function_component``; ``javascript_fuzzing``
    additionally enables the javascript-only frame filtering at the end.
    """
    platform = frame.platform or event.platform

    filename_part = get_filename_component(
        frame.abs_path,
        frame.filename,
        platform,
        allow_file_origin=javascript_fuzzing,
    )

    # A contributing module always wins over the filename.
    module_part = get_module_component_v1(
        frame.abs_path, frame.module, platform)
    if module_part.contributes and filename_part.contributes:
        filename_part.update(
            contributes=False,
            hint="module takes precedence",
        )

    # The context line is optional and only computed on request.
    context_line_part = (
        get_contextline_component(frame, platform)
        if use_contextline else None
    )

    function_part = get_function_component(
        function=frame.function,
        raw_function=frame.raw_function,
        platform=platform,
        sourcemap_used=frame.data and frame.data.get("sourcemap") is not None,
        context_line_available=(
            context_line_part and context_line_part.contributes
        ),
        legacy_function_logic=legacy_function_logic,
        javascript_fuzzing=javascript_fuzzing,
    )

    parts = [module_part, filename_part, function_part]
    if context_line_part is not None:
        parts.append(context_line_part)

    frame_part = GroupingComponent(id="frame", values=parts)

    fuzzing_applies = (
        javascript_fuzzing
        and get_behavior_family_for_platform(platform) == "javascript"
    )
    if not fuzzing_applies:
        return frame_part

    # In javascript fuzzing mode some frames are consistently disregarded.
    # This forces common bad stacktraces onto a common hash at the cost of
    # maybe skipping frames that would otherwise be useful.
    name = frame.raw_function or frame.function
    if name:
        name = name.rsplit(".", 1)[-1]

    unknown_names = (
        None,
        "?",
        "<anonymous function>",
        "<anonymous>",
        "Anonymous function",
    )
    if name in unknown_names or name.endswith("/<"):
        function_part.update(
            contributes=False,
            hint="ignored unknown function name",
        )

    low_quality_paths = (
        "[native code]",
        "native code",
        "eval code",
        "<anonymous>",
    )
    if name == "eval" or frame.abs_path in low_quality_paths:
        frame_part.update(
            contributes=False,
            hint="ignored low quality javascript frame",
        )

    return frame_part
예제 #30
0
def get_function_component(
    function,
    platform,
    legacy_function_logic,
    sourcemap_used=False,
    context_line_available=False,
    raw_function=None,
    javascript_fuzzing=False,
):
    """
    Build the ``function`` component, normalizing platform outliers.

    - Ruby generates (random?) integers for various anonymous style
      functions such as in erb and the active_support library.
    - Block functions carry metadata that is irrelevant for grouping.

    ``legacy_function_logic`` selects between the frame v1 and frame v2
    function name logic: v2 uses the function name consistently, whereas
    v1 prefers the raw function or, for native, a trimmed version of it.
    """
    from sentry.stacktraces.functions import trim_function_name

    family = get_behavior_family_for_platform(platform)

    if legacy_function_logic:
        name = raw_function or function
    elif function and not raw_function:
        # Only a function name was supplied, so trim it here.
        name = trim_function_name(function, platform)
    else:
        name = function or raw_function

    if not name:
        return GroupingComponent(id="function")

    component = GroupingComponent(id="function", values=[name])

    if platform == "ruby":
        if name.startswith("block "):
            component.update(values=["block"], hint="ruby block")
            return component
        without_erb = _ruby_erb_func.sub("", name)
        if without_erb != name:
            component.update(
                values=[without_erb],
                hint="removed generated erb template suffix",
            )
        return component

    if platform == "php":
        if name.startswith("[Anonymous"):
            component.update(
                contributes=False,
                hint="ignored anonymous function",
            )
        return component

    if platform == "java":
        if name.startswith("lambda$"):
            component.update(
                contributes=False,
                hint="ignored lambda function",
            )
        return component

    if family == "native":
        if name in ("<redacted>", "<unknown>"):
            component.update(
                contributes=False,
                hint="ignored unknown function",
            )
        elif legacy_function_logic:
            isolated = trim_function_name(
                name, platform, normalize_lambdas=False)
            if isolated != name:
                component.update(values=[isolated], hint="isolated function")
        return component

    if javascript_fuzzing and family == "javascript":
        # Turn Object.foo or Foo.foo into foo to smooth over some common
        # cross browser differences.
        short_name = name.rsplit(".", 1)[-1]
        if short_name != name:
            component.update(
                values=[short_name],
                hint="trimmed javascript function",
            )

        # Function names that come out of sourcemaps are unreliable by the
        # nature of sourcemaps.  When a sourcemap was used and the context
        # line can be used instead, the function name is dropped.
        if sourcemap_used and context_line_available:
            component.update(
                contributes=False,
                hint="ignored because sourcemap used and context line available"
            )

    return component
예제 #31
0
def single_exception(interface: SingleException, event: Event,
                     context: GroupingContext,
                     **meta: Any) -> ReturnedVariants:
    """Build one ``exception`` grouping component per stacktrace variant.

    Every variant receives the stacktrace component for that variant, a
    type component, an optional ``ns-error`` component, and a value
    component when the ``with_exception_value_fallback`` context option is
    truthy.  The result maps variant name to its ``exception`` component.
    """
    type_values = [interface.type] if interface.type else []
    type_part = GroupingComponent(
        id="type",
        values=type_values,
        similarity_encoder=ident_encoder,
    )
    # Copied before any update below, so the system variant keeps the
    # untouched type component.
    system_type_part = type_part.shallow_copy()

    ns_error_part = None

    mechanism = interface.mechanism
    if mechanism:
        if mechanism.synthetic:
            # Synthetic exceptions are produced from platform specific
            # error codes, so their type is ignored: a crash with
            # EXC_ACCESS_VIOLATION_* on Windows can have the exact same
            # stacktrace as a crash with EXC_BAD_ACCESS on macOS.
            #
            # The system variant's type component is deliberately left
            # alone so that the regex can keep being modified without
            # unnecessarily creating new groups.
            type_part.update(
                contributes=False,
                hint="ignored because exception is synthetic",
            )
        mechanism_meta = mechanism.meta
        if mechanism_meta and "ns_error" in mechanism_meta:
            ns_error = mechanism_meta["ns_error"]
            ns_error_part = GroupingComponent(
                id="ns-error",
                values=[ns_error.get("domain"), ns_error.get("code")],
            )

    if interface.stacktrace is None:
        # No stacktrace: a single "app" variant with an empty placeholder.
        variants = {"app": GroupingComponent(id="stacktrace")}
    else:
        with context:
            context["exception_data"] = interface.to_json()
            variants = context.get_grouping_component(
                interface.stacktrace, event=event, **meta)

    result = {}

    for variant, stacktrace_part in variants.items():
        if variant == "system":
            chosen_type = system_type_part
        else:
            chosen_type = type_part

        parts = [stacktrace_part, chosen_type]
        if ns_error_part is not None:
            parts.append(ns_error_part)

        if context["with_exception_value_fallback"]:
            value_part = GroupingComponent(
                id="value",
                similarity_encoder=text_shingle_encoder(5),
            )

            raw_value = interface.value
            if raw_value is not None:
                trimmed = trim_message_for_grouping(raw_value)
                if trimmed:
                    trim_hint = None
                    if raw_value != trimmed:
                        trim_hint = "stripped common values"
                    value_part.update(values=[trimmed], hint=trim_hint)

            # Both the stacktrace and the ns-error info take precedence
            # over the value; the value still contributes to similarity.
            if stacktrace_part.contributes and value_part.contributes:
                value_part.update(
                    contributes=False,
                    contributes_to_similarity=True,
                    hint="ignored because stacktrace takes precedence",
                )

            ns_error_wins = (
                ns_error_part is not None
                and ns_error_part.contributes
                and value_part.contributes
            )
            if ns_error_wins:
                value_part.update(
                    contributes=False,
                    contributes_to_similarity=True,
                    hint="ignored because ns-error info takes precedence",
                )

            parts.append(value_part)

        result[variant] = GroupingComponent(id="exception", values=parts)

    return result
예제 #32
0
def frame_legacy(frame, event, **meta):
    """Build the legacy ``frame`` grouping component for one stack frame.

    The component is assembled from six sub-components: module, filename,
    context-line, symbol, function and lineno.  A module overrides the
    filename.  A contributing context line suppresses symbol, function and
    lineno; otherwise symbol is preferred over function, which is
    preferred over lineno, provided a module or filename contributes.

    ``event`` only supplies the fallback platform.  Extra ``meta`` keyword
    arguments are accepted but not used in the body.

    Returns a ``GroupingComponent`` with id ``'frame'``.
    """
    platform = frame.platform or event.platform

    # In certain situations we want to disregard the entire frame.
    contributes = None
    hint = None

    # This requires some explanation: older sentry versions did not have
    # raw_function but only function.  For some platforms like native
    # we now store a trimmed function name in frame.function instead, and
    # the original value moved to raw_function.  This requires us to
    # prioritize raw_function over function in the legacy grouping code to
    # avoid creating new groups.
    func = frame.raw_function or frame.function

    # Safari throws [native code] frames in for calls like ``forEach``
    # whereas Chrome ignores these. Let's remove it from the hashing algo
    # so that they're more likely to group together
    filename_component = GroupingComponent(id='filename')
    if frame.filename == '<anonymous>':
        filename_component.update(contributes=False,
                                  values=[frame.filename],
                                  hint='anonymous filename discarded')
    elif frame.filename == '[native code]':
        # Applied to the whole frame via the constructor call at the end.
        contributes = False
        hint = 'native code indicated by filename'
    elif frame.filename:
        if has_url_origin(frame.abs_path):
            filename_component.update(
                contributes=False,
                values=[frame.filename],
                hint='ignored because filename is a URL',
            )
        # XXX(dcramer): don't compute hash using frames containing the
        # 'Caused by' text as it contains an exception value which may
        # contain dynamic values (see raven-java#125)
        elif frame.filename.startswith('Caused by: '):
            filename_component.update(values=[frame.filename],
                                      contributes=False,
                                      hint='ignored because invalid')
        else:
            hashable_filename, hashable_filename_hint = \
                remove_filename_outliers_legacy(frame.filename, platform)
            filename_component.update(values=[hashable_filename],
                                      hint=hashable_filename_hint)

    # if we have a module we use that for grouping.  This will always
    # take precedence over the filename, even if the module is
    # considered unhashable.
    module_component = GroupingComponent(id='module')
    if frame.module:
        if is_unhashable_module_legacy(frame, platform):
            # The real module name is replaced by a fixed salt component.
            module_component.update(
                values=[
                    GroupingComponent(id='salt',
                                      values=['<module>'],
                                      hint='normalized generated module name')
                ],
                hint='ignored module',
            )
        else:
            module_name, module_hint = \
                remove_module_outliers_legacy(frame.module, platform)
            module_component.update(values=[module_name], hint=module_hint)
        # Overrides whatever was decided for the filename above.
        if frame.filename:
            filename_component.update(values=[frame.filename],
                                      contributes=False,
                                      hint='module takes precedence')

    # Context line when available is the primary contributor
    context_line_component = GroupingComponent(id='context-line')
    if frame.context_line is not None:
        if len(frame.context_line) > 120:
            context_line_component.update(
                hint='discarded because line too long')
        elif has_url_origin(frame.abs_path) and not func:
            context_line_component.update(
                hint='discarded because from URL origin')
        else:
            context_line_component.update(values=[frame.context_line])

    symbol_component = GroupingComponent(id='symbol')
    function_component = GroupingComponent(id='function')
    lineno_component = GroupingComponent(id='lineno')

    # The context line grouping information is the most reliable one.
    # If we did not manage to find some information there, we want to
    # see if we can come up with some extra information.  We only want
    # to do that if we managed to get a module or filename.
    if not context_line_component.contributes and \
       (module_component.contributes or filename_component.contributes):
        if frame.symbol:
            symbol_component.update(values=[frame.symbol])
            if func:
                function_component.update(contributes=False,
                                          values=[func],
                                          hint='symbol takes precedence')
            if frame.lineno:
                lineno_component.update(contributes=False,
                                        values=[frame.lineno],
                                        hint='symbol takes precedence')
        elif func:
            if is_unhashable_function_legacy(func):
                # The real function name is replaced by a fixed salt.
                function_component.update(values=[
                    GroupingComponent(id='salt',
                                      values=['<function>'],
                                      hint='normalized lambda function name')
                ])
            else:
                function, function_hint = remove_function_outliers_legacy(func)
                function_component.update(values=[function],
                                          hint=function_hint)
            if frame.lineno:
                lineno_component.update(contributes=False,
                                        values=[frame.lineno],
                                        hint='function takes precedence')
        elif frame.lineno:
            lineno_component.update(values=[frame.lineno])
    else:
        # NOTE(review): this branch is also taken when the context line
        # contributes, in which case a module or filename may well be
        # available; the hints below do not distinguish that case.
        if frame.symbol:
            symbol_component.update(
                contributes=False,
                values=[frame.symbol],
                hint='symbol is used only if module or filename are available')
        if func:
            function_component.update(
                contributes=False,
                values=[func],
                hint=
                'function name is used only if module or filename are available'
            )
        if frame.lineno:
            lineno_component.update(
                contributes=False,
                values=[frame.lineno],
                hint=
                'line number is used only if module or filename are available')

    return GroupingComponent(
        id='frame',
        values=[
            module_component,
            filename_component,
            context_line_component,
            symbol_component,
            function_component,
            lineno_component,
        ],
        contributes=contributes,
        hint=hint,
    )
예제 #33
0
def frame_legacy(frame, event, context, **meta):
    """Build the legacy ``frame`` grouping component for one stack frame.

    The component is assembled from six sub-components: module, filename,
    context-line, symbol, function and lineno.  A module overrides the
    filename.  A contributing context line suppresses symbol, function and
    lineno; otherwise symbol is preferred over function, which is
    preferred over lineno, provided a module or filename contributes.

    ``event`` only supplies the fallback platform and ``context`` supplies
    the variant name used as the key of the result.  Extra ``meta``
    keyword arguments are accepted but not used in the body.

    Returns a single-entry dict mapping ``context["variant"]`` to the
    ``GroupingComponent`` with id ``"frame"``.
    """
    platform = frame.platform or event.platform

    # In certain situations we want to disregard the entire frame.
    contributes = None
    hint = None

    # This requires some explanation: older sentry versions did not have
    # raw_function but only function.  For some platforms like native
    # we now store a trimmed function name in frame.function instead, and
    # the original value moved to raw_function.  This requires us to
    # prioritize raw_function over function in the legacy grouping code to
    # avoid creating new groups.
    func = frame.raw_function or frame.function

    # Safari throws [native code] frames in for calls like ``forEach``
    # whereas Chrome ignores these. Let's remove it from the hashing algo
    # so that they're more likely to group together
    filename_component = GroupingComponent(id="filename",
                                           similarity_encoder=ident_encoder)
    if frame.filename == "<anonymous>":
        filename_component.update(contributes=False,
                                  values=[frame.filename],
                                  hint="anonymous filename discarded")
    elif frame.filename == "[native code]":
        # Applied to the whole frame via the constructor call at the end.
        contributes = False
        hint = "native code indicated by filename"
    elif frame.filename:
        if has_url_origin(frame.abs_path):
            filename_component.update(contributes=False,
                                      values=[frame.filename],
                                      hint="ignored because filename is a URL")
        # XXX(dcramer): don't compute hash using frames containing the
        # 'Caused by' text as it contains an exception value which may
        # contain dynamic values (see raven-java#125)
        elif frame.filename.startswith("Caused by: "):
            filename_component.update(values=[frame.filename],
                                      contributes=False,
                                      hint="ignored because invalid")
        else:
            hashable_filename, hashable_filename_hint = remove_filename_outliers_legacy(
                frame.filename, platform)
            filename_component.update(values=[hashable_filename],
                                      hint=hashable_filename_hint)

    # if we have a module we use that for grouping.  This will always
    # take precedence over the filename, even if the module is
    # considered unhashable.
    module_component = GroupingComponent(id="module",
                                         similarity_encoder=ident_encoder)
    if frame.module:
        if is_unhashable_module_legacy(frame, platform):
            module_component.update(
                values=[
                    GroupingComponent(id="salt",
                                      values=["<module>"],
                                      hint="normalized generated module name")
                ],
                hint="ignored module",
            )

            # <module> still contributes, though it should not contribute to
            # similarity
            module_component.similarity_encoder = None
        else:
            module_name, module_hint = remove_module_outliers_legacy(
                frame.module, platform)
            module_component.update(values=[module_name], hint=module_hint)
        # Overrides whatever was decided for the filename above.
        if frame.filename:
            filename_component.update(values=[frame.filename],
                                      contributes=False,
                                      hint="module takes precedence")

    # Context line when available is the primary contributor
    context_line_component = GroupingComponent(
        id="context-line", similarity_encoder=ident_encoder)
    if frame.context_line is not None:
        if len(frame.context_line) > 120:
            context_line_component.update(
                hint="discarded because line too long")
        elif has_url_origin(frame.abs_path) and not func:
            context_line_component.update(
                hint="discarded because from URL origin")
        else:
            context_line_component.update(values=[frame.context_line])

    symbol_component = GroupingComponent(id="symbol",
                                         similarity_encoder=ident_encoder)
    function_component = GroupingComponent(id="function",
                                           similarity_encoder=ident_encoder)
    lineno_component = GroupingComponent(id="lineno",
                                         similarity_encoder=ident_encoder)

    # The context line grouping information is the most reliable one.
    # If we did not manage to find some information there, we want to
    # see if we can come up with some extra information.  We only want
    # to do that if we managed to get a module or filename.
    if not context_line_component.contributes and (
            module_component.contributes or filename_component.contributes):
        if frame.symbol:
            symbol_component.update(values=[frame.symbol])
            if func:
                function_component.update(contributes=False,
                                          values=[func],
                                          hint="symbol takes precedence")
            if frame.lineno:
                lineno_component.update(contributes=False,
                                        values=[frame.lineno],
                                        hint="symbol takes precedence")
        elif func:
            if is_unhashable_function_legacy(func):
                function_component.update(values=[
                    GroupingComponent(id="salt",
                                      values=["<function>"],
                                      hint="normalized lambda function name")
                ])
                # <function> still contributes, though it should not
                # contribute to similarity
                function_component.similarity_encoder = None
            else:
                function, function_hint = remove_function_outliers_legacy(func)
                function_component.update(values=[function],
                                          hint=function_hint)
            if frame.lineno:
                lineno_component.update(contributes=False,
                                        values=[frame.lineno],
                                        hint="function takes precedence")
        elif frame.lineno:
            lineno_component.update(values=[frame.lineno])
    else:
        if context_line_component.contributes:
            fallback_hint = "is not used if context-line is available"
        else:
            # Reaching this point without a contributing context line means
            # neither module nor filename contributes, so the hint has to
            # state that these values need a module or filename to be used.
            fallback_hint = "is used only if module or filename are available"
        if frame.symbol:
            symbol_component.update(contributes=False,
                                    values=[frame.symbol],
                                    hint="symbol " + fallback_hint)
        if func:
            function_component.update(contributes=False,
                                      values=[func],
                                      hint="function name " + fallback_hint)
        if frame.lineno:
            lineno_component.update(contributes=False,
                                    values=[frame.lineno],
                                    hint="line number " + fallback_hint)

    return {
        context["variant"]:
        GroupingComponent(
            id="frame",
            values=[
                module_component,
                filename_component,
                context_line_component,
                symbol_component,
                function_component,
                lineno_component,
            ],
            contributes=contributes,
            hint=hint,
        )
    }
예제 #34
0
def single_exception(exception, context, **meta):
    """Build one ``exception`` grouping component per stacktrace variant.

    Every variant receives the stacktrace component for that variant, the
    shared type component, an optional ``ns-error`` component, and a value
    component when the ``with_exception_value_fallback`` context option is
    truthy.  The result maps variant name to its ``exception`` component.
    """
    type_values = [exception.type] if exception.type else []
    type_part = GroupingComponent(
        id="type",
        values=type_values,
        similarity_encoder=ident_encoder,
    )

    ns_error_part = None

    mechanism = exception.mechanism
    if mechanism:
        if mechanism.synthetic:
            type_part.update(
                contributes=False,
                hint="ignored because exception is synthetic",
            )
        mechanism_meta = mechanism.meta
        if mechanism_meta and "ns_error" in mechanism_meta:
            ns_error = mechanism_meta["ns_error"]
            ns_error_part = GroupingComponent(
                id="ns-error",
                values=[ns_error.get("domain"), ns_error.get("code")],
            )

    if exception.stacktrace is None:
        # No stacktrace: a single "app" variant with an empty placeholder.
        variants = {"app": GroupingComponent(id="stacktrace")}
    else:
        variants = context.get_grouping_component(
            exception.stacktrace, **meta)

    result = {}

    for variant, stacktrace_part in variants.items():
        parts = [stacktrace_part, type_part]
        if ns_error_part is not None:
            parts.append(ns_error_part)

        if context["with_exception_value_fallback"]:
            value_part = GroupingComponent(
                id="value",
                similarity_encoder=text_shingle_encoder(5),
            )

            raw_value = exception.value
            if raw_value is not None:
                trimmed = trim_message_for_grouping(raw_value)
                if trimmed:
                    trim_hint = None
                    if raw_value != trimmed:
                        trim_hint = "stripped common values"
                    value_part.update(values=[trimmed], hint=trim_hint)

            # Both the stacktrace and the ns-error info take precedence
            # over the value; the value still contributes to similarity.
            if stacktrace_part.contributes and value_part.contributes:
                value_part.update(
                    contributes=False,
                    contributes_to_similarity=True,
                    hint="ignored because stacktrace takes precedence",
                )

            ns_error_wins = (
                ns_error_part is not None
                and ns_error_part.contributes
                and value_part.contributes
            )
            if ns_error_wins:
                value_part.update(
                    contributes=False,
                    contributes_to_similarity=True,
                    hint="ignored because ns-error info takes precedence",
                )

            parts.append(value_part)

        result[variant] = GroupingComponent(id="exception", values=parts)

    return result
예제 #35
0
def get_function_component(
    context,
    function,
    raw_function,
    platform,
    sourcemap_used=False,
    context_line_available=False,
):
    """
    Build the ``function`` grouping component for a frame, normalizing
    platform specific noise in the function name along the way.

    Which name is used as the starting point depends on
    ``context["legacy_function_logic"]``: when it is set (or the platform is
    csharp) the raw function name is preferred over the function name.
    Otherwise the function name is preferred, and it is trimmed when no raw
    function name is available.

    Platform handling applied afterwards:

    - ruby: ``block ...`` functions collapse to ``block``; otherwise whatever
      ``_ruby_erb_func`` matches is stripped from the name.
    - php: anonymous functions are marked as non-contributing; with
      ``context["php_detect_anonymous_classes"]`` only the part after the
      last ``::`` of an anonymous class method is kept.
    - java: ``lambda$`` functions are marked as non-contributing.
    - native: redacted/unknown functions are marked as non-contributing; in
      legacy mode the name is trimmed once more.
    - javascript (only with ``context["javascript_fuzzing"]``): only the part
      after the last dot is kept, and the function is ignored when a
      sourcemap was used and the context line is available.

    Returns a ``GroupingComponent`` with id ``function``.  It carries no
    values when neither ``function`` nor ``raw_function`` is set.
    """
    from sentry.stacktraces.functions import trim_function_name

    behavior_family = get_behavior_family_for_platform(platform)
    legacy_logic = context["legacy_function_logic"]

    # Function names for csharp started to be trimmed late, which changed
    # what ends up in `function`.  To keep the inputs to grouping stable the
    # raw function name is preferred for csharp.
    # TODO: if a frame:v5 is added the raw function name should not be
    # preferred for csharp.
    if legacy_logic or platform == "csharp":
        name = raw_function or function
    elif function and not raw_function:
        name = trim_function_name(function, platform)
    else:
        name = function or raw_function

    if not name:
        return GroupingComponent(id="function")

    component = GroupingComponent(
        id="function", values=[name], similarity_encoder=ident_encoder
    )

    if platform == "ruby":
        if name.startswith("block "):
            component.update(values=["block"], hint="ruby block")
        else:
            without_suffix = _ruby_erb_func.sub("", name)
            if without_suffix != name:
                component.update(
                    values=[without_suffix],
                    hint="removed generated erb template suffix",
                )

    elif platform == "php":
        if name.startswith(("[Anonymous", "class@anonymous\x00")):
            component.update(contributes=False, hint="ignored anonymous function")
        if context["php_detect_anonymous_classes"] and name.startswith("class@anonymous"):
            method_name = name.rsplit("::", 1)[-1]
            if method_name != name:
                component.update(values=[method_name], hint="anonymous class method")

    elif platform == "java":
        if name.startswith("lambda$"):
            component.update(contributes=False, hint="ignored lambda function")

    elif behavior_family == "native":
        if name in ("<redacted>", "<unknown>"):
            component.update(contributes=False, hint="ignored unknown function")
        elif legacy_logic:
            isolated = trim_function_name(name, platform, normalize_lambdas=False)
            if isolated != name:
                component.update(values=[isolated], hint="isolated function")

    elif context["javascript_fuzzing"] and behavior_family == "javascript":
        # Object.foo and Foo.foo both become foo, which evens out some
        # common cross browser differences.
        short_name = name.rsplit(".", 1)[-1]
        if short_name != name:
            component.update(values=[short_name], hint="trimmed javascript function")

        # Function names coming out of sourcemaps are unreliable, so once the
        # context line can be used instead the function name no longer
        # takes part in grouping.
        if sourcemap_used and context_line_available:
            component.update(
                contributes=False,
                contributes_to_similarity=True,
                hint="ignored because sourcemap used and context line available",
            )

    if component.values and context["hierarchical_grouping"]:
        component.update(tree_label=component.values[0])

    return component
예제 #36
0
파일: legacy.py 프로젝트: getsentry/sentry
def frame_legacy(frame, event, **meta):
    """
    Build the ``frame`` grouping component using the legacy grouping rules.

    The result wraps six sub components, in this order: module, filename,
    context-line, symbol, function and lineno.  Precedence rules implemented
    here:

    - A module, when present, makes the filename non-contributing.
    - Symbol, function and lineno are only considered when the context line
      does not contribute and a module or filename contributes; among them
      symbol wins over function, which wins over lineno.
    - A ``[native code]`` filename makes the whole frame non-contributing.

    The platform is taken from the frame, falling back to the event.
    """
    platform = frame.platform or event.platform

    # Overrides for the frame as a whole; they stay ``None`` unless the
    # entire frame is to be disregarded.
    frame_contributes = None
    frame_hint = None

    # Older sentry versions only stored `function`.  For some platforms such
    # as native, `function` now holds a trimmed name and the original value
    # lives in `raw_function`, so the raw name has to be preferred here to
    # avoid creating new groups.
    func = frame.raw_function or frame.function

    filename = frame.filename
    filename_component = GroupingComponent(id="filename")
    if filename == "<anonymous>":
        filename_component.update(
            contributes=False,
            values=[filename],
            hint="anonymous filename discarded",
        )
    elif filename == "[native code]":
        # Safari reports [native code] frames for calls like ``forEach``
        # while Chrome leaves them out.  Dropping the frame makes both more
        # likely to group together.
        frame_contributes = False
        frame_hint = "native code indicated by filename"
    elif filename and has_url_origin(frame.abs_path):
        filename_component.update(
            contributes=False,
            values=[filename],
            hint="ignored because filename is a URL",
        )
    elif filename and filename.startswith("Caused by: "):
        # XXX(dcramer): frames containing the 'Caused by' text carry an
        # exception value which may contain dynamic values
        # (see raven-java#125), so they must not be hashed.
        filename_component.update(
            values=[filename],
            contributes=False,
            hint="ignored because invalid",
        )
    elif filename:
        cleaned_filename, cleaned_filename_hint = remove_filename_outliers_legacy(
            filename, platform
        )
        filename_component.update(values=[cleaned_filename], hint=cleaned_filename_hint)

    # A module always takes precedence over the filename, even when the
    # module itself is considered unhashable.
    module_component = GroupingComponent(id="module")
    if frame.module:
        if is_unhashable_module_legacy(frame, platform):
            module_salt = GroupingComponent(
                id="salt",
                values=["<module>"],
                hint="normalized generated module name",
            )
            module_component.update(values=[module_salt], hint="ignored module")
        else:
            cleaned_module, cleaned_module_hint = remove_module_outliers_legacy(
                frame.module, platform
            )
            module_component.update(values=[cleaned_module], hint=cleaned_module_hint)
        if filename:
            filename_component.update(
                values=[filename],
                contributes=False,
                hint="module takes precedence",
            )

    # The context line, when usable, is the primary contributor.
    context_line = frame.context_line
    context_line_component = GroupingComponent(id="context-line")
    if context_line is not None:
        if len(context_line) > 120:
            context_line_component.update(hint="discarded because line too long")
        elif has_url_origin(frame.abs_path) and not func:
            context_line_component.update(hint="discarded because from URL origin")
        else:
            context_line_component.update(values=[context_line])

    symbol_component = GroupingComponent(id="symbol")
    function_component = GroupingComponent(id="function")
    lineno_component = GroupingComponent(id="lineno")

    # Extra information is only looked at when the context line gave us
    # nothing, and only if a module or filename is there to anchor it.
    use_extra_info = not context_line_component.contributes and (
        module_component.contributes or filename_component.contributes
    )

    if use_extra_info:
        if frame.symbol:
            symbol_component.update(values=[frame.symbol])
            if func:
                function_component.update(
                    contributes=False,
                    values=[func],
                    hint="symbol takes precedence",
                )
            if frame.lineno:
                lineno_component.update(
                    contributes=False,
                    values=[frame.lineno],
                    hint="symbol takes precedence",
                )
        elif func:
            if is_unhashable_function_legacy(func):
                function_salt = GroupingComponent(
                    id="salt",
                    values=["<function>"],
                    hint="normalized lambda function name",
                )
                function_component.update(values=[function_salt])
            else:
                cleaned_function, cleaned_function_hint = remove_function_outliers_legacy(func)
                function_component.update(
                    values=[cleaned_function], hint=cleaned_function_hint
                )
            if frame.lineno:
                lineno_component.update(
                    contributes=False,
                    values=[frame.lineno],
                    hint="function takes precedence",
                )
        elif frame.lineno:
            lineno_component.update(values=[frame.lineno])
    else:
        # Record the values for display, but keep them out of the hash.
        if frame.symbol:
            symbol_component.update(
                contributes=False,
                values=[frame.symbol],
                hint="symbol is used only if module or filename are available",
            )
        if func:
            function_component.update(
                contributes=False,
                values=[func],
                hint="function name is used only if module or filename are available",
            )
        if frame.lineno:
            lineno_component.update(
                contributes=False,
                values=[frame.lineno],
                hint="line number is used only if module or filename are available",
            )

    sub_components = [
        module_component,
        filename_component,
        context_line_component,
        symbol_component,
        function_component,
        lineno_component,
    ]
    return GroupingComponent(
        id="frame",
        values=sub_components,
        contributes=frame_contributes,
        hint=frame_hint,
    )
예제 #37
0
def single_exception(exception, context, **meta):
    """
    Build the ``exception`` grouping component for every stacktrace variant.

    Each resulting component is made up of the stacktrace component of the
    variant, a type component, optionally an ``ns-error`` component (when the
    mechanism meta carries ``ns_error``) and, if
    ``context["with_exception_value_fallback"]`` is set, a value component.

    The ``system`` variant uses its own copy of the type component, which is
    not affected by the type-based synthetic exception detection.

    :param exception: the exception to group; ``type``, ``value``,
        ``mechanism`` and ``stacktrace`` are read from it.
    :param context: the grouping context, used for configuration lookups and
        for resolving the stacktrace component.
    :param meta: forwarded to ``context.get_grouping_component``.
    :returns: a dict mapping variant name to the ``exception`` component.
    """
    type_component = GroupingComponent(
        id="type",
        values=[exception.type] if exception.type else [],
        similarity_encoder=ident_encoder,
    )
    system_type_component = type_component.shallow_copy()

    ns_error_component = None

    if exception.mechanism:
        if exception.mechanism.synthetic:
            type_component.update(contributes=False, hint="ignored because exception is synthetic")
            system_type_component.update(
                contributes=False, hint="ignored because exception is synthetic"
            )
        if exception.mechanism.meta and "ns_error" in exception.mechanism.meta:
            ns_error_component = GroupingComponent(
                id="ns-error",
                values=[
                    exception.mechanism.meta["ns_error"].get("domain"),
                    exception.mechanism.meta["ns_error"].get("code"),
                ],
            )

        # `exception.type` may be unset (the type component above already
        # allows for that).  Matching a regex against None raises a
        # TypeError, so the detection is skipped when there is no type.
        if (
            context["detect_synthetic_exception_types"]
            and exception.type is not None
            and _synthetic_exception_type_re.match(exception.type)
        ):
            # Do not update type component of system variant, such that regex
            # can be continuously modified without unnecessarily creating new
            # groups.
            type_component.update(
                contributes=False,
                hint="ignored because exception is synthetic (detected via exception type)",
            )

    if exception.stacktrace is not None:
        # The exception data is set on the context only for the duration of
        # the stacktrace lookup.
        with context:
            context["exception_data"] = exception.to_json()
            stacktrace_variants = context.get_grouping_component(exception.stacktrace, **meta)
    else:
        # Without a stacktrace only an empty "app" variant is produced.
        stacktrace_variants = {
            "app": GroupingComponent(id="stacktrace"),
        }

    rv = {}

    for variant, stacktrace_component in stacktrace_variants.items():
        values = [
            stacktrace_component,
            system_type_component if variant == "system" else type_component,
        ]

        if ns_error_component is not None:
            values.append(ns_error_component)

        if context["with_exception_value_fallback"]:
            value_component = GroupingComponent(
                id="value", similarity_encoder=text_shingle_encoder(5)
            )

            value_in = exception.value
            if value_in is not None:
                value_trimmed = trim_message_for_grouping(value_in)
                hint = "stripped common values" if value_in != value_trimmed else None
                if value_trimmed:
                    value_component.update(values=[value_trimmed], hint=hint)

            # The value is only a fallback: a contributing stacktrace or
            # ns-error component takes it out of the hash.
            if stacktrace_component.contributes and value_component.contributes:
                value_component.update(
                    contributes=False,
                    contributes_to_similarity=True,
                    hint="ignored because stacktrace takes precedence",
                )

            if (
                ns_error_component is not None
                and ns_error_component.contributes
                and value_component.contributes
            ):
                value_component.update(
                    contributes=False,
                    contributes_to_similarity=True,
                    hint="ignored because ns-error info takes precedence",
                )

            values.append(value_component)

        rv[variant] = GroupingComponent(id="exception", values=values)

    return rv
예제 #38
0
def frame(frame, event, context, **meta):
    """
    Build the ``frame`` grouping component for a single stack frame.

    The component is assembled from the module, filename and function
    components, plus the context line component on platforms listed in
    ``context["contextline_platforms"]`` and the package component when
    ``context["use_package_fallback"]`` is set and the frame has a package.

    The platform is taken from the frame, falling back to the event.

    Returns a dict with a single entry mapping ``context["variant"]`` to the
    frame component.
    """
    platform = frame.platform or event.platform

    filename_component = get_filename_component(
        frame.abs_path,
        frame.filename,
        platform,
        allow_file_origin=context["javascript_fuzzing"],
    )
    module_component = get_module_component(frame.abs_path, frame.module, platform)

    # A contributing module always wins over the filename.
    if module_component.contributes and filename_component.contributes:
        filename_component.update(
            contributes=False,
            contributes_to_similarity=True,
            hint="module takes precedence",
        )

    # The context line is only looked at for platforms that allow it.
    if platform in context["contextline_platforms"]:
        context_line_component = get_contextline_component(
            frame, platform, function=frame.function, context=context
        )
    else:
        context_line_component = None

    function_component = get_function_component(
        context=context,
        function=frame.function,
        raw_function=frame.raw_function,
        platform=platform,
        sourcemap_used=frame.data and frame.data.get("sourcemap") is not None,
        context_line_available=context_line_component and context_line_component.contributes,
    )

    components = [module_component, filename_component, function_component]
    if context_line_component is not None:
        components.append(context_line_component)

    if (
        context["discard_native_filename"]
        and get_behavior_family_for_platform(platform) == "native"
        and function_component.contributes
        and filename_component.contributes
    ):
        # Native function names usually spell out a full namespace already;
        # the filename on top of that only makes grouping less stable.
        filename_component.update(
            contributes=False, hint="discarded native filename for grouping stability"
        )

    if context["use_package_fallback"] and frame.package:
        # The package only steps in when nothing collected so far
        # contributes, i.e. the function did not symbolicate properly and
        # there is no filename either.
        package_component = get_package_component(package=frame.package, platform=platform)
        if package_component.contributes:
            if any(component.contributes for component in components):
                package_component.update(
                    contributes=False, hint="ignored because function takes precedence"
                )
            else:
                package_component.update(
                    hint="used as fallback because function name is not available"
                )

            if package_component.values and context["hierarchical_grouping"]:
                package_component.update(tree_label=package_component.values[0])

            components.append(package_component)

    rv = GroupingComponent(id="frame", values=components)

    # In javascript fuzzing mode some frames are disregarded consistently.
    # This forces common bad stacktraces onto a shared hash, at the price of
    # possibly skipping frames that would otherwise be useful.
    if context["javascript_fuzzing"] and get_behavior_family_for_platform(platform) == "javascript":
        unknown_names = (
            "?",
            "<anonymous function>",
            "<anonymous>",
            "Anonymous function",
        )
        low_quality_paths = (
            "[native code]",
            "native code",
            "eval code",
            "<anonymous>",
        )

        short_name = frame.raw_function or frame.function
        if short_name:
            short_name = short_name.rsplit(".", 1)[-1]

        if not short_name:
            # An empty function name is ignored without a hint.
            function_component.update(contributes=False)
        elif short_name in unknown_names or short_name.endswith("/<"):
            function_component.update(contributes=False, hint="ignored unknown function name")

        if short_name == "eval" or frame.abs_path in low_quality_paths:
            rv.update(contributes=False, hint="ignored low quality javascript frame")

    if context["is_recursion"]:
        rv.update(contributes=False, hint="ignored due to recursion")

    return {context["variant"]: rv}
예제 #39
0
def get_function_component(function, platform, legacy_function_logic,
                           sourcemap_used=False, context_line_available=False,
                           raw_function=None, javascript_fuzzing=False):
    """
    Build the ``function`` grouping component for a frame, normalizing
    platform specific noise in the function name along the way.

    With ``legacy_function_logic`` (frame v1) the raw function name is
    preferred over the function name, and native names are trimmed
    afterwards.  Without it (frame v2) the function name is preferred, and
    it is trimmed up front when no raw function name is available.

    Platform handling applied afterwards:

    - ruby: ``block ...`` functions collapse to ``block``; otherwise whatever
      ``_ruby_erb_func`` matches is stripped from the name.
    - php: ``[Anonymous`` functions are marked as non-contributing.
    - java: ``lambda$`` functions are marked as non-contributing.
    - native: redacted/unknown functions are marked as non-contributing.
    - javascript (only with ``javascript_fuzzing``): only the part after the
      last dot is kept, and the function is ignored when a sourcemap was
      used and the context line is available.

    Returns a ``GroupingComponent`` with id ``function``.  It carries no
    values when neither ``function`` nor ``raw_function`` is set.
    """
    from sentry.stacktraces.functions import trim_function_name

    behavior_family = get_behavior_family_for_platform(platform)

    if legacy_function_logic:
        name = raw_function or function
    elif function and not raw_function:
        name = trim_function_name(function, platform)
    else:
        name = function or raw_function

    if not name:
        return GroupingComponent(id="function")

    component = GroupingComponent(id="function", values=[name])

    if platform == "ruby":
        if name.startswith("block "):
            component.update(values=["block"], hint="ruby block")
        else:
            without_suffix = _ruby_erb_func.sub("", name)
            if without_suffix != name:
                component.update(
                    values=[without_suffix],
                    hint="removed generated erb template suffix",
                )

    elif platform == "php":
        if name.startswith("[Anonymous"):
            component.update(contributes=False, hint="ignored anonymous function")

    elif platform == "java":
        if name.startswith("lambda$"):
            component.update(contributes=False, hint="ignored lambda function")

    elif behavior_family == "native":
        if name in ("<redacted>", "<unknown>"):
            component.update(contributes=False, hint="ignored unknown function")
        elif legacy_function_logic:
            isolated = trim_function_name(name, platform)
            if isolated != name:
                component.update(values=[isolated], hint="isolated function")

    elif javascript_fuzzing and behavior_family == "javascript":
        # Object.foo and Foo.foo both become foo, which evens out some
        # common cross browser differences.
        short_name = name.rsplit(".", 1)[-1]
        if short_name != name:
            component.update(values=[short_name], hint="trimmed javascript function")

        # Function names coming out of sourcemaps are unreliable, so once the
        # context line can be used instead the function name no longer
        # takes part in grouping.
        if sourcemap_used and context_line_available:
            component.update(
                contributes=False,
                hint="ignored because sourcemap used and context line available",
            )

    return component
예제 #40
0
def get_frame_component(frame, event, meta, legacy_function_logic=False,
                        use_contextline=False,
                        javascript_fuzzing=False):
    """
    Build the ``frame`` grouping component for a single stack frame.

    The component is assembled from the module, filename and function
    components, plus the context line component when ``use_contextline`` is
    set.  ``legacy_function_logic`` and ``javascript_fuzzing`` are passed on
    to ``get_function_component``; ``javascript_fuzzing`` additionally
    enables the javascript specific frame filtering at the end.

    The platform is taken from the frame, falling back to the event.
    ``meta`` is accepted but not used here.

    Returns the ``GroupingComponent`` with id ``frame``.
    """
    platform = frame.platform or event.platform

    filename_component = get_filename_component(
        frame.abs_path,
        frame.filename,
        platform,
        allow_file_origin=javascript_fuzzing,
    )
    module_component = get_module_component_v1(frame.abs_path, frame.module, platform)

    # A contributing module always wins over the filename.
    if module_component.contributes and filename_component.contributes:
        filename_component.update(contributes=False, hint="module takes precedence")

    # The context line is only looked at when explicitly allowed.
    if use_contextline:
        context_line_component = get_contextline_component(frame, platform)
    else:
        context_line_component = None

    function_component = get_function_component(
        function=frame.function,
        raw_function=frame.raw_function,
        platform=platform,
        sourcemap_used=frame.data and frame.data.get("sourcemap") is not None,
        context_line_available=context_line_component and context_line_component.contributes,
        legacy_function_logic=legacy_function_logic,
        javascript_fuzzing=javascript_fuzzing,
    )

    components = [module_component, filename_component, function_component]
    if context_line_component is not None:
        components.append(context_line_component)

    rv = GroupingComponent(id="frame", values=components)

    # In javascript fuzzing mode some frames are disregarded consistently.
    # This forces common bad stacktraces onto a shared hash, at the price of
    # possibly skipping frames that would otherwise be useful.
    if javascript_fuzzing and get_behavior_family_for_platform(platform) == "javascript":
        unknown_names = (
            None,
            "?",
            "<anonymous function>",
            "<anonymous>",
            "Anonymous function",
        )
        low_quality_paths = ("[native code]", "native code", "eval code", "<anonymous>")

        short_name = frame.raw_function or frame.function
        if short_name:
            short_name = short_name.rsplit(".", 1)[-1]

        if short_name in unknown_names or short_name.endswith("/<"):
            function_component.update(
                contributes=False, hint="ignored unknown function name"
            )

        if short_name == "eval" or frame.abs_path in low_quality_paths:
            rv.update(contributes=False, hint="ignored low quality javascript frame")

    return rv