Exemple #1
0
    def matches_frame(self, frame_data, platform):
        """Return whether ``self.pattern`` matches ``frame_data`` for ``self.key``.

        ``path`` and ``package`` keys are glob-matched case insensitively
        (also retried with a leading ``/`` for relative values), ``family``
        is a comma separated list of behavior families (``all`` matches
        everything), and every other key is glob-matched case sensitively.
        """
        key = self.key

        # Path-like matches are always case insensitive.
        if key == 'path' or key == 'package':
            if key == 'package':
                candidate = frame_data.get('package') or ''
            else:
                candidate = frame_data.get('abs_path') or frame_data.get('filename') or ''

            glob_options = dict(ignorecase=True, doublestar=True, path_normalize=True)
            if glob_match(candidate, self.pattern, **glob_options):
                return True
            if candidate.startswith('/'):
                return False
            # Retry with a leading slash for values that do not have one.
            return bool(glob_match('/' + candidate, self.pattern, **glob_options))

        # Families are a plain membership test rather than a glob.
        if key == 'family':
            families = self.pattern.split(',')
            if 'all' in families:
                return True
            frame_platform = frame_data.get('platform') or platform
            return get_behavior_family_for_platform(frame_platform) in families

        # Everything else is matched case sensitively.
        if key == 'function':
            from sentry.stacktraces.functions import get_function_name_for_frame
            subject = get_function_name_for_frame(frame_data, platform) or '<unknown>'
        elif key == 'module':
            subject = frame_data.get('module') or '<unknown>'
        else:
            # Unknown keys should not occur; match against the placeholder.
            subject = '<unknown>'
        return glob_match(subject, self.pattern)
Exemple #2
0
def _normalize_in_app(stacktrace, platform=None, sdk_info=None):
    """
    Make the ``in_app`` flag consistent for every frame of ``stacktrace``.

    Frames are mutated in place and nothing is returned.
    """
    # Native frames have special in_app rules.  They run before the rest of
    # the normalization, just like grouping enhancers.
    # TODO(ja): Clean up those rules and put them in enhancers instead
    for frame in stacktrace:
        if frame.get('in_app') is None:
            frame_platform = frame.get('platform') or platform
            if get_behavior_family_for_platform(frame_platform) == 'native':
                package = frame.get('package')
                frame['in_app'] = (
                    not is_known_third_party(package, sdk_info=sdk_info)
                    if package else False
                )

    if _has_system_frames(stacktrace):
        # Default to false wherever processors or grouping enhancers have
        # not set in_app yet.
        for frame in stacktrace:
            if frame.get('in_app') is None:
                frame['in_app'] = False
    else:
        # Without any system frames all frames are flipped to
        # in_app=False.  This is expected by the UI.
        for frame in stacktrace:
            frame['in_app'] = False
Exemple #3
0
 def _push_frame(frame):
     """Append the function, path, module, family and package of ``frame``
     to ``self._frames`` (``self`` comes from the enclosing scope).
     """
     # The frame's own platform wins over the event's platform.
     frame_platform = frame.get('platform') or self.event.get('platform')
     function_name = get_function_name_for_frame(frame, frame_platform)
     entry = {
         'function': function_name or '<unknown>',
         'path': frame.get('abs_path') or frame.get('filename'),
         'module': frame.get('module'),
         'family': get_behavior_family_for_platform(frame_platform),
         'package': frame.get('package'),
     }
     self._frames.append(entry)
Exemple #4
0
 def get_exceptions(self):
     """Return the list of exception summaries, building and caching it in
     ``self._exceptions`` on first use.

     Each summary holds the exception ``type`` and ``value`` plus the
     behavior ``family`` of the event's platform.
     """
     if self._exceptions is not None:
         return self._exceptions

     summaries = self._exceptions = []
     for exc in get_path(self.event, 'exception', 'values', filter=True) or ():
         summary = {
             'type': exc.get('type'),
             'value': exc.get('value'),
             'family': get_behavior_family_for_platform(self.event.get('platform')),
         }
         summaries.append(summary)
     return self._exceptions
Exemple #5
0
 def get_messages(self):
     """Return the list of message summaries, building and caching it in
     ``self._messages`` on first use.

     The list has at most one entry: the formatted log entry together with
     the behavior ``family`` of the event's platform.
     """
     if self._messages is not None:
         return self._messages

     self._messages = []
     formatted = get_path(self.event, 'logentry', 'formatted', filter=True)
     if not formatted:
         return self._messages

     family = get_behavior_family_for_platform(self.event.get('platform'))
     self._messages.append({'message': formatted, 'family': family})
     return self._messages
Exemple #6
0
 def get_exceptions(self):
     """Lazily build ``self._exceptions`` and return it.

     One dict is produced per exception value of the event, carrying its
     ``type``, its ``value`` and the behavior ``family`` of the event's
     platform.
     """
     if self._exceptions is None:
         self._exceptions = collected = []
         collected.extend(
             {
                 "type": exc.get("type"),
                 "value": exc.get("value"),
                 "family": get_behavior_family_for_platform(self.event.get("platform")),
             }
             for exc in get_path(self.event, "exception", "values", filter=True) or ()
         )
     return self._exceptions
Exemple #7
0
def get_contextline_component(frame, platform):
    """Build the ``context-line`` grouping component for ``frame``.

    It is the caller's responsibility to only use context lines on
    platforms where the quality of the source code is trusted.  This
    function does however guard against some bad JavaScript environments
    by checking the origin of the frame.
    """
    component = GroupingComponent(id='context-line')

    raw_line = frame.context_line
    if not raw_line:
        return component

    # Expand tabs and collapse every run of whitespace into one space.
    normalized = ' '.join(raw_line.expandtabs(2).split())
    if not normalized:
        return component

    # The length limit is applied to the raw line, not the normalized one.
    if len(raw_line) > 120:
        component.update(hint='discarded because line too long')
        return component

    from_url_origin = (
        get_behavior_family_for_platform(platform) == 'javascript'
        and has_url_origin(frame.abs_path, allow_file_origin=True)
    )
    if from_url_origin:
        component.update(hint='discarded because from URL origin')
    else:
        component.update(values=[normalized])
    return component
Exemple #8
0
def trim_function_name(function, platform, normalize_lambdas=True):
    """Given a function value from the frame's function attribute this returns
    a trimmed version that can be stored in `function_name`.  This is only used
    if the client did not supply a value itself already.

    :param function: the function string to trim.
    :param platform: platform name.  Trimming is only attempted when
        `get_behavior_family_for_platform` maps it to ``'native'``; for any
        other family `function` is returned untouched.
    :param normalize_lambdas: when true, every match of `_lambda_re` is
        replaced with the literal ``lambda``.
    :returns: the trimmed function name.  The placeholders ``<redacted>``
        and ``<unknown>`` are returned unchanged, and values that start
        with ``[``, ``+[`` or ``-[`` are returned with only surrounding
        whitespace stripped.
    """
    if get_behavior_family_for_platform(platform) != 'native':
        return function
    if function in ('<redacted>', '<unknown>'):
        return function

    # Kept as a fallback in case no function token can be isolated below.
    original_function = function
    function = function.strip()

    # Ensure we don't operate on objc functions (the stripped value is
    # returned as is).
    if function.startswith(('[', '+[', '-[')):
        return function

    # Chop off C++ trailers, one at a time, until none is left.
    while 1:
        match = _cpp_trailer_re.search(function)
        if match is None:
            break
        function = function[:match.start()].rstrip()

    # `operator<<` really screws with our balancing, so let's work around
    # that by replacing it (and `operator<`, `operator()` and ` -> `) with
    # characters we do not observe in `split_func_tokens` or
    # `replace_enclosed_string`.  The placeholders are turned back into
    # the original text once the function token has been isolated.
    function = function \
        .replace('operator<<', u'operator⟨⟨') \
        .replace('operator<', u'operator⟨') \
        .replace('operator()', u'operator◯')\
        .replace(' -> ', u' ⟿ ')

    # normalize C++ lambdas.  This is necessary because different
    # compilers use different rules for how to name a lambda and they are
    # all quite inconsistent.  This does not give us perfect answers to
    # this problem but closer.  In particular msvc will call a lambda
    # something like `lambda_deadbeefeefffeeffeeff` whereas clang for
    # instance will name it `main::$_0` which will tell us in which outer
    # function it was declared.
    if normalize_lambdas:
        function = _lambda_re.sub('lambda', function)

    # Remove the arguments if there are any.  Only the parenthesized
    # markers `(anonymous namespace)` and `(operator)` are kept; every
    # other parenthesized section is replaced with an empty string.
    def process_args(value, start):
        value = value.strip()
        if value in ('anonymous namespace', 'operator'):
            return '(%s)' % value
        return ''
    function = replace_enclosed_string(function, '(', ')', process_args)

    # Resolve generic types, but special case rust which uses things like
    # <Foo as Bar>::baz to denote traits.
    def process_generics(value, start):
        # Rust special case: a generic section starting at offset 0 is
        # kept, with the generics nested inside it processed recursively.
        if start == 0:
            return '<%s>' % replace_enclosed_string(value, '<', '>', process_generics)
        # Any other generic section collapses to the placeholder `<T>`.
        return '<T>'
    function = replace_enclosed_string(function, '<', '>', process_generics)

    tokens = split_func_tokens(function)

    # find the token which is the function name.  Since we chopped off C++
    # trailers there are only two cases we care about: the token left to
    # the -> return marker which is for instance used in Swift and if that
    # is not found, the last token in the list.
    #
    # ["unsigned", "int", "whatever"] -> whatever
    # ["@objc", "whatever", "->", "int"] -> whatever
    #
    # NOTE(review): if the marker were the very first token, `index - 1`
    # would be -1 and select the last token -- confirm whether
    # `split_func_tokens` can ever produce that.
    try:
        func_token = tokens[tokens.index(u'⟿') - 1]
    except ValueError:
        # No return marker present: fall back to the last token, if any.
        if tokens:
            func_token = tokens[-1]
        else:
            func_token = None

    if func_token:
        # Turn the placeholders introduced above back into the real text.
        function = func_token.replace(u'⟨', '<') \
            .replace(u'◯', '()') \
            .replace(u' ⟿ ', ' -> ')

    # This really should never happen
    else:
        function = original_function

    # trim off rust markers
    function = _rust_hash.sub('', function)

    # trim off windows decl markers
    return _windecl_hash.sub('\\1', function)
Exemple #9
0
def get_frame_component(frame, event, meta, legacy_function_logic=False,
                        use_contextline=False,
                        javascript_fuzzing=False):
    """Build the ``frame`` grouping component for a single stack frame.

    The component is made of the module, filename and function components
    and, when ``use_contextline`` is set, the context line component.
    ``meta`` is accepted but not used by this function.
    """
    platform = frame.platform or event.platform

    # Safari throws [native code] frames in for calls like ``forEach``
    # whereas Chrome ignores these. Let's remove it from the hashing algo
    # so that they're more likely to group together
    filename_component = get_filename_component(
        frame.abs_path, frame.filename, platform,
        allow_file_origin=javascript_fuzzing)

    # A contributing module always takes precedence over the filename.
    module_component = get_module_component_v1(
        frame.abs_path, frame.module, platform)
    if module_component.contributes and filename_component.contributes:
        filename_component.update(contributes=False, hint='module takes precedence')

    # The context line is only considered when the caller allows it.
    context_line_component = (
        get_contextline_component(frame, platform) if use_contextline else None
    )

    function_component = get_function_component(
        function=frame.function,
        raw_function=frame.raw_function,
        platform=platform,
        sourcemap_used=frame.data and frame.data.get('sourcemap') is not None,
        context_line_available=context_line_component and context_line_component.contributes,
        legacy_function_logic=legacy_function_logic,
        javascript_fuzzing=javascript_fuzzing,
    )

    parts = [module_component, filename_component, function_component]
    if context_line_component is not None:
        parts.append(context_line_component)

    rv = GroupingComponent(id='frame', values=parts)

    # Only javascript frames in fuzzing mode get the extra treatment below.
    if not javascript_fuzzing \
       or get_behavior_family_for_platform(platform) != 'javascript':
        return rv

    # In javascript fuzzing mode some frames are disregarded consistently.
    # This forces common bad stacktraces together to have a common hash at
    # the cost of maybe skipping over frames that would otherwise be useful.
    func = frame.raw_function or frame.function
    if func:
        func = func.rsplit('.', 1)[-1]

    unknown_names = (None, '?', '<anonymous function>', '<anonymous>',
                     'Anonymous function')
    if func in unknown_names or func.endswith('/<'):
        function_component.update(
            contributes=False,
            hint='ignored unknown function name'
        )

    low_quality_paths = ('[native code]', 'native code', 'eval code', '<anonymous>')
    if func == 'eval' or frame.abs_path in low_quality_paths:
        rv.update(
            contributes=False,
            hint='ignored low quality javascript frame'
        )

    return rv
Exemple #10
0
def get_function_component(function, platform, legacy_function_logic,
                           sourcemap_used=False, context_line_available=False,
                           raw_function=None, javascript_fuzzing=False):
    """
    Build the ``function`` grouping component, normalizing common platform
    outliers on the way.

    - Ruby generates (random?) integers for various anonymous style functions
      such as in erb and the active_support library.
    - Block functions have metadata that we don't care about.

    ``legacy_function_logic`` selects between the frame v1 and the frame v2
    function name logic: v2 uses the function name consistently whereas v1
    prefers the raw function or a trimmed version (of the truncated one) for
    native.
    """
    from sentry.stacktraces.functions import trim_function_name
    behavior_family = get_behavior_family_for_platform(platform)

    if legacy_function_logic:
        func = raw_function or function
    elif raw_function or not function:
        func = function or raw_function
    else:
        # Only a function name without a raw function: trim it ourselves.
        func = trim_function_name(function, platform)

    if not func:
        return GroupingComponent(id='function')

    function_component = GroupingComponent(id='function', values=[func])

    if platform == 'ruby':
        if func.startswith('block '):
            function_component.update(values=['block'], hint='ruby block')
        else:
            without_suffix = _ruby_erb_func.sub('', func)
            if without_suffix != func:
                function_component.update(
                    values=[without_suffix],
                    hint='removed generated erb template suffix'
                )
        return function_component

    if platform == 'php':
        if func.startswith('[Anonymous'):
            function_component.update(
                contributes=False,
                hint='ignored anonymous function'
            )
        return function_component

    if platform == 'java':
        if func.startswith('lambda$'):
            function_component.update(
                contributes=False,
                hint='ignored lambda function'
            )
        return function_component

    if behavior_family == 'native':
        if func in ('<redacted>', '<unknown>'):
            function_component.update(
                contributes=False,
                hint='ignored unknown function'
            )
        elif legacy_function_logic:
            isolated = trim_function_name(func, platform)
            if isolated != func:
                function_component.update(
                    values=[isolated],
                    hint='isolated function'
                )
        return function_component

    if javascript_fuzzing and behavior_family == 'javascript':
        # Turn Object.foo or Foo.foo into foo so that some common cross
        # browser differences resolve to the same name.
        short_name = func.rsplit('.', 1)[-1]
        if short_name != func:
            function_component.update(
                values=[short_name],
                hint='trimmed javascript function'
            )

        # If a sourcemap was used for this frame and the context line is
        # usable, the function name is no longer wanted: function names in
        # sourcemaps are unreliable by the nature of sourcemaps and thus a
        # bad indicator for grouping.
        if sourcemap_used and context_line_available:
            function_component.update(
                contributes=False,
                hint='ignored because sourcemap used and context line available'
            )

    return function_component
Exemple #11
0
def is_native(meta):
    """Return whether the platform of ``meta['event']`` belongs to the
    ``native`` behavior family.
    """
    event = meta['event']
    family = get_behavior_family_for_platform(event.platform)
    return family == 'native'
Exemple #12
0
def test_get_grouping_family_for_platform(input, output):
    """Check that the platform ``input`` maps to the family ``output``."""
    family = get_behavior_family_for_platform(input)
    assert family == output