def matches_frame(self, frame_data, platform):
    """Check whether ``frame_data`` satisfies this matcher.

    The comparison depends on ``self.key``:

    - ``path`` / ``package``: ``self.pattern`` is glob-matched case
      insensitively against the frame's ``abs_path`` (falling back to
      ``filename``) or its ``package``.  A value that does not start with
      ``/`` is additionally tried with a ``/`` prepended.
    - ``family``: ``self.pattern`` is a comma separated list of families;
      the entry ``all`` matches every frame.  Otherwise the family is derived
      from the frame's own ``platform`` or, if absent, the ``platform``
      argument.
    - ``function`` / ``module``: case sensitive glob match, with
      ``'<unknown>'`` standing in for a missing value.

    Returns a truthy value when the frame matches.
    """
    key = self.key

    # Path matches are always case insensitive
    if key in ('path', 'package'):
        if key == 'package':
            candidate = frame_data.get('package') or ''
        else:
            candidate = frame_data.get('abs_path') or frame_data.get('filename') or ''

        glob_options = dict(ignorecase=True, doublestar=True, path_normalize=True)
        if glob_match(candidate, self.pattern, **glob_options):
            return True
        # Values without a leading slash get a second attempt as "/<value>".
        return bool(
            not candidate.startswith('/')
            and glob_match('/' + candidate, self.pattern, **glob_options)
        )

    # families need custom handling as well
    if key == 'family':
        families = self.pattern.split(',')
        if 'all' in families:
            return True
        frame_family = get_behavior_family_for_platform(
            frame_data.get('platform') or platform)
        return frame_family in families

    # all other matches are case sensitive
    if key == 'function':
        from sentry.stacktraces.functions import get_function_name_for_frame
        candidate = get_function_name_for_frame(frame_data, platform) or '<unknown>'
    elif key == 'module':
        candidate = frame_data.get('module') or '<unknown>'
    else:
        # should not happen :)
        candidate = '<unknown>'
    return glob_match(candidate, self.pattern)
def _normalize_in_app(stacktrace, platform=None, sdk_info=None):
    """
    Ensures consistent values of in_app across a stacktrace.

    ``stacktrace`` is iterated as a sequence of frame dicts which are mutated
    in place; nothing is returned.  ``platform`` is the fallback used for
    frames that carry no ``platform`` of their own, and ``sdk_info`` is
    forwarded to ``is_known_third_party``.
    """
    # Native frames have special rules regarding in_app. Apply them before other
    # normalization, just like grouping enhancers.
    # TODO(ja): Clean up those rules and put them in enhancers instead
    for entry in stacktrace:
        if entry.get('in_app') is not None:
            # An explicit value is never overridden here.
            continue
        if get_behavior_family_for_platform(entry.get('platform') or platform) != 'native':
            continue

        package = entry.get('package')
        # A native frame is in_app only if it has a package that is not a
        # known third party one.
        entry['in_app'] = bool(package) and \
            not is_known_third_party(package, sdk_info=sdk_info)

    if not _has_system_frames(stacktrace):
        # If all frames are in_app, flip all of them. This is expected by the UI
        for entry in stacktrace:
            entry['in_app'] = False
        return

    # Default to false in all cases where processors or grouping enhancers
    # have not yet set in_app.
    for entry in stacktrace:
        if entry.get('in_app') is None:
            entry['in_app'] = False
def _push_frame(frame):
    """Append a summary dict for ``frame`` to ``self._frames``.

    The frame's own ``platform`` wins over the event's platform.  The summary
    carries the keys ``function``, ``path``, ``module``, ``family`` and
    ``package``; a missing function name is recorded as ``'<unknown>'``.

    NOTE(review): ``self`` is not a parameter of this function, so it is
    presumably captured from an enclosing method -- confirm at the call site.
    """
    platform = frame.get('platform') or self.event.get('platform')
    function_name = get_function_name_for_frame(frame, platform)
    summary = {
        'function': function_name or '<unknown>',
        # abs_path is preferred; filename is only the fallback.
        'path': frame.get('abs_path') or frame.get('filename'),
        'module': frame.get('module'),
        'family': get_behavior_family_for_platform(platform),
        'package': frame.get('package'),
    }
    self._frames.append(summary)
def get_exceptions(self):
    """Return the list of exception summaries for ``self.event``.

    The list is built on the first call and stored on ``self._exceptions``;
    later calls return the stored list unchanged.  Each entry holds the
    exception's ``type`` and ``value`` plus the ``family`` derived from the
    event's platform.
    """
    if self._exceptions is not None:
        return self._exceptions

    # Assign the cache before filling it so that both names refer to the
    # same list while entries are appended.
    collected = self._exceptions = []
    for exc in get_path(self.event, 'exception', 'values', filter=True) or ():
        entry = {
            'type': exc.get('type'),
            'value': exc.get('value'),
            'family': get_behavior_family_for_platform(self.event.get('platform')),
        }
        collected.append(entry)
    return collected
def get_messages(self):
    """Return the list of message summaries for ``self.event``.

    The list is built on the first call and stored on ``self._messages``;
    later calls return the stored list unchanged.  It contains at most one
    entry, created only when the event's ``logentry.formatted`` value is
    truthy, holding that ``message`` and the ``family`` derived from the
    event's platform.
    """
    if self._messages is not None:
        return self._messages

    collected = self._messages = []
    formatted = get_path(self.event, 'logentry', 'formatted', filter=True)
    if formatted:
        collected.append({
            'message': formatted,
            'family': get_behavior_family_for_platform(self.event.get('platform')),
        })
    return collected
def get_exceptions(self):
    """Return the list of exception summaries for ``self.event``.

    The list is built on the first call and stored on ``self._exceptions``;
    later calls return the stored list unchanged.  Each entry holds the
    exception's ``type`` and ``value`` plus the ``family`` derived from the
    event's platform.
    """
    if self._exceptions is not None:
        return self._exceptions

    # Assign the cache before filling it so that both names refer to the
    # same list while entries are appended.
    summaries = self._exceptions = []
    raw_exceptions = get_path(self.event, "exception", "values", filter=True) or ()
    for exc in raw_exceptions:
        summary = {
            "type": exc.get("type"),
            "value": exc.get("value"),
            "family": get_behavior_family_for_platform(self.event.get("platform")),
        }
        summaries.append(summary)
    return summaries
def get_contextline_component(frame, platform):
    """Returns a contextline component.

    The caller's responsibility is to make sure context lines are only used
    for platforms where we trust the quality of the sourcecode.  It does
    however protect against some bad JavaScript environments based on origin
    checks.

    The component is returned without values when the frame has no context
    line or when the line is empty after whitespace normalization.
    """
    component = GroupingComponent(id='context-line')

    raw_line = frame.context_line
    if not raw_line:
        return component

    # Expand tabs and collapse every whitespace run into a single space.
    normalized = ' '.join(raw_line.expandtabs(2).split())
    if not normalized:
        return component

    # The length limit applies to the raw line, not the normalized one.
    if len(raw_line) > 120:
        component.update(hint='discarded because line too long')
    elif get_behavior_family_for_platform(platform) == 'javascript' \
            and has_url_origin(frame.abs_path, allow_file_origin=True):
        component.update(hint='discarded because from URL origin')
    else:
        component.update(values=[normalized])

    return component
def trim_function_name(function, platform, normalize_lambdas=True):
    """Given a function value from the frame's function attribute this returns
    a trimmed version that can be stored in `function_name`.  This is only used
    if the client did not supply a value itself already.

    Values for platforms outside the ``native`` behavior family, as well as
    the placeholders ``<redacted>`` and ``<unknown>``, are returned untouched.
    When ``normalize_lambdas`` is true, matches of the lambda pattern are
    replaced by the literal ``lambda``.
    """
    if get_behavior_family_for_platform(platform) != 'native' \
            or function in ('<redacted>', '<unknown>'):
        return function

    name = function.strip()

    # Ensure we don't operate on objc functions
    if name.startswith(('[', '+[', '-[')):
        return name

    # Chop off C++ trailers, one at a time, until none is left.
    trailer = _cpp_trailer_re.search(name)
    while trailer is not None:
        name = name[:trailer.start()].rstrip()
        trailer = _cpp_trailer_re.search(name)

    # Because operator<< really screws with our balancing, let's work around
    # that by replacing it with a character we do not observe in
    # `split_func_tokens` or `replace_enclosed_string`.  The order matters:
    # `operator<<` has to be handled before `operator<`.
    to_placeholder = (
        ('operator<<', u'operator⟨⟨'),
        ('operator<', u'operator⟨'),
        ('operator()', u'operator◯'),
        (' -> ', u' ⟿ '),
    )
    for needle, placeholder in to_placeholder:
        name = name.replace(needle, placeholder)

    # Normalize C++ lambdas.  This is necessary because different compilers
    # use different rules for how to name a lambda and they are all quite
    # inconsistent.  This does not give us perfect answers to this problem
    # but closer.  In particular msvc will call a lambda something like
    # `lambda_deadbeefeefffeeffeeff` whereas clang for instance will name it
    # `main::$_0` which will tell us in which outer function it was declared.
    if normalize_lambdas:
        name = _lambda_re.sub('lambda', name)

    # Remove the arguments if there are any; only the two special
    # parenthesized markers below are kept.
    def process_args(value, start):
        inner = value.strip()
        if inner in ('anonymous namespace', 'operator'):
            return '(%s)' % inner
        return ''

    name = replace_enclosed_string(name, '(', ')', process_args)

    # Resolve generic types, but special case rust which uses things like
    # <Foo as Bar>::baz to denote traits.
    def process_generics(value, start):
        if start != 0:
            return '<T>'
        # Rust special case
        return '<%s>' % replace_enclosed_string(value, '<', '>', process_generics)

    name = replace_enclosed_string(name, '<', '>', process_generics)

    tokens = split_func_tokens(name)

    # Find the token which is the function name.  Since we chopped off C++
    # trailers there are only two cases we care about: the token left of
    # the -> return marker which is for instance used in Swift and, if that
    # is not found, the last token in the list.
    #
    # ["unsigned", "int", "whatever"] -> whatever
    # ["@objc", "whatever", "->", "int"] -> whatever
    try:
        arrow_at = tokens.index(u'⟿')
    except ValueError:
        func_token = tokens[-1] if tokens else None
    else:
        func_token = tokens[arrow_at - 1]

    if func_token:
        # Undo the placeholder substitutions from above.
        name = func_token
        from_placeholder = (
            (u'⟨', '<'),
            (u'◯', '()'),
            (u' ⟿ ', ' -> '),
        )
        for placeholder, replacement in from_placeholder:
            name = name.replace(placeholder, replacement)
    else:
        # This really should never happen
        name = function

    # trim off rust markers first, then windows decl markers
    return _windecl_hash.sub('\\1', _rust_hash.sub('', name))
def get_frame_component(frame, event, meta, legacy_function_logic=False,
                        use_contextline=False,
                        javascript_fuzzing=False):
    """Build the ``frame`` grouping component for a single stack frame.

    The component is made of the module, filename and function components
    and, when ``use_contextline`` is set, the context-line component.  The
    frame's own platform takes precedence over the event's platform.

    With ``javascript_fuzzing`` enabled and a platform in the ``javascript``
    behavior family, unknown function names stop contributing and low
    quality frames are excluded as a whole.

    ``meta`` is accepted but not used by this function.
    """
    platform = frame.platform or event.platform

    # Safari throws [native code] frames in for calls like ``forEach``
    # whereas Chrome ignores these. Let's remove it from the hashing algo
    # so that they're more likely to group together
    filename_component = get_filename_component(
        frame.abs_path, frame.filename, platform,
        allow_file_origin=javascript_fuzzing)

    # if we have a module we use that for grouping.  This will always
    # take precedence over the filename if it contributes
    module_component = get_module_component_v1(
        frame.abs_path, frame.module, platform)
    if module_component.contributes and filename_component.contributes:
        filename_component.update(contributes=False, hint='module takes precedence')

    # If we are allowed to use the contextline we add it now.
    if use_contextline:
        context_line_component = get_contextline_component(frame, platform)
    else:
        context_line_component = None

    function_component = get_function_component(
        function=frame.function,
        raw_function=frame.raw_function,
        platform=platform,
        sourcemap_used=frame.data and frame.data.get('sourcemap') is not None,
        context_line_available=context_line_component and
        context_line_component.contributes,
        legacy_function_logic=legacy_function_logic,
        javascript_fuzzing=javascript_fuzzing,
    )

    parts = [module_component, filename_component, function_component]
    if context_line_component is not None:
        parts.append(context_line_component)

    rv = GroupingComponent(id='frame', values=parts)

    # if we are in javascript fuzzing mode we want to disregard some
    # frames consistently.  These force common bad stacktraces together
    # to have a common hash at the cost of maybe skipping over frames that
    # would otherwise be useful.
    if not javascript_fuzzing \
            or get_behavior_family_for_platform(platform) != 'javascript':
        return rv

    name = frame.raw_function or frame.function
    if name:
        # Only the part after the last dot is inspected.
        name = name.rsplit('.', 1)[-1]

    unknown_names = (None, '?', '<anonymous function>', '<anonymous>',
                     'Anonymous function')
    if name in unknown_names or name.endswith('/<'):
        function_component.update(
            contributes=False,
            hint='ignored unknown function name'
        )

    low_quality_paths = ('[native code]', 'native code', 'eval code', '<anonymous>')
    if name == 'eval' or frame.abs_path in low_quality_paths:
        rv.update(
            contributes=False,
            hint='ignored low quality javascript frame'
        )

    return rv
def get_function_component(function, platform, legacy_function_logic,
                           sourcemap_used=False, context_line_available=False,
                           raw_function=None, javascript_fuzzing=False):
    """
    Attempt to normalize functions by removing common platform outliers.

    - Ruby generates (random?) integers for various anonymous style functions
      such as in erb and the active_support library.
    - Block functions have metadata that we don't care about.

    The `legacy_function_logic` parameter controls if the system should
    use the frame v1 function name logic or the frame v2 logic.  The difference
    is that v2 uses the function name consistently and v1 prefers raw function
    or a trimmed version (of the truncated one) for native.

    Returns a ``function`` grouping component; it has no values when neither
    ``function`` nor ``raw_function`` yields a usable name.
    """
    from sentry.stacktraces.functions import trim_function_name
    behavior_family = get_behavior_family_for_platform(platform)

    if legacy_function_logic:
        func = raw_function or function
    elif function and not raw_function:
        # Only a client-less function name is available: trim it ourselves.
        func = trim_function_name(function, platform)
    else:
        func = function or raw_function

    if not func:
        return GroupingComponent(id='function')

    function_component = GroupingComponent(
        id='function',
        values=[func],
    )

    # At most one platform specific adjustment applies; collect it here and
    # apply it once below.
    override = None

    if platform == 'ruby':
        if func.startswith('block '):
            override = dict(values=['block'], hint='ruby block')
        else:
            without_suffix = _ruby_erb_func.sub('', func)
            if without_suffix != func:
                override = dict(
                    values=[without_suffix],
                    hint='removed generated erb template suffix',
                )
    elif platform == 'php':
        if func.startswith('[Anonymous'):
            override = dict(contributes=False, hint='ignored anonymous function')
    elif platform == 'java':
        if func.startswith('lambda$'):
            override = dict(contributes=False, hint='ignored lambda function')
    elif behavior_family == 'native':
        if func in ('<redacted>', '<unknown>'):
            override = dict(contributes=False, hint='ignored unknown function')
        elif legacy_function_logic:
            isolated = trim_function_name(func, platform)
            if isolated != func:
                override = dict(values=[isolated], hint='isolated function')
    elif javascript_fuzzing and behavior_family == 'javascript':
        # This changes Object.foo or Foo.foo into foo so that we can
        # resolve some common cross browser differences
        short_name = func.rsplit('.', 1)[-1]
        if short_name != func:
            override = dict(values=[short_name], hint='trimmed javascript function')

    if override is not None:
        function_component.update(**override)

    # if a sourcemap was used for this frame and we know that we can
    # use the context line information we no longer want to use the
    # function name.  The reason for this is that function names in
    # sourcemaps are unreliable by the nature of sourcemaps and thus a
    # bad indicator for grouping.
    if sourcemap_used and context_line_available:
        function_component.update(
            contributes=False,
            hint='ignored because sourcemap used and context line available'
        )

    return function_component
def is_native(meta):
    """Return whether the platform of ``meta['event']`` maps to the
    ``native`` behavior family.
    """
    event_platform = meta['event'].platform
    family = get_behavior_family_for_platform(event_platform)
    return family == 'native'
def test_get_grouping_family_for_platform(input, output):
    """The behavior family computed for ``input`` must equal ``output``.

    NOTE(review): ``input`` and ``output`` are presumably supplied by a
    parametrize decorator that is not visible here -- confirm.
    """
    family = get_behavior_family_for_platform(input)
    assert family == output