def symbolize(self, dsym_path, image_vmaddr, image_addr,
              instruction_addr, cpu_name, symbolize_inlined=False):
    """Symbolize one frame from the supplied image information.

    `dsym_path` is the location of the dsym file on the file system.
    `image_vmaddr` is the slide of the image; when it is zero or unset
    the slide is looked up in the dsym file for the given `cpu_name`.
    `image_addr` is the address the image was loaded at and
    `instruction_addr` is the address where the error happened.
    `cpu_name` follows the general apple conventions (common names are
    `armv7` and `arm64`).

    With `symbolize_inlined` set to `True` the inlined-aware lookup of
    the underlying symbolizer is used, which returns a list of frames
    (possibly empty) instead of a single frame.

    Raises `RuntimeError` if the symbolizer has been closed and
    `SymbolicationError` if `cpu_name` is not a valid cpu name or the
    symbolication fails.
    """
    if self._closed:
        raise RuntimeError('Symbolizer is closed')

    dsym_path = normalize_dsym_path(dsym_path)

    image_vmaddr = parse_addr(image_vmaddr)
    if not image_vmaddr:
        # No usable slide was passed in; try to read it from the dsym.
        debug_info = self._symbolizer.get_debug_info(dsym_path)
        variant = None
        if debug_info is not None:
            variant = debug_info.get_variant(cpu_name)
        if variant is not None:
            image_vmaddr = variant.vmaddr

    image_addr = parse_addr(image_addr)
    instruction_addr = parse_addr(instruction_addr)

    if not is_valid_cpu_name(cpu_name):
        raise SymbolicationError('"%s" is not a valid cpu name' % cpu_name)

    # Translate the runtime address into the address space of the dsym.
    addr = image_vmaddr + instruction_addr - image_addr

    with self._lock:
        with timedsection('symbolize'):
            if symbolize_inlined:
                lookup = self._symbolizer.symbolize_inlined
            else:
                lookup = self._symbolizer.symbolize
            return lookup(dsym_path, addr, cpu_name)
def sym_app_frame(self, frame, img, symbolize_inlined=False):
    """Return a canned, single-element list of symbolized frames.

    Frames whose instruction address lies in
    [4295098384, 4295098388) resolve to `real_main`; every other
    address resolves to `other_main`.  `symbolize_inlined` must be
    truthy.
    """
    assert symbolize_inlined

    object_name = (
        "/var/containers/Bundle/Application/"
        "B33C37A8-F933-4B6B-9FFA-152282BFDF13/"
        "SentryTest.app/SentryTest"
    )

    addr = parse_addr(frame['instruction_addr'])
    if 4295098384 <= addr < 4295098388:
        line = 42
        symbol_name = 'real_main'
        symbol_addr = '0x1000262a0'
        instruction_addr = '0x100026330'
    else:
        line = 82
        symbol_name = 'other_main'
        symbol_addr = '0x1'
        instruction_addr = '0x1'

    return [{
        'filename': 'Foo.swift',
        'line': line,
        'column': 23,
        'object_name': object_name,
        'symbol_name': symbol_name,
        'symbol_addr': symbol_addr,
        "instruction_addr": instruction_addr,
    }]
def sym_app_frame(self, frame, img, symbolize_inlined=False):
    """Produce a fixed list with one symbolized frame for the test app.

    An instruction address within [4295098384, 4295098388) yields the
    `real_main` frame, anything else yields the `other_main` frame.
    The caller has to request inlined symbolication.
    """
    assert symbolize_inlined

    object_name = ("/var/containers/Bundle/Application/"
                   "B33C37A8-F933-4B6B-9FFA-152282BFDF13/"
                   "SentryTest.app/SentryTest")

    instruction = parse_addr(frame['instruction_addr'])
    hits_real_main = 4295098384 <= instruction < 4295098388

    if hits_real_main:
        return [{
            'filename': 'Foo.swift',
            'line': 42,
            'column': 23,
            'object_name': object_name,
            'symbol_name': 'real_main',
            'symbol_addr': '0x1000262a0',
            "instruction_addr": '0x100026330',
        }]

    return [{
        'filename': 'Foo.swift',
        'line': 82,
        'column': 23,
        'object_name': object_name,
        'symbol_name': 'other_main',
        'symbol_addr': '0x1',
        "instruction_addr": '0x1',
    }]
def find_best_instruction(self, processable_frame):
    """Return the instruction address to use for symbolicating a frame.

    Without a known cpu name the frame's address is only parsed.
    Otherwise the module level `find_best_instruction` is consulted;
    for frame zero it additionally receives the registers of the
    stacktrace and, if present, the posix signal of the first
    exception's mechanism.
    """
    if self.cpu_name is None:
        return parse_addr(processable_frame['instruction_addr'])

    # Meta information is only needed for frame zero.
    if processable_frame.idx != 0:
        return find_best_instruction(processable_frame['instruction_addr'],
                                     self.cpu_name, meta=None)

    # The signal helps symsynd to disambiguate the first frame in some
    # situations, so forward it when the mechanism carries one.
    signal = None
    exc = self.data.get('sentry.interfaces.Exception')
    if exc is not None:
        mechanism = exc['values'][0].get('mechanism')
        if mechanism and 'posix_signal' in mechanism:
            if 'signal' in mechanism['posix_signal']:
                signal = mechanism['posix_signal']['signal']

    stacktrace = processable_frame.stacktrace_info.stacktrace
    meta = {
        'frame_number': 0,
        'registers': stacktrace.get('registers'),
        'signal': signal,
    }
    return find_best_instruction(processable_frame['instruction_addr'],
                                 self.cpu_name, meta=meta)
def symbolize_app_frame(self, frame, img, symbolize_inlined=False):
    """Symbolize an application frame through the symsynd symbolizer.

    Returns the processed frame, or a list of processed frames when
    `symbolize_inlined` is set.  Raises `SymbolicationFailed` when the
    image has no dsym loaded, when the dsym cannot be used, or when
    symsynd returns nothing for the frame.
    """
    # An image that symsynd did not load means the dsym is missing.
    image_addr = parse_addr(img['image_addr'])
    if image_addr not in self.symsynd_symbolizer.images:
        error_type = (
            EventError.NATIVE_MISSING_OPTIONALLY_BUNDLED_DSYM
            if self._is_optional_dsym(frame, img)
            else EventError.NATIVE_MISSING_DSYM
        )
        raise SymbolicationFailed(
            type=error_type,
            image=img
        )

    try:
        symbolized = self.symsynd_symbolizer.symbolize_frame(
            frame, silent=False, demangle=False,
            symbolize_inlined=symbolize_inlined)
    except SymbolicationError as e:
        raise SymbolicationFailed(
            type=EventError.NATIVE_BAD_DSYM,
            message=six.text_type(e),
            image=img
        )

    if not symbolized:
        raise SymbolicationFailed(
            type=EventError.NATIVE_MISSING_SYMBOL,
            image=img
        )

    if not symbolize_inlined:
        return self._process_frame(symbolized, img)
    return [self._process_frame(nf, img) for nf in symbolized]
def __init__(self, project, binary_images, referenced_images=None,
             cpu_name=None):
    """Set up the symsynd symbolizer and an address index of all images.

    When `cpu_name` is not given it is derived from the images: it is
    only set if the images agree on a cpu name and stays `None`
    otherwise.
    """
    self.symsynd_symbolizer = make_symbolizer(
        project, binary_images, referenced_images=referenced_images)

    # symsynd keeps a similar index, but only for images it found dsyms
    # for.  System symbols have no dsyms, so all images are tracked here.
    addresses = []
    images_by_addr = {}
    for image in binary_images:
        base_addr = parse_addr(image['image_addr'])
        addresses.append(base_addr)
        images_by_addr[base_addr] = image
    addresses.sort()
    self._image_addresses = addresses
    self.images = images_by_addr

    # Detecting the cpu name should always succeed, but better be safe.
    self.cpu_name = cpu_name
    if cpu_name is None:
        detected = None
        for image in six.itervalues(images_by_addr):
            candidate = get_cpu_name(image['cpu_type'],
                                     image['cpu_subtype'])
            if detected is None:
                detected = candidate
            elif detected != candidate:
                # Conflicting cpu names: give up on detection.
                detected = None
                break
        self.cpu_name = detected
def find_best_instruction(self, processable_frame):
    """Compute the interpolated instruction address for a frame.

    If no cpu name is known the parsed frame address is returned as is.
    Otherwise the lookup is delegated to the module level
    `find_best_instruction`, which gets meta information (frame number,
    registers and posix signal) for frame zero only.
    """
    if self.cpu_name is None:
        return parse_addr(processable_frame['instruction_addr'])

    meta = None
    is_first_frame = processable_frame.idx == 0
    if is_first_frame:
        # Pass the signal on if the mechanism of the first exception
        # has one; symsynd can use it to disambiguate the first frame.
        signal = None
        exc = self.data.get('sentry.interfaces.Exception')
        if exc is not None:
            mechanism = exc['values'][0].get('mechanism')
            has_signal = (
                mechanism
                and 'posix_signal' in mechanism
                and 'signal' in mechanism['posix_signal']
            )
            if has_signal:
                signal = mechanism['posix_signal']['signal']
        registers = processable_frame.stacktrace_info.stacktrace.get(
            'registers')
        meta = {
            'frame_number': 0,
            'registers': registers,
            'signal': signal,
        }

    return find_best_instruction(
        processable_frame['instruction_addr'],
        self.cpu_name,
        meta=meta,
    )
def find_image(self, addr):
    """Given an instruction address this locates the image this address
    is contained in.

    Returns the image with the greatest start address that is less than
    or equal to `addr`, or `None` if `addr` lies before the first image.
    No upper bound is checked, so an address past the end of the last
    image still resolves to that image.
    """
    # bisect_right (rather than bisect_left) so that an address exactly
    # equal to an image's start address resolves to that image instead
    # of the preceding one.
    idx = bisect.bisect_right(self._image_addresses, parse_addr(addr))
    if idx > 0:
        return self.images[self._image_addresses[idx - 1]]
    return None
def symbolize_app_frame(self, frame, img, symbolize_inlined=False):
    """Run an application frame through symsynd and post-process it.

    The result is a single processed frame, or a list of them if
    `symbolize_inlined` is set.  `SymbolicationFailed` is raised for a
    missing dsym, an unusable dsym and for an empty symsynd result.
    """
    # If symsynd does not know the image there is no dsym for it.
    if parse_addr(img['image_addr']) not in self.symsynd_symbolizer.images:
        optional = self._is_optional_dsym(frame, img)
        if optional:
            failure = EventError.NATIVE_MISSING_OPTIONALLY_BUNDLED_DSYM
        else:
            failure = EventError.NATIVE_MISSING_DSYM
        raise SymbolicationFailed(type=failure, image=img)

    try:
        frames = self.symsynd_symbolizer.symbolize_frame(
            frame,
            silent=False,
            demangle=False,
            symbolize_inlined=symbolize_inlined,
        )
    except SymbolicationError as e:
        raise SymbolicationFailed(type=EventError.NATIVE_BAD_DSYM,
                                  message=six.text_type(e),
                                  image=img)

    if not frames:
        raise SymbolicationFailed(type=EventError.NATIVE_MISSING_SYMBOL,
                                  image=img)

    if symbolize_inlined:
        processed = []
        for nf in frames:
            processed.append(self._process_frame(nf, img))
        return processed
    return self._process_frame(frames, img)
def find_best_instruction(addr, cpu_name, meta=None):
    """Pick the most accurate instruction address for a frame.

    `addr` is the reported instruction address and `meta` optionally
    carries `frame_number`, `registers` and `signal`.  The chosen
    address is finally passed through `round_to_instruction_end`.
    For more information see `symbolize_frame`.
    """
    addr = parse_addr(addr)

    on_crashing_frame = bool(meta) and meta.get('frame_number') == 0
    if not on_crashing_frame:
        # Anything but the crashing frame most likely holds a return
        # address, so assume the call sits one instruction earlier.
        step_back = True
    else:
        # If registers are available and the PC register disagrees with
        # the address of the first frame, and one of a few likely
        # signals was received, going one instruction back appears to
        # be the correct thing to do.
        ip = get_ip_register(meta.get('registers'), cpu_name)
        step_back = (
            ip is not None
            and ip != addr
            and meta.get('signal') in (SIGILL, SIGBUS, SIGSEGV)
        )

    if step_back:
        best = get_previous_instruction(addr, cpu_name)
    else:
        best = addr

    # On arm the DWARF lookup apparently gives better hits for the end
    # of an instruction than for its beginning.
    return round_to_instruction_end(best, cpu_name)
def find_best_instruction(self, frame, meta=None):
    """Return the best instruction address for `frame`.

    The symsynd symbolizer is only consulted when images are present
    and a cpu name is known; otherwise the frame's address is merely
    parsed, because it cannot be fixed up.
    """
    can_fix_up = self.images and self.cpu_name is not None
    if can_fix_up:
        return self.symsynd_symbolizer.find_best_instruction(
            frame['instruction_addr'],
            cpu_name=self.cpu_name,
            meta=meta,
        )
    return parse_addr(frame['instruction_addr'])
def __init__(self, images):
    """Index `images` by their parsed `image_addr`.

    `self.images` maps each parsed address to its image and
    `self._image_addresses` holds the same addresses in ascending order.
    """
    by_addr = {}
    addresses = []
    for image in images:
        base_addr = parse_addr(image['image_addr'])
        addresses.append(base_addr)
        by_addr[base_addr] = image
    addresses.sort()

    self._image_addresses = addresses
    self.images = by_addr
def symbolize(self, dsym_path, image_vmaddr, image_addr,
              instruction_addr, cpu_name, uuid=None, silent=True,
              demangle=True):
    """Symbolize a single address and return the result as a dict.

    The returned dict has the keys `symbol_name`, `filename`, `line`,
    `column` and `uuid`.  If `image_vmaddr` is zero or unset it is read
    via `get_macho_vmaddr` (falling back to `0`).  With `silent` set a
    failed lookup yields an empty result instead of raising
    `SymbolicationError`; with `demangle` set the symbol name is passed
    through `demangle_symbol`.

    Raises `RuntimeError` if the symbolizer is closed and `ValueError`
    for an invalid `cpu_name`.
    """
    if self._closed:
        raise RuntimeError('Symbolizer is closed')
    if not is_valid_cpu_name(cpu_name):
        raise ValueError('"%s" is not a valid cpu name' % cpu_name)

    dsym_path = normalize_dsym_path(dsym_path)

    slide = parse_addr(image_vmaddr)
    if not slide:
        slide = get_macho_vmaddr(dsym_path, cpu_name) or 0
    load_addr = parse_addr(image_addr)
    instruction = parse_addr(instruction_addr)
    addr = slide + instruction - load_addr

    try:
        with self._lock:
            with timedsection('symbolize'):
                sym = self.symbolizer.symbolize(dsym_path, addr, cpu_name)
        if sym[0] is None:
            raise SymbolicationError('Symbolizer could not find symbol')
    except SymbolicationError:
        if not silent:
            raise
        # Silent mode: report an empty result instead of failing.
        sym = (None, None, 0, 0)

    symbol_name = demangle_symbol(sym[0]) if demangle else sym[0]

    return {
        'symbol_name': symbol_name,
        'filename': sym[1],
        'line': sym[2],
        'column': sym[3],
        'uuid': uuid,
    }
def get_ip_register(registers, cpu_name):
    """Return the parsed instruction pointer from `registers`.

    The `pc` register is used for cpu names starting with `arm` and the
    `rip` register for `x86_64`.  `None` is returned if there are no
    registers, the cpu name is not handled, or the register is missing.
    """
    if not registers:
        return None

    if cpu_name[:3] == 'arm':
        register = 'pc'
    elif cpu_name == 'x86_64':
        register = 'rip'
    else:
        return None

    value = registers.get(register)
    if value is None:
        return None
    return parse_addr(value)
def symbolize(self, dsym_path, image_vmaddr, image_addr,
              instruction_addr, cpu_name, uuid=None, silent=True):
    """Symbolize a single address and return the result as a dict.

    The result carries `symbol_name` (passed through `demangle_symbol`),
    `filename`, `line`, `column` and the given `uuid`.  A zero or unset
    `image_vmaddr` is replaced by the value from `get_macho_vmaddr`, or
    `0` if that yields nothing.  Unless `silent` is disabled a failed
    lookup produces an empty result rather than a `SymbolicationError`.

    Raises `RuntimeError` if the symbolizer is closed and `ValueError`
    for an invalid `cpu_name`.
    """
    if self._closed:
        raise RuntimeError('Symbolizer is closed')
    if not is_valid_cpu_name(cpu_name):
        raise ValueError('"%s" is not a valid cpu name' % cpu_name)

    dsym_path = normalize_dsym_path(dsym_path)

    vmaddr = parse_addr(image_vmaddr)
    if not vmaddr:
        vmaddr = get_macho_vmaddr(dsym_path, cpu_name) or 0
    base_addr = parse_addr(image_addr)
    target_addr = parse_addr(instruction_addr)
    addr = vmaddr + target_addr - base_addr

    try:
        with self._lock:
            with timedsection('symbolize'):
                sym = self.symbolizer.symbolize(dsym_path, addr, cpu_name)
        if sym[0] is None:
            raise SymbolicationError('Symbolizer could not find symbol')
    except SymbolicationError:
        if not silent:
            raise
        # Swallow the failure and hand back an empty result.
        sym = (None, None, 0, 0)

    result = {}
    result['symbol_name'] = demangle_symbol(sym[0])
    result['filename'] = sym[1]
    result['line'] = sym[2]
    result['column'] = sym[3]
    result['uuid'] = uuid
    return result
def preprocess_frame(self, processable_frame):
    """Attach the resolved instruction address and image to a frame.

    Stores `instruction_addr` and `image_uuid` in the frame's `data`.
    If an image was found for the address, a cache key is set on the
    frame as well.
    """
    instr_addr = self.find_best_instruction(processable_frame)
    img = self.image_lookup.find_image(instr_addr)

    if img is None:
        processable_frame.data = {
            'instruction_addr': instr_addr,
            'image_uuid': None,
        }
        return

    processable_frame.data = {
        'instruction_addr': instr_addr,
        'image_uuid': img['uuid'],
    }

    # Images can move around, so the address in the cache key is rebased
    # into the image the same way the symbolizer does it.
    rebased_addr = (parse_addr(img['image_vmaddr']) + instr_addr -
                    parse_addr(img['image_addr']))
    cache_values = (
        FRAME_CACHE_VERSION,
        rebased_addr,
        img['uuid'].lower(),
        img['cpu_type'],
        img['cpu_subtype'],
        img['image_size'],
    )
    processable_frame.set_cache_key_from_values(cache_values)
def rebase_addr(instr_addr, img):
    """Rebase `instr_addr` from the loaded image onto its vmaddr.

    Computes `image_vmaddr + instr_addr - image_addr` with every
    operand run through `parse_addr`.
    """
    vmaddr = parse_addr(img['image_vmaddr'])
    absolute = parse_addr(instr_addr)
    load_addr = parse_addr(img['image_addr'])
    return vmaddr + absolute - load_addr
def find_debug_images(dsym_paths, binary_images):
    """Map image addresses to the debug files found for them on disk.

    Given a list of paths and a list of binary images this returns a
    dictionary of parsed image addresses to file system locations for
    all images that could be found.  Images without a cpu name are
    ignored.
    """
    with timedsection('iterimages0'):
        pending = set(
            image['uuid'].lower()
            for image in binary_images
            if get_image_cpu_name(image) is not None
        )

    located = {}

    # Step one: look for files that are named by their UUID.
    with timedsection('loadimages-fast'):
        for uuid in list(pending):
            for base_path in dsym_paths:
                candidate = os.path.join(base_path, uuid)
                if not os.path.isfile(candidate):
                    continue
                located[uuid] = candidate
                pending.discard(uuid)
                break

    # Step two: fall back to dsym bundles.  This strategy is pretty
    # slow, so it is only applied to paths that look like a bundle.
    if pending:
        bundle_paths = [
            base_path for base_path in dsym_paths
            if os.path.isdir(os.path.join(base_path, 'Contents'))
        ]

        with timedsection('loadimages-slow'):
            for bundle_path in bundle_paths:
                dwarf_base = os.path.join(bundle_path, 'Contents',
                                          'Resources', 'DWARF')
                if not os.path.isdir(dwarf_base):
                    continue
                for entry in os.listdir(dwarf_base):
                    # Looks like a UUID we loaded already, skip it.
                    if entry in located:
                        continue
                    full_path = os.path.join(dwarf_base, entry)
                    try:
                        debug_info = DebugInfo.open_path(full_path)
                    except DebugInfoError:
                        continue
                    for variant in debug_info.get_variants():
                        variant_uuid = str(variant.uuid)
                        if variant_uuid in pending:
                            located[variant_uuid] = full_path
                            pending.discard(variant_uuid)

    # Finally resolve every image to the file that was found for it.
    rv = {}
    with timedsection('resolveimages'):
        for image in binary_images:
            if get_image_cpu_name(image) is None:
                continue
            uid = image['uuid'].lower()
            if uid in located:
                rv[parse_addr(image['image_addr'])] = located[uid]
    return rv
def rebase_addr(instr_addr, img):
    """Return `instr_addr` as an offset from the image's `image_addr`.

    Both values are run through `parse_addr` before subtracting.
    """
    absolute = parse_addr(instr_addr)
    load_addr = parse_addr(img['image_addr'])
    return absolute - load_addr
def find_best_instruction(self, frame, meta=None):
    """Finds the best instruction for a given frame.

    Returns the parsed `instruction_addr` of `frame` unchanged when
    there are no images or no cpu name is known; otherwise the lookup
    is delegated to the symsynd symbolizer together with `meta`.
    """
    # If we have no images or cpu name we cannot possibly fix the
    # instruction here.  Without this guard a `None` cpu name would be
    # handed on to the symsynd lookup.
    if not self.images or self.cpu_name is None:
        return parse_addr(frame['instruction_addr'])
    return self.symsynd_symbolizer.find_best_instruction(
        frame['instruction_addr'], cpu_name=self.cpu_name, meta=meta)