class KerneloopsProblem(ProblemType):
    """
    Problem-type plugin handling kernel oops uReports: validation,
    hashing, storage into the report database and retracing of symbols
    against kernel debuginfo packages.
    """

    name = "kerneloops"
    nice_name = "Kernel oops"

    # ureport taint-flag name -> (kernel taint character, human description)
    tainted_flags = {
        "module_proprietary": ("P", "Proprietary module has been loaded"),
        "forced_module": ("F", "Module has been forcibly loaded"),
        "smp_unsafe": ("S", "SMP with CPUs not designed for SMP"),
        "forced_removal": ("R", "User forced a module unload"),
        "mce": ("M", "System experienced a machine check exception"),
        "page_release": ("B", "System has hit bad_page"),
        "userspace": ("U", "Userspace-defined naughtiness"),
        "died_recently": ("D", "Kernel has oopsed before"),
        "acpi_overridden": ("A", "ACPI table overridden"),
        "warning": ("W", "Taint on warning"),
        "staging_driver": ("C", "Modules from drivers/staging are loaded"),
        "firmware_workaround": ("I", "Working around severe firmware bug"),
        "module_out_of_tree": ("O", "Out-of-tree module has been loaded"),
        "unsigned_module": ("E", "Unsigned module has been loaded"),
        "soft_lockup": ("L", "A soft lockup previously occurred"),
        "live_patch": ("K", "Kernel has been live patched"),
    }

    # structural validator for the "problem" part of a kerneloops uReport
    checker = DictChecker({
        # no need to check type twice, the toplevel checker already did it
        # "type": StringChecker(allowed=[KerneloopsProblem.name]),
        "component": StringChecker(pattern=r"^(kernel|xorg-x11-drv-[a-z\-]+)(-[a-zA-Z0-9\-\._]+)?$",
                                   maxlen=column_len(OpSysComponent, "name")),
        "version": StringChecker(pattern=(r"^[0-9]+\.[0-9]+\.[0-9]+"
                                          r"(.[^\-]+)?(\-.*)?$"),
                                 maxlen=column_len(SymbolSource, "build_id")),
        "taint_flags": ListChecker(StringChecker(allowed=tainted_flags.keys())),
        "modules": ListChecker(StringChecker(pattern=r"^[a-zA-Z0-9_]+(\([A-Z\+\-]+\))?$",
                                             maxlen=column_len(KernelModule, "name")),
                               mandatory=False),
        "raw_oops": StringChecker(maxlen=Report.__lobs__["oops"],
                                  mandatory=False),
        "frames": ListChecker(DictChecker({
            "address": IntChecker(minval=0, maxval=((1 << 64) - 1)),
            "reliable": Checker(bool),
            "function_name": StringChecker(pattern=r"^[a-zA-Z0-9_\.]+$",
                                           maxlen=column_len(Symbol, "name")),
            "function_offset": IntChecker(minval=0, maxval=((1 << 63) - 1)),
            "function_length": IntChecker(minval=0, maxval=((1 << 63) - 1)),
            "module_name": StringChecker(pattern=r"^[a-zA-Z0-9_]+(\([A-Z\+\-]+\))?$",
                                         mandatory=False),
        }), minlen=1)
    })

    @classmethod
    def install(cls, db, logger=None):
        """Create any missing kernel taint flag rows in storage."""
        if logger is None:
            logger = log.getChildLogger(cls.__name__)

        for flag, (char, nice_name) in cls.tainted_flags.items():
            if get_taint_flag_by_ureport_name(db, flag) is None:
                logger.info("Adding kernel taint flag '{0}': {1}"
                            .format(char, nice_name))

                new = KernelTaintFlag()
                new.character = char
                new.ureport_name = flag
                new.nice_name = nice_name
                db.session.add(new)

        db.session.flush()

    @classmethod
    def installed(cls, db):
        """Return True if every known taint flag already exists in storage."""
        for flag in cls.tainted_flags.keys():
            if get_taint_flag_by_ureport_name(db, flag) is None:
                return False

        return True

    def __init__(self, *args, **kwargs):
        super(KerneloopsProblem, self).__init__()

        # each option falls back through the listed config keys to a default
        hashkeys = ["processing.oopshashframes", "processing.hashframes"]
        self.load_config_to_self("hashframes", hashkeys, 16, callback=int)

        cmpkeys = ["processing.oopscmpframes", "processing.cmpframes",
                   "processing.clusterframes"]
        self.load_config_to_self("cmpframes", cmpkeys, 16, callback=int)

        cutkeys = ["processing.oopscutthreshold", "processing.cutthreshold"]
        self.load_config_to_self("cutthreshold", cutkeys, 0.3, callback=float)

        normkeys = ["processing.oopsnormalize", "processing.normalize"]
        self.load_config_to_self("normalize", normkeys, True,
                                 callback=str2bool)

        skipkeys = ["retrace.oopsskipsource", "retrace.skipsource"]
        self.load_config_to_self("skipsrc", skipkeys, True, callback=str2bool)

        # {db_report: raw oops bytes} queued for save_ureport_post_flush
        self.add_lob = {}
        # cache: kernel build id -> (debug pkg, bin pkg, src pkg)
        self._kernel_pkg_map = {}

        # lazily-populated set of known architecture names
        self.archnames = None

    def _hash_koops(self, koops, taintflags=None, skip_unreliable=False):
        """
        Hash the given koops frames (optionally dropping unreliable ones)
        together with the taint flags. Returns a hex digest, or None when
        no frames are left to hash.
        """
        if taintflags is None:
            taintflags = []

        if skip_unreliable:
            # list comprehension instead of filter(): filter() is a lazy
            # iterator on Python 3 and len() below would raise TypeError
            frames = [f for f in koops if f["reliable"]]
        else:
            frames = koops

        if len(frames) < 1:
            return None

        hashbase = list(taintflags)
        for frame in frames:
            if not "module_name" in frame:
                module = "vmlinux"
            else:
                module = frame["module_name"]

            hashbase.append("{0} {1}+{2}/{3} @ {4}"
                            .format(frame["address"], frame["function_name"],
                                    frame["function_offset"],
                                    frame["function_length"], module))

        # all hashbase content is ASCII (enforced by `checker` patterns),
        # so encoding is a no-op on Python 2 and required on Python 3
        return sha1("\n".join(hashbase).encode("utf-8")).hexdigest()

    def _db_backtrace_to_satyr(self, db_backtrace):
        """Convert a stored backtrace to a satyr.Kerneloops, or None."""
        stacktrace = satyr.Kerneloops()

        if len(db_backtrace.threads) < 1:
            self.log_warn("Backtrace #{0} has no usable threads"
                          .format(db_backtrace.id))
            return None

        # a kerneloops backtrace has a single (crash) thread
        db_thread = db_backtrace.threads[0]
        if len(db_thread.frames) < 1:
            self.log_warn("Thread #{0} has no usable frames"
                          .format(db_thread.id))
            return None

        for db_frame in db_thread.frames:
            frame = satyr.KerneloopsFrame()
            if db_frame.symbolsource.symbol is not None:
                frame.function_name = db_frame.symbolsource.symbol.name
            else:
                frame.function_name = "??"
            frame.address = db_frame.symbolsource.offset
            frame.function_offset = db_frame.symbolsource.func_offset
            frame.reliable = db_frame.reliable
            # addresses are stored as signed 64bit; map back to unsigned
            if frame.address < 0:
                frame.address += (1 << 64)

            stacktrace.frames.append(frame)

        if self.normalize:
            stacktrace.normalize()

        return stacktrace

    def _db_report_to_satyr(self, db_report):
        """Convert the first backtrace of a report to satyr, or None."""
        if len(db_report.backtraces) < 1:
            self.log_warn("Report #{0} has no usable backtraces"
                          .format(db_report.id))
            return None

        return self._db_backtrace_to_satyr(db_report.backtraces[0])

    def _parse_kernel_build_id(self, build_id, archs):
        """
        Parses the kernel build string such as
        3.10.0-3.fc19.x86_64
        3.10.0-3.fc19.armv7hl.tegra
        2.6.32-358.14.1.el6.i686.PAE
        3.15.6-200.fc20.i686+PAE

        Returns a (version, release, arch, flavour) tuple; flavour may be
        None. Raises FafError when the string cannot be parsed.
        """
        arch = None
        flavour = None

        # flavour may be separated by "+" (e.g. i686+PAE) or "."
        splitby = "+" if "+" in build_id else "."

        head, tail = build_id.rsplit(splitby, 1)
        if tail in archs:
            arch = tail
        else:
            flavour = tail

            head, tail = head.rsplit(".", 1)
            if not tail in archs:
                raise FafError("Unable to determine architecture from '{0}'"
                               .format(build_id))

            arch = tail

        try:
            version, release = head.rsplit("-", 1)
        except ValueError:
            raise FafError("Unable to determine release from '{0}'"
                           .format(head))

        return version, release, arch, flavour

    def _get_debug_path(self, db, module, db_package):
        """
        Return the path of debuginfo file for
        a given module or None if not found.
        """
        if module == "vmlinux":
            filename = module
        else:
            filename = "{0}.ko.debug".format(module)

        dep = (db.session.query(PackageDependency)
               .filter(PackageDependency.package == db_package)
               .filter(PackageDependency.type == "PROVIDES")
               .filter(PackageDependency.name.like("/%%/{0}"
                                                   .format(filename)))
               .first())

        if dep is None:
            # modules may appear with "_" in the oops but "-" on disk
            filename = "{0}.ko.debug".format(module.replace("_", "-"))
            dep = (db.session.query(PackageDependency)
                   .filter(PackageDependency.package == db_package)
                   .filter(PackageDependency.type == "PROVIDES")
                   .filter(PackageDependency.name.like("/%%/{0}"
                                                       .format(filename)))
                   .first())

        if dep is None:
            self.log_debug("Unable to find debuginfo for module '{0}'"
                           .format(module))
            return None

        return dep.name

    def validate_ureport(self, ureport):
        """Patch up then validate a kerneloops uReport. Returns True."""
        # we want to keep unreliable frames without function name RHBZ#1119072
        if "frames" in ureport:
            for frame in ureport["frames"]:
                if ("function_name" not in frame and
                        "reliable" in frame and
                        not frame["reliable"]):
                    frame["function_name"] = "_unknown_"

        KerneloopsProblem.checker.check(ureport)
        return True

    def hash_ureport(self, ureport):
        """Hash component, taint flags and the first `hashframes` frames."""
        hashbase = [ureport["component"]]
        hashbase.extend(ureport["taint_flags"])

        for i, frame in enumerate(ureport["frames"]):
            # Instance of 'KerneloopsProblem' has no 'hashframes' member
            # pylint: disable-msg=E1101
            if i >= self.hashframes:
                break

            if not "module_name" in frame:
                module = "vmlinux"
            else:
                module = frame["module_name"]

            hashbase.append("{0} @ {1}".format(frame["function_name"],
                                               module))

        # ASCII-only content (see `checker`); encode for Python 3's sha1
        return sha1("\n".join(hashbase).encode("utf-8")).hexdigest()

    def save_ureport(self, db, db_report, ureport, flush=False, count=1):
        """
        Store the uReport's backtrace, taint flags and modules into the
        report database. The backtrace is only created once per report.
        """
        bthash1 = self._hash_koops(ureport["frames"], skip_unreliable=False)
        bthash2 = self._hash_koops(ureport["frames"], skip_unreliable=True)

        if len(db_report.backtraces) < 1:
            db_backtrace = ReportBacktrace()
            db_backtrace.report = db_report
            db.session.add(db_backtrace)

            db_thread = ReportBtThread()
            db_thread.backtrace = db_backtrace
            db_thread.crashthread = True
            db.session.add(db_thread)

            db_bthash1 = ReportBtHash()
            db_bthash1.backtrace = db_backtrace
            db_bthash1.hash = bthash1
            db_bthash1.type = "NAMES"
            db.session.add(db_bthash1)

            if bthash2 is not None and bthash1 != bthash2:
                db_bthash2 = ReportBtHash()
                db_bthash2.backtrace = db_backtrace
                db_bthash2.hash = bthash2
                db_bthash2.type = "NAMES"
                db.session.add(db_bthash2)

            new_symbols = {}
            new_symbolsources = {}

            i = 0
            for frame in ureport["frames"]:
                # OK, this is totally ugly.
                # Frames may contain inlined functions, that would normally
                # require shifting all frames by 1 and inserting a new one.
                # There is no way to do this efficiently with SQL Alchemy
                # (you need to go one by one and flush after each) so
                # creating a space for additional frames is a huge speed
                # optimization.
                i += 10

                # nah, another hack, deals with wrong parsing
                if frame["function_name"].startswith("0x"):
                    continue

                if not "module_name" in frame:
                    module = "vmlinux"
                else:
                    module = frame["module_name"]

                db_symbol = get_symbol_by_name_path(db,
                                                    frame["function_name"],
                                                    module)
                if db_symbol is None:
                    key = (frame["function_name"], module)
                    if key in new_symbols:
                        db_symbol = new_symbols[key]
                    else:
                        db_symbol = Symbol()
                        db_symbol.name = frame["function_name"]
                        db_symbol.normalized_path = module
                        db.session.add(db_symbol)
                        new_symbols[key] = db_symbol

                # this doesn't work well. on 64bit, kernel maps to
                # the end of address space (64bit unsigned), but in
                # postgres bigint is 64bit signed and can't save
                # the value - let's just map it to signed
                if frame["address"] >= (1 << 63):
                    address = frame["address"] - (1 << 64)
                else:
                    address = frame["address"]

                db_symbolsource = get_ssource_by_bpo(db, ureport["version"],
                                                     module, address)
                if db_symbolsource is None:
                    key = (ureport["version"], module, address)
                    if key in new_symbolsources:
                        db_symbolsource = new_symbolsources[key]
                    else:
                        db_symbolsource = SymbolSource()
                        db_symbolsource.path = module
                        db_symbolsource.offset = address
                        db_symbolsource.func_offset = frame["function_offset"]
                        db_symbolsource.symbol = db_symbol
                        db_symbolsource.build_id = ureport["version"]
                        db.session.add(db_symbolsource)
                        new_symbolsources[key] = db_symbolsource

                db_frame = ReportBtFrame()
                db_frame.thread = db_thread
                db_frame.order = i
                db_frame.symbolsource = db_symbolsource
                db_frame.inlined = False
                db_frame.reliable = frame["reliable"]
                db.session.add(db_frame)

            for taintflag in ureport["taint_flags"]:
                db_taintflag = get_taint_flag_by_ureport_name(db, taintflag)
                if db_taintflag is None:
                    self.log_warn("Skipping unsupported taint flag '{0}'"
                                  .format(taintflag))
                    continue

                db_bttaintflag = ReportBtTaintFlag()
                db_bttaintflag.backtrace = db_backtrace
                db_bttaintflag.taintflag = db_taintflag
                db.session.add(db_bttaintflag)

            if "modules" in ureport:
                new_modules = {}

                # use set() to remove duplicates
                for module in set(ureport["modules"]):
                    # strip taint suffix such as "(F+)"
                    idx = module.find("(")
                    if idx >= 0:
                        module = module[:idx]

                    db_module = get_kernelmodule_by_name(db, module)
                    if db_module is None:
                        if module in new_modules:
                            db_module = new_modules[module]
                        else:
                            db_module = KernelModule()
                            db_module.name = module
                            db.session.add(db_module)
                            new_modules[module] = db_module

                    db_btmodule = ReportBtKernelModule()
                    db_btmodule.kernelmodule = db_module
                    db_btmodule.backtrace = db_backtrace
                    db.session.add(db_btmodule)

        # do not overwrite an existing oops
        if not db_report.has_lob("oops"):
            # do not append here, but create a new dict
            # we only want save_ureport_post_flush process the most
            # recently saved report
            self.add_lob = {db_report: ureport["raw_oops"].encode("utf-8")}

        if flush:
            db.session.flush()

    def save_ureport_post_flush(self):
        """Attach the queued raw oops LOBs to their (now flushed) reports."""
        for report, raw_oops in self.add_lob.items():
            report.save_lob("oops", raw_oops)

        # clear the list so that re-calling does not make problems
        self.add_lob = {}

    def get_component_name(self, ureport):
        """Return the component the uReport belongs to."""
        return ureport["component"]

    def compare(self, db_report1, db_report2):
        """Return the satyr distance between two stored reports."""
        satyr_report1 = self._db_report_to_satyr(db_report1)
        satyr_report2 = self._db_report_to_satyr(db_report2)
        return satyr_report1.distance(satyr_report2)

    def compare_many(self, db_reports):
        """
        Build satyr objects for the usable reports and return
        (usable reports, pairwise satyr.Distances).
        """
        self.log_info("Loading reports")
        reports = []
        ret_db_reports = []

        i = 0
        for db_report in db_reports:
            i += 1

            self.log_debug("[{0} / {1}] Loading report #{2}"
                           .format(i, len(db_reports), db_report.id))

            report = self._db_report_to_satyr(db_report)
            if report is None:
                self.log_debug("Unable to build satyr.Kerneloops")
                continue

            reports.append(report)
            ret_db_reports.append(db_report)

        self.log_info("Calculating distances")
        distances = satyr.Distances(reports, len(reports))

        return ret_db_reports, distances

    def get_ssources_for_retrace(self, db, max_fail_count=-1):
        """
        Return symbol sources of kerneloops reports that still miss
        source file/line, optionally capped by previous failure count.
        """
        q = (db.session.query(SymbolSource)
             .join(ReportBtFrame)
             .join(ReportBtThread)
             .join(ReportBacktrace)
             .join(Report)
             .filter(Report.type == KerneloopsProblem.name)
             .filter((SymbolSource.source_path == None) |
                     (SymbolSource.line_number == None))
             .filter(SymbolSource.symbol_id != None))

        if max_fail_count >= 0:
            q = q.filter(SymbolSource.retrace_fail_count <= max_fail_count)

        return q.all()

    def find_packages_for_ssource(self, db, db_ssource):
        """
        Map a symbol source's kernel build id to its (debuginfo, binary,
        source) packages. For kernels the debuginfo package also acts as
        the binary package, hence it is returned twice. Results are cached
        per build id in self._kernel_pkg_map.
        """
        if db_ssource.build_id is None:
            self.log_debug("No kernel information for '{0}' @ '{1}'"
                           .format(db_ssource.symbol.name, db_ssource.path))
            return db_ssource, (None, None, None)

        if db_ssource.build_id in self._kernel_pkg_map:
            return db_ssource, self._kernel_pkg_map[db_ssource.build_id]

        if self.archnames is None:
            self.archnames = set(arch.name for arch in get_archs(db))

        kernelver = self._parse_kernel_build_id(db_ssource.build_id,
                                                self.archnames)
        version, release, arch, flavour = kernelver

        if flavour is not None:
            basename = "kernel-{0}-debuginfo".format(flavour)
        else:
            basename = "kernel-debuginfo"

        db_debug_pkg = get_package_by_nevra(db, basename, 0,
                                            version, release, arch)

        nvra = "{0}-{1}-{2}.{3}".format(basename, version, release, arch)

        db_src_pkg = None
        if db_debug_pkg is None:
            self.log_debug("Package {0} not found in storage".format(nvra))
        elif not self.skipsrc:
            srcname = "kernel-debuginfo-common-{0}".format(arch)
            db_src_pkg = get_package_by_name_build_arch(db, srcname,
                                                        db_debug_pkg.build,
                                                        db_debug_pkg.arch)

            if db_src_pkg is None:
                self.log_debug("Package {0}-{1}-{2}.{3} not found in storage"
                               .format(srcname, version, release, arch))

        # debuginfo package intentionally doubles as the binary package
        result = db_debug_pkg, db_debug_pkg, db_src_pkg
        self._kernel_pkg_map[db_ssource.build_id] = result

        return db_ssource, result

    def retrace(self, db, task):
        """
        Resolve function names, source files and line numbers of the
        task's symbol sources using the unpacked kernel debuginfo.
        Cleans up the unpacked package trees when done.
        """
        new_symbols = {}
        new_symbolsources = {}

        if task.debuginfo.debug_files is not None:
            # fixed: build debug_paths only when debug_files is available;
            # previously the set was built before this None check, which
            # made the check unreachable (iterating None raises TypeError)
            debug_paths = set(os.path.join(task.debuginfo.unpacked_path,
                                           fname[1:])
                              for fname in task.debuginfo.debug_files)

            db_debug_pkg = task.debuginfo.db_package
            if db_debug_pkg.has_lob("offset_map"):
                # reuse the cached module -> {function: offset} map
                with db_debug_pkg.get_lob_fd("offset_map") as fd:
                    offset_map = pickle.load(fd)
            else:
                offset_map = get_function_offset_map(debug_paths)
                db_debug_pkg.save_lob("offset_map", pickle.dumps(offset_map))
        else:
            offset_map = {}

        for bin_pkg, db_ssources in task.binary_packages.items():
            i = 0
            for db_ssource in db_ssources:
                i += 1
                module = db_ssource.path
                self.log_info(u"[{0} / {1}] Processing '{2}' @ '{3}'"
                              .format(i, len(db_ssources),
                                      db_ssource.symbol.name, module))

                if db_ssource.path == "vmlinux":
                    address = db_ssource.offset
                    # stored as signed 64bit; map back to unsigned
                    if address < 0:
                        address += (1 << 64)
                else:
                    if module not in offset_map:
                        self.log_debug("Module '{0}' not found in package "
                                       "'{1}'".format(module,
                                                      task.debuginfo.nvra))
                        db_ssource.retrace_fail_count += 1
                        continue

                    module_map = offset_map[module]

                    symbol_name = db_ssource.symbol.name
                    if symbol_name not in module_map:
                        # compilers sometimes prepend underscores
                        symbol_name = symbol_name.lstrip("_")

                    if symbol_name not in module_map:
                        self.log_debug("Function '{0}' not found in module "
                                       "'{1}'".format(db_ssource.symbol.name,
                                                      module))
                        db_ssource.retrace_fail_count += 1
                        continue

                    address = module_map[symbol_name] + db_ssource.func_offset

                debug_dir = os.path.join(task.debuginfo.unpacked_path,
                                         "usr", "lib", "debug")
                debug_path = self._get_debug_path(db, module,
                                                  task.debuginfo.db_package)
                if debug_path is None:
                    db_ssource.retrace_fail_count += 1
                    continue

                try:
                    abspath = os.path.join(task.debuginfo.unpacked_path,
                                           debug_path[1:])
                    results = addr2line(abspath, address, debug_dir)
                    # innermost frame last so results.pop() walks outward
                    results.reverse()
                except FafError as ex:
                    self.log_debug("addr2line failed: {0}".format(str(ex)))
                    db_ssource.retrace_fail_count += 1
                    continue

                inl_id = 0
                while len(results) > 1:
                    inl_id += 1

                    funcname, srcfile, srcline = results.pop()
                    self.log_debug("Unwinding inlined function '{0}'"
                                   .format(funcname))

                    # hack - we have no offset for inlined symbols
                    # let's use minus source line to avoid collisions
                    offset = -srcline

                    db_ssource_inl = get_ssource_by_bpo(db,
                                                        db_ssource.build_id,
                                                        db_ssource.path,
                                                        offset)
                    if db_ssource_inl is None:
                        key = (db_ssource.build_id, db_ssource.path, offset)
                        if key in new_symbolsources:
                            db_ssource_inl = new_symbolsources[key]
                        else:
                            db_symbol_inl = get_symbol_by_name_path(db,
                                                                    funcname,
                                                                    module)
                            if db_symbol_inl is None:
                                sym_key = (funcname, module)
                                if sym_key in new_symbols:
                                    db_symbol_inl = new_symbols[sym_key]
                                else:
                                    db_symbol_inl = Symbol()
                                    db_symbol_inl.name = funcname
                                    db_symbol_inl.normalized_path = module
                                    db.session.add(db_symbol_inl)
                                    new_symbols[sym_key] = db_symbol_inl

                            db_ssource_inl = SymbolSource()
                            db_ssource_inl.symbol = db_symbol_inl
                            db_ssource_inl.build_id = db_ssource.build_id
                            db_ssource_inl.path = module
                            db_ssource_inl.offset = offset
                            db_ssource_inl.source_path = srcfile
                            db_ssource_inl.line_number = srcline
                            db.session.add(db_ssource_inl)
                            new_symbolsources[key] = db_ssource_inl

                    for db_frame in db_ssource.frames:
                        db_frames = sorted(db_frame.thread.frames,
                                           key=lambda f: f.order)
                        idx = db_frames.index(db_frame)
                        if idx > 0:
                            # NOTE(review): idx comes from the sorted copy
                            # but indexes the unsorted thread.frames here —
                            # correct only if the relationship is already
                            # ordered by `order`; confirm against the mapper
                            prevframe = db_frame.thread.frames[idx - 1]
                            if (prevframe.inlined and
                                    prevframe.symbolsource == db_ssource_inl):
                                continue

                        db_newframe = ReportBtFrame()
                        db_newframe.symbolsource = db_ssource_inl
                        db_newframe.thread = db_frame.thread
                        db_newframe.inlined = True
                        db_newframe.order = db_frame.order - inl_id
                        db.session.add(db_newframe)

                funcname, srcfile, srcline = results.pop()
                self.log_debug("Result: {0}".format(funcname))
                db_symbol = get_symbol_by_name_path(db, funcname, module)
                if db_symbol is None:
                    key = (funcname, module)
                    if key in new_symbols:
                        db_symbol = new_symbols[key]
                    else:
                        self.log_debug("Creating new symbol '{0}' @ '{1}'"
                                       .format(funcname, module))
                        db_symbol = Symbol()
                        db_symbol.name = funcname
                        db_symbol.normalized_path = module
                        db.session.add(db_symbol)
                        new_symbols[key] = db_symbol

                if db_symbol.nice_name is None:
                    db_symbol.nice_name = demangle(funcname)

                db_ssource.symbol = db_symbol
                db_ssource.source_path = srcfile
                db_ssource.line_number = srcline

        if task.debuginfo is not None:
            self.log_debug("Removing {0}".format(task.debuginfo.unpacked_path))
            shutil.rmtree(task.debuginfo.unpacked_path, ignore_errors=True)

        if task.source is not None and task.source.unpacked_path is not None:
            self.log_debug("Removing {0}".format(task.source.unpacked_path))
            shutil.rmtree(task.source.unpacked_path, ignore_errors=True)

    def check_btpath_match(self, ureport, parser):
        """Return True if any frame's module name matches the parser."""
        for frame in ureport["frames"]:
            # vmlinux
            if not "module_name" in frame:
                continue

            match = parser.match(frame["module_name"])

            if match is not None:
                return True

        return False

    def find_crash_function(self, db_backtrace):
        """Return the function name of the topmost frame."""
        satyr_koops = self._db_backtrace_to_satyr(db_backtrace)
        return satyr_koops.frames[0].function_name
"os": DictChecker({ "name": StringChecker(allowed=list(systems.keys())), "version": StringChecker(pattern=r"^[a-zA-Z0-9_\.\-\+~]+$", maxlen=column_len(OpSysRelease, "version")), "architecture": StringChecker(pattern=r"^[a-zA-Z0-9_]+$", maxlen=column_len(Arch, "name")), # Anything else will be checked by the plugin }), # The checker for packages depends on operating system "packages": ListChecker(Checker(object)), "problem": DictChecker({ "type": StringChecker(allowed=list(problemtypes.keys())), # Anything else will be checked by the plugin }), "reason": StringChecker(maxlen=column_len(ReportReason, "reason")), "reporter": DictChecker({ "name": StringChecker(pattern=r"^[a-zA-Z0-9 ]+$", maxlen=64), "version": StringChecker(pattern=r"^[a-zA-Z0-9_\.\- ]+$", maxlen=64), }), "ureport_version":
class CoredumpProblem(ProblemType):
    """
    Problem-type plugin handling user-space coredump uReports:
    validation, hashing, storage into the report database and retracing
    of symbols against the matching debuginfo packages.
    """

    name = "core"
    nice_name = "Crash of user-space binary"

    # structural validator for the "problem" part of a coredump uReport
    checker = DictChecker({
        # no need to check type twice, the toplevel checker already did it
        # "type": StringChecker(allowed=[CoredumpProblem.name]),
        "signal": IntChecker(minval=0),
        "component": StringChecker(pattern=r"^[a-zA-Z0-9\-\._]+$",
                                   maxlen=column_len(OpSysComponent, "name")),
        "executable": StringChecker(maxlen=column_len(ReportExecutable,
                                                      "path")),
        "user": DictChecker({
            "root": Checker(bool),
            "local": Checker(bool),
        }),
        "stacktrace": ListChecker(DictChecker({
            "crash_thread": Checker(bool, mandatory=False),
            "frames": ListChecker(DictChecker({
                "address": IntChecker(minval=0),
                "build_id_offset": IntChecker(minval=0),
                "file_name": StringChecker(maxlen=column_len(SymbolSource,
                                                             "path")),
                "build_id": StringChecker(pattern=r"^[a-fA-F0-9]+$",
                                          maxlen=column_len(SymbolSource,
                                                            "build_id"),
                                          mandatory=False),
                "fingerprint": StringChecker(pattern=r"^[a-fA-F0-9]+$",
                                             maxlen=column_len(ReportBtHash,
                                                               "hash"),
                                             mandatory=False),
                "function_name": StringChecker(
                    maxlen=column_len(Symbol, "nice_name"),
                    mandatory=False)
            }), minlen=1)
        }), minlen=1)
    })

    def __init__(self, *args, **kwargs):
        super(CoredumpProblem, self).__init__()

        # each option falls back through the listed config keys to a default
        hashkeys = ["processing.corehashframes", "processing.hashframes"]
        self.hashframes = None
        self.load_config_to_self("hashframes", hashkeys, 16, callback=int)

        cmpkeys = [
            "processing.corecmpframes", "processing.cmpframes",
            "processing.clusterframes"
        ]
        self.cmpframes = None
        self.load_config_to_self("cmpframes", cmpkeys, 16, callback=int)

        cutkeys = ["processing.corecutthreshold", "processing.cutthreshold"]
        self.cutthreshold = None
        self.load_config_to_self("cutthreshold", cutkeys, 0.3, callback=float)

        normkeys = ["processing.corenormalize", "processing.normalize"]
        self.normalize = None
        self.load_config_to_self("normalize", normkeys, True,
                                 callback=str2bool)

        skipkeys = ["retrace.coreskipsource", "retrace.skipsource"]
        self.skipsrc = None
        self.load_config_to_self("skipsrc", skipkeys, True, callback=str2bool)

    def _get_crash_thread(self, stacktrace):
        """
        Searches for a single crash thread and return it. Raises FafError if
        there is no crash thread or if there are multiple crash threads.
        """
        crashthreads = [
            t for t in stacktrace
            if ("crash_thread" in t and t["crash_thread"])
        ]
        if not crashthreads:
            raise FafError("No crash thread found")

        if len(crashthreads) > 1:
            raise FafError("Multiple crash threads found")

        return crashthreads[0]["frames"]

    def _hash_backtrace(self, backtrace):
        """
        Produce up to three hashes of the whole backtrace, one per frame
        key (function_name, fingerprint, build_id_offset); a key is only
        used when every frame of every thread carries it.
        """
        result = []

        for key in ["function_name", "fingerprint", "build_id_offset"]:
            hashbase = []

            threads_sane = []
            for thread in backtrace:
                threads_sane.append(all(key in f for f in thread["frames"]))

            if not all(threads_sane):
                continue

            for thread in backtrace:
                if "crash_thread" in thread and thread["crash_thread"]:
                    hashbase.append("Crash Thread")
                else:
                    hashbase.append("Thread")

                for frame in thread["frames"]:
                    if "build_id" in frame:
                        build_id = frame["build_id"]
                    else:
                        build_id = None

                    hashbase.append(" {0} @ {1} ({2})".format(
                        frame[key],
                        frame["file_name"].encode("ascii", "ignore"),
                        build_id))

            result.append(hash_list(hashbase))

        return result

    def _db_thread_to_satyr(self, db_thread):
        """Convert a stored thread to a (optionally normalized) satyr one."""
        self.log_debug("Creating threads using satyr")
        thread = satyr.GdbThread()
        thread.number = db_thread.number

        for db_frame in db_thread.frames:
            frame = satyr.GdbFrame()
            frame.address = db_frame.symbolsource.offset
            frame.library_name = db_frame.symbolsource.path
            frame.number = db_frame.order
            if db_frame.symbolsource.symbol is not None:
                frame.function_name = db_frame.symbolsource.symbol.name
            else:
                frame.function_name = "??"

            if db_frame.symbolsource.source_path is not None:
                frame.source_file = db_frame.symbolsource.source_path

            if db_frame.symbolsource.line_number is not None:
                frame.source_line = db_frame.symbolsource.line_number

            thread.frames.append(frame)

        if self.normalize:
            # satyr normalization works on whole stacktraces only
            stacktrace = satyr.GdbStacktrace()
            stacktrace.threads.append(thread)
            stacktrace.normalize()

        return thread

    def _db_thread_validate(self, db_thread):
        """
        Reject a thread consisting of the single placeholder frame that
        validate_ureport injects for unresolvable JIT frames.
        """
        if len(db_thread.frames) == 1:
            db_frame = db_thread.frames[0]
            if (db_frame.symbolsource.symbol is not None and
                    db_frame.symbolsource.symbol.name ==
                    "anonymous function" and
                    db_frame.symbolsource.symbol.normalized_path ==
                    "unknown filename"):
                return False

        return True

    def db_report_to_satyr(self, db_report):
        """Return the report's crash thread as a satyr thread, or None."""
        if not db_report.backtraces:
            self.log_warn("Report #{0} has no usable backtraces".format(
                db_report.id))
            return None

        if not db_report.backtraces[0].threads:
            self.log_warn("Report #{0} has no usable threads".format(
                db_report.id))
            return None

        for db_thread in db_report.backtraces[0].threads:
            if not db_thread.crashthread:
                continue

            if self._db_thread_validate(db_thread):
                return self._db_thread_to_satyr(db_thread)

            self.log_warn("Report #{0} has only one bad frame".format(
                db_report.id))
            return None

        self.log_warn("Report #{0} has no crash thread".format(db_report.id))
        return None

    def _build_id_to_debug_files(self, build_id):
        """Return the candidate on-disk debug file paths for a build id."""
        return [
            "/usr/lib/debug/.build-id/{0}/{1}.debug".format(
                build_id[:2], build_id[2:]),
            "/usr/lib/.build-id/{0}/{1}".format(build_id[:2], build_id[2:])
        ]

    def validate_ureport(self, ureport):
        """Patch up JIT frames, then validate the uReport. Returns True."""
        # Frames calling JIT compiled functions usually do not contain
        # function name nor file name. This would result to the uReport being
        # rejected. However the stack above is often the relevant part and we
        # do not want to reject such uReports.
        # This code tries to detect calling JIT compiled code and filling
        # the frames with file name (the JIT caller) and function name
        # (anonymous function).
        if "stacktrace" in ureport and isinstance(ureport["stacktrace"],
                                                  list):
            for thread in ureport["stacktrace"]:
                if not isinstance(thread, dict):
                    continue

                jit_fname = None
                if "frames" in thread and isinstance(thread["frames"], list):
                    for frame in thread["frames"]:
                        if not isinstance(frame, dict):
                            continue

                        if ("file_name" in frame and
                                "function_name" in frame and
                                "jit" in frame["function_name"].lower()):
                            jit_fname = frame["file_name"]

                        if ("file_name" not in frame and
                                jit_fname is not None):
                            frame["file_name"] = jit_fname

                        if ("function_name" not in frame or
                                frame["function_name"] == "??"):
                            frame["function_name"] = "anonymous function"

                    if thread["frames"]:
                        last_frame = thread["frames"][-1]
                        if isinstance(last_frame, dict):
                            if "file_name" not in last_frame:
                                last_frame["file_name"] = "unknown filename"

                            if ("function_name" not in last_frame or
                                    last_frame["function_name"] == "??"):
                                last_frame[
                                    "function_name"] = "anonymous function"

        CoredumpProblem.checker.check(ureport)

        # just to be sure there is exactly one crash thread
        self._get_crash_thread(ureport["stacktrace"])
        return True

    def hash_ureport(self, ureport):
        """
        Hash the component and the first `hashframes` crash-thread frames,
        keyed by the best frame attribute available in every frame.
        """
        crashthread = self._get_crash_thread(ureport["stacktrace"])
        hashbase = [ureport["component"]]

        if all("function_name" in f for f in crashthread):
            key = "function_name"
        elif all("fingerprint" in f for f in crashthread):
            key = "fingerprint"
        else:
            key = "build_id_offset"

        for i, frame in enumerate(crashthread):
            # Instance of 'CoredumpProblem' has no 'hashframes' member
            # pylint: disable-msg=E1101
            if i >= self.hashframes:
                break

            hashbase.append("{0} @ {1}".format(
                frame[key], frame["file_name"].encode("ascii", "ignore")))

        return hash_list(hashbase)

    def save_ureport(self, db, db_report, ureport, flush=False, count=1):
        """
        Store signal, executable and backtrace of the uReport into the
        report database. The backtrace is only created once per report.
        """
        db_report.errname = str(ureport["signal"])

        db_reportexe = get_reportexe(db, db_report, ureport["executable"])
        if db_reportexe is None:
            db_reportexe = ReportExecutable()
            db_reportexe.path = ureport["executable"]
            db_reportexe.report = db_report
            db_reportexe.count = 0
            db.session.add(db_reportexe)

        db_reportexe.count += count

        bthashes = self._hash_backtrace(ureport["stacktrace"])
        if not bthashes:
            raise FafError("Unable to get backtrace hash")

        if not db_report.backtraces:
            new_symbols = {}
            new_symbolsources = {}

            db_backtrace = ReportBacktrace()
            db_backtrace.report = db_report
            db.session.add(db_backtrace)

            for bthash in bthashes:
                db_bthash = ReportBtHash()
                db_bthash.backtrace = db_backtrace
                db_bthash.type = "NAMES"
                db_bthash.hash = bthash
                db.session.add(db_bthash)

            tid = 0
            for thread in ureport["stacktrace"]:
                tid += 1

                crash = "crash_thread" in thread and thread["crash_thread"]
                db_thread = ReportBtThread()
                db_thread.backtrace = db_backtrace
                db_thread.number = tid
                db_thread.crashthread = crash
                db.session.add(db_thread)

                fid = 0
                for frame in thread["frames"]:
                    # OK, this is totally ugly.
                    # Frames may contain inlined functions, that would
                    # normally require shifting all frames by 1 and inserting
                    # a new one. There is no way to do this efficiently with
                    # SQL Alchemy (you need to go one by one and flush after
                    # each) so creating a space for additional frames is
                    # a huge speed optimization.
                    fid += 10

                    if "build_id" in frame:
                        build_id = frame["build_id"]
                    else:
                        build_id = None

                    if "fingerprint" in frame:
                        fingerprint = frame["fingerprint"]
                    else:
                        fingerprint = None

                    path = os.path.abspath(frame["file_name"])
                    offset = frame["build_id_offset"]

                    db_symbol = None
                    if "function_name" in frame:
                        norm_path = get_libname(path)
                        db_symbol = \
                            get_symbol_by_name_path(db,
                                                    frame["function_name"],
                                                    norm_path)
                        if db_symbol is None:
                            key = (frame["function_name"], norm_path)
                            if key in new_symbols:
                                db_symbol = new_symbols[key]
                            else:
                                db_symbol = Symbol()
                                db_symbol.name = frame["function_name"]
                                db_symbol.normalized_path = norm_path
                                db.session.add(db_symbol)
                                new_symbols[key] = db_symbol

                    db_symbolsource = get_ssource_by_bpo(
                        db, build_id, path, offset)
                    if db_symbolsource is None:
                        key = (build_id, path, offset)
                        if key in new_symbolsources:
                            db_symbolsource = new_symbolsources[key]
                        else:
                            db_symbolsource = SymbolSource()
                            db_symbolsource.symbol = db_symbol
                            db_symbolsource.build_id = build_id
                            db_symbolsource.path = path
                            db_symbolsource.offset = offset
                            db_symbolsource.hash = fingerprint
                            db.session.add(db_symbolsource)
                            new_symbolsources[key] = db_symbolsource

                    db_frame = ReportBtFrame()
                    db_frame.thread = db_thread
                    db_frame.order = fid
                    db_frame.symbolsource = db_symbolsource
                    db_frame.inlined = False
                    db.session.add(db_frame)

        if flush:
            db.session.flush()

    def save_ureport_post_flush(self):
        """No post-flush work is needed for coredumps."""
        self.log_debug("save_ureport_post_flush is not required for "
                       "coredumps")

    def get_component_name(self, ureport):
        """Return the component the uReport belongs to."""
        return ureport["component"]

    def compare(self, db_report1, db_report2):
        """Return the satyr distance between two stored reports."""
        satyr_report1 = self.db_report_to_satyr(db_report1)
        satyr_report2 = self.db_report_to_satyr(db_report2)
        return satyr_report1.distance(satyr_report2)

    def check_btpath_match(self, ureport, parser):
        """Return True if any crash-thread file name matches the parser."""
        crash_thread = None
        for thread in ureport["stacktrace"]:
            if "crash_thread" not in thread or not thread["crash_thread"]:
                continue

            crash_thread = thread

            for frame in crash_thread["frames"]:
                match = parser.match(frame["file_name"])

                if match is not None:
                    return True

        return False

    def _get_ssources_for_retrace_query(self, db):
        """
        Build a query for coredump symbol sources that have a build id but
        still miss symbol, source path or line number.
        """
        core_syms = (db.session.query(
            SymbolSource.id).join(ReportBtFrame).join(ReportBtThread).join(
                ReportBacktrace).join(Report).filter(
                    Report.type == CoredumpProblem.name).subquery())

        q = (db.session.query(SymbolSource).filter(
            SymbolSource.id.in_(core_syms)).filter(
                SymbolSource.build_id.isnot(
                    None)).filter((SymbolSource.symbol_id.is_(None)) |
                                  (SymbolSource.source_path.is_(None)) |
                                  (SymbolSource.line_number.is_(None))))

        return q

    def find_packages_for_ssource(self, db, db_ssource):
        """
        Map a symbol source to its (debuginfo, binary, source) packages.
        The debuginfo slot is set to None when the binary package cannot
        be found, to indicate an incomplete result.
        """
        # fixed: build_id is a hex string, so the %d conversion used here
        # previously raised a logging format error and dropped the message
        self.log_debug("Build-id: %s", db_ssource.build_id)

        files = self._build_id_to_debug_files(db_ssource.build_id)
        self.log_debug("File names: %s", ', '.join(files))

        db_debug_package = get_package_by_file(db, files)
        if db_debug_package is None:
            debug_nvra = "Not found"
        else:
            debug_nvra = db_debug_package.nvra()

        self.log_debug("Debug Package: %s", debug_nvra)

        db_bin_package = None
        if db_debug_package is not None:
            # try the stored path plus its UsrMove/absolute variants
            paths = [db_ssource.path]
            if os.path.sep in db_ssource.path:
                paths.append(usrmove(db_ssource.path))
                paths.append(os.path.abspath(db_ssource.path))
                paths.append(usrmove(os.path.abspath(db_ssource.path)))

            db_build = db_debug_package.build
            db_arch = db_debug_package.arch
            for path in paths:
                db_bin_package = get_package_by_file_build_arch(
                    db, path, db_build, db_arch)

                if db_bin_package is not None:
                    # Do not fix UsrMove in the DB - it's a terrible slow-down
                    # Rather fake it with symlinks when unpacking
                    #if db_ssource.path != path:
                    #    self.log_debug("Fixing path: {0} ~> {1}"
                    #                   .format(db_ssource.path, path))
                    #    build_id = db_ssource.build_id
                    #    db_ssource_fixed = get_ssource_by_bpo(
                    #        db, build_id, path, db_ssource.offset)
                    #    if db_ssource_fixed is None:
                    #        db_ssource.path = path
                    #    else:
                    #        db_ssource = db_ssource_fixed
                    break

        if db_bin_package is None:
            bin_nvra = "Not found"
        else:
            bin_nvra = db_bin_package.nvra()

        self.log_debug("Binary Package: %s", bin_nvra)

        db_src_package = None
        if not self.skipsrc and db_debug_package is not None:
            db_build = db_debug_package.build
            db_src_package = get_src_package_by_build(db, db_build)

        if db_src_package is None:
            src_nvra = "Not found"
        else:
            src_nvra = db_src_package.nvra()

        self.log_debug("Source Package: %s", src_nvra)

        # indicate incomplete result
        if db_bin_package is None:
            db_debug_package = None

        return db_ssource, (db_debug_package, db_bin_package, db_src_package)

    def retrace(self, db, task):
        """
        Resolve function names, source files and line numbers of the
        task's symbol sources using the unpacked binary and debuginfo
        packages. Cleans up the unpacked debuginfo tree when done.
        """
        new_symbols = {}
        new_symbolsources = {}

        for bin_pkg, db_ssources in task.binary_packages.items():
            self.log_info("Retracing symbols from package {0}".format(
                bin_pkg.nvra))

            i = 0
            for db_ssource in db_ssources:
                i += 1

                self.log_debug("[%d / %d] Processing '%s' @ '%s'",
                               i, len(db_ssources),
                               ssource2funcname(db_ssource),
                               db_ssource.path)

                norm_path = get_libname(db_ssource.path)

                if bin_pkg.unpacked_path is None:
                    self.log_debug(
                        "fail: path to unpacked binary package not found")
                    db_ssource.retrace_fail_count += 1
                    continue

                binary = os.path.join(bin_pkg.unpacked_path,
                                      db_ssource.path[1:])

                try:
                    address = get_base_address(binary) + db_ssource.offset
                except FafError as ex:
                    self.log_debug("get_base_address failed: %s", str(ex))
                    db_ssource.retrace_fail_count += 1
                    continue

                try:
                    debug_path = os.path.join(task.debuginfo.unpacked_path,
                                              "usr", "lib", "debug")
                    results = addr2line(binary, address, debug_path)
                    # innermost frame last so results.pop() walks outward
                    results.reverse()
                except Exception as ex:  # pylint: disable=broad-except
                    self.log_debug("addr2line failed: %s", str(ex))
                    db_ssource.retrace_fail_count += 1
                    continue

                inl_id = 0
                while len(results) > 1:
                    inl_id += 1

                    funcname, srcfile, srcline = results.pop()
                    self.log_debug("Unwinding inlined function '%s'",
                                   funcname)

                    # hack - we have no offset for inlined symbols
                    # let's use minus source line to avoid collisions
                    offset = -srcline

                    db_ssource_inl = get_ssource_by_bpo(
                        db, db_ssource.build_id, db_ssource.path, offset)
                    if db_ssource_inl is None:
                        key = (db_ssource.build_id, db_ssource.path, offset)
                        if key in new_symbolsources:
                            db_ssource_inl = new_symbolsources[key]
                        else:
                            db_symbol_inl = get_symbol_by_name_path(
                                db, funcname, norm_path)
                            if db_symbol_inl is None:
                                sym_key = (funcname, norm_path)
                                if sym_key in new_symbols:
                                    db_symbol_inl = new_symbols[sym_key]
                                else:
                                    db_symbol_inl = Symbol()
                                    db_symbol_inl.name = funcname
                                    db_symbol_inl.normalized_path = norm_path
                                    db.session.add(db_symbol_inl)
                                    new_symbols[sym_key] = db_symbol_inl

                            db_ssource_inl = SymbolSource()
                            db_ssource_inl.symbol = db_symbol_inl
                            db_ssource_inl.build_id = db_ssource.build_id
                            db_ssource_inl.path = db_ssource.path
                            db_ssource_inl.offset = offset
                            db_ssource_inl.source_path = srcfile
                            db_ssource_inl.line_number = srcline
                            db.session.add(db_ssource_inl)
                            new_symbolsources[key] = db_ssource_inl

                    for db_frame in db_ssource.frames:
                        db_frames = sorted(db_frame.thread.frames,
                                           key=lambda f: f.order)
                        idx = db_frames.index(db_frame)
                        if idx > 0:
                            # NOTE(review): idx comes from the sorted copy
                            # but indexes the unsorted thread.frames here —
                            # correct only if the relationship is already
                            # ordered by `order`; confirm against the mapper
                            prevframe = db_frame.thread.frames[idx - 1]
                            if (prevframe.inlined and
                                    prevframe.symbolsource == db_ssource_inl):
                                continue

                        db_newframe = ReportBtFrame()
                        db_newframe.symbolsource = db_ssource_inl
                        db_newframe.thread = db_frame.thread
                        db_newframe.inlined = True
                        db_newframe.order = db_frame.order - inl_id
                        db.session.add(db_newframe)

                funcname, srcfile, srcline = results.pop()
                self.log_debug("Result: %s", funcname)
                db_symbol = get_symbol_by_name_path(db, funcname, norm_path)
                if db_symbol is None:
                    key = (funcname, norm_path)
                    if key in new_symbols:
                        db_symbol = new_symbols[key]
                    else:
                        self.log_debug("Creating new symbol '%s' @ '%s'",
                                       funcname, db_ssource.path)
                        db_symbol = Symbol()
                        db_symbol.name = funcname
                        db_symbol.normalized_path = norm_path
                        db.session.add(db_symbol)
                        new_symbols[key] = db_symbol

                if db_symbol.nice_name is None:
                    db_symbol.nice_name = demangle(funcname)

                db_ssource.symbol = db_symbol
                db_ssource.source_path = srcfile
                db_ssource.line_number = srcline

        if task.debuginfo.unpacked_path is not None:
            self.log_debug("Removing %s", task.debuginfo.unpacked_path)
            shutil.rmtree(task.debuginfo.unpacked_path, ignore_errors=True)
if task.source is not None and task.source.unpacked_path is not None: self.log_debug("Removing %s", task.source.unpacked_path) shutil.rmtree(task.source.unpacked_path, ignore_errors=True) for bin_pkg in task.binary_packages.keys(): if bin_pkg.unpacked_path is not None: self.log_debug("Removing %s", bin_pkg.unpacked_path) shutil.rmtree(bin_pkg.unpacked_path, ignore_errors=True) def find_crash_function(self, db_backtrace): for db_thread in db_backtrace.threads: if not db_thread.crashthread: continue satyr_thread = self._db_thread_to_satyr(db_thread) satyr_stacktrace = satyr.GdbStacktrace() satyr_stacktrace.threads.append(satyr_thread) return satyr_stacktrace.find_crash_frame().function_name self.log_warn("Backtrace #{0} has no crash thread".format( db_backtrace.id)) return None
class JavaProblem(ProblemType):
    """Problem type plugin handling unhandled Java exception uReports.

    Java reports carry named threads of frames; frames may be exception
    entries, native calls or regular methods with a class path. No
    retracing is required - symbols come straight from the report.
    """

    name = "java"
    nice_name = "Unhandled Java exception"

    checker = DictChecker({
        # no need to check type twice, the toplevel checker already did it
        # "type": StringChecker(allowed=[JavaProblem.name]),
        "component": StringChecker(pattern=r"^[a-zA-Z0-9\-\._]+$",
                                   maxlen=column_len(OpSysComponent, "name")),
        "threads": ListChecker(DictChecker({
            "name": StringChecker(),
            "frames": ListChecker(DictChecker({
                "name": StringChecker(maxlen=column_len(Symbol, "name")),
                "is_native": Checker(bool),
                "is_exception": Checker(bool),
            }), minlen=1),
        }), minlen=1)
    })

    # Extra fields required on frames that are neither native nor
    # exception entries (see validate_ureport).
    default_frame_checker = DictChecker({
        "file_name": StringChecker(maxlen=column_len(SymbolSource,
                                                     "source_path")),
        "file_line": IntChecker(minval=1),
        "class_path": StringChecker(maxlen=column_len(SymbolSource, "path")),
    })

    # Sentinel "paths" stored in SymbolSource.path for frames that have
    # no real class path.
    exception = "Exception thrown"
    native = "Native function call"
    unknown = "Unknown"

    def __init__(self, *args, **kwargs):
        super(JavaProblem, self).__init__()

        hashkeys = ["processing.javahashframes", "processing.hashframes"]
        self.hashframes = None
        self.load_config_to_self("hashframes", hashkeys, 16, callback=int)

        cmpkeys = ["processing.javacmpframes", "processing.cmpframes",
                   "processing.clusterframes"]
        self.cmpframes = None
        self.load_config_to_self("cmpframes", cmpkeys, 16, callback=int)

        cutkeys = ["processing.javacutthreshold", "processing.cutthreshold"]
        self.cutthreshold = None
        self.load_config_to_self("cutthreshold", cutkeys, 0.3, callback=float)

        normkeys = ["processing.javanormalize", "processing.normalize"]
        self.normalize = None
        self.load_config_to_self("normalize", normkeys, True,
                                 callback=str2bool)

        skipkeys = ["retrace.javaskipsource", "retrace.skipsource"]
        self.skipsrc = None
        self.load_config_to_self("skipsrc", skipkeys, True, callback=str2bool)

    def _hash_backtrace(self, threads):
        """Hash all threads' frame names (qualified with class path when
        available) into a single backtrace hash."""
        hashbase = []

        for thread in threads:
            hashbase.append("Thread")
            for frame in thread["frames"]:
                # Instance of 'JavaProblem' has no 'hashframes' member
                # pylint: disable-msg=E1101
                if "class_path" in frame:
                    hashbase.append("{0} @ {1}".format(frame["name"],
                                                       frame["class_path"]))
                    continue

                hashbase.append(frame["name"])

        return hash_list(hashbase)

    def _db_backtrace_find_crash_thread(self, db_backtrace):
        """Return the single crash thread of `db_backtrace`; raise
        FafError when none or several are flagged."""
        if len(db_backtrace.threads) == 1:
            return db_backtrace.threads[0]

        db_threads = [t for t in db_backtrace.threads if t.crashthread]
        if not db_threads:
            raise FafError("No crash thread could be found for backtrace "
                           "#{0}".format(db_backtrace.id))

        if len(db_threads) > 1:
            raise FafError("Multiple crash threads found for backtrace "
                           "#{0}".format(db_backtrace.id))

        return db_threads[0]

    def _db_frame_to_satyr(self, db_frame):
        """Convert a DB frame into a satyr.JavaFrame, decoding the
        sentinel path values back into is_native/is_exception flags."""
        class_path = db_frame.symbolsource.path

        result = satyr.JavaFrame()
        result.name = db_frame.symbolsource.symbol.name
        result.is_native = class_path == JavaProblem.native
        result.is_exception = class_path == JavaProblem.exception
        if class_path not in [JavaProblem.exception, JavaProblem.native,
                              JavaProblem.unknown]:
            result.class_path = class_path
        if db_frame.symbolsource.source_path is not None:
            result.file_name = db_frame.symbolsource.source_path
            result.file_line = db_frame.symbolsource.line_number

        return result

    def _db_thread_to_satyr(self, db_thread):
        """Convert a DB thread into a satyr.JavaThread; None if empty."""
        if not db_thread.frames:
            self.log_warn("Thread #{0} has no usable frames"
                          .format(db_thread.id))
            return None

        result = satyr.JavaThread()
        result.name = "Thread #{0}".format(db_thread.number)
        for db_frame in db_thread.frames:
            frame = self._db_frame_to_satyr(db_frame)
            if frame is None:
                continue
            result.frames.append(frame)

        return result

    def _db_backtrace_to_satyr(self, db_backtrace):
        """Convert a DB backtrace (first thread only) to satyr form."""
        if not db_backtrace.threads:
            self.log_warn("Backtrace #{0} has no usable threads"
                          .format(db_backtrace.id))
            return None

        if len(db_backtrace.threads) > 1:
            self.log_warn("Backtrace #{0} has several threads"
                          .format(db_backtrace.id))

        return self._db_thread_to_satyr(db_backtrace.threads[0])

    def _db_report_to_satyr(self, db_report):
        """Convert a DB report (first backtrace only) to satyr form."""
        if not db_report.backtraces:
            self.log_warn("Report #{0} has no usable backtraces"
                          .format(db_report.id))
            return None

        return self._db_backtrace_to_satyr(db_report.backtraces[0])

    def validate_ureport(self, ureport):
        """Validate a Java uReport; regular (non-native, non-exception)
        frames must additionally carry file/line/class-path info."""
        JavaProblem.checker.check(ureport)
        for thread in ureport["threads"]:
            for frame in thread["frames"]:
                if not frame["is_native"] and not frame["is_exception"]:
                    JavaProblem.default_frame_checker.check(frame)
        return True

    def hash_ureport(self, ureport):
        """Hash the component plus the first `hashframes` frames of the
        crash thread."""
        hashbase = [ureport["component"]]

        # at the moment we only send crash thread
        # we may need to identify the crash thread in the future
        for i, frame in enumerate(ureport["threads"][0]["frames"]):
            # Instance of 'JavaProblem' has no 'hashframes' member
            # pylint: disable-msg=E1101
            if i >= self.hashframes:
                break

            if "class_path" in frame:
                hashbase.append("{0} @ {1}".format(frame["name"],
                                                   frame["class_path"]))
                continue

            hashbase.append(frame["name"])

        return hash_list(hashbase)

    def get_component_name(self, ureport):
        """Return the affected component name stated in the uReport."""
        return ureport["component"]

    def save_ureport(self, db, db_report, ureport, flush=False, count=1):
        """Persist a validated Java uReport into `db_report`, creating
        the backtrace, threads, frames, symbols and symbol sources on
        first occurrence. Flushes the session only when `flush` is set.
        """
        # at the moment we only send crash thread
        # we may need to identify the crash thread in the future
        crashthread = ureport["threads"][0]

        # Crash function: the first non-exception frame, stripped to its
        # unqualified method name.
        crashfn = None
        for frame in crashthread["frames"]:
            if not frame["is_exception"]:
                crashfn = frame["name"]
                break

        if crashfn is not None and "." in crashfn:
            crashfn = crashfn.rsplit(".", 1)[1]

        # Error name: the first exception frame, stripped of its package.
        errname = None
        for frame in crashthread["frames"]:
            if frame["is_exception"]:
                errname = frame["name"]
                break

        # BUGFIX: the crash thread may contain no exception frame at all
        # (the checker only requires is_exception to be a bool), in which
        # case errname is None and `"." in errname` raised TypeError.
        # Guard exactly like the crashfn path above.
        if errname is not None and "." in errname:
            errname = errname.rsplit(".", 1)[1]

        db_report.errname = errname

        bthash = self._hash_backtrace(ureport["threads"])

        if not db_report.backtraces:
            db_backtrace = ReportBacktrace()
            db_backtrace.report = db_report
            db_backtrace.crashfn = crashfn
            db.session.add(db_backtrace)

            db_bthash = ReportBtHash()
            db_bthash.type = "NAMES"
            db_bthash.hash = bthash
            db_bthash.backtrace = db_backtrace

            # Session-local caches so duplicate symbols within one report
            # are not added twice before the flush.
            new_symbols = {}
            new_symbolsources = {}

            j = 0
            for thread in ureport["threads"]:
                j += 1

                db_thread = ReportBtThread()
                db_thread.backtrace = db_backtrace
                db_thread.crashthread = thread == crashthread
                db_thread.number = j
                db.session.add(db_thread)

                i = 0
                for frame in thread["frames"]:
                    i += 1

                    function_name = frame["name"]

                    # Encode frame kind into the stored path: real class
                    # path when present, otherwise a sentinel constant.
                    if "class_path" in frame:
                        file_name = frame["class_path"]
                    elif frame["is_exception"]:
                        file_name = JavaProblem.exception
                    elif frame["is_native"]:
                        file_name = JavaProblem.native
                    else:
                        file_name = JavaProblem.unknown

                    if "file_line" in frame:
                        file_line = frame["file_line"]
                    else:
                        file_line = 0

                    db_symbol = get_symbol_by_name_path(db, function_name,
                                                        file_name)
                    if db_symbol is None:
                        key = (function_name, file_name)
                        if key in new_symbols:
                            db_symbol = new_symbols[key]
                        else:
                            db_symbol = Symbol()
                            db_symbol.name = function_name
                            db_symbol.normalized_path = file_name
                            db.session.add(db_symbol)
                            new_symbols[key] = db_symbol

                    db_symbolsource = get_symbolsource(db, db_symbol,
                                                       file_name, file_line)
                    if db_symbolsource is None:
                        key = (function_name, file_name, file_line)
                        if key in new_symbolsources:
                            db_symbolsource = new_symbolsources[key]
                        else:
                            db_symbolsource = SymbolSource()
                            db_symbolsource.path = file_name
                            db_symbolsource.offset = file_line
                            if "file_name" in frame:
                                db_symbolsource.source_path = \
                                    frame["file_name"]
                            db_symbolsource.line_number = file_line
                            db_symbolsource.symbol = db_symbol
                            db.session.add(db_symbolsource)
                            new_symbolsources[key] = db_symbolsource

                    db_frame = ReportBtFrame()
                    db_frame.order = i
                    db_frame.inlined = False
                    db_frame.symbolsource = db_symbolsource
                    db_frame.thread = db_thread
                    db.session.add(db_frame)

        if flush:
            db.session.flush()

    def save_ureport_post_flush(self):
        """No post-flush processing is needed for Java reports."""
        self.log_debug("save_ureport_post_flush is not required for java")

    def _get_ssources_for_retrace_query(self, db):
        """Java reports never need retracing - no query."""
        return None

    def find_packages_for_ssource(self, db, db_ssource):
        """Java reports never need retracing - nothing to map."""
        self.log_info("Retracing is not required for Java exceptions")
        return None, (None, None, None)

    def retrace(self, db, task):
        """Java reports never need retracing - no-op."""
        self.log_info("Retracing is not required for Java exceptions")

    def compare(self, db_report1, db_report2):
        """Return the satyr distance between the stacktraces of two
        reports (0.0 = identical)."""
        satyr_report1 = self._db_report_to_satyr(db_report1)
        satyr_report2 = self._db_report_to_satyr(db_report2)
        return satyr_report1.distance(satyr_report2)

    def compare_many(self, db_reports):
        """Compute pairwise satyr distances for many reports.

        Returns (usable_db_reports, distances); reports that could not
        be converted to satyr stacktraces are dropped from the result.
        """
        self.log_info("Loading reports")
        reports = []
        ret_db_reports = []

        i = 0
        for db_report in db_reports:
            i += 1
            self.log_debug("[{0} / {1}] Loading report #{2}"
                           .format(i, len(db_reports), db_report.id))

            report = self._db_report_to_satyr(db_report)
            if report is None:
                self.log_debug("Unable to build satyr.JavaStacktrace")
                continue

            reports.append(report)
            ret_db_reports.append(db_report)

        self.log_info("Calculating distances")
        distances = satyr.Distances(reports, len(reports))

        return ret_db_reports, distances

    def check_btpath_match(self, ureport, parser):
        """Return True if any frame's class path or file name is matched
        by `parser` (an object with re-style .match), else False."""
        for thread in ureport["threads"]:
            for frame in thread["frames"]:
                for key in ["class_path", "file_name"]:
                    if key in frame:
                        match = parser.match(frame[key])
                        if match is not None:
                            return True

        return False

    def find_crash_function(self, db_backtrace):
        """Return the top frame's function name of the crash thread."""
        crash_thread = self._db_backtrace_find_crash_thread(db_backtrace)
        return crash_thread.frames[0].symbolsource.symbol.name
"validate", "validate_attachment"] UREPORT_CHECKER = DictChecker({ "os": DictChecker({ "name": StringChecker(allowed=systems.keys()), "version": StringChecker(pattern=r"^[a-zA-Z0-9_\.\-\+~]+$", maxlen=column_len(OpSysRelease, "version")), "architecture": StringChecker(pattern=r"^[a-zA-Z0-9_]+$", maxlen=column_len(Arch, "name")), # Anything else will be checked by the plugin }), # The checker for packages depends on operating system "packages": ListChecker(Checker(object)), "problem": DictChecker({ "type": StringChecker(allowed=problemtypes.keys()), # Anything else will be checked by the plugin }), "reason": StringChecker(maxlen=column_len(ReportReason, "reason")), "reporter": DictChecker({ "name": StringChecker(pattern=r"^[a-zA-Z0-9 ]+$", maxlen=64), "version": StringChecker(pattern=r"^[a-zA-Z0-9_\.\- ]+$", maxlen=64), }), "ureport_version": IntChecker(minval=0),