def deserialize_deps_and_syms(obj, cur, lib_map):
    """Load the symbols and the dependency graph of ``obj``.

    ``obj.imports`` / ``obj.exports`` are filled from
    ``Symbol.deserialize_syms`` and ``obj.deps`` from ``obj.deserialize_deps``.
    Every dependency is then either replaced by the instance already recorded
    in ``lib_map`` for its SONAME, or loaded recursively and recorded there.

    Args:
        obj: object to populate; its ``imports``, ``exports`` and ``deps``
            attributes are overwritten.
        cur: database cursor handed to the deserialization helpers.
        lib_map: dict mapping SONAME -> already loaded object.  It is mutated
            in place so that callers can share one map across several calls.
    """
    # Function attribute: SONAMEs whose name clash was already reported, so
    # each clash is warned about only once.
    if not hasattr(deserialize_deps_and_syms, 'warned'):
        deserialize_deps_and_syms.warned = set()
    warned = deserialize_deps_and_syms.warned

    obj.imports, obj.exports = Symbol.deserialize_syms(cur, obj)
    obj.deserialize_deps(cur)

    new_deps = []
    for dep_obj in obj.deps:
        soname = dep_obj.soname
        if soname is None:
            # Report the object *name*: the SONAME is None on this path, so
            # formatting it would never identify the offending object.
            warn("object %s does not have a SONAME, skipping..."
                 % dep_obj.name)

        if soname in lib_map:
            # Reuse the instance loaded earlier so all dependents share it.
            old_obj = lib_map[soname]
            if old_obj.name != dep_obj.name and soname not in warned:
                warned.add(soname)
                warn("libraries %s and %s have same soname %s"
                     % (old_obj.name, dep_obj.name, soname))
            dep_obj = old_obj
        else:
            deserialize_deps_and_syms(dep_obj, cur, lib_map)
            # Objects without a SONAME are still kept as dependencies but
            # cannot be registered for sharing.
            if soname is not None:
                lib_map[soname] = dep_obj

        # Exactly one entry per dependency, whichever branch was taken.
        new_deps.append(dep_obj)

    obj.deps = new_deps
def deserialize_deps(self, cur):
    """Rebuild ``self.deps`` from the database.

    Selects every object whose SONAME matches a ``ShlibDeps.DepName`` row of
    this object, wraps each row in an ``Object`` (with its ``Package``) and
    recursively loads that object's own dependencies.

    When several rows provide the same SONAME only the first one is kept;
    the clash is reported once per SONAME for the whole process.

    Args:
        cur: database cursor; must support ``execute`` and ``fetchall``.
    """
    # Function attribute: SONAMEs whose duplicate implementations were
    # already reported.
    if not hasattr(Object.deserialize_deps, 'warned_sonames'):
        Object.deserialize_deps.warned_sonames = set()
    warned_sonames = Object.deserialize_deps.warned_sonames

    self.deps = []
    # '%d' only accepts a number, so self.id cannot smuggle SQL text in.
    cur.execute(
        'SELECT Objects.ID, Objects.Name, SoName, IsShlib, IsSymbolic, '
        'Packages.ID, Packages.Name, Packages.SourceName '
        'FROM (Objects INNER JOIN ShlibDeps '
        'ON Objects.SoName = ShlibDeps.DepName '
        'INNER JOIN Packages ON Objects.PackageID = Packages.ID) '
        'WHERE ShlibDeps.ObjectID = %d' % self.id)

    # SONAME -> (object name, package name) of the implementation kept.
    soname_origins = {}
    # fetchall() materializes the rows, so the recursive calls below can
    # safely reuse the same cursor.
    for (obj_id, obj_name, soname, is_shlib, is_symbolic,
         pkg_id, pkg_name, pkg_source_name) in cur.fetchall():
        if soname in soname_origins:
            # Always skip the extra implementation; whether the warning was
            # already printed must not decide whether it becomes a dep.
            if soname not in warned_sonames:
                orig_obj_name, orig_pkg_name = soname_origins[soname]
                warn(
                    "duplicate implementations of SONAME '%s': %s (from %s) and %s (from %s)"
                    % (soname, obj_name, pkg_name, orig_obj_name,
                       orig_pkg_name))
                warned_sonames.add(soname)
            continue
        soname_origins[soname] = obj_name, pkg_name

        pkg = Package(pkg_name, pkg_source_name)
        pkg.id = pkg_id
        obj = Object(obj_name, soname, pkg, [], [], [], is_shlib, is_symbolic)
        obj.id = obj_id
        obj.deserialize_deps(cur)  # TODO: circular deps
        self.deps.append(obj)
def raise_errors(exc_lists):
    """Report every collected exception, then re-raise the last one.

    Args:
        exc_lists: sequence of per-thread exception lists; the position of a
            list in the sequence is used as the thread number in the report.

    Raises:
        The last exception found in iteration order, if there is any.
    """
    last_exc = None
    for thread_no, thread_excs in enumerate(exc_lists):
        for exc in thread_excs:
            warn("exception in thread %d: %s" % (thread_no, exc))
            last_exc = exc

    # Nothing was collected: return normally.
    if last_exc is None:
        return
    raise last_exc
def parse_elf_file(f, file_type, pkg):
    """Parse an ELF file into an ``Object`` describing its dynamic interface.

    Collects DT_NEEDED dependencies, the SONAME and the symbolic-binding flag
    from ``.dynamic``, then classifies the ``.dynsym`` symbols into imports
    and exports.

    Args:
        f: path of the ELF file; only its basename is stored and reported.
        file_type: textual description of the file (presumably the output of
            file(1) -- confirm with the caller); it is searched for
            'shared object' and '.so'.
        pkg: package object stored in the resulting ``Object``.

    Returns:
        The populated ``Object``, or ``False`` when ``.dynsym`` is missing or
        is not a symbol table.
    """
    is_shlib = 'shared object' in file_type \
        and '.so' in file_type  # Detect PIEs

    # All parsing stays inside the ``with`` so the stream is still open
    # whenever a section is accessed.
    with open(f, 'rb') as stream:
        elf_file = ELFFile(stream)
        f = os.path.basename(f)

        # First collect dependency info
        # NOTE(review): assumes error() does not return; otherwise
        # iter_tags() below would be called on a missing/invalid section.
        dynsect = elf_file.get_section_by_name('.dynamic')
        if not dynsect:
            error("%s: no .dynamic section" % f)
        elif not isinstance(dynsect, DynamicSection):
            # TODO: investigate
            error("%s: unexpected type of .dynamic" % f)

        soname = None
        deps = []
        is_symbolic = False
        for tag in dynsect.iter_tags():
            d_tag = tag.entry.d_tag
            if d_tag == 'DT_NEEDED':
                deps.append(tag.needed)
            elif d_tag == 'DT_SONAME':
                if soname is not None:
                    error("%s: multiple DT_SONAME in .dynamic section" % f)
                soname = tag.soname
            elif d_tag == 'DT_SYMBOLIC' \
                    or (d_tag == 'DT_FLAGS' and (tag.entry.d_val & 0x2)):
                # 0x2 is the DF_SYMBOLIC bit of DT_FLAGS
                is_symbolic = True

        if not deps and not linker.is_dynamic_linker(f):
            warn("%s: no DT_NEEDED in .dynamic section" % f)

        # Get copy relocs (they are not real exports)
        copy_relocated_addresses = set()
        reladyn = elf_file.get_section_by_name('.rela.dyn')
        if not isinstance(reladyn, RelocationSection):
            warn("%s: unexpected type of .rela.dyn" % f)
        else:
            for rel in reladyn.iter_relocations():
                rel_type = describe_reloc_type(rel['r_info_type'], elf_file)
                if rel_type == 'R_X86_64_COPY':
                    copy_relocated_addresses.add(rel['r_offset'])

        # Get version names
        verdef = elf_file.get_section_by_name('.gnu.version_d')
        ver_names = set()
        if verdef:
            if not isinstance(verdef, GNUVerDefSection):
                error("%s: unexpected type of .gnu.version_d" % f)
            else:
                # Separate loop names: do not rebind the section variable
                # while iterating over it.
                for _version, verdaux_iter in verdef.iter_versions():
                    # Only the first auxiliary entry's name is recorded.
                    verdaux = next(verdaux_iter)
                    ver_names.add(verdaux.name)

        # Now analyze interface
        # TODO: versions
        symtab = elf_file.get_section_by_name('.dynsym')
        if not symtab:
            # The format string used to have no arguments and printed
            # literal '%s' placeholders.
            error("%s: no symbol table" % f)
            return False
        if not isinstance(symtab, SymbolTableSection):
            error("%s: unexpected type of .dynsym" % f)
            return False

        obj = Object(f, soname, pkg, deps, [], [], is_shlib, is_symbolic)
        for elf_symbol in symtab.iter_symbols():
            bind = elf_symbol['st_info']['bind']
            vis = elf_symbol['st_other']['visibility']
            # STB_LOOS means STB_GNU_UNIQUE
            if bind not in ('STB_GLOBAL', 'STB_WEAK', 'STB_LOOS') \
                    or vis not in ('STV_DEFAULT', 'STV_PROTECTED'):
                continue
            # Symbols named after a version definition are not part of the
            # interface.
            if elf_symbol.name in ver_names:
                continue
            symbol = Symbol(elf_symbol.name, obj, bind == 'STB_WEAK',
                            vis == 'STV_PROTECTED')
            # Undefined symbols and copy-relocated data are imports.
            if elf_symbol['st_shndx'] == 'SHN_UNDEF' \
                    or elf_symbol['st_value'] in copy_relocated_addresses:
                obj.imports.append(symbol)
            else:
                obj.exports.append(symbol)

        return obj
def find_interposes(pkg, conn, v):
    """Report duplicate symbol definitions and unresolved references.

    For every object of ``pkg`` a library load list is built by walking its
    dependencies breadth-first (one entry per SONAME).  Symbols exported by
    more than one library of that list are printed as duplicate definitions,
    and non-weak imports defined by none of them are reported via ``warn``.

    Args:
        pkg: package whose objects are analyzed.
        conn: database connection; used as a context manager yielding the
            cursor passed to the deserialization helpers.
        v: when true, print each load list with the symbols it exports.
    """
    # Function attributes: remember what was already reported so that each
    # message appears once.
    if not hasattr(find_interposes, 'dup_warnings'):
        find_interposes.dup_warnings = set()
        find_interposes.soname_warnings = set()
    dup_warnings = find_interposes.dup_warnings
    soname_warnings = find_interposes.soname_warnings

    # TODO: thread-local cache for most commonly used libs?
    with conn as cur:
        pkg_objects = Object.deserialize_pkg_objects(cur, pkg)
        lib_map = {}
        for obj in pkg_objects:
            deserialize_deps_and_syms(obj, cur, lib_map)

    for pkg_obj in pkg_objects:
        # Build library load list
        lib_list = [pkg_obj]
        loaded_sonames = set()
        pending_libs = pkg_obj.deps
        while pending_libs:
            new_pending_libs = []
            for obj in pending_libs:
                # TODO: check soname is present for libs
                if obj.soname is None:
                    # Always skip libraries without a SONAME; whether the
                    # warning was already printed must not change the load
                    # list.
                    warn_key = (pkg.name, obj.name)
                    if warn_key not in soname_warnings:
                        warn("library %s does not have a SONAME" % obj.name)
                        soname_warnings.add(warn_key)
                    continue
                if obj.soname in loaded_sonames:
                    continue
                lib_list.append(obj)
                loaded_sonames.add(obj.soname)
                new_pending_libs += obj.deps
            pending_libs = new_pending_libs

        if v:
            print("Library list for object %s in package %s:"
                  % (pkg_obj.name, pkg.name))
            for obj in lib_list:
                print(" object %s:" % obj.name)
                for sym in obj.exports:
                    print(" %s" % sym.name)

        # Collect definitions and report interpositions
        # TODO: report interposition only if there's an actual use for it?
        sym_origins = {}  # symbol name -> first library defining it
        for obj in lib_list:
            for sym in obj.exports:
                if sym.name not in sym_origins:
                    sym_origins[sym.name] = obj
                    continue
                other_obj = sym_origins[sym.name]
                if not can_ignore_dup(sym, obj, other_obj) \
                        and (sym.name, obj.name, other_obj.name) not in dup_warnings:
                    print(
                        "Duplicate definition of symbol '%s' in modules %s (from package %s) and %s (from package %s) (when loading object %s in package %s)"
                        % (sym.name, other_obj.name, other_obj.pkg.source_name,
                           obj.name, obj.pkg.source_name, pkg_obj.name,
                           pkg.name))
                    # Record both orderings so the mirrored pair is not
                    # reported again.
                    dup_warnings.add((sym.name, obj.name, other_obj.name))
                    dup_warnings.add((sym.name, other_obj.name, obj.name))

        # Resolve symbols
        for obj in lib_list:
            for sym in obj.imports:
                if sym.name not in sym_origins and not sym.is_weak \
                        and not can_ignore_unres(sym, obj, pkg_obj):
                    warn(
                        "unresolved reference to symbol '%s' in library %s (from package %s) (when loading object %s in package %s)"
                        % (sym.name, obj.name, obj.pkg.source_name,
                           pkg_obj.name, pkg.name))