def __init__(self, location):
    """
    Collect the source files referenced by the Elf file at ``location``.

    The Elf is parsed and then cleaned up as part of the construction.
    """
    # Path to the Elf file to process.
    self.elf_location = location

    # Source files that were compiled and linked explicitly to create this
    # Elf. These are the source files that a developer typically edits.
    self.original_source_files = []

    # Source files that were compiled and linked implicitly from the
    # standard library or by the toolchain when this Elf was created.
    # These may vary from platform to platform and with the version of the
    # GNU toolchain. They are not always relevant from an interaction
    # perspective except in a few cases, such as LKM.
    self.included_source_files = []

    self._files = []
    self.parse_errors = []

    # Plugin-provided locations of the command and of its libraries.
    self.cmd_loc = get_location(SCANCODE_DWARFDUMP_EXE)
    self.lib_loc = get_location(SCANCODE_DWARFDUMP_LIB)

    # Parse first, then clean up.
    self._parseinfo()
    self.cleanup()
def __init__(self, location):
    """
    Set up the parsing state for the Elf file at ``location`` and parse it.
    """
    # Path to the Elf file to process.
    self.elf_location = location

    # Information contained in the Elf header.
    self.info = {}

    # Dynamic libraries needed by this Elf at runtime.
    self.needed_libraries = set()

    self.handlers = {}
    self.readelf_options = []

    # The symbols section is an ElfSymbolsTableSection and its files list is
    # shared with this object.
    symbols_section = ElfSymbolsTableSection()
    self.symbols_section = symbols_section
    self.files = symbols_section.files

    # Sections parsers.
    self.readelf_sections = [ElfDynamicSection(), symbols_section]

    # Plugin-provided locations of the command and of its libraries.
    self.cmd_loc = get_location(SCANCODE_READELF_EXE)
    self.lib_loc = get_location(SCANCODE_READELF_LIB)

    self.setup_handlers()
    self.parse()
def load_lib():
    """
    Return the libarchive shared library object, loaded from the DLL and
    library directory locations returned by ``get_location``.
    """
    return command.load_shared_library(
        get_location(EXTRACTCODE_LIBARCHIVE_DLL),
        get_location(EXTRACTCODE_LIBARCHIVE_LIBDIR),
    )
def load_lib():
    """
    Return the libmagic shared library object, loaded from the DLL and
    library directory locations returned by ``get_location``.
    """
    return command.load_shared_library(
        get_location(TYPECODE_LIBMAGIC_DLL),
        get_location(TYPECODE_LIBMAGIC_LIBDIR),
    )
def get_bin_locations():
    """
    Return a (lib_dir, cmd_loc) tuple of 7zip locations: the library
    directory first, then the executable, both as returned by
    ``get_location``.
    """
    from plugincode.location_provider import get_location

    return (
        get_location(EXTRACTCODE_7ZIP_LIBDIR),
        get_location(EXTRACTCODE_7ZIP_EXE),
    )
def load_lib():
    """
    Return the libarchive shared library object, loaded from the DLL and
    library directory locations returned by ``get_location``.
    """
    from plugincode.location_provider import get_location

    return command.load_shared_library(
        get_location(EXTRACTCODE_LIBARCHIVE_DLL),
        get_location(EXTRACTCODE_LIBARCHIVE_LIBDIR),
    )
def load_lib():
    """
    Return the libmagic shared library object, loaded from the DLL and
    library directory locations returned by ``get_location``.

    Raise an Exception if either location is missing, if the DLL location
    is not a file or if the library directory location is not a directory.
    """
    dll = get_location(TYPECODE_LIBMAGIC_DLL)
    libdir = get_location(TYPECODE_LIBMAGIC_LIBDIR)

    has_dll = bool(dll) and os.path.isfile(dll)
    has_libdir = bool(libdir) and os.path.isdir(libdir)

    if not (has_dll and has_libdir):
        raise Exception(
            'CRITICAL: libmagic DLL and its magic database are not installed. '
            'Unable to continue: you need to install a valid typecode-libmagic '
            'plugin with a valid and proper libmagic and magic DB available.'
        )
    return command.load_shared_library(dll, libdir)
def __init__(self, sourcefile):
    """
    Collect the tags of the source file at ``sourcefile``.

    The tags are collected and parsed as part of the construction.
    """
    self.sourcefile = sourcefile

    # Plugin-provided locations of the command and of its libraries.
    self.cmd_loc = get_location(SCANCODE_CTAGS_EXE)
    self.lib_loc = get_location(SCANCODE_CTAGS_LIB)

    # NB: these attribute names are API and are expected when fingerprinting.

    # A list of source file names (not paths).
    self.files = [fileutils.file_name(sourcefile)]

    # Lists of function names.
    self.local_functions = []
    self.global_functions = []

    self._collect_and_parse_tags()
def get_magicdb_location(_cache=[]):
    """
    Return the location of the magicdb, looked up in this order:
    - the ``TYPECODE_LIBMAGIC_DB_PATH`` environment variable,
    - a plugin-provided path,
    - the system PATH.

    Emit a warning and return None if no magicdb file is found.

    ``_cache`` is a deliberate mutable default used to remember the first
    location found; a missing magicdb is not remembered.
    """
    if _cache:
        return _cache[0]

    from plugincode.location_provider import get_location

    # 1. the environment
    location = os.environ.get(TYPECODE_LIBMAGIC_DB_PATH_ENVVAR)
    if location and TRACE:
        logger_debug('get_magicdb_location:', 'got environ magicdb location:', location)

    # 2. a plugin-provided path
    if not location:
        location = get_location(TYPECODE_LIBMAGIC_DB)
        if location and TRACE:
            logger_debug('get_magicdb_location:', 'got plugin magicdb location:', location)

    # 3. the PATH
    if not location:
        location = command.find_in_path('magic.mgc')
        if location:
            warnings.warn(
                'magicdb found in the PATH. '
                'Install instead a typecode-libmagic plugin for best support.\n'
                f'OR set the {TYPECODE_LIBMAGIC_DB_PATH_ENVVAR} environment variable.'
            )
            if TRACE:
                logger_debug('get_magicdb_location:', 'got path magicdb location:', location)

    if location:
        _cache.append(location)
        return location

    warnings.warn(
        'Libmagic magic database not found. '
        'A default will be used if possible. '
        'Install instead a typecode-libmagic plugin for best support.\n'
        f'OR set the {TYPECODE_LIBMAGIC_DB_PATH_ENVVAR} environment variable.'
    )
    return None
def load_lib():
    """
    Return the libmagic shared library object, looked up in this order:
    - the ``TYPECODE_LIBMAGIC_PATH`` environment variable,
    - a plugin-provided path,
    - the library returned by ``load_lib_failover``,
    - the system PATH.

    Raise a NoMagicLibError if no libmagic file can be found.
    """
    from plugincode.location_provider import get_location

    # 1. the environment
    dll_loc = os.environ.get(TYPECODE_LIBMAGIC_PATH_ENVVAR)
    if dll_loc and TRACE:
        logger_debug('load_lib:', 'got environ magic location:', dll_loc)

    # 2. a plugin-provided path
    if not dll_loc:
        dll_loc = get_location(TYPECODE_LIBMAGIC_DLL)
        if dll_loc and TRACE:
            logger_debug('load_lib:', 'got plugin magic location:', dll_loc)

    if not dll_loc:
        # 3. well known locations: the failover returns an already loaded
        # library rather than a path
        system_lib = load_lib_failover()
        if system_lib:
            warnings.warn(
                'System libmagic found in typical location is used. '
                'Install instead a typecode-libmagic plugin for best support.')
            return system_lib

        # 4. the PATH
        lib_name = 'libmagic.dll' if on_windows else 'libmagic.so'
        dll_loc = command.find_in_path(lib_name)
        if dll_loc:
            warnings.warn(
                'libmagic found in the PATH. '
                'Install instead a typecode-libmagic plugin for best support.')
            if TRACE:
                logger_debug('load_lib:', 'got path magic location:', dll_loc)

    if dll_loc and os.path.isfile(dll_loc):
        return command.load_shared_library(dll_loc)

    raise NoMagicLibError(
        'CRITICAL: libmagic DLL and its magic database are not installed. '
        'Unable to continue: you need to install a valid typecode-libmagic '
        'plugin with a valid and proper libmagic and magic DB available.\n'
        f'OR set the {TYPECODE_LIBMAGIC_PATH_ENVVAR} and '
        f'{TYPECODE_LIBMAGIC_DB_PATH_ENVVAR} environment variables.\n'
        'OR install libmagic in typical common locations.\n'
        'OR have a libmagic in the system PATH.\n')
def __init__(self, flags, magic_db_location=None):
    """
    Create a new libmagic detector.

    flags - the libmagic flags
    magic_db_location - location of the magic database to load. When not
      provided, the location returned by ``get_location`` is used instead.
    """
    self.flags = flags
    self.cookie = _magic_open(self.flags)

    db_location = magic_db_location or get_location(TYPECODE_LIBMAGIC_DATABASE)
    _magic_load(self.cookie, db_location)
def call_nm(elffile):
    """
    Run the nm command with the ``-al`` options on the ``elffile`` path.
    Return what ``command.execute2`` returns when called with
    ``to_files=True`` (originally documented as the returncode and the file
    paths containing the stdout and stderr).
    """
    logger.debug('Executing nm command on %r' % (elffile,))
    nm_args = ['-al', elffile]
    return command.execute2(
        cmd_loc=get_location(SCANCODE_BINUTILS_NM_EXE),
        args=nm_args,
        to_files=True,
    )
def demangle_chunk(symbols):
    """
    Return a list of unique demangled Elf symbol names for the ``symbols``
    list of mangled names, using binutils c++filt. Return an empty list if
    ``symbols`` is empty.

    Names that start with one of the ``demangled_junk`` prefixes are skipped
    and anything from the first opening parenthesis on is removed, such that
    only the function name is kept for C++ functions. The order of the
    returned names is not defined as they are collected in a set.

    Raise an Exception with the content of the command stderr if the command
    returns a non-zero return code.
    """
    if not symbols:
        return []

    args = ['--no-strip-underscores', '--no-verbose', '--no-params'] + symbols
    cmd_loc = get_location(SCANCODE_CPLUSPLUSFILT_EXE)
    lib_loc = get_location(SCANCODE_CPLUSPLUSFILT_LIB)
    # with to_files=True, out and err are used below as file paths
    rc, out, err = command.execute(
        cmd_loc,
        args,
        lib_dir=lib_loc,
        to_files=True,
    )

    if rc != 0:
        # read the error output in a context manager so that the file is
        # closed rather than leaked
        with open(err) as errors:
            raise Exception(errors.read())

    demangled = set()
    # Read the names as text: the '(' separator used below is text and cannot
    # be mixed with bytes lines on Python 3.
    # NOTE(review): this assumes demangled_junk holds text prefixes - confirm.
    with open(out) as names:
        for name in names:
            # ignore junk injected by the compiler
            if any(name.startswith(junk) for junk in demangled_junk):
                continue
            # do not keep params for CPP functions, just the function
            name = name.split('(')[0]
            demangled.add(name.strip())
    return list(demangled)
def get_rpm_bin_location():
    """
    Return the plugin-provided location registered under ``RPM_BIN_DIR``.
    Raise an Exception if there is no such location.
    """
    from plugincode.location_provider import get_location

    rpm_bin_dir = get_location(RPM_BIN_DIR)
    if rpm_bin_dir:
        return rpm_bin_dir

    raise Exception(
        'CRITICAL: RPM executable is not provided. '
        'Unable to continue: you need to install a valid rpm-inspector-rpm '
        'plugin with a valid RPM executable and shared libraries available.'
    )
def __init__(self, flags, magic_db_location=None):
    """
    Create a new libmagic detector.

    flags - the libmagic flags
    magic_db_location - location of the magic database to load. When not
      provided, the location returned by ``get_location`` is used instead.
    """
    self.flags = flags
    self.cookie = _magic_open(self.flags)

    db_location = magic_db_location or get_location(TYPECODE_LIBMAGIC_DATABASE)

    # Note: this location must always be bytes on Python2 and 3, all OSes
    if isinstance(db_location, compat.unicode):
        db_location = fsencode(db_location)

    _magic_load(self.cookie, db_location)
def get_msiinfo_bin_location(_cache=[]):
    """
    Return the binary location for msiinfo, looked up in this order:
    - a plugin-provided path,
    - the system PATH.

    Raise an Exception if no msiinfo command file can be found.

    ``_cache`` is a deliberate mutable default used to remember the first
    location found.
    """
    if _cache:
        return _cache[0]

    from plugincode.location_provider import get_location

    # try a plugin-provided path first
    cmd_loc = get_location(MSIINFO_BIN_LOCATION)

    # then try the PATH
    if not cmd_loc:
        cmd = 'msiinfo'
        cmd_loc = find_in_path(cmd) or which(cmd)
        if cmd_loc:
            # name the same plugin as the error message below
            warnings.warn(
                'Using "msiinfo" command found in the PATH. '
                'Install instead a packagedcode-msitools plugin for best support.'
            )

    if not cmd_loc or not os.path.isfile(cmd_loc):
        raise Exception(
            'CRITICAL: msiinfo not provided. '
            'Unable to continue: you need to install the plugin packagedcode-msitools'
        )

    _cache.append(cmd_loc)
    return cmd_loc
def extract(location, target_dir, arch_type='*'):
    """
    Extract all files from a 7zip-supported archive file at ``location`` in
    the ``target_dir`` directory. Return what ``get_7z_warnings`` returns for
    the command output (originally documented as a list of warning messages).

    ``arch_type`` is the type of 7zip archive passed to the -t 7zip option.
    Can be None.

    Raise an ExtractErrorFailedToExtract if ``location`` is a RAR archive or
    if the 7zip command returns a non-zero return code.
    """
    assert location
    assert target_dir
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    if is_rar(location):
        raise ExtractErrorFailedToExtract('RAR extraction disactivated')

    # note: there are some issues with the extraction of debian .deb ar files
    # see sevenzip bug http://sourceforge.net/p/sevenzip/bugs/1472/

    # 7z arguments
    # NB: this local is not named "extract" to avoid shadowing this function
    extract_command = 'x'
    yes_to_all = '-y'

    # NB: we use t* to ensure that all archive types are honored
    type_switch = '-t' + arch_type if arch_type else ''

    # pass an empty password with the -p switch so that extraction with
    # passwords WILL fail
    password = '-p'

    # renaming may not behave the same way on all OSes in particular Mac and
    # Windows
    auto_rename_dupe_names = '-aou'

    # These things do not work well with p7zip for now:
    # - ensure that we treat the FS as case insensitive even if it is not:
    #   this ensures we have consistent names across OSes
    #   case_insensitive = '-ssc-'
    # - force any console output to be UTF-8 encoded
    #   TODO: add this may be for a UTF output on Windows only
    #   output_as_utf = '-sccUTF-8'
    #   working_tmp_dir = '-w<path>'

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = command.get_env(dict(os.environ, TZ='GMT'))

    args = [
        extract_command,
        yes_to_all,
        auto_rename_dupe_names,
        type_switch,
        abs_location,
        password,
    ]

    lib_dir = get_location(EXTRACTCODE_7ZIP_LIBDIR)
    cmd_loc = get_location(EXTRACTCODE_7ZIP_EXE)

    # Note: 7z does extract in the current directory so we run the command
    # with the target dir as its working directory
    ex_args = dict(
        cmd_loc=cmd_loc,
        args=args,
        lib_dir=lib_dir,
        cwd=abs_target_dir,
        env=env,
    )

    if TRACE:
        logger.debug('extract: args:')
        pprint(ex_args)

    rc, stdout, stderr = command.execute2(**ex_args)

    if rc != 0:
        if TRACE:
            logger.debug(
                'extract: failure: {rc}\nstderr: {stderr}\nstdout: {stdout}\n'
                .format(rc=rc, stderr=stderr, stdout=stdout))
        error = get_7z_errors(stdout) or UNKNOWN_ERROR
        raise ExtractErrorFailedToExtract(error)

    extractcode.remove_backslashes_and_dotdots(abs_target_dir)
    return get_7z_warnings(stdout)
def list_entries(location, arch_type='*'):
    """
    Return what ``parse_7z_listing`` returns for the listing of a
    7zip-supported archive file at ``location`` (originally documented as
    Entry tuples, one for each entry found in the archive). Return an empty
    list if ``location`` is a RAR archive.

    Use the provided 7zip ``arch_type`` CLI archive type code (e.g. with the
    "-t* 7z" cli type option) (can be None).
    """
    assert location
    abs_location = os.path.abspath(os.path.expanduser(location))

    if is_rar(location):
        return []

    # 7z arguments
    listing = 'l'
    tech_info = '-slt'

    # NB: we use t* to ensure that all archive types are honored
    type_switch = '-t' + arch_type if arch_type else ''

    # pass an empty password with the -p switch so that listing with
    # passwords WILL fail
    password = '-p'

    # the listing is produced as UTF on Windows to avoid damaging binary
    # paths in console outputs
    output_as_utf = '-sccUTF-8' if on_windows else ''

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = command.get_env(dict(os.environ, TZ='GMT'))

    args = [
        listing,
        tech_info,
        type_switch,
        output_as_utf,
        abs_location,
        password,
    ]

    lib_dir = get_location(EXTRACTCODE_7ZIP_LIBDIR)
    cmd_loc = get_location(EXTRACTCODE_7ZIP_EXE)

    # with to_files=True, stdout and stderr are traced below as file paths
    rc, stdout, stderr = command.execute2(
        cmd_loc=cmd_loc,
        args=args,
        lib_dir=lib_dir,
        env=env,
        to_files=True,
    )

    if TRACE:
        logger.debug(
            'list_entries: rc: {rc}\nstderr: file://{stderr}\nstdout: file://{stdout}\n'
            .format(rc=rc, stderr=stderr, stdout=stdout))

    if rc != 0:
        # FIXME: this test is useless: the error is computed and then ignored.
        # The call is kept as-is to preserve the existing behavior.
        _error = get_7z_errors(stdout) or UNKNOWN_ERROR

    utf = bool(output_as_utf)
    return parse_7z_listing(stdout, utf)