def call_nm(elffile):
    """
    Run the binutils nm executable on the `elffile` path and return the
    result of command.execute2() unchanged.

    The command output is requested as files (`to_files=True`): the result is
    expected to be the return code followed by the paths of the files holding
    the captured stdout and stderr.
    """
    logger.debug('Executing nm command on %(elffile)r' % locals())
    nm_loc = get_location(SCANCODE_BINUTILS_NM_EXE)
    nm_args = ['-al', elffile]
    result = command.execute2(cmd_loc=nm_loc, args=nm_args, to_files=True)
    return result
def test_execute2_non_ascii_output(self):
    # Popen returns a *binary* string with non-ascii chars: execute2 is
    # expected to skip these
    python_code = "print b'non ascii: \\xe4 just passed it !'"
    result = command.execute2(b'python', ['-c', python_code])
    rc, stdout, stderr = result

    # the command must succeed and write nothing on stderr
    assert rc == 0
    assert stderr == b''

    # NOTE(review): the expected value has a single space where the non-ascii
    # byte was; if that byte is merely dropped two spaces would remain --
    # confirm against the actual execute2 output.
    assert stdout == b'non ascii: just passed it !'

    # converting to Unicode could cause an "ordinal not in range..."
    # exception: this must not raise
    unicode(stdout)
def _parseinfo(self):
    """
    Parse the dwarfdump info section of an elf file.

    Run the command at `self.cmd_loc` with the `-i` option on
    `self.elf_location`, with its output saved to files. For each output line
    that matches DCOMP_UNIT_START_RE, create a DwarfInfo and call its parse()
    with this object and the open lines iterator.

    Raise an Exception carrying the content of the saved stderr file if the
    command return code is not zero.
    """
    rc, out, err = command.execute2(
        cmd_loc=self.cmd_loc,
        args=['-i', self.elf_location],
        lib_dir=self.lib_loc,
        to_files=True)

    if rc != 0:
        # read the error output in a context manager so that the file is
        # closed before the exception propagates
        with open(err) as errors:
            raise Exception(errors.read())

    # loop through each returned line passing control to a handler
    with open(out, 'rb') as lines:
        for line in lines:
            line = line.strip()
            if DCOMP_UNIT_START_RE().match(line):
                # the handler receives the same iterator: presumably it
                # consumes the lines that follow the start marker
                dwarfinfo = DwarfInfo()
                dwarfinfo.parse(self, lines)
def _collect_and_parse_tags(self):
    """
    Run the command at `self.cmd_loc` on `self.sourcefile` and collect
    function and prototype names from its output.

    The first tab-separated field of each output line is appended to:
    - `self.local_functions` if the line ends with 'function\\tfile:' or
      'prototype\\tfile:'
    - `self.global_functions` if the line ends with 'function' or 'prototype'
    Lines starting with '!' and empty lines are skipped.

    The command runs with TMPDIR set to a temporary directory that is always
    deleted when done.

    Raise an Exception carrying the content of the saved stderr file if the
    return code is not zero, or if the output reports that a temporary file
    cannot be opened.
    """
    ctags_args = ['--fields=K', '--c-kinds=fp', '-f', '-', self.sourcefile]
    ctags_temp_dir = fileutils.get_temp_dir(base_dir='ctags')
    envt = {'TMPDIR': ctags_temp_dir}
    try:
        # the arguments list must be passed by keyword: a positional argument
        # cannot follow a keyword argument in a call
        rc, stdo, err = command.execute2(
            cmd_loc=self.cmd_loc,
            args=ctags_args,
            env=envt,
            lib_dir=self.lib_loc,
            to_files=True)

        if rc != 0:
            # close the stderr file before the exception propagates
            with open(err) as errors:
                raise Exception(errors.read())

        with open(stdo, 'rb') as lines:
            for line in lines:
                if 'cannot open temporary file' in line:
                    raise Exception('ctags: cannot open temporary file '
                                    ': Permission denied')

                if line.startswith('!'):
                    continue

                line = line.strip()
                if not line:
                    continue

                # the local check comes first: these lines end with 'file:'
                # and would never match the global suffixes
                if line.endswith(('function\tfile:', 'prototype\tfile:')):
                    self.local_functions.append(line.split('\t')[0])
                elif line.endswith(('function', 'prototype')):
                    self.global_functions.append(line.split('\t')[0])
    finally:
        fileutils.delete(ctags_temp_dir)
def demangle_chunk(symbols):
    """
    Return a list of unique demangled Elf symbol names using binutils
    c++filt. The list is in no particular order.

    `symbols` is an iterable of symbol names passed as c++filt arguments.
    Return an empty list, without running any command, if `symbols` is empty
    or None.

    Names starting with any of the `demangled_junk` prefixes are dropped as
    known junk. For names containing a '(' only the part before the first '('
    is kept.

    Raise an Exception carrying the content of the saved stderr file if the
    command return code is not zero.
    """
    if not symbols:
        return []

    args = ['--no-strip-underscores', '--no-verbose', '--no-params']
    args += list(symbols)

    cmd_loc = get_location(SCANCODE_CPLUSPLUSFILT_EXE)
    lib_loc = get_location(SCANCODE_CPLUSPLUSFILT_LIB)
    rc, out, err = command.execute2(
        cmd_loc, args, lib_dir=lib_loc, to_files=True)

    if rc != 0:
        # close the stderr file before the exception propagates
        with open(err) as errors:
            raise Exception(errors.read())

    demangled = set()
    with open(out, 'rb') as names:
        for name in names:
            # ignore junk injected by the compiler
            if any(name.startswith(junk) for junk in demangled_junk):
                continue

            # do not keep params for CPP functions, just the function
            if '(' in name:
                name = name.split('(')[0]

            demangled.add(name.strip())

    return list(demangled)
def parse(self):
    """
    Parse readelf sections to populate the Elf object for an elf location.

    Run the command at `self.cmd_loc` with '--wide', the
    `self.readelf_options` and `self.elf_location` as arguments, with its
    output saved to files. For each non-empty stripped output line, look up a
    handler with `self.get_handler(line)` and, if there is one, call it with
    this object and the open lines iterator.

    Raise an Exception carrying the content of the saved stderr and stdout
    files if the command return code is not zero.
    """
    readelf_args = ['--wide']
    readelf_args.extend(self.readelf_options)
    readelf_args.append(self.elf_location)

    rc, out, err = command.execute2(
        cmd_loc=self.cmd_loc,
        args=readelf_args,
        lib_dir=self.lib_loc,
        to_files=True)

    if rc != 0:
        # read both outputs in context managers so that the files are closed
        # before the exception propagates
        with open(err) as errors, open(out) as output:
            raise Exception(errors.read() + '\n' + output.read())

    # loop through each line passing control to a handler as needed
    with open(out, 'rb') as elf_lines:
        for line in elf_lines:
            # get the handler for this line
            line = line.strip()
            if line:
                handler = self.get_handler(line)
                if handler:
                    # the handler receives the same iterator: presumably it
                    # consumes the lines of its own section
                    handler(self, elf_lines)
def list_entries(location, arch_type='*'):
    """
    Return the result of parse_7z_listing() applied to the 7zip technical
    listing of the archive file at `location`. Return an empty list without
    running 7zip if `location` is a RAR archive.

    `arch_type` is a 7zip CLI archive type code used to build the "-t" option
    (e.g. "-t*" with the default). It can be None or empty: an empty string
    is then passed in place of the "-t" option.
    """
    assert location
    abs_location = os.path.abspath(os.path.expanduser(location))

    if is_rar(location):
        return []

    # NB: we use t* to ensure that all archive types are honored
    type_option = '-t' + arch_type if arch_type else ''

    # the listing is produced as UTF on windows to avoid damaging binary
    # paths in console outputs
    utf_option = '-sccUTF-8' if on_windows else ''

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = dict(os.environ)
    env[u'TZ'] = u'GMT'
    env = command.get_env(env)

    args = [
        # listing command
        'l',
        # technical information
        '-slt',
        type_option,
        utf_option,
        abs_location,
        # pass an empty password so that extraction with passwords WILL fail
        '******',
    ]

    lib_dir = get_location(EXTRACTCODE_7ZIP_LIBDIR)
    cmd_loc = get_location(EXTRACTCODE_7ZIP_EXE)

    # NB: the rc, stdout and stderr names are used by the debug message below
    rc, stdout, stderr = command.execute2(
        cmd_loc=cmd_loc,
        args=args,
        lib_dir=lib_dir,
        env=env,
        to_files=True)

    if TRACE:
        logger.debug(
            'list_entries: rc: {rc}\nstderr: file://{stderr}\nstdout: file://{stdout}\n'
            .format(**locals()))

    if rc != 0:
        # FIXME: this test is useless
        _error = get_7z_errors(stdout) or UNKNOWN_ERROR

    return parse_7z_listing(stdout, bool(utf_option))
def extract(location, target_dir, arch_type='*'):
    """
    Extract all the files of a 7zip-supported archive file at `location` in
    the `target_dir` directory and return the warnings that get_7z_warnings()
    finds in the 7zip output.

    Raise an ExtractErrorFailedToExtract if `location` is a RAR archive or if
    7zip exits with a non-zero return code.

    `arch_type` is the type of 7zip archive used to build the "-t" 7zip
    option. It can be None or empty: an empty string is then passed in place
    of the "-t" option.
    """
    assert location
    assert target_dir
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    if is_rar(location):
        raise ExtractErrorFailedToExtract('RAR extraction disactivated')

    # note: there are some issues with the extraction of debian .deb ar files
    # see sevenzip bug http://sourceforge.net/p/sevenzip/bugs/1472/

    # NB: we use t* to ensure that all archive types are honored
    type_option = '-t' + arch_type if arch_type else ''

    # These options do not work well with p7zip for now and are not used:
    # - '-ssc-' to treat the FS as case insensitive and get consistent names
    #   across OSes
    # - '-sccUTF-8' to force any console output to be UTF-8 encoded
    #   TODO: add this may be for a UTF output on Windows only
    # - '-w<path>' to set a working temp directory

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = dict(os.environ)
    env[u'TZ'] = u'GMT'
    env = command.get_env(env)

    args = [
        # extract command
        'x',
        # answer yes to all
        '-y',
        # auto-rename duplicated names: renaming may not behave the same way
        # on all OSes in particular Mac and Windows
        '-aou',
        type_option,
        abs_location,
        # pass an empty password so that extraction with passwords WILL fail
        '******',
    ]

    lib_dir = get_location(EXTRACTCODE_7ZIP_LIBDIR)
    cmd_loc = get_location(EXTRACTCODE_7ZIP_EXE)

    # Note: 7z does extract in the current directory so we cwd to the target
    # dir first
    ex_args = {
        'cmd_loc': cmd_loc,
        'args': args,
        'lib_dir': lib_dir,
        'cwd': abs_target_dir,
        'env': env,
    }

    if TRACE:
        logger.debug('extract: args:')
        pprint(ex_args)

    # NB: the rc, stdout and stderr names are used by the debug message below
    rc, stdout, stderr = command.execute2(**ex_args)

    if rc == 0:
        extractcode.remove_backslashes_and_dotdots(abs_target_dir)
        return get_7z_warnings(stdout)

    if TRACE:
        logger.debug(
            'extract: failure: {rc}\nstderr: {stderr}\nstdout: {stdout}\n'.
            format(**locals()))

    error = get_7z_errors(stdout) or UNKNOWN_ERROR
    raise ExtractErrorFailedToExtract(error)
def extract_file_by_file(location, target_dir, arch_type='*', skip_symlinks=True):
    """
    Extract the files of a 7zip-supported archive file at `location` in the
    `target_dir` directory, using a one-by-one process when some entry paths
    differ only by their case. Otherwise delegate to
    extract_all_files_at_once().

    Return the result of convert_warnings_to_list() for the collected
    per-path warnings in the one-by-one case. Raise an
    ExtractErrorFailedToExtract with a mapping of path to error message if
    any entry failed to extract.

    `arch_type` is the type of 7zip archive passed to the -t 7zip option. Can
    be None. `skip_symlinks` is accepted but not used here.
    """
    # NB: abs_location is computed but not used afterwards
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    # NB: the listing error messages are not used afterwards
    entries, errors_msgs = list_entries(location, arch_type)
    entries = list(entries)

    # Determine if we need a one-by-one approach: technically the approach is
    # to check if we have files that are in the same dir and have the same
    # name when the case is ignored. We take a simpler approach: we check if
    # all paths are unique when we ignore the case: for that we only check
    # that the sizes of two sets of paths are the same: one set as-is and the
    # other lowercased.
    exact_paths = {entry.path for entry in entries}
    lowered_paths = {path.lower() for path in exact_paths}

    if len(exact_paths) == len(lowered_paths):
        # no case-only collision: use regular extract
        return extract_all_files_at_once(
            location=location,
            target_dir=target_dir,
            arch_type=arch_type)

    # now we are extracting one file at a time. this is a tad painful because
    # we are dealing with a full command execution at each time.
    errors = {}
    warnings = {}
    tmp_dir = fileutils.get_temp_dir(prefix='extractcode-extract-')

    for i, entry in enumerate(entries):
        if not entry.is_file:
            continue

        # each entry is extracted in its own numbered temp subdirectory
        tmp_extract_dir = os.path.join(tmp_dir, str(i))
        fileutils.create_dir(tmp_extract_dir)

        ex_args = build_7z_extract_command(
            location=location,
            target_dir=tmp_extract_dir,
            single_entry=entry,
            arch_type=arch_type,
        )
        # NB: the rc, stdout and stderr names are used by the debug message
        rc, stdout, stderr = command.execute2(**ex_args)

        error = get_7z_errors(stdout, stderr)
        if error or rc != 0:
            error = error or UNKNOWN_ERROR
            if TRACE:
                logger.debug(
                    'extract: failure: {rc}\n'
                    'stderr: {stderr}\nstdout: {stdout}'.format(**locals()))
            errors[entry.path] = error
            continue

        # these are all for a single file path
        warns = get_7z_warnings(stdout) or {}
        wmsg = '\n'.join(warns.values())
        if wmsg:
            if entry.path in warnings:
                wmsg = warnings[entry.path] + '\n' + wmsg
            warnings[entry.path] = wmsg

        # finally move that extracted file to its target location, possibly
        # renamed
        source_file_loc = os.path.join(
            tmp_extract_dir, fileutils.file_name(entry.path))

        if not os.path.exists(source_file_loc):
            if entry.path in errors:
                errors[entry.path] += '\nNo file name extracted.'
            else:
                errors[entry.path] = 'No file name extracted.'
            continue

        safe_path = paths.safe_path(entry.path, posix=True)
        target_file_loc = os.path.join(target_dir, safe_path)
        fileutils.create_dir(os.path.dirname(target_file_loc))

        unique_target_file_loc = extractcode.new_name(
            target_file_loc, is_dir=False)

        if TRACE:
            logger.debug(
                'extract: unique_target_file_loc: from {} to {}'.format(
                    target_file_loc, unique_target_file_loc))

        # a plain file is copied as a file, anything else as a tree
        if os.path.isfile(source_file_loc):
            copier = fileutils.copyfile
        else:
            copier = fileutils.copytree
        copier(source_file_loc, unique_target_file_loc)

    extractcode.remove_backslashes_and_dotdots(abs_target_dir)

    if errors:
        raise ExtractErrorFailedToExtract(errors)

    return convert_warnings_to_list(warnings)