Esempio n. 1
0
def call_nm(elffile):
    """
    Run the nm executable on the `elffile` path and return the result of
    `command.execute2` for that run: the return code and the paths of the
    files holding the captured stdout and stderr.

    nm is invoked with the `-al` options and its outputs are saved to files
    (`to_files=True`).
    """
    logger.debug('Executing nm command on %(elffile)r' % {'elffile': elffile})

    nm_args = ['-al', elffile]
    nm_loc = get_location(SCANCODE_BINUTILS_NM_EXE)
    return command.execute2(cmd_loc=nm_loc, args=nm_args, to_files=True)
Esempio n. 2
0
    def test_execute2_non_ascii_output(self):
        # The child process prints a *binary* string that holds a non-ascii
        # char: that char is expected to be skipped in the captured output.
        script = "print b'non ascii: \\xe4 just passed it !'"
        returncode, output, errors = command.execute2(b'python', ['-c', script])
        assert returncode == 0
        assert errors == b''

        # If a non-ascii byte were left in the output, the conversion to
        # Unicode below could fail with an "ordinal not in range..." exception.
        assert output == b'non ascii:  just passed it !'
        unicode(output)
Esempio n. 3
0
    def _parseinfo(self):
        """
        Parse the dwarfdump info section of the ELF file at
        `self.elf_location`.

        Run the command at `self.cmd_loc` with the `-i` option, saving its
        outputs to files, then scan the saved stdout line by line. Each line
        matching `DCOMP_UNIT_START_RE` triggers the creation of a new
        `DwarfInfo` whose `parse` method is called with this object and the
        open lines iterator.

        Raise an Exception carrying the content of the saved stderr file when
        the command return code is not zero.
        """
        rc, out, err = command.execute2(
            cmd_loc=self.cmd_loc,
            args=['-i', self.elf_location],
            lib_dir=self.lib_loc,
            to_files=True,
        )

        if rc != 0:
            # use a context manager so that the stderr file is always closed
            with open(err) as errors:
                raise Exception(errors.read())

        # loop through each returned line passing control to a handler
        with open(out, 'rb') as lines:
            for line in lines:
                line = line.strip()
                if DCOMP_UNIT_START_RE().match(line):
                    # the same iterator is handed over to the handler,
                    # presumably so that it can consume the lines that follow
                    # the unit start -- TODO confirm against DwarfInfo.parse
                    dwarfinfo = DwarfInfo()
                    dwarfinfo.parse(self, lines)
Esempio n. 4
0
    def _collect_and_parse_tags(self):
        """
        Run ctags on `self.sourcefile` and collect function names from its
        output.

        The first tab-separated field of each tag line is appended to
        `self.local_functions` when the line ends with a function or
        prototype kind followed by a `file:` field, and to
        `self.global_functions` when the line ends with the function or
        prototype kind alone.

        A dedicated temporary directory is passed to ctags through the TMPDIR
        environment variable and is always deleted when done.

        Raise an Exception with the content of the saved stderr file when the
        command return code is not zero, or when the output reports that a
        temporary file cannot be opened.
        """
        ctags_args = ['--fields=K', '--c-kinds=fp', '-f', '-', self.sourcefile]
        ctags_temp_dir = fileutils.get_temp_dir(base_dir='ctags')
        envt = {'TMPDIR': ctags_temp_dir}
        try:
            # the ctags arguments must be passed by keyword: a positional
            # argument after a keyword argument is a syntax error
            rc, stdo, err = command.execute2(
                cmd_loc=self.cmd_loc,
                args=ctags_args,
                env=envt,
                lib_dir=self.lib_loc,
                to_files=True,
            )

            if rc != 0:
                # use a context manager so that the stderr file is closed
                with open(err) as errors:
                    raise Exception(errors.read())

            # NOTE(review): the output is read in binary mode and compared
            # with str literals: this only works where str is a byte string
            # (Python 2) -- confirm the targeted Python version.
            with open(stdo, 'rb') as lines:
                for line in lines:
                    if 'cannot open temporary file' in line:
                        raise Exception('ctags: cannot open temporary file '
                                        ': Permission denied')

                    # skip lines starting with "!"
                    if line.startswith('!'):
                        continue

                    line = line.strip()
                    if not line:
                        continue

                    splitted = line.split('\t')

                    if line.endswith(('function\tfile:', 'prototype\tfile:')):
                        self.local_functions.append(splitted[0])

                    elif line.endswith(('function', 'prototype')):
                        self.global_functions.append(splitted[0])
        finally:
            fileutils.delete(ctags_temp_dir)
Esempio n. 5
0
def demangle_chunk(symbols):
    """
    Return a list of unique demangled Elf symbol names for the `symbols`
    list of names, using binutils c++filt. The demangled names are filtered
    for eventual known junk.

    Return an empty list when `symbols` is empty or None.
    Raise an Exception with the content of the saved stderr file when the
    command return code is not zero.
    """
    if not symbols:
        return []

    args = ['--no-strip-underscores', '--no-verbose', '--no-params'] + symbols

    cmd_loc = get_location(SCANCODE_CPLUSPLUSFILT_EXE)
    lib_loc = get_location(SCANCODE_CPLUSPLUSFILT_LIB)
    rc, out, err = command.execute2(cmd_loc,
                                    args,
                                    lib_dir=lib_loc,
                                    to_files=True)
    if rc != 0:
        # use a context manager so that the stderr file is always closed
        with open(err) as errors:
            raise Exception(errors.read())

    # NOTE(review): the output is read in binary mode and compared with str
    # literals: this only works where str is a byte string (Python 2) --
    # confirm the targeted Python version.
    demangled = set()
    with open(out, 'rb') as names:
        for name in names:
            # ignore junk injected by the compiler
            if any(name.startswith(junk) for junk in demangled_junk):
                continue
            # do not keep params for CPP functions, just the function
            if '(' in name:
                name = name.split('(')[0]
            demangled.add(name.strip())
    return list(demangled)
Esempio n. 6
0
    def parse(self):
        """
        Parse readelf sections to populate the Elf object for an elf
        location.

        Run the command at `self.cmd_loc` with the `--wide` option, then
        `self.readelf_options`, then `self.elf_location`, saving its outputs
        to files. For each non-empty stripped line of the saved stdout, the
        handler returned by `self.get_handler`, if any, is called with this
        object and the open lines iterator.

        Raise an Exception with the content of the saved stderr and stdout
        files when the command return code is not zero.
        """
        readelf_args = ['--wide']
        readelf_args.extend(self.readelf_options)
        readelf_args.append(self.elf_location)

        rc, out, err = command.execute2(
            cmd_loc=self.cmd_loc,
            args=readelf_args,
            lib_dir=self.lib_loc,
            to_files=True,
        )
        if rc != 0:
            # use context managers so that both files are always closed
            with open(err) as errors, open(out) as output:
                raise Exception(errors.read() + '\n' + output.read())

        # loop through each line passing control to a handler as needed
        with open(out, 'rb') as elf_lines:
            for line in elf_lines:
                line = line.strip()
                if not line:
                    continue
                # get the handler for this line, if there is one
                handler = self.get_handler(line)
                if handler:
                    # the same iterator is handed over to the handler,
                    # presumably so that it can consume the lines of its
                    # section -- TODO confirm against the handlers
                    handler(self, elf_lines)
Esempio n. 7
0
def list_entries(location, arch_type='*'):
    """
    Return the entries found in a 7zip-supported archive file at `location`,
    as built by `parse_7z_listing` from the saved 7zip listing output.
    Return an empty list if `location` is a RAR archive.

    `arch_type` is a 7zip CLI archive type code that is passed to 7zip with
    a "-t" prefix (e.g. "-t*" with the default). It can be None or empty: an
    empty string argument is then passed in place of the "-t" option.
    """
    assert location
    abs_location = os.path.abspath(os.path.expanduser(location))

    if is_rar(location):
        return []

    # NB: the default "*" type gives "-t*" which ensures that all archive
    # types are honored
    type_option = '-t' + arch_type if arch_type else ''

    # the listing is produced as UTF on windows to avoid damaging binary
    # paths in console outputs
    utf_option = '-sccUTF-8' if on_windows else ''

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = dict(os.environ)
    env[u'TZ'] = u'GMT'
    env = command.get_env(env)

    # 7z arguments, in order: the listing command, the tech info option, the
    # archive type, the console encoding, the archive path and the password.
    # NOTE(review): the original note says that an empty password is passed
    # so that extraction with passwords WILL fail -- confirm that this
    # literal password value is the intended one.
    args = [
        'l',
        '-slt',
        type_option,
        utf_option,
        abs_location,
        '******',
    ]

    lib_dir = get_location(EXTRACTCODE_7ZIP_LIBDIR)
    cmd_loc = get_location(EXTRACTCODE_7ZIP_EXE)

    rc, stdout, stderr = command.execute2(
        cmd_loc=cmd_loc,
        args=args,
        lib_dir=lib_dir,
        env=env,
        to_files=True,
    )

    if TRACE:
        logger.debug(
            'list_entries: rc: {rc}\nstderr: file://{stderr}\nstdout: file://{stdout}\n'
            .format(rc=rc, stderr=stderr, stdout=stdout))

    if rc != 0:
        # FIXME: this test is useless
        _error = get_7z_errors(stdout) or UNKNOWN_ERROR

    return parse_7z_listing(stdout, bool(utf_option))
Esempio n. 8
0
def extract(location, target_dir, arch_type='*'):
    """
    Extract all the files of a 7zip-supported archive file at `location` in
    the `target_dir` directory and return the result of `get_7z_warnings`
    for the 7zip output, i.e. the warning messages.

    Raise an ExtractErrorFailedToExtract if `location` is a RAR archive or
    if the 7zip return code is not zero.

    `arch_type` is the type of 7zip archive passed to the -t 7zip option.
    It can be None.
    """
    assert location
    assert target_dir
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    if is_rar(location):
        raise ExtractErrorFailedToExtract('RAR extraction disactivated')

    # note: there are some issues with the extraction of debian .deb ar files
    # see sevenzip bug http://sourceforge.net/p/sevenzip/bugs/1472/

    # NB: the default "*" type gives "-t*" which ensures that all archive
    # types are honored
    type_option = '-t' + arch_type if arch_type else ''

    # These options do not work well with p7zip for now and are not used:
    # - '-ssc-' to treat the FS as case insensitive and get consistent names
    #   across OSes
    # - '-sccUTF-8' to force any console output to be UTF-8 encoded
    #   TODO: add this may be for a UTF output on Windows only
    # - '-w<path>' to set a working tmp dir

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = dict(os.environ)
    env[u'TZ'] = u'GMT'
    env = command.get_env(env)

    # 7z arguments, in order: the extract command, yes to all prompts, auto
    # rename of duplicated names (renaming may not behave the same way on all
    # OSes in particular Mac and Windows), the archive type, the archive path
    # and the password.
    # NOTE(review): the original note says that an empty password is passed
    # so that extraction with passwords WILL fail -- confirm that this
    # literal password value is the intended one.
    args = ['x', '-y', '-aou', type_option, abs_location, '******']

    lib_dir = get_location(EXTRACTCODE_7ZIP_LIBDIR)
    cmd_loc = get_location(EXTRACTCODE_7ZIP_EXE)

    # Note: 7z does extract in the current directory so the command is run
    # with the target dir as its working directory
    ex_args = {
        'cmd_loc': cmd_loc,
        'args': args,
        'lib_dir': lib_dir,
        'cwd': abs_target_dir,
        'env': env,
    }

    if TRACE:
        logger.debug('extract: args:')
        pprint(ex_args)

    rc, stdout, stderr = command.execute2(**ex_args)

    if rc == 0:
        extractcode.remove_backslashes_and_dotdots(abs_target_dir)
        return get_7z_warnings(stdout)

    if TRACE:
        logger.debug(
            'extract: failure: {rc}\nstderr: {stderr}\nstdout: {stdout}\n'.
            format(rc=rc, stderr=stderr, stdout=stdout))
    error = get_7z_errors(stdout) or UNKNOWN_ERROR
    raise ExtractErrorFailedToExtract(error)
Esempio n. 9
0
def extract_file_by_file(location,
                         target_dir,
                         arch_type='*',
                         skip_symlinks=True):
    """
    Extract all files using a one-by-one process from a 7zip-supported archive
    file at location in the `target_dir` directory.

    The one-by-one process is only used when some entry paths differ only by
    their case. Otherwise the extraction is delegated to
    `extract_all_files_at_once` and its result is returned.

    Return a list of warning messages if any or an empty list.
    Raise an ExtractErrorFailedToExtract with a mapping of entry path to
    error message if the extraction of any entry failed.

    `arch_type` is the type of 7zip archive passed to the -t 7zip option.
    Can be None.

    NOTE: `skip_symlinks` is accepted but is not used in this function.
    """
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    entries, errors_msgs = list_entries(location, arch_type)
    entries = list(entries)

    # Determine if we need a one-by-one approach: technically the approach is
    # to check if we have files that are in the same dir and have the same
    # name when the case is ignored. We take a simpler approach: we check if
    # all paths are unique when we ignore the case: for that we only check
    # that the length of two paths sets are the same: one set as-is and the
    # other lowercased.

    paths_as_is = set(e.path for e in entries)
    paths_no_case = set(p.lower() for p in paths_as_is)
    need_by_file = len(paths_as_is) != len(paths_no_case)

    if not need_by_file:
        # use regular extract
        return extract_all_files_at_once(location=location,
                                         target_dir=target_dir,
                                         arch_type=arch_type)

    # now we are extracting one file at a time. this is a tad painful because we
    # are dealing with a full command execution at each time.

    errors = {}
    warnings = {}
    tmp_dir = fileutils.get_temp_dir(prefix='extractcode-extract-')
    try:
        for i, entry in enumerate(entries):

            if not entry.is_file:
                continue

            # each entry is extracted in its own numbered temp subdirectory
            tmp_extract_dir = os.path.join(tmp_dir, str(i))
            fileutils.create_dir(tmp_extract_dir)

            ex_args = build_7z_extract_command(
                location=location,
                target_dir=tmp_extract_dir,
                single_entry=entry,
                arch_type=arch_type,
            )
            rc, stdout, stderr = command.execute2(**ex_args)

            error = get_7z_errors(stdout, stderr)
            if error or rc != 0:
                error = error or UNKNOWN_ERROR
                if TRACE:
                    logger.debug(
                        'extract: failure: {rc}\n'
                        'stderr: {stderr}\nstdout: {stdout}'.format(
                            rc=rc, stderr=stderr, stdout=stdout))
                errors[entry.path] = error
                continue

            # these are all for a single file path
            warns = get_7z_warnings(stdout) or {}
            wmsg = '\n'.join(warns.values())
            if wmsg:
                if entry.path in warnings:
                    warnings[entry.path] += '\n' + wmsg
                else:
                    warnings[entry.path] = wmsg

            # finally copy that extracted file to its target location,
            # possibly renamed
            source_file_name = fileutils.file_name(entry.path)
            source_file_loc = os.path.join(tmp_extract_dir, source_file_name)
            if not os.path.exists(source_file_loc):
                if entry.path in errors:
                    errors[entry.path] += '\nNo file name extracted.'
                else:
                    errors[entry.path] = 'No file name extracted.'
                continue

            safe_path = paths.safe_path(entry.path, posix=True)
            target_file_loc = os.path.join(target_dir, safe_path)
            target_file_dir = os.path.dirname(target_file_loc)
            fileutils.create_dir(target_file_dir)

            unique_target_file_loc = extractcode.new_name(target_file_loc,
                                                          is_dir=False)

            if TRACE:
                logger.debug(
                    'extract: unique_target_file_loc: from {} to {}'.format(
                        target_file_loc, unique_target_file_loc))

            if os.path.isfile(source_file_loc):
                fileutils.copyfile(source_file_loc, unique_target_file_loc)
            else:
                fileutils.copytree(source_file_loc, unique_target_file_loc)
    finally:
        # the extracted files were copied to the target directory: always
        # remove the temp directory so that it does not pile up on disk
        fileutils.delete(tmp_dir)

    extractcode.remove_backslashes_and_dotdots(abs_target_dir)
    if errors:
        raise ExtractErrorFailedToExtract(errors)

    return convert_warnings_to_list(warnings)