Ejemplo n.º 1
0
def try_to_extract(location, target_dir, extractor):
    """
    Extract archive at `location` to `target_dir` trying the `extractor` function.
    Return the list of warnings collected so far. If extraction or the copy
    to `target_dir` fails, the failure is swallowed and no exception is raised.

    Extraction is first done in a temporary directory which is then copied to
    `target_dir`. The temporary directory is always deleted.

    Note: there are a few cases where we want to attempt extracting something
    but do not care if this fails.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(
        os.path.abspath(os.path.expanduser(target_dir)))
    temp_target = compat.unicode(
        fileutils.get_temp_dir(prefix='extractcode-extract1-'))
    warnings = []
    try:
        warnings = extractor(abs_location, temp_target)
        if TRACE:
            logger.debug('try_to_extract: temp_target: %(temp_target)r' %
                         locals())
        fileutils.copytree(temp_target, abs_target_dir)
    except Exception:
        # Best-effort by design: ignore extraction and copy failures. Only
        # Exception is caught so that KeyboardInterrupt and SystemExit still
        # propagate instead of being silently discarded.
        pass
    finally:
        fileutils.delete(temp_target)
    return warnings
Ejemplo n.º 2
0
def extract_with_fallback(location, target_dir, extractor1, extractor2):
    """
    Extract archive at `location` to `target_dir` trying first the primary
    `extractor1` function. If extract fails with this function, attempt
    extraction again with the fallback `extractor2` function.
    Return a list of warning messages. Raise exceptions on errors of the
    fallback extractor.

    Each attempt extracts to its own temporary directory which is copied to
    `target_dir` on success and always deleted afterwards.

    Note: there are a few cases where the primary extractor for a type may fail
    and a fallback extractor will succeed.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(os.path.abspath(os.path.expanduser(target_dir)))
    # attempt extract first to a temp dir
    temp_target1 = compat.unicode(fileutils.get_temp_dir(prefix='scancode-extract1-'))
    try:
        warnings = extractor1(abs_location, temp_target1)
        if TRACE:
            logger.debug('extract_with_fallback: temp_target1: %(temp_target1)r' % locals())
        fileutils.copytree(temp_target1, abs_target_dir)
    except Exception:
        # Only Exception triggers the fallback: KeyboardInterrupt and
        # SystemExit must propagate rather than start a second extraction.
        #
        # The second temp dir is created BEFORE entering the try block so
        # that the cleanup below never references an unbound name if the
        # temp dir creation itself fails.
        temp_target2 = compat.unicode(fileutils.get_temp_dir(prefix='scancode-extract2-'))
        try:
            warnings = extractor2(abs_location, temp_target2)
            if TRACE:
                logger.debug('extract_with_fallback: temp_target2: %(temp_target2)r' % locals())
            fileutils.copytree(temp_target2, abs_target_dir)
        finally:
            fileutils.delete(temp_target2)
    finally:
        fileutils.delete(temp_target1)
    return warnings
Ejemplo n.º 3
0
 def test_execute2_non_ascii_output_py2(self):
     # The child process prints a byte string holding the non-ASCII byte
     # \xe4: the expected captured stdout has a plain "a" in its place.
     interpreter = sys.executable
     script = "print b'non ascii: \\xe4 just passed it !'"
     rc, stdout, stderr = command.execute2(interpreter, ['-c', script])
     assert stderr == b''
     assert stdout == b'non ascii: a just passed it !'
     assert 0 == rc
     # converting the captured output to unicode must not raise
     compat.unicode(stdout)
Ejemplo n.º 4
0
def as_unicode(line):
    """
    Return a unicode text line from a text line, with null bytes removed.
    Try to decode line as Unicode. Try first some default encodings,
    then attempt Unicode trans-literation and finally
    fall-back to ASCII strings extraction.

    A `line` that is already unicode is returned as-is (minus null bytes).

    TODO: Add file/magic detection, UnicodeDammit/BS3/4
    """
    if isinstance(line, compat.unicode):
        return remove_null_bytes(line)

    try:
        s = line.decode('UTF-8')
    except UnicodeDecodeError:
        try:
            # FIXME: latin-1 may never fail: every byte value maps to a code
            # point in this codec, so the fallbacks below are presumably never
            # reached for byte strings. They are kept as defensive code.
            s = line.decode('LATIN-1')
        except UnicodeDecodeError:
            try:
                # Convert some byte string to ASCII characters as Unicode including
                # replacing accented characters with their non- accented NFKD
                # equivalent. Non ISO-Latin and non ASCII characters are stripped
                # from the output. Does not preserve the original length offsets.
                # For Unicode NFKD equivalence, see:
                # http://en.wikipedia.org/wiki/Unicode_equivalence
                s = unicodedata.normalize('NFKD', line).encode('ASCII')
            except UnicodeDecodeError:
                try:
                    enc = chardet.detect(line)['encoding']
                    s = compat.unicode(line, enc)
                except UnicodeDecodeError:
                    # fall-back to strings extraction if all else fails.
                    # Use the original `line` here: `s` was never assigned
                    # on this path since every decoding attempt failed.
                    s = strings.string_from_string(line)
    return remove_null_bytes(s)
Ejemplo n.º 5
0
def extract_twice(location, target_dir, extractor1, extractor2):
    """
    Extract a nested compressed archive at `location` to `target_dir` using
    the `extractor1` function to a temporary directory then the `extractor2`
    function on the extracted payload of `extractor1`.

    Return a list of warning messages. Raise exceptions on errors.
    The temporary directory is deleted in all cases, including when either
    extractor raises.

    Typical nested archives include compressed tarballs and RPMs (containing a
    compressed cpio).

    Note: it would be easy to support deeper extractor chains, but this gets
    hard to trace and debug very quickly. A depth of two is simple and sane and
    covers most common cases.
    """
    if on_linux and py2:
        location = fileutils.fsencode(location)
        target_dir = fileutils.fsencode(target_dir)
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(
        os.path.abspath(os.path.expanduser(target_dir)))
    temp_target = compat.unicode(
        fileutils.get_temp_dir(prefix='extractcode-extract-'))

    try:
        # extract first the intermediate payload to a temp dir. This runs
        # inside the try block so that the temp dir is also cleaned up when
        # the first extractor fails.
        warnings = extractor1(abs_location, temp_target)
        if TRACE:
            logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())

        # extract this intermediate payload to the final target_dir
        inner_archives = list(
            fileutils.resource_iter(temp_target, with_dirs=False))
        if not inner_archives:
            warnings.append(location + ': No files found in archive.')
        else:
            for extracted1_loc in inner_archives:
                if TRACE:
                    logger.debug(
                        'extract_twice: extractor2: %(extracted1_loc)r' %
                        locals())
                warnings.extend(extractor2(extracted1_loc, abs_target_dir))
    finally:
        # cleanup the temporary output from extractor1
        fileutils.delete(temp_target)
    return warnings
Ejemplo n.º 6
0
def get_ip(s):
    """
    Return the address object built by `ipaddress.ip_address` from the string
    `s`, or False if `s` is not an IP address.
    """
    if is_ip(s):
        try:
            return ipaddress.ip_address(compat.unicode(s))
        except ValueError:
            # looked like an IP but could not be parsed as one
            pass
    return False
Ejemplo n.º 7
0
def path_handlers(path, posix=True):
    """
    Return a (path module, path separator) tuple suitable to split and join
    `path`. POSIX conventions are used when `posix` is True or when `path`
    is detected as a POSIX path; Windows conventions are used otherwise.
    The separator is converted to unicode when `path` is unicode.
    """
    # the path is always probed, even when `posix` already forces the outcome
    looks_posix = is_posixpath(path)

    if posix or looks_posix:
        pathmod = posixpath
        path_sep = POSIX_PATH_SEP
    else:
        pathmod = ntpath
        path_sep = WIN_PATH_SEP

    # keep the separator type aligned with a unicode path
    if isinstance(path, compat.unicode):
        path_sep = compat.unicode(path_sep)

    return pathmod, path_sep
Ejemplo n.º 8
0
def check_error(result, func, args):  # NOQA
    """
    ctypes error handler/checker: return `result` unchanged when it does not
    signal an error, raise a MagicException otherwise.

    A `result` is treated as an error when it is None, a negative integer, or
    a bytes or unicode string starting with "cannot open". The exception
    message is obtained by calling `_magic_error` with the first item of
    `args`. The `func` argument is not used.
    """
    if result is None:
        failed = True
    elif isinstance(result, int) and result < 0:
        failed = True
    elif (isinstance(result, bytes)
          and compat.unicode(result, encoding='utf-8').startswith('cannot open')):
        failed = True
    elif isinstance(result, compat.unicode) and result.startswith('cannot open'):
        failed = True
    else:
        failed = False

    if not failed:
        return result

    err = _magic_error(args[0])
    raise MagicException(err)
Ejemplo n.º 9
0
def path_progress_message(item, verbose=False, prefix='Scanned: '):
    """
    Return a styled progress message for an `item` tuple of
    (location, rid, scan_errors, *other items).

    Return an empty string if `item` is empty, or if `verbose` is False and
    10 or fewer characters are available to display a file name.
    The path is styled in red when the item has errors, in green otherwise.
    """
    if not item:
        return ''

    raw_location, errors = item[0], item[2]
    location = compat.unicode(toascii(raw_location))

    if verbose:
        progress_line = location
    else:
        available_width = file_name_max_len()
        if available_width <= 10:
            # not enough room in the progress bar to show a file name
            return ''
        progress_line = fixed_width_file_name(location, available_width)

    if errors:
        color = 'red'
    else:
        color = 'green'
    return style(prefix) + style(progress_line, fg=color)
Ejemplo n.º 10
0
def find_urls(location, unique=True):
    """
    Yield (url, line number) tuples for the URLs found in the file at
    `location`. Yield only unique items if `unique` is True.
    `location` can be a list of strings for testing.
    """
    matches = find(location, [('urls', urls_regex())])
    if TRACE:
        # materialize the matches so they can be both logged and filtered
        matches = list(matches)
        for match in matches:
            logger_debug('url match:', match)

    # the order of filters IS important
    filters = [
        verbatim_crlf_url_cleaner,
        end_of_url_cleaner,
        empty_urls_filter,
        scheme_adder,
        user_pass_cleaning_filter,
        build_regex_filter(INVALID_URLS_PATTERN),
        canonical_url_cleaner,
        junk_url_hosts_filter,
        junk_urls_filter,
    ]
    if unique:
        filters.append(unique_filter)

    for _key, url, _line, lineno in apply_filters(matches, *filters):
        if TRACE_URL:
            logger_debug('find_urls: lineno:', lineno, '_line:', repr(_line),
                         'type(url):', type(url), 'url:', repr(url))
        yield compat.unicode(url), lineno
Ejemplo n.º 11
0
# Path-related constants. They are bytes on Linux with Python 2 and unicode
# text everywhere else, so that they can be combined with paths of the
# matching type.
if not (on_linux and py2):
    PATH_TYPE = compat.unicode
    EMPTY_STRING = ''
    DOT = '.'
    POSIX_PATH_SEP = '/'
    WIN_PATH_SEP = '\\'
    PATH_SEP = compat.unicode(os.sep)
    PATH_ENV_VAR = 'PATH'
    PATH_ENV_SEP = compat.unicode(os.pathsep)
else:
    PATH_TYPE = bytes
    EMPTY_STRING = b''
    DOT = b'.'
    POSIX_PATH_SEP = b'/'
    WIN_PATH_SEP = b'\\'
    PATH_SEP = bytes(os.sep)
    PATH_ENV_VAR = b'PATH'
    PATH_ENV_SEP = bytes(os.pathsep)

# both kinds of path separators, joined in a single string
ALL_SEPS = POSIX_PATH_SEP + WIN_PATH_SEP
"""
File, paths and directory utility functions.
"""

#
# DIRECTORIES
#


def create_dir(location):
    """