Example #1
def extract_with_fallback(location, target_dir, extractor1, extractor2):
    """
    Extract the archive at `location` to `target_dir`, trying the `extractor1`
    function first. If extraction fails, attempt extraction again with the
    `extractor2` function.
    Return a list of warning messages. Raise exceptions on errors.

    Note: there are a few cases where the primary extractor for a type may fail and
    a secondary extractor will succeed.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    # attempt extract first to a temp dir
    temp_target1 = unicode(fileutils.get_temp_dir('extract1'))
    try:
        warnings = extractor1(abs_location, temp_target1)
        if TRACE:
            logger.debug('extract_with_fallback: temp_target1: %(temp_target1)r' % locals())
        fileutils.copytree(temp_target1, abs_target_dir)
    except Exception:
        temp_target2 = unicode(fileutils.get_temp_dir('extract2'))
        try:
            warnings = extractor2(abs_location, temp_target2)
            if TRACE:
                logger.debug('extract_with_fallback: temp_target2: %(temp_target2)r' % locals())
            fileutils.copytree(temp_target2, abs_target_dir)
        finally:
            fileutils.delete(temp_target2)
    finally:
        fileutils.delete(temp_target1)
    return warnings
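
A minimal usage sketch for this fallback pattern; the two extractor callables below are hypothetical stand-ins for the real extractors, which share the same (location, target_dir) -> warnings signature:

def failing_extractor(location, target_dir):
    # hypothetical primary extractor that always fails
    raise ValueError('primary extraction failed')

def noop_extractor(location, target_dir):
    # hypothetical secondary extractor that succeeds with no warnings
    return []

# falls back to the secondary extractor after the primary raises
warnings = extract_with_fallback(
    'archive.zip', 'extracted/', failing_extractor, noop_extractor)
assert warnings == []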
Example #2
    def get_temp_dir(self, sub_dir_path=None):
        """
        Create a new unique temporary directory. If `sub_dir_path` is
        provided, also create that directory hierarchy inside this temporary
        directory. Return the location of this unique directory joined with
        `sub_dir_path` if any.
        """
        # ensure that we have a new unique temp directory for each test run
        global test_run_temp_dir
        if not test_run_temp_dir:
            from scancode_config import scancode_root_dir
            test_tmp_root_dir = path.join(scancode_root_dir, 'tmp')
            # add a space in the path to test handling of paths with spaces
            test_run_temp_dir = fileutils.get_temp_dir(
                base_dir=test_tmp_root_dir, prefix='scancode-tk-tests -')
        if on_linux and py2:
            test_run_temp_dir = fsencode(test_run_temp_dir)

        test_run_temp_subdir = fileutils.get_temp_dir(
            base_dir=test_run_temp_dir, prefix='')

        if sub_dir_path:
            # create a sub directory hierarchy if requested
            sub_dir_path = to_os_native_path(sub_dir_path)
            test_run_temp_subdir = path.join(test_run_temp_subdir,
                                             sub_dir_path)
            fileutils.create_dir(test_run_temp_subdir)
        return test_run_temp_subdir
Example #3
def get_gem_metadata(location):
    """
    Return the string content of the metadata of a .gem archive file at
    `location`. Raise an Exception if no metadata is found.
    """
    extract_loc = None
    try:
        # Extract first level of tar archive
        extract_loc = fileutils.get_temp_dir(prefix='scancode-extract-')
        abs_location = abspath(expanduser(location))
        archive.extract_tar(abs_location, extract_loc)

        # The gzipped metadata is the second level of archive.
        metadata = os.path.join(extract_loc, 'metadata')
        # or it can be a plain, non-gzipped file
        metadata_gz = metadata + '.gz'

        if os.path.exists(metadata):
            with open(metadata, 'rb') as met:
                content = met.read()

        elif os.path.exists(metadata_gz):
            content = archive.get_gz_compressed_file_content(metadata_gz)

        else:
            raise Exception('No gem metadata found in RubyGem .gem file.')

        return content

    finally:
        if extract_loc:
            fileutils.delete(extract_loc)
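
The content returned by get_gem_metadata is a YAML document (typically tagged !ruby/object:Gem::Specification, which plain yaml.safe_load rejects), so a minimal sketch can just scan for simple top-level keys; the .gem path is hypothetical:

content = get_gem_metadata('mygem-0.1.0.gem')
for line in content.decode('utf-8', errors='replace').splitlines():
    # pick out a couple of plain top-level gemspec fields
    if line.startswith(('name:', 'rubygems_version:')):
        print(line)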
Example #4
def try_to_extract(location, target_dir, extractor):
    """
    Extract the archive at `location` to `target_dir` using the `extractor`
    function. If extraction fails, just return without raising exceptions.

    Note: there are a few cases where we want to attempt extracting something
    but do not care if this fails.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(
        os.path.abspath(os.path.expanduser(target_dir)))
    temp_target = compat.unicode(
        fileutils.get_temp_dir(prefix='extractcode-extract1-'))
    warnings = []
    try:
        warnings = extractor(abs_location, temp_target)
        if TRACE:
            logger.debug('try_to_extract: temp_target: %(temp_target)r' %
                         locals())
        fileutils.copytree(temp_target, abs_target_dir)
    except Exception:
        return warnings
    finally:
        fileutils.delete(temp_target)
    return warnings
Example #5
def extract_file(location, target, kinds=extractcode.default_kinds):
    """
    Extract a single archive at `location` in the `target` directory if it is
    of a kind supported in the `kinds` kind tuple.
    """
    warnings = []
    errors = []
    extractor = archive.get_extractor(location, kinds)
    if TRACE:
        logger.debug('extract_file: extractor: for: %(location)r with kinds: %(kinds)r : ' % locals()
                     + getattr(extractor, '__module__', '')
                     + '.' + getattr(extractor, '__name__', ''))
    if extractor:
        yield ExtractEvent(location, target, done=False, warnings=[], errors=[])
        try:
            # extract first to a temp directory.
            # if there is an error,  the extracted files will not be moved
            # to target
            tmp_tgt = fileutils.get_temp_dir('extract')
            abs_location = abspath(expanduser(location))
            warnings.extend(extractor(abs_location, tmp_tgt))
            fileutils.copytree(tmp_tgt, target)
            fileutils.delete(tmp_tgt)
        except Exception as e:
            if TRACE:
                logger.debug('extract_file: ERROR: %(location)r: %(errors)r, %(e)r.\n' % locals())
            errors = [str(e).strip(' \'"')]
        finally:
            yield ExtractEvent(location, target, done=True, warnings=warnings, errors=errors)
Example #6
def extract_file(location, target, kinds=extractcode.default_kinds, verbose=False):
    """
    Extract a single archive at `location` in the `target` directory if it is
    of a kind supported in the `kinds` kind tuple.
    """
    warnings = []
    errors = []
    extractor = archive.get_extractor(location, kinds)
    if TRACE:
        logger.debug('extract_file: extractor: for: %(location)r with kinds: %(kinds)r : ' % locals()
                     + getattr(extractor, '__module__', '')
                     + '.' + getattr(extractor, '__name__', ''))
    if extractor:
        yield ExtractEvent(location, target, done=False, warnings=[], errors=[])
        try:
            # extract first to a temp directory: if there is an error,  the
            # extracted files will not be moved to target
            tmp_tgt = fileutils.get_temp_dir(prefix='scancode-extract-')
            abs_location = abspath(expanduser(location))
            warns = extractor(abs_location, tmp_tgt) or []
            warnings.extend(warns)
            fileutils.copytree(tmp_tgt, target)
            fileutils.delete(tmp_tgt)
        except Exception as e:
            errors = [str(e).strip(' \'"')]
            if verbose:
                errors.append(traceback.format_exc())
            if TRACE:
                tb = traceback.format_exc()
                logger.debug('extract_file: ERROR: %(location)r: %(errors)r\n%(e)r\n%(tb)s' % locals())

        finally:
            yield ExtractEvent(location, target, done=True, warnings=warnings, errors=errors)
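
Because this variant of extract_file is a generator of ExtractEvent tuples, callers must iterate it to drive the extraction; a small consumption sketch (the archive path is hypothetical):

for event in extract_file('sample.tar.gz', 'extracted/'):
    if event.done:
        if event.errors:
            print('extraction failed:', event.errors)
        elif event.warnings:
            print('extracted with warnings:', event.warnings)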
Example #7
def convert_to_utf8(location):
    """
    Convert the file at location to UTF-8 text.
    Return the location of the converted file or None.
    """
    if not get_type(location).is_text:
        return location
    with open(location, 'rb') as inp:
        start = inp.read(4096)
    encoding = chardet.detect(start)
    if encoding:
        encoding = encoding.get('encoding', None)
        if encoding:
            target = os.path.join(fileutils.get_temp_dir('markup'),
                                  fileutils.file_name(location))
            with codecs.open(location,
                             'rb',
                             encoding=encoding,
                             errors='replace',
                             buffering=16384) as inf:
                with codecs.open(target, 'wb', encoding='utf-8') as outf:
                    outf.write(inf.read())
            return target
        else:
            # chardet somehow failed to detect an encoding
            return location
Example #8
def extract_file(location, target, kinds=extractcode.default_kinds):
    """
    Extract a single archive at `location` in the `target` directory if it is
    of a kind supported in the `kinds` kind tuple.
    """
    warnings = []
    errors = []
    extractor = archive.get_extractor(location, kinds)
    if DEBUG:
        logger.debug(
            'extract_file: extractor: for: %(location)r with kinds: %(kinds)r : '
            % locals() + getattr(extractor, '__module__', '') + '.' +
            getattr(extractor, '__name__', ''))
    if extractor:
        yield ExtractEvent(location,
                           target,
                           done=False,
                           warnings=[],
                           errors=[])
        try:
            # extract first to a temp directory.
            # if there is an error,  the extracted files will not be moved
            # to target
            tmp_tgt = fileutils.get_temp_dir('extract')
            abs_location = abspath(expanduser(location))
            warnings.extend(extractor(abs_location, tmp_tgt))
            fileutils.copytree(tmp_tgt, target)
            fileutils.delete(tmp_tgt)
        except Exception as e:
            if DEBUG:
                logger.debug(
                    'extract_file: ERROR: %(location)r: %(errors)r, %(e)r.\n' %
                    locals())
            errors = [str(e).strip(' \'"')]
        finally:
            yield ExtractEvent(location, target, done=True, warnings=warnings, errors=errors)
Example #9
def download_url(url, file_name=None, verify=True):
    """
    Return the temporary location of the file fetched at the remote url. Use
    `file_name` if provided or create a file name based on the last url
    segment. If `verify` is True, SSL certificate verification is performed.
    Otherwise, no verification is done but a warning will be printed.
    """
    requests_args = dict(timeout=10, verify=verify)
    file_name = file_name or fileutils.file_name(url)

    try:
        response = requests.get(url, **requests_args)
    except (ConnectionError, InvalidSchema) as e:
        logger.error('fetch: Download failed for %(url)r' % locals())
        raise

    status = response.status_code
    if status != 200:
        msg = 'fetch: Download failed for %(url)r with %(status)r' % locals()
        logger.error(msg)
        raise Exception(msg)

    tmp_dir = fileutils.get_temp_dir(base_dir='fetch')
    output_file = os.path.join(tmp_dir, file_name)
    with open(output_file, 'wb') as out:
        out.write(response.content)

    return output_file
Example #10
def download_url(url, file_name=None, verify=True, timeout=10):
    """
    Fetch `url` and return the temporary location where the fetched content was
    saved. Use `file_name` if provided or create a new `file_name` based on the
    last url segment. If `verify` is True, SSL certificate verification is
    performed. Otherwise, no verification is done but a warning will be printed.
    `timeout` is the timeout in seconds.
    """
    requests_args = dict(timeout=timeout, verify=verify)
    file_name = file_name or fileutils.file_name(url)

    try:
        response = requests.get(url, **requests_args)
    except (ConnectionError, InvalidSchema) as e:
        logger.error('download_url: Download failed for %(url)r' % locals())
        raise

    status = response.status_code
    if status != 200:
        msg = 'download_url: Download failed for %(url)r with %(status)r' % locals()
        logger.error(msg)
        raise Exception(msg)

    tmp_dir = fileutils.get_temp_dir(prefix='fetch-')
    output_file = os.path.join(tmp_dir, file_name)
    with open(output_file, 'wb') as out:
        out.write(response.content)

    return output_file
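
A usage sketch for download_url; the URL is illustrative only:

saved_path = download_url(
    'https://example.com/files/archive.tar.gz',
    file_name='archive.tar.gz',
    verify=True,
    timeout=30,
)
print('downloaded to:', saved_path)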
Example #11
def uncompress_file(location, decompressor):
    """
    Uncompress a compressed file at location and return a temporary location of
    the uncompressed file and a list of warning messages. Raise Exceptions on
    errors. Use the `decompressor` object for decompression.
    """
    # FIXME: do not create a sub-directory and instead strip the "compression"
    # extension such as gz, etc., or introspect the archive header to get the file
    # name when present.
    assert location
    assert decompressor

    warnings = []
    base_name = fileutils.file_base_name(location)
    target_location = os.path.join(fileutils.get_temp_dir(base_dir='extract'),
                                   base_name)
    with decompressor(location, 'rb') as compressed:
        with open(target_location, 'wb') as uncompressed:
            buffer_size = 32 * 1024 * 1024
            while True:
                chunk = compressed.read(buffer_size)
                if not chunk:
                    break
                uncompressed.write(chunk)
        if getattr(decompressor, 'has_trailing_garbage', False):
            warnings.append(location + ': Trailing garbage found and ignored.')
    return target_location, warnings
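
The `decompressor` argument is any callable with an open()-like (location, mode) signature; for a sketch, the stdlib gzip.open (or bz2.open) fits:

import gzip

# gzip.open has no has_trailing_garbage attribute, so the getattr() check
# above simply evaluates to False for this decompressor
target_location, warnings = uncompress_file('somefile.txt.gz', gzip.open)
print(target_location, warnings)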
Example #12
    def test_is_dir(self):
        test_dir = self.get_test_loc('symlink', copy=True)
        temp_dir = fileutils.get_temp_dir()
        test_link = join(temp_dir, 'test-dir-link')
        os.symlink(test_dir, test_link)
        assert filetype.is_dir(test_link, follow_symlinks=True)
        assert not filetype.is_dir(test_link, follow_symlinks=False)
Example #14
def execute(cmd, args, root_dir=None, cwd=None, env=None, to_files=False):
    """
    Run a `cmd` external command with the `args` arguments list and return the
    return code, the stdout and stderr.

    To avoid RAM exhaustion, always write stdout and stderr streams to files.

    If `to_files` is False, return the content of stderr and stdout as ASCII
    strings. Otherwise, return the locations to the stderr and stdout
    temporary files.

    Resolve the `cmd` location using the os/arch local/vendored location based
    on `root_dir`. No resolution is done if `root_dir` is None.

    Run the command using the `cwd` current working directory with an
    `env` dict of environment variables.
    """
    assert cmd
    cmd_loc, bin_dir, lib_dir = get_locations(cmd, root_dir)
    full_cmd = [cmd_loc or cmd] + (args or [])
    env = get_env(env, lib_dir) or None
    cwd = cwd or curr_dir

    # temp files for stderr and stdout
    tmp_dir = fileutils.get_temp_dir(base_dir='cmd')
    sop = os.path.join(tmp_dir, 'stdout')
    sep = os.path.join(tmp_dir, 'stderr')

    # shell==True is DANGEROUS but we are not running arbitrary commands
    # though we can execute command that just happen to be in the path
    shell = True if on_windows else False

    logger.debug(
        'Executing command %(cmd)r as %(full_cmd)r with: env=%(env)r, '
        'shell=%(shell)r, cwd=%(cwd)r, stdout=%(sop)r, stderr=%(sep)r.' %
        locals())

    proc = None
    try:
        with open(sop, 'wb') as stdout, open(sep, 'wb') as stderr:
            # -1 defaults bufsize to system bufsize
            pargs = dict(cwd=cwd,
                         env=env,
                         stdout=stdout,
                         stderr=stderr,
                         shell=shell,
                         bufsize=-1,
                         universal_newlines=True)
            proc = subprocess.Popen(full_cmd, **pargs)
            stdout, stderr = proc.communicate()
            rc = proc.returncode if proc else 0
    finally:
        close(proc)

    if not to_files:
        # return output as ASCII string loaded from the output files
        sop = text.toascii(open(sop, 'rb').read().strip())
        sep = text.toascii(open(sep, 'rb').read().strip())
    return rc, sop, sep
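
A usage sketch for execute, assuming a plain command available on the PATH (so no root_dir resolution takes place):

rc, out, err = execute('ls', ['-la'], root_dir=None, to_files=False)
if rc != 0:
    print('command failed:', err)
else:
    print(out)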
Example #15
def get_scans_cache_class(cache_dir=scans_cache_dir):
    """
    Return a new persistent cache class configured with a unique storage directory.
    """
    # create a unique temp directory in cache_dir
    fileutils.create_dir(cache_dir)
    # ensure that the cache dir is always unicode
    cache_dir = fileutils.get_temp_dir(unicode(cache_dir), prefix=unicode(timeutils.time2tstamp()) + u'-')
    sc = ScanFileCache(cache_dir)
    sc.setup()
    return partial(ScanFileCache, cache_dir)
Example #17
def execute(cmd, args, root_dir=None, cwd=None, env=None, to_files=False):
    """
    Run a `cmd` external command with the `args` arguments list and return the
    return code, the stdout and stderr.

    To avoid RAM exhaustion, always write stdout and stderr streams to files.

    If `to_files` is False, return the content of stderr and stdout as ASCII
    strings. Otherwise, return the locations to the stderr and stdout
    temporary files.

    Resolve the `cmd` location using the os/arch local/vendored location based
    on `root_dir`. No resolution is done if `root_dir` is None.

    Run the command using the `cwd` current working directory with an
    `env` dict of environment variables.
    """
    assert cmd
    cmd_loc, bin_dir, lib_dir = get_locations(cmd, root_dir)
    full_cmd = [cmd_loc or cmd] + (args or [])
    env = get_env(env, lib_dir) or None
    cwd = cwd or curr_dir

    # temp files for stderr and stdout
    tmp_dir = fileutils.get_temp_dir(base_dir='cmd')
    sop = os.path.join(tmp_dir, 'stdout')
    sep = os.path.join(tmp_dir, 'stderr')

    # shell==True is DANGEROUS but we are not running arbitrary commands
    # though we can execute command that just happen to be in the path
    shell = True if on_windows else False

    logger.debug('Executing command %(cmd)r as %(full_cmd)r with: env=%(env)r, '
                 'shell=%(shell)r, cwd=%(cwd)r, stdout=%(sop)r, stderr=%(sep)r.'
                 % locals())

    proc = None
    try:
        with open(sop, 'wb') as stdout, open(sep, 'wb') as stderr:
            # -1 defaults bufsize to system bufsize
            pargs = dict(cwd=cwd, env=env, stdout=stdout, stderr=stderr,
                         shell=shell, bufsize=-1, universal_newlines=True)
            proc = subprocess.Popen(full_cmd, **pargs)
            stdout, stderr = proc.communicate()
            rc = proc.returncode if proc else 0
    finally:
        close(proc)

    if not to_files:
        # return output as ASCII string loaded from the output files
        sop = text.toascii(open(sop, 'rb').read().strip())
        sep = text.toascii(open(sep, 'rb').read().strip())
    return rc, sop, sep
Example #18
    def get_temp_dir(self, sub_dir_path=None):
        """
        Create a new unique temporary directory. If `sub_dir_path` is
        provided, also create that directory hierarchy inside this temporary
        directory. Return the location of this unique directory joined with
        `sub_dir_path` if any.
        """
        # ensure that we have a new unique temp directory for each test run
        global test_run_temp_dir
        if not test_run_temp_dir:
            test_run_temp_dir = fileutils.get_temp_dir(base_dir='tst',
                                                       prefix=' ')

        new_temp_dir = fileutils.get_temp_dir(base_dir=test_run_temp_dir)

        if sub_dir_path:
            # create a sub directory hierarchy if requested
            sub_dir_path = to_os_native_path(sub_dir_path)
            new_temp_dir = os.path.join(new_temp_dir, sub_dir_path)
            fileutils.create_dir(new_temp_dir)
        return new_temp_dir
Example #19
def get_scans_cache_class(cache_dir=scans_cache_dir):
    """
    Return a new persistent cache class configured with a unique storage directory.
    """
    # create a unique temp directory in cache_dir
    fileutils.create_dir(cache_dir)
    prefix = timeutils.time2tstamp() + u'-'
    cache_dir = fileutils.get_temp_dir(cache_dir, prefix=prefix)
    if on_linux:
        cache_dir = path_to_bytes(cache_dir)
    sc = ScanFileCache(cache_dir)
    sc.setup()
    return partial(ScanFileCache, cache_dir)
Example #21
def convert_to_text(location, _retrying=False):
    """
    Convert the markup file at location to plain text.
    Return the location of the converted plain text file or None.
    """
    if not is_markup(location):
        return

    temp_file = os.path.join(fileutils.get_temp_dir('markup'), 'text')
    from bs4 import BeautifulSoup
    with open(location, 'rb') as input_text:
        soup = BeautifulSoup(input_text.read(), 'html5lib')
    with codecs.open(temp_file, mode='wb', encoding='utf-8') as output_text:
        output_text.write(soup.get_text())
    return temp_file
Example #22
def extract_twice(location, target_dir, extractor1, extractor2):
    """
    Extract a nested compressed archive at `location` to `target_dir` using
    the `extractor1` function to a temporary directory then the `extractor2`
    function on the extracted payload of `extractor1`.

    Return a list of warning messages. Raise exceptions on errors.

    Typical nested archives include compressed tarballs and RPMs (containing a
    compressed cpio).

    Note: it would be easy to support deeper extractor chains, but this gets
    hard to trace and debug very quickly. A depth of two is simple and sane and
    covers most common cases.
    """
    if on_linux and py2:
        location = fileutils.fsencode(location)
        target_dir = fileutils.fsencode(target_dir)
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(
        os.path.abspath(os.path.expanduser(target_dir)))
    # extract first the intermediate payload to a temp dir
    temp_target = compat.unicode(
        fileutils.get_temp_dir(prefix='extractcode-extract-'))
    warnings = extractor1(abs_location, temp_target)
    if TRACE:
        logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())

    # extract this intermediate payload to the final target_dir
    try:
        inner_archives = list(
            fileutils.resource_iter(temp_target, with_dirs=False))
        if not inner_archives:
            warnings.append(location + ': No files found in archive.')
        else:
            for extracted1_loc in inner_archives:
                if TRACE:
                    logger.debug(
                        'extract_twice: extractor2: %(extracted1_loc)r' %
                        locals())
                warnings.extend(extractor2(extracted1_loc, abs_target_dir))
    finally:
        # cleanup the temporary output from extractor1
        fileutils.delete(temp_target)
    return warnings
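
A sketch of composing two extractors for a compressed tarball, assuming extractcode.archive exposes uncompress_gzip and extract_tar callables with the (location, target_dir) -> warnings signature (extract_tar also appears in Example #3 above):

from extractcode import archive

# gunzip the outer stream first, then untar the intermediate payload
warnings = extract_twice(
    'pkg.tar.gz', 'extracted/',
    extractor1=archive.uncompress_gzip,
    extractor2=archive.extract_tar,
)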
Example #23
def extract_twice(location, target_dir, extractor1, extractor2):
    """
    Extract a nested compressed archive at `location` to `target_dir` using
    the `extractor1` function to a temporary directory then the `extractor2`
    function on the extracted payload of `extractor1`.

    Return a list of warning messages. Raise exceptions on errors.

    Typical nested archives include compressed tarballs and RPMs (containing a
    compressed cpio).

    Note: it would be easy to support deeper extractor chains, but this gets
    hard to trace and debug very quickly. A depth of two is simple and sane and
    covers most common cases.
    """
    if on_linux:
        location = path_to_bytes(location)
        target_dir = path_to_bytes(target_dir)
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    # extract first the intermediate payload to a temp dir
    temp_target = unicode(fileutils.get_temp_dir('extract'))
    warnings = extractor1(abs_location, temp_target)
    if TRACE:
        logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())

    # extract this intermediate payload to the final target_dir
    try:
        inner_archives = list(fileutils.file_iter(temp_target))
        if not inner_archives:
            warnings.append(location + ': No files found in archive.')
        else:
            for extracted1_loc in inner_archives:
                if TRACE:
                    logger.debug('extract_twice: extractor2: %(extracted1_loc)r' % locals())
                warnings.extend(extractor2(extracted1_loc, abs_target_dir))
    finally:
        # cleanup the temporary output from extractor1
        fileutils.delete(temp_target)
    return warnings
Example #24
def convert_to_utf8(location):
    """
    Convert the file at location to UTF-8 text.
    Return the location of the converted file or None.
    """
    if not contenttype.get_type(location).is_text:
        return location
    with open(location, 'rb') as inp:
        start = inp.read(4096)
    encoding = chardet.detect(start)
    if encoding:
        encoding = encoding.get('encoding', None)
        if encoding:
            target = os.path.join(fileutils.get_temp_dir('markup'),
                                  fileutils.file_name(location))
            with codecs.open(location, 'rb', encoding=encoding,
                             errors='replace', buffering=16384) as inf:
                with codecs.open(target, 'wb', encoding='utf-8') as outf:
                    outf.write(inf.read())
            return target
        else:
            # chardet somehow failed to detect an encoding
            return location
Example #25
def try_to_extract(location, target_dir, extractor):
    """
    Extract the archive at `location` to `target_dir` using the `extractor`
    function. If extraction fails, just return without raising exceptions.

    Note: there are a few cases where we want to attempt extracting something
    but do not care if this fails.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    temp_target = unicode(fileutils.get_temp_dir('extract1'))
    warnings = []
    try:
        warnings = extractor(abs_location, temp_target)
        if TRACE:
            logger.debug('try_to_extract: temp_target: %(temp_target)r' % locals())
        fileutils.copytree(temp_target, abs_target_dir)
    except Exception:
        return warnings
    finally:
        fileutils.delete(temp_target)
    return warnings
Example #26
    def _collect_and_parse_tags(self):
        ctags_args = ['--fields=K', '--c-kinds=fp', '-f', '-', self.sourcefile]
        ctags_temp_dir = fileutils.get_temp_dir(base_dir='ctags')
        envt = {'TMPDIR': ctags_temp_dir}
        try:
            rc, stdo, err = command.execute2(cmd_loc=self.cmd_loc,
                                             args=ctags_args,
                                             env=envt,
                                             lib_dir=self.lib_loc,
                                             to_files=True)

            if rc != 0:
                raise Exception(open(err).read())

            with open(stdo, 'rb') as lines:
                for line in lines:
                    if 'cannot open temporary file' in line:
                        raise Exception('ctags: cannot open temporary file '
                                        ': Permission denied')

                    if line.startswith('!'):
                        continue

                    line = line.strip()
                    if not line:
                        continue

                    splitted = line.split('\t')

                    if (line.endswith('function\tfile:')
                            or line.endswith('prototype\tfile:')):
                        self.local_functions.append(splitted[0])

                    elif (line.endswith('function')
                          or line.endswith('prototype')):
                        self.global_functions.append(splitted[0])
        finally:
            fileutils.delete(ctags_temp_dir)
Example #27
def execute(cmd_loc, args, cwd=None, env=None, to_files=False, log=TRACE):
    """
    Run a `cmd_loc` command with the `args` arguments list and return the return
    code, the stdout and stderr.

    To avoid RAM exhaustion, always write stdout and stderr streams to files.

    If `to_files` is False, return the content of stderr and stdout as ASCII
    strings. Otherwise, return the locations to the stderr and stdout temporary
    files.

    Run the command using the `cwd` current working directory with an `env` dict
    of environment variables.
    """
    assert cmd_loc
    full_cmd = [cmd_loc] + (args or [])

    # any shared object should be either in the PATH, the rpath or
    # side-by-side with the executable
    cmd_dir = os.path.dirname(cmd_loc)
    env = get_env(env, lib_dir=cmd_dir) or None
    cwd = cwd or curr_dir

    # temp files for stderr and stdout
    tmp_dir = get_temp_dir(prefix='cmd-')

    sop = path.join(tmp_dir, 'stdout')
    sep = path.join(tmp_dir, 'stderr')

    # shell==True is DANGEROUS but we are not running arbitrary commands
    # though we can execute commands that just happen to be in the path
    # See why we need it on Windows https://bugs.python.org/issue8557
    shell = True if on_windows else False

    if log:
        printer = logger.debug if TRACE else lambda x: print(x)
        printer(
            'Executing command %(cmd_loc)r as:\n%(full_cmd)r\nwith: env=%(env)r\n'
            'shell=%(shell)r\ncwd=%(cwd)r\nstdout=%(sop)r\nstderr=%(sep)r' %
            locals())

    proc = None
    rc = 100

    try:
        with io.open(sop, 'wb') as stdout, \
                io.open(sep, 'wb') as stderr, \
                pushd(cmd_dir):
            proc = subprocess.Popen(
                full_cmd,
                cwd=cwd,
                env=env,
                stdout=stdout,
                stderr=stderr,
                shell=shell,
                # -1 defaults bufsize to system bufsize
                bufsize=-1,
                universal_newlines=True,
            )
            stdout, stderr = proc.communicate()
            rc = proc.returncode if proc else 0
    finally:
        close(proc)

    if not to_files:
        # return output as ASCII string loaded from the output files
        with open(sop, 'rb') as so:
            sor = so.read()
            sop = text.toascii(sor).strip()

        with open(sep, 'rb') as se:
            ser = se.read()
            sep = text.toascii(ser).strip()

    return rc, sop, sep
Example #28
def rebuild_rootfs(image, target_dir, layerid_len=DEFAULT_ID_LEN):
    """
    Extract and merge all layers to target_dir. Extraction is done in
    sequence from bottom (root) to top (latest layer).

    Return a mapping of errors and a list of whiteouts/deleted files.

    The extraction process consists of these steps:
     - extract the layer in a temp directory
     - move layer to the target directory, overwriting existing files
     - if any, remove AUFS special files/dirs in the target directory
     - if any, remove whiteouts file/directory pairs in the target directory
    """

    from extractcode.extract import extract_file

    assert filetype.is_dir(target_dir)
    assert os.path.exists(target_dir)
    extract_errors = []
    # log whiteouts deletions
    whiteouts = []

    for layer_id, layer in image.layers.items():
        layer_tarball = join(image.repo_dir, layer_id[:layerid_len],
                             LAYER_TAR_FILE)
        logger.debug('Extracting layer tarball: %(layer_tarball)r' % locals())
        temp_target = fileutils.get_temp_dir('conan-docker')
        xevents = list(extract_file(layer_tarball, temp_target))
        for x in xevents:
            if x.warnings or x.errors:
                extract_errors.extend(xevents)

        # FIXME: the order of ops is WRONG: we are getting whiteouts incorrectly
        # it should be:
        # 1. extract a layer to temp.
        # 2. find whiteouts in that layer.
        # 3. remove whiteouts in the previous layer stack (e.g. the WIP rootfs)
        # 4. finally, copy the extracted layer over the WIP rootfs

        # move extracted layer to target_dir
        logger.debug(
            'Moving extracted layer from: %(temp_target)r to: %(target_dir)r'
            % locals())
        fileutils.copytree(temp_target, target_dir)
        fileutils.delete(temp_target)

        logger.debug(
            'Merging extracted layers and applying AUFS whiteouts/deletes')
        for top, dirs, files in fileutils.walk(target_dir):
            # delete AUFS dirs and apply whiteout deletions
            for dr in dirs[:]:
                whiteable_dir = join(top, dr)
                if dr.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. dir...
                    dirs.remove(dr)
                    logger.debug('Deleting whiteout dir: %(whiteable_dir)r' %
                                 locals())
                    fileutils.delete(whiteable_dir)

                    # ... and delete the corresponding dir it does "whiteout"
                    base_dir = dr[len(WHITEOUT_PREFIX):]
                    try:
                        dirs.remove(base_dir)
                    except ValueError:
                        # FIXME: should we really raise an exception here?
                        msg = ('Inconsistent layers: '
                               'missing directory to whiteout: %(base_dir)r' %
                               locals())
                        raise InconsistentLayersError(msg)
                    wdo = join(top, base_dir)
                    logger.debug('Deleting real dir:  %(wdo)r' % locals())
                    fileutils.delete(wdo)
                    whiteouts.append(wdo)

                # delete AUFS special dirs
                elif dr.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    dirs.remove(dr)
                    logger.debug(
                        'Deleting AUFS special dir:  %(whiteable_dir)r' %
                        locals())
                    fileutils.delete(whiteable_dir)

            # delete AUFS files and apply whiteout deletions
            all_files = set(files)
            for fl in all_files:
                whiteable_file = join(top, fl)
                if fl.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. marker file...
                    logger.debug('Deleting whiteout file: %(whiteable_file)r' %
                                 locals())
                    fileutils.delete(whiteable_file)
                    # ... and delete the corresponding file it does "whiteout"
                    # e.g. logically delete
                    base_file = fl[len(WHITEOUT_PREFIX):]

                    wfo = join(top, base_file)
                    whiteouts.append(wfo)
                    if base_file in all_files:
                        logger.debug('Deleting real file:  %(wfo)r' % locals())
                        fileutils.delete(wfo)

                # delete AUFS special files
                elif fl.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    logger.debug(
                        'Deleting AUFS special file:  %(whiteable_file)r' %
                        locals())
                    fileutils.delete(whiteable_file)
                    whiteouts.append(whiteable_file)

    return extract_errors, whiteouts
Example #29
def execute2(cmd_loc, args, lib_dir=None, cwd=None, env=None, to_files=False, log=TRACE):
    """
    Run a `cmd_loc` command with the `args` arguments list and return the return
    code, the stdout and stderr.

    To avoid RAM exhaustion, always write stdout and stderr streams to files.

    If `to_files` is False, return the content of stderr and stdout as ASCII
    strings. Otherwise, return the locations to the stderr and stdout
    temporary files.

    Run the command using the `cwd` current working directory with an
    `env` dict of environment variables.
    """
    assert cmd_loc
    full_cmd = [cmd_loc] + (args or [])

    env = get_env(env, lib_dir) or None
    cwd = cwd or curr_dir

    # temp files for stderr and stdout
    tmp_dir = get_temp_dir(prefix='cmd-')

    if on_linux and py2:
        stdout = b'stdout'
        stderr = b'stderr'
    else:
        stdout = 'stdout'
        stderr = 'stderr'

    sop = path.join(tmp_dir, stdout)
    sep = path.join(tmp_dir, stderr)

    # shell==True is DANGEROUS but we are not running arbitrary commands
    # though we can execute commands that just happen to be in the path
    shell = True if on_windows else False

    if log:
        printer = logger.debug if TRACE else lambda x: print(x)
        printer(
            'Executing command %(cmd_loc)r as:\n%(full_cmd)r\nwith: env=%(env)r\n'
            'shell=%(shell)r\ncwd=%(cwd)r\nstdout=%(sop)r\nstderr=%(sep)r'
            % locals())

    proc = None
    rc = 100

    if py2:
        okwargs = dict(mode='wb')
    if py3:
        okwargs = dict(mode='w', encoding='utf-8')

    try:
        with io.open(sop, **okwargs) as stdout, io.open(sep, **okwargs) as stderr:
            with pushd(lib_dir):
                popen_args = dict(
                    cwd=cwd,
                    env=env,
                    stdout=stdout,
                    stderr=stderr,
                    shell=shell,
                    # -1 defaults bufsize to system bufsize
                    bufsize=-1,
                    universal_newlines=True
                )

                proc = subprocess.Popen(full_cmd, **popen_args)
                stdout, stderr = proc.communicate()
                rc = proc.returncode if proc else 0

    finally:
        close(proc)

    if not to_files:
        # return output as ASCII string loaded from the output files
        sop = text.toascii(open(sop, 'rb').read().strip())
        sep = text.toascii(open(sep, 'rb').read().strip())
    return rc, sop, sep
Example #30
def extract_file_by_file(location,
                         target_dir,
                         arch_type='*',
                         skip_symlinks=True):
    """
    Extract all files using a one-by-one process from a 7zip-supported archive
    file at location in the `target_dir` directory.

    Return a list of warning messages if any or an empty list.
    Raise exception on errors.

    `arch_type` is the archive type passed to the 7zip `-t` option.
    Can be None.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    entries, errors_msgs = list_entries(location, arch_type)
    entries = list(entries)

    # Determine if we need a one-by-one approach: technically the approach is
    # to check if we have files that are in the same dir and have the same
    # name when the case is ignored. We take a simpler approach: we check if
    # all paths are unique when we ignore the case; for that we only check
    # that the lengths of two path sets are the same: one set as-is and the
    # other lowercased.

    paths_as_is = set(e.path for e in entries)
    paths_no_case = set(p.lower() for p in paths_as_is)
    need_by_file = len(paths_as_is) != len(paths_no_case)

    if not need_by_file:
        # use regular extract
        return extract_all_files_at_once(location=location,
                                         target_dir=target_dir,
                                         arch_type=arch_type)

    # now we are extracting one file at a time. this is a tad painful because
    # we are dealing with a full command execution each time.

    errors = {}
    warnings = {}
    tmp_dir = fileutils.get_temp_dir(prefix='extractcode-extract-')
    for i, entry in enumerate(entries):

        if not entry.is_file:
            continue

        tmp_extract_dir = os.path.join(tmp_dir, str(i))
        fileutils.create_dir(tmp_extract_dir)

        ex_args = build_7z_extract_command(
            location=location,
            target_dir=tmp_extract_dir,
            single_entry=entry,
            arch_type=arch_type,
        )
        rc, stdout, stderr = command.execute2(**ex_args)

        error = get_7z_errors(stdout, stderr)
        if error or rc != 0:
            error = error or UNKNOWN_ERROR
            if TRACE:
                logger.debug(
                    'extract: failure: {rc}\n'
                    'stderr: {stderr}\nstdout: {stdout}'.format(**locals()))
            errors[entry.path] = error
            continue

        # these are all for a single file path
        warns = get_7z_warnings(stdout) or {}
        wmsg = '\n'.join(warns.values())
        if wmsg:
            if entry.path in warnings:
                warnings[entry.path] += '\n' + wmsg
            else:
                warnings[entry.path] = wmsg

        # finally move that extracted file to its target location, possibly renamed
        source_file_name = fileutils.file_name(entry.path)
        source_file_loc = os.path.join(tmp_extract_dir, source_file_name)
        if not os.path.exists(source_file_loc):
            if entry.path in errors:
                errors[entry.path] += '\nNo file name extracted.'
            else:
                errors[entry.path] = 'No file name extracted.'
            continue

        safe_path = paths.safe_path(entry.path, posix=True)
        target_file_loc = os.path.join(target_dir, safe_path)
        target_file_dir = os.path.dirname(target_file_loc)
        fileutils.create_dir(target_file_dir)

        unique_target_file_loc = extractcode.new_name(target_file_loc,
                                                      is_dir=False)

        if TRACE:
            logger.debug(
                'extract: unique_target_file_loc: from {} to {}'.format(
                    target_file_loc, unique_target_file_loc))

        if os.path.isfile(source_file_loc):
            fileutils.copyfile(source_file_loc, unique_target_file_loc)
        else:
            fileutils.copytree(source_file_loc, unique_target_file_loc)

    extractcode.remove_backslashes_and_dotdots(abs_target_dir)
    if errors:
        raise ExtractErrorFailedToExtract(errors)

    return convert_warnings_to_list(warnings)
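
To make the one-by-one trigger concrete, a tiny sketch of the case-insensitive collision check described in the comments above:

entry_paths = ['docs/README', 'docs/readme', 'src/main.c']
paths_as_is = set(entry_paths)
paths_no_case = set(p.lower() for p in paths_as_is)
# 'docs/README' and 'docs/readme' collapse to a single lowercased path,
# so the set lengths differ and the file-by-file extraction is used
need_by_file = len(paths_as_is) != len(paths_no_case)
assert need_by_file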