Example #1
0
def external_file(*urls, filename = None, directory = None,
    copy_file_url = False, name = None):
  """
  Downloads a file from the first valid URL and saves it into *directory*
  under the specified *filename*.

  :param urls: One or more URLs. Supports ``http://``, ``https://``,
      ``ftp://`` and ``file://`. Note that if a ``file://`` URL is
      specified, the file is not copied to the output filename unless
      *copy_file_url* is True.
  :param filename: The output filename of the downloaded file. Defaults
      to the filename of the downloaded file.
  :param directory: The directory to save the file to. If *filename*
      is a relative path, it will be joined with this directory. Defaults
      to a path in the build directory.
  :param copy_file_url: If True, ``file://`` URLs will be copied instead
      of used as-is.
  :param name: The name of the loader action. This name is used to store
      information in the :attr:`Session.cache` so we can re-use existing
      downloaded data. :func:`~craftr.defaults.gtn` will be used to
      retrieve the default value for this parameter.
  :return: The path to the downloaded file.
  :raise NoExternalFileMatch: If none of the *urls* could be retrieved;
      carries the per-URL exceptions that were collected along the way.
  """

  name = gtn(name)
  # Derive a default directory only when NEITHER directory nor filename
  # was given.
  # NOTE(review): if *filename* is set but *directory* is not, the
  # file:// copy branch below calls makedirs/join with directory=None --
  # confirm callers always pass a directory together with a filename.
  if not directory and not filename:
    directory = buildlocal('data')

  cache = get_loader_cache(name)

  # TODO: expand variables of the current module.

  target_filename = None
  exceptions = []  # one entry per failed URL; reported on total failure
  for url in urls:
    # Fast path: re-use the previously downloaded file if the cache still
    # points at this URL and the file is still on disk.
    if url == cache.get('download_url'):
      existing_file = cache.get('download_file')
      if existing_file and path.isfile(existing_file):
        return existing_file

    progress_info = 'Downloading {} ...'.format(url)
    if url.startswith('file://'):
      source_file = url[7:]  # strip the 'file://' scheme prefix
      if path.isfile(source_file):
        if not copy_file_url:
          # Use the local file in place; deliberately not cached.
          return source_file
        if not filename:
          filename = path.basename(source_file)

        # TODO: Use httputils.download_file() for this as well?
        # Manual chunked copy with progress reporting.
        logger.progress_begin(progress_info)
        path.makedirs(directory)
        target_filename = path.join(directory, filename)
        with open(source_file, 'rb') as sfp:
          with open(target_filename, 'wb') as dfp:
            for bytes_copied, size in pyutils.copyfileobj(sfp, dfp):
              logger.progress_update(float(bytes_copied) / size)
        logger.progress_end()

        # TODO: Copy file permissions
        break
      else:
        exceptions.append(FileNotFoundError(url))
    else:
      # The lambda captures this iteration's progress_info; it is only
      # invoked while download_file() runs below, so the usual lambda
      # late-binding pitfall does not apply here.
      progress = lambda data: _external_file_download_callback(
          progress_info, directory, filename, cache, data)

      try:
        target_filename, reused = httputils.download_file(
          url, filename = filename, directory = directory, on_exists = 'skip',
          progress = progress)
      except (httputils.URLError, httputils.HTTPError) as exc:
        exceptions.append(exc)
      else:
        break  # success -- stop trying further URLs
      finally:
        logger.progress_end()

  if target_filename:
    # Record the successful URL/file pair so the next call can short-
    # circuit via the cache check at the top of the loop. Note that *url*
    # still holds the URL of the iteration that broke out of the loop.
    cache['download_url'] = url
    cache['download_file'] = target_filename
    return target_filename

  raise NoExternalFileMatch(name, urls, exceptions)
Example #2
0
def external_archive(*urls, exclude_files = (), directory = None, name = None):
  """
  Downloads an archive from the first valid URL (delegating to
  :func:`external_file`) and unpacks it into *directory*. If the archive
  contains a single top-level directory, its contents are lifted one
  level up so that single parent directory disappears.

  :param urls: See :func:`external_file`
  :param exclude_files: Glob patterns matched against the arcnames in the
      archive; matching entries are skipped. To exclude a whole directory,
      the patterns must cover every file inside it.
  :param directory: Target directory for the unpacked archive. Defaults
      to a build-directory path derived from the archive filename. A value
      with a trailing slash has the archive filename appended.
  :param name: The name of the loader action, used as the key into
      :attr:`Session.cache` for re-using previously unpacked data.
      :func:`~craftr.defaults.gtn` supplies the default.
  :return: The path to the top-level directory of the unpacked archive.
  """

  name = gtn(name)
  directory = directory or (buildlocal('data') + '/')

  archive = external_file(*urls, directory = directory, name = name)
  cache = get_loader_cache(name)  # same cache slot as external_file()

  suffix = nr.misc.archive.get_opener(archive)[0]
  if path.maybedir(directory):
    # Trailing-slash directory: append the archive name minus its suffix.
    stem = path.basename(archive)[:-len(suffix)]
    directory = path.join(directory, stem)

  # Skip extraction entirely when the cache says this exact archive was
  # already unpacked into this exact directory and it still exists.
  already_unpacked = (
      cache.get('archive_source') == archive
      and cache.get('archive_dir') == directory
      and path.isdir(directory))
  if already_unpacked:
    return directory

  def keep_member(arcname):
    # Keep the entry unless any exclude pattern matches its arcname.
    return not any(fnmatch.fnmatch(arcname, pat) for pat in exclude_files)

  def report(index, count, filename):
    # index == -1 is the "reading the archive index" phase.
    if index == -1:
      logger.progress_begin("Unpacking {} ...".format(path.basename(archive)))
      logger.progress_update(0.0, 'Reading index...')
      return
    fraction = index / float(count)
    if index == 0:
      # First file: swap the indexing progress for the extraction one.
      logger.progress_end()
      logger.progress_begin(None, False)
    elif index == (count - 1):
      logger.progress_end()
    else:
      logger.progress_update(fraction, '{} / {}'.format(index, count))

  nr.misc.archive.extract(archive, directory, suffix = suffix,
    unpack_single_dir = True, check_extract_file = keep_member,
    progress_callback = report)

  cache['archive_source'] = archive
  cache['archive_dir'] = directory
  return directory