Example #1
0
    def load(self, context, cache):
        """Resolve the first usable URL of ``self.urls`` to a local directory.

        If *cache* refers to an existing directory and the expanded cached
        URL template still equals the cached URL, that directory is reused
        and *cache* is returned unchanged. Otherwise every template in
        ``self.urls`` is expanded with ``context.expand_variables()`` and
        tried in order:

        * ``file://`` URLs naming a directory are used as-is,
        * ``file://`` URLs naming a file are treated as an archive,
        * any other URL is passed to ``httputils.download_file()`` and the
          result is treated as an archive.

        An archive is unpacked into the directory reported by
        ``self._get_archive_unpack_info()``. The URL that was finally used is
        written to ``.craftr_downloadurl`` inside the resulting directory.

        :param context: Object providing ``expand_variables()`` and
            ``get_temporary_directory()``.
        :param cache: Mapping produced by a previous call, or None.
        :return: *cache* when it could be reused, otherwise a new dict with
            the keys ``directory``, ``url_template`` and ``url``.
        :raise LoaderError: If no URL produced a directory or an archive.
        """
        if cache is not None and path.isdir(cache.get("directory", "")):
            # Check if the requested version changes.
            url_template = context.expand_variables(cache.get("url_template", ""))
            if url_template == cache.get("url"):
                self.directory = cache["directory"]
                logger.info("Reusing cached directory: {}".format(path.rel(self.directory, nopar=True)))
                return cache
            logger.info("Cached URL is outdated:", cache.get("url"))

        directory = None
        archive = None
        for url_template in self.urls:
            url = context.expand_variables(url_template)
            if not url:
                continue

            error = None
            if url.startswith("file://"):
                name = url[7:]  # strip the "file://" prefix
                if path.isdir(name):
                    logger.info("Using directory", url)
                    directory = name
                    break
                if path.isfile(name):
                    logger.info("Using archive", url)
                    archive = name
                    break
                # Neither a directory nor a file: silently try the next URL.
            else:
                try:
                    progress = lambda d: self._download_progress(url, context, d)
                    archive, reused = httputils.download_file(
                        url, directory=context.get_temporary_directory(), on_exists="skip", progress=progress
                    )
                except (httputils.URLError, httputils.HTTPError) as exc:
                    error = exc
                except self.DownloadAlreadyExists as exc:
                    directory = exc.directory
                    logger.info("Reusing existing directory", directory)
                    # Stop here: continuing would try (and possibly download)
                    # the remaining URLs and leave `url`/`url_template`
                    # pointing at a URL that did not produce `directory`.
                    break
                else:
                    if reused:
                        logger.info("Reusing cached download", path.basename(archive))
                    break

            if error is not None:
                logger.info("Error reading", url, ":", error)

        # Every path that sets `directory` or `archive` leaves the loop via
        # `break`, so `url` and `url_template` refer to the matching URL here.
        if directory or archive:
            logger.debug("URL applies: {}".format(url))

        if not directory and archive:
            suffix, directory = self._get_archive_unpack_info(context, archive)
            logger.info(
                'Unpacking "{}" to "{}" ...'.format(path.rel(archive, nopar=True), path.rel(directory, nopar=True))
            )
            nr.misc.archive.extract(
                archive,
                directory,
                suffix=suffix,
                unpack_single_dir=True,
                check_extract_file=self._check_extract_file,
                progress_callback=self._extract_progress,
            )
        elif not directory:
            raise LoaderError(self, "no URL matched")

        self.directory = directory
        # Record which URL the directory contents came from.
        with open(path.join(self.directory, ".craftr_downloadurl"), "w") as fp:
            fp.write(url)
        return {"directory": directory, "url_template": url_template, "url": url}
Example #2
0
def external_file(*urls, filename = None, directory = None,
    copy_file_url = False, name = None):
  """
  Downloads a file from the first valid URL and saves it into *directory*
  under the specified *filename*.

  :param urls: One or more URLs. Supports ``http://``, ``https://``,
      ``ftp://`` and ``file://``. Note that if a ``file://`` URL is
      specified, the file is not copied to the output filename unless
      *copy_file_url* is True.
  :param filename: The output filename of the downloaded file. Defaults
      to the filename of the downloaded file.
  :param directory: The directory to save the file to. If *filename*
      is a relative path, it will be joined with this directory. Defaults
      to a path in the build directory when *filename* is not given
      either.
  :param copy_file_url: If True, ``file://`` URLs will be copied instead
      of used as-is.
  :param name: The name of the loader action. This name is used to store
      information in the :attr:`Session.cache` so we can re-use existing
      downloaded data. :func:`~craftr.defaults.gtn` will be used to
      retrieve the default value for this parameter.
  :return: The path to the downloaded file.
  :raise NoExternalFileMatch: If none of the *urls* could be used. The
      errors collected for the individual URLs are passed along.
  """

  # Local import: only needed to find the parent directory of *filename*
  # when copying a file:// URL without an explicit *directory*.
  import os

  name = gtn(name)
  if not directory and not filename:
    directory = buildlocal('data')

  cache = get_loader_cache(name)

  # TODO: expand variables of the current module.

  target_filename = None
  exceptions = []
  for url in urls:
    # Re-use the file of a previous run if this URL produced it and the
    # file still exists.
    if url == cache.get('download_url'):
      existing_file = cache.get('download_file')
      if existing_file and path.isfile(existing_file):
        return existing_file

    progress_info = 'Downloading {} ...'.format(url)
    if url.startswith('file://'):
      source_file = url[7:]  # strip the "file://" prefix
      if not path.isfile(source_file):
        exceptions.append(FileNotFoundError(url))
        continue
      if not copy_file_url:
        return source_file
      if not filename:
        filename = path.basename(source_file)

      # TODO: Use httputils.download_file() for this as well?
      logger.progress_begin(progress_info)
      try:
        if directory:
          path.makedirs(directory)
          target_filename = path.join(directory, filename)
        else:
          # *filename* was given without a *directory*: use it as-is and
          # make sure its parent directory (if any) exists.
          target_filename = filename
          parent = os.path.dirname(filename)
          if parent:
            path.makedirs(parent)
        with open(source_file, 'rb') as sfp:
          with open(target_filename, 'wb') as dfp:
            for bytes_copied, size in pyutils.copyfileobj(sfp, dfp):
              # Guard against a reported size of zero (e.g. empty file).
              fraction = float(bytes_copied) / size if size else 1.0
              logger.progress_update(fraction)
      finally:
        # Always close the progress display, even if the copy failed,
        # like the download branch below does.
        logger.progress_end()

      # TODO: Copy file permissions
      break
    else:
      progress = lambda data: _external_file_download_callback(
          progress_info, directory, filename, cache, data)

      try:
        target_filename, reused = httputils.download_file(
          url, filename = filename, directory = directory, on_exists = 'skip',
          progress = progress)
      except (httputils.URLError, httputils.HTTPError) as exc:
        exceptions.append(exc)
      else:
        break
      finally:
        logger.progress_end()

  if target_filename:
    # The loop was left via ``break``, so *url* is the URL that matched.
    cache['download_url'] = url
    cache['download_file'] = target_filename
    return target_filename

  raise NoExternalFileMatch(name, urls, exceptions)