def load(self, context, cache):
  if cache is not None and path.isdir(cache.get("directory", "")):
    # Check if the requested version changed since the cache was written.
    url_template = context.expand_variables(cache.get("url_template", ""))
    if url_template == cache.get("url"):
      self.directory = cache["directory"]
      logger.info("Reusing cached directory: {}".format(
          path.rel(self.directory, nopar=True)))
      return cache
    else:
      logger.info("Cached URL is outdated:", cache.get("url"))

  directory = None
  archive = None
  delete_after_extract = True
  # Try each URL template until one yields a usable directory or archive.
  for url_template in self.urls:
    url = context.expand_variables(url_template)
    if not url:
      continue
    if url.startswith("file://"):
      name = url[7:]
      if path.isdir(name):
        logger.info("Using directory", url)
        directory = name
        break
      elif path.isfile(name):
        logger.info("Using archive", url)
        archive = name
        delete_after_extract = False
        break
      error = None  # The file:// target does not exist; fall through to the next URL.
    else:
      error = None
      try:
        progress = lambda d: self._download_progress(url, context, d)
        archive, reused = httputils.download_file(
            url, directory=context.get_temporary_directory(),
            on_exists="skip", progress=progress)
      except (httputils.URLError, httputils.HTTPError) as exc:
        error = exc
      except self.DownloadAlreadyExists as exc:
        directory = exc.directory
        logger.info("Reusing existing directory", directory)
        break  # An existing directory satisfies this URL; stop trying others.
      else:
        if reused:
          logger.info("Reusing cached download", path.basename(archive))
        break
      if error:
        logger.info("Error reading", url, ":", error)

  if directory or archive:
    logger.debug("URL applies: {}".format(url))

  if not directory and archive:
    suffix, directory = self._get_archive_unpack_info(context, archive)
    logger.info('Unpacking "{}" to "{}" ...'.format(
        path.rel(archive, nopar=True), path.rel(directory, nopar=True)))
    nr.misc.archive.extract(
        archive,
        directory,
        suffix=suffix,
        unpack_single_dir=True,
        check_extract_file=self._check_extract_file,
        progress_callback=self._extract_progress)
  elif not directory:
    raise LoaderError(self, "no URL matched")

  self.directory = directory
  # Remember which URL produced this directory so later runs can validate the cache.
  with open(path.join(self.directory, ".craftr_downloadurl"), "w") as fp:
    fp.write(url)
  return {"directory": directory, "url_template": url_template, "url": url}
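
# --- Hedged illustration (not part of the loader API) ----------------------
# The cache-validation rule in load() is easy to miss in the control flow:
# a cached directory is reused only while the expanded URL template still
# equals the URL that was originally downloaded. The sketch below restates
# that rule in isolation; ``expand`` and the cache dict are stand-ins for
# ``context.expand_variables()`` and the loader cache, not real craftr names.

def _cache_is_current(expand, cache):
  """Return True if a cached download may be reused as-is."""
  if not cache:
    return False
  return expand(cache.get("url_template", "")) == cache.get("url")

if __name__ == "__main__":
  variables = {"version": "1.2.0"}
  expand = lambda template: template.format(**variables)
  cache = {"url_template": "https://example.com/pkg-{version}.tar.gz",
           "url": "https://example.com/pkg-1.1.0.tar.gz"}
  # Bumping "version" changes the expanded URL, so the cache is stale and
  # load() would fall through to a fresh download.
  assert not _cache_is_current(expand, cache)
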
def external_file(*urls, filename=None, directory=None, copy_file_url=False,
                  name=None):
  """
  Downloads a file from the first valid URL and saves it into *directory*
  under the specified *filename*.

  :param urls: One or more URLs. Supports ``http://``, ``https://``,
    ``ftp://`` and ``file://``. Note that if a ``file://`` URL is specified,
    the file is not copied to the output filename unless *copy_file_url*
    is True.
  :param filename: The output filename of the downloaded file. Defaults to
    the filename specified in the URL.
  :param directory: The directory to save the file to. If *filename* is a
    relative path, it will be joined with this directory. Defaults to a
    path in the build directory.
  :param copy_file_url: If True, ``file://`` URLs will be copied instead of
    used as-is.
  :param name: The name of the loader action. This name is used to store
    information in the :attr:`Session.cache` so we can re-use existing
    downloaded data. :func:`~craftr.defaults.gtn` will be used to retrieve
    the default value for this parameter.
  :return: The path to the downloaded file.
  """

  name = gtn(name)
  if not directory and not filename:
    directory = buildlocal('data')
  cache = get_loader_cache(name)

  # TODO: Expand variables of the current module.

  target_filename = None
  exceptions = []
  for url in urls:
    # Re-use the previous download if the URL did not change.
    if url == cache.get('download_url'):
      existing_file = cache.get('download_file')
      if existing_file and path.isfile(existing_file):
        return existing_file
    progress_info = 'Downloading {} ...'.format(url)
    if url.startswith('file://'):
      source_file = url[7:]
      if path.isfile(source_file):
        if not copy_file_url:
          return source_file
        if not filename:
          filename = path.basename(source_file)
        # TODO: Use httputils.download_file() for this as well?
        logger.progress_begin(progress_info)
        path.makedirs(directory)
        target_filename = path.join(directory, filename)
        with open(source_file, 'rb') as sfp:
          with open(target_filename, 'wb') as dfp:
            for bytes_copied, size in pyutils.copyfileobj(sfp, dfp):
              logger.progress_update(float(bytes_copied) / size)
        logger.progress_end()
        # TODO: Copy file permissions
        break
      else:
        exceptions.append(FileNotFoundError(url))
    else:
      progress = lambda data: _external_file_download_callback(
          progress_info, directory, filename, cache, data)
      try:
        target_filename, reused = httputils.download_file(
            url, filename=filename, directory=directory,
            on_exists='skip', progress=progress)
      except (httputils.URLError, httputils.HTTPError) as exc:
        exceptions.append(exc)
      else:
        break
      finally:
        logger.progress_end()

  if target_filename:
    cache['download_url'] = url
    cache['download_file'] = target_filename
    return target_filename
  raise NoExternalFileMatch(name, urls, exceptions)
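
# --- Hedged usage sketch ----------------------------------------------------
# How external_file() is typically called from a build script. The URLs,
# version number and loader name below are invented for illustration; only
# external_file() and buildlocal() come from this module. URLs are tried in
# order, so the file:// entry acts as a fallback; it would be returned
# in-place (no copy) because copy_file_url defaults to False.

def _example_fetch_sources():
  return external_file(
      'https://example.com/downloads/mylib-1.4.2.tar.gz',  # hypothetical mirror
      'file:///opt/mirrors/mylib-1.4.2.tar.gz',            # local fallback, used as-is
      directory=buildlocal('downloads'),
      name='mylib_source',
  )
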