import hashlib
import os
import shutil
import tempfile

from distutils.util import strtobool
from urllib import request

import filelock
import gdown

from chainer.dataset import download
from chainer.dataset.download import get_dataset_root


def cached_gdown_download(url, cached_path=None):
    # Cache downloads under the dataset root, keyed by the MD5 of the URL.
    cache_root = os.path.join(download.get_dataset_root(), '_dl_cache')
    try:
        os.makedirs(cache_root)
    except OSError:
        if not os.path.exists(cache_root):
            raise
    urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
    if cached_path is None:
        cached_path = os.path.join(cache_root, urlhash)
    lock_path = cached_path + '.lock'

    # Return early if another process has already populated the cache.
    with filelock.FileLock(lock_path):
        if os.path.exists(cached_path):
            return cached_path

    # Download into a temporary directory, then move the finished file into
    # place under the lock so readers never observe a partially written file.
    temp_root = tempfile.mkdtemp(dir=cache_root)
    try:
        temp_path = os.path.join(temp_root, 'download.cache')
        gdown.download(url, temp_path, quiet=False)
        with filelock.FileLock(lock_path):
            shutil.move(temp_path, cached_path)
    finally:
        shutil.rmtree(temp_root)
    return cached_path

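# A minimal usage sketch (an assumption, not part of the original module):
# the Google Drive file id below is a hypothetical placeholder. The first
# call downloads the file; later calls with the same URL return the cached
# path without touching the network.
def _example_gdown_usage():
    url = 'https://drive.google.com/uc?id=FILE_ID'  # FILE_ID is hypothetical
    path = cached_gdown_download(url)
    assert path == cached_gdown_download(url)  # second call hits the cache
    return path
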
def cached_download(url):
    """Downloads a file and caches it.

    This is different from the original
    :func:`~chainer.dataset.cached_download` in that the download progress
    is reported. Note that this progress report can be disabled by setting
    the environment variable `CHAINERCV_DOWNLOAD_REPORT` to `'OFF'`.

    It downloads a file from the URL if there is no corresponding cache.
    After the download, this function stores a cache to the directory under
    the dataset root (see :func:`set_dataset_root`). If there is already a
    cache for the given URL, it just returns the path to the cache without
    downloading the same file.

    Args:
        url (string): URL to download from.

    Returns:
        string: Path to the downloaded file.

    """
    cache_root = os.path.join(get_dataset_root(), '_dl_cache')
    try:
        os.makedirs(cache_root)
    except OSError:
        if not os.path.exists(cache_root):
            raise
    lock_path = os.path.join(cache_root, '_dl_lock')
    urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
    cache_path = os.path.join(cache_root, urlhash)

    with filelock.FileLock(lock_path):
        if os.path.exists(cache_path):
            return cache_path

    temp_root = tempfile.mkdtemp(dir=cache_root)
    try:
        temp_path = os.path.join(temp_root, 'dl')
        if strtobool(os.getenv('CHAINERCV_DOWNLOAD_REPORT', 'ON')):
            print('Downloading ...')
            print('From: {:s}'.format(url))
            print('To: {:s}'.format(cache_path))
            request.urlretrieve(url, temp_path, _reporthook)
        else:
            request.urlretrieve(url, temp_path)
        with filelock.FileLock(lock_path):
            shutil.move(temp_path, cache_path)
    finally:
        shutil.rmtree(temp_root)
    return cache_path

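# Usage note for the function above (an assumption, not from the original
# module): because the report flag is read through os.getenv at call time,
# it can be toggled per process, e.g.
#
#     os.environ['CHAINERCV_DOWNLOAD_REPORT'] = 'OFF'
#     path = cached_download('https://example.com/data.bin')  # placeholder
#
# The URL above is a placeholder, not a real file.
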
def cached_download(url, cached_path=None):
    """Downloads a file and caches it.

    This is different from the original
    :func:`~chainer.dataset.cached_download` in that the download progress
    is reported.

    It downloads a file from the URL if there is no corresponding cache.
    After the download, this function stores a cache to the directory under
    the dataset root (see :func:`set_dataset_root`). If there is already a
    cache for the given URL, it just returns the path to the cache without
    downloading the same file.

    Args:
        url (string): URL to download from.
        cached_path (string): Path to store the cache at. If :obj:`None`,
            a path derived from the MD5 hash of the URL is used.

    Returns:
        string: Path to the downloaded file.

    """
    # Note: this variant shares its name with the definition above; if both
    # live in one module, this one shadows the earlier definition.
    cache_root = os.path.join(download.get_dataset_root(), '_dl_cache')
    try:
        os.makedirs(cache_root)
    except OSError:
        if not os.path.exists(cache_root):
            raise
    urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
    if cached_path is None:
        cached_path = os.path.join(cache_root, urlhash)
    lock_path = cached_path + '.lock'

    with filelock.FileLock(lock_path):
        if os.path.exists(cached_path):
            return cached_path

    temp_root = tempfile.mkdtemp(dir=cache_root)
    try:
        temp_path = os.path.join(temp_root, 'download.cache')
        print('Downloading ...')
        print('From: {:s}'.format(url))
        print('To: {:s}'.format(cached_path))
        request.urlretrieve(url, temp_path, _reporthook)
        with filelock.FileLock(lock_path):
            shutil.move(temp_path, cached_path)
    finally:
        shutil.rmtree(temp_root)
    return cached_path

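# The urlretrieve-based functions above pass a `_reporthook` callback whose
# definition is not part of this section. The sketch below is an assumption,
# not the original helper: it relies only on the standard urlretrieve
# reporthook signature (count, block_size, total_size) and prints a
# single-line progress summary.
import sys
import time

_start_time = None


def _reporthook(count, block_size, total_size):
    global _start_time
    if count == 0:
        # urlretrieve invokes the hook with count == 0 before the first read.
        _start_time = time.time()
        return
    duration = max(time.time() - _start_time, 1e-6)
    progress = count * block_size
    speed = progress / duration  # bytes per second
    if total_size > 0:
        percent = min(progress * 100.0 / total_size, 100.0)
        sys.stdout.write('\r{:5.1f}%  {:8.1f} KiB/s'.format(
            percent, speed / 1024.0))
    else:
        # Some servers omit Content-Length; fall back to bytes received.
        sys.stdout.write('\r{:.1f} KiB downloaded'.format(progress / 1024.0))
    sys.stdout.flush()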