Example #1
    def _get_local_path(self, path: str, force: bool = False, **kwargs: Any) -> str:
        """
        This implementation downloads the remote resource and caches it locally.
        The resource will only be downloaded if not previously requested.
        """
        self._check_kwargs(kwargs)
        if (
            force
            or path not in self.cache_map
            or not os.path.exists(self.cache_map[path])
        ):
            logger = logging.getLogger(__name__)
            parsed_url = urlparse(path)
            dirname = os.path.join(
                get_cache_dir(), os.path.dirname(parsed_url.path.lstrip("/"))
            )
            filename = path.split("/")[-1]
            cached = os.path.join(dirname, filename)
            with file_lock(cached):
                if not os.path.isfile(cached):
                    logger.info("Downloading {} ...".format(path))
                    cached = download(path, dirname, filename=filename)
            logger.info("URL {} cached in {}".format(path, cached))
            self.cache_map[path] = cached
        return self.cache_map[path]
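For orientation, here is a minimal, self-contained sketch of the same download-once-and-cache idea using only the standard library. The function name simple_cached_download and the CACHE_ROOT location are made up for illustration; the real handler relies on get_cache_dir, file_lock, and download helpers that are not shown in this excerpt.

import logging
import os
from urllib.parse import urlparse
from urllib.request import urlretrieve

CACHE_ROOT = os.path.expanduser("~/.cache/example")  # made-up cache location


def simple_cached_download(url: str, force: bool = False) -> str:
    """Download ``url`` on first use and return the cached local path afterwards."""
    parsed = urlparse(url)
    dirname = os.path.join(CACHE_ROOT, os.path.dirname(parsed.path.lstrip("/")))
    os.makedirs(dirname, exist_ok=True)
    cached = os.path.join(dirname, url.split("/")[-1])
    if force or not os.path.isfile(cached):
        logging.getLogger(__name__).info("Downloading %s ...", url)
        urlretrieve(url, cached)  # no file lock here, unlike the handler above
    return cached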
Example #2
File: io.py Project: zlapp/vissl
def cache_url(url: str, cache_dir: str) -> str:
    """
    This implementation downloads the remote resource and caches it locally.
    The resource will only be downloaded if not previously requested.
    """
    parsed_url = urlparse(url)
    dirname = os.path.join(cache_dir, os.path.dirname(parsed_url.path.lstrip("/")))
    makedir(dirname)
    filename = url.split("/")[-1]
    cached = os.path.join(dirname, filename)
    with file_lock(cached):
        if not os.path.isfile(cached):
            logging.info(f"Downloading {url} to {cached} ...")
            cached = download(url, dirname, filename=filename)
    logging.info(f"URL {url} cached in {cached}")
    return cached
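A hypothetical caller might look like the following; the URL and cache directory are placeholders, not values used by the vissl project.

# Hypothetical usage: fetch a remote weights file once, reuse the cached copy later.
weights_url = "https://example.com/models/resnet50.torch"  # placeholder URL
local_path = cache_url(weights_url, cache_dir="/tmp/vissl_cache")
with open(local_path, "rb") as f:
    blob = f.read()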
Example #3
    def test_download(self) -> None:
        download("https://www.facebook.com",
                 ".",
                 filename=self._filename,
                 progress=False)
        self.assertTrue(os.path.isfile(self._filename))
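The snippet refers to self._filename without showing the surrounding fixture. A plausible self-contained version of the test class is sketched below; the filename, the tearDown cleanup, and the import location of download are assumptions for illustration, not the project's actual code.

import os
import unittest

# Assumed import location; the project under test may expose `download` elsewhere.
from fvcore.common.download import download


class TestDownload(unittest.TestCase):
    _filename = "downloaded_index.html"  # made-up name for the downloaded file

    def tearDown(self) -> None:
        # Remove the file created by the test, if the download succeeded.
        if os.path.isfile(self._filename):
            os.remove(self._filename)

    def test_download(self) -> None:
        download("https://www.facebook.com",
                 ".",
                 filename=self._filename,
                 progress=False)
        self.assertTrue(os.path.isfile(self._filename))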
Example #4
    def _get_local_path(
        self,
        path: str,
        force: bool = False,
        cache_dir: Optional[str] = None,
        **kwargs: Any,
    ) -> str:
        """
        As paddle model stores all files in tar files, we need to extract them
        and get the newly extracted folder path. This function rewrites the base
        function to support the following situations:

        1. If the tar file is not downloaded, it will download the tar file,
            extract it to the target folder, delete the downloaded tar file,
            and return the folder path.
        2. If the extracted target folder is present, and all the necessary model
            files are present (specified in _TAR_FILE_NAME_LIST), it will
            return the folder path.
        3. If the tar file is downloaded, but the extracted target folder is not
            present (or it doesn't contain the necessary files in _TAR_FILE_NAME_LIST),
            it will extract the tar file to the target folder, delete the tar file,
            and return the folder path.

        """
        self._check_kwargs(kwargs)
        if (force or path not in self.cache_map
                or not os.path.exists(self.cache_map[path])):
            logger = logging.getLogger(__name__)
            parsed_url = urlparse(path)
            dirname = os.path.join(
                get_cache_dir(cache_dir),
                os.path.dirname(parsed_url.path.lstrip("/")))
            filename = path.split("/")[-1]
            if len(filename) > self.MAX_FILENAME_LEN:
                filename = filename[:100] + "_" + uuid.uuid4().hex

            cached = os.path.join(dirname, filename)

            if is_cached_folder_exists_and_valid(cached):
                # When the cached folder exists and valid, we don't need to redownload
                # the tar file.
                self.cache_map[path] = _get_untar_directory(cached)

            else:
                with file_lock(cached):
                    if not os.path.isfile(cached):
                        logger.info("Downloading {} ...".format(path))
                        cached = download(path, dirname, filename=filename)

                    if path.endswith(".tar"):
                        model_dir = _untar_model_weights(cached)
                        try:
                            os.remove(cached)  # remove the redundant tar file
                            # TODO: remove the .lock file.
                        except OSError:
                            logger.warning(
                                f"Not able to remove the cached tar file {cached}"
                            )

                logger.info("URL {} cached in {}".format(path, model_dir))
                self.cache_map[path] = model_dir

        return self.cache_map[path]
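The helpers is_cached_folder_exists_and_valid, _get_untar_directory, and _untar_model_weights are not part of this excerpt. The sketch below shows one way they could be written with tarfile, assuming the extraction target is the tar path with its ".tar" suffix stripped and that _TAR_FILE_NAME_LIST names the required model files; both assumptions are illustrative, not the project's actual implementation.

import os
import tarfile

# Assumed contents; the real _TAR_FILE_NAME_LIST lives in the project, not here.
_TAR_FILE_NAME_LIST = ["inference.pdmodel", "inference.pdiparams"]


def _get_untar_directory(tar_path: str) -> str:
    # Assumption: the archive is extracted next to itself, minus the ".tar" suffix.
    return tar_path[:-len(".tar")] if tar_path.endswith(".tar") else tar_path + "_extracted"


def is_cached_folder_exists_and_valid(tar_path: str) -> bool:
    """Return True if the extracted folder exists and holds every required file."""
    folder = _get_untar_directory(tar_path)
    return os.path.isdir(folder) and all(
        os.path.isfile(os.path.join(folder, name)) for name in _TAR_FILE_NAME_LIST
    )


def _untar_model_weights(tar_path: str) -> str:
    """Extract the tar archive into its target folder and return that folder."""
    folder = _get_untar_directory(tar_path)
    os.makedirs(folder, exist_ok=True)
    with tarfile.open(tar_path) as tar:
        tar.extractall(folder)
    return folder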