Beispiel #1
0
    def test_cached_path_offline(self, monkeypatch):
        """Check that `get_from_cache` falls back to the newest cached copy when offline.

        The etag lookup is patched to raise `ConnectionError`, several cached
        versions of one fake resource are written into `self.TEST_DIR`, and the
        test asserts that the most recently written version is the one returned,
        both when that version has an etag and when it does not.
        """
        # First we mock the `_http_etag` method so that it raises a `ConnectionError`,
        # like it would if there was no internet connection.
        def mocked_http_etag(url: str):
            raise ConnectionError

        monkeypatch.setattr(file_utils, "_http_etag", mocked_http_etag)

        url = "https://github.com/allenai/allennlp/blob/master/some-fake-resource"

        # We'll create two cached versions of this fake resource using two different etags.
        etags = [
            'W/"3e5885bfcbf4c47bc4ee9e2f6e5ea916"',
            'W/"3e5885bfcbf4c47bc4ee9e2f6e5ea918"',
        ]
        filenames = [
            os.path.join(self.TEST_DIR, _resource_to_filename(url, etag))
            for etag in etags
        ]
        for filename, etag in zip(filenames, etags):
            meta = _Meta(resource=url,
                         cached_path=filename,
                         creation_time=time.time(),
                         etag=etag,
                         size=2341)
            meta.to_file()
            with open(filename, "w") as f:
                f.write("some random data")
            # os.path.getmtime is only accurate to the second, so space the
            # writes out to give each version a distinct modification time.
            time.sleep(1.1)

        # Should know to ignore lock files and extraction directories.
        with open(filenames[-1] + ".lock", "w") as f:
            f.write("")
        os.mkdir(filenames[-1] + "-extracted")

        # The version corresponding to the last etag should be returned, since
        # that one has the latest "last modified" time.
        assert get_from_cache(url, cache_dir=self.TEST_DIR) == filenames[-1]

        # We also want to make sure this works when the latest cached version doesn't
        # have a corresponding etag.
        filename = os.path.join(self.TEST_DIR, _resource_to_filename(url))
        meta = _Meta(resource=url,
                     cached_path=filename,
                     creation_time=time.time(),
                     size=2341)
        # Persist the metadata exactly as for the etag-bearing versions above;
        # previously this object was built and then silently discarded.
        meta.to_file()
        with open(filename, "w") as f:
            f.write("some random data")

        assert get_from_cache(url, cache_dir=self.TEST_DIR) == filename
Beispiel #2
0
 def create_cache_entry(self,
                        url: str,
                        etag: str,
                        as_extraction_dir: bool = False):
     """Write a fake cache entry for `url`/`etag` into `self.TEST_DIR`.

     The entry consists of the payload (`self.glove_bytes`), a `.lock` file
     next to the cache path, and a `_Meta` record saved via `to_file()`.

     When `as_extraction_dir` is true, the cache path is a freshly created
     `<name>-extracted` directory and the payload is written to `glove.txt`
     inside it; otherwise the cache path is the payload file itself.
     """
     base_path = os.path.join(self.TEST_DIR,
                              _resource_to_filename(url, etag))
     if as_extraction_dir:
         entry_path = base_path + "-extracted"
         payload_path = base_path + "-extracted/glove.txt"
         os.mkdir(entry_path)
     else:
         entry_path = base_path
         payload_path = base_path

     with open(payload_path, "wb") as payload:
         payload.write(self.glove_bytes)

     # Touch the lock file: append mode creates it if missing and leaves
     # any existing content alone.
     with open(entry_path + ".lock", "a"):
         pass

     _Meta(
         resource=url,
         cached_path=entry_path,
         etag=etag,
         creation_time=time.time(),
         size=len(self.glove_bytes),
         extraction_dir=as_extraction_dir,
     ).to_file()