Ejemplo n.º 1
0
 def test_snapshot_download_no_user_or_org(self):
     """Fetch a whole `hf://` snapshot whose name has no user / org prefix.

     Checks that `cached_path` yields a directory, that a `.json` meta file
     sits next to it, and that the meta records the requested resource.
     """
     # This is the smallest snapshot I could find that is not associated with a user / org.
     model_name = "distilbert-base-german-cased"
     resource = f"hf://{model_name}"

     snapshot_dir = cached_path(resource)
     assert os.path.isdir(snapshot_dir)

     # The meta sidecar lives at "<snapshot path>.json".
     meta_file = snapshot_dir + ".json"
     assert os.path.isfile(meta_file)

     recorded = _Meta.from_path(meta_file)
     assert recorded.resource == resource
Ejemplo n.º 2
0
 def test_cached_download_no_user_or_org(self):
     """Fetch a single file from an `hf://` resource with no user / org prefix.

     Checks that the file is placed directly inside `self.TEST_DIR`, that a
     `.json` meta file sits next to it, and that the meta carries an ETag and
     the requested resource string.
     """
     resource = "hf://t5-small/config.json"

     cached_file = cached_path(resource, cache_dir=self.TEST_DIR)
     assert os.path.isfile(cached_file)

     # The cached file must sit directly in the requested cache directory.
     parent_dir = pathlib.Path(os.path.dirname(cached_file))
     assert parent_dir == self.TEST_DIR

     meta_file = cached_file + ".json"
     assert os.path.isfile(meta_file)

     recorded = _Meta.from_path(meta_file)
     assert recorded.etag is not None
     assert recorded.resource == resource
Ejemplo n.º 3
0
 def test_meta_backwards_compatible(self):
     """Load a meta file that contains only the `url` and `etag` keys.

     A cache entry and a minimal meta file are written by hand; `_Meta.from_path`
     is then expected to expose the URL as `resource`, keep the ETag, and
     provide a creation time and a size matching the cached bytes.
     """
     url = "http://fake.datastore.com/glove.txt.gz"
     etag = "some-fake-etag"
     cache_entry = os.path.join(self.TEST_DIR, _resource_to_filename(url, etag))
     meta_path = cache_entry + ".json"

     # Write the cached payload itself.
     with open(cache_entry, "wb") as payload_file:
         payload_file.write(self.glove_bytes)

     # Write a meta file holding nothing but the two original keys.
     legacy_meta = {"url": url, "etag": etag}
     with open(meta_path, "w") as meta_file:
         json.dump(legacy_meta, meta_file)

     loaded = _Meta.from_path(meta_path)
     assert loaded.resource == url
     assert loaded.etag == etag
     assert loaded.creation_time is not None
     assert loaded.size == len(self.glove_bytes)
Ejemplo n.º 4
0
    def test_get_from_cache(self):
        """Exercise `get_from_cache` three times against the same mocked URL.

        The mock is set up with `change_etag_every=2`. The first call is
        expected to issue one HEAD and one GET, the second only a further HEAD,
        and the third a further HEAD plus a further GET, landing in a new
        cache file named after ETag "1".
        """
        url = "http://fake.datastore.com/glove.txt.gz"
        set_up_glove(url, self.glove_bytes, change_etag_every=2)

        def tally_methods():
            # Count every request recorded so far, keyed by HTTP method.
            return Counter(call.request.method for call in responses.calls)

        def expected_path(etag):
            # Cache location for `url` under the given ETag.
            return os.path.join(self.TEST_DIR, _resource_to_filename(url, etag=etag))

        first = get_from_cache(url, cache_dir=self.TEST_DIR)
        assert first == expected_path("0")
        assert os.path.exists(first + ".json")
        recorded = _Meta.from_path(first + ".json")
        assert recorded.resource == url

        # We should have made one HEAD request and one GET request.
        seen = tally_methods()
        assert len(seen) == 2
        assert seen["HEAD"] == 1
        assert seen["GET"] == 1

        # And the cached file should have the correct contents
        with open(first, "rb") as handle:
            assert handle.read() == self.glove_bytes

        # A second call to `get_from_cache` should make another HEAD call
        # but not another GET call.
        second = get_from_cache(url, cache_dir=self.TEST_DIR)
        assert second == first

        seen = tally_methods()
        assert len(seen) == 2
        assert seen["HEAD"] == 2
        assert seen["GET"] == 1

        with open(second, "rb") as handle:
            assert handle.read() == self.glove_bytes

        # A third call should have a different ETag and should force a new download,
        # which means another HEAD call and another GET call.
        third = get_from_cache(url, cache_dir=self.TEST_DIR)
        assert third == expected_path("1")

        seen = tally_methods()
        assert len(seen) == 2
        assert seen["HEAD"] == 3
        assert seen["GET"] == 2

        with open(third, "rb") as handle:
            assert handle.read() == self.glove_bytes
Ejemplo n.º 5
0
 def test_snapshot_download_no_user_or_org(self):
     """Fetch the `hf://t5-small` snapshot, a name with no user / org prefix.

     Checks that `cached_path` yields a directory, that a `.json` meta file
     sits next to it, and that the meta records the requested resource.
     """
     resource = "hf://t5-small"

     snapshot_dir = cached_path(resource)
     assert os.path.isdir(snapshot_dir)

     # The meta sidecar lives at "<snapshot path>.json".
     meta_file = snapshot_dir + ".json"
     assert os.path.isfile(meta_file)

     recorded = _Meta.from_path(meta_file)
     assert recorded.resource == resource