def test_snapshot_download_no_user_or_org(self):
    """Check that a bare ``hf://<model>`` resource resolves to a directory.

    The resource has no user / org prefix. The test expects ``cached_path``
    to return a directory and to leave a ``.json`` metadata file next to it
    whose ``resource`` field is the original ``hf://`` string.
    """
    # NOTE(review): another method with this exact name appears later in this
    # chunk; if both live in the same class the later definition shadows this
    # one and this test never runs -- confirm and rename one of them.

    # This is the smallest snapshot I could find that is not associated
    # with a user / org.
    model_name = "distilbert-base-german-cased"
    snapshot_dir = cached_path(f"hf://{model_name}")
    assert os.path.isdir(snapshot_dir)

    # The metadata sidecar is stored alongside the snapshot as "<path>.json".
    sidecar = snapshot_dir + ".json"
    assert os.path.isfile(sidecar)

    recorded = _Meta.from_path(sidecar)
    assert recorded.resource == f"hf://{model_name}"
def test_cached_download_no_user_or_org(self):
    """Check that a single ``hf://`` file without user / org is cached.

    The file is requested with ``cache_dir=self.TEST_DIR``. The test expects
    the returned path to be a file located directly in that directory, and a
    ``.json`` metadata file next to it that records a non-``None`` etag and
    the original resource string.
    """
    resource = "hf://t5-small/config.json"
    cached_file = cached_path(resource, cache_dir=self.TEST_DIR)

    assert os.path.isfile(cached_file)
    # The file must sit directly inside the requested cache directory.
    containing_dir = pathlib.Path(os.path.dirname(cached_file))
    assert containing_dir == self.TEST_DIR

    sidecar = cached_file + ".json"
    assert os.path.isfile(sidecar)

    recorded = _Meta.from_path(sidecar)
    assert recorded.etag is not None
    assert recorded.resource == "hf://t5-small/config.json"
def test_meta_backwards_compatible(self):
    """Check that ``_Meta.from_path`` accepts a metadata file with only url/etag.

    A cache entry is written by hand: the payload bytes plus a ``.json``
    sidecar holding just the ``"url"`` and ``"etag"`` keys. Loading that
    sidecar must still give a ``_Meta`` whose ``resource`` and ``etag`` match
    the written values, whose ``creation_time`` is set, and whose ``size``
    equals the payload length.
    """
    url = "http://fake.datastore.com/glove.txt.gz"
    etag = "some-fake-etag"
    cache_entry = os.path.join(self.TEST_DIR, _resource_to_filename(url, etag))

    # Write the cached payload itself.
    with open(cache_entry, "wb") as payload_file:
        payload_file.write(self.glove_bytes)

    # Write the old-style metadata: only "url" and "etag", nothing else.
    legacy_fields = {"url": url, "etag": etag}
    with open(cache_entry + ".json", "w") as sidecar_file:
        json.dump(legacy_fields, sidecar_file)

    loaded = _Meta.from_path(cache_entry + ".json")
    assert loaded.resource == url
    assert loaded.etag == etag
    assert loaded.creation_time is not None
    assert loaded.size == len(self.glove_bytes)
def test_get_from_cache(self):
    """Exercise ``get_from_cache`` across three consecutive calls for one URL.

    Expected sequence, per the assertions below:

    1. First call: one HEAD and one GET; the file is stored under the
       name derived from etag ``"0"`` with a ``.json`` metadata sidecar.
    2. Second call: one more HEAD, no new GET; the same file is returned.
    3. Third call: the ETag has changed, so one more HEAD and one more
       GET; the file is stored under the name derived from etag ``"1"``.
    """

    def check_request_counts(head, get):
        # Tally every request recorded so far by HTTP method; only HEAD and
        # GET are expected to have been used at all.
        tally = Counter(call.request.method for call in responses.calls)
        assert len(tally) == 2
        assert tally["HEAD"] == head
        assert tally["GET"] == get

    def check_contents(cached_name):
        with open(cached_name, "rb") as cached_file:
            assert cached_file.read() == self.glove_bytes

    url = "http://fake.datastore.com/glove.txt.gz"
    # NOTE(review): set_up_glove is defined elsewhere; presumably it registers
    # the mocked HEAD/GET responses and rotates the ETag -- confirm.
    set_up_glove(url, self.glove_bytes, change_etag_every=2)

    first = get_from_cache(url, cache_dir=self.TEST_DIR)
    assert first == os.path.join(self.TEST_DIR, _resource_to_filename(url, etag="0"))
    assert os.path.exists(first + ".json")
    recorded = _Meta.from_path(first + ".json")
    assert recorded.resource == url

    # We should have made one HEAD request and one GET request.
    check_request_counts(head=1, get=1)

    # And the cached file should have the correct contents
    check_contents(first)

    # A second call to `get_from_cache` should make another HEAD call
    # but not another GET call.
    second = get_from_cache(url, cache_dir=self.TEST_DIR)
    assert second == first
    check_request_counts(head=2, get=1)
    check_contents(second)

    # A third call should have a different ETag and should force a new download,
    # which means another HEAD call and another GET call.
    third = get_from_cache(url, cache_dir=self.TEST_DIR)
    assert third == os.path.join(self.TEST_DIR, _resource_to_filename(url, etag="1"))
    check_request_counts(head=3, get=2)
    check_contents(third)
def test_snapshot_download_no_user_or_org(self):
    """Check that ``hf://t5-small`` (no user / org) resolves to a directory.

    The test expects ``cached_path`` to return a directory and to leave a
    ``.json`` metadata file next to it whose ``resource`` field is the
    original resource string.
    """
    # NOTE(review): an earlier method in this chunk has this exact name; if
    # both live in the same class this definition shadows the earlier one --
    # confirm and rename one of them.
    resource = "hf://t5-small"
    snapshot_dir = cached_path(resource)
    assert os.path.isdir(snapshot_dir)

    # The metadata sidecar is stored alongside the snapshot as "<path>.json".
    sidecar = snapshot_dir + ".json"
    assert os.path.isfile(sidecar)

    recorded = _Meta.from_path(sidecar)
    assert recorded.resource == "hf://t5-small"