def test_url_to_filename(self):
    """Round-trip several URLs through url_to_filename / filename_to_url.

    For each URL the test expects that:
    * the generated filename does not contain the substring "http";
    * filename_to_url raises FileNotFoundError while the cache directory
      holds neither the cached file nor its ``.json`` metadata;
    * it still raises FileNotFoundError when only the cached file exists;
    * once the metadata file is written, it returns the original URL and
      a ``None`` etag.
    """
    for url in ['http://allenai.org', 'http://allennlp.org',
                'https://www.google.com', 'http://pytorch.org',
                'https://s3-us-west-2.amazonaws.com/allennlp' + '/long' * 20 + '/url']:
        filename = url_to_filename(url)
        assert "http" not in filename

        # Nothing has been written to the cache directory yet.
        with pytest.raises(FileNotFoundError):
            filename_to_url(filename, cache_dir=self.TEST_DIR)

        # The cached file alone is not enough; the metadata is still missing.
        pathlib.Path(os.path.join(self.TEST_DIR, filename)).touch()
        with pytest.raises(FileNotFoundError):
            filename_to_url(filename, cache_dir=self.TEST_DIR)

        # Write the metadata inside a context manager so the handle is closed
        # (and the JSON flushed to disk) before filename_to_url is called.
        meta_path = os.path.join(self.TEST_DIR, filename + '.json')
        with open(meta_path, 'w') as meta_file:
            json.dump({'url': url, 'etag': None}, meta_file)

        back_to_url, etag = filename_to_url(filename, cache_dir=self.TEST_DIR)
        assert back_to_url == url
        assert etag is None
def test_url_to_filename_with_etags_eliminates_quotes(self):
    """Check that a quoted etag comes back from filename_to_url unquoted.

    Each URL is encoded with the etag ``'"mytag"'``; the test expects the
    decoded etag to be ``"mytag"`` and the decoded URL to equal the input.
    """
    sample_urls = (
        'http://allenai.org',
        'http://allennlp.org',
        'https://www.google.com',
        'http://pytorch.org',
    )
    for original_url in sample_urls:
        encoded = url_to_filename(original_url, etag='"mytag"')
        assert "http" not in encoded

        # Create an empty placeholder for the cached file in the test directory.
        placeholder = os.path.join(self.TEST_DIR, encoded)
        pathlib.Path(placeholder).touch()

        # NOTE(review): filename_to_url is called without cache_dir here, so
        # it is not told about TEST_DIR - confirm this matches the API in use.
        decoded_url, decoded_etag = filename_to_url(encoded)
        assert decoded_url == original_url
        assert decoded_etag == "mytag"
def test_url_to_filename(self):
    """Check that URLs survive url_to_filename followed by filename_to_url.

    With no etag supplied, the test expects the decoded etag to be ``None``
    and the generated filename not to contain the substring "http".
    """
    sample_urls = (
        'http://allenai.org',
        'http://allennlp.org',
        'https://www.google.com',
        'http://pytorch.org',
    )
    for original_url in sample_urls:
        encoded = url_to_filename(original_url)
        assert "http" not in encoded

        # Create an empty placeholder for the cached file in the test directory.
        placeholder = os.path.join(self.TEST_DIR, encoded)
        pathlib.Path(placeholder).touch()

        decoded_url, decoded_etag = filename_to_url(encoded)
        assert decoded_url == original_url
        assert decoded_etag is None
def test_url_to_filename_with_etags_eliminates_quotes(self):
    """Round-trip URLs whose filename was built with a quoted etag.

    Each filename is generated with ``etag='"mytag"'``. The test then creates
    the cached file and a ``.json`` metadata file holding the URL and the
    unquoted etag, and expects filename_to_url to return that URL together
    with ``"mytag"``.
    """
    for url in ['http://allenai.org', 'http://allennlp.org',
                'https://www.google.com', 'http://pytorch.org']:
        filename = url_to_filename(url, etag='"mytag"')
        assert "http" not in filename

        pathlib.Path(os.path.join(self.TEST_DIR, filename)).touch()

        # Write the metadata inside a context manager so the handle is closed
        # (and the JSON flushed to disk) before filename_to_url is called.
        meta_path = os.path.join(self.TEST_DIR, filename + '.json')
        with open(meta_path, 'w') as meta_file:
            json.dump({'url': url, 'etag': 'mytag'}, meta_file)

        back_to_url, etag = filename_to_url(filename, cache_dir=self.TEST_DIR)
        assert back_to_url == url
        assert etag == "mytag"
def test_resource_to_filename_with_etags_eliminates_quotes(self):
    """Round-trip URLs whose filename was built by _resource_to_filename.

    Each filename is generated with ``etag='"mytag"'``. The test then creates
    the cached file and a ``.json`` metadata file holding the URL and the
    unquoted etag, and expects filename_to_url to return that URL together
    with ``"mytag"``.
    """
    for url in [
        "http://allenai.org",
        "http://allennlp.org",
        "https://www.google.com",
        "http://pytorch.org",
    ]:
        filename = _resource_to_filename(url, etag='"mytag"')
        assert "http" not in filename

        pathlib.Path(os.path.join(self.TEST_DIR, filename)).touch()

        # Write the metadata inside a context manager so the handle is closed
        # (and the JSON flushed to disk) before filename_to_url is called.
        meta_path = os.path.join(self.TEST_DIR, filename + ".json")
        with open(meta_path, "w") as meta_file:
            json.dump({"url": url, "etag": "mytag"}, meta_file)

        back_to_url, etag = filename_to_url(filename, cache_dir=self.TEST_DIR)
        assert back_to_url == url
        assert etag == "mytag"
def test_url_to_filename_with_etags(self):
    """Round-trip URLs with an etag and check the etag affects the filename.

    For each URL the test creates the cached file and a ``.json`` metadata
    file, then expects filename_to_url to return the URL and ``"mytag"``.
    Finally it checks that appending "1" to the URL and passing "1" as the
    etag produce different filenames.
    """
    for url in [
        "http://allenai.org",
        "http://allennlp.org",
        "https://www.google.com",
        "http://pytorch.org",
    ]:
        filename = url_to_filename(url, etag="mytag")
        assert "http" not in filename

        pathlib.Path(os.path.join(self.TEST_DIR, filename)).touch()

        # Write the metadata inside a context manager so the handle is closed
        # (and the JSON flushed to disk) before filename_to_url is called.
        meta_path = os.path.join(self.TEST_DIR, filename + ".json")
        with open(meta_path, "w") as meta_file:
            json.dump({"url": url, "etag": "mytag"}, meta_file)

        back_to_url, etag = filename_to_url(filename, cache_dir=self.TEST_DIR)
        assert back_to_url == url
        assert etag == "mytag"

    # A URL ending in "1" must not collide with the bare URL plus etag "1".
    baseurl = "http://allenai.org/"
    assert url_to_filename(baseurl + "1") != url_to_filename(baseurl, etag="1")
# Script: print the URL and ETag recorded for every entry in the dataset cache.
import os
import base64
import sys

# Prepend the parent of this script's directory to sys.path so that the
# `allennlp` imports below can be resolved from there.
script_dir = os.path.abspath(os.path.join(__file__, os.pardir))
sys.path.insert(0, os.path.dirname(script_dir))

from allennlp.common.file_utils import filename_to_url
from allennlp.common.file_utils import DATASET_CACHE

try:
    entries = os.listdir(DATASET_CACHE)
    if len(entries) == 0:
        print('No cached datasets found.')
    for entry in entries:
        source_url, source_etag = filename_to_url(entry)
        print('Filename: %s' % entry)
        print('Url: %s' % source_url)
        print('ETag: %s' % source_etag)
        # Blank line between entries.
        print()
except FileNotFoundError:
    # Raised by os.listdir when DATASET_CACHE does not exist; any
    # FileNotFoundError raised inside the loop also ends up here.
    print('No cached datasets found.')
# Script: list each file in the dataset cache with its decoded URL and ETag.
import os
import base64
import sys

# Make the directory above this script's directory importable first, so the
# `allennlp` imports below can be resolved from there.
sys.path.insert(
    0,
    os.path.dirname(
        os.path.abspath(
            os.path.join(__file__, os.pardir)
        )
    ),
)

from allennlp.common.file_utils import filename_to_url
from allennlp.common.file_utils import DATASET_CACHE

try:
    cache_listing = os.listdir(DATASET_CACHE)
    if not cache_listing:
        print('No cached datasets found.')
    else:
        for cached_name in cache_listing:
            decoded_url, decoded_etag = filename_to_url(cached_name)
            print('Filename: %s' % cached_name)
            print('Url: %s' % decoded_url)
            print('ETag: %s' % decoded_etag)
            print()  # blank separator line
except FileNotFoundError:
    # Covers a missing cache directory as well as any FileNotFoundError
    # raised while decoding an individual entry.
    print('No cached datasets found.')