def download_data(output='data/noisy_mnist.npz'):
    """
    Downloads and extracts Noisy MNIST dataset (~286MB)

    The resulting .npz file contains train, valid, and test image pairs
    along with their labels in arrays train_digit, valid_digit, and
    test_digit.

    Parameters:
        output: path to the output .npz file
    """
    with tempfile.NamedTemporaryFile() as view1, \
            tempfile.NamedTemporaryFile() as view2:
        logger.info("[Step 1/3] Downloading images x (rotated)")
        _download_url_to_file(__VIEW1, view1.name, None, True)
        logger.info("[Step 2/3] Downloading images y (rotated and noisy)")
        _download_url_to_file(__VIEW2, view2.name, None, True)
        logger.info("[Step 3/3] Preparing final file")
        img_x = read_data(view1.name)
        img_y = read_data(view2.name)
        # read_data yields (images, labels) pairs for train/valid/test splits;
        # stack the two views so each split is a [view_x, view_y] array.
        train = np.asarray([img_x[0][0], img_y[0][0]])
        valid = np.asarray([img_x[1][0], img_y[1][0]])
        test = np.asarray([img_x[2][0], img_y[2][0]])
        train_digit = img_x[0][1]
        valid_digit = img_x[1][1]
        test_digit = img_x[2][1]
        # The two views must be label-aligned pairs: identical labels, same order.
        assert np.all(img_y[0][1] == train_digit)
        assert np.all(img_y[1][1] == valid_digit)
        assert np.all(img_y[2][1] == test_digit)
        np.savez_compressed(output, train=train, valid=valid, test=test,
                            train_digit=train_digit, valid_digit=valid_digit,
                            test_digit=test_digit)
def download_model(url, dst_path):
    """Download the model file at *url* into *dst_path* and verify its hash.

    The URL's basename must follow the ``name-<hash>.ext`` convention; the
    embedded hash prefix is handed to the downloader for verification.

    Returns:
        The bare filename (not the full destination path).
    """
    filename = os.path.basename(urlparse(url).path)
    hash_pattern = re.compile(r'-([a-f0-9]*)\.')
    hash_prefix = hash_pattern.search(filename).group(1)
    destination = os.path.join(dst_path, filename)
    model_zoo._download_url_to_file(url, destination, hash_prefix, True)
    return filename
def loadurl(url, model_dir=None, map_location=None, progress=True):
    """Load a torch-serialized object from *url*, caching the download.

    Parameters:
        url: URL of the object to download.
        model_dir: cache directory; defaults to ``~/.dl_ext/models``.
        map_location: forwarded to ``torch.load``.
        progress: whether to display a progress bar on stderr.

    Returns:
        The deserialized object.
    """
    if model_dir is None:
        torch_home = os.path.expanduser('~/.dl_ext')
        model_dir = os.path.join(torch_home, 'models')
    # exist_ok=True removes the exists()/makedirs() TOCTOU race when several
    # processes create the cache directory concurrently.
    os.makedirs(model_dir, exist_ok=True)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        _download_url_to_file(url, cached_file, None, progress=progress)
    return torch.load(cached_file, map_location=map_location)
def cache_url(url, model_dir=None, progress=True):
    r"""Loads the Torch serialized object at the given URL.
    If the object is already present in `model_dir`, it's deserialized and
    returned. The filename part of the URL should follow the naming convention
    ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more
    digits of the SHA256 hash of the contents of the file. The hash is used to
    ensure unique names and to verify the contents of the file.
    The default value of `model_dir` is ``$TORCH_HOME/models`` where
    ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
    overridden with the ``$TORCH_MODEL_ZOO`` environment variable.
    Args:
        url (string): URL of the object to download
        model_dir (string, optional): directory in which to save the object
        progress (bool, optional): whether or not to display a progress bar to stderr
    Example:
        >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
    """
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch'))
        model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models'))
    # exist_ok=True removes the exists()/makedirs() TOCTOU race when several
    # distributed workers reach this point at the same time.
    os.makedirs(model_dir, exist_ok=True)
    parts = urlparse(url)
    # A URL fragment, when present, overrides the basename so several URLs
    # with identical basenames can be cached side by side.
    if parts.fragment != "":
        filename = parts.fragment
    else:
        filename = os.path.basename(parts.path)
    if filename == "model_final.pkl":
        # workaround as pre-trained Caffe2 models from Detectron have all the same filename
        # so make the full path the filename by replacing / with _
        filename = parts.path.replace("/", "_")
    cached_file = os.path.join(model_dir, filename)
    # Only the main process downloads; all others wait at synchronize().
    if not os.path.exists(cached_file) and is_main_process():
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        hash_prefix = HASH_REGEX.search(filename)
        if hash_prefix is not None:
            hash_prefix = hash_prefix.group(1)
            # workaround: Caffe2 models don't have a hash, but follow the R-50 convention,
            # which matches the hash PyTorch uses. So we skip the hash matching
            # if the hash_prefix is less than 6 characters
            if len(hash_prefix) < 6:
                hash_prefix = None
        _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
    synchronize()
    return cached_file
def load_dox_url(url, filename, model_dir=None, map_location=None, progress=True):
    r"""Adapt to fit format file of mtdp pre-trained models

    Downloads *url* to ``model_dir/filename`` (unless already cached) and
    deserializes it with ``torch.load``.

    Args:
        url: URL of the object to download.
        filename: explicit cache filename (not derived from the URL).
        model_dir: cache directory; defaults to ``$TORCH_MODEL_ZOO`` or
            ``$TORCH_HOME/models`` (``~/.torch/models`` by default).
        map_location: forwarded to ``torch.load``.
        progress: whether to display a progress bar on stderr.
    """
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch'))
        model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models'))
    # exist_ok=True removes the exists()/makedirs() TOCTOU race when several
    # processes create the cache directory concurrently.
    os.makedirs(model_dir, exist_ok=True)
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        sys.stderr.flush()
        _download_url_to_file(url, cached_file, None, progress=progress)
    return torch.load(cached_file, map_location=map_location)
def cache_url(url, model_dir=None, progress=True):
    r"""Loads the Torch serialized object at the given URL.
    If the object is already present in `model_dir`, it's deserialized and
    returned. The filename part of the URL should follow the naming convention
    ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more
    digits of the SHA256 hash of the contents of the file. The hash is used to
    ensure unique names and to verify the contents of the file.
    The default value of `model_dir` is ``$TORCH_HOME/models`` where
    ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
    overridden with the ``$TORCH_MODEL_ZOO`` environment variable.
    Args:
        url (string): URL of the object to download
        model_dir (string, optional): directory in which to save the object
        progress (bool, optional): whether or not to display a progress bar to stderr
    Example:
        >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
    """
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch'))
        model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models'))
    # exist_ok=True avoids the racy exists()-then-makedirs() pattern when
    # multiple workers start simultaneously.
    os.makedirs(model_dir, exist_ok=True)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    if filename == "model_final.pkl":
        # workaround as pre-trained Caffe2 models from Detectron have all the same filename
        # so make the full path the filename by replacing / with _
        filename = parts.path.replace("/", "_")
    cached_file = os.path.join(model_dir, filename)
    # Download on the main process only; others block at synchronize().
    if not os.path.exists(cached_file) and is_main_process():
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        hash_prefix = HASH_REGEX.search(filename)
        if hash_prefix is not None:
            hash_prefix = hash_prefix.group(1)
            # workaround: Caffe2 models don't have a hash, but follow the R-50 convention,
            # which matches the hash PyTorch uses. So we skip the hash matching
            # if the hash_prefix is less than 6 characters
            if len(hash_prefix) < 6:
                hash_prefix = None
        _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
    synchronize()
    return cached_file
def load_fair_url(url, model_dir=None, progress=True):
    # note: like torch.utils.model_zoo.load_url but
    # - hash is disabled
    # - loads pickle format
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch'))
        model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models'))
    # exist_ok=True removes the exists()/makedirs() TOCTOU race.
    os.makedirs(model_dir, exist_ok=True)
    parts = model_zoo.urlparse(url)
    filename = os.path.basename(parts.path)
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        hash_prefix = None  # hash verification deliberately disabled
        model_zoo._download_url_to_file(url, cached_file, hash_prefix, progress=progress)
    # SECURITY: unpickling downloaded data can execute arbitrary code — only
    # use this with trusted URLs.
    # Use a context manager so the file handle is closed (the original leaked it).
    with open(cached_file, 'rb') as f:
        return pickle.load(f, encoding='latin')
import os
import zipfile

# PyTorch 1.1 moves _download_url_to_file
# from torch.utils.model_zoo to torch.hub
# PyTorch 1.0 exists another _download_url_to_file
# 2 argument
# TODO: If you remove support PyTorch 1.0 or older,
# You should remove torch.utils.model_zoo
# Ref. PyTorch #18758
# https://github.com/pytorch/pytorch/pull/18758/commits
try:
    from torch.utils.model_zoo import _download_url_to_file
except ImportError:
    try:
        from torch.hub import download_url_to_file as _download_url_to_file
    except ImportError:
        from torch.hub import _download_url_to_file


def unzip(source_filename, dest_dir):
    """Extract every member of the zip archive *source_filename* into *dest_dir*."""
    with zipfile.ZipFile(source_filename) as archive:
        archive.extractall(path=dest_dir)


if __name__ == '__main__':
    # Fetch the pre-trained model bundle, then unpack it in place.
    _download_url_to_file(
        'https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=1',
        'saved_models.zip', None, True)
    unzip('saved_models.zip', '.')
]) downloads = [] torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) for m, url in tasks.items(): if not os.path.exists(model_dir): os.makedirs(model_dir) parts = urlparse(url) filename = os.path.basename(parts.path) cached_file = os.path.join(model_dir, filename) if not os.path.exists(cached_file): sys.stderr.write('Downloading: "{}" to {}\n'.format( url, cached_file)) hash_prefix = HASH_REGEX.search(filename).group(1) _download_url_to_file(url, cached_file, hash_prefix, progress=True) downloads.append((m, cached_file)) if sys.version_info >= (3, 0): raise RuntimeError( 'You can download with python2|3, but convert with python2 only.') import cPickle for m, file in downloads: p1, p2 = os.path.split(file) p3, p4 = os.path.splitext(p2) file_v2 = os.path.join(p1, p3 + '.pkl') mi = m() mi.load_state_dict(torch.load(file)) np_state_dict = state_dict_v2(mi) cPickle.dump(np_state_dict, open(file_v2, 'wb'),
# PyTorch 1.1 moves _download_url_to_file
# from torch.utils.model_zoo to torch.hub
# PyTorch 1.0 exists another _download_url_to_file
# 2 argument
# TODO: If you remove support PyTorch 1.0 or older,
# You should remove torch.utils.model_zoo
# Ref. PyTorch #18758
# https://github.com/pytorch/pytorch/pull/18758/commits
try:
    from torch.utils.model_zoo import _download_url_to_file
except ImportError:
    try:
        from torch.hub import download_url_to_file as _download_url_to_file
    except ImportError:
        from torch.hub import _download_url_to_file


def unzip(source_filename, dest_dir):
    """Extract all members of the zip archive *source_filename* into *dest_dir*."""
    with zipfile.ZipFile(source_filename) as archive:
        archive.extractall(path=dest_dir)


if __name__ == "__main__":
    # Download the pre-trained model bundle and unpack it in place.
    _download_url_to_file(
        "https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=1",
        "saved_models.zip",
        None,
        True,
    )
    unzip("saved_models.zip", ".")
import os
import zipfile

# _download_url_to_file moved from torch.utils.model_zoo to torch.hub in
# PyTorch 1.1 and is exposed publicly as download_url_to_file in later
# releases; the bare torch.utils.model_zoo import breaks on modern PyTorch,
# so fall back through the known locations.
try:
    from torch.utils.model_zoo import _download_url_to_file
except ImportError:
    try:
        from torch.hub import download_url_to_file as _download_url_to_file
    except ImportError:
        from torch.hub import _download_url_to_file


def unzip(source_filename, dest_dir):
    """Extract the zip archive *source_filename* into *dest_dir*."""
    with zipfile.ZipFile(source_filename) as zf:
        zf.extractall(path=dest_dir)


if __name__ == '__main__':
    # Fetch the pre-trained model bundle, then unpack it in place.
    _download_url_to_file('https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=1',
                          'saved_models.zip', None, True)
    unzip('saved_models.zip', '.')