def _check_hash_stamp(fpath, hash_prefix, hasher, verbose, needs_download=False):
    """
    Validate a cached file against its ``.hash`` stamp file.

    The stamp lives next to the data file at ``fpath + '.hash'``. Three
    situations are handled:

    * The stamp exists: its contents must start with ``hash_prefix``.
    * The stamp is missing but the data file exists: the data file is hashed
      and, if the hash starts with ``hash_prefix``, the stamp is written.
    * Neither exists: a download is required.

    Args:
        fpath (str): path of the cached data file.
        hash_prefix (str): expected leading characters of the hash.
        hasher (str or Hasher): forwarded to ``util_hash.hash_file`` when the
            data file itself has to be hashed.
        verbose (bool or int): if truthy, a message is printed on a mismatch.
        needs_download (bool): incoming state. It is only ever switched to
            True here, never back to False.

    Returns:
        Tuple[str, bool]: ``(stamp_fpath, needs_download)``
    """
    stamp_fpath = fpath + '.hash'
    mismatch_template = 'invalid hash value (expected "{}", got "{}")'

    stamp_present = exists(stamp_fpath)
    if not stamp_present and not exists(fpath):
        # Nothing on disk to validate against.
        return stamp_fpath, True

    if stamp_present:
        # Trust the recorded stamp rather than re-hashing the data.
        with open(stamp_fpath, 'r') as file:
            found_hash = file.read()
    else:
        # The data exists without a stamp: hash it instead of forcing a
        # redownload.
        from ubelt import util_hash
        found_hash = util_hash.hash_file(fpath, hasher=hasher)

    if not found_hash.startswith(hash_prefix):
        if verbose:  # pragma: nobranch
            print(mismatch_template.format(hash_prefix, found_hash))
        needs_download = True
    elif not stamp_present:
        # Write the missing stamp file now that the data is known to match.
        with open(stamp_fpath, 'w') as file:
            file.write(hash_prefix)

    return stamp_fpath, needs_download
def _product_file_hash(self, product=None):
    """
    Compute a hex hash for each product file.

    Args:
        product: passed through to ``self._rectify_products`` to obtain the
            product paths to hash.

    Returns:
        None or List[str]: None when ``self.hasher`` is None, otherwise one
            hash per path returned by ``self._rectify_products``, in order.
    """
    if self.hasher is None:
        # Hashing is disabled for this instance.
        return None

    product_paths = self._rectify_products(product)
    return [
        util_hash.hash_file(path, hasher=self.hasher, base='hex')
        for path in product_paths
    ]
def grabdata(url, fpath=None, dpath=None, fname=None, redo=False,
             verbose=1, appname=None, hash_prefix=None, hasher='sha512',
             **download_kw):
    """
    Downloads a file, caches it, and returns its local path.

    Args:
        url (str): url to the file to download

        fpath (PathLike): The full path to download the file to. If
            unspecified, the arguments `dpath` and `fname` are used to
            determine this.

        dpath (PathLike): where to download the file. If unspecified `appname`
            is used to determine this. Mutually exclusive with fpath.

        fname (str): What to name the downloaded file. Defaults to the url
            basename. Mutually exclusive with fpath.

        redo (bool): if True forces redownload of the file (default = False)

        verbose (int): verbosity level (default = 1). Any truthy value is
            passed on to the hash check and to the download; a value of 2 or
            more additionally prints a message when the cached file is reused.

        appname (str): set dpath to `ub.get_app_cache_dir(appname)`.
            Mutually exclusive with dpath and fpath.

        hash_prefix (None or str):
            If specified, grabdata verifies that this matches the hash of the
            file, and then saves the hash in a adjacent file to certify that
            the download was successful. Defaults to None.

        hasher (str or Hasher):
            If hash_prefix is specified, this indicates the hashing
            algorithm to apply to the file. Defaults to sha512.

        **download_kw: additional kwargs to pass to ub.download

    Returns:
        PathLike: fpath - file path string

    Raises:
        ValueError: if `appname` is combined with `dpath`, or if `fpath` is
            combined with any of `dpath`, `fname` or `appname`.

    Example:
        >>> # xdoctest: +REQUIRES(--network)
        >>> import ubelt as ub
        >>> url = 'http://i.imgur.com/rqwaDag.png'
        >>> fpath = ub.grabdata(url, fname='mario.png')
        >>> result = basename(fpath)
        >>> print(result)
        mario.png

    Example:
        >>> # xdoctest: +REQUIRES(--network)
        >>> import ubelt as ub
        >>> fname = 'foo.bar'
        >>> url = 'http://i.imgur.com/rqwaDag.png'
        >>> prefix1 = '944389a39dfb8fa9'
        >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
        >>> stamp_fpath = fpath + '.hash'
        >>> assert open(stamp_fpath, 'r').read() == prefix1
        >>> # Check that the download doesn't happen again
        >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
        >>> # todo: check file timestamps have not changed
        >>> #
        >>> # Check redo works with hash
        >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, redo=True)
        >>> # todo: check file timestamps have changed
        >>> #
        >>> # Check that a redownload occurs when the stamp is changed
        >>> open(stamp_fpath, 'w').write('corrupt-stamp')
        >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
        >>> assert open(stamp_fpath, 'r').read() == prefix1
        >>> #
        >>> # Check that a redownload occurs when the stamp is removed
        >>> ub.delete(stamp_fpath)
        >>> open(fpath, 'w').write('corrupt-data')
        >>> assert not ub.hash_file(fpath, base='hex').startswith(prefix1)
        >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
        >>> assert ub.hash_file(fpath, base='hex').startswith(prefix1)
        >>> #
        >>> # Check that requesting new data causes redownload
        >>> url2 = 'https://data.kitware.com/api/v1/item/5b4039308d777f2e6225994c/download'
        >>> prefix2 = 'c98a46cb31205cf'
        >>> fpath = ub.grabdata(url2, fname=fname, hash_prefix=prefix2)
        >>> assert open(stamp_fpath, 'r').read() == prefix2
    """
    if appname and dpath:
        raise ValueError('Cannot specify appname with dpath')
    if fpath and (dpath or fname or appname):
        raise ValueError('Cannot specify fpath with dpath or fname')

    if fpath is None:
        # Build the destination from the cache directory and file name.
        if dpath is None:
            appname = appname or 'ubelt'
            dpath = util_platform.ensure_app_cache_dir(appname)
        if fname is None:
            fname = basename(url)
        fpath = join(dpath, fname)

    # note that needs_download is never set to false after it becomes true
    # this is the key to working through the logic of the following checks
    needs_download = redo

    if not exists(fpath):
        # always download if we are missing the file
        needs_download = True

    if hash_prefix:
        # Delegate stamp validation to the shared helper so this logic lives
        # in one place. It forces a re-download when the stamp (or, if the
        # stamp is missing, the hash of the existing file) does not match.
        stamp_fpath, needs_download = _check_hash_stamp(
            fpath, hash_prefix, hasher, verbose, needs_download)

    if needs_download:
        fpath = download(url, fpath, verbose=verbose,
                         hash_prefix=hash_prefix, hasher=hasher,
                         **download_kw)

        if hash_prefix:
            # If the file successfully downloaded then the hashes match.
            # write out the expected prefix so we can check it later
            with open(stamp_fpath, 'w') as file:
                file.write(hash_prefix)
    else:
        if verbose >= 2:
            print('Already have file %s' % fpath)
    return fpath