Example #1
0
def _check_hash_stamp(fpath,
                      hash_prefix,
                      hasher,
                      verbose,
                      needs_download=False):
    """
    Decide whether a cached file has to be downloaded again, based on the
    hash stamp stored next to it.

    Args:
        fpath (str): path of the cached data file. The stamp file is
            expected at ``fpath + '.hash'``.

        hash_prefix (str): prefix the recorded / computed hash must start
            with.

        hasher: forwarded to ``util_hash.hash_file`` when the data file has
            to be hashed because no stamp file exists.

        verbose: if truthy, a message is printed when the hash does not
            match.

        needs_download (bool): incoming decision. This function only ever
            switches it to True, never back to False.

    Returns:
        Tuple[str, bool]: the stamp file path and the (possibly updated)
            needs_download flag.
    """
    stamp_fpath = fpath + '.hash'

    if exists(stamp_fpath):
        # A stamp is present: trust what was recorded in it.
        with open(stamp_fpath, 'r') as stamp_file:
            found_hash = stamp_file.read()
        is_valid = found_hash.startswith(hash_prefix)
    elif exists(fpath):
        # The data is present but the stamp is not: hash the data itself
        # rather than downloading it again right away.
        from ubelt import util_hash
        found_hash = util_hash.hash_file(fpath, hasher=hasher)
        is_valid = found_hash.startswith(hash_prefix)
        if is_valid:
            # Recreate the missing stamp for the next call
            with open(stamp_fpath, 'w') as stamp_file:
                stamp_file.write(hash_prefix)
    else:
        # Neither the stamp nor the data exists, nothing to verify.
        return stamp_fpath, True

    if not is_valid:
        if verbose:  # pragma: nobranch
            print('invalid hash value (expected "{}", got "{}")'.format(
                hash_prefix, found_hash))
        needs_download = True

    return stamp_fpath, needs_download
Example #2
0
 def _product_file_hash(self, product=None):
     """
     Compute the hex hash of every product file.

     Args:
         product: forwarded to ``self._rectify_products`` to obtain the
             product paths that are hashed.

     Returns:
         None if ``self.hasher`` is None, otherwise a list with one hex
         hash per product.
     """
     # Hashing is disabled when no hasher is configured
     if self.hasher is None:
         return None

     return [
         util_hash.hash_file(path, hasher=self.hasher, base='hex')
         for path in self._rectify_products(product)
     ]
Example #3
0
def grabdata(url, fpath=None, dpath=None, fname=None, redo=False,
             verbose=1, appname=None, hash_prefix=None, hasher='sha512',
             **download_kw):
    """
    Downloads a file, caches it, and returns its local path.

    Args:
        url (str): url to the file to download

        fpath (PathLike): The full path to download the file to. If
            unspecified, the arguments `dpath` and `fname` are used to
            determine this. Mutually exclusive with dpath, fname and
            appname.

        dpath (PathLike): where to download the file. If unspecified `appname`
            is used to determine this. Mutually exclusive with fpath.

        fname (str): What to name the downloaded file. Defaults to the url
            basename. Mutually exclusive with fpath.

        redo (bool): if True forces redownload of the file (default = False)

        verbose (int): verbosity level (default = 1). Any truthy value
            reports hash mismatches, a value of 2 or more also reports
            when the file is already available. It is also forwarded to
            the download call.

        appname (str): set dpath to `ub.get_app_cache_dir(appname)`.
            Mutually exclusive with dpath and fpath.

        hash_prefix (None or str):
            If specified, grabdata verifies that this matches the hash of the
            file, and then saves the hash in an adjacent file to certify that
            the download was successful. Defaults to None.

        hasher (str or Hasher):
            If hash_prefix is specified, this indicates the hashing
            algorithm to apply to the file. Defaults to sha512.

        **download_kw: additional kwargs to pass to ub.download

    Returns:
        PathLike: fpath - file path string

    Raises:
        ValueError: if `appname` is given together with `dpath`, or if
            `fpath` is given together with `dpath`, `fname` or `appname`.

    Example:
        >>> # xdoctest: +REQUIRES(--network)
        >>> import ubelt as ub
        >>> url = 'http://i.imgur.com/rqwaDag.png'
        >>> fpath = ub.grabdata(url, fname='mario.png')
        >>> result = basename(fpath)
        >>> print(result)
        mario.png

    Example:
        >>> # xdoctest: +REQUIRES(--network)
        >>> import ubelt as ub
        >>> fname = 'foo.bar'
        >>> url = 'http://i.imgur.com/rqwaDag.png'
        >>> prefix1 = '944389a39dfb8fa9'
        >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
        >>> stamp_fpath = fpath + '.hash'
        >>> assert open(stamp_fpath, 'r').read() == prefix1
        >>> # Check that the download doesn't happen again
        >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
        >>> # todo: check file timestamps have not changed
        >>> #
        >>> # Check redo works with hash
        >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, redo=True)
        >>> # todo: check file timestamps have changed
        >>> #
        >>> # Check that a redownload occurs when the stamp is changed
        >>> open(stamp_fpath, 'w').write('corrupt-stamp')
        >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
        >>> assert open(stamp_fpath, 'r').read() == prefix1
        >>> #
        >>> # Check that a redownload occurs when the stamp is removed
        >>> ub.delete(stamp_fpath)
        >>> open(fpath, 'w').write('corrupt-data')
        >>> assert not ub.hash_file(fpath, base='hex').startswith(prefix1)
        >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
        >>> assert ub.hash_file(fpath, base='hex').startswith(prefix1)
        >>> #
        >>> # Check that requesting new data causes redownload
        >>> url2 = 'https://data.kitware.com/api/v1/item/5b4039308d777f2e6225994c/download'
        >>> prefix2 = 'c98a46cb31205cf'
        >>> fpath = ub.grabdata(url2, fname=fname, hash_prefix=prefix2)
        >>> assert open(stamp_fpath, 'r').read() == prefix2
    """
    if appname and dpath:
        raise ValueError('Cannot specify appname with dpath')
    if fpath and (dpath or fname or appname):
        # The message names every argument this check rejects, so a caller
        # who passed appname is not pointed at arguments they never gave.
        raise ValueError(
            'Cannot specify fpath with dpath, fname, or appname')

    if fpath is None:
        if dpath is None:
            appname = appname or 'ubelt'
            dpath = util_platform.ensure_app_cache_dir(appname)
        if fname is None:
            fname = basename(url)
        fpath = join(dpath, fname)

    # note that needs_download is never set to false after it becomes true
    # this is the key to working through the logic of the following checks
    needs_download = redo

    if not exists(fpath):
        # always download if we are missing the file
        needs_download = True

    if hash_prefix:
        stamp_fpath = fpath + '.hash'
        # Force a re-download if the hash file does not exist or it does
        # not match the expected hash
        if exists(stamp_fpath):
            with open(stamp_fpath, 'r') as file:
                hashstr = file.read()
            if not hashstr.startswith(hash_prefix):
                if verbose:  # pragma: nobranch
                    print('invalid hash value (expected "{}", got "{}")'.format(
                        hash_prefix, hashstr))
                needs_download = True
        elif exists(fpath):
            # If the file exists, but the hash doesnt exist, simply compute the
            # hash of the existing file instead of redownloading it.
            # Redownload if this fails.
            from ubelt import util_hash
            hashstr = util_hash.hash_file(fpath, hasher=hasher)
            if hashstr.startswith(hash_prefix):
                # Write the missing stamp file if it matches
                with open(stamp_fpath, 'w') as file:
                    file.write(hash_prefix)
            else:
                if verbose:  # pragma: nobranch
                    print('invalid hash value (expected "{}", got "{}")'.format(
                        hash_prefix, hashstr))
                needs_download = True
        else:
            needs_download = True

    if needs_download:
        fpath = download(url, fpath, verbose=verbose,
                         hash_prefix=hash_prefix, hasher=hasher,
                         **download_kw)

        if hash_prefix:
            # If the file successfully downloaded then the hashes match.
            # write out the expected prefix so we can check it later
            with open(stamp_fpath, 'w') as file:
                file.write(hash_prefix)
    else:
        if verbose >= 2:
            print('Already have file %s' % fpath)
    return fpath