Exemple #1
0
def grab_file_url(file_url, appname='utool', download_dir=None, delay=None,
                  spoof=False, fname=None, verbose=True, redownload=False,
                  check_hash=False):
    r"""
    Downloads a file and returns the local path of the file.

    The resulting file is cached, so multiple calls to this function do not
    result in multiple downloads.

    Args:
        file_url (str): url to the file
        appname (str): passed to ``util_cplat.get_app_resource_dir`` to choose
            the download directory when ``download_dir`` is None
            (default = 'utool')
        download_dir (str): custom directory to download into (default = None)
        delay (None): if not None, number of seconds to sleep before the
            download starts (default = None)
        spoof (bool): forwarded to ``download_url`` (default = False)
        fname (str):  custom file name; when None the basename of the url is
            used (default = None)
        verbose (bool):  verbosity flag (default = True)
        redownload (bool): if True forces redownload of the file
            (default = False)
        check_hash (bool or list or tuple): if a list or tuple, it is used as
            the ordered list of hash tags to look for remotely, for example
            ['md5', 'sha256'].  Any other truthy value checks only 'md5'.
            When a remote hash is found it is compared with the local one and
            a missing or different local hash forces a redownload.  When no
            remote hash is found, hash verification is skipped
            (default = False)

    Returns:
        str: fpath - file path string

    Raises:
        AssertionError: if the local and remote hashes (or their tags)
            disagree.  These checks are raised explicitly so they stay active
            under ``python -O``.

    CommandLine:
        python -m utool.util_grabdata --test-grab_file_url:0
        python -m utool.util_grabdata --test-grab_file_url:1

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_grabdata import *  # NOQA
        >>> import utool as ut  # NOQA
        >>> from os.path import basename
        >>> ut.exec_funckw(ut.grab_file_url, locals())
        >>> file_url = 'http://i.imgur.com/JGrqMnV.png'
        >>> redownload = True
        >>> fname = 'lena.png'
        >>> lena_fpath = ut.grab_file_url(file_url, fname=fname,
        >>>                               redownload=redownload)
        >>> result = basename(lena_fpath)
        >>> print(result)
        lena.png

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_grabdata import *  # NOQA
        >>> import utool as ut  # NOQA
        >>> ut.exec_funckw(ut.grab_file_url, locals())
        >>> file_url = 'https://lev.cs.rpi.edu/public/models/detect.yolo.12.classes'
        >>> fname = 'detect.yolo.12.classes'
        >>> check_hash = True
        >>> fpath = ut.grab_file_url(file_url, fname=fname, check_hash=check_hash)
    """
    file_url = clean_dropbox_link(file_url)
    if fname is None:
        fname = basename(file_url)
    # Directory the file is downloaded into
    if download_dir is None:
        download_dir = util_cplat.get_app_resource_dir(appname)
    # Local path the file is stored at
    fpath = join(download_dir, fname)
    # If check hash, get remote hash and compare it against the local copy
    if check_hash:
        if isinstance(check_hash, (list, tuple)):
            hash_list = check_hash
        else:
            hash_list = ['md5']
        # Get expected remote hash and the tag of the hash type that was found
        hash_remote, hash_tag_remote = grab_file_remote_hash(file_url, hash_list, verbose=verbose)
        # From here on only the hash type found remotely is considered
        hash_list = [hash_tag_remote]
        # Look up the hash of the same type for the local copy
        hash_local, hash_tag_local = get_file_local_hash(fpath, hash_list, verbose=verbose)
        if verbose:
            print('[utool] Pre Local Hash:  %r' % (hash_local, ))
            print('[utool] Pre Remote Hash: %r' % (hash_remote, ))
        # Check all 4 hash conditions
        if hash_remote is None:
            # No remote hash provided, turn off post-download hash check
            check_hash = False
        elif hash_local is None:
            if verbose:
                print('[utool] Remote hash provided but local hash missing, redownloading.')
            redownload = True
        elif hash_local == hash_remote:
            # Explicit raise (not assert) so the check survives ``python -O``
            if hash_tag_local != hash_tag_remote:
                raise AssertionError('hash tag disagreement')
        else:
            if verbose:
                print('[utool] Both hashes provided, but they disagree, redownloading.')
            redownload = True

    # Download
    util_path.ensurepath(download_dir)
    if redownload or not exists(fpath):
        if verbose:
            print('[utool] Downloading file %s' % fpath)
        if delay is not None:
            print('[utool] delay download by %r seconds' % (delay,))
            time.sleep(delay)
        download_url(file_url, fpath, spoof=spoof)
    else:
        if verbose:
            print('[utool] Already have file %s' % fpath)

    util_path.assert_exists(fpath)
    # Post-download local hash verification
    if check_hash:
        # File has been successfully downloaded, write remote hash to local hash file
        hash_fpath = '%s.%s' % (fpath, hash_tag_remote, )
        with open(hash_fpath, 'w') as hash_file:
            hash_file.write(hash_remote)
        # NOTE(review): the hash file is written before the local hash is
        # recomputed; confirm get_file_local_hash hashes the file content
        # rather than reading this file back.
        hash_local, hash_tag_local = get_file_local_hash(fpath, hash_list, verbose=verbose)
        if verbose:
            print('[utool] Post Local Hash: %r' % (hash_local, ))
        # Explicit raises (not asserts) so verification survives ``python -O``
        if hash_local != hash_remote:
            raise AssertionError('Post-download hash disagreement')
        if hash_tag_local != hash_tag_remote:
            raise AssertionError('Post-download hash tag disagreement')
    return fpath
Exemple #2
0
def grab_zipped_url(zipped_url, ensure=True, appname='utool',
                    download_dir=None, force_commonprefix=True, cleanup=False,
                    redownload=False, spoof=False):
    r"""
    Downloads an archive from a url, unpacks it and returns the data directory.

    The data directory is ``download_dir`` joined with the archive file name
    stripped of its archive extension.

    Args:
        zipped_url (str): url which must be either a .zip or a .tar.gz file
        ensure (bool): if True, download and unpack whenever the data
            directory does not exist yet (default = True)
        appname (str): passed to ``util_cplat.get_app_resource_dir`` to choose
            the download directory when ``download_dir`` is None
            (default = 'utool')
        download_dir (str): directory the archive is downloaded into
            (default = None)
        force_commonprefix (bool): forwarded to ``unarchive_file``
            (default = True)
        cleanup (bool): if True, the archive is deleted after unpacking and
            the data directory is asserted to exist (default = False)
        redownload (bool): if True, ``util_path.remove_dirs`` is called on the
            data directory and the archive is downloaded and unpacked again
            (default = False)
        spoof (bool): forwarded to ``download_url`` (default = False)

    Returns:
        str: the data directory passed through ``util_path.unixpath``

    CommandLine:
        python -m utool.util_grabdata --exec-grab_zipped_url --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_grabdata import *  # NOQA
        >>> import utool as ut
        >>> zipped_url = '?'
        >>> ensure = True
        >>> appname = 'utool'
        >>> download_dir = None
        >>> force_commonprefix = True
        >>> cleanup = False
        >>> redownload = False
        >>> spoof = False
        >>> result = grab_zipped_url(zipped_url, ensure, appname, download_dir,
        >>>                          force_commonprefix, cleanup, redownload,
        >>>                          spoof)
        >>> print(result)

    Examples:
        >>> from utool.util_grabdata import *  # NOQA
        >>> zipped_url = 'https://lev.cs.rpi.edu/public/data/testdata.zip'
        >>> zipped_url = 'http://www.spam.com/eggs/data.zip'

    """
    archive_url = clean_dropbox_link(zipped_url)
    archive_fname = split(archive_url)[1]
    data_name = split_archive_ext(archive_fname)[0]
    # Where the archive is stored
    target_dir = download_dir
    if target_dir is None:
        target_dir = util_cplat.get_app_resource_dir(appname)
    # Where the archive is expected to unpack to
    data_dir = join(target_dir, data_name)
    if redownload:
        # Drop any previously unpacked data before fetching again
        util_path.remove_dirs(data_dir)
    if ensure or redownload:
        util_path.ensurepath(target_dir)
        data_missing = not exists(data_dir)
        if data_missing or redownload:
            archive_fpath = realpath(join(target_dir, archive_fname))
            archive_missing = not exists(archive_fpath)
            if archive_missing or redownload:
                download_url(archive_url, archive_fpath, spoof=spoof)
            unarchive_file(archive_fpath, force_commonprefix)
            if cleanup:
                # The archive itself is no longer needed once unpacked
                util_path.delete(archive_fpath)
    # NOTE(review): the existence check is gated on ``cleanup``, not
    # ``ensure`` -- confirm this is intended.
    if cleanup:
        util_path.assert_exists(data_dir)
    return util_path.unixpath(data_dir)
Exemple #3
0
def grab_file_url(file_url, ensure=True, appname='utool', download_dir=None,
                  delay=None, spoof=False, fname=None, verbose=True,
                  redownload=False):
    r"""
    Fetches a single file from a url and returns where it is stored locally.

    An already existing local copy is reused unless ``redownload`` is set, so
    repeated calls do not download the file again.

    Args:
        file_url (str): url to the file
        ensure (bool):  if False the file is assumed to be downloaded and its
            existence is not asserted (default = True)
        appname (str): passed to ``util_cplat.get_app_resource_dir`` to choose
            the download directory when ``download_dir`` is None
            (default = 'utool')
        download_dir (str): custom directory to download into (default = None)
        delay (None): if not None, number of seconds to sleep before the
            download starts (default = None)
        spoof (bool): forwarded to ``download_url`` (default = False)
        fname (str):  custom file name; when None the basename of the url is
            used (default = None)
        verbose (bool):  verbosity flag (default = True)
        redownload (bool): if True forces redownload of the file
            (default = False)

    Returns:
        str: fpath - ``download_dir`` joined with the file name

    CommandLine:
        sh -c "python ~/code/utool/utool/util_grabdata.py --all-examples"
        python -m utool.util_grabdata --test-grab_file_url

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_grabdata import *  # NOQA
        >>> import utool as ut  # NOQA
        >>> from os.path import basename
        >>> file_url = 'http://i.imgur.com/JGrqMnV.png'
        >>> ensure = True
        >>> appname = 'utool'
        >>> download_dir = None
        >>> delay = None
        >>> spoof = False
        >>> verbose = True
        >>> redownload = True
        >>> fname = 'lena.png'
        >>> lena_fpath = ut.grab_file_url(file_url, ensure, appname, download_dir,
        >>>                               delay, spoof, fname, verbose, redownload)
        >>> result = basename(lena_fpath)
        >>> print(result)
        lena.png
    """
    source_url = clean_dropbox_link(file_url)
    local_name = basename(source_url) if fname is None else fname
    if download_dir is None:
        target_dir = util_cplat.get_app_resource_dir(appname)
    else:
        target_dir = download_dir
    fpath = join(target_dir, local_name)
    if ensure or redownload:
        util_path.ensurepath(target_dir)
        if redownload or not exists(fpath):
            if verbose:
                print('[utool] Downloading file %s' % fpath)
            if delay is not None:
                # The delay notice is printed regardless of ``verbose``
                print('[utool] delay download by %r seconds' % (delay,))
                time.sleep(delay)
            download_url(source_url, fpath, spoof=spoof)
        elif verbose:
            print('[utool] Already have file %s' % fpath)
    if ensure:
        util_path.assert_exists(fpath)
    return fpath