Ejemplo n.º 1
0
def test_download_bad_url():
    """
    Check that we error when the url is bad

    CommandLine:
        python -m ubelt.tests.test_download test_download_bad_url --verbose
    """
    url = 'http://a-very-incorrect-url'

    if not ub.argflag('--network'):
        pytest.skip('not running network tests')

    dpath = ub.ensure_app_cache_dir('ubelt', 'tests')
    fname = basename(url)
    fpath = join(dpath, fname)

    # Start from a clean slate so the download path is actually exercised
    ub.delete(fpath)
    assert not exists(fpath)

    # This file already requires Python 3 (f-strings are used elsewhere in
    # it), so the six / urllib2 compatibility shim was dead code.
    from urllib.error import URLError
    with pytest.raises(URLError):
        ub.download(url, fpath=fpath, verbose=1)
Ejemplo n.º 2
0
def test_download_bad_url():
    """
    Check that we error when the url is bad

    Notes:
        For some reason this can take a long time to realize there is no URL,
        even if the timeout is specified and fairly low.

    CommandLine:
        python tests/test_download.py test_download_bad_url --verbose
    """
    url = 'http://www.a-very-incorrect-url.gov/does_not_exist.txt'

    # This file already requires Python 3 (f-strings are used elsewhere in
    # it), so the Python 2 urllib2 fallback was dead code and is removed.
    from urllib.error import URLError  # NOQA

    dpath = ub.ensure_app_cache_dir('ubelt', 'tests')
    fname = basename(url)
    fpath = join(dpath, fname)

    # Ensure a fresh download attempt so a stale file cannot mask the error
    ub.delete(fpath)
    assert not exists(fpath)

    with pytest.raises(URLError):
        ub.download(url, fpath=fpath, verbose=1, timeout=1.0)
Ejemplo n.º 3
0
def have_gov_certs():
    """
    Return True if this machine can fetch a file from ``gwg.nga.mil``,
    which requires US-government CA certificates to be installed.
    """
    # URLError is publicly defined in urllib.error; the original caught
    # ``urllib.request.URLError``, which only resolves because of an
    # undocumented re-export inside urllib.request.
    from urllib.error import URLError
    try:
        # Test to see if we have certs by attempting a real download
        ub.download(
            'https://gwg.nga.mil/ntb/baseline/software/testfile/Nitfv2_1/scen_2_1.html'
        )
        return True
    except URLError:
        return False
Ejemplo n.º 4
0
def test_download_bad_url():
    """
    Check that downloading from an unresolvable url raises an error.

    Note: the original docstring ("Check where the url is downloaded to when
    fpath is not specified") was a copy-paste error — fpath IS passed here
    and the assertion is about the raised exception.
    """
    url = 'http://averyincorrecturl'

    dpath = ub.ensure_app_cache_dir('ubelt', 'tests')
    fname = basename(url)
    fpath = join(dpath, fname)

    # Make sure a stale file does not mask the failure
    ub.delete(fpath)
    assert not exists(fpath)

    with pytest.raises(Exception):
        ub.download(url, fpath=fpath)
Ejemplo n.º 5
0
def test_download_cover_hashers():
    """Exercise the md5 and sha256 code paths of ub.download for coverage."""
    url = 'http://i.imgur.com/rqwaDag.png'

    if not ub.argflag('--network'):
        pytest.skip('not running network tests')

    dpath = ub.ensure_app_cache_dir('ubelt')
    fname = basename(url)

    # One download per supported string-named hasher
    hasher_cases = [
        ('md5', '545e3a51404f664e46aa65'),
        ('sha256', '31a129618c87dd667103'),
    ]
    for hasher_name, prefix in hasher_cases:
        ub.download(url, hasher=hasher_name, hash_prefix=prefix,
                    dpath=dpath, fname=fname)
Ejemplo n.º 6
0
def main():
    """
    Checks that the latest wheels on pypi agree with the gpg key
    """
    import requests

    package_name = 'ubelt'
    url = "https://pypi.python.org/pypi/{}/json".format(package_name)
    package = requests.get(url).json()
    max_ver = max(package["releases"].keys())
    # ... check compatibility
    latest_wheel_info_list = package['releases'][max_ver]

    for wheel_info in latest_wheel_info_list:
        import ubelt as ub
        whl_fpath = ub.grabdata(wheel_info['url'],
                                hash_prefix=wheel_info['digests']['sha256'],
                                hasher='sha256')

        if not wheel_info['has_sig']:
            raise ValueError('info says no sig')

        sig_fpath = ub.download(wheel_info['url'] + '.asc', )

        info = ub.cmd('gpg --verify {} {}'.format(sig_fpath, whl_fpath),
                      verbose=3)
        assert info['ret'] == 0
Ejemplo n.º 7
0
def test_download_with_progkw():
    """
    Test that progkw is properly passed through to ub.download
    """
    url = _demo_url(128 * 10)
    dpath = ub.ensure_app_cache_dir('ubelt', 'tests')
    fpath = join(dpath, basename(url))
    # Per-chunk verbose progress should emit many lines of output
    progkw = {'verbose': 3, 'freq': 1, 'adjust': False}
    with ub.CaptureStdout() as cap:
        ub.download(url, fpath=fpath, progkw=progkw, chunksize=128)
    assert len(cap.text.split('\n')) > 10
Ejemplo n.º 8
0
def download_metadata_file(url, outputdir, program):
    """Download and unzip the catalogue files.

    If the unzipped csv index already exists it is returned directly;
    otherwise the gzipped index is downloaded (if needed), decompressed,
    and the archive is removed.  Returns the path to the csv index.
    """
    if outputdir is None:
        outputdir = FELS_DEFAULT_OUTPUTDIR
    index_path = os.path.join(outputdir, 'index_' + program + '.csv')
    zipped_index_path = index_path + '.gz'
    if not os.path.isfile(index_path):
        if not os.path.isfile(zipped_index_path):
            parent_dir = os.path.dirname(zipped_index_path)
            if not os.path.exists(parent_dir):
                os.makedirs(parent_dir)
            print('Downloading Metadata file...')
            print('url = {!r}'.format(url))
            print('outputdir = {!r}'.format(outputdir))
            ubelt.download(url, fpath=zipped_index_path, chunksize=int(2**22))
        print('Unzipping Metadata file...')
        with gzip.open(zipped_index_path) as gzip_index:
            with open(index_path, 'wb') as f:
                shutil.copyfileobj(gzip_index, f)
        ubelt.delete(zipped_index_path)  # remove archive file
    return index_path
Ejemplo n.º 9
0
def test_download_with_io():
    """When given a file-like object, ub.download writes into it and
    returns the same object."""
    import ubelt as ub
    import io
    url = _demo_url(128 * 3)
    buffer = io.BytesIO()
    result = ub.download(url, buffer)
    assert result is buffer
    # Rewind and hash the bytes that were written into the buffer
    buffer.seek(0)
    hashstr = ub.hash_data(buffer.read(), hasher='sha1')
    assert hashstr.startswith('45a5c851bf12d1')
Ejemplo n.º 10
0
def test_download_cover_hashers():
    """Cover the string-named hasher code paths of ub.download."""
    url = _demo_url()

    dpath = ub.ensure_app_cache_dir('ubelt')
    fname = basename(url)

    # Each tuple covers one named hasher with its expected hash prefix
    for hasher_name, prefix in [
            ('md5', 'e09c80c42fda55f9d992e59ca6b33'),
            ('sha256', 'bf2cb58a68f684d95a3b78ef8f')]:
        ub.download(url,
                    hasher=hasher_name,
                    hash_prefix=prefix,
                    dpath=dpath,
                    fname=fname)
Ejemplo n.º 11
0
def test_download_no_fpath():
    """When fpath is omitted, the download lands in the app cache dir."""
    url = 'http://i.imgur.com/rqwaDag.png'

    dpath = ub.ensure_app_cache_dir('ubelt')
    expected_fpath = join(dpath, basename(url))

    # Remove any cached copy so the default-path logic is exercised
    ub.delete(expected_fpath)
    assert not exists(expected_fpath)

    got_fpath = ub.download(url)

    assert got_fpath == expected_fpath
    assert exists(expected_fpath)
Ejemplo n.º 12
0
def test_download_chunksize():
    """A tiny chunksize forces many iterations of the chunked read loop."""
    url = 'http://i.imgur.com/rqwaDag.png'

    dpath = ub.ensure_app_cache_dir('ubelt')
    expected_fpath = join(dpath, basename(url))

    # Clean out any cached copy first
    ub.delete(expected_fpath)
    assert not exists(expected_fpath)

    got_fpath = ub.download(url, chunksize=2)

    assert got_fpath == expected_fpath
    assert exists(expected_fpath)
Ejemplo n.º 13
0
def test_download_no_fpath():
    """Without an explicit fpath the download defaults to the app cache."""
    url = _demo_url()

    dpath = ub.ensure_app_cache_dir('ubelt')
    expected = join(dpath, basename(url))

    # Ensure the file is absent so the default destination is exercised
    ub.delete(expected)
    assert not exists(expected)

    got_fpath = ub.download(url)

    assert got_fpath == expected
    assert exists(expected)
Ejemplo n.º 14
0
def test_download_with_fpath():
    """Download a known image to an explicit path and sanity-check its size."""
    url = 'http://i.imgur.com/rqwaDag.png'

    dpath = ub.ensure_app_cache_dir('ubelt', 'tests')
    fpath = join(dpath, basename(url))

    # Remove any stale copy before downloading
    ub.delete(fpath)
    assert not exists(fpath)

    got_fpath = ub.download(url, fpath=fpath)
    assert got_fpath == fpath
    assert exists(fpath)

    with open(got_fpath, 'rb') as file_obj:
        data = file_obj.read()
    assert len(data) > 1200, 'should have downloaded some bytes'
Ejemplo n.º 15
0
def test_download_chunksize():
    """A 2-byte chunksize exercises many iterations of the read loop."""
    url = _demo_url()

    dpath = ub.ensure_app_cache_dir('ubelt')
    expected = join(dpath, basename(url))

    # Start from a clean cache entry
    ub.delete(expected)
    assert not exists(expected)

    got_fpath = ub.download(url, chunksize=2)

    assert got_fpath == expected
    assert exists(expected)
Ejemplo n.º 16
0
def test_download_with_fpath():
    """Downloading to an explicit fpath writes the expected bytes there."""
    url = _demo_url(1201)

    dpath = ub.ensure_app_cache_dir('ubelt', 'tests')
    fpath = join(dpath, basename(url))

    # Remove any stale copy so the write path is exercised
    ub.delete(fpath)
    assert not exists(fpath)

    got_fpath = ub.download(url, fpath=fpath)
    assert got_fpath == fpath
    assert exists(fpath)

    with open(got_fpath, 'rb') as file_obj:
        data = file_obj.read()
    assert len(data) > 1200, 'should have downloaded some bytes'
Ejemplo n.º 17
0
def test_download_hashalgo():
    """Cover passing a hashlib object (rather than a string) as the hasher."""
    import hashlib
    url = 'http://i.imgur.com/rqwaDag.png'

    if not ub.argflag('--network'):
        pytest.skip('not running network tests')

    dpath = ub.ensure_app_cache_dir('ubelt')
    fpath = join(dpath, basename(url))

    # Clear any cached copy first
    ub.delete(fpath)
    assert not exists(fpath)

    got_fpath = ub.download(
        url,
        hash_prefix='545e3a51404f664e46aa65a70948e126',
        hasher=hashlib.md5())

    assert got_fpath == fpath
    assert exists(fpath)
Ejemplo n.º 18
0
def test_download_hashalgo():
    """Pass a concrete hashlib object as the hasher argument."""
    import hashlib

    url = _demo_url()

    dpath = ub.ensure_app_cache_dir('ubelt')
    fpath = join(dpath, basename(url))

    # Make sure the file is re-downloaded so hashing actually runs
    ub.delete(fpath)
    assert not exists(fpath)

    got_fpath = ub.download(
        url,
        hash_prefix='e09c80c42fda55f9d992e59ca6b3307d',
        hasher=hashlib.md5())

    assert got_fpath == fpath
    assert exists(fpath)
Ejemplo n.º 19
0
def test_download_with_sha1_hasher():
    """Cover the 'sha1' string option for the hasher argument."""
    import ubelt as ub
    demo_url = _demo_url(128 * 4)
    ub.download(demo_url, hasher='sha1', hash_prefix='164557facb7392')
Ejemplo n.º 20
0
def test_local_download():
    """Download a ~10MB file served by the singleton local test server."""
    server = SingletonTestServer.instance()
    nbytes = int(10 * 2**20)
    url = server.write_file(filebytes=nbytes)[0]
    ub.download(url)
Ejemplo n.º 21
0
import pandas as pd
from dateutil import parser
import datetime
import pathlib
sns = kwplot.autosns()

repo_fpath = pathlib.Path('/data/joncrall/COVID-19/csse_covid_19_data')

data = pd.read_csv(
    '/home/joncrall/Downloads/Provisional_COVID-19_Deaths_by_Sex_and_Age.csv')

# data[data['Province_State'] == 'Florida']

url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv'
file = io.BytesIO()
ub.download(url, fpath=file)
file.seek(0)
data = pd.read_csv(file)

sub = data[[
    'Start Date', 'End Date', 'Age Group', 'COVID-19 Deaths', 'State', 'Sex'
]]
flags1 = (sub['State'] == 'Florida') | (sub['State'] == 'New York')
flags1 &= sub['Sex'] == 'All Sexes'
sub2 = sub[flags1]

flags3 = (
    (sub2['Age Group'] != 'All Ages') & (sub2['Age Group'] != 'Under 1 year') &
    (sub2['Age Group'] != '1-4 years') & (sub2['Age Group'] != '5-14 years') &
    (sub2['Age Group'] != '15-24 years') & (sub2['Age Group'] != '0-17 years')
    & (sub2['Age Group'] != '25-34 years') &
Ejemplo n.º 22
0
def 下载文件(url, 保存文件路径=None):
    """Download ``url`` (optionally to the given path) and return the
    resulting file path with the home directory abbreviated to ``~``."""
    downloaded_fpath = ub.download(url, fpath=保存文件路径, verbose=0)
    return ub.shrinkuser(downloaded_fpath)
Ejemplo n.º 23
0
def test_local_download():
    """Download a ~10MB local-server file, passing a timeout for coverage."""
    server = SingletonTestServer.instance()
    # also test with a timeout for lazy coverage
    local_url = server.write_file(filebytes=int(10 * 2**20))[0]
    ub.download(local_url, timeout=1000)
def get_sentinel2_image(url,
                        outputdir,
                        overwrite=False,
                        partial=False,
                        noinspire=False,
                        reject_old=False):
    """
    Collect the entire dir structure of the image files from the
    manifest.safe file and build the same structure in the output
    location.

    Args:
        url (str): base url of the remote SAFE product directory
        outputdir (str): local directory the image tree is written under
        overwrite (bool): re-download even if the target dir already exists
        partial (bool): when True, detect and remove partially downloaded
            tiles (see NOTE below about the docstring/flag mismatch)
        noinspire (bool): when True, skip the INSPIRE.xml check/rename step
        reject_old (bool): when True, reject products whose manifest fails
            the ``is_new`` check before downloading the rest

    Returns:
        True if image was downloaded
        False if partial=False and image was not fully downloaded
            or if reject_old=True and it is old-format
            or if noinspire=False and INSPIRE file is missing
    """
    img = os.path.basename(url)
    target_path = os.path.join(outputdir, img)
    target_manifest = os.path.join(target_path, 'manifest.safe')

    return_status = True
    if not os.path.exists(target_path) or overwrite:

        manifest_url = url + '/manifest.safe'

        if reject_old:
            # check contents of manifest before downloading the rest
            content = urlopen(manifest_url)
            with NamedTemporaryFile() as f:
                shutil.copyfileobj(content, f)
                if not is_new(f.name):
                    return False

        os.makedirs(target_path, exist_ok=True)
        # Save the manifest locally, then fetch every file it references
        content = urlopen(manifest_url)
        with open(target_manifest, 'wb') as f:
            shutil.copyfileobj(content, f)
        with open(target_manifest, 'r') as manifest_file:
            manifest_lines = manifest_file.read().split()
        for line in manifest_lines:
            if 'href' in line:
                # Extract the relative path out of an href="./..." attribute
                rel_path = line[line.find('href=".') + 7:]
                rel_path = rel_path[:rel_path.find('"')]
                abs_path = os.path.join(target_path, *rel_path.split('/')[1:])
                if not os.path.exists(os.path.dirname(abs_path)):
                    os.makedirs(os.path.dirname(abs_path))
                try:
                    ubelt.download(url + rel_path, fpath=abs_path)
                except HTTPError as error:
                    # Best-effort: report the failure and keep downloading
                    # the remaining manifest entries.
                    print('Error downloading {} [{}]'.format(
                        url + rel_path, error))
                    continue
        granule = os.path.dirname(
            os.path.dirname(get_S2_image_bands(target_path, 'B01')))
        # Some products omit these dirs; create them so tooling that expects
        # the full SAFE layout does not fail later.
        for extra_dir in ('AUX_DATA', 'HTML'):
            if not os.path.exists(os.path.join(target_path, extra_dir)):
                os.makedirs(os.path.join(target_path, extra_dir))
            if not os.path.exists(os.path.join(granule, extra_dir)):
                os.makedirs(os.path.join(granule, extra_dir))
        if not manifest_lines:
            print()
    elif reject_old and not is_new(target_manifest):
        print(f'Warning: old-format image {outputdir} exists')
        return_status = False

    if partial:
        # NOTE(review): the docstring says "partial=False ... not fully
        # downloaded", but this cleanup branch only runs when partial=True —
        # confirm the intended semantics of the `partial` flag.
        tile_chk = check_full_tile(get_S2_image_bands(target_path, 'B01'))
        if tile_chk == 'Partial':
            print('Removing partial tile image files...')
            shutil.rmtree(target_path)
            return_status = False
    if not noinspire:
        inspire_file = os.path.join(target_path, 'INSPIRE.xml')
        if os.path.isfile(inspire_file):
            # Rename the product dir to the title recorded in INSPIRE.xml
            inspire_path = get_S2_INSPIRE_title(inspire_file)
            if os.path.basename(target_path) != inspire_path:
                os.rename(target_path, inspire_path)
        else:
            print(f"File {inspire_file} could not be found.")
            return_status = False

    return return_status