Example no. 1
def download_url(url, filename, headers, args):
    """
    Downloads the given url in filename.
    """

    if is_youtube_url(url):
        download_youtube_url(url, filename, headers, args)
    else:
        import ssl
        # FIXME: Ugly hack for coping with broken SSL sites:
        # https://www.cs.duke.edu/~angl/papers/imc10-cloudcmp.pdf
        #
        # We should really ask the user if they want to stop the downloads
        # or if they are OK proceeding without verification.
        #
        # Note that skipping verification by default could be a problem for
        # people's lives if they happen to live in dictatorial countries.
        #
        # Note: The mess with various exceptions being caught (and their
        # order) is due to different behaviors in different Python versions
        # (e.g., 2.7 vs. 3.4).
        try:
            urlretrieve(url, filename)
        except ssl.SSLError as e:
            compat_print('[warning] Got SSL error: %s' % e)
            raise e
        except HTTPError as e:
            compat_print('[warning] Got HTTP error: %s' % e)
            raise e
        except URLError as e:
            compat_print('[warning] Got URL error: %s' % e)
            raise e
        except IOError as e:
            compat_print('[warning] Got a connection error: %s' % e)
            raise e
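The FIXME above suggests asking the user before skipping certificate verification rather than failing outright. A minimal Python 3 sketch of that idea (the helper name and prompt are hypothetical, not part of the snippet above):

import ssl
from urllib.error import URLError
from urllib.request import urlretrieve

def retrieve_with_ssl_prompt(url, filename):
    # Hypothetical helper: on an SSL failure, ask before globally disabling
    # certificate verification and retrying the download once.
    try:
        urlretrieve(url, filename)
    except (ssl.SSLError, URLError) as e:
        answer = input('SSL/URL error (%s). Retry without verification? [y/N] ' % e)
        if answer.strip().lower() != 'y':
            raise
        ssl._create_default_https_context = ssl._create_unverified_context
        urlretrieve(url, filename)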
Example no. 2
def _find_jar(url=None):
    """
    Finds the location of loci_tools.jar, downloading it to a writeable
    location if necessary.
    """
    for loc in _gen_jar_locations():
        if os.path.isfile(os.path.join(loc, 'loci_tools.jar')):
            return os.path.join(loc, 'loci_tools.jar')

    warn('loci_tools.jar not found, downloading')
    for loc in _gen_jar_locations():
        # check if dir exists and has write access:
        if os.path.exists(loc) and os.access(loc, os.W_OK):
            break
        # if the directory is 'pims' and does not exist, create it (if allowed)
        if os.path.basename(loc) == 'pims' and \
           os.access(os.path.dirname(loc), os.W_OK):
            os.mkdir(loc)
            break
    else:
        raise IOError('No writeable location found. In order to use the '
                      'Bioformats reader, please download '
                      'loci_tools.jar to the pims program folder or one of '
                      'the locations provided by _gen_jar_locations().')

    from six.moves.urllib.request import urlretrieve
    if url is None:
        url = ('http://downloads.openmicroscopy.org/bio-formats/5.1.0/' +
               'artifacts/loci_tools.jar')
    urlretrieve(url, os.path.join(loc, 'loci_tools.jar'))

    return os.path.join(loc, 'loci_tools.jar')
Example no. 3
def fetch_data(dest_dir='.', clobber=False, url=DATA_URL):
    """
    Download data from NCBI required to generate local taxonomy
    database. Default url is ncbi.DATA_URL

    * dest_dir - directory in which to save output files (created if necessary).
    * clobber - if False, skip the download when the target of url already exists in dest_dir
    * url - url to archive; default is ncbi.DATA_URL

    Returns (fname, downloaded), where fname is the name of the
    downloaded zip archive, and downloaded is True if a new file was
    downloaded, False otherwise.

    see ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump_readme.txt
    """

    dest_dir = os.path.abspath(dest_dir)
    try:
        os.mkdir(dest_dir)
    except OSError:
        pass

    fout = os.path.join(dest_dir, os.path.split(url)[-1])

    if os.access(fout, os.F_OK) and not clobber:
        downloaded = False
        logging.info(fout + ' exists; not downloading')
    else:
        downloaded = True
        logging.info('downloading {} to {}'.format(url, fout))
        request.urlretrieve(url, fout)

    return (fout, downloaded)
Example no. 4
def download_file(url, download_path):
    """
    Download a file from a resource URL to the given location

    :param url: URL of the file to download
    :param download_path: location where the file should be saved
    """

    # Extract the filename from the URL
    parsed = urlparse(url)
    filename = basename(parsed.path)

    # Ensure the output directory exists
    if not os.path.exists(download_path):
        os.makedirs(download_path)

    # Get a temporary file path for the compressed file download
    downloaded_file = os.path.join(tempfile.gettempdir(), filename)

    # Download the file
    urlretrieve(url, downloaded_file, reporthook=progress_bar_wrapper)

    # Move the file to the destination folder
    destination_path = os.path.join(download_path, filename)
    move(downloaded_file, destination_path)
Example no. 5
def download_and_decompress(url, download_path, verbose=True):
    """
    Download an archive from a resource URL and
    decompress/unarchive it to the given location

    :param url: URL of the compressed file to download
    :param download_path: location where the file should be extracted
    """

    # Extract the filename from the URL
    parsed = urlparse(url)
    filename = basename(parsed.path)

    # Ensure the output directory exists
    if not os.path.exists(download_path):
        os.makedirs(download_path)

    # Get a temporary file path for the compressed file download
    downloaded_file = os.path.join(tempfile.gettempdir(), filename)

    # Download the file
    if verbose:
        urlretrieve(url, downloaded_file, reporthook=progress_bar_wrapper)
    else:
        urlretrieve(url, downloaded_file)

    # Decompress and extract all files to the specified local path
    tar = tarfile.open(downloaded_file, "r")
    tar.extractall(download_path)
    tar.close()

    # Remove the downloaded file
    os.remove(downloaded_file)
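Note that tarfile.extractall will happily write outside download_path if the archive contains entries such as ../something. A small guard one could run before extracting (a sketch only, with a hypothetical helper name; on Python 3.12+ tar.extractall(path, filter='data') performs stricter checks for you):

import os
import tarfile

def safe_extractall(tar, path):
    # Reject members whose resolved target falls outside the destination.
    root = os.path.realpath(path)
    for member in tar.getmembers():
        target = os.path.realpath(os.path.join(path, member.name))
        if not (target == root or target.startswith(root + os.sep)):
            raise ValueError("Unsafe path in archive: {}".format(member.name))
    tar.extractall(path)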
Example no. 6
def get_rdr_some_label(kind, obsid):
    """Download `some` PRODUCT_ID label for `obsid`.

    Note
    ----
    The RED channel is also called the B&W channel on the HiRISE website.

    Parameters
    ----------
    kind : {'RED', 'COLOR'}
        String that determines the kind of product to look for.
    obsid : str
        HiRISE obsid in the standard form of ESP_012345_1234

    Returns
    -------
    None
        The label file is stored in the `labels_root` folder.
    """
    pid = PRODUCT_ID(obsid)
    pid.kind = kind
    savepath = labels_root() / Path(pid.label_fname)
    savepath.parent.mkdir(exist_ok=True)
    print("Downloading\n", pid.label_url, 'to\n', savepath)
    try:
        urlretrieve(pid.label_url, str(savepath))
    except HTTPError as e:
        print(e)
Example no. 7
def _download_database_template(
    galaxy_root,
    database_location,
    latest=False,
    galaxy_sqlite_database=None
):
    if galaxy_sqlite_database is not None:
        shutil.copyfile(galaxy_sqlite_database, database_location)
        return True

    if latest or not galaxy_root:
        template_url = DOWNLOADS_URL + urlopen(LATEST_URL).read()
        urlretrieve(template_url, database_location)
        return True

    newest_migration = _newest_migration_version(galaxy_root)
    download_migration = None
    for migration in DOWNLOADABLE_MIGRATION_VERSIONS:
        if newest_migration > migration:
            download_migration = migration
            break

    if download_migration:
        download_name = "db_gx_rev_0%d.sqlite" % download_migration
        download_url = DOWNLOADS_URL + download_name
        urlretrieve(download_url, database_location)
        return True
    else:
        return False
Example no. 8
def download_zip(url, name=None, check_dir=None):
    """Download and unzip zip file from url to $XTAS_DATA.

    Does nothing if $XTAS_DATA/check_dir exists.

    Parameters
    ----------
    url : string
        URL of resource.
    name : string
        Used by the logger, to display "Downloading [name]".
    check_dir : string
        Name of directory to which the resource is unzipped.
        Derived from the URL by default.
    """
    if check_dir is None:
        check_dir = os.path.basename(url)
        if check_dir.endswith('.zip'):
            check_dir = check_dir[:-4]
    if name is None:
        name = url
    home = make_data_home()
    check_dir = os.path.join(home, check_dir)

    # XXX race condition with multiple workers
    if not os.path.exists(check_dir):
        with NamedTemporaryFile() as temp:
            logger.info("Downloading %s" % name)
            urlretrieve(url, temp.name, reporthook=progress)
            with ZipFile(temp.name) as z:
                z.extractall(path=home)

    return check_dir
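The XXX comment above flags a race when several workers download concurrently. One way to narrow it (a sketch under that assumption, not the original implementation): extract into a scratch directory next to the target and rename it into place, since os.rename is atomic on a single filesystem.

import os
import shutil
import tempfile
from tempfile import NamedTemporaryFile
from urllib.request import urlretrieve
from zipfile import ZipFile

def download_zip_atomic(url, home, check_dir):
    # Hypothetical variant: never expose a partially extracted directory.
    if os.path.exists(check_dir):
        return check_dir
    scratch = tempfile.mkdtemp(dir=home)
    try:
        with NamedTemporaryFile() as temp:
            urlretrieve(url, temp.name)
            with ZipFile(temp.name) as z:
                z.extractall(path=scratch)
        # Assumes the zip holds a single top-level directory.
        extracted = os.path.join(scratch, os.listdir(scratch)[0])
        try:
            os.rename(extracted, check_dir)  # atomic on the same filesystem
        except OSError:
            pass  # another worker finished first; keep its copy
    finally:
        shutil.rmtree(scratch, ignore_errors=True)
    return check_dir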
Example no. 9
def download_onnx_model(model_name, zoo_dir, use_cache=True, only_local=False):
    model_dir = os.path.join(zoo_dir, model_name)
    if os.path.exists(model_dir):
        if use_cache:
            upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
            return
        else:
            shutil.rmtree(model_dir)
    url = 'https://s3.amazonaws.com/download.onnx/models/latest/{}.tar.gz'.format(model_name)

    download_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        download_file.close()
        print('Downloading ONNX model {} from {} and save in {} ...\n'.format(
            model_name, url, download_file.name))
        urlretrieve(url, download_file.name)
        with tarfile.open(download_file.name) as t:
            print('Extracting ONNX model {} to {} ...\n'.format(model_name, zoo_dir))
            t.extractall(zoo_dir)
    except Exception as e:
        print('Failed to download/backup data for ONNX model {}: {}'.format(model_name, e))
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
    finally:
        os.remove(download_file.name)

    if not only_local:
        upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
Example no. 10
def download_attachments(output_path, urls):
    """Downloads WordPress attachments and returns a list of paths to
    attachments that can be associated with a post (relative path to output
    directory). Files that fail to download will not be added to posts"""
    locations = []
    for url in urls:
        path = urlparse(url).path
        # tear down the path and rebuild it to avoid errors with
        # os.path.join and leading /'s
        path = path.split('/')
        filename = path.pop(-1)
        localpath = ''
        for item in path:
            if sys.platform != 'win32' or ':' not in item:
                localpath = os.path.join(localpath, item)
        full_path = os.path.join(output_path, localpath)
        if not os.path.exists(full_path):
            os.makedirs(full_path)
        print('downloading {}'.format(filename))
        try:
            urlretrieve(url, os.path.join(full_path, filename))
            locations.append(os.path.join(localpath, filename))
        except (URLError, IOError) as e:
            # Python 2.7 throws an IOError rather than URLError
            logger.warning("No file could be downloaded from %s\n%s", url, e)
    return locations
Example no. 11
def download_url(url, filename, headers, args):
    """
    Downloads the given url in filename.
    """

    if is_youtube_url(url):
        download_youtube_url(url, filename, headers, args)
    else:
        import ssl
        # FIXME: Ugly hack for coping with broken SSL sites:
        # https://www.cs.duke.edu/~angl/papers/imc10-cloudcmp.pdf
        #
        # We should really ask the user if they want to stop the downloads
        # or if they are OK proceeding without verification.
        #
        # Note that skipping verification by default could be a problem for
        # people's lives if they happen to live in dictatorial countries.
        #
        # Note: The mess with various exceptions being caught (and their
        # order) is due to different behaviors in different Python versions
        # (e.g., 2.7 vs. 3.4).
        try:
            urlretrieve(url, filename)
        except Exception as e:
            logging.warn('Got SSL/Connection error: %s', e)
            if not args.ignore_errors:
                logging.warn('Hint: if you want to ignore this error, add '
                             '--ignore-errors option to the command line')
                raise e
            else:
                logging.warn('SSL/Connection error ignored: %s', e)
Example no. 12
def maybe_download(filename,expected_bytes,force=False):
    '''
    Download the file if it does not already exist.
    @param:
        filename: Name of the file to download.
        expected_bytes: Expected size of the downloaded file in bytes.
        force: If True, download even if the file already exists.
    '''
    # destination file path
    dest_filename = os.path.join(data_root, filename)

    # download the file if forced or if it does not exist
    if force or not os.path.exists(dest_filename):
        print('Attempting to download:{}'.format(filename))
        urlretrieve(url+filename,dest_filename,reporthook = download_progress_hook)
        print('\nDownload Complete!')
    else:
        print('File {} exists!'.format(filename))

    # check the file was downloaded completely
    statinfo = os.stat(dest_filename)
    if expected_bytes == statinfo.st_size:
        print('File {} is downloaded completely!'.format(filename))
    else:
        raise Exception(
            'File {} is not downloaded completely!'.format(filename)
        )
    return dest_filename
Example no. 13
def download_one(url, output_file, skip_existing=True):
    """Download a single URL.

    Parameters
    ----------
    url : str
        URL to download.

    output_file : str
        Path to save the downloaded file.

    skip_existing : bool, default=True
        If True, skip downloading URLs whose output file already exists.

    Returns
    -------
    success : bool
        True if the file was downloaded successfully.
    """
    if os.path.exists(output_file) and skip_existing:
        print(" Skipping (exists): {}".format(url))
        return True

    print("[{}] Fetching: {}".format(time.asctime(), url))
    try:
        surl = urlparse.quote(url, safe=':./')
        urlrequest.urlretrieve(surl, output_file)
    except urlerror.HTTPError:
        logger.warning("FAILED to download file at: {}".format(surl))
        logger.warning("\nOriginal link: {}\nOutput file:{}\n".format(
            url, output_file))
        logger.warning("Skipping...")
    finally:
        return os.path.exists(output_file)
Example no. 14
def k8s_install_cli(client_version='latest', install_location=None):
    """
    Downloads the kubectl command line from Kubernetes
    """

    if client_version == 'latest':
        version = urlopen('https://storage.googleapis.com/kubernetes-release/release/stable.txt').read()
        client_version = version.decode('UTF-8').strip()

    file_url = ''
    system = platform.system()
    base_url = 'https://storage.googleapis.com/kubernetes-release/release/{}/bin/{}/amd64/{}'
    if system == 'Windows':
        file_url = base_url.format(client_version, 'windows', 'kubectl.exe')
    elif system == 'Linux':
        # TODO: Support ARM CPU here
        file_url = base_url.format(client_version, 'linux', 'kubectl')
    elif system == 'Darwin':
        file_url = base_url.format(client_version, 'darwin', 'kubectl')
    else:
        raise CLIError('Unsupported platform: {}'.format(system))

    logger.warning('Downloading client to %s from %s', install_location, file_url)
    try:
        urlretrieve(file_url, install_location)
        os.chmod(install_location,
                 os.stat(install_location).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
    except IOError as err:
        raise CLIError('Connection error while attempting to download client ({})'.format(err))
Example no. 15
def load_cifar10(datadir="cifar-10-batches-py"):
    # download the CIFAR-10 dataset if it is not already present
    if not os.path.exists(datadir):
        print("Downloading cifar-10...")
        request.urlretrieve("https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz","cifar10.tar.gz")
        tar = tarfile.open("cifar10.tar.gz")
        tar.extractall()
        tar.close()

    train_data = []
    train_target = []

    # load the training data
    for i in range(1, 6):
        d = unpickle("%s/data_batch_%d" % (datadir, i))
        train_data.extend(d["data"])
        train_target.extend(d["labels"])

    # load the test data
    d = unpickle("%s/test_batch" % (datadir))
    test_data = d["data"]
    test_target = d["labels"]

    # convert the data to float32 and the labels to int32 ndarrays
    train_data = np.array(train_data, dtype=np.float32)
    train_target = np.array(train_target, dtype=np.int32)
    test_data = np.array(test_data, dtype=np.float32)
    test_target = np.array(test_target, dtype=np.int32)

    # normalize pixel values to the range 0-1
    train_data /= 255.0
    test_data /= 255.0

    return train_data, test_data, train_target, test_target
Example no. 16
def _cache_download(url, filename, sha256sum=None):
    """Returns local path to cached copy of URL using given filename."""
    cache = os.environ.get("DOWNLOAD_CACHE", "./download_cache/")
    # TODO - expose this as a command line option

    if not os.path.isdir(cache):
        os.mkdir(cache)

    local = os.path.join(cache, filename)

    if not os.path.isfile(local):
        # Must download it...
        try:
            # TODO - log this nicely...
            sys.stderr.write("Downloading %s to %r\n" % (url, local))
            urlretrieve(url, local)
        except URLError:
            # Most likely server is down, could be bad URL in XML action:
            raise RuntimeError("Unable to download %s" % url)
        except FTPErrors:
            # Most likely server is down, could be bad URL in XML action:
            raise RuntimeError("Unable to download %s" % url)

        # Verifying the checksum is slow, only do this on a fresh
        # download. Assume locally cached files are already OK.
        if sha256sum:
            # TODO - log this nicely...
            sys.stderr.write("Verifying checksum for %s\n" % filename)
            filehash = subprocess.check_output(['shasum', '-a', '256', local])[0:64].strip()
            if filehash != sha256sum:
                raise RuntimeError("Checksum failure for %s, got %r but wanted %r" % (local, filehash, sha256sum))

    return local
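Shelling out to shasum is not portable, and subprocess.check_output returns bytes on Python 3, which never compare equal to a str sha256sum. A hashlib-based check could replace that call (a sketch; the helper name is made up):

import hashlib

def sha256_of_file(path, chunk_size=1 << 20):
    # Stream the file so large downloads do not need to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# e.g. inside _cache_download:
#     if sha256sum and sha256_of_file(local) != sha256sum:
#         raise RuntimeError("Checksum failure for %s" % local)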
Example no. 17
def download_dbsnp_vcf(dbsnp_build=None,genome_build=None,url=None,outpath=None):
  """
  Download the NCBI dbSNP VCF for a given human genome build and dbSNP build

  Args:
    dbsnp_build: b147
    genome_build: GRCh37p13
    url: Direct URL to file, e.g. ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b147_GRCh37p13/VCF/00-All.vcf.gz
    outpath: Output filename. If not given, it is constructed from genome_build and dbsnp_build (or a default name is used).

  Returns:
    Name of the file into which the data was saved (constructed from the builds, or a default name)
  """

  if url is None:
    if not genome_build.startswith("GRC"):
      raise ValueError("Genome build should begin with GRC")

    if not dbsnp_build.startswith("b"):
      raise ValueError("dbSNP build should look like b147, b148, etc.")

    url = NCBI_VCF_TEMPLATE_URL.format(dbsnp_build,genome_build)

  if outpath is None:
    if genome_build is None or dbsnp_build is None:
      outpath = "dbsnp.vcf.gz"
    else:
      outpath = "human_9606_{}_{}_All.vcf.gz".format(dbsnp_build,genome_build)

  with tqdm(unit='B',unit_scale=True,miniters=1,desc=url.split('/')[-1]) as t:
    urlcleanup()
    urlretrieve(url,filename=outpath,reporthook=tqdm_hook(t),data=None)

  return outpath
Example no. 18
def get_mnist_file(fpath, origin):
    datadir = os.path.dirname(fpath)
    if not os.path.exists(datadir):
        os.makedirs(datadir)

    try:
        # open() is used only to check that the file already exists
        open(fpath).close()
    except IOError:
        print('Downloading data from',  origin)

        global progbar
        progbar = None
        def dl_progress(count, block_size, total_size):
            global progbar
            if progbar is None:
                progbar = Progbar(total_size)
            else:
                progbar.update(count*block_size)

        urlretrieve(origin, fpath + '.gz', dl_progress)
        progbar = None

        fin = gzip.open(fpath + '.gz', 'rb')
        fout = open(fpath, 'wb')
        fout.write(fin.read())
        fin.close()
        fout.close()

    return fpath
Example no. 19
def _download_log_files(
        client,
        resource_group_name,
        server_name,
        file_name):
    """
    Download log file(s) of a given server to the current directory.

    :param resource_group_name: The name of the resource group that
    contains the resource. You can obtain this value from the Azure
    Resource Manager API or the portal.
    :type resource_group_name: str
    :param server_name: Name of the server.
    :type server_name: str
    :param file_name: Space separated list of log filenames on the server to download.
    :type file_name: str
    """
    from six.moves.urllib.request import urlretrieve  # pylint: disable=import-error

    # list all files
    files = client.list_by_server(resource_group_name, server_name)

    for f in files:
        if f.name in file_name:
            urlretrieve(f.url, f.name)
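Because `f.name in file_name` is a substring test against the space separated list, a server file named error.log would also match a request for error.log.1. A stricter variant, assuming file_name really is the space separated string described in the docstring (a sketch only):

# Sketch of a stricter match against the requested names.
requested = set(file_name.split())
for f in files:
    if f.name in requested:
        urlretrieve(f.url, f.name)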
Example no. 20
def dcos_install_cli(install_location=None, client_version='1.8'):
    """
    Downloads the dcos command line from Mesosphere
    """
    system = platform.system()

    if not install_location:
        raise CLIError("No install location specified and it could not be determined from the current platform '{}'".format(system))
    base_url = 'https://downloads.dcos.io/binaries/cli/{}/x86-64/dcos-{}/{}'
    if system == 'Windows':
        file_url = base_url.format('windows', client_version, 'dcos.exe')
    elif system == 'Linux':
        # TODO Support ARM CPU here
        file_url = base_url.format('linux', client_version, 'dcos')
    elif system == 'Darwin':
        file_url = base_url.format('darwin', client_version, 'dcos')
    else:
        raise CLIError('Unsupported platform: {}'.format(system))

    logger.info('Downloading client to %s', install_location)
    try:
        urlretrieve(file_url, install_location)
        os.chmod(install_location, os.stat(install_location).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
    except IOError as err:
        raise CLIError('Connection error while attempting to download client ({})'.format(err))
Example no. 21
    def retrieve_file_from_url(self, url, force=False):
        """
        Retrieve a file from FTP server.
        .. note:: urlretrieve has a better API for error handling than
        ftp.retrbinary

        :param bool force: overwrite local files
        :param str url: file url
        :return bool: whether retrieved
        """
        remote_path, basename = url.rsplit('/', 1)

        filename = path.join(self.local_data, self.assembly, basename)
        if not force and self.check_local(basename.replace('.gz', '')):
            log.info('{} available, aborting retrieval.'.format(filename))
            return False
        else:
            try:
                urlretrieve('ftp://' + self.base_url + url, filename)
                log.info('{} retrieval complete.'.format(filename))
                return True
            except URLError as e:
                log.error('Error retrieving {}: \n{}'.format(
                    'ftp://' + self.base_url + url, e))
                raise e
Example no. 22
def dcos_install_cli(install_location=None, client_version="1.8"):
    """
    Downloads the dcos command line from Mesosphere
    """
    system = platform.system()

    if not install_location:
        raise CLIError(
            "No install location specified and it could not be determined from the current platform '{}'".format(system)
        )
    base_url = "https://downloads.dcos.io/binaries/cli/{}/x86-64/dcos-{}/{}"
    if system == "Windows":
        file_url = base_url.format("windows", client_version, "dcos.exe")
    elif system == "Linux":
        # TODO Support ARM CPU here
        file_url = base_url.format("linux", client_version, "dcos")
    elif system == "Darwin":
        file_url = base_url.format("darwin", client_version, "dcos")
    else:
        raise CLIError("Unsupported platform: {}".format(system))

    logger.info("Downloading client to %s", install_location)
    try:
        urlretrieve(file_url, install_location)
    except IOError as err:
        raise CLIError("Connection error while attempting to download client ({})".format(err))
Example no. 23
def get_file(fpath, origin, untar=False):
    datadir = os.path.dirname(fpath)
    if not os.path.exists(datadir):
        os.makedirs(datadir)

    if not os.path.exists(fpath):
        print('Downloading data from',  origin)

        global progbar
        progbar = None
        def dl_progress(count, block_size, total_size):
            global progbar
            if progbar is None:
                progbar = Progbar(total_size)
            else:
                progbar.update(count*block_size)

        urlretrieve(origin, fpath, dl_progress)
        progbar = None

    if untar:
        tfile = tarfile.open(fpath, 'r:gz')
        names = tfile.getnames()
        dirname = names[0]
        not_exists = [int(not os.path.exists("{}/{}".format(datadir, fname))) for fname in names]
        if sum(not_exists) > 0:
            print('Untarring file...')
            tfile.extractall(path=datadir)
        else:
            print('Files already untarred')
        tfile.close()
        return "{}/{}".format(datadir, dirname)

    return fpath
Example no. 24
def download_numpy_wheel():
    base_url = os.getenv('NUMPY_URL')
    if base_url is None:
        raise ValueError('NUMPY_URL environment variable is missing.')

    version = '1.10.4+mkl'
    py = 'cp{0[0]}{0[1]}'.format(sys.version_info)
    if py not in {'cp27', 'cp34', 'cp35'}:
        print('NumPy wheel not available for {}'.format(py))
        return None

    bits = struct.calcsize('P') * 8
    if bits == 32:
        arch = 'win32'
    elif bits == 64:
        arch = 'win_amd64'
    else:
        raise ValueError("Couldn't determine 32/64 bits.")

    filename = 'numpy-{}-{}-none-{}.whl'.format(version, py, arch)

    directory = 'astrodynamics-numpy-wheels'
    os.mkdir(directory)

    filepath = os.path.join(directory, filename)
    url = base_url + filename

    # Disable SSL. Shouldn't do this ever. This is just a script.
    ssl._create_default_https_context = ssl._create_unverified_context
    urlretrieve(url, filepath)
    return filepath
Example no. 25
def build_image_factory():
    """Downloads the
    //github.com/barseghyanartur/delusionalinsanity.images/archive/latest.zip
    locally, unpacks it to grab the images. Then makes a list of all the
    images.

    :return list: List of relative paths to images.
    """
    try:
        shutil.rmtree(
            os.path.join(settings.MEDIA_ROOT,
                         'delusionalinsanity.images-latest')
        )
    except Exception as err:
        logger.debug(err)

    try:
        download_local = os.path.join(
            settings.MEDIA_ROOT, 'delusionalinsanity_images_latest.zip'
        )
        request.urlretrieve(
            'https://github.com/barseghyanartur/delusionalinsanity.images'
            '/archive/latest.zip',
            download_local
        )

        zfile = zipfile.ZipFile(download_local)
        names = zfile.namelist()

        for name in names:
            try:
                dirname, filename = os.path.split(name)

                if not filename:
                    continue

                dirname = os.path.join(settings.MEDIA_ROOT, dirname)
                if not os.path.exists(dirname):
                    os.mkdir(dirname)

                fd = open(os.path.join(settings.MEDIA_ROOT, name), "wb")
                fd.write(zfile.read(name))
                fd.close()
            except Exception as e:
                logger.debug(e)

        source_dir = os.path.join(
            settings.MEDIA_ROOT,
            'delusionalinsanity.images-latest',
            'images'
        )
        images_dir = os.path.join(settings.MEDIA_ROOT,
                                  NEWS_IMAGES_STORAGE_PATH)
        shutil.move(source_dir, images_dir)
        images = [os.path.join(images_dir, f) for f in os.listdir(images_dir)]
        return [fix_image(i) for i in images]
    except Exception as err:
        logger.debug(err)
        return []
Example no. 26
def get_bioformats_file(filename, filepath='', url=''):
    if url == '':
        url = 'http://www.loci.wisc.edu/files/software/data/' + filename
    fn = os.path.join(filepath, filename)
    urlretrieve(url, fn)
    with ZipFile(fn) as zf:
        zf.extractall(filepath)
    os.remove(fn)
Example no. 27
def download(filename):
    destination_file = "data/" + filename
    if not os.path.exists(destination_file):
        print("Downloading ", filename, "into ", destination_file)
        urlretrieve(url + filename, destination_file)
    else:
        print("File already exists: %s" % filename)
    return destination_file
Example no. 28
def fetch_data(data_url, cache_path=None, download_if_missing=True):
    """
    Fetch data and return local filename

    Parameters
    ----------
    cache_path : str (optional)
        Specify a path to cache the datasets. If not specified, this
        will cache the downloaded data to the current working directory.
    download_if_missing : bool (optional)
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.
    data_url : str
        Path to the remote data file.

    Returns
    -------
    filename : str
        The path to the local HDF5 data file.
    """

    if cache_path is None:
        cache_path = os.getcwd()
    else:
        cache_path = os.path.expanduser(os.path.abspath(cache_path))

    cache_file = os.path.join(cache_path, os.path.basename(data_url))

    try:
        # how many bytes are we expecting
        url = urlopen(data_url)
        meta = url.info()
        expected_bytes = int(meta['Content-Length'])
    except URLError as e:
        if os.path.exists(cache_file):
            print("Data file exists but unable to verify against remote file.")
            return cache_file
        else:
            print("Local file not found and unable to connect to remote file! Do "
                  "you have an internet connection?")
            raise e

    if (os.path.exists(cache_file) and os.stat(cache_file).st_size != expected_bytes) \
        or not os.path.exists(cache_file) or not os.path.isfile(cache_file):
        urlretrieve(data_url, cache_file)

        received_bytes = os.stat(cache_file).st_size
        if received_bytes != expected_bytes:
            raise IOError("Download error: size expected = {} bytes, size received = {} bytes"
                          .format(expected_bytes, received_bytes))

        print("Data downloaded and verified.")

    else:
        print("Data file already exists and is verified.")

    return cache_file
Example no. 29
def fetch(args):
    check_or_set_version(args)
    url = newest_download_url(args)
    if not url:
        print("unable to find a source release for {0}!".format(args.name))
        sys.exit(1)
    print('downloading package {0}-{1}...'.format(args.name, args.version))
    print('from {0}'.format(url['url']))
    urlretrieve(url['url'], url['filename'])
Example no. 30
def get_file(fname, origin, untar=False):
    datadir_base = os.path.expanduser(os.path.join('~', '.keras'))
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.join('/tmp', '.keras')
    datadir = os.path.join(datadir_base, 'datasets')
    if not os.path.exists(datadir):
        os.makedirs(datadir)

    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + '.tar.gz'
    else:
        fpath = os.path.join(datadir, fname)

    if not os.path.exists(fpath):
        print('Downloading data from',  origin)
        global progbar
        progbar = None

        def dl_progress(count, block_size, total_size):
            global progbar
            if progbar is None:
                progbar = Progbar(total_size)
            else:
                progbar.update(count*block_size)

        error_msg = 'URL fetch failure on {}: {} -- {}'
        try:
            try:
                urlretrieve(origin, fpath, dl_progress)
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
        except (Exception, KeyboardInterrupt) as e:
            if os.path.exists(fpath):
                os.remove(fpath)
            raise
        progbar = None

    if untar:
        if not os.path.exists(untar_fpath):
            print('Untarring file...')
            tfile = tarfile.open(fpath, 'r:gz')
            try:
                tfile.extractall(path=datadir)
            except (Exception, KeyboardInterrupt) as e:
                if os.path.exists(untar_fpath):
                    if os.path.isfile(untar_fpath):
                        os.remove(untar_fpath)
                    else:
                        shutil.rmtree(untar_fpath)
                raise
            tfile.close()
        return untar_fpath

    return fpath
Example no. 31
 def download_com(self):
     # download converter
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/AC3DownMixStatus.py",
         "/tmp/AC3DownMixStatus.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/AlwaysTrue.py",
         "/tmp/AlwaysTrue.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/Bitrate2.py",
         "/tmp/Bitrate2.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/CaidBar.py",
         "/tmp/CaidBar.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/CaidInfo2.py",
         "/tmp/CaidInfo2.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/CamdInfo3.py",
         "/tmp/CamdInfo3.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/ConverterRotator.py",
         "/tmp/ConverterRotator.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/CpuUsage.py",
         "/tmp/CpuUsage.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/DiskInfo.py",
         "/tmp/DiskInfo.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/EcmInfoLine.py",
         "/tmp/EcmInfoLine.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/EmuName.py",
         "/tmp/EmuName.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/EventName2.py",
         "/tmp/EventName2.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/ExtraNumText.py",
         "/tmp/ExtraNumText.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/FanTempInfo.py",
         "/tmp/FanTempInfo.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/FlashingDotClock.py",
         "/tmp/FlashingDotClock.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/FrontendInfo2.py",
         "/tmp/FrontendInfo2.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/IsNet.py",
         "/tmp/IsNet.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/MemoryInfo.py",
         "/tmp/MemoryInfo.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/ModuleControl.py",
         "/tmp/ModuleControl.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/MovieInfo2.py",
         "/tmp/MovieInfo2.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/ProgressDiskSpaceInfo.py",
         "/tmp/ProgressDiskSpaceInfo.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/RefString.py",
         "/tmp/RefString.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/RouteInfo.py",
         "/tmp/RouteInfo.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/ServiceInfo2.py",
         "/tmp/ServiceInfo2.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/ServiceInfoEX.py",
         "/tmp/ServiceInfoEX.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/ServiceName2.py",
         "/tmp/ServiceName2.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/ServiceName2.ref",
         "/tmp/ServiceName2.ref")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/ServiceOrbitalPosition2.py",
         "/tmp/ServiceOrbitalPosition2.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/TestConnection.py",
         "/tmp/TestConnection.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/TunerBar.py",
         "/tmp/TunerBar.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Converter/WiFiInfo.py",
         "/tmp/WiFiInfo.py")
     # download renderer
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Renderer/AnimatedWeatherPixmap.py",
         "/tmp/AnimatedWeatherPixmap.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Renderer/AnimatedMoonPixmap.py",
         "/tmp/AnimatedMoonPixmap.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Renderer/LabelDuoColors.py",
         "/tmp/LabelDuoColors.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Renderer/MovieCover.py",
         "/tmp/MovieCover.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Renderer/MovieRating.py",
         "/tmp/MovieRating.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Renderer/PiconUni.py",
         "/tmp/PiconUni.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Renderer/RendVolumeText.py",
         "/tmp/RendVolumeText.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Renderer/RendVolumeTextP.py",
         "/tmp/RendVolumeTextP.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Renderer/RunningText.py",
         "/tmp/RunningText.py")
     urlretrieve(
         "https://raw.githubusercontent.com/Sirius0103/enigma2-components/master/python/Components/Renderer/Watches.py",
         "/tmp/Watches.py")
     # end
     self.install_com()
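The method above repeats the same urlretrieve call for every component. A hypothetical, abbreviated refactor could drive the calls from filename lists instead (only a few names are shown here; the full set is in the snippet above):

from six.moves.urllib.request import urlretrieve

BASE_URL = ("https://raw.githubusercontent.com/Sirius0103/enigma2-components/"
            "master/python/Components/{}/{}")

# Abbreviated lists; the snippet above downloads many more files.
CONVERTERS = ["AC3DownMixStatus.py", "AlwaysTrue.py", "Bitrate2.py"]
RENDERERS = ["AnimatedWeatherPixmap.py", "MovieCover.py", "Watches.py"]

def download_components():
    for kind, names in (("Converter", CONVERTERS), ("Renderer", RENDERERS)):
        for name in names:
            urlretrieve(BASE_URL.format(kind, name), "/tmp/" + name)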
Example no. 32
import tarfile
import os
from six.moves.urllib import request

url_dir = 'https://www.cs.toronto.edu/~kriz/'
file_name = 'cifar-10-python.tar.gz'
save_dir = 'dataset'
tar_path = os.path.join(save_dir, file_name)

if __name__ == '__main__':
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if os.path.exists(tar_path):
        print('{:s} already downloaded.'.format(file_name))
    else:
        print('Downloading {:s}...'.format(file_name))
        request.urlretrieve('{:s}{:s}'.format(url_dir, file_name), tar_path)

    print('Extracting files...')
    with tarfile.open(tar_path, 'r:gz') as f:
        f.extractall(save_dir)
Example no. 33
import gzip
import pickle
from six.moves.urllib.request import urlretrieve
import numpy as np
import h5py
import os
import sys

bs_data_dir = os.environ.get('BRAINSTORM_DATA_DIR', '.')
url = 'http://deeplearning.net/data/mnist/mnist.pkl.gz'
mnist_file = os.path.join(bs_data_dir, 'mnist.pkl.gz')
hdf_file = os.path.join(bs_data_dir, 'MNIST.hdf5')

print("Using data directory:", bs_data_dir)
if not os.path.exists(mnist_file):
    print("Downloading MNIST data ...")
    urlretrieve(url, mnist_file)
    print("Done.")

print("Extracting MNIST data ...")
with gzip.open(mnist_file, 'rb') as f:
    if sys.version_info < (3, ):
        ds = pickle.load(f)
    else:
        ds = pickle.load(f, encoding='latin1')
print("Done.")

train_inputs, train_targets = \
    ds[0][0].reshape((1, 50000, 28, 28, 1)), ds[0][1].reshape((1, 50000, 1))
valid_inputs, valid_targets = \
    ds[1][0].reshape((1, 10000, 28, 28, 1)), ds[1][1].reshape((1, 10000, 1))
test_inputs, test_targets = \
Example no. 34
downloadDir = os.path.join(os.environ['temp'], "itchiotempdir")
if not os.path.exists(downloadDir):
    os.makedirs(downloadDir)

chromeOptions = webdriver.ChromeOptions()
prefs = {"download.default_directory": downloadDir}
chromeOptions.add_experimental_option("prefs", prefs)

if os.path.isfile('chrome.ini'):
    ini = open('chrome.ini', 'r')
    locationString = ini.read()
elif os.path.isfile('chromedriver.exe'):
    locationString = 'chromedriver.exe'
else:
    response = urlretrieve(
        'https://chromedriver.storage.googleapis.com/2.33/chromedriver_win32.zip',
        'chromedriver.zip')

    zip_ref = zipfile.ZipFile("chromedriver.zip", 'r')
    zip_ref.extractall(owd)
    zip_ref.close()

    locationString = 'chromedriver.exe'

driver = webdriver.Chrome(executable_path=(locationString),
                          chrome_options=chromeOptions)
driver.set_window_position(4000, 651)
driver.set_page_load_timeout(600)

if os.path.isfile("repo.ini"):
    with open("repo.ini", "r") as myfile:
Example no. 35
def get_file(fname,
             origin,
             untar=False,
             md5_hash=None,
             file_hash=None,
             cache_subdir='datasets',
             hash_algorithm='auto',
             archive_format='auto',
             cache_dir=None):
    if cache_dir is None:
        cache_dir = os.path.join(os.path.expanduser('~'), '.conda')
    if md5_hash is not None and file_hash is None:
        file_hash = md5_hash
        hash_algorithm = 'md5'
    datadir_base = os.path.expanduser(cache_dir)
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.join('/tmp', '.conda')
    datadir = os.path.join(datadir_base, cache_subdir)
    os.makedirs(datadir, exist_ok=True)

    fpath = os.path.join(datadir, fname)

    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + '.tar.gz'
    else:
        fpath = os.path.join(datadir, fname)

    download = False
    if os.path.exists(fpath):
        # File found; verify integrity if a hash was provided.
        if file_hash is not None:
            if not validate_file(fpath, file_hash, algorithm=hash_algorithm):
                print('A local file was found, but it seems to be '
                      'incomplete or outdated because the ' + hash_algorithm +
                      ' file hash does not match the original value of ' +
                      file_hash + ' so we will re-download the data.')
                download = True
    else:
        download = True

    if download:
        print('Downloading data from', origin)

        class ProgressTracker(object):
            # Maintain progbar for the lifetime of download.
            # This design was chosen for Python 2.7 compatibility.
            progbar = None

        def dl_progress(count, block_size, total_size):
            if ProgressTracker.progbar is None:
                if total_size == -1:
                    total_size = None
                ProgressTracker.progbar = Progbar(total_size)
            else:
                ProgressTracker.progbar.update(count * block_size)

        error_msg = 'URL fetch failure on {}: {} -- {}'
        try:
            try:
                urlretrieve(origin, fpath, dl_progress)
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
        except (Exception, KeyboardInterrupt) as e:
            if os.path.exists(fpath):
                os.remove(fpath)
            raise
        ProgressTracker.progbar = None

    if untar:
        if not os.path.exists(untar_fpath):
            _extract_archive(fpath, datadir, archive_format='tar')
        return untar_fpath

    return fpath
Example no. 36
url = base_url + filename

with contextlib.closing(request.urlopen(url)) as f:
    expected_filesize = int(f.headers["content-length"])
    print(expected_filesize)
time.sleep(5)

widgets = [
    '{}: '.format(filename),
    Percentage(), ' ',
    Bar(), ' ',
    ETA(), ' ',
    FileTransferSpeed()
]
progress_bar = ProgressBar(widgets=widgets, maxval=expected_filesize).start()


def reporthook(count, blockSize, totalSize):
    progress_bar.update(min(count * blockSize, totalSize))


filepath = os.path.join(fuel_data_path, filename)
request.urlretrieve(url, filepath, reporthook=reporthook)
progress_bar.finish()

downloaded_filesize = os.path.getsize(filepath)
assert expected_filesize == downloaded_filesize, " ".join(
    ("expected file size is {}, but the actual size of the downloaded file",
     "is {}.")).format(expected_filesize, downloaded_filesize)
Example no. 37
def download_structure(pdb_id,
                       file_type,
                       outdir='',
                       outfile='',
                       only_header=False,
                       force_rerun=False):
    """Download a structure from the RCSB PDB by ID. Specify the file type desired.

    Args:
        pdb_id: PDB ID
        file_type: pdb, pdb.gz, mmcif, cif, cif.gz, xml.gz, mmtf, mmtf.gz
        outdir: Optional output directory
        outfile: Optional output name
        only_header: If only the header file should be downloaded
        force_rerun: If the file should be downloaded again even if it exists

    Returns:
        str: Path to outfile

    """
    # TODO: keep an eye on https://github.com/biopython/biopython/pull/943 Biopython PR#493 for functionality of this
    # method in biopython. extra file types have not been added to biopython download yet

    pdb_id = pdb_id.lower()
    file_type = file_type.lower()
    file_types = [
        'pdb', 'pdb.gz', 'mmcif', 'cif', 'cif.gz', 'xml.gz', 'mmtf', 'mmtf.gz'
    ]
    if file_type not in file_types:
        raise ValueError(
            'Invalid file type, must be one of: pdb, pdb.gz, mmcif, cif, cif.gz, xml.gz, mmtf, mmtf.gz'
        )

    if file_type == 'mmtf':
        file_type = 'mmtf.gz'

    if file_type.endswith('.gz'):
        gzipped = True
    else:
        gzipped = False

    if file_type == 'mmcif':
        file_type = 'cif'

    if only_header:
        folder = 'header'
        if outfile:
            outfile = op.join(outdir, outfile)
        else:
            outfile = op.join(outdir, '{}.header.{}'.format(pdb_id, file_type))
    else:
        folder = 'download'
        if outfile:
            outfile = op.join(outdir, outfile)
        else:
            outfile = op.join(outdir, '{}.{}'.format(pdb_id, file_type))

    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile):
        if file_type == 'mmtf.gz' or file_type == 'mmtf':
            mmtf_api = '1.0'
            download_link = 'http://mmtf.rcsb.org/v{}/full/{}.mmtf.gz'.format(
                mmtf_api, pdb_id)
        else:
            download_link = 'http://files.rcsb.org/{}/{}.{}'.format(
                folder, pdb_id, file_type)

        urlretrieve(download_link, outfile)

        if gzipped:
            outfile = ssbio.utils.gunzip_file(infile=outfile,
                                              outfile=outfile.strip('.gz'),
                                              outdir=outdir,
                                              delete_original=True,
                                              force_rerun_flag=force_rerun)

        log.debug('{}: saved structure file'.format(outfile))
    else:
        log.debug('{}: structure file already saved'.format(outfile))

    return outfile
Example no. 38
def get_file(fname,
             origin,
             untar=False,
             md5_hash=None,
             file_hash=None,
             cache_subdir='datasets',
             hash_algorithm='auto',
             extract=False,
             archive_format='auto',
             cache_dir=None):
    """Downloads a file from a URL if it not already in the cache.

  By default the file at the url `origin` is downloaded to the
  cache_dir `~/.keras`, placed in the cache_subdir `datasets`,
  and given the filename `fname`. The final location of a file
  `example.txt` would therefore be `~/.keras/datasets/example.txt`.

  Files in tar, tar.gz, tar.bz, and zip formats can also be extracted.
  Passing a hash will verify the file after download. The command line
  programs `shasum` and `sha256sum` can compute the hash.

  Arguments:
      fname: Name of the file. If an absolute path `/path/to/file.txt` is
          specified the file will be saved at that location.
      origin: Original URL of the file.
      untar: Deprecated in favor of 'extract'.
          boolean, whether the file should be decompressed
      md5_hash: Deprecated in favor of 'file_hash'.
          md5 hash of the file for verification
      file_hash: The expected hash string of the file after download.
          The sha256 and md5 hash algorithms are both supported.
      cache_subdir: Subdirectory under the Keras cache dir where the file is
          saved. If an absolute path `/path/to/folder` is
          specified the file will be saved at that location.
      hash_algorithm: Select the hash algorithm to verify the file.
          options are 'md5', 'sha256', and 'auto'.
          The default 'auto' detects the hash algorithm in use.
      extract: True tries extracting the file as an Archive, like tar or zip.
      archive_format: Archive format to try for extracting the file.
          Options are 'auto', 'tar', 'zip', and None.
          'tar' includes tar, tar.gz, and tar.bz files.
          The default 'auto' is ['tar', 'zip'].
          None or an empty list will return no matches found.
      cache_dir: Location to store cached files, when None it
          defaults to the [Keras
            Directory](/faq/#where-is-the-keras-configuration-filed-stored).

  Returns:
      Path to the downloaded file
  """
    if cache_dir is None:
        cache_dir = os.path.expanduser(os.path.join('~', '.keras'))
    if md5_hash is not None and file_hash is None:
        file_hash = md5_hash
        hash_algorithm = 'md5'
    datadir_base = os.path.expanduser(cache_dir)
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.join('/tmp', '.keras')
    datadir = os.path.join(datadir_base, cache_subdir)
    if not os.path.exists(datadir):
        os.makedirs(datadir)

    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + '.tar.gz'
    else:
        fpath = os.path.join(datadir, fname)

    download = False
    if os.path.exists(fpath):
        # File found; verify integrity if a hash was provided.
        if file_hash is not None:
            if not validate_file(fpath, file_hash, algorithm=hash_algorithm):
                print('A local file was found, but it seems to be '
                      'incomplete or outdated because the ' + hash_algorithm +
                      ' file hash does not match the original value of ' +
                      file_hash + ' so we will re-download the data.')
                download = True
    else:
        download = True

    if download:
        print('Downloading data from', origin)

        class ProgressTracker(object):
            # Maintain progbar for the lifetime of download.
            # This design was chosen for Python 2.7 compatibility.
            progbar = None

        def dl_progress(count, block_size, total_size):
            if ProgressTracker.progbar is None:
                if total_size == -1:
                    total_size = None
                ProgressTracker.progbar = Progbar(total_size)
            else:
                ProgressTracker.progbar.update(count * block_size)

        error_msg = 'URL fetch failure on {}: {} -- {}'
        try:
            try:
                urlretrieve(origin, fpath, dl_progress)
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
        except (Exception, KeyboardInterrupt) as e:
            if os.path.exists(fpath):
                os.remove(fpath)
            raise
        ProgressTracker.progbar = None

    if untar:
        if not os.path.exists(untar_fpath):
            _extract_archive(fpath, datadir, archive_format='tar')
        return untar_fpath

    if extract:
        _extract_archive(fpath, datadir, archive_format)

    return fpath
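A hypothetical call of the function above (the URL is a placeholder, not a real dataset; pass file_hash as well if a checksum is known):

# Download into ~/.keras/models/ and reuse the cached copy on later calls.
weights_path = get_file(
    'example_weights.h5',
    origin='https://example.com/models/example_weights.h5',
    cache_subdir='models')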
Example no. 39
def _read_from_url(url):
    filename, _ = urlretrieve(url)
    return open(filename, 'rb')
Example no. 40
def get_file(fname, origin, untar=False,
             md5_hash=None, cache_subdir='common'):
    '''Downloads a file from a URL if it not already in the cache.

    Passing the MD5 hash will verify the file after download as well as if it is already present in the cache.

    # Arguments
        fname: name of the file
        origin: original URL of the file
        untar: boolean, whether the file should be decompressed
        md5_hash: MD5 hash of the file for verification
        cache_subdir: directory being used as the cache

    # Returns
        Path to the downloaded file
    '''
    file_path = os.path.dirname(os.path.realpath(__file__))
    datadir_base = os.path.expanduser(os.path.join(file_path, '..', 'Data'))
    datadir = os.path.join(datadir_base, cache_subdir)
    if not os.path.exists(datadir):
        os.makedirs(datadir)

    #if untar:
    #    fnamesplit = fname.split('.tar.gz')
    #    untar_fpath = os.path.join(datadir, fnamesplit[0])

    if fname.endswith('.tar.gz'):
        fnamesplit = fname.split('.tar.gz')
        untar_fpath = os.path.join(datadir, fnamesplit[0])
        untar = True
    elif fname.endswith('.tgz'):
        fnamesplit = fname.split('.tgz')
        untar_fpath = os.path.join(datadir, fnamesplit[0])
        untar = True

    fpath = os.path.join(datadir, fname)

    download = False
    if os.path.exists(fpath):
        # file found; verify integrity if a hash was provided
        if md5_hash is not None:
            if not validate_file(fpath, md5_hash):
                print('A local file was found, but it seems to be '
                      'incomplete or outdated.')
                download = True
    else:
        download = True

    if download:
        print('Downloading data from', origin)
        global progbar
        progbar = None

        def dl_progress(count, block_size, total_size):
            global progbar
            if progbar is None:
                progbar = Progbar(total_size)
            else:
                progbar.update(count * block_size)

        error_msg = 'URL fetch failure on {}: {} -- {}'
        try:
            try:
                urlretrieve(origin, fpath, dl_progress)
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
        except (Exception, KeyboardInterrupt) as e:
            if os.path.exists(fpath):
                os.remove(fpath)
            raise
        progbar = None
        print()

    if untar:
        if not os.path.exists(untar_fpath):
            print('Untarring file...')
            tfile = tarfile.open(fpath, 'r:gz')
            try:
                tfile.extractall(path=datadir)
            except (Exception, KeyboardInterrupt) as e:
                if os.path.exists(untar_fpath):
                    if os.path.isfile(untar_fpath):
                        os.remove(untar_fpath)
                    else:
                        shutil.rmtree(untar_fpath)
                raise
            tfile.close()
        return untar_fpath

    return fpath
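A hedged usage sketch for the get_file variant above; the file name, URL and MD5 value are placeholders, not real artifacts.

# Hypothetical call: caches under ../Data/common relative to this module and
# re-downloads only when the MD5 check fails; the .tar.gz suffix turns on untar.
path = get_file('corpus.tar.gz',
                'https://example.com/corpus.tar.gz',
                md5_hash='0123456789abcdef0123456789abcdef')
print('dataset extracted to', path)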
Esempio n. 41
0
def download(dataset):
    with TemporaryDirectory() as tmpdir:
        urlretrieve(URL, tmpdir.joinpath(FILENAME).as_posix())
        xls2csv(tmpdir.joinpath(FILENAME), outdir=dataset.raw)
Esempio n. 42
0
    def __init__(self,
                 intervals_file,
                 fasta_file,
                 dnase_file,
                 cell_line=None,
                 RNAseq_PC_file=None,
                 mappability_file=None,
                 GENCODE_dir=None,
                 use_linecache=True):

        # intervals
        if use_linecache:
            linecache.clearcache()
            BT = BedToolLinecache
        else:
            BT = BedTool

        self.bt = BT(intervals_file)

        # Fasta
        self.fasta_file = fasta_file
        self.fasta_extractor = None  # initialize later
        # DNase
        self.dnase_file = dnase_file
        self.dnase_extractor = None
        # mappability
        if mappability_file is None:
            # download the mappability file if not existing
            mappability_file = os.path.join(
                this_dir, "../../template/dataloader_files",
                "wgEncodeDukeMapabilityUniqueness35bp.bigWig")
            if not os.path.exists(mappability_file):
                print("Downloading the mappability file")
                urlretrieve(
                    "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
                    mappability_file)
                print("Download complete")

        self.mappability_file = mappability_file
        self.mappability_extractor = None
        # Gencode features
        if GENCODE_dir is None:
            gp = os.path.join(this_dir, "dataloader_files/gencode_features/")
        else:
            gp = GENCODE_dir
        self.gencode_beds = [
            ("cpg", BedTool(gp + '/cpgisland.bed.gz')),
            ("cds",
             BedTool(gp + '/wgEncodeGencodeBasicV19.cds.merged.bed.gz')),
            ("intron",
             BedTool(gp + '/wgEncodeGencodeBasicV19.intron.merged.bed.gz')),
            ("promoter",
             BedTool(gp + '/wgEncodeGencodeBasicV19.promoter.merged.bed.gz')),
            ("utr5",
             BedTool(gp + '/wgEncodeGencodeBasicV19.utr5.merged.bed.gz')),
            ("utr3",
             BedTool(gp + '/wgEncodeGencodeBasicV19.utr3.merged.bed.gz')),
        ]
        # Overlap beds - could be done incrementally
        print("Overlapping all the bed-files")
        # The BT() and .fn are there in order to leverage BedToolLinecache
        self.overlap_beds = [(b, BT(self.bt.intersect(v, wa=True, c=True).fn))
                             for b, v in self.gencode_beds]
        print("Assesing the file")
        assert len(self.overlap_beds[1][1]) == len(self.bt)
        # Get the metadata features
        if cell_line is None:
            if RNAseq_PC_file is None:
                raise ValueError(
                    "RNAseq_PC_file has to be specified when cell_line=None")
            assert os.path.exists(RNAseq_PC_file)
        else:
            # Using the pre-defined cell-line
            rp = os.path.join(this_dir, "dataloader_files/RNAseq_features/")
            RNAseq_PC_file = os.path.join(rp, cell_line, "meta.txt")
        self.meta_feat = pd.read_csv(RNAseq_PC_file, sep="\t",
                                     header=None)[0].values
Esempio n. 43
0
def download(filename):
    if not os.path.exists(filename):
        filename, _ = urlretrieve(url + filename, filename)
    with zipfile.ZipFile(filename) as f:
        data = tf.compat.as_str(f.read(f.namelist()[0])).split()
    return data
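An illustrative call for the snippet above, assuming the module-level `url` prefix and the `os`, `zipfile` and `tf` imports it relies on; 'text8.zip' is a placeholder archive name.

# Hypothetical usage: downloads url + 'text8.zip' on first use and returns the
# whitespace-separated tokens of the first file in the archive.
words = download('text8.zip')
print('corpus size:', len(words), 'tokens')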
Esempio n. 44
0

from six.moves.urllib.request import urlretrieve

url = 'http://yaroslavvb.com/upload/notMNIST/'
filename = 'notMNIST_small.tar.gz'

filename, _ = urlretrieve(url + filename, filename)
Esempio n. 45
0
    def test_SlicerRadiomics1(self):
        """ Ideally you should have several levels of tests.  At the lowest level
    tests should exercise the functionality of the logic with different inputs
    (both valid and invalid).  At higher levels your tests should emulate the
    way the user would interact with your code and confirm that it still works
    the way you intended.
    One of the most important features of the tests is that it should alert other
    developers when their changes will have an impact on the behavior of your
    module.  For example, if a developer removes a feature that you depend on,
    your test should break so they know that the feature is needed.
    """

        self.delayDisplay('Starting the test')
        #
        # first, get some data
        # https://github.com/Radiomics/SlicerRadiomics/releases/download/TestData-v1.0.0/lung1_binary.seg.nrrd
        from six.moves.urllib.request import urlretrieve
        dataRelease = 'v1.0.0'
        dataURLPrefix = 'https://github.com/Radiomics/SlicerRadiomics/releases/download/TestData'
        dataItems = (('lung1_image.nrrd', slicer.util.loadVolume),
                     ('lung1_label.nrrd', slicer.util.loadLabelVolume),
                     ('lung1_binary.seg.nrrd', slicer.util.loadSegmentation),
                     ('lung1.seg_0.vtp', None), ('lung1.seg_1.vtp', None),
                     ('lung1_surface.seg.vtm',
                      slicer.util.loadSegmentation), ('Params.yaml', None))

        for item, loader in dataItems:
            url = dataURLPrefix + '-' + dataRelease + '/' + item
            filePath = os.path.join(slicer.app.temporaryPath, item)
            if not os.path.exists(filePath) or os.stat(filePath).st_size == 0:
                self.logger.info('Requesting download %s from %s...\n' %
                                 (item, url))
                self.assertTrue(urlretrieve(url, filePath),
                                'Failed to download from ' + url)
            if loader:
                self.logger.info('Loading %s from %s...' % (item, filePath))
                self.assertTrue(loader(filePath), 'Failed to load ' + item)

        self.delayDisplay(
            'Finished with download and loading %d volumes' %
            (slicer.mrmlScene.GetNumberOfNodesByClass('vtkMRMLVolumeNode')))

        grayscaleNode = slicer.util.getNode(pattern='lung1_image')
        labelmapNode = slicer.util.getNode(pattern='lung1_label')
        binaryNode = slicer.util.getNode(pattern='lung1_binary')
        surfaceNode = slicer.util.getNode(pattern='lung1_surface')

        parameterFile = os.path.join(slicer.app.temporaryPath, 'Params.yaml')

        logic = SlicerRadiomicsLogic()
        logic.runSync = True  # Block Thread until each extraction is done (i.e. run synchronously)
        self.assertIsNotNone(logic.hasImageData(grayscaleNode))
        self.assertIsNotNone(logic.hasImageData(labelmapNode))

        featureClasses = ['firstorder']
        settings = {'binWidth': 25, 'symmetricalGLCM': False, 'label': 1}

        enabledImageTypes = {"Original": {}}

        for maskNode in [labelmapNode, binaryNode, surfaceNode]:
            tableNode = slicer.vtkMRMLTableNode()
            tableNode.SetName('lung1_label and ' + maskNode.GetName())
            slicer.mrmlScene.AddNode(tableNode)
            # No callback needed as tests are run synchronously
            logic.runCLI(grayscaleNode, maskNode, tableNode, featureClasses,
                         settings, enabledImageTypes)
            logic.showTable(tableNode)

        for maskNode in [labelmapNode, binaryNode, surfaceNode]:
            tableNode = slicer.vtkMRMLTableNode()
            tableNode.SetName('lung1_label and ' + maskNode.GetName() +
                              ' customized with Params.yaml')
            slicer.mrmlScene.AddNode(tableNode)
            # No callback needed as tests are run synchronously
            logic.runCLIWithParameterFile(grayscaleNode, maskNode, tableNode,
                                          parameterFile)
            logic.showTable(tableNode)

        self.delayDisplay('Test passed!')
Esempio n. 46
0
def download_dataset(fname, origin, untar=False):
    """Download a dataset, if not already there.

    Parameters
    ----------

    fname: str
        Full filename of dataset, e.g. ``mnist.pkl.gz``.
    origin: str
        Location of dataset, e.g. url
        https://s3.amazonaws.com/img-datasets/mnist.pkl.gz
    untar: Optional[bool]
        If ``True``, untar file.

    Returns
    -------

    fpath: str
        The path to the downloaded dataset. If the user has write access to
        ``home``, the dataset will be stored in ``~/.snntoolbox/datasets/``,
        otherwise in ``/tmp/.snntoolbox/datasets/``.

    Notes
    -----

    Test under python2.
    """

    import tarfile
    import shutil
    from six.moves.urllib.error import URLError, HTTPError
    # Under Python 2, 'urlretrieve' relies on FancyURLopener from legacy
    # urllib module, known to have issues with proxy management
    from six.moves.urllib.request import urlretrieve

    datadir_base = os.path.expanduser(os.path.join('~', '.snntoolbox'))
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.join('/tmp', '.snntoolbox')
    datadir = os.path.join(datadir_base, 'datasets')
    if not os.path.exists(datadir):
        os.makedirs(datadir)

    untar_fpath = None
    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + '.tar.gz'
    else:
        fpath = os.path.join(datadir, fname)

    if not os.path.exists(fpath):
        print("Downloading data from {}".format(origin))
        error_msg = 'URL fetch failure on {}: {} -- {}'
        try:
            try:
                urlretrieve(origin, fpath)
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
        except (Exception, KeyboardInterrupt) as e:
            if os.path.exists(fpath):
                os.remove(fpath)
            raise e

    if untar:
        if not os.path.exists(untar_fpath):
            print("Untaring file...\n")
            tfile = tarfile.open(fpath, 'r:gz')
            try:
                tfile.extractall(path=datadir)
            except (Exception, KeyboardInterrupt) as e:
                if os.path.exists(untar_fpath):
                    if os.path.isfile(untar_fpath):
                        os.remove(untar_fpath)
                    else:
                        shutil.rmtree(untar_fpath)
                raise e
            tfile.close()
        return untar_fpath

    return fpath
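A short usage sketch mirroring the example URL from the docstring; without untar, the raw .pkl.gz path inside the cache directory is returned.

# Hypothetical call: stores the file under ~/.snntoolbox/datasets (or
# /tmp/.snntoolbox/datasets when the home directory is not writable).
fpath = download_dataset('mnist.pkl.gz',
                         'https://s3.amazonaws.com/img-datasets/mnist.pkl.gz')
print('dataset available at', fpath)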
Esempio n. 47
0
def load_word_vectors(root, wv_type, dim):
    """

    From https://github.com/pytorch/text/

    BSD 3-Clause License

    Copyright (c) James Bradbury and Soumith Chintala 2016,
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this
      list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

    * Neither the name of the copyright holder nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    """
    """Load word vectors from a path, trying .pt, .txt, and .zip extensions."""
    if isinstance(dim, int):
        dim = str(dim) + 'd'
    fname = os.path.join(root, wv_type + '.' + dim)
    if os.path.isfile(fname + '.pt'):
        fname_pt = fname + '.pt'
        print('loading word vectors from', fname_pt)
        return torch.load(fname_pt)
    if os.path.isfile(fname + '.txt'):
        fname_txt = fname + '.txt'
        cm = open(fname_txt, 'rb')
        cm = [line for line in cm]
    elif os.path.basename(wv_type) in URL:
        url = URL[wv_type]
        print('downloading word vectors from {}'.format(url))
        filename = os.path.basename(fname)
        if not os.path.exists(root):
            os.makedirs(root)
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
            fname, _ = urlretrieve(url, fname, reporthook=reporthook(t))
            with zipfile.ZipFile(fname, "r") as zf:
                print('extracting word vectors into {}'.format(root))
                zf.extractall(root)
        if not os.path.isfile(fname + '.txt'):
            raise RuntimeError('no word vectors of requested dimension found')
        return load_word_vectors(root, wv_type, dim)
    else:
        raise RuntimeError('unable to load word vectors %s from %s' %
                           (wv_type, root))

    wv_tokens, wv_arr, wv_size = [], array.array('d'), None
    if cm is not None:
        print("Loading word vectors from {}".format(fname_txt))
        for line in trange(len(cm)):
            entries = cm[line].strip().split(b' ')
            word, entries = entries[0], entries[1:]
            if wv_size is None:
                wv_size = len(entries)
            try:
                if isinstance(word, six.binary_type):
                    word = word.decode('utf-8')
            except:
                print('non-UTF8 token', repr(word), 'ignored')
                continue
            wv_arr.extend(float(x) for x in entries)
            wv_tokens.append(word)

    wv_dict = {word: i for i, word in enumerate(wv_tokens)}
    wv_arr = torch.Tensor(wv_arr).view(-1, wv_size)
    ret = (wv_dict, wv_arr, wv_size)
    torch.save(ret, fname + '.pt')
    return ret
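A hedged usage sketch; it assumes the module-level URL table contains an entry such as 'glove.6B' (as in torchtext), which is not shown in the snippet.

# Hypothetical call: loads, or downloads and caches, 100-dimensional vectors.
wv_dict, wv_arr, wv_size = load_word_vectors('.vector_cache', 'glove.6B', 100)
print(len(wv_dict), 'tokens with', wv_size, 'dimensions each')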
Esempio n. 48
0
def download_and_extract_zip_file(url, targetdir='.', verbose=True):
    import csv
    from six.moves.urllib.request import urlretrieve
    from six.moves.urllib.parse import urlparse
    from zipfile import ZipFile

    res = urlparse(url)
    if res.scheme in ('','file'):
        url = Path(res.path).resolve().as_uri()
        # local file, 'urlretrieve' will not make a copy
        # -> don't delete 'downloaded' file
        delete = False
    else:
        delete = True

    # verbosity levels:
    # - 0: no messages
    # - 1: status messages
    # - 2: status messages and list of all files
    if isinstance(verbose,bool):
        verbose *= 2

    log = (print) if verbose else (lambda *a,**k: None)

    targetdir = Path(targetdir)
    if not targetdir.is_dir():
        targetdir.mkdir(parents=True,exist_ok=True)

    provided = []

    def content_is_missing():
        try:
            filepath, http_msg = urlretrieve(url+'.contents')
            with open(filepath,'r') as contents_file:
                contents = list(csv.reader(contents_file,delimiter='\t'))
        except:
            return True
        finally:
            if delete:
                try: os.unlink(filepath)
                except: pass

        for size, relpath in contents:
            size, relpath = int(size.strip()), relpath.strip()
            entry = targetdir / relpath
            if not entry.exists():
                return True
            if entry.is_dir():
                if not relpath.endswith('/'): return True
            elif entry.is_file():
                if relpath.endswith('/') or entry.stat().st_size != size: return True
            else:
                return True
            provided.append(relpath)

        return False


    if content_is_missing():
        try:
            log('Files missing, downloading...',end='')
            filepath, http_msg = urlretrieve(url)
            with ZipFile(filepath,'r') as zip_file:
                log(' extracting...',end='')
                zip_file.extractall(str(targetdir))
                provided = zip_file.namelist()
            log(' done.')
        finally:
            if delete:
                try: os.unlink(filepath)
                except: pass
    else:
        log('Files found, nothing to download.')

    if verbose > 1:
        log('\n'+str(targetdir)+':')
        consume(map(lambda x: log('-',Path(x)), provided))
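A minimal usage sketch; the URL is a placeholder pointing at a zip archive that ships the '.contents' manifest the function checks for.

# Hypothetical call: skips the download when all listed files already exist.
download_and_extract_zip_file('https://example.com/archive.zip',
                              targetdir='data/archive')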
Esempio n. 49
0
    def cache(self, name, cache, url=None):
        if os.path.isfile(name):
            path = name
            path_pt = os.path.join(cache, os.path.basename(name)) + '.pt'
        else:
            path = os.path.join(cache, name)
            path_pt = path + '.pt'

        if not os.path.isfile(path_pt):
            if not os.path.isfile(path) and url:
                logger.info('Downloading vectors from {}'.format(url))
                if not os.path.exists(cache):
                    os.makedirs(cache)
                dest = os.path.join(cache, os.path.basename(url))
                if not os.path.isfile(dest):
                    with tqdm(unit='B', unit_scale=True, miniters=1,
                              desc=dest) as t:
                        try:
                            urlretrieve(url, dest, reporthook=reporthook(t))
                        except KeyboardInterrupt as e:  # remove the partial zip file
                            os.remove(dest)
                            raise e
                logger.info('Extracting vectors into {}'.format(cache))
                ext = os.path.splitext(dest)[1][1:]
                if ext == 'zip':
                    with zipfile.ZipFile(dest, "r") as zf:
                        zf.extractall(cache)
                elif ext == 'gz':
                    with tarfile.open(dest, 'r:gz') as tar:
                        tar.extractall(path=cache)
            if not os.path.isfile(path):
                raise RuntimeError('no vectors found at {}'.format(path))

            # str call is necessary for Python 2/3 compatibility, since
            # argument must be Python 2 str (Python 3 bytes) or
            # Python 3 str (Python 2 unicode)
            itos, vectors, dim = [], array.array(str('d')), None

            # Try to read the whole file with utf-8 encoding.
            binary_lines = False
            try:
                with io.open(path, encoding="utf8") as f:
                    lines = [line for line in f]
            # If there are malformed lines, read in binary mode
            # and manually decode each word from utf-8
            except:
                logger.warning("Could not read {} as UTF8 file, "
                               "reading file as bytes and skipping "
                               "words with malformed UTF8.".format(path))
                with open(path, 'rb') as f:
                    lines = [line for line in f]
                binary_lines = True

            logger.info("Loading vectors from {}".format(path))
            for line in tqdm(lines, total=len(lines)):
                # Explicitly splitting on " " is important, so we don't
                # get rid of Unicode non-breaking spaces in the vectors.
                entries = line.rstrip().split(b" " if binary_lines else " ")

                word, entries = entries[0], entries[1:]
                if dim is None and len(entries) > 1:
                    dim = len(entries)
                elif len(entries) == 1:
                    logger.warning("Skipping token {} with 1-dimensional "
                                   "vector {}; likely a header".format(
                                       word, entries))
                    continue
                elif dim != len(entries):
                    raise RuntimeError(
                        "Vector for token {} has {} dimensions, but previously "
                        "read vectors have {} dimensions. All vectors must have "
                        "the same number of dimensions.".format(
                            word, len(entries), dim))

                if binary_lines:
                    try:
                        if isinstance(word, six.binary_type):
                            word = word.decode('utf-8')
                    except:
                        logger.info("Skipping non-UTF8 token {}".format(
                            repr(word)))
                        continue
                vectors.extend(float(x) for x in entries)
                itos.append(word)

            self.itos = itos
            self.stoi = {word: i for i, word in enumerate(itos)}
            self.vectors = torch.Tensor(vectors).view(-1, dim)
            self.dim = dim
            logger.info('Saving vectors to {}'.format(path_pt))
            if not os.path.exists(cache):
                os.makedirs(cache)
            torch.save((self.itos, self.stoi, self.vectors, self.dim), path_pt)
        else:
            logger.info('Loading vectors from {}'.format(path_pt))
            self.itos, self.stoi, self.vectors, self.dim = torch.load(path_pt)
Esempio n. 50
0
import os

# make dependency directory
if not exists('deps'):
    mkdir('deps')

os.system('pip install cython')
from Cython.Build import cythonize

# download Eigen if we don't have it in deps
eigenurl = 'http://bitbucket.org/eigen/eigen/get/3.2.6.tar.gz'
eigentarpath = join('deps', 'Eigen.tar.gz')
eigenpath = join('deps', 'Eigen')
if not exists(eigenpath):
    print('Downloading Eigen...')
    urlretrieve(eigenurl, eigentarpath)
    with tarfile.open(eigentarpath, 'r') as tar:
        tar.extractall('deps')
    thedir = glob(join('deps', 'eigen-eigen-*'))[0]
    move(join(thedir, 'Eigen'), eigenpath)
    print('...done!')

setup(name='autoregressive',
      version='0.1.1',
      description=
      'Extension for switching vector autoregressive models with pyhsmm',
      author='Matthew James Johnson',
      author_email='*****@*****.**',
      url='https://github.com/mattjj/pyhsmm-autoregressive',
      license='GPL',
      packages=['autoregressive'],
Esempio n. 51
0
 def _download(self, url, options):
     # pylint: disable=unused-argument
     return urlretrieve(url)[0]  # nocv
Esempio n. 52
0
def get_file(filename: str, url: str, path: Optional[str] = None, extract: bool = False, verbose: bool = False) -> str:
    """
    Downloads a file from a URL if it is not already in the cache. The file indicated by `url` is downloaded to the
    path `path` (default is ~/.art/data) and given the name `filename`. Files in tar, tar.gz, tar.bz, and zip formats
    can also be extracted. This is a simplified version of the function with the same name in Keras.

    :param filename: Name of the file.
    :param url: Download URL.
    :param path: Folder to store the download. If not specified, `~/.art/data` is used instead.
    :param extract: If true, tries to extract the archive.
    :param verbose: If true, print download progress bar.
    :return: Path to the downloaded file.
    """
    if path is None:
        path_ = os.path.expanduser(config.ART_DATA_PATH)
    else:
        path_ = os.path.expanduser(path)
    if not os.access(path_, os.W_OK):
        path_ = os.path.join("/tmp", ".art")

    if not os.path.exists(path_):
        os.makedirs(path_)

    if extract:
        extract_path = os.path.join(path_, filename)
        full_path = extract_path + ".tar.gz"
    else:
        full_path = os.path.join(path_, filename)

    # Determine if dataset needs downloading
    download = not os.path.exists(full_path)

    if download:
        logger.info("Downloading data from %s", url)
        error_msg = "URL fetch failure on {}: {} -- {}"
        try:
            try:
                from six.moves.urllib.error import HTTPError, URLError
                from six.moves.urllib.request import urlretrieve

                # The following two lines should prevent occasionally occurring
                # [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:847)
                import ssl

                ssl._create_default_https_context = ssl._create_unverified_context  # pylint: disable=W0212

                if verbose:
                    with tqdm() as t_bar:
                        last_block = [0]

                        def progress_bar(blocks: int = 1, block_size: int = 1, total_size: Optional[int] = None):
                            """
                            :param blocks: Number of blocks transferred so far [default: 1].
                            :param block_size: Size of each block (in tqdm units) [default: 1].
                            :param total_size: Total size (in tqdm units). If [default: None] or -1, remains unchanged.
                            """
                            if total_size not in (None, -1):
                                t_bar.total = total_size
                            displayed = t_bar.update((blocks - last_block[0]) * block_size)
                            last_block[0] = blocks
                            return displayed

                        urlretrieve(url, full_path, reporthook=progress_bar)
                else:
                    urlretrieve(url, full_path)

            except HTTPError as exception:
                raise Exception(error_msg.format(url, exception.code, exception.msg)) from exception  # type: ignore
            except URLError as exception:
                raise Exception(error_msg.format(url, exception.errno, exception.reason)) from exception
        except (Exception, KeyboardInterrupt):
            if os.path.exists(full_path):
                os.remove(full_path)
            raise

    if extract:
        if not os.path.exists(extract_path):
            _extract(full_path, path_)
        return extract_path

    return full_path
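A hedged usage sketch for the ART-style helper above; the file name and URL are placeholders.

# Hypothetical call: downloads samples.tar.gz into ~/.art/data (or /tmp/.art)
# and returns the extraction path.
data_dir = get_file('samples', 'https://example.com/samples.tar.gz',
                    extract=True, verbose=True)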
Esempio n. 53
0
def _download_file(file_name,
                   url,
                   local_folder,
                   cache_subdir,
                   file_hash=None,
                   cache_dir=None,
                   verbose=True):
    """
    Downloads the specified file from the given URL.

    Heavily inspired by and lovingly adapted from keras' `get_file` function:
    https://github.com/fchollet/keras/blob/afbd5d34a3bdbb0916d558f96af197af1e92ce70/keras/utils/data_utils.py#L109

    Args:
        file_name: (String) name of the file located on the server
        url: (String) url of the file
        local_folder: (String) alternate folder in which to download the file
        cache_subdir: (String) subdirectory of folder in which to download file
        file_hash: (String) expected hash of downloaded file
        cache_dir: (String) base cache directory; defaults to ~/.nussl when not provided

    Returns:
        (String) local path to downloaded file

    """
    if local_folder not in [None, '']:
        # local folder provided, let's create it if it doesn't exist and use it as datadir
        if not os.path.exists(os.path.expanduser(local_folder)):
            os.makedirs(os.path.expanduser(local_folder))
        datadir = os.path.expanduser(local_folder)

    else:
        if cache_dir is None:
            cache_dir = os.path.expanduser(os.path.join('~', '.nussl'))
        datadir_base = os.path.expanduser(cache_dir)

        if not os.access(datadir_base, os.W_OK):
            datadir_base = os.path.join('/tmp', '.nussl')

        datadir = os.path.join(datadir_base, cache_subdir)
        if not os.path.exists(datadir):
            os.makedirs(datadir)

    file_path = os.path.join(datadir, file_name)

    download = False
    if os.path.exists(file_path):
        if file_hash is not None:
            # compare the provided hash with the hash of the file currently at file_path
            current_hash = _hash_file(file_path)

            # if the hashes are equal, we already have the file we need, so don't download
            if file_hash != current_hash:
                if verbose:
                    warnings.warn("Hash for {} does not match known hash. "
                                  "Downloading {} from servers...".format(
                                      file_path, file_name))
                download = True
            elif verbose:
                print('Matching file found at {}, skipping download.'.format(
                    file_path))

        else:
            download = True

    else:
        download = True

    if download:
        if verbose:
            print('Saving file at {}'.format(file_path))
            print('Downloading {} from {}'.format(file_name, url))

        def _dl_progress(count, block_size, total_size):
            percent = int(count * block_size * 100 / total_size)

            if percent <= 100:
                sys.stdout.write('\r{}...{}%'.format(file_name, percent))
                sys.stdout.flush()

        error_msg = 'URL fetch failure on {}: {} -- {}'

        try:
            try:
                reporthook = _dl_progress if verbose else None
                urlretrieve(url, file_path, reporthook)
                if verbose:
                    print()  # print a new line after the progress is done.

            except HTTPError as e:
                raise FailedDownloadError(error_msg.format(url, e.code, e.msg))
            except URLError as e:
                raise FailedDownloadError(
                    error_msg.format(url, e.errno, e.reason))
        except (Exception, KeyboardInterrupt) as e:
            if os.path.exists(file_path):
                os.remove(file_path)
            raise e

        # check hash of received file to see if it matches the provided hash
        if file_hash is not None:
            download_hash = _hash_file(file_path)
            if file_hash != download_hash:
                # the downloaded file is not what it should be. Get rid of it.
                os.remove(file_path)
                raise MismatchedHashError(
                    "Downloaded file ({}) has been deleted "
                    "because of a hash mismatch.".format(file_path))

        return file_path

    else:
        return file_path
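A short, illustrative call; the file name, URL and hash are placeholders.

# Hypothetical call: stores the file under ~/.nussl/models and verifies it
# against the expected hash before returning the local path.
local_path = _download_file('model.npz',
                            'https://example.com/model.npz',
                            local_folder=None,
                            cache_subdir='models',
                            file_hash='0123456789abcdef0123456789abcdef')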
Esempio n. 54
0
    def readtext(self, image, decoder = 'greedy', beamWidth= 5, batch_size = 1,\
                 workers = 0, allowlist = None, blocklist = None, detail = 1,\
                 paragraph = False,\
                 contrast_ths = 0.1,adjust_contrast = 0.5, filter_ths = 0.003,\
                 text_threshold = 0.7, low_text = 0.4, link_threshold = 0.4,\
                 canvas_size = 2560, mag_ratio = 1.,\
                 slope_ths = 0.1, ycenter_ths = 0.5, height_ths = 0.5,\
                 width_ths = 0.5, add_margin = 0.1):
        '''
        Parameters:
        file: file path or numpy-array or a byte stream object
        '''

        if type(image) == str:
            if image.startswith('http://') or image.startswith('https://'):
                tmp, _ = urlretrieve(image,
                                     reporthook=printProgressBar(
                                         prefix='Progress:',
                                         suffix='Complete',
                                         length=50))
                img_cv_grey = cv2.imread(tmp, cv2.IMREAD_GRAYSCALE)
                os.remove(tmp)
            else:
                img_cv_grey = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
                image = os.path.expanduser(image)
            img = loadImage(image)  # can accept URL
        elif type(image) == bytes:
            nparr = np.frombuffer(image, np.uint8)
            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_cv_grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        elif type(image) == np.ndarray:
            if len(image.shape) == 2:  # grayscale
                img_cv_grey = image
                img = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
            elif len(image.shape) == 3:  # BGRscale
                img = image
                img_cv_grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        text_box = get_textbox(self.detector, img, canvas_size, mag_ratio, text_threshold,\
                               link_threshold, low_text, False, self.device)
        horizontal_list, free_list = group_text_box(text_box, slope_ths,
                                                    ycenter_ths, height_ths,
                                                    width_ths, add_margin)

        # should add filter to screen small box out

        image_list, max_width = get_image_list(horizontal_list,
                                               free_list,
                                               img_cv_grey,
                                               model_height=imgH)

        if allowlist:
            ignore_char = ''.join(set(self.character) - set(allowlist))
        elif blocklist:
            ignore_char = ''.join(set(blocklist))
        else:
            ignore_char = ''.join(set(self.character) - set(self.lang_char))

        if self.model_lang in [
                'chinese_tra', 'chinese_sim', 'japanese', 'korean'
        ]:
            decoder = 'greedy'
        result = get_text(self.character, imgH, int(max_width), self.recognizer, self.converter, image_list,\
                      ignore_char, decoder, beamWidth, batch_size, contrast_ths, adjust_contrast, filter_ths,\
                      workers, self.device)

        if self.model_lang == 'arabic':
            direction_mode = 'rtl'
            result = [list(item) for item in result]
            for item in result:
                item[1] = get_display(item[1])
        else:
            direction_mode = 'ltr'

        if paragraph:
            result = get_paragraph(result, mode=direction_mode)

        if detail == 0:
            return [item[1] for item in result]
        else:
            return result
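An illustrative call, assuming `reader` is an already constructed object exposing the method above (as in EasyOCR); the image path is a placeholder.

# Hypothetical usage: item[1] holds the recognised text for each detected region.
results = reader.readtext('page.png')
for item in results:
    print(item[1])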
Esempio n. 55
0
                                                         is_training=False,
                                                         reuse=reuse)

        logits = logits[:, 1:]

        probs = tf.nn.softmax(logits)

    return logits, probs, end_points


logits, probs, end_points = network(image, reuse=False)

checkpoint_filename = "./inception_v3.ckpt"

if not os.path.exists(checkpoint_filename):
    inception_tarball, _ = urlretrieve(
        "http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz")

    tarfile.open(inception_tarball, 'r:gz').extractall("./")

restore_vars = [
    var for var in tf.global_variables() if var.name.startswith('InceptionV3/')
]

saver = tf.train.Saver(restore_vars)

saver.restore(sess, "./inception_v3.ckpt")


def get_feature(img, feature_layer_name):

    p, feature_values = sess.run([probs, end_points], feed_dict={image: img})
Esempio n. 56
0
File: vocab.py Project: yuzhiw/text
    def cache(self, name, cache, url=None, max_vectors=None):
        if os.path.isfile(name):
            path = name
            if max_vectors:
                file_suffix = '_{}.pt'.format(max_vectors)
            else:
                file_suffix = '.pt'
            path_pt = os.path.join(cache, os.path.basename(name)) + file_suffix
        else:
            path = os.path.join(cache, name)
            if max_vectors:
                file_suffix = '_{}.pt'.format(max_vectors)
            else:
                file_suffix = '.pt'
            path_pt = path + file_suffix

        if not os.path.isfile(path_pt):
            if not os.path.isfile(path) and url:
                logger.info('Downloading vectors from {}'.format(url))
                if not os.path.exists(cache):
                    os.makedirs(cache)
                dest = os.path.join(cache, os.path.basename(url))
                if not os.path.isfile(dest):
                    with tqdm(unit='B', unit_scale=True, miniters=1, desc=dest) as t:
                        try:
                            urlretrieve(url, dest, reporthook=reporthook(t))
                        except KeyboardInterrupt as e:  # remove the partial zip file
                            os.remove(dest)
                            raise e
                logger.info('Extracting vectors into {}'.format(cache))
                ext = os.path.splitext(dest)[1][1:]
                if ext == 'zip':
                    with zipfile.ZipFile(dest, "r") as zf:
                        zf.extractall(cache)
                elif ext == 'gz':
                    if dest.endswith('.tar.gz'):
                        with tarfile.open(dest, 'r:gz') as tar:
                            tar.extractall(path=cache)
            if not os.path.isfile(path):
                raise RuntimeError('no vectors found at {}'.format(path))

            logger.info("Loading vectors from {}".format(path))
            ext = os.path.splitext(path)[1][1:]
            if ext == 'gz':
                open_file = gzip.open
            else:
                open_file = open

            vectors_loaded = 0
            with open_file(path, 'rb') as f:
                num_lines, dim = _infer_shape(f)
                if not max_vectors or max_vectors > num_lines:
                    max_vectors = num_lines

                itos, vectors, dim = [], torch.zeros((max_vectors, dim)), None

                for line in tqdm(f, total=num_lines):
                    # Explicitly splitting on " " is important, so we don't
                    # get rid of Unicode non-breaking spaces in the vectors.
                    entries = line.rstrip().split(b" ")

                    word, entries = entries[0], entries[1:]
                    if dim is None and len(entries) > 1:
                        dim = len(entries)
                    elif len(entries) == 1:
                        logger.warning("Skipping token {} with 1-dimensional "
                                       "vector {}; likely a header".format(word, entries))
                        continue
                    elif dim != len(entries):
                        raise RuntimeError(
                            "Vector for token {} has {} dimensions, but previously "
                            "read vectors have {} dimensions. All vectors must have "
                            "the same number of dimensions.".format(word, len(entries),
                                                                    dim))

                    try:
                        if isinstance(word, six.binary_type):
                            word = word.decode('utf-8')
                    except UnicodeDecodeError:
                        logger.info("Skipping non-UTF8 token {}".format(repr(word)))
                        continue

                    vectors[vectors_loaded] = torch.tensor([float(x) for x in entries])
                    vectors_loaded += 1
                    itos.append(word)

                    if vectors_loaded == max_vectors:
                        break

            self.itos = itos
            self.stoi = {word: i for i, word in enumerate(itos)}
            self.vectors = torch.Tensor(vectors).view(-1, dim)
            self.dim = dim
            logger.info('Saving vectors to {}'.format(path_pt))
            if not os.path.exists(cache):
                os.makedirs(cache)
            torch.save((self.itos, self.stoi, self.vectors, self.dim), path_pt)
        else:
            logger.info('Loading vectors from {}'.format(path_pt))
            self.itos, self.stoi, self.vectors, self.dim = torch.load(path_pt)
Esempio n. 57
0
 def _download(self, url: Text, options) -> FILENAME:
     # pylint: disable=unused-argument
     return urlretrieve(url)[0]  # nocv
Esempio n. 58
0
def load_word_vectors(root, wv_type, dim):
    """Load word vectors from a path, trying .pt, .txt, and .zip extensions."""
    if isinstance(dim, int):
        dim = str(dim) + 'd'
    fname = os.path.join(root, wv_type + '.' + dim)
    if os.path.isfile(fname + '.pt'):
        fname_pt = fname + '.pt'
        print('loading word vectors from', fname_pt)
        try:
            return torch.load(fname_pt)
        except Exception as e:
            print("""
                Error loading the model from {}

                This could be because this code was previously run with one
                PyTorch version to generate cached data and is now being
                run with another version.
                You can try to delete the cached files on disk (this file
                  and others) and re-running the code

                Error message:
                ---------
                {}
                """.format(fname_pt, str(e)))
            sys.exit(-1)
    if os.path.isfile(fname + '.txt'):
        fname_txt = fname + '.txt'
        cm = open(fname_txt, 'rb')
        cm = [line for line in cm]
    elif os.path.basename(wv_type) in URL:
        url = URL[wv_type]
        print('downloading word vectors from {}'.format(url))
        filename = os.path.basename(fname)
        if not os.path.exists(root):
            os.makedirs(root)
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
            fname, _ = urlretrieve(url, fname, reporthook=reporthook(t))
            with zipfile.ZipFile(fname, "r") as zf:
                print('extracting word vectors into {}'.format(root))
                zf.extractall(root)
        if not os.path.isfile(fname + '.txt'):
            raise RuntimeError('no word vectors of requested dimension found')
        return load_word_vectors(root, wv_type, dim)
    else:
        raise RuntimeError('unable to load word vectors')

    wv_tokens, wv_arr, wv_size = [], array.array('d'), None
    if cm is not None:
        for line in tqdm(
                range(len(cm)),
                desc="loading word vectors from {}".format(fname_txt)):
            entries = cm[line].strip().split(b' ')
            word, entries = entries[0], entries[1:]
            if wv_size is None:
                wv_size = len(entries)
            try:
                if isinstance(word, six.binary_type):
                    word = word.decode('utf-8')
            except:
                print('non-UTF8 token', repr(word), 'ignored')
                continue
            wv_arr.extend(float(x) for x in entries)
            wv_tokens.append(word)

    wv_dict = {word: i for i, word in enumerate(wv_tokens)}
    wv_arr = torch.Tensor(wv_arr).view(-1, wv_size)
    ret = (wv_dict, wv_arr, wv_size)
    torch.save(ret, fname + '.pt')
    return ret
Esempio n. 59
0
def get_file(fname,
             origin,
             save_path,
             untar=False,
             md5_hash=None,
             cache_subdir='datasets'):
    """Downloads a file from a URL if it not already in the cache.
    Passing the MD5 hash will verify the file after download
    as well as if it is already present in the cache.

    Usually it downloads the file to
        save_path/cache_dubdir/fname

    Arguments
    ---------
        fname: name of the file
        origin: original URL of the file
        save_path: path to create cache_subdir.
        untar: boolean, whether the file should be decompressed
        md5_hash: MD5 hash of the file for verification
        cache_subdir: directory being used as the cache

    Returns
    -------
        Path to the downloaded file
    """
    datadir_base = save_path
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.expanduser(os.path.join('~', '.kapre'))
        print('Given path {} is not accessible. '
              'Trying to use ~/.kapre instead.'.format(save_path))
        if not os.access(datadir_base, os.W_OK):
            print('~/.kapre is not accessible, using /tmp/kapre instead.')
            datadir_base = os.path.join('/tmp', '.kapre')
    datadir = os.path.join(datadir_base, cache_subdir)

    if not os.path.exists(datadir):
        os.makedirs(datadir)

    if untar:
        assert fname.endswith('.tar.gz'), fname
        fpath = os.path.join(datadir, fname)
        # Strip the '.tar.gz' suffix; rstrip() would remove characters, not the suffix.
        untar_fpath = fpath[:-len('.tar.gz')]
    else:
        fpath = os.path.join(datadir, fname)

    download = False
    if os.path.exists(fpath):
        # File found; verify integrity if a hash was provided.
        if md5_hash is not None:
            if not validate_file(fpath, md5_hash):
                print('A local file was found, but it seems to be '
                      'incomplete or outdated.')
                download = True
    else:
        download = True

    if download:
        print('Downloading data from', origin)
        # Keep the progress bar in a one-element list so it persists across
        # reporthook calls (rebinding a plain local would create a new bar each time).
        progbar = [None]

        def dl_progress(count, block_size, total_size):
            if progbar[0] is None:
                progbar[0] = Progbar(total_size)
            else:
                progbar[0].update(count * block_size)

        error_msg = 'URL fetch failure on {}: {} -- {}'
        try:
            try:
                urlretrieve(origin, fpath, dl_progress)
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
        except (Exception, KeyboardInterrupt) as e:
            if os.path.exists(fpath):
                os.remove(fpath)
            raise
        progbar[0] = None

    if untar:
        if not os.path.exists(untar_fpath):
            print('Untarring file...')
            tfile = tarfile.open(fpath, 'r:gz')
            try:
                tfile.extractall(path=datadir)
            except (Exception, KeyboardInterrupt) as e:
                if os.path.exists(untar_fpath):
                    if os.path.isfile(untar_fpath):
                        os.remove(untar_fpath)
                    else:
                        shutil.rmtree(untar_fpath)
                raise
            tfile.close()
            # return untar_fpath

    return datadir
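A hedged usage sketch; the arguments are placeholders. Note that this variant returns the cache directory rather than the extracted path.

# Hypothetical call: downloads and untars into ./cache/datasets.
datadir = get_file('speech.tar.gz',
                   'https://example.com/speech.tar.gz',
                   save_path='./cache',
                   untar=True)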
Esempio n. 60
0
def get_file(fname: str = None,
             origin: str = None,
             untar: bool = False,
             extract: bool = False,
             md5_hash: typing.Any = None,
             file_hash: typing.Any = None,
             hash_algorithm: str = 'auto',
             archive_format: str = 'auto',
             cache_subdir: typing.Union[Path, str] = 'data',
             cache_dir: typing.Union[Path, str] = 'dataset',
             verbose: int = 1) -> str:
    """
    Downloads a file from a URL if it is not already in the cache.

    By default the file at the url `origin` is downloaded to the
    cache_dir `~/.project/datasets`, placed in the cache_subdir `data`,
    and given the filename `fname`. The final location of a file
    `example.txt` would therefore be `~/.project/datasets/data/example.txt`.

    Files in tar, tar.gz, tar.bz, and zip formats can also be extracted.
    Passing a hash will verify the file after download. The command line
    programs `shasum` and `sha256sum` can compute the hash.

    :param fname: Name of the file. If an absolute path `/path/to/file.txt` is
        specified the file will be saved at that location.
    :param origin: Original URL of the file.
    :param untar: Deprecated in favor of 'extract'. Boolean, whether the file
        should be decompressed.
    :param md5_hash: Deprecated in favor of 'file_hash'. md5 hash of the file
        for verification.
    :param file_hash: The expected hash string of the file after download.
        The sha256 and md5 hash algorithms are both supported.
    :param cache_subdir: Subdirectory under the cache dir where the file is
        saved. If an absolute path `/path/to/folder` is specified the file
        will be saved at that location.
    :param hash_algorithm: Select the hash algorithm to verify the file.
        options are 'md5', 'sha256', and 'auto'. The default 'auto' detects
        the hash algorithm in use.
    :param extract: True tries extracting the file as an archive, like tar
        or zip.
    :param archive_format: Archive format to try for extracting the file.
        Options are 'auto', 'tar', 'zip', and None.
        'tar' includes tar, tar.gz, and tar.bz files.
        The default 'auto' is ['tar', 'zip'].
        None or an empty list will return no matches found.
    :param cache_dir: Location to store cached files, when None it defaults to
        the [project.USER_DATA_DIR](~/.project/datasets).
    :param verbose: Verbosity mode, 0 (silent), 1 (verbose), 2 (semi-verbose)

    :return: Path to the downloaded file.
    """
    if md5_hash is not None and file_hash is None:
        file_hash = md5_hash
        hash_algorithm = 'md5'
    datadir_base = os.path.expanduser(cache_dir)
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.join('/tmp', '.project')
    datadir = os.path.join(datadir_base, cache_subdir)
    if not os.path.exists(datadir):
        os.makedirs(datadir)

    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + '.tar.gz'
    else:
        fpath = os.path.join(datadir, fname)

    download = False
    if os.path.exists(fpath):
        if file_hash is not None:
            if not validate_file(fpath, file_hash, algorithm=hash_algorithm):
                print('A local file was found, but it seems to be '
                      'incomplete or outdated because the file hash '
                      'does not match the original value of file_hash.'
                      ' We will re-download the data.')
                download = True
    else:
        download = True

    if download:
        print('Downloading data from', origin)

        class ProgressTracker(object):
            progbar = None

        def dl_progress(count, block_size, total_size):
            if ProgressTracker.progbar is None:
                if total_size == -1:
                    total_size = None
                ProgressTracker.progbar = Progbar(target=total_size,
                                                  verbose=verbose)
            else:
                ProgressTracker.progbar.update(count * block_size)

        error_msg = 'URL fetch failure on {} : {} -- {}'
        try:
            try:
                from six.moves.urllib.request import urlretrieve
                urlretrieve(origin, fpath, dl_progress)
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
        except (Exception, KeyboardInterrupt):
            if os.path.exists(fpath):
                os.remove(fpath)
            raise
        ProgressTracker.progbar = None

    if untar:
        if not os.path.exists(untar_fpath):
            _extract_archive(fpath, datadir, archive_format='tar')
        return untar_fpath

    if extract:
        _extract_archive(fpath, datadir, archive_format)

    return fpath
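A closing usage sketch for the variant above; the names are placeholders.

# Hypothetical call: caches under <cache_dir>/data, extracts the tarball and
# returns the untarred path.
path = get_file(fname='toy_corpus',
                origin='https://example.com/toy_corpus.tar.gz',
                untar=True,
                cache_dir='~/.project/datasets')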