Esempio n. 1
0
def write_streaming_download_file(url,
                                  filepath,
                                  mode='wt',
                                  encoding=None,
                                  auto_make_dirs=False,
                                  chunk_size=1024):
    """
    Download content from ``url`` in a stream; write successive chunks of size
    ``chunk_size`` to disk at ``filepath``, reporting progress on a ``tqdm`` bar.

    Args:
        url (str): address fetched via ``requests.get(url, stream=True)``
        filepath (str): /path/to/file on disk to which the content is written
        mode (str): mode passed to ``io.open``; if it contains ``'t'``, chunks
            are decoded to unicode text before being written
        encoding (str): encoding passed to ``io.open`` for the output file
        auto_make_dirs (bool): if True, ``make_dirs(filepath, mode)`` is called
            before downloading (presumably creating any missing intermediate
            folders -- confirm against ``make_dirs``)
        chunk_size (int): chunk size passed to ``Response.iter_content``

    .. note:: The output file is opened with plain ``io.open``, so no
        compression is applied by this function itself.

    .. seealso:: :func:`open_sesame() <textacy.fileio.utils.open_sesame>`
    """
    decode_unicode = 't' in mode
    if auto_make_dirs is True:
        make_dirs(filepath, mode)
    # always close the connection
    with closing(requests.get(url, stream=True)) as r:
        # set fallback encoding if unable to infer from headers
        if r.encoding is None:
            r.encoding = 'utf-8'
        with io.open(filepath, mode=mode, encoding=encoding) as f:
            # a missing content-length header yields total=0
            total = int(r.headers.get('content-length', 0))
            # the context manager closes the bar even if the download or the
            # write fails part-way through
            with tqdm(unit='B', unit_scale=True, total=total) as pbar:
                chunks = r.iter_content(chunk_size=chunk_size,
                                        decode_unicode=decode_unicode)
                for chunk in chunks:
                    # needed (?) to filter out "keep-alive" new chunks
                    if chunk:
                        # NOTE(review): in text mode len(chunk) counts decoded
                        # characters, while ``total`` is in bytes
                        pbar.update(len(chunk))
                        f.write(chunk)
Esempio n. 2
0
def write_sparse_matrix(matrix, filepath, compressed=True):
    """
    Save the component arrays of a ``scipy.sparse.csr_matrix`` or
    ``scipy.sparse.csc_matrix`` (``data``, ``indices``, ``indptr``, ``shape``)
    to disk at ``filepath``, optionally compressed.

    Args:
        matrix (``scipy.sparse.csr_matrix`` or ``scipy.sparse.csc_matrix``)
        filepath (str): /path/to/file on disk to which matrix objects will be written;
            if ``filepath`` does not end in ``.npz``, that extension is
            automatically appended to the name
        compressed (bool): if exactly ``False``, arrays are saved with ``savez``
            (uncompressed); otherwise with ``savez_compressed``

    Raises:
        TypeError: if ``matrix`` is neither a csr nor a csc sparse matrix

    .. seealso:: http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.savez.html
    .. seealso:: http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.savez_compressed.html
    """
    if not isinstance(matrix, (csc_matrix, csr_matrix)):
        raise TypeError('input matrix must be a scipy sparse csr or csc matrix')
    make_dirs(filepath, 'w')
    # identity check on purpose: only a literal ``False`` disables compression
    save_arrays = savez if compressed is False else savez_compressed
    save_arrays(filepath,
                data=matrix.data,
                indices=matrix.indices,
                indptr=matrix.indptr,
                shape=matrix.shape)
Esempio n. 3
0
def write_sparse_matrix(matrix, filepath, compressed=True):
    """
    Persist a ``scipy.sparse.csr_matrix`` or ``scipy.sparse.csc_matrix`` at
    ``filepath`` by saving its ``data``, ``indices``, ``indptr`` and ``shape``
    into a single ``.npz`` archive, optionally compressed.

    Args:
        matrix (``scipy.sparse.csr_matrix`` or ``scipy.sparse.csc_matrix``)
        filepath (str): /path/to/file on disk to which matrix objects will be written;
            if ``filepath`` does not end in ``.npz``, that extension is
            automatically appended to the name
        compressed (bool): unless exactly ``False``, save arrays into a single
            file in compressed .npz format

    Raises:
        TypeError: if ``matrix`` is neither a csr nor a csc sparse matrix

    .. seealso:: http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.savez.html
    .. seealso:: http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.savez_compressed.html
    """
    is_supported = isinstance(matrix, (csc_matrix, csr_matrix))
    if not is_supported:
        raise TypeError(
            'input matrix must be a scipy sparse csr or csc matrix')
    make_dirs(filepath, 'w')
    # the pieces of the sparse matrix that get stored in the archive
    components = {
        'data': matrix.data,
        'indices': matrix.indices,
        'indptr': matrix.indptr,
        'shape': matrix.shape,
    }
    # only a literal ``False`` selects the uncompressed writer
    if compressed is False:
        savez(filepath, **components)
        return
    savez_compressed(filepath, **components)
Esempio n. 4
0
 def _download_data(self):
     """
     Fetch the content at the module-level ``URL`` and write the raw response
     bytes to ``self.filepath``.

     Raises:
         requests.HTTPError: if the server answers with a 4xx/5xx status;
             nothing is written to disk in that case
     """
     LOGGER.info('downloading data from "%s"', URL)
     response = requests.get(URL)
     # fail loudly rather than saving an HTTP error page as if it were data
     response.raise_for_status()
     make_dirs(self.filepath, 'wb')
     with io.open(self.filepath, mode='wb') as f:
         f.write(response.content)
Esempio n. 5
0
 def _download_data(self):
     """
     Download the resource at the module-level ``URL`` and save its raw bytes
     to ``self.filepath``.

     Raises:
         requests.HTTPError: if the response carries a 4xx/5xx status code;
             the file at ``self.filepath`` is not touched in that case
     """
     LOGGER.info('downloading data from "%s"', URL)
     response = requests.get(URL)
     # check the status first so an error body is never persisted as the dataset
     response.raise_for_status()
     make_dirs(self.filepath, "wb")
     with io.open(self.filepath, mode="wb") as f:
         f.write(response.content)