def write_streaming_download_file(url, filepath, mode='wt', encoding=None,
                                  auto_make_dirs=False, chunk_size=1024):
    """
    Download content from ``url`` in a stream; write successive chunks of
    size ``chunk_size`` to disk at ``filepath``, reporting progress on a
    ``tqdm`` progress bar.

    Args:
        url (str): address requested via ``requests.get(url, stream=True)``
        filepath (str): /path/to/file on disk to which the content is written
        mode (str): mode passed to ``io.open``; if it contains ``'t'``, chunks
            are decoded to unicode before being written
        encoding (str): encoding passed to ``io.open``
        auto_make_dirs (bool): if True, ``make_dirs(filepath, mode)`` is
            called before the download starts (presumably creating any
            missing intermediate folders -- confirm against ``make_dirs``)
        chunk_size (int): chunk size passed to ``Response.iter_content``

    .. note:: The output file is opened with plain ``io.open``, so nothing in
        this function compresses the data based on the file extension.
    """
    decode_unicode = 't' in mode
    if auto_make_dirs is True:
        make_dirs(filepath, mode)
    # always close the connection
    with closing(requests.get(url, stream=True)) as r:
        # set fallback encoding if unable to infer from headers
        if r.encoding is None:
            r.encoding = 'utf-8'
        # total falls back to 0 when the server sends no content-length header
        total_size = int(r.headers.get('content-length', 0))
        with io.open(filepath, mode=mode, encoding=encoding) as f:
            # use the bar as a context manager so it is always closed, even
            # if the download or a write fails part-way through
            with tqdm(unit='B', unit_scale=True, total=total_size) as pbar:
                chunks = r.iter_content(
                    chunk_size=chunk_size, decode_unicode=decode_unicode)
                for chunk in chunks:
                    # needed (?) to filter out "keep-alive" new chunks
                    if chunk:
                        # NOTE: for decoded (text) chunks, len() counts
                        # characters rather than bytes
                        pbar.update(len(chunk))
                        f.write(chunk)
def write_sparse_matrix(matrix, filepath, compressed=True):
    """
    Save the component arrays of a ``scipy.sparse.csr_matrix`` or
    ``scipy.sparse.csc_matrix`` to disk at ``filepath`` in ``.npz`` format,
    optionally compressed.

    Args:
        matrix (``scipy.sparse.csr_matrix`` or ``scipy.sparse.csc_matrix``)
        filepath (str): /path/to/file on disk to which matrix objects will be
            written; if ``filepath`` does not end in ``.npz``, that extension
            is automatically appended to the name
        compressed (bool): if True, save arrays into a single file in
            compressed .npz format

    Raises:
        TypeError: if ``matrix`` is neither a csr nor a csc matrix

    .. seealso:: http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.savez.html
    .. seealso:: http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.savez_compressed.html
    """
    is_supported = isinstance(matrix, (csc_matrix, csr_matrix))
    if not is_supported:
        raise TypeError('input matrix must be a scipy sparse csr or csc matrix')
    make_dirs(filepath, 'w')
    # only an explicit ``False`` selects the uncompressed writer
    save_arrays = savez if compressed is False else savez_compressed
    save_arrays(
        filepath,
        data=matrix.data, indices=matrix.indices,
        indptr=matrix.indptr, shape=matrix.shape)
def write_sparse_matrix(matrix, filepath, compressed=True):
    """
    Write the arrays that make up a ``scipy.sparse.csr_matrix`` or
    ``scipy.sparse.csc_matrix`` to a ``.npz`` file at ``filepath``,
    optionally compressed.

    Args:
        matrix (``scipy.sparse.csr_matrix`` or ``scipy.sparse.csc_matrix``)
        filepath (str): /path/to/file on disk to which matrix objects will be
            written; if ``filepath`` does not end in ``.npz``, that extension
            is automatically appended to the name
        compressed (bool): if True, save arrays into a single file in
            compressed .npz format

    Raises:
        TypeError: if ``matrix`` is neither a csr nor a csc matrix

    .. seealso:: http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.savez.html
    .. seealso:: http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.savez_compressed.html
    """
    if isinstance(matrix, (csc_matrix, csr_matrix)):
        make_dirs(filepath, 'w')
        # the four pieces stored in the archive, keyed by name
        payload = {
            'data': matrix.data,
            'indices': matrix.indices,
            'indptr': matrix.indptr,
            'shape': matrix.shape,
        }
        if compressed is False:
            writer = savez
        else:
            writer = savez_compressed
        writer(filepath, **payload)
        return
    raise TypeError(
        'input matrix must be a scipy sparse csr or csc matrix')
def _download_data(self):
    """
    Fetch the resource at the module-level ``URL`` and save the raw response
    body to ``self.filepath``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    LOGGER.info('downloading data from "%s"', URL)
    response = requests.get(URL)
    # fail loudly on an HTTP error instead of saving the error page to disk
    # as though it were the requested data
    response.raise_for_status()
    make_dirs(self.filepath, 'wb')
    with io.open(self.filepath, mode='wb') as f:
        f.write(response.content)
def _download_data(self):
    """
    Download the content at the module-level ``URL`` and write the raw
    response bytes to ``self.filepath``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    LOGGER.info('downloading data from "%s"', URL)
    response = requests.get(URL)
    # an unchecked error response would otherwise be written to disk as if
    # it were the requested data
    response.raise_for_status()
    make_dirs(self.filepath, "wb")
    with io.open(self.filepath, mode="wb") as f:
        f.write(response.content)