async def download_one(pair, year, month, day, hour, session, sem):
    """Download one file, decode its records and store them as JSON text.

    The URL is produced by filling ``url_template`` with the pair and date
    arguments.  All work is done while holding ``sem``.  If the response
    status is not in ``http_ok`` or the body is empty, the failure is
    reported with ``print`` and nothing is written.  Otherwise the body is
    opened with ``lzma_open``, read in ``chunk_size``-byte pieces, each piece
    is unpacked with ``fmt``, and the collected records are written with
    ``dumps(..., indent=4)`` to
    ``{store_path}/{pair}-{year}-{month}-{day}-{hour}.bi5``.

    Always returns ``None``.
    """
    url = url_template.format(pair, year, month, day, hour)

    async with sem:
        async with session.get(url) as response:
            content = await response.read()
            status = response.status

        if status not in http_ok:
            print(
                f'Scraping {url} failed due to the return code {status}'
            )
            return

        if not content:
            print(f'Scraping {url} failed due to the empty content')
            return

        print(f'Scraping {url} succeeded')

        with lzma_open(BytesIO(content)) as stream:
            # read() yields b'' once the decompressed stream is exhausted,
            # which is the sentinel that stops the iteration.
            records = [
                unpack(fmt, piece)
                for piece in iter(lambda: stream.read(chunk_size), b'')
            ]

        target = f'{store_path}/{pair}-{year}-{month}-{day}-{hour}.bi5'
        async with AIOFile(target, 'w') as out:
            await out.write(dumps(records, indent=4))
def test_samples():
    """Check that ``LZMAEncoder`` output decompresses back to its input.

    For each sample the following are verified:

    * encoding the sample in one ``run`` call followed by ``finish``;
    * encoding it in two ``run`` calls split at every index;
    * the same split with a ``sync_flush`` between the two halves;
    * after ``run(first_half) + sync_flush()`` alone (no ``finish``), the
      first half can already be read back from the partial stream.
    """
    samples = [
        b'',
        b'Hello!',
        bytes(range(256)),
        urandom(10),
        urandom(100),
    ]

    for sample in samples:
        x = LZMAEncoder()
        data = x.run(sample) + x.finish()
        assert decompress(data) == sample

        for i in range(len(sample)):
            head, tail = sample[:i], sample[i:]

            x = LZMAEncoder()
            data = x.run(head) + x.run(tail) + x.finish()
            assert decompress(data) == sample

            x = LZMAEncoder()
            data = x.run(head) + x.sync_flush() + x.run(tail) + x.finish()
            assert decompress(data) == sample

            x = LZMAEncoder()
            data = x.run(head) + x.sync_flush()
            # The stream is intentionally unfinished; only the flushed prefix
            # is read.  The reader is closed explicitly via ``with`` so no
            # file object is leaked per iteration.
            with lzma_open(BytesIO(data), mode='rb') as f:
                assert f.read(i) == head
def write_packages(packages, filename, sort=True, sources=False):
    """
    Writes `packages` to a file (per debian Packages format)

    Two files are produced: `filename` itself (gzip-compressed) and a copy
    with every '.gz' in the path replaced by '.xz' (xz-compressed).

    If sort=True, the packages are sorted by name.
    If sources=True, the fields listed in `sources_keys` are written and the
    index is treated as 'Sources.gz'; otherwise `packages_keys` and
    'Packages.gz' are used.

    `packages` is a mapping of package name -> mapping of field -> value.
    Only fields present in a package are emitted, in key-list order, and
    each package stanza is terminated by an empty line.
    """
    makedirs(dirname(filename), exist_ok=True)

    # Copy the arch-specific Release file from devuan if it's not there
    bsnm = 'Sources.gz' if sources else 'Packages.gz'
    rel = filename.replace(bsnm, 'Release')
    sprl = rel.replace(mergedir, join(spooldir, 'devuan'))
    if not isfile(rel) and isfile(sprl):
        copyfile(sprl, rel)

    # Prepare the input before opening (and thereby truncating) the outputs,
    # so a failure while sorting does not leave empty index files behind.
    pkg_items = packages.items()
    if sort:
        pkg_items = sorted(pkg_items, key=lambda x: x[0])

    keylist = sources_keys if sources else packages_keys

    # Context managers guarantee both archives are closed (and their
    # trailers flushed) even if writing raises part-way through.
    with gzip_open(filename, 'w') as gzf, \
            lzma_open(filename.replace('.gz', '.xz'), 'w') as xzf:
        for _, pkg_contents in pkg_items:
            fields = [
                '%s: %s\n' % (key, pkg_contents[key])
                for key in keylist
                if key in pkg_contents
            ]
            # Encode the whole stanza once and reuse it for both outputs.
            stanza = (''.join(fields) + '\n').encode('utf-8')
            gzf.write(stanza)
            xzf.write(stanza)
def open_with_compression(filename, mode='r'):
    """
    Open a possibly compressed file as if it were a plain file.

    The compression scheme is guessed from the filename (via
    ``get_compression``) and the matching opener is used. Handled schemes
    are ``gz`` (gzip), ``bz2`` (bzip2) and ``xz`` (lzma); anything else is
    passed to the builtin ``open``. ``xz`` needs either the standard
    ``lzma`` module or the ``backports.lzma`` package.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Depending on the Python version, you may get errors trying to write the
    wrong string type to the file.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    if sys.version_info[0] > 2:
        # Compressed openers sometimes default to binary, so the bare
        # 'r' / 'w' / 'a' modes are made explicitly text on Python 3.
        mode = {'r': 'rt', 'w': 'wt', 'a': 'at'}.get(mode, mode)
    else:
        # gzip in Anaconda Python 2 on Windows forcibly appends a 'b', so
        # drop any 't' and rely on Python's implicit string conversions.
        mode = mode.strip('t')

    _root, compression = get_compression(filename)

    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)

    if compression == 'bz2':
        import bz2
        # bz2.open exists on Python 3 only; Python 2 offers just BZ2File.
        opener = bz2.open if hasattr(bz2, 'open') else bz2.BZ2File
        return opener(filename, mode=mode)

    if compression == 'xz':
        try:
            from lzma import open as lzma_open
        except ImportError:
            from backports.lzma import open as lzma_open
        return lzma_open(filename, mode)

    # No compression detected, or a scheme without a dedicated opener.
    return open(filename, mode)
def __call__(self, source, dest):
    """Move `source` to `dest`, then replace `dest` by a compressed copy.

    `source` is renamed to `dest`, the contents of `dest` are written
    LZMA-compressed to ``dest + '.gz'``, and the uncompressed `dest` is
    removed.

    Parameters:
        source: path of the file to rotate.
        dest: path the file is moved to before being compressed.
    """
    rename(source, dest)
    # Previously '%s.gz'.format(dest): str.format ignores '%s', so every
    # call wrote to a file literally named '%s.gz' instead of '<dest>.gz'.
    # NOTE(review): the data is xz/LZMA-compressed although the suffix is
    # '.gz'; the suffix is kept in case other code relies on it -- confirm.
    compressed = '{}.gz'.format(dest)
    with open(dest, 'rb') as log_fd, lzma_open(compressed, 'wb') as lzma_fd:
        lzma_fd.writelines(log_fd)
    unlink(dest)
def scan_lzma_content(file_, substr=None):
    """Yield the JSON document on each matching line of an xz file.

    Roughly: xzcat RC_20##-##.xz | grep '"author":"$AUTHOR"'
    followed by JSON-decoding every surviving line.

    Parameters:
        file_: path (or file object) accepted by ``lzma_open``.
        substr: optional filter; only lines containing it are decoded.
            May be ``bytes`` or ``str``. The file is read in binary mode,
            so a ``str`` is encoded as UTF-8 before matching (previously a
            ``str`` raised ``TypeError`` on the first line).

    Yields:
        The object produced by ``loads`` for each selected line.
    """
    if isinstance(substr, str):
        substr = substr.encode('utf-8')

    with lzma_open(file_, "rb") as fin:
        for line in fin:
            if substr is not None and substr not in line:
                continue
            payload = line.strip()
            if not payload:
                # A blank line is not a JSON document; skip it rather than
                # let loads() fail on empty input.
                continue
            yield loads(payload)