Example #1
0
async def download_one(pair, year, month, day, hour, session, sem):
    """Download one hourly archive, decode it and store the result as JSON.

    The URL is built by formatting ``url_template`` with the pair and the
    date parts. Everything below runs while holding ``sem``:

    * the body is fetched through ``session.get``;
    * a status outside ``http_ok`` or an empty body is reported with
      ``print`` and the coroutine stops early;
    * otherwise the body is LZMA-decompressed, read in pieces of
      ``chunk_size`` bytes, and every piece is unpacked with ``fmt``;
    * the list of unpacked tuples is written as indented JSON to
      ``{store_path}/{pair}-{year}-{month}-{day}-{hour}.bi5``.

    Returns ``None`` in every case.
    """
    url = url_template.format(pair, year, month, day, hour)

    async with sem:
        async with session.get(url) as response:
            content = await response.read()

        # The status is read after the response context has been left,
        # exactly as the body was read inside it.
        if response.status not in http_ok:
            print(
                f'Scraping {url} failed due to the return code {response.status}'
            )
            return None

        if content == b'':
            print(f'Scraping {url} failed due to the empty content')
            return None

        print(f'Scraping {url} succeeded')

        with lzma_open(BytesIO(content)) as archive:
            # iter(callable, sentinel) keeps reading until read() yields b''.
            records = [
                unpack(fmt, piece)
                for piece in iter(lambda: archive.read(chunk_size), b'')
            ]

        target = f'{store_path}/{pair}-{year}-{month}-{day}-{hour}.bi5'
        async with AIOFile(target, 'w') as out:
            await out.write(dumps(records, indent=4))

    return None
Example #2
0
def test_samples():
    """Round-trip a set of payloads through ``LZMAEncoder``.

    For every sample the test checks that:

    * encoding in one ``run`` call followed by ``finish`` decompresses back
      to the sample;
    * splitting the input at every index and feeding the two halves in
      separate ``run`` calls gives the same result, with and without a
      ``sync_flush`` between them;
    * after ``run(sample[:i]) + sync_flush()`` (no ``finish``), a streaming
      reader can already read back the first ``i`` bytes.
    """
    samples = [
        b'',
        b'Hello!',
        bytes(range(256)),  # every possible byte value once
        urandom(10),
        urandom(100),
    ]
    for sample in samples:

        x = LZMAEncoder()
        data = x.run(sample) + x.finish()
        assert decompress(data) == sample

        for i in range(len(sample)):

            x = LZMAEncoder()
            data = x.run(sample[:i]) + x.run(sample[i:]) + x.finish()
            assert decompress(data) == sample

            x = LZMAEncoder()
            data = x.run(sample[:i]) + x.sync_flush() + x.run(
                sample[i:]) + x.finish()
            assert decompress(data) == sample

            # The stream is deliberately left unfinished here; only the
            # flushed prefix is read back. The reader is closed via `with`
            # so it is not leaked on every iteration.
            x = LZMAEncoder()
            data = x.run(sample[:i]) + x.sync_flush()
            with lzma_open(BytesIO(data), mode='rb') as f:
                assert f.read(i) == sample[:i]
Example #3
0
def write_packages(packages, filename, sort=True, sources=False):
    """
    Writes `packages` to a file (per debian Packages format).

    `packages` is a mapping of package name to a mapping of field name to
    value. For each package, the fields listed in `packages_keys` (or
    `sources_keys` when sources=True) that are present are written as
    ``Key: value`` lines, followed by a blank line.

    The same content is written twice: gzip-compressed to `filename`, and
    xz-compressed to `filename` with '.gz' replaced by '.xz'.

    If sort=True, the packages are sorted by name.
    """
    # dirname() is '' for a bare file name, and makedirs('') raises.
    target_dir = dirname(filename)
    if target_dir:
        makedirs(target_dir, exist_ok=True)

    # Copy the arch-specific Release file from devuan if it's not there
    bsnm = 'Sources.gz' if sources else 'Packages.gz'
    rel = filename.replace(bsnm, 'Release')
    sprl = rel.replace(mergedir, join(spooldir, 'devuan'))
    if not isfile(rel) and isfile(sprl):
        copyfile(sprl, rel)

    pkg_items = packages.items()
    if sort:
        pkg_items = sorted(pkg_items, key=lambda x: x[0])

    keylist = sources_keys if sources else packages_keys

    # Context managers make sure both streams are flushed and closed even
    # when a write (or the formatting of a field) raises.
    with gzip_open(filename, 'w') as gzf, \
            lzma_open(filename.replace('.gz', '.xz'), 'w') as xzf:
        for _, pkg_contents in pkg_items:
            stanza = ''.join(
                '%s: %s\n' % (key, pkg_contents[key])
                for key in keylist
                if key in pkg_contents
            ) + '\n'
            # Encode once and feed the same bytes to both compressors.
            encoded = stanza.encode('utf-8')
            gzf.write(encoded)
            xzf.write(encoded)
Example #4
0
def open_with_compression(filename, mode='r'):
    """
    Open a possibly compressed file as if it were a plain one.

    The compression is guessed from the filename (via ``get_compression``)
    and the matching opener is used, so the caller can read or write
    through the returned object like a standard file.

    Implemented for ``gz`` (gzip), ``bz2`` (bzip2) and ``xz`` (lzma). Either
    Python 3 or the ``backports.lzma`` module is required for ``xz``.
    Any other filename is opened with the builtin ``open``.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.
    On Python 3 a bare 'r', 'w' or 'a' is turned into its text-mode
    variant. Depending on the Python version, you may get errors trying
    to write the wrong string type to the file.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    if sys.version_info[0] > 2:
        # The compressed openers may default to binary, so spell out
        # text mode explicitly on Python 3.
        mode = {'r': 'rt', 'w': 'wt', 'a': 'at'}.get(mode, mode)
    else:
        # The version of gzip in Anaconda Python 2 on Windows forcibly
        # adds a 'b', so drop any 't' and leave string conversion to
        # Python itself.
        mode = mode.strip('t')

    _root, compression = get_compression(filename)

    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)

    if compression == 'bz2':
        import bz2
        # bz2.open only exists on Python 3; fall back to BZ2File otherwise.
        opener = bz2.open if hasattr(bz2, 'open') else bz2.BZ2File
        return opener(filename, mode=mode)

    if compression == 'xz':
        try:
            from lzma import open as lzma_open
        except ImportError:
            from backports.lzma import open as lzma_open
        return lzma_open(filename, mode)

    # No compression detected, or one that is not handled here.
    return open(filename, mode)
Example #5
0
 def __call__(self, source, dest):
     """Move ``source`` to ``dest`` and replace it with a compressed copy.

     ``source`` is renamed to ``dest``, the content of ``dest`` is written
     through ``lzma_open`` to ``<dest>.gz``, and ``dest`` is then removed.

     NOTE(review): the output is produced by ``lzma_open`` although the
     suffix is '.gz'; the suffix is kept as written so existing file-name
     expectations are not changed -- confirm which one is intended.
     """
     rename(source, dest)
     # The original used '%s.gz'.format(dest). str.format() ignores
     # %-placeholders, so every call wrote to a file literally named
     # '%s.gz' instead of one derived from ``dest``.
     compressed = '{}.gz'.format(dest)
     with open(dest, 'rb') as log_fd, lzma_open(compressed, 'wb') as lzma_fd:
         lzma_fd.writelines(log_fd)
     unlink(dest)
def scan_lzma_content(file_, substr=None):
    """Yield the parsed record from each matching line of an xz file.

    The file is opened in binary mode through ``lzma_open`` and read line
    by line. A line is selected when ``substr`` is ``None`` or occurs in
    the raw line; selected lines are stripped and passed to ``loads``.

    Parameters:
        file_: whatever ``lzma_open`` accepts as its first argument.
        substr: optional filter. ``bytes`` is matched as is; ``str`` is
            encoded as UTF-8 first, because the lines are bytes and a
            ``str`` needle would raise ``TypeError``.

    Blank (whitespace-only) lines are skipped instead of being handed to
    ``loads``, which cannot parse empty input.
    """
    # xzcat RC_20##-##.xz | grep '"author":"$AUTHOR"' | jq .id -r
    if isinstance(substr, str):
        substr = substr.encode('utf-8')
    with lzma_open(file_, "rb") as fin:
        for line in fin:
            record = line.strip()
            if not record:
                continue
            if substr is None or substr in line:
                yield loads(record)