Beispiel #1
0
def test_complex_bytes(tempdir, comp, pars):
    dump, load, read = pars
    dump = import_name(dump)
    # using bytestrings means not needing extra en/decode argument to msgpack
    data = [{b'something': b'simple', b'and': 0}] * 2
    for f in ['1.out', '2.out']:
        fn = os.path.join(tempdir, f)
        with open_files([fn], mode='wb', compression=comp)[0] as fo:
            if read:
                fo.write(dump(data))
            else:
                dump(data, fo)
    # that was all setup

    path = os.path.join(tempdir, '*.out')
    t = TextFilesSource(path,
                        text_mode=False,
                        compression=comp,
                        decoder=load,
                        read=read)
    t.discover()
    assert t.npartitions == 2
    assert t._get_partition(0) == t.to_dask().to_delayed()[0].compute()
    out = t.read()
    assert isinstance(out, list)
    assert out[0] == data[0]
Beispiel #2
0
def test_textfiles(tempdir):
    open(os.path.join(tempdir, '1.txt'), 'wt').write('hello\nworld')
    open(os.path.join(tempdir, '2.txt'), 'wt').write('hello\nworld')
    path = os.path.join(tempdir, '*.txt')
    t = TextFilesSource(path)
    t.discover()
    assert t.npartitions == 2
    assert t._get_partition(0) == t.to_dask().to_delayed()[0].compute()
    out = t.read()
    assert isinstance(out, list)
    assert out[0] == 'hello\n'
Beispiel #3
0
def test_complex_text(tempdir, comp):
    dump, load, read = 'json.dumps', 'json.loads', True
    dump = import_name(dump)
    data = [{'something': 'simple', 'and': 0}] * 2
    for f in ['1.out', '2.out']:
        fn = os.path.join(tempdir, f)
        with open_files([fn], mode='wt', compression=comp)[0] as fo:
            if read:
                fo.write(dump(data))
            else:
                dump(data, fo)
    # that was all setup

    path = os.path.join(tempdir, '*.out')
    t = TextFilesSource(path, text_mode=True, compression=comp, decoder=load)
    t.discover()
    assert t.npartitions == 2
    assert t._get_partition(0) == t.to_dask().to_delayed()[0].compute()
    out = t.read()
    assert isinstance(out, list)
    assert out[0] == data[0]