Exemple #1
0
def test_complex_bytes(tempdir, comp, pars):
    dump, load, read = pars
    dump = import_name(dump)
    # using bytestrings means not needing extra en/decode argument to msgpack
    data = [{b'something': b'simple', b'and': 0}] * 2
    for f in ['1.out', '2.out']:
        fn = os.path.join(tempdir, f)
        with open_files([fn], mode='wb', compression=comp)[0] as fo:
            if read:
                fo.write(dump(data))
            else:
                dump(data, fo)
    # that was all setup

    path = os.path.join(tempdir, '*.out')
    t = TextFilesSource(path,
                        text_mode=False,
                        compression=comp,
                        decoder=load,
                        read=read)
    t.discover()
    assert t.npartitions == 2
    assert t._get_partition(0) == t.to_dask().to_delayed()[0].compute()
    out = t.read()
    assert isinstance(out, list)
    assert out[0] == data[0]
 def _get_schema(self):
     import streamz
     if self.stream is None:
         stream = streamz.Stream
         for part in self.method:
             kw = part.get("kwargs", {})
             for functional in part.get("func_value", []):
                 kw[functional] = import_name(kw[functional])
             stream = getattr(stream,
                              part["method"])(**part.get("kwargs", {}))
         self.stream = stream
     if self.start:
         self.stream.start()
     return {'stream': str(self.stream)}
Exemple #3
0
def test_complex_text(tempdir, comp):
    dump, load, read = 'json.dumps', 'json.loads', True
    dump = import_name(dump)
    data = [{'something': 'simple', 'and': 0}] * 2
    for f in ['1.out', '2.out']:
        fn = os.path.join(tempdir, f)
        with open_files([fn], mode='wt', compression=comp)[0] as fo:
            if read:
                fo.write(dump(data))
            else:
                dump(data, fo)
    # that was all setup

    path = os.path.join(tempdir, '*.out')
    t = TextFilesSource(path, text_mode=True, compression=comp, decoder=load)
    t.discover()
    assert t.npartitions == 2
    assert t._get_partition(0) == t.to_dask().to_delayed()[0].compute()
    out = t.read()
    assert isinstance(out, list)
    assert out[0] == data[0]