def test_complex_bytes(tempdir, comp, pars):
    """Round-trip binary-encoded records through ``TextFilesSource``.

    ``pars`` is unpacked into three values: the name of the encoder (passed
    to ``import_name``), the decoder handed to the source, and a flag
    selecting how the encoder is used. When the flag is truthy the encoder's
    return value is written to the file; otherwise the encoder is called
    with the open file as its second argument.
    """
    dump_name, load, read = pars
    encode = import_name(dump_name)
    # using bytestrings means not needing extra en/decode argument to msgpack
    data = [{b'something': b'simple', b'and': 0}] * 2

    # setup: write the same payload to two files under ``tempdir``
    for basename in ('1.out', '2.out'):
        target = os.path.join(tempdir, basename)
        opened = open_files([target], mode='wb', compression=comp)[0]
        with opened as fo:
            if not read:
                encode(data, fo)
            else:
                fo.write(encode(data))

    # exercise the source over both files via a glob
    pattern = os.path.join(tempdir, '*.out')
    source = TextFilesSource(pattern, text_mode=False, compression=comp,
                             decoder=load, read=read)
    source.discover()
    assert source.npartitions == 2

    # the direct partition read must agree with the dask-delayed one
    direct = source._get_partition(0)
    via_dask = source.to_dask().to_delayed()[0].compute()
    assert direct == via_dask

    result = source.read()
    assert isinstance(result, list)
    assert result[0] == data[0]
def _get_schema(self):
    """Build the stream pipeline on first use and describe it.

    When ``self.stream`` is ``None``, the steps in ``self.method`` are
    applied in order, starting from ``streamz.Stream``. Each step is a
    mapping with:

    - ``"method"``: name of the attribute to call on the current stream;
    - ``"kwargs"`` (optional): keyword arguments for that call;
    - ``"func_value"`` (optional): keys of ``kwargs`` whose values are
      passed through ``import_name`` before the call (presumably turning
      dotted names into callables; confirm against ``import_name``).

    The result is stored on ``self.stream`` and, if ``self.start`` is
    truthy, its ``start()`` method is called. Later calls reuse the stored
    stream.

    Returns
    -------
    dict
        ``{'stream': str(self.stream)}``

    Raises
    ------
    KeyError
        If a step lacks ``"method"``, or names a ``func_value`` key that
        is absent from its ``kwargs``.
    """
    import streamz

    if self.stream is None:
        stream = streamz.Stream
        for part in self.method:
            # Work on a copy so the resolved objects do not overwrite the
            # names stored in the configured method chain; that keeps the
            # specification reusable if the stream is ever rebuilt.
            kw = dict(part.get("kwargs", {}))
            for functional in part.get("func_value", []):
                kw[functional] = import_name(kw[functional])
            stream = getattr(stream, part["method"])(**kw)
        self.stream = stream
        if self.start:
            self.stream.start()
    return {'stream': str(self.stream)}
def test_complex_text(tempdir, comp):
    """Round-trip JSON text records through ``TextFilesSource``.

    Writes the same JSON-encoded payload to two files under ``tempdir``
    (opened in text mode with compression ``comp``), then checks that the
    source sees two partitions, that reading a partition directly matches
    the dask-delayed result, and that ``read()`` returns a list whose first
    element equals the first record written.
    """
    dump = import_name('json.dumps')
    load = 'json.loads'
    data = [{'something': 'simple', 'and': 0}] * 2

    # setup: the encoder returns a string, so it is always written
    # explicitly to the open file
    for f in ['1.out', '2.out']:
        fn = os.path.join(tempdir, f)
        with open_files([fn], mode='wt', compression=comp)[0] as fo:
            fo.write(dump(data))

    path = os.path.join(tempdir, '*.out')
    t = TextFilesSource(path, text_mode=True, compression=comp,
                        decoder=load)
    t.discover()
    assert t.npartitions == 2
    assert t._get_partition(0) == t.to_dask().to_delayed()[0].compute()
    out = t.read()
    assert isinstance(out, list)
    assert out[0] == data[0]