def test_text_blocks_to_pandas_simple(reader, files):
    """Check ``text_blocks_to_pandas`` in both collection modes.

    ``files`` maps file names to raw CSV bytes; every file is wrapped in
    its own single-element block list.  With ``collection=True`` the result
    must be a ``dd.DataFrame`` with columns ``name``, ``amount``, ``id``;
    with ``collection=False`` it must be a list of three objects that each
    expose a ``dask`` attribute.
    """
    first_csv = files["2014-01-01.csv"]
    # Header is the first line of the first file, newline included.
    header = first_csv.split(b"\n")[0] + b"\n"
    head = pandas_read_text(reader, first_csv, b"", {})

    kwargs = {}
    blocks = [[files[name]] for name in sorted(files)]

    df = text_blocks_to_pandas(
        reader, blocks, header, head, kwargs, collection=True
    )
    assert isinstance(df, dd.DataFrame)
    assert list(df.columns) == ["name", "amount", "id"]

    values = text_blocks_to_pandas(
        reader, blocks, header, head, kwargs, collection=False
    )
    assert isinstance(values, list)
    assert len(values) == 3
    for item in values:
        assert hasattr(item, "dask")

    expected_total = 100 + 200 + 300 + 400 + 500 + 600
    assert_eq(df.amount.sum(), expected_total)
def test_text_blocks_to_pandas_simple(reader, files):
    """Check ``text_blocks_to_pandas`` in both collection modes.

    ``files`` maps file names to raw CSV bytes; every file is wrapped in
    its own single-element block list.  With ``collection=True`` the result
    must be a ``dd.DataFrame`` with columns ``name``, ``amount``, ``id``;
    with ``collection=False`` it must be a list of three objects that each
    expose a ``dask`` attribute.  The ``amount`` column must sum to the
    total of all rows across the files.
    """
    blocks = [[files[k]] for k in sorted(files)]
    kwargs = {}
    head = pandas_read_text(reader, files['2014-01-01.csv'], b'', {})
    # Header is the first line of the first file, newline included.
    header = files['2014-01-01.csv'].split(b'\n')[0] + b'\n'

    df = text_blocks_to_pandas(reader, blocks, header, head, kwargs,
                               collection=True)
    assert isinstance(df, dd.DataFrame)
    assert list(df.columns) == ['name', 'amount', 'id']

    values = text_blocks_to_pandas(reader, blocks, header, head, kwargs,
                                   collection=False)
    assert isinstance(values, list)
    assert len(values) == 3
    assert all(hasattr(item, 'dask') for item in values)

    # ``compute(get=...)`` was deprecated and later removed from dask; the
    # supported way to select the single-threaded scheduler is
    # ``scheduler=``.
    result = df.amount.sum().compute(scheduler='synchronous')
    assert result == (100 + 200 + 300 + 400 + 500 + 600)
def test_pandas_read_text_with_header(reader, files):
    """Check ``pandas_read_text`` when the header is passed separately.

    The first line of the CSV bytes is split off and handed over as the
    ``header`` argument, while the remaining bytes form the body.
    """
    raw = files['2014-01-01.csv']
    first_line, body = raw.split(b'\n', 1)
    # split() drops the separator, so restore the trailing newline.
    header = first_line + b'\n'

    frame = pandas_read_text(reader, body, header, {})

    assert list(frame.columns) == ['name', 'amount', 'id']
    assert len(frame) == 3
    assert frame.id.sum() == 1 + 2 + 3
def test_pandas_read_text_with_header(reader, files):
    """Read a header-less body plus an explicit header line.

    Splits the first line off the ``2014-01-01.csv`` bytes, passes it
    (newline restored) as the header and the rest as the data, then checks
    the resulting columns, row count and ``id`` sum.
    """
    content = files["2014-01-01.csv"]
    pieces = content.split(b"\n", 1)
    header_line, remainder = pieces
    header = header_line + b"\n"

    df = pandas_read_text(reader, remainder, header, {})

    expected_columns = ["name", "amount", "id"]
    assert list(df.columns) == expected_columns
    assert len(df) == 3
    assert df.id.sum() == 1 + 2 + 3
def test_text_blocks_to_pandas_kwargs(reader, files):
    """Check that reader kwargs reach ``text_blocks_to_pandas``.

    ``usecols`` restricts the columns to ``name`` and ``id``; both the lazy
    collection and its computed result must expose exactly those columns.
    """
    # One single-element block list per file, in sorted file-name order.
    blocks = [[files[name]] for name in sorted(files)]
    kwargs = {"usecols": ["name", "id"]}

    sample = files["2014-01-01.csv"]
    head = pandas_read_text(reader, sample, b"", kwargs)
    header = sample.split(b"\n")[0] + b"\n"

    df = text_blocks_to_pandas(
        reader, blocks, header, head, kwargs, collection=True
    )
    assert list(df.columns) == ["name", "id"]

    result = df.compute()
    assert (result.columns == df.columns).all()
def test_text_blocks_to_pandas_kwargs(reader, files):
    """Verify ``usecols`` is honoured by ``text_blocks_to_pandas``.

    The same kwargs dict is used to build the sample ``head`` frame and the
    collection; the collection's columns and the computed frame's columns
    must both be ``name`` and ``id``.
    """
    kwargs = {'usecols': ['name', 'id']}
    first_csv = files['2014-01-01.csv']

    head = pandas_read_text(reader, first_csv, b'', kwargs)
    # Header is the first line of the first file, newline included.
    header = first_csv.split(b'\n')[0] + b'\n'

    blocks = []
    for key in sorted(files):
        blocks.append([files[key]])

    df = text_blocks_to_pandas(reader, blocks, header, head, kwargs,
                               collection=True)
    assert list(df.columns) == ['name', 'id']

    computed = df.compute()
    assert (computed.columns == df.columns).all()
def test_text_blocks_to_pandas_simple(reader, files):
    """Exercise ``text_blocks_to_pandas`` with and without ``collection``.

    Builds one single-element block list per file, then checks that
    ``collection=True`` gives a ``dd.DataFrame`` with the expected columns
    and that ``collection=False`` gives a three-element list whose items
    all carry a ``dask`` attribute.  Finally compares the ``amount`` sum
    against the expected total via ``assert_eq``.
    """
    sample = files['2014-01-01.csv']
    head = pandas_read_text(reader, sample, b'', {})
    header = sample.split(b'\n')[0] + b'\n'

    blocks = []
    for key in sorted(files):
        blocks.append([files[key]])
    kwargs = {}

    df = text_blocks_to_pandas(reader, blocks, header, head, kwargs,
                               collection=True)
    assert isinstance(df, dd.DataFrame)
    assert list(df.columns) == ['name', 'amount', 'id']

    values = text_blocks_to_pandas(reader, blocks, header, head, kwargs,
                                   collection=False)
    assert isinstance(values, list)
    assert len(values) == 3
    missing_dask = [item for item in values if not hasattr(item, 'dask')]
    assert not missing_dask

    assert_eq(df.amount.sum(), 100 + 200 + 300 + 400 + 500 + 600)
def test_pandas_read_text_dtype_coercion(reader, files):
    """Check that ``amount`` comes back as float when a dtype is requested.

    ``{"amount": "float"}`` is passed as the fifth positional argument of
    ``pandas_read_text``; the resulting column dtype must equal ``"float"``.
    """
    raw = files["2014-01-01.csv"]
    requested_dtypes = {"amount": "float"}

    frame = pandas_read_text(reader, raw, b"", {}, requested_dtypes)

    assert frame.amount.dtype == "float"
def test_pandas_read_text_kwargs(reader, files):
    """Check that ``usecols`` limits the columns of the parsed frame."""
    raw = files['2014-01-01.csv']
    reader_kwargs = {'usecols': ['name', 'id']}

    frame = pandas_read_text(reader, raw, b'', reader_kwargs)

    assert list(frame.columns) == ['name', 'id']
def test_pandas_read_text_dtype_coercion(reader, files):
    """Verify the dtype mapping passed to ``pandas_read_text`` is applied.

    The mapping ``{'amount': 'float'}`` is supplied as the fifth positional
    argument, and the ``amount`` column's dtype is then compared against
    ``'float'``.
    """
    content = files['2014-01-01.csv']
    df = pandas_read_text(
        reader,
        content,
        b'',
        {},
        {'amount': 'float'},
    )
    amount_dtype = df.amount.dtype
    assert amount_dtype == 'float'
def test_pandas_read_text(reader, files):
    """Parse one CSV byte string with an empty header and no kwargs.

    Expects columns ``name``, ``amount``, ``id``, three rows, and ``id``
    values summing to ``1 + 2 + 3``.
    """
    raw = files['2014-01-01.csv']

    frame = pandas_read_text(reader, raw, b'', {})

    assert list(frame.columns) == ['name', 'amount', 'id']
    assert len(frame) == 3
    assert frame.id.sum() == 1 + 2 + 3
def test_pandas_read_text_kwargs(reader, files):
    """Verify reader kwargs are honoured by ``pandas_read_text``.

    With ``usecols`` set to ``name`` and ``id``, the parsed frame must
    contain exactly those two columns, in that order.
    """
    wanted = ["name", "id"]
    content = files["2014-01-01.csv"]

    df = pandas_read_text(reader, content, b"", {"usecols": ["name", "id"]})

    assert list(df.columns) == wanted
def test_pandas_read_text(reader, files):
    """Smoke-test ``pandas_read_text`` on the ``2014-01-01.csv`` bytes.

    Called with an empty header and empty kwargs; checks the column names,
    the row count, and the sum of the ``id`` column.
    """
    content = files["2014-01-01.csv"]
    df = pandas_read_text(reader, content, b"", {})

    expected_columns = ["name", "amount", "id"]
    assert list(df.columns) == expected_columns
    assert len(df) == 3
    assert df.id.sum() == 1 + 2 + 3