def test_bytes_read_csv_with_header():
    """Check ``bytes_read_csv`` when the header line is passed separately.

    The first line of the sample file is split off and handed over (with its
    trailing newline restored) as the header argument; the remaining bytes
    form the data block.
    """
    first_line, body = files['2014-01-01.csv'].split(b'\n', 1)
    # ``split`` consumed the newline, so put it back on the header line.
    frame = bytes_read_csv(body, first_line + b'\n', {})

    assert list(frame.columns) == ['name', 'amount', 'id']
    assert len(frame) == 3
    assert frame.id.sum() == 1 + 2 + 3
def test_kwargs():
    """Check that ``usecols`` given through kwargs limits the columns.

    The same kwargs dict is used both to build the sample ``head`` and to
    build the collection; the computed result must expose the same columns
    as the lazy collection.
    """
    # One single-element block list per file, in sorted filename order.
    blocks = [[files[name]] for name in sorted(files)]
    kwargs = {'usecols': ['name', 'id']}

    head = bytes_read_csv(files['2014-01-01.csv'], b'', kwargs)
    df = read_csv_from_bytes(blocks, header, head, kwargs, collection=True)
    assert list(df.columns) == ['name', 'id']

    computed = df.compute()
    assert (computed.columns == df.columns).all()
def test_read_csv_simple():
    """Exercise ``read_csv_from_bytes`` in both collection modes.

    With ``collection=True`` the result must be a ``dd.DataFrame`` with the
    expected columns; with ``collection=False`` it must be a list of three
    objects that each carry a ``dask`` attribute. Finally the ``amount``
    column is summed with the synchronous scheduler.
    """
    blocks = [[files[name]] for name in sorted(files)]
    kwargs = {}
    head = bytes_read_csv(files['2014-01-01.csv'], b'', {})

    df = read_csv_from_bytes(blocks, header, head, kwargs, collection=True)
    assert isinstance(df, dd.DataFrame)
    assert list(df.columns) == ['name', 'amount', 'id']

    values = read_csv_from_bytes(blocks, header, head, kwargs,
                                 collection=False)
    assert isinstance(values, list)
    assert len(values) == 3
    assert all(hasattr(item, 'dask') for item in values)

    total = df.amount.sum().compute(get=get_sync)
    assert total == (100 + 200 + 300 + 400 + 500 + 600)
def test_bytes_read_csv_dtype_coercion():
    """Check that the ``amount`` column comes back with dtype ``'float'``.

    The fourth positional argument maps ``'amount'`` to ``'float'``
    (presumably a dtype mapping applied to the parsed frame -- confirm
    against ``bytes_read_csv``).
    """
    raw = files['2014-01-01.csv']
    frame = bytes_read_csv(raw, b'', {}, {'amount': 'float'})
    assert frame.amount.dtype == 'float'
def test_bytes_read_csv_kwargs():
    """Check that ``usecols`` in the kwargs dict limits the parsed columns."""
    raw = files['2014-01-01.csv']
    options = {'usecols': ['name', 'id']}
    frame = bytes_read_csv(raw, b'', options)
    assert list(frame.columns) == ['name', 'id']
def test_bytes_read_csv():
    """Check ``bytes_read_csv`` on one file with an empty header argument.

    Verifies the column names, the row count and the sum of the ``id``
    column.
    """
    frame = bytes_read_csv(files['2014-01-01.csv'], b'', {})

    assert list(frame.columns) == ['name', 'amount', 'id']
    assert len(frame) == 3
    assert frame.id.sum() == 1 + 2 + 3