コード例 #1
0
def test_bytes_read_csv_with_header():
    """Parse a block whose header line is passed in separately.

    The first line of the ``'2014-01-01.csv'`` sample is split off and
    handed to ``bytes_read_csv`` as the header argument, while the rest of
    the bytes are handed over as the block. The result must expose the
    columns ``name``, ``amount`` and ``id``, hold three rows, and have ids
    that add up to 6.
    """
    raw = files['2014-01-01.csv']
    # split() drops the separator, so the newline is re-appended to the
    # header line before it is passed on.
    first_line, body = raw.split(b'\n', 1)
    frame = bytes_read_csv(body, first_line + b'\n', {})

    expected_columns = ['name', 'amount', 'id']
    assert list(frame.columns) == expected_columns
    assert len(frame) == 3
    assert frame.id.sum() == 1 + 2 + 3
コード例 #2
0
def test_kwargs():
    """Check that ``usecols`` given through kwargs limits the columns.

    The same kwargs dict is used both to parse the sample block with
    ``bytes_read_csv`` and to build the collection with
    ``read_csv_from_bytes``. The collection and its computed result must
    both carry only the ``name`` and ``id`` columns.

    ``header`` is not defined in this function; it is resolved from the
    enclosing scope.
    """
    options = {'usecols': ['name', 'id']}
    # One single-element list per file, visited in sorted key order.
    blocks = [[files[key]] for key in sorted(files)]
    sample = bytes_read_csv(files['2014-01-01.csv'], b'', options)

    df = read_csv_from_bytes(blocks, header, sample, options, collection=True)
    assert list(df.columns) == ['name', 'id']
    computed = df.compute()
    assert (computed.columns == df.columns).all()
コード例 #3
0
def test_read_csv_simple():
    """Exercise ``read_csv_from_bytes`` in both collection modes.

    With ``collection=True`` the call must return a ``dd.DataFrame`` with
    the columns ``name``, ``amount`` and ``id``. With ``collection=False``
    it must return a list of three items, each exposing a ``dask``
    attribute. Finally the ``amount`` column of the collection is summed
    with ``get_sync`` and compared against the expected total.

    ``header`` is not defined in this function; it is resolved from the
    enclosing scope.
    """
    options = {}
    sample = bytes_read_csv(files['2014-01-01.csv'], b'', {})
    # One single-element list per file, visited in sorted key order.
    blocks = [[files[key]] for key in sorted(files)]

    df = read_csv_from_bytes(blocks, header, sample, options, collection=True)
    assert isinstance(df, dd.DataFrame)
    assert list(df.columns) == ['name', 'amount', 'id']

    parts = read_csv_from_bytes(
        blocks, header, sample, options, collection=False)
    assert isinstance(parts, list)
    assert len(parts) == 3
    for part in parts:
        assert hasattr(part, 'dask')

    expected_total = 100 + 200 + 300 + 400 + 500 + 600
    total = df.amount.sum().compute(get=get_sync)
    assert total == expected_total
コード例 #4
0
def test_bytes_read_csv_dtype_coercion():
    """Check that the dtype mapping passed as fourth argument is applied.

    ``bytes_read_csv`` is called with ``{'amount': 'float'}`` and the
    resulting ``amount`` column must report a dtype equal to ``'float'``.
    """
    raw = files['2014-01-01.csv']
    requested_dtypes = {'amount': 'float'}
    frame = bytes_read_csv(raw, b'', {}, requested_dtypes)
    assert frame.amount.dtype == 'float'
コード例 #5
0
def test_bytes_read_csv_kwargs():
    """Check that ``usecols`` in the kwargs dict limits the parsed columns.

    Only ``name`` and ``id`` are requested, and the result must expose
    exactly those two columns in that order.
    """
    wanted = ['name', 'id']
    raw = files['2014-01-01.csv']
    frame = bytes_read_csv(raw, b'', {'usecols': wanted})
    assert list(frame.columns) == ['name', 'id']
コード例 #6
0
def test_bytes_read_csv():
    """Parse the ``'2014-01-01.csv'`` sample with no header and no kwargs.

    An empty header (``b''``) and an empty kwargs dict are passed. The
    result must expose the columns ``name``, ``amount`` and ``id``, hold
    three rows, and have ids that add up to 6.
    """
    raw = files['2014-01-01.csv']
    frame = bytes_read_csv(raw, b'', {})

    expected_columns = ['name', 'amount', 'id']
    expected_id_total = 1 + 2 + 3
    assert list(frame.columns) == expected_columns
    assert len(frame) == 3
    assert frame.id.sum() == expected_id_total