Beispiel #1
0
def test_text_blocks_to_pandas_simple(reader, files):
    blocks = [[files[k]] for k in sorted(files)]
    kwargs = {}
    head = pandas_read_text(reader, files["2014-01-01.csv"], b"", {})
    header = files["2014-01-01.csv"].split(b"\n")[0] + b"\n"

    df = text_blocks_to_pandas(reader,
                               blocks,
                               header,
                               head,
                               kwargs,
                               collection=True)
    assert isinstance(df, dd.DataFrame)
    assert list(df.columns) == ["name", "amount", "id"]

    values = text_blocks_to_pandas(reader,
                                   blocks,
                                   header,
                                   head,
                                   kwargs,
                                   collection=False)
    assert isinstance(values, list)
    assert len(values) == 3
    assert all(hasattr(item, "dask") for item in values)

    assert_eq(df.amount.sum(), 100 + 200 + 300 + 400 + 500 + 600)
Beispiel #2
0
def test_text_blocks_to_pandas_simple(reader, files):
    blocks = [[files[k]] for k in sorted(files)]
    kwargs = {}
    head = pandas_read_text(reader, files['2014-01-01.csv'], b'', {})
    header = files['2014-01-01.csv'].split(b'\n')[0] + b'\n'

    df = text_blocks_to_pandas(reader,
                               blocks,
                               header,
                               head,
                               kwargs,
                               collection=True)
    assert isinstance(df, dd.DataFrame)
    assert list(df.columns) == ['name', 'amount', 'id']

    values = text_blocks_to_pandas(reader,
                                   blocks,
                                   header,
                                   head,
                                   kwargs,
                                   collection=False)
    assert isinstance(values, list)
    assert len(values) == 3
    assert all(hasattr(item, 'dask') for item in values)

    result = df.amount.sum().compute(get=get_sync)
    assert result == (100 + 200 + 300 + 400 + 500 + 600)
Beispiel #3
0
def test_pandas_read_text_with_header(reader, files):
    b = files['2014-01-01.csv']
    header, b = b.split(b'\n', 1)
    header = header + b'\n'
    df = pandas_read_text(reader, b, header, {})
    assert list(df.columns) == ['name', 'amount', 'id']
    assert len(df) == 3
    assert df.id.sum() == 1 + 2 + 3
Beispiel #4
0
def test_pandas_read_text_with_header(reader, files):
    b = files['2014-01-01.csv']
    header, b = b.split(b'\n', 1)
    header = header + b'\n'
    df = pandas_read_text(reader, b, header, {})
    assert list(df.columns) == ['name', 'amount', 'id']
    assert len(df) == 3
    assert df.id.sum() == 1 + 2 + 3
Beispiel #5
0
def test_pandas_read_text_with_header(reader, files):
    b = files["2014-01-01.csv"]
    header, b = b.split(b"\n", 1)
    header = header + b"\n"
    df = pandas_read_text(reader, b, header, {})
    assert list(df.columns) == ["name", "amount", "id"]
    assert len(df) == 3
    assert df.id.sum() == 1 + 2 + 3
Beispiel #6
0
def test_text_blocks_to_pandas_kwargs(reader, files):
    blocks = [files[k] for k in sorted(files)]
    blocks = [[b] for b in blocks]
    kwargs = {"usecols": ["name", "id"]}
    head = pandas_read_text(reader, files["2014-01-01.csv"], b"", kwargs)
    header = files["2014-01-01.csv"].split(b"\n")[0] + b"\n"

    df = text_blocks_to_pandas(reader, blocks, header, head, kwargs, collection=True)
    assert list(df.columns) == ["name", "id"]
    result = df.compute()
    assert (result.columns == df.columns).all()
Beispiel #7
0
def test_text_blocks_to_pandas_kwargs(reader, files):
    blocks = [files[k] for k in sorted(files)]
    blocks = [[b] for b in blocks]
    kwargs = {'usecols': ['name', 'id']}
    head = pandas_read_text(reader, files['2014-01-01.csv'], b'', kwargs)
    header = files['2014-01-01.csv'].split(b'\n')[0] + b'\n'

    df = text_blocks_to_pandas(reader, blocks, header, head, kwargs,
                               collection=True)
    assert list(df.columns) == ['name', 'id']
    result = df.compute()
    assert (result.columns == df.columns).all()
Beispiel #8
0
def test_text_blocks_to_pandas_kwargs(reader, files):
    blocks = [files[k] for k in sorted(files)]
    blocks = [[b] for b in blocks]
    kwargs = {'usecols': ['name', 'id']}
    head = pandas_read_text(reader, files['2014-01-01.csv'], b'', kwargs)
    header = files['2014-01-01.csv'].split(b'\n')[0] + b'\n'

    df = text_blocks_to_pandas(reader, blocks, header, head, kwargs,
                               collection=True)
    assert list(df.columns) == ['name', 'id']
    result = df.compute()
    assert (result.columns == df.columns).all()
Beispiel #9
0
def test_text_blocks_to_pandas_simple(reader, files):
    blocks = [[files[k]] for k in sorted(files)]
    kwargs = {}
    head = pandas_read_text(reader, files['2014-01-01.csv'], b'', {})
    header = files['2014-01-01.csv'].split(b'\n')[0] + b'\n'

    df = text_blocks_to_pandas(reader, blocks, header, head, kwargs,
                               collection=True)
    assert isinstance(df, dd.DataFrame)
    assert list(df.columns) == ['name', 'amount', 'id']

    values = text_blocks_to_pandas(reader, blocks, header, head, kwargs,
                                   collection=False)
    assert isinstance(values, list)
    assert len(values) == 3
    assert all(hasattr(item, 'dask') for item in values)

    assert_eq(df.amount.sum(),
              100 + 200 + 300 + 400 + 500 + 600)
Beispiel #10
0
def test_pandas_read_text_dtype_coercion(reader, files):
    b = files["2014-01-01.csv"]
    df = pandas_read_text(reader, b, b"", {}, {"amount": "float"})
    assert df.amount.dtype == "float"
Beispiel #11
0
def test_pandas_read_text_kwargs(reader, files):
    b = files['2014-01-01.csv']
    df = pandas_read_text(reader, b, b'', {'usecols': ['name', 'id']})
    assert list(df.columns) == ['name', 'id']
Beispiel #12
0
def test_pandas_read_text_dtype_coercion(reader, files):
    b = files['2014-01-01.csv']
    df = pandas_read_text(reader, b, b'', {}, {'amount': 'float'})
    assert df.amount.dtype == 'float'
Beispiel #13
0
def test_pandas_read_text_kwargs(reader, files):
    b = files['2014-01-01.csv']
    df = pandas_read_text(reader, b, b'', {'usecols': ['name', 'id']})
    assert list(df.columns) == ['name', 'id']
Beispiel #14
0
def test_pandas_read_text(reader, files):
    b = files['2014-01-01.csv']
    df = pandas_read_text(reader, b, b'', {})
    assert list(df.columns) == ['name', 'amount', 'id']
    assert len(df) == 3
    assert df.id.sum() == 1 + 2 + 3
Beispiel #15
0
def test_pandas_read_text_dtype_coercion(reader, files):
    b = files['2014-01-01.csv']
    df = pandas_read_text(reader, b, b'', {}, {'amount': 'float'})
    assert df.amount.dtype == 'float'
Beispiel #16
0
def test_pandas_read_text(reader, files):
    b = files['2014-01-01.csv']
    df = pandas_read_text(reader, b, b'', {})
    assert list(df.columns) == ['name', 'amount', 'id']
    assert len(df) == 3
    assert df.id.sum() == 1 + 2 + 3
Beispiel #17
0
def test_pandas_read_text_kwargs(reader, files):
    b = files["2014-01-01.csv"]
    df = pandas_read_text(reader, b, b"", {"usecols": ["name", "id"]})
    assert list(df.columns) == ["name", "id"]
Beispiel #18
0
def test_pandas_read_text(reader, files):
    b = files["2014-01-01.csv"]
    df = pandas_read_text(reader, b, b"", {})
    assert list(df.columns) == ["name", "amount", "id"]
    assert len(df) == 3
    assert df.id.sum() == 1 + 2 + 3