Python read_csv Examples, dask.dataframe.io.read_csv Python Examples

Example #1

0

Show file

File: test_io.py Project: StuartAxelOwen/dask

def test_read_csv_of_modified_file_has_different_name():
    """Check that appending to a CSV changes the graph keys of ``read_csv``.

    The file is read once, a row is appended, and the file is read again;
    the sorted graph keys of the two collections must differ.
    """
    with filetext(text) as fn:
        a = read_csv(fn)
        with open(fn, 'a') as f:
            f.write('\nGeorge,700')
            # os.fsync only forces OS-level buffers to disk; Python's own
            # write buffer has to be flushed first, otherwise the appended
            # row is not part of what gets synced.
            f.flush()
            os.fsync(f.fileno())
        b = read_csv(fn)

        assert sorted(a.dask) != sorted(b.dask)

Example #2

0

Show file

File: test_io.py Project: seibert/dask

def test_read_csv_has_deterministic_name():
    """Reading one file twice with the same arguments yields the same name.

    Also checks that the name is a ``str`` and that passing different
    keyword arguments produces a different name.
    """
    with filetext(text) as fn:
        first = read_csv(fn)
        second = read_csv(fn)

        # Same file, same arguments: name and graph keys must coincide.
        assert first._name == second._name
        first_keys = sorted(first.dask.keys())
        second_keys = sorted(second.dask.keys())
        assert first_keys == second_keys
        assert isinstance(first._name, str)

        # Different parsing options must show up in the name.
        with_options = read_csv(fn, skiprows=1, na_values=[0])
        assert first._name != with_options._name

Example #3

0

Show file

File: test_io.py Project: roxyboy/dask

def test_read_csv_has_deterministic_name():
    """Verify that ``read_csv`` naming is repeatable and argument-sensitive.

    Two identical calls must share ``_name`` and graph keys; a call with
    extra keyword arguments must get a different ``_name``.
    """
    with filetext(text) as fn:
        left = read_csv(fn)
        right = read_csv(fn)

        names_match = left._name == right._name
        assert names_match

        keys_match = sorted(left.dask.keys()) == sorted(right.dask.keys())
        assert keys_match

        assert isinstance(left._name, str)

        variant = read_csv(fn, skiprows=1, na_values=[0])
        names_differ = left._name != variant._name
        assert names_differ

Example #4

0

Show file

File: test_io.py Project: roxyboy/dask

def test_read_csv_of_modified_file_has_different_name():
    """Check that appending to a CSV changes the graph keys of ``read_csv``.

    The file is read, a row is appended, and the file is read again; the
    sorted graph keys of the two collections must differ.
    """
    with filetext(text) as fn:
        # NOTE(review): the one-second pauses presumably guarantee a distinct
        # modification time on coarse-resolution filesystems -- confirm.
        sleep(1)
        a = read_csv(fn)
        sleep(1)
        with open(fn, "a") as f:
            f.write("\nGeorge,700")
            # os.fsync only forces OS-level buffers to disk; flush Python's
            # own buffer first so the appended row is actually synced.
            f.flush()
            os.fsync(f.fileno())
        b = read_csv(fn)

        assert sorted(a.dask) != sorted(b.dask)

Example #5

0

Show file

File: test_io.py Project: seibert/dask

def test_read_csv_of_modified_file_has_different_name():
    """Check that appending to a CSV changes the graph keys of ``read_csv``.

    The file is read, a row is appended, and the file is read again; the
    sorted graph keys of the two collections must differ.
    """
    with filetext(text) as fn:
        # NOTE(review): the one-second pauses presumably guarantee a distinct
        # modification time on coarse-resolution filesystems -- confirm.
        sleep(1)
        a = read_csv(fn)
        sleep(1)
        with open(fn, 'a') as f:
            f.write('\nGeorge,700')
            # os.fsync only forces OS-level buffers to disk; flush Python's
            # own buffer first so the appended row is actually synced.
            f.flush()
            os.fsync(f.fileno())
        b = read_csv(fn)

        assert sorted(a.dask) != sorted(b.dask)

Example #6

0

Show file

File: test_io.py Project: hgz2373294/dask

def test_read_multiple_csv():
    """Reading a glob over two identical files yields twice the rows of one.

    Two copies of ``text`` are written to the working directory, read via a
    glob pattern, and removed again whether or not the assertions pass.
    """
    paths = ("_foo.1.csv", "_foo.2.csv")
    try:
        for path in paths:
            with open(path, "w") as f:
                f.write(text)

        # Result is unused; the call is kept so that building a collection
        # through ``dd.read_csv`` with a glob is still exercised.
        dd.read_csv("_foo.*.csv")

        combined_rows = len(read_csv("_foo.*.csv").compute())
        single_rows = len(read_csv("_foo.1.csv").compute())
        assert combined_rows == single_rows * 2
    finally:
        # Only delete what was actually created, so a failure during setup
        # is not masked by a FileNotFoundError raised from the cleanup.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

Example #7

0

Show file

File: test_io.py Project: roxyboy/dask

def test_multiple_read_csv_has_deterministic_name():
    """Reading the same glob twice produces the same sorted graph keys.

    Two copies of ``text`` are written to the working directory and removed
    again whether or not the assertion passes.
    """
    paths = ("_foo.1.csv", "_foo.2.csv")
    try:
        for path in paths:
            with open(path, "w") as f:
                f.write(text)
        a = read_csv("_foo.*.csv")
        b = read_csv("_foo.*.csv")

        assert sorted(a.dask.keys()) == sorted(b.dask.keys())
    finally:
        # Only delete what was actually created, so a failure during setup
        # is not masked by a FileNotFoundError raised from the cleanup.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

Example #8

0

Show file

File: test_io.py Project: seibert/dask

def test_multiple_read_csv_has_deterministic_name():
    """Reading the same glob twice produces the same sorted graph keys.

    Two copies of ``text`` are written to the working directory and removed
    again whether or not the assertion passes.
    """
    paths = ('_foo.1.csv', '_foo.2.csv')
    try:
        for path in paths:
            with open(path, 'w') as f:
                f.write(text)
        a = read_csv('_foo.*.csv')
        b = read_csv('_foo.*.csv')

        assert sorted(a.dask.keys()) == sorted(b.dask.keys())
    finally:
        # Only delete what was actually created, so a failure during setup
        # is not masked by a FileNotFoundError raised from the cleanup.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

Example #9

0

Show file

def test_read_multiple_csv():
    """Reading a glob over two identical files yields twice the rows of one.

    Two copies of ``text`` are written to the working directory, read via a
    glob pattern, and removed again whether or not the assertion passes.
    """
    paths = ('_foo.1.csv', '_foo.2.csv')
    try:
        for path in paths:
            with open(path, 'w') as f:
                f.write(text)

        # Result is unused; the call is kept so that building a collection
        # from the glob is still exercised before the computed comparison.
        read_csv('_foo.*.csv')

        combined_rows = len(read_csv('_foo.*.csv').compute())
        single_rows = len(read_csv('_foo.1.csv').compute())
        assert combined_rows == single_rows * 2
    finally:
        # Only delete what was actually created, so a failure during setup
        # is not masked by a FileNotFoundError raised from the cleanup.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

Example #10

0

Show file

File: test_io.py Project: GaelVaroquaux/dask

def test_read_multiple_csv():
    """Reading a glob over two identical files yields twice the rows of one.

    Two copies of ``text`` are written to the working directory, read via a
    glob pattern, and removed again whether or not the assertion passes.
    """
    paths = ('_foo.1.csv', '_foo.2.csv')
    try:
        for path in paths:
            with open(path, 'w') as f:
                f.write(text)

        # Result is unused; the call is kept so that building a collection
        # from the glob is still exercised before the computed comparison.
        read_csv('_foo.*.csv')

        combined_rows = len(read_csv('_foo.*.csv').compute())
        single_rows = len(read_csv('_foo.1.csv').compute())
        assert combined_rows == single_rows * 2
    finally:
        # Only delete what was actually created, so a failure during setup
        # is not masked by a FileNotFoundError raised from the cleanup.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

Example #11

0

Show file

File: test_io.py Project: ssanderson/dask

def test_csv_expands_dtypes():
    """Compare the ``dtype`` entry stored in the graph for several calls.

    For each collection, the last element of the first graph value is taken
    and indexed with ``'dtype'``.  An empty ``dtype`` dict must give the same
    entry as omitting ``dtype``; an explicit ``amount`` dtype must be kept.
    """
    with filetext(text) as fn:
        empty_spec = read_csv(fn, chunkbytes=30, dtype={})
        empty_spec_kwargs = list(empty_spec.dask.values())[0][-1]

        no_spec = read_csv(fn, chunkbytes=30)
        no_spec_kwargs = list(no_spec.dask.values())[0][-1]

        # dtype={} and no dtype at all must end up with the same dtype entry.
        assert empty_spec_kwargs['dtype'] == no_spec_kwargs['dtype']

        float_spec = read_csv(fn, chunkbytes=30, dtype={'amount': float})
        float_spec_kwargs = list(float_spec.dask.values())[0][-1]

        # A user-supplied column dtype must be preserved.
        assert float_spec_kwargs['dtype']['amount'] == float

Example #12

0

Show file

def test_read_gzip_csv():
    """Read a gzip-compressed CSV in chunks and compare against pandas.

    Checks the column names, that more than one partition is produced, and
    that the name-sorted values equal those of ``pd.read_csv``.
    """
    with filetext(text.encode(), open=gzip.open) as fn:
        chunked = read_csv(fn, chunkbytes=30, compression='gzip')

        column_names = list(chunked.columns)
        assert column_names == ['name', 'amount']
        assert chunked.npartitions > 1

        # Sort both sides by name so row order does not affect the comparison.
        actual = chunked.compute(get=dask.get).sort('name')
        reference = pd.read_csv(fn, compression='gzip').sort('name')
        assert (actual.values == reference.values).all()

Example #13

0

Show file

File: test_io.py Project: hgz2373294/dask

def test_index_col():
    """``read_csv`` with ``index_col`` must raise a ValueError naming ``set_index``."""
    with filetext(text) as fn:
        try:
            read_csv(fn, chunkbytes=30, index_col="name")
        except ValueError as err:
            message = str(err)
            assert "set_index" in message
        else:
            # Reaching this branch means no ValueError was raised.
            assert False

Example #14

0

Show file

File: test_io.py Project: GaelVaroquaux/dask

def test_read_csv():
    """Read a CSV in chunks and compare the result against pandas.

    Checks the column names, that more than one partition is produced, and
    that the name-sorted values equal those of ``pd.read_csv``.
    """
    with filetext(text) as fn:
        chunked = read_csv(fn, chunkbytes=30)

        column_names = list(chunked.columns)
        assert column_names == ['name', 'amount']
        assert chunked.npartitions > 1

        # Sort both sides by name so row order does not affect the comparison.
        actual = chunked.compute(get=dask.get).sort('name')
        reference = pd.read_csv(fn).sort('name')
        assert (actual.values == reference.values).all()

Example #15

0

Show file

File: test_io.py Project: iamxiaodong/dask

def test_read_multiple_csv():
    """Read two identical files through a glob and check shape metadata.

    Verifies that the chunked collection reports a known dtype and more than
    two partitions, and that the glob yields twice the rows of a single
    file.  The files are removed whether or not the assertions pass.
    """
    paths = ('_foo.1.csv', '_foo.2.csv')
    try:
        for path in paths:
            with open(path, 'w') as f:
                f.write(text)
        df = dd.read_csv('_foo.*.csv', chunkbytes=30)
        assert df._known_dtype
        assert df.npartitions > 2

        combined_rows = len(read_csv('_foo.*.csv').compute())
        single_rows = len(read_csv('_foo.1.csv').compute())
        assert combined_rows == single_rows * 2

    finally:
        # Only delete what was actually created, so a failure during setup
        # is not masked by a FileNotFoundError raised from the cleanup.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

Example #16

0

Show file

File: test_io.py Project: iamxiaodong/dask

def test_read_multiple_csv():
    """Read two identical files through a glob and check shape metadata.

    Verifies that the chunked collection reports a known dtype and more than
    two partitions, and that the glob yields twice the rows of a single
    file.  The files are removed whether or not the assertions pass.
    """
    paths = ('_foo.1.csv', '_foo.2.csv')
    try:
        for path in paths:
            with open(path, 'w') as f:
                f.write(text)
        df = dd.read_csv('_foo.*.csv', chunkbytes=30)
        assert df._known_dtype
        assert df.npartitions > 2

        combined_rows = len(read_csv('_foo.*.csv').compute())
        single_rows = len(read_csv('_foo.1.csv').compute())
        assert combined_rows == single_rows * 2

    finally:
        # Only delete what was actually created, so a failure during setup
        # is not masked by a FileNotFoundError raised from the cleanup.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

Example #17

0

Show file

def test_index_col():
    """``read_csv`` with ``index_col`` must raise a ValueError naming ``set_index``."""
    with filetext(text) as fn:
        try:
            read_csv(fn, chunkbytes=30, index_col='name')
        except ValueError as err:
            message = str(err)
            assert 'set_index' in message
        else:
            # Reaching this branch means no ValueError was raised.
            assert False

Example #18

0

Show file

def test_read_csv():
    """Chunked ``read_csv`` must agree with ``pd.read_csv`` on the same file.

    Column names and partition count are checked first, then the values are
    compared after sorting both frames by ``name``.
    """
    with filetext(text) as fn:
        frame = read_csv(fn, chunkbytes=30)

        assert list(frame.columns) == ['name', 'amount']
        has_several_partitions = frame.npartitions > 1
        assert has_several_partitions

        computed = frame.compute(get=dask.get).sort('name')
        baseline = pd.read_csv(fn).sort('name')
        values_equal = (computed.values == baseline.values).all()
        assert values_equal

Example #19

0

Show file

def test_read_csv_categorize_and_index():
    """Reading with ``index='amount'`` matches a pandas frame built by hand.

    The expected frame is ``pd.read_csv`` indexed by ``amount`` with the
    ``name`` column converted to a categorical.
    """
    with filetext(text) as fn:
        indexed = read_csv(fn, chunkbytes=20, index='amount')
        index_name = indexed.index.compute().name
        assert index_name == 'amount'

        reference = pd.read_csv(fn).set_index('amount')
        reference['name'] = reference.name.astype('category')
        assert eq(indexed, reference)

Example #20

0

Show file

File: test_io.py Project: bjlittle/dask

def test_read_csv_categorize_and_index():
    """Check index name and contents when ``read_csv`` is given ``index='amount'``.

    The comparison target is built with pandas: same file, ``amount`` as the
    index, and ``name`` cast to ``category``.
    """
    with filetext(text) as fn:
        frame = read_csv(fn, chunkbytes=20, index='amount')

        computed_index = frame.index.compute()
        assert computed_index.name == 'amount'

        baseline = pd.read_csv(fn).set_index('amount')
        baseline['name'] = baseline.name.astype('category')

        frames_equal = eq(frame, baseline)
        assert frames_equal

Example #21

0

Show file

File: test_io.py Project: roxyboy/dask

def test_read_csv_categorize():
    """``categorize=True`` yields a categorical ``name`` column with six categories.

    The dtypes are checked against literal names and against a pandas frame
    whose ``name`` column was cast to ``category``.
    """
    with filetext(text) as fn:
        categorized = read_csv(fn, chunkbytes=30, categorize=True)
        observed_dtypes = list(categorized.dtypes)
        assert observed_dtypes == ["category", "i8"]

        reference = pd.read_csv(fn)
        reference["name"] = reference.name.astype("category")

        dtypes_agree = (categorized.dtypes == reference.dtypes).all()
        assert dtypes_agree

        categories = categorized.compute().name.cat.categories
        assert len(categories) == 6

Example #22

0

Show file

File: test_io.py Project: GaelVaroquaux/dask

def test_read_csv_categorize():
    """``categorize=True`` yields a categorical ``name`` column with six categories.

    The dtypes are checked against literal names and against a pandas frame
    whose ``name`` column was cast to ``category``.
    """
    with filetext(text) as fn:
        categorized = read_csv(fn, chunkbytes=30, categorize=True)
        observed_dtypes = list(categorized.dtypes)
        assert observed_dtypes == ['category', 'i8']

        reference = pd.read_csv(fn)
        reference['name'] = reference.name.astype('category')

        dtypes_agree = (categorized.dtypes == reference.dtypes).all()
        assert dtypes_agree

        categories = categorized.compute().name.cat.categories
        assert len(categories) == 6

Example #23

0

Show file

def test_read_csv_categorize():
    """Check dtypes and category count produced by ``read_csv(categorize=True)``."""
    with filetext(text) as fn:
        frame = read_csv(fn, chunkbytes=30, categorize=True)
        assert list(frame.dtypes) == ['category', 'i8']

        # Build the pandas equivalent: same file, ``name`` as a categorical.
        baseline = pd.read_csv(fn)
        baseline['name'] = baseline.name.astype('category')

        same_dtypes = frame.dtypes == baseline.dtypes
        assert same_dtypes.all()

        name_column = frame.compute().name
        category_count = len(name_column.cat.categories)
        assert category_count == 6

Example #24

0

Show file

def test_consistent_dtypes():
    """Summing ``amount`` over a chunked read must produce a ``float``.

    The inline data mixes values written with and without a decimal point
    in the ``amount`` column and is read with ``chunkbytes=30``.
    """
    text = """
    name,amount
    Alice,100.5
    Bob,-200.5
    Charlie,300
    Dennis,400
    Edith,-500
    Frank,600
    """.strip()

    with filetext(text) as fn:
        frame = read_csv(fn, chunkbytes=30)
        total = frame.amount.sum().compute()
        assert isinstance(total, float)

Example #25

0

Show file

File: test_io.py Project: GaelVaroquaux/dask

def test_consistent_dtypes():
    """Check that the computed sum of ``amount`` is a ``float``.

    Uses a local CSV whose ``amount`` values are written both with and
    without a decimal point, read with ``chunkbytes=30``.
    """
    text = """
    name,amount
    Alice,100.5
    Bob,-200.5
    Charlie,300
    Dennis,400
    Edith,-500
    Frank,600
    """.strip()

    with filetext(text) as fn:
        chunked = read_csv(fn, chunkbytes=30)
        amount_sum = chunked.amount.sum()
        result = amount_sum.compute()
        assert isinstance(result, float)

Example #26

0

Show file

def test_read_csv_categorize_and_index():
    """Read a CSV indexed by ``amount`` and check index name, block bounds and contents.

    The collection is computed with ``get_sync``; each block's index is then
    compared against ``f.divisions``; finally the sorted result is compared
    with a pandas frame indexed by ``amount`` whose ``name`` column is
    categorical.
    """
    with filetext(text) as fn:
        f = read_csv(fn, chunkbytes=20, index='amount')
        result = f.compute(get=get_sync)
        assert result.index.name == 'amount'

        # Fetch every block separately so each block's index can be checked
        # against the divisions.
        blocks = dd.core.get(f.dask, f._keys(), get=get_sync)
        for i, block in enumerate(blocks):
            # NOTE(review): this guard admits i == len(f.divisions) - 1, for
            # which f.divisions[i + 1] would be out of range -- confirm how the
            # number of blocks relates to len(f.divisions) in this version.
            if i < len(f.divisions):
                assert (block.index <= f.divisions[i + 1]).all()
            if i > 0:
                assert (block.index > f.divisions[i]).all()

        # Reference frame built directly with pandas.
        expected = pd.read_csv(fn).set_index('amount')
        expected['name'] = expected.name.astype('category')

        # Sort both frames before comparing so row order does not matter.
        result = result.sort()
        expected = expected.sort()

        assert eq(result, expected)

Example #27

0

Show file

File: test_io.py Project: GaelVaroquaux/dask

def test_read_csv_categorize_with_parse_dates():
    """``categorize=True`` combined with ``parse_dates`` gives the expected dtypes."""
    with filetext(datetime_csv_file) as fn:
        parsed = read_csv(
            fn,
            chunkbytes=30,
            categorize=True,
            parse_dates=['when'],
        )
        observed_dtypes = list(parsed.dtypes)
        assert observed_dtypes == ['category', 'i8', 'M8[ns]']

Example #28

0

Show file

File: test_io.py Project: seibert/dask

def test_read_csv_with_nrows():
    """``nrows=3`` gives a single partition that equals ``pd.read_csv(nrows=3)``."""
    with filetext(text) as fn:
        limited = read_csv(fn, nrows=3)

        column_names = list(limited.columns)
        assert column_names == ['name', 'amount']
        assert limited.npartitions == 1

        # Compare a fresh read against pandas with the same row limit.
        ours = read_csv(fn, nrows=3)
        theirs = pd.read_csv(fn, nrows=3)
        assert eq(ours, theirs)

Example #29

0

Show file

File: test_io.py Project: roxyboy/dask

def test_read_csv_categorize_with_parse_dates():
    """Check the dtypes returned when categorizing and parsing the ``when`` column."""
    with filetext(datetime_csv_file) as fn:
        frame = read_csv(
            fn,
            chunkbytes=30,
            categorize=True,
            parse_dates=["when"],
        )
        dtype_list = list(frame.dtypes)
        assert dtype_list == ["category", "i8", "M8[ns]"]

Example #30

0

Show file

def test_usecols():
    """``usecols`` must select the same values as ``pd.read_csv`` does."""
    with filetext(datetime_csv_file) as fn:
        subset = read_csv(fn, chunkbytes=30, usecols=['when', 'amount'])
        reference = pd.read_csv(fn, usecols=['when', 'amount'])

        computed_values = subset.compute().values
        assert (computed_values == reference.values).all()

Example #31

0

Show file

File: test_io.py Project: roxyboy/dask

def test_read_csv_with_nrows():
    """Reading with ``nrows=3`` yields one partition matching pandas' output."""
    with filetext(text) as fn:
        frame = read_csv(fn, nrows=3)

        assert list(frame.columns) == ["name", "amount"]
        partition_count = frame.npartitions
        assert partition_count == 1

        dask_side = read_csv(fn, nrows=3)
        pandas_side = pd.read_csv(fn, nrows=3)
        frames_equal = eq(dask_side, pandas_side)
        assert frames_equal

Example #32

0

Show file

def test_read_csv_categorize_with_parse_dates():
    """Dtypes must be category, i8 and M8[ns] when ``when`` is parsed as a date."""
    with filetext(datetime_csv_file) as fn:
        expected_dtypes = ['category', 'i8', 'M8[ns]']
        result = read_csv(
            fn, chunkbytes=30, categorize=True, parse_dates=['when']
        )
        assert list(result.dtypes) == expected_dtypes

Example #33

0

Show file

File: test_io.py Project: GaelVaroquaux/dask

def test_usecols():
    """Restricting columns with ``usecols`` matches pandas element for element."""
    with filetext(datetime_csv_file) as fn:
        chunked = read_csv(fn, chunkbytes=30, usecols=['when', 'amount'])
        baseline = pd.read_csv(fn, usecols=['when', 'amount'])

        elementwise = chunked.compute().values == baseline.values
        assert elementwise.all()