Example #1
File: test_io.py  Project: dukebody/dask
import os

import pandas as pd
import pytest

import dask.dataframe as dd
from dask.utils import tmpdir, tmpfile

# dependency_depth is a helper local to the test module; a sketch follows the example.


def test_to_hdf_link_optimizations():
    """Test that dask link levels are correct by calculating the depth of the dask graph."""
    pytest.importorskip('tables')
    df16 = pd.DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
                               'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'],
                         'y': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]},
                        index=[1., 2., 3., 4., 5., 6., 7., 8.,
                               9., 10., 11., 12., 13., 14., 15., 16.])
    a = dd.from_pandas(df16, 16)

    # saving to multiple hdf files, no links are needed
    # expected layers: from_pandas, to_hdf, list = depth of 3
    with tmpdir() as dn:
        fn = os.path.join(dn, 'data*')
        d = a.to_hdf(fn, '/data', compute=False)
        assert dependency_depth(d.dask) == 3

    # saving to a single hdf file with multiple nodes
    # all subsequent nodes depend on the first
    # expected layers: from_pandas, first to_hdf(creates file+node), subsequent to_hdfs, list = 4
    with tmpfile() as fn:
        d = a.to_hdf(fn, '/data*', compute=False)
        assert dependency_depth(d.dask) == 4

    # saving to a single hdf file with a single node
    # every node depends on the previous node
    # expected layers: from_pandas, to_hdf times npartitions(16), list = 2 + npartitions = 18
    with tmpfile() as fn:
        d = a.to_hdf(fn, '/data', compute=False)
        assert dependency_depth(d.dask) == 2 + a.npartitions
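
The test calls dependency_depth, which is not a dask library function but a helper defined locally in the test module. Below is a minimal sketch of such a helper, assuming the graph behaves as a plain mapping of keys to tasks (the exact implementation in the test suite may differ): it walks the graph and returns the length of the longest dependency chain.

from functools import lru_cache

from dask.core import get_deps


def dependency_depth(dsk):
    """Return the length of the longest dependency chain in a dask graph."""
    deps, _ = get_deps(dsk)  # maps each key to the set of keys it depends on

    @lru_cache(maxsize=None)
    def depth(key):
        if not deps[key]:
            return 1  # a task with no dependencies forms a single layer
        return 1 + max(depth(dep) for dep in deps[key])

    return max(depth(key) for key in deps)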
Example #2
File: test_hdf.py  Project: zhch158/dask
def test_to_hdf_link_optimizations():
    """testing dask link levels is correct by calculating the depth of the dask graph"""
    pytest.importorskip("tables")
    df16 = pd.DataFrame(
        {
            "x": ["a", "b", "c", "d", "e", "f", "g", "h",
                  "i", "j", "k", "l", "m", "n", "o", "p"],
            "y": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
        },
        index=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
               9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0],
    )
    a = dd.from_pandas(df16, 16)

    # saving to multiple hdf files, no links are needed
    # expected layers: from_pandas, to_hdf, list = depth of 3
    with tmpdir() as dn:
        fn = os.path.join(dn, "data*")
        d = a.to_hdf(fn, "/data", compute=False)
        assert dependency_depth(d.dask) == 3

    # saving to a single hdf file with multiple nodes
    # all subsequent nodes depend on the first
    # expected layers: from_pandas, first to_hdf(creates file+node), subsequent to_hdfs, list = 4
    with tmpfile() as fn:
        d = a.to_hdf(fn, "/data*", compute=False)
        assert dependency_depth(d.dask) == 4

    # saving to a single hdf file with a single node
    # every node depends on the previous node
    # expected layers: from_pandas, to_hdf times npartitions(16), list = 2 + npartitions = 18
    with tmpfile() as fn:
        d = a.to_hdf(fn, "/data", compute=False)
        assert dependency_depth(d.dask) == 2 + a.npartitions
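
In every scenario above, compute=False means to_hdf only builds the task graph: it returns a dask Delayed object and nothing is written until that object is computed. A minimal usage sketch, reusing the dataframe a from the examples:

with tmpfile() as fn:
    d = a.to_hdf(fn, "/data", compute=False)  # build the graph; no file written yet
    d.compute()  # execute the graph and write the HDF file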