Esempio n. 1
0
def test_castra_column_store():
    """Check the optimized graphs built on top of a Castra store.

    The test is skipped when ``castra`` or ``blosc`` cannot be imported,
    when blosc is exactly version 1.3.0, or when castra is older than 0.1.8.

    It asserts that the optimized graph for ``df[['x']]`` is a single
    ``Castra.load_partition`` task carrying the column list, and that the
    optimized graph for ``df.index`` is a single ``Castra.load_index`` task.
    """
    castra = pytest.importorskip('castra')
    blosc = pytest.importorskip('blosc')

    # Two sequential guards: the castra version is only inspected when the
    # blosc check did not already skip the test.
    if LooseVersion(blosc.__version__) == '1.3.0':
        pytest.skip()
    if LooseVersion(castra.__version__) < '0.1.8':
        pytest.skip()

    source = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with castra.Castra(template=source) as store:
        store.extend(source)
        ddf = store.to_dask()

        # Single-column selection.
        only_x = ddf[['x']]
        graph = dd.optimize(only_x.dask, only_x._keys())
        expected = {
            (only_x._name, 0):
            (castra.Castra.load_partition, store, '0--2', (list, ['x'])),
        }
        assert graph == expected

        # Index access.
        index = ddf.index
        graph = dd.optimize(index.dask, index._keys())
        expected = {(index._name, 0): (castra.Castra.load_index, store, '0--2')}
        assert graph == expected
Esempio n. 2
0
def test_castra_column_store():
    """Verify the optimized task graphs for a Castra-backed dask frame.

    Skipped when ``castra`` is not importable. Selecting ``['x']`` must
    optimize down to one ``load_partition`` task that receives the column
    list; taking ``.index`` must optimize down to one ``load_index`` task.
    """
    castra = pytest.importorskip('castra')

    pdf = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with castra.Castra(template=pdf) as store:
        store.extend(pdf)

        ddf = store.to_dask()

        # Column projection.
        projected = ddf[['x']]
        optimized = dd.optimize(projected.dask, projected._keys())
        load_task = (castra.Castra.load_partition, store, '0--2',
                     (list, ['x']))
        assert optimized == {(projected._name, 0): load_task}

        # Index extraction.
        idx = ddf.index
        optimized = dd.optimize(idx.dask, idx._keys())
        index_task = (castra.Castra.load_index, store, '0--2')
        assert optimized == {(idx._name, 0): index_task}
def test_castra_column_store():
    """Check the optimized graphs for column and index access on Castra.

    When ``castra`` cannot be imported the function returns immediately
    without asserting anything.
    """
    try:
        from castra import Castra
    except ImportError:
        # Optional dependency missing: nothing to check.
        return

    pdf = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with Castra(template=pdf) as store:
        store.extend(pdf)
        ddf = store.to_dask()

        # df[['x']] must become a single load_partition task that is handed
        # the column list.
        column_view = ddf[['x']]
        graph = dd.optimize(column_view.dask, column_view._keys())
        expected_columns = {
            (column_view._name, 0):
            (Castra.load_partition, store, '0--2', (list, ['x'])),
        }
        assert graph == expected_columns

        # df.index must become a single load_index task.
        index_view = ddf.index
        graph = dd.optimize(index_view.dask, index_view._keys())
        expected_index = {
            (index_view._name, 0): (Castra.load_index, store, '0--2'),
        }
        assert graph == expected_index
Esempio n. 4
0
def test_castra_column_store():
    """Assert the shape of optimized graphs over a Castra store.

    Skipped when ``castra`` is unavailable. Both the ``['x']`` selection and
    the ``.index`` access are expected to optimize to exactly one task each
    (``load_partition`` and ``load_index`` respectively).
    """
    castra = pytest.importorskip('castra')

    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with castra.Castra(template=frame) as store:
        store.extend(frame)
        lazy = store.to_dask()

        subset = lazy[['x']]
        subset_graph = dd.optimize(subset.dask, subset._keys())
        subset_key = (subset._name, 0)
        assert subset_graph == {
            subset_key:
            (castra.Castra.load_partition, store, '0--2', (list, ['x'])),
        }

        lazy_index = lazy.index
        index_graph = dd.optimize(lazy_index.dask, lazy_index._keys())
        index_key = (lazy_index._name, 0)
        assert index_graph == {
            index_key: (castra.Castra.load_index, store, '0--2'),
        }
Esempio n. 5
0
def test_castra_column_store():
    """Check optimized graphs for a dask frame read from a Castra store.

    Returns early, asserting nothing, if ``castra`` is not importable.
    """
    try:
        from castra import Castra
    except ImportError:
        return

    data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with Castra(template=data) as store:
        store.extend(data)

        ddf = store.to_dask()

        # Expect one load_partition task that receives the selected columns.
        x_only = ddf[['x']]
        optimized = dd.optimize(x_only.dask, x_only._keys())
        partition_task = (Castra.load_partition, store, '0--2', (list, ['x']))
        assert optimized == {(x_only._name, 0): partition_task}

        # Expect one load_index task.
        ddf_index = ddf.index
        optimized = dd.optimize(ddf_index.dask, ddf_index._keys())
        index_task = (Castra.load_index, store, '0--2')
        assert optimized == {(ddf_index._name, 0): index_task}
Esempio n. 6
0
def test_castra_column_store():
    """Check optimized graphs for column and index reads from Castra.

    Skipped when ``castra`` or ``blosc`` is not importable, when blosc is
    exactly 1.3.0, or when castra is older than 0.1.8.
    """
    castra = pytest.importorskip('castra')
    blosc = pytest.importorskip('blosc')

    # ``or`` keeps the castra comparison from running when the blosc check
    # already decided to skip.
    unsupported = (LooseVersion(blosc.__version__) == '1.3.0'
                   or LooseVersion(castra.__version__) < '0.1.8')
    if unsupported:
        pytest.skip()

    pdf = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with castra.Castra(template=pdf) as store:
        store.extend(pdf)
        ddf = store.to_dask()

        selected = ddf[['x']]
        selected_graph = dd.optimize(selected.dask, selected._keys())
        assert selected_graph == {
            (selected._name, 0):
            (castra.Castra.load_partition, store, '0--2', (list, ['x'])),
        }

        index = ddf.index
        index_graph = dd.optimize(index.dask, index._keys())
        assert index_graph == {
            (index._name, 0): (castra.Castra.load_index, store, '0--2'),
        }
def test_column_optimizations_with_bcolz_and_rewrite():
    """Check that a column ``getitem`` is folded into the ctable loader.

    Skipped when ``bcolz`` is not importable. For each tested value of the
    loader's columns argument, a ``getitem(..., ['a', 'b'])`` task stacked
    on a ``dataframe_from_ctable`` task must optimize to a single
    ``dataframe_from_ctable`` task whose columns argument is ``['a', 'b']``.
    """
    bcolz = pytest.importorskip('bcolz')

    table = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=['a', 'b'])
    parts = [1, 2, 3]

    for cols in [None, 'abc', ['abc']]:
        loaders = {
            ('x', i): (dataframe_from_ctable, table, slice(0, 2), cols, {})
            for i in parts
        }
        selections = {
            ('y', i): (getitem, ('x', i), ['a', 'b'])
            for i in parts
        }
        graph = merge(loaders, selections)

        expected = {
            ('y', i):
            (dataframe_from_ctable, table, slice(0, 2), ['a', 'b'], {})
            for i in parts
        }

        result = dd.optimize(graph, [('y', i) for i in parts])
        assert result == expected
Esempio n. 8
0
def test_column_optimizations_with_bcolz_and_rewrite():
    """Verify the rewrite of ``getitem`` over ``dataframe_from_ctable``.

    Skipped when ``bcolz`` is unavailable. Whatever columns argument the
    loader task started with, the optimized graph must contain only
    ``('y', i)`` keys, each mapped to a ``dataframe_from_ctable`` task that
    requests columns ``['a', 'b']``.
    """
    bcolz = pytest.importorskip('bcolz')

    ctable = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=['a', 'b'])
    indices = [1, 2, 3]
    output_keys = [('y', i) for i in indices]

    for cols in [None, 'abc', ['abc']]:
        load_tasks = {}
        select_tasks = {}
        for i in indices:
            load_tasks[('x', i)] = (dataframe_from_ctable, ctable,
                                    slice(0, 2), cols, {})
            select_tasks[('y', i)] = (getitem, ('x', i), ['a', 'b'])
        graph = merge(load_tasks, select_tasks)

        expected = {}
        for i in indices:
            expected[('y', i)] = (dataframe_from_ctable, ctable,
                                  slice(0, 2), ['a', 'b'], {})

        result = dd.optimize(graph, output_keys)
        assert result == expected
def test_column_optimizations_with_bcolz_and_rewrite():
    """Check that column selection is rewritten into the ctable loader.

    Skipped when ``bcolz`` is not importable. The optimization runs with the
    dask config value ``fuse_ave_width`` set to 0, and the result must map
    every ``("y", i)`` key to a ``dataframe_from_ctable`` task that requests
    columns ``["a", "b"]``.
    """
    bcolz = pytest.importorskip("bcolz")

    table = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=["a", "b"])
    parts = [1, 2, 3]

    for cols in [None, "abc", ["abc"]]:
        loaders = {
            ("x", i): (dataframe_from_ctable, table, slice(0, 2), cols, {})
            for i in parts
        }
        selections = {
            ("y", i): (getitem, ("x", i), ["a", "b"])
            for i in parts
        }
        graph = merge(loaders, selections)

        expected = {
            ("y", i):
            (dataframe_from_ctable, table, slice(0, 2), ["a", "b"], {})
            for i in parts
        }

        wanted_keys = [("y", i) for i in parts]
        with dask.config.set(fuse_ave_width=0):
            result = dd.optimize(graph, wanted_keys)
        assert result == expected
Esempio n. 10
0
def test_column_optimizations_with_bcolz_and_rewrite():
    """Check that a column ``getitem`` is folded into the ctable loader.

    For each tested value of the loader's columns argument, a
    ``getitem(..., (list, ['a', 'b']))`` task stacked on a
    ``dataframe_from_ctable`` task must optimize to a single
    ``dataframe_from_ctable`` task whose columns argument is
    ``(list, ['a', 'b'])``.

    NOTE: when ``bcolz`` is not importable the function returns early, so a
    test runner reports it as passed rather than skipped.
    """
    try:
        import bcolz
    except ImportError:
        return
    bc = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=['a', 'b'])
    for cols in [None, 'abc', ['abc']]:
        # Graph under test: three loader tasks, each wrapped by a selection.
        dsk2 = merge(
            dict((('x', i), (dataframe_from_ctable, bc, slice(0, 2), cols, {}))
                 for i in [1, 2, 3]),
            dict((('y', i), (getitem, ('x', i), (list, ['a', 'b'])))
                 for i in [1, 2, 3]))

        # After optimization only the 'y' keys remain, each loading the
        # selected columns directly.
        expected = dict((('y', i), (dataframe_from_ctable, bc, slice(0, 2),
                                    (list, ['a', 'b']), {}))
                        for i in [1, 2, 3])

        result = dd.optimize(dsk2, [('y', i) for i in [1, 2, 3]])
        assert result == expected
Esempio n. 11
0
def test_column_optimizations_with_bcolz_and_rewrite():
    """Verify the rewrite of ``getitem`` over ``dataframe_from_ctable``.

    Whatever columns argument the loader task started with, the optimized
    graph must map every ``('y', i)`` key to a ``dataframe_from_ctable``
    task whose columns argument is ``(list, ['a', 'b'])``.

    NOTE: when ``bcolz`` is not importable the function returns early, so a
    test runner reports it as passed rather than skipped.
    """
    try:
        import bcolz
    except ImportError:
        return
    bc = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=['a', 'b'])
    for cols in [None, 'abc', ['abc']]:
        # Loader tasks under 'x' keys, column selections under 'y' keys.
        dsk2 = merge(
            dict((('x', i),
                  (dataframe_from_ctable, bc, slice(0, 2), cols, {}))
                 for i in [1, 2, 3]),
            dict((('y', i),
                  (getitem, ('x', i), (list, ['a', 'b'])))
                 for i in [1, 2, 3]))

        # The selection is expected to be absorbed into the loader call.
        expected = dict((('y', i),
                         (dataframe_from_ctable,
                          bc, slice(0, 2), (list, ['a', 'b']), {}))
                        for i in [1, 2, 3])

        result = dd.optimize(dsk2, [('y', i) for i in [1, 2, 3]])
        assert result == expected
Esempio n. 12
0
def test_fast_functions():
    """Check that optimizing ``df.a + df.b`` reduces the graph to six tasks.

    The unoptimized graph must hold more than six entries; the optimized one
    must hold exactly six.
    """
    frame = dd.DataFrame(dsk, 'x', ['a', 'b'], [None, None])
    total = frame.a + frame.b
    assert len(total.dask) > 6

    optimized = dd.optimize(total.dask, total._keys())
    assert len(optimized) == 6
Esempio n. 13
0
def test_fast_functions():
    """Check that optimizing ``df.a + df.b`` reduces the graph to six tasks.

    The frame is built over the module-level ``dsk`` graph with four ``None``
    entries as its last constructor argument. The unoptimized sum must hold
    more than six graph entries and the optimized one exactly six.
    """
    bounds = [None] * 4
    frame = dd.DataFrame(dsk, 'x', ['a', 'b'], bounds)
    column_sum = frame.a + frame.b
    assert len(column_sum.dask) > 6

    optimized_graph = dd.optimize(column_sum.dask, column_sum._keys())
    assert len(optimized_graph) == 6