def test_castra_column_store():
    """Check that Castra column/index selections optimize to a single load task.

    Skipped when ``castra`` or ``blosc`` cannot be imported, when blosc is
    exactly 1.3.0, or when castra is older than 0.1.8.
    """
    castra = pytest.importorskip('castra')
    blosc = pytest.importorskip('blosc')

    unsupported = (LooseVersion(blosc.__version__) == '1.3.0'
                   or LooseVersion(castra.__version__) < '0.1.8')
    if unsupported:
        pytest.skip()

    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with castra.Castra(template=frame) as c:
        c.extend(frame)
        ddf = c.to_dask()

        # Selecting a column list should collapse into one load_partition
        # task that carries the requested columns.
        selected = ddf[['x']]
        graph = dd.optimize(selected.dask, selected._keys())
        expected_columns = {
            (selected._name, 0):
                (castra.Castra.load_partition, c, '0--2', (list, ['x'])),
        }
        assert graph == expected_columns

        # Accessing the index should collapse into one load_index task.
        index = ddf.index
        graph = dd.optimize(index.dask, index._keys())
        expected_index = {
            (index._name, 0): (castra.Castra.load_index, c, '0--2'),
        }
        assert graph == expected_index
def test_castra_column_store():
    """Check that Castra column/index selections optimize to a single load task.

    Skipped when ``castra`` cannot be imported.
    """
    castra = pytest.importorskip('castra')
    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with castra.Castra(template=frame) as c:
        c.extend(frame)
        ddf = c.to_dask()

        # Column-list selection -> a single load_partition task.
        only_x = ddf[['x']]
        optimized = dd.optimize(only_x.dask, only_x._keys())
        want = {
            (only_x._name, 0):
                (castra.Castra.load_partition, c, '0--2', (list, ['x'])),
        }
        assert optimized == want

        # Index access -> a single load_index task.
        idx = ddf.index
        optimized = dd.optimize(idx.dask, idx._keys())
        want = {(idx._name, 0): (castra.Castra.load_index, c, '0--2')}
        assert optimized == want
def test_castra_column_store():
    """Check that Castra column/index selections optimize to a single load task.

    The test is reported as skipped (rather than silently passing) when
    ``castra`` cannot be imported.
    """
    # Imported locally so this block does not depend on a module-level import.
    import pytest

    # importorskip raises pytest's skip exception on ImportError, so a missing
    # optional dependency shows up as "skipped" instead of a false "passed".
    Castra = pytest.importorskip('castra').Castra

    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with Castra(template=df) as c:
        c.extend(df)
        df = c.to_dask()

        # Selecting a column list should collapse into one load_partition
        # task that carries the requested columns.
        df2 = df[['x']]
        dsk = dd.optimize(df2.dask, df2._keys())
        assert dsk == {
            (df2._name, 0): (Castra.load_partition, c, '0--2', (list, ['x'])),
        }

        # Accessing the index should collapse into one load_index task.
        df3 = df.index
        dsk = dd.optimize(df3.dask, df3._keys())
        assert dsk == {(df3._name, 0): (Castra.load_index, c, '0--2')}
def test_castra_column_store():
    """Verify the optimized graphs for a Castra-backed dask dataframe.

    Both a column-list selection and an index access are expected to reduce
    to a one-task graph. Skipped when ``castra`` cannot be imported.
    """
    castra = pytest.importorskip('castra')
    source = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with castra.Castra(template=source) as c:
        c.extend(source)
        lazy = c.to_dask()

        columns_view = lazy[['x']]
        columns_graph = dd.optimize(columns_view.dask, columns_view._keys())
        load_columns = (castra.Castra.load_partition, c, '0--2', (list, ['x']))
        assert columns_graph == {(columns_view._name, 0): load_columns}

        index_view = lazy.index
        index_graph = dd.optimize(index_view.dask, index_view._keys())
        load_index = (castra.Castra.load_index, c, '0--2')
        assert index_graph == {(index_view._name, 0): load_index}
def test_castra_column_store():
    """Verify the optimized graphs for a Castra-backed dask dataframe.

    Both a column-list selection and an index access are expected to reduce
    to a one-task graph. When ``castra`` cannot be imported the test is
    reported as skipped instead of silently returning (which would count as
    a pass).
    """
    # Imported locally so this block does not depend on a module-level import.
    import pytest

    # Skip explicitly on a missing optional dependency.
    castra = pytest.importorskip('castra')
    Castra = castra.Castra

    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with Castra(template=df) as c:
        c.extend(df)
        df = c.to_dask()

        # Column-list selection -> a single load_partition task.
        df2 = df[['x']]
        dsk = dd.optimize(df2.dask, df2._keys())
        assert dsk == {
            (df2._name, 0): (Castra.load_partition, c, '0--2', (list, ['x'])),
        }

        # Index access -> a single load_index task.
        df3 = df.index
        dsk = dd.optimize(df3.dask, df3._keys())
        assert dsk == {(df3._name, 0): (Castra.load_index, c, '0--2')}
def test_castra_column_store():
    """Verify the optimized graphs for a Castra-backed dask dataframe.

    Skipped when ``castra`` or ``blosc`` cannot be imported, when blosc is
    exactly 1.3.0, or when castra is older than 0.1.8.
    """
    castra = pytest.importorskip('castra')
    blosc = pytest.importorskip('blosc')

    # pytest.skip() raises, so the second check only runs if the first passes.
    if LooseVersion(blosc.__version__) == '1.3.0':
        pytest.skip()
    if LooseVersion(castra.__version__) < '0.1.8':
        pytest.skip()

    source = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    with castra.Castra(template=source) as c:
        c.extend(source)
        lazy = c.to_dask()

        # Column-list selection -> a single load_partition task.
        columns_view = lazy[['x']]
        columns_graph = dd.optimize(columns_view.dask, columns_view._keys())
        load_columns = (castra.Castra.load_partition, c, '0--2', (list, ['x']))
        assert columns_graph == {(columns_view._name, 0): load_columns}

        # Index access -> a single load_index task.
        index_view = lazy.index
        index_graph = dd.optimize(index_view.dask, index_view._keys())
        load_index = (castra.Castra.load_index, c, '0--2')
        assert index_graph == {(index_view._name, 0): load_index}
def test_column_optimizations_with_bcolz_and_rewrite():
    """Check that a column getitem is folded into ``dataframe_from_ctable``.

    For each initial ``columns`` argument, a graph of ctable loads followed by
    ``getitem(..., ['a', 'b'])`` must optimize to loads that request
    ``['a', 'b']`` directly. Skipped when ``bcolz`` cannot be imported.
    """
    bcolz = pytest.importorskip('bcolz')
    table = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=['a', 'b'])
    parts = [1, 2, 3]

    for cols in (None, 'abc', ['abc']):
        # 'x' tasks load from the ctable; 'y' tasks select columns from 'x'.
        loads = {
            ('x', i): (dataframe_from_ctable, table, slice(0, 2), cols, {})
            for i in parts
        }
        selections = {
            ('y', i): (getitem, ('x', i), ['a', 'b'])
            for i in parts
        }
        graph = merge(loads, selections)

        expected = {
            ('y', i): (dataframe_from_ctable, table, slice(0, 2), ['a', 'b'], {})
            for i in parts
        }
        optimized = dd.optimize(graph, [('y', i) for i in parts])
        assert optimized == expected
def test_column_optimizations_with_bcolz_and_rewrite():
    """Check the rewrite of ``getitem`` over ``dataframe_from_ctable`` tasks.

    Whatever ``columns`` value the load task starts with, the optimized graph
    should contain only load tasks asking for ``['a', 'b']``. Skipped when
    ``bcolz`` cannot be imported.
    """
    bcolz = pytest.importorskip('bcolz')
    ctable = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=['a', 'b'])
    indices = [1, 2, 3]
    initial_columns = [None, 'abc', ['abc']]

    for cols in initial_columns:
        load_tasks = {}
        select_tasks = {}
        expected = {}
        for i in indices:
            load_tasks[('x', i)] = (
                dataframe_from_ctable, ctable, slice(0, 2), cols, {})
            select_tasks[('y', i)] = (getitem, ('x', i), ['a', 'b'])
            expected[('y', i)] = (
                dataframe_from_ctable, ctable, slice(0, 2), ['a', 'b'], {})

        unoptimized = merge(load_tasks, select_tasks)
        wanted_keys = [('y', i) for i in indices]
        result = dd.optimize(unoptimized, wanted_keys)
        assert result == expected
def test_column_optimizations_with_bcolz_and_rewrite():
    """Check that a column getitem is folded into ``dataframe_from_ctable``.

    Optimization runs with ``fuse_ave_width=0`` set in the dask config.
    Skipped when ``bcolz`` cannot be imported.
    """
    bcolz = pytest.importorskip("bcolz")
    table = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=["a", "b"])
    parts = [1, 2, 3]

    for cols in (None, "abc", ["abc"]):
        # "x" tasks load from the ctable; "y" tasks select columns from "x".
        loads = {
            ("x", i): (dataframe_from_ctable, table, slice(0, 2), cols, {})
            for i in parts
        }
        selections = {
            ("y", i): (getitem, ("x", i), ["a", "b"])
            for i in parts
        }
        graph = merge(loads, selections)

        expected = {
            ("y", i): (dataframe_from_ctable, table, slice(0, 2), ["a", "b"], {})
            for i in parts
        }

        wanted_keys = [("y", i) for i in parts]
        with dask.config.set(fuse_ave_width=0):
            optimized = dd.optimize(graph, wanted_keys)
        assert optimized == expected
def test_column_optimizations_with_bcolz_and_rewrite():
    """Check that a column getitem is folded into ``dataframe_from_ctable``.

    For each initial ``columns`` argument, a graph of ctable loads followed by
    ``getitem(..., (list, ['a', 'b']))`` must optimize to loads that request
    ``(list, ['a', 'b'])`` directly. When ``bcolz`` cannot be imported the
    test is reported as skipped instead of silently returning (which would
    count as a pass).
    """
    # Imported locally so this block does not depend on a module-level import.
    import pytest

    # Skip explicitly on a missing optional dependency.
    bcolz = pytest.importorskip('bcolz')

    bc = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=['a', 'b'])
    parts = [1, 2, 3]

    for cols in [None, 'abc', ['abc']]:
        # 'x' tasks load from the ctable; 'y' tasks select columns from 'x'.
        dsk2 = merge(
            {('x', i): (dataframe_from_ctable, bc, slice(0, 2), cols, {})
             for i in parts},
            {('y', i): (getitem, ('x', i), (list, ['a', 'b']))
             for i in parts},
        )

        expected = {
            ('y', i): (dataframe_from_ctable, bc, slice(0, 2),
                       (list, ['a', 'b']), {})
            for i in parts
        }
        result = dd.optimize(dsk2, [('y', i) for i in parts])
        assert result == expected
def test_column_optimizations_with_bcolz_and_rewrite():
    """Check the rewrite of ``getitem`` over ``dataframe_from_ctable`` tasks.

    Whatever ``columns`` value the load task starts with, the optimized graph
    should contain only load tasks asking for ``(list, ['a', 'b'])``. A
    missing ``bcolz`` is reported as a skip rather than a silent pass.
    """
    # Imported locally so this block does not depend on a module-level import.
    import pytest

    # importorskip raises pytest's skip exception on ImportError.
    bcolz = pytest.importorskip('bcolz')

    bc = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=['a', 'b'])
    indices = [1, 2, 3]

    for cols in [None, 'abc', ['abc']]:
        # Loads from the ctable, keyed ('x', i).
        load_tasks = {
            ('x', i): (dataframe_from_ctable, bc, slice(0, 2), cols, {})
            for i in indices
        }
        # Column selections on top of the loads, keyed ('y', i).
        select_tasks = {
            ('y', i): (getitem, ('x', i), (list, ['a', 'b']))
            for i in indices
        }
        dsk2 = merge(load_tasks, select_tasks)

        expected = {
            ('y', i): (dataframe_from_ctable, bc, slice(0, 2),
                       (list, ['a', 'b']), {})
            for i in indices
        }
        result = dd.optimize(dsk2, [('y', i) for i in indices])
        assert result == expected
def test_fast_functions():
    """Check that optimizing ``df.a + df.b`` shrinks its graph to 6 tasks.

    The frame is built over the module-level ``dsk`` graph (defined outside
    this block) with two division entries.
    """
    frame = dd.DataFrame(dsk, 'x', ['a', 'b'], [None, None])
    total = frame.a + frame.b

    # The raw graph must be larger than the optimized one.
    assert len(total.dask) > 6

    optimized = dd.optimize(total.dask, total._keys())
    assert len(optimized) == 6
def test_fast_functions():
    """Check that optimizing ``df.a + df.b`` shrinks its graph to 6 tasks.

    The frame is built over the module-level ``dsk`` graph (defined outside
    this block) with four division entries.
    """
    divisions = [None, None, None, None]
    frame = dd.DataFrame(dsk, 'x', ['a', 'b'], divisions)
    column_sum = frame.a + frame.b

    # The raw graph must be larger than the optimized one.
    assert len(column_sum.dask) > 6

    slim_graph = dd.optimize(column_sum.dask, column_sum._keys())
    assert len(slim_graph) == 6