def test_convert_logfiles_to_bag():
    """Chunked ``TextFile`` logs convert to a ``Bag`` with the same contents."""
    contents = {'a1.log': 'Hello\nWorld', 'a2.log': 'Hola\nMundo'}
    with filetexts(contents) as fns:
        text_files = [TextFile(fn) for fn in fns]
        logs = chunks(TextFile)(text_files)
        b = convert(Bag, logs)

        assert isinstance(b, Bag)
        # The source filename is expected to show up in the bag's task graph.
        assert 'a1.log' in str(b.dask.values())

        # Round-tripping through the bag must not change the listed contents.
        from_bag = convert(list, b)
        from_logs = convert(list, logs)
        assert from_bag == from_logs
def test_first_csv_establishes_consistent_dshape():
    """All globbed CSV rows come back with a text-typed second column.

    The first file holds words in the ``when`` column while the second holds
    digit strings; every returned value is asserted to be a string.
    """
    csv_texts = {
        'accounts1.csv': 'name,when\nAlice,one\nBob,two',
        'accounts2.csv': 'name,when\nAlice,300\nBob,400',
    }
    with filetexts(csv_texts):
        rows = into(list, 'accounts*.csv')
        assert len(rows) == 4
        assert all(isinstance(when, (str, unicode)) for _, when in rows)
def test_convert_logfiles_to_bag():
    """Chunked ``TextFile`` logs go through ``odo`` to a ``Bag`` losslessly.

    Lines are compared after stripping surrounding whitespace.
    """
    contents = {'a1.log': 'Hello\nWorld', 'a2.log': 'Hola\nMundo'}
    with filetexts(contents) as fns:
        logs = chunks(TextFile)([TextFile(fn) for fn in fns])
        b = odo(logs, Bag)
        assert isinstance(b, Bag)

        strip = methodcaller('strip')
        from_bag = [strip(line) for line in odo(b, list)]
        from_logs = [strip(line) for line in odo(logs, list)]
        assert from_bag == from_logs
def test_convert_logfiles_to_bag():
    """Chunked ``TextFile`` logs go through ``odo`` to a ``Bag`` unchanged."""
    contents = {'a1.log': 'Hello\nWorld', 'a2.log': 'Hola\nMundo'}
    with filetexts(contents) as fns:
        text_files = [TextFile(fn) for fn in fns]
        logs = chunks(TextFile)(text_files)
        b = odo(logs, Bag)

        assert isinstance(b, Bag)
        # The source filename is expected to show up in the bag's task graph.
        assert 'a1.log' in str(b.dask.values())

        from_bag = odo(b, list)
        from_logs = odo(logs, list)
        assert from_bag == from_logs
def test_glob():
    """A glob pattern resolves to chunked CSV data covering every match."""
    csv_texts = {
        "accounts1.csv": "name,when\nAlice,100\nBob,200",
        "accounts2.csv": "name,when\nAlice,300\nBob,400",
    }
    expected_rows = [("Alice", 100), ("Bob", 200), ("Alice", 300), ("Bob", 400)]

    with filetexts(csv_texts):
        accounts = resource("accounts*.csv", has_header=True)
        assert convert(list, accounts) == expected_rows

        # A bare wildcard should still produce a chunks-of-CSV resource.
        everything = resource("*.csv")
        assert isinstance(everything, chunks(CSV))
def test_glob():
    """Globbed CSV resources list rows from all matching files, in order."""
    csv_texts = {
        'accounts1.csv': 'name,when\nAlice,100\nBob,200',
        'accounts2.csv': 'name,when\nAlice,300\nBob,400',
    }
    with filetexts(csv_texts):
        accounts = resource('accounts*.csv', has_header=True)
        rows = convert(list, accounts)
        assert rows == [
            ('Alice', 100),
            ('Bob', 200),
            ('Alice', 300),
            ('Bob', 400),
        ]

        # A bare wildcard should still produce a chunks-of-CSV resource.
        everything = resource('*.csv')
        assert isinstance(everything, chunks(CSV))
def test_globbed_csv_to_dataframe():
    """A CSV glob converts to one ``DataFrame`` holding every file's rows."""
    header = 'a,b,c\n'
    csv_texts = {
        'a-1.csv': header + '1,2,3\n4,5,6\n',
        'a-2.csv': header + '7,8,9\n10,11,12\n',
    }

    with filetexts(csv_texts):
        df = odo('a-*.csv', pd.DataFrame)

    expected = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]],
        columns=['a', 'b', 'c'],
    )
    tm.assert_frame_equal(df, expected)
def test_globbed_csv_to_chunks_of_dataframe():
    """A CSV glob converts to one ``DataFrame`` chunk per matching file."""
    header = 'a,b,c\n'
    csv_texts = {
        'a-1.csv': header + '1,2,3\n4,5,6\n',
        'a-2.csv': header + '7,8,9\n10,11,12\n',
    }

    with filetexts(csv_texts):
        dfs = list(odo('a-*.csv', chunks(pd.DataFrame)))

    assert len(dfs) == 2

    columns = 'a', 'b', 'c'
    first, second = dfs[0], dfs[1]
    tm.assert_frame_equal(
        first,
        pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=columns),
    )
    tm.assert_frame_equal(
        second,
        pd.DataFrame([[7, 8, 9], [10, 11, 12]], columns=columns),
    )
def test_globbed_csv_to_dataframe():
    """Rows from all globbed CSV files land in a single ``DataFrame``."""
    header = 'a,b,c\n'
    first_csv = header + '1,2,3\n4,5,6\n'
    second_csv = header + '7,8,9\n10,11,12\n'

    with filetexts({'a-1.csv': first_csv, 'a-2.csv': second_csv}):
        df = odo('a-*.csv', pd.DataFrame)

    expected_rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
    tm.assert_frame_equal(
        df,
        pd.DataFrame(expected_rows, columns=['a', 'b', 'c']),
    )
def test_chunks_textfile():
    """Chunked ``TextFile`` objects list the lines of every underlying file.

    Lines are compared as a set after stripping surrounding whitespace, so
    neither file order nor trailing newlines affect the result.
    """
    contents = {'a1.log': 'Hello\nWorld', 'a2.log': 'Hola\nMundo'}
    with filetexts(contents) as fns:
        logs = chunks(TextFile)(list(map(TextFile, fns)))

        # Call ``strip`` on each element instead of mapping the unbound
        # ``str.strip``: the latter raises TypeError for any line that is not
        # exactly a ``str`` (for example ``unicode`` on Python 2).
        lines = {line.strip() for line in convert(list, logs)}
        assert lines == {'Hello', 'World', 'Hola', 'Mundo'}