def test_run_and_write_tables(df, store_name):
    """Run one step for iteration variables 0-10 and inspect the HDF output.

    The run uses ``out_interval=3``; the test then checks the keys
    ``0/table``, ``3/table``, ``6/table`` and ``9/table``, the ``base/table``
    key, and the contents of ``10/table``.
    """
    orca.add_table('table', df)

    def column_label(value):
        return '{}'.format(value)

    def expected_column(value):
        # Three identical entries aligned to the fixture's index.
        return pd.Series([value] * 3, index=df.index, name=str(value))

    @orca.step()
    def step(iter_var, table):
        table[column_label(iter_var)] = expected_column(iter_var)

    orca.run(
        ['step'],
        iter_vars=range(11),
        data_out=store_name,
        out_interval=3)

    with pd.HDFStore(store_name, mode='r') as store:
        for written in range(0, 11, 3):
            key = '{}/table'.format(written)
            assert key in store

            for earlier in range(written):
                pdt.assert_series_equal(
                    store[key][column_label(earlier)],
                    expected_column(earlier))

        assert 'base/table' in store

        for value in range(11):
            pdt.assert_series_equal(
                store['10/table'][column_label(value)],
                expected_column(value))
def test_column_type(df):
    """Check ``column_type`` for local, series and function columns.

    The same checks are made on a table registered from a DataFrame
    (``test_frame``) and on one registered from a function (``test_func``).
    """
    orca.add_table('test_frame', df)

    @orca.table()
    def test_func():
        return df

    values = pd.Series(range(len(df)), index=df.index)

    def col_func():
        return values

    table_names = ('test_frame', 'test_func')

    # Attach one series column and one function column to each table.
    for column_name, column in (('col_series', values),
                                ('col_func', col_func)):
        for table_name in table_names:
            orca.add_column(table_name, column_name, column)

    wrappers = [orca.get_raw_table(table_name) for table_name in table_names]

    expected_types = (
        ('a', 'local'),
        ('col_series', 'series'),
        ('col_func', 'function'),
    )
    for wrapper in wrappers:
        for column_name, expected in expected_types:
            assert wrapper.column_type(column_name) == expected
def test_get_table(df):
    """Check ``get_table`` for a frame, a function table and a cached one.

    Each lookup must yield a ``DataFrameWrapper`` whose frame equals ``df``;
    an unregistered name must raise ``KeyError``.
    """
    orca.add_table('frame', df)

    @orca.table()
    def table():
        return df

    @orca.table(cache=True)
    def source():
        return df

    wrappers = [
        orca.get_table(name) for name in ('frame', 'table', 'source')]

    with pytest.raises(KeyError):
        orca.get_table('asdf')

    for wrapper in wrappers:
        assert isinstance(wrapper, orca.DataFrameWrapper)

    for wrapper in wrappers:
        pdt.assert_frame_equal(wrapper.to_frame(), df)
def test_steps(df):
    """Register a step, look it up, run it, and check its effect on a table.

    The step overwrites column ``a`` of ``test_table`` with ``a + b`` and
    receives ``test_table2.b`` through an expression default argument.
    """
    orca.add_table('test_table', df)

    halved = df / 2
    orca.add_table('test_table2', halved)

    @orca.step()
    def test_step(test_table, test_column='test_table2.b'):
        tt = test_table.to_frame()
        test_table['a'] = tt['a'] + tt['b']
        pdt.assert_series_equal(test_column, halved['b'])

    with pytest.raises(KeyError):
        orca.get_step('asdf')

    step = orca.get_step('test_step')
    assert step._tables_used() == {'test_table', 'test_table2'}
    step()

    expected = pd.DataFrame(
        {'a': [5, 7, 9], 'b': [4, 5, 6]},
        index=['x', 'y', 'z'])
    pdt.assert_frame_equal(orca.get_table('test_table').to_frame(), expected)

    assert orca.list_steps() == ['test_step']
def test_write_all_tables(df, store_name):
    """Call ``write_tables`` with no table list and check every listed table
    is present in the resulting HDF store."""
    orca.add_table('table', df)
    orca.write_tables(store_name)

    with pd.HDFStore(store_name, mode='r') as store:
        missing = [name for name in orca.list_tables() if name not in store]
        assert not missing
def test_columns_and_tables(df):
    """Check registered columns on a frame table and on a function table.

    ``test_frame`` gets a series column ``c``; ``test_func`` (half of
    ``test_frame``) gets function columns ``d`` and ``e``.  The function
    table's ``columns`` are inspected both before and after ``to_frame``.
    """
    orca.add_table('test_frame', df)

    @orca.table()
    def test_func(test_frame):
        return test_frame.to_frame() / 2

    orca.add_column('test_frame', 'c', pd.Series([7, 8, 9], index=df.index))

    @orca.column('test_func', 'd')
    def asdf(test_func):
        return test_func.to_frame(columns=['b'])['b'] * 2

    @orca.column('test_func')
    def e(column='test_func.d'):
        return column + 1

    row_labels = ['x', 'y', 'z']

    frame_table = orca.get_table('test_frame')
    assert set(frame_table.columns) == {'a', 'b', 'c'}

    expected_full = pd.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]},
        index=row_labels)
    assert_frames_equal(frame_table.to_frame(), expected_full)

    expected_subset = pd.DataFrame(
        {'a': [1, 2, 3], 'c': [7, 8, 9]},
        index=row_labels)
    assert_frames_equal(
        frame_table.to_frame(columns=['a', 'c']), expected_subset)

    func_table = orca._TABLES['test_func']
    # Checked before the first to_frame() call on the function table.
    assert set(func_table.columns) == {'d', 'e'}

    expected_func_full = pd.DataFrame(
        {
            'a': [0.5, 1, 1.5],
            'b': [2, 2.5, 3],
            'c': [3.5, 4, 4.5],
            'd': [4., 5., 6.],
            'e': [5., 6., 7.],
        },
        index=row_labels)
    assert_frames_equal(func_table.to_frame(), expected_func_full)

    expected_func_subset = pd.DataFrame(
        {'b': [2, 2.5, 3], 'd': [4., 5., 6.]},
        index=row_labels)
    assert_frames_equal(
        func_table.to_frame(columns=['b', 'd']), expected_func_subset)

    assert set(func_table.columns) == {'a', 'b', 'c', 'd', 'e'}

    assert set(orca.list_columns()) == {
        ('test_frame', 'c'),
        ('test_func', 'd'),
        ('test_func', 'e'),
    }
def test_temporary_tables_cm():
    """Check the table registry inside and after ``temporary_tables``.

    With no arguments the registry still lists only ``a``; with ``a`` and
    ``b`` supplied it lists both; after the block it lists only ``a`` again.
    """
    orca.add_table('a', pd.DataFrame())

    def registered_names():
        return sorted(orca._TABLES.keys())

    with orca.temporary_tables():
        assert registered_names() == ['a']

    with orca.temporary_tables(a=pd.DataFrame(), b=pd.DataFrame()):
        assert registered_names() == ['a', 'b']

    assert registered_names() == ['a']
def test_columns_and_tables(df):
    """Check column registration on a DataFrame table and a function table.

    A series column ``c`` is added to ``test_frame``; function columns ``d``
    and ``e`` are added to ``test_func``, which returns ``test_frame`` halved.
    """
    def labelled(data):
        # All expected frames in this test share the same row labels.
        return pd.DataFrame(data, index=['x', 'y', 'z'])

    orca.add_table('test_frame', df)

    @orca.table()
    def test_func(test_frame):
        return test_frame.to_frame() / 2

    orca.add_column('test_frame', 'c', pd.Series([7, 8, 9], index=df.index))

    @orca.column('test_func', 'd')
    def asdf(test_func):
        return test_func.to_frame(columns=['b'])['b'] * 2

    @orca.column('test_func')
    def e(column='test_func.d'):
        return column + 1

    wrapped_frame = orca.get_table('test_frame')
    assert set(wrapped_frame.columns) == {'a', 'b', 'c'}
    assert_frames_equal(
        wrapped_frame.to_frame(),
        labelled({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}))
    assert_frames_equal(
        wrapped_frame.to_frame(columns=['a', 'c']),
        labelled({'a': [1, 2, 3], 'c': [7, 8, 9]}))

    wrapped_func = orca._TABLES['test_func']
    # This assertion runs before to_frame() is first called on the table.
    assert set(wrapped_func.columns) == {'d', 'e'}
    assert_frames_equal(
        wrapped_func.to_frame(),
        labelled({
            'a': [0.5, 1, 1.5],
            'b': [2, 2.5, 3],
            'c': [3.5, 4, 4.5],
            'd': [4., 5., 6.],
            'e': [5., 6., 7.],
        }))
    assert_frames_equal(
        wrapped_func.to_frame(columns=['b', 'd']),
        labelled({'b': [2, 2.5, 3], 'd': [4., 5., 6.]}))
    assert set(wrapped_func.columns) == {'a', 'b', 'c', 'd', 'e'}

    registered = set(orca.list_columns())
    assert registered == {
        ('test_frame', 'c'), ('test_func', 'd'), ('test_func', 'e')}
def test_get_raw_table(df):
    """Check the wrapper class and ``table_type`` label for a table added
    from a DataFrame and for one registered from a function."""
    orca.add_table('table1', df)

    @orca.table()
    def table2():
        return df

    expectations = (
        ('table1', orca.DataFrameWrapper, 'dataframe'),
        ('table2', orca.TableFuncWrapper, 'function'),
    )

    for name, wrapper_class, _ in expectations:
        assert isinstance(orca.get_raw_table(name), wrapper_class)

    for name, _, type_label in expectations:
        assert orca.table_type(name) == type_label
def test_column_func_source_data(df):
    """Check the (filename, line number, source) triple returned by
    ``func_source_data`` for a function column."""
    orca.add_table('test_frame', df)

    @orca.column('test_frame')
    def col_func():
        return pd.Series(range(len(df)), index=df.index)

    wrapper = orca.get_raw_column('test_frame', 'col_func')
    source_info = wrapper.func_source_data()
    filename, lineno, source = source_info

    assert filename.endswith('test_orca.py')
    assert isinstance(lineno, int)
    # The reported source text must contain the column function's def line.
    assert 'def col_func():' in source
def test_get_raw_column(df):
    """Check that ``get_raw_column`` returns a ``_SeriesWrapper`` for a
    series column and a ``_ColumnFuncWrapper`` for a function column."""
    orca.add_table('test_frame', df)

    values = pd.Series(range(len(df)), index=df.index)

    def col_func():
        return values

    orca.add_column('test_frame', 'col_series', values)
    orca.add_column('test_frame', 'col_func', col_func)

    series_wrapper = orca.get_raw_column('test_frame', 'col_series')
    func_wrapper = orca.get_raw_column('test_frame', 'col_func')

    assert isinstance(series_wrapper, orca._SeriesWrapper)
    assert isinstance(func_wrapper, orca._ColumnFuncWrapper)
def test_tables(df):
    """Check the wrapper API of a DataFrame table and of a function table.

    ``test_frame`` is registered directly from ``df``; ``test_func`` is a
    function table returning ``test_frame`` halved.  For the function table
    the test asserts ``index``, ``columns`` and ``len`` both before and after
    the first ``to_frame`` call.
    """
    wrapped_df = orca.add_table('test_frame', df)

    @orca.table()
    def test_func(test_frame):
        return test_frame.to_frame() / 2

    assert set(orca.list_tables()) == {'test_frame', 'test_func'}

    table = orca.get_table('test_frame')
    assert table is wrapped_df
    assert table.columns == ['a', 'b']
    assert table.local_columns == ['a', 'b']
    assert len(table) == 3
    pdt.assert_index_equal(table.index, df.index)
    pdt.assert_series_equal(table.get_column('a'), df.a)
    pdt.assert_series_equal(table.a, df.a)
    pdt.assert_series_equal(table['b'], df['b'])

    table = orca._TABLES['test_func']
    # Expected state before to_frame() has been called on the function table.
    assert table.index is None
    assert table.columns == []
    # Compare by value: ``is 0`` depends on interpreter int caching and
    # triggers a SyntaxWarning on Python 3.8+.
    assert len(table) == 0

    pdt.assert_frame_equal(table.to_frame(), df / 2)
    pdt.assert_frame_equal(table.to_frame([]), df[[]])
    pdt.assert_frame_equal(table.to_frame(columns=['a']), df[['a']] / 2)
    pdt.assert_frame_equal(table.to_frame(columns='a'), df[['a']] / 2)

    # Expected state after the table function has been evaluated.
    pdt.assert_index_equal(table.index, df.index)
    pdt.assert_series_equal(table.get_column('a'), df.a / 2)
    pdt.assert_series_equal(table.a, df.a / 2)
    pdt.assert_series_equal(table['b'], df['b'] / 2)
    assert len(table) == 3
    assert table.columns == ['a', 'b']
def test_get_raw_column(df):
    """Register a series column and a function column on ``test_frame`` and
    check the wrapper class ``get_raw_column`` returns for each."""
    orca.add_table('test_frame', df)

    s = pd.Series(range(len(df)), index=df.index)

    def col_func():
        return s

    cases = (
        ('col_series', s, orca._SeriesWrapper),
        ('col_func', col_func, orca._ColumnFuncWrapper),
    )

    for column_name, column, _ in cases:
        orca.add_column('test_frame', column_name, column)

    for column_name, _, wrapper_class in cases:
        raw = orca.get_raw_column('test_frame', column_name)
        assert isinstance(raw, wrapper_class)
def test_write_tables(df, store_name):
    """Write the tables used by a step, first without and then with a prefix.

    Without a prefix the frame is expected under ``table``; with
    ``prefix=1969`` it is expected under ``1969/table``.
    """
    orca.add_table('table', df)

    @orca.step()
    def step(table):
        pass

    step_tables = orca.get_step_table_names(['step'])

    cases = (
        ({}, 'table'),
        ({'prefix': 1969}, '1969/table'),
    )
    for extra_kwargs, key in cases:
        orca.write_tables(store_name, step_tables, **extra_kwargs)

        with pd.HDFStore(store_name, mode='r') as store:
            assert key in store
            pdt.assert_frame_equal(store[key], df)
def test_table_func_cache(df):
    """Check when a cached function table picks up a changed injectable.

    The table multiplies ``df`` by injectable ``x``.  The expected multiplier
    only changes after ``clear_cached``, ``clear_cache`` or re-registering
    the table with ``add_table``.
    """
    orca.add_injectable('x', 2)

    @orca.table(cache=True)
    def table(variable='x'):
        return df * variable

    def assert_multiplier(expected):
        pdt.assert_frame_equal(
            orca.get_table('table').to_frame(), df * expected)

    assert_multiplier(2)

    orca.add_injectable('x', 3)
    assert_multiplier(2)

    orca.get_table('table').clear_cached()
    assert_multiplier(3)

    orca.add_injectable('x', 4)
    assert_multiplier(3)

    orca.clear_cache()
    assert_multiplier(4)

    orca.add_injectable('x', 5)
    assert_multiplier(4)

    orca.add_table('table', table)
    assert_multiplier(5)
def test_step_run(df):
    """Run two steps over iteration variables 2000 and 3000 and check the
    final contents of ``test_table`` and the tables used by the first step.

    ``test_step1`` adds a column named after the iteration variable;
    ``test_step2`` squares column ``a``.
    """
    orca.add_table('test_table', df)

    @orca.table()
    def table_func(test_table):
        tt = test_table.to_frame()
        tt['c'] = [7, 8, 9]
        return tt

    @orca.column('table_func')
    def new_col(test_table, table_func):
        tt = test_table.to_frame()
        tf = table_func.to_frame(columns=['c'])
        return tt['a'] + tt['b'] + tf['c']

    @orca.step()
    def test_step1(iter_var, test_table, table_func):
        tf = table_func.to_frame(columns=['new_col'])
        test_table[iter_var] = tf['new_col'] + iter_var

    @orca.step('test_step2')
    def asdf(table='test_table'):
        tt = table.to_frame()
        table['a'] = tt['a']**2

    orca.run(steps=['test_step1', 'test_step2'], iter_vars=[2000, 3000])

    expected = pd.DataFrame(
        {
            'a': [1, 16, 81],
            'b': [4, 5, 6],
            2000: [2012, 2015, 2018],
            3000: [3012, 3017, 3024],
        },
        index=['x', 'y', 'z'])
    result = orca.get_table('test_table')
    assert_frames_equal(result.to_frame(), expected)

    first_step = orca.get_step('test_step1')
    assert set(first_step._tables_used()) == {'test_table', 'table_func'}
def test_step_run(df):
    """Execute ``test_step1`` and ``test_step2`` for two iteration variables
    and verify the resulting ``test_table`` frame.

    Also checks that ``test_step1`` reports ``test_table`` and
    ``table_func`` as the tables it uses.
    """
    iteration_values = [2000, 3000]
    step_names = ['test_step1', 'test_step2']

    orca.add_table('test_table', df)

    @orca.table()
    def table_func(test_table):
        tt = test_table.to_frame()
        tt['c'] = [7, 8, 9]
        return tt

    @orca.column('table_func')
    def new_col(test_table, table_func):
        tt = test_table.to_frame()
        tf = table_func.to_frame(columns=['c'])
        return tt['a'] + tt['b'] + tf['c']

    @orca.step()
    def test_step1(iter_var, test_table, table_func):
        tf = table_func.to_frame(columns=['new_col'])
        test_table[iter_var] = tf['new_col'] + iter_var

    @orca.step('test_step2')
    def asdf(table='test_table'):
        tt = table.to_frame()
        table['a'] = tt['a'] ** 2

    orca.run(steps=step_names, iter_vars=iteration_values)

    final_frame = orca.get_table('test_table').to_frame()
    assert_frames_equal(
        final_frame,
        pd.DataFrame(
            {
                'a': [1, 16, 81],
                'b': [4, 5, 6],
                2000: [2012, 2015, 2018],
                3000: [3012, 3017, 3024],
            },
            index=['x', 'y', 'z']))

    m = orca.get_step('test_step1')
    assert set(m._tables_used()) == {'test_table', 'table_func'}
def test_run_and_write_tables_out_tables_provided(df, store_name):
    """Run with explicit ``out_base_tables`` / ``out_run_tables`` lists.

    All three tables are expected under ``base/``; for iteration ``0`` only
    ``table`` is expected to have been written.
    """
    table_names = ['table', 'table2', 'table3']
    for name in table_names:
        orca.add_table(name, df)

    @orca.step()
    def step(iter_var, table, table2):
        return

    orca.run(
        ['step'],
        iter_vars=range(1),
        data_out=store_name,
        out_base_tables=table_names,
        out_run_tables=['table'])

    with pd.HDFStore(store_name, mode='r') as store:
        for name in table_names:
            assert 'base/{}'.format(name) in store

        assert '0/table' in store
        for absent_key in ('0/table2', '0/table3'):
            assert absent_key not in store
def test_collect_variables(df):
    """Check ``_collect_variables`` with plain names and with expressions.

    Unknown names or expressions must raise ``KeyError``.  With expressions
    supplied, ``source_label`` is expected to hold the ``'source table'``
    wrapper and ``df_a`` the ``df.a`` series.
    """
    orca.add_table('df', df)

    @orca.table()
    def df_func():
        return df

    @orca.column('df')
    def zzz():
        return df['a'] / 2

    orca.add_injectable('answer', 42)

    @orca.injectable()
    def injected():
        return 'injected'

    @orca.table('source table', cache=True)
    def source():
        return df

    with pytest.raises(KeyError):
        orca._collect_variables(['asdf'])

    with pytest.raises(KeyError):
        orca._collect_variables(names=['df'], expressions=['asdf'])

    names = ['df', 'df_func', 'answer', 'injected', 'source_label', 'df_a']
    expressions = ['source table', 'df.a']
    collected = orca._collect_variables(names, expressions)

    assert set(collected.keys()) == set(names)

    source_wrapper = collected['source_label']
    assert isinstance(source_wrapper, orca.DataFrameWrapper)
    pdt.assert_frame_equal(source_wrapper.to_frame(), df)

    column_a = collected['df_a']
    assert isinstance(column_a, pd.Series)
    pdt.assert_series_equal(column_a, df['a'])
def test_run_and_write_tables_out_tables_provided(df, store_name):
    """Check which store keys exist when the base and per-run output tables
    are passed to ``orca.run`` explicitly."""
    table_names = ['table', 'table2', 'table3']
    run_tables = ['table']

    for table_name in table_names:
        orca.add_table(table_name, df)

    @orca.step()
    def step(iter_var, table, table2):
        return

    run_options = {
        'iter_vars': range(1),
        'data_out': store_name,
        'out_base_tables': table_names,
        'out_run_tables': run_tables,
    }
    orca.run(['step'], **run_options)

    with pd.HDFStore(store_name, mode='r') as store:
        # Every table is expected under the "base" prefix.
        for table_name in table_names:
            base_key = 'base/{}'.format(table_name)
            assert base_key in store

        # For iteration 0 only 'table' is expected.
        assert '0/table' in store
        assert '0/table2' not in store
        assert '0/table3' not in store
def test_update_col(df):
    """Check ``update_col`` and ``update_col_from_series`` on a wrapped table.

    Covers replacing a whole column, three empty-series updates (matching
    dtype, mismatched dtype with cast, mismatched dtype without cast), and a
    single-row update.
    """
    wrapped = orca.add_table('table', df)

    wrapped.update_col('b', pd.Series([7, 8, 9], index=df.index))
    pdt.assert_series_equal(
        wrapped['b'], pd.Series([7, 8, 9], index=df.index, name='b'))

    a_dtype = wrapped['a'].dtype

    # test 1 - cast the data type before the update
    wrapped.update_col_from_series('a', pd.Series(dtype=a_dtype))
    pdt.assert_series_equal(wrapped['a'], df['a'])

    # The empty series below state their dtype explicitly: a bare
    # ``pd.Series()`` has a version-dependent default dtype and emits a
    # deprecation warning on pandas 1.x.

    # test 2 - let the update method do the cast
    wrapped.update_col_from_series('a', pd.Series(dtype='float64'), True)
    pdt.assert_series_equal(wrapped['a'], df['a'])

    # test 3 - don't cast, should raise an error
    with pytest.raises(ValueError):
        wrapped.update_col_from_series('a', pd.Series(dtype='float64'))

    wrapped.update_col_from_series('a', pd.Series([99], index=['y']))
    pdt.assert_series_equal(
        wrapped['a'], pd.Series([1, 99, 3], index=df.index, name='a'))
def step(table, column):
    """Store ``column`` as ``'new'`` on the table's frame and register that
    frame with orca under the name ``'table'``."""
    frame = table.to_frame()
    frame['new'] = column
    orca.add_table('table', frame)
def test_table_copy(df):
    """Check copy semantics of tables registered with ``copy_col`` on or off.

    Tables are registered from frames, lambdas, decorated functions, cached
    functions, and empty frames with computed columns.  For every table,
    ``to_frame`` results must be distinct objects; ``table['a']`` must be the
    identical object for names containing ``'uncopied'`` and a different
    object otherwise.
    """
    orca.add_table('test_frame_copied', df, copy_col=True)
    orca.add_table('test_frame_uncopied', df, copy_col=False)
    orca.add_table('test_func_copied', lambda: df, copy_col=True)
    orca.add_table('test_func_uncopied', lambda: df, copy_col=False)

    @orca.table(copy_col=True)
    def test_funcd_copied():
        return df

    @orca.table(copy_col=False)
    def test_funcd_uncopied():
        return df

    @orca.table(copy_col=True)
    def test_funcd_copied2(test_frame_copied):
        # local returns original, but it is copied by copy_col.
        return test_frame_copied.local

    @orca.table(copy_col=True)
    def test_funcd_copied3(test_frame_uncopied):
        # local returns original, but it is copied by copy_col.
        return test_frame_uncopied.local

    @orca.table(copy_col=False)
    def test_funcd_uncopied2(test_frame_copied):
        # local returns original.
        return test_frame_copied.local

    @orca.table(copy_col=False)
    def test_funcd_uncopied3(test_frame_uncopied):
        # local returns original.
        return test_frame_uncopied.local

    orca.add_table('test_cache_copied', lambda: df, cache=True, copy_col=True)
    orca.add_table(
        'test_cache_uncopied', lambda: df, cache=True, copy_col=False)

    @orca.table(cache=True, copy_col=True)
    def test_cached_copied():
        return df

    @orca.table(cache=True, copy_col=False)
    def test_cached_uncopied():
        return df

    # Create tables with computed columns.
    orca.add_table(
        'test_copied_columns', pd.DataFrame(index=df.index), copy_col=True)
    orca.add_table(
        'test_uncopied_columns', pd.DataFrame(index=df.index), copy_col=False)

    computed_tables = ('test_copied_columns', 'test_uncopied_columns')
    for column_name in ('a', 'b'):
        expression = "test_frame_uncopied.{}".format(column_name)

        # The expression is bound as a default so each function keeps its
        # own column rather than the loop's last value.
        def func(col=expression):
            return col

        for table_name in computed_tables:
            orca.add_column(table_name, column_name, func)

    all_names = (
        'test_frame_uncopied',
        'test_func_uncopied',
        'test_funcd_uncopied',
        'test_funcd_uncopied2',
        'test_funcd_uncopied3',
        'test_cache_uncopied',
        'test_cached_uncopied',
        'test_uncopied_columns',
        'test_frame_copied',
        'test_func_copied',
        'test_funcd_copied',
        'test_funcd_copied2',
        'test_funcd_copied3',
        'test_cache_copied',
        'test_cached_copied',
        'test_copied_columns',
    )

    for name in all_names:
        table = orca.get_table(name)
        table2 = orca.get_table(name)

        # to_frame will always return a copy.
        if 'columns' in name:
            compare_frames = assert_frames_equal
        else:
            compare_frames = pdt.assert_frame_equal
        compare_frames(table.to_frame(), df)

        assert table.to_frame() is not df
        pdt.assert_frame_equal(table.to_frame(), table.to_frame())
        assert table.to_frame() is not table.to_frame()
        pdt.assert_series_equal(table.to_frame()['a'], df['a'])
        assert table.to_frame()['a'] is not df['a']
        pdt.assert_series_equal(table.to_frame()['a'], table.to_frame()['a'])
        assert table.to_frame()['a'] is not table.to_frame()['a']

        # Column access: identical object expected only for uncopied tables.
        expect_same_object = 'uncopied' in name
        pdt.assert_series_equal(table['a'], df['a'])
        assert (table['a'] is df['a']) == expect_same_object
        pdt.assert_series_equal(table['a'], table2['a'])
        assert (table['a'] is table2['a']) == expect_same_object
def test_is_table(df):
    """Check that ``is_table`` returns exactly ``True`` for a registered name
    and exactly ``False`` for an unregistered one."""
    orca.add_table('table', df)

    for name, expected in (('table', True), ('asdf', False)):
        assert orca.is_table(name) is expected