Beispiel #1
0
def test_run_and_write_tables(df, store_name):
    """Run one step for iteration values 0..10 and inspect the HDF output.

    The step adds one column per iteration value. The run is asked to write
    to ``store_name`` with ``out_interval=3``; the test then checks the
    ``<value>/table`` keys for 0, 3, 6 and 9, the ``base/table`` key, and
    the full set of columns stored under ``10/table``.
    """
    orca.add_table('table', df)

    def column_label(value):
        # Column name under which the step stores iteration ``value``.
        return '{}'.format(value)

    def expected_column(value):
        # Three-row constant series the step is expected to have written.
        return pd.Series([value] * 3, index=df.index, name=str(value))

    @orca.step()
    def step(iter_var, table):
        table[column_label(iter_var)] = expected_column(iter_var)

    orca.run(
        ['step'], iter_vars=range(11), data_out=store_name, out_interval=3)

    with pd.HDFStore(store_name, mode='r') as store:
        for written in range(0, 11, 3):
            snapshot_key = '{}/table'.format(written)
            assert snapshot_key in store

            # Columns of all strictly earlier iterations must be present.
            for earlier in range(written):
                pdt.assert_series_equal(
                    store[snapshot_key][column_label(earlier)],
                    expected_column(earlier))

        assert 'base/table' in store

        for value in range(11):
            pdt.assert_series_equal(
                store['10/table'][column_label(value)],
                expected_column(value))
Beispiel #2
0
def test_column_type(df):
    """Check ``column_type`` for local, series and function columns.

    The same two extra columns are attached to a frame-backed table and to
    a function-backed table, and both raw tables are queried.
    """
    orca.add_table('test_frame', df)

    @orca.table()
    def test_func():
        return df

    series = pd.Series(range(len(df)), index=df.index)

    def col_func():
        return series

    table_names = ('test_frame', 'test_func')

    for table_name in table_names:
        orca.add_column(table_name, 'col_series', series)
    for table_name in table_names:
        orca.add_column(table_name, 'col_func', col_func)

    raw_tables = [orca.get_raw_table(table_name) for table_name in table_names]

    expected_types = (
        ('a', 'local'),
        ('col_series', 'series'),
        ('col_func', 'function'),
    )
    for raw in raw_tables:
        for column, kind in expected_types:
            assert raw.column_type(column) == kind
Beispiel #3
0
def test_get_table(df):
    """``get_table`` returns a ``DataFrameWrapper`` for every kind of table.

    Covers a directly added frame, a table function and a cached table
    function, plus the ``KeyError`` raised for an unknown name.
    """
    orca.add_table('frame', df)

    @orca.table()
    def table():
        return df

    @orca.table(cache=True)
    def source():
        return df

    wrappers = [orca.get_table(name) for name in ('frame', 'table', 'source')]

    with pytest.raises(KeyError):
        orca.get_table('asdf')

    for wrapper in wrappers:
        assert isinstance(wrapper, orca.DataFrameWrapper)

    for wrapper in wrappers:
        pdt.assert_frame_equal(wrapper.to_frame(), df)
Beispiel #4
0
def test_steps(df):
    """Register a step, look it up, run it and check its effect.

    The step overwrites column ``a`` of ``test_table`` with ``a + b`` and
    receives column ``b`` of ``test_table2`` through an injected expression.
    """
    orca.add_table('test_table', df)

    halved = df / 2
    orca.add_table('test_table2', halved)

    @orca.step()
    def test_step(test_table, test_column='test_table2.b'):
        frame = test_table.to_frame()
        test_table['a'] = frame['a'] + frame['b']
        pdt.assert_series_equal(test_column, halved['b'])

    with pytest.raises(KeyError):
        orca.get_step('asdf')

    step = orca.get_step('test_step')
    assert step._tables_used() == {'test_table', 'test_table2'}
    step()

    expected = pd.DataFrame(
        {'a': [5, 7, 9],
         'b': [4, 5, 6]},
        index=['x', 'y', 'z'])
    result = orca.get_table('test_table').to_frame()
    pdt.assert_frame_equal(result, expected)

    assert orca.list_steps() == ['test_step']
Beispiel #5
0
def test_write_all_tables(df, store_name):
    """``write_tables`` without a table list stores every registered table."""
    orca.add_table('table', df)
    orca.write_tables(store_name)

    with pd.HDFStore(store_name, mode='r') as store:
        missing = [name for name in orca.list_tables() if name not in store]
        assert not missing
Beispiel #6
0
def test_columns_and_tables(df):
    """Combine frame/function tables with series/function columns.

    ``test_frame`` gets a series column ``c``; ``test_func`` (half of
    ``test_frame``) gets computed columns ``d`` and ``e``. The test checks
    the reported columns and the frames produced by ``to_frame`` for both.
    """
    index = ['x', 'y', 'z']

    orca.add_table('test_frame', df)

    @orca.table()
    def test_func(test_frame):
        return test_frame.to_frame() / 2

    orca.add_column('test_frame', 'c', pd.Series([7, 8, 9], index=df.index))

    @orca.column('test_func', 'd')
    def asdf(test_func):
        return test_func.to_frame(columns=['b'])['b'] * 2

    @orca.column('test_func')
    def e(column='test_func.d'):
        return column + 1

    # Frame-backed table.
    test_frame = orca.get_table('test_frame')
    assert set(test_frame.columns) == {'a', 'b', 'c'}

    expected_full = pd.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=index)
    assert_frames_equal(test_frame.to_frame(), expected_full)

    expected_subset = pd.DataFrame(
        {'a': [1, 2, 3], 'c': [7, 8, 9]}, index=index)
    assert_frames_equal(
        test_frame.to_frame(columns=['a', 'c']), expected_subset)

    # Function-backed table: only the extra columns are listed before the
    # first ``to_frame`` call below.
    test_func_df = orca._TABLES['test_func']
    assert set(test_func_df.columns) == {'d', 'e'}

    expected_func_full = pd.DataFrame(
        {'a': [0.5, 1, 1.5],
         'b': [2, 2.5, 3],
         'c': [3.5, 4, 4.5],
         'd': [4., 5., 6.],
         'e': [5., 6., 7.]},
        index=index)
    assert_frames_equal(test_func_df.to_frame(), expected_func_full)

    expected_func_subset = pd.DataFrame(
        {'b': [2, 2.5, 3], 'd': [4., 5., 6.]}, index=index)
    assert_frames_equal(
        test_func_df.to_frame(columns=['b', 'd']), expected_func_subset)
    assert set(test_func_df.columns) == {'a', 'b', 'c', 'd', 'e'}

    registered = set(orca.list_columns())
    assert registered == {
        ('test_frame', 'c'), ('test_func', 'd'), ('test_func', 'e')}
Beispiel #7
0
def test_temporary_tables_cm():
    """Tables registered via ``temporary_tables`` are gone after the block."""

    def registered_names():
        # Sorted snapshot of the names currently in the table registry.
        return sorted(orca._TABLES.keys())

    orca.add_table('a', pd.DataFrame())

    with orca.temporary_tables():
        assert registered_names() == ['a']

    with orca.temporary_tables(a=pd.DataFrame(), b=pd.DataFrame()):
        assert registered_names() == ['a', 'b']

    assert registered_names() == ['a']
Beispiel #8
0
def test_columns_and_tables(df):
    """Combine frame/function tables with series/function columns.

    ``test_frame`` gets a series column ``c``; ``test_func`` (half of
    ``test_frame``) gets computed columns ``d`` and ``e``. The test checks
    the reported columns and the frames produced by ``to_frame`` for both.
    """
    index = ['x', 'y', 'z']

    orca.add_table('test_frame', df)

    @orca.table()
    def test_func(test_frame):
        return test_frame.to_frame() / 2

    orca.add_column('test_frame', 'c', pd.Series([7, 8, 9], index=df.index))

    @orca.column('test_func', 'd')
    def asdf(test_func):
        return test_func.to_frame(columns=['b'])['b'] * 2

    @orca.column('test_func')
    def e(column='test_func.d'):
        return column + 1

    # Frame-backed table.
    test_frame = orca.get_table('test_frame')
    assert set(test_frame.columns) == {'a', 'b', 'c'}

    expected_full = pd.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=index)
    assert_frames_equal(test_frame.to_frame(), expected_full)

    expected_subset = pd.DataFrame(
        {'a': [1, 2, 3], 'c': [7, 8, 9]}, index=index)
    assert_frames_equal(
        test_frame.to_frame(columns=['a', 'c']), expected_subset)

    # Function-backed table: only the extra columns are listed before the
    # first ``to_frame`` call below.
    test_func_df = orca._TABLES['test_func']
    assert set(test_func_df.columns) == {'d', 'e'}

    expected_func_full = pd.DataFrame(
        {'a': [0.5, 1, 1.5],
         'b': [2, 2.5, 3],
         'c': [3.5, 4, 4.5],
         'd': [4., 5., 6.],
         'e': [5., 6., 7.]},
        index=index)
    assert_frames_equal(test_func_df.to_frame(), expected_func_full)

    expected_func_subset = pd.DataFrame(
        {'b': [2, 2.5, 3], 'd': [4., 5., 6.]}, index=index)
    assert_frames_equal(
        test_func_df.to_frame(columns=['b', 'd']), expected_func_subset)
    assert set(test_func_df.columns) == {'a', 'b', 'c', 'd', 'e'}

    registered = set(orca.list_columns())
    assert registered == {
        ('test_frame', 'c'), ('test_func', 'd'), ('test_func', 'e')}
Beispiel #9
0
def test_get_raw_table(df):
    """``get_raw_table`` and ``table_type`` distinguish frames from functions."""
    orca.add_table('table1', df)

    @orca.table()
    def table2():
        return df

    expectations = (
        ('table1', orca.DataFrameWrapper, 'dataframe'),
        ('table2', orca.TableFuncWrapper, 'function'),
    )

    for name, wrapper_cls, _ in expectations:
        assert isinstance(orca.get_raw_table(name), wrapper_cls)

    for name, _, type_label in expectations:
        assert orca.table_type(name) == type_label
Beispiel #10
0
def test_column_func_source_data(df):
    """``func_source_data`` reports file name, line number and source text."""
    orca.add_table('test_frame', df)

    @orca.column('test_frame')
    def col_func():
        return pd.Series(range(len(df)), index=df.index)

    raw_column = orca.get_raw_column('test_frame', 'col_func')
    path, line_number, text = raw_column.func_source_data()

    assert path.endswith('test_orca.py')
    assert isinstance(line_number, int)
    assert 'def col_func():' in text
Beispiel #11
0
def test_get_raw_column(df):
    """``get_raw_column`` returns the wrapper matching how a column was added."""
    orca.add_table('test_frame', df)

    series = pd.Series(range(len(df)), index=df.index)

    def col_func():
        return series

    orca.add_column('test_frame', 'col_series', series)
    orca.add_column('test_frame', 'col_func', col_func)

    expected_wrappers = (
        ('col_series', orca._SeriesWrapper),
        ('col_func', orca._ColumnFuncWrapper),
    )
    for column_name, wrapper_cls in expected_wrappers:
        raw = orca.get_raw_column('test_frame', column_name)
        assert isinstance(raw, wrapper_cls)
Beispiel #12
0
def test_tables(df):
    """Check wrapper attributes of a frame-backed and a function-backed table.

    The function-backed table is inspected twice: before any data has been
    requested it reports no index, no columns and zero length; after
    ``to_frame`` has been called it reports the index, columns and length of
    the computed frame.
    """
    wrapped_df = orca.add_table('test_frame', df)

    @orca.table()
    def test_func(test_frame):
        return test_frame.to_frame() / 2

    assert set(orca.list_tables()) == {'test_frame', 'test_func'}

    # Frame-backed table: ``get_table`` hands back the wrapper that
    # ``add_table`` returned.
    table = orca.get_table('test_frame')
    assert table is wrapped_df
    assert table.columns == ['a', 'b']
    assert table.local_columns == ['a', 'b']
    assert len(table) == 3
    pdt.assert_index_equal(table.index, df.index)
    pdt.assert_series_equal(table.get_column('a'), df.a)
    pdt.assert_series_equal(table.a, df.a)
    pdt.assert_series_equal(table['b'], df['b'])

    # Function-backed table, taken straight from the registry so that it has
    # not been evaluated yet.
    table = orca._TABLES['test_func']
    assert table.index is None
    assert table.columns == []
    # Compare by value: ``is 0`` tests object identity, which only works by
    # accident of CPython's small-int cache and triggers a SyntaxWarning on
    # Python 3.8+.
    assert len(table) == 0
    pdt.assert_frame_equal(table.to_frame(), df / 2)
    pdt.assert_frame_equal(table.to_frame([]), df[[]])
    pdt.assert_frame_equal(table.to_frame(columns=['a']), df[['a']] / 2)
    pdt.assert_frame_equal(table.to_frame(columns='a'), df[['a']] / 2)
    pdt.assert_index_equal(table.index, df.index)
    pdt.assert_series_equal(table.get_column('a'), df.a / 2)
    pdt.assert_series_equal(table.a, df.a / 2)
    pdt.assert_series_equal(table['b'], df['b'] / 2)
    assert len(table) == 3
    assert table.columns == ['a', 'b']
Beispiel #13
0
def test_get_raw_column(df):
    """``get_raw_column`` returns the wrapper matching how a column was added."""
    orca.add_table('test_frame', df)

    series = pd.Series(range(len(df)), index=df.index)

    def col_func():
        return series

    orca.add_column('test_frame', 'col_series', series)
    orca.add_column('test_frame', 'col_func', col_func)

    expected_wrappers = (
        ('col_series', orca._SeriesWrapper),
        ('col_func', orca._ColumnFuncWrapper),
    )
    for column_name, wrapper_cls in expected_wrappers:
        raw = orca.get_raw_column('test_frame', column_name)
        assert isinstance(raw, wrapper_cls)
Beispiel #14
0
def test_write_tables(df, store_name):
    """``write_tables`` stores a step's tables, with and without a prefix."""
    orca.add_table('table', df)

    @orca.step()
    def step(table):
        pass

    def assert_stored_as(key):
        # Re-open the store read-only and compare the frame under ``key``.
        with pd.HDFStore(store_name, mode='r') as store:
            assert key in store
            pdt.assert_frame_equal(store[key], df)

    step_tables = orca.get_step_table_names(['step'])

    orca.write_tables(store_name, step_tables)
    assert_stored_as('table')

    orca.write_tables(store_name, step_tables, prefix=1969)
    assert_stored_as('1969/table')
Beispiel #15
0
def test_table_func_cache(df):
    """A cached table function is only re-evaluated after its cache is reset.

    The injectable ``x`` is changed between checks; the table keeps its old
    value until the table's own cache is cleared, the global cache is
    cleared, or the table is registered again.
    """
    orca.add_injectable('x', 2)

    @orca.table(cache=True)
    def table(variable='x'):
        return df * variable

    def assert_scaled_by(factor):
        # The table must currently evaluate to ``df * factor``.
        pdt.assert_frame_equal(
            orca.get_table('table').to_frame(), df * factor)

    assert_scaled_by(2)

    orca.add_injectable('x', 3)
    assert_scaled_by(2)
    orca.get_table('table').clear_cached()
    assert_scaled_by(3)

    orca.add_injectable('x', 4)
    assert_scaled_by(3)
    orca.clear_cache()
    assert_scaled_by(4)

    orca.add_injectable('x', 5)
    assert_scaled_by(4)
    orca.add_table('table', table)
    assert_scaled_by(5)
Beispiel #16
0
def test_step_run(df):
    """Run two steps over two iteration values and check the final table.

    ``test_step1`` adds a column named after the iteration value;
    ``test_step2`` squares column ``a`` of ``test_table`` on every pass.
    """
    orca.add_table('test_table', df)

    @orca.table()
    def table_func(test_table):
        frame = test_table.to_frame()
        frame['c'] = [7, 8, 9]
        return frame

    @orca.column('table_func')
    def new_col(test_table, table_func):
        base = test_table.to_frame()
        extra = table_func.to_frame(columns=['c'])
        return base['a'] + base['b'] + extra['c']

    @orca.step()
    def test_step1(iter_var, test_table, table_func):
        computed = table_func.to_frame(columns=['new_col'])
        test_table[iter_var] = computed['new_col'] + iter_var

    @orca.step('test_step2')
    def asdf(table='test_table'):
        frame = table.to_frame()
        table['a'] = frame['a'] ** 2

    orca.run(steps=['test_step1', 'test_step2'], iter_vars=[2000, 3000])

    expected = pd.DataFrame(
        {'a': [1, 16, 81],
         'b': [4, 5, 6],
         2000: [2012, 2015, 2018],
         3000: [3012, 3017, 3024]},
        index=['x', 'y', 'z'])
    result = orca.get_table('test_table').to_frame()
    assert_frames_equal(result, expected)

    first_step = orca.get_step('test_step1')
    assert set(first_step._tables_used()) == {'test_table', 'table_func'}
Beispiel #17
0
def test_step_run(df):
    """Run two steps over two iteration values and check the final table.

    ``test_step1`` adds a column named after the iteration value;
    ``test_step2`` squares column ``a`` of ``test_table`` on every pass.
    """
    orca.add_table('test_table', df)

    @orca.table()
    def table_func(test_table):
        frame = test_table.to_frame()
        frame['c'] = [7, 8, 9]
        return frame

    @orca.column('table_func')
    def new_col(test_table, table_func):
        base = test_table.to_frame()
        extra = table_func.to_frame(columns=['c'])
        return base['a'] + base['b'] + extra['c']

    @orca.step()
    def test_step1(iter_var, test_table, table_func):
        computed = table_func.to_frame(columns=['new_col'])
        test_table[iter_var] = computed['new_col'] + iter_var

    @orca.step('test_step2')
    def asdf(table='test_table'):
        frame = table.to_frame()
        table['a'] = frame['a'] ** 2

    orca.run(steps=['test_step1', 'test_step2'], iter_vars=[2000, 3000])

    expected = pd.DataFrame(
        {'a': [1, 16, 81],
         'b': [4, 5, 6],
         2000: [2012, 2015, 2018],
         3000: [3012, 3017, 3024]},
        index=['x', 'y', 'z'])
    result = orca.get_table('test_table').to_frame()
    assert_frames_equal(result, expected)

    first_step = orca.get_step('test_step1')
    assert set(first_step._tables_used()) == {'test_table', 'table_func'}
Beispiel #18
0
def test_run_and_write_tables_out_tables_provided(df, store_name):
    """Explicit ``out_base_tables`` / ``out_run_tables`` control what is stored.

    All three tables are requested as base output, but only ``table`` as
    per-iteration output, so only ``0/table`` may appear for iteration 0.
    """
    table_names = ['table', 'table2', 'table3']
    for name in table_names:
        orca.add_table(name, df)

    @orca.step()
    def step(iter_var, table, table2):
        pass

    orca.run(
        ['step'],
        iter_vars=range(1),
        data_out=store_name,
        out_base_tables=table_names,
        out_run_tables=['table'])

    with pd.HDFStore(store_name, mode='r') as store:
        for name in table_names:
            assert 'base/{}'.format(name) in store

        assert '0/table' in store
        for unwanted_key in ('0/table2', '0/table3'):
            assert unwanted_key not in store
Beispiel #19
0
def test_collect_variables(df):
    """``_collect_variables`` resolves names and expressions to registered things.

    Unknown names or expressions raise ``KeyError``; otherwise the result is
    keyed by the requested names, with the trailing names bound to the
    values of the given expressions.
    """
    orca.add_table('df', df)

    @orca.table()
    def df_func():
        return df

    @orca.column('df')
    def zzz():
        return df['a'] / 2

    orca.add_injectable('answer', 42)

    @orca.injectable()
    def injected():
        return 'injected'

    @orca.table('source table', cache=True)
    def source():
        return df

    with pytest.raises(KeyError):
        orca._collect_variables(['asdf'])

    with pytest.raises(KeyError):
        orca._collect_variables(names=['df'], expressions=['asdf'])

    names = ['df', 'df_func', 'answer', 'injected', 'source_label', 'df_a']
    expressions = ['source table', 'df.a']
    collected = orca._collect_variables(names, expressions)

    assert set(collected.keys()) == set(names)

    source_wrapper = collected['source_label']
    assert isinstance(source_wrapper, orca.DataFrameWrapper)
    pdt.assert_frame_equal(source_wrapper.to_frame(), df)

    column_a = collected['df_a']
    assert isinstance(column_a, pd.Series)
    pdt.assert_series_equal(column_a, df['a'])
Beispiel #20
0
def test_run_and_write_tables_out_tables_provided(df, store_name):
    """Explicit ``out_base_tables`` / ``out_run_tables`` control what is stored.

    All three tables are requested as base output, but only ``table`` as
    per-iteration output, so only ``0/table`` may appear for iteration 0.
    """
    table_names = ['table', 'table2', 'table3']
    for name in table_names:
        orca.add_table(name, df)

    @orca.step()
    def step(iter_var, table, table2):
        pass

    orca.run(
        ['step'],
        iter_vars=range(1),
        data_out=store_name,
        out_base_tables=table_names,
        out_run_tables=['table'])

    with pd.HDFStore(store_name, mode='r') as store:
        for name in table_names:
            assert 'base/{}'.format(name) in store

        assert '0/table' in store
        for unwanted_key in ('0/table2', '0/table3'):
            assert unwanted_key not in store
Beispiel #21
0
def test_update_col(df):
    """Exercise ``update_col`` and ``update_col_from_series`` on a wrapper.

    Covers a whole-column replacement, three updates with an empty series
    (dtype already matching, dtype cast by the method, and a dtype mismatch
    that must raise ``ValueError``) and a partial update of a single row.
    """
    wrapped = orca.add_table('table', df)

    wrapped.update_col('b', pd.Series([7, 8, 9], index=df.index))
    pdt.assert_series_equal(
        wrapped['b'], pd.Series([7, 8, 9], index=df.index, name='b'))

    a_dtype = wrapped['a'].dtype

    # test 1 - cast the data type before the update
    wrapped.update_col_from_series('a', pd.Series(dtype=a_dtype))
    pdt.assert_series_equal(wrapped['a'], df['a'])

    # The empty series below are given an explicit dtype: the default dtype
    # of ``pd.Series()`` differs between pandas versions (and omitting it
    # warns on pandas 1.x), which would make the dtype mismatch being tested
    # depend on the installed pandas.

    # test 2 - let the update method do the cast
    wrapped.update_col_from_series('a', pd.Series(dtype='object'), True)
    pdt.assert_series_equal(wrapped['a'], df['a'])

    # test 3 - don't cast, should raise an error
    with pytest.raises(ValueError):
        wrapped.update_col_from_series('a', pd.Series(dtype='object'))

    # Partial update: only the row labelled 'y' is replaced.
    wrapped.update_col_from_series('a', pd.Series([99], index=['y']))
    pdt.assert_series_equal(
        wrapped['a'], pd.Series([1, 99, 3], index=df.index, name='a'))
Beispiel #22
0
 def step(table, column):
     """Re-register 'table' as a copy of itself with a 'new' column added."""
     frame = table.to_frame()
     frame['new'] = column
     orca.add_table('table', frame)
Beispiel #23
0
def test_table_copy(df):
    """Check how ``copy_col`` affects the objects handed out by table wrappers.

    Tables are registered in every supported way (frame, lambda, decorated
    function, cached function, computed columns), once with
    ``copy_col=True`` and once with ``copy_col=False``. For each one the
    test asserts that ``to_frame`` always yields a fresh object, while
    column access yields the original series only for the uncopied variants.
    """
    # Frames and lambdas registered directly.
    orca.add_table('test_frame_copied', df, copy_col=True)
    orca.add_table('test_frame_uncopied', df, copy_col=False)
    orca.add_table('test_func_copied', lambda: df, copy_col=True)
    orca.add_table('test_func_uncopied', lambda: df, copy_col=False)

    # Decorated table functions returning the fixture frame.
    @orca.table(copy_col=True)
    def test_funcd_copied():
        return df

    @orca.table(copy_col=False)
    def test_funcd_uncopied():
        return df

    # Decorated table functions returning another table's ``local`` frame.
    @orca.table(copy_col=True)
    def test_funcd_copied2(test_frame_copied):
        # local returns original, but it is copied by copy_col.
        return test_frame_copied.local

    @orca.table(copy_col=True)
    def test_funcd_copied3(test_frame_uncopied):
        # local returns original, but it is copied by copy_col.
        return test_frame_uncopied.local

    @orca.table(copy_col=False)
    def test_funcd_uncopied2(test_frame_copied):
        # local returns original.
        return test_frame_copied.local

    @orca.table(copy_col=False)
    def test_funcd_uncopied3(test_frame_uncopied):
        # local returns original.
        return test_frame_uncopied.local

    # Cached variants, registered directly and through the decorator.
    orca.add_table('test_cache_copied', lambda: df, cache=True, copy_col=True)
    orca.add_table(
        'test_cache_uncopied', lambda: df, cache=True, copy_col=False)

    @orca.table(cache=True, copy_col=True)
    def test_cached_copied():
        return df

    @orca.table(cache=True, copy_col=False)
    def test_cached_uncopied():
        return df

    # Create tables with computed columns.
    empty_indexed = pd.DataFrame(index=df.index)
    orca.add_table('test_copied_columns', empty_indexed, copy_col=True)
    orca.add_table(
        'test_uncopied_columns', pd.DataFrame(index=df.index), copy_col=False)
    computed_tables = ['test_copied_columns', 'test_uncopied_columns']
    for column_name in ['a', 'b']:
        label = "test_frame_uncopied.{}".format(column_name)

        # ``label`` is bound as a default so each function keeps its own
        # expression.
        def func(col=label):
            return col

        for table_name in computed_tables:
            orca.add_column(table_name, column_name, func)

    uncopied_names = [
        'test_frame_uncopied', 'test_func_uncopied',
        'test_funcd_uncopied', 'test_funcd_uncopied2',
        'test_funcd_uncopied3', 'test_cache_uncopied',
        'test_cached_uncopied', 'test_uncopied_columns',
    ]
    copied_names = [
        'test_frame_copied', 'test_func_copied',
        'test_funcd_copied', 'test_funcd_copied2',
        'test_funcd_copied3', 'test_cache_copied',
        'test_cached_copied', 'test_copied_columns',
    ]

    for name in uncopied_names + copied_names:
        wrapper = orca.get_table(name)
        wrapper_again = orca.get_table(name)

        # to_frame will always return a copy.
        first_frame = wrapper.to_frame()
        if 'columns' in name:
            assert_frames_equal(first_frame, df)
        else:
            pdt.assert_frame_equal(first_frame, df)
        assert wrapper.to_frame() is not df

        frame_one = wrapper.to_frame()
        frame_two = wrapper.to_frame()
        pdt.assert_frame_equal(frame_one, frame_two)
        assert frame_one is not frame_two

        pdt.assert_series_equal(wrapper.to_frame()['a'], df['a'])
        assert wrapper.to_frame()['a'] is not df['a']

        series_one = wrapper.to_frame()['a']
        series_two = wrapper.to_frame()['a']
        pdt.assert_series_equal(series_one, series_two)
        assert series_one is not series_two

        # Direct column access shares objects only when copy_col is False.
        shares_objects = 'uncopied' in name
        pdt.assert_series_equal(wrapper['a'], df['a'])
        if shares_objects:
            assert wrapper['a'] is df['a']
        else:
            assert wrapper['a'] is not df['a']
        pdt.assert_series_equal(wrapper['a'], wrapper_again['a'])
        if shares_objects:
            assert wrapper['a'] is wrapper_again['a']
        else:
            assert wrapper['a'] is not wrapper_again['a']
Beispiel #24
0
def test_is_table(df):
    """``is_table`` returns exactly ``True``/``False`` for known/unknown names."""
    orca.add_table('table', df)

    for name, expected in (('table', True), ('asdf', False)):
        assert orca.is_table(name) is expected