def test_upsert_new_cols(pandabase_loaded_db, constants, col_to_duplicate):
    """upsert a DataFrame with an added column (copied from an existing column), using add_new_columns=True"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    df['bonus_col'] = df[col_to_duplicate].copy()

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert',
              add_new_columns=True)

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)
    assert 'bonus_col' in loaded.columns

def test_create_read_table_no_index(empty_db, minimal_df):
    """add a new minimal table & read it back with pandabase"""
    table = pb.to_sql(minimal_df,
                      table_name='sample',
                      con=empty_db,
                      how='create_only',
                      auto_index=True)

    # print(table.columns)
    assert table.columns[PANDABASE_DEFAULT_INDEX].primary_key

    loaded = pb.read_sql('sample', con=empty_db)
    assert pb.has_table(empty_db, 'sample')
    assert pb.companda(loaded, minimal_df, ignore_index=True)

def test_append_bad_pk_fails(pandabase_loaded_db, simple_df, constants, unique_index_name):
    """Try to append rows with conflicting index columns"""
    table_name = constants.TABLE_NAME
    assert pb.has_table(pandabase_loaded_db, table_name)

    simple_df.index = simple_df['integer']
    if unique_index_name:
        simple_df[constants.SAMPLE_INDEX_NAME] = simple_df.integer
    simple_df = simple_df.drop('integer', axis=1)

    with pytest.raises(NameError):
        pb.to_sql(simple_df,
                  table_name=table_name,
                  con=pandabase_loaded_db,
                  how='append')

def test_add_fails_invalid_timezone(pandabase_loaded_db, how, constants, tz):
    """try to add a datetime column with an invalid (non-UTC) timezone; should raise ValueError"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = pd.DataFrame(index=range(5),
                      columns=['date'],
                      data=pd.date_range('2019-06-06', periods=5, freq='h', tz=tz))
    df.index.name = constants.SAMPLE_INDEX_NAME
    print(df.date)

    with pytest.raises(ValueError):
        pb.to_sql(df,
                  table_name=constants.TABLE_NAME,
                  con=pandabase_loaded_db,
                  how=how)

def test_coerce_integer(pandabase_loaded_db, how, constants):
    """insert a whole-number float (77.0) into an integer column; it should be coerced to int"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = pd.DataFrame(index=[1], columns=['integer'], data=[[77.0]])
    df.index.name = constants.SAMPLE_INDEX_NAME
    types = df.dtypes

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # the caller's DataFrame dtypes should be unchanged
    for col in df.columns:
        assert types[col] == df.dtypes[col]

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert loaded.loc[1, 'integer'] == 77

def test_add_new_rows(pandabase_loaded_db, simple_df, how, constants):
    """upsert or append new complete rows"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = simple_df.copy()
    df.index = df.index + 100

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how=how)

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    # print('loaded post-upsert by pandabase:')
    # print(loaded)

    assert loaded.isna().sum().sum() == 0
    assert companda(simple_df, loaded.loc[simple_df.index])
    assert companda(df, loaded.loc[df.index])

def test_create_select_table_range_int_index(empty_db, simple_df, constants):
    """add a new table with explicit index, read it back with pandabase, check equality"""
    table = pb.to_sql(simple_df,
                      table_name='sample',
                      con=empty_db,
                      how='create_only')

    # print(table.columns)
    assert table.columns[constants.SAMPLE_INDEX_NAME].primary_key
    assert pb.has_table(empty_db, 'sample')

    loaded0 = pb.read_sql('sample', con=empty_db, lowest=1, highest=0)
    print(loaded0)
    assert len(loaded0) == 0

    loaded = pb.read_sql('sample', con=empty_db,
                         lowest=simple_df.index[0],
                         highest=simple_df.index[-1])
    assert pb.companda(loaded, simple_df, ignore_all_nan_columns=True)

def test_append_auto_index(empty_db, minimal_df):
    """add a new minimal table; add it again"""
    pb.to_sql(minimal_df,
              table_name='sample',
              con=empty_db,
              auto_index=True,
              how='create_only')

    table2 = pb.to_sql(minimal_df,
                       table_name='sample',
                       con=empty_db,
                       auto_index=True,
                       how='append')
    assert table2.columns[PANDABASE_DEFAULT_INDEX].primary_key

    loaded = pb.read_sql('sample', con=empty_db)
    assert pb.has_table(empty_db, 'sample')

    double_df = pd.concat([minimal_df, minimal_df], ignore_index=True)
    assert pb.companda(loaded, double_df, ignore_index=True)
    assert len(loaded) == len(minimal_df) * 2

def test_create_read_table_with_different_index(session_db, simple_df, table_name, index_col_name):
    """create new tables in empty db, using different col types as index, read with pandabase"""
    orig_df = simple_df.copy()
    orig_df.index = orig_df[index_col_name]
    print(orig_df[index_col_name])
    print(orig_df.index)
    orig_df = orig_df.drop(index_col_name, axis=1)

    table = pb.to_sql(orig_df,
                      table_name=table_name,
                      con=session_db,
                      how='create_only')

    assert table.columns[index_col_name].primary_key
    assert pb.has_table(session_db, table_name)

    loaded = pb.read_sql(table_name, con=session_db)
    c = pb.companda(loaded, orig_df, ignore_all_nan_columns=True)
    if not c:
        raise ValueError(c.message)

def test_create_table_with_different_index_pandas(session_db, simple_df, table_name, index_col_name):
    """create new tables in empty db, using different col types as index, read with Pandas"""
    df = simple_df.copy()
    df.index = df[index_col_name]
    df = df.drop(index_col_name, axis=1)

    table = pb.to_sql(df,
                      table_name=table_name,
                      con=session_db,
                      how='create_only')

    assert table.columns[index_col_name].primary_key
    assert pb.has_table(session_db, table_name)

    # read with PANDAS
    loaded = pd.read_sql_table(table_name, con=session_db, index_col=index_col_name)

    # make a numeric index, since pd.read_sql_table doesn't know to do this
    new_index = loaded.index.name
    loaded[new_index] = loaded.index
    if isinstance(loaded[new_index].iloc[0], str):
        print('converting')
        loaded[new_index] = loaded[new_index].apply(lambda x: float(x))
    loaded.index = loaded[new_index]
    loaded = loaded.drop(new_index, axis=1)

    # pandas doesn't know about UTC
    if 'date' in loaded.columns:
        print('converting date to UTC')
        loaded.date = pd.to_datetime(loaded.date, utc=True)
    else:
        print('making new UTC index (Fake!)')
        loaded.index = df.index

    c = pb.companda(loaded, df, ignore_all_nan_columns=True)
    if not c:
        raise ValueError(c.message)

def test_upsert_complete_rows(pandabase_loaded_db, constants):
    """upsert, changing individual values"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert df.date.dt.tz == UTC

    df.loc[778, 'float'] = 9.9
    df.loc[779, 'integer'] = 999
    df.loc[780, 'string'] = 'nah'
    df.loc[781, 'date'] = pd.to_datetime('1968-01-01', utc=True)

    # check that all values still exist
    assert df.loc[1, 'integer'] == 778
    assert df.date.dt.tz == UTC

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)

def test_upsert_valid_bool(pandabase_loaded_db, how, constants):
    """add rows containing True, False, and None to a boolean column"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = pd.DataFrame(index=[101, 102, 103], columns=['boolean'], data=[True, False, None])
    df.index.name = constants.SAMPLE_INDEX_NAME

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how=how)

    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # Int64Dtype is a fine way to store nullable boolean values
    # Stored in database as boolean or NULL, so the data can only be 0, 1, or None
    assert is_bool_dtype(df.boolean) or is_integer_dtype(df.boolean)
    assert df.loc[101, 'boolean']
    assert not df.loc[102, 'boolean']
    assert pd.isna(df.loc[103, 'boolean'])

    with pytest.raises(KeyError):
        _ = df.loc[104, 'boolean']

def test_upsert_incomplete_rows(pandabase_loaded_db, constants):
    """upsert new rows with only 1 of 5 values (and index)"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    df.loc[11, 'float'] = 9.9
    df.loc[12, 'integer'] = 999
    df.loc[13, 'string'] = 'nah'
    df.loc[14, 'date'] = pd.to_datetime('1968-01-01', utc=True)

    # check that these values exist
    assert df.loc[1, 'integer'] == 778
    assert pd.isna(df.loc[11, 'integer'])
    assert df.loc[13, 'string'] == 'nah'

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)

def test_create_table_no_index_load_pandas(empty_db, minimal_df):
    """add a new minimal table, read with Pandas"""
    table = pb.to_sql(minimal_df,
                      table_name='sample',
                      con=empty_db,
                      how='create_only',
                      auto_index=True)

    # print(table.columns)
    assert table.columns[PANDABASE_DEFAULT_INDEX].primary_key
    assert pb.has_table(empty_db, 'sample')

    loaded = pd.read_sql_table('sample', con=empty_db, index_col=PANDABASE_DEFAULT_INDEX)

    # pandas doesn't know about default index
    loaded.index.name = None
    # pandas doesn't know stored as UTC w/o timezone info
    loaded.date = pd.to_datetime(loaded.date, utc=True)

    assert pb.companda(loaded, minimal_df, ignore_index=True)

def test_drop_table(pandabase_loaded_db):
    """drop each table in the database and check that it no longer exists"""
    names = pb.util.get_db_table_names(pandabase_loaded_db)
    for name in names:
        assert pb.has_table(pandabase_loaded_db, table_name=name)
        pb.util.drop_db_table(con=pandabase_loaded_db, table_name=name)
        assert not pb.has_table(pandabase_loaded_db, table_name=name)