def test_upsert_individual_values1(pandabase_loaded_db, constants):
    """upsert to update rows with only 1 of 5 values (and index) from full dataframe"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    baseline = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # all-NaN frame with matching dtypes; only four cells get values
    edits = pd.DataFrame(index=baseline.index, columns=baseline.columns)
    for name in edits.columns:
        edits[name] = edits[name].astype(baseline[name].dtype)

    changes = [
        (0, 'float', 9.9),
        (1, 'integer', 999),
        (2, 'string', 'nah'),
        (3, 'date', pd.to_datetime('1968-01-01', utc=True)),
    ]
    for pos, col, value in changes:
        edits.loc[edits.index[pos], col] = value

    pb.to_sql(edits, table_name=constants.TABLE_NAME, con=pandabase_loaded_db, how='upsert')

    # the stored table should now equal the baseline with the same four edits applied
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    for pos, col, value in changes:
        baseline.loc[baseline.index[pos], col] = value
    assert companda(baseline, loaded)
def test_create_table_multi_index_rename(empty_db, multi_index_df, multi_index_df_4):
    """create a multi-index table after renaming the index levels"""
    multi_index_df.index.names = ['name/z', 'other name']
    pb.to_sql(
        multi_index_df,
        con=empty_db,
        table_name='sample_mi',
        how='create_only',
    )
def test_auto_index_add_valid_bool(minimal_df, empty_db, constants):
    """append nullable booleans to an auto-indexed table and read them back.

    Fix: the `pandas.np` alias was deprecated in pandas 1.0 and removed in 2.0,
    so `pd.np.isnan(...)` raises AttributeError on modern pandas; `pd.isna`
    also handles pd.NA from nullable dtypes, which np.isnan does not.
    """
    pb.to_sql(
        minimal_df,
        table_name=constants.TABLE_NAME,
        con=empty_db,
        how='create_only',
        auto_index=True,
    )
    assert pb.has_table(empty_db, constants.TABLE_NAME)

    df = pd.DataFrame(index=[101, 102, 103], columns=['boolean'], data=[True, False, None])
    pb.to_sql(
        df,
        table_name=constants.TABLE_NAME,
        con=empty_db,
        how='append',
        auto_index=True,
    )

    df = pb.read_sql(constants.TABLE_NAME, con=empty_db)
    # Int64Dtype is a fine way to store nullable boolean values
    # Stored in database as boolean or NULL so the data can only be 0, 1, or None
    assert is_bool_dtype(df.boolean) or is_integer_dtype(df.boolean)

    # assume values were loaded in order:
    x = len(df)
    assert df.loc[x - 2, 'boolean']
    assert not df.loc[x - 1, 'boolean']
    # pd.isna replaces the removed pd.np.isnan and covers both NaN and pd.NA
    assert pd.isna(df.loc[x, 'boolean'])

    with pytest.raises(KeyError):
        _ = df.loc[x + 1, 'boolean']
def test_upsert_individual_values2(pandabase_loaded_db, constants):
    """upsert to update rows with only 1 of 5 values (and index) from incomplete DataFrame"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    stored = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # scratch frame with the stored dtypes; only two cells get values
    scratch = pd.DataFrame(index=stored.index, columns=stored.columns)
    for name in scratch.columns:
        scratch[name] = scratch[name].astype(stored[name].dtype)
    scratch.loc[scratch.index[0], 'float'] = 9.9
    scratch.loc[scratch.index[3], 'date'] = pd.to_datetime('1968-01-01', utc=True)

    # upsert two single-cell, single-column frames
    one_float = pd.DataFrame(index=scratch.index[:1], columns=['float'], data=[9.9])
    pb.to_sql(one_float,
              table_name=constants.TABLE_NAME, con=pandabase_loaded_db, how='upsert')
    one_date = pd.DataFrame(index=scratch.index[3:4], columns=['date'],
                            data=[pd.to_datetime('1968-01-01', utc=True)])
    pb.to_sql(one_date,
              table_name=constants.TABLE_NAME, con=pandabase_loaded_db, how='upsert')

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    stored.loc[stored.index[0], 'float'] = 9.9
    stored.loc[stored.index[3], 'date'] = pd.to_datetime('1968-01-01', utc=True)
    assert companda(stored, loaded)
def create_branch_table(con):
    """Build the branches table from the module-level `branches` mapping and upsert it.

    Args:
        con: database connection/engine, passed through to pandabase.to_sql

    NOTE(review): relies on a module-level `branches` mapping — confirm its shape
    (assumes one column per branch before the transpose).
    """
    # one row per branch after transpose, keyed by branch_id
    branch_df = pd.DataFrame(branches, columns=branches.keys())
    branch_df = branch_df.transpose()
    branch_df.index.name = "branch_id"
    # fix: dropped the f-prefix — these messages contain no placeholders (lint F541)
    logging.info("Creating branch table ")
    pandabase.to_sql(branch_df, table_name="branches", con=con, how="upsert")
    logging.info("[Success] branch table created! ")
def test_upsert_numeric_column_names(empty_db, n_rows, n_cols, prefix, how):
    """if column names are purely numeric, upsert fails."""
    col_names = [prefix + str(i) for i in range(n_cols)]
    values = np.random.random((n_rows, n_cols))
    df = pd.DataFrame(data=values, index=range(1, n_rows + 1), columns=col_names)
    df.index.name = 'dex'
    with pytest.raises(NameError):
        pb.to_sql(df, con=empty_db, table_name='table', how=how)
def test_scale(rows, cols, empty_db):
    """test different scales. Pandabase fails at 200 cols"""
    frame = scale_df(rows, cols)
    destination = f'scale_{rows}x{cols}'
    pb.to_sql(frame, con=empty_db, table_name=destination, how='upsert')
def test_create_table_fails_non_utc_index(empty_db, simple_df, constants):
    """creating a table with a non-UTC datetime index must raise ValueError"""
    local_index = pd.date_range('2011', freq='d', periods=len(simple_df), tz=LA_TZ)
    simple_df.index = local_index
    with pytest.raises(ValueError):
        pb.to_sql(simple_df, table_name='sample', con=empty_db, how='create_only')
def test_select_fails_multi_index(empty_db, multi_index_df, lowest):
    """reading a multi-index table with a mismatched range selector must raise"""
    pb.to_sql(multi_index_df, table_name='sample_mi', con=empty_db, how='create_only')
    with pytest.raises(Exception):
        pb.read_sql(con=empty_db, table_name='sample_mi',
                    highest=(1000, 1000), lowest=lowest)
def test_select_table_range_fails_different_index(empty_db, simple_df, constants):
    """selecting with a range of the wrong type for the table's index must raise"""
    # move the 'date' column into the index (drops the column, names the index 'date')
    simple_df = simple_df.set_index('date')
    pb.to_sql(simple_df, table_name='sample', con=empty_db, how='create_only')
    with pytest.raises(Exception):
        _ = pb.read_sql('sample', con=empty_db, lowest=0, highest=12)
def test_overwrite_table_fails(pandabase_loaded_db, simple_df, how, constants):
    """Try to append/insert rows with conflicting indices"""
    name = constants.TABLE_NAME
    assert pb.has_table(pandabase_loaded_db, name)
    with pytest.raises(Exception):
        pb.to_sql(simple_df, table_name=name, con=pandabase_loaded_db, how=how)
def test_add_fails_wrong_index_name(pandabase_loaded_db, how, constants):
    """writing a frame whose index name doesn't match the table's must raise NameError"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    bad = pd.DataFrame(data=[['x']], index=[1], columns=['date'])
    bad.index.name = 'not_a_real_name'
    with pytest.raises(NameError):
        pb.to_sql(bad, table_name=constants.TABLE_NAME, con=pandabase_loaded_db, how=how)
def test_upsert_fails_invalid_float(pandabase_loaded_db, how, constants):
    """a string value aimed at the float column must raise TypeError"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    bad = pd.DataFrame(data=[['x']], index=[1], columns=['float'])
    bad.index.name = constants.SAMPLE_INDEX_NAME
    with pytest.raises(TypeError):
        pb.to_sql(bad, table_name=constants.TABLE_NAME, con=pandabase_loaded_db, how=how)
def test_add_fails_invalid_date(pandabase_loaded_db, how, constants):
    """a non-datetime value aimed at the date column must raise"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    bad = pd.DataFrame(data=[['x']], index=[1], columns=['date'])
    bad.index.name = constants.SAMPLE_INDEX_NAME
    with pytest.raises((ValueError, TypeError, sqa.exc.StatementError)):
        pb.to_sql(bad, table_name=constants.TABLE_NAME, con=pandabase_loaded_db, how=how)
def test_write_time(empty_db, how, qty):
    """test that write times are semi-acceptably fast (< 2 s for `qty` rows).

    Fix: elapsed time was measured with datetime.utcnow(), which is deprecated
    (Python 3.12) and reads the wall clock, which can jump; time.monotonic()
    is the correct clock for measuring durations.
    """
    from time import monotonic  # stdlib; local import keeps the fix self-contained

    start = monotonic()
    pb.to_sql(pd.DataFrame(index=range(qty),
                           columns=['a', 'b', 'c'],
                           data=np.random.random((qty, 3))),
              table_name='sample',
              con=empty_db,
              how=how,
              auto_index=True,
              )
    elapsed = monotonic() - start
    assert elapsed < 2.0
def test_create_table_multi_index(empty_db, multi_index_df_4, how):
    """add a new minimal table & read it back with pandabase"""
    pb.to_sql(multi_index_df_4, table_name='sample_mi', con=empty_db, how=how)
    roundtrip = pb.read_sql(con=empty_db, table_name='sample_mi')
    assert companda(multi_index_df_4, roundtrip)
def test_new_column_all_nan(pandabase_loaded_db, df_with_all_nan_col, constants):
    """appending rows that include an all-NaN column should succeed"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    count = len(df_with_all_nan_col)
    # shift the index past the existing rows so the append doesn't collide
    df_with_all_nan_col.index = range(100, 100 + count)
    df_with_all_nan_col.index.name = constants.SAMPLE_INDEX_NAME
    pb.to_sql(df_with_all_nan_col,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              auto_index=False,
              how='append')
def test_new_column_fails(pandabase_loaded_db, how, constants):
    """writing a column the table doesn't have must raise NameError"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    extra = pd.DataFrame(data=[[1.1]], index=[101], columns=['new_column'])
    extra.index.name = constants.SAMPLE_INDEX_NAME
    assert extra.loc[101, 'new_column'] == 1.1
    with pytest.raises(NameError):
        pb.to_sql(extra,
                  table_name=constants.TABLE_NAME,
                  con=pandabase_loaded_db,
                  how=how,
                  )
def test_upsert_auto_index_fails(empty_db, minimal_df):
    """add a new minimal table w/o index; trying to add again should fail"""
    shared = dict(table_name='sample', con=empty_db, auto_index=True)
    pb.to_sql(minimal_df, how='create_only', **shared)
    # upsert needs a real primary key; an auto index can't support it
    with pytest.raises(IOError):
        pb.to_sql(minimal_df, how='upsert', **shared)
def test_add_fails_invalid_timezone(pandabase_loaded_db, how, constants, tz):
    """dates carrying a disallowed timezone must raise ValueError"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    stamps = pd.date_range('2019-06-06', periods=5, freq='h', tz=tz)
    df = pd.DataFrame(data=stamps, index=range(5), columns=['date'])
    df.index.name = constants.SAMPLE_INDEX_NAME
    print(df.date)
    with pytest.raises(ValueError):
        pb.to_sql(df, table_name=constants.TABLE_NAME, con=pandabase_loaded_db, how=how)
def test_append_bad_pk_fails(pandabase_loaded_db, simple_df, constants, unique_index_name):
    """Try to append rows with conflicting index columns"""
    name = constants.TABLE_NAME
    assert pb.has_table(pandabase_loaded_db, name)
    # re-key on the integer column (optionally also duplicating it under the
    # table's real index name) so the primary key no longer lines up
    simple_df.index = simple_df['integer']
    if unique_index_name:
        simple_df[constants.SAMPLE_INDEX_NAME] = simple_df.integer
    simple_df = simple_df.drop(columns='integer')
    with pytest.raises(NameError):
        pb.to_sql(simple_df, table_name=name, con=pandabase_loaded_db, how='append')
def test_create_table_no_index_load_pandas(empty_db, minimal_df, as_type, how):
    """add a new minimal table to db, read with Pandas"""
    if as_type:
        minimal_df['integer'] = minimal_df['integer'].astype('Int64')
    table = pb.to_sql(minimal_df, table_name='sample', con=empty_db,
                      how=how, auto_index=True)
    assert table.columns[PANDABASE_DEFAULT_INDEX].primary_key
    assert pb.has_table(empty_db, 'sample')

    loaded = pd.read_sql_table('sample', con=empty_db,
                               index_col=PANDABASE_DEFAULT_INDEX)
    # pandas doesn't know about default index
    loaded.index.name = None
    # pandas doesn't know stored as UTC w/o timezone info
    loaded.date = pd.to_datetime(loaded.date, utc=True)
    assert pb.companda(loaded, minimal_df, ignore_index=True)
def test_select_some_multi_index(empty_db, multi_index_df, lowest, length):
    """range-select from a multi-index table and check the row count"""
    pb.to_sql(multi_index_df, table_name='sample_mi', con=empty_db, how='create_only')
    selected = pb.read_sql(con=empty_db, table_name='sample_mi',
                           highest=(1000, 1000), lowest=lowest)
    print('\n', selected)
    assert len(selected) == length
def test_upsert_new_cols(pandabase_loaded_db, constants, col_to_duplicate):
    """upsert with add_new_columns=True must persist a brand-new column.

    Fix: the final assertion checked `df.columns`, which trivially contains
    'bonus_col' because the test itself just added it — a vacuous assert.
    The intent is to verify the column made it into the database, so it now
    checks the re-loaded frame.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    df['bonus_col'] = df[col_to_duplicate].copy()

    pb.to_sql(df, table_name=constants.TABLE_NAME, con=pandabase_loaded_db,
              how='upsert', add_new_columns=True)

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)
    assert 'bonus_col' in loaded.columns
def test_create_select_table_range_datetime_index(empty_db, simple_df, constants):
    """add a new table with explicit index, read it back with pandabase, check equality"""
    # move 'date' into the index (drops the column, names the index 'date')
    simple_df = simple_df.set_index('date')
    table = pb.to_sql(simple_df, table_name='sample', con=empty_db, how='create_only')
    assert table.columns['date'].primary_key
    assert pb.has_table(empty_db, 'sample')

    # inverted bounds select nothing
    loaded0 = pb.read_sql('sample', con=empty_db,
                          lowest=simple_df.index[-1], highest=simple_df.index[0])
    print(loaded0)
    assert len(loaded0) == 0

    # full range returns everything
    loaded = pb.read_sql('sample', con=empty_db,
                         lowest=simple_df.index[0], highest=simple_df.index[-1])
    assert pb.companda(loaded, simple_df, ignore_all_nan_columns=True)
def create_collections_table(con):
    """Build the collections table from the authority credits frame and upsert it.

    Args:
        con: database connection/engine, passed through to pandabase.to_sql
    """
    collections_df = AuthorityFiles.Authority_instance.df_credits
    collections_df = collections_df.rename(columns=collection_table_field_mapper)
    # keep only mapped columns, in frame order; set gives O(1) membership tests
    # instead of scanning the mapper's values per column, and the needless
    # list(...) around .columns is gone
    wanted = set(collection_table_field_mapper.values())
    cols = [col for col in collections_df.columns if col in wanted]
    collections_df_final = collections_df[cols]
    collections_df_final.index.name = "collection_id"
    collections_df_final = replace_branch_with_fk(collections_df_final)
    # fix: dropped the f-prefix — these messages contain no placeholders (lint F541)
    logging.info("Creating collections table ")
    pandabase.to_sql(collections_df_final, table_name="collections", con=con, how="upsert")
    logging.info("[Success] collections table created! ")
def data1_to_sql(data1, how):
    """Write *data1* to the `users` table via pandabase, keyed on the 'id' column.

    Args:
        data1: DataFrame of rows to add to the `users` table
        how: {'create_only', 'upsert', 'append'}
            - create_only: if the table exists, raise an error and stop.
            - append: if the table exists, append data (raise if index
              overlaps); create the table if it does not exist.
            - upsert: create the table if needed; if a record exists, update
              it (possibly replacing values with NULL), else insert it.

    NOTE(review): relies on a module-level `con` connection — confirm it is
    defined wherever this function is imported/used.
    """
    # 'id' becomes the primary-key index pandabase writes against
    data1 = data1.set_index('id')
    pandabase.to_sql(data1, table_name='users', con=con, how=how)
def test_coerce_integer(pandabase_loaded_db, how, constants):
    """insert an integer into float column"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pd.DataFrame(data=[[77.0]], index=[1], columns=['integer'])
    df.index.name = constants.SAMPLE_INDEX_NAME

    dtypes_before = df.dtypes
    pb.to_sql(df, table_name=constants.TABLE_NAME, con=pandabase_loaded_db, how='upsert')
    # writing must not mutate the caller's frame dtypes
    for name in df.columns:
        assert dtypes_before[name] == df.dtypes[name]

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert loaded.loc[1, 'integer'] == 77
def test_add_new_rows(pandabase_loaded_db, simple_df, how, constants):
    """upsert or append new complete rows"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    new_rows = simple_df.copy()
    new_rows.index = new_rows.index + 100
    pb.to_sql(new_rows, table_name=constants.TABLE_NAME, con=pandabase_loaded_db, how=how)

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert loaded.isna().sum().sum() == 0
    # original rows untouched, new rows present
    assert companda(simple_df, loaded.loc[simple_df.index])
    assert companda(new_rows, loaded.loc[new_rows.index])
def test_append_auto_index(empty_db, minimal_df):
    """add a new minimal table; add it again"""
    pb.to_sql(minimal_df, table_name='sample', con=empty_db,
              auto_index=True, how='create_only')
    table2 = pb.to_sql(minimal_df, table_name='sample', con=empty_db,
                       auto_index=True, how='append')
    assert table2.columns[PANDABASE_DEFAULT_INDEX].primary_key

    loaded = pb.read_sql('sample', con=empty_db)
    assert pb.has_table(empty_db, 'sample')
    # appending the same frame should exactly double the stored rows
    double_df = pd.concat([minimal_df, minimal_df], ignore_index=True)
    assert pb.companda(loaded, double_df, ignore_index=True)
    assert len(loaded) == len(minimal_df) * 2