def test_same_companda_alter_dtype(minimal_df):
    """Changing between types changes equality (e.g. bool != int)."""
    df2 = minimal_df.copy()
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int is the documented replacement and yields the same platform dtype.
    df2.boolean = df2.boolean.astype(int)
    print(minimal_df.dtypes)
    print(df2.dtypes)
    assert not companda(df2, minimal_df, check_dtype=True)
def test_upsert_individual_values2(pandabase_loaded_db, constants):
    """upsert to update rows with only 1 of 5 values (and index) from incomplete DataFrame"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    original = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # Build an all-NA frame with the same layout and dtypes as the stored table.
    sparse = pd.DataFrame(index=original.index, columns=original.columns)
    for name in sparse.columns:
        sparse[name] = sparse[name].astype(original[name].dtype)
    sparse.loc[sparse.index[0], 'float'] = 9.9
    sparse.loc[sparse.index[3], 'date'] = pd.to_datetime('1968-01-01', utc=True)

    # Upsert each change as its own single-row, single-column DataFrame.
    single_float = pd.DataFrame(index=sparse.index[:1], columns=['float'], data=[9.9])
    pb.to_sql(single_float,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')
    single_date = pd.DataFrame(index=sparse.index[3:4], columns=['date'],
                               data=[pd.to_datetime('1968-01-01', utc=True)])
    pb.to_sql(single_date,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # Reading back should show exactly those two cell changes.
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    original.loc[original.index[0], 'float'] = 9.9
    original.loc[original.index[3], 'date'] = pd.to_datetime('1968-01-01', utc=True)
    assert companda(original, loaded)
def test_upsert_individual_values1(pandabase_loaded_db, constants):
    """upsert to update rows with only 1 of 5 values (and index) from full dataframe"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    original = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # Build an all-NA frame with the same layout and dtypes as the stored table,
    # then set a single value in each of four rows.
    sparse = pd.DataFrame(index=original.index, columns=original.columns)
    for name in sparse.columns:
        sparse[name] = sparse[name].astype(original[name].dtype)

    edits = [(0, 'float', 9.9),
             (1, 'integer', 999),
             (2, 'string', 'nah'),
             (3, 'date', pd.to_datetime('1968-01-01', utc=True))]
    for position, column, value in edits:
        sparse.loc[sparse.index[position], column] = value

    pb.to_sql(sparse,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # Reading back should show exactly those four cell changes.
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    for position, column, value in edits:
        original.loc[original.index[position], column] = value
    assert companda(original, loaded)
def test_add_new_rows(pandabase_loaded_db, simple_df, how, constants):
    """upsert or append new complete rows"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    # Shift the index so every row is new relative to the stored table.
    shifted = simple_df.copy()
    shifted.index = shifted.index + 100
    pb.to_sql(shifted,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how=how)

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert loaded.isna().sum().sum() == 0
    # Both the pre-existing rows and the newly added rows round-trip intact.
    assert companda(simple_df, loaded.loc[simple_df.index])
    assert companda(shifted, loaded.loc[shifted.index])
def test_companda_nan_different_values(simple_df):
    """Frames differing in a real value are unequal even when both hold a NaN."""
    df = simple_df.copy()
    # np.NaN alias was removed in NumPy 2.0; np.nan is the canonical spelling.
    df.iloc[2, 2] = np.nan
    df2 = simple_df.copy()
    df2.iloc[2, 2] = np.nan
    df2.iloc[1, 2] = 450
    x = companda(df, df2)
    print(x)
    assert not x
def test_create_table_multi_index_4(empty_db, multi_index_df_4, how):
    """add a new minimal table & read it back with pandabase

    Renamed from test_create_table_multi_index: a second function with that
    exact name is defined later in this module, so this one was silently
    shadowed and never collected or run by pytest.
    """
    pb.to_sql(multi_index_df_4,
              table_name='sample_mi',
              con=empty_db,
              how=how)
    loaded = pb.read_sql(con=empty_db, table_name='sample_mi')
    assert companda(multi_index_df_4, loaded)
def test_select_all_multi_index(empty_db, multi_index_df):
    """add a new minimal table & read it back with pandabase - select all"""
    table = pb.to_sql(multi_index_df,
                      table_name='sample_mi',
                      con=empty_db,
                      how='create_only')
    # Both index levels must have become primary-key columns.
    for pk_name in ('this', 'that'):
        assert table.columns[pk_name].primary_key

    # A (lowest, highest) range covering all rows behaves like select-all.
    loaded = pb.read_sql(con=empty_db,
                         table_name='sample_mi',
                         highest=(100, 100),
                         lowest=(0, 0))
    print('\n', loaded)
    assert companda(multi_index_df, loaded)
def test_create_table_multi_index(empty_db, multi_index_df, how):
    """add a new minimal table & read it back with pandabase"""
    table = pb.to_sql(multi_index_df,
                      table_name='sample_mi',
                      con=empty_db,
                      how=how)
    # Both index levels must have become primary-key columns.
    for pk_name in ('this', 'that'):
        assert table.columns[pk_name].primary_key

    loaded = pb.read_sql(con=empty_db, table_name='sample_mi')
    print('\n', loaded)
    assert companda(multi_index_df, loaded)
def test_upsert_new_cols(pandabase_loaded_db, constants, col_to_duplicate):
    """upsert with add_new_columns=True adds a brand-new column to the table"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    df['bonus_col'] = df[col_to_duplicate].copy()
    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert',
              add_new_columns=True)

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    # The original asserted 'bonus_col' in df.columns, which is trivially true
    # (the column was just added to df above); the meaningful check is that the
    # new column survived the round trip through the database.
    assert 'bonus_col' in loaded.columns
    assert companda(df, loaded)
def test_read_pandas_table_pandas(pandabase_loaded_db, simple_df, constants):
    """baseline: read pre-written table containing simple_df, using pd.read_sql_table"""
    assert has_table(pandabase_loaded_db, constants.TABLE_NAME)
    # NOTE(review): parse_dates='dates' does not match the 'date' column name;
    # harmless here because 'date' is converted explicitly below — confirm intent.
    loaded_df = pd.read_sql_table(constants.TABLE_NAME,
                                  con=pandabase_loaded_db,
                                  index_col=constants.SAMPLE_INDEX_NAME,
                                  parse_dates='dates')
    # sqlite does not store TZ info, so re-localize dates to UTC after the read.
    loaded_df['date'] = pd.to_datetime(loaded_df['date'], utc=True)

    expected_cols = make_clean_columns_dict(simple_df)
    actual_cols = make_clean_columns_dict(loaded_df)
    for name in expected_cols:
        print(name)
        if name == 'nan':
            # column of all NaN values is skipped
            continue
        assert_sqla_types_equivalent(expected_cols[name], actual_cols[name])
    assert companda(loaded_df, simple_df)
def test_upsert_complete_rows(pandabase_loaded_db, constants):
    """upsert, changing individual values"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert df.date.dt.tz == UTC

    # One cell changed per row, across four existing rows.
    edits = [(778, 'float', 9.9),
             (779, 'integer', 999),
             (780, 'string', 'nah'),
             (781, 'date', pd.to_datetime('1968-01-01', utc=True))]
    for row, column, value in edits:
        df.loc[row, column] = value

    # check that all values still exist
    assert df.loc[1, 'integer'] == 778
    assert df.date.dt.tz == UTC

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)
def test_select_pandas_table(pandas_loaded_db, simple_df, constants):
    """using pandabase.read_sql: read pandas-written table containing simple_df,

    this test fails because:
    when pandas writes the entry, it does not create an explicit primary key.
    the table is treated as a multiindex"""
    assert has_table(pandas_loaded_db, constants.TABLE_NAME)
    loaded = pb.read_sql(constants.TABLE_NAME, pandas_loaded_db)

    # Pandas doesn't write an explicit PK; move the index into a plain column
    # so both frames line up for comparison.
    simple_df[simple_df.index.name] = simple_df.index
    simple_df.index.name = None

    expected_cols = make_clean_columns_dict(simple_df)
    actual_cols = make_clean_columns_dict(loaded)
    for name in expected_cols:
        print(name)
        if name == 'nan':
            continue
        assert_sqla_types_equivalent(expected_cols[name], actual_cols[name])
    assert companda(loaded, simple_df)
def test_upsert_incomplete_rows(pandabase_loaded_db, constants):
    """upsert new rows with only 1 of 5 values (and index)"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # Rows 11-14 are brand new; each gets exactly one value (other cells stay NA).
    for row, column, value in [(11, 'float', 9.9),
                               (12, 'integer', 999),
                               (13, 'string', 'nah'),
                               (14, 'date', pd.to_datetime('1968-01-01', utc=True))]:
        df.loc[row, column] = value

    # check that these values exist
    assert df.loc[1, 'integer'] == 778
    assert pd.isna(df.loc[11, 'integer'])
    assert df.loc[13, 'string'] == 'nah'

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)
def test_all_nans_ignore(df_with_all_nan_col):
    """A frame equals itself when all-NaN columns are ignored."""
    result = companda(df_with_all_nan_col,
                      df_with_all_nan_col,
                      ignore_all_nan_columns=True)
    assert result
def test_same_companda_index2(minimal_df):
    """Dropping a row breaks equality."""
    shorter = minimal_df.copy().drop(1, axis=0)
    assert not companda(shorter, minimal_df)
def test_same_companda_index1(minimal_df):
    """Relabeling an index entry breaks equality."""
    relabeled = minimal_df.copy().rename(index={1: 99})
    assert not companda(relabeled, minimal_df)
def test_same_companda_cols4(minimal_df):
    """Renaming a column breaks equality."""
    renamed = minimal_df.copy().rename(columns={'integer': 'x'})
    assert not companda(minimal_df, renamed)
def test_same_companda_cols2(minimal_df):
    """Dropping a column breaks equality."""
    narrower = minimal_df.copy().drop(['float'], axis=1)
    assert not companda(minimal_df, narrower)
def test_same_companda_copy2(minimal_df):
    """A frame equals a copy of itself."""
    duplicate = minimal_df.copy()
    assert companda(minimal_df, duplicate)
def test_same_companda_datetime3(simple_df):
    """Localizing the date column to a non-UTC zone breaks equality."""
    localized = simple_df.copy()
    localized['date'] = pd.to_datetime(localized['date'].values, utc=False).tz_localize(TZ)
    result = companda(localized, simple_df)
    print(result.message)
    assert not result
def test_same_companda2(minimal_df):
    """A minimal frame equals itself."""
    assert companda(minimal_df, minimal_df)
def test_different_companda(minimal_df, simple_df):
    """Two unrelated frames are not equal."""
    assert not companda(minimal_df, simple_df)
def test_added_nans_ignore(simple_df, df_with_all_nan_col):
    """An extra all-NaN column is tolerated when ignore_all_nan_columns=True."""
    result = companda(df_with_all_nan_col, simple_df, ignore_all_nan_columns=True)
    assert result
def test_all_nans_do_not_ignore(df_with_all_nan_col):
    """A frame equals itself even when all-NaN columns are compared."""
    result = companda(df_with_all_nan_col,
                      df_with_all_nan_col,
                      ignore_all_nan_columns=False)
    assert result
def test_same_companda1(simple_df):
    """A simple frame equals itself."""
    assert companda(simple_df, simple_df)
def test_same_companda_nan(simple_df):
    """Introducing a NaN into one frame breaks equality."""
    df = simple_df.copy()
    # pd.np was deprecated in pandas 0.25 and removed in 2.0; use numpy
    # directly (np is already imported by this module).
    df.iloc[2, 2] = np.nan
    assert not companda(df, simple_df)
def test_added_nans_do_not_ignore(simple_df, df_with_all_nan_col):
    """An extra all-NaN column breaks equality when it is not ignored."""
    result = companda(df_with_all_nan_col,
                      simple_df,
                      ignore_all_nan_columns=False)
    assert not result
def test_same_companda_string(simple_df):
    """Changing a single string cell breaks equality."""
    modified = simple_df.copy()
    modified.loc[1, 'string'] = 'z'
    assert not companda(modified, simple_df)
def test_same_companda_epsilon1(simple_df):
    """Float values that differ by less than epsilon still compare equal."""
    nudged = simple_df.copy()
    nudged.float = nudged.float.apply(lambda v: v + .0001)
    assert companda(nudged, simple_df)
def test_same_companda_datetime1sec(simple_df):
    """Dates shifted by a single second break equality."""
    shifted = simple_df.copy()
    shifted['date'] = shifted['date'].apply(lambda t: t + pd.Timedelta(seconds=1))
    assert not companda(shifted, simple_df)