Example 1
def test_same_companda_alter_dtype(minimal_df):
    """changing between types changes equality (e.g. bool!=int)"""
    df2 = minimal_df.copy()
    df2.boolean = df2.boolean.astype(int)  # plain int: np.int was removed from NumPy
    print(minimal_df.dtypes)
    print(df2.dtypes)
    assert not companda(df2, minimal_df, check_dtype=True)
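These tests compare DataFrames with companda() against small pytest fixtures such as minimal_df and simple_df, which are not shown on this page. A minimal sketch of what a minimal_df-style fixture might provide (column names and values here are illustrative assumptions, not the project's actual conftest.py):

import pandas as pd
import pytest


@pytest.fixture()
def minimal_df():
    # hypothetical fixture: one column per basic dtype, integer index
    return pd.DataFrame({
        'boolean': [True, False, True],
        'integer': [1, 2, 3],
        'float': [1.5, 2.5, 3.5],
        'string': ['a', 'b', 'c'],
    })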
Example 2
def test_upsert_individual_values2(pandabase_loaded_db, constants):
    """upsert to update rows with only 1 of 5 values (and index) from incomplete DataFrame"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    df2 = pd.DataFrame(index=df.index, columns=df.columns)
    for col in df2.columns:
        df2[col] = df2[col].astype(df[col].dtype)

    df2.loc[df2.index[0], 'float'] = 9.9
    df2.loc[df2.index[3], 'date'] = pd.to_datetime('1968-01-01', utc=True)

    pb.to_sql(pd.DataFrame(index=df2.index[:1], columns=['float'], data=[9.9]),
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')
    pb.to_sql(pd.DataFrame(index=df2.index[3:4],
                           columns=['date'],
                           data=[pd.to_datetime('1968-01-01', utc=True)]),
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    df.loc[df.index[0], 'float'] = 9.9
    df.loc[df.index[3], 'date'] = pd.to_datetime('1968-01-01', utc=True)

    assert companda(df, loaded)
Example 3
def test_upsert_individual_values1(pandabase_loaded_db, constants):
    """upsert to update rows with only 1 of 5 values (and index) from full dataframe"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    df2 = pd.DataFrame(index=df.index, columns=df.columns)
    for col in df2.columns:
        df2[col] = df2[col].astype(df[col].dtype)

    df2.loc[df2.index[0], 'float'] = 9.9
    df2.loc[df2.index[1], 'integer'] = 999
    df2.loc[df2.index[2], 'string'] = 'nah'
    df2.loc[df2.index[3], 'date'] = pd.to_datetime('1968-01-01', utc=True)

    pb.to_sql(df2,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    df.loc[df.index[0], 'float'] = 9.9
    df.loc[df.index[1], 'integer'] = 999
    df.loc[df.index[2], 'string'] = 'nah'
    df.loc[df.index[3], 'date'] = pd.to_datetime('1968-01-01', utc=True)

    assert companda(df, loaded)
Example 4
def test_add_new_rows(pandabase_loaded_db, simple_df, how, constants):
    """upsert or append new complete rows"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = simple_df.copy()
    df.index = df.index + 100

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how=how)

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    # print('loaded post-upsert by pandabase:')
    # print(loaded)

    assert loaded.isna().sum().sum() == 0
    assert companda(simple_df, loaded.loc[simple_df.index])
    assert companda(df, loaded.loc[df.index])
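Several tests above take a how argument as a fixture. A plausible sketch of how it could be parametrized so each test runs once per write mode (the actual parametrization in the project may differ):

import pytest


@pytest.fixture(params=['upsert', 'append'])
def how(request):
    # hypothetical fixture covering the write modes passed to pb.to_sql above
    return request.param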
Example 5
def test_companda_nan_different_values(simple_df):
    df = simple_df.copy()
    df.iloc[2, 2] = np.nan

    df2 = simple_df.copy()
    df2.iloc[2, 2] = np.nan
    df2.iloc[1, 2] = 450
    x = companda(df, df2)
    print(x)
    assert not x
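As the test above suggests, companda() returns a result object rather than a plain bool: it is truthy when the frames are equivalent and (as a later example shows) exposes a message attribute describing the difference. A small usage sketch under that assumption (the import path is a guess):

import pandas as pd
from pandabase.companda import companda  # import path is an assumption

a = pd.DataFrame({'x': [1, 2, 3]})
b = pd.DataFrame({'x': [1, 2, 99]})

result = companda(a, b)
print(bool(result))    # False: one value differs
print(result.message)  # human-readable explanation of the mismatch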
Example 6
def test_create_table_multi_index(empty_db, multi_index_df_4, how):
    """add a new minimal table & read it back with pandabase"""
    table = pb.to_sql(multi_index_df_4,
                      table_name='sample_mi',
                      con=empty_db,
                      how=how,
                      )

    loaded = pb.read_sql(con=empty_db, table_name='sample_mi')

    assert companda(multi_index_df_4, loaded)
Example 7
def test_select_all_multi_index(empty_db, multi_index_df):
    """add a new minimal table & read it back with pandabase - select all"""
    table = pb.to_sql(multi_index_df,
                      table_name='sample_mi',
                      con=empty_db,
                      how='create_only',
                      )

    # print(table.columns)
    assert table.columns['this'].primary_key
    assert table.columns['that'].primary_key

    loaded = pb.read_sql(con=empty_db, table_name='sample_mi', highest=(100, 100), lowest=(0, 0))
    print('\n', loaded)

    assert companda(multi_index_df, loaded)
Example 8
def test_create_table_multi_index(empty_db, multi_index_df, how):
    """add a new minimal table & read it back with pandabase"""
    table = pb.to_sql(multi_index_df,
                      table_name='sample_mi',
                      con=empty_db,
                      how=how,
                      )

    # print(table.columns)
    assert table.columns['this'].primary_key
    assert table.columns['that'].primary_key

    loaded = pb.read_sql(con=empty_db, table_name='sample_mi')
    print('\n', loaded)

    assert companda(multi_index_df, loaded)
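The multi-index tests assume a fixture whose index has two named levels, 'this' and 'that', since both are asserted to be primary-key columns after pb.to_sql. A sketch of what such a fixture could look like (values are illustrative assumptions):

import pandas as pd
import pytest


@pytest.fixture()
def multi_index_df():
    # hypothetical two-level index; level names match the primary keys asserted above
    index = pd.MultiIndex.from_product([[1, 2], [10, 20]], names=['this', 'that'])
    return pd.DataFrame({'float': [0.1, 0.2, 0.3, 0.4]}, index=index)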
Example 9
def test_upsert_new_cols(pandabase_loaded_db, constants, col_to_duplicate):
    """upsert new rows with only 1 of 5 values (and index)"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    df['bonus_col'] = df[col_to_duplicate].copy()

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert',
              add_new_columns=True)

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)
    assert 'bonus_col' in loaded.columns
Example 10
def test_read_pandas_table_pandas(pandabase_loaded_db, simple_df, constants):
    """baseline: read pre-written table containing simple_df, using pd.read_sql_table"""
    assert has_table(pandabase_loaded_db, constants.TABLE_NAME)

    loaded_df = pd.read_sql_table(constants.TABLE_NAME,
                                  con=pandabase_loaded_db,
                                  index_col=constants.SAMPLE_INDEX_NAME,
                                  parse_dates=['date'])

    # SQLite does not store timezone info, so convert the column back to UTC
    loaded_df['date'] = pd.to_datetime(loaded_df['date'], utc=True)

    orig_columns = make_clean_columns_dict(simple_df)
    loaded_columns = make_clean_columns_dict(loaded_df)
    for key in orig_columns.keys():
        print(key)
        if key == 'nan':
            # column of all NaN values is skipped
            continue
        assert_sqla_types_equivalent(orig_columns[key], loaded_columns[key])
    assert companda(loaded_df, simple_df)
Example 11
def test_upsert_complete_rows(pandabase_loaded_db, constants):
    """upsert, changing individual values"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert df.date.dt.tz == UTC

    df.loc[778, 'float'] = 9.9
    df.loc[779, 'integer'] = 999
    df.loc[780, 'string'] = 'nah'
    df.loc[781, 'date'] = pd.to_datetime('1968-01-01', utc=True)

    # check that values outside the updated rows are unchanged
    assert df.loc[1, 'integer'] == 778
    assert df.date.dt.tz == UTC

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)
Example 12
def test_select_pandas_table(pandas_loaded_db, simple_df, constants):
    """using pandabase.read_sql:
    read pandas-written table containing simple_df,

    this test fails because: when pandas writes the entry, it does not create
    an explicit primary key. the table is treated as a multiindex"""
    assert has_table(pandas_loaded_db, constants.TABLE_NAME)

    df = pb.read_sql(constants.TABLE_NAME, pandas_loaded_db)

    # line up pk since Pandas doesn't deal with it well
    simple_df[simple_df.index.name] = simple_df.index
    simple_df.index.name = None
    orig_columns = make_clean_columns_dict(simple_df)

    loaded_columns = make_clean_columns_dict(df)
    for key in orig_columns.keys():
        print(key)
        if key == 'nan':
            continue
        assert_sqla_types_equivalent(orig_columns[key], loaded_columns[key])
    assert companda(df, simple_df)
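The docstring above explains the failure: pandas' own DataFrame.to_sql stores the index as an ordinary column and declares no primary key. A sketch of how the pandas_loaded_db fixture presumably writes the table (table name and contents are assumptions):

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('sqlite://')  # in-memory SQLite
df = pd.DataFrame({'integer': [1, 2, 3]},
                  index=pd.Index([10, 11, 12], name='sample_id'))

# plain pandas write: the index is stored as a regular column, no PRIMARY KEY constraint
df.to_sql('sample_table', con=engine, index=True)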
Example 13
def test_upsert_incomplete_rows(pandabase_loaded_db, constants):
    """upsert new rows with only 1 of 5 values (and index)"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    df.loc[11, 'float'] = 9.9
    df.loc[12, 'integer'] = 999
    df.loc[13, 'string'] = 'nah'
    df.loc[14, 'date'] = pd.to_datetime('1968-01-01', utc=True)

    # check that these values exist
    assert df.loc[1, 'integer'] == 778
    assert pd.isna(df.loc[11, 'integer'])
    assert df.loc[13, 'string'] == 'nah'

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)
Example 14
def test_all_nans_ignore(df_with_all_nan_col):
    assert companda(df_with_all_nan_col,
                    df_with_all_nan_col,
                    ignore_all_nan_columns=True)
Example 15
def test_same_companda_index2(minimal_df):
    df = minimal_df.copy()
    df = df.drop(1, axis=0)
    assert not companda(df, minimal_df)
Example 16
def test_same_companda_index1(minimal_df):
    df = minimal_df.copy()
    df = df.rename(index={1: 99})
    assert not companda(df, minimal_df)
Example 17
def test_same_companda_cols4(minimal_df):
    df = minimal_df.copy()
    df = df.rename(columns={'integer': 'x'})
    assert not companda(minimal_df, df)
Example 18
def test_same_companda_cols2(minimal_df):
    df = minimal_df.copy()
    df = df.drop(['float'], axis=1)
    assert not companda(minimal_df, df)
Example 19
def test_same_companda_copy2(minimal_df):
    assert companda(minimal_df, minimal_df.copy())
Example 20
def test_same_companda_datetime3(simple_df):
    df = simple_df.copy()
    df['date'] = pd.to_datetime(df['date'].values, utc=False).tz_localize(TZ)
    c = companda(df, simple_df)
    print(c.message)
    assert not c
Example 21
def test_same_companda2(minimal_df):
    assert companda(minimal_df, minimal_df)
Example 22
def test_different_companda(minimal_df, simple_df):
    assert not companda(minimal_df, simple_df)
Example 23
def test_added_nans_ignore(simple_df, df_with_all_nan_col):
    assert companda(df_with_all_nan_col,
                    simple_df,
                    ignore_all_nan_columns=True)
Example 24
def test_all_nans_do_not_ignore(df_with_all_nan_col):
    assert companda(df_with_all_nan_col,
                    df_with_all_nan_col,
                    ignore_all_nan_columns=False)
Example 25
def test_same_companda1(simple_df):
    assert companda(simple_df, simple_df)
Example 26
def test_same_companda_nan(simple_df):
    df = simple_df.copy()
    df.iloc[2, 2] = np.nan  # pd.np was removed from pandas; use numpy directly
    assert not companda(df, simple_df)
Example 27
def test_added_nans_do_not_ignore(simple_df, df_with_all_nan_col):
    assert not companda(
        df_with_all_nan_col, simple_df, ignore_all_nan_columns=False)
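These tests suggest that df_with_all_nan_col is simple_df plus one extra column that is entirely NaN, and that ignore_all_nan_columns decides whether such a column counts toward equality. A sketch of that fixture relationship (the construction and column name are assumptions, though an all-NaN column named 'nan' is also mentioned in an earlier example):

import numpy as np
import pytest


@pytest.fixture()
def df_with_all_nan_col(simple_df):
    # hypothetical: copy simple_df and add a column that is entirely NaN
    df = simple_df.copy()
    df['nan'] = np.nan
    return df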
Example 28
def test_same_companda_string(simple_df):
    df = simple_df.copy()
    df.loc[1, 'string'] = 'z'
    assert not companda(df, simple_df)
Example 29
def test_same_companda_epsilon1(simple_df):
    df = simple_df.copy()
    df.float = df.float.apply(lambda x: x + .0001)
    assert companda(df, simple_df)
Example 30
def test_same_companda_datetime1sec(simple_df):
    df = simple_df.copy()
    df['date'] = df['date'].apply(lambda x: x + pd.Timedelta(seconds=1))
    assert not companda(df, simple_df)