def test_same_companda_alter_dtype(minimal_df):
    """Changing between types changes equality (e.g. bool != int)."""
    df2 = minimal_df.copy()
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int is the documented replacement and yields the same platform dtype.
    df2.boolean = df2.boolean.astype(int)
    print(minimal_df.dtypes)
    print(df2.dtypes)
    assert not companda(df2, minimal_df, check_dtype=True)
def test_upsert_individual_values2(pandabase_loaded_db, constants):
    """upsert to update rows with only 1 of 5 values (and index) from incomplete DataFrame"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    original = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # Build an all-NA frame with the same layout and dtypes as the stored table.
    sparse = pd.DataFrame(index=original.index, columns=original.columns)
    for name in sparse.columns:
        sparse[name] = sparse[name].astype(original[name].dtype)
    sparse.loc[sparse.index[0], 'float'] = 9.9
    sparse.loc[sparse.index[3], 'date'] = pd.to_datetime('1968-01-01', utc=True)

    # Upsert each change as its own single-row, single-column DataFrame.
    single_float = pd.DataFrame(index=sparse.index[:1], columns=['float'], data=[9.9])
    pb.to_sql(single_float,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')
    single_date = pd.DataFrame(index=sparse.index[3:4], columns=['date'],
                               data=[pd.to_datetime('1968-01-01', utc=True)])
    pb.to_sql(single_date,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # Reading back should show exactly those two cell changes.
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    original.loc[original.index[0], 'float'] = 9.9
    original.loc[original.index[3], 'date'] = pd.to_datetime('1968-01-01', utc=True)
    assert companda(original, loaded)
def test_upsert_individual_values1(pandabase_loaded_db, constants):
    """upsert to update rows with only 1 of 5 values (and index) from full dataframe"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    original = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # Build an all-NA frame with the same layout and dtypes as the stored table,
    # then set a single value in each of four rows.
    sparse = pd.DataFrame(index=original.index, columns=original.columns)
    for name in sparse.columns:
        sparse[name] = sparse[name].astype(original[name].dtype)

    edits = [(0, 'float', 9.9),
             (1, 'integer', 999),
             (2, 'string', 'nah'),
             (3, 'date', pd.to_datetime('1968-01-01', utc=True))]
    for position, column, value in edits:
        sparse.loc[sparse.index[position], column] = value

    pb.to_sql(sparse,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # Reading back should show exactly those four cell changes.
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    for position, column, value in edits:
        original.loc[original.index[position], column] = value
    assert companda(original, loaded)
def test_add_new_rows(pandabase_loaded_db, simple_df, how, constants):
    """upsert or append new complete rows"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    # Shift the index so every row is new relative to the stored table.
    shifted = simple_df.copy()
    shifted.index = shifted.index + 100
    pb.to_sql(shifted,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how=how)

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert loaded.isna().sum().sum() == 0
    # Both the pre-existing rows and the newly added rows round-trip intact.
    assert companda(simple_df, loaded.loc[simple_df.index])
    assert companda(shifted, loaded.loc[shifted.index])
def test_companda_nan_different_values(simple_df):
    """Frames differing in a real value are unequal even when both hold a NaN."""
    df = simple_df.copy()
    # np.NaN alias was removed in NumPy 2.0; np.nan is the canonical spelling.
    df.iloc[2, 2] = np.nan
    df2 = simple_df.copy()
    df2.iloc[2, 2] = np.nan
    df2.iloc[1, 2] = 450
    x = companda(df, df2)
    print(x)
    assert not x
def test_create_table_multi_index_4(empty_db, multi_index_df_4, how):
    """add a new minimal table & read it back with pandabase

    Renamed from test_create_table_multi_index: a second function with that
    exact name is defined later in this module, so this one was silently
    shadowed and never collected or run by pytest.
    """
    pb.to_sql(multi_index_df_4,
              table_name='sample_mi',
              con=empty_db,
              how=how)
    loaded = pb.read_sql(con=empty_db, table_name='sample_mi')
    assert companda(multi_index_df_4, loaded)
def test_select_all_multi_index(empty_db, multi_index_df):
    """add a new minimal table & read it back with pandabase - select all"""
    table = pb.to_sql(multi_index_df,
                      table_name='sample_mi',
                      con=empty_db,
                      how='create_only')
    # Both index levels must have become primary-key columns.
    for pk_name in ('this', 'that'):
        assert table.columns[pk_name].primary_key

    # A (lowest, highest) range covering all rows behaves like select-all.
    loaded = pb.read_sql(con=empty_db,
                         table_name='sample_mi',
                         highest=(100, 100),
                         lowest=(0, 0))
    print('\n', loaded)
    assert companda(multi_index_df, loaded)
def test_create_table_multi_index(empty_db, multi_index_df, how):
    """add a new minimal table & read it back with pandabase"""
    table = pb.to_sql(multi_index_df,
                      table_name='sample_mi',
                      con=empty_db,
                      how=how)
    # Both index levels must have become primary-key columns.
    for pk_name in ('this', 'that'):
        assert table.columns[pk_name].primary_key

    loaded = pb.read_sql(con=empty_db, table_name='sample_mi')
    print('\n', loaded)
    assert companda(multi_index_df, loaded)
def test_upsert_new_cols(pandabase_loaded_db, constants, col_to_duplicate):
    """upsert with add_new_columns=True adds a brand-new column to the table"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    df['bonus_col'] = df[col_to_duplicate].copy()
    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert',
              add_new_columns=True)

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    # The original asserted 'bonus_col' in df.columns, which is trivially true
    # (the column was just added to df above); the meaningful check is that the
    # new column survived the round trip through the database.
    assert 'bonus_col' in loaded.columns
    assert companda(df, loaded)
def test_read_pandas_table_pandas(pandabase_loaded_db, simple_df, constants):
    """baseline: read pre-written table containing simple_df, using pd.read_sql_table"""
    assert has_table(pandabase_loaded_db, constants.TABLE_NAME)
    # NOTE(review): parse_dates='dates' does not match the 'date' column name;
    # harmless here because 'date' is converted explicitly below — confirm intent.
    loaded_df = pd.read_sql_table(constants.TABLE_NAME,
                                  con=pandabase_loaded_db,
                                  index_col=constants.SAMPLE_INDEX_NAME,
                                  parse_dates='dates')
    # sqlite does not store TZ info, so re-localize dates to UTC after the read.
    loaded_df['date'] = pd.to_datetime(loaded_df['date'], utc=True)

    expected_cols = make_clean_columns_dict(simple_df)
    actual_cols = make_clean_columns_dict(loaded_df)
    for name in expected_cols:
        print(name)
        if name == 'nan':
            # column of all NaN values is skipped
            continue
        assert_sqla_types_equivalent(expected_cols[name], actual_cols[name])
    assert companda(loaded_df, simple_df)
def test_upsert_complete_rows(pandabase_loaded_db, constants):
    """upsert, changing individual values"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert df.date.dt.tz == UTC

    # One cell changed per row, across four existing rows.
    edits = [(778, 'float', 9.9),
             (779, 'integer', 999),
             (780, 'string', 'nah'),
             (781, 'date', pd.to_datetime('1968-01-01', utc=True))]
    for row, column, value in edits:
        df.loc[row, column] = value

    # check that all values still exist
    assert df.loc[1, 'integer'] == 778
    assert df.date.dt.tz == UTC

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)
def test_select_pandas_table(pandas_loaded_db, simple_df, constants):
    """using pandabase.read_sql: read pandas-written table containing simple_df,

    this test fails because:
    when pandas writes the entry, it does not create an explicit primary key.
    the table is treated as a multiindex"""
    assert has_table(pandas_loaded_db, constants.TABLE_NAME)
    loaded = pb.read_sql(constants.TABLE_NAME, pandas_loaded_db)

    # Pandas doesn't write an explicit PK; move the index into a plain column
    # so both frames line up for comparison.
    simple_df[simple_df.index.name] = simple_df.index
    simple_df.index.name = None

    expected_cols = make_clean_columns_dict(simple_df)
    actual_cols = make_clean_columns_dict(loaded)
    for name in expected_cols:
        print(name)
        if name == 'nan':
            continue
        assert_sqla_types_equivalent(expected_cols[name], actual_cols[name])
    assert companda(loaded, simple_df)
def test_upsert_incomplete_rows(pandabase_loaded_db, constants):
    """upsert new rows with only 1 of 5 values (and index)"""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # Rows 11-14 are brand new; each gets exactly one value (other cells stay NA).
    for row, column, value in [(11, 'float', 9.9),
                               (12, 'integer', 999),
                               (13, 'string', 'nah'),
                               (14, 'date', pd.to_datetime('1968-01-01', utc=True))]:
        df.loc[row, column] = value

    # check that these values exist
    assert df.loc[1, 'integer'] == 778
    assert pd.isna(df.loc[11, 'integer'])
    assert df.loc[13, 'string'] == 'nah'

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)
def test_all_nans_ignore(df_with_all_nan_col):
    """A frame equals itself when all-NaN columns are ignored."""
    result = companda(df_with_all_nan_col,
                      df_with_all_nan_col,
                      ignore_all_nan_columns=True)
    assert result
def test_same_companda_index2(minimal_df):
    """Dropping a row breaks equality."""
    shorter = minimal_df.copy().drop(1, axis=0)
    assert not companda(shorter, minimal_df)
def test_same_companda_index1(minimal_df):
    """Relabeling an index entry breaks equality."""
    relabeled = minimal_df.copy().rename(index={1: 99})
    assert not companda(relabeled, minimal_df)
def test_same_companda_cols4(minimal_df):
    """Renaming a column breaks equality."""
    renamed = minimal_df.copy().rename(columns={'integer': 'x'})
    assert not companda(minimal_df, renamed)
def test_same_companda_cols2(minimal_df):
    """Dropping a column breaks equality."""
    narrower = minimal_df.copy().drop(['float'], axis=1)
    assert not companda(minimal_df, narrower)
def test_same_companda_copy2(minimal_df):
    """A frame equals a copy of itself."""
    duplicate = minimal_df.copy()
    assert companda(minimal_df, duplicate)
def test_same_companda_datetime3(simple_df):
    """Localizing the date column to a non-UTC zone breaks equality."""
    localized = simple_df.copy()
    localized['date'] = pd.to_datetime(localized['date'].values, utc=False).tz_localize(TZ)
    result = companda(localized, simple_df)
    print(result.message)
    assert not result
def test_same_companda2(minimal_df):
    """A minimal frame equals itself."""
    assert companda(minimal_df, minimal_df)
def test_different_companda(minimal_df, simple_df):
    """Two unrelated frames are not equal."""
    assert not companda(minimal_df, simple_df)
def test_added_nans_ignore(simple_df, df_with_all_nan_col):
    """An extra all-NaN column is tolerated when ignore_all_nan_columns=True."""
    result = companda(df_with_all_nan_col, simple_df, ignore_all_nan_columns=True)
    assert result
def test_all_nans_do_not_ignore(df_with_all_nan_col):
    """A frame equals itself even when all-NaN columns are compared."""
    result = companda(df_with_all_nan_col,
                      df_with_all_nan_col,
                      ignore_all_nan_columns=False)
    assert result
def test_same_companda1(simple_df):
    """A simple frame equals itself."""
    assert companda(simple_df, simple_df)
def test_same_companda_nan(simple_df):
    """Introducing a NaN into one frame breaks equality."""
    df = simple_df.copy()
    # pd.np was deprecated in pandas 0.25 and removed in 2.0; use numpy
    # directly (np is already imported by this module).
    df.iloc[2, 2] = np.nan
    assert not companda(df, simple_df)
def test_added_nans_do_not_ignore(simple_df, df_with_all_nan_col):
    """An extra all-NaN column breaks equality when it is not ignored."""
    result = companda(df_with_all_nan_col,
                      simple_df,
                      ignore_all_nan_columns=False)
    assert not result
def test_same_companda_string(simple_df):
    """Changing a single string cell breaks equality."""
    modified = simple_df.copy()
    modified.loc[1, 'string'] = 'z'
    assert not companda(modified, simple_df)
def test_same_companda_epsilon1(simple_df):
    """Float values that differ by less than epsilon still compare equal."""
    nudged = simple_df.copy()
    nudged.float = nudged.float.apply(lambda v: v + .0001)
    assert companda(nudged, simple_df)
def test_same_companda_datetime1sec(simple_df):
    """Dates shifted by a single second break equality."""
    shifted = simple_df.copy()
    shifted['date'] = shifted['date'].apply(lambda t: t + pd.Timedelta(seconds=1))
    assert not companda(shifted, simple_df)