예제 #1
0
def test_upsert_individual_values1(pandabase_loaded_db, constants):
    """Upsert a full-shape, mostly-NaN DataFrame carrying one new value per row.

    Only the non-NaN cells should be merged into the existing table rows.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    baseline = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # all-NaN frame with the same index, columns, and dtypes as the table
    sparse = pd.DataFrame(index=baseline.index, columns=baseline.columns)
    for name in sparse.columns:
        sparse[name] = sparse[name].astype(baseline[name].dtype)

    # one new value per row, each in a different column
    edits = [
        ('float', 9.9),
        ('integer', 999),
        ('string', 'nah'),
        ('date', pd.to_datetime('1968-01-01', utc=True)),
    ]
    for position, (column, value) in enumerate(edits):
        sparse.loc[sparse.index[position], column] = value

    pb.to_sql(sparse,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # apply the same edits locally and compare with what pandabase reads back
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    for position, (column, value) in enumerate(edits):
        baseline.loc[baseline.index[position], column] = value

    assert companda(baseline, loaded)
예제 #2
0
def test_create_table_multi_index_rename(empty_db, multi_index_df, multi_index_df_4):
    """Create a multi-index table after renaming its index levels.

    One of the new level names contains a '/' to exercise name sanitization.
    Fix: the original asserted nothing after the write; verify the table exists.
    """
    multi_index_df.index.names = ['name/z', 'other name']
    pb.to_sql(multi_index_df,
              table_name='sample_mi',
              con=empty_db,
              how='create_only',
              )
    assert pb.has_table(empty_db, 'sample_mi')
예제 #3
0
def test_auto_index_add_valid_bool(minimal_df, empty_db, constants):
    """Append nullable booleans to an auto-indexed table and read them back.

    Fix: ``pd.np`` was deprecated in pandas 1.0 and removed in 2.0; use
    ``pd.isna`` (which also handles ``pd.NA``) to test for the stored NULL.
    """
    pb.to_sql(
        minimal_df,
        table_name=constants.TABLE_NAME,
        con=empty_db,
        how='create_only',
        auto_index=True,
    )
    assert pb.has_table(empty_db, constants.TABLE_NAME)

    df = pd.DataFrame(index=[101, 102, 103],
                      columns=['boolean'],
                      data=[True, False, None])

    pb.to_sql(
        df,
        table_name=constants.TABLE_NAME,
        con=empty_db,
        how='append',
        auto_index=True,
    )

    df = pb.read_sql(constants.TABLE_NAME, con=empty_db)

    # Int64Dtype is a fine way to store nullable boolean values
    # Stored in database as boolean or NULL so the data can only be 0, 1, or None
    assert is_bool_dtype(df.boolean) or is_integer_dtype(df.boolean)

    # assume values were loaded in order:
    x = len(df)
    assert df.loc[x - 2, 'boolean']
    assert not df.loc[x - 1, 'boolean']
    # pd.np.isnan no longer exists in modern pandas
    assert pd.isna(df.loc[x, 'boolean'])
    with pytest.raises(KeyError):
        _ = df.loc[x + 1, 'boolean']
예제 #4
0
def test_upsert_individual_values2(pandabase_loaded_db, constants):
    """Upsert single values into two rows from one-column DataFrames.

    Fix: the original built a full-width all-NaN ``df2``, coerced its dtypes,
    and wrote two values into it that were never persisted or read — only its
    index was used, and that index is identical to ``df.index``. The dead
    frame and dead assignments are removed.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    new_date = pd.to_datetime('1968-01-01', utc=True)

    # upsert a single float value at the first index
    pb.to_sql(pd.DataFrame(index=df.index[:1], columns=['float'], data=[9.9]),
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')
    # upsert a single date value at the fourth index
    pb.to_sql(pd.DataFrame(index=df.index[3:4],
                           columns=['date'],
                           data=[new_date]),
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert')

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    df.loc[df.index[0], 'float'] = 9.9
    df.loc[df.index[3], 'date'] = new_date

    assert companda(df, loaded)
def create_branch_table(con):
    """Build the branches table from the module-level ``branches`` mapping and upsert it.

    Assumes ``branches`` is a dict-like of branch records keyed by branch id —
    TODO confirm against its definition site.
    """
    branch_df = pd.DataFrame(branches, columns=branches.keys())
    branch_df = branch_df.transpose()
    branch_df.index.name = "branch_id"
    # fix: log strings carried f-prefixes with no placeholders (flake8 F541)
    logging.info("Creating branch table ")
    pandabase.to_sql(branch_df, table_name="branches", con=con, how="upsert")
    logging.info("[Success] branch table created! ")
예제 #6
0
def test_upsert_numeric_column_names(empty_db, n_rows, n_cols, prefix, how):
    """Purely numeric column names must be rejected with a NameError."""
    labels = [f'{prefix}{i}' for i in range(n_cols)]
    frame = pd.DataFrame(np.random.random((n_rows, n_cols)),
                         index=range(1, n_rows + 1),
                         columns=labels)
    frame.index.name = 'dex'
    with pytest.raises(NameError):
        pb.to_sql(frame, con=empty_db, table_name='table', how=how)
예제 #7
0
def test_scale(rows, cols, empty_db):
    """Exercise writes at several table sizes (pandabase fails at 200 cols)."""
    table = f'scale_{rows}x{cols}'
    pb.to_sql(scale_df(rows, cols),
              con=empty_db,
              table_name=table,
              how='upsert')
예제 #8
0
def test_create_table_fails_non_utc_index(empty_db, simple_df, constants):
    """A datetime index in any timezone other than UTC must be rejected."""
    local_index = pd.date_range('2011', freq='d', periods=len(simple_df), tz=LA_TZ)
    simple_df.index = local_index

    with pytest.raises(ValueError):
        pb.to_sql(simple_df, table_name='sample', con=empty_db, how='create_only')
예제 #9
0
def test_select_fails_multi_index(empty_db, multi_index_df, lowest):
    """Selecting from a multi-index table with a malformed `lowest` bound raises."""
    pb.to_sql(multi_index_df, table_name='sample_mi', con=empty_db,
              how='create_only')

    with pytest.raises(Exception):
        pb.read_sql(con=empty_db, table_name='sample_mi',
                    highest=(1000, 1000), lowest=lowest)
예제 #10
0
def test_select_table_range_fails_different_index(empty_db, simple_df,
                                                  constants):
    """Range bounds of the wrong type for a datetime-indexed table must raise."""
    # move the 'date' column into the index (drops it from the columns)
    simple_df = simple_df.set_index('date')

    pb.to_sql(simple_df, table_name='sample', con=empty_db, how='create_only')

    # integer bounds against a datetime primary key
    with pytest.raises(Exception):
        _ = pb.read_sql('sample', con=empty_db, lowest=0, highest=12)
예제 #11
0
def test_overwrite_table_fails(pandabase_loaded_db, simple_df, how, constants):
    """Writing rows whose indices collide with existing rows must raise."""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    with pytest.raises(Exception):
        pb.to_sql(simple_df,
                  con=pandabase_loaded_db,
                  table_name=constants.TABLE_NAME,
                  how=how)
예제 #12
0
def test_add_fails_wrong_index_name(pandabase_loaded_db, how, constants):
    """A DataFrame whose index name differs from the table's must be rejected."""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    bad = pd.DataFrame(data=[['x']], index=[1], columns=['date'])
    bad.index.name = 'not_a_real_name'

    with pytest.raises(NameError):
        pb.to_sql(bad,
                  con=pandabase_loaded_db,
                  table_name=constants.TABLE_NAME,
                  how=how)
예제 #13
0
def test_upsert_fails_invalid_float(pandabase_loaded_db, how, constants):
    """A string value destined for the float column must raise TypeError."""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    bad = pd.DataFrame(data=[['x']], index=[1], columns=['float'])
    bad.index.name = constants.SAMPLE_INDEX_NAME

    with pytest.raises(TypeError):
        pb.to_sql(bad,
                  con=pandabase_loaded_db,
                  table_name=constants.TABLE_NAME,
                  how=how)
예제 #14
0
def test_add_fails_invalid_date(pandabase_loaded_db, how, constants):
    """A non-datetime value destined for the date column must raise."""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    bad = pd.DataFrame(data=[['x']], index=[1], columns=['date'])
    bad.index.name = constants.SAMPLE_INDEX_NAME

    # the failure may surface in pandas or at the SQLAlchemy layer
    with pytest.raises((ValueError, TypeError, sqa.exc.StatementError)):
        pb.to_sql(bad,
                  con=pandabase_loaded_db,
                  table_name=constants.TABLE_NAME,
                  how=how)
예제 #15
0
def test_write_time(empty_db, how, qty):
    """Writing `qty` random rows must finish in roughly under two seconds.

    Fix: ``datetime.utcnow()`` is deprecated since Python 3.12; use an aware
    ``datetime.now(timezone.utc)``. The elapsed-timedelta comparison against
    ``pd.Timedelta`` is unchanged.
    """
    from datetime import timezone  # local import keeps the fix self-contained

    start = datetime.now(timezone.utc)
    pb.to_sql(pd.DataFrame(index=range(qty), columns=['a', 'b', 'c'], data=np.random.random((qty, 3))),
              table_name='sample',
              con=empty_db,
              how=how,
              auto_index=True,
              )
    end = datetime.now(timezone.utc)
    assert end - start < pd.Timedelta(seconds=2)
예제 #16
0
def test_create_table_multi_index(empty_db, multi_index_df_4, how):
    """Write a multi-index table, read it back, and check round-trip equality."""
    pb.to_sql(multi_index_df_4, table_name='sample_mi', con=empty_db, how=how)

    round_trip = pb.read_sql(con=empty_db, table_name='sample_mi')

    assert companda(multi_index_df_4, round_trip)
예제 #17
0
def test_new_column_all_nan(pandabase_loaded_db, df_with_all_nan_col, constants):
    """Appending rows that include an all-NaN column should succeed."""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    # shift indices past the existing rows so the append cannot collide
    start = 100
    df_with_all_nan_col.index = range(start, start + len(df_with_all_nan_col))
    df_with_all_nan_col.index.name = constants.SAMPLE_INDEX_NAME

    pb.to_sql(df_with_all_nan_col,
              con=pandabase_loaded_db,
              table_name=constants.TABLE_NAME,
              how='append',
              auto_index=False)
예제 #18
0
def test_new_column_fails(pandabase_loaded_db, how, constants):
    """Writing a column the table lacks (without add_new_columns) raises."""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    extra = pd.DataFrame(data=[[1.1]], index=[101], columns=['new_column'])
    extra.index.name = constants.SAMPLE_INDEX_NAME
    assert extra.loc[101, 'new_column'] == 1.1

    with pytest.raises(NameError):
        pb.to_sql(extra,
                  con=pandabase_loaded_db,
                  table_name=constants.TABLE_NAME,
                  how=how)
예제 #19
0
def test_upsert_auto_index_fails(empty_db, minimal_df):
    """Upsert is meaningless for an auto-indexed table and must raise IOError."""
    shared = dict(table_name='sample', con=empty_db, auto_index=True)

    pb.to_sql(minimal_df, how='create_only', **shared)

    with pytest.raises(IOError):
        pb.to_sql(minimal_df, how='upsert', **shared)
예제 #20
0
def test_add_fails_invalid_timezone(pandabase_loaded_db, how, constants, tz):
    """Datetimes carrying a non-UTC timezone must be rejected with ValueError.

    Fix: removed a leftover debug ``print(df.date)``.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = pd.DataFrame(index=range(5),
                      columns=['date'],
                      data=pd.date_range('2019-06-06', periods=5, freq='h', tz=tz))
    df.index.name = constants.SAMPLE_INDEX_NAME

    with pytest.raises(ValueError):
        pb.to_sql(df,
                  table_name=constants.TABLE_NAME,
                  con=pandabase_loaded_db,
                  how=how)
예제 #21
0
def test_append_bad_pk_fails(pandabase_loaded_db, simple_df, constants, unique_index_name):
    """Appending rows whose primary-key column conflicts with the table raises."""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    # re-key on the integer column; optionally rename it to the table's PK name
    simple_df.index = simple_df['integer']
    if unique_index_name:
        simple_df[constants.SAMPLE_INDEX_NAME] = simple_df.integer
        simple_df = simple_df.drop('integer', axis=1)

    with pytest.raises(NameError):
        pb.to_sql(simple_df,
                  con=pandabase_loaded_db,
                  table_name=constants.TABLE_NAME,
                  how='append')
예제 #22
0
def test_create_table_no_index_load_pandas(empty_db, minimal_df, as_type, how):
    """Write an auto-indexed table via pandabase, then read it back with plain pandas."""
    if as_type:
        minimal_df['integer'] = minimal_df['integer'].astype('Int64')

    table = pb.to_sql(minimal_df,
                      table_name='sample',
                      con=empty_db,
                      how=how,
                      auto_index=True)

    assert table.columns[PANDABASE_DEFAULT_INDEX].primary_key
    assert pb.has_table(empty_db, 'sample')

    via_pandas = pd.read_sql_table('sample',
                                   con=empty_db,
                                   index_col=PANDABASE_DEFAULT_INDEX)
    # pandas has no notion of pandabase's default index name
    via_pandas.index.name = None
    # datetimes come back tz-naive but were stored as UTC; restore the tz
    via_pandas.date = pd.to_datetime(via_pandas.date, utc=True)

    assert pb.companda(via_pandas, minimal_df, ignore_index=True)
예제 #23
0
def test_select_some_multi_index(empty_db, multi_index_df, lowest, length):
    """Range-select from a multi-index table and check the resulting row count."""
    pb.to_sql(multi_index_df, table_name='sample_mi', con=empty_db,
              how='create_only')

    selection = pb.read_sql(con=empty_db,
                            table_name='sample_mi',
                            highest=(1000, 1000),
                            lowest=lowest)
    print('\n', selection)

    assert len(selection) == length
예제 #24
0
def test_upsert_new_cols(pandabase_loaded_db, constants, col_to_duplicate):
    """Upsert with add_new_columns=True adds a brand-new column to the table.

    Fix: the final assertion checked ``df.columns`` — trivially true, since
    'bonus_col' was added to the local df just above. It must check the
    re-loaded table instead.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    df['bonus_col'] = df[col_to_duplicate].copy()

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert',
              add_new_columns=True)

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(df, loaded)
    assert 'bonus_col' in loaded.columns
예제 #25
0
def test_create_select_table_range_datetime_index(empty_db, simple_df,
                                                  constants):
    """Create a table keyed on a datetime index; select empty and full ranges."""
    # move the 'date' column into the index (drops it from the columns)
    simple_df = simple_df.set_index('date')

    table = pb.to_sql(simple_df,
                      table_name='sample',
                      con=empty_db,
                      how='create_only')

    assert table.columns['date'].primary_key
    assert pb.has_table(empty_db, 'sample')

    # inverted bounds: the selection must come back empty
    empty_selection = pb.read_sql('sample',
                                  con=empty_db,
                                  lowest=simple_df.index[-1],
                                  highest=simple_df.index[0])
    print(empty_selection)
    assert len(empty_selection) == 0

    # full range: every row comes back
    full_selection = pb.read_sql('sample',
                                 con=empty_db,
                                 lowest=simple_df.index[0],
                                 highest=simple_df.index[-1])
    assert pb.companda(full_selection, simple_df, ignore_all_nan_columns=True)
def create_collections_table(con):
    """Build the collections table from the authority credits frame and upsert it.

    Renames columns via ``collection_table_field_mapper``, keeps only the
    mapped columns, swaps branch names for foreign keys, then upserts.
    """
    collections_df = AuthorityFiles.Authority_instance.df_credits
    collections_df = collections_df.rename(
        columns=collection_table_field_mapper)
    # keep only the columns that the field mapper actually targets
    cols = [
        col for col in list(collections_df.columns)
        if col in collection_table_field_mapper.values()
    ]
    collections_df_final = collections_df[cols]
    collections_df_final.index.name = "collection_id"
    collections_df_final = replace_branch_with_fk(collections_df_final)
    # fix: log strings carried f-prefixes with no placeholders (flake8 F541)
    logging.info("Creating collections table ")
    pandabase.to_sql(collections_df_final,
                     table_name="collections",
                     con=con,
                     how="upsert")
    logging.info("[Success] collections table created! ")
예제 #27
0
def data1_to_sql(data1, how):
    '''Write ``data1`` to the ``users`` table via pandabase.

    Args:
        data1: DataFrame of rows to add to the users table; must contain
            an 'id' column, which becomes the primary-key index
        how : {'create_only', 'upsert', 'append'}
            - create_only:
                If table exists, raise an error and stop.
            - append:
                If table exists, append data. Raise if index overlaps
                Create table if does not exist.
            - upsert:
                create table if needed
                if record exists: update (possibly replacing values with NULL)
                else: insert
    '''
    # NOTE(review): `con` is read from module/enclosing scope, not passed in —
    # confirm a connection named `con` exists wherever this is defined.
    data1 = data1.set_index('id')
    pandabase.to_sql(data1, table_name='users', con=con, how=how)
예제 #28
0
def test_coerce_integer(pandabase_loaded_db, how, constants):
    """A whole-number float written to the integer column is coerced to int."""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    frame = pd.DataFrame(data=[[77.0]], index=[1], columns=['integer'])
    frame.index.name = constants.SAMPLE_INDEX_NAME
    dtypes_before = frame.dtypes

    pb.to_sql(frame,
              con=pandabase_loaded_db,
              table_name=constants.TABLE_NAME,
              how='upsert')

    # to_sql must not mutate the caller's DataFrame dtypes
    assert all(dtypes_before[name] == frame.dtypes[name] for name in frame.columns)

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert loaded.loc[1, 'integer'] == 77
예제 #29
0
def test_add_new_rows(pandabase_loaded_db, simple_df, how, constants):
    """Write complete rows at fresh indices via append or upsert."""
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    # shift every index by 100 so the new rows cannot collide
    shifted = simple_df.copy()
    shifted.index = shifted.index + 100

    pb.to_sql(shifted,
              con=pandabase_loaded_db,
              table_name=constants.TABLE_NAME,
              how=how)

    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    assert loaded.isna().sum().sum() == 0
    # both the original rows and the shifted copies survive the write
    assert companda(simple_df, loaded.loc[simple_df.index])
    assert companda(shifted, loaded.loc[shifted.index])
예제 #30
0
def test_append_auto_index(empty_db, minimal_df):
    """Appending to an auto-indexed table duplicates rows instead of colliding."""
    pb.to_sql(minimal_df,
              table_name='sample',
              con=empty_db,
              auto_index=True,
              how='create_only')
    appended = pb.to_sql(minimal_df,
                         table_name='sample',
                         con=empty_db,
                         auto_index=True,
                         how='append')

    assert appended.columns[PANDABASE_DEFAULT_INDEX].primary_key
    assert pb.has_table(empty_db, 'sample')

    loaded = pb.read_sql('sample', con=empty_db)
    expected = pd.concat([minimal_df, minimal_df], ignore_index=True)
    assert pb.companda(loaded, expected, ignore_index=True)
    assert len(loaded) == len(minimal_df) * 2