def test_end_to_end(engine, schema, create_table, if_row_exists, df_expected):
    """Upsert ``df`` then ``df2`` into the END_TO_END table, checking the table after each step.

    ``df`` and ``df2`` are not defined in this function; they are resolved
    from the enclosing scope. After the first upsert the table must equal
    ``df``; after the second it must equal ``df_expected``.
    """
    # MySQL cannot use a text column of flexible length as an index,
    # so the index column gets a fixed-width type on that dialect
    if 'mysql' in engine.dialect.dialect_description:
        index_dtype = {'profileid': VARCHAR(10)}
    else:
        index_dtype = None
    target_table = TableNames.END_TO_END

    # keyword arguments shared by both upsert calls below
    shared_kwargs = {
        'if_row_exists': if_row_exists,
        'dtype': index_dtype,
        'table_name': target_table,
    }

    def fetch_sorted():
        # read the table back and order it by index so comparisons are stable
        stored = read_example_table_from_db(
            engine=engine, schema=schema, table_name=target_table)
        return stored.sort_index()

    # step 1: always create the table on the first upsert
    upsert_or_aupsert(con=engine, schema=schema, df=df, create_table=True,
                      **shared_kwargs)
    pd.testing.assert_frame_equal(df, fetch_sorted())

    # step 2: upsert the second frame using the parametrized settings
    upsert_or_aupsert(con=engine, schema=schema, df=df2, create_table=create_table,
                      **shared_kwargs)
    pd.testing.assert_frame_equal(df_expected, fetch_sorted())
def test_get_nb_rows(engine, schema):
    """Check the ``rowcount`` of every result yielded by a chunked ``upsert``.

    An example frame of ``nb_rows`` rows is upserted with ``yield_chunks=True``.
    Every yielded result must report ``chunksize`` rows, except the last one
    which must report the size of the final (possibly partial) chunk. The
    number of yielded results must match the expected number of chunks, and
    the table content must equal the frame that was sent.
    """
    # config
    table_name = TableNames.WITH_YIELD
    nb_rows, chunksize = 20, 3
    nb_chunks = math.ceil(nb_rows / chunksize)
    # a remainder of 0 means the last chunk is a full chunk, not an empty one
    nb_last_chunk = nb_rows % chunksize or chunksize

    # MySQL does not want flexible text length in indices/PK
    if 'mysql' in engine.dialect.dialect_description:
        dtype = {'profileid': VARCHAR(10)}
    else:
        dtype = None
    df = _TestsExampleTable.create_example_df(nb_rows=nb_rows)

    # iterate over upsert results
    # make sure we can extract the number of updated rows and that it is correct
    iterator = upsert(con=engine, df=df, table_name=table_name, if_row_exists='update',
                      schema=schema, chunksize=chunksize, dtype=dtype, yield_chunks=True)
    nb_seen = 0
    for ix, result in enumerate(iterator):
        is_last_chunk = ix == nb_chunks - 1
        assert result.rowcount == (nb_last_chunk if is_last_chunk else chunksize)
        nb_seen = ix + 1
    # without this, an iterator that stops early would pass the loop unnoticed
    assert nb_seen == nb_chunks

    # verify the inserted data is as expected
    # we sort the index for MySQL
    df_db = read_example_table_from_db(engine=engine, schema=schema, table_name=table_name)
    pd.testing.assert_frame_equal(df.sort_index(), df_db.sort_index())
def test_create_table(engine, schema):
    """Upsert ``df`` after ``drop_table_if_exists`` and check it reads back equal.

    ``df``, ``table_name`` and ``default_args`` are not defined in this
    function; they are resolved from the enclosing scope.
    """
    # MySQL cannot use a text column of flexible length as an index,
    # so the index column gets a fixed-width type on that dialect
    if 'mysql' in engine.dialect.dialect_description:
        index_dtype = {'profileid': VARCHAR(10)}
    else:
        index_dtype = None

    drop_table_if_exists(engine=engine, schema=schema, table_name=table_name)
    upsert(engine=engine, schema=schema, df=df, if_row_exists='update',
           dtype=index_dtype, **default_args)

    stored = read_example_table_from_db(engine=engine, schema=schema,
                                        table_name=table_name)
    pd.testing.assert_frame_equal(df, stored)
def test_upsert_ignore(engine, schema):
    """Upsert ``df`` then ``df3`` with ``if_row_exists='ignore'`` and check the table.

    The table is expected to hold ``df`` followed by the last row of ``df3``
    (presumably the only row of ``df3`` whose key is not already in ``df``;
    verify against the definition of ``df3``). ``df``, ``df3``,
    ``table_name`` and ``default_args`` come from the enclosing scope.
    """
    # fixed-width index type on MySQL only
    if 'mysql' in engine.dialect.dialect_description:
        index_dtype = {'profileid': VARCHAR(10)}
    else:
        index_dtype = None

    drop_table_if_exists(engine=engine, schema=schema, table_name=table_name)

    # same call for both frames, in order
    for frame in (df, df3):
        upsert(engine=engine, schema=schema, df=frame, if_row_exists='ignore',
               dtype=index_dtype, **default_args)

    stored = read_example_table_from_db(engine=engine, schema=schema,
                                        table_name=table_name)
    last_row_of_df3 = df3.tail(1)
    expected = pd.concat([df, last_row_of_df3], axis=0)
    pd.testing.assert_frame_equal(expected, stored)
def insert_chunks(engine, schema, chunksize, nb_rows):
    """Upsert an example frame of ``nb_rows`` rows in chunks and compare with the table.

    The frame is written to the VARIOUS_CHUNKSIZES table with the given
    ``chunksize``, read back, and compared after sorting both by index.
    """
    example = _TestsExampleTable.create_example_df(nb_rows=nb_rows)

    # MySQL does not want flexible text length in indices/PK
    if 'mysql' in engine.dialect.dialect_description:
        index_dtype = {'profileid': VARCHAR(10)}
    else:
        index_dtype = None

    target_table = TableNames.VARIOUS_CHUNKSIZES
    upsert_or_aupsert(schema=schema, table_name=target_table, df=example,
                      chunksize=chunksize, con=engine, if_row_exists='update',
                      dtype=index_dtype)

    stored = read_example_table_from_db(engine=engine, schema=schema,
                                        table_name=target_table)
    # row order is not relied upon (sorted for MySQL), so compare sorted frames
    pd.testing.assert_frame_equal(example.sort_index(), stored.sort_index())
def insert_chunks(engine, schema, chunksize, nb_rows):
    """Upsert an example frame of ``nb_rows`` rows in chunks and compare with the table.

    The target table is named after ``chunksize`` and is passed to
    ``drop_table_if_exists`` before the upsert. The frame and the table
    content are compared after sorting both by index.
    """
    example = _TestsExampleTable.create_example_df(nb_rows=nb_rows)
    target_table = f'test_insert_chunksize_{chunksize}'
    drop_table_if_exists(engine=engine, schema=schema, table_name=target_table)

    # MySQL does not want flexible text length in indices/PK
    if 'mysql' in engine.dialect.dialect_description:
        index_dtype = {'profileid': VARCHAR(10)}
    else:
        index_dtype = None

    upsert(schema=schema, table_name=target_table, df=example, chunksize=chunksize,
           engine=engine, if_row_exists='update', dtype=index_dtype)

    stored = read_example_table_from_db(engine=engine, schema=schema,
                                        table_name=target_table)
    # row order is not relied upon (sorted for MySQL), so compare sorted frames
    pd.testing.assert_frame_equal(example.sort_index(), stored.sort_index())
def test_upsert_update(engine, schema):
    """Upsert ``df2`` with ``if_row_exists='update'`` and check the table equals ``df2``.

    ``df2``, ``table_name`` and ``default_args`` are not defined in this
    function; they are resolved from the enclosing scope. The table is not
    dropped here, so this presumably relies on a table left by an earlier
    test (verify against the test ordering).
    """
    # fixed-width index type on MySQL only
    if 'mysql' in engine.dialect.dialect_description:
        index_dtype = {'profileid': VARCHAR(10)}
    else:
        index_dtype = None

    upsert(engine=engine, schema=schema, df=df2, if_row_exists='update',
           dtype=index_dtype, **default_args)

    stored = read_example_table_from_db(engine=engine, schema=schema,
                                        table_name=table_name)
    pd.testing.assert_frame_equal(df2, stored)