Example 1
def test_get_nb_rows(engine, schema):
    # config
    table_name = TableNames.WITH_YIELD
    nb_rows, chunksize = 20, 3
    nb_last_chunk = nb_rows % chunksize
    nb_chunks = math.ceil(nb_rows / chunksize)
    # MySQL does not want flexible text length in indices/PK
    dtype = {
        'profileid': VARCHAR(10)
    } if 'mysql' in engine.dialect.dialect_description else None
    df = _TestsExampleTable.create_example_df(nb_rows=nb_rows)

    # iterate over upsert results
    # make sure we can extract the number of updated rows and that it is correct
    iterator = upsert(con=engine,
                      df=df,
                      table_name=table_name,
                      if_row_exists='update',
                      schema=schema,
                      chunksize=chunksize,
                      dtype=dtype,
                      yield_chunks=True)

    for ix, result in enumerate(iterator):
        assert result.rowcount == (chunksize
                                   if ix != nb_chunks - 1 else nb_last_chunk)

    # verify the inserted data is as expected
    # we sort the index for MySQL
    df_db = read_example_table_from_db(engine=engine,
                                       schema=schema,
                                       table_name=table_name)
    pd.testing.assert_frame_equal(df.sort_index(), df_db.sort_index())
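
A quick sanity check of the chunk arithmetic used above, runnable on its own (not part of the test suite): 20 rows split into chunks of 3 give math.ceil(20 / 3) == 7 chunks, with the remainder 20 % 3 == 2 forming the last chunk. Note that nb_rows % chunksize only equals the size of the last chunk because chunksize does not divide nb_rows evenly.

import math

nb_rows, chunksize = 20, 3
nb_chunks = math.ceil(nb_rows / chunksize)  # 7
nb_last_chunk = nb_rows % chunksize         # 2 (non-zero since 3 does not divide 20)
sizes = [chunksize] * (nb_chunks - 1) + [nb_last_chunk]
assert sizes == [3, 3, 3, 3, 3, 3, 2]
assert sum(sizes) == nb_rows
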
Example 2
def test_methods_and_attributes(engine, schema):
    """
    Makes tests in chain for PandasSpecialEngine.
    This makes code easier with a small inconvenient
    of not separating tests thematically.
    """
    # dtype for the index for MySQL (flexible text length is not allowed in indices/PK)
    dtype = {
        'profileid': VARCHAR(5)
    } if 'mysql' in engine.dialect.dialect_description else None
    table_name = 'test_pandas_special_engine'
    default_args = {
        'engine': engine,
        'schema': schema,
        'dtype': dtype,
        'table_name': table_name
    }
    drop_table_if_exists(engine=engine, schema=schema, table_name=table_name)
    # TEST INIT
    df = _TestsExampleTable.create_example_df(nb_rows=10)
    pse = PandasSpecialEngine(df=df, **default_args)
    # TEST ATTRIBUTE pse.table
    expected_cols = list(df.index.names) + df.columns.tolist()
    assert all((col in pse.table.columns for col in expected_cols))
    # TEST TABLE AND SCHEMA CREATION
    pse.create_schema_if_not_exists()
    pse.create_table_if_not_exists()

    # TEST ADD NEW COLUMNS
    # don't try to add JSON columns!
    # It's not supported by sqlalchemy compilers :(
    df = df.assign(
        new_text_col='test',
        new_int_col=0,
        new_float_col=1.1,
        new_bool_col=False,
        new_dt_col=pd.Timestamp('2020-01-01'),
        # create this col for later
        empty_col=None)
    # recreate pse then add columns
    pse = PandasSpecialEngine(df=df, **default_args)
    pse.add_new_columns()

    # TEST CHANGE COLUMN TYPE (not for SQLite)
    if pse._db_type != 'sqlite':
        # don't try to alter from any type to JSON!
        # It's not supported by sqlalchemy compilers :(
        # Also, the order is very specific! We have to cast types even
        # though the column is empty (to avoid losing column information
        # such as constraints), so each cast must be valid from the
        # previous type: e.g. casting from BOOLEAN to BIGINT is not
        # possible. BOOLEAN is left out of the chain entirely because
        # it breaks it.
        alterations = (1, 1.1, "abc", pd.Timestamp("2020-01-01", tz='UTC'))
        for i in alterations:
            # change empty_col
            df['empty_col'] = df['empty_col'].map(
                lambda x: i)  # this will work for lists or dicts as well
            # recreate pse then change column type
            pse = PandasSpecialEngine(df=df, **default_args)
            pse.adapt_dtype_of_empty_db_columns()
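
The dialect check guarding the dtype argument recurs in almost every example here. For readability it could be pulled out into a helper like the sketch below (index_dtype_for is a hypothetical name, not a pangres API):

from sqlalchemy import VARCHAR

def index_dtype_for(engine, length=10):
    # MySQL refuses flexible text lengths in indices/PK, so the indexed
    # 'profileid' column needs a bounded VARCHAR on that dialect only
    is_mysql = 'mysql' in engine.dialect.dialect_description
    return {'profileid': VARCHAR(length)} if is_mysql else None
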
Example 3
def test_table_attr(engine, schema):
    # generate a somewhat complex table model via the _TestsExampleTable class
    df = _TestsExampleTable.create_example_df(nb_rows=10)
    table_name = TableNames.NO_TABLE
    with sync_async_connect_switch(engine) as connection:
        pse = PandasSpecialEngine(connection=connection,
                                  schema=schema,
                                  table_name=table_name,
                                  df=df)
        # make sure columns and table name match
        expected_cols = list(df.index.names) + df.columns.tolist()
        assert all((col in pse.table.columns for col in expected_cols))
        assert pse.table.name == table_name
Example 4
async def test_table_creation_async(engine, schema):
    dtype = {
        'profileid': VARCHAR(5)
    } if 'mysql' in engine.dialect.dialect_description else None
    df = _TestsExampleTable.create_example_df(nb_rows=10)
    async with engine.connect() as connection:
        pse = PandasSpecialEngine(connection=connection,
                                  schema=schema,
                                  dtype=dtype,
                                  table_name=TableNames.TABLE_CREATION,
                                  df=df)
        assert not await pse.atable_exists()
        await pse.acreate_table_if_not_exists()
        await connection.commit()
        assert await pse.atable_exists()
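
A minimal sketch of driving the async pattern above outside of pytest, assuming a database reachable through an async driver such as asyncpg (the connection URL is a placeholder):

import asyncio
from sqlalchemy.ext.asyncio import create_async_engine

async def main():
    # placeholder connection string; any SQLAlchemy async dialect works the same way
    engine = create_async_engine('postgresql+asyncpg://user:password@localhost:5432/db')
    try:
        await test_table_creation_async(engine=engine, schema=None)
    finally:
        await engine.dispose()

asyncio.run(main())
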
Example 5
def insert_chunks(engine, schema, chunksize, nb_rows):
    df = _TestsExampleTable.create_example_df(nb_rows=nb_rows)
    # MySQL does not want flexible text length in indices/PK
    dtype = {
        'profileid': VARCHAR(10)
    } if 'mysql' in engine.dialect.dialect_description else None
    upsert_or_aupsert(schema=schema,
                      table_name=TableNames.VARIOUS_CHUNKSIZES,
                      df=df,
                      chunksize=chunksize,
                      con=engine,
                      if_row_exists='update',
                      dtype=dtype)
    df_db = read_example_table_from_db(
        engine=engine, schema=schema, table_name=TableNames.VARIOUS_CHUNKSIZES)
    # sort index (for MySQL...)
    pd.testing.assert_frame_equal(df.sort_index(), df_db.sort_index())
Example 6
def insert_chunks(engine, schema, chunksize, nb_rows):
    df = _TestsExampleTable.create_example_df(nb_rows=nb_rows)
    table_name = f'test_insert_chunksize_{chunksize}'
    drop_table_if_exists(engine=engine, schema=schema, table_name=table_name)
    upsert(
        schema=schema,
        table_name=table_name,
        df=df,
        chunksize=chunksize,
        engine=engine,
        if_row_exists='update',
        # MySQL does not want flexible text length in indices/PK
        dtype={'profileid': VARCHAR(10)}
        if 'mysql' in engine.dialect.dialect_description else None)
    df_db = read_example_table_from_db(engine=engine,
                                       schema=schema,
                                       table_name=table_name)
    # sort index (for MySQL...)
    pd.testing.assert_frame_equal(df.sort_index(), df_db.sort_index())
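
Both insert_chunks variants are helpers rather than tests; they are presumably driven by a parametrized test along these lines (a sketch; the real suite's parameter grid may differ):

import pytest

@pytest.mark.parametrize('chunksize', [1, 3, 13])
def test_insert_various_chunksizes(engine, schema, chunksize):
    # 11 rows leaves a partial final chunk for chunksizes that do not divide it
    insert_chunks(engine=engine, schema=schema, chunksize=chunksize, nb_rows=11)
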
Example 7
def test_upsert_speed(engine, schema, benchmark, library, nb_rows, rounds, iterations, if_row_exists):
    # only pangres is benchmarked here; fail loudly if the parametrization
    # changes (e.g. pandas gains upsert support) and we forget to update this test
    assert library == 'pangres'

    # skip async engines with pandas
    if is_async_sqla_obj(engine) and library == 'pandas':
        pytest.skip('async engines will not work with pandas')

    # get a df
    df = _TestsExampleTable.create_example_df(nb_rows=nb_rows).drop(columns=['favorite_colors'])

    # setup for test (create table with no rows)
    def setup():
        create_or_upsert_with_pangres(engine=engine, schema=schema, if_row_exists=if_row_exists,
                                      df=df.head(0), chunksize=nb_rows)

    # test func
    # insert update/ignore with `create_table=False` to maximise speed
    func = lambda: create_or_upsert_with_pangres(engine=engine, schema=schema, if_row_exists=if_row_exists,
                                                 df=df, chunksize=nb_rows, create_table=False)

    benchmark.pedantic(func, setup=setup, rounds=rounds, iterations=iterations)
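
For context, benchmark.pedantic is pytest-benchmark's manual mode: setup runs before every round and is excluded from the timing, and only the target callable is measured. A self-contained sketch (the workload is trivial on purpose; iterations is kept at 1, which pytest-benchmark requires when a setup function is passed):

def test_pedantic_sketch(benchmark):
    data = []

    def setup():
        # runs before each round, outside the measured time
        data.clear()

    # the timed callable; extending a list is cheap, this only illustrates the API
    benchmark.pedantic(lambda: data.extend(range(100)), setup=setup, rounds=5, iterations=1)
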
Example 8
def test_create_and_insert_speed(engine, schema, benchmark, library, nb_rows, rounds, iterations):
    # skip async engines with pandas
    if is_async_sqla_obj(engine) and library == 'pandas':
        pytest.skip('async engines will not work with pandas')

    # get a df
    # we don't test JSON as this is problematic with pandas
    df = _TestsExampleTable.create_example_df(nb_rows=nb_rows).drop(columns=['favorite_colors'])

    # prepare funcs for benchmark and then do the benchmark
    switch = {'pangres': lambda: create_or_upsert_with_pangres(engine=engine, schema=schema, if_row_exists='update',
                                                               df=df, chunksize=nb_rows),
              'pandas': lambda: create_with_pandas(engine=engine, schema=schema, df=df)}

    try:
        benchmark.pedantic(switch[library], setup=lambda: drop_table(engine=engine, schema=schema,
                                                                     table_name=TableNames.BENCHMARK),
                           rounds=rounds, iterations=iterations)
    except NotImplementedError as e:
        if 'not implemented for SQLAlchemy 2' in str(e):
            pytest.skip('in Python 3.6 there is some kind of problem with engines created with '
                        '`future=True` flag and pandas')
        else:
            # re-raise unrelated NotImplementedErrors instead of silently swallowing them
            raise
Example 9
from pangres.tests.conftest import read_example_table_from_db, drop_table_if_exists


# # Config

table_name = 'test_upsert'
default_args = {'table_name': table_name,
                'create_schema': True,
                'add_new_columns': True,
                'adapt_dtype_of_empty_db_columns': False}


# # Test data

# +
df = _TestsExampleTable.create_example_df(nb_rows=5)
# test for NULL values except for boolean column
df.iloc[0, [ix for ix, col in enumerate(df.columns) if col != 'likes_pizza']] = None

# test for update
df2 = _TestsExampleTable.create_example_df(nb_rows=6)

# test for ignore
df3 = _TestsExampleTable.create_example_df(nb_rows=6)


# -

# # Tests
# ORDER MATTERS!
async def test_add_new_columns_async(engine, schema, axis=None):
    # store arguments we will use for multiple PandasSpecialEngine instances
    table_name = TableNames.ADD_NEW_COLUMN
    common_kwargs = dict(schema=schema, table_name=table_name)
    common_kwargs['dtype'] = {
        'profileid': VARCHAR(5)
    } if 'mysql' in engine.dialect.dialect_description else None

    # create our example table
    df = _TestsExampleTable.create_example_df(nb_rows=10)
    async with engine.connect() as connection:
        pse = PandasSpecialEngine(connection=connection,
                                  df=df,
                                  **common_kwargs)
        await pse.acreate_table_if_not_exists()
        await connection.commit()
        assert await pse.atable_exists()

    # we need to recreate an instance of PandasSpecialEngine
    # so that a new table model with the new columns is created then add columns
    async with engine.connect() as connection:
        # error message if we get unexpected values for "axis"
        # or we make a typo in our if/elif statements
        err_msg = f'Expected axis to be one of index, column. Got {axis}'
        # add a new index level or new columns (no JSON ones,
        # it's not supported by sqlalchemy compilers :( )
        if axis == 'index':
            df['new_index_col'] = 'foo'
            df.set_index('new_index_col', append=True, inplace=True)
        elif axis == 'column':
            df = df.assign(
                new_text_col='test',
                new_int_col=0,
                new_float_col=1.1,
                new_bool_col=False,
                new_dt_col=pd.Timestamp('2020-01-01'),
                # create this col for later
                empty_col=None)
        else:
            raise AssertionError(err_msg)

        # recreate PandasSpecialEngine
        pse = PandasSpecialEngine(connection=connection,
                                  df=df,
                                  **common_kwargs)

        # check if we get an error when trying to add an index level
        if axis == 'index':
            with pytest.raises(MissingIndexLevelInSqlException) as exc_info:
                await pse.aadd_new_columns()
            assert 'Cannot add' in str(exc_info.value)
            return
        elif axis == 'column':
            await pse.aadd_new_columns()
            await connection.commit()
        else:
            raise AssertionError(err_msg)

    # since we returned early for 'index', the axis must now be 'column'
    assert axis == 'column'
    # check that the columns were correctly added
    sync_engine = async_engine_to_sync_engine(engine)
    with sync_engine.connect() as connection:
        ns = get_table_namespace(schema=schema, table_name=table_name)
        df_db = pd.read_sql(text(f'SELECT * FROM {ns} LIMIT 0;'),
                            con=connection,
                            index_col='profileid')
        assert set(df.columns) == set(df_db.columns)
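
The axis=None default above exists only so that an unparametrized call trips the AssertionError; the test is evidently meant to run with axis set to 'index' and 'column'. A sketch of such a driver, assuming an async-capable pytest plugin like pytest-asyncio:

import pytest

@pytest.mark.asyncio
@pytest.mark.parametrize('axis', ['index', 'column'])
async def test_add_new_columns_async_parametrized(engine, schema, axis):
    await test_add_new_columns_async(engine=engine, schema=schema, axis=axis)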