Example #1
def test_add_new_column(engine, schema):
    # config
    table_name = TableNames.ADD_NEW_COLUMN
    dtype = {
        'id': VARCHAR(5)
    } if 'mysql' in engine.dialect.dialect_description else None
    df = pd.DataFrame({'id': ['foo']}).set_index('id')
    # common kwargs for every upsert_or_aupsert call below
    common_kwargs = dict(con=engine,
                         schema=schema,
                         table_name=table_name,
                         if_row_exists='update',
                         dtype=dtype)

    # 1. create table
    upsert_or_aupsert(df=df, **common_kwargs)
    # 2. add a new column and repeat upsert_or_aupsert
    df['new_column'] = 'bar'
    upsert_or_aupsert(df=df, add_new_columns=True, **common_kwargs)
    # verify content matches
    df_db = select_table(engine=engine,
                         schema=schema,
                         table_name=table_name,
                         index_col='id').sort_index()
    pd.testing.assert_frame_equal(df, df_db)
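None of these snippets show their imports because they are excerpts from a larger test module. Below is a minimal sketch of what that module is assumed to import before any of the examples run; the names listed in the trailing comment are defined elsewhere, so their exact import path is an assumption.

import random

import pandas as pd
import pytest
from sqlalchemy import VARCHAR, create_engine
from sqlalchemy.exc import IntegrityError, OperationalError, ProgrammingError

# public pangres helper used in Example #15 below
from pangres import fix_psycopg2_bad_cols

# the remaining names used in the examples (upsert_or_aupsert, select_table, drop_table,
# drop_schema, read_example_table_from_db, create_test_table, TableNames, bad_char_seq,
# BadColumnNamesException, HasNoSchemaSystemException, ...) are defined elsewhere,
# either in the surrounding test suite or in pangres itself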
Example #2
def test_upsert_with_unique_keys(engine, schema):
    # config/local helpers
    # common kwargs for all the upsert commands below
    table_name = TableNames.UNIQUE_KEY
    common_kwargs_upsert = dict(con=engine, schema=schema, table_name=table_name,
                                if_row_exists='update')

    def read_from_db():
        return select_table(engine=engine, schema=schema,
                            table_name=table_name, index_col='row_id')

    # create table
    create_test_table(engine=engine, schema=schema)

    # add initial data (df_old)
    upsert_or_aupsert(df=df_old, **common_kwargs_upsert)
    df = read_from_db()
    df_expected = df_old.assign(row_id=range(1, 4)).reset_index().set_index('row_id')
    pd.testing.assert_frame_equal(df, df_expected)

    # add new data (df_new)
    upsert_or_aupsert(df=df_new, **common_kwargs_upsert)
    df = read_from_db()
    # before building our expected df we need to handle a special case for postgres,
    # where the id of the last row will be 7 instead of 4. Presumably PG's ON CONFLICT
    # DO UPDATE clause consumes sequence values for the updated rows as well, so we get
    # 4 (the number we would expect) + 3 (the three previous rows that were updated)
    last_row_id = 7 if 'postgres' in engine.dialect.dialect_description else 4
    df_expected = (pd.DataFrame([[1, 'A0001', 'PD100', 10],
                                 [2, 'A0002', 'PD200', 20],
                                 [3, 'A0002', 'PD201', 77],
                                 [last_row_id, 'A0003', 'PD300', 30]],
                                columns=['row_id'] + df_old.reset_index().columns.tolist())
                   .set_index('row_id'))
    pd.testing.assert_frame_equal(df, df_expected)
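`df_old` and `df_new` are defined elsewhere in the test module. Judging from the expected frames above they could look roughly like the sketch below; the column names, the index layout and the qty of the third row of df_old are assumptions, only the remaining cell values can be read off the assertions.

import pandas as pd

# hypothetical shape: two "business key" columns covered by a UNIQUE constraint plus a
# quantity column; 'row_id' is an auto-incremented primary key generated by the database
df_old = pd.DataFrame({'order_id': ['A0001', 'A0002', 'A0002'],
                       'product_id': ['PD100', 'PD200', 'PD201'],
                       'qty': [10, 20, 22]}).set_index(['order_id', 'product_id'])

# df_new re-sends the three old rows (the qty of ('A0002', 'PD201') becomes 77) and adds
# one brand-new row, which matches the "4 + 3" reasoning in the postgres comment above
df_new = pd.DataFrame({'order_id': ['A0001', 'A0002', 'A0002', 'A0003'],
                       'product_id': ['PD100', 'PD200', 'PD201', 'PD300'],
                       'qty': [10, 20, 77, 30]}).set_index(['order_id', 'product_id'])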
Example #3
def test_mysql_pk_not_auto_incremented(engine, schema):
    if 'mysql' not in engine.dialect.dialect_description:
        pytest.skip('This test is only relevant for MySQL')

    table_name = TableNames.PK_MYSQL

    # upsert first df using pangres which creates the table automatically
    df1 = pd.DataFrame({'id': [0, 1], 'name': ['foo', 'bar']}).set_index('id')
    upsert_or_aupsert(con=engine,
                      df=df1,
                      table_name=table_name,
                      if_row_exists='update')

    # upsert second df
    df2 = pd.DataFrame({
        'id': [100, 200],
        'name': ['baz', 'qux']
    }).set_index('id')
    upsert_or_aupsert(con=engine,
                      df=df2,
                      table_name=table_name,
                      if_row_exists='update')

    # read df back
    df_db = select_table(engine=engine,
                         schema=schema,
                         table_name=table_name,
                         index_col='id')

    # check that MySQL kept the primary keys we provided instead of auto-incrementing them
    pd.testing.assert_frame_equal(df_db.sort_index(),
                                  pd.concat((df1, df2)).sort_index())
Example #4
def test_end_to_end(engine, schema, create_table, if_row_exists, df_expected):
    # config
    # dtype for the index: MySQL cannot have a flexible text length in a primary key
    dtype = {
        'profileid': VARCHAR(10)
    } if 'mysql' in engine.dialect.dialect_description else None
    table_name = TableNames.END_TO_END
    # common kwargs for every upsert_or_aupsert call below
    common_kwargs = dict(if_row_exists=if_row_exists,
                         dtype=dtype,
                         table_name=table_name)
    read_table = lambda: read_example_table_from_db(
        engine=engine, schema=schema, table_name=table_name).sort_index()

    # 1. create table
    upsert_or_aupsert(con=engine,
                      schema=schema,
                      df=df,
                      create_table=True,
                      **common_kwargs)
    pd.testing.assert_frame_equal(df, read_table())

    # 2. insert update/ignore
    upsert_or_aupsert(con=engine,
                      schema=schema,
                      df=df2,
                      create_table=create_table,
                      **common_kwargs)
    pd.testing.assert_frame_equal(df_expected, read_table())
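`df`, `df2` and the parametrized `df_expected` are defined elsewhere in the test module. The relationship between them is what matters: with if_row_exists='update' the rows of df2 overwrite conflicting rows of df, while with 'ignore' the rows already in the table win and only new keys are inserted. A small illustration with made-up data (only the index name 'profileid' is taken from the test above, everything else is invented) using combine_first, which roughly mirrors these two merge rules:

import pandas as pd

df = pd.DataFrame({'score': [1, 2]},
                  index=pd.Index(['abc', 'def'], name='profileid'))
df2 = pd.DataFrame({'score': [20, 3]},
                   index=pd.Index(['def', 'ghi'], name='profileid'))

# if_row_exists='update': on conflicting keys the rows of df2 win
df_expected_update = df2.combine_first(df)   # row 'def' takes the value from df2 (20)
# if_row_exists='ignore': on conflicting keys the rows already inserted (df) win
df_expected_ignore = df.combine_first(df2)   # row 'def' keeps the value from df (2)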
Example #5
def test_column_named_values(engine, schema):
    df = pd.DataFrame({'values': range(5, 9)},
                      index=pd.Index(range(1, 5), name='idx'))
    upsert_or_aupsert(con=engine,
                      schema=schema,
                      df=df,
                      if_row_exists='update',
                      table_name=TableNames.COLUMN_NAMED_VALUES)
Example #6
def test_bad_value_if_row_exists(_):
    df = pd.DataFrame({'id': [0]}).set_index('id')
    engine = create_engine('sqlite:///')
    with pytest.raises(ValueError) as excinfo:
        upsert_or_aupsert(con=engine,
                          df=df,
                          table_name=TableNames.NO_TABLE,
                          if_row_exists='test')
    assert 'must be "ignore" or "update"' in str(excinfo.value)
Example #7
def test_bad_column_name_postgres_raises(engine, schema):
    if 'postgres' not in engine.dialect.dialect_description:
        pytest.skip('This test is only relevant for PostgreSQL')
    df = pd.DataFrame({'id': [0], '(test)': [0]}).set_index('id')
    with pytest.raises(BadColumnNamesException) as exc_info:
        upsert_or_aupsert(con=engine,
                          schema=schema,
                          df=df,
                          table_name=TableNames.BAD_COLUMN_NAMES,
                          if_row_exists='update')
    assert 'does not seem to support column names with' in str(exc_info.value)
Example #8
def test_bad_text_insert(engine, schema):
    # add bad text in a column named 'text'
    create_random_text = lambda: ''.join(
        random.choice(bad_char_seq) for i in range(10))
    df_test = (pd.DataFrame({
        'text': [create_random_text() for i in range(10)]
    }).rename_axis(['profileid'], axis='index', inplace=False))
    upsert_or_aupsert(con=engine,
                      schema=schema,
                      table_name=TableNames.BAD_TEXT,
                      df=df_test,
                      if_row_exists='update')
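`bad_char_seq` is a module-level string of characters that tend to break naive SQL string handling. Its exact content is defined elsewhere; a plausible stand-in for running this snippet on its own could be:

# assumed stand-in: characters that commonly break naive escaping or string
# interpolation in SQL (percent signs, quotes, parentheses, backslashes, ...)
bad_char_seq = '%()[]{}\'"\\;,:?!$&@#/_- '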
Example #9
def test_index_with_null(engine, schema):
    df = pd.DataFrame({'ix': [None, 0], 'test': [0, 1]}).set_index('ix')
    with pytest.raises(IntegrityError):
        upsert_or_aupsert(con=engine,
                          schema=schema,
                          df=df,
                          table_name=TableNames.INDEX_WITH_NULL,
                          if_row_exists='update')
        # MySQL does not raise here: it only emits a warning and skips the offending row,
        # so execution reaches this point and we skip the test instead of asserting the error
        if 'mysql' in engine.dialect.dialect_description:
            pytest.skip('not tested with MySQL as only a warning is issued and the row is '
                        'skipped. Perhaps we could capture the warning with pytest?')
Example #10
def test_create_schema_none(engine, schema):
    """
    If `create_schema` is True in `pangres.upsert_or_aupsert` but the schema is `None`
    we should not raise an error even if it is a database that does not
    support schemas
    """
    df = pd.DataFrame({'id': [0]}).set_index('id')
    upsert_or_aupsert(con=engine,
                      schema=None,
                      df=df,
                      if_row_exists='update',
                      create_schema=True,
                      table_name=TableNames.CREATE_SCHEMA_NONE,
                      create_table=True)
Example #11
def test_cannot_insert_missing_table_no_create(engine, schema):
    """
    Check if an error is raised when trying to insert in a missing table
    and `create_table` is False.
    """
    df = pd.DataFrame({'id': [0]}).set_index('id')
    with pytest.raises((OperationalError, ProgrammingError)) as excinfo:
        upsert_or_aupsert(con=engine,
                          schema=schema,
                          df=df,
                          table_name=TableNames.NO_TABLE,
                          if_row_exists='update',
                          create_table=False)
    assert any(s in str(excinfo.value)
               for s in ('no such table', 'does not exist', "doesn't exist"))
Example #12
def insert_chunks(engine, schema, chunksize, nb_rows):
    df = _TestsExampleTable.create_example_df(nb_rows=nb_rows)
    # MySQL does not allow a flexible text length in indices/primary keys
    dtype = {
        'profileid': VARCHAR(10)
    } if 'mysql' in engine.dialect.dialect_description else None
    upsert_or_aupsert(schema=schema,
                      table_name=TableNames.VARIOUS_CHUNKSIZES,
                      df=df,
                      chunksize=chunksize,
                      con=engine,
                      if_row_exists='update',
                      dtype=dtype)
    df_db = read_example_table_from_db(
        engine=engine, schema=schema, table_name=TableNames.VARIOUS_CHUNKSIZES)
    # sort index because row order from the database is not guaranteed (e.g. with MySQL)
    pd.testing.assert_frame_equal(df.sort_index(), df_db.sort_index())
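`insert_chunks` is a helper rather than a test itself. A test exercising it across several chunk sizes could be parametrized as sketched below; the test name and the parameter values are illustrative, chosen so the chunk size is smaller than, close to, and larger than the number of rows.

import pytest

# illustrative chunk sizes: smaller than, roughly half of, and larger than nb_rows
@pytest.mark.parametrize('chunksize, nb_rows', [(1, 11), (5, 11), (100, 11)])
def test_various_chunksizes(engine, schema, chunksize, nb_rows):
    insert_chunks(engine=engine, schema=schema, chunksize=chunksize, nb_rows=nb_rows)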
Example #13
def test_only_index(engine, schema, if_row_exists):
    # config
    table_name = TableNames.INDEX_ONLY_INSERT

    # upsert df with only index
    df = pd.DataFrame({'ix': [1]}).set_index('ix')
    upsert_or_aupsert(con=engine,
                      schema=schema,
                      df=df,
                      table_name=table_name,
                      if_row_exists=if_row_exists)

    # check data integrity
    df_db = select_table(engine=engine, schema=schema, table_name=table_name)
    assert 'ix' in df_db.columns
    assert len(df_db) > 0
    assert df_db['ix'].iloc[0] == 1
Example #14
def test_create_schema_not_none(engine, schema):
    # local helpers
    is_postgres = 'postgres' in engine.dialect.dialect_description

    # overwrite schema
    schema = schema_for_testing_creation

    # config
    df = pd.DataFrame({'id': [0]}).set_index('id')
    table_name = TableNames.CREATE_SCHEMA_NOT_NONE

    # drop table and schema before the test (could not get my decorator to work with
    # another schema when having an optional arg schema=None due to variable scope issues)
    if is_postgres:
        drop_table(engine=engine, schema=schema, table_name=table_name)
        drop_schema(engine=engine, schema=schema)

    try:
        upsert_or_aupsert(con=engine,
                          schema=schema,
                          df=df,
                          if_row_exists='update',
                          create_schema=True,
                          table_name=table_name,
                          create_table=True)
        if not is_postgres:
            raise AssertionError(
                'Expected the upsert_or_aupsert to fail when not using a postgres database'
            )
    except Exception as e:
        # for postgres this should have worked
        if is_postgres:
            raise e
        else:
            assert isinstance(e, HasNoSchemaSystemException)
    finally:
        # drop table and schema after test
        if is_postgres:
            drop_table(engine=engine, schema=schema, table_name=table_name)
            drop_schema(engine=engine, schema=schema)
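`schema_for_testing_creation` is a module-level constant naming a throwaway schema that this test creates and drops. Its actual value is defined elsewhere; any name unlikely to already exist in the test database would do, for example:

# assumed value: a schema name that should not already exist in the test database
schema_for_testing_creation = 'pangres_create_schema_test'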
Example #15
def test_bad_column_names(engine, schema, iteration):
    # add columns with bad names
    # skip MySQL, which has stricter rules for column names
    if 'mysql' in engine.dialect.dialect_description:
        pytest.skip(
            'MySQL has very strict rules for column names so we do not even test it'
        )

    random_bad_col_name = ''.join(
        random.choice(bad_char_seq) for i in range(50))
    df_test = (pd.DataFrame({
        random_bad_col_name: ['test', None]
    }).rename_axis(['profileid'], axis='index', inplace=False))

    # psycopg2 can't process columns with "%" or "(" or ")" so we will need `fix_psycopg2_bad_cols`
    if 'postgres' in engine.dialect.dialect_description:
        df_test = fix_psycopg2_bad_cols(df_test)
    upsert_or_aupsert(con=engine,
                      schema=schema,
                      df=df_test,
                      table_name=TableNames.BAD_COLUMN_NAMES,
                      if_row_exists='update')
Example #16
def test_create_and_insert_table_multiindex(engine, schema):
    # local helpers
    table_name = TableNames.MULTIINDEX
    namespace = get_table_namespace(schema=schema, table_name=table_name)

    def read_from_db():
        df_db = select_table(engine=engine,
                             schema=schema,
                             table_name=table_name)
        df_db['ix3'] = pd.to_datetime(df_db['ix3'])
        return df_db.set_index(index_col)

    # dtype for the index: MySQL cannot have a flexible text length in a primary key
    dtype = {
        'ix2': VARCHAR(5)
    } if 'mysql' in engine.dialect.dialect_description else None

    # create
    upsert_or_aupsert(con=engine,
                      schema=schema,
                      df=df_multiindex,
                      table_name=table_name,
                      dtype=dtype,
                      if_row_exists='update')
    df_db = read_from_db()
    pd.testing.assert_frame_equal(df_db, df_multiindex)

    # insert
    upsert_or_aupsert(con=engine,
                      schema=schema,
                      df=df_multiindex2,
                      table_name=table_name,
                      dtype=dtype,
                      if_row_exists='update')
    df_db = read_from_db()
    pd.testing.assert_frame_equal(
        df_db, pd.concat(objs=[df_multiindex, df_multiindex2]))
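`df_multiindex`, `df_multiindex2` and `index_col` come from the surrounding module. From the way they are used here (a three-level index whose 'ix2' level must fit into VARCHAR(5) for MySQL and whose 'ix3' level is a datetime) they could look roughly like this; the value column and all the cell values are assumptions:

import pandas as pd

index_col = ['ix1', 'ix2', 'ix3']

# 'ix2' values are kept at five characters or fewer to fit the VARCHAR(5) dtype above
df_multiindex = pd.DataFrame({'ix1': [1, 1, 2],
                              'ix2': ['test', 'test2', 'test3'],
                              'ix3': pd.to_datetime(['2021-01-01', '2021-01-02', '2021-01-03']),
                              'foo': [1.1, 2.2, 3.3]}).set_index(index_col)

# the second frame uses new index combinations only, so the concat in the last
# assertion contains no duplicate keys
df_multiindex2 = pd.DataFrame({'ix1': [3],
                               'ix2': ['test4'],
                               'ix3': pd.to_datetime(['2021-01-04']),
                               'foo': [4.4]}).set_index(index_col)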
Example #17
def test_adapt_column_type(engine, schema):
    # skip for SQLite as it does not support this kind of column alteration
    if 'sqlite' in engine.dialect.dialect_description:
        pytest.skip('such column alteration is not possible with SQLite')

    # config
    table_name = TableNames.CHANGE_EMPTY_COL_TYPE
    dtype = {
        'id': VARCHAR(5)
    } if 'mysql' in engine.dialect.dialect_description else None
    df = pd.DataFrame({'id': ['foo'], 'empty_column': [None]}).set_index('id')
    # common kwargs for every upsert_or_aupsert call below
    common_kwargs = dict(con=engine,
                         schema=schema,
                         df=df,
                         table_name=table_name,
                         if_row_exists='update',
                         dtype=dtype)

    # 1. create table
    upsert_or_aupsert(**common_kwargs)
    # 2. add non-string data to the empty column and repeat upsert_or_aupsert
    df['empty_column'] = 1
    upsert_or_aupsert(**common_kwargs, adapt_dtype_of_empty_db_columns=True)
Example #18
def create_or_upsert_with_pangres(engine, schema, if_row_exists, df, chunksize, **kwargs):
    # MySQL does not allow a flexible text length in indices/primary keys
    dtype = {
        'profileid': VARCHAR(10)
    } if 'mysql' in engine.dialect.dialect_description else None
    upsert_or_aupsert(con=engine, df=df, schema=schema, chunksize=chunksize,
                      table_name=TableNames.BENCHMARK, if_row_exists=if_row_exists,
                      dtype=dtype, **kwargs)
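This last function is a helper for benchmarking rather than a test. A pytest-benchmark style invocation could look like the sketch below; it assumes the pytest-benchmark plugin and reuses the engine/schema fixtures and the _TestsExampleTable helper seen in Example #12, while the test name and parameter values are illustrative.

import pytest

@pytest.mark.parametrize('if_row_exists', ['update', 'ignore'])
def test_upsert_speed(engine, schema, benchmark, if_row_exists):
    # build an example frame and time the upsert of all its rows in chunks
    df = _TestsExampleTable.create_example_df(nb_rows=1000)
    benchmark(create_or_upsert_with_pangres, engine=engine, schema=schema,
              if_row_exists=if_row_exists, df=df, chunksize=1000)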