Example #1
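These snippets come from the pangres test suite and are shown without their imports or fixtures. A minimal sketch of the shared context they assume (module paths and helper locations are assumptions, check your pangres version):

import logging

import pandas as pd
import pytest
from sqlalchemy import text, VARCHAR

from pangres import upsert
from pangres.exceptions import (HasNoSchemaSystemException,
                                MissingIndexLevelInSqlException)

# "engine" and "schema" are pytest fixtures providing a SQLAlchemy Engine and
# an optional schema name; PandasSpecialEngine, TableNames, _TestsExampleTable,
# commit, drop_schema, get_table_namespace and _sqla_gt14 are pangres
# internals/test helpers.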
def test_schema_creation(engine, schema):
    # overwrite the schema fixture with a schema dedicated to creation tests
    schema = schema_for_testing_creation
    table_name = TableNames.NO_TABLE

    # schema may already exist before testing
    if 'postgres' in engine.dialect.dialect_description:
        drop_schema(engine=engine, schema=schema)

    # then try to create a schema from a PandasSpecialEngine instance
    dummy_df = pd.DataFrame(index=pd.Index(data=[0], name='id'))
    with engine.connect() as connection:
        try:
            pse = PandasSpecialEngine(connection=connection,
                                      schema=schema,
                                      table_name=table_name,
                                      df=dummy_df)
            # this should raise HasNoSchemaSystemException
            # if we are not on a postgres engine
            assert not pse.schema_exists()
            pse.create_schema_if_not_exists()
            commit(connection)
            assert pse.schema_exists()
        except Exception as e:
            # reraise unless this is the expected HasNoSchemaSystemException
            # on a database that has no schema system
            if pse._db_type == 'postgres' or not isinstance(
                    e, HasNoSchemaSystemException):
                raise e
        finally:
            if pse._db_type == 'postgres':
                drop_schema(engine=engine, schema=schema)
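The commit helper used throughout is defined by the test suite; a plausible minimal version (an assumption, not pangres's verbatim code) only commits when the connection supports it:

def commit(connection):
    # Connection.commit() exists on SQLAlchemy 1.4+ "future" connections and
    # on all connections in SQLAlchemy 2.0; older versions autocommit, so
    # doing nothing there is fine
    if hasattr(connection, 'commit'):
        connection.commit()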
Example #2
def test_connection_usable_after_upsert(engine, schema):
    df = pd.DataFrame(index=pd.Index([0], name='ix'))
    with engine.connect() as con:
        # do an arbitrary upsert operation
        upsert(con=con,
               df=df,
               schema=schema,
               table_name=TableNames.REUSE_CONNECTION,
               if_row_exists='update')
        # attempt to reuse the connection
        result = con.execute(text('SELECT 1;')).scalar()
        assert result == 1
        commit(con)
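For context outside the fixtures, a minimal standalone upsert looks roughly like this (a sketch; the in-memory SQLite URL and table name are placeholders):

import pandas as pd
from sqlalchemy import create_engine
from pangres import upsert

engine = create_engine('sqlite://')  # in-memory database for illustration
df = pd.DataFrame({'ix': [0], 'value': ['foo']}).set_index('ix')
with engine.connect() as con:
    # creates the table if it does not exist, then inserts or updates
    # rows based on the primary key built from the index 'ix'
    upsert(con=con, df=df, table_name='example', if_row_exists='update')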
Example #3
def test_table_creation(engine, schema):
    # MySQL needs an explicit length for indexed text columns, hence VARCHAR(5)
    dtype = {
        'profileid': VARCHAR(5)
    } if 'mysql' in engine.dialect.dialect_description else None
    df = _TestsExampleTable.create_example_df(nb_rows=10)
    with engine.connect() as connection:
        pse = PandasSpecialEngine(connection=connection,
                                  schema=schema,
                                  dtype=dtype,
                                  table_name=TableNames.TABLE_CREATION,
                                  df=df)
        assert not pse.table_exists()
        pse.create_table_if_not_exists()
        commit(connection)
        assert pse.table_exists()
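table_exists is pangres-internal; the equivalent check with plain SQLAlchemy 1.4+ would be along these lines (a sketch):

from sqlalchemy import inspect

with engine.connect() as connection:
    assert inspect(connection).has_table(TableNames.TABLE_CREATION,
                                         schema=schema)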
Example #4
def test_change_column_type_if_column_empty(engine, schema, caplog,
                                            new_empty_column_value):
    # store arguments we will use for multiple PandasSpecialEngine instances
    table_name = TableNames.CHANGE_EMPTY_COL_TYPE
    common_kwargs = dict(schema=schema, table_name=table_name)
    common_kwargs['dtype'] = {
        'profileid': VARCHAR(5)
    } if 'mysql' in engine.dialect.dialect_description else None

    # JSON-like values will not work for sqlalchemy < 1.4
    # also skip SQLite as it does not support such column alteration
    json_like = isinstance(new_empty_column_value, (dict, list))
    if json_like and not _sqla_gt14():
        pytest.skip('JSON-like values will not work for sqlalchemy < 1.4')
    elif 'sqlite' in engine.dialect.dialect_description:
        pytest.skip('such column alteration is not possible with SQLite')

    # create our example table
    df = pd.DataFrame({
        'profileid': ['foo'],
        'empty_col': [None]
    }).set_index('profileid')
    with engine.connect() as connection:
        pse = PandasSpecialEngine(connection=connection,
                                  df=df,
                                  **common_kwargs)
        pse.create_table_if_not_exists()
        commit(connection)
        assert pse.table_exists()

    # recreate a PandasSpecialEngine instance with a new df (so the table model gets refreshed)
    # the line below is a "hack" to set any type of element as a column value
    # without pandas trying to broadcast it; useful when passing a list or dict
    df['empty_col'] = df.index.map(lambda x: new_empty_column_value)
    with engine.connect() as connection:
        pse = PandasSpecialEngine(connection=connection,
                                  df=df,
                                  **common_kwargs)
        with caplog.at_level(logging.INFO, logger='pangres'):
            pse.adapt_dtype_of_empty_db_columns()
        assert len(caplog.records) == 1
        assert 'Changed type of column empty_col' in caplog.text
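Outside the test suite, this behaviour is exposed through the adapt_dtype_of_empty_db_columns flag of upsert; a sketch (connection URL and table name are placeholders):

import pandas as pd
from sqlalchemy import create_engine
from pangres import upsert

engine = create_engine('postgresql://user:password@localhost:5432/db')  # placeholder
df = pd.DataFrame({'profileid': ['foo'], 'empty_col': [1.5]}).set_index('profileid')
with engine.connect() as con:
    # if empty_col exists in the database but only contains NULLs, its SQL
    # type may be altered to fit the incoming data before the upsert
    upsert(con=con, df=df, table_name='my_table', if_row_exists='update',
           adapt_dtype_of_empty_db_columns=True)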
Example #5
def test_add_new_columns(engine, schema, axis):
    # store arguments we will use for multiple PandasSpecialEngine instances
    table_name = TableNames.ADD_NEW_COLUMN
    common_kwargs = dict(schema=schema, table_name=table_name)
    common_kwargs['dtype'] = {
        'profileid': VARCHAR(5)
    } if 'mysql' in engine.dialect.dialect_description else None

    # create our example table
    df = _TestsExampleTable.create_example_df(nb_rows=10)
    with engine.connect() as connection:
        pse = PandasSpecialEngine(connection=connection,
                                  df=df,
                                  **common_kwargs)
        pse.create_table_if_not_exists()
        commit(connection)
        assert pse.table_exists()

    # we need to recreate an instance of PandasSpecialEngine
    # so that a new table model with the new columns is created then add columns
    with engine.connect() as connection:
        # error message in case we get unexpected values for "axis"
        # or make a typo in our if/elif statements
        err_msg = f'Expected axis to be one of index, column. Got {axis}'
        # add a new index level or new columns (no JSON ones,
        # they are not supported by sqlalchemy compilers)
        if axis == 'index':
            df['new_index_col'] = 'foo'
            df.set_index('new_index_col', append=True, inplace=True)
        elif axis == 'column':
            df = df.assign(
                new_text_col='test',
                new_int_col=0,
                new_float_col=1.1,
                new_bool_col=False,
                new_dt_col=pd.Timestamp('2020-01-01'),
                # create this col for later
                empty_col=None)
        else:
            raise AssertionError(err_msg)

        # recreate PandasSpecialEngine
        pse = PandasSpecialEngine(connection=connection,
                                  df=df,
                                  **common_kwargs)

        # check if we get an error when trying to add an index level
        if axis == 'index':
            with pytest.raises(MissingIndexLevelInSqlException) as exc_info:
                pse.add_new_columns()
            assert 'Cannot add' in str(exc_info.value)
            return
        elif axis == 'column':
            pse.add_new_columns()
            commit(connection)
        else:
            raise AssertionError(err_msg)

    # check that the columns were correctly added;
    # since we returned early for 'index', the axis must now be 'column'
    assert axis == 'column'
    with engine.connect() as connection:
        ns = get_table_namespace(schema=schema, table_name=table_name)
        df_db = pd.read_sql(text(f'SELECT * FROM {ns} LIMIT 0;'),
                            con=connection,
                            index_col='profileid')
        assert set(df.columns) == set(df_db.columns)
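The public counterpart of add_new_columns is the flag of the same name on upsert (again a sketch with placeholder names):

import pandas as pd
from sqlalchemy import create_engine
from pangres import upsert

engine = create_engine('sqlite://')
df = pd.DataFrame({'profileid': ['foo'], 'brand_new_col': [1]}).set_index('profileid')
with engine.connect() as con:
    # columns present in df but missing from the target table are added
    # (new index levels, by contrast, raise an exception as tested above)
    upsert(con=con, df=df, table_name='example', if_row_exists='update',
           add_new_columns=True)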