def test_alter_primary_key_column(sql):
    table_name = "##test_alter_primary_key_column"
    columns = {"_pk": "TINYINT", "A": "VARCHAR(1)"}
    primary_key_column = "_pk"
    sql.create.table(table_name, columns, primary_key_column="_pk")

    schema, _ = conversion.get_schema(sql.connection, table_name)
    primary_key_name = schema.at[primary_key_column, "pk_name"]

    sql.modify.primary_key(
        table_name,
        modify="drop",
        columns=primary_key_column,
        primary_key_name=primary_key_name,
    )
    sql.modify.column(
        table_name,
        modify="alter",
        column_name=primary_key_column,
        data_type="INT",
        is_nullable=False,
    )
    sql.modify.primary_key(
        table_name,
        modify="add",
        columns=primary_key_column,
        primary_key_name=primary_key_name,
    )

    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert schema.at[primary_key_column, "sql_type"] == "int"
    assert schema.at[primary_key_column, "pk_seq"] == 1

def test_sample(sql, data, caplog):
    # create cursor to perform operations
    cursor = sql.cursor()
    cursor.fast_executemany = True

    # get table schema for setting input data types and sizes
    schema, dataframe = conversion.get_schema(
        connection=sql, table_name="##test_conversion"
    )

    # only schema_name.table_name can be specified
    with pytest.raises(ValueError):
        conversion.get_schema(
            connection=sql, table_name="ServerName.dbo.##test_conversion"
        )

    # dynamic SQL object names
    table = dynamic.escape(cursor, "##test_conversion")
    columns = dynamic.escape(cursor, data.columns)

    # prepare values of dataframe for insert
    dataframe, values = conversion.prepare_values(schema, data)

    # prepare cursor for input data types and sizes
    cursor = conversion.prepare_cursor(schema, dataframe, cursor)

    # issue insert statement
    insert = ", ".join(columns)
    params = ", ".join(["?"] * len(columns))
    statement = f"""
    INSERT INTO {table} (
        {insert}
    ) VALUES (
        {params}
    )
    """
    cursor.executemany(statement, values)

    # read the inserted data; the id column is included only to ensure sort order
    columns = ", ".join(data.columns)
    statement = f"SELECT {columns} FROM {table} ORDER BY id ASC"
    result = conversion.read_values(statement, schema, connection=sql)

    # compare against the prepared dataframe, as values may have changed during insert preparation
    assert result.equals(dataframe.set_index(keys="id"))

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.conversion"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2]
        == "Nanosecond precision for dataframe columns ['_time'] will be rounded as SQL data type 'time' allows 7 max decimal places."
    )
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.conversion"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == "Nanosecond precision for dataframe columns ['_datetime2'] will be rounded as SQL data type 'datetime2' allows 7 max decimal places."
    )

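# A minimal sketch (hypothetical helper, not part of the library) that composes
# the conversion workflow exercised in test_sample above into one reusable
# function: fetch the schema, escape object names, prepare values and the
# cursor, then bulk insert. It assumes only the calls demonstrated above.
def _insert_dataframe_sketch(connection, table_name, data):
    cursor = connection.cursor()
    cursor.fast_executemany = True
    # the schema drives both value preparation and cursor input data types/sizes
    schema, _ = conversion.get_schema(connection=connection, table_name=table_name)
    table = dynamic.escape(cursor, table_name)
    columns = dynamic.escape(cursor, data.columns)
    dataframe, values = conversion.prepare_values(schema, data)
    cursor = conversion.prepare_cursor(schema, dataframe, cursor)
    insert = ", ".join(columns)
    params = ", ".join(["?"] * len(columns))
    statement = f"""
    INSERT INTO {table} (
        {insert}
    ) VALUES (
        {params}
    )
    """
    cursor.executemany(statement, values)
    # return the prepared dataframe, whose values may differ from the input
    return dataframe
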
def test_insert_alter_primary_key(sql, caplog):
    # initial insert
    table_name = "##test_insert_alter_primary_key"
    dataframe = pd.DataFrame(
        {
            "ColumnA": [0, 1, 2, 3],
            "ColumnB": [0, 1, 2, 3],
            "ColumnC": ["a", "b", "c", "d"],
        }
    ).set_index(keys=["ColumnA", "ColumnB"])
    dataframe = sql.create.table_from_dataframe(
        table_name, dataframe, primary_key="index"
    )

    schema, _ = conversion.get_schema(sql.connection, table_name)
    _, dtypes = conversion.sql_spec(schema, dataframe)
    assert dtypes == {
        "ColumnA": "tinyint",
        "ColumnB": "tinyint",
        "ColumnC": "varchar(1)",
    }
    assert schema.at["ColumnA", "pk_seq"] == 1
    assert schema.at["ColumnB", "pk_seq"] == 2
    assert pd.isna(schema.at["ColumnC", "pk_seq"])

    # insert that alters the primary key
    new = pd.DataFrame(
        {
            "ColumnA": [256, 257, 258, 259],
            "ColumnB": [4, 5, 6, 7],
            "ColumnC": ["e", "f", "g", "h"],
        }
    ).set_index(keys=["ColumnA", "ColumnB"])
    new = sql.insert.insert(table_name, new)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result.equals(pd.concat([dataframe, new]))
    _, dtypes = conversion.sql_spec(schema, new)
    assert dtypes == {
        "ColumnA": "smallint",
        "ColumnB": "tinyint",
        "ColumnC": "varchar(1)",
    }
    assert schema.at["ColumnA", "pk_seq"] == 1
    assert schema.at["ColumnB", "pk_seq"] == 2
    assert pd.isna(schema.at["ColumnC", "pk_seq"])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Altering column 'ColumnA' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )

def test_insert_create_table_indexpk(sql, caplog):
    table_name = "##test_insert_create_table_indexpk"
    dataframe = pd.DataFrame(
        {
            "ColumnA": [1, 2, 3],
            "ColumnB": ["06/22/2021", "06-22-2021", "2021-06-22"],
        },
        index=pd.Series([1, 2, 3], name="indexpk"),
    )
    dataframe = sql.insert.insert(table_name, dataframe=dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert schema.index[schema["pk_seq"].notna()].equals(
        pd.Index(["indexpk"], dtype="string")
    )

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert caplog.record_tuples[0][2] == f"Creating table '{table_name}'."
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[1][2]

def test_update_composite_pk(sql, caplog):
    table_name = "##test_update_composite_pk"
    dataframe = pd.DataFrame(
        {"ColumnA": [1, 2], "ColumnB": ["a", "b"], "ColumnC": [3, 4]}
    )
    dataframe = dataframe.set_index(keys=["ColumnA", "ColumnB"])
    dataframe = sql.create.table_from_dataframe(
        table_name, dataframe, primary_key="index"
    )

    # update values in table, using the composite primary key that came from the dataframe's index
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update.update(table_name, dataframe)

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result.equals(updated)

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]

def test_update_primary_key(sql, caplog):
    table_name = "##test_update_primary_key"
    dataframe = pd.DataFrame(
        {"ColumnA": [1, 2], "ColumnB": ["a", "b"], "ColumnC": [3, 4]}
    )
    dataframe = sql.create.table_from_dataframe(
        table_name, dataframe, primary_key="index"
    )

    # update values in table, using the SQL primary key that came from the dataframe's index
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update.update(table_name, dataframe=dataframe[["ColumnC"]])
    dataframe["ColumnC"] = updated["ColumnC"]

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert dataframe.equals(result[dataframe.columns])
    assert "_time_update" not in result.columns
    assert "_time_insert" not in result.columns

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]

def test_merge_upsert(sql, caplog):
    table_name = "##test_merge_upsert"
    dataframe = pd.DataFrame({"ColumnA": [3, 4]})
    sql.create.table_from_dataframe(table_name, dataframe, primary_key="index")

    # delete, but keep in SQL since upserting
    dataframe = dataframe[dataframe.index != 0].copy()
    # update
    dataframe.loc[dataframe.index == 1, "ColumnA"] = 5
    # insert
    dataframe = pd.concat(
        [
            dataframe,
            pd.DataFrame([6], columns=["ColumnA"], index=pd.Index([2], name="_index")),
        ]
    )

    # merge values into table, using the SQL primary key that came from the dataframe's index
    dataframe = sql.merge.merge(table_name, dataframe, upsert=True)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert dataframe.equals(result.loc[[1, 2]])
    assert result.loc[0].equals(pd.Series([3], dtype="UInt8", index=["ColumnA"]))
    assert "_time_update" not in result.columns
    assert "_time_insert" not in result.columns

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]

def test_merge_two_match_columns(sql, caplog):
    table_name = "##test_merge_two_match_columns"
    dataframe = pd.DataFrame(
        {"State": ["A", "B"], "ColumnA": [3, 4], "ColumnB": ["a", "b"]}
    )
    dataframe = sql.create.table_from_dataframe(
        table_name, dataframe, primary_key="index"
    )

    # delete
    dataframe = dataframe[dataframe.index != 0]
    # update
    dataframe.loc[dataframe.index == 1, "ColumnA"] = 5
    # insert
    dataframe = pd.concat(
        [
            dataframe,
            pd.DataFrame(
                {"State": ["C"], "ColumnA": [6], "ColumnB": ["d"]},
                index=pd.Index([2], name="_index"),
            ),
        ]
    )

    # merge values into table, using the primary key that came from the dataframe's index and the State column
    dataframe = sql.merge_meta.merge(
        table_name, dataframe, match_columns=["_index", "State"]
    )

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna() == [True, False])
    assert all(result["_time_insert"].notna() == [False, True])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )

def test_insert_include_metadata_timestamps(sql, caplog):
    table_name = "##test_insert_include_metadata_timestamps"

    # sample data
    dataframe = pd.DataFrame({"_bit": pd.Series([1, 0, None], dtype="boolean")})

    # create table
    sql.create.table(table_name, columns={"_bit": "BIT"})

    # insert data
    dataframe = sql.insert_meta.insert(table_name, dataframe)

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert all(result["_time_insert"].notna())
    assert result["_bit"].equals(dataframe["_bit"])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )

def _target_table(
    self,
    table_name: str,
    dataframe: pd.DataFrame,
    cursor: pyodbc.Cursor,
    additional_columns: List[str] = None,
    updating_table: bool = False,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Get target schema, potentially handle errors, and adjust dataframe
    contents before inserting into the target table.

    Parameters
    ----------
    table_name (str) : name of target table
    dataframe (pandas.DataFrame) : tabular data to insert
    cursor (pyodbc.connection.cursor) : cursor to execute statement
    additional_columns (list, default=None) : columns that will be generated by an SQL statement but are not in the dataframe
    updating_table (bool, default=False) : flag that indicates if the target table is being updated

    Returns
    -------
    schema (pandas.DataFrame) : table column specifications and conversion rules
    dataframe (pandas.DataFrame) : input dataframe with optimal values and types for inserting into SQL
    """
    for _ in range(0, self._adjust_sql_attempts + 1):
        try:
            # dataframe values converted according to SQL data type
            schema, dataframe = conversion.get_schema(
                self._connection,
                table_name,
                dataframe,
                additional_columns,
            )
            break
        except (
            custom_errors.SQLTableDoesNotExist,
            custom_errors.SQLColumnDoesNotExist,
            custom_errors.SQLInsufficientColumnSize,
        ) as failure:
            cursor.rollback()
            # dataframe values may be converted according to SQL data type
            dataframe = _exceptions.handle(
                failure,
                table_name,
                dataframe,
                updating_table,
                self.autoadjust_sql_objects,
                self._modify,
                self._create,
            )
            cursor.commit()
        except Exception as err:
            cursor.rollback()
            raise err
    else:
        raise RecursionError(
            f"adjust_sql_attempts={self._adjust_sql_attempts} reached"
        )

    return schema, dataframe

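# A standalone sketch of the retry pattern used by _target_table above: attempt
# the schema read, let a handler adjust SQL objects for known correctable
# failures, and stop once the attempt budget is exhausted. The names
# `attempts`, `read_schema`, and `adjust` are hypothetical stand-ins for
# illustration.
def _retry_sketch(attempts, read_schema, adjust):
    for _ in range(attempts + 1):
        try:
            result = read_schema()
            # success: leave the loop before exhausting the attempt budget
            break
        except (
            custom_errors.SQLTableDoesNotExist,
            custom_errors.SQLColumnDoesNotExist,
            custom_errors.SQLInsufficientColumnSize,
        ) as failure:
            # a known, correctable failure: adjust the SQL object, then retry
            adjust(failure)
    else:
        # for/else: the loop never hit break, so every attempt failed
        raise RecursionError(f"adjust_sql_attempts={attempts} reached")
    return result
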
def test_column_add(sql):
    table_name = "##test_column_add"
    columns = {"A": "VARCHAR"}
    sql.create.table(table_name, columns)

    sql.modify.column(
        table_name, modify="add", column_name="B", data_type="VARCHAR(20)"
    )
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert "B" in schema.index
    assert schema.at["B", "sql_type"] == "varchar"

    sql.modify.column(table_name, modify="add", column_name="C", data_type="BIGINT")
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert "C" in schema.index
    assert schema.at["C", "sql_type"] == "bigint"

def test_primary_key_one_column(sql):
    table_name = "##test_primary_key_one_column"
    columns = {"A": "INT", "B": "BIGINT", "C": "BIGINT", "D": "BIGINT"}
    sql.create.table(table_name, columns, not_nullable=["A", "B"])

    sql.modify.primary_key(
        table_name, modify="add", columns="A", primary_key_name="_pk_1"
    )
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert schema.at["A", "pk_seq"] == 1
    assert sum(schema["pk_seq"].notna()) == 1

    sql.modify.primary_key(
        table_name, modify="drop", columns="A", primary_key_name="_pk_1"
    )
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert all(schema["pk_seq"].isna())

def test_insert_alter_column(sql, caplog):
    table_name = "##test_insert_alter_column"
    sql.create.table(
        table_name,
        columns={"ColumnA": "TINYINT", "ColumnB": "VARCHAR(1)", "ColumnC": "TINYINT"},
    )

    dataframe = pd.DataFrame({"ColumnA": [1], "ColumnB": ["aaa"], "ColumnC": [100000]})
    dataframe = sql.insert_meta.insert(table_name, dataframe=dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_insert"].notna())

    _, dtypes = conversion.sql_spec(schema, dataframe)
    assert dtypes == {
        "ColumnA": "tinyint",
        "ColumnB": "varchar(3)",
        "ColumnC": "int",
        "_time_insert": "datetime2",
    }

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Altering column 'ColumnB' in table '{table_name}' to data type 'varchar(3)' with 'is_nullable=True'."
    )
    assert caplog.record_tuples[2][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2]
        == f"Altering column 'ColumnC' in table '{table_name}' to data type 'int' with 'is_nullable=True'."
    )

def test_insert_add_and_alter_column(sql, caplog):
    table_name = "##test_insert_add_and_alter_column"
    dataframe = pd.DataFrame({"ColumnA": [0, 1, 2, 3], "ColumnB": [0, 1, 2, 3]})
    dataframe = sql.create_meta.table_from_dataframe(
        table_name, dataframe, primary_key="index"
    )

    new = pd.DataFrame(
        {
            "ColumnA": [4, 5, 6, 7],
            "ColumnB": [256, 257, 258, 259],
            "ColumnC": [0, 1, 2, 3],
        },
        index=[4, 5, 6, 7],
    )
    new.index.name = "_index"
    new = sql.insert_meta.insert(table_name, new)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result[new.columns].equals(pd.concat([dataframe, new]))
    assert all(result["_time_insert"].notna())

    _, dtypes = conversion.sql_spec(schema, dataframe)
    assert dtypes == {
        "_index": "tinyint",
        "ColumnA": "tinyint",
        "ColumnB": "smallint",
        "_time_insert": "datetime2",
        "ColumnC": "tinyint",
    }

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Creating column 'ColumnC' in table '{table_name}' with data type 'tinyint'."
    )
    assert caplog.record_tuples[2][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2]
        == f"Altering column 'ColumnB' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )

def test_column_drop(sql):
    table_name = "##column_drop"
    columns = {"A": "VARCHAR", "B": "VARCHAR"}
    sql.create.table(table_name, columns)

    sql.modify.column(table_name, modify="drop", column_name="B")
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert "B" not in schema.index

def test_column_alter(sql):
    table_name = "##test_column_alter"
    columns = {"A": "VARCHAR(10)", "B": "BIGINT", "C": "BIGINT", "D": "BIGINT"}
    sql.create.table(table_name, columns)

    sql.modify.column(table_name, modify="alter", column_name="B", data_type="INT")
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert "B" in schema.index
    assert schema.at["B", "sql_type"] == "int"
    assert schema.at["B", "is_nullable"]

    sql.modify.column(
        table_name, modify="alter", column_name="C", data_type="INT", is_nullable=False
    )
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert "C" in schema.index
    assert schema.at["C", "sql_type"] == "int"
    assert not schema.at["C", "is_nullable"]

def test_merge_alter_column(sql, caplog):
    table_name = "##test_merge_alter_column"
    dataframe = pd.DataFrame({"ColumnA": [1, 2], "ColumnB": ["a", "b"]})
    dataframe = sql.create.table_from_dataframe(
        table_name, dataframe, primary_key="index"
    )

    # merge using the SQL primary key that came from the dataframe's index
    dataframe = dataframe[dataframe.index != 0]
    dataframe["ColumnA"] = dataframe["ColumnA"].astype("Int64")
    dataframe.loc[1, "ColumnA"] = 10000
    dataframe.loc[1, "ColumnB"] = "bbbbb"
    dataframe = sql.merge_meta.merge(table_name, dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna())
    assert all(result["_time_insert"].isna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 5
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[3][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2]
        == f"Altering column 'ColumnA' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )
    assert caplog.record_tuples[4][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[4][1] == logging.WARNING
    assert (
        caplog.record_tuples[4][2]
        == f"Altering column 'ColumnB' in table '{table_name}' to data type 'varchar(5)' with 'is_nullable=False'."
    )

def test_update_two_match_columns(sql, caplog):
    table_name = "##test_update_two_match_columns"
    dataframe = pd.DataFrame(
        {"ColumnA": [1, 2], "ColumnB": ["a", "b"], "ColumnC": [3, 4]}
    )
    dataframe = sql.create.table_from_dataframe(
        table_name, dataframe, primary_key="sql"
    )

    # update values in table, using the primary key created in SQL and ColumnA
    schema, _ = conversion.get_schema(sql.connection, table_name)
    dataframe = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update_meta.update(
        table_name, dataframe, match_columns=["_pk", "ColumnA"]
    )

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert updated.equals(result[updated.columns])
    assert result["_time_update"].notna().all()

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )

def test_update_alter_column(sql, caplog):
    table_name = "##test_update_alter_column"
    dataframe = pd.DataFrame(
        {"ColumnA": [1, 2], "ColumnB": ["a", "b"], "ColumnC": [0, 0]}
    )
    sql.create.table_from_dataframe(table_name, dataframe, primary_key=None)

    # update using ColumnA
    dataframe["ColumnB"] = ["aaa", "bbb"]
    dataframe["ColumnC"] = [256, 256]
    updated = sql.update_meta.update(table_name, dataframe, match_columns=["ColumnA"])
    dataframe[["ColumnB", "ColumnC"]] = updated[["ColumnB", "ColumnC"]]

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result[dataframe.columns].equals(dataframe)
    assert result["_time_update"].notna().all()

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 4
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2]
        == f"Altering column 'ColumnB' in table '{table_name}' to data type 'varchar(3)' with 'is_nullable=False'."
    )
    assert caplog.record_tuples[3][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2]
        == f"Altering column 'ColumnC' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )

def get_schema(self, table_name: str):
    """Get the schema of an SQL table and the defined conversion rules between data types.

    Parameters
    ----------
    table_name (str) : table name to read schema from

    Returns
    -------
    schema (pandas.DataFrame) : table column specifications and conversion rules
    """
    schema, _ = conversion.get_schema(self.connection, table_name)
    return schema

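# A usage sketch (hypothetical helper) built on the get_schema accessor above:
# return a table's primary key column names in key order, using the 'pk_seq'
# column that the tests in this section assert against.
def _describe_primary_key_sketch(sql, table_name):
    schema = sql.get_schema(table_name)
    # rows with a non-null pk_seq participate in the primary key
    pk = schema[schema["pk_seq"].notna()].sort_values("pk_seq")
    return list(pk.index)
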
def test_merge_add_column(sql, caplog):
    table_name = "##test_merge_add_column"
    dataframe = pd.DataFrame({"ColumnA": [1, 2]})
    dataframe = sql.create.table_from_dataframe(
        table_name, dataframe, primary_key="index"
    )

    # merge using the SQL primary key that came from the dataframe's index
    dataframe = dataframe[dataframe.index != 0]
    dataframe["NewColumn"] = [3]
    dataframe = sql.merge_meta.merge(table_name, dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna())
    assert all(result["_time_insert"].isna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 4
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[3][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2]
        == f"Creating column 'NewColumn' in table '{table_name}' with data type 'tinyint'."
    )

def test_read_values_errors(sql):
    schema, _ = conversion.get_schema(connection=sql, table_name="##test_conversion")

    # error for a column missing in the schema definition
    with pytest.raises(AttributeError):
        conversion.read_values(
            statement="SELECT * FROM ##test_conversion",
            schema=schema[schema.index != "id"],
            connection=sql,
        )

    # error for a primary key missing from the query statement
    with pytest.raises(KeyError):
        conversion.read_values(
            statement="SELECT _bit FROM ##test_conversion",
            schema=schema,
            connection=sql,
        )

def test_insert_singles(sql):
    table_name = "##test_insert_singles"

    # create table
    columns = {
        "ColumnA": "TINYINT",
        "ColumnB": "INT",
        "ColumnC": "DATE",
    }
    sql.create.table(table_name, columns)

    schema, _ = conversion.get_schema(sql.connection, table_name)

    # single value
    dataframe = pd.DataFrame({"ColumnA": [1]})
    dataframe = sql.insert.insert(table_name, dataframe)
    result = conversion.read_values(
        f"SELECT ColumnA FROM {table_name}", schema, sql.connection
    )
    assert all(result["ColumnA"] == [1])

    # single column
    dataframe = pd.DataFrame({"ColumnB": [2, 3, 4]})
    dataframe = sql.insert.insert(table_name, dataframe)
    result = conversion.read_values(
        f"SELECT ColumnB FROM {table_name}", schema, sql.connection
    )
    assert result["ColumnB"].equals(pd.Series([pd.NA, 2, 3, 4], dtype="Int32"))

    # single column of dates
    dataframe = pd.DataFrame(
        {"ColumnC": ["06-22-2021", "06-22-2021"]}, dtype="datetime64[ns]"
    )
    dataframe = sql.insert.insert(table_name, dataframe)
    result = conversion.read_values(
        f"SELECT ColumnC FROM {table_name}", schema, sql.connection
    )
    assert result["ColumnC"].equals(
        pd.Series(
            [pd.NA, pd.NA, pd.NA, pd.NA, "06-22-2021", "06-22-2021"],
            dtype="datetime64[ns]",
        )
    )

def test_merge_create_table(sql, caplog):
    table_name = "##test_merge_create_table"
    dataframe = pd.DataFrame(
        {
            "_pk": [1, 2],
            "ColumnA": [5, 6],
            "ColumnB": ["06/22/2021", "2023-08-31"],
        }
    )
    dataframe = sql.merge_meta.merge(table_name, dataframe, match_columns=["_pk"])

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].isna())
    assert all(result["_time_insert"].notna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 4
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert caplog.record_tuples[0][2] == f"Creating table '{table_name}'."
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert "Created table" in caplog.record_tuples[1][2]
    assert caplog.record_tuples[2][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2]
        == f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[3][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )

def test_insert_composite_pk(sql):
    table_name = "##test_insert_composite_pk"
    columns = {
        "ColumnA": "TINYINT",
        "ColumnB": "VARCHAR(5)",
        "ColumnC": "BIGINT",
    }
    sql.create.table(table_name, columns, primary_key_column=["ColumnA", "ColumnB"])

    dataframe = pd.DataFrame({"ColumnA": [1], "ColumnB": ["12345"], "ColumnC": [1]})
    dataframe = sql.insert.insert(table_name, dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert all(result.index == pd.MultiIndex.from_tuples([(1, "12345")]))
    assert all(result["ColumnC"] == 1)

def test_insert_create_table(sql, caplog):
    table_name = "##test_insert_create_table"
    dataframe = pd.DataFrame(
        {
            "ColumnA": [1, 2, 3],
            "ColumnB": ["06/22/2021", "06-22-2021", "2021-06-22"],
        }
    )
    dataframe = sql.insert_meta.insert(table_name, dataframe=dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    expected = pd.DataFrame(
        {
            "ColumnA": pd.Series([1, 2, 3], dtype="UInt8"),
            "ColumnB": pd.Series(
                [pd.Timestamp(year=2021, month=6, day=22)] * 3,
                dtype="datetime64[ns]",
            ),
        }
    ).set_index(keys="ColumnA")
    assert result[expected.columns].equals(expected)
    assert all(result["_time_insert"].notna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert caplog.record_tuples[0][2] == f"Creating table '{table_name}'."
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[1][2]
    assert caplog.record_tuples[2][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )

def test_merge_override_timestamps(sql, caplog):
    table_name = "##test_merge_override_timestamps"
    dataframe = pd.DataFrame({"ColumnA": [3, 4]})
    dataframe = sql.create.table_from_dataframe(
        table_name, dataframe, primary_key="index"
    )

    # update
    dataframe.loc[dataframe.index == 1, "ColumnA"] = 5

    # merge values into table, using the SQL primary key that came from the dataframe's index
    dataframe = sql.merge.merge(
        table_name, dataframe, include_metadata_timestamps=True
    )

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna() == [True, True])
    assert all(result["_time_insert"].notna() == [False, False])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )

def test_insert_add_column(sql, caplog):
    table_name = "##test_insert_add_column"
    sql.create.table(table_name, columns={"ColumnA": "TINYINT"})

    dataframe = pd.DataFrame({"ColumnA": [1], "ColumnB": [2], "ColumnC": ["zzz"]})
    dataframe = sql.insert_meta.insert(table_name, dataframe=dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_insert"].notna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Creating column 'ColumnB' in table '{table_name}' with data type 'tinyint'."
    )
    assert caplog.record_tuples[2][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2]
        == f"Creating column 'ColumnC' in table '{table_name}' with data type 'varchar(3)'."
    )

def test_merge_composite_pk(sql, caplog):
    table_name = "##test_merge_composite_pk"
    dataframe = pd.DataFrame(
        {"State": ["A", "B"], "ColumnA": [3, 4], "ColumnB": ["a", "b"]}
    ).set_index(keys=["State", "ColumnA"])
    dataframe = sql.create.table_from_dataframe(
        table_name, dataframe, primary_key="index"
    )

    # delete
    dataframe = dataframe[dataframe.index != ("A", 3)].copy()
    # update
    dataframe.loc[dataframe.index == ("B", 4), "ColumnB"] = "c"
    # insert
    dataframe = pd.concat(
        [
            dataframe,
            pd.DataFrame(
                {"State": ["C"], "ColumnA": [6], "ColumnB": ["d"]}
            ).set_index(keys=["State", "ColumnA"]),
        ]
    )

    dataframe = sql.merge.merge(table_name, dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert result[dataframe.columns].equals(dataframe)
    assert "_time_update" not in result
    assert "_time_insert" not in result

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]

def test_update_override_timestamps(sql, caplog):
    table_name = "##test_update_override_timestamps"
    dataframe = pd.DataFrame(
        {"ColumnA": [1, 2], "ColumnB": ["a", "b"], "ColumnC": [3, 4]}
    )
    dataframe = sql.create.table_from_dataframe(
        table_name, dataframe, primary_key="index"
    )

    # update values in table, using the SQL primary key that came from the dataframe's index
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update.update(
        table_name, dataframe=dataframe[["ColumnC"]], include_metadata_timestamps=True
    )
    dataframe["ColumnC"] = updated["ColumnC"]

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert dataframe.equals(result[dataframe.columns])
    assert result["_time_update"].notna().all()

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2]
        == f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )