Example #1
def test_alter_primary_key_column(sql):

    table_name = "##test_alter_primary_key_column"
    columns = {"_pk": "TINYINT", "A": "VARCHAR(1)"}
    primary_key_column = "_pk"
    sql.create.table(table_name, columns, primary_key_column="_pk")

    schema, _ = conversion.get_schema(sql.connection, table_name)
    primary_key_name = schema.at[primary_key_column, "pk_name"]

    sql.modify.primary_key(
        table_name,
        modify="drop",
        columns=primary_key_column,
        primary_key_name=primary_key_name,
    )
    sql.modify.column(
        table_name,
        modify="alter",
        column_name=primary_key_column,
        data_type="INT",
        is_nullable=False,
    )
    sql.modify.primary_key(
        table_name,
        modify="add",
        columns=primary_key_column,
        primary_key_name=primary_key_name,
    )

    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert schema.at[primary_key_column, "sql_type"] == "int"
    assert schema.at[primary_key_column, "pk_seq"] == 1
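For reference, the drop/alter/add sequence above corresponds roughly to the raw T-SQL shown in the hedged pyodbc sketch below; the DSN and constraint name are placeholders, not values taken from the example.

import pyodbc

# placeholder connection; autocommit so each ALTER applies immediately
connection = pyodbc.connect("DSN=example", autocommit=True)
cursor = connection.cursor()

# drop the existing primary key constraint (constraint name is a placeholder)
cursor.execute("ALTER TABLE ##test_alter_primary_key_column DROP CONSTRAINT pk_example")
# alter the column's data type; a primary key column must remain NOT NULL
cursor.execute("ALTER TABLE ##test_alter_primary_key_column ALTER COLUMN _pk INT NOT NULL")
# re-create the primary key on the altered column
cursor.execute("ALTER TABLE ##test_alter_primary_key_column ADD CONSTRAINT pk_example PRIMARY KEY (_pk)")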
Example #2
def test_sample(sql, data, caplog):

    # create cursor to perform operations
    cursor = sql.cursor()
    cursor.fast_executemany = True

    # get table schema for setting input data types and sizes
    schema, dataframe = conversion.get_schema(connection=sql,
                                              table_name="##test_conversion")

    # only schema_name.table_name can be specified
    with pytest.raises(ValueError):
        conversion.get_schema(connection=sql,
                              table_name="ServerName.dbo.##test_conversion")

    # dynamic SQL object names
    table = dynamic.escape(cursor, "##test_conversion")
    columns = dynamic.escape(cursor, data.columns)

    # prepare values of dataframe for insert
    dataframe, values = conversion.prepare_values(schema, data)

    # prepare cursor for input data types and sizes
    cursor = conversion.prepare_cursor(schema, dataframe, cursor)

    # issue insert statement
    insert = ", ".join(columns)
    params = ", ".join(["?"] * len(columns))
    statement = f"""
    INSERT INTO
    {table} (
        {insert}
    ) VALUES (
        {params}
    )
    """
    cursor.executemany(statement, values)

    # read data, excluding the id column that exists only to ensure sorting
    columns = ", ".join([x for x in data.columns])
    statement = f"SELECT {columns} FROM {table} ORDER BY id ASC"
    result = conversion.read_values(statement, schema, connection=sql)

    # compare the result to the insert; compare against dataframe since values may have changed during insert preparation
    assert result.equals(dataframe.set_index(keys="id"))

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.conversion"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2] ==
        "Nanosecond precision for dataframe columns ['_time'] will be rounded as SQL data type 'time' allows 7 max decimal places."
    )
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.conversion"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        "Nanosecond precision for dataframe columns ['_datetime2'] will be rounded as SQL data type 'datetime2' allows 7 max decimal places."
    )
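The core pyodbc pattern exercised above, a parameterized INSERT with fast_executemany, can be sketched on its own; the DSN, table, and values below are placeholders.

import pyodbc

connection = pyodbc.connect("DSN=example")  # placeholder DSN
cursor = connection.cursor()
cursor.fast_executemany = True

# one "?" parameter marker per column; pyodbc binds each tuple in values
statement = "INSERT INTO ##example (ColumnA, ColumnB) VALUES (?, ?)"
values = [(1, "a"), (2, "b"), (3, "c")]
cursor.executemany(statement, values)
connection.commit()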
def test_insert_alter_primary_key(sql, caplog):

    # initial insert
    table_name = "##test_insert_alter_primary_key"
    dataframe = pd.DataFrame({
        "ColumnA": [0, 1, 2, 3],
        "ColumnB": [0, 1, 2, 3],
        "ColumnC": ["a", "b", "c", "d"],
    }).set_index(keys=["ColumnA", "ColumnB"])
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    schema, _ = conversion.get_schema(sql.connection, table_name)
    _, dtypes = conversion.sql_spec(schema, dataframe)
    assert dtypes == {
        "ColumnA": "tinyint",
        "ColumnB": "tinyint",
        "ColumnC": "varchar(1)",
    }
    assert schema.at["ColumnA", "pk_seq"] == 1
    assert schema.at["ColumnB", "pk_seq"] == 2
    assert pd.isna(schema.at["ColumnC", "pk_seq"])

    # insert that alters primary key
    new = pd.DataFrame({
        "ColumnA": [256, 257, 258, 259],
        "ColumnB": [4, 5, 6, 7],
        "ColumnC": ["e", "f", "g", "h"],
    }).set_index(keys=["ColumnA", "ColumnB"])
    new = sql.insert.insert(table_name, new)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result.equals(pd.concat([dataframe, new]))
    _, dtypes = conversion.sql_spec(schema, new)
    assert dtypes == {
        "ColumnA": "smallint",
        "ColumnB": "tinyint",
        "ColumnC": "varchar(1)",
    }
    assert schema.at["ColumnA", "pk_seq"] == 1
    assert schema.at["ColumnB", "pk_seq"] == 2
    assert pd.isna(schema.at["ColumnC", "pk_seq"])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Altering column 'ColumnA' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )
def test_insert_create_table_indexpk(sql, caplog):

    table_name = "##test_insert_create_table_indexpk"

    dataframe = pd.DataFrame(
        {
            "ColumnA": [1, 2, 3],
            "ColumnB": ["06/22/2021", "06-22-2021", "2021-06-22"]
        },
        index=pd.Series([1, 2, 3], name="indexpk"),
    )

    dataframe = sql.insert.insert(table_name, dataframe=dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert schema.index[schema["pk_seq"].notna()].equals(
        pd.Index(["indexpk"], dtype="string"))

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert caplog.record_tuples[0][2] == f"Creating table '{table_name}'."
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[1][2]
Example #5
def test_update_composite_pk(sql, caplog):

    table_name = "##test_update_composite_pk"
    dataframe = pd.DataFrame({
        "ColumnA": [1, 2],
        "ColumnB": ["a", "b"],
        "ColumnC": [3, 4]
    })
    dataframe = dataframe.set_index(keys=["ColumnA", "ColumnB"])
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # update values in the table, using the composite primary key that came from the dataframe's index
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update.update(table_name, dataframe)

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result.equals(updated)

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
Example #6
def test_update_primary_key(sql, caplog):

    table_name = "##test_update_primary_key"
    dataframe = pd.DataFrame({
        "ColumnA": [1, 2],
        "ColumnB": ["a", "b"],
        "ColumnC": [3, 4]
    })
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # update values in table, using the SQL primary key that came from the dataframe's index
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update.update(table_name, dataframe=dataframe[["ColumnC"]])
    dataframe["ColumnC"] = updated["ColumnC"]

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert dataframe.equals(result[dataframe.columns])
    assert "_time_update" not in result.columns
    assert "_time_insert" not in result.columns

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
Example #7
def test_merge_upsert(sql, caplog):

    table_name = "##test_merge_upsert"
    dataframe = pd.DataFrame({"ColumnA": [3, 4]})
    sql.create.table_from_dataframe(table_name, dataframe, primary_key="index")

    # delete, but keep in SQL since upserting
    dataframe = dataframe[dataframe.index != 0].copy()
    # update
    dataframe.loc[dataframe.index == 1, "ColumnA"] = 5
    # insert
    dataframe = pd.concat([
        dataframe,
        pd.DataFrame([6],
                     columns=["ColumnA"],
                     index=pd.Index([2], name="_index")),
    ])

    # merge values into table, using the SQL primary key that came from the dataframe's index
    dataframe = sql.merge.merge(table_name, dataframe, upsert=True)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert dataframe.equals(result.loc[[1, 2]])
    assert result.loc[0].equals(
        pd.Series([3], dtype="UInt8", index=["ColumnA"]))
    assert "_time_update" not in result.columns
    assert "_time_insert" not in result.columns

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
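An upsert merge like the one above maps onto the general T-SQL MERGE shape sketched below; this is only an illustration with placeholder object names, not the statement the library generates.

import pyodbc

connection = pyodbc.connect("DSN=example")  # placeholder DSN
cursor = connection.cursor()

# matched rows are updated and missing rows are inserted; upsert semantics
# omit the "WHEN NOT MATCHED BY SOURCE THEN DELETE" clause
statement = """
MERGE ##target AS t
USING ##source AS s
    ON t._index = s._index
WHEN MATCHED THEN
    UPDATE SET t.ColumnA = s.ColumnA
WHEN NOT MATCHED BY TARGET THEN
    INSERT (_index, ColumnA) VALUES (s._index, s.ColumnA);
"""
cursor.execute(statement)
connection.commit()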
Example #8
def test_merge_two_match_columns(sql, caplog):

    table_name = "##test_merge_two_match_columns"
    dataframe = pd.DataFrame({
        "State": ["A", "B"],
        "ColumnA": [3, 4],
        "ColumnB": ["a", "b"]
    })
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # delete
    dataframe = dataframe[dataframe.index != 0]
    # update
    dataframe.loc[dataframe.index == 1, "ColumnA"] = 5
    # insert
    dataframe = pd.concat([
        dataframe,
        pd.DataFrame(
            {
                "State": ["C"],
                "ColumnA": [6],
                "ColumnB": ["d"]
            },
            index=pd.Index([2], name="_index"),
        ),
    ])

    # merge values into table, using the primary key that came from the dataframe's index and the State column
    dataframe = sql.merge_meta.merge(table_name,
                                     dataframe,
                                     match_columns=["_index", "State"])

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna() == [True, False])
    assert all(result["_time_insert"].notna() == [False, True])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
Example #9
def test_insert_include_metadata_timestamps(sql, caplog):

    table_name = "##test_insert_include_metadata_timestamps"

    # sample data
    dataframe = pd.DataFrame({"_bit": pd.Series([1, 0, None], dtype="boolean")})

    # create table
    sql.create.table(table_name, columns={"_bit": "BIT"})

    # insert data
    dataframe = sql.insert_meta.insert(table_name, dataframe)

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert all(result["_time_insert"].notna())
    assert result["_bit"].equals(dataframe["_bit"])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
Example #10
    def _target_table(
        self,
        table_name: str,
        dataframe: pd.DataFrame,
        cursor: pyodbc.connect,
        additional_columns: List[str] = None,
        updating_table: bool = False,
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Get target schema, potentially handle errors, and adjust dataframe contents before inserting into target table.

        Parameters
        ----------
        table_name (str) : name of target table
        dataframe (pandas.DataFrame): tabular data to insert
        cursor (pyodbc.connection.cursor) : cursor to execute statement
        additional_columns (list, default=None) : columns that will be generated by an SQL statement but are not in the dataframe
        updating_table (bool, default=False) : flag that indicates if target table is being updated

        Returns
        -------
        schema (pandas.DataFrame) : table column specifications and conversion rules
        dataframe (pandas.DataFrame) : input dataframe with optimal values and types for inserting into SQL
        """
        for _ in range(0, self._adjust_sql_attempts + 1):
            try:
                # dataframe values converted according to SQL data type
                schema, dataframe = conversion.get_schema(
                    self._connection,
                    table_name,
                    dataframe,
                    additional_columns,
                )
                break
            except (
                custom_errors.SQLTableDoesNotExist,
                custom_errors.SQLColumnDoesNotExist,
                custom_errors.SQLInsufficientColumnSize,
            ) as failure:
                cursor.rollback()
                # dataframe values may be converted according to SQL data type
                dataframe = _exceptions.handle(
                    failure,
                    table_name,
                    dataframe,
                    updating_table,
                    self.autoadjust_sql_objects,
                    self._modify,
                    self._create,
                )
                cursor.commit()
            except Exception as err:
                cursor.rollback()
                raise err
        else:
            raise RecursionError(
                f"adjust_sql_attempts={self._adjust_sql_attempts} reached"
            )

        return schema, dataframe
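The for/else loop in _target_table follows a general retry pattern: attempt the operation a fixed number of times, repair known recoverable failures between attempts, and raise once the loop exhausts without a break. A minimal standalone sketch with hypothetical names:

class RecoverableError(Exception):
    """Placeholder for failures that can be repaired between attempts."""


def with_retries(operation, repair, attempts=3):
    # try the operation; on a known recoverable failure, repair and retry
    for _ in range(attempts + 1):
        try:
            result = operation()
            break
        except RecoverableError as failure:
            repair(failure)
    else:
        # the loop never reached "break", so every attempt failed
        raise RecursionError(f"attempts={attempts} reached")
    return result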
Example #11
def test_column_add(sql):

    table_name = "##test_column_add"
    columns = {"A": "VARCHAR"}
    sql.create.table(table_name, columns)

    sql.modify.column(
        table_name, modify="add", column_name="B", data_type="VARCHAR(20)"
    )
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert "B" in schema.index
    assert schema.at["B", "sql_type"] == "varchar"

    sql.modify.column(table_name, modify="add", column_name="C", data_type="BIGINT")
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert "C" in schema.index
    assert schema.at["C", "sql_type"] == "bigint"
Example #12
def test_primary_key_one_column(sql):

    table_name = "##test_primary_key_one_column"
    columns = {"A": "INT", "B": "BIGINT", "C": "BIGINT", "D": "BIGINT"}
    sql.create.table(table_name, columns, not_nullable=["A", "B"])

    sql.modify.primary_key(
        table_name, modify="add", columns="A", primary_key_name="_pk_1"
    )
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert schema.at["A", "pk_seq"] == 1
    assert sum(schema["pk_seq"].notna()) == 1

    sql.modify.primary_key(
        table_name, modify="drop", columns="A", primary_key_name="_pk_1"
    )
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert all(schema["pk_seq"].isna())
def test_insert_alter_column(sql, caplog):

    table_name = "##test_insert_alter_column"
    sql.create.table(
        table_name,
        columns={
            "ColumnA": "TINYINT",
            "ColumnB": "VARCHAR(1)",
            "ColumnC": "TINYINT"
        },
    )

    dataframe = pd.DataFrame({
        "ColumnA": [1],
        "ColumnB": ["aaa"],
        "ColumnC": [100000]
    })
    dataframe = sql.insert_meta.insert(table_name, dataframe=dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_insert"].notna())

    _, dtypes = conversion.sql_spec(schema, dataframe)
    assert dtypes == {
        "ColumnA": "tinyint",
        "ColumnB": "varchar(3)",
        "ColumnC": "int",
        "_time_insert": "datetime2",
    }

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Altering column 'ColumnB' in table '{table_name}' to data type 'varchar(3)' with 'is_nullable=True'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Altering column 'ColumnC' in table '{table_name}' to data type 'int' with 'is_nullable=True'."
    )
def test_insert_add_and_alter_column(sql, caplog):

    table_name = "##test_insert_add_and_alter_column"
    dataframe = pd.DataFrame({
        "ColumnA": [0, 1, 2, 3],
        "ColumnB": [0, 1, 2, 3]
    })
    dataframe = sql.create_meta.table_from_dataframe(table_name,
                                                     dataframe,
                                                     primary_key="index")

    new = pd.DataFrame(
        {
            "ColumnA": [4, 5, 6, 7],
            "ColumnB": [256, 257, 258, 259],
            "ColumnC": [0, 1, 2, 3],
        },
        index=[4, 5, 6, 7],
    )
    new.index.name = "_index"
    new = sql.insert_meta.insert(table_name, new)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[new.columns].equals(pd.concat([dataframe, new]))
    assert all(result["_time_insert"].notna())

    _, dtypes = conversion.sql_spec(schema, dataframe)
    assert dtypes == {
        "_index": "tinyint",
        "ColumnA": "tinyint",
        "ColumnB": "smallint",
        "_time_insert": "datetime2",
        "ColumnC": "tinyint",
    }

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column 'ColumnC' in table '{table_name}' with data type 'tinyint'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Altering column 'ColumnB' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )
Example #15
def test_column_drop(sql):

    table_name = "##column_drop"
    columns = {"A": "VARCHAR", "B": "VARCHAR"}
    sql.create.table(table_name, columns)

    sql.modify.column(table_name, modify="drop", column_name="B")

    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert "B" not in schema.index
Example #16
def test_column_alter(sql):

    table_name = "##test_column_alter"
    columns = {"A": "VARCHAR(10)", "B": "BIGINT", "C": "BIGINT", "D": "BIGINT"}
    sql.create.table(table_name, columns)

    sql.modify.column(table_name, modify="alter", column_name="B", data_type="INT")
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert "B" in schema.index
    assert schema.at["B", "sql_type"] == "int"
    assert schema.at["B", "is_nullable"]

    sql.modify.column(
        table_name, modify="alter", column_name="C", data_type="INT", is_nullable=False
    )
    schema, _ = conversion.get_schema(sql.connection, table_name)
    assert "C" in schema.index
    assert schema.at["C", "sql_type"] == "int"
    assert not schema.at["C", "is_nullable"]
Example #17
def test_merge_alter_column(sql, caplog):

    table_name = "##test_merge_alter_column"
    dataframe = pd.DataFrame({"ColumnA": [1, 2], "ColumnB": ["a", "b"]})
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # merge using the SQL primary key that came from the dataframe's index
    dataframe = dataframe[dataframe.index != 0]
    dataframe["ColumnA"] = dataframe["ColumnA"].astype("Int64")
    dataframe.loc[1, "ColumnA"] = 10000
    dataframe.loc[1, "ColumnB"] = "bbbbb"
    dataframe = sql.merge_meta.merge(table_name, dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna())
    assert all(result["_time_insert"].isna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 5
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[3][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2] ==
        f"Altering column 'ColumnA' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )
    assert caplog.record_tuples[4][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[4][1] == logging.WARNING
    assert (
        caplog.record_tuples[4][2] ==
        f"Altering column 'ColumnB' in table '{table_name}' to data type 'varchar(5)' with 'is_nullable=False'."
    )
Example #18
def test_update_two_match_columns(sql, caplog):

    table_name = "##test_update_two_match_columns"
    dataframe = pd.DataFrame({
        "ColumnA": [1, 2],
        "ColumnB": ["a", "b"],
        "ColumnC": [3, 4]
    })
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="sql")

    # update values in table, using the primary key created in SQL and ColumnA
    schema, _ = conversion.get_schema(sql.connection, table_name)
    dataframe = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                       sql.connection)
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update_meta.update(table_name,
                                     dataframe,
                                     match_columns=["_pk", "ColumnA"])

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert updated.equals(result[updated.columns])
    assert result["_time_update"].notna().all()

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
Example #19
def test_update_alter_column(sql, caplog):

    table_name = "##test_update_alter_column"
    dataframe = pd.DataFrame({
        "ColumnA": [1, 2],
        "ColumnB": ["a", "b"],
        "ColumnC": [0, 0]
    })
    sql.create.table_from_dataframe(table_name, dataframe, primary_key=None)

    # update using ColumnA
    dataframe["ColumnB"] = ["aaa", "bbb"]
    dataframe["ColumnC"] = [256, 256]
    updated = sql.update_meta.update(table_name,
                                     dataframe,
                                     match_columns=["ColumnA"])
    dataframe[["ColumnB", "ColumnC"]] = updated[["ColumnB", "ColumnC"]]

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert result["_time_update"].notna().all()

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 4
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Altering column 'ColumnB' in table '{table_name}' to data type 'varchar(3)' with 'is_nullable=False'."
    )
    assert caplog.record_tuples[3][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2] ==
        f"Altering column 'ColumnC' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )
Example #20
    def get_schema(self, table_name: str):
        """Get schema of an SQL table and the defined conversion rules between data types.

        Parameters
        ----------
        table_name (str) : table name to read schema from

        Returns
        -------
        schema (pandas.DataFrame) : table column specifications and conversion rules
        """
        schema, _ = conversion.get_schema(self.connection, table_name)

        return schema
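Conceptually, the schema lookup is built on ODBC column metadata; the hedged sketch below uses plain pyodbc with a placeholder DSN and table name, and is not the library's actual implementation.

import pyodbc

connection = pyodbc.connect("DSN=example")  # placeholder DSN
cursor = connection.cursor()

# cursor.columns() yields one row of ODBC metadata per column, including
# column_name, type_name, column_size, and nullable
for row in cursor.columns(table="example_table"):
    print(row.column_name, row.type_name, row.column_size, row.nullable)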
Example #21
def test_merge_add_column(sql, caplog):

    table_name = "##test_merge_add_column"
    dataframe = pd.DataFrame({"ColumnA": [1, 2]})
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # merge using the SQL primary key that came from the dataframe's index
    dataframe = dataframe[dataframe.index != 0]
    dataframe["NewColumn"] = [3]
    dataframe = sql.merge_meta.merge(table_name, dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna())
    assert all(result["_time_insert"].isna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 4
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[3][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2] ==
        f"Creating column 'NewColumn' in table '{table_name}' with data type 'tinyint'."
    )
Example #22
def test_read_values_errors(sql):

    schema, _ = conversion.get_schema(connection=sql,
                                      table_name="##test_conversion")
    # error for a column missing in the schema definition
    with pytest.raises(AttributeError):
        conversion.read_values(
            statement="SELECT * FROM ##test_conversion",
            schema=schema[schema.index != "id"],
            connection=sql,
        )
    # error for primary key missing from query statement
    with pytest.raises(KeyError):
        conversion.read_values(
            statement="SELECT _bit FROM ##test_conversion",
            schema=schema,
            connection=sql,
        )
Example #23
def test_insert_singles(sql):

    table_name = "##test_insert_singles"

    # create table
    columns = {
        "ColumnA": "TINYINT",
        "ColumnB": "INT",
        "ColumnC": "DATE",
    }
    sql.create.table(table_name, columns)

    schema, _ = conversion.get_schema(sql.connection, table_name)

    # single value
    dataframe = pd.DataFrame({"ColumnA": [1]})
    dataframe = sql.insert.insert(table_name, dataframe)
    result = conversion.read_values(
        f"SELECT ColumnA FROM {table_name}", schema, sql.connection
    )
    assert all(result["ColumnA"] == [1])

    # single column
    dataframe = pd.DataFrame({"ColumnB": [2, 3, 4]})
    dataframe = sql.insert.insert(table_name, dataframe)
    result = conversion.read_values(
        f"SELECT ColumnB FROM {table_name}", schema, sql.connection
    )
    assert result["ColumnB"].equals(pd.Series([pd.NA, 2, 3, 4], dtype="Int32"))

    # single column of dates
    dataframe = pd.DataFrame(
        {"ColumnC": ["06-22-2021", "06-22-2021"]}, dtype="datetime64[ns]"
    )
    dataframe = sql.insert.insert(table_name, dataframe)
    result = conversion.read_values(
        f"SELECT ColumnC FROM {table_name}", schema, sql.connection
    )
    assert result["ColumnC"].equals(
        pd.Series(
            [pd.NA, pd.NA, pd.NA, pd.NA, "06-22-2021", "06-22-2021"],
            dtype="datetime64[ns]",
        )
    )
Example #24
def test_merge_create_table(sql, caplog):

    table_name = "##test_merge_create_table"
    dataframe = pd.DataFrame({
        "_pk": [1, 2],
        "ColumnA": [5, 6],
        "ColumnB": ["06/22/2021", "2023-08-31"]
    })
    dataframe = sql.merge_meta.merge(table_name,
                                     dataframe,
                                     match_columns=["_pk"])

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].isna())
    assert all(result["_time_insert"].notna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 4
    assert caplog.record_tuples[0][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert caplog.record_tuples[0][2] == f"Creating table '{table_name}'."
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert "Created table" in caplog.record_tuples[1][2]
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[3][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
Example #25
def test_insert_composite_pk(sql):

    table_name = "##test_insert_composite_pk"

    columns = {
        "ColumnA": "TINYINT",
        "ColumnB": "VARCHAR(5)",
        "ColumnC": "BIGINT",
    }
    sql.create.table(table_name, columns, primary_key_column=["ColumnA", "ColumnB"])

    dataframe = pd.DataFrame({"ColumnA": [1], "ColumnB": ["12345"], "ColumnC": [1]})
    dataframe = sql.insert.insert(table_name, dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert all(result.index == pd.MultiIndex.from_tuples([(1, "12345")]))
    assert all(result["ColumnC"] == 1)
def test_insert_create_table(sql, caplog):

    table_name = "##test_insert_create_table"

    dataframe = pd.DataFrame({
        "ColumnA": [1, 2, 3],
        "ColumnB": ["06/22/2021", "06-22-2021", "2021-06-22"]
    })
    dataframe = sql.insert_meta.insert(table_name, dataframe=dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    expected = pd.DataFrame({
        "ColumnA":
        pd.Series([1, 2, 3], dtype="UInt8"),
        "ColumnB":
        pd.Series(
            [pd.Timestamp(year=2021, month=6, day=22)] * 3,
            dtype="datetime64[ns]",
        ),
    }).set_index(keys="ColumnA")
    assert result[expected.columns].equals(expected)
    assert all(result["_time_insert"].notna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert caplog.record_tuples[0][2] == f"Creating table '{table_name}'."
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[1][2]
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
Example #27
def test_merge_override_timestamps(sql, caplog):

    table_name = "##test_merge_override_timestamps"
    dataframe = pd.DataFrame({"ColumnA": [3, 4]})
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")
    # update
    dataframe.loc[dataframe.index == 1, "ColumnA"] = 5

    # merge values into table, using the SQL primary key that came from the dataframe's index
    dataframe = sql.merge.merge(table_name,
                                dataframe,
                                include_metadata_timestamps=True)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna() == [True, True])
    assert all(result["_time_insert"].notna() == [False, False])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
def test_insert_add_column(sql, caplog):

    table_name = "##test_insert_add_column"
    sql.create.table(table_name, columns={"ColumnA": "TINYINT"})

    dataframe = pd.DataFrame({
        "ColumnA": [1],
        "ColumnB": [2],
        "ColumnC": ["zzz"]
    })
    dataframe = sql.insert_meta.insert(table_name, dataframe=dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_insert"].notna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column 'ColumnB' in table '{table_name}' with data type 'tinyint'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column 'ColumnC' in table '{table_name}' with data type 'varchar(3)'."
    )
Example #29
def test_merge_composite_pk(sql, caplog):

    table_name = "##test_merge_composite_pk"
    dataframe = pd.DataFrame({
        "State": ["A", "B"],
        "ColumnA": [3, 4],
        "ColumnB": ["a", "b"]
    }).set_index(keys=["State", "ColumnA"])
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # delete
    dataframe = dataframe[dataframe.index != ("A", 3)].copy()
    # update
    dataframe.loc[dataframe.index == ("B", 4), "ColumnB"] = "c"
    # insert
    dataframe = pd.concat([
        dataframe,
        pd.DataFrame({
            "State": ["C"],
            "ColumnA": [6],
            "ColumnB": ["d"]
        }).set_index(keys=["State", "ColumnA"]),
    ])
    dataframe = sql.merge.merge(table_name, dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert "_time_update" not in result
    assert "_time_insert" not in result

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
Example #30
def test_update_override_timestamps(sql, caplog):

    table_name = "##test_update_override_timestamps"
    dataframe = pd.DataFrame({
        "ColumnA": [1, 2],
        "ColumnB": ["a", "b"],
        "ColumnC": [3, 4]
    })
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # update values in table, using the SQL primary key that came from the dataframe's index
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update.update(table_name,
                                dataframe=dataframe[["ColumnC"]],
                                include_metadata_timestamps=True)
    dataframe["ColumnC"] = updated["ColumnC"]

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert dataframe.equals(result[dataframe.columns])
    assert result["_time_update"].notna().all()

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )