Example #1
def test_merge_upsert(sql, caplog):

    table_name = "##test_merge_upsert"
    dataframe = pd.DataFrame({"ColumnA": [3, 4]})
    sql.create.table_from_dataframe(table_name, dataframe, primary_key="index")

    # delete, but keep in SQL since upserting
    dataframe = dataframe[dataframe.index != 0].copy()
    # update
    dataframe.loc[dataframe.index == 1, "ColumnA"] = 5
    # insert
    dataframe = pd.concat([
        dataframe,
        pd.DataFrame([6],
                     columns=["ColumnA"],
                     index=pd.Index([2], name="_index")),
    ])

    # merge values into table, using the SQL primary key that came from the dataframe's index
    dataframe = sql.merge.merge(table_name, dataframe, upsert=True)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
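    # rows 1 and 2 should match the merged dataframe; index 0 was dropped from the dataframe but remains in SQL since upsert does not delete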
    assert dataframe.equals(result.loc[[1, 2]])
    assert result.loc[0].equals(
        pd.Series([3], dtype="UInt8", index=["ColumnA"]))
    assert "_time_update" not in result.columns
    assert "_time_insert" not in result.columns

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
Example #2
def test_update_primary_key(sql, caplog):

    table_name = "##test_update_primary_key"
    dataframe = pd.DataFrame({
        "ColumnA": [1, 2],
        "ColumnB": ["a", "b"],
        "ColumnC": [3, 4]
    })
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # update values in table, using the SQL primary key that came from the dataframe's index
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update.update(table_name, dataframe=dataframe[["ColumnC"]])
    dataframe["ColumnC"] = updated["ColumnC"]

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert dataframe.equals(result[dataframe.columns])
    assert "_time_update" not in result.columns
    assert "_time_insert" not in result.columns

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
Example #3
def test_update_composite_pk(sql, caplog):

    table_name = "##test_update_composite_pk"
    dataframe = pd.DataFrame({
        "ColumnA": [1, 2],
        "ColumnB": ["a", "b"],
        "ColumnC": [3, 4]
    })
    dataframe = dataframe.set_index(keys=["ColumnA", "ColumnB"])
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # update values in table, using the composite primary key that came from the dataframe's index
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update.update(table_name, dataframe)

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result.equals(updated)

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
Example #4
def test_merge_two_match_columns(sql, caplog):

    table_name = "##test_merge_two_match_columns"
    dataframe = pd.DataFrame({
        "State": ["A", "B"],
        "ColumnA": [3, 4],
        "ColumnB": ["a", "b"]
    })
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # delete
    dataframe = dataframe[dataframe.index != 0]
    # update
    dataframe.loc[dataframe.index == 1, "ColumnA"] = 5
    # insert
    dataframe = pd.concat([
        dataframe,
        pd.DataFrame(
            {
                "State": ["C"],
                "ColumnA": [6],
                "ColumnB": ["d"]
            },
            index=pd.Index([2], name="_index"),
        ),
    ])

    # merge values into table, matching on the primary key that came from the dataframe's index and the State column
    dataframe = sql.merge_meta.merge(table_name,
                                     dataframe,
                                     match_columns=["_index", "State"])

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna() == [True, False])
    assert all(result["_time_insert"].notna() == [False, True])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )

def test_insert_include_metadata_timestamps(sql, caplog):

    table_name = "##test_insert_include_metadata_timestamps"

    # sample data
    dataframe = pd.DataFrame({"_bit": pd.Series([1, 0, None], dtype="boolean")})

    # create table
    sql.create.table(table_name, columns={"_bit": "BIT"})

    # insert data
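    # sql.insert_meta includes metadata timestamps, so a _time_insert column should be created (asserted below)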
    dataframe = sql.insert_meta.insert(table_name, dataframe)

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert all(result["_time_insert"].notna())
    assert result["_bit"].equals(dataframe["_bit"])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2]
        == f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )

def test_sample(sql, data, caplog):

    # create cursor to perform operations
    cursor = sql.cursor()
    cursor.fast_executemany = True

    # get table schema for setting input data types and sizes
    schema, dataframe = conversion.get_schema(connection=sql,
                                              table_name="##test_conversion")

    # only schema_name.table_name can be specified
    with pytest.raises(ValueError):
        conversion.get_schema(connection=sql,
                              table_name="ServerName.dbo.##test_conversion")

    # dynamic SQL object names
    table = dynamic.escape(cursor, "##test_conversion")
    columns = dynamic.escape(cursor, data.columns)

    # prepare values of dataframe for insert
    dataframe, values = conversion.prepare_values(schema, data)

    # prepare cursor for input data types and sizes
    cursor = conversion.prepare_cursor(schema, dataframe, cursor)

    # issue insert statement
    insert = ", ".join(columns)
    params = ", ".join(["?"] * len(columns))
    statement = f"""
    INSERT INTO
    {table} (
        {insert}
    ) VALUES (
        {params}
    )
    """
    cursor.executemany(statement, values)

    # read data; the id column is used only to ensure sort order
    columns = ", ".join([x for x in data.columns])
    statement = f"SELECT {columns} FROM {table} ORDER BY id ASC"
    result = conversion.read_values(statement, schema, connection=sql)

    # compare result to insert, comparing to dataframe as values may have changed during insert preparation
    assert result.equals(dataframe.set_index(keys="id"))

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.conversion"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2] ==
        "Nanosecond precision for dataframe columns ['_time'] will be rounded as SQL data type 'time' allows 7 max decimal places."
    )
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.conversion"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        "Nanosecond precision for dataframe columns ['_datetime2'] will be rounded as SQL data type 'datetime2' allows 7 max decimal places."
    )

def test_insert_alter_primary_key(sql, caplog):

    # initial insert
    table_name = "##test_insert_alter_primary_key"
    dataframe = pd.DataFrame({
        "ColumnA": [0, 1, 2, 3],
        "ColumnB": [0, 1, 2, 3],
        "ColumnC": ["a", "b", "c", "d"],
    }).set_index(keys=["ColumnA", "ColumnB"])
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    schema, _ = conversion.get_schema(sql.connection, table_name)
    _, dtypes = conversion.sql_spec(schema, dataframe)
    assert dtypes == {
        "ColumnA": "tinyint",
        "ColumnB": "tinyint",
        "ColumnC": "varchar(1)",
    }
    assert schema.at["ColumnA", "pk_seq"] == 1
    assert schema.at["ColumnB", "pk_seq"] == 2
    assert pd.isna(schema.at["ColumnC", "pk_seq"])

    # insert that alters primary key
    new = pd.DataFrame({
        "ColumnA": [256, 257, 258, 259],
        "ColumnB": [4, 5, 6, 7],
        "ColumnC": ["e", "f", "g", "h"],
    }).set_index(keys=["ColumnA", "ColumnB"])
    new = sql.insert.insert(table_name, new)
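    # ColumnA now receives values of 256 and above, which exceed TINYINT, so it should be altered to SMALLINT even though it is part of the primary key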

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result.equals(pd.concat([dataframe, new]))
    _, dtypes = conversion.sql_spec(schema, new)
    assert dtypes == {
        "ColumnA": "smallint",
        "ColumnB": "tinyint",
        "ColumnC": "varchar(1)",
    }
    assert schema.at["ColumnA", "pk_seq"] == 1
    assert schema.at["ColumnB", "pk_seq"] == 2
    assert pd.isna(schema.at["ColumnC", "pk_seq"])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Altering column 'ColumnA' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )

def test_read_values_errors(sql):

    schema, _ = conversion.get_schema(connection=sql,
                                      table_name="##test_conversion")
    # error for a column missing in the schema definition
    with pytest.raises(AttributeError):
        conversion.read_values(
            statement="SELECT * FROM ##test_conversion",
            schema=schema[schema.index != "id"],
            connection=sql,
        )
    # error for primary key missing from query statement
    with pytest.raises(KeyError):
        conversion.read_values(
            statement="SELECT _bit FROM ##test_conversion",
            schema=schema,
            connection=sql,
        )

def test_insert_singles(sql):

    table_name = "##test_insert_singles"

    # create table
    columns = {
        "ColumnA": "TINYINT",
        "ColumnB": "INT",
        "ColumnC": "DATE",
    }
    sql.create.table(table_name, columns)

    schema, _ = conversion.get_schema(sql.connection, table_name)

    # single value
    dataframe = pd.DataFrame({"ColumnA": [1]})
    dataframe = sql.insert.insert(table_name, dataframe)
    result = conversion.read_values(
        f"SELECT ColumnA FROM {table_name}", schema, sql.connection
    )
    assert all(result["ColumnA"] == [1])

    # single column
    dataframe = pd.DataFrame({"ColumnB": [2, 3, 4]})
    dataframe = sql.insert.insert(table_name, dataframe)
    result = conversion.read_values(
        f"SELECT ColumnB FROM {table_name}", schema, sql.connection
    )
    assert result["ColumnB"].equals(pd.Series([pd.NA, 2, 3, 4], dtype="Int32"))

    # single column of dates
    dataframe = pd.DataFrame(
        {"ColumnC": ["06-22-2021", "06-22-2021"]}, dtype="datetime64[ns]"
    )
    dataframe = sql.insert.insert(table_name, dataframe)
    result = conversion.read_values(
        f"SELECT ColumnC FROM {table_name}", schema, sql.connection
    )
    assert result["ColumnC"].equals(
        pd.Series(
            [pd.NA, pd.NA, pd.NA, pd.NA, "06-22-2021", "06-22-2021"],
            dtype="datetime64[ns]",
        )
    )

def test_insert_alter_column(sql, caplog):

    table_name = "##test_insert_alter_column"
    sql.create.table(
        table_name,
        columns={
            "ColumnA": "TINYINT",
            "ColumnB": "VARCHAR(1)",
            "ColumnC": "TINYINT"
        },
    )

    dataframe = pd.DataFrame({
        "ColumnA": [1],
        "ColumnB": ["aaa"],
        "ColumnC": [100000]
    })
    dataframe = sql.insert_meta.insert(table_name, dataframe=dataframe)
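    # 'aaa' exceeds VARCHAR(1) and 100000 exceeds TINYINT, so ColumnB and ColumnC should be altered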

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_insert"].notna())

    _, dtypes = conversion.sql_spec(schema, dataframe)
    assert dtypes == {
        "ColumnA": "tinyint",
        "ColumnB": "varchar(3)",
        "ColumnC": "int",
        "_time_insert": "datetime2",
    }

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Altering column 'ColumnB' in table '{table_name}' to data type 'varchar(3)' with 'is_nullable=True'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Altering column 'ColumnC' in table '{table_name}' to data type 'int' with 'is_nullable=True'."
    )

def test_insert_add_and_alter_column(sql, caplog):

    table_name = "##test_insert_add_and_alter_column"
    dataframe = pd.DataFrame({
        "ColumnA": [0, 1, 2, 3],
        "ColumnB": [0, 1, 2, 3]
    })
    dataframe = sql.create_meta.table_from_dataframe(table_name,
                                                     dataframe,
                                                     primary_key="index")

    new = pd.DataFrame(
        {
            "ColumnA": [4, 5, 6, 7],
            "ColumnB": [256, 257, 258, 259],
            "ColumnC": [0, 1, 2, 3],
        },
        index=[4, 5, 6, 7],
    )
    new.index.name = "_index"
    new = sql.insert_meta.insert(table_name, new)
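    # ColumnC is a new column and ColumnB now holds values of 256 and above, so a column add and a column alter are expected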

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[new.columns].equals(pd.concat([dataframe, new]))
    assert all(result["_time_insert"].notna())

    _, dtypes = conversion.sql_spec(schema, dataframe)
    assert dtypes == {
        "_index": "tinyint",
        "ColumnA": "tinyint",
        "ColumnB": "smallint",
        "_time_insert": "datetime2",
        "ColumnC": "tinyint",
    }

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column 'ColumnC' in table '{table_name}' with data type 'tinyint'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Altering column 'ColumnB' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )
Example #12
def test_merge_alter_column(sql, caplog):

    table_name = "##test_merge_alter_column"
    dataframe = pd.DataFrame({"ColumnA": [1, 2], "ColumnB": ["a", "b"]})
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # merge using the SQL primary key that came from the dataframe's index
    dataframe = dataframe[dataframe.index != 0]
    dataframe["ColumnA"] = dataframe["ColumnA"].astype("Int64")
    dataframe.loc[1, "ColumnA"] = 10000
    dataframe.loc[1, "ColumnB"] = "bbbbb"
    dataframe = sql.merge_meta.merge(table_name, dataframe)
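    # 10000 exceeds TINYINT and 'bbbbb' exceeds VARCHAR(1), so ColumnA and ColumnB should be altered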

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna())
    assert all(result["_time_insert"].isna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 5
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[3][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2] ==
        f"Altering column 'ColumnA' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )
    assert caplog.record_tuples[4][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[4][1] == logging.WARNING
    assert (
        caplog.record_tuples[4][2] ==
        f"Altering column 'ColumnB' in table '{table_name}' to data type 'varchar(5)' with 'is_nullable=False'."
    )
Example #13
def test_update_two_match_columns(sql, caplog):

    table_name = "##test_update_two_match_columns"
    dataframe = pd.DataFrame({
        "ColumnA": [1, 2],
        "ColumnB": ["a", "b"],
        "ColumnC": [3, 4]
    })
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="sql")

    # update values in table, using the primary key created in SQL and ColumnA
    schema, _ = conversion.get_schema(sql.connection, table_name)
    dataframe = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                       sql.connection)
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update_meta.update(table_name,
                                     dataframe,
                                     match_columns=["_pk", "ColumnA"])

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert updated.equals(result[updated.columns])
    assert result["_time_update"].notna().all()

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
Example #14
def test_update_alter_column(sql, caplog):

    table_name = "##test_update_alter_column"
    dataframe = pd.DataFrame({
        "ColumnA": [1, 2],
        "ColumnB": ["a", "b"],
        "ColumnC": [0, 0]
    })
    sql.create.table_from_dataframe(table_name, dataframe, primary_key=None)

    # update using ColumnA
    dataframe["ColumnB"] = ["aaa", "bbb"]
    dataframe["ColumnC"] = [256, 256]
    updated = sql.update_meta.update(table_name,
                                     dataframe,
                                     match_columns=["ColumnA"])
    dataframe[["ColumnB", "ColumnC"]] = updated[["ColumnB", "ColumnC"]]

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert result["_time_update"].notna().all()

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 4
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Altering column 'ColumnB' in table '{table_name}' to data type 'varchar(3)' with 'is_nullable=False'."
    )
    assert caplog.record_tuples[3][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2] ==
        f"Altering column 'ColumnC' in table '{table_name}' to data type 'smallint' with 'is_nullable=False'."
    )
Example #15
def test_merge_add_column(sql, caplog):

    table_name = "##test_merge_add_column"
    dataframe = pd.DataFrame({"ColumnA": [1, 2]})
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # merge using the SQL primary key that came from the dataframe's index
    dataframe = dataframe[dataframe.index != 0]
    dataframe["NewColumn"] = [3]
    dataframe = sql.merge_meta.merge(table_name, dataframe)
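    # NewColumn does not exist in the table, so the merge should create it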

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna())
    assert all(result["_time_insert"].isna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 4
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[3][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2] ==
        f"Creating column 'NewColumn' in table '{table_name}' with data type 'tinyint'."
    )
Example #16
def test_merge_create_table(sql, caplog):

    table_name = "##test_merge_create_table"
    dataframe = pd.DataFrame({
        "_pk": [1, 2],
        "ColumnA": [5, 6],
        "ColumnB": ["06/22/2021", "2023-08-31"]
    })
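    # the table does not yet exist, so the merge should create it before merging the records (see the asserted warnings below)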
    dataframe = sql.merge_meta.merge(table_name,
                                     dataframe,
                                     match_columns=["_pk"])

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].isna())
    assert all(result["_time_insert"].notna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 4
    assert caplog.record_tuples[0][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert caplog.record_tuples[0][2] == f"Creating table '{table_name}'."
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert "Created table" in caplog.record_tuples[1][2]
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[3][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[3][1] == logging.WARNING
    assert (
        caplog.record_tuples[3][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
Example #17
def test_insert_composite_pk(sql):

    table_name = "##test_insert_composite_pk"

    columns = {
        "ColumnA": "TINYINT",
        "ColumnB": "VARCHAR(5)",
        "ColumnC": "BIGINT",
    }
    sql.create.table(table_name, columns, primary_key_column=["ColumnA", "ColumnB"])

    dataframe = pd.DataFrame({"ColumnA": [1], "ColumnB": ["12345"], "ColumnC": [1]})
    dataframe = sql.insert.insert(table_name, dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert all(result.index == pd.MultiIndex.from_tuples([(1, "12345")]))
    assert all(result["ColumnC"] == 1)

def test_insert_create_table(sql, caplog):

    table_name = "##test_insert_create_table"

    dataframe = pd.DataFrame({
        "ColumnA": [1, 2, 3],
        "ColumnB": ["06/22/2021", "06-22-2021", "2021-06-22"]
    })
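    # the table does not yet exist, so the insert should create it, with the ColumnB date strings stored as datetime values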
    dataframe = sql.insert_meta.insert(table_name, dataframe=dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    expected = pd.DataFrame({
        "ColumnA":
        pd.Series([1, 2, 3], dtype="UInt8"),
        "ColumnB":
        pd.Series(
            [pd.Timestamp(year=2021, month=6, day=22)] * 3,
            dtype="datetime64[ns]",
        ),
    }).set_index(keys="ColumnA")
    assert result[expected.columns].equals(expected)
    assert all(result["_time_insert"].notna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert caplog.record_tuples[0][2] == f"Creating table '{table_name}'."
    assert caplog.record_tuples[1][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[1][2]
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
Example #19
def test_merge_override_timestamps(sql, caplog):

    table_name = "##test_merge_override_timestamps"
    dataframe = pd.DataFrame({"ColumnA": [3, 4]})
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")
    # update
    dataframe.loc[dataframe.index == 1, "ColumnA"] = 5

    # merge values into table, using the SQL primary key that came from the dataframe's index
    dataframe = sql.merge.merge(table_name,
                                dataframe,
                                include_metadata_timestamps=True)
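    # include_metadata_timestamps=True should add the _time_update and _time_insert metadata columns, which sql.merge omits by default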

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_update"].notna() == [True, True])
    assert all(result["_time_insert"].notna() == [False, False])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )

def test_insert_add_column(sql, caplog):

    table_name = "##test_insert_add_column"
    sql.create.table(table_name, columns={"ColumnA": "TINYINT"})

    dataframe = pd.DataFrame({
        "ColumnA": [1],
        "ColumnB": [2],
        "ColumnC": ["zzz"]
    })
    dataframe = sql.insert_meta.insert(table_name, dataframe=dataframe)
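    # ColumnB and ColumnC do not exist in the table, so they should be created during the insert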

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert all(result["_time_insert"].notna())

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column 'ColumnB' in table '{table_name}' with data type 'tinyint'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column 'ColumnC' in table '{table_name}' with data type 'varchar(3)'."
    )
Example #21
def test_merge_composite_pk(sql, caplog):

    table_name = "##test_merge_composite_pk"
    dataframe = pd.DataFrame({
        "State": ["A", "B"],
        "ColumnA": [3, 4],
        "ColumnB": ["a", "b"]
    }).set_index(keys=["State", "ColumnA"])
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # delete
    dataframe = dataframe[dataframe.index != ("A", 3)].copy()
    # update
    dataframe.loc[dataframe.index == ("B", 4), "ColumnB"] = "c"
    # insert
    dataframe = pd.concat([
        dataframe,
        pd.DataFrame({
            "State": ["C"],
            "ColumnA": [6],
            "ColumnB": ["d"]
        }).set_index(keys=["State", "ColumnA"]),
    ])
    dataframe = sql.merge.merge(table_name, dataframe)

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert result[dataframe.columns].equals(dataframe)
    assert "_time_update" not in result
    assert "_time_insert" not in result

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
Example #22
def test_update_override_timestamps(sql, caplog):

    table_name = "##test_update_override_timestamps"
    dataframe = pd.DataFrame({
        "ColumnA": [1, 2],
        "ColumnB": ["a", "b"],
        "ColumnC": [3, 4]
    })
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # update values in table, using the SQL primary key that came from the dataframe's index
    dataframe["ColumnC"] = [5, 6]
    updated = sql.update.update(table_name,
                                dataframe=dataframe[["ColumnC"]],
                                include_metadata_timestamps=True)
    dataframe["ColumnC"] = updated["ColumnC"]

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert dataframe.equals(result[dataframe.columns])
    assert result["_time_update"].notna().all()

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 2
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
Example #23
def test_insert_dataframe(sql, caplog):

    table_name = "##test_insert_dataframe"

    # sample data
    dataframe = pd.DataFrame(
        {
            "_bit": pd.Series([1, 0, None], dtype="boolean"),
            "_tinyint": pd.Series([0, 255, None], dtype="UInt8"),
            "_smallint": pd.Series([-(2**15), 2**15 - 1, None], dtype="Int16"),
            "_int": pd.Series([-(2**31), 2**31 - 1, None], dtype="Int32"),
            "_bigint": pd.Series([-(2**63), 2**63 - 1, None], dtype="Int64"),
            "_float": pd.Series([-(1.79**308), 1.79**308, None], dtype="float"),
            "_time": pd.Series(
                ["00:00:00.0000000", "23:59:59.9999999", None], dtype="timedelta64[ns]"
            ),
            "_date": pd.Series(
                [
                    (pd.Timestamp.min + pd.Timedelta(days=1)).date(),
                    pd.Timestamp.max.date(),
                    None,
                ],
                dtype="datetime64[ns]",
            ),
            "_datetime2": pd.Series(
                [pd.Timestamp.min, pd.Timestamp.max, None], dtype="datetime64[ns]"
            ),
            "_varchar": pd.Series(["a", "bbb", None], dtype="string"),
            "_nvarchar": pd.Series(
                ["100\N{DEGREE SIGN}F", "company name\N{REGISTERED SIGN}", None],
                dtype="string",
            ),
        }
    )

    # create table
    columns = {
        "_time_insert": "DATETIME2",
        "_bit": "BIT",
        "_tinyint": "TINYINT",
        "_smallint": "SMALLINT",
        "_int": "INT",
        "_bigint": "BIGINT",
        "_float": "FLOAT",
        "_time": "TIME",
        "_date": "DATE",
        "_datetime2": "DATETIME2",
        "_varchar": "VARCHAR",
        "_nvarchar": "NVARCHAR",
    }
    columns["_varchar"] = (
        columns["_varchar"] + "(" + str(dataframe["_varchar"].str.len().max()) + ")"
    )
    columns["_nvarchar"] = (
        columns["_nvarchar"] + "(" + str(dataframe["_nvarchar"].str.len().max()) + ")"
    )
    sql.create.table(table_name, columns)

    # insert data
    dataframe = sql.insert_meta.insert(table_name, dataframe)

    # test result
    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name}", schema, sql.connection
    )
    assert all(result["_time_insert"].notna())
    assert dataframe.equals(result[result.columns.drop("_time_insert")])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 1
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.conversion"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert (
        caplog.record_tuples[0][2]
        == "Nanosecond precision for dataframe columns ['_datetime2'] will be rounded as SQL data type 'datetime2' allows 7 max decimal places."
    )
Example #24
def test_merge_non_pk_column(sql, caplog):

    table_name = "##test_merge_non_pk_column"
    dataframe = pd.DataFrame({
        "State": ["A", "B"],
        "ColumnA": [3, 4],
        "ColumnB": ["a", "b"]
    })
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key=None)

    # delete
    dataframe = dataframe[dataframe.index != 0]
    dataframe = dataframe.reset_index(drop=True)
    # update
    dataframe.loc[dataframe.index == 1, "ColumnA"] = 5
    # insert
    dataframe = pd.concat([
        dataframe,
        pd.DataFrame(
            {
                "State": ["C"],
                "ColumnA": [6],
                "ColumnB": ["d"]
            },
            index=pd.Index([1], name="_index"),
        ),
    ])

    # merge values into table, using a single column that is not the primary key:
    dataframe = sql.merge_meta.merge(table_name,
                                     dataframe,
                                     match_columns=["State"])

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(
        f"SELECT * FROM {table_name} ORDER BY _time_update DESC",
        schema,
        sql.connection,
    )
    assert result[dataframe.columns].equals(dataframe)

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
Example #25
def test_merge_two_delete_requires(sql, caplog):

    table_name = "##test_merge_two_delete_requires"
    dataframe = pd.DataFrame(
        {
            "State1": ["A", "B", "B"],
            "State2": ["X", "Y", "Z"],
            "ColumnA": [3, 4, 4],
            "ColumnB": ["a", "b", "b"],
        },
        index=[0, 1, 2],
    )
    dataframe.index.name = "_pk"
    dataframe = sql.create.table_from_dataframe(table_name,
                                                dataframe,
                                                primary_key="index")

    # delete 2 records
    dataframe = dataframe[dataframe.index == 1].copy()
    # update
    dataframe.loc[dataframe.index == 1, ["ColumnA", "ColumnB"]] = [5, "c"]
    # insert
    dataframe.index.name = "_pk"
    dataframe = pd.concat([
        dataframe,
        pd.DataFrame(
            {
                "State1": ["C"],
                "State2": ["Z"],
                "ColumnA": [6],
                "ColumnB": ["d"]
            },
            index=pd.Index([3], name="_pk"),
        ),
    ])

    # merge values into table, using the primary key that came from the dataframe's index
    # also require a match on State1 and State2 to prevent a record from being deleted
    dataframe = sql.merge_meta.merge(
        table_name,
        dataframe,
        match_columns=["_pk"],
        delete_requires=["State1", "State2"],
    )

    schema, _ = conversion.get_schema(sql.connection, table_name)
    result = conversion.read_values(f"SELECT * FROM {table_name}", schema,
                                    sql.connection)
    assert all(
        result.loc[[1, 3],
                   ["State1", "State2", "ColumnA", "ColumnB"]] == dataframe)
    assert all(result.loc[0, ["State1", "State2", "ColumnA", "ColumnB"]] ==
               pd.Series(["A", "X", 3, "a"],
                         index=["State1", "State2", "ColumnA", "ColumnB"]))
    assert all(result["_time_update"].notna() == [False, True, False])
    assert all(result["_time_insert"].notna() == [False, False, True])

    # assert warnings raised by logging after all other tasks
    assert len(caplog.record_tuples) == 3
    assert caplog.record_tuples[0][0] == "mssql_dataframe.core.create"
    assert caplog.record_tuples[0][1] == logging.WARNING
    assert f"Created table: {table_name}" in caplog.record_tuples[0][2]
    assert caplog.record_tuples[1][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[1][1] == logging.WARNING
    assert (
        caplog.record_tuples[1][2] ==
        f"Creating column '_time_update' in table '{table_name}' with data type 'datetime2'."
    )
    assert caplog.record_tuples[2][
        0] == "mssql_dataframe.core.write._exceptions"
    assert caplog.record_tuples[2][1] == logging.WARNING
    assert (
        caplog.record_tuples[2][2] ==
        f"Creating column '_time_insert' in table '{table_name}' with data type 'datetime2'."
    )
Example #26
    def table(
        self,
        table_name: str,
        column_names: list = None,
        where: str = None,
        limit: int = None,
        order_column: str = None,
        order_direction: Literal[None, "ASC", "DESC"] = None,
    ) -> pd.DataFrame:
        """Select data from SQL into a dataframe.

        Parameters
        ----------
        table_name (str) : name of table to select data from
        column_names (list|str, default=None) : list of columns to select, or None to select all
        where (str, default=None) : where clause filter to apply
        limit (int, default=None) : select limited number of records only
        order_column (str, default=None) : order results by column
        order_direction (str, default=None) : order direction

        Returns
        -------
        dataframe (pandas.DataFrame): tabular data from select statement

        Examples
        --------
        A sample table to read, created from a dataframe.
        >>> df = pd.DataFrame(
        ...    {
        ...        "ColumnA": [5, 6, 7],
        ...        "ColumnB": [5, 6, None],
        ...        "ColumnC": [pd.NA, 6, 7],
        ...        "ColumnD": ["06-22-2021", "06-22-2021", pd.NaT],
        ...        "ColumnE": ["a", "b", None],
        ...    }, index = ["xxx", "yyy", "zzz"]
        ... )
        >>> df = create.table_from_dataframe('##ExampleRead', df, primary_key='index')

        Select the entire table. The primary key is set as the dataframe's index.
        >>> query = read.table('##ExampleRead')

        Select specific columns.
        >>> query = read.table('##ExampleRead', column_names=['ColumnA','ColumnB'])

        Select using conditions grouped by parentheses while applying a limit and order.
        >>> query = read.table('##ExampleRead', where="(ColumnB>4 AND ColumnC IS NOT NULL) OR ColumnE IS NULL", limit=5, order_column='ColumnB', order_direction='DESC')
        """
        # get table schema for conversion to pandas
        schema, _ = conversion.get_schema(self._connection, table_name)

        # always read in primary key columns for dataframe index
        primary_key_columns = list(
            schema.loc[schema["pk_seq"].notna(),
                       "pk_seq"].sort_values(ascending=True).index)

        # escape dynamic table and column names, and build the column_names selection
        table_name = dynamic.escape(self._connection.cursor(), table_name)
        if column_names is None:
            column_names = "*"
        else:
            if isinstance(column_names, str):
                column_names = [column_names]
            elif isinstance(column_names, pd.Index):
                column_names = list(column_names)
            column_names = primary_key_columns + column_names
            column_names = list(set(column_names))
            missing = [x for x in column_names if x not in schema.index]
            if len(missing) > 0:
                raise custom_errors.SQLColumnDoesNotExist(
                    f"Column does not exist in table {table_name}:", missing)
            column_names = dynamic.escape(self._connection.cursor(),
                                          column_names)
            column_names = "\n,".join(column_names)

        # format optional where_statement
        if where is None:
            where_statement, where_args = ("", None)
        else:
            where_statement, where_args = dynamic.where(
                self._connection.cursor(), where)

        # format optional limit
        if limit is None:
            limit = ""
        elif not isinstance(limit, int):
            raise ValueError("limit must be an integer")
        else:
            limit = "TOP(" + str(limit) + ")"

        # format optional order
        options = [None, "ASC", "DESC"]
        if (order_column is None and order_direction is not None) or (
                order_column is not None and order_direction is None):
            raise ValueError(
                "order_column and order_direction must both be specified")
        elif order_direction not in options:
            raise ValueError("order direction must be one of: " + str(options))
        elif order_column is not None:
            order = ("ORDER BY " +
                     dynamic.escape(self._connection.cursor(), order_column) +
                     " " + order_direction)
        else:
            order = ""

        # select values
        statement = f"""
        SELECT {limit}
            {column_names}
        FROM
            {table_name}
            {where_statement}
            {order}
        """

        # read sql query
        dataframe = conversion.read_values(statement, schema, self._connection,
                                           where_args)

        return dataframe