Esempio n. 1
0
    def _test_exists(self, df, index):
        assume_not_all_delims_and_quotechars(df)
        # first populate the data
        to_sql(
            df=df,
            table_name=self.table_name,
            creds=self.sql_creds,
            index=index,
            sql_type=self.sql_type,
            if_exists="replace",
        )

        # then test to sql with option of 'append'
        to_sql(
            df=df,
            table_name=self.table_name,
            creds=self.sql_creds,
            index=index,
            sql_type=self.sql_type,
            if_exists="append",
        )
        actual = pd.read_sql_query(
            sql=f"SELECT * FROM dbo.{self.table_name}", con=self.pyodbc_creds
        )
        df = prep_df_for_comparison(df=df, index=index)
        expected = pd.concat([df, df], axis=0, ignore_index=True)  # appended
        assert_frame_equal(expected, actual, check_column_type="equiv")
Esempio n. 2
0
def test_tosql_all_quotechars(sql_creds):
    df = pd.DataFrame({
        i: [v, "random", "string", 1, 2.0]
        for i, v in enumerate(_QUOTECHAR_OPTIONS)
    })
    with pytest.raises(BCPandasValueError):
        to_sql(df=df,
               table_name="tbl_all_delims",
               creds=sql_creds,
               if_exists="replace")
Esempio n. 3
0
 def _test_df_template(self, df, sql_creds, index):
     assume_not_all_delims_and_quotechars(df)
     with pytest.raises(AssertionError):
         to_sql(
             df=df,
             table_name=self.table_name,
             creds=self.sql_creds,
             index=index,
             sql_type=self.sql_type,
             if_exists="bad_arg",
         )
Esempio n. 4
0
 def _create_tbl_with_data(self):
     # first create a new table and put some data into it
     execute_sql_statement(
         self.sql_creds.engine, f"DROP TABLE IF EXISTS {self.schema_name}.{self.table_name}"
     )
     to_sql(
         df=self.df,
         table_name=self.table_name,
         creds=self.sql_creds,
         if_exists="replace",
         index=False,
         sql_type=self.sql_type,
     )
Esempio n. 5
0
 def _test_base(self, df, index):
     assume_not_all_delims_and_quotechars(df)
     to_sql(
         df=df,
         table_name=self.table_name,
         creds=self.sql_creds,
         index=index,
         sql_type=self.sql_type,
         if_exists="replace",
     )
     actual = pd.read_sql_query(sql=f"SELECT * FROM {self.table_name}", con=self.pyodbc_creds)
     expected = prep_df_for_comparison(df=df, index=index)
     assert_frame_equal(expected, actual, check_column_type="equiv")
Esempio n. 6
0
def run_benchmark_readsql(df: pd.DataFrame,
                          creds: SqlCreds) -> Dict[str, float]:
    chunk_size = floor(2000 / (len(df.columns) + 1))  # +1 in case index=True

    # first create table and insert rows
    tbl_name = "sql_tbl_read_sql"
    to_sql(
        df,
        table_name=tbl_name,
        creds=creds,
        sql_type="table",
        schema="dbo",
        index=False,
        if_exists="replace",
        batch_size=10_000,
    )

    funcs = [
        dict(
            title=f"pandas_readsql_{chunk_size}",
            func=pd.read_sql_table,
            schema="dbo",
            table_name=tbl_name,
            con=creds.engine,
            chunksize=chunk_size,
        ),
        dict(
            title=f"bcpandas_batchsize_{chunk_size}_check_delim",
            func=read_sql,
            table_name=tbl_name,
            creds=creds,
            sql_type="table",
            schema="dbo",
            batch_size=chunk_size,
            check_delim=True,
        ),
        dict(
            title=f"bcpandas_batchsize_{chunk_size}_no_check_delim",
            func=read_sql,
            table_name=tbl_name,
            creds=creds,
            sql_type="table",
            schema="dbo",
            batch_size=chunk_size,
            check_delim=False,
        ),
    ]

    return {i["title"]: _run_single_func(**i) for i in funcs}
Esempio n. 7
0
 def _test_not_exists(self, df, index):
     execute_sql_statement(
         self.pyodbc_creds.engine, f"DROP TABLE IF EXISTS dbo.{self.table_name}"
     )
     assume_not_all_delims_and_quotechars(df)
     to_sql(
         df=df,
         table_name=self.table_name,
         creds=self.sql_creds,
         index=index,
         sql_type=self.sql_type,
         if_exists="fail",
     )
     actual = pd.read_sql_query(sql=f"SELECT * FROM {self.table_name}", con=self.pyodbc_creds)
     expected = prep_df_for_comparison(df=df, index=index)
     assert_frame_equal(expected, actual, check_column_type="equiv")
Esempio n. 8
0
    def test_extra_col(self):
        # first create a new table and put some data into it
        self._create_tbl_with_data()

        # then insert data into only some of the columns, and out of order
        df_changed = self.df.copy()
        df_changed.loc[:, "col_N"] = [17.5, 18.5, 19.5, 20.5]

        with pytest.raises(BCPandasValueError):
            to_sql(
                df=df_changed,
                table_name=self.table_name,
                creds=self.sql_creds,
                if_exists="append",
                index=False,
                sql_type=self.sql_type,
            )
Esempio n. 9
0
    def test_cols_skip_and_out_of_order(self):
        # first create a new table and put some data into it
        self._create_tbl_with_data()

        # then insert data into only some of the columns, and out of order
        df_changed = self.df.loc[:, ["col4", "col1", "col2"]]
        to_sql(
            df=df_changed,
            table_name=self.table_name,
            creds=self.sql_creds,
            if_exists="append",
            index=False,
            sql_type=self.sql_type,
        )

        # check result
        self._check_result(df_changed)
Esempio n. 10
0
def test_tosql_empty_df(df, sql_creds):
    tbl_name = "tbl_df_empty"
    schema_name = "dbo"
    execute_sql_statement(sql_creds.engine, f"DROP TABLE IF EXISTS {schema_name}.{tbl_name}")
    to_sql(df=df, table_name=tbl_name, creds=sql_creds, schema=schema_name, if_exists="replace")
    # make sure nothing happened in the database
    qry = """
        SELECT * 
        FROM INFORMATION_SCHEMA.TABLES 
        WHERE TABLE_SCHEMA = '{_schema}' 
        AND TABLE_NAME = '{_tbl}'
        """.format(
        _tbl=tbl_name, _schema=schema_name
    )
    res = pd.read_sql_query(sql=qry, con=sql_creds.engine)
    # assert that rows == 0, it has columns even without rows because it is an internal system table
    assert res.shape[0] == 0
Esempio n. 11
0
def test_tosql_nan_null_inf(df, sql_creds):
    tbl_name = "tbl_df_nan_null_last_col"
    schema_name = "dbo"
    execute_sql_statement(sql_creds.engine, f"DROP TABLE IF EXISTS {schema_name}.{tbl_name}")
    to_sql(
        df=df,
        table_name=tbl_name,
        creds=sql_creds,
        schema=schema_name,
        if_exists="replace",
        index=False,
    )

    # check result
    actual = pd.read_sql_query(sql=f"SELECT * FROM {schema_name}.{tbl_name}", con=sql_creds.engine)
    expected = prep_df_for_comparison(df=df, index=False)
    assert_frame_equal(expected, actual)
Esempio n. 12
0
def test_tosql_full_bcp_path(bcp_path, sql_creds):
    df = pd.DataFrame({"col1": ["a", "b", "c", "d"], "col2": [1.5, 2.5, 3.5, 4.5]})
    tbl_name = "tbl_df_full_bcp_path"
    schema_name = "dbo"
    execute_sql_statement(sql_creds.engine, f"DROP TABLE IF EXISTS {schema_name}.{tbl_name}")
    to_sql(
        df=df,
        table_name=tbl_name,
        creds=sql_creds,
        schema=schema_name,
        if_exists="replace",
        index=False,
        bcp_path=bcp_path,
    )

    # check result
    actual = pd.read_sql_query(sql=f"SELECT * FROM {schema_name}.{tbl_name}", con=sql_creds.engine)
    expected = prep_df_for_comparison(df=df, index=False)
    assert_frame_equal(expected, actual)
Esempio n. 13
0
    def test_cols_out_of_order(self):
        """
        see https://docs.microsoft.com/en-us/sql/relational-databases/import-export/use-a-format-file-to-map-table-columns-to-data-file-fields-sql-server
        """
        # first create a new table and put some data into it
        self._create_tbl_with_data()

        # then insert (append) data with columns out of order
        df_changed = self.df.iloc[:, ::-1]  # columns reversed
        to_sql(
            df=df_changed,
            table_name=self.table_name,
            creds=self.sql_creds,
            if_exists="append",
            index=False,
            sql_type=self.sql_type,
        )

        # check result
        self._check_result(df_changed)
Esempio n. 14
0
    def test_skip_cols(self):
        """
        see https://docs.microsoft.com/en-us/sql/relational-databases/import-export/use-a-format-file-to-skip-a-table-column-sql-server
        """
        # first create a new table and put some data into it
        self._create_tbl_with_data()

        # then insert (append) data into only some of the columns
        df_changed = self.df.iloc[:, 1:]
        to_sql(
            df=df_changed,
            table_name=self.table_name,
            creds=self.sql_creds,
            if_exists="append",
            index=False,
            sql_type=self.sql_type,
        )

        # check result
        self._check_result(df_changed)
Esempio n. 15
0
def test_duplicate_columns(sql_creds):
    df = pd.DataFrame({
        "col1": [1.5, 2.5, 3.5, 4.5],
        "col2": [5.5, 6.5, 7.5, 8.5],
        "col3": [9.5, 10.5, 11.5, 12.5],
        "col4": [13.5, 14.5, 15.5, 16.5],
    })

    # set duplicate column
    df = df.rename(columns={"col3": "col2"})

    with pytest.raises(BCPandasValueError):
        table_name = "tosql_column_scenarios_1"
        to_sql(
            df=df,
            table_name=table_name,
            creds=sql_creds,
            if_exists="replace",
            index=False,
            sql_type="table",
        )
Esempio n. 16
0
def test_batch_size_param(sql_creds, batch_size):
    """
    Test that batch size can't be:
        - 0
        - larger than the number of rows
    """
    df = pd.DataFrame({
        "col1": [1.5, 2.5, 3.5, 4.5],
        "col2": [5.5, 6.5, 7.5, 8.5],
        "col3": [9.5, 10.5, 11.5, 12.5],
        "col4": [13.5, 14.5, 15.5, 16.5],
    })
    with pytest.raises(BCPandasValueError):
        to_sql(
            df=df,
            table_name="some_table",
            creds=sql_creds,
            if_exists="replace",
            index=False,
            sql_type="table",
            batch_size=batch_size,
        )
Esempio n. 17
0
    def _test_exists(self, df, index):
        assume_not_all_delims_and_quotechars(df)
        # first populate the data
        to_sql(
            df=df,
            table_name=self.table_name,
            creds=self.sql_creds,
            index=index,
            sql_type=self.sql_type,
            if_exists="replace",
        )

        # then test to sql with option of 'fail'
        with pytest.raises(BCPandasValueError):
            to_sql(
                df=df,
                table_name=self.table_name,
                creds=self.sql_creds,
                index=index,
                sql_type=self.sql_type,
                if_exists="fail",
            )
Esempio n. 18
0
    def test_with_dtype(self):
        to_sql(
            df=self.df,
            table_name=self.table_name,
            creds=self.sql_creds,
            index=False,
            sql_type=self.sql_type,
            if_exists="replace",
            dtype={
                "col1": sqlalchemy.types.INTEGER(),
                "col2": sqlalchemy.types.FLOAT(),
                "col3": sqlalchemy.types.NVARCHAR(length=10),
                "col4": sqlalchemy.types.DATE(),
                "col5": sqlalchemy.dialects.mssql.BIT(),
            },
        )

        actual = pd.read_sql_query(
            sql=f"""
            SELECT
              COLUMN_NAME,
              DATA_TYPE,
              CHARACTER_MAXIMUM_LENGTH
            FROM
              INFORMATION_SCHEMA.COLUMNS
            WHERE
              TABLE_NAME = '{self.table_name}'""",
            con=self.pyodbc_creds,
        )
        expected = pd.DataFrame(
            {
                "COLUMN_NAME": ["col1", "col2", "col3", "col4", "col5"],
                "DATA_TYPE": ["int", "float", "nvarchar", "date", "bit"],
                "CHARACTER_MAXIMUM_LENGTH": [np.NaN, np.NaN, 10.0, np.NaN, np.NaN],
            }
        )
        assert_frame_equal(expected, actual)
Esempio n. 19
0
from bcpandas import sqlCred, read_sql, to_sql

if __name__=="__main__":
    creds = sqlCred(database='Northwind_Db',username='******',password='******')
    dt = read_sql(creds, table_name='Customers')
    to_sql(creds,dt,table_name='testing')
    print(dt.iloc[2])