def test_create_connection_only_once(self, mocker):
    """Datasets built from the same connection string share one engine.

    ``create_engine`` is patched so the number of engine creations can be
    counted: after building two datasets with identical credentials, each
    must still report exactly one engine and the factory must have been
    invoked a single time with ``CONNECTION``.
    """
    engine_factory = mocker.patch(
        "kedro.extras.datasets.pandas.sql_dataset.create_engine"
    )

    first = SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": CONNECTION})
    assert len(first.engines) == 1

    second = SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": CONNECTION})
    # Check the newer dataset first, then re-check the older one to confirm
    # that creating ``second`` did not register an additional engine.
    for data_set in (second, first):
        assert len(data_set.engines) == 1

    engine_factory.assert_called_once_with(CONNECTION)
def test_empty_con_error(self):
    """An empty ``con`` credential must be rejected with a ``DataSetError``."""
    expected_message = (
        r"`con` argument cannot be empty\. "
        r"Please provide a SQLAlchemy connection string"
    )
    blank_credentials = {"con": ""}

    with pytest.raises(DataSetError, match=expected_message):
        SQLQueryDataSet(sql=SQL_QUERY, credentials=blank_credentials)
def test_load_driver_missing(self, mocker):
    """A missing ``mysqldb`` driver surfaces as a ``DataSetError``.

    ``create_engine`` is patched to raise the ``ImportError`` Python emits
    when ``mysqldb`` is absent; the resulting error message must match
    ``ERROR_PREFIX`` followed by ``mysqlclient``.
    """
    missing_driver = ImportError("No module named 'mysqldb'")
    mocker.patch(
        "kedro.extras.datasets.pandas.sql_dataset.create_engine",
        side_effect=missing_driver,
    )
    expected_message = ERROR_PREFIX + "mysqlclient"

    with pytest.raises(DataSetError, match=expected_message):
        SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": CONNECTION})
def test_empty_query_error(self):
    """Empty ``sql`` together with empty ``filepath`` must raise ``DataSetError``."""
    # NOTE: the two sentences are intentionally joined without a space; the
    # pattern is kept exactly as the test has always expressed it.
    expected_message = (
        r"`sql` and `filepath` arguments cannot both be empty\."
        r"Please provide a sql query or path to a sql query file\."
    )
    credentials = {"con": CONNECTION}

    with pytest.raises(DataSetError, match=expected_message):
        SQLQueryDataSet(sql="", filepath="", credentials=credentials)
def test_invalid_module(self, mocker):
    """An arbitrary ``ImportError`` from ``create_engine`` is passed through.

    The patched ``create_engine`` raises ``ImportError`` with a custom
    message; the ``DataSetError`` raised by the dataset must match
    ``ERROR_PREFIX`` followed by that original message.
    """
    import_failure = ImportError("Invalid module some_module")
    mocker.patch(
        "kedro.extras.datasets.pandas.sql_dataset.create_engine",
        side_effect=import_failure,
    )
    expected_message = ERROR_PREFIX + r"Invalid module some\_module"

    with pytest.raises(DataSetError, match=expected_message):
        SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": CONNECTION})
def query_data_set(request):
    """Build a ``SQLQueryDataSet`` from default arguments plus ``request.param``.

    The defaults are the module-level ``SQL_QUERY`` and a credentials dict
    holding ``CONNECTION``; entries in ``request.param`` override or extend
    them. Presumably used as a parametrized pytest fixture (the decorator is
    not visible here).
    """
    init_args = {"sql": SQL_QUERY, "credentials": {"con": CONNECTION}}
    init_args.update(request.param)
    data_set = SQLQueryDataSet(**init_args)
    return data_set
def test_empty_query_error(self):
    """An empty ``sql`` argument must be rejected with a ``DataSetError``."""
    expected_message = (
        r"`sql` argument cannot be empty\. "
        r"Please provide a sql query"
    )
    credentials = {"con": CONNECTION}

    with pytest.raises(DataSetError, match=expected_message):
        SQLQueryDataSet(sql="", credentials=credentials)
read_diagnoses= read_diagnoses_query(admissions_case,adm_where) #UNION VIEWS FOR OLD SMCH AND NEW SMCH DATA read_new_smch_admissions = read_new_smch_admissions_query() read_new_smch_discharges = read_new_smch_discharges_query() read_old_smch_admissions = read_old_smch_admissions_query() read_old_smch_discharges = read_old_smch_discharges_query() read_old_smch_matched_data = read_old_smch_matched_view_query() read_new_smch_matched = read_new_smch_matched_query() #Create A Kedro Data Catalog from which we can easily get a Pandas DataFrame using catalog.load('name_of_dataframe') catalog = DataCatalog( { #Read Admissions "read_admissions": SQLQueryDataSet( sql= read_admissions, credentials=dict(con=con) ), #Read Raw Discharges "read_discharges": SQLQueryDataSet( sql= read_discharges, credentials=dict(con=con) ), #Read Derived Admissions "read_derived_admissions": SQLQueryDataSet( sql= derived_admissions, #load_args= dict(index_col="uid"), credentials=dict(con=con) ), #Read Derived Discharges "read_derived_discharges": SQLQueryDataSet( sql= derived_discharges,
def query_file_data_set(request, sql_file):
    """Build a file-backed ``SQLQueryDataSet`` from defaults plus ``request.param``.

    The defaults are ``filepath=sql_file`` and a credentials dict holding
    ``CONNECTION``; entries in ``request.param`` override or extend them.
    Presumably used as a parametrized pytest fixture (the decorator is not
    visible here).
    """
    init_args = {"filepath": sql_file, "credentials": {"con": CONNECTION}}
    init_args.update(request.param)
    data_set = SQLQueryDataSet(**init_args)
    return data_set
def test_sql_and_filepath_args(self, sql_file):
    """Supplying both ``sql`` and ``filepath`` must raise ``DataSetError``."""
    # NOTE: the two sentences are joined without a space and the dots are
    # left unescaped, exactly as the pattern has always been written.
    expected_message = (
        r"`sql` and `filepath` arguments cannot both be provided."
        r"Please only provide one."
    )
    conflicting_args = {"sql": SQL_QUERY, "filepath": sql_file}

    with pytest.raises(DataSetError, match=expected_message):
        SQLQueryDataSet(**conflicting_args)
def test_load_unknown_sql(self):
    """A connection string with an unsupported dialect raises ``DataSetError``."""
    expected_message = (
        r"The SQL dialect in your connection is not supported by SQLAlchemy"
    )
    unsupported_credentials = {"con": FAKE_CONN_STR}

    with pytest.raises(DataSetError, match=expected_message):
        SQLQueryDataSet(sql=SQL_QUERY, credentials=unsupported_credentials)