def test_sqlalchemydataset_with_custom_sql():
    """Check a SqlAlchemyDataset built on top of a custom SQL query.

    An in-memory SQLite table with ``name``, ``age`` and ``pet`` columns is
    created, and the dataset is constructed from a query that selects only
    ``name`` and ``pet`` for rows with ``age > 25``. The test then asserts
    that a set-membership expectation on ``pet`` succeeds and that an
    existence expectation on ``age`` (not selected by the query) fails.
    """
    sqlite_engine = sa.create_engine("sqlite://")

    # Seed the in-memory database with a small fixture table.
    people = pd.DataFrame(
        {
            "name": ["Frank", "Steve", "Jane", "Frank", "Michael"],
            "age": [16, 21, 38, 22, 10],
            "pet": ["fish", "python", "cat", "python", "frog"],
        }
    )
    people.to_sql(name="test_sql_data", con=sqlite_engine, index=False)

    query = "SELECT name, pet FROM test_sql_data WHERE age > 25"
    dataset = SqlAlchemyDataset(
        "test_sql_data",
        engine=sqlite_engine,
        custom_sql=query,
    )
    dataset._initialize_expectations()
    dataset.set_default_expectation_argument(
        "result_format",
        {"result_format": "COMPLETE"},
    )

    # Every pet value returned by the query must be in the allowed set.
    allowed_pets = ["fish", "cat", "python"]
    pet_check = dataset.expect_column_values_to_be_in_set("pet", allowed_pets)
    assert pet_check["success"] == True

    # "age" is filtered on but not selected, so it must not be visible.
    age_check = dataset.expect_column_to_exist("age")
    assert age_check["success"] == False
def test_adding_expectation_to_sqlalchemy_dataset_not_send_usage_message(
    mock_emit, sa
):
    """
    What does this test and why?

    When an Expectation is called using a SqlAlchemyDataset, it validates the
    dataset using the implementation of the Expectation. As part of the
    process, it also adds the Expectation to the active ExpectationSuite. This
    test ensures that this indirect way of adding an Expectation to the
    ExpectationSuite (i.e. not calling add_expectations() directly) does not
    emit a usage_stats event.

    Args:
        mock_emit: Mock whose ``call_count`` and ``call_args_list`` are
            inspected at the end of the test. Presumably injected by a patch
            decorator outside this view -- TODO confirm.
        sa: Object providing ``create_engine``; presumably the sqlalchemy
            module supplied by a fixture -- TODO confirm.
    """
    engine = sa.create_engine("sqlite://")

    data = pd.DataFrame(
        {
            "name": ["Frank", "Steve", "Jane", "Frank", "Michael"],
            "age": [16, 21, 38, 22, 10],
            "pet": ["fish", "python", "cat", "python", "frog"],
        }
    )
    data.to_sql(name="test_sql_data", con=engine, index=False)

    custom_sql = "SELECT name, pet FROM test_sql_data WHERE age > 12"
    custom_sql_dataset = SqlAlchemyDataset(engine=engine, custom_sql=custom_sql)
    custom_sql_dataset._initialize_expectations()
    custom_sql_dataset.set_default_expectation_argument(
        "result_format", {"result_format": "COMPLETE"}
    )

    # Called only for its side effect; the validation result itself is not
    # what this test checks, so it is not bound to a name.
    custom_sql_dataset.expect_column_values_to_be_in_set(
        "pet", ["fish", "cat", "python"]
    )

    # add_expectation() will not send usage_statistics event when called from
    # a SqlAlchemy Dataset
    assert mock_emit.call_count == 0
    assert mock_emit.call_args_list == []