def test_behavior_with_whole_table_splitter(
    test_cases_for_sql_data_connector_sqlite_execution_engine,
):
    """Check that ``_split_on_whole_table`` yields one batch with no identifiers.

    The same expectation is asserted for three spellings of an unfiltered
    batch request: no ``data_connector_query`` at all, an empty query, and a
    query whose ``batch_filter_parameters`` is an empty dict.
    """
    connector_config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_date_column__A:
            splitter_method : "_split_on_whole_table"
            splitter_kwargs : {}
    """,
    )
    connector_config["execution_engine"] = (
        test_cases_for_sql_data_connector_sqlite_execution_engine
    )
    connector = ConfiguredAssetSqlDataConnector(**connector_config)

    # Print the self-check report; its content is not asserted on here.
    self_check_report = connector.self_check()
    print(json.dumps(self_check_report, indent=2))

    # Each entry is the set of extra BatchRequest kwargs for one variant.
    unfiltered_query_variants = (
        {},
        {"data_connector_query": {}},
        {"data_connector_query": {"batch_filter_parameters": {}}},
    )
    for query_kwargs in unfiltered_query_variants:
        request = BatchRequest(
            datasource_name="FAKE_Datasource_NAME",
            data_connector_name="my_sql_data_connector",
            data_asset_name="table_partitioned_by_date_column__A",
            **query_kwargs
        )
        batches = connector.get_batch_definition_list_from_batch_request(
            batch_request=request
        )
        assert len(batches) == 1
        assert batches[0]["batch_identifiers"] == {}
def test_get_batch_definition_list_from_batch_request(
    test_cases_for_sql_data_connector_sqlite_execution_engine,
):
    """Exercise batch-definition lookup on a table split by its ``date`` column.

    Covers, in order:

    * a filter on one ``date`` value -> exactly 1 batch definition;
    * an empty filter, and no query at all -> 30 batch definitions;
    * batch requests missing required identifying fields -> ``TypeError``.

    The config and request keys (``assets``, ``data_connector_query``,
    ``batch_filter_parameters``) use the same spelling as the whole-table
    splitter test in this module, so both tests drive the connector through
    one API vocabulary.
    """
    random.seed(0)
    db = test_cases_for_sql_data_connector_sqlite_execution_engine

    config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_date_column__A:
            splitter_method: _split_on_column_value
            splitter_kwargs:
                column_name: date

    """,
    )
    config["execution_engine"] = db

    my_data_connector = ConfiguredAssetSqlDataConnector(**config)
    my_data_connector._refresh_data_references_cache()

    # Filtering on a single date value should match exactly one batch.
    batch_definition_list = (
        my_data_connector.get_batch_definition_list_from_batch_request(
            batch_request=BatchRequest(
                datasource_name="FAKE_Datasource_NAME",
                data_connector_name="my_sql_data_connector",
                data_asset_name="table_partitioned_by_date_column__A",
                data_connector_query={
                    "batch_filter_parameters": {"date": "2020-01-01"}
                },
            )
        )
    )
    assert len(batch_definition_list) == 1

    # An empty filter matches every batch of the asset.
    batch_definition_list = (
        my_data_connector.get_batch_definition_list_from_batch_request(
            batch_request=BatchRequest(
                datasource_name="FAKE_Datasource_NAME",
                data_connector_name="my_sql_data_connector",
                data_asset_name="table_partitioned_by_date_column__A",
                data_connector_query={"batch_filter_parameters": {}},
            )
        )
    )
    assert len(batch_definition_list) == 30

    # Note: Abe 20201109: It would be nice to put in safeguards for mistakes like this.
    # In this case, "date" should go inside "batch_filter_parameters".
    # Currently, the method ignores "date" entirely, and matches on too many batches.
    # I don't think this is unique to ConfiguredAssetSqlDataConnector.
    # with pytest.raises(DataConnectorError) as e:
    #     batch_definition_list = my_data_connector.get_batch_definition_list_from_batch_request(
    #         batch_request=BatchRequest(
    #             datasource_name="FAKE_Datasource_NAME",
    #             data_connector_name="my_sql_data_connector",
    #             data_asset_name="table_partitioned_by_date_column__A",
    #             data_connector_query={
    #                 "batch_filter_parameters" : {},
    #                 "date" : "2020-01-01",
    #             }
    #     ))
    # assert "Unmatched key" in e.value.message

    # Omitting the query entirely behaves like an empty filter.
    batch_definition_list = (
        my_data_connector.get_batch_definition_list_from_batch_request(
            batch_request=BatchRequest(
                datasource_name="FAKE_Datasource_NAME",
                data_connector_name="my_sql_data_connector",
                data_asset_name="table_partitioned_by_date_column__A",
            )
        )
    )
    assert len(batch_definition_list) == 30

    # Requests that leave out identifying fields are expected to raise
    # TypeError, whether from BatchRequest or from the connector call.
    with pytest.raises(TypeError):
        my_data_connector.get_batch_definition_list_from_batch_request(
            batch_request=BatchRequest(
                datasource_name="FAKE_Datasource_NAME",
                data_connector_name="my_sql_data_connector",
            )
        )

    with pytest.raises(TypeError):
        my_data_connector.get_batch_definition_list_from_batch_request(
            batch_request=BatchRequest(
                datasource_name="FAKE_Datasource_NAME",
            )
        )

    with pytest.raises(TypeError):
        my_data_connector.get_batch_definition_list_from_batch_request(
            batch_request=BatchRequest()
        )