def test_example_F(test_cases_for_sql_data_connector_sqlite_execution_engine):
    """Self-check a connector splitting table F on its foreign-key column (session_id)."""
    random.seed(0)
    execution_engine = test_cases_for_sql_data_connector_sqlite_execution_engine

    # NOTE(review): this test uses the "data_assets" config key and asserts
    # "partition_definition" in the batch_spec, while sibling tests use
    # "assets"/"batch_identifiers" — confirm which API version is intended.
    connector_config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    data_assets:
        table_partitioned_by_foreign_key__F:
            splitter_method: _split_on_column_value
            splitter_kwargs:
                column_name: session_id
    """,
    )
    connector_config["execution_engine"] = execution_engine

    my_data_connector = ConfiguredAssetSqlDataConnector(**connector_config)

    self_check_report = my_data_connector.self_check()
    print(json.dumps(self_check_report, indent=2))

    assert self_check_report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["table_partitioned_by_foreign_key__F"],
        "data_assets": {
            "table_partitioned_by_foreign_key__F": {
                "batch_definition_count": 49,
                # TODO Abe 20201029 : These values should be sorted
                "example_data_references": [
                    {"session_id": 3},
                    {"session_id": 2},
                    {"session_id": 4},
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        "example_data_reference": {
            "n_rows": 2,
            "batch_spec": {
                "table_name": "table_partitioned_by_foreign_key__F",
                "partition_definition": {"session_id": 2},
                "splitter_method": "_split_on_column_value",
                "splitter_kwargs": {"column_name": "session_id"},
            },
        },
    }
def test_example_A(test_cases_for_sql_data_connector_sqlite_execution_engine):
    """Self-check a connector splitting table A on its date column."""
    random.seed(0)
    execution_engine = test_cases_for_sql_data_connector_sqlite_execution_engine

    # NOTE(review): "data_assets" key + "partition_definition" assertion here,
    # vs "assets"/"batch_identifiers" in sibling tests — confirm API version.
    connector_config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    data_assets:
        table_partitioned_by_date_column__A:
            splitter_method: _split_on_column_value
            splitter_kwargs:
                column_name: date
    """,
    )
    connector_config["execution_engine"] = execution_engine

    my_data_connector = ConfiguredAssetSqlDataConnector(**connector_config)

    self_check_report = my_data_connector.self_check()
    print(json.dumps(self_check_report, indent=2))

    assert self_check_report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["table_partitioned_by_date_column__A"],
        "data_assets": {
            "table_partitioned_by_date_column__A": {
                "batch_definition_count": 30,
                "example_data_references": [
                    {"date": "2020-01-01"},
                    {"date": "2020-01-02"},
                    {"date": "2020-01-03"},
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        "example_data_reference": {
            "n_rows": 8,
            "batch_spec": {
                "table_name": "table_partitioned_by_date_column__A",
                "partition_definition": {"date": "2020-01-02"},
                "splitter_method": "_split_on_column_value",
                "splitter_kwargs": {"column_name": "date"},
            },
        },
    }
def test_behavior_with_whole_table_splitter(
    test_cases_for_sql_data_connector_sqlite_execution_engine,
):
    """A _split_on_whole_table splitter should yield exactly one batch with empty
    batch identifiers, no matter how (or whether) data_connector_query is given."""
    execution_engine = test_cases_for_sql_data_connector_sqlite_execution_engine

    connector_config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_date_column__A:
            splitter_method : "_split_on_whole_table"
            splitter_kwargs : {}
    """,
    )
    connector_config["execution_engine"] = execution_engine

    my_data_connector = ConfiguredAssetSqlDataConnector(**connector_config)

    report_object = my_data_connector.self_check()
    print(json.dumps(report_object, indent=2))

    def _assert_single_empty_batch(**extra_request_kwargs):
        # Each request variant below must resolve to the same single whole-table batch.
        batch_definitions = (
            my_data_connector.get_batch_definition_list_from_batch_request(
                batch_request=BatchRequest(
                    datasource_name="FAKE_Datasource_NAME",
                    data_connector_name="my_sql_data_connector",
                    data_asset_name="table_partitioned_by_date_column__A",
                    **extra_request_kwargs,
                )
            )
        )
        assert len(batch_definitions) == 1
        assert batch_definitions[0]["batch_identifiers"] == {}

    # No query, an empty query, and empty batch_filter_parameters are all equivalent.
    _assert_single_empty_batch()
    _assert_single_empty_batch(data_connector_query={})
    _assert_single_empty_batch(data_connector_query={"batch_filter_parameters": {}})
def test_example_G(test_cases_for_sql_data_connector_sqlite_execution_engine):
    """Self-check a connector splitting table G on multiple columns (y, m, d)."""
    random.seed(0)
    execution_engine = test_cases_for_sql_data_connector_sqlite_execution_engine

    connector_config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_multiple_columns__G:
            splitter_method: _split_on_multi_column_values
            splitter_kwargs:
                column_names:
                    - y
                    - m
                    - d
    """,
    )
    connector_config["execution_engine"] = execution_engine

    my_data_connector = ConfiguredAssetSqlDataConnector(**connector_config)

    self_check_report = my_data_connector.self_check()
    print(json.dumps(self_check_report, indent=2))

    assert self_check_report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["table_partitioned_by_multiple_columns__G"],
        "data_assets": {
            "table_partitioned_by_multiple_columns__G": {
                "batch_definition_count": 30,
                # TODO Abe 20201029 : These values should be sorted
                "example_data_references": [
                    {"y": 2020, "m": 1, "d": 1},
                    {"y": 2020, "m": 1, "d": 2},
                    {"y": 2020, "m": 1, "d": 3},
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {
        #     "n_rows": 8,
        #     "batch_spec": {
        #         "table_name": "table_partitioned_by_multiple_columns__G",
        #         "data_asset_name": "table_partitioned_by_multiple_columns__G",
        #         "batch_identifiers": {
        #             "y": 2020,
        #             "m": 1,
        #             "d": 2,
        #         },
        #         "splitter_method": "_split_on_multi_column_values",
        #         "splitter_kwargs": {"column_names": ["y", "m", "d"]},
        #     },
        # },
    }
def test_basic_self_check(test_cases_for_sql_data_connector_sqlite_execution_engine):
    """Baseline self-check: one asset split on a date column, table_name defaulted."""
    random.seed(0)
    execution_engine = test_cases_for_sql_data_connector_sqlite_execution_engine

    connector_config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_date_column__A:
            #table_name: events # If table_name is omitted, then the table_name defaults to the asset name
            splitter_method: _split_on_column_value
            splitter_kwargs:
                column_name: date
    """,
    )
    connector_config["execution_engine"] = execution_engine

    my_data_connector = ConfiguredAssetSqlDataConnector(**connector_config)

    self_check_report = my_data_connector.self_check()
    print(json.dumps(self_check_report, indent=2))

    assert self_check_report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["table_partitioned_by_date_column__A"],
        "data_assets": {
            "table_partitioned_by_date_column__A": {
                "batch_definition_count": 30,
                "example_data_references": [
                    {"date": "2020-01-01"},
                    {"date": "2020-01-02"},
                    {"date": "2020-01-03"},
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {
        #     "n_rows": 8,
        #     "batch_spec": {
        #         "table_name": "table_partitioned_by_date_column__A",
        #         "data_asset_name": "table_partitioned_by_date_column__A",
        #         "batch_identifiers": {"date": "2020-01-02"},
        #         "splitter_method": "_split_on_column_value",
        #         "splitter_kwargs": {"column_name": "date"},
        #     },
        # },
    }
def test_example_B(test_cases_for_sql_data_connector_sqlite_execution_engine):
    """Self-check a connector splitting table B on a converted datetime column."""
    random.seed(0)
    execution_engine = test_cases_for_sql_data_connector_sqlite_execution_engine

    connector_config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_timestamp_column__B:
            splitter_method: _split_on_converted_datetime
            splitter_kwargs:
                column_name: timestamp
    """
    )
    connector_config["execution_engine"] = execution_engine

    my_data_connector = ConfiguredAssetSqlDataConnector(**connector_config)

    self_check_report = my_data_connector.self_check()
    print(json.dumps(self_check_report, indent=2))

    assert self_check_report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["table_partitioned_by_timestamp_column__B"],
        "data_assets": {
            "table_partitioned_by_timestamp_column__B": {
                "batch_definition_count": 30,
                "example_data_references": [
                    {"timestamp": "2020-01-01"},
                    {"timestamp": "2020-01-02"},
                    {"timestamp": "2020-01-03"},
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {
        #     "n_rows": 8,
        #     "batch_spec": {
        #         "table_name": "table_partitioned_by_timestamp_column__B",
        #         "data_asset_name": "table_partitioned_by_timestamp_column__B",
        #         "batch_identifiers": {"timestamp": "2020-01-02"},
        #         "splitter_method": "_split_on_converted_datetime",
        #         "splitter_kwargs": {"column_name": "timestamp"},
        #     },
        # },
    }
def test_example_C(
    splitter_method_name_prefix,
    test_cases_for_sql_data_connector_sqlite_execution_engine,
):
    """Self-check a connector splitting table C on an incrementing id divided by 10.

    The splitter method name is parametrized via `splitter_method_name_prefix`.
    """
    random.seed(0)
    execution_engine = test_cases_for_sql_data_connector_sqlite_execution_engine

    connector_config = yaml.load(
        f"""
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_regularly_spaced_incrementing_id_column__C:
            splitter_method: {splitter_method_name_prefix}split_on_divided_integer
            splitter_kwargs:
                column_name: id
                divisor: 10
    """,
    )
    connector_config["execution_engine"] = execution_engine

    my_data_connector = ConfiguredAssetSqlDataConnector(**connector_config)

    self_check_report = my_data_connector.self_check()
    print(json.dumps(self_check_report, indent=2))

    assert self_check_report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": [
            "table_partitioned_by_regularly_spaced_incrementing_id_column__C"
        ],
        "data_assets": {
            "table_partitioned_by_regularly_spaced_incrementing_id_column__C": {
                "batch_definition_count": 12,
                "example_data_references": [
                    {"id": 0},
                    {"id": 1},
                    {"id": 2},
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {
        #     "n_rows": 10,
        #     "batch_spec": {
        #         "table_name": "table_partitioned_by_regularly_spaced_incrementing_id_column__C",
        #         "data_asset_name": "table_partitioned_by_regularly_spaced_incrementing_id_column__C",
        #         "batch_identifiers": {"id": 1},
        #         "splitter_method": "_split_on_divided_integer",
        #         "splitter_kwargs": {"column_name": "id", "divisor": 10},
        #     },
        # },
    }
def test_example_E(test_cases_for_sql_data_connector_sqlite_execution_engine):
    """Self-check a connector splitting table E on an incrementing batch_id column."""
    random.seed(0)
    execution_engine = test_cases_for_sql_data_connector_sqlite_execution_engine

    # NOTE(review): uses the "data_assets" config key, while sibling tests use
    # "assets" — confirm which parameter name the connector actually accepts.
    connector_config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    data_assets:
        table_partitioned_by_incrementing_batch_id__E:
            splitter_method: _split_on_column_value
            splitter_kwargs:
                column_name: batch_id
    """,
    )
    connector_config["execution_engine"] = execution_engine

    my_data_connector = ConfiguredAssetSqlDataConnector(**connector_config)

    self_check_report = my_data_connector.self_check()
    print(json.dumps(self_check_report, indent=2))

    assert self_check_report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["table_partitioned_by_incrementing_batch_id__E"],
        "data_assets": {
            "table_partitioned_by_incrementing_batch_id__E": {
                "batch_definition_count": 11,
                "example_data_references": [
                    {"batch_id": 0},
                    {"batch_id": 1},
                    {"batch_id": 2},
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {
        #     "n_rows": 9,
        #     "batch_spec": {
        #         "table_name": "table_partitioned_by_incrementing_batch_id__E",
        #         "data_asset_name": "table_partitioned_by_incrementing_batch_id__E",
        #         "partition_definition": {"batch_id": 1},
        #         "splitter_method": "_split_on_column_value",
        #         "splitter_kwargs": {"column_name": "batch_id"},
        #     },
        # },
    }