def test_map_data_reference_string_to_batch_definition_list_using_regex():
    """Check the regex-driven mapping of a data reference to batch definitions.

    Three scenarios are exercised against the same data reference string:

    1. The regex pattern does not match the reference, so ``None`` is
       expected.
    2. The pattern matches and no data asset name is supplied, so the
       resulting batch definition is expected to carry
       ``"DEFAULT_ASSET_NAME"``.
    3. The pattern matches and an explicit data asset name is supplied, so
       the resulting batch definition is expected to carry that name.
    """
    reference = "alex_20200809_1000.csv"

    def run_mapping(pattern, names, asset_name):
        # Every scenario shares the datasource / data connector names and the
        # data reference; only the pattern, group names and asset name vary.
        return map_data_reference_string_to_batch_definition_list_using_regex(
            datasource_name="test_datasource",
            data_connector_name="test_data_connector",
            data_asset_name=asset_name,
            data_reference=reference,
            regex_pattern=pattern,
            group_names=names,
        )

    def expected_for(asset_name):
        # Built fresh on each call so every assertion compares against its
        # own BatchDefinition / PartitionDefinition instances.
        return [
            BatchDefinition(
                datasource_name="test_datasource",
                data_connector_name="test_data_connector",
                data_asset_name=asset_name,
                partition_definition=PartitionDefinition(
                    {"name": "alex", "timestamp": "20200809", "price": "1000"}
                ),
            )
        ]

    # regex_pattern does not match --> None
    unmatched = run_mapping(
        r"^(.+)_____________\.csv$", ["name", "timestamp", "price"], None
    )
    assert unmatched is None

    matching_pattern = r"^(.+)_(\d+)_(\d+)\.csv$"
    matching_names = ["name", "timestamp", "price"]

    # no data_asset_name configured --> DEFAULT_ASSET_NAME
    defaulted = run_mapping(matching_pattern, matching_names, None)
    assert defaulted == expected_for("DEFAULT_ASSET_NAME")

    # data_asset_name configured
    configured = run_mapping(matching_pattern, matching_names, "test_data_asset")
    assert configured == expected_for("test_data_asset")
def _map_data_reference_to_batch_definition_list(
    self, data_reference: str, data_asset_name: str = None
) -> Optional[List[BatchDefinition]]:
    """Map a data reference to batch definitions using this connector's regex.

    The regex configuration is obtained from ``self._get_regex_config`` for
    the given data asset name; its ``"pattern"`` and ``"group_names"``
    entries are then handed, together with this connector's datasource name
    and own name, to
    ``map_data_reference_string_to_batch_definition_list_using_regex``.

    Args:
        data_reference: The data reference string to be mapped.
        data_asset_name: Data asset name used both to look up the regex
            configuration and as the asset name passed to the mapping
            helper. Defaults to ``None``.

    Returns:
        Whatever the mapping helper returns, passed through unchanged
        (annotated as an optional list of ``BatchDefinition``).
    """
    regex_settings: dict = self._get_regex_config(data_asset_name=data_asset_name)
    # Pull both entries up front so a missing key surfaces before the
    # mapping helper is invoked.
    regex_pattern, capture_names = (
        regex_settings["pattern"],
        regex_settings["group_names"],
    )
    return map_data_reference_string_to_batch_definition_list_using_regex(
        datasource_name=self.datasource_name,
        data_connector_name=self.name,
        data_asset_name=data_asset_name,
        data_reference=data_reference,
        regex_pattern=regex_pattern,
        group_names=capture_names,
    )