Exemple #1
0
def test_map_data_reference_string_to_batch_definition_list_using_regex():
    """Check the regex mapping helper for a miss, a default asset name and an explicit one.

    Three scenarios are asserted against the same data reference:
    a pattern that does not match, a matching pattern with no asset name,
    and a matching pattern with an explicit asset name.
    """
    reference = "alex_20200809_1000.csv"

    def run_mapping(pattern, names, asset_name):
        # Only the pattern, group names and asset name vary between scenarios.
        return map_data_reference_string_to_batch_definition_list_using_regex(
            datasource_name="test_datasource",
            data_connector_name="test_data_connector",
            data_asset_name=asset_name,
            data_reference=reference,
            regex_pattern=pattern,
            group_names=names,
        )

    def expected_batch_definitions(asset_name):
        # The partition values are the three groups captured from the reference.
        return [
            BatchDefinition(
                datasource_name="test_datasource",
                data_connector_name="test_data_connector",
                data_asset_name=asset_name,
                partition_definition=PartitionDefinition(
                    {"name": "alex", "timestamp": "20200809", "price": "1000",}
                ),
            )
        ]

    # regex_pattern does not match --> None
    unmatched = run_mapping(
        r"^(.+)_____________\.csv$", ["name", "timestamp", "price"], None
    )
    assert unmatched is None

    matching_pattern = r"^(.+)_(\d+)_(\d+)\.csv$"
    matching_names = ["name", "timestamp", "price"]

    # no data_asset_name configured --> DEFAULT_ASSET_NAME
    with_default_asset = run_mapping(matching_pattern, matching_names, None)
    assert with_default_asset == expected_batch_definitions("DEFAULT_ASSET_NAME")

    # data_asset_name configured
    with_named_asset = run_mapping(
        matching_pattern, matching_names, "test_data_asset"
    )
    assert with_named_asset == expected_batch_definitions("test_data_asset")
Exemple #2
0
 def _map_data_reference_to_batch_definition_list(
     self, data_reference: str, data_asset_name: str = None
 ) -> Optional[List[BatchDefinition]]:
     """Map a data reference to batch definitions using the asset's regex config.

     Fetches the regex configuration for ``data_asset_name`` from
     ``self._get_regex_config`` and delegates the actual mapping to
     ``map_data_reference_string_to_batch_definition_list_using_regex``.

     Args:
         data_reference: Reference string forwarded to the regex helper.
         data_asset_name: Asset name used to look up the regex configuration
             and forwarded unchanged to the helper; defaults to ``None``.

     Returns:
         The helper's result, returned unchanged.
     """
     config: dict = self._get_regex_config(data_asset_name=data_asset_name)
     # Both keys are read up front, before any attribute of self is touched.
     regex_pattern, names = config["pattern"], config["group_names"]
     return map_data_reference_string_to_batch_definition_list_using_regex(
         datasource_name=self.datasource_name,
         data_connector_name=self.name,
         data_asset_name=data_asset_name,
         data_reference=data_reference,
         regex_pattern=regex_pattern,
         group_names=names,
     )