def test_validator_batch_filter(multi_batch_taxi_validator):
    """Exercise ``BatchFilter`` selection on the multi-batch taxi validator.

    Three kinds of data connector query are checked in turn against the
    batch definitions held by the validator fixture:

    * ``batch_filter_parameters`` selecting month "01",
    * an ``index`` slice of ``slice(-1, 0, -1)``,
    * a ``custom_filter_function`` accepting months "01" and "03".
    """
    all_batch_definitions: List[BatchDefinition] = [
        batch.batch_definition
        for _, batch in multi_batch_taxi_validator.batches.items()
    ]

    # --- Selection by batch_filter_parameters: exactly one batch, January ---
    january_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={"batch_filter_parameters": {"month": "01"}}
    )
    january_definitions: List[BatchDefinition] = (
        january_filter.select_from_data_connector_query(
            batch_definition_list=all_batch_definitions
        )
    )
    assert len(january_definitions) == 1
    january_definition = january_definitions[0]
    assert january_definition["batch_identifiers"]["month"] == "01"
    assert january_definition["id"] == "18653cbf8fb5baf5fbbc5ed95f9ee94d"

    # --- Selection by index slice: expected to yield February and March ---
    reverse_slice_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={"index": slice(-1, 0, -1)}
    )
    sliced_definitions: List[BatchDefinition] = (
        reverse_slice_filter.select_from_data_connector_query(
            batch_definition_list=all_batch_definitions
        )
    )
    for sliced_definition in sliced_definitions:
        print(sliced_definition["batch_identifiers"])
    assert len(sliced_definitions) == 2
    sliced_months: Set[str] = {
        definition.batch_identifiers["month"] for definition in sliced_definitions
    }
    assert sliced_months == {"02", "03"}

    # --- Selection by custom function: expected to yield January and March ---
    custom_function_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={
            "custom_filter_function": lambda batch_identifiers: (
                batch_identifiers["month"] in ("01", "03")
            )
        }
    )
    custom_definitions: List[BatchDefinition] = (
        custom_function_filter.select_from_data_connector_query(
            batch_definition_list=all_batch_definitions
        )
    )
    for custom_definition in custom_definitions:
        print(custom_definition["batch_identifiers"])
    assert len(custom_definitions) == 2
    custom_months: Set[str] = {
        definition.batch_identifiers["month"] for definition in custom_definitions
    }
    assert custom_months == {"01", "03"}
def test_custom_filter_function(multi_batch_taxi_validator):
    """Check that a ``custom_filter_function`` keeps only the pre-March batches.

    The validator fixture is expected to hold three batch definitions; the
    filter keeps those whose ``month`` identifier, read as an integer, is
    below 3, which must leave months "01" and "02".
    """
    all_batch_definitions: List[BatchDefinition] = [
        batch.batch_definition
        for _, batch in multi_batch_taxi_validator.batches.items()
    ]
    assert len(all_batch_definitions) == 3

    # Filter to all batch_definitions prior to March
    pre_march_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={
            "custom_filter_function": lambda batch_identifiers: (
                int(batch_identifiers["month"]) < 3
            )
        }
    )
    pre_march_definitions: list = pre_march_filter.select_from_data_connector_query(
        batch_definition_list=all_batch_definitions
    )

    assert len(pre_march_definitions) == 2
    selected_months: Set[str] = {
        definition.batch_identifiers["month"] for definition in pre_march_definitions
    }
    assert selected_months == {"01", "02"}
def _get_batch_definition_list_from_batch_request(
    self,
    batch_request: BatchRequestBase,
) -> List[BatchDefinition]:
    """
    Collect the batch_definitions that satisfy batch_request.

    Processing happens in this order:
        1. batch_request is validated; the data references cache is refreshed if it is empty.
        2. Cached batch_definitions are kept only when they match batch_request.
        3. If the data_connector has sorters configured, the matches are sorted.
        4. If batch_request carries a data_connector_query, a BatchFilter built from a copy of
           that query is applied to the (possibly sorted) matches.

    Args:
        batch_request (BatchRequestBase): BatchRequestBase (BatchRequest without attribute validation) to process

    Returns:
        A list of BatchDefinition objects that match BatchRequest
    """
    self._validate_batch_request(batch_request=batch_request)

    # Populate the cache when nothing has been loaded into it yet.
    if not len(self._data_references_cache):
        self._refresh_data_references_cache()

    matching_batch_definitions: List[BatchDefinition] = [
        cached_batch_definition
        for cached_batch_definition in self._get_batch_definition_list_from_cache()
        if batch_definition_matches_batch_request(
            batch_definition=cached_batch_definition,
            batch_request=batch_request,
        )
    ]

    # Sorting runs before the query filter is applied.
    if len(self.sorters) > 0:
        matching_batch_definitions = self._sort_batch_definition_list(
            batch_definition_list=matching_batch_definitions
        )

    # NOTE(review): batch_request.limit is only honoured when a
    # data_connector_query is present -- confirm this is intended.
    data_connector_query = batch_request.data_connector_query
    if data_connector_query is None:
        return matching_batch_definitions

    # Work on a copy so the limit injected below does not modify the
    # query object held by the request.
    data_connector_query_dict = data_connector_query.copy()
    if (
        batch_request.limit is not None
        and data_connector_query_dict.get("limit") is None
    ):
        # A limit already present in the query takes precedence over
        # the request-level limit.
        data_connector_query_dict["limit"] = batch_request.limit

    batch_filter_obj: BatchFilter = build_batch_filter(
        data_connector_query_dict=data_connector_query_dict
    )
    return batch_filter_obj.select_from_data_connector_query(
        batch_definition_list=matching_batch_definitions
    )
def test_validator_batch_filter(multi_batch_taxi_validator):
    """Exercise ``BatchFilter`` selection on the multi-batch taxi validator.

    Four kinds of data connector query are checked in turn against the
    batch definitions held by the validator fixture:

    * ``batch_filter_parameters`` selecting month "01",
    * an ``index`` slice of ``slice(-1, 0, -1)``,
    * a ``custom_filter_function`` accepting months "01" and "03",
    * a ``limit`` of 2.
    """
    all_batch_definitions: List[BatchDefinition] = [
        batch.batch_definition
        for _, batch in multi_batch_taxi_validator.batches.items()
    ]

    # --- Selection by batch_filter_parameters: exactly one batch, January ---
    january_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={"batch_filter_parameters": {"month": "01"}}
    )
    january_definitions: List[BatchDefinition] = (
        january_filter.select_from_data_connector_query(
            batch_definition_list=all_batch_definitions
        )
    )
    assert len(january_definitions) == 1
    january_definition = january_definitions[0]
    assert january_definition["batch_identifiers"]["month"] == "01"
    assert january_definition["id"] == "0327cfb13205ec8512e1c28e438ab43b"

    # --- Selection by index slice: expected to yield February and March ---
    reverse_slice_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={"index": slice(-1, 0, -1)}
    )
    sliced_definitions: List[BatchDefinition] = (
        reverse_slice_filter.select_from_data_connector_query(
            batch_definition_list=all_batch_definitions
        )
    )
    for sliced_definition in sliced_definitions:
        print(sliced_definition["batch_identifiers"])
    assert len(sliced_definitions) == 2
    sliced_months: Set[str] = {
        definition.batch_identifiers["month"] for definition in sliced_definitions
    }
    assert sliced_months == {"02", "03"}

    # --- Selection by custom function: expected to yield January and March ---
    custom_function_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={
            "custom_filter_function": lambda batch_identifiers: (
                batch_identifiers["month"] in ("01", "03")
            )
        }
    )
    custom_definitions: List[BatchDefinition] = (
        custom_function_filter.select_from_data_connector_query(
            batch_definition_list=all_batch_definitions
        )
    )
    for custom_definition in custom_definitions:
        print(custom_definition["batch_identifiers"])
    assert len(custom_definitions) == 2
    custom_months: Set[str] = {
        definition.batch_identifiers["month"] for definition in custom_definitions
    }
    assert custom_months == {"01", "03"}

    # Filter using limit param
    limit_filter: BatchFilter = build_batch_filter(
        data_connector_query_dict={"limit": 2}
    )
    limited_definitions: List[BatchDefinition] = (
        limit_filter.select_from_data_connector_query(
            batch_definition_list=all_batch_definitions
        )
    )
    assert len(limited_definitions) == 2

    # Expected (month, id) for each position of the limited result.
    expected_limited = (
        ("01", "0327cfb13205ec8512e1c28e438ab43b"),
        ("02", "0808e185a52825d22356de2fe00a8f5f"),
    )
    for position, (expected_month, expected_id) in enumerate(expected_limited):
        limited_definition = limited_definitions[position]
        assert limited_definition["batch_identifiers"]["month"] == expected_month
        assert limited_definition["id"] == expected_id
) # The active batch should be December, as this should be the last one loaded. Confirming here. assert validator_multi_batch.active_batch_definition.batch_identifiers["month"] == "12" # Get the list of all batches contained by the Validator for use in the BatchFilter total_batch_definition_list: list = [ v.batch_definition for k, v in validator_multi_batch.batches.items() ] # Filter to all batch_definitions prior to December pre_dec_batch_filter: BatchFilter = build_batch_filter( data_connector_query_dict={ "custom_filter_function": lambda batch_identifiers: int( batch_identifiers["month"] ) < 12 and batch_identifiers["year"] == "2019" } ) pre_dec_batch_definition_list: list = ( pre_dec_batch_filter.select_from_data_connector_query( batch_definition_list=total_batch_definition_list ) ) # Get the highest max and lowest min before December cumulative_max = 0 cumulative_min = np.Inf for batch_definition in pre_dec_batch_definition_list: batch_id: str = batch_definition.id
jan_batch_request, feb_batch_request, march_batch_request ], expectation_suite=suite, ) assert validator.active_batch_definition.batch_identifiers["month"] == "03" assert validator.active_batch_definition.batch_identifiers["year"] == "2019" # Get the list of all batches contained by the Validator for use in the BatchFileter total_batch_definition_list: List = [ v.batch_definition for k, v in validator.batches.items() ] # Filter to all batch_definitions prior to March jan_feb_batch_filter: BatchFilter = build_batch_filter( data_connector_query_dict={ "custom_filter_function": lambda batch_identifiers: int(batch_identifiers["month"]) < 3 }) jan_feb_batch_definition_list: list = ( jan_feb_batch_filter.select_from_data_connector_query( batch_definition_list=total_batch_definition_list)) # Get the highest max and lowest min between January and February cumulative_max = 0 cumulative_min = np.Inf for batch_definition in jan_feb_batch_definition_list: batch_id: str = batch_definition.id current_max = validator.get_metric( MetricConfiguration( "column.max", metric_domain_kwargs={