def get_validator(
    context: DataContext,
    batch_request: Union[dict, BatchRequest],
    suite: Union[str, ExpectationSuite],
) -> Validator:
    """Return a Validator for ``batch_request`` bound to an expectation suite.

    Args:
        context: DataContext whose ``get_validator`` method builds the Validator.
        batch_request: Either a ``BatchRequest`` or a dict of keyword arguments
            from which one is constructed via ``BatchRequest(**batch_request)``.
        suite: Either the name of an expectation suite (forwarded as
            ``expectation_suite_name``) or an ``ExpectationSuite`` object
            (forwarded as ``expectation_suite``).

    Returns:
        The Validator produced by ``context.get_validator``.

    Raises:
        AssertionError: If ``suite`` is neither a ``str`` nor an
            ``ExpectationSuite``.
    """
    # Raised explicitly instead of via an ``assert`` statement so the check is
    # not stripped under ``python -O``. AssertionError is kept so that callers
    # relying on the previous exception type keep working.
    if not isinstance(suite, (str, ExpectationSuite)):
        raise AssertionError(
            "Invalid suite type (must be an ExpectationSuite or a string)."
        )

    # Accept plain dicts for convenience and normalise them to a BatchRequest.
    if isinstance(batch_request, dict):
        batch_request = BatchRequest(**batch_request)

    # A string names a stored suite; anything else is the suite object itself.
    if isinstance(suite, str):
        return context.get_validator(
            batch_request=batch_request, expectation_suite_name=suite
        )
    return context.get_validator(
        batch_request=batch_request, expectation_suite=suite
    )
from great_expectations.core.batch import BatchRequest
from great_expectations.data_context.data_context import DataContext
from great_expectations.validator.validation_graph import MetricConfiguration

context = DataContext()
suite = context.get_expectation_suite("yellow_trip_data_validations")

# Both requests below address the same datasource / connector / asset and
# differ only in the batch index they select.
monthly_reports_asset = {
    "datasource_name": "taxi_pandas",
    "data_connector_name": "monthly",
    "data_asset_name": "my_reports",
}

# February: BatchRequest (index -2) and its Validator
batch_request_february = BatchRequest(
    **monthly_reports_asset,
    data_connector_query={"index": -2},
)
validator_february = context.get_validator(
    batch_request=batch_request_february,
    expectation_suite=suite,
)

# February: table row count, requested with an empty metric domain
row_count_metric = MetricConfiguration("table.row_count", metric_domain_kwargs={})
february_table_row_count = validator_february.get_metric(row_count_metric)

# March: BatchRequest (index -1) and its Validator
batch_request_march = BatchRequest(
    **monthly_reports_asset,
    data_connector_query={"index": -1},
)
validator_march = context.get_validator(
    batch_request=batch_request_march,
    expectation_suite=suite,
)
# Create a BatchRequest restricted to the March 2019 data.
march_batch_request: BatchRequest = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={
        "batch_filter_parameters": {
            "month": "03",
            "year": "2019"
        }
    },
)

# Instantiate a Validator from three BatchRequests at once.
# jan_batch_request and feb_batch_request are defined outside this snippet.
validator: Validator = context.get_validator(
    batch_request_list=[
        jan_batch_request, feb_batch_request, march_batch_request
    ],
    expectation_suite=suite,
)

# Confirm that the Validator's active batch is the March 2019 one.
assert validator.active_batch_definition.batch_identifiers["month"] == "03"
assert validator.active_batch_definition.batch_identifiers["year"] == "2019"

# Get the list of all batches contained by the Validator for use in the BatchFilter
total_batch_definition_list: List = [
    v.batch_definition for k, v in validator.batches.items()
]

# Filter to all batch_definitions prior to March
# NOTE(review): this statement is cut off in the source as seen here; the
# closing brackets of the dict and of the build_batch_filter call are missing.
jan_feb_batch_filter: BatchFilter = build_batch_filter(
    data_connector_query_dict={
        "custom_filter_function": lambda batch_identifiers: int(batch_identifiers["month"]) < 3
from great_expectations.validator.validation_graph import MetricConfiguration

# NOTE(review): DataContext, BatchRequest, BatchFilter and build_batch_filter
# are used below but their imports are not visible in this snippet.
context = DataContext()
suite = context.get_expectation_suite("yellow_trip_data_validations")

# This BatchRequest will retrieve all twelve batches from 2019
multi_batch_request = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={"batch_filter_parameters": {"year": "2019"}},
)

# Instantiate the Validator
validator_multi_batch = context.get_validator(
    batch_request=multi_batch_request, expectation_suite=suite
)

# The active batch should be December, as this should be the last one loaded. Confirming here.
assert validator_multi_batch.active_batch_definition.batch_identifiers["month"] == "12"

# Get the list of all batches contained by the Validator for use in the BatchFilter
total_batch_definition_list: list = [
    v.batch_definition for k, v in validator_multi_batch.batches.items()
]

# Filter to all batch_definitions prior to December
# NOTE(review): this statement is cut off in the source as seen here; the rest
# of the lambda and the closing brackets of the call are missing.
pre_dec_batch_filter: BatchFilter = build_batch_filter(
    data_connector_query_dict={
        "custom_filter_function": lambda batch_identifiers: int(
            batch_identifiers["month"]
# NOTE(review): other snippets in this file load "yellow_trip_data_validations";
# confirm which suite name is intended here.
suite = context.get_expectation_suite("yellow_tripdata_validations")

# Datasource / connector / asset shared by the monthly requests below.
monthly_asset_kwargs = {
    "datasource_name": "taxi_pandas",
    "data_connector_name": "monthly",
    "data_asset_name": "my_reports",
}

# January 2019: build the BatchRequest and start the Validator from it alone.
jan_batch_request: BatchRequest = BatchRequest(
    **monthly_asset_kwargs,
    data_connector_query={
        "batch_filter_parameters": {"month": "01", "year": "2019"},
    },
)
validator: Validator = context.get_validator(
    batch_request=jan_batch_request,
    expectation_suite=suite,
)

# The active batch must be the January 2019 one.
assert validator.active_batch_definition.batch_identifiers["month"] == "01"
assert validator.active_batch_definition.batch_identifiers["year"] == "2019"

# February 2019: the request for the batch that is loaded into the Validator next.
feb_batch_request: BatchRequest = BatchRequest(
    **monthly_asset_kwargs,
    data_connector_query={
        "batch_filter_parameters": {"month": "02", "year": "2019"},
    },
)