예제 #1
0
def get_validator(
    context: DataContext,
    batch_request: Union[dict, BatchRequest],
    suite: Union[str, ExpectationSuite],
) -> Validator:
    """Build a Validator for ``batch_request`` bound to ``suite``.

    Args:
        context: Data context whose ``get_validator`` method is called.
        batch_request: A ``BatchRequest``, or a dict of keyword arguments
            that is expanded into ``BatchRequest(**batch_request)``.
        suite: Either an ``ExpectationSuite`` (forwarded as
            ``expectation_suite``) or a ``str`` (forwarded as
            ``expectation_suite_name``).

    Returns:
        Whatever ``context.get_validator`` returns for the given request
        and suite.

    Raises:
        AssertionError: If ``suite`` is neither a ``str`` nor an
            ``ExpectationSuite``.
    """
    # Raised explicitly instead of via ``assert`` so the check still runs
    # under ``python -O``. AssertionError is kept so callers see the same
    # exception type as before.
    if not isinstance(suite, (str, ExpectationSuite)):
        raise AssertionError(
            "Invalid suite type (must be ExpectationSuite or a string)."
        )

    # Accept a plain dict of BatchRequest keyword arguments for convenience.
    if isinstance(batch_request, dict):
        batch_request = BatchRequest(**batch_request)

    # A string is a suite name; anything else is the suite object itself.
    if isinstance(suite, str):
        return context.get_validator(
            batch_request=batch_request,
            expectation_suite_name=suite,
        )
    return context.get_validator(
        batch_request=batch_request,
        expectation_suite=suite,
    )
from great_expectations.core.batch import BatchRequest
from great_expectations.data_context.data_context import DataContext
from great_expectations.validator.validation_graph import MetricConfiguration

# Open the Data Context and fetch the suite used by both validators below.
context = DataContext()
suite = context.get_expectation_suite("yellow_trip_data_validations")

# February: BatchRequest (connector query index -2) and its Validator
batch_request_february = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={"index": -2},
)
validator_february = context.get_validator(
    batch_request=batch_request_february,
    expectation_suite=suite,
)

# Ask the February Validator for the "table.row_count" metric
february_table_row_count = validator_february.get_metric(
    MetricConfiguration("table.row_count", metric_domain_kwargs={})
)

# March: BatchRequest (connector query index -1) and its Validator
batch_request_march = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={"index": -1},
)
validator_march = context.get_validator(
    batch_request=batch_request_march,
    expectation_suite=suite,
)
예제 #3
0
# BatchRequest selecting March 2019 through batch filter parameters
march_batch_request: BatchRequest = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={
        "batch_filter_parameters": {"month": "03", "year": "2019"},
    },
)

# One Validator built from the January, February and March requests
validator: Validator = context.get_validator(
    batch_request_list=[jan_batch_request, feb_batch_request, march_batch_request],
    expectation_suite=suite,
)
# The active batch is expected to be March 2019; confirm it here.
assert validator.active_batch_definition.batch_identifiers["month"] == "03"
assert validator.active_batch_definition.batch_identifiers["year"] == "2019"

# Collect the batch definition of every batch held by the Validator,
# for use in the BatchFilter
total_batch_definition_list: List = [
    batch.batch_definition for batch in validator.batches.values()
]

# Filter to all batch_definitions prior to March
jan_feb_batch_filter: BatchFilter = build_batch_filter(
    data_connector_query_dict={
        "custom_filter_function":
        lambda batch_identifiers: int(batch_identifiers["month"]) < 3
예제 #4
0
from great_expectations.validator.validation_graph import MetricConfiguration

# Open the Data Context and fetch the suite the Validator will be bound to.
context = DataContext()
suite = context.get_expectation_suite("yellow_trip_data_validations")

# A single BatchRequest filtered on year "2019"; it is expected to match
# all twelve monthly batches.
multi_batch_request = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={
        "batch_filter_parameters": {"year": "2019"},
    },
)

# Build the Validator from the multi-batch request
validator_multi_batch = context.get_validator(
    batch_request=multi_batch_request,
    expectation_suite=suite,
)

# December should be the active batch, since it should be loaded last.
# Confirm that assumption before going further.
assert (
    validator_multi_batch.active_batch_definition.batch_identifiers["month"] == "12"
)

# Collect the batch definition of every batch held by the Validator,
# for use in the BatchFilter
total_batch_definition_list: list = [
    batch.batch_definition for batch in validator_multi_batch.batches.values()
]

# Filter to all batch_definitions prior to December
pre_dec_batch_filter: BatchFilter = build_batch_filter(
    data_connector_query_dict={
        "custom_filter_function": lambda batch_identifiers: int(
            batch_identifiers["month"]
suite = context.get_expectation_suite("yellow_tripdata_validations")

# BatchRequest for January 2019 only; the Validator starts with just this data
jan_batch_request: BatchRequest = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={
        "batch_filter_parameters": {"month": "01", "year": "2019"},
    },
)

validator: Validator = context.get_validator(
    batch_request=jan_batch_request,
    expectation_suite=suite,
)
# Confirm the active batch is January 2019.
assert validator.active_batch_definition.batch_identifiers["month"] == "01"
assert validator.active_batch_definition.batch_identifiers["year"] == "2019"

# BatchRequest for February 2019, to be loaded into the Validator created above
feb_batch_request: BatchRequest = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={
        "batch_filter_parameters": {"month": "02", "year": "2019"},
    },
)