# Shared setup assumed by the snippets below: a configured Data Context plus
# these imports (the excerpts use ge, sa, yaml, List, Batch, and BatchRequest
# without showing where they come from).
from typing import List

import great_expectations as ge
import sqlalchemy as sa
from great_expectations.core.batch import Batch, BatchRequest
from ruamel import yaml

context = ge.get_context()

# Here is a BatchRequest naming a data_asset
batch_request = BatchRequest(
    datasource_name="my_azure_datasource",
    data_connector_name="default_inferred_data_connector_name",
    data_asset_name="<YOUR_DATA_ASSET_NAME>",
    batch_spec_passthrough={
        "reader_method": "csv",
        "reader_options": {"header": True},
    },
)

# Please note this override is only to provide good UX for docs and tests.
# In normal usage you'd set your data asset name directly in the BatchRequest above.
batch_request.data_asset_name = (
    "data/taxi_yellow_tripdata_samples/yellow_tripdata_sample_2019-01"
)

context.create_expectation_suite(
    expectation_suite_name="test_suite", overwrite_existing=True
)
validator = context.get_validator(
    batch_request=batch_request, expectation_suite_name="test_suite"
)
print(validator.head())

# NOTE: The following code is only for testing and can be ignored by users.
assert isinstance(validator, ge.validator.validator.Validator)
assert [ds["name"] for ds in context.list_datasources()] == ["my_azure_datasource"]
assert set(
    context.get_available_data_asset_names()["my_azure_datasource"][
        "default_inferred_data_connector_name"
    ]
) == {
    "data/taxi_yellow_tripdata_samples/yellow_tripdata_sample_2019-01",
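# With a validator in hand, Expectations can be run directly against the batch.
# A minimal sketch, assuming the NYC taxi sample schema ("passenger_count" is an
# assumed column name, not taken from the snippet above):
result = validator.expect_column_values_to_not_be_null(column="passenger_count")
assert result.success
# Persist the suite so it can be reused, e.g. by a Checkpoint, later on.
validator.save_expectation_suite(discard_failed_expectations=False)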
available_data_asset_names = context.datasources[
    "taxi_datasource"
].get_available_data_asset_names(
    data_connector_names="default_inferred_data_connector_name"
)["default_inferred_data_connector_name"]
assert len(available_data_asset_names) == 36

# Here is a BatchRequest naming an inferred data_asset.
batch_request = BatchRequest(
    datasource_name="taxi_datasource",
    data_connector_name="default_inferred_data_connector_name",
    data_asset_name="<YOUR_DATA_ASSET_NAME>",
)

# Please note this override is only to provide good UX for docs and tests.
# In normal usage you'd set your data asset name directly in the BatchRequest above.
batch_request.data_asset_name = "yellow_tripdata_sample_2019-01.csv"

context.create_expectation_suite(
    expectation_suite_name="test_suite", overwrite_existing=True
)
validator = context.get_validator(
    batch_request=batch_request, expectation_suite_name="test_suite"
)
print(validator.head(n_rows=10))

batch_list = context.get_batch_list(batch_request=batch_request)
assert len(batch_list) == 1
assert batch_list[0].data.dataframe.shape[0] == 10000

# Here is a BatchRequest naming a configured data_asset representing an
# un-partitioned (flat) filename structure.
batch_request = BatchRequest(
"name_starts_with"] = "data/taxi_yellow_tripdata_samples/" context.test_yaml_config(yaml.dump(datasource_config)) context.add_datasource(**datasource_config) # Here is a BatchRequest naming a data_asset batch_request = BatchRequest( datasource_name="my_azure_datasource", data_connector_name="configured_data_connector_name", data_asset_name="<YOUR_DATA_ASSET_NAME>", ) # Please note this override is only to provide good UX for docs and tests. # In normal usage you'd set your data asset name directly in the BatchRequest above. batch_request.data_asset_name = "taxi_data" context.create_expectation_suite(expectation_suite_name="test_suite", overwrite_existing=True) validator = context.get_validator(batch_request=batch_request, expectation_suite_name="test_suite") print(validator.head()) # NOTE: The following code is only for testing and can be ignored by users. assert isinstance(validator, ge.validator.validator.Validator) assert [ds["name"] for ds in context.list_datasources()] == ["my_azure_datasource"] assert set(context.get_available_data_asset_names()["my_azure_datasource"] ["configured_data_connector_name"]) == {"taxi_data"} batch_list: List[Batch] = context.get_batch_list(batch_request=batch_request)
context.add_datasource(**my_spark_datasource_config)

batch_request = BatchRequest(
    datasource_name="insert_your_datasource_name_here",
    data_connector_name="insert_your_data_connector_name_here",
    data_asset_name="yellow_tripdata",
    batch_spec_passthrough={
        "reader_method": "csv",
        "reader_options": {
            "header": True,
        },
    },
)

# For the purposes of this script, the data_asset_name includes "sample"
batch_request.data_asset_name = "yellow_tripdata_sample"
# CODE ^^^^^ ^^^^^

# NOTE: The following code is only for testing and can be ignored by users.
# ASSERTIONS vvvvv vvvvv
assert len(context.list_datasources()) == 1
assert context.list_datasources()[0]["name"] == "insert_your_datasource_name_here"
assert list(context.list_datasources()[0]["data_connectors"].keys()) == [
    "insert_your_data_connector_name_here"
]
sorted_available_data_asset_names_from_datasource = sorted(
    context.datasources[
        "insert_your_datasource_name_here"
    ].get_available_data_asset_names(
        data_connector_names="insert_your_data_connector_name_here"
    )
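# The batch_spec_passthrough block above forwards reader settings to the Spark
# execution engine. Roughly equivalent plain PySpark read, as a hedged sketch
# (the file path below is hypothetical, not part of the snippet above):
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.read.csv("path/to/yellow_tripdata_sample.csv", header=True)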
context.add_datasource(**yaml.load(datasource_yaml))

available_data_asset_names = context.datasources[
    "taxi_datasource"
].get_available_data_asset_names(data_connector_names="whole_table")["whole_table"]
assert len(available_data_asset_names) == 2

# Here is a BatchRequest referring to an un-partitioned inferred data_asset.
batch_request = BatchRequest(
    datasource_name="taxi_datasource",
    data_connector_name="whole_table",
    data_asset_name="<YOUR_DATA_ASSET_NAME>",
)

# Please note this override is only to provide good UX for docs and tests.
# In normal usage you'd set your data asset name directly in the BatchRequest above.
batch_request.data_asset_name = "yellow_tripdata_sample_2019_01"

context.create_expectation_suite(
    expectation_suite_name="test_suite", overwrite_existing=True
)
validator = context.get_validator(
    batch_request=batch_request, expectation_suite_name="test_suite"
)
print(validator.head(n_rows=10))

batch_list = context.get_batch_list(batch_request=batch_request)
assert len(batch_list) == 1
batch_data = batch_list[0].data
num_rows = batch_data.execution_engine.engine.execute(
    sa.select([sa.func.count()]).select_from(batch_data.selectable)
).one()[0]
assert num_rows == 10000

# Here is a BatchRequest naming an inferred data_asset partitioned by day.
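# The raw SQLAlchemy row count above can also be phrased as an Expectation on
# the validator; a brief equivalent sketch:
result = validator.expect_table_row_count_to_equal(value=10000)
assert result.success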