def test_file_output(tmp_path: Path, yaml_handler: YAMLHandler) -> None:
    """Round-trip check: YAML dumped to a file reloads equal to the original data."""
    source_yaml: str = "abc: 1"
    output_path: Path = Path(tmp_path) / "out.yaml"

    parsed: dict = yaml_handler.load(source_yaml)
    yaml_handler.dump(parsed, output_path)

    # Read the file back and confirm the round trip preserved the mapping.
    with open(output_path) as stream:
        first_line = stream.readline().strip()
    reloaded: dict = yaml_handler.load(first_line)
    assert reloaded == parsed
def test_dump_default_behavior_with_no_stream_specified(
    yaml_handler: YAMLHandler,
) -> None:
    """When no stream is given, dump() defaults to StringIO and returns the text."""
    payload: dict = {"abc": 1}
    result: Optional[str] = yaml_handler.dump(payload)
    assert result == "abc: 1\n"
from typing import Dict, List

import pandas as pd
import pytest

import great_expectations
import great_expectations.exceptions as ge_exceptions
from great_expectations import DataContext
from great_expectations.core.batch import Batch, RuntimeBatchRequest
from great_expectations.core.id_dict import BatchSpec
from great_expectations.core.yaml_handler import YAMLHandler
from great_expectations.execution_engine.sparkdf_batch_data import SparkDFBatchData
from great_expectations.validator.validator import Validator

# Module-level YAML handler shared by the tests in this file.
yaml = YAMLHandler()


@pytest.fixture()
def test_df_spark(spark_session):
    """Return a small two-column (col1, col2) Spark DataFrame built from pandas data."""
    test_df: "pyspark.sql.dataframe.DataFrame" = spark_session.createDataFrame(
        data=pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    )
    return test_df


@pytest.fixture
def data_context_with_datasource_spark_engine_batch_spec_passthrough(
    empty_data_context, spark_session
):
    # NOTE(review): fixture body continues beyond this chunk; only the first
    # statement (binding the empty context) is visible here.
    context: DataContext = empty_data_context
from great_expectations.execution_engine.sqlalchemy_batch_data import (
    SqlAlchemyBatchData,
)
from tests.integration.fixtures.split_and_sample_data.splitter_test_cases_and_fixtures import (
    TaxiSplittingTestCase,
    TaxiSplittingTestCases,
    TaxiTestData,
)
from tests.test_utils import (
    LoadedTable,
    clean_up_tables_with_prefix,
    get_bigquery_connection_url,
    get_snowflake_connection_url,
    load_data_into_test_database,
)

# Shared YAML handler used to read the integration-test connection config.
yaml_handler: YAMLHandler = YAMLHandler()


def _get_connection_string_and_dialect() -> Tuple[str, str]:
    """Read ./connection_string.yml and resolve the SQL dialect and connection string.

    NOTE(review): the return statement is not visible in this chunk; the
    annotation suggests (dialect, connection_string) is returned — confirm
    against the full file.
    """
    with open("./connection_string.yml") as f:
        db_config: dict = yaml_handler.load(f)

    dialect: str = db_config["dialect"]
    # Snowflake and BigQuery URLs come from environment-backed helpers; any
    # other dialect supplies its URL directly in the YAML config.
    if dialect == "snowflake":
        connection_string: str = get_snowflake_connection_url()
    elif dialect == "bigquery":
        connection_string: str = get_bigquery_connection_url()
    else:
        connection_string: str = db_config["connection_string"]
import os # <snippet> import great_expectations as ge from great_expectations.core.batch import BatchRequest # </snippet> from great_expectations.core.yaml_handler import YAMLHandler yaml = YAMLHandler() # <snippet> context = ge.get_context() # </snippet> # NOTE: The following code is only for testing and depends on an environment # variable to set the gcp_project. You can replace the value with your own # GCP project information gcp_project = os.environ.get("GE_TEST_GCP_PROJECT") if not gcp_project: raise ValueError( "Environment Variable GE_TEST_GCP_PROJECT is required to run GCS integration tests" ) # parse great_expectations.yml for comparison great_expectations_yaml_file_path = os.path.join(context.root_directory, "great_expectations.yml") with open(great_expectations_yaml_file_path) as f: great_expectations_yaml = yaml.load(f) stores = great_expectations_yaml["stores"]
def test_dump_stdout_specified(capsys, yaml_handler: YAMLHandler) -> None:
    """Dumping to sys.stdout — ruamel's recommended way to emit YAML as text —
    writes the serialized mapping to standard output."""
    payload: dict = {"abc": 1}
    yaml_handler.dump(payload, stream=sys.stdout)
    printed: Any = capsys.readouterr()
    assert printed.out == "abc: 1\n"
def test_load_incorrect_input(yaml_handler: YAMLHandler) -> None:
    """load() rejects non-string input by raising TypeError."""
    not_yaml_text = 12345
    with pytest.raises(TypeError):
        yaml_handler.load(not_yaml_text)
def test_load_correct_input(
    simple_yaml: str, simple_dict: dict, yaml_handler: YAMLHandler
) -> None:
    """Loading a well-formed YAML string yields the expected dictionary."""
    loaded: dict = yaml_handler.load(simple_yaml)
    assert loaded == simple_dict
def yaml_handler() -> YAMLHandler:
    """Provide a fresh YAMLHandler instance."""
    handler = YAMLHandler()
    return handler
Assert statements are included to ensure that if the behaviour shown in this
script breaks it will not pass tests and will be updated. These statements can
be ignored by users.

Comments with the tags `<snippet>` and `</snippet>` are used to ensure that if
this script is updated the snippets that are specified for use in documentation
are maintained. These comments can be ignored by users.

--documentation--
https://docs.greatexpectations.io/docs/guides/expectations/data_assistants/how_to_create_an_expectation_suite_with_the_onboarding_data_assistant
"""
import great_expectations as ge
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.core.batch import BatchRequest
from great_expectations.core.yaml_handler import YAMLHandler

# YAML handler for any config parsing done later in this script.
yaml = YAMLHandler()

context: ge.DataContext = ge.get_context()

# Configure your datasource (if you aren't using one that already exists)
# <snippet>
# NOTE(review): this Pandas-engine datasource config literal continues beyond
# this chunk; the trailing brace is intentionally left open here.
datasource_config = {
    "name": "taxi_multi_batch_datasource",
    "class_name": "Datasource",
    "module_name": "great_expectations.datasource",
    "execution_engine": {
        "module_name": "great_expectations.execution_engine",
        "class_name": "PandasExecutionEngine",
    },
    "data_connectors": {
# <snippet> import os import great_expectations as ge from great_expectations.core.batch import BatchRequest, RuntimeBatchRequest from great_expectations.core.yaml_handler import YAMLHandler # </snippet> yaml = YAMLHandler() # NOTE: The following code is only for testing and depends on an environment # variable to set the gcp_project. You can replace the value with your own # GCP project information gcp_project = os.environ.get("GE_TEST_GCP_PROJECT") if not gcp_project: raise ValueError( "Environment Variable GE_TEST_GCP_PROJECT is required to run BigQuery integration tests" ) bigquery_dataset = "demo" CONNECTION_STRING = f"bigquery://{gcp_project}/{bigquery_dataset}" # <snippet> context = ge.get_context() # </snippet> # <snippet> datasource_yaml = f""" name: my_bigquery_datasource class_name: Datasource execution_engine:
import os
from typing import List

import pytest

from great_expectations.core.yaml_handler import YAMLHandler
from great_expectations.data_context import BaseDataContext
from great_expectations.data_context.types.base import DataContextConfig
from great_expectations.data_context.store import (  # isort:skip
    ExpectationsStore,
    ValidationsStore,
    EvaluationParameterStore,
)

# Module-level YAML handler shared by the tests in this file.
yaml: YAMLHandler = YAMLHandler()


@pytest.fixture()
def basic_in_memory_data_context_config_just_stores():
    """Minimal v3 DataContextConfig with no datasources, only store config.

    NOTE(review): the config literal continues beyond this chunk; the call is
    intentionally left open here.
    """
    return DataContextConfig(
        config_version=3.0,
        plugins_directory=None,
        evaluation_parameter_store_name="evaluation_parameter_store",
        expectations_store_name="expectations_store",
        datasources={},
        stores={
            "expectations_store": {"class_name": "ExpectationsStore"},
            "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},