def test_ValidationsStore__convert_resource_identifier_to_list():
    """Check that a ValidationResultIdentifier is converted to a tuple of its string parts."""
    backend_config = {
        "module_name": "great_expectations.data_context.store",
        "class_name": "InMemoryStoreBackend",
    }
    my_store = ValidationsStore(store_backend=backend_config, root_directory=None)

    identifier = ValidationResultIdentifier(
        from_string="ValidationResultIdentifier.a.b.c.quarantine.prod-100"
    )

    # The leading "ValidationResultIdentifier" type tag is not part of the tuple.
    converted = my_store._convert_resource_identifier_to_tuple(identifier)
    assert converted == ("a", "b", "c", "quarantine", "prod-100")
def test_StoreAction():
    """Run a StoreAction against an in-memory ValidationsStore and read the result back."""
    fake_in_memory_store = ValidationsStore(
        root_directory=None,
        store_backend={"class_name": "InMemoryStoreBackend"},
    )

    # NOTE: This is a hack meant to last until we implement runtime_configs
    class Object(object):
        pass

    # Minimal stand-in for a data context: only the `stores` mapping is set here.
    data_context = Object()
    data_context.stores = {"fake_in_memory_store": fake_in_memory_store}

    action = StoreAction(
        data_context=data_context,
        target_store_name="fake_in_memory_store",
    )

    # Nothing has been stored before the action runs.
    assert fake_in_memory_store.list_keys() == []

    vr_id = "ValidationResultIdentifier.my_db.default_generator.my_table.default_expectations.prod_20190801"
    action.run(
        validation_result_suite_identifier=ValidationResultIdentifier(from_string=vr_id),
        validation_result_suite={},
        data_asset=None,
    )

    # Exactly one key is stored, and it serializes back to the identifier we passed in.
    assert len(fake_in_memory_store.list_keys()) == 1
    first_key = fake_in_memory_store.list_keys()[0]
    assert first_key.to_string() == vr_id

    # The stored payload is the (empty) validation result handed to the action.
    lookup_key = ValidationResultIdentifier(from_string=vr_id)
    assert fake_in_memory_store.get(lookup_key) == {}
def test_StoreAction():
    """Run a StoreValidationResultAction against an in-memory store and read the result back.

    NOTE(review): the expected run_time is a fixed timestamp, so this presumably
    runs under a frozen clock applied outside this block -- confirm.
    """
    fake_in_memory_store = ValidationsStore(
        store_backend={"class_name": "InMemoryStoreBackend"}
    )

    # Minimal stand-in for a data context: a cloud-mode flag plus the stores mapping.
    class Object:
        ge_cloud_mode = False

    data_context = Object()
    data_context.stores = {"fake_in_memory_store": fake_in_memory_store}

    action = StoreValidationResultAction(
        data_context=data_context,
        target_store_name="fake_in_memory_store",
    )

    # Nothing has been stored before the action runs.
    assert fake_in_memory_store.list_keys() == []

    submitted_identifier = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="default_expectations"
        ),
        run_id=RunIdentifier(run_name="prod_20190801"),
        batch_identifier="1234",
    )
    submitted_result = ExpectationSuiteValidationResult(success=False, results=[])
    action.run(
        validation_result_suite_identifier=submitted_identifier,
        validation_result_suite=submitted_result,
        data_asset=None,
    )

    expected_run_id = RunIdentifier(
        run_name="prod_20190801", run_time="20190926T134241.000000Z"
    )

    # Exactly one key is stored and each of its components matches what was submitted.
    assert len(fake_in_memory_store.list_keys()) == 1
    stored_identifier = fake_in_memory_store.list_keys()[0]
    assert stored_identifier.batch_identifier == "1234"
    stored_suite_name = (
        stored_identifier.expectation_suite_identifier.expectation_suite_name
    )
    assert stored_suite_name == "default_expectations"
    assert stored_identifier.run_id == expected_run_id

    # Looking the result up with a freshly built, equal identifier returns the stored result.
    lookup_key = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="default_expectations"
        ),
        run_id=expected_run_id,
        batch_identifier="1234",
    )
    assert fake_in_memory_store.get(lookup_key) == ExpectationSuiteValidationResult(
        success=False, results=[]
    )
# Round-trips two validation results through a ValidationsStore configured with a
# TupleFilesystemStoreBackend rooted at a pytest temp directory, then checks the listed keys
# and compares gen_directory_tree_str(path) against an expected directory listing.
# NOTE(review): `project_path` is assigned but never read in this test.
# NOTE(review): the expected-tree literal's line breaks and indentation are not recoverable from
# this collapsed line; restore them from the original file before reformatting this test.
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory): path = str( tmp_path_factory.mktemp( "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir") ) project_path = str(tmp_path_factory.mktemp("my_dir")) my_store = ValidationsStore( store_backend={ "module_name": "great_expectations.data_context.store", "class_name": "TupleFilesystemStoreBackend", "base_directory": "my_store/", }, runtime_environment={"root_directory": path}, ) with pytest.raises(TypeError): my_store.get("not_a_ValidationResultIdentifier") ns_1 = ValidationResultIdentifier( expectation_suite_identifier=ExpectationSuiteIdentifier( "asset.quarantine"), run_id="prod-100", batch_identifier="batch_id", ) my_store.set(ns_1, ExpectationSuiteValidationResult(success=True)) assert my_store.get(ns_1) == ExpectationSuiteValidationResult( success=True, statistics={}, results=[]) ns_2 = ValidationResultIdentifier.from_tuple( ("asset", "quarantine", "prod-20", "batch_id")) my_store.set(ns_2, ExpectationSuiteValidationResult(success=False)) assert my_store.get(ns_2) == ExpectationSuiteValidationResult( success=False, statistics={}, results=[]) print(my_store.list_keys()) assert set(my_store.list_keys()) == { ns_1, ns_2, } print(gen_directory_tree_str(path)) assert (gen_directory_tree_str(path) == """\ test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/ my_store/ asset/ quarantine/ prod-100/ batch_id.json prod-20/ batch_id.json """)
def test_ValidationsStore_with_InMemoryStoreBackend():
    """Round-trip two validation results through an in-memory ValidationsStore."""
    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        }
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier.from_tuple(
        ("a", "b", "c", "quarantine", "prod-100")
    )
    ns_2 = ValidationResultIdentifier.from_tuple(
        ("a", "b", "c", "quarantine", "prod-200")
    )

    # Store one passing and one failing result; each must read back with the
    # default (empty) statistics and results filled in.
    for key, succeeded in ((ns_1, True), (ns_2, False)):
        my_store.set(key, ExpectationSuiteValidationResult(success=succeeded))
        assert my_store.get(key) == ExpectationSuiteValidationResult(
            success=succeeded, statistics={}, results=[]
        )

    assert set(my_store.list_keys()) == {ns_1, ns_2}
def test_ValidationsStore_with_TupleS3StoreBackend():
    """Round-trip two validation results through a ValidationsStore on a TupleS3StoreBackend.

    Also inspects the bucket directly to check the object keys the backend wrote
    under the configured prefix.
    """
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket=bucket)

    # First, demonstrate that we pick up default configuration including from an S3TupleS3StoreBackend
    my_store = ValidationsStore(
        store_backend={
            "class_name": "TupleS3StoreBackend",
            "bucket": bucket,
            "prefix": prefix,
        }
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    # Verify that internals are working as expected, including the default filepath.
    # A set comprehension avoids building an intermediate list just to wrap it in set().
    listing = boto3.client("s3").list_objects(Bucket=bucket, Prefix=prefix)
    stored_object_keys = {
        s3_object_info["Key"] for s3_object_info in listing["Contents"]
    }
    assert stored_object_keys == {
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_100/batch_id.json",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_200/batch_id.json",
    }

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
def test_ValidationsStore_with_DatabaseStoreBackend():
    """Round-trip two validation results through a sqlite-backed DatabaseStoreBackend."""
    # Use sqlite so we don't require postgres for this test.
    sqlite_credentials = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": sqlite_credentials,
        }
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    def build_key(run_id):
        # Both keys share the suite and batch; only the run id differs.
        return ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="asset.quarantine",
            ),
            run_id=run_id,
            batch_identifier="batch_id",
        )

    ns_1 = build_key("20191007T151224.1234Z_prod_100")
    ns_2 = build_key("20191007T151224.1234Z_prod_200")

    # Store one passing and one failing result and read each back.
    for key, succeeded in ((ns_1, True), (ns_2, False)):
        my_store.set(key, ExpectationSuiteValidationResult(success=succeeded))
        assert my_store.get(key) == ExpectationSuiteValidationResult(
            success=succeeded, statistics={}, results=[]
        )

    assert set(my_store.list_keys()) == {ns_1, ns_2}
def test_ValidationsStore_with_InMemoryStoreBackend():
    """Round-trip two validation results through an in-memory store and check its backend id."""
    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        }
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier.from_tuple(
        (
            "a",
            "b",
            "c",
            "quarantine",
            datetime.datetime.now(datetime.timezone.utc),
            "prod-100",
        )
    )
    passing_result = ExpectationSuiteValidationResult(success=True)
    my_store.set(ns_1, passing_result)
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier.from_tuple(
        (
            "a",
            "b",
            "c",
            "quarantine",
            datetime.datetime.now(datetime.timezone.utc),
            "prod-200",
        )
    )
    failing_result = ExpectationSuiteValidationResult(success=False)
    my_store.set(ns_2, failing_result)
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    assert set(my_store.list_keys()) == {ns_1, ns_2}

    # What does this test and why?
    # A Store should be able to report its store_backend_id,
    # which is set when the StoreBackend is instantiated.
    backend_id = my_store.store_backend_id
    # Check that store_backend_id exists and can be read
    assert backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    """Round-trip two validation results through a sqlite DatabaseStoreBackend and check its id.

    NOTE(review): the `sa` fixture is not referenced in the body; presumably it is
    requested only for its setup/skip behaviour -- confirm against the fixture.
    """
    # Use sqlite so we don't require postgres for this test.
    sqlite_credentials = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": sqlite_credentials,
        }
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    def build_key(run_id):
        # Both keys share the suite and batch; only the run id differs.
        return ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="asset.quarantine",
            ),
            run_id=run_id,
            batch_identifier="batch_id",
        )

    ns_1 = build_key("20191007T151224.1234Z_prod_100")
    ns_2 = build_key("20191007T151224.1234Z_prod_200")

    # Store one passing and one failing result and read each back.
    for key, succeeded in ((ns_1, True), (ns_2, False)):
        my_store.set(key, ExpectationSuiteValidationResult(success=succeeded))
        assert my_store.get(key) == ExpectationSuiteValidationResult(
            success=succeeded, statistics={}, results=[]
        )

    assert set(my_store.list_keys()) == {ns_1, ns_2}

    # What does this test and why?
    # A Store should be able to report its store_backend_id,
    # which is set when the StoreBackend is instantiated.
    backend_id = my_store.store_backend_id
    # Check that store_backend_id exists and can be read
    assert backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)
def test_ValidationsStore_with_TupleS3StoreBackend():
    """Round-trip two validation results through a TupleS3StoreBackend and check its backend id.

    Also inspects the bucket directly to check the object keys written under the prefix.
    """
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    s3_resource = boto3.resource("s3", region_name="us-east-1")
    s3_resource.create_bucket(Bucket=bucket)

    # First, demonstrate that we pick up default configuration including from an S3TupleS3StoreBackend
    backend_config = {
        "class_name": "TupleS3StoreBackend",
        "bucket": bucket,
        "prefix": prefix,
    }
    my_store = ValidationsStore(store_backend=backend_config)

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    expected_pass = ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )
    assert my_store.get(ns_1) == expected_pass

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    expected_fail = ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )
    assert my_store.get(ns_2) == expected_fail

    # Verify that internals are working as expected, including the default filepath
    listing = boto3.client("s3").list_objects_v2(Bucket=bucket, Prefix=prefix)
    stored_object_keys = {
        s3_object_info["Key"] for s3_object_info in listing["Contents"]
    }
    assert stored_object_keys == {
        "test/prefix/.ge_store_backend_id",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_100/20190926T134241.000000Z/batch_id.json",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_200/20190926T134241.000000Z/batch_id.json",
    }

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {ns_1, ns_2}

    # What does this test and why?
    # A Store should be able to report its store_backend_id,
    # which is set when the StoreBackend is instantiated.
    backend_id = my_store.store_backend_id
    # Check that store_backend_id exists and can be read
    assert backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)
# Round-trips two validation results through a ValidationsStore configured with a
# TupleFilesystemStoreBackend rooted at a pytest temp directory, checks the listed keys and the
# directory tree reported by gen_directory_tree_str, then checks that store_backend_id is set,
# passes test_utils.validate_uuid4, and equals the id of a second store built with the same config.
# NOTE(review): `project_path` is assigned but never read in this test.
# NOTE(review): the expected-tree literal's line breaks and indentation are not recoverable from
# this collapsed text; restore them from the original file before reformatting this test.
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory): path = str( tmp_path_factory.mktemp( "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir" ) ) project_path = str(tmp_path_factory.mktemp("my_dir")) my_store = ValidationsStore( store_backend={ "module_name": "great_expectations.data_context.store", "class_name": "TupleFilesystemStoreBackend", "base_directory": "my_store/", }, runtime_environment={"root_directory": path}, ) with pytest.raises(TypeError): my_store.get("not_a_ValidationResultIdentifier") ns_1 = ValidationResultIdentifier( expectation_suite_identifier=ExpectationSuiteIdentifier("asset.quarantine"), run_id="prod-100", batch_identifier="batch_id", ) my_store.set(ns_1, ExpectationSuiteValidationResult(success=True)) assert my_store.get(ns_1) == ExpectationSuiteValidationResult( success=True, statistics={}, results=[] ) ns_2 = ValidationResultIdentifier.from_tuple( ( "asset", "quarantine", "prod-20", datetime.datetime.now(datetime.timezone.utc), "batch_id", ) ) my_store.set(ns_2, ExpectationSuiteValidationResult(success=False)) assert my_store.get(ns_2) == ExpectationSuiteValidationResult( success=False, statistics={}, results=[] ) print(my_store.list_keys()) assert set(my_store.list_keys()) == { ns_1, ns_2, } print(gen_directory_tree_str(path)) assert ( gen_directory_tree_str(path) == """\ test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/ my_store/ .ge_store_backend_id asset/ quarantine/ prod-100/ 20190926T134241.000000Z/ batch_id.json prod-20/ 20190926T134241.000000Z/ batch_id.json """ ) """ What does this test and why? A Store should be able to report it's store_backend_id which is set when the StoreBackend is instantiated. 
""" # Check that store_backend_id exists can be read assert my_store.store_backend_id is not None # Check that store_backend_id is a valid UUID assert test_utils.validate_uuid4(my_store.store_backend_id) # Check that another store with the same configuration shares the same store_backend_id my_store_duplicate = ValidationsStore( store_backend={ "module_name": "great_expectations.data_context.store", "class_name": "TupleFilesystemStoreBackend", "base_directory": "my_store/", }, runtime_environment={"root_directory": path}, ) assert my_store.store_backend_id == my_store_duplicate.store_backend_id
def test_ValidationsStore_with_InMemoryStoreBackend():
    """Round-trip two values through an in-memory store and inspect the backend's raw contents."""
    backend_config = {
        "module_name": "great_expectations.data_context.store",
        "class_name": "InMemoryStoreBackend",
        "separator": ".",
    }
    my_store = ValidationsStore(store_backend=backend_config, root_directory=None)

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    # An identifier built with no arguments is rejected as well.
    with pytest.raises(MissingTopLevelConfigKeyError):
        my_store.get(ValidationResultIdentifier(**{}))

    ns_1 = ValidationResultIdentifier(
        from_string="ValidationResultIdentifier.a.b.c.quarantine.prod-100"
    )
    my_store.set(ns_1, {"A": "aaa"})
    assert my_store.get(ns_1) == {"A": "aaa"}

    ns_2 = ValidationResultIdentifier(
        from_string="ValidationResultIdentifier.a.b.c.quarantine.prod-200"
    )
    my_store.set(ns_2, "bbb")
    assert my_store.get(ns_2) == "bbb"

    # Verify that internals are working as expected
    raw_contents = my_store.store_backend.store
    assert raw_contents == {
        "a.b.c.quarantine.prod-100": '{"A": "aaa"}',
        "a.b.c.quarantine.prod-200": '"bbb"',
    }

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {ns_1, ns_2}
def test_ValidationsStore_with_FixedLengthTupleS3StoreBackend():
    """Round-trip two values through a ValidationsStore on a FixedLengthTupleS3StoreBackend.

    Also inspects the bucket directly to check the object keys the backend wrote
    under the configured prefix.
    """
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket=bucket)

    # First, demonstrate that we pick up default configuration including from an S3FixedLengthTupleS3StoreBackend
    my_store = ValidationsStore(
        store_backend={
            "class_name": "FixedLengthTupleS3StoreBackend",
            "bucket": bucket,
            "prefix": prefix,
        },
        root_directory=None,
    )

    # A plain string is not an acceptable key.
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    # An identifier built with no arguments is rejected as well.
    with pytest.raises(MissingTopLevelConfigKeyError):
        my_store.get(ValidationResultIdentifier(**{}))

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            data_asset_name=DataAssetIdentifier(
                datasource="a", generator="b", generator_asset="c"
            ),
            expectation_suite_name="quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
    )
    my_store.set(ns_1, {"A": "aaa"})
    assert my_store.get(ns_1) == {"A": "aaa"}

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            data_asset_name=DataAssetIdentifier(
                datasource="a", generator="b", generator_asset="c"
            ),
            expectation_suite_name="quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
    )
    my_store.set(ns_2, "bbb")
    assert my_store.get(ns_2) == "bbb"

    # Verify that internals are working as expected, including the default filepath.
    # A set comprehension avoids building an intermediate list just to wrap it in set().
    listing = boto3.client("s3").list_objects(Bucket=bucket, Prefix=prefix)
    stored_object_keys = {
        s3_object_info["Key"] for s3_object_info in listing["Contents"]
    }
    assert stored_object_keys == {
        "test/prefix/20191007T151224.1234Z_prod_100/a/b/c/quarantine.json",
        "test/prefix/20191007T151224.1234Z_prod_200/a/b/c/quarantine.json",
    }

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
def test_ValidationsStore_with_FixedLengthTupleFileSystemStoreBackend( tmp_path_factory): path = str( tmp_path_factory.mktemp( 'test_ValidationResultStore_with_FixedLengthTupleFileSystemStoreBackend__dir' )) project_path = str(tmp_path_factory.mktemp('my_dir')) my_store = ValidationsStore( store_backend={ "module_name": "great_expectations.data_context.store", "class_name": "FixedLengthTupleFilesystemStoreBackend", "base_directory": "my_store/", "filepath_template": "{4}/{0}/{1}/{2}/{3}.txt", }, root_directory=path, ) with pytest.raises(TypeError): my_store.get("not_a_ValidationResultIdentifier") with pytest.raises(MissingTopLevelConfigKeyError): my_store.get(ValidationResultIdentifier(**{})) ns_1 = ValidationResultIdentifier( from_string="ValidationResultIdentifier.a.b.c.quarantine.prod-100") my_store.set(ns_1, {"A": "aaa"}) assert my_store.get(ns_1) == {"A": "aaa"} ns_2 = ValidationResultIdentifier( from_string="ValidationResultIdentifier.a.b.c.quarantine.prod-20") my_store.set(ns_2, {"B": "bbb"}) assert my_store.get(ns_2) == {"B": "bbb"} print(my_store.list_keys()) assert set(my_store.list_keys()) == { ns_1, ns_2, } print(gen_directory_tree_str(path)) assert gen_directory_tree_str(path) == """\