def test_get_dataset_metadata():
    """Check that ``get_dataset_metadata`` matches the exported dataset schema.

    The metadata returned for one specific dataset/version pair must equal the
    corresponding entry exported from the loaded ``datasets`` schema.
    """
    dataset_name = 'gmb'
    dataset_version = '1.0.2'

    metadata = get_dataset_metadata(dataset_name, version=dataset_version)

    datasets_schema = export_schemata().schemata['datasets']
    expected = datasets_schema.export_schema('datasets', dataset_name, dataset_version)
    assert metadata == expected
def test_secure_connections_succeed_load_schemata(self, dataset_schema_url_or_path):
    """Test secure connections that should succeed for :func:`pydax.load_schemata`.

    After pointing ``DATASET_SCHEMA_URL`` at the given location and reloading
    with TLS verification enabled, the ``datasets`` schema must report that
    same location as where it was retrieved from.
    """
    # '/' is used rather than os.path.sep: URLs accept only '/' as a separator,
    # whereas Windows paths accept both '/' and '\'. For the purpose of this
    # test the difference does not matter.
    init(update_only=True, DATASET_SCHEMA_URL=dataset_schema_url_or_path)
    load_schemata(force_reload=True, tls_verification=True)

    datasets_schema = export_schemata().schemata['datasets']
    retrieved_from = datasets_schema.retrieved_url_or_path
    assert retrieved_from == dataset_schema_url_or_path
def test_export_schemata(self, schema_file_absolute_dir, schema_file_https_url):
    """Test the high-level ``export_schemata`` function.

    Verifies that the exported object is distinct from the internal one while
    serializing identically, and that after a partial ``init`` update each
    schema reports the expected retrieval location.
    """

    def _serialize_datasets(manager):
        # Canonical JSON form of the 'datasets' schema, used for comparison.
        return json.dumps(manager.schemata['datasets'].export_schema(),
                          sort_keys=True, indent=2, default=str)

    # The export must be a different object from the internal schemata ...
    assert export_schemata() is not _get_schemata()
    # ... yet the two must be equal in content.
    assert _serialize_datasets(export_schemata()) == _serialize_datasets(_get_schemata())

    # Different from https url used by pydax_initialization autouse fixture
    new_urls = {
        'DATASET_SCHEMA_URL': schema_file_absolute_dir / 'datasets.yaml',
        'LICENSE_SCHEMA_URL': schema_file_absolute_dir / 'licenses.yaml',
    }
    init(update_only=True, **new_urls)

    # 'formats' was not overridden, so it keeps its https location; the other
    # two must reflect the locations just passed to ``init``.
    expected_locations = (
        ('formats', f'{schema_file_https_url}/formats.yaml'),
        ('datasets', new_urls['DATASET_SCHEMA_URL']),
        ('licenses', new_urls['LICENSE_SCHEMA_URL']),
    )
    for schema_name, expected_location in expected_locations:
        assert export_schemata().schemata[schema_name].retrieved_url_or_path == expected_location
def test_describe_dataset():
    """Check the text produced by ``describe_dataset`` for one dataset.

    Rather than comparing against a verbatim copy of the expected description,
    this verifies that key schema values appear in it and that its layout has
    the expected characteristics.
    """
    dataset_name = 'gmb'
    dataset_version = '1.0.2'
    description = describe_dataset(dataset_name, version=dataset_version)

    dataset_entry = export_schemata().schemata['datasets'].export_schema(
        'datasets', dataset_name, dataset_version)
    licenses = export_schemata().schemata['licenses'].export_schema('licenses')

    # Spot-check that a few schema values show up in the description.
    assert dataset_entry['name'] in description
    assert dataset_entry['estimated_size'] in description
    license_name = licenses[dataset_entry["license"]]["name"]
    assert license_name in description

    # Structural characteristics of the description text.
    lines = description.splitlines()
    assert len(lines) == 6  # number of lines
    assert not any(text.strip() == '' for text in lines)  # no blank line
    assert not any(text.strip() != text for text in lines)  # no trailing or leading whitespace
    assert not any(':' not in text for text in lines)  # no missing colons
def test_insecure_connections_load_schemata(self, remote_dataset_schema_url, untrust_self_signed_cert):
    """Test insecure connections for ``load_schemata``.

    With ``tls_verification=True`` the load must raise
    ``InsecureConnectionError`` mentioning the offending URL; with
    ``tls_verification=False`` the same load must succeed.
    """
    init(update_only=True, DATASET_SCHEMA_URL=remote_dataset_schema_url)

    # Verified load is expected to be rejected.
    with pytest.raises(InsecureConnectionError) as exc_info:
        load_schemata(force_reload=True, tls_verification=True)
    error_message = str(exc_info.value)
    assert remote_dataset_schema_url in error_message

    # Insecure load succeeds, no exception raised
    load_schemata(force_reload=True, tls_verification=False)
    datasets_schema = export_schemata().schemata['datasets']
    assert datasets_schema.retrieved_url_or_path == remote_dataset_schema_url