def test_invalid_schema(self):
    """Test that a schema location with an unsupported URL scheme is rejected.

    ``retrieve_schema_file`` is expected to raise ``ValueError`` for an
    ``ftp://`` location, with a message naming both the offending location
    and its scheme.
    """
    unsupported_location = 'ftp://ftp/is/unsupported'

    with pytest.raises(ValueError) as exc_info:
        retrieve_schema_file(url_or_path=unsupported_location)

    # Checked after the ``with`` block: statements following the raising call
    # inside the block would never execute.
    error_message = str(exc_info.value)
    assert error_message == 'Unknown scheme in "ftp://ftp/is/unsupported": "ftp"'
def test_custom_schema_insecure(self, location_type, schema_file_relative_dir, request):
    """Test retrieving user-specified schema files, with insecure connections allowed.

    The schema location is taken from the ``schema_file_<location_type>``
    fixture, and every retrieval is made with ``tls_verification=False``.
    Files that decode cleanly are compared against the copies under
    ``schema_file_relative_dir``; files that cannot be decoded with the
    codec in use must raise ``UnicodeDecodeError``.
    """
    # Join with a literal '/' rather than os.path.sep: URLs only accept '/' as a
    # separator (never '\'), while Windows paths accept both, so '/' works for
    # every location type this test is run with.
    location_prefix = str(request.getfixturevalue('schema_file_' + location_type)) + '/'

    # Content that decodes cleanly must equal the reference copy read with the
    # same encoding.
    retrieved_datasets = retrieve_schema_file(location_prefix + 'datasets.yaml', tls_verification=False)
    reference_datasets = (schema_file_relative_dir / 'datasets.yaml').read_text(encoding='utf-8')
    assert retrieved_datasets == reference_datasets

    retrieved_formats = retrieve_schema_file(
        location_prefix + 'formats.yaml', encoding='ascii', tls_verification=False)
    reference_formats = (schema_file_relative_dir / 'formats.yaml').read_text(encoding='ascii')
    assert retrieved_formats == reference_formats

    # licenses.yaml contains non-ASCII bytes, so decoding it as ASCII must fail.
    with pytest.raises(UnicodeDecodeError) as decode_error:
        retrieve_schema_file(location_prefix + 'licenses.yaml', encoding='ascii', tls_verification=False)
    # The byte position is deliberately left out of the expected text so this
    # test does not need rewriting whenever the yaml file changes in length.
    assert "'ascii' codec can't decode byte 0xe2" in str(decode_error.value)

    # No encoding is passed here; the failure is expected to name the 'utf-8' codec.
    with pytest.raises(UnicodeDecodeError) as decode_error:
        retrieve_schema_file(location_prefix + 'formats-utf-16le-bom.yaml', tls_verification=False)
    # "position 0" is asserted in this case because decoding should fail right
    # at the beginning of the file.
    assert "'utf-8' codec can't decode byte 0xff in position 0" in str(decode_error.value)

    with pytest.raises(UnicodeDecodeError) as decode_error:
        retrieve_schema_file(
            location_prefix + 'formats-utf-16be.yaml', encoding='utf-8', tls_verification=False)
    assert "'utf-8' codec can't decode byte 0x90" in str(decode_error.value)
def test_default_schema_url_content(self):
    """Smoke-test the content served at the default schema URLs.

    Only checks that each default schema URL yields a non-empty file, which
    guards against the server returning zero-length files.
    """
    init(update_only=False)

    # Only non-emptiness is asserted, on purpose: maintenance of the production
    # schema files should stay decoupled from library development, since those
    # files likely change more often than the library does. For the same
    # reason, ``test_default_schema_url_https`` separately verifies that the
    # default schema URLs are valid https links.
    # This test is in `test_config.py` rather than `test_schema_retrieval.py`
    # because it is about the content behind the default schema URLs, not about
    # the retrieval functionality itself.
    dataset_schema = retrieve_schema_file(Config.DATASET_SCHEMA_FILE_URL)
    assert len(dataset_schema) > 0

    format_schema = retrieve_schema_file(Config.FORMAT_SCHEMA_FILE_URL)
    assert len(format_schema) > 0

    license_schema = retrieve_schema_file(Config.LICENSE_SCHEMA_FILE_URL)
    assert len(license_schema) > 0
def test_secure_connections_succeed_retrieve_schema_file(
        self, dataset_schema_url_or_path, schema_file_relative_dir):
    """Test secure connections that should succeed for ``retrieve_schema_file``.

    With TLS verification enabled, the retrieved dataset schema must equal
    the ``datasets.yaml`` copy under ``schema_file_relative_dir``.
    """
    retrieved_schema = retrieve_schema_file(dataset_schema_url_or_path, tls_verification=True)
    reference_schema = (schema_file_relative_dir / 'datasets.yaml').read_text(encoding='utf-8')

    assert retrieved_schema == reference_schema