def test_get_schema_fails_on_type():
    """Schema.read(1) must raise ValueError with the unidentified-source message."""
    expected_message = '"1" is an unidentified schema source.\nA dict, a full s3 path or URL is expected'

    with pytest.raises(ValueError) as excinfo:
        Schema.read(1)

    # The message is checked after the context manager exits, once the
    # exception has been captured.
    raised_message = str(excinfo.value)
    assert raised_message == expected_message
def test_read_schema_fails(mocker, source, downloaded_schema, expected_error):
    """Schema.read(source) raises ``expected_error`` when from_url is stubbed.

    ``Schema.from_url`` is patched to return ``downloaded_schema`` so the test
    does not depend on the real implementation of that method.
    """
    patch_options = {"return_value": downloaded_schema, "autospec": True}
    mocker.patch("arche.readers.schema.Schema.from_url", **patch_options)

    with pytest.raises(expected_error):
        Schema.read(source)
def test_get_enums():
    """get_enums() lists the property names that declare an ``enum``."""
    # (properties, expected enum field names)
    enum_cases = [
        ({"a": {"enum": ["x"]}, "b": {"enum": ["y"]}}, ["a", "b"]),
        ({"a": {"type": "string", "enum": ["x"]}}, ["a"]),
    ]
    for properties, expected_fields in enum_cases:
        assert Schema({"properties": properties}).get_enums() == expected_fields

    # A schema without any enum yields a falsy result.
    without_enums = Schema({"properties": {"a": {"type": "string"}}})
    assert not without_enums.get_enums()
def test_schema_from_url(schema_path, schema_contents, expected, mocker):
    """Schema.from_url returns ``expected`` when s3.get_contents is stubbed."""
    mocker.patch(
        "arche.readers.schema.s3.get_contents",
        autospec=True,
        return_value=schema_contents,
    )

    loaded = Schema.from_url(schema_path)

    assert loaded == expected
def basic_json_schema(data_source: str, items_numbers: List[int] = None) -> Schema:
    """Build a Schema from a json schema created for the given source and items.

    The raw schema is produced by ``create_json_schema`` and wrapped in a
    ``Schema`` object, which is returned to the caller.

    Args:
        data_source: a collection or job key
        items_numbers: array of item numbers to create schema from

    Returns:
        A ``Schema`` wrapping the generated json schema.
    """
    return Schema(create_json_schema(data_source, items_numbers))
def test_schema(get_schema):
    """A Schema built from the fixture exposes its tags set, raw source and empty enums/tags."""
    expected_allowed_tags = {
        "unique",
        "category",
        "name_field",
        "product_url_field",
        "product_price_field",
        "product_price_was_field",
    }

    parsed = Schema(get_schema)

    assert parsed.allowed_tags == expected_allowed_tags
    assert parsed.raw == get_schema
    assert not parsed.enums
    assert not parsed.tags
# Builds a Schema from a dict literal, calls ``s.json()`` and compares the
# stdout captured by ``capsys`` with the expected JSON text (note the Python
# ``False`` is expected to be rendered as JSON ``false``).
# NOTE(review): the expected triple-quoted literal looks whitespace-collapsed
# in this view (newlines/indentation inside it are presumably lost) — confirm
# the literal against the actual output format before reformatting this test.
def test_schema_json(capsys): s = Schema({ "definitions": { "float": { "pattern": r"^-?[0-9]+\.[0-9]{2}$" } }, "properties": { "name": {} }, "additionalProperties": False, }) s.json() assert (capsys.readouterr().out == """{ "definitions": { "float": { "pattern": "^-?[0-9]+\\\\.[0-9]{2}$" } }, "properties": { "name": {} }, "additionalProperties": false }\n""")
def test_schema_repr():
    """Schema.__repr__() returns the expected multi-line text with sorted keys."""
    raw_schema = {
        "definitions": {"float": {"pattern": r"^-?[0-9]+\.[0-9]{2}$"}},
        "properties": {"name": {}},
        "additionalProperties": False,
    }
    expected_repr = (
        "{'additionalProperties': False,\n"
        " 'definitions': {'float': {'pattern': '^-?[0-9]+\\\\.[0-9]{2}$'}},\n"
        " 'properties': {'name': {}}}"
    )

    actual_repr = Schema(raw_schema).__repr__()

    assert actual_repr == expected_repr
def test_dqr_empty_report(mocker, get_job_items, get_schema):
    """A DataQualityReport built with an empty Report produces four figures.

    Plotting and the ``arche.tools.api`` lookups are patched with fixed
    return values before the report is constructed.
    """
    mocker.patch(
        "arche.data_quality_report.DataQualityReport.plot_to_notebook",
        autospec=True,
    )

    # API helper -> canned return value, patched in the listed order.
    api_stubs = {
        "arche.tools.api.get_response_status_count": (10, 0, 0, 0),
        "arche.tools.api.get_runtime_s": 60,
        "arche.tools.api.get_items_count": 1000,
        "arche.tools.api.get_requests_count": 1000,
    }
    for target, canned_value in api_stubs.items():
        mocker.patch(target, return_value=canned_value, autospec=True)

    dqr = DataQualityReport(
        items=get_job_items,
        schema=Schema(get_schema),
        report=Report(),
    )

    figure_count = len(dqr.figures)
    assert figure_count == 4
def test_read_schema(mocker, source, downloaded, expected):
    """Schema.read(source) equals ``expected`` when from_url returns ``downloaded``."""
    mocker.patch(
        "arche.readers.schema.Schema.from_url",
        autospec=True,
        return_value=downloaded,
    )

    result = Schema.read(source)

    assert result == expected
def test_get_field_tags(tags, field, expected_tags):
    """Schema.get_field_tags returns ``expected_tags`` when given a fresh accumulator."""
    accumulator = defaultdict(list)

    collected = Schema.get_field_tags(tags, field, accumulator)

    assert collected == expected_tags
def test_get_field_tags_fails(tags, exception):
    """Schema.get_field_tags raises ValueError whose message equals ``exception``."""
    accumulator = defaultdict(list)

    with pytest.raises(ValueError) as excinfo:
        Schema.get_field_tags(tags, None, accumulator)

    raised_message = str(excinfo.value)
    assert raised_message == exception
def test_schema_tags(schema, expected_tags):
    """The ``tags`` attribute of Schema(schema) equals ``expected_tags``."""
    parsed = Schema(schema)
    assert parsed.tags == expected_tags
def test_schema_no_properties(schema):
    """Constructing Schema from ``schema`` raises ValueError about missing properties."""
    expected_message = "The schema does not have any 'properties'"

    with pytest.raises(ValueError) as excinfo:
        Schema(source=schema)

    raised_message = str(excinfo.value)
    assert raised_message == expected_message
def schema(self):
    """Return the stored schema, building it from ``schema_source`` if needed.

    When ``self._schema`` is falsy and ``self.schema_source`` is truthy, a
    ``Schema`` is created from the source and stored in ``self._schema``.
    Otherwise the current value of ``self._schema`` is returned unchanged.
    """
    # Already built: hand back the stored object.
    if self._schema:
        return self._schema

    # Nothing stored yet; build only when there is a source to build from.
    if self.schema_source:
        self._schema = Schema(self.schema_source)
    return self._schema
def schema(self, schema_source):
    """Store ``schema_source`` and rebuild the schema from it.

    Args:
        schema_source: value passed to ``Schema`` to build the new schema.
    """
    # The source is recorded first, so it is kept even if Schema() raises.
    self.schema_source = schema_source
    rebuilt = Schema(schema_source)
    self._schema = rebuilt
def test_parse_tag(value, expected):
    """Schema.parse_tag(value) returns ``expected``."""
    parsed_tag = Schema.parse_tag(value)
    assert parsed_tag == expected