def test_item_rebuild(json0):
    """Swapping the extractor of a built item marks the item as not built again."""

    class User(Item):
        uid = Field(extractor=JSONExtractor("id"))

    user = User(extractor=JSONExtractor("data.users[0]"))
    # Before the first extraction neither the item nor its parts are built.
    assert not (user.built or user.extractor.built or user.uid.built)

    assert user.extract(json0) == {"uid": 0}
    assert user.built and user.extractor.built and user.uid.built

    # A fresh extractor resets the item itself; the existing field stays built.
    user.extractor = JSONExtractor("data.users[1]")
    assert not (user.built or user.extractor.built)
    assert user.uid.built

    assert user.extract(json0) == {"uid": 1}
    assert user.built and user.extractor.built and user.uid.built
def test_invalid_css_selector_expr(element, expr):
    """Extracting with an invalid expression raises ExprError bound to the extractor."""
    json_extractor = JSONExtractor(expr)

    with pytest.raises(ExprError) as raised:
        json_extractor.extract(element)

    error = raised.value
    assert error.extractor is json_extractor
    # NOTE(review): the tuple contains ``Exception``, so this accepts any
    # exception type as the wrapped cause -- confirm that is intended.
    assert isinstance(error.exc, (JsonPathLexerError, Exception))
def test_extract_first_without_default(element, expr):
    """``extract_first`` without a default raises ExtractError carrying its context."""
    json_extractor = JSONExtractor(expr)

    with pytest.raises(ExtractError) as raised:
        json_extractor.extract_first(element)

    error = raised.value
    # The error records exactly one extractor: the one that failed.
    assert len(error.extractors) == 1
    assert error.extractors[0] is json_extractor
    assert error.element is element
def test_extract(element, expr, expect, build_first):
    """Extraction gives the expected result whether or not ``build`` was called first."""
    json_extractor = JSONExtractor(expr)
    assert not json_extractor.built

    if build_first:
        json_extractor.build()
        assert json_extractor.built

    extracted = json_extractor.extract(element)
    assert expect == extracted
    # After extracting, the extractor reports itself as built.
    assert json_extractor.built
def test_complex_item_extract_json_data(json0):
    """Check single, many and nested item extraction against the ``json0`` fixture."""

    class User(Item):
        uid = Field(extractor=JSONExtractor("id"))
        username = Field(extractor=JSONExtractor("name"), name="name")
        gender = Field(extractor=JSONExtractor("gender"), default=None)

    class UserResponse(Item):
        start = Field(extractor=JSONExtractor("start"), default=0)
        size = Field(extractor=JSONExtractor("size"))
        total = Field(extractor=JSONExtractor("total"))
        data = User(extractor=JSONExtractor("users[*]"), is_many=True)

    expected_users = [
        {"uid": 0, "name": "Vang Stout", "gender": "female"},
        {"uid": 1, "name": "Jeannie Gaines", "gender": "male"},
        {"uid": 2, "name": "Guzman Hunter", "gender": "female"},
        {"uid": 3, "name": "Janine Gross", "gender": None},
        {"uid": 4, "name": "Clarke Patrick", "gender": "male"},
        {"uid": 5, "name": "Whitney Mcfadden", "gender": None},
    ]

    # Without ``is_many`` only the first matched user is returned.
    single_user = User(extractor=JSONExtractor("data.users[*]")).extract(json0)
    assert single_user == expected_users[0]

    many_users = User(
        extractor=JSONExtractor("data.users[*]"), is_many=True
    ).extract(json0)
    assert many_users == expected_users

    response = UserResponse(extractor=JSONExtractor("data")).extract(json0)
    assert response == {
        "start": 0,
        "size": 5,
        "total": 100,
        "data": expected_users,
    }
def test_special_field_name_in_the_nested_class_definition(json0):
    """A nested item bound to ``_`` is reported under its explicit ``name``."""

    class User(Item):
        uid = Field(extractor=JSONExtractor("id"))
        username = Field(extractor=JSONExtractor("name"), name="name")

    class UserResponse(Item):
        _ = User(extractor=JSONExtractor("users[*]"), name="data")

    expected_first = {"uid": 0, "name": "Vang Stout"}

    direct = User(extractor=JSONExtractor("data.users[*]")).extract(json0)
    assert direct == expected_first

    nested = UserResponse(extractor=JSONExtractor("data")).extract(json0)
    assert nested == {"data": expected_first}
def test_simplify(json0, build_first, simplify_first):
    """``Item.simplify`` yields a simple JSONExtractor that extracts whole items."""

    class User(Item):
        uid = Field(extractor=JSONExtractor("id"))
        username = Field(extractor=JSONExtractor("name"), name="name")
        gender = Field(extractor=JSONExtractor("gender"), default=None)

    user_item = User(extractor=JSONExtractor("data.users[*]"))

    # Optionally build either the item (before simplifying) or the simplified
    # extractor (after), depending on the parametrisation.
    if build_first and not simplify_first:
        user_item.build()
    simplified = user_item.simplify()
    if build_first and simplify_first:
        simplified.build()

    expected_users = [
        {"uid": 0, "name": "Vang Stout", "gender": "female"},
        {"uid": 1, "name": "Jeannie Gaines", "gender": "male"},
        {"uid": 2, "name": "Guzman Hunter", "gender": "female"},
        {"uid": 3, "name": "Janine Gross", "gender": None},
        {"uid": 4, "name": "Clarke Patrick", "gender": "male"},
        {"uid": 5, "name": "Whitney Mcfadden", "gender": None},
    ]

    assert isinstance(simplified, JSONExtractor)
    assert is_simple_extractor(simplified)
    assert not is_complex_extractor(simplified)
    assert repr(simplified) == "UserSimplified('data.users[*]')"
    assert simplified.expr == "data.users[*]"

    assert simplified.extract_first(json0) == expected_users[0]
    assert simplified.extract(json0) == expected_users
def test_simplified_item_extract_first_with_default(data, expect):
    """``extract_first`` on a simplified item, called with ``None`` as the default."""

    class User(Item):
        id = Field(extractor=JSONExtractor("id"))
        name_ = Field(extractor=JSONExtractor("name"), name="name")

    user_item = User(extractor=JSONExtractor("result"))
    simplified = user_item.simplify()

    assert simplified.extract_first(data, None) == expect
def test_modify_simplified_item(json0, build_first, simplify_first):
    """Changing ``expr`` on a simplified extractor leaves the source item untouched."""

    class User(Item):
        uid = Field(extractor=JSONExtractor("id"))
        username = Field(extractor=JSONExtractor("name"), name="name")
        gender = Field(extractor=JSONExtractor("gender"), default=None)

    user_item = User(extractor=JSONExtractor("data.users[*]"))

    if build_first and not simplify_first:
        user_item.build()
    simplified = user_item.simplify()
    if build_first and simplify_first:
        simplified.build()

    # Both start with the same expression; only the simplified one is changed.
    assert user_item.extractor.expr == simplified.expr
    simplified.expr = "data.users[0]"
    assert user_item.extractor.expr != simplified.expr

    assert isinstance(simplified, JSONExtractor)
    assert is_simple_extractor(simplified)
    assert not is_complex_extractor(simplified)
    assert repr(simplified) == "UserSimplified('data.users[0]')"

    expected_user = {"uid": 0, "name": "Vang Stout", "gender": "female"}
    assert simplified.extract_first(json0) == expected_user
    assert simplified.extract(json0) == [expected_user]
def test_exception_trace(json0):
    """A failing nested field reports the chain of extractors that led to it."""

    class User(Item):
        uid = Field(extractor=JSONExtractor("id"))
        username = Field(extractor=JSONExtractor("name"), name="name")
        gender = Field(extractor=JSONExtractor("gender"))

    class UserResponse(Item):
        start = Field(extractor=JSONExtractor("start"), default=0)
        size = Field(extractor=JSONExtractor("size"))
        total = Field(extractor=JSONExtractor("total"))
        data = User(extractor=JSONExtractor("users[*]"), is_many=True)

    response_item = UserResponse(extractor=JSONExtractor("data"))

    with pytest.raises(ExtractError) as raised:
        response_item.extract(json0)
    error = raised.value

    # The stack is ordered from the failing field outwards to the root item.
    assert len(error.extractors) == 3
    assert error.extractors[0] is User.gender
    assert error.extractors[1] is UserResponse.data
    assert error.extractors[2] is response_item
    assert error.element == {"id": 3, "name": "Janine Gross"}

    # NOTE(review): the literal below is reproduced exactly as it appears in the
    # reviewed source, where it sits on a single line; the original line breaks
    # and indentation of the expected trace could not be recovered -- verify.
    expected_message = textwrap.dedent(""" ExtractError(Field(JSONExtractor('gender')), element={'id': 3, 'name': 'Janine Gross'}) |-UserResponse(JSONExtractor('data')) |-User(JSONExtractor('users[*]'), is_many=True) |-Field(JSONExtractor('gender')) |-{'id': 3, 'name': 'Janine Gross'} """).strip()
    assert str(error.args[0]) == expected_message
def test_special_field_name(json0):
    """A field ``name`` that is not a valid identifier is used as the result key."""

    class User(Item):
        uid = Field(extractor=JSONExtractor("id"))
        username = Field(extractor=JSONExtractor("name"), name="user.name")

    user_item = User(extractor=JSONExtractor("data.users[*]"))
    extracted = user_item.extract(json0)

    assert extracted == {"uid": 0, "user.name": "Vang Stout"}
def test_simplified_item_extract_error(data, len_extractors_stack, target):
    """A simplified item still raises ExtractError with the stack depth and element."""

    class User(Item):
        id = Field(extractor=JSONExtractor("id"))
        name_ = Field(extractor=JSONExtractor("name"), name="name")

    user_item = User(extractor=JSONExtractor("result"))
    simplified = user_item.simplify()

    with pytest.raises(ExtractError) as raised:
        simplified.extract(data)

    error: ExtractError = raised.value
    assert len(error.extractors) == len_extractors_stack
    assert error.element == target
def test_avoid_field_overwriting_item_parameter(json0, stack_frame_support):
    """A field attribute called ``name`` is rejected; ``name="name"`` is the way around."""
    # Declaring the attribute ``name`` directly must fail at class creation.
    with pytest.raises(SyntaxError):

        class User(Item):
            uid = Field(extractor=JSONExtractor("id"))
            name = Field(extractor=JSONExtractor("name"))  # type: ignore

    class User(Item):  # type: ignore # noqa: F811
        uid = Field(extractor=JSONExtractor("id"))
        username = Field(extractor=JSONExtractor("name"), name="name")

    user_item = User(extractor=JSONExtractor("data.users[*]"))
    extracted = user_item.extract(json0)
    assert extracted == {"uid": 0, "name": "Vang Stout"}
def test_item_build_explicitly(json0):
    """Calling ``build`` on an item builds the item, its extractor and its fields."""

    class User(Item):
        uid = Field(extractor=JSONExtractor("id"))

    user = User(extractor=JSONExtractor("data.users[0]"))
    assert not (user.built or user.extractor.built or user.uid.built)

    user.build()
    assert user.built and user.extractor.built and user.uid.built

    assert user.extract(json0) == {"uid": 0}
def test_avoid_field_name_overwriting_item_parameter(json0):
    """A field attribute called ``name`` raises SyntaxError; ``name="name"`` works."""
    with pytest.raises(SyntaxError):

        class User(Item):
            uid = Field(extractor=JSONExtractor("id"))
            name = Field(extractor=JSONExtractor("name"))

    class User(Item):  # noqa
        uid = Field(extractor=JSONExtractor("id"))
        username = Field(extractor=JSONExtractor("name"), name="name")

    extracted = User(extractor=JSONExtractor("data.users[*]")).extract(json0)
    assert extracted == {"uid": 0, "name": "Vang Stout"}
def test_invalid_jsonpath_expr(element, expr):
    """Constructing a JSONExtractor from an invalid expression raises ExprError."""
    with pytest.raises(ExprError) as raised:
        JSONExtractor(expr)
    error = raised.value

    backend = data_extractor.json.json_extractor_backend
    if backend is data_extractor.json.JSONPathExtractor:
        # The backend implemented with 'jsonpath-extractor' only raises
        # SyntaxError.
        accepted_causes = SyntaxError
    else:
        # Third-party backend.
        from jsonpath_rw.lexer import JsonPathLexerError

        # NOTE(review): ``Exception`` in the tuple accepts any cause type.
        accepted_causes = (JsonPathLexerError, Exception)
    assert isinstance(error.exc, accepted_causes)

    message = str(error)
    assert re.match(r"ExprError with .+? raised by .+? extracting", message)
def test_modify_built_item(json0):
    """Setting a built item's extractor to ``None`` resets it; extraction still works."""
    first_user = json0["data"]["users"][0]

    class User(Item):
        uid = Field(extractor=JSONExtractor("id"))

    user = User(extractor=JSONExtractor("user"))
    assert not (user.built or user.extractor.built or user.uid.built)

    user.build()
    assert user.built and user.extractor.built and user.uid.built

    # Dropping the extractor un-builds the item but not its field.
    user.extractor = None
    assert not user.built
    assert user.uid.built

    # With no extractor the fields are applied to the given element directly.
    assert user.extract(first_user) == {"uid": 0}
    assert user.built
def test_invalid_css_selector_expr(element, expr, by):
    """An invalid expression raises ExprError on ``build`` or on ``extract``."""
    json_extractor = JSONExtractor(expr)

    with pytest.raises(ExprError) as raised:
        # ``by`` selects which call is expected to trigger the error.
        if by == "build":
            json_extractor.build()
        elif by == "extract":
            json_extractor.extract(element)

    error = raised.value
    assert error.extractor is json_extractor

    backend = data_extractor.json.json_extractor_backend
    if backend is data_extractor.json.JSONPathExtractor:
        # The backend implemented with 'jsonpath-extractor' only raises
        # SyntaxError in its build method.
        accepted_causes = SyntaxError
    else:
        from jsonpath_rw.lexer import JsonPathLexerError

        # NOTE(review): ``Exception`` in the tuple accepts any cause type.
        accepted_causes = (JsonPathLexerError, Exception)
    assert isinstance(error.exc, accepted_causes)

    message = str(error)
    assert re.match(r"ExprError with .+? raised by .+? extracting", message)
class User(Item):
    # Item with three fields read from the "id", "name" and "gender" paths.
    uid = Field(extractor=JSONExtractor("id"))
    # Reported under the explicit name "name" rather than the attribute name.
    username = Field(extractor=JSONExtractor("name"), name="name")
    # Declares ``None`` as the default for this field.
    gender = Field(extractor=JSONExtractor("gender"), default=None)
class User(Item):
    # Item combining two plain fields with a nested ``Count`` item.
    uid = Field(extractor=JSONExtractor("id"))
    username = Field(extractor=JSONExtractor("username"))
    # Nested item created without an extractor of its own.
    count = Count()
class Count(Item):
    # Counter fields, each read from a ``count_``-prefixed path.
    follower = Field(extractor=JSONExtractor("count_follower"))
    following = Field(extractor=JSONExtractor("count_following"))
    like = Field(extractor=JSONExtractor("count_like"))
class UserResponse(Item):
    # The attribute is the placeholder ``_``; the nested item is given the
    # explicit name "data".
    _ = User(extractor=JSONExtractor("users[*]"), name="data")
class User(Item):
    # Item with two fields read from the "id" and "name" paths.
    uid = Field(extractor=JSONExtractor("id"))
    # Reported under the explicit name "name" rather than the attribute name.
    username = Field(extractor=JSONExtractor("name"), name="name")
def test_misplacing():
    """Passing an Item instance as a Field's extractor raises ValueError."""

    class ComplexExtractor(Item):
        pass

    with pytest.raises(ValueError):
        # Built inside the ``raises`` block so the same statements stay under it.
        misplaced = ComplexExtractor(extractor=JSONExtractor("users[*]"))
        Field(extractor=misplaced)
class UserResponse(Item):
    # Paging fields; ``start`` declares 0 as its default.
    start = Field(extractor=JSONExtractor("start"), default=0)
    size = Field(extractor=JSONExtractor("size"))
    total = Field(extractor=JSONExtractor("total"))
    # Nested ``User`` item over every entry matched by "users[*]".
    data = User(extractor=JSONExtractor("users[*]"), is_many=True)
class Users(Item):
    # Nested ``User`` item declared with ``is_many=True``.
    users = User(extractor=JSONExtractor("users"), is_many=True)
    # ``count`` declares 0 as its default.
    count = Field(extractor=JSONExtractor("count"), default=0)
class User(Item):
    # Field attribute named ``id``, read from the "id" path.
    id = Field(extractor=JSONExtractor("id"))
    # Attribute ``name_`` carries the explicit field name "name".
    name_ = Field(extractor=JSONExtractor("name"), name="name")
class User(Item):
    # Minimal item: a single ``uid`` field read from the "id" path.
    uid = Field(extractor=JSONExtractor("id"))
class UserWithGender(User):
    # Subclass of ``User`` that declares one more field, ``gender``.
    gender = Field(extractor=JSONExtractor("gender"))
def test_type_creation():
    """Create an Item subclass dynamically through the three-argument ``type``."""
    namespace = {"bar": Field(extractor=JSONExtractor("bar"))}
    type("Foo", (Item,), namespace)