def test_field_extract_without_default(element0, Extractor, expr):
    """Extracting through a Field built without a default raises ExtractError.

    The error must list the field as its only extractor and reference the
    element the extraction was attempted on.
    """
    field = Field(Extractor(expr))

    with pytest.raises(ExtractError) as raised:
        field.extract(element0)

    error = raised.value
    # Exactly one extractor is reported, and it is the field itself.
    assert len(error.extractors) == 1
    assert error.extractors[0] is field
    assert error.element is element0
def test_field_extract(element0, Extractor, expr, expect, build_first):
    """Extracting through a Field yields ``expect`` and leaves it built.

    When ``build_first`` is truthy the field is built explicitly before the
    extraction. In both cases the field and its wrapped extractor must report
    ``built`` once ``extract`` has run.
    """
    field = Field(Extractor(expr))
    # Neither the field nor the wrapped extractor is built on construction.
    assert not (field.built or field.extractor.built)

    if build_first:
        field.build()
        assert field.built and field.extractor.built

    actual = field.extract(element0)
    assert expect == actual
    assert field.built and field.extractor.built
def test_field_xpath_extract_result_not_list(element0, build_first):
    """A Field wrapping a ``normalize-space(...)`` XPath extracts "Title 1".

    The field is optionally built up front (``build_first``); after the
    extraction both the field and its extractor must report ``built``.
    """
    field = Field(XPathExtractor("normalize-space(//div[@class='title'])"))
    # Nothing is built until build() or extract() is called.
    assert not (field.built or field.extractor.built)

    if build_first:
        field.build()
        assert field.built and field.extractor.built

    title = field.extract(element0)
    assert title == "Title 1"
    assert field.built and field.extractor.built
def test_field_extract_with_is_many(element0, Extractor, expr, expect, build_first):
    """A Field created with ``is_many=True`` extracts ``expect``.

    The field is optionally built up front (``build_first``); after the
    extraction both the field and its extractor must report ``built``.
    """
    field = Field(Extractor(expr), is_many=True)
    # Nothing is built until build() or extract() is called.
    assert not (field.built or field.extractor.built)

    if build_first:
        field.build()
        assert field.built and field.extractor.built

    actual = field.extract(element0)
    assert expect == actual
    assert field.built and field.extractor.built
# Item describing a channel element: every Field reads the text() of one
# direct child via a relative XPath, and ``items`` applies ChannelItem to every
# ``./item`` child (is_many=True).
# NOTE(review): the element names look like an RSS <channel> — confirm.
class Channel(Item):
    title = Field(XPathExtractor("./title/text()"))
    link = Field(XPathExtractor("./link/text()"))
    description = Field(XPathExtractor("./description/text()"))
    language = Field(XPathExtractor("./language/text()"))
    publish_date = Field(XPathExtractor("./pubDate/text()"))
    last_build_date = Field(XPathExtractor("./lastBuildDate/text()"))
    docs = Field(XPathExtractor("./docs/text()"))
    generator = Field(XPathExtractor("./generator/text()"))
    managing_editor = Field(XPathExtractor("./managingEditor/text()"))
    web_master = Field(XPathExtractor("./webMaster/text()"))
    # Nested item extractor applied to each matching child element.
    items = ChannelItem(XPathExtractor("./item"), is_many=True)
def test_field_overwrites_item_parameter_type_creation(
    stack_frame_support, item_property
):
    """Creating an Item via ``type()`` with a clashing field is a SyntaxError.

    ``item_property`` is used as the field name of a dynamically created
    ``Item`` subclass; the creation must raise ``SyntaxError``.

    With ``stack_frame_support`` the error must point at this file and at the
    ``type(...)`` call below, and ``exc.text`` must equal that source line
    (stripped). The call line and the expected text therefore have to stay
    character-for-character identical, and no line may be inserted between
    the call and the ``f_lineno - 6`` assertion. ``offset == 8`` presumably
    corresponds to the column of the call (two indentation levels).

    Without stack frame support no location is available and ``exc.text`` is
    a synthesized ``name=Field(...)`` rendering instead.
    """
    with pytest.raises(SyntaxError) as catch:
        # fmt: off
        type("Parameter", (Item,), {item_property: Field(XPathExtractor("./span[@class='name']"))})  # noqa: E950
        # fmt: on

    exc = catch.value
    if stack_frame_support:
        assert exc.filename == __file__
        assert exc.lineno == inspect.currentframe().f_lineno - 6
        assert exc.offset == 8
        assert (exc.text == """
        type("Parameter", (Item,), {item_property: Field(XPathExtractor("./span[@class='name']"))})  # noqa: E950
        """.strip())
    else:
        assert exc.filename is None
        assert exc.lineno is None
        assert exc.offset is None
        assert (
            exc.text
            == f"""{item_property}=Field(XPathExtractor("./span[@class='name']"))"""
        )
def test_field_with_convertor():
    """A Field applies its ``convertor`` whether or not ``type`` is given.

    Without ``type`` the field's ``type`` attribute is ``None``; with
    ``type=str`` it is ``str``. In both cases extracting "abc" yields "ABC".
    """

    def to_upper(value):
        return str(value).upper()

    untyped = Field(D(), convertor=to_upper)
    assert untyped.type is None
    assert untyped.extract("abc") == "ABC"

    typed = Field(D(), type=str, convertor=to_upper)
    assert typed.type is str
    assert typed.extract("abc") == "ABC"
def test_field_name_overwrite_item_parameter_type_creation():
    """Creating an Item via ``type()`` with a field called ``name`` fails.

    The raised ``SyntaxError`` must point at this file and at the ``type(...)``
    call below, and ``exc.text`` must equal that source line. The call line
    and the expected text therefore have to stay character-for-character
    identical, and no line may be inserted between the call and the
    ``f_lineno - 5`` assertion. ``offset == 8`` presumably corresponds to the
    column of the call (two indentation levels).
    """
    with pytest.raises(SyntaxError) as catch:
        # fmt: off
        type("Parameter", (Item,), {"name": Field(XPathExtractor("./span[@class='name']"))})
        # fmt: on

    exc = catch.value
    assert exc.filename == __file__
    assert exc.lineno == inspect.currentframe().f_lineno - 5
    assert exc.offset == 8
    assert (
        exc.text
        == 'type("Parameter", (Item,), {"name": Field(XPathExtractor("./span[@class=\'name\']"))})'
    )
def test_field_extract_without_default(element0, Extractor, expr, build_first):
    """Extracting through a Field built without a default raises ExtractError.

    The field is optionally built up front (``build_first``). Even though the
    extraction fails, the field and its extractor must report ``built``
    afterwards, and the error must name the field as its only extractor and
    reference the element that was processed.
    """
    field = Field(Extractor(expr))
    # Nothing is built until build() or extract() is called.
    assert not (field.built or field.extractor.built)

    if build_first:
        field.build()
        assert field.built and field.extractor.built

    with pytest.raises(ExtractError) as raised:
        field.extract(element0)

    # A failed extraction still leaves everything built.
    assert field.built and field.extractor.built

    error = raised.value
    assert len(error.extractors) == 1
    assert error.extractors[0] is field
    assert error.element is element0
# Item with three fields, each read by a JSONExtractor using the expressions
# "count_follower", "count_following" and "count_like".
class Count(Item):
    follower = Field(JSONExtractor("count_follower"))
    following = Field(JSONExtractor("count_following"))
    like = Field(JSONExtractor("count_like"))
# Item with a ``uid`` field (expression "id") and a ``username`` field
# (expression "name").
class User(Item):
    uid = Field(JSONExtractor("id"))
    # Declared as ``username`` but given name="name" — presumably the name
    # used in the extracted result; confirm against Field's documentation.
    username = Field(JSONExtractor("name"), name="name")
# Defines ``baz`` twice in one class body: first as a method, then as a Field
# that rebinds the same attribute (hence the F811 suppression).
# NOTE(review): presumably a deliberate conflict exercised by a test — confirm.
class User(Item):
    def baz(self):
        pass

    baz = Field(JSONExtractor("baz"))  # noqa: F811
# Keeps a ``baz`` method next to a field for the "baz" expression by declaring
# the field as ``baz_`` and passing name="baz".
# NOTE(review): presumably ``name`` sets the field's name in the extracted
# result so the attribute itself need not clash with the method — confirm.
class User(Item):  # noqa: F811
    baz_ = Field(JSONExtractor("baz"), name="baz")

    def baz(self):
        pass
def test_field_parameters_conflict():
    """Passing both ``is_many=True`` and ``default`` to Field raises ValueError."""
    conflicting_options = {"is_many": True, "default": None}

    with pytest.raises(ValueError):
        Field(TextCSSExtractor(".nomatter"), **conflicting_options)
# Subclass of User that adds a ``gender`` field read with the JSON expression
# "gender".
class UserWithGender(User):
    gender = Field(JSONExtractor("gender"))
# Item with ``uid`` (expression "id"), ``username`` (expression "name") and an
# optional ``gender`` field.
class User(Item):
    uid = Field(JSONExtractor("id"))
    # Declared as ``username`` but given name="name" — presumably the name
    # used in the extracted result; confirm against Field's documentation.
    username = Field(JSONExtractor("name"), name="name")
    # default=None — presumably returned when "gender" yields nothing; confirm.
    gender = Field(JSONExtractor("gender"), default=None)
# Item combining three scalar fields ("start", "size", "total") with a nested
# User item applied to the "users[*]" expression with is_many=True.
class UserResponse(Item):
    # ``start`` is the only field here that declares a default (0).
    start = Field(JSONExtractor("start"), default=0)
    size = Field(JSONExtractor("size"))
    total = Field(JSONExtractor("total"))
    data = User(JSONExtractor("users[*]"), is_many=True)
# Item whose only field is itself called ``field_names``.
# NOTE(review): this name looks like it collides with an attribute Item uses
# internally — presumably the point of this definition; confirm.
class User(Item):
    field_names = Field(JSONExtractor("field_names"))
# Item describing one entry element: every Field reads the text() of a direct
# child via a relative XPath. Only ``title`` and ``link`` declare a default
# (the empty string); the other fields have none.
class ChannelItem(Item):
    title = Field(XPathExtractor("./title/text()"), default="")
    link = Field(XPathExtractor("./link/text()"), default="")
    description = Field(XPathExtractor("./description/text()"))
    publish_date = Field(XPathExtractor("./pubDate/text()"))
    guid = Field(XPathExtractor("./guid/text()"))
# Item with two fields, each reading the text() of a child <div> selected by
# its class attribute ("title" and "content").
class Article(Item):
    title = Field(XPathExtractor("./div[@class='title']/text()"))
    content = Field(XPathExtractor("./div[@class='content']/text()"))
# Declares the "field_names" field under the attribute ``field_names_`` and
# passes name="field_names".
# NOTE(review): presumably this avoids clashing with an attribute of Item
# while keeping "field_names" as the name in the result — confirm.
class User(Item):  # noqa: F811
    field_names_ = Field(JSONExtractor("field_names"), name="field_names")
# Item holding a nested User item applied to the "users" expression with
# is_many=True, plus a ``count`` field that declares a default of 0.
class Users(Item):
    users = User(JSONExtractor("users"), is_many=True)
    count = Field(JSONExtractor("count"), default=0)
# Item with an ``id`` field and a field for the "name" expression that is
# declared as ``name_`` and given name="name".
# NOTE(review): presumably the trailing underscore avoids a clash with an
# attribute of Item while ``name`` keeps "name" in the result — confirm.
class User(Item):
    id = Field(JSONExtractor("id"))
    name_ = Field(JSONExtractor("name"), name="name")
# Item with ``uid`` (expression "id"), ``username`` (expression "username")
# and a nested Count item.
class User(Item):
    uid = Field(JSONExtractor("id"))
    username = Field(JSONExtractor("username"))
    # Count is instantiated without an extractor argument here.
    count = Count()
# Defines ``baz`` twice in one class body: first as a Field, then as a method
# that rebinds the same attribute.
# NOTE(review): presumably a deliberate conflict exercised by a test — confirm.
class User(Item):
    baz = Field(JSONExtractor("baz"))

    def baz(self):
        pass
def test_misplacing():
    """Passing an Item instance as a Field's ``extractor`` raises ValueError."""

    class ComplexExtractor(Item):
        pass

    # Both constructor calls stay inside the ``raises`` block, as written.
    with pytest.raises(ValueError):
        Field(extractor=ComplexExtractor(extractor=JSONExtractor("users[*]")))
# Minimal Item with a single ``uid`` field read with the JSON expression "id".
class User(Item):
    uid = Field(JSONExtractor("id"))
# Item subclass declaring a field called ``name``, written as a one-line class
# statement (the E701 suppression covers the statement after the colon).
# NOTE(review): presumably the single-line layout is intentional and checked
# by a test — keep this line unchanged unless confirmed otherwise.
class Parameter(Item): name = Field( XPathExtractor("./span[@class='name']")) # noqa: B950, E701
def test_lazy_str(): string = "" def func(): nonlocal string return string ls = LazyStr(func=func) assert str(ls) == "" string = "abc" assert str(ls) == "abc" @pytest.fixture(params=[Field(), Item()], ids=repr) def complex_extractor(request): return request.param @pytest.fixture( params=[ AttrCSSExtractor(expr="div.class", attr="id") if not _missing_cssselect else pytest.param("Missing 'cssselect'", marks=pytest.mark.skip()), CSSExtractor(expr="div.class") if not _missing_cssselect else pytest.param("Missing 'cssselect'", marks=pytest.mark.skip()), JSONPathExtractor(expr="boo") if not _missing_jsonpath else pytest.param("Missing 'jsonpath-extractor'", marks=pytest.mark.skip()), JSONPathRWExtractor(expr="boo") if not _missing_jsonpath_rw else pytest.param("Missing 'jsonpath-rw'", marks=pytest.mark.skip()), JSONPathRWExtExtractor(expr="boo") if not _missing_jsonpath_rw_ext else
def test_type_creation():
    """Creating an Item subclass with ``type()`` and a ``bar`` field must not raise."""
    type("Foo", (Item, ), {"bar": Field(JSONExtractor("bar"))})