def test_wikipedia_export():
    """Parse the compressed Wikipedia export shipped next to this file.

    Each ``mediawiki/page/revision`` element is turned into a ``Revision``
    instance; the test checks how many instances are produced and inspects
    the author and date of the last one.
    """

    @xml_handle_element("mediawiki", "page", "revision")
    class Revision:
        """Author and date collected from one ``revision`` element."""

        def __init__(self):
            self.author = None
            self.date = None

        @xml_handle_text("contributor", "username")
        def handle_author(self, node):
            self.author = node.text

        @xml_handle_text("timestamp")
        def handle_date(self, node):
            # Timestamps are parsed as YYYY-MM-DDTHH:MM:SSZ
            self.date = datetime.strptime(node.text, "%Y-%m-%dT%H:%M:%SZ")

    export_path = Path(__file__).parent / "wikipedia_python_export.xml.xz"
    with LZMAFile(export_path) as stream:
        # Materialise everything while the compressed stream is still open
        revisions = list(Parser(stream).iter_from(Revision))

    assert len(revisions) == 1000
    assert all(isinstance(revision, Revision) for revision in revisions)

    last_revision = revisions[-1]
    assert last_revision.author == "Lulu of the Lotus-Eaters"

    last_date = last_revision.date
    assert (
        last_date.year,
        last_date.month,
        last_date.day,
        last_date.hour,
        last_date.minute,
    ) == (2006, 4, 14, 15, 58)
def test_with_handler(
    # pylint: disable=redefined-outer-name
    ram_usage,
):
    """Run a text handler over the stream built by ``big_stream``.

    The handler converts every ``root/entry/nb`` text to an int; the values
    must count up from zero in the order they are yielded.
    """

    @xml_handle_text("root", "entry", "nb")
    def handler(node):
        yield int(node.text)

    stream = big_stream(ram_usage)
    numbers = Parser(stream).iter_from(handler)

    # Iterate directly over the parser output: no intermediate list is built
    for expected, number in enumerate(numbers):
        assert number == expected
def test_comments():
    """Check the text yielded for the first three ``comments/comment`` nodes."""

    @xml_handle_element("comments", "comment")
    def handler(node):
        yield node.text

    expected_texts = (
        "Test",
        "Hello everyone!",
        (
            "I've found this quote that I feel you may like:"
            " The grass is greener where you water it."
            " -Neil Barringham"
            " Feel free to share it!"
        ),
    )

    items = Parser(XML).iter_from(handler)
    # Pull items one at a time so only the first three are consumed
    for expected in expected_texts:
        assert next(items) == expected
def test_maths_eval_list():
    """Evaluate nested ``expr`` nodes using a shared list of function handlers."""
    # Populated once both handlers are defined. ``handle_expr`` only reads the
    # list when it is called, so it can pass itself to ``node.iter_from``.
    handlers = []

    @xml_handle_element("expr")
    def handle_expr(node):
        operation = getattr(operator, node.attributes["op"])
        operands = node.iter_from(*handlers)
        yield reduce(operation, operands)

    @xml_handle_text("number")
    def handle_number(node):
        yield int(node.text)

    handlers.extend((handle_expr, handle_number))

    results = list(Parser(XML).iter_from(*handlers))
    assert results == [1337]
def test_maths_eval_class():
    """Evaluate nested ``expr`` nodes using a class that groups the handlers."""

    class Eval:
        """Handlers for ``expr`` and ``number`` nodes, all static."""

        @staticmethod
        @xml_handle_element("expr")
        def handle_expr(node):
            # The operator name comes from the node's ``op`` attribute;
            # the operands come from handling the node's content with Eval.
            operation = getattr(operator, node.attributes["op"])
            operands = node.iter_from(Eval)
            yield reduce(operation, operands)

        @staticmethod
        @xml_handle_text("number")
        def handle_number(node):
            yield int(node.text)

        @staticmethod
        def xml_handler(generator):
            # Pass the handled items through unchanged
            yield from generator

    results = list(Parser(XML).iter_from(Eval))
    assert results == [1337]
def test_wrong_explicit_encoding():
    """A document declared as ISO-8859-1 but encoded as UTF-8 must not parse."""
    declaration = "<?xml version='1.0' encoding='ISO-8859-1'?>"
    document = declaration + XML_STR
    # The parser is built outside the ``raises`` block: only ``return_from``
    # is expected to raise.
    parser = Parser(document.encode("utf_8"))

    with pytest.raises(ET.ParseError):
        parser.return_from()
def test_encoding(xml):
    """The text yielded for the ``élément`` node must equal ``TXT_STR``."""

    @xml_handle_element("élément")
    def handler(node):
        yield node.text

    returned = Parser(xml).return_from(handler)
    assert returned == TXT_STR
def test_no_handler(
    # pylint: disable=redefined-outer-name
    ram_usage,
):
    """Without any handler, iterating over the stream must yield nothing."""
    parser = Parser(big_stream(ram_usage))
    produced = list(parser.iter_from())
    assert produced == []
def test_namespaces():
    """Check namespace handling for element names and attribute names."""

    class Handler:
        # ---- namespaces in element names ----

        # a bare `aaa` handles every {*}aaa unless a more specific handler
        # overrides it
        @staticmethod
        @xml_handle_element("root", "aaa")
        def handle_aaa(node):
            yield ("aaa", node.namespace)

        # `{...}aaa` takes precedence over the bare `aaa` handler
        @staticmethod
        @xml_handle_element("root", "{https://example.com/xml/ex}aaa")
        def handle_aaa_ex(node):
            yield ("aaa_ex", node.namespace)

        # `{}aaa` takes precedence over the bare `aaa` handler for elements
        # that have no namespace
        @staticmethod
        @xml_handle_element("root", "{}aaa")
        def handle_aaa_no_namespace(node):
            # never executed in this test
            yield ("aaa_no_namespace", node.namespace)

        # ---- namespaces in element attributes ----

        @staticmethod
        @xml_handle_element("root", "bbb")
        def handle_bbb(node):
            attributes = node.attributes

            # `name`      -> any namespace
            # `{ns}name`  -> that specific namespace
            # `{}name`    -> no namespace
            yield ("bbb", "uuu default", attributes["uuu"])
            yield ("bbb", "uuu no", attributes["{}uuu"])

            yield ("bbb", "vvv default", attributes["vvv"])
            yield (
                "bbb",
                "vvv specific",
                attributes["{https://example.com/xml/ex}vvv"],
            )

            # a bare `name` prefers the attribute without namespace when
            # there is one (expected value here: 2)
            yield ("bbb", "xxx default", attributes["xxx"])
            yield ("bbb", "xxx no", attributes["{}xxx"])
            yield (
                "bbb",
                "xxx specific",
                attributes["{https://example.com/xml/ex}xxx"],
            )

            # a warning is emitted when the attribute exists under several
            # namespaces but never without one
            with pytest.warns(RuntimeWarning):
                # the current implementation picks the "first" attribute in
                # that case; do not rely on it, specify the namespace instead
                yield ("bbb", "yyy default", attributes["yyy"])

        @staticmethod
        def xml_handler(generator):
            yield from generator

    expected = [
        ("aaa", "https://example.com/xml/"),
        ("aaa", "https://example.com/xml/aaa"),
        ("aaa_ex", "https://example.com/xml/ex"),
        ("bbb", "uuu default", "0"),
        ("bbb", "uuu no", "0"),
        ("bbb", "vvv default", "1"),
        ("bbb", "vvv specific", "1"),
        ("bbb", "xxx default", "2"),
        ("bbb", "xxx no", "2"),
        ("bbb", "xxx specific", "3"),
        ("bbb", "yyy default", "5"),
    ]

    produced = list(Parser(XML).iter_from(Handler))
    assert produced == expected
def test_external_entities(xml):
    """Parsing ``xml`` with ``handler_get_text`` must raise EntitiesForbidden."""
    with pytest.raises(EntitiesForbidden):
        # Construction stays inside the block so that an error raised by
        # either step is accepted
        parser = Parser(xml)
        parser.return_from(handler_get_text)