Esempio n. 1
0
def test_wikipedia_export():
    """Parse the compressed Wikipedia export and inspect the last revision.

    A local ``Revision`` class is registered for the
    ``("mediawiki", "page", "revision")`` path; its text handlers store the
    contributor username and the parsed timestamp on the instance.
    """

    @xml_handle_element("mediawiki", "page", "revision")
    class Revision:
        def __init__(self):
            # Both fields stay None until the matching text handler fires.
            self.author = self.date = None

        @xml_handle_text("contributor", "username")
        def handle_author(self, node):
            self.author = node.text

        @xml_handle_text("timestamp")
        def handle_date(self, node):
            timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
            self.date = datetime.strptime(node.text, timestamp_format)

    export_path = Path(__file__).parent / "wikipedia_python_export.xml.xz"
    with LZMAFile(export_path) as stream:
        # Materialise everything while the compressed stream is still open.
        revisions = list(Parser(stream).iter_from(Revision))

    assert len(revisions) == 1000
    for entry in revisions:
        assert isinstance(entry, Revision)

    last = revisions[-1]
    assert last.author == "Lulu of the Lotus-Eaters"
    stamp = last.date
    assert (stamp.year, stamp.month, stamp.day, stamp.hour, stamp.minute) == (
        2006,
        4,
        14,
        15,
        58,
    )
Esempio n. 2
0
def test_with_handler(
    # pylint: disable=redefined-outer-name
    ram_usage, ):
    """Check that the ``nb`` text handler yields 0, 1, 2, ... in order."""

    @xml_handle_text("root", "entry", "nb")
    def handler(node):
        yield int(node.text)

    parsed = Parser(big_stream(ram_usage)).iter_from(handler)

    # Consume lazily, comparing each value with its position in the stream.
    expected = 0
    for value in parsed:
        assert value == expected
        expected += 1
Esempio n. 3
0
def test_comments():
    """Check the text yielded for the first three ``comments/comment`` nodes."""

    @xml_handle_element("comments", "comment")
    def handler(node):
        yield node.text

    expected_texts = (
        "Test",
        "Hello everyone!",
        ("I've found this quote that I feel you may like:"
         " The grass is greener where you water it."
         " -Neil Barringham"
         " Feel free to share it!"),
    )

    stream = Parser(XML).iter_from(handler)
    # Pull exactly one item per expected text; nothing beyond that is read.
    for expected in expected_texts:
        assert next(stream) == expected
Esempio n. 4
0
def test_maths_eval_list():
    """Evaluate the expression in ``XML`` with handlers passed as a list.

    ``handle_expr`` recurses through ``node.iter_from(*handlers)``, so the
    shared list must hold both handlers before parsing starts.
    """
    handlers = []

    @xml_handle_element("expr")
    def handle_expr(node):
        # The "op" attribute names a function of the ``operator`` module.
        operation = getattr(operator, node.attributes["op"])
        operands = node.iter_from(*handlers)
        yield reduce(operation, operands)

    @xml_handle_text("number")
    def handle_number(node):
        yield int(node.text)

    # Register after both definitions; order matches the original appends.
    handlers.extend((handle_expr, handle_number))

    result = list(Parser(XML).iter_from(*handlers))
    assert result == [1337]
Esempio n. 5
0
def test_maths_eval_class():
    """Evaluate the expression in ``XML`` with handlers grouped in a class."""

    class Eval:
        # ``expr`` nodes: fold the nested results with the operator named by
        # the "op" attribute, recursing through this same class.
        @staticmethod
        @xml_handle_element("expr")
        def handle_expr(node):
            operation = getattr(operator, node.attributes["op"])
            operands = node.iter_from(Eval)
            yield reduce(operation, operands)

        # ``number`` nodes: yield the text converted to an int.
        @staticmethod
        @xml_handle_text("number")
        def handle_number(node):
            yield int(node.text)

        # Forward every item produced by the handlers above unchanged.
        @staticmethod
        def xml_handler(generator):
            yield from generator

    result = list(Parser(XML).iter_from(Eval))
    assert result == [1337]
Esempio n. 6
0
def test_wrong_explicit_encoding():
    """A document declaring ISO-8859-1 but encoded as UTF-8 must not parse."""
    declaration = "<?xml version='1.0' encoding='ISO-8859-1'?>"
    payload = (declaration + XML_STR).encode("utf_8")
    parser = Parser(payload)
    # Only the parsing call is expected to raise, not the constructor.
    with pytest.raises(ET.ParseError):
        parser.return_from()
Esempio n. 7
0
def test_encoding(xml):
    """Check that the text of the ``élément`` node equals ``TXT_STR``."""

    @xml_handle_element("élément")
    def handler(node):
        yield node.text

    extracted = Parser(xml).return_from(handler)
    assert extracted == TXT_STR
Esempio n. 8
0
def test_no_handler(
    # pylint: disable=redefined-outer-name
    ram_usage, ):
    """Parsing without any handler must produce no items at all."""
    parser = Parser(big_stream(ram_usage))
    collected = list(parser.iter_from())
    assert not collected
Esempio n. 9
0
def test_namespaces():
    """Check namespace matching for element names and attribute keys.

    The handlers of the local ``Handler`` class yield tagged tuples; the
    items collected from ``Parser(XML).iter_from(Handler)`` are compared, in
    order, with the expected list at the end of the test.
    """
    class Handler:

        #
        # namespaces in element names
        #

        # using `aaa` handles all {*}aaa unless overridden
        @staticmethod
        @xml_handle_element("root", "aaa")
        def handle_aaa(node):
            yield ("aaa", node.namespace)

        # using `{...}aaa` overrides the `aaa` handler for that namespace
        @staticmethod
        @xml_handle_element("root", "{https://example.com/xml/ex}aaa")
        def handle_aaa_ex(node):
            yield ("aaa_ex", node.namespace)

        # using `{}aaa` overrides the `aaa` handler when there is no namespace
        @staticmethod
        @xml_handle_element("root", "{}aaa")
        def handle_aaa_no_namespace(node):
            # this code is never run in this test: the expected list below
            # contains no "aaa_no_namespace" entry
            yield ("aaa_no_namespace", node.namespace)

        #
        # namespaces in element attributes
        #

        @staticmethod
        @xml_handle_element("root", "bbb")
        def handle_bbb(node):
            # `name` gets any namespace
            # `{ns}name` gets specific namespace
            # `{}name` gets no namespace
            yield ("bbb", "uuu default", node.attributes["uuu"])
            yield ("bbb", "uuu no", node.attributes["{}uuu"])
            yield ("bbb", "vvv default", node.attributes["vvv"])
            yield (
                "bbb",
                "vvv specific",
                node.attributes["{https://example.com/xml/ex}vvv"],
            )
            # `name` prefers to get no namespace when possible
            yield ("bbb", "xxx default", node.attributes["xxx"])  # -> "2"
            yield ("bbb", "xxx no", node.attributes["{}xxx"])
            yield (
                "bbb",
                "xxx specific",
                node.attributes["{https://example.com/xml/ex}xxx"],
            )
            # note that a warning is emitted if there are attributes with various
            # namespaces but none without namespace
            with pytest.warns(RuntimeWarning):
                # current implementation uses "first" attribute in that case
                # but you should not rely on it and specify the namespace to use
                yield ("bbb", "yyy default", node.attributes["yyy"])

        # Forward every item produced by the handlers above unchanged.
        @staticmethod
        def xml_handler(generator):
            yield from generator

    # Expected items, in the order they are yielded.
    assert list(Parser(XML).iter_from(Handler)) == [
        ("aaa", "https://example.com/xml/"),
        ("aaa", "https://example.com/xml/aaa"),
        ("aaa_ex", "https://example.com/xml/ex"),
        ("bbb", "uuu default", "0"),
        ("bbb", "uuu no", "0"),
        ("bbb", "vvv default", "1"),
        ("bbb", "vvv specific", "1"),
        ("bbb", "xxx default", "2"),
        ("bbb", "xxx no", "2"),
        ("bbb", "xxx specific", "3"),
        ("bbb", "yyy default", "5"),
    ]
Esempio n. 10
0
def test_external_entities(xml):
    """Parsing ``xml`` with ``handler_get_text`` must raise ``EntitiesForbidden``."""
    with pytest.raises(EntitiesForbidden):
        # Construction stays inside the block so an error raised by the
        # constructor is caught as well.
        parser = Parser(xml)
        parser.return_from(handler_get_text)