def test_add_text_after_appended():
    """Chain two text nodes behind a tail text node and merge them.

    After ``bar`` was added next to the tail text ``foo`` and ``peng`` next
    to ``bar``, the private bookkeeping attributes (``_appended_text_node``,
    ``_position``, ``_bound_to``) of all three nodes are checked. Finally
    ``merge_text_nodes`` must leave one tag and one text node whose contents
    are stored as the tag's tail.
    """
    document = Document("<root><tag/>foo</root>")
    root = document.root

    foo = root[1]
    bar = TextNode("bar")
    foo.add_next(bar)
    peng = TextNode("peng")
    bar.add_next(peng)

    assert len(root) == 4

    assert foo._appended_text_node is bar
    assert foo._position == TAIL

    assert bar._bound_to is foo
    assert bar._appended_text_node is peng
    assert bar._position == APPENDED

    assert peng._bound_to is bar
    assert peng._appended_text_node is None
    assert peng._position == APPENDED

    document.merge_text_nodes()

    # the remaining child nodes serve as assertion message to ease debugging
    assert len(root) == 2, list(root.child_nodes())
    assert root[0]._etree_obj.tail == "foobarpeng"
def test_css_select_or(files_path):
    """A comma separated selector returns the matches of both alternatives."""
    source = files_path / "tei_stevenson_treasure_island.xml"
    matches = Document(source).css_select("titleStmt title, titleStmt author")

    assert len(matches) == 2
    local_names = {node.local_name for node in matches}
    assert local_names == {"author", "title"}
def test_previous_node():
    """Exercise ``previous_node`` on a root, a tag after text and two tags."""
    # the root node has no previous node
    document = Document("<root><a/></root>")
    assert document.root.previous_node() is None

    # a tag node that follows text; the text changes when another is added
    document = Document("<root>a<c/></root>")
    tag_c = document.root[1]
    assert tag_c.local_name == "c"

    text_a = tag_c.previous_node()
    assert text_a.content == "a"

    text_a.add_next("b")
    assert tag_c.previous_node().content == "b"

    # two tag nodes with a comment in the markup between them
    # NOTE(review): index 1 is expected to be <b/>, so the comment is
    # presumably not part of the parsed tree by default - confirm
    document = Document("<root><a/><!-- bla --><b/></root>")
    tag_b = document.root[1]
    predecessor = tag_b.previous_node()
    assert predecessor is not None
    assert predecessor.local_name == "a"
def test_add_html_classes():
    """Check ``lib.add_html_classes`` with literal names and with a ``Ref``.

    Each transformation applies one rule to ``body``; afterwards the
    ``class`` attribute of the result root's first child is inspected.
    """
    doc = Document("<html><body/></html>")
    transformation = Transformation(
        Rule("body", lib.add_html_classes("transformed"))
    )
    body = transformation(doc).root[0]
    assert body.attributes["class"] == "transformed"

    # from here on a document whose body already carries a class is used
    doc = Document('<html><body class="loaded" /></html>')
    body = transformation(doc).root[0]
    for expected in ("transformed", "loaded"):
        assert expected in body.attributes["class"]

    # several class names passed as separate arguments
    transformation = Transformation(
        Rule("body", lib.add_html_classes("transformed", "and_something_else"))
    )
    body = transformation(doc).root[0]
    for expected in ("and_something_else", "loaded", "transformed"):
        assert expected in body.attributes["class"]

    # class names that are looked up in the transformation's context
    transformation = Transformation(
        Rule("body", lib.add_html_classes(Ref("html_classes"))),
        context={"html_classes": ["transformed", "and_something_else"]},
    )
    body = transformation(doc).root[0]
    for expected in ("and_something_else", "loaded", "transformed"):
        assert expected in body.attributes["class"]
def test_tag_definition_copies_attributes():
    """A ``tag`` definition keeps the attributes it was created with.

    The root's attributes are changed after they were passed to ``tag``;
    the appended child must only carry the attributes present at that time.
    """
    root = Document('<root foo="bar"/>').root

    # the definition is created before the source attributes are altered
    child_definition = tag("test", root.attributes)
    root.attributes["bar"] = "foo"
    root.append_child(child_definition)

    assert root.attributes == {"bar": "foo", "foo": "bar"}
    assert root.first_child.attributes == {"foo": "bar"}
def test_exist_client_delete_document(rest_base_url, test_client):
    """Loading a stored document fails after the client deleted it."""
    to_be_deleted = Document(
        '<example id="t5">i am to be deleted</example>',
        existdb_client=test_client,
    )
    to_be_deleted.existdb_store(collection="/bar", filename="foo.xml")

    test_client.delete_document("/bar/foo.xml")

    with pytest.raises(FailedDocumentLoading):
        Document("/bar/foo.xml", existdb_client=test_client)
def test_transparency(files_path, result_file):
    """Loading and saving a file must not change its semantics.

    Every matching file is parsed without whitespace collapsing, saved to
    ``result_file`` and compared with its source, including the number of
    processing instructions as counted by ``count_pis``.
    """
    # ``[!tei_]`` is a character class: it skips every file whose name
    # *starts with* one of the characters ``t``, ``e``, ``i`` or ``_``.
    # NOTE(review): presumably meant to exclude the ``tei_`` prefixed files;
    # confirm that dropping other files with these initials is acceptable
    for file in files_path.glob("[!tei_]*.xml"):
        doc = Document(file, collapse_whitespace=False)
        doc.save(result_file)

        assert_documents_are_semantical_equal(file, result_file)
        assert count_pis(file) == count_pis(result_file)
def test_fetch_or_create_by_xpath_with_prefixes_attributes():
    """A prefixed attribute in a predicate is set on the created node."""
    root = Document('<root xmlns:foo="bar"/>').root

    created = root.fetch_or_create_by_xpath("./node[@foo:attr='value']")

    # serialized on its own the node declares the namespace itself
    assert str(created) == '<node xmlns:foo="bar" foo:attr="value"/>'
    assert str(root) == '<root xmlns:foo="bar"><node foo:attr="value"/></root>'
def test_set_root():
    """A detached node can become the root, another document's node can't."""
    document = Document("<root><node/></root>")
    detached_node = document.root[0].detach()
    document.root = detached_node
    assert str(document) == "<node/>"

    # a node that still belongs to another document must be rejected
    other_document = Document("<root><replacement/>parts</root>")
    with pytest.raises(InvalidOperation):
        document.root = other_document.root[0]
def test_location_path_and_xpath_concordance(files_path):
    """Each tag node's ``location_path`` selects exactly that node."""
    for xml_file in files_path.glob("*.xml"):
        document = Document(xml_file)
        tag_nodes = document.root.child_nodes(is_tag_node, recurse=True)
        for tag_node in tag_nodes:
            hits = document.xpath(tag_node.location_path)
            assert hits.size == 1
            assert hits.first is tag_node
def test_subclass():
    """``Document`` yields an ``OpenDocumentText`` only for matching markup."""
    odt_stub = f'<document xmlns="{OFFICE_NS}" mimetype="{ODT_MIMETYPE}"/>'
    assert isinstance(Document(odt_stub), OpenDocumentText)

    # altering the markup must result in a plain document
    altered_stub = odt_stub.replace("open", "closed")
    plain_document = Document(altered_stub)
    assert isinstance(plain_document, Document)
    assert not isinstance(plain_document, OpenDocumentText)
def test_invalid_xpath(files_path):
    """Expressions that select attributes or comments are rejected."""
    document = Document(files_path / "tei_marx_manifestws_1848.TEI-P5.xml")

    for expression in (".//pb/@facs", ".//comment()"):
        with pytest.raises(InvalidOperation):
            # the result is consumed in case the evaluation happens lazily
            tuple(document.xpath(expression))
def test_clone_quick_and_unsafe(sample_document):
    """Compare the serialization of quick clones with expected markup."""
    sample_clone = sample_document.root.clone(deep=True, quick_and_unsafe=True)
    assert str(sample_clone) == str(sample_document.root)

    # text that was appended after parsing is expected to be missing
    root = Document("<root>node a</root>").root
    root.append_child("|node b")
    quick_clone = root.clone(deep=True, quick_and_unsafe=True)
    assert str(quick_clone) == "<root>node a</root>"
def test_next_in_stream(files_path):
    """``next_node_in_stream`` walks from one page break to the following.

    Starting at the first page break, each step must arrive at the next one
    in document order. The very last page break is not used as a target.
    """
    document = Document(files_path / "tei_marx_manifestws_1848.TEI-P5.xml")
    page_breaks = document.xpath(".//pb").as_list()

    cursor = page_breaks[0]
    for expected in page_breaks[1:-1]:
        assert cursor.next_node_in_stream(is_pagebreak) is expected
        cursor = expected
def test_contains():
    """A node is contained in its own document only.

    Both documents are built from the same markup, so the membership test
    can't rely on the nodes' contents.
    """
    document_a = Document("<root><a/></root>")
    document_b = Document("<root><a/></root>")

    a = document_a.root[0]
    # a collection is forced before the membership tests are evaluated
    gc.collect()

    assert a in document_a
    assert a not in document_b
def test_previous_in_stream(files_path):
    """``previous_node_in_stream`` walks backwards through the page breaks.

    Starting at the last page break, each step must arrive at the preceding
    one. The very first page break is not used as a target.
    """
    document = Document(files_path / "tei_marx_manifestws_1848.TEI-P5.xml")
    page_breaks = document.xpath(".//pb").as_list()

    cursor = page_breaks[-1]
    for expected in reversed(page_breaks[1:-1]):
        assert cursor.previous_node_in_stream(is_pagebreak) is expected
        cursor = expected
def test_detach_node_retains_namespace_prefixes():
    """A detached node can still be queried for its namespaced descendants."""
    # libxml2 loses the notion of a default prefix for nodes that have been
    # removed from a parent node
    #
    # The markup is assembled from plain literals, hence no backslash escape
    # precedes the root element.
    document = Document(
        '<root xmlns="schema://default/">'
        " <child><grandchild/></child> "
        "</root> "
    )

    child = document.css_select("child").first.detach()

    assert child.css_select("grandchild").size == 1
def test_appended_text_contents_arent_lost():
    """Text added via the API is still serialized after a gc run.

    Every tag node gets a text child ``D`` prepended and a text sibling ``T``
    added, then every text node gets a ``Z`` added behind it.
    """
    root = Document("<root><a><b><c><d/></c></b></a></root>").root

    # the node sequences are frozen as tuples because the loops alter the tree
    for node in tuple(root.child_nodes(is_tag_node, recurse=True)):
        node.prepend_child("D")
        node.add_next("T")

    for node in tuple(root.child_nodes(is_text_node, recurse=True)):
        node.add_next("Z")

    # a collection is forced before the tree is serialized
    gc.collect()
    assert str(
        root) == "<root><a>DZ<b>DZ<c>DZ<d>DZ</d>TZ</c>TZ</b>TZ</a>TZ</root>"
def test_first_and_last_child():
    """Check ``first_child`` and ``last_child`` for three kinds of content."""
    # without any child nodes both properties are None
    childless = Document("<root/>")
    assert childless.root.first_child is None
    assert childless.root.last_child is None

    # tag nodes only
    tags_only = Document("<root><e1/><e2/></root>")
    assert tags_only.root.first_child.local_name == "e1"
    assert tags_only.root.last_child.local_name == "e2"

    # text nodes surround the tag nodes
    text_framed = Document("<root>first<e1/><e2/>last</root>")
    assert text_framed.root.first_child.content == "first"
    assert text_framed.root.last_child.content == "last"
def test_wrapper_cache():
    """The number of cached wrappers follows the nodes referenced here.

    The test requires an empty ``_wrapper_cache.wrappers`` after an initial
    garbage collection, thus it is sensitive to objects that other code
    keeps alive.
    """
    gc.collect()
    assert len(_wrapper_cache.wrappers) == 0

    # only the root node is referenced
    root = Document("<root/>").root
    assert len(_wrapper_cache.wrappers) == 1

    # an appended node adds one wrapper
    root.append_child(tag("node"))
    assert len(_wrapper_cache.wrappers) == 2

    # the detached node isn't referenced, one wrapper is gone after a gc run
    root.first_child.detach()
    gc.collect()
    assert len(_wrapper_cache.wrappers) == 1
def test_fetch_or_create_by_xpath_with_predicates_in_parentheses():
    """Parenthesized predicates address the same node in both notations."""
    root = Document("<root/>").root

    combined_predicates = (
        './entry/sense/cit[((@type="translation") and (@lang="en"))]'
    )
    separate_predicates = (
        './entry/sense/cit[(@type="translation")][((@lang="en"))]'
    )

    cit = root.fetch_or_create_by_xpath(combined_predicates)

    # the second call has to return the node that the first one created
    assert root.fetch_or_create_by_xpath(separate_predicates) is cit
    assert root.css_select('entry > sense > cit[lang="en"]').size == 1
def test_root_takes_no_siblings():
    """Adding a tag or text node next to a root node is rejected."""
    root = Document("<root/>").root

    for add_sibling in (root.add_next, root.add_previous):
        # first a tag definition, then a string
        for sibling in (tag("x"), "x"):
            with pytest.raises(InvalidOperation):
                add_sibling(sibling)
def test_fetch_or_create_by_xpath_with_multiple_attributes():
    """Attributes joined by ``and`` equal consecutive predicates."""
    root = Document("<root/>").root

    joined_predicates = './entry/sense/cit[@type="translation" and @lang="en"]'
    consecutive_predicates = './entry/sense/cit[@type="translation"][@lang="en"]'

    cit = root.fetch_or_create_by_xpath(joined_predicates)
    assert str(cit) == '<cit type="translation" lang="en"/>'

    # the second notation has to find the node that was just created
    assert root.fetch_or_create_by_xpath(consecutive_predicates) is cit
def test_fetch_or_create_by_xpath_with_prefix():
    """A declared prefix is usable in the path, an undeclared one raises."""
    root = Document(
        "<root xmlns:prfx='http://test.io'><intermediate/></root>"
    ).root

    created = root.fetch_or_create_by_xpath("./intermediate/prfx:test")
    assert str(created) == '<prfx:test xmlns:prfx="http://test.io"/>'

    expected_tree = (
        '<root xmlns:prfx="http://test.io">'
        "<intermediate><prfx:test/></intermediate>"
        "</root>"
    )
    assert str(root) == expected_tree

    # the prefix "unknwn" isn't declared in the document
    with pytest.raises(XPathEvalError):
        root.fetch_or_create_by_xpath("./unknwn:test")
def test_simple_transformation():
    """``ResolveCopyOf`` replaces ``copyOf`` nodes with the referenced ones.

    The expected result contains the referenced nodes without their
    ``xml:id`` attributes.
    """
    root = Document(
        '<root><node copyOf="#foo"/><node copyOf="#bar"/></root>'
    ).root
    # the document that contains the nodes referenced by the copyOf values
    source_document = Document(
        '<radix> <a> <b xml:id="foo"><c>hi</c></b> <b xml:id="baz"/> </a> '
        '<a xml:id="bar">na?</a> </radix> '
    )

    transformation = ResolveCopyOf()
    resolved_tree = transformation(root, source_document)

    assert str(resolved_tree) == "<root><b><c>hi</c></b><a>na?</a></root>"
def test_collapse_whitespace():
    """Check ``collapse_whitespace`` on mixed content and preserved spaces."""
    document = Document(
        " <root> <title> I Roy - <hi>Touting I Self</hi> </title> "
        '<matrix xml:space="preserve">HB 243 A Re</matrix> '
        '<matrix xml:space="preserve">HB 243 B\tRe</matrix> '
        "</root> "
    )

    document.collapse_whitespace()
    root = document.root
    matrices = root.css_select("matrix")

    assert root.first_child.full_text == "I Roy - Touting I Self"
    # the contents of nodes with xml:space="preserve" keep their whitespace
    assert matrices[0].full_text == "HB 243 A Re"
    assert matrices[1].full_text == "HB 243 B\tRe"

    # a single space between two tag nodes must survive the collapsing
    document = Document(
        '<docImprint><hi rendition="#g">Veröffentlicht im</hi> <docDate>'
        '<hi rendition="#g">Februar</hi> 1848</docDate>.</docImprint>'
    )

    first_hi = document.root.first_child
    assert first_hi._etree_obj.tail == " "
    space = first_hi.next_node()
    assert isinstance(space, TextNode)
    assert space.content == " "

    document.collapse_whitespace()
    assert document.root.full_text == "Veröffentlicht im Februar 1848."
def test_exist_client_xpath(test_client):
    """The client's ``xpath`` finds paragraphs of two stored documents."""
    paragraph_1 = "<p>retrieve me first!</p>"
    paragraph_2 = "<p>retrieve me too!</p>"

    # one paragraph is nested in another element, the other one is a root
    stored_documents = (
        (f'<example id="t7">{paragraph_1}</example>', "document_1.xml"),
        (paragraph_2, "document_2.xml"),
    )
    for markup, filename in stored_documents:
        Document(markup, existdb_client=test_client).existdb_store(
            filename=filename
        )

    serialized_hits = [str(node) for node in test_client.xpath("//p")]

    assert paragraph_1 in serialized_hits
    assert paragraph_2 in serialized_hits
def test_setup_long_term_references(long_term_references):
    """Store a document and two appended text nodes in the given mapping.

    Nothing is asserted here; the objects are only put into
    ``long_term_references``.
    NOTE(review): presumably a later test inspects these entries - confirm
    against the fixture's other users.
    """
    long_term_references["a_document"] = Document("<document/>")

    # a text node that was appended as last child
    root = Document("<root>a</root>").root
    root.append_child("b")
    long_term_references["appended_b"] = root.last_child

    # with a second text appended, the last child is stored
    # NOTE(review): the key reads as if "b" was meant, but the last child is
    # the node appended latest - confirm what is expected to be stored
    root = Document("<root>a</root>").root
    root.append_child("b")
    root.append_child("c")
    long_term_references["appended_b_with_c"] = root.last_child
def test_invalid_operations():
    """A node that belongs to another document can't be inserted."""
    target = Document("<root/>")
    source = Document("<root><replacement/>parts</root>")

    # neither as child ...
    with pytest.raises(InvalidOperation):
        target.root.append_child(source.root[0])

    # ... nor as sibling of a node in the target document
    new_node = new_tag_node("newNode")
    target.root.append_child(new_node)
    for add_sibling in (new_node.add_next, new_node.add_previous):
        with pytest.raises(InvalidOperation):
            add_sibling(source.root[0])
def test_xpath(files_path):
    """Query page breaks of a TEI document with and without a predicate.

    The number of results is checked with the results' ``size`` so that an
    empty result fails with an assertion instead of a ``NameError`` caused
    by a loop variable that was never bound.
    """
    document = Document(files_path / "tei_marx_manifestws_1848.TEI-P5.xml")
    pb_name = "{http://www.tei-c.org/ns/1.0}pb"

    all_page_breaks = document.xpath(".//pb")
    assert all_page_breaks.size == 23
    for page_break in all_page_breaks:
        assert isinstance(page_break, TagNode)
        assert page_break.universal_name == pb_name

    # the predicate narrows the result down to a single page break
    first_page_breaks = document.xpath('.//pb[@n="I"]')
    assert first_page_breaks.size == 1
    for page_break in first_page_breaks:
        assert isinstance(page_break, TagNode)
        assert page_break.universal_name == pb_name
        assert page_break.attributes["n"] == "I"