def word_table_is_read_as_document_table_element(self):
    # A w:tbl with one row, one cell and one empty paragraph should be read
    # as the matching nested documents.table structure.
    paragraph_xml = xml_element("w:p", {}, [])
    cell_xml = xml_element("w:tc", {}, [paragraph_xml])
    row_xml = xml_element("w:tr", {}, [cell_xml])
    table_xml = xml_element("w:tbl", {}, [row_xml])

    actual = _read_and_get_document_xml_element(table_xml)

    expected_cell = documents.table_cell([documents.paragraph([])])
    expected_row = documents.table_row([expected_cell])
    assert_equal(documents.table([expected_row]), actual)
def when_optional_attributes_of_comment_are_not_blank_then_they_are_read():
    # w:author and w:initials on a w:comment should surface as author_name
    # and author_initials on the comment that was read.
    read_comments = create_comments_reader(body_xml.reader())
    comment_xml = xml_element(
        "w:comment",
        {"w:id": "1", "w:author": "The Piemaker", "w:initials": "TP"},
        [],
    )
    result = read_comments(xml_element("w:comments", {}, [comment_xml]))

    # Single-target unpacking fails unless exactly one comment was read.
    [comment] = result.value
    assert_equal("The Piemaker", comment.author_name)
    assert_equal("TP", comment.author_initials)
def hyperlink_is_ignored_if_it_does_not_have_a_relationship_id_nor_anchor(self):
    # A w:hyperlink with neither r:id nor w:anchor is expected to read as
    # just its children: a list holding the single empty run.
    hyperlink_xml = xml_element("w:hyperlink", {}, [xml_element("w:r")])
    expected = [documents.run([])]
    actual = _read_and_get_document_xml_element(hyperlink_xml)
    assert_equal(expected, actual)
def complex_field_nested_within_a_hyperlink_complex_field_is_wrapped_with_the_hyperlink(self):
    # Paragraph layout: a HYPERLINK complex field whose result part contains
    # a second complex field (AUTHOR) with a single text run as its result.
    author_instr_text = xml_element("w:instrText", {}, [
        xml_text(' AUTHOR "John Doe"'),
    ])
    paragraph_children = [
        self._BEGIN_COMPLEX_FIELD,
        self._HYPERLINK_INSTRTEXT,
        self._SEPARATE_COMPLEX_FIELD,
        self._BEGIN_COMPLEX_FIELD,
        author_instr_text,
        self._SEPARATE_COMPLEX_FIELD,
        _run_element_with_text("John Doe"),
        self._END_COMPLEX_FIELD,
        self._END_COMPLEX_FIELD,
    ]
    paragraph_xml = xml_element("w:p", {}, paragraph_children)

    result = _read_and_get_document_xml_element(paragraph_xml)

    # Expected: plain empty runs at both ends, with every run in between
    # hyperlinked, including the one carrying the nested field's text.
    text_run_matcher = self._is_hyperlinked_run(children=is_sequence(
        is_text("John Doe"),
    ))
    expected_children = is_sequence(
        is_empty_run,
        self._is_empty_hyperlinked_run,
        self._is_empty_hyperlinked_run,
        self._is_empty_hyperlinked_run,
        text_run_matcher,
        self._is_empty_hyperlinked_run,
        is_empty_run,
    )
    assert_that(result, is_paragraph(children=expected_children))
def hyperlink_is_read_if_it_has_an_anchor_attribute(self):
    # A w:hyperlink carrying only w:anchor should become a hyperlink with
    # that anchor, wrapping the converted run.
    hyperlink_xml = xml_element(
        "w:hyperlink",
        {"w:anchor": "start"},
        [xml_element("w:r")],
    )
    expected = documents.hyperlink(anchor="start", children=[documents.run([])])
    actual = _read_and_get_document_xml_element(hyperlink_xml)
    assert_equal(expected, actual)
def when_optional_attributes_of_comment_are_missing_then_they_are_as_none():
    # A w:comment with only w:id should yield None for both author_name and
    # author_initials.
    read_comments = create_comments_reader(body_xml.reader())
    comment_xml = xml_element("w:comment", {"w:id": "1"}, [])
    result = read_comments(xml_element("w:comments", {}, [comment_xml]))

    # Single-target unpacking fails unless exactly one comment was read.
    [comment] = result.value
    assert_equal(None, comment.author_name)
    assert_equal(None, comment.author_initials)
def id_and_body_of_comment_is_read():
    # One w:comment with id "1" and a single empty paragraph as its body.
    comment_body_xml = [xml_element("w:p")]
    read_comments = create_comments_reader(body_xml.reader())
    comment_xml = xml_element("w:comment", {"w:id": "1"}, comment_body_xml)
    result = read_comments(xml_element("w:comments", {}, [comment_xml]))

    assert_equal(1, len(result.value))
    # Index only after the length check so a wrong count fails on the
    # assertion above rather than here.
    first_comment = result.value[0]
    assert_equal(first_comment.body, [documents.paragraph(children=[])])
    assert_equal("1", first_comment.comment_id)
def id_and_body_of_footnote_are_read():
    # The footnote that is read should keep the id from w:id and hold the
    # very same list object that was passed in as the w:footnote children.
    footnote_body = [xml_element("w:p")]
    footnote_xml = xml_element("w:footnote", {"w:id": "1"}, footnote_body)
    footnotes_xml = xml_element("w:footnotes", {}, [footnote_xml])

    footnotes = read_footnotes_xml_element(footnotes_xml)

    assert_equal(1, len(footnotes))
    only_footnote = footnotes[0]
    # Identity check, not equality: the body is expected to be unconverted.
    assert footnote_body is only_footnote.body
    assert_equal("1", only_footnote.id)
def sdt_is_read_using_sdt_content(self):
    # A w:sdt wrapping w:sdtContent wrapping a w:t should read as the text
    # inside the content element.
    text_xml = xml_element("w:t", {}, [xml_text("Blackdown")])
    content_xml = xml_element("w:sdtContent", {}, [text_xml])
    sdt_xml = xml_element("w:sdt", {}, [content_xml])

    actual = _read_and_get_document_xml_element(sdt_xml)

    assert_equal(documents.text("Blackdown"), actual)
def hyperlink_is_read_if_it_has_a_relationship_id(self):
    # The r:id on w:hyperlink is looked up in the supplied relationships and
    # the relationship target becomes the hyperlink href.
    relationships = Relationships({
        "r42": Relationship(target="http://example.com"),
    })
    hyperlink_xml = xml_element(
        "w:hyperlink",
        {"r:id": "r42"},
        [xml_element("w:r")],
    )
    expected = documents.hyperlink(
        href="http://example.com",
        children=[documents.run([])],
    )
    actual = _read_and_get_document_xml_element(
        hyperlink_xml,
        relationships=relationships,
    )
    assert_equal(expected, actual)
def id_and_body_of_footnote_are_read():
    # With a body reader supplied, the footnote body is expected to hold a
    # converted Paragraph and the id is exposed as note_id.
    footnote_body = [xml_element("w:p")]
    footnote_xml = xml_element("w:footnote", {"w:id": "1"}, footnote_body)
    footnotes_xml = xml_element("w:footnotes", {}, [footnote_xml])

    result = read_footnotes_xml_element(footnotes_xml, body_reader=body_xml.reader())

    assert_equal(1, len(result.value))
    only_footnote = result.value[0]
    assert isinstance(only_footnote.body[0], documents.Paragraph)
    assert_equal("1", only_footnote.note_id)
def content_type_is_based_on_default_for_extension_if_there_is_no_override():
    # Only a Default entry for the "png" extension is declared, so a .png
    # path should resolve to that entry's content type.
    png_default = xml_element(
        "content-types:Default",
        {"Extension": "png", "ContentType": "image/png"},
    )
    types_xml = xml_element("content-types:Types", {}, [png_default])

    content_types = read_content_types_xml_element(types_xml)

    found = content_types.find_content_type("word/media/hat.png")
    assert_equal("image/png", found)
def _create_image_elements(description, blip):
    """Return the two child elements shared by the image fixtures.

    The first is a ``wp:docPr`` whose ``descr`` attribute is ``description``.
    The second is an ``a:graphic`` nesting ``a:graphicData`` > ``pic:pic`` >
    ``pic:blipFill`` around ``blip``.
    """
    doc_properties = xml_element("wp:docPr", {"descr": description})
    blip_fill = xml_element("pic:blipFill", {}, [blip])
    picture = xml_element("pic:pic", {}, [blip_fill])
    graphic_data = xml_element("a:graphicData", {}, [picture])
    graphic = xml_element("a:graphic", {}, [graphic_data])
    return [doc_properties, graphic]
def paragraph_has_style_name_read_from_paragraph_properties_and_styles(self):
    # The w:pStyle value "Heading1" should be resolved through the supplied
    # styles to the name "Heading 1".
    style_reference = xml_element("w:pStyle", {"w:val": "Heading1"})
    paragraph_properties = xml_element("w:pPr", {}, [style_reference])
    paragraph_xml = xml_element("w:p", {}, [paragraph_properties])

    heading_style = Style(style_id="Heading1", name="Heading 1")
    styles = Styles({"Heading1": heading_style}, {})

    paragraph = _read_and_get_document_xml_element(paragraph_xml, styles=styles)

    assert_equal("Heading 1", paragraph.style_name)
def existing_fragment_is_replaced_when_anchor_is_set_on_external_link(self):
    # The relationship target already ends in "#previous"; the w:anchor value
    # is expected to replace that fragment rather than be appended.
    relationships = Relationships([
        _hyperlink_relationship("r42", "http://example.com/#previous"),
    ])
    hyperlink_xml = xml_element(
        "w:hyperlink",
        {"r:id": "r42", "w:anchor": "fragment"},
        [xml_element("w:r")],
    )
    expected = documents.hyperlink(
        href="http://example.com/#fragment",
        children=[documents.run([])],
    )
    actual = _read_and_get_document_xml_element(
        hyperlink_xml,
        relationships=relationships,
    )
    assert_equal(expected, actual)
def warning_is_emitted_when_table_style_cannot_be_found(self):
    # Reading a table that references "TableNormal" against empty styles
    # should keep the style id, leave the name as None, and warn.
    style_reference = xml_element("w:tblStyle", {"w:val": "TableNormal"})
    table_properties = xml_element("w:tblPr", {}, [style_reference])
    table_xml = xml_element("w:tbl", {}, [table_properties])

    result = _read_document_xml_element(table_xml, styles=Styles.EMPTY)

    table = result.value
    assert_equal("TableNormal", table.style_id)
    assert_equal(None, table.style_name)
    expected_messages = [
        results.warning("Table style with ID TableNormal was referenced but not defined in the document"),
    ]
    assert_equal(expected_messages, result.messages)
def warning_is_emitted_when_paragraph_style_cannot_be_found(self):
    # Reading a paragraph that references "Heading1" against empty styles
    # should keep the style id, leave the name as None, and warn.
    style_reference = xml_element("w:pStyle", {"w:val": "Heading1"})
    paragraph_properties = xml_element("w:pPr", {}, [style_reference])
    paragraph_xml = xml_element("w:p", {}, [paragraph_properties])

    result = _read_document_xml_element(paragraph_xml, styles=Styles.EMPTY)

    paragraph = result.value
    assert_equal("Heading1", paragraph.style_id)
    assert_equal(None, paragraph.style_name)
    expected_messages = [
        results.warning("Paragraph style with ID Heading1 was referenced but not defined in the document"),
    ]
    assert_equal(expected_messages, result.messages)
def _assert_children_are_converted_normally(tag_name):
    """Assert that a ``tag_name`` wrapper inside a paragraph is transparent.

    Builds ``w:p`` > ``tag_name`` > ``w:r`` and checks that reading it gives
    a paragraph containing a single empty run.
    """
    wrapper_xml = xml_element(tag_name, {}, [xml_element("w:r")])
    paragraph_xml = xml_element("w:p", {}, [wrapper_xml])
    expected = documents.paragraph([documents.run([])])
    actual = _read_and_get_document_xml_element(paragraph_xml)
    assert_equal(expected, actual)
def footnotes_of_document_are_read(self):
    # Notes passed to the reader should be retrievable from the document
    # that is read, looked up by note type and id.
    supplied_notes = [
        documents.note("footnote", "4", [documents.paragraph([])]),
    ]
    body_element = xml_element("w:body")
    document_element = xml_element("w:document", {}, [body_element])

    document = _read_and_get_document_xml_element(document_element, notes=supplied_notes)

    found = document.notes.find_note("footnote", "4")
    assert_equal("4", found.note_id)
    assert isinstance(found.body[0], documents.Paragraph)
def paragraph_has_numbering_properties_from_paragraph_properties_if_present(self):
    # w:numId "42" at w:ilvl "1" should resolve, through the supplied
    # numbering, to the level registered under those keys.
    level_xml = xml_element("w:ilvl", {"w:val": "1"})
    num_id_xml = xml_element("w:numId", {"w:val": "42"})
    paragraph_xml = self._paragraph_with_numbering_properties([level_xml, num_id_xml])

    ordered_level = documents.numbering_level("1", True)
    numbering = Numbering({"42": {"1": ordered_level}})

    paragraph = _read_and_get_document_xml_element(paragraph_xml, numbering=numbering)

    assert_equal("1", paragraph.numbering.level_index)
    assert_equal(True, paragraph.numbering.is_ordered)
def hyperlink_is_read_as_external_hyperlink_if_it_has_a_relationship_id_and_an_anchor(self):
    # With both r:id and w:anchor present, the expected href is the
    # relationship target followed by "#" and the anchor.
    relationships = Relationships([
        _hyperlink_relationship("r42", "http://example.com/"),
    ])
    hyperlink_xml = xml_element(
        "w:hyperlink",
        {"r:id": "r42", "w:anchor": "fragment"},
        [xml_element("w:r")],
    )
    expected = documents.hyperlink(
        href="http://example.com/#fragment",
        children=[documents.run([])],
    )
    actual = _read_and_get_document_xml_element(
        hyperlink_xml,
        relationships=relationships,
    )
    assert_equal(expected, actual)
def children_of_w_smart_tag_are_converted_normally(self):
    # A w:smartTag wrapper inside a paragraph should be transparent: its run
    # ends up as a direct child of the paragraph.
    smart_tag_xml = xml_element("w:smartTag", {}, [xml_element("w:r")])
    paragraph_xml = xml_element("w:p", {}, [smart_tag_xml])
    expected = documents.paragraph([documents.run([])])
    actual = _read_and_get_document_xml_element(paragraph_xml)
    assert_equal(expected, actual)
def warning_is_emitted_when_run_style_cannot_be_found(self):
    # Reading a run that references "Heading1Char" against empty styles
    # should keep the style id, leave the name as None, and warn.
    style_reference = xml_element("w:rStyle", {"w:val": "Heading1Char"})
    run_properties = xml_element("w:rPr", {}, [style_reference])
    run_xml = xml_element("w:r", {}, [run_properties])

    result = _read_document_xml_element(run_xml, styles=Styles.EMPTY)

    run = result.value
    assert_equal("Heading1Char", run.style_id)
    assert_equal(None, run.style_name)
    expected_messages = [
        results.warning("Run style with ID Heading1Char was referenced but not defined in the document"),
    ]
    assert_equal(expected_messages, result.messages)
def footnotes_of_document_are_read(self):
    # Footnote elements passed to the reader should be findable by id on the
    # document, with their XML body converted to document elements.
    footnote_elements = [FootnoteElement("4", [xml_element("w:p")])]
    body_element = xml_element("w:body")
    document_element = xml_element("w:document", {}, [body_element])

    document = _read_and_get_document_xml_element(
        document_element,
        footnote_elements=footnote_elements,
    )

    found = document.footnotes.find_footnote_by_id("4")
    assert_equal("4", found.id)
    assert isinstance(found.body[0], documents.Paragraph)
def alternate_content_is_read_using_fallback(self):
    # mc:Choice and mc:Fallback hold paragraphs with different style ids; the
    # first element read should be the one from mc:Fallback.
    choice_xml = xml_element(
        "mc:Choice",
        {"Requires": "wps"},
        [_paragraph_with_style_id("first")],
    )
    fallback_xml = xml_element(
        "mc:Fallback",
        {},
        [_paragraph_with_style_id("second")],
    )
    alternate_content_xml = xml_element("mc:AlternateContent", {}, [choice_xml, fallback_xml])

    read_elements = _read_and_get_document_xml_element(alternate_content_xml)

    assert_equal("second", read_elements[0].style_id)
def content_type_is_based_on_override_if_present():
    # Both a Default for "png" and an Override for the exact part name are
    # declared; the Override's content type is the one expected back.
    png_default = xml_element(
        "content-types:Default",
        {"Extension": "png", "ContentType": "image/png"},
    )
    hat_override = xml_element(
        "content-types:Override",
        {"PartName": "/word/media/hat.png", "ContentType": "image/hat"},
    )
    types_xml = xml_element("content-types:Types", {}, [png_default, hat_override])

    content_types = read_content_types_xml_element(types_xml)

    found = content_types.find_content_type("word/media/hat.png")
    assert_equal("image/hat", found)
def numbering_style_has_num_id_read_from_paragraph_properties():
    # A numbering-type w:style whose w:pPr/w:numPr/w:numId is "42" should be
    # findable by its style id and expose that value as num_id.
    num_id_xml = xml_element("w:numId", {"w:val": "42"})
    numbering_properties = xml_element("w:numPr", {}, [num_id_xml])
    paragraph_properties = xml_element("w:pPr", {}, [numbering_properties])
    style_xml = xml_element(
        "w:style",
        {"w:type": "numbering", "w:styleId": "List1"},
        [paragraph_properties],
    )
    styles_xml = xml_element("w:styles", {}, [style_xml])

    styles = read_styles_xml_element(styles_xml)

    numbering_style = styles.find_numbering_style_by_id("List1")
    assert_equal("42", numbering_style.num_id)
def table_has_style_id_and_name_read_from_paragraph_properties_if_present(self):
    # The w:tblStyle value "TableNormal" is kept as the style id and resolved
    # through the supplied table styles to the name "Normal Table".
    style_reference = xml_element("w:tblStyle", {"w:val": "TableNormal"})
    table_properties = xml_element("w:tblPr", {}, [style_reference])
    table_xml = xml_element("w:tbl", {}, [table_properties])

    normal_table_style = Style(style_id="TableNormal", name="Normal Table")
    styles = Styles.create(
        table_styles={"TableNormal": normal_table_style},
    )

    table = _read_and_get_document_xml_element(table_xml, styles=styles)

    assert_equal("TableNormal", table.style_id)
    assert_equal("Normal Table", table.style_name)
def target_is_read_from_relationship_element():
    # A single Relationship with Id "rId8" should be retrievable by that id,
    # carrying the Target attribute as its target.
    relationship_attributes = {
        "Id": "rId8",
        "Type": "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
        "Target": "http://example.com",
    }
    relationship_xml = xml_element("relationships:Relationship", relationship_attributes)
    relationships_xml = xml_element("relationships:Relationships", {}, [relationship_xml])

    relationships = read_relationships_xml_element(relationships_xml)

    expected = Relationship(target="http://example.com")
    assert_equal(expected, relationships["rId8"])
def run_has_style_name_read_from_run_properties_and_styles(self):
    # The w:rStyle value "Heading1Char" should be resolved through the
    # supplied styles to the name "Heading 1 Char".
    style_reference = xml_element("w:rStyle", {"w:val": "Heading1Char"})
    heading_char_style = Style(style_id="Heading1Char", name="Heading 1 Char")
    styles = Styles({}, {"Heading1Char": heading_char_style})

    run = self._read_run_with_properties([style_reference], styles=styles)

    assert_equal("Heading 1 Char", run.style_name)
def paragraph_has_no_numbering_if_it_has_no_numbering_properties(self):
    # A bare w:p should read as a paragraph whose numbering is None.
    paragraph_xml = xml_element("w:p")
    paragraph = _read_and_get_document_xml_element(paragraph_xml)
    assert_equal(None, paragraph.numbering)
def _paragraph_with_style_id(style_id):
    """Build a ``w:p`` whose ``w:pPr`` holds a ``w:pStyle`` set to ``style_id``."""
    style_reference = xml_element("w:pStyle", {"w:val": style_id})
    paragraph_properties = xml_element("w:pPr", {}, [style_reference])
    paragraph_children = [paragraph_properties]
    return xml_element("w:p", {}, paragraph_children)
def _text_element(value):
    """Build a ``w:t`` element holding ``value`` as its single text node."""
    text_node = xml_text(value)
    return xml_element("w:t", {}, [text_node])
def unrecognised_children_are_ignored():
    # A run holding a text element plus an unknown w:huh element should read
    # as a run containing only the text.
    known_child = _text_element("Hello!")
    unknown_child = xml_element("w:huh", {}, [])
    run_xml = xml_element("w:r", {}, [known_child, unknown_child])

    expected = documents.run([documents.Text("Hello!")])
    result = _read_document_xml_element(run_xml)
    assert_equal(expected, result.value)
def _document_element_with_text(text):
    """Build ``w:document`` > ``w:body`` around one paragraph made from ``text``."""
    paragraph = _paragraph_element_with_text(text)
    body = xml_element("w:body", {}, [paragraph])
    return xml_element("w:document", {}, [body])
def returns_none_if_no_children(self):
    # find_child on an element built without children should give None.
    parent = xml_element("a")
    found = parent.find_child("b")
    assert_equal(None, found)
def unrecognised_elements_emit_warning():
    # Reading an unknown w:huh element should produce exactly one warning
    # naming the ignored element.
    unknown_xml = xml_element("w:huh", {}, [])

    result = _read_document_xml_element(unknown_xml)

    expected_messages = [
        results.warning("An unrecognised element was ignored: w:huh"),
    ]
    assert_equal(expected_messages, result.messages)
def can_parse_text_element_before_new_tag():
    # Text followed by a child tag should parse into a text node and then an
    # element, in document order.
    parsed = _parse_xml_string(b"<body>Hello!<br/></body>")

    expected_children = [xml_text("Hello!"), xml_element("br", {}, [])]
    expected = xml_element("body", {}, expected_children)
    assert_equal(expected, parsed)
def can_parse_element_with_children():
    # Two self-closing child tags should appear as two child elements, in
    # order, on the parsed parent.
    parsed = _parse_xml_string(b"<body><a/><b/></body>")

    expected_children = [
        xml_element("a", {}, []),
        xml_element("b", {}, []),
    ]
    assert_equal(expected_children, parsed.children)
def can_parse_text_element():
    # Text inside a tag should parse as a single text-node child.
    parsed = _parse_xml_string(b"<body>Hello!</body>")
    expected = xml_element("body", {}, [xml_text("Hello!")])
    assert_equal(expected, parsed)
def can_parse_attributes_of_tag():
    # An attribute on the tag should land in the parsed element's attributes.
    parsed = _parse_xml_string(b"<body name='bob'></body>")
    expected = xml_element("body", {"name": "bob"}, [])
    assert_equal(expected, parsed)
def can_parse_empty_element_with_separate_closing_tag():
    # <body></body> should parse as an element with no attributes or children.
    parsed = _parse_xml_string(b"<body></body>")
    expected = xml_element("body", {}, [])
    assert_equal(expected, parsed)
def w_tc(properties=None, *children):
    """Build a ``w:tc`` element for table fixtures.

    The first child is a ``w:tcPr`` element that receives ``properties`` as
    its children argument; the remaining positional arguments follow it.
    """
    # properties is forwarded unchanged, including the None default.
    cell_properties = xml_element("w:tcPr", {}, properties)
    cell_children = [cell_properties]
    cell_children.extend(children)
    return xml_element("w:tc", {}, cell_children)
def can_parse_self_closing_element():
    # <body/> should parse as an element with no attributes or children.
    parsed = _parse_xml_string(b"<body/>")
    expected = xml_element("body", {}, [])
    assert_equal(expected, parsed)
def ignored_elements_are_ignored_without_message():
    # w:bookmarkEnd should read as no value and produce no messages.
    bookmark_end_xml = xml_element("w:bookmarkEnd")

    result = _read_document_xml_element(bookmark_end_xml)

    assert_equal(None, result.value)
    assert_equal([], result.messages)
def returns_none_if_no_matching_children(self):
    # The only child is named "c", so looking up "b" should give None.
    other_child = xml_element("c")
    parent = xml_element("a", {}, [other_child])
    found = parent.find_child("b")
    assert_equal(None, found)
def unrecognised_elements_are_ignored():
    # Reading an unknown w:huh element should give a result with value None.
    unknown_xml = xml_element("w:huh", {}, [])
    result = _read_document_xml_element(unknown_xml)
    assert_equal(None, result.value)
def returns_first_matching_child(self):
    # Two children share the name "b"; find_child should return the earlier
    # one, identified here by its id attribute.
    first_b = xml_element("b", {"id": 1})
    second_b = xml_element("b", {"id": 2})
    parent = xml_element("a", {}, [first_b, second_b])

    found = parent.find_child("b")

    assert_equal(1, found.attributes["id"])
def text_nodes_are_ignored_when_reading_children():
    # A raw text node placed directly inside w:r should not show up in the
    # run that is read.
    run_xml = xml_element("w:r", {}, [xml_text("[text]")])
    expected = documents.run([])
    actual = _read_and_get_document_xml_element(run_xml)
    assert_equal(expected, actual)
def ignores_text_nodes(self):
    # The only child is a text node, so looking up "b" should give None.
    text_child = xml_text("Hello!")
    parent = xml_element("a", {}, [text_child])
    found = parent.find_child("b")
    assert_equal(None, found)
def _create_anchored_image(description, blip):
    """Build ``w:drawing`` > ``wp:anchor`` around the shared image elements."""
    image_elements = _create_image_elements(description, blip)
    anchor = xml_element("wp:anchor", {}, image_elements)
    return xml_element("w:drawing", {}, [anchor])
def w_tr(*children):
    """Build a ``w:tr`` element whose children are the positional arguments."""
    # The collected tuple is handed over as-is rather than copied to a list.
    row_children = children
    return xml_element("w:tr", {}, row_children)
def _create_inline_image(description, blip):
    """Build ``w:drawing`` > ``wp:inline`` around the shared image elements."""
    image_elements = _create_image_elements(description, blip)
    inline = xml_element("wp:inline", {}, image_elements)
    return xml_element("w:drawing", {}, [inline])
def _blip(attributes):
    """Build an ``a:blip`` element carrying the given ``attributes``."""
    blip_element = xml_element("a:blip", attributes)
    return blip_element
def _run_element_with_text(text):
    """Build a ``w:r`` element containing one text element made from ``text``."""
    text_child = _text_element(text)
    return xml_element("w:r", {}, [text_child])
def footnote_reference_has_id_read():
    # The w:id of a w:footnoteReference should be exposed as note_id.
    reference_xml = xml_element("w:footnoteReference", {"w:id": "4"})
    reference = _read_and_get_document_xml_element(reference_xml)
    assert_equal("4", reference.note_id)
def _paragraph_element_with_text(text):
    """Build a ``w:p`` element containing one run made from ``text``."""
    run_child = _run_element_with_text(text)
    return xml_element("w:p", {}, [run_child])
def comment_reference_has_id_read():
    # A w:commentReference with w:id "4" should read as CommentReference("4").
    reference_xml = xml_element("w:commentReference", {"w:id": "4"})
    actual = _read_and_get_document_xml_element(reference_xml)
    expected = documents.CommentReference("4")
    assert_equal(expected, actual)
def w_vmerge(val):
    """Build a ``w:vMerge`` element whose ``w:val`` attribute is ``val``."""
    attributes = {"w:val": val}
    return xml_element("w:vMerge", attributes)
def w_gridspan(val):
    """Build a ``w:gridSpan`` element whose ``w:val`` attribute is ``val``."""
    attributes = {"w:val": val}
    return xml_element("w:gridSpan", attributes)