def test_pull_image_files(self) -> None:
    """Copy image files to output path.

    Runs ``docx2python`` on ``resources/example.docx`` with an image output
    directory and checks that the directory then holds ``image1.png`` and
    ``image2.jpg`` and nothing else.
    """
    image_dir = "delete_this/path/to/images"
    try:
        docx2python("resources/example.docx", image_dir)
        # os.listdir returns entries in arbitrary order; sort so the
        # comparison does not depend on the filesystem.
        assert sorted(os.listdir(image_dir)) == ["image1.png", "image2.jpg"]
    finally:
        # Clean up even when extraction or the assertion fails, so a failed
        # run does not leave stale files behind for the next one.
        shutil.rmtree("delete_this", ignore_errors=True)
def test_my_checkbox(self) -> None:
    """Extract a mix of checked and unchecked boxes plus several dropdowns."""
    docx_path = os.path.join("resources", "check_drop_my.docx")
    expected_paragraphs = [
        "[user unchecked]\u2610[user unchecked]",
        "",
        "[user checked]\u2612[user checked]",
        "",
        "[my unchecked]\u2610[my unchecked]",
        "",
        "[my checked]\u2612[my checked]",
        "",
        "User dropdown (Piihan B)",
        "Piihan B",
        "",
        "My dropdown (no choice)",
        "Choose an item.",
        "",
        "My dropdown (chose A)",
        "my_item_A",
        "",
        "My dropdown (chose B)",
        "my_item_B",
    ]
    result = docx2python(docx_path)
    # The expected body is the single paragraph list wrapped in three
    # outer lists.
    assert result.body == [[[expected_paragraphs]]]
def test_bulleted_lists(self) -> None:
    """Extracted text of the bulleted-lists document matches exactly."""
    # One literal per extracted paragraph; nested bullets start with a tab.
    expected_text = (
        "\n\nThis is a document for testing docx2python module.\n\n\n\n"
        "--\tWhy did the chicken cross the road?\n\n"
        "\t--\tJust because\n\n"
        "\t--\tDon't know\n\n"
        "\t--\tTo get to the other side\n\n"
        "--\tWhat's the meaning of life, universe and everything?\n\n"
        "\t--\t42\n\n"
        "\t--\t0\n\n"
        "\t--\t-1\n\n"
    )
    extraction = docx2python("resources/created-in-pages-bulleted-lists.docx")
    assert extraction.text == expected_text
def test_paragraphs_only(self) -> None:
    """Extracted text of the paragraphs-only document matches exactly."""
    expected_text = (
        "\n\nThis is a document for testing docx2python module.\n\n\n\n"
        "This document contains paragraphs.\n\n\n\n"
        "This document does not contain any bulleted lists.\n\n"
    )
    extraction = docx2python("resources/created-in-pages-paragraphs-only.docx")
    assert extraction.text == expected_text
def test_prints(self) -> None:
    """Extract text when ``word/document.xml`` has been renamed.

    The input docx has ``word/document.xml`` renamed to
    ``word/blah_blah.xml`` with all references updated. The hyperlink
    markup must still appear in the extracted text.
    """
    docx_path = os.path.join("resources", "renamed_document_xml.docx")
    hyperlink = '<a href="http://www.shayallenhill.com/">my website</a>'
    text = docx2python(docx_path).text
    assert hyperlink in text
def test_user_unchecked_dropdown1(self) -> None:
    """Text holds the unchecked-box glyph followed by the dropdown entry."""
    docx_path = os.path.join("resources", "unchecked_drop0.docx")
    expected_text = "\u2610 \n\n\n\n\n\nPiihan B"
    result = docx2python(docx_path)
    assert result.text == expected_text
def test_user_checked_dropdown0(self) -> None:
    """Text holds the checked-box glyph followed by the dropdown entry."""
    docx_path = os.path.join("resources", "checked_drop1.docx")
    expected_text = "\u2612 \n\n\n\n\n\nPIlihan A"
    result = docx2python(docx_path)
    assert result.text == expected_text
def test_extraction(self) -> None:
    """Image is collected and its placeholder appears in the text."""
    docx_path = os.path.join("resources", "has_pict.docx")
    result = docx2python(docx_path)
    image_name = "image1.png"
    placeholder = "----media/image1.png----"
    assert image_name in result.images
    assert placeholder in result.text
#!/usr/bin/env python3 # _*_ coding: utf-8 _*_ """Test full functionality of source_old :author: Shay Hill :created: 7/5/2019 """ import os import shutil import re from docx2python.main import docx2python from docx2python.iterators import iter_at_depth OUTPUT = docx2python("resources/example.docx") HTML_OUTPUT = docx2python("resources/example.docx", html=True) class TestFormatting: """Nested list output string formatting""" def test_header(self) -> None: """Header text in correct location""" header_text = "".join(iter_at_depth(OUTPUT.header, 4)) assert re.match(r"Header text----media/image\d+\.\w+----$", header_text) def test_footer(self) -> None: """Footer text in correct location""" footer_text = "".join(iter_at_depth(OUTPUT.footer, 4)) assert re.match(r"Footer text----media/image\d+\.\w+----$",
#!/usr/bin/env python3 # _*_ coding: utf-8 _*_ """Test features of DocxContent that weren't tested in test_docx2python. :author: Shay Hill :created: 7/6/2019 """ from docx2python.main import docx2python INST = docx2python("resources/example.docx") class TestDocument: def test_combine_of_header_body_footer(self) -> None: """Return all content combined as instance.document """ assert (INST.document == INST.header + INST.body + INST.footer + INST.footnotes + INST.endnotes) def test_read_only(self) -> None: """Document attribute is read only.""" doc1 = INST.document doc1 = doc1[:1] assert doc1 != INST.document assert (INST.document == INST.header + INST.body + INST.footer + INST.footnotes + INST.endnotes) class TestText: def test_function(self) -> None: """Return '\n\n'-delimited paragraphs as instance.text. """
def test_dop_1013a() -> None:
    """Extract two documents back to back without raising.

    Covers the case described as misidentifying ``word/document.xml`` as
    ``word/word/document.xml``.
    """
    # noinspection SpellCheckingInspection
    docx_paths = (
        "resources/example.docx",
        "resources/240-DOP-1013A Lay Down Tubulars.docx",
    )
    for docx_path in docx_paths:
        docx2python(docx_path)
def test_prints(self) -> None:
    """Hyperlink is rendered as an anchor tag in the extracted text."""
    docx_path = os.path.join("resources", "hyperlink.docx")
    hyperlink = '<a href="http://www.shayallenhill.com/">my website</a>'
    text = docx2python(docx_path).text
    assert hyperlink in text
def test_get_toc_text(self) -> None:
    """First 66 characters of the text match the table-of-contents entry."""
    expected_start = (
        "Contents\n\n\tBeautiful is better than ugly.\t1</a>"
        "\n\n\n\n\n\n\n\nBeautiful i"
    )
    text = docx2python("resources/zen_of_python.docx").text
    # Only the leading slice is compared, not the whole document.
    assert text[:66] == expected_start
def test_run(self) -> None:
    """Image is collected and its placeholder appears in the text."""
    result = docx2python("resources/has_pict.docx")
    image_name = "image1.png"
    placeholder = "----media/image1.png----"
    assert image_name in result.images
    assert placeholder in result.text