def test_set_caret(self) -> None:
    """Set the caret to depths 3, 2, then 1 and check the open branches each time."""
    collector = DepthCollector(3)
    # (requested caret depth, expected rightmost_branches afterwards)
    expectations = (
        (3, [[[[]]], [[]], []]),
        (2, [[[[]]], [[]]]),
        (1, [[[[]]]]),
    )
    for depth, expected_branches in expectations:
        collector.set_caret(depth)
        assert collector.rightmost_branches == expected_branches
def test_raise_caret(self) -> None:
    """Raising the caret after one drop removes the last open branch."""
    collector = DepthCollector(3)

    # One drop opens a second branch ...
    after_drop = [[[]], []]
    collector.drop_caret()
    assert collector.rightmost_branches == after_drop

    # ... and raising the caret closes it again.
    after_raise = [[[]]]
    collector.raise_caret()
    assert collector.rightmost_branches == after_raise
def test_caret_will_not_drop_past_item_depth(self) -> None:
    """A third drop on a collector built with item_depth 3 raises CaretDepthError."""
    collector = DepthCollector(3)
    # Two drops are accepted; the test expects the one after that to fail.
    for _ in range(2):
        collector.drop_caret()
    with pytest.raises(CaretDepthError):
        collector.drop_caret()
def get_text(xml: bytes, context: Dict[str, Any]) -> TablesList:
    """Xml as a string to a list of cell strings.

    :param xml: an xml bytes object which might contain text
    :param context: dictionary of document attributes generated in get_docx_text.
        This function reads ``context["do_html"]`` and ``context["rId2Target"]``
        and passes the whole dictionary on to ``_get_bullet_string``.
    :returns: A 4-deep nested list of strings.

    Sorts the text into the DepthCollector instance, five-levels deep

    ``[table][row][cell][paragraph][run]`` is a string

    Joins the runs before returning, so return list will be

    ``[table][row][cell][paragraph]`` is a string

    When ``context["do_html"]`` is True, style tags are inserted around text runs
    and literal ``<`` / ``>`` characters in the text are escaped as ``&lt;`` /
    ``&gt;`` so they cannot be confused with those tags.

    If you'd like to extend or edit this package, this function is probably where
    you want to do it. Nothing tricky here except keeping track of the text
    formatting.
    """
    tables = DepthCollector(5)
    do_html = context["do_html"]

    # noinspection PyPep8Naming
    def branches(branch: Element) -> None:
        """
        Recursively iterate over descendants of branch. Add text when found.

        :param branch: An Element from an xml file (ElementTree)
        :return: None. Adds text cells to outer variable `tables`.
        """
        for child in branch:
            tag = child.tag

            # set caret depth
            if tag == TABLE:
                tables.set_caret(1)
            elif tag == TABLE_ROW:
                tables.set_caret(2)
            elif tag == TABLE_CELL:
                tables.set_caret(3)
            elif tag == PARAGRAPH:
                tables.set_caret(4)

            # open elements
            if tag == PARAGRAPH:
                tables.insert(_get_bullet_string(child, context))

            elif tag == RUN and do_html is True:
                # new text run: only emit tags when the style actually changes
                run_style = get_run_style(child)
                open_style = getattr(tables, "open_style", ())
                if run_style != open_style:
                    tables.insert(style_close(open_style))
                    tables.insert(style_open(run_style))
                    tables.open_style = run_style

            elif tag == TEXT:
                # new text object. oddly enough, these don't all contain text
                text = child.text if child.text is not None else ""
                if do_html is True:
                    # Escape angle brackets so document text is not mistaken
                    # for the style tags inserted above. Only `<` and `>` are
                    # escaped; `&` is passed through unchanged.
                    text = text.replace("<", "&lt;")
                    text = text.replace(">", "&gt;")
                tables.insert(text)

            elif tag == FOOTNOTE:
                # skip notes whose w:type mentions "separator"
                if "separator" not in child.attrib.get(qn("w:type"), "").lower():
                    tables.insert("footnote{})\t".format(child.attrib[qn('w:id')]))

            elif tag == ENDNOTE:
                if "separator" not in child.attrib.get(qn("w:type"), "").lower():
                    tables.insert("endnote{})\t".format(child.attrib[qn('w:id')]))

            # add placeholders
            elif tag == FOOTNOTE_REFERENCE:
                tables.insert("----footnote{}----".format(child.attrib[qn('w:id')]))

            elif tag == ENDNOTE_REFERENCE:
                tables.insert("----endnote{}----".format(child.attrib[qn('w:id')]))

            elif tag == IMAGE:
                rId = child.attrib[qn("r:embed")]
                image = context["rId2Target"].get(rId)
                if image:
                    tables.insert("----{}----".format(image))

            elif tag == IMAGEDATA:
                rId = child.attrib[qn("r:id")]
                image = context["rId2Target"].get(rId)
                if image:
                    tables.insert("----{}----".format(image))

            elif tag == TAB:
                tables.insert("\t")

            # enter child element
            branches(child)

            # close elements
            if tag == PARAGRAPH and do_html is True:
                # close whatever style is still open at the end of the paragraph
                tables.insert(style_close(getattr(tables, "open_style", ())))
                tables.open_style = ()

            if tag in {TABLE_ROW, TABLE_CELL, PARAGRAPH}:
                tables.raise_caret()
            elif tag == TABLE:
                tables.set_caret(1)

    branches(ElementTree.fromstring(xml))

    # join the runs of every paragraph into a single string
    tree = tables.tree
    for (i, j, k, l), paragraph in enum_at_depth(tree, 4):
        tree[i][j][k][l] = "".join(paragraph)

    return tree
def test_insert(self) -> None:
    """An inserted item ends up nested inside the first rightmost branch."""
    collector = DepthCollector(5)
    collector.insert("text")
    first_branch = collector.rightmost_branches[0]
    expected = [[[[["text"]]]]]
    assert first_branch == expected
def test_caret_will_not_raise_past_root(self) -> None:
    """Raising the caret on a freshly built collector raises CaretDepthError."""
    collector = DepthCollector(3)
    # No drop_caret call has been made, so there is nothing to raise out of.
    with pytest.raises(CaretDepthError):
        collector.raise_caret()
def test_last_caret(self) -> None:
    """Dropping the caret appends a new empty list and tracks that same object.

    The last entry of rightmost_branches must be the very list that was
    appended to the previous entry, not an equal copy.
    """
    collector = DepthCollector(3)
    collector.drop_caret()

    open_branches = collector.rightmost_branches
    assert open_branches == [[[]], []]

    parent_branch = open_branches[-2]
    newest_branch = open_branches[-1]
    assert newest_branch is parent_branch[-1]
def test_init(self) -> None:
    """A new collector stores its item depth and starts with one empty branch."""
    requested_depth = 3
    collector = DepthCollector(requested_depth)
    assert collector.item_depth == 3
    assert collector.rightmost_branches == [[]]