def import_questions(tables, file, test):
    """Create Question and Choice rows from nested document tables.

    ``tables`` is walked four levels deep with ``enum_at_depth`` after being
    passed through ``remove_empty_paragraphs``. Within each innermost list,
    a paragraph whose index is a multiple of 6 becomes a ``Question``
    attached to ``test``; every other paragraph becomes a ``Choice`` of the
    most recently created question. A choice whose text contains ``<b>`` is
    created with ``correct=True``.

    :param tables: nested lists of paragraph strings (4 levels deep).
    :param file: not used by this function; kept for interface compatibility.
    :param test: object the created questions are attached to; must not be
        ``None``.
    :returns: ``Question.objects.count()`` after the import minus the count
        taken before it.
    :raises ValueError: if ``test`` is ``None``.
    """
    # Validate before doing any work. A bare string cannot be raised in
    # Python 3 (it turns into an unrelated TypeError), so raise a real
    # exception carrying the message.
    if test is None:
        raise ValueError('File cannot be empty')

    tables = remove_empty_paragraphs(tables)
    count_before = Question.objects.count()

    for (_, _, _, para_idx), paragraph in enum_at_depth(tables, 4):
        if para_idx % 6 == 0:
            # Index 0 always comes first, so ``question`` is bound before
            # any choice in the same cell is created.
            question = Question.objects.create(title=paragraph, test=test)
        elif '<b>' in paragraph:
            Choice.objects.create(
                title=paragraph, question=question, correct=True
            )
        else:
            # Leave ``correct`` to the model's default, as before.
            Choice.objects.create(title=paragraph, question=question)

    return Question.objects.count() - count_before
def get_text(xml: bytes, context: Dict[str, Any]) -> TablesList:
    """Xml as a string to a list of cell strings.

    :param xml: an xml bytes object which might contain text
    :param context: dictionary of document attributes generated in get_docx_text.
        This function reads ``context["do_html"]`` and ``context["rId2Target"]``
        and passes the whole dictionary on to ``_get_bullet_string``.
    :returns: A 4-deep nested list of strings.

    Sorts the text into the DepthCollector instance, five-levels deep

    ``[table][row][cell][paragraph][run]`` is a string

    Joins the runs before returning, so return list will be

    ``[table][row][cell][paragraph]`` is a string

    When ``context["do_html"]`` is True, ``<`` and ``>`` found in document text
    are escaped to ``&lt;`` and ``&gt;`` so they cannot be mistaken for the
    style markup inserted around runs.

    If you'd like to extend or edit this package, this function is probably where
    you want to do it. Nothing tricky here except keeping track of the text
    formatting.
    """
    tables = DepthCollector(5)
    do_html = context["do_html"]

    # noinspection PyPep8Naming
    def branches(branch: Element) -> None:
        """
        Recursively iterate over descendents of branch. Add text when found.

        :param branch: An Element from an xml file (ElementTree)
        :return: None. Adds text cells to outer variable `tables`.
        """
        for child in branch:
            tag = child.tag

            # set caret depth
            if tag == TABLE:
                tables.set_caret(1)
            elif tag == TABLE_ROW:
                tables.set_caret(2)
            elif tag == TABLE_CELL:
                tables.set_caret(3)
            elif tag == PARAGRAPH:
                tables.set_caret(4)

            # open elements
            if tag == PARAGRAPH:
                tables.insert(_get_bullet_string(child, context))

            elif tag == RUN and do_html is True:
                # new text run: only emit style markup when the style changes
                run_style = get_run_style(child)
                open_style = getattr(tables, "open_style", ())
                if run_style != open_style:
                    tables.insert(style_close(open_style))
                    tables.insert(style_open(run_style))
                    tables.open_style = run_style

            elif tag == TEXT:
                # new text object. oddly enough, these don't all contain text
                text = child.text if child.text is not None else ""
                if do_html is True:
                    # escape angle brackets so literal text is never read as markup
                    text = text.replace("<", "&lt;")
                    text = text.replace(">", "&gt;")
                tables.insert(text)

            elif tag == FOOTNOTE:
                # skip notes whose w:type mentions "separator"
                if "separator" not in child.attrib.get(qn("w:type"), "").lower():
                    tables.insert("footnote{})\t".format(child.attrib[qn('w:id')]))

            elif tag == ENDNOTE:
                if "separator" not in child.attrib.get(qn("w:type"), "").lower():
                    tables.insert("endnote{})\t".format(child.attrib[qn('w:id')]))

            # add placeholders
            elif tag == FOOTNOTE_REFERENCE:
                tables.insert("----footnote{}----".format(child.attrib[qn('w:id')]))

            elif tag == ENDNOTE_REFERENCE:
                tables.insert("----endnote{}----".format(child.attrib[qn('w:id')]))

            elif tag == IMAGE:
                rId = child.attrib[qn("r:embed")]
                image = context["rId2Target"].get(rId)
                if image:
                    tables.insert("----{}----".format(image))

            elif tag == IMAGEDATA:
                rId = child.attrib[qn("r:id")]
                image = context["rId2Target"].get(rId)
                if image:
                    tables.insert("----{}----".format(image))

            elif tag == TAB:
                tables.insert("\t")

            # enter child element
            branches(child)

            # close elements
            if tag == PARAGRAPH and do_html is True:
                # close whatever style is still open at the end of the paragraph
                tables.insert(style_close(getattr(tables, "open_style", ())))
                tables.open_style = ()

            if tag in {TABLE_ROW, TABLE_CELL, PARAGRAPH}:
                tables.raise_caret()
            elif tag == TABLE:
                tables.set_caret(1)

    branches(ElementTree.fromstring(xml))

    # join the runs of every paragraph into a single string, in place
    tree = tables.tree
    for (i, j, k, p), paragraph in enum_at_depth(tree, 4):
        tree[i][j][k][p] = "".join(paragraph)

    return tree
def test_enum_at_depth_high(self) -> None:
    """Raise TypeError when enumerating TABLES at a depth that is too high (5)."""
    with pytest.raises(TypeError) as msg:
        tuple(enum_at_depth(TABLES, 5))
    # Checked after the ``with`` block: statements following the raising call
    # inside the block would never run.
    assert "will not iterate over sequence item" in str(msg.value)
def test_enum_at_depth_low(self) -> None:
    """Enumerating at depth 0 must fail with a ValueError mentioning ``>= 1``."""
    # ``match`` searches the pattern in ``str()`` of the raised exception.
    with pytest.raises(ValueError, match="must be >= 1"):
        tuple(enum_at_depth(TABLES, 0))