# Ordered (marker, action) pairs for run-formatting tags. The first marker
# found in a token wins, so the order is significant.
_RUN_FORMATTERS = (
    ("<w:b/>", lambda run: setattr(run, "bold", True)),
    ("<w:strike/>", lambda run: run.setStrikethrough(True)),
    ('<w:vertAlign w:val="subscript"/>', lambda run: run.setSubscript(True)),
    ('<w:vertAlign w:val="superscript"/>', lambda run: run.setSuperscript(True)),
    ("<w:i/>", lambda run: setattr(run, "italics", True)),
    # NOTE(review): any <w:u ...> tag counts as underlined, whatever its
    # w:val is -- confirm that this is intended.
    ("<w:u", lambda run: setattr(run, "underline", True)),
)


def _quotedValue(tag):
    """Return the text between the first and the last double quote in *tag*.

    Raises ValueError if *tag* contains no double quote.
    """
    return tag[tag.index('"') + 1:tag.rfind('"')]


def _collectParagraphs(tagSplit):
    """Split the token stream into Paragraph objects holding raw tokens."""
    paragraphList = []
    currentParagraph = None
    alreadyListed = False
    for token in tagSplit:
        # Only an opening tag that carries w:rsidRDefault starts a paragraph;
        # the attribute test keeps tags such as <w:pPr> from matching.
        if "<w:p" in token and "w:rsidRDefault" in token:
            currentParagraph = Paragraph()
            currentParagraph.setType("Paragraph")
            alreadyListed = False
        elif "</w:p>" in token:
            # A closing tag seen before any paragraph was started is ignored,
            # and a paragraph is listed once even if further closing tags
            # follow (nested paragraphs, or ones whose start was not matched).
            if currentParagraph is not None and not alreadyListed:
                paragraphList.append(currentParagraph)
                alreadyListed = True
        elif currentParagraph is not None:
            # Tokens keep being attached to the most recently started
            # paragraph, including tokens that follow its closing tag.
            currentParagraph.addElement(token)
    return paragraphList


def _applyParagraphProperties(paragraph, picIndex):
    """Read the <w:pPr> settings of *paragraph*, then strip those tokens.

    Style, list indent and list id are copied onto the paragraph; a
    paragraph containing a picture is typed "Image" and given *picIndex*.
    Returns the picture index to use for the next picture.
    """
    kept = []
    openStarts = []  # positions in `kept` of not-yet-closed <w:pPr tokens
    removedAny = False
    # Iterate over a snapshot and rebuild the token list instead of deleting
    # from the list that is being iterated.
    for token in list(paragraph.getElements()):
        kept.append(token)
        if "<w:pPr" in token:
            openStarts.append(len(kept) - 1)
        if "</w:pPr" in token and openStarts:
            # Drop everything from the matching opening token through this
            # closing token.
            del kept[openStarts.pop():]
            removedAny = True
        if openStarts:
            if "<w:pStyle w:val" in token:
                paragraph.setType(_quotedValue(token))
            if "<w:ilvl w:val" in token:
                paragraph.setListIndent(_quotedValue(token))
            if "<w:numId w:val" in token:
                paragraph.setListId(_quotedValue(token))
        if "<pic:nvPicPr" in token:
            paragraph.setImageIndex(picIndex)
            paragraph.setType("Image")
            picIndex += 1
    if removedAny:
        paragraph.clearElements()
        paragraph.setElements(kept)
    return picIndex


def _buildRuns(paragraph):
    """Replace the raw tokens of *paragraph* with one Element per run."""
    runs = []
    currentElement = None
    textParts = []
    inText = False
    for token in paragraph.getElements():
        # Match the run tag itself (<w:r> or <w:r attr=...>), not other tags
        # that merely start with the same letters (<w:rPr>, <w:rFonts>,
        # <w:rtl/>, ...), which would discard formatting already recorded.
        if "<w:r>" in token or "<w:r " in token:
            currentElement = Element()
            continue
        for marker, applyFormat in _RUN_FORMATTERS:
            if marker in token:
                # Formatting tags outside an open run are ignored.
                if currentElement is not None:
                    applyFormat(currentElement)
                break
        else:
            if "<w:t>" in token or '<w:t xml:space=' in token:
                inText = True
            elif "</w:r>" in token:
                if currentElement is not None:
                    runs.append(currentElement)
                    # A finished run must not be modified or appended again.
                    currentElement = None
            elif "</w:t>" in token and inText:
                inText = False
                if currentElement is not None:
                    currentElement.setText("".join(textParts))
                textParts = []
            elif inText:
                textParts.append(token)
    paragraph.clearElements()
    paragraph.setElements(runs)


def buildParagraphs(tagSplit):
    """Build Paragraph objects from a flat sequence of document tokens.

    The work is done in three passes:

    1. Tokens are grouped into Paragraph objects (type "Paragraph").
    2. Each paragraph's <w:pPr> block is read (style, list indent, list id)
       and removed; paragraphs containing a picture become type "Image"
       and receive a picture index that counts up across all paragraphs.
    3. Each paragraph's remaining tokens are converted to Element objects,
       one per run, carrying the run's text and formatting flags.

    Args:
        tagSplit: iterable of strings, each holding a tag or a piece of
            text (presumably the split contents of a .docx document.xml --
            confirm against the caller).

    Returns:
        The list of Paragraph objects in document order. Empty when no
        paragraph start is found in *tagSplit*.
    """
    paragraphList = _collectParagraphs(tagSplit)

    picIndex = 0
    for paragraph in paragraphList:
        picIndex = _applyParagraphProperties(paragraph, picIndex)

    for paragraph in paragraphList:
        _buildRuns(paragraph)

    return paragraphList