Esempio n. 1
0
def buildParagraphs(tagSplit):
    """Build a list of Paragraph objects from a tokenised document stream.

    The work is done in three passes:

    1. Tokens between a paragraph start tag carrying ``w:rsidRDefault`` and
       the next ``</w:p>`` are collected into a new ``Paragraph``.
    2. Each paragraph's ``<w:pPr>`` ... ``</w:pPr>`` span is read for style,
       list indent and list id, then removed from its element list.
       Paragraphs containing ``<pic:nvPicPr`` are typed ``"Image"`` and
       numbered in document order.
    3. The remaining tokens of each paragraph are turned into ``Element``
       objects, one per ``<w:r>`` ... ``</w:r>`` run, which replace the raw
       tokens.

    ``Paragraph`` and ``Element`` are not defined in this block; they are
    expected to be available in the enclosing module.

    Args:
        tagSplit: Iterable of strings. Each item is tested with substring
            checks, so it is presumably a single XML tag or a piece of text
            (assumption -- confirm against the caller that produces it).

    Returns:
        list: The ``Paragraph`` objects, in the order their closing tags
        were seen. Empty when no tracked paragraph is found.
    """
    paragraphList = _collectParagraphs(tagSplit)

    # The image counter runs across the whole document, not per paragraph.
    picIndex = 0
    for paragraph in paragraphList:
        picIndex = _applyParagraphFormat(paragraph, picIndex)

    for paragraph in paragraphList:
        _convertRuns(paragraph)

    return paragraphList


def _collectParagraphs(tagSplit):
    """Group the tokens of each tracked paragraph into a Paragraph object."""
    paragraphList = []
    currentParagraph = None  # None means "not inside a tracked paragraph"

    for tags in tagSplit:
        if "<w:p" in tags and "w:rsidRDefault" in tags:
            currentParagraph = Paragraph()
            currentParagraph.setType("Paragraph")
        elif "</w:p>" in tags:
            # Only close a paragraph that was actually opened. A stray or
            # untracked closing tag must neither crash nor append the
            # previous paragraph a second time.
            if currentParagraph is not None:
                paragraphList.append(currentParagraph)
                currentParagraph = None
        elif currentParagraph is not None:
            currentParagraph.addElement(tags)

    return paragraphList


def _quotedValue(tag):
    """Return the text between the first and the last double quote of tag."""
    return tag[tag.index('"') + 1:tag.rfind('"')]


def _applyParagraphFormat(paragraph, picIndex):
    """Read and strip the paragraph's <w:pPr> span; return the next picIndex."""
    # Work on a private copy so that nothing is deleted from a list that is
    # being iterated over.
    elements = list(paragraph.getElements())
    formatSpans = []
    inFormat = False
    formatStartIndex = 0

    for i, tag in enumerate(elements):
        if "<w:pPr" in tag:
            inFormat = True
            formatStartIndex = i
        if "</w:pPr" in tag:
            if inFormat:
                formatSpans.append((formatStartIndex, i))
            inFormat = False
        if inFormat:
            if "<w:pStyle w:val" in tag:
                paragraph.setType(_quotedValue(tag))
            if "<w:ilvl w:val" in tag:
                paragraph.setListIndent(_quotedValue(tag))
            if "<w:numId w:val" in tag:
                paragraph.setListId(_quotedValue(tag))
        if "<pic:nvPicPr" in tag:
            paragraph.setImageIndex(picIndex)
            paragraph.setType("Image")
            picIndex += 1

    if formatSpans:
        # Delete from the back so earlier indices stay valid.
        for start, end in reversed(formatSpans):
            del elements[start:end + 1]
        paragraph.clearElements()
        paragraph.setElements(elements)

    return picIndex


# Run-formatting markers and the change each one applies to an Element.
# They are tested in this order and the first match wins.
_RUN_FORMATS = (
    ("<w:b/>", lambda run: setattr(run, "bold", True)),
    ("<w:strike/>", lambda run: run.setStrikethrough(True)),
    ('<w:vertAlign w:val="subscript"/>', lambda run: run.setSubscript(True)),
    ('<w:vertAlign w:val="superscript"/>',
     lambda run: run.setSuperscript(True)),
    ("<w:i/>", lambda run: setattr(run, "italics", True)),
    ("<w:u", lambda run: setattr(run, "underline", True)),
)


def _runFormatter(tag):
    """Return the formatting action matching tag, or None if there is none."""
    for marker, applyFormat in _RUN_FORMATS:
        if marker in tag:
            return applyFormat
    return None


def _convertRuns(paragraph):
    """Replace the paragraph's raw tokens with one Element per text run."""
    currentElementList = []
    currentElement = None  # None means "not inside a run"
    textParts = []
    inText = False

    for tag in paragraph.getElements():
        # Match the run tag itself only. A bare "<w:r" test would also match
        # <w:rPr>, <w:rFonts>, <w:rtl/> and so on, and would throw away
        # formatting already recorded for the current run.
        if "<w:r>" in tag or "<w:r " in tag:
            currentElement = Element()
            continue

        applyFormat = _runFormatter(tag)
        if applyFormat is not None:
            # Formatting seen outside a run has nothing to apply to.
            if currentElement is not None:
                applyFormat(currentElement)
        elif "<w:t>" in tag or '<w:t xml:space=' in tag:
            inText = True
        elif "</w:r>" in tag:
            if currentElement is not None:
                currentElementList.append(currentElement)
                currentElement = None
        elif "</w:t>" in tag and inText:
            inText = False
            if currentElement is not None:
                currentElement.setText("".join(textParts))
            textParts = []
        elif inText:
            textParts.append(tag)

    paragraph.clearElements()
    paragraph.setElements(currentElementList)