Example #1
0
 def parse(self, raw):
     """Parse one raw ``<cell>`` node and publish the result in module globals.

     The cell attributes (alignment, grid span, vertical alignment) are read
     from ``raw`` with regular expressions.  The ``<cell>`` wrapper and the
     first occurrence of each border element are then stripped, and every
     leading ``<para>...</para>`` element is handed to ``rtfPara.rtfPara``.

     Args:
         raw: Text of the cell node, starting with the ``<cell ...>`` tag.

     Raises:
         AttributeError: If an expected attribute is not found in ``raw``
             (``re.search`` returns ``None``).
         ValueError: If a ``<para`` element has no closing ``</para>``.

     Side effects:
         Rebinds the globals ``tmp_content``, ``tmp_alignment``,
         ``tmp_grid_row_from``, ``tmp_grid_row_to``, ``tmp_grid_col_from``,
         ``tmp_grid_col_to`` and ``tmp_vertical_align``, and makes sure
         ``./LSRRTF/`` is on ``sys.path``.
     """
     global tmp_content
     global tmp_alignment
     global tmp_grid_col_to
     global tmp_grid_col_from
     global tmp_grid_row_to
     global tmp_grid_row_from
     global tmp_vertical_align

     # No paragraph has been seen yet, so the content starts empty.
     tmp_content = ""
     # Paragraph objects in document order.
     # NOTE(review): this list is local and is not stored anywhere in the
     # code visible here -- confirm whether it should be kept on ``self``.
     tmp_objs = []

     # <cell> node attributes; every value is kept as the matched string.
     tmp_alignment = re.search(r"alignment=\"(\w+)\"", raw).group(1)

     # TODO: original note said an attribute with value 0 came back as undef
     # from twig, hence the ``or 0`` fallback.  With ``\d+`` a successful
     # match is never empty, so the fallback does not fire here.
     tmp_grid_row_from = re.search(r"gridRowFrom=\"(\d+)\"", raw).group(1) or 0
     tmp_grid_row_to = re.search(r"gridRowTill=\"(\d+)\"", raw).group(1) or 0
     tmp_grid_col_from = re.search(r"gridColFrom=\"(\d+)\"", raw).group(1) or 0
     tmp_grid_col_to = re.search(r"gridColTill=\"(\d+)\"", raw).group(1) or 0

     tmp_vertical_align = re.search(r"verticalAlignment=\"(\w+)\"", raw).group(1)

     # Drop the <cell ...> opening line and the closing </cell>.
     remaining = re.sub(r"^<cell[^>]*>\n", "", raw)
     remaining = re.sub(r"</cell>(\n)?$", "", remaining)

     # Remove only the first occurrence of each border element.
     for border in ("leftBorder", "rightBorder", "topBorder", "bottomBorder"):
         remaining = re.sub(r"<" + border + r"[^>]*>\n", "", remaining, count=1)

     # Make the LSRRTF modules importable without growing sys.path per call.
     if "./LSRRTF/" not in sys.path:
         sys.path.append("./LSRRTF/")

     # Consume the paragraphs that start the remaining text, one per pass.
     while remaining.startswith("<para"):
         # Deferred import: only needed once a paragraph is present, and it
         # is served from the module cache after the first iteration.
         import rtfPara

         para = rtfPara.rtfPara()

         para_end = remaining.index("</para>") + len("</para>")
         para_raw = remaining[:para_end]
         # Continue with the text after the paragraph; the ``+ 1`` skips the
         # single character (newline) that follows the closing tag.
         remaining = remaining[para_end + 1:]

         # Set raw content
         para.set_raw(para_raw)

         # Update paragraph list
         tmp_objs.append(para)

         # Update content
         tmp_content += para.get_content() + "\n"
Example #2
0
    def parse(self, raw):
        """Parse one raw ``<column>`` node into ``self.members``.

        The ``b``/``t``/``l``/``r`` attributes are read from ``raw`` with
        regular expressions and the ``<column>`` wrapper is stripped.  The
        child elements at the start of the remaining text are then consumed
        one at a time:

        * ``<para>``, ``<table>`` and ``<frame>`` are wrapped in
          ``rtfPara.rtfPara``, ``rtfTable.rtfTable`` and ``rtfFrame.rtfFrame``
          respectively, appended to the object list, and their content is
          added to the column content followed by a newline.
        * ``<dd>`` is skipped after writing a notice to ``sys.stderr``.
        * ``<image>`` is skipped silently.

        Parsing stops at the first text that starts with none of these tags.

        Args:
            raw: Text of the column node, starting with the ``<column ...>`` tag.

        Raises:
            AttributeError: If one of the ``b``/``t``/``l``/``r`` attributes
                is not found in ``raw`` (``re.search`` returns ``None``).
            ValueError: If an element has no matching closing tag.

        Side effects:
            Stores ``_bottom``, ``_top``, ``_left``, ``_right`` (matched
            strings), ``_objs`` (list) and ``_content`` (str) in
            ``self.members``, and makes sure ``./LSRRTF/`` is on ``sys.path``.
        """
        # At first, content is blank because there is no object yet.
        content = ""
        objs = []

        # <column> node attributes; every value is kept as the matched string.
        bottom = re.search(r"b=\"(\d+)\"", raw).group(1)
        top = re.search(r"t=\"(\d+)\"", raw).group(1)
        left = re.search(r"l=\"(\d+)\"", raw).group(1)
        right = re.search(r"r=\"(\d+)\"", raw).group(1)

        # Drop the <column ...> opening line and the closing </column>.
        remaining = re.sub(r"^<column[^>]*>\n", "", raw)
        remaining = re.sub(r"</column>(\n)?$", "", remaining)

        # Make the LSRRTF modules importable without growing sys.path per call.
        if "./LSRRTF/" not in sys.path:
            sys.path.append("./LSRRTF/")

        while True:
            # Pick the handler object (None = skip the element) and the
            # closing tag for whatever element starts the remaining text.
            # Project modules are imported only in the branch that needs them.
            if remaining.startswith("<para"):
                import rtfPara

                element = rtfPara.rtfPara()
                closing_tag = "</para>"
            elif remaining.startswith("<dd"):
                # The message is Czech for "not implemented"; the element is
                # skipped without being stored.
                sys.stderr.write("Para::dd::Neimplementovano\n")
                element = None
                closing_tag = "</dd>"
            elif remaining.startswith("<table"):
                import rtfTable

                element = rtfTable.rtfTable()
                closing_tag = "</table>"
            elif remaining.startswith("<image"):
                # Images are not handled: the element is skipped silently.
                element = None
                closing_tag = "</image>"
            elif remaining.startswith("<frame"):
                import rtfFrame

                element = rtfFrame.rtfFrame()
                closing_tag = "</frame>"
            else:
                break

            element_end = remaining.index(closing_tag) + len(closing_tag)
            element_raw = remaining[:element_end]
            # The ``+ 1`` skips the single character (newline) that follows
            # the closing tag.
            remaining = remaining[element_end + 1:]

            if element is None:
                continue

            # Set raw content
            element.set_raw(element_raw)

            # Update object list
            objs.append(element)

            # Update content
            content = content + element.get_content() + "\n"

        self.members["_bottom"] = bottom
        self.members["_top"] = top
        self.members["_left"] = left
        self.members["_right"] = right
        self.members["_objs"] = objs
        self.members["_content"] = content