def parse(self, raw):
    """Parse a serialized ``<cell>`` node and publish the result in module globals.

    The cell attributes and the concatenated paragraph text are written to the
    module-level names ``tmp_alignment``, ``tmp_vertical_align``,
    ``tmp_grid_row_from``, ``tmp_grid_row_to``, ``tmp_grid_col_from``,
    ``tmp_grid_col_to`` and ``tmp_content``.  The paragraph objects are
    collected in a local list that this function does not store anywhere.

    Args:
        raw: Text of the node.  A leading ``<cell ...>`` line, a trailing
            ``</cell>`` and the first occurrence of each border element are
            stripped before the paragraphs are read.

    Raises:
        AttributeError: If a required attribute is not found in ``raw``.
        ValueError: If a ``<para`` element has no closing ``</para>`` tag.
        ImportError: If a paragraph is found and ``rtfPara`` cannot be imported.
    """
    global tmp_content
    global tmp_alignment
    global tmp_grid_col_to
    global tmp_grid_col_from
    global tmp_grid_row_to
    global tmp_grid_row_from
    global tmp_vertical_align

    # Start from an empty cell: no text and no paragraph objects yet.
    tmp_content = ""
    tmp_objs = []

    # <cell> node attributes.  The grid values stay digit strings; "(\d+)"
    # never captures an empty string, so no zero fallback is needed.
    tmp_alignment = re.search(r"alignment=\"(\w+)\"", raw).group(1)
    tmp_grid_row_from = re.search(r"gridRowFrom=\"(\d+)\"", raw).group(1)
    tmp_grid_row_to = re.search(r"gridRowTill=\"(\d+)\"", raw).group(1)
    tmp_grid_col_from = re.search(r"gridColFrom=\"(\d+)\"", raw).group(1)
    tmp_grid_col_to = re.search(r"gridColTill=\"(\d+)\"", raw).group(1)
    tmp_vertical_align = re.search(r"verticalAlignment=\"(\w+)\"", raw).group(1)

    # Strip the enclosing <cell> tags, then the first occurrence of each border.
    body = re.sub(r"^<cell[^>]*>\n", "", raw)
    body = re.sub(r"</cell>(\n)?$", "", body)
    for border in ("leftBorder", "rightBorder", "topBorder", "bottomBorder"):
        body = re.sub(r"<" + border + r"[^>]*>\n", "", body, count=1)

    # Make the project modules importable without growing sys.path on each call.
    if "./LSRRTF/" not in sys.path:
        sys.path.append("./LSRRTF/")

    closing_tag = "</para>"
    while body.startswith("<para"):
        # Imported at the point of use so a cell without paragraphs does not
        # need the module; repeated imports are served from sys.modules.
        import rtfPara

        para = rtfPara.rtfPara()
        para_end = body.index(closing_tag) + len(closing_tag)
        para_raw = body[:para_end]
        # Continue with what follows this paragraph; the +1 skips the single
        # separator character after the closing tag.  The remainder must be
        # taken from the full text, not from the paragraph just cut out.
        body = body[para_end + 1 :]

        para.set_raw(para_raw)
        tmp_objs.append(para)
        tmp_content += para.get_content() + "\n"
def parse(self, raw):
    """Parse a serialized ``<column>`` node and store the result in ``self.members``.

    Child elements are consumed from the front of the text one at a time:
    ``<para>``, ``<table>`` and ``<frame>`` elements are turned into objects
    and their content is appended to the column content; ``<dd>`` and
    ``<image>`` elements are skipped.  Parsing stops at the first text that
    starts with none of these tags.

    On success the keys ``_bottom``, ``_top``, ``_left``, ``_right`` (digit
    strings taken from the ``b``/``t``/``l``/``r`` attributes), ``_objs``
    (list of child objects) and ``_content`` (child contents, each followed
    by a newline) are set on ``self.members``.

    Args:
        raw: Text of the node.  A leading ``<column ...>`` line and a trailing
            ``</column>`` are stripped before the children are read.

    Raises:
        AttributeError: If one of the ``b``/``t``/``l``/``r`` attributes is
            not found in ``raw``.
        ValueError: If a recognised element has no closing tag.
        ImportError: If the module for an encountered element type cannot be
            imported.
    """
    # <column> node attributes.
    # NOTE(review): the patterns are searched in the whole text and are not
    # anchored to the opening tag or to a word boundary -- confirm no other
    # attribute ending in b/t/l/r can precede them.
    tmp_bottom = re.search(r"b=\"(\d+)\"", raw).group(1)
    tmp_top = re.search(r"t=\"(\d+)\"", raw).group(1)
    tmp_left = re.search(r"l=\"(\d+)\"", raw).group(1)
    tmp_right = re.search(r"r=\"(\d+)\"", raw).group(1)

    # Strip the enclosing <column> tags.
    body = re.sub(r"^<column[^>]*>\n", "", raw)
    body = re.sub(r"</column>(\n)?$", "", body)

    # Make the project modules importable without growing sys.path on each call.
    if "./LSRRTF/" not in sys.path:
        sys.path.append("./LSRRTF/")

    def split_element(text, closing_tag):
        """Return (element up to and including closing_tag, remaining text)."""
        end = text.index(closing_tag) + len(closing_tag)
        # The +1 skips the single separator character after the closing tag.
        return text[:end], text[end + 1 :]

    tmp_objs = []
    tmp_content = ""

    while True:
        # Project modules are imported in the branch that needs them, so a
        # column only requires the modules for element types it contains.
        if body.startswith("<para"):
            import rtfPara

            child = rtfPara.rtfPara()
            child_raw, body = split_element(body, "</para>")
        elif body.startswith("<dd"):
            # Not implemented: report it on stderr and skip the element.
            sys.stderr.write("Para::dd::Neimplementovano\n")
            _, body = split_element(body, "</dd>")
            continue
        elif body.startswith("<table"):
            import rtfTable

            child = rtfTable.rtfTable()
            child_raw, body = split_element(body, "</table>")
        elif body.startswith("<image"):
            # Images are not represented; skip the element.
            _, body = split_element(body, "</image>")
            continue
        elif body.startswith("<frame"):
            import rtfFrame

            child = rtfFrame.rtfFrame()
            child_raw, body = split_element(body, "</frame>")
        else:
            break

        # Common tail for every element that produces an object.
        child.set_raw(child_raw)
        tmp_objs.append(child)
        tmp_content += child.get_content() + "\n"

    self.members["_bottom"] = tmp_bottom
    self.members["_top"] = tmp_top
    self.members["_left"] = tmp_left
    self.members["_right"] = tmp_right
    self.members["_objs"] = tmp_objs
    self.members["_content"] = tmp_content