def get_toc(self, depth = 4):
    """Return the table of contents of this LyX document.

    Scans ``self.body`` for heading paragraphs (Title, Chapter, Section,
    Subsection, Subsubsection) and collects the text of each one.

    depth -- deepest heading level to include (Title is 0, Chapter 1,
             Section 2, Subsection 3, Subsubsection 4).  The default of 4
             includes every heading layout known to this method.

    Returns a list of Paragraph objects, one per heading, built from the
    layout name (a trailing '*' is dropped) and the heading's lines.
    """
    paragraphs_filter = {'Title': 0, 'Chapter': 1, 'Section': 2,
                         'Subsection': 3, 'Subsubsection': 4}
    allowed_insets = ['Quotes']
    allowed_parameters = ('\\paragraph_spacing', '\\noindent',
                          '\\align', '\\labelwidthstring',
                          "\\start_of_appendix", "\\leftindent")

    # Only look for the layouts that are not deeper than requested.
    sections = ['\\begin_layout %s' % name
                for name, level in paragraphs_filter.items()
                if level <= depth]
    if not sections:
        return []

    toc_par = []
    i = 0
    while 1:
        i = find_tokens(self.body, sections, i)
        if i == -1:
            break

        j = find_end_of(self.body, i + 1, '\\begin_layout', '\\end_layout')
        if j == -1:
            self.warning('Incomplete file.', 0)
            break

        section = self.body[i].split()[1]
        # Starred layouts are reported under the plain layout name.
        if section[-1] == '*':
            section = section[:-1]

        par = []

        k = i + 1
        # skip paragraph parameters (never past the end of the paragraph)
        while k < j and (not self.body[k].strip()
                         or self.body[k].split()[0] in allowed_parameters):
            k += 1

        while k < j:
            if check_token(self.body[k], '\\begin_inset'):
                inset = self.body[k].split()[1]
                end = find_end_of_inset(self.body, k)
                if end == -1 or end > j:
                    self.warning('Malformed file.', 0)
                    if end == -1:
                        # Without a closing \end_inset, "k = end + 1"
                        # would rewind to line 0 and could loop forever.
                        break

                # Insets are dropped from the TOC unless explicitly allowed.
                if inset in allowed_insets:
                    par.extend(self.body[k: end + 1])
                k = end + 1
            else:
                par.append(self.body[k])
                k += 1

        # trim empty lines in the end.
        while par and par[-1].strip() == '':
            par.pop()

        toc_par.append(Paragraph(section, par))

        i = j + 1

    return toc_par
def _fix_text_styling(self):
    """Normalize the text-style tags found in ``self.lines``.

    Renames a ``\\color`` line that sits outside any layout (i.e. in the
    header), then walks every line: at each ``\\end_`` line the style tags
    still on ``self.stack`` are closed by inserting lines built from
    ``mixed_tags``; every other line naming a tag in ``mixed_tags`` is
    handed to ``self._close_and_reopen_styling``.

    ``self.lines`` is modified in place and ``self.stack`` is reset.
    """
    lines = self.lines
    self.stack = []
    stack = self.stack

    # First we'll rename the \color in the header, if any.
    i = find_tokens(lines, ('\\color', '\\end_header'), 0)
    if i != -1 and \
       check_token(lines[i], '\\color') and \
       not get_containing_layout(lines, i):
        lines[i] = '\\color_in_header ' + lines[i].split()[1]

    # Now let's get on with the rest
    i = 0
    while i < len(lines):
        line = lines[i]
        self.dbg(4, 'looking at line %d: %s' % (i, line))
        # XXX We really should simplify all this startswith()
        # nonsense.
        if line.startswith('\\end_'):
            self.dbg(2, 'fixing unended style tags %s' % (repr(stack)))
            # Close the open tags innermost-first, just before the
            # \end_ line, stepping past each inserted line.
            for entry in reversed(stack):
                lines.insert(i, '\\' + entry[0] + ' ' + mixed_tags[entry[0]])
                i += 1
            # Empty the list in place: self.stack is the same object.
            del stack[:]
            i += 1
            continue
        if not line.startswith('\\') or ' ' not in line:
            self.dbg(5, '1 i++ at %d' % (i,))
            i += 1
            continue
        # XXX And this parsing nonsense needs refactoring too
        line = _chomp(line[1:])
        sep = line.find(' ')
        a = line[0:sep]
        if a not in mixed_tags:
            self.dbg(5, '2 i++ at %d' % (i,))
            i += 1
            continue
        v = line[sep + 1:]

        # We're opening a new whatever it is.  But we need to handle
        # the possibility that we're changing the whatever it is!
        # How to handle this?  We could convert:
        #   \lang french foo \lang spanish bar \lang english foobar
        # to
        #   <lang a="french">foo<lang a="spanish">bar</lang></lang>
        # or to
        #   <lang a="french">foo</lang><lang a="spanish">bar</lang>
        # The former might be easier: just close all tags up to the
        # farthest one in the stack for the tag being closed, then
        # re-open any others that were found along the way.  But the
        # latter is more sensible, so that's what we do.
        # Invariant: we never have more than one style tag in the
        # stack (XXX assert this in code).
        self.dbg(4, 'seen \\%s at line %d' % (line, i))
        # The helper returns the index at which scanning continues.
        i = self._close_and_reopen_styling(i, a, v)
        self.dbg(5, '3 i++ at %d' % (i,))
def get_next_paragraph(lines, i, format):
    """Find the paragraph after the paragraph that contains line i.

    Searches forward from index ``i`` for the next ``\\layout``,
    ``\\end_float`` or ``\\the_end`` line, skipping over whole insets.

    lines  -- list of document lines
    i      -- index to start searching from
    format -- unused here; kept so existing callers keep working

    Returns the index of the line found, or -1 if there is none.
    """
    tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]

    while i != -1:
        i = find_tokens(lines, tokens, i)
        if i == -1:
            # Nothing found: do not fall through and inspect lines[-1].
            break
        if not check_token(lines[i], "\\begin_inset"):
            return i
        # Jump to the end of the inset; -1 (unterminated) ends the loop.
        i = find_end_of_inset(lines, i)
    return -1
def _fix_text_styling(self):
    """Normalize the text-style tags found in ``self.lines``.

    Renames a ``\\color`` line that sits outside any layout (i.e. in the
    header), then walks every line: at each ``\\end_`` line the style tags
    still on ``self.stack`` are closed by inserting lines built from
    ``mixed_tags``; every other line naming a tag in ``mixed_tags`` is
    handed to ``self._close_and_reopen_styling``.

    ``self.lines`` is modified in place and ``self.stack`` is reset.
    """
    lines = self.lines
    self.stack = []
    stack = self.stack

    # First we'll rename the \color in the header, if any.
    i = find_tokens(lines, ('\\color', '\\end_header'), 0)
    if i != -1 and \
       check_token(lines[i], '\\color') and \
       not get_containing_layout(lines, i):
        lines[i] = '\\color_in_header ' + lines[i].split()[1]

    # Now let's get on with the rest
    i = 0
    while i < len(lines):
        line = lines[i]
        self.dbg(4, 'looking at line %d: %s' % (i, line))
        # XXX We really should simplify all this startswith()
        # nonsense.
        if line.startswith('\\end_'):
            self.dbg(2, 'fixing unended style tags %s' % (repr(stack)))
            # Close the open tags innermost-first, just before the
            # \end_ line, stepping past each inserted line.
            for entry in reversed(stack):
                lines.insert(i, '\\' + entry[0] + ' ' + mixed_tags[entry[0]])
                i += 1
            # Empty the list in place: self.stack is the same object.
            del stack[:]
            i += 1
            continue
        if not line.startswith('\\') or ' ' not in line:
            self.dbg(5, '1 i++ at %d' % (i,))
            i += 1
            continue
        # XXX And this parsing nonsense needs refactoring too
        line = _chomp(line[1:])
        sep = line.find(' ')
        a = line[0:sep]
        if a not in mixed_tags:
            self.dbg(5, '2 i++ at %d' % (i,))
            i += 1
            continue
        v = line[sep + 1:]

        # We're opening a new whatever it is.  But we need to handle
        # the possibility that we're changing the whatever it is!
        # How to handle this?  We could convert:
        #   \lang french foo \lang spanish bar \lang english foobar
        # to
        #   <lang a="french">foo<lang a="spanish">bar</lang></lang>
        # or to
        #   <lang a="french">foo</lang><lang a="spanish">bar</lang>
        # The former might be easier: just close all tags up to the
        # farthest one in the stack for the tag being closed, then
        # re-open any others that were found along the way.  But the
        # latter is more sensible, so that's what we do.
        # Invariant: we never have more than one style tag in the
        # stack (XXX assert this in code).
        self.dbg(4, 'seen \\%s at line %d' % (line, i))
        # The helper returns the index at which scanning continues.
        i = self._close_and_reopen_styling(i, a, v)
        self.dbg(5, '3 i++ at %d' % (i,))
def update_tabular(document):
    """Update tabular to version 1 (xml like syntax).

    For every line of ``document.body`` matched by ``lyxtable_re`` the old
    table description (a header line, one line per row, one per column and
    one per cell, followed by the cell texts) is consumed and replaced by a
    ``\\begin_inset Tabular`` block holding ``<LyXTabular>``, ``<Row>``,
    ``<Column>`` and ``<Cell>`` elements.  ``document.body`` is modified
    in place; ``document.error`` is called when a cell end is not found.
    """
    lines = document.body
    # Compiled once; the patterns do not depend on the table being read.
    col_info_re = re.compile(r'(\d) (\d) (\d) (".*") (".*")')
    cell_re = re.compile(r'(\d) (\d) (\d) (\d) (\d) (\d) (\d) (".*") (".*")')

    i = 0
    while 1:
        i = find_re(lines, lyxtable_re, i)
        if i == -1:
            break

        prop_dict = {"family": "default", "series": "default",
                     "shape": "default", "size": "default",
                     "emph": "default", "bar": "default",
                     "noun": "default", "latex": "default",
                     "color": "default"}

        # remove \LyXTable
        lines[i] = lines[i][:-9]
        i = i + 1
        lines.insert(i, '')
        i = i + 1
        lines[i] = "\\begin_inset Tabular"
        i = i + 1
        # Header line: row count, column count, then six feature fields.
        head = lines[i].split()
        rows = int(head[0])
        columns = int(head[1])

        tabular_line = i
        i = i + 1
        lines.insert(i, '<Features rotate="%s" islongtable="%s" endhead="%s" endfirsthead="%s" endfoot="%s" endlastfoot="%s">' % (head[2], head[3], head[4], head[5], head[6], head[7]))

        i = i + 1

        # One description line per row; rows whose third field is '1' are
        # recorded in cont_row and produce no <Row> of their own.
        row_info = []
        cont_row = []
        for j in range(rows):
            fields = lines[i].split()
            row_info.append(fields)
            if fields[2] == '1':
                cont_row.append(j)
            del lines[i]

        # One description line per column.
        column_info = []
        for j in range(columns):
            column_info.append(col_info_re.match(lines[i]).groups())
            del lines[i]

        # One description line per cell, in row-major order.
        cell_info = []
        cell_col = []
        ncells = 0
        for j in range(rows):
            for k in range(columns):
                # add column location to read properties
                cell_info.append(cell_re.match(lines[i]).groups())
                cell_col.append(k)
                if lines[i][0] != "2":
                    ncells = ncells + 1
                del lines[i]

        lines[tabular_line] = '<LyXTabular version="1" rows="%s" columns="%s">' % (rows - len(cont_row), columns)
        # Drop the line after the cell descriptions, and a blank line
        # following it if there is one.
        del lines[i]
        if not lines[i]:
            del lines[i]

        # Read cells
        cells_read = 0
        cell_content = [[[] for k in range(columns)] for j in range(rows)]

        for j in range(rows):
            for k in range(columns):
                m = j * columns + k
                # Cells flagged '2' carry no text of their own.
                if cell_info[m][0] == '2':
                    continue

                if cells_read == ncells - 1:
                    # the end variable refers to cell end, not to document end.
                    end = find_tokens(lines, ['\\layout', '\\the_end', '\\end_deeper', '\\end_float'], i)
                else:
                    end = find_token(lines, '\\newline', i)
                if end == -1:
                    document.error("Malformed LyX file.")

                # Move the cell's lines out of the document.
                end = end - i
                while end > 0:
                    cell_content[j][k].append(lines[i])
                    del lines[i]
                    end = end - 1

                if lines[i].find('\\newline') != -1:
                    del lines[i]
                cells_read = cells_read + 1

        tmp = []
        tmp.append("")

        for j in range(rows):
            if j in cont_row:
                continue
            tmp.append('<Row topline="%s" bottomline="%s" newpage="%s">' % (row_info[j][0], row_info[j][1], row_info[j][3]))

            for k in range(columns):
                # Column attributes are written in the first row only.
                if j:
                    tmp.append('<Column>')
                else:
                    tmp.append('<Column alignment="%s" valignment="0" leftline="%s" rightline="%s" width=%s special=%s>' % (column_info[k][0], column_info[k][1], column_info[k][2], column_info[k][3], column_info[k][4]))
                m = j * columns + k

                leftline = int(column_info[k][1])
                if cell_info[m][0] == '1':
                    # Multicolumn main cell: its right line is that of the
                    # last column it spans.  The bound must allow stepping
                    # over the table's very last cell, otherwise a span that
                    # ends there picks the previous column.
                    n = m + 1
                    while n < rows * columns and cell_info[n][0] == '2':
                        n = n + 1
                    rightline = int(column_info[cell_col[n - 1]][2])
                else:
                    # not a multicolumn main cell
                    rightline = int(column_info[k][2])

                tmp.append('<Cell multicolumn="%s" alignment="%s" valignment="0" topline="%s" bottomline="%s" leftline="%d" rightline="%d" rotate="%s" usebox="%s" width=%s special=%s>' % (cell_info[m][0], cell_info[m][1], cell_info[m][2], cell_info[m][3], leftline, rightline, cell_info[m][5], cell_info[m][6], cell_info[m][7], cell_info[m][8]))
                tmp.append('\\begin_inset Text')
                tmp.append('')
                tmp.append('\\layout %s' % document.default_layout)
                tmp.append('')

                if cell_info[m][0] != '2':
                    paragraph = []
                    if cell_info[m][4] == '1':
                        # Fifth cell field set: append the text of the
                        # same column from the following rows for as long
                        # as the flag stays set.
                        below = j
                        paragraph = paragraph + cell_content[j][k]
                        while cell_info[m][4] == '1':
                            m = m + columns
                            below = below + 1
                            if below >= rows:
                                break
                            paragraph = paragraph + cell_content[below][k]
                    else:
                        paragraph = cell_content[j][k]

                    tmp = tmp + set_paragraph_properties(paragraph, prop_dict)

                tmp.append('\\end_inset ')
                tmp.append('</Cell>')
                tmp.append('</Column>')
            tmp.append('</Row>')

        tmp.append('</LyXTabular>')
        tmp.append('')
        tmp.append('\\end_inset ')
        tmp.append('')
        tmp.append('')
        lines[i:i] = tmp

        # Continue searching after the table just written.
        i = i + len(tmp)
def remove_oldert(document):
    """Remove old ERT inset.

    Each region of ``document.body`` that starts at a ``\\latex latex`` or
    ``\\layout LaTeX`` line is rewritten: its text is wrapped in
    ``\\begin_inset ERT`` blocks, while insets, ``\\hfill`` and the lines
    matched by ``spchar_rexp`` / ``move_rexp`` are placed outside them.
    Afterwards every remaining ``\\latex `` line is deleted.  The body is
    modified in place.
    """
    ert_begin = ["\\begin_inset ERT",
                 "status Collapsed",
                 "",
                 '\\layout %s' % document.default_layout,
                 ""]
    lines = document.body
    i = 0
    while 1:
        i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
        if i == -1:
            break
        j = i + 1
        while 1:
            # \end_inset is for ert inside a tabular cell. The other tokens
            # are obvious.
            j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"],
                            j)
            if check_token(lines[j], "\\begin_inset"):
                # Nested insets belong to the region: skip them whole.
                j = find_end_of_inset(lines, j) + 1
            else:
                break

        if check_token(lines[j], "\\layout"):
            # Keep any \begin_deeper lines together with the \layout
            # that follows them.
            while j - 1 >= 0 and check_token(lines[j - 1], "\\begin_deeper"):
                j = j - 1

        # We need to remove insets, special chars & font commands from ERT text
        new = []
        new2 = []
        if check_token(lines[i], "\\layout LaTeX"):
            # Escaped backslash: '\l' is an invalid escape sequence.
            new = ['\\layout %s' % document.default_layout, "", ""]

        k = i + 1
        while 1:
            k2 = find_re(lines, ert_rexp, k, j)
            inset = hfill = specialchar = 0
            if k2 == -1:
                k2 = j
            elif check_token(lines[k2], "\\begin_inset"):
                inset = 1
            elif check_token(lines[k2], "\\hfill"):
                hfill = 1
                del lines[k2]
                j = j - 1
            else:
                specialchar = 1
                mo = spchar_rexp.match(lines[k2])
                lines[k2] = mo.group(1)
                specialchar_str = mo.group(2)
                k2 = k2 + 1

            tmp = []
            for line in lines[k:k2]:
                # Move some lines outside the ERT inset:
                if move_rexp.match(line):
                    if new2 == []:
                        # This is not necessary, but we want the output to be
                        # as similar as posible to the lyx format
                        new2 = [""]
                    new2.append(line)
                elif not check_token(line, "\\latex"):
                    tmp.append(line)

            if is_empty(tmp):
                # any() instead of "filter(...) != []": on Python 3 filter()
                # returns an iterator, which never equals [].
                if any(x != "" for x in tmp):
                    if new == []:
                        # This is not necessary, but we want the output to be
                        # as similar as posible to the lyx format
                        lines[i - 1] = lines[i - 1] + " "
                    else:
                        new = new + [" "]
            else:
                new = new + ert_begin + tmp + ["\\end_inset ", ""]

            if inset:
                k3 = find_end_of_inset(lines, k2)
                # Put an empty line after \end_inset
                new = new + [""] + lines[k2:k3 + 1] + [""]
                k = k3 + 1
                # Skip the empty line after \end_inset
                if not is_nonempty_line(lines[k]):
                    k = k + 1
                    new.append("")
            elif hfill:
                new = new + ["\\hfill", ""]
                k = k2
            elif specialchar:
                if new == []:
                    # This is not necessary, but we want the output to be
                    # as similar as posible to the lyx format
                    lines[i - 1] = lines[i - 1] + specialchar_str
                    new = [""]
                else:
                    new = new + [specialchar_str, ""]
                k = k2
            else:
                break

        new = new + new2
        # Keep the terminating line unless it is a \latex token.
        if not check_token(lines[j], "\\latex "):
            new = new + [""] + [lines[j]]
        lines[i:j + 1] = new
        i = i + 1

    # Delete remaining "\latex xxx" tokens
    i = 0
    while 1:
        i = find_token(lines, "\\latex ", i)
        if i == -1:
            break
        del lines[i]
def remove_pextra(document):
    """Remove pextra token.

    Every line of ``document.body`` matched by ``pextra_type2_rexp`` is
    rewritten: for type "1" (indented paragraph) the pextra tokens become
    ``\\leftindent <width>``; otherwise (minipage) the tokens are removed
    and the paragraph, together with the following paragraphs that belong
    to it, is wrapped in a ``\\begin_inset Minipage`` ... ``\\end_inset``
    block.  The body is modified in place.
    """
    lines = document.body
    i = 0
    # Set when group 7 of the pextra match is "1" on a following paragraph;
    # the next minipage then omits the leading \layout line.
    flag = 0
    while 1:
        i = find_re(lines, pextra_type2_rexp, i)
        if i == -1:
            break

        # Sometimes the \pextra_widthp argument comes in it own
        # line. If that happens insert it back in this line.
        if i + 1 < len(lines) and pextra_widthp.search(lines[i + 1]):
            lines[i] = lines[i] + ' ' + lines[i + 1]
            del lines[i + 1]

        mo = pextra_rexp.search(lines[i])
        width = get_width(mo)

        if mo.group(1) == "1":
            # handle \pextra_type 1 (indented paragraph)
            # A callable replacement is inserted literally; as a template
            # string, "\leftindent" is rejected by re.sub on Python 3.7+
            # ("bad escape \l").
            lines[i] = re.sub(pextra_rexp,
                              lambda _match: "\\leftindent " + width + " ",
                              lines[i])
            i = i + 1
            continue

        # handle \pextra_type 2 (minipage)
        position = mo.group(3)
        hfill = mo.group(5)
        lines[i] = re.sub(pextra_rexp, "", lines[i])

        start = ["\\begin_inset Minipage",
                 "position " + position,
                 "inner_position 0",
                 'height "0pt"',
                 'width "%s"' % width,
                 "collapsed false"
                 ]
        if flag:
            flag = 0
            if hfill:
                # Escaped backslash: '\h' is an invalid escape sequence.
                start = ["", "\\hfill", ""] + start
        else:
            start = ['\\layout %s' % document.default_layout, ''] + start

        # The minipage starts at the \layout line of this paragraph.
        j0 = find_token_backwards(lines, "\\layout", i - 1)
        j = get_next_paragraph(lines, i, document.format + 1)

        while 1:
            # collect more paragraphs to the minipage
            if j == -1 or not check_token(lines[j], "\\layout"):
                break
            i = find_re(lines, pextra_type2_rexp2, j + 1)
            if i == -1:
                break
            mo = pextra_rexp.search(lines[i])
            if not mo:
                break
            if mo.group(7) == "1":
                flag = 1
                break
            lines[i] = re.sub(pextra_rexp, "", lines[i])
            j = find_tokens(lines, ["\\layout", "\\end_float"], i + 1)

        mid = lines[j0:j]
        end = ["\\end_inset "]

        lines[j0:j] = start + mid + end
        i = i + 1
def remove_oldert(document):
    """Remove old ERT inset.

    Each region of ``document.body`` that starts at a ``\\latex latex`` or
    ``\\layout LaTeX`` line is rewritten: its text is wrapped in
    ``\\begin_inset ERT`` blocks, while insets, ``\\hfill`` and the lines
    matched by ``spchar_rexp`` / ``move_rexp`` are placed outside them.
    Afterwards every remaining ``\\latex `` line is deleted.  The body is
    modified in place.
    """
    ert_begin = [
        "\\begin_inset ERT", "status Collapsed", "",
        '\\layout %s' % document.default_layout, ""
    ]
    lines = document.body
    i = 0
    while 1:
        i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
        if i == -1:
            break
        j = i + 1
        while 1:
            # \end_inset is for ert inside a tabular cell. The other tokens
            # are obvious.
            j = find_tokens(lines, [
                "\\latex default", "\\layout", "\\begin_inset", "\\end_inset",
                "\\end_float", "\\the_end"
            ], j)
            if check_token(lines[j], "\\begin_inset"):
                # Nested insets belong to the region: skip them whole.
                j = find_end_of_inset(lines, j) + 1
            else:
                break

        if check_token(lines[j], "\\layout"):
            # Keep any \begin_deeper lines together with the \layout
            # that follows them.
            while j - 1 >= 0 and check_token(lines[j - 1], "\\begin_deeper"):
                j = j - 1

        # We need to remove insets, special chars & font commands from ERT text
        new = []
        new2 = []
        if check_token(lines[i], "\\layout LaTeX"):
            # Escaped backslash: '\l' is an invalid escape sequence.
            new = ['\\layout %s' % document.default_layout, "", ""]

        k = i + 1
        while 1:
            k2 = find_re(lines, ert_rexp, k, j)
            inset = hfill = specialchar = 0
            if k2 == -1:
                k2 = j
            elif check_token(lines[k2], "\\begin_inset"):
                inset = 1
            elif check_token(lines[k2], "\\hfill"):
                hfill = 1
                del lines[k2]
                j = j - 1
            else:
                specialchar = 1
                mo = spchar_rexp.match(lines[k2])
                lines[k2] = mo.group(1)
                specialchar_str = mo.group(2)
                k2 = k2 + 1

            tmp = []
            for line in lines[k:k2]:
                # Move some lines outside the ERT inset:
                if move_rexp.match(line):
                    if new2 == []:
                        # This is not necessary, but we want the output to be
                        # as similar as posible to the lyx format
                        new2 = [""]
                    new2.append(line)
                elif not check_token(line, "\\latex"):
                    tmp.append(line)

            if is_empty(tmp):
                # Only add a space when tmp holds something besides "".
                if any(x != "" for x in tmp):
                    if new == []:
                        # This is not necessary, but we want the output to be
                        # as similar as posible to the lyx format
                        lines[i - 1] = lines[i - 1] + " "
                    else:
                        new = new + [" "]
            else:
                new = new + ert_begin + tmp + ["\\end_inset ", ""]

            if inset:
                k3 = find_end_of_inset(lines, k2)
                # Put an empty line after \end_inset
                new = new + [""] + lines[k2:k3 + 1] + [""]
                k = k3 + 1
                # Skip the empty line after \end_inset
                if not is_nonempty_line(lines[k]):
                    k = k + 1
                    new.append("")
            elif hfill:
                new = new + ["\\hfill", ""]
                k = k2
            elif specialchar:
                if new == []:
                    # This is not necessary, but we want the output to be
                    # as similar as posible to the lyx format
                    lines[i - 1] = lines[i - 1] + specialchar_str
                    new = [""]
                else:
                    new = new + [specialchar_str, ""]
                k = k2
            else:
                break

        new = new + new2
        # Keep the terminating line unless it is a \latex token.
        if not check_token(lines[j], "\\latex "):
            new = new + [""] + [lines[j]]
        lines[i:j + 1] = new
        i = i + 1

    # Delete remaining "\latex xxx" tokens
    i = 0
    while 1:
        i = find_token(lines, "\\latex ", i)
        if i == -1:
            break
        del lines[i]
def remove_pextra(document):
    """Remove pextra token.

    Every line of ``document.body`` matched by ``pextra_type2_rexp`` is
    rewritten: for type "1" (indented paragraph) the pextra tokens become
    ``\\leftindent <width>``; otherwise (minipage) the tokens are removed
    and the paragraph, together with the following paragraphs that belong
    to it, is wrapped in a ``\\begin_inset Minipage`` ... ``\\end_inset``
    block.  The body is modified in place.
    """
    lines = document.body
    i = 0
    # Set when group 7 of the pextra match is "1" on a following paragraph;
    # the next minipage then omits the leading \layout line.
    flag = 0
    while 1:
        i = find_re(lines, pextra_type2_rexp, i)
        if i == -1:
            break

        # Sometimes the \pextra_widthp argument comes in it own
        # line. If that happens insert it back in this line.
        if i + 1 < len(lines) and pextra_widthp.search(lines[i + 1]):
            lines[i] = lines[i] + ' ' + lines[i + 1]
            del lines[i + 1]

        mo = pextra_rexp.search(lines[i])
        width = get_width(mo)

        if mo.group(1) == "1":
            # handle \pextra_type 1 (indented paragraph)
            # A callable replacement is inserted literally; as a template
            # string, "\leftindent" is rejected by re.sub on Python 3.7+
            # ("bad escape \l").
            lines[i] = re.sub(pextra_rexp,
                              lambda _match: "\\leftindent " + width + " ",
                              lines[i])
            i = i + 1
            continue

        # handle \pextra_type 2 (minipage)
        position = mo.group(3)
        hfill = mo.group(5)
        lines[i] = re.sub(pextra_rexp, "", lines[i])

        start = [
            "\\begin_inset Minipage", "position " + position,
            "inner_position 0", 'height "0pt"',
            'width "%s"' % width, "collapsed false"
        ]
        if flag:
            flag = 0
            if hfill:
                # Escaped backslash: '\h' is an invalid escape sequence.
                start = ["", "\\hfill", ""] + start
        else:
            start = ['\\layout %s' % document.default_layout, ''] + start

        # The minipage starts at the \layout line of this paragraph.
        j0 = find_token_backwards(lines, "\\layout", i - 1)
        j = get_next_paragraph(lines, i, document.format + 1)

        while 1:
            # collect more paragraphs to the minipage
            if j == -1 or not check_token(lines[j], "\\layout"):
                break
            i = find_re(lines, pextra_type2_rexp2, j + 1)
            if i == -1:
                break
            mo = pextra_rexp.search(lines[i])
            if not mo:
                break
            if mo.group(7) == "1":
                flag = 1
                break
            lines[i] = re.sub(pextra_rexp, "", lines[i])
            j = find_tokens(lines, ["\\layout", "\\end_float"], i + 1)

        mid = lines[j0:j]
        end = ["\\end_inset "]

        lines[j0:j] = start + mid + end
        i = i + 1
def update_tabular(document):
    """Update tabular to version 1 (xml like syntax).

    For every line of ``document.body`` matched by ``lyxtable_re`` the old
    table description (a header line, one line per row, one per column and
    one per cell, followed by the cell texts) is consumed and replaced by a
    ``\\begin_inset Tabular`` block holding ``<LyXTabular>``, ``<Row>``,
    ``<Column>`` and ``<Cell>`` elements.  ``document.body`` is modified
    in place; ``document.error`` is called when a cell end is not found.
    """
    lines = document.body
    # Compiled once; the patterns do not depend on the table being read.
    col_info_re = re.compile(r'(\d) (\d) (\d) (".*") (".*")')
    cell_re = re.compile(r'(\d) (\d) (\d) (\d) (\d) (\d) (\d) (".*") (".*")')

    i = 0
    while 1:
        i = find_re(lines, lyxtable_re, i)
        if i == -1:
            break

        prop_dict = {"family": "default", "series": "default",
                     "shape": "default", "size": "default",
                     "emph": "default", "bar": "default",
                     "noun": "default", "latex": "default",
                     "color": "default"}

        # remove \LyXTable
        lines[i] = lines[i][:-9]
        i = i + 1
        lines.insert(i, '')
        i = i + 1
        lines[i] = "\\begin_inset Tabular"
        i = i + 1
        # Header line: row count, column count, then six feature fields.
        head = lines[i].split()
        rows = int(head[0])
        columns = int(head[1])

        tabular_line = i
        i = i + 1
        lines.insert(i, '<Features rotate="%s" islongtable="%s" endhead="%s" endfirsthead="%s" endfoot="%s" endlastfoot="%s">' % (head[2], head[3], head[4], head[5], head[6], head[7]))

        i = i + 1

        # One description line per row; rows whose third field is '1' are
        # recorded in cont_row and produce no <Row> of their own.
        row_info = []
        cont_row = []
        for j in range(rows):
            fields = lines[i].split()
            row_info.append(fields)
            if fields[2] == '1':
                cont_row.append(j)
            del lines[i]

        # One description line per column.
        column_info = []
        for j in range(columns):
            column_info.append(col_info_re.match(lines[i]).groups())
            del lines[i]

        # One description line per cell, in row-major order.
        cell_info = []
        cell_col = []
        ncells = 0
        for j in range(rows):
            for k in range(columns):
                # add column location to read properties
                cell_info.append(cell_re.match(lines[i]).groups())
                cell_col.append(k)
                if lines[i][0] != "2":
                    ncells = ncells + 1
                del lines[i]

        lines[tabular_line] = '<LyXTabular version="1" rows="%s" columns="%s">' % (rows - len(cont_row), columns)
        # Drop the line after the cell descriptions, and a blank line
        # following it if there is one.
        del lines[i]
        if not lines[i]:
            del lines[i]

        # Read cells
        cells_read = 0
        cell_content = [[[] for k in range(columns)] for j in range(rows)]

        for j in range(rows):
            for k in range(columns):
                m = j * columns + k
                # Cells flagged '2' carry no text of their own.
                if cell_info[m][0] == '2':
                    continue

                if cells_read == ncells - 1:
                    # the end variable refers to cell end, not to document end.
                    end = find_tokens(lines, ['\\layout', '\\the_end', '\\end_deeper', '\\end_float'], i)
                else:
                    end = find_token(lines, '\\newline', i)
                if end == -1:
                    document.error("Malformed LyX file.")

                # Move the cell's lines out of the document.
                end = end - i
                while end > 0:
                    cell_content[j][k].append(lines[i])
                    del lines[i]
                    end = end - 1

                if lines[i].find('\\newline') != -1:
                    del lines[i]
                cells_read = cells_read + 1

        tmp = []
        tmp.append("")

        for j in range(rows):
            if j in cont_row:
                continue
            tmp.append('<Row topline="%s" bottomline="%s" newpage="%s">' % (row_info[j][0], row_info[j][1], row_info[j][3]))

            for k in range(columns):
                # Column attributes are written in the first row only.
                if j:
                    tmp.append('<Column>')
                else:
                    tmp.append('<Column alignment="%s" valignment="0" leftline="%s" rightline="%s" width=%s special=%s>' % (column_info[k][0], column_info[k][1], column_info[k][2], column_info[k][3], column_info[k][4]))
                m = j * columns + k

                leftline = int(column_info[k][1])
                if cell_info[m][0] == '1':
                    # Multicolumn main cell: its right line is that of the
                    # last column it spans.  The bound must allow stepping
                    # over the table's very last cell, otherwise a span that
                    # ends there picks the previous column.
                    n = m + 1
                    while n < rows * columns and cell_info[n][0] == '2':
                        n = n + 1
                    rightline = int(column_info[cell_col[n - 1]][2])
                else:
                    # not a multicolumn main cell
                    rightline = int(column_info[k][2])

                tmp.append('<Cell multicolumn="%s" alignment="%s" valignment="0" topline="%s" bottomline="%s" leftline="%d" rightline="%d" rotate="%s" usebox="%s" width=%s special=%s>' % (cell_info[m][0], cell_info[m][1], cell_info[m][2], cell_info[m][3], leftline, rightline, cell_info[m][5], cell_info[m][6], cell_info[m][7], cell_info[m][8]))
                tmp.append('\\begin_inset Text')
                tmp.append('')
                tmp.append('\\layout %s' % document.default_layout)
                tmp.append('')

                if cell_info[m][0] != '2':
                    paragraph = []
                    if cell_info[m][4] == '1':
                        # Fifth cell field set: append the text of the
                        # same column from the following rows for as long
                        # as the flag stays set.
                        below = j
                        paragraph = paragraph + cell_content[j][k]
                        while cell_info[m][4] == '1':
                            m = m + columns
                            below = below + 1
                            if below >= rows:
                                break
                            paragraph = paragraph + cell_content[below][k]
                    else:
                        paragraph = cell_content[j][k]

                    tmp = tmp + set_paragraph_properties(paragraph, prop_dict)

                tmp.append('\\end_inset ')
                tmp.append('</Cell>')
                tmp.append('</Column>')
            tmp.append('</Row>')

        tmp.append('</LyXTabular>')
        tmp.append('')
        tmp.append('\\end_inset ')
        tmp.append('')
        tmp.append('')
        lines[i:i] = tmp

        # Continue searching after the table just written.
        i = i + len(tmp)