def header_update(document):
    r"""Update the document header in place.

    Walks ``document.header`` and, for every line:

    * strips a single trailing space;
    * renames ``\epsfig`` to ``\graphics``;
    * rewrites ``\papersize`` (``usletter`` becomes ``letterpaper``;
      ``a4wide`` becomes ``Default`` plus an extra
      ``\paperpackage widemarginsa4`` line) and inserts ``\use_geometry 0``
      and ``\use_amsmath 0`` right after it;
    * replaces ``\baselinestretch <value>`` by the matching ``\spacing``
      line (``single``, ``onehalf``, ``double`` or ``other <value>``).

    Returns nothing; ``document.header`` is modified in place.
    """
    lines = document.header
    i = 0
    # The length must be re-read on every pass: the \papersize branch inserts
    # lines, and a length cached up front would leave the last header lines
    # unprocessed.
    while i < len(lines):
        if lines[i][-1:] == ' ':
            lines[i] = lines[i][:-1]

        if check_token(lines[i], '\\epsfig'):
            lines[i] = lines[i].replace('\\epsfig', '\\graphics')
            i = i + 1
            continue

        if check_token(lines[i], '\\papersize'):
            size = lines[i].split()[1]
            new_size = size
            paperpackage = ""

            if size == 'usletter':
                new_size = 'letterpaper'
            if size == 'a4wide':
                new_size = 'Default'
                paperpackage = "widemarginsa4"

            lines[i] = '\\papersize ' + new_size
            i = i + 1
            if paperpackage:
                lines.insert(i, '\\paperpackage ' + paperpackage)
                i = i + 1

            # The two inserted lines are skipped, not re-examined.
            lines.insert(i, '\\use_geometry 0')
            lines.insert(i + 1, '\\use_amsmath 0')
            i = i + 2
            continue

        if check_token(lines[i], '\\baselinestretch'):
            size = lines[i].split()[1]
            if size == '1.00':
                name = 'single'
            elif size == '1.50':
                name = 'onehalf'
            elif size == '2.00':
                name = 'double'
            else:
                name = 'other ' + size
            lines[i] = '\\spacing %s ' % name
            i = i + 1
            continue

        i = i + 1
def header_update(document):
    r"""Update the document header in place.

    Walks ``document.header`` and, for every line:

    * strips a single trailing space;
    * renames ``\epsfig`` to ``\graphics``;
    * rewrites ``\papersize`` (``usletter`` becomes ``letterpaper``;
      ``a4wide`` becomes ``Default`` plus an extra
      ``\paperpackage widemarginsa4`` line) and inserts ``\use_geometry 0``
      and ``\use_amsmath 0`` right after it;
    * replaces ``\baselinestretch <value>`` by the matching ``\spacing``
      line (``single``, ``onehalf``, ``double`` or ``other <value>``).

    Returns nothing; ``document.header`` is modified in place.
    """
    lines = document.header
    i = 0
    # The length must be re-read on every pass: the \papersize branch inserts
    # lines, and a length cached up front would leave the last header lines
    # unprocessed.
    while i < len(lines):
        if lines[i][-1:] == ' ':
            lines[i] = lines[i][:-1]

        if check_token(lines[i], '\\epsfig'):
            lines[i] = lines[i].replace('\\epsfig', '\\graphics')
            i = i + 1
            continue

        if check_token(lines[i], '\\papersize'):
            size = lines[i].split()[1]
            new_size = size
            paperpackage = ""

            if size == 'usletter':
                new_size = 'letterpaper'
            if size == 'a4wide':
                new_size = 'Default'
                paperpackage = "widemarginsa4"

            lines[i] = '\\papersize ' + new_size
            i = i + 1
            if paperpackage:
                lines.insert(i, '\\paperpackage ' + paperpackage)
                i = i + 1

            # The two inserted lines are skipped, not re-examined.
            lines.insert(i, '\\use_geometry 0')
            lines.insert(i + 1, '\\use_amsmath 0')
            i = i + 2
            continue

        if check_token(lines[i], '\\baselinestretch'):
            size = lines[i].split()[1]
            if size == '1.00':
                name = 'single'
            elif size == '1.50':
                name = 'onehalf'
            elif size == '2.00':
                name = 'double'
            else:
                name = 'other ' + size
            lines[i] = '\\spacing %s ' % name
            i = i + 1
            continue

        i = i + 1
def is_ert_paragraph(document, i):
    r"""Tell whether line i starts a default-layout paragraph wrapping an ERT inset.

    Returns 0 when line i is not a ``\layout`` line, when its layout is not
    the document default, or when the next non-empty line does not open an
    ERT inset.  Otherwise returns the result of checking that the first
    non-empty line after the inset is again a ``\layout`` line.
    """
    body = document.body
    if not check_token(body[i], "\\layout"):
        return 0

    layout = get_layout(body[i], document.default_layout)
    if not document.is_default_layout(layout):
        return 0

    inset_start = find_nonempty_line(body, i + 1)
    if not check_token(body[inset_start], "\\begin_inset ERT"):
        return 0

    # The paragraph qualifies only if the next paragraph begins right after
    # the inset closes.
    inset_end = find_end_of_inset(body, inset_start)
    following = find_nonempty_line(body, inset_end + 1)
    return check_token(body[following], "\\layout")
def is_ert_paragraph(document, i):
    r"""Tell whether line i starts a default-layout paragraph wrapping an ERT inset.

    Returns 0 when line i is not a ``\layout`` line, when its layout is not
    the document default, or when the next non-empty line does not open an
    ERT inset.  Otherwise returns the result of checking that the first
    non-empty line after the inset is again a ``\layout`` line.
    """
    body = document.body
    if not check_token(body[i], "\\layout"):
        return 0

    layout = get_layout(body[i], document.default_layout)
    if not document.is_default_layout(layout):
        return 0

    inset_start = find_nonempty_line(body, i + 1)
    if not check_token(body[inset_start], "\\begin_inset ERT"):
        return 0

    # The paragraph qualifies only if the next paragraph begins right after
    # the inset closes.
    inset_end = find_end_of_inset(body, inset_start)
    following = find_nonempty_line(body, inset_end + 1)
    return check_token(body[following], "\\layout")
def get_toc(self, depth=4):
    """Return the TOC of this LyX document.

    Scans ``self.body`` for Title, Chapter, Section, Subsection and
    Subsubsection paragraphs.  For each one a ``Paragraph(section, par)``
    is appended to the result, where ``section`` is the layout name with a
    trailing ``*`` removed and ``par`` holds the paragraph's lines with the
    leading paragraph parameters skipped, all insets except ``Quotes``
    dropped, and trailing blank lines trimmed.

    ``depth`` is accepted but not used by this implementation.

    Scanning stops with a warning when a paragraph has no matching
    ``\\end_layout``.  An inset without an end is reported and the rest of
    that paragraph is skipped.
    """
    paragraphs_filter = {'Title': 0, 'Chapter': 1, 'Section': 2,
                         'Subsection': 3, 'Subsubsection': 4}
    allowed_insets = ['Quotes']
    allowed_parameters = ('\\paragraph_spacing', '\\noindent', '\\align',
                          '\\labelwidthstring', "\\start_of_appendix",
                          "\\leftindent")

    sections = ['\\begin_layout %s' % section
                for section in paragraphs_filter]
    toc_par = []

    i = 0
    while 1:
        i = find_tokens(self.body, sections, i)
        if i == -1:
            break

        j = find_end_of(self.body, i + 1, '\\begin_layout', '\\end_layout')
        if j == -1:
            self.warning('Incomplete file.', 0)
            break

        section = self.body[i].split()[1]
        if section[-1] == '*':
            section = section[:-1]

        par = []

        k = i + 1
        # skip paragraph parameters (never past the end of the paragraph)
        while k < j and (not self.body[k].strip()
                         or self.body[k].split()[0] in allowed_parameters):
            k += 1

        while k < j:
            if check_token(self.body[k], '\\begin_inset'):
                inset = self.body[k].split()[1]
                end = find_end_of_inset(self.body, k)
                if end == -1 or end > j:
                    self.warning('Malformed file.', 0)
                if end == -1:
                    # Without an inset end, "k = end + 1" would restart the
                    # scan at line 0 and could loop forever.
                    break

                if inset in allowed_insets:
                    par.extend(self.body[k: end + 1])
                k = end + 1
            else:
                par.append(self.body[k])
                k += 1

        # trim empty lines in the end.
        while par and par[-1].strip() == '':
            par.pop()

        toc_par.append(Paragraph(section, par))

        i = j + 1

    return toc_par
def _fix_text_styling(self):
    """Normalise text-style tags in ``self.lines``.

    A ``\\color`` found before ``\\end_header`` and outside any layout is
    renamed to ``\\color_in_header``.  Then every line is visited: at each
    ``\\end_*`` line the styles still on ``self.stack`` are closed by
    inserting their ``mixed_tags`` value, and each line carrying a tag
    listed in ``mixed_tags`` is handed to
    ``self._close_and_reopen_styling``, which returns the index to
    continue from.
    """
    lines = self.lines
    self.stack = []
    stack = self.stack

    # Header pass: only the first \color / \end_header hit is examined.
    header_idx = find_tokens(lines, ('\\color', '\\end_header'), 0)
    if (header_idx != -1
            and check_token(lines[header_idx], '\\color')
            and not get_containing_layout(lines, header_idx)):
        lines[header_idx] = ('\\color_in_header '
                             + lines[header_idx].split()[1])

    # Body pass.
    i = 0
    while i < len(lines):
        line = lines[i]
        self.dbg(4, 'looking at line %d: %s' % (i, line))

        if line.startswith('\\end_'):
            # Close whatever is still open, innermost first, in front of
            # the \end_ line.
            self.dbg(2, 'fixing unended style tags %s' % (repr(stack)))
            for entry in reversed(stack):
                lines.insert(i, '\\' + entry[0] + ' ' + mixed_tags[entry[0]])
                i += 1
            del stack[:]
            i += 1
            continue

        if not line.startswith('\\') or ' ' not in line:
            self.dbg(5, '1 i++ at %d' % (i,))
            i += 1
            continue

        # Split "\tag value" into tag and value.
        line = _chomp(line[1:])
        space_at = line.find(' ')
        a = line[0:space_at]
        if a not in mixed_tags:
            self.dbg(5, '2 i++ at %d' % (i,))
            i += 1
            continue

        v = line[space_at + 1:]

        # A style tag is being opened or changed.  Changing one while it
        # is open is emitted as close-then-reopen, e.g.
        #   <lang a="french">foo</lang><lang a="spanish">bar</lang>
        # and not as nested tags.  The stack is meant to hold at most one
        # entry per style tag.
        self.dbg(4, 'seen \\%s at line %d' % (line, i))
        i = self._close_and_reopen_styling(i, a, v)
        self.dbg(5, '3 i++ at %d' % (i,))
def get_next_paragraph(lines, i, format):
    r"""Find the paragraph after the paragraph that contains line i.

    Searches forward from ``i`` for the next ``\layout``, ``\end_float`` or
    ``\the_end`` line, jumping over complete insets.

    ``format`` is accepted but not used by this implementation.

    Returns the index of that line, or -1 when none is found.
    """
    tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]
    while i != -1:
        i = find_tokens(lines, tokens, i)
        if i == -1:
            # Nothing left to find; do not index lines[-1].
            return -1
        if not check_token(lines[i], "\\begin_inset"):
            return i
        # Skip the whole inset and resume searching from its end.
        i = find_end_of_inset(lines, i)
    return -1
def _fix_text_styling(self):
    """Normalise text-style tags in ``self.lines``.

    A ``\\color`` found before ``\\end_header`` and outside any layout is
    renamed to ``\\color_in_header``.  Then every line is visited: at each
    ``\\end_*`` line the styles still on ``self.stack`` are closed by
    inserting their ``mixed_tags`` value, and each line carrying a tag
    listed in ``mixed_tags`` is handed to
    ``self._close_and_reopen_styling``, which returns the index to
    continue from.
    """
    lines = self.lines
    self.stack = []
    stack = self.stack

    # Header pass: only the first \color / \end_header hit is examined.
    header_idx = find_tokens(lines, ('\\color', '\\end_header'), 0)
    if (header_idx != -1
            and check_token(lines[header_idx], '\\color')
            and not get_containing_layout(lines, header_idx)):
        lines[header_idx] = ('\\color_in_header '
                             + lines[header_idx].split()[1])

    # Body pass.
    i = 0
    while i < len(lines):
        line = lines[i]
        self.dbg(4, 'looking at line %d: %s' % (i, line))

        if line.startswith('\\end_'):
            # Close whatever is still open, innermost first, in front of
            # the \end_ line.
            self.dbg(2, 'fixing unended style tags %s' % (repr(stack)))
            for entry in reversed(stack):
                lines.insert(i, '\\' + entry[0] + ' ' + mixed_tags[entry[0]])
                i += 1
            del stack[:]
            i += 1
            continue

        if not line.startswith('\\') or ' ' not in line:
            self.dbg(5, '1 i++ at %d' % (i,))
            i += 1
            continue

        # Split "\tag value" into tag and value.
        line = _chomp(line[1:])
        space_at = line.find(' ')
        a = line[0:space_at]
        if a not in mixed_tags:
            self.dbg(5, '2 i++ at %d' % (i,))
            i += 1
            continue

        v = line[space_at + 1:]

        # A style tag is being opened or changed.  Changing one while it
        # is open is emitted as close-then-reopen, e.g.
        #   <lang a="french">foo</lang><lang a="spanish">bar</lang>
        # and not as nested tags.  The stack is meant to hold at most one
        # entry per style tag.
        self.dbg(4, 'seen \\%s at line %d' % (line, i))
        i = self._close_and_reopen_styling(i, a, v)
        self.dbg(5, '3 i++ at %d' % (i,))
def get_paragraph(lines, i, format):
    r"""Find the start of the paragraph that contains line i.

    Searches backwards from ``i`` for a ``\layout`` line, jumping over any
    complete inset met on the way.  Returns the index of the ``\layout``
    line, or -1 when there is none.
    """
    layout_token = "\\layout"
    while i != -1:
        i = find_tokens_backwards(lines, ["\\end_inset", layout_token], i)
        if i == -1:
            break
        if check_token(lines[i], layout_token):
            return i
        # An \end_inset was hit: continue from where that inset begins.
        i = find_beginning_of_inset(lines, i)
    return -1
def update_tabular(document):
    """Convert tabular format 2 to 3.

    For each Tabular inset in ``document.body`` the ``<lyxtabular`` line
    gets its version bumped from 2 to 3, ``<column`` lines get an empty
    width replaced by ``0pt``, and any line matching ``line_re`` has
    ``attr_re`` matches removed.
    """
    tabular_start = re.compile(r'^\\begin_inset\s+Tabular')
    body = document.body

    pos = find_re(body, tabular_start, 0)
    while pos != -1:
        for row in get_tabular_lines(body, pos):
            if check_token(body[row], "<lyxtabular"):
                body[row] = body[row].replace('version="2"', 'version="3"')
            elif check_token(body[row], "<column"):
                body[row] = body[row].replace('width=""', 'width="0pt"')

            # Applied to every tabular line, whatever branch ran above.
            if line_re.match(body[row]):
                body[row] = re.sub(attr_re, "", body[row])

        pos = find_re(body, tabular_start, pos + 1)
def update_tabular(document):
    """Convert tabular format 2 to 3.

    For each Tabular inset in ``document.body`` the ``<lyxtabular`` line
    gets its version bumped from 2 to 3, ``<column`` lines get an empty
    width replaced by ``0pt``, and any line matching ``line_re`` has
    ``attr_re`` matches removed.
    """
    tabular_start = re.compile(r'^\\begin_inset\s+Tabular')
    body = document.body

    pos = find_re(body, tabular_start, 0)
    while pos != -1:
        for row in get_tabular_lines(body, pos):
            if check_token(body[row], "<lyxtabular"):
                body[row] = body[row].replace('version="2"', 'version="3"')
            elif check_token(body[row], "<column"):
                body[row] = body[row].replace('width=""', 'width="0pt"')

            # Applied to every tabular line, whatever branch ran above.
            if line_re.match(body[row]):
                body[row] = re.sub(attr_re, "", body[row])

        pos = find_re(body, tabular_start, pos + 1)
def get_tabular_lines(lines, i):
    """Return the indices of the lines that belong to the tabular at line i.

    Covers the lines from i + 1 up to and including the end of the tabular,
    leaving out every line that is part of a nested inset.  Returns an
    empty list when the end of the tabular cannot be found.
    """
    pos = i + 1
    last = find_end_of_tabular(lines, pos)
    if last == -1:
        return []

    indices = []
    while pos <= last:
        if check_token(lines[pos], "\\begin_inset"):
            # Jump over the nested inset entirely.
            pos = find_end_of_inset(lines, pos) + 1
            continue
        indices.append(pos)
        pos += 1
    return indices
def get_tabular_lines(lines, i):
    """Return the indices of the lines that belong to the tabular at line i.

    Covers the lines from i + 1 up to and including the end of the tabular,
    leaving out every line that is part of a nested inset.  Returns an
    empty list when the end of the tabular cannot be found.
    """
    pos = i + 1
    last = find_end_of_tabular(lines, pos)
    if last == -1:
        return []

    indices = []
    while pos <= last:
        if check_token(lines[pos], "\\begin_inset"):
            # Jump over the nested inset entirely.
            pos = find_end_of_inset(lines, pos) + 1
            continue
        indices.append(pos)
        pos += 1
    return indices
def read(self):
    """Reads a file into the self.header and self.body parts, from
    self.input.

    First pass: lines are read until the first body token; preamble lines
    go to ``self.preamble``, the remaining non-empty lines to
    ``self.header``, and the first body line to ``self.body``.  A missing
    ``\\textclass`` is reported and replaced by ``article``.  The document
    attributes (textclass, backend, format, language, inputencoding,
    encoding, initial_version) are then set from the header.

    Second pass: textclass, header and preamble are decoded with the
    detected encoding, and the rest of the input is decoded and appended
    to ``self.body``.

    ``self.error`` is called when the input ends before the body starts.
    NOTE(review): this code assumes ``self.error`` does not return.
    """
    body_start_tokens = ("\\layout", "\\begin_layout", "\\begin_body")

    while True:
        line = self.input.readline()
        if not line:
            self.error("Invalid LyX file.")

        line = trim_eol(line)
        if check_token(line, '\\begin_preamble'):
            while 1:
                line = self.input.readline()
                if not line:
                    self.error("Invalid LyX file.")

                line = trim_eol(line)
                if check_token(line, '\\end_preamble'):
                    break

                # The first token must be compared: "split()[:0]" is always
                # an empty list, so the original test could never succeed.
                words = line.split()
                if words and words[0] in body_start_tokens:
                    self.warning("Malformed LyX file:"
                                 "Missing '\\end_preamble'."
                                 "\nAdding it now and hoping"
                                 "for the best.")
                    # End the preamble here; the line is then handled as
                    # the first body line by the code below.
                    break

                self.preamble.append(line)

        if check_token(line, '\\end_preamble'):
            continue

        line = line.strip()
        if not line:
            continue

        if line.split()[0] in ("\\layout", "\\begin_layout",
                               "\\begin_body", "\\begin_deeper"):
            self.body.append(line)
            break

        self.header.append(line)

    i = find_token(self.header, '\\textclass', 0)
    if i == -1:
        self.warning("Malformed LyX file: Missing '\\textclass'.")
        # Insert the default class right after the \lyxformat line.
        i = find_token(self.header, '\\lyxformat', 0) + 1
        self.header[i:i] = ['\\textclass article']

    self.textclass = get_value(self.header, "\\textclass", 0)
    self.backend = get_backend(self.textclass)
    self.format = self.read_format()
    self.language = get_value(self.header, "\\language", 0,
                              default="english")
    self.inputencoding = get_value(self.header, "\\inputencoding", 0,
                                   default="auto")
    self.encoding = get_encoding(self.language, self.inputencoding,
                                 self.format, self.cjk_encoding)
    self.initial_version = self.read_version()

    # Second pass over header and preamble, now we know the file encoding
    # Do not forget the textclass (Debian bug #700828)
    self.textclass = self.textclass.decode(self.encoding)
    for i in range(len(self.header)):
        self.header[i] = self.header[i].decode(self.encoding)
    for i in range(len(self.preamble)):
        self.preamble[i] = self.preamble[i].decode(self.encoding)

    # Read document body
    while 1:
        line = self.input.readline().decode(self.encoding)
        if not line:
            break
        self.body.append(trim_eol(line))
def revert_separator(document):
    """ Revert separator insets to layout separators

    Every "\\begin_inset Separator" in document.body is replaced in place.
    A "plain" separator becomes a collapsed ERT inset; any other kind
    becomes a separator layout, a new Standard paragraph, a
    \\begin_deeper/\\end_deeper group, or is deleted, depending on what
    surrounds it.  Returns nothing.
    """

    # Beamer classes name the separator layout differently.
    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass in beamer_classes:
        beglaysep = "\\begin_layout Separator"
    else:
        beglaysep = "\\begin_layout --Separator--"

    # A complete, empty separator paragraph.
    parsep = [beglaysep, "", "\\end_layout", ""]
    # Collapsed ERT inset containing "%".
    comert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", "%", "\\end_layout",
              "", "\\end_inset", ""]
    # Collapsed ERT inset containing a single space.
    empert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", " ", "\\end_layout",
              "", "\\end_inset", ""]

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Separator", i)
        if i == -1:
            return

        lay = get_containing_layout(document.body, i)
        if lay == False:
            document.warning("Malformed LyX document: Can't convert separator inset at line " + str(i))
            i = i + 1
            continue

        # lay is indexed as (layout name, first line, last line).
        layoutname = lay[0]
        beg = lay[1]
        end = lay[2]
        # Second word of the inset's value; "plain" when there is none.
        kind = get_value(document.body, "\\begin_inset Separator", i, i+1, "plain").split()[1]
        # Is there any non-empty line in the layout before / after the inset?
        before = document.body[beg+1:i]
        something_before = len(before) > 0 and len("".join(before)) > 0
        j = find_end_of_inset(document.body, i)
        after = document.body[j+1:end]
        something_after = len(after) > 0 and len("".join(after)) > 0
        if kind == "plain":
            # beg now points at the inset line itself.
            beg = beg + len(before) + 1
        elif something_before:
            # Close the current paragraph in front of the inset; all
            # indices after the insertion point move down by two lines.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2
            j = j + 2
            beg = i
            end = end + 2

        if kind == "plain":
            # Replace the inset by an ERT: a space when text follows,
            # a "%" otherwise.
            if something_after:
                document.body[beg:j+1] = empert
                i = i + len(empert)
            else:
                document.body[beg:j+1] = comert
                i = i + len(comert)
        else:
            if something_after:
                if layoutname == "Standard":
                    if not something_before:
                        # Nothing before: emit a separator paragraph, then
                        # reopen a Standard paragraph for the text after.
                        document.body[beg:j+1] = parsep
                        i = i + len(parsep)
                        document.body[i:i] = ["", "\\begin_layout Standard"]
                        i = i + 2
                    else:
                        document.body[beg:j+1] = ["\\begin_layout Standard"]
                        i = i + 1
                else:
                    # Non-Standard layout: nest the following text in a
                    # deeper Standard paragraph.
                    document.body[beg:j+1] = ["\\begin_deeper"]
                    i = i + 1
                    # Keep `end` in step with the replaced span.
                    end = end + 1 - (j + 1 - beg)
                    if not something_before:
                        document.body[i:i] = parsep
                        i = i + len(parsep)
                        end = end + len(parsep)
                    document.body[i:i] = ["\\begin_layout Standard"]
                    document.body[end+2:end+2] = ["", "\\end_deeper", ""]
                    i = i + 4
            else:
                # Nothing follows the inset inside this layout: look at
                # the next non-empty line after the layout.
                next_par_is_aligned = False
                k = find_nonempty_line(document.body, end+1)
                if k != -1 and check_token(document.body[k], "\\begin_layout"):
                    lay = get_containing_layout(document.body, k)
                    next_par_is_aligned = lay != False and \
                            find_token(document.body, "\\align", lay[1], lay[2]) != -1
                if k != -1 and not next_par_is_aligned \
                        and not check_token(document.body[k], "\\end_deeper") \
                        and not check_token(document.body[k], "\\begin_deeper"):
                    if layoutname == "Standard":
                        document.body[beg:j+1] = [beglaysep]
                        i = i + 1
                    else:
                        document.body[beg:j+1] = ["\\begin_deeper", beglaysep]
                        end = end + 2 - (j + 1 - beg)
                        document.body[end+1:end+1] = ["", "\\end_deeper", ""]
                        i = i + 3
                else:
                    # No separator layout is emitted in this case; the
                    # inset lines are simply removed.
                    if something_before:
                        del document.body[i:end+1]
                    else:
                        del document.body[i:end-1]

        i = i + 1
def set_paragraph_properties(lines, prop_dict): " Set paragraph properties." # we need to preserve the order of options properties = [ "family", "series", "shape", "size", "emph", "bar", "noun", "latex", "color" ] prop_value = { "family": "default", "series": "medium", "shape": "up", "size": "normal", "emph": "off", "bar": "no", "noun": "off", "latex": "no_latex", "color": "none" } start = 0 end = 0 i = 0 n = len(lines) #skip empty lines while i < n and lines[i] == "": i = i + 1 start = i #catch open char properties while i < n and lines[i][:1] == "\\": result = prop_exp.match(lines[i]) # sys.stderr.write(lines[i]+"\n") prop = result.group(1) if prop not in properties: break else: prop_dict[prop] = result.group(2) i = i + 1 end = i aux = [] insert = 0 for prop in properties: if prop_dict[prop] != 'default': insert = 1 if prop == "color": aux.append("\\%s %s" % (prop, prop_dict[prop])) elif prop != "family" or prop_dict[prop] != "roman": aux.append("\\%s %s " % (prop, prop_dict[prop])) # remove final char properties n = len(lines) changed_prop = [] while n: n = n - 1 if not lines[n]: del lines[n] continue if lines[n][:1] == '\\': result = prop_exp.match(lines[n]) prop = result.group(1) if prop in properties: changed_prop.append(prop) prop_dict[prop] = result.group(2) del lines[n] continue if check_token(lines[n], '\\end_inset'): # ensure proper newlines after inset end lines.append('') lines.append('') break for line in lines[end:]: if line[:1] == '\\': result = prop_exp.match(line) prop = result.group(1) if prop in properties and prop not in changed_prop: prop_dict[prop] = result.group(2) if not lines[start:] and not lines[end:]: return [] result = lines[:start] + aux[:] + lines[end:] if insert and result[0] != '': return [''] + result[:] return result[:]
def revert_separator(document):
    """ Revert separator insets to layout separators

    Every "\\begin_inset Separator" in document.body is replaced in place.
    A "plain" separator becomes a collapsed ERT inset; any other kind
    becomes a separator layout, a new Standard paragraph, a
    \\begin_deeper/\\end_deeper group, or is deleted, depending on what
    surrounds it.  Returns nothing.
    """

    # Beamer classes name the separator layout differently.
    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass in beamer_classes:
        beglaysep = "\\begin_layout Separator"
    else:
        beglaysep = "\\begin_layout --Separator--"

    # A complete, empty separator paragraph.
    parsep = [beglaysep, "", "\\end_layout", ""]
    # Collapsed ERT inset containing "%".
    comert = [
        "\\begin_inset ERT", "status collapsed", "",
        "\\begin_layout Plain Layout", "%", "\\end_layout", "", "\\end_inset",
        ""
    ]
    # Collapsed ERT inset containing a single space.
    empert = [
        "\\begin_inset ERT", "status collapsed", "",
        "\\begin_layout Plain Layout", " ", "\\end_layout", "", "\\end_inset",
        ""
    ]

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Separator", i)
        if i == -1:
            return

        lay = get_containing_layout(document.body, i)
        if lay == False:
            document.warning(
                "Malformed LyX document: Can't convert separator inset at line "
                + str(i))
            i = i + 1
            continue

        # lay is indexed as (layout name, first line, last line).
        layoutname = lay[0]
        beg = lay[1]
        end = lay[2]
        # Second word of the inset's value; "plain" when there is none.
        kind = get_value(document.body, "\\begin_inset Separator", i, i + 1,
                         "plain").split()[1]
        # Is there any non-empty line in the layout before / after the inset?
        before = document.body[beg + 1:i]
        something_before = len(before) > 0 and len("".join(before)) > 0
        j = find_end_of_inset(document.body, i)
        after = document.body[j + 1:end]
        something_after = len(after) > 0 and len("".join(after)) > 0
        if kind == "plain":
            # beg now points at the inset line itself.
            beg = beg + len(before) + 1
        elif something_before:
            # Close the current paragraph in front of the inset; all
            # indices after the insertion point move down by two lines.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2
            j = j + 2
            beg = i
            end = end + 2

        if kind == "plain":
            # Replace the inset by an ERT: a space when text follows,
            # a "%" otherwise.
            if something_after:
                document.body[beg:j + 1] = empert
                i = i + len(empert)
            else:
                document.body[beg:j + 1] = comert
                i = i + len(comert)
        else:
            if something_after:
                if layoutname == "Standard":
                    if not something_before:
                        # Nothing before: emit a separator paragraph, then
                        # reopen a Standard paragraph for the text after.
                        document.body[beg:j + 1] = parsep
                        i = i + len(parsep)
                        document.body[i:i] = ["", "\\begin_layout Standard"]
                        i = i + 2
                    else:
                        document.body[beg:j + 1] = ["\\begin_layout Standard"]
                        i = i + 1
                else:
                    # Non-Standard layout: nest the following text in a
                    # deeper Standard paragraph.
                    document.body[beg:j + 1] = ["\\begin_deeper"]
                    i = i + 1
                    # Keep `end` in step with the replaced span.
                    end = end + 1 - (j + 1 - beg)
                    if not something_before:
                        document.body[i:i] = parsep
                        i = i + len(parsep)
                        end = end + len(parsep)
                    document.body[i:i] = ["\\begin_layout Standard"]
                    document.body[end + 2:end + 2] = ["", "\\end_deeper", ""]
                    i = i + 4
            else:
                # Nothing follows the inset inside this layout: look at
                # the next non-empty line after the layout.
                next_par_is_aligned = False
                k = find_nonempty_line(document.body, end + 1)
                if k != -1 and check_token(document.body[k], "\\begin_layout"):
                    lay = get_containing_layout(document.body, k)
                    next_par_is_aligned = lay != False and \
                            find_token(document.body, "\\align", lay[1], lay[2]) != -1
                if k != -1 and not next_par_is_aligned \
                        and not check_token(document.body[k], "\\end_deeper") \
                        and not check_token(document.body[k], "\\begin_deeper"):
                    if layoutname == "Standard":
                        document.body[beg:j + 1] = [beglaysep]
                        i = i + 1
                    else:
                        document.body[beg:j + 1] = ["\\begin_deeper", beglaysep]
                        end = end + 2 - (j + 1 - beg)
                        document.body[end + 1:end + 1] = ["", "\\end_deeper", ""]
                        i = i + 3
                else:
                    # No separator layout is emitted in this case; the
                    # inset lines are simply removed.
                    if something_before:
                        del document.body[i:end + 1]
                    else:
                        del document.body[i:end - 1]

        i = i + 1
def set_paragraph_properties(lines, prop_dict): " Set paragraph properties." # we need to preserve the order of options properties = ["family","series","shape","size", "emph","bar","noun","latex","color"] prop_value = {"family" : "default", "series" : "medium", "shape" : "up", "size" : "normal", "emph" : "off", "bar" : "no", "noun" : "off", "latex" : "no_latex", "color" : "none"} start = 0 end = 0 i = 0 n = len(lines) #skip empty lines while i<n and lines[i] == "": i = i + 1 start = i #catch open char properties while i<n and lines[i][:1] == "\\": result = prop_exp.match(lines[i]) # sys.stderr.write(lines[i]+"\n") prop = result.group(1) if prop not in properties: break else: prop_dict[prop] = result.group(2) i = i + 1 end = i aux = [] insert = 0 for prop in properties: if prop_dict[prop] != 'default': insert = 1 if prop == "color": aux.append("\\%s %s" % (prop, prop_dict[prop])) elif prop != "family" or prop_dict[prop] != "roman": aux.append("\\%s %s " % (prop, prop_dict[prop])) # remove final char properties n = len(lines) changed_prop = [] while n: n = n - 1 if not lines[n]: del lines[n] continue if lines[n][:1] == '\\': result = prop_exp.match(lines[n]) prop = result.group(1) if prop in properties: changed_prop.append(prop) prop_dict[prop] = result.group(2) del lines[n] continue if check_token(lines[n],'\\end_inset'): # ensure proper newlines after inset end lines.append('') lines.append('') break for line in lines[end:]: if line[:1] == '\\': result = prop_exp.match(line) prop = result.group(1) if prop in properties and prop not in changed_prop: prop_dict[prop] = result.group(2) if not lines[start:] and not lines[end:]: return [] result = lines[:start] + aux[:] + lines[end:] if insert and result[0] != '': return [''] + result[:] return result[:]
def remove_pextra(document):
    r"""Remove pextra tokens from ``document.body`` in place.

    For every line matching ``pextra_type2_rexp``:

    * a ``\pextra_widthp`` argument found on the following line is joined
      back onto the current line;
    * ``\pextra_type 1`` (indented paragraph) is replaced by
      ``\leftindent <width>``;
    * otherwise (minipage) the pextra token is removed and the paragraph,
      together with the following paragraphs collected by the inner loop,
      is wrapped in a ``\begin_inset Minipage`` ... ``\end_inset`` pair.

    Returns nothing.
    """
    lines = document.body
    i = 0
    flag = 0
    while 1:
        i = find_re(lines, pextra_type2_rexp, i)
        if i == -1:
            break

        # Sometimes the \pextra_widthp argument comes in it own
        # line. If that happens insert it back in this line.
        # (Guarded so that a match on the last line cannot index past it.)
        if i + 1 < len(lines) and pextra_widthp.search(lines[i + 1]):
            lines[i] = lines[i] + ' ' + lines[i + 1]
            del lines[i + 1]

        mo = pextra_rexp.search(lines[i])
        width = get_width(mo)

        if mo.group(1) == "1":
            # handle \pextra_type 1 (indented paragraph)
            # A callable replacement is inserted literally.  As a template
            # string, "\leftindent" would be parsed as the unknown escape
            # "\l", which re.sub rejects on Python >= 3.7.
            replacement = "\\leftindent " + width + " "
            lines[i] = re.sub(pextra_rexp, lambda _mo: replacement, lines[i])
            i = i + 1
            continue

        # handle \pextra_type 2 (minipage)
        position = mo.group(3)
        hfill = mo.group(5)
        lines[i] = re.sub(pextra_rexp, "", lines[i])

        start = ["\\begin_inset Minipage",
                 "position " + position,
                 "inner_position 0",
                 'height "0pt"',
                 'width "%s"' % width,
                 "collapsed false"
                 ]
        # flag is set when the previous minipage stopped at a paragraph
        # whose pextra group 7 is "1"; this minipage then gets no \layout
        # line of its own in front.
        if flag:
            flag = 0
            if hfill:
                start = ["", "\\hfill", ""] + start
        else:
            start = ['\\layout %s' % document.default_layout, ''] + start

        j0 = find_token_backwards(lines, "\\layout", i - 1)
        j = get_next_paragraph(lines, i, document.format + 1)

        while 1:
            # collect more paragraphs to the minipage
            if j == -1 or not check_token(lines[j], "\\layout"):
                break
            i = find_re(lines, pextra_type2_rexp2, j + 1)
            if i == -1:
                break
            mo = pextra_rexp.search(lines[i])
            if not mo:
                break
            if mo.group(7) == "1":
                flag = 1
                break
            lines[i] = re.sub(pextra_rexp, "", lines[i])
            j = find_tokens(lines, ["\\layout", "\\end_float"], i + 1)

        mid = lines[j0:j]
        end = ["\\end_inset "]

        lines[j0:j] = start + mid + end
        i = i + 1
def remove_oldfloat(document):
    r"""Change \begin_float .. \end_float into \begin_inset Float .. \end_inset.

    Works on ``document.body`` in place.  An unknown float type is reported
    and treated as ``fig``.  A float containing a ``pextra_type3_rexp``
    match becomes a ``Wrap figure`` inset with the width taken from that
    token.  After the inset, a default value is emitted for every font
    token seen between the start of the paragraph and the float.

    A ``\begin_float`` without a matching ``\end_float`` is reported and
    left unchanged.
    """
    lines = document.body
    i = 0
    while 1:
        i = find_token(lines, "\\begin_float", i)
        if i == -1:
            break
        # There are no nested floats, so finding the end of the float is simple
        j = find_token(lines, "\\end_float", i + 1)
        if j == -1:
            # Using -1 as a slice bound below would corrupt the body.
            document.warning("Malformed LyX document: Missing '\\end_float'.")
            i = i + 1
            continue

        floattype = lines[i].split()[1]
        if floattype not in floats:
            document.warning("Error! Unknown float type " + floattype)
            floattype = "fig"

        # skip \end_deeper tokens
        i2 = i + 1
        while check_token(lines[i2], "\\end_deeper"):
            i2 = i2 + 1
        if i2 > i + 1:
            # Re-emit the skipped \end_deeper tokens in front of the
            # paragraph that follows the float.
            j2 = get_next_paragraph(lines, j + 1, document.format + 1)
            lines[j2:j2] = ["\\end_deeper "] * (i2 - (i + 1))

        new = floats[floattype] + [""]

        # Check if the float is floatingfigure
        k = find_re(lines, pextra_type3_rexp, i, j)
        if k != -1:
            mo = pextra_rexp.search(lines[k])
            width = get_width(mo)
            lines[k] = re.sub(pextra_rexp, "", lines[k])
            new = ["\\begin_inset Wrap figure",
                   'width "%s"' % width,
                   "collapsed false",
                   ""]

        new = new + lines[i2:j] + ["\\end_inset ", ""]

        # After a float, all font attributes are reseted.
        # We need to output '\foo default' for every attribute foo
        # whose value is not default before the float.
        # The check here is not accurate, but it doesn't matter
        # as extra '\foo default' commands are ignored.
        # In fact, it might be safer to output '\foo default' for all
        # font attributes.
        k = get_paragraph(lines, i, document.format + 1)
        flag = 0
        for token in font_tokens:
            if find_token(lines, token, k, i) != -1:
                if not flag:
                    # This is not necessary, but we want the output to be
                    # as similar as posible to the lyx format
                    flag = 1
                    new.append("")
                if token == "\\lang":
                    new.append(token + " " + document.language)
                else:
                    new.append(token + " default ")

        lines[i:j + 1] = new
        i = i + 1
def remove_oldert(document):
    r"""Remove old ERT insets from ``document.body`` in place.

    Each run of old-style ERT text (started by ``\latex latex`` or by a
    ``\layout LaTeX`` paragraph) is rewritten as ``\begin_inset ERT``
    insets.  Nested insets, ``\hfill`` and special characters are moved
    out of the ERT text, and lines matching ``move_rexp`` are emitted
    after it.  Finally every remaining ``\latex`` token line is deleted.

    Returns nothing.
    """
    ert_begin = ["\\begin_inset ERT",
                 "status Collapsed",
                 "",
                 '\\layout %s' % document.default_layout,
                 ""]
    lines = document.body
    i = 0
    while 1:
        i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
        if i == -1:
            break
        j = i + 1
        while 1:
            # \end_inset is for ert inside a tabular cell. The other tokens
            # are obvious.
            j = find_tokens(lines, ["\\latex default", "\\layout",
                                    "\\begin_inset", "\\end_inset",
                                    "\\end_float", "\\the_end"],
                            j)
            if check_token(lines[j], "\\begin_inset"):
                j = find_end_of_inset(lines, j) + 1
            else:
                break

        if check_token(lines[j], "\\layout"):
            # Leave any \begin_deeper lines in front of the next
            # paragraph outside the ERT run.
            while j - 1 >= 0 and check_token(lines[j - 1], "\\begin_deeper"):
                j = j - 1

        # We need to remove insets, special chars & font commands from ERT text
        new = []
        new2 = []
        if check_token(lines[i], "\\layout LaTeX"):
            # Written as '\\layout': the original '\layout' is an invalid
            # escape sequence (the runtime value is unchanged).
            new = ['\\layout %s' % document.default_layout, "", ""]

        k = i + 1
        while 1:
            k2 = find_re(lines, ert_rexp, k, j)
            inset = hfill = specialchar = 0
            if k2 == -1:
                k2 = j
            elif check_token(lines[k2], "\\begin_inset"):
                inset = 1
            elif check_token(lines[k2], "\\hfill"):
                hfill = 1
                del lines[k2]
                j = j - 1
            else:
                specialchar = 1
                mo = spchar_rexp.match(lines[k2])
                lines[k2] = mo.group(1)
                specialchar_str = mo.group(2)
                k2 = k2 + 1

            tmp = []
            for line in lines[k:k2]:
                # Move some lines outside the ERT inset:
                if move_rexp.match(line):
                    if new2 == []:
                        # This is not necessary, but we want the output to be
                        # as similar as posible to the lyx format
                        new2 = [""]
                    new2.append(line)
                elif not check_token(line, "\\latex"):
                    tmp.append(line)

            if is_empty(tmp):
                if [x for x in tmp if x != ""] != []:
                    if new == []:
                        # This is not necessary, but we want the output to be
                        # as similar as posible to the lyx format
                        lines[i - 1] = lines[i - 1] + " "
                    else:
                        new = new + [" "]
            else:
                new = new + ert_begin + tmp + ["\\end_inset ", ""]

            if inset:
                k3 = find_end_of_inset(lines, k2)
                # Put an empty line after \end_inset
                new = new + [""] + lines[k2:k3 + 1] + [""]
                k = k3 + 1
                # Skip the empty line after \end_inset
                if not is_nonempty_line(lines[k]):
                    k = k + 1
                    new.append("")
            elif hfill:
                new = new + ["\\hfill", ""]
                k = k2
            elif specialchar:
                if new == []:
                    # This is not necessary, but we want the output to be
                    # as similar as posible to the lyx format
                    lines[i - 1] = lines[i - 1] + specialchar_str
                    new = [""]
                else:
                    new = new + [specialchar_str, ""]
                k = k2
            else:
                break

        new = new + new2
        if not check_token(lines[j], "\\latex "):
            new = new + [""] + [lines[j]]
        lines[i:j + 1] = new
        i = i + 1

    # Delete remaining "\latex xxx" tokens
    i = 0
    while 1:
        i = find_token(lines, "\\latex ", i)
        if i == -1:
            break
        del lines[i]
def remove_oldert(document):
    r"""Remove old ERT insets from ``document.body`` in place.

    Each run of old-style ERT text (started by ``\latex latex`` or by a
    ``\layout LaTeX`` paragraph) is rewritten as ``\begin_inset ERT``
    insets.  Nested insets, ``\hfill`` and special characters are moved
    out of the ERT text, and lines matching ``move_rexp`` are emitted
    after it.  Finally every remaining ``\latex`` token line is deleted.

    Returns nothing.
    """
    ert_begin = ["\\begin_inset ERT",
                 "status Collapsed",
                 "",
                 '\\layout %s' % document.default_layout,
                 ""]
    lines = document.body
    i = 0
    while 1:
        i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
        if i == -1:
            break
        j = i + 1
        while 1:
            # \end_inset is for ert inside a tabular cell. The other tokens
            # are obvious.
            j = find_tokens(lines, ["\\latex default", "\\layout",
                                    "\\begin_inset", "\\end_inset",
                                    "\\end_float", "\\the_end"],
                            j)
            if check_token(lines[j], "\\begin_inset"):
                j = find_end_of_inset(lines, j) + 1
            else:
                break

        if check_token(lines[j], "\\layout"):
            # Leave any \begin_deeper lines in front of the next
            # paragraph outside the ERT run.
            while j - 1 >= 0 and check_token(lines[j - 1], "\\begin_deeper"):
                j = j - 1

        # We need to remove insets, special chars & font commands from ERT text
        new = []
        new2 = []
        if check_token(lines[i], "\\layout LaTeX"):
            # Written as '\\layout': the original '\layout' is an invalid
            # escape sequence (the runtime value is unchanged).
            new = ['\\layout %s' % document.default_layout, "", ""]

        k = i + 1
        while 1:
            k2 = find_re(lines, ert_rexp, k, j)
            inset = hfill = specialchar = 0
            if k2 == -1:
                k2 = j
            elif check_token(lines[k2], "\\begin_inset"):
                inset = 1
            elif check_token(lines[k2], "\\hfill"):
                hfill = 1
                del lines[k2]
                j = j - 1
            else:
                specialchar = 1
                mo = spchar_rexp.match(lines[k2])
                lines[k2] = mo.group(1)
                specialchar_str = mo.group(2)
                k2 = k2 + 1

            tmp = []
            for line in lines[k:k2]:
                # Move some lines outside the ERT inset:
                if move_rexp.match(line):
                    if new2 == []:
                        # This is not necessary, but we want the output to be
                        # as similar as posible to the lyx format
                        new2 = [""]
                    new2.append(line)
                elif not check_token(line, "\\latex"):
                    tmp.append(line)

            if is_empty(tmp):
                # A list is built explicitly: on Python 3 filter() returns
                # an iterator, which never compares equal to [].
                if [x for x in tmp if x != ""] != []:
                    if new == []:
                        # This is not necessary, but we want the output to be
                        # as similar as posible to the lyx format
                        lines[i - 1] = lines[i - 1] + " "
                    else:
                        new = new + [" "]
            else:
                new = new + ert_begin + tmp + ["\\end_inset ", ""]

            if inset:
                k3 = find_end_of_inset(lines, k2)
                # Put an empty line after \end_inset
                new = new + [""] + lines[k2:k3 + 1] + [""]
                k = k3 + 1
                # Skip the empty line after \end_inset
                if not is_nonempty_line(lines[k]):
                    k = k + 1
                    new.append("")
            elif hfill:
                new = new + ["\\hfill", ""]
                k = k2
            elif specialchar:
                if new == []:
                    # This is not necessary, but we want the output to be
                    # as similar as posible to the lyx format
                    lines[i - 1] = lines[i - 1] + specialchar_str
                    new = [""]
                else:
                    new = new + [specialchar_str, ""]
                k = k2
            else:
                break

        new = new + new2
        if not check_token(lines[j], "\\latex "):
            new = new + [""] + [lines[j]]
        lines[i:j + 1] = new
        i = i + 1

    # Delete remaining "\latex xxx" tokens
    i = 0
    while 1:
        i = find_token(lines, "\\latex ", i)
        if i == -1:
            break
        del lines[i]
def remove_pextra(document):
    r"""Remove pextra tokens from ``document.body`` in place.

    For every line matching ``pextra_type2_rexp``:

    * a ``\pextra_widthp`` argument found on the following line is joined
      back onto the current line;
    * ``\pextra_type 1`` (indented paragraph) is replaced by
      ``\leftindent <width>``;
    * otherwise (minipage) the pextra token is removed and the paragraph,
      together with the following paragraphs collected by the inner loop,
      is wrapped in a ``\begin_inset Minipage`` ... ``\end_inset`` pair.

    Returns nothing.
    """
    lines = document.body
    i = 0
    flag = 0
    while 1:
        i = find_re(lines, pextra_type2_rexp, i)
        if i == -1:
            break

        # Sometimes the \pextra_widthp argument comes in it own
        # line. If that happens insert it back in this line.
        # (Guarded so that a match on the last line cannot index past it.)
        if i + 1 < len(lines) and pextra_widthp.search(lines[i + 1]):
            lines[i] = lines[i] + ' ' + lines[i + 1]
            del lines[i + 1]

        mo = pextra_rexp.search(lines[i])
        width = get_width(mo)

        if mo.group(1) == "1":
            # handle \pextra_type 1 (indented paragraph)
            # A callable replacement is inserted literally.  As a template
            # string, "\leftindent" would be parsed as the unknown escape
            # "\l", which re.sub rejects on Python >= 3.7.
            replacement = "\\leftindent " + width + " "
            lines[i] = re.sub(pextra_rexp, lambda _mo: replacement, lines[i])
            i = i + 1
            continue

        # handle \pextra_type 2 (minipage)
        position = mo.group(3)
        hfill = mo.group(5)
        lines[i] = re.sub(pextra_rexp, "", lines[i])

        start = ["\\begin_inset Minipage",
                 "position " + position,
                 "inner_position 0",
                 'height "0pt"',
                 'width "%s"' % width,
                 "collapsed false"
                 ]
        # flag is set when the previous minipage stopped at a paragraph
        # whose pextra group 7 is "1"; this minipage then gets no \layout
        # line of its own in front.
        if flag:
            flag = 0
            if hfill:
                start = ["", "\\hfill", ""] + start
        else:
            start = ['\\layout %s' % document.default_layout, ''] + start

        j0 = find_token_backwards(lines, "\\layout", i - 1)
        j = get_next_paragraph(lines, i, document.format + 1)

        while 1:
            # collect more paragraphs to the minipage
            if j == -1 or not check_token(lines[j], "\\layout"):
                break
            i = find_re(lines, pextra_type2_rexp2, j + 1)
            if i == -1:
                break
            mo = pextra_rexp.search(lines[i])
            if not mo:
                break
            if mo.group(7) == "1":
                flag = 1
                break
            lines[i] = re.sub(pextra_rexp, "", lines[i])
            j = find_tokens(lines, ["\\layout", "\\end_float"], i + 1)

        mid = lines[j0:j]
        end = ["\\end_inset "]

        lines[j0:j] = start + mid + end
        i = i + 1
def remove_oldfloat(document):
    r"""Change \begin_float .. \end_float into \begin_inset Float .. \end_inset.

    Works on ``document.body`` in place.  An unknown float type is reported
    and treated as ``fig``.  A float containing a ``pextra_type3_rexp``
    match becomes a ``Wrap figure`` inset with the width taken from that
    token.  After the inset, a default value is emitted for every font
    token seen between the start of the paragraph and the float.

    A ``\begin_float`` without a matching ``\end_float`` is reported and
    left unchanged.
    """
    lines = document.body
    i = 0
    while 1:
        i = find_token(lines, "\\begin_float", i)
        if i == -1:
            break
        # There are no nested floats, so finding the end of the float is simple
        j = find_token(lines, "\\end_float", i + 1)
        if j == -1:
            # Using -1 as a slice bound below would corrupt the body.
            document.warning("Malformed LyX document: Missing '\\end_float'.")
            i = i + 1
            continue

        floattype = lines[i].split()[1]
        # "in" works on both Python 2 and 3; dict.has_key() is Python 2 only.
        if floattype not in floats:
            document.warning("Error! Unknown float type " + floattype)
            floattype = "fig"

        # skip \end_deeper tokens
        i2 = i + 1
        while check_token(lines[i2], "\\end_deeper"):
            i2 = i2 + 1
        if i2 > i + 1:
            # Re-emit the skipped \end_deeper tokens in front of the
            # paragraph that follows the float.
            j2 = get_next_paragraph(lines, j + 1, document.format + 1)
            lines[j2:j2] = ["\\end_deeper "] * (i2 - (i + 1))

        new = floats[floattype] + [""]

        # Check if the float is floatingfigure
        k = find_re(lines, pextra_type3_rexp, i, j)
        if k != -1:
            mo = pextra_rexp.search(lines[k])
            width = get_width(mo)
            lines[k] = re.sub(pextra_rexp, "", lines[k])
            new = ["\\begin_inset Wrap figure",
                   'width "%s"' % width,
                   "collapsed false",
                   ""]

        new = new + lines[i2:j] + ["\\end_inset ", ""]

        # After a float, all font attributes are reseted.
        # We need to output '\foo default' for every attribute foo
        # whose value is not default before the float.
        # The check here is not accurate, but it doesn't matter
        # as extra '\foo default' commands are ignored.
        # In fact, it might be safer to output '\foo default' for all
        # font attributes.
        k = get_paragraph(lines, i, document.format + 1)
        flag = 0
        for token in font_tokens:
            if find_token(lines, token, k, i) != -1:
                if not flag:
                    # This is not necessary, but we want the output to be
                    # as similar as posible to the lyx format
                    flag = 1
                    new.append("")
                if token == "\\lang":
                    new.append(token + " " + document.language)
                else:
                    new.append(token + " default ")

        lines[i:j + 1] = new
        i = i + 1
def read(self):
    """Reads a file into the self.header and self.body parts, from self.input.

    Lines between \\begin_preamble and \\end_preamble are collected in
    self.preamble.  The header ends at the first line whose first token is
    \\layout, \\begin_layout, \\begin_body or \\begin_deeper; that line
    becomes the first body line.  Along the way self.textclass,
    self.language, self.inputencoding, self.format, self.initial_format,
    self.encoding, self.initial_version and self.backend are set, and
    header, preamble and body are decoded with self.encoding.
    """

    def _trim_and_decode(raw):
        # Return (line, decoded): the raw line without its end-of-line and
        # a text form of it that the parser tools can handle.
        if PY2:
            trimmed = trim_eol(raw)
            return trimmed, trimmed
        trimmed = trim_eol_binary(raw)
        return trimmed, trimmed.decode('latin1')

    # First pass: Read header to determine file encoding
    # If we are running under python3 then all strings are binary in this
    # pass. In some cases we need to convert binary to unicode in order to
    # use our parser tools. Since we do not know the true encoding yet we
    # use latin1. This works since a) the parts we are interested in are
    # pure ASCII (subset of latin1) and b) in contrast to pure ascii or
    # utf8, one can decode any 8byte string using latin1.
    first_line = True
    while True:
        line = self.input.readline()
        if not line:
            # eof found before end of header
            self.error("Invalid LyX file: Missing body.")

        if first_line:
            # Remove UTF8 BOM marker if present
            if line.startswith(codecs.BOM_UTF8):
                line = line[len(codecs.BOM_UTF8):]
            first_line = False

        line, decoded = _trim_and_decode(line)

        if check_token(decoded, '\\begin_preamble'):
            while True:
                line = self.input.readline()
                if not line:
                    # eof found before end of header
                    self.error("Invalid LyX file: Missing body.")

                line, decoded = _trim_and_decode(line)
                if check_token(decoded, '\\end_preamble'):
                    break

                # Compare the first token (if any).  The former "[:0]" slice
                # was always an empty list, so this recovery never ran.
                tokens = decoded.split()
                if tokens and tokens[0] in ("\\layout",
                                            "\\begin_layout",
                                            "\\begin_body"):
                    self.warning("Malformed LyX file: "
                                 "Missing '\\end_preamble'."
                                 "\nAdding it now and hoping "
                                 "for the best.")
                    # Treat the preamble as closed; the current line is
                    # handled below as the start of the body.
                    break

                self.preamble.append(line)

        if check_token(decoded, '\\end_preamble'):
            continue

        line = line.rstrip()
        if not line:
            continue

        # "decoded" may consist only of characters that str.split() treats
        # as whitespace but bytes.rstrip() does not (e.g. latin1 0xA0), so
        # guard against an empty token list.
        tokens = decoded.split()
        if tokens and tokens[0] in ("\\layout", "\\begin_layout",
                                    "\\begin_body", "\\begin_deeper"):
            self.body.append(line)
            break

        self.header.append(line)

    if PY2:
        i = find_token(self.header, '\\textclass', 0)
    else:
        i = find_token(self.header, b'\\textclass', 0)
    if i == -1:
        self.warning("Malformed LyX file: Missing '\\textclass'.")
        # Insert a default text class right after the \lyxformat line.
        if PY2:
            i = find_token(self.header, '\\lyxformat', 0) + 1
            self.header[i:i] = ['\\textclass article']
        else:
            i = find_token(self.header, b'\\lyxformat', 0) + 1
            self.header[i:i] = [b'\\textclass article']

    if PY2:
        self.textclass = get_value(self.header, "\\textclass", 0,
                                   default="")
        self.language = get_value(self.header, "\\language", 0,
                                  default="english")
        self.inputencoding = get_value(self.header, "\\inputencoding", 0,
                                       default="auto")
    else:
        self.textclass = get_value(self.header, b"\\textclass", 0,
                                   default=b"")
        self.language = get_value(self.header, b"\\language", 0,
                                  default=b"english").decode('ascii')
        self.inputencoding = get_value(self.header, b"\\inputencoding", 0,
                                       default=b"auto").decode('ascii')
    self.format = self.read_format()
    self.initial_format = self.format
    self.encoding = get_encoding(self.language,
                                 self.inputencoding, self.format,
                                 self.cjk_encoding)
    self.initial_version = self.read_version()

    # Second pass over header and preamble, now we know the file encoding
    # Do not forget the textclass (Debian bug #700828)
    self.textclass = self.textclass.decode(self.encoding)
    self.backend = get_backend(self.textclass)
    # Slice assignment keeps the existing list objects, as the former
    # index-based loops did.
    encoding = self.encoding
    self.header[:] = [entry.decode(encoding) for entry in self.header]
    self.preamble[:] = [entry.decode(encoding) for entry in self.preamble]
    self.body[:] = [entry.decode(encoding) for entry in self.body]

    # Read document body
    while True:
        line = self.input.readline().decode(self.encoding)
        if not line:
            break
        self.body.append(trim_eol(line))
def read(self):
    """Reads a file into the self.header and self.body parts, from self.input.

    Lines between \\begin_preamble and \\end_preamble are collected in
    self.preamble.  The header ends at the first line whose first token is
    \\layout, \\begin_layout, \\begin_body or \\begin_deeper; that line
    becomes the first body line.  Along the way self.textclass,
    self.backend, self.format, self.language, self.inputencoding,
    self.encoding and self.initial_version are set, and header, preamble
    and body are decoded with self.encoding.
    """
    while True:
        line = self.input.readline()
        if not line:
            self.error("Invalid LyX file.")

        line = trim_eol(line)
        if check_token(line, '\\begin_preamble'):
            while True:
                line = self.input.readline()
                if not line:
                    self.error("Invalid LyX file.")

                line = trim_eol(line)
                if check_token(line, '\\end_preamble'):
                    break

                # Compare the first token (if any).  The former "[:0]" slice
                # was always an empty list, so this recovery never ran.
                tokens = line.split()
                if tokens and tokens[0] in ("\\layout",
                                            "\\begin_layout",
                                            "\\begin_body"):
                    self.warning("Malformed LyX file: "
                                 "Missing '\\end_preamble'."
                                 "\nAdding it now and hoping "
                                 "for the best.")
                    # Treat the preamble as closed; the current line is
                    # handled below as the start of the body.
                    break

                self.preamble.append(line)

        if check_token(line, '\\end_preamble'):
            continue

        line = line.strip()
        if not line:
            continue

        if line.split()[0] in ("\\layout", "\\begin_layout",
                               "\\begin_body", "\\begin_deeper"):
            self.body.append(line)
            break

        self.header.append(line)

    i = find_token(self.header, '\\textclass', 0)
    if i == -1:
        self.warning("Malformed LyX file: Missing '\\textclass'.")
        # Insert a default text class right after the \lyxformat line.
        i = find_token(self.header, '\\lyxformat', 0) + 1
        self.header[i:i] = ['\\textclass article']

    self.textclass = get_value(self.header, "\\textclass", 0)
    self.backend = get_backend(self.textclass)
    self.format = self.read_format()
    self.language = get_value(self.header, "\\language", 0,
                              default="english")
    self.inputencoding = get_value(self.header, "\\inputencoding",
                                   0, default="auto")
    self.encoding = get_encoding(self.language,
                                 self.inputencoding, self.format,
                                 self.cjk_encoding)
    self.initial_version = self.read_version()

    # Second pass over header and preamble, now we know the file encoding
    # Do not forget the textclass (Debian bug #700828)
    self.textclass = self.textclass.decode(self.encoding)
    # Slice assignment keeps the existing list objects, as the former
    # index-based loops did.
    encoding = self.encoding
    self.header[:] = [entry.decode(encoding) for entry in self.header]
    self.preamble[:] = [entry.decode(encoding) for entry in self.preamble]
    # The line that ended the header was stored in the body undecoded;
    # decode it too so the whole body has one string type.
    self.body[:] = [entry.decode(encoding) for entry in self.body]

    # Read document body
    while True:
        line = self.input.readline().decode(self.encoding)
        if not line:
            break
        self.body.append(trim_eol(line))
def read(self):
    """Reads a file into the self.header and self.body parts, from self.input.

    Lines between \\begin_preamble and \\end_preamble are collected in
    self.preamble.  The header ends at the first line whose first token is
    \\layout, \\begin_layout, \\begin_body or \\begin_deeper; that line
    becomes the first body line.  Along the way self.textclass,
    self.language, self.inputencoding, self.format, self.initial_format,
    self.encoding, self.initial_version and self.backend are set, and
    header, preamble and body are decoded with self.encoding.
    """

    def _trim_and_decode(raw):
        # Return (line, decoded): the raw line without its end-of-line and
        # a text form of it that the parser tools can handle.
        if PY2:
            trimmed = trim_eol(raw)
            return trimmed, trimmed
        trimmed = trim_eol_binary(raw)
        return trimmed, trimmed.decode('latin1')

    # First pass: Read header to determine file encoding
    # If we are running under python3 then all strings are binary in this
    # pass. In some cases we need to convert binary to unicode in order to
    # use our parser tools. Since we do not know the true encoding yet we
    # use latin1. This works since a) the parts we are interested in are
    # pure ASCII (subset of latin1) and b) in contrast to pure ascii or
    # utf8, one can decode any 8byte string using latin1.
    first_line = True
    while True:
        line = self.input.readline()
        if not line:
            # eof found before end of header
            self.error("Invalid LyX file: Missing body.")

        if first_line:
            # Remove UTF8 BOM marker if present
            if line.startswith(codecs.BOM_UTF8):
                line = line[len(codecs.BOM_UTF8):]
            first_line = False

        line, decoded = _trim_and_decode(line)

        if check_token(decoded, '\\begin_preamble'):
            while True:
                line = self.input.readline()
                if not line:
                    # eof found before end of header
                    self.error("Invalid LyX file: Missing body.")

                line, decoded = _trim_and_decode(line)
                if check_token(decoded, '\\end_preamble'):
                    break

                # Compare the first token (if any).  The former "[:0]" slice
                # was always an empty list, so this recovery never ran.
                tokens = decoded.split()
                if tokens and tokens[0] in ("\\layout",
                                            "\\begin_layout",
                                            "\\begin_body"):
                    self.warning("Malformed LyX file: "
                                 "Missing '\\end_preamble'."
                                 "\nAdding it now and hoping "
                                 "for the best.")
                    # Treat the preamble as closed; the current line is
                    # handled below as the start of the body.
                    break

                self.preamble.append(line)

        if check_token(decoded, '\\end_preamble'):
            continue

        line = line.rstrip()
        if not line:
            continue

        # "decoded" may consist only of characters that str.split() treats
        # as whitespace but bytes.rstrip() does not (e.g. latin1 0xA0), so
        # guard against an empty token list.
        tokens = decoded.split()
        if tokens and tokens[0] in ("\\layout", "\\begin_layout",
                                    "\\begin_body", "\\begin_deeper"):
            self.body.append(line)
            break

        self.header.append(line)

    if PY2:
        i = find_token(self.header, '\\textclass', 0)
    else:
        i = find_token(self.header, b'\\textclass', 0)
    if i == -1:
        self.warning("Malformed LyX file: Missing '\\textclass'.")
        # Insert a default text class right after the \lyxformat line.
        if PY2:
            i = find_token(self.header, '\\lyxformat', 0) + 1
            self.header[i:i] = ['\\textclass article']
        else:
            i = find_token(self.header, b'\\lyxformat', 0) + 1
            self.header[i:i] = [b'\\textclass article']

    if PY2:
        self.textclass = get_value(self.header, "\\textclass", 0,
                                   default="")
        self.language = get_value(self.header, "\\language", 0,
                                  default="english")
        self.inputencoding = get_value(self.header, "\\inputencoding", 0,
                                       default="auto")
    else:
        self.textclass = get_value(self.header, b"\\textclass", 0,
                                   default=b"")
        self.language = get_value(self.header, b"\\language", 0,
                                  default=b"english").decode('ascii')
        self.inputencoding = get_value(self.header, b"\\inputencoding", 0,
                                       default=b"auto").decode('ascii')
    self.format = self.read_format()
    self.initial_format = self.format
    self.encoding = get_encoding(self.language,
                                 self.inputencoding, self.format,
                                 self.cjk_encoding)
    self.initial_version = self.read_version()

    # Second pass over header and preamble, now we know the file encoding
    # Do not forget the textclass (Debian bug #700828)
    self.textclass = self.textclass.decode(self.encoding)
    self.backend = get_backend(self.textclass)
    # Slice assignment keeps the existing list objects, as the former
    # index-based loops did.
    encoding = self.encoding
    self.header[:] = [entry.decode(encoding) for entry in self.header]
    self.preamble[:] = [entry.decode(encoding) for entry in self.preamble]
    self.body[:] = [entry.decode(encoding) for entry in self.body]

    # Read document body
    while True:
        line = self.input.readline().decode(self.encoding)
        if not line:
            break
        self.body.append(trim_eol(line))