Beispiel #1
0
def header_update(document):
    """Update the document header in place.

    Walks ``document.header`` line by line and

    * strips one trailing space from each line,
    * renames ``\\epsfig`` to ``\\graphics``,
    * rewrites ``\\papersize`` (``usletter`` becomes ``letterpaper``,
      ``a4wide`` becomes ``Default`` plus a ``\\paperpackage widemarginsa4``
      line) and inserts ``\\use_geometry 0`` and ``\\use_amsmath 0`` right
      after it,
    * replaces ``\\baselinestretch <value>`` by the matching ``\\spacing``
      line (``single``, ``onehalf``, ``double`` or ``other <value>``).
    """
    lines = document.header
    i = 0
    # The length is re-read on every pass: the \papersize branch inserts
    # lines, and a length cached before the loop would leave that many
    # trailing header lines unprocessed.
    while i < len(lines):
        if lines[i][-1:] == ' ':
            lines[i] = lines[i][:-1]

        if check_token(lines[i], '\\epsfig'):
            lines[i] = lines[i].replace('\\epsfig', '\\graphics')
            i = i + 1
            continue

        if check_token(lines[i], '\\papersize'):
            size = lines[i].split()[1]
            new_size = size
            paperpackage = ""

            if size == 'usletter':
                new_size = 'letterpaper'
            if size == 'a4wide':
                new_size = 'Default'
                paperpackage = "widemarginsa4"

            lines[i] = '\\papersize ' + new_size
            i = i + 1
            if paperpackage:
                lines.insert(i, '\\paperpackage ' + paperpackage)
                i = i + 1

            # Step over the two inserted lines so they are not re-examined.
            lines.insert(i, '\\use_geometry 0')
            lines.insert(i + 1, '\\use_amsmath 0')
            i = i + 2
            continue

        if check_token(lines[i], '\\baselinestretch'):
            size = lines[i].split()[1]
            if size == '1.00':
                name = 'single'
            elif size == '1.50':
                name = 'onehalf'
            elif size == '2.00':
                name = 'double'
            else:
                name = 'other ' + size
            lines[i] = '\\spacing %s ' % name
            i = i + 1
            continue

        i = i + 1
Beispiel #2
0
def header_update(document):
    """Update the document header in place.

    Each line of ``document.header`` loses one trailing space, then:

    * ``\\epsfig`` is renamed to ``\\graphics``;
    * ``\\papersize`` is rewritten (``usletter`` becomes ``letterpaper``,
      ``a4wide`` becomes ``Default`` followed by a
      ``\\paperpackage widemarginsa4`` line) and ``\\use_geometry 0`` and
      ``\\use_amsmath 0`` are inserted right after it;
    * ``\\baselinestretch <value>`` becomes a ``\\spacing`` line
      (``single``, ``onehalf``, ``double`` or ``other <value>``).
    """
    paper_names = {'usletter': 'letterpaper', 'a4wide': 'Default'}
    spacing_names = {'1.00': 'single', '1.50': 'onehalf', '2.00': 'double'}

    lines = document.header
    i = 0
    # len(lines) is evaluated on every pass because the \papersize branch
    # grows the list; a bound computed once before the loop would stop the
    # scan before the last header lines.
    while i < len(lines):
        if lines[i][-1:] == ' ':
            lines[i] = lines[i][:-1]

        if check_token(lines[i], '\\epsfig'):
            lines[i] = lines[i].replace('\\epsfig', '\\graphics')

        elif check_token(lines[i], '\\papersize'):
            size = lines[i].split()[1]
            lines[i] = '\\papersize ' + paper_names.get(size, size)

            added = []
            if size == 'a4wide':
                added.append('\\paperpackage widemarginsa4')
            added.append('\\use_geometry 0')
            added.append('\\use_amsmath 0')
            lines[i + 1:i + 1] = added
            # Skip the freshly inserted lines.
            i = i + len(added)

        elif check_token(lines[i], '\\baselinestretch'):
            size = lines[i].split()[1]
            name = spacing_names.get(size, 'other ' + size)
            lines[i] = '\\spacing %s ' % name

        i = i + 1
Beispiel #3
0
def is_ert_paragraph(document, i):
    """Tell whether the paragraph starting at body line i is an ERT paragraph.

    Returns 0 unless line i is a ``\\layout`` line using the default layout
    and the next non-empty line opens an ERT inset.  In that case the result
    is whatever check_token reports for the first non-empty line after the
    end of that inset being another ``\\layout`` line.
    """
    body = document.body
    first = body[i]
    if not check_token(first, "\\layout"):
        return 0

    layout = get_layout(first, document.default_layout)
    if not document.is_default_layout(layout):
        return 0

    inset_start = find_nonempty_line(body, i+1)
    if not check_token(body[inset_start], "\\begin_inset ERT"):
        return 0

    inset_end = find_end_of_inset(body, inset_start)
    following = find_nonempty_line(body, inset_end+1)
    return check_token(body[following], "\\layout")
Beispiel #4
0
def is_ert_paragraph(document, i):
    """Check whether body line i starts a paragraph made of a single ERT inset.

    The answer is 0 when line i is not a ``\\layout`` line, when its layout
    is not the document default, or when the next non-empty line does not
    open an ERT inset.  Otherwise the function returns the check_token
    result for "the first non-empty line after the inset is a ``\\layout``
    line".
    """
    lines = document.body

    is_layout = check_token(lines[i], "\\layout")
    if is_layout:
        name = get_layout(lines[i], document.default_layout)
        if document.is_default_layout(name):
            start = find_nonempty_line(lines, i + 1)
            if check_token(lines[start], "\\begin_inset ERT"):
                stop = find_end_of_inset(lines, start)
                after = find_nonempty_line(lines, stop + 1)
                return check_token(lines[after], "\\layout")
    return 0
Beispiel #5
0
Datei: LyX.py Projekt: bsjung/Lyx
    def get_toc(self, depth = 4):
        """Return the TOC of this LyX document.

        Scans ``self.body`` for Title, Chapter, Section, Subsection and
        Subsubsection paragraphs and returns them, in document order, as a
        list of ``Paragraph(section, lines)`` objects.  A trailing ``*`` on
        the layout name is dropped.  For each paragraph the leading blank
        lines and paragraph-parameter lines are skipped, insets are left out
        unless their type is listed in ``allowed_insets``, and trailing blank
        lines are trimmed.

        ``depth`` is not used by this implementation.

        A paragraph without a matching ``\\end_layout`` stops the scan after
        an 'Incomplete file.' warning; the paragraphs collected so far are
        returned.
        """
        paragraphs_filter = {'Title' : 0,'Chapter' : 1, 'Section' : 2,
                             'Subsection' : 3, 'Subsubsection': 4}
        allowed_insets = ['Quotes']
        allowed_parameters = ('\\paragraph_spacing', '\\noindent',
                              '\\align', '\\labelwidthstring',
                              "\\start_of_appendix", "\\leftindent")
        sections = ['\\begin_layout %s' % section
                    for section in paragraphs_filter]

        toc_par = []
        i = 0
        while 1:
            i = find_tokens(self.body, sections, i)
            if i == -1:
                break

            j = find_end_of(self.body,  i + 1, '\\begin_layout', '\\end_layout')
            if j == -1:
                self.warning('Incomplete file.', 0)
                break

            section = self.body[i].split()[1]
            if section[-1] == '*':
                section = section[:-1]

            par = []

            k = i + 1
            # skip paragraph parameters
            while not self.body[k].strip() or self.body[k].split()[0] \
                      in allowed_parameters:
                k += 1

            while k < j:
                if check_token(self.body[k], '\\begin_inset'):
                    inset = self.body[k].split()[1]
                    end = find_end_of_inset(self.body, k)
                    if end == -1 or end > j:
                        self.warning('Malformed file.', 0)
                    if end == -1:
                        # Unterminated inset: "k = end + 1" would rewind k to
                        # 0 and rescan the same lines forever, so stop
                        # collecting this paragraph here.
                        break

                    if inset in allowed_insets:
                        par.extend(self.body[k: end+1])
                    k = end + 1
                else:
                    par.append(self.body[k])
                    k += 1

            # trim empty lines in the end.
            while par and par[-1].strip() == '':
                par.pop()

            toc_par.append(Paragraph(section, par))

            i = j + 1

        return toc_par
Beispiel #6
0
 def _fix_text_styling(self):
     """Walk self.lines and balance the inline text-style tags.

     The first color token found before the end of the header is renamed
     to color_in_header when it is not inside a layout.  After that, every
     line starting with an "end_" token gets one line per entry of
     self.stack inserted in front of it (last entry first, built from the
     entry's tag name and its mixed_tags value) and the stack is emptied.
     Lines of the form "<backslash>tag value" whose tag is a key of
     mixed_tags are handed to self._close_and_reopen_styling, which returns
     the index to resume from.
     """
     lines = self.lines
     self.stack = []
     open_tags = self.stack
     fixes = []
     # Header first: rename the \color token there, if there is one.
     hdr = find_tokens(lines, ('\\color', '\\end_header'), 0)
     if hdr != -1 and check_token(lines[hdr], '\\color'):
         if not get_containing_layout(lines, hdr):
             lines[hdr] = '\\color_in_header ' + lines[hdr].split()[1]
     # Then the rest of the file.
     i = 0
     while i < len(lines):
         line = lines[i]
         self.dbg(4, 'looking at line %d: %s' % (i, line))
         # XXX All the startswith() tests below deserve a simplification.
         if line.startswith('\\end_'):
             self.dbg(2, 'fixing unended style tags %s' % (repr(open_tags)))
             for entry in reversed(open_tags):
                 tag = entry[0]
                 lines.insert(i, '\\' + tag + ' ' + mixed_tags[tag])
                 i += 1
             del open_tags[:]
             i += 1
             continue
         if not (line.startswith('\\') and line.find(' ') != -1):
             self.dbg(5, '1 i++ at %d' % (i, ))
             i += 1
             continue
         # XXX This ad-hoc parsing needs refactoring as well.
         line = _chomp(line[1:])
         sep = line.find(' ')
         a = line[0:sep]
         if a not in mixed_tags:
             self.dbg(5, '2 i++ at %d' % (i, ))
             i += 1
             continue
         v = line[sep + 1:]
         # A style tag is being opened here, but it may equally be a change
         # of value for a tag that is already open.  Given
         #  \lang french foo \lang spanish bar \lang english foobar
         # one could produce nested output,
         #  <lang a="french">foo<lang a="spanish">bar</lang></lang>
         # or sequential output,
         #  <lang a="french">foo</lang><lang a="spanish">bar</lang>
         # Nesting might be simpler to generate (close everything down to
         # the matching stack entry, then re-open the others), but the
         # sequential form is the more sensible one, so that is what is
         # done.
         # Invariant: the stack never holds more than one style tag
         # (XXX assert this in code).
         self.dbg(4, 'seen \\%s at line %d' % (line, i))
         i = self._close_and_reopen_styling(i, a, v)
         self.dbg(5, '3 i++ at %d' % (i, ))
Beispiel #7
0
def get_next_paragraph(lines, i, format):
    """Find the paragraph after the paragraph that contains line i.

    Starting at line i, looks for the next ``\\layout``, ``\\end_float`` or
    ``\\the_end`` line, jumping over any inset met on the way, and returns
    its index.  Returns -1 when no such line exists or when an inset is not
    closed.  ``format`` is not used.
    """
    tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]

    while i != -1:
        i = find_tokens(lines, tokens, i)
        if i == -1:
            # Nothing found: do not fall through to lines[-1], which would
            # inspect the last line (or fail on an empty list).
            return -1
        if not check_token(lines[i], "\\begin_inset"):
            return i
        # Resume the search from the end of the inset just found.
        i = find_end_of_inset(lines, i)
    return -1
Beispiel #8
0
def get_next_paragraph(lines, i, format):
    """Find the paragraph after the paragraph that contains line i.

    Searches forward from line i for a ``\\layout``, ``\\end_float`` or
    ``\\the_end`` line and returns its index; insets found before it are
    skipped as a whole.  The result is -1 if the search fails or an inset
    has no end.  ``format`` is not used.
    """
    tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]

    while i != -1:
        i = find_tokens(lines, tokens, i)
        if i == -1:
            # A failed search must not be used as an index: lines[-1] is the
            # last line, not a match, and raises on an empty list.
            break
        if not check_token(lines[i], "\\begin_inset"):
            return i
        # Jump to the end of this inset and keep looking from there.
        i = find_end_of_inset(lines, i)
    return -1
Beispiel #9
0
 def _fix_text_styling(self):
     """Balance the inline text-style tags found in self.lines.

     A color token located before the end of the header, and outside any
     layout, is renamed to color_in_header.  Then, in front of each line
     that starts with an "end_" token, one line is inserted per entry of
     self.stack (last entry first; the line is made of the entry's tag name
     and its mixed_tags value) and the stack is cleared.  Any line of the
     form "<backslash>tag value" whose tag is a key of mixed_tags is passed
     to self._close_and_reopen_styling, whose return value is the index the
     scan continues from.
     """
     lines = self.lines
     self.stack = []
     pending = self.stack
     fixes = []
     # Start with the header: a \color found there gets a new name.
     pos = find_tokens(lines, ('\\color', '\\end_header'), 0)
     in_header = pos != -1 and check_token(lines[pos], '\\color')
     if in_header and not get_containing_layout(lines, pos):
         lines[pos] = '\\color_in_header ' + lines[pos].split()[1]
     # Now process everything else.
     i = 0
     while i < len(lines):
         line = lines[i]
         self.dbg(4, 'looking at line %d: %s' % (i, line))
         # XXX The chain of startswith() checks should really be simplified.
         if line.startswith('\\end_'):
             self.dbg(2, 'fixing unended style tags %s' % (repr(pending)))
             k = len(pending)
             while k > 0:
                 k -= 1
                 name = pending[k][0]
                 lines.insert(i, '\\' + name + ' ' + mixed_tags[name])
                 i += 1
             del pending[0:]
             i += 1
             continue
         if line.find(' ') == -1 or not line.startswith('\\'):
             self.dbg(5, '1 i++ at %d' % (i,))
             i += 1
             continue
         # XXX The parsing below is a candidate for refactoring too.
         line = _chomp(line[1:])
         cut = line.find(' ')
         a = line[0:cut]
         if a not in mixed_tags:
             self.dbg(5, '2 i++ at %d' % (i,))
             i += 1
             continue
         v = line[cut + 1:]
         # This line opens a style tag, or changes the value of one that is
         # already open.  For input such as
         #  \lang french foo \lang spanish bar \lang english foobar
         # the result could be nested:
         #  <lang a="french">foo<lang a="spanish">bar</lang></lang>
         # or flat:
         #  <lang a="french">foo</lang><lang a="spanish">bar</lang>
         # Nesting may be easier (close all tags down to the one being
         # closed, then re-open the others), but the flat form makes more
         # sense, so that is the one produced.
         # Invariant: at most one style tag is ever on the stack
         # (XXX assert this in code).
         self.dbg(4, 'seen \\%s at line %d' % (line, i))
         i = self._close_and_reopen_styling(i, a, v)
         self.dbg(5, '3 i++ at %d' % (i,))
Beispiel #10
0
def get_paragraph(lines, i, format):
    """Find the paragraph that contains line i.

    Searches backwards from line i for a ``\\layout`` or ``\\end_inset``
    line.  A ``\\layout`` line is the answer; an ``\\end_inset`` line makes
    the search jump to the beginning of that inset and go on from there.
    Returns -1 when nothing is found.  ``format`` is not used.
    """
    layout_token = "\\layout"
    pos = i

    while pos != -1:
        pos = find_tokens_backwards(lines, ["\\end_inset", layout_token], pos)
        if pos == -1:
            break
        if check_token(lines[pos], layout_token):
            return pos
        pos = find_beginning_of_inset(lines, pos)
    return -1
Beispiel #11
0
def get_paragraph(lines, i, format):
    """Return the index of the ``\\layout`` line of the paragraph holding line i.

    Walks backwards from line i.  Each ``\\end_inset`` met on the way is
    skipped by moving to the beginning of its inset; the first ``\\layout``
    line reached is returned.  The result is -1 if no such line is found.
    ``format`` is not used.
    """
    begin_layout = "\\layout"
    current = i

    while current != -1:
        hit = find_tokens_backwards(lines, ["\\end_inset", begin_layout],
                                    current)
        if hit == -1:
            return -1
        if check_token(lines[hit], begin_layout):
            return hit
        # hit is an \end_inset line: continue from the start of that inset.
        current = find_beginning_of_inset(lines, hit)
    return -1
Beispiel #12
0
def update_tabular(document):
    """Convert tabular format 2 to 3.

    For every Tabular inset in ``document.body`` the lines returned by
    get_tabular_lines are updated in place: the ``<lyxtabular`` line gets
    ``version="3"``, empty ``width`` attributes on ``<column`` lines become
    ``"0pt"``, and on lines matching ``line_re`` everything matching
    ``attr_re`` is removed.
    """
    tabular_start = re.compile(r'^\\begin_inset\s+Tabular')
    body = document.body

    pos = find_re(body, tabular_start, 0)
    while pos != -1:
        for k in get_tabular_lines(body, pos):
            if check_token(body[k], "<lyxtabular"):
                body[k] = body[k].replace('version="2"', 'version="3"')
            elif check_token(body[k], "<column"):
                body[k] = body[k].replace('width=""', 'width="0pt"')

            if line_re.match(body[k]):
                body[k] = re.sub(attr_re, "", body[k])

        # Look for the next tabular after the one just handled.
        pos = find_re(body, tabular_start, pos + 1)
Beispiel #13
0
def update_tabular(document):
    """Convert tabular format 2 to 3.

    Each Tabular inset of ``document.body`` is rewritten in place, line by
    line (using the indices given by get_tabular_lines): ``version="2"``
    becomes ``version="3"`` on the ``<lyxtabular`` line, ``width=""``
    becomes ``width="0pt"`` on ``<column`` lines, and whatever ``attr_re``
    matches is stripped from lines matching ``line_re``.
    """
    inset_re = re.compile(r'^\\begin_inset\s+Tabular')
    lines = document.body
    start = 0
    while True:
        found = find_re(lines, inset_re, start)
        if found == -1:
            return

        for idx in get_tabular_lines(lines, found):
            is_header = check_token(lines[idx], "<lyxtabular")
            if is_header:
                lines[idx] = lines[idx].replace('version="2"', 'version="3"')
            elif check_token(lines[idx], "<column"):
                lines[idx] = lines[idx].replace('width=""', 'width="0pt"')

            if line_re.match(lines[idx]):
                lines[idx] = re.sub(attr_re, "", lines[idx])

        start = found + 1
Beispiel #14
0
def get_tabular_lines(lines, i):
    """Return a list with the indices of the tabular lines.

    Scanning starts on the line after ``i`` and runs up to and including
    the line reported by find_end_of_tabular.  Lines belonging to insets
    nested in the tabular are left out.  Returns ``[]`` when the end of the
    tabular is not found.  If a nested inset is not closed, the scan stops
    there and the indices collected so far are returned.
    """
    result = []
    i = i + 1
    j = find_end_of_tabular(lines, i)
    if j == -1:
        return []

    while i <= j:
        if check_token(lines[i], "\\begin_inset"):
            inset_end = find_end_of_inset(lines, i)
            if inset_end == -1:
                # "-1 + 1" would rewind the scan to line 0 and collect lines
                # outside the tabular (or never terminate).
                break
            i = inset_end + 1
        else:
            result.append(i)
            i = i + 1
    return result
Beispiel #15
0
def get_tabular_lines(lines, i):
    """Return a list with the indices of the tabular lines.

    The scan covers the lines from ``i + 1`` through the line returned by
    find_end_of_tabular, skipping every inset nested in the tabular.  The
    result is ``[]`` if the tabular has no end.  An unterminated nested
    inset ends the scan early; what was collected up to that point is
    returned.
    """
    result = []
    i = i + 1
    j = find_end_of_tabular(lines, i)
    if j == -1:
        return []

    while i <= j:
        if not check_token(lines[i], "\\begin_inset"):
            result.append(i)
            i = i + 1
            continue

        end = find_end_of_inset(lines, i)
        if end == -1:
            # Without this guard i would become 0 and the loop would
            # restart from the top of the file.
            break
        i = end + 1
    return result
Beispiel #16
0
    def read(self):
        """Reads a file into the self.header, self.preamble and
        self.body parts, from self.input.

        Header lines are stored stripped in self.header, the lines between
        ``\\begin_preamble`` and ``\\end_preamble`` go to self.preamble, and
        the first line whose first token is ``\\layout``, ``\\begin_layout``,
        ``\\begin_body`` or ``\\begin_deeper`` becomes the first body line.
        A missing ``\\textclass`` is reported and replaced by
        ``\\textclass article``.  Once the encoding is known, the textclass,
        header and preamble are decoded with it and the rest of the input is
        read, decoded, into self.body.

        self.error is called with "Invalid LyX file." if the input ends
        before the body starts.
        """

        while True:
            line = self.input.readline()
            if not line:
                self.error("Invalid LyX file.")

            line = trim_eol(line)
            if check_token(line, '\\begin_preamble'):
                while 1:
                    line = self.input.readline()
                    if not line:
                        self.error("Invalid LyX file.")

                    line = trim_eol(line)
                    if check_token(line, '\\end_preamble'):
                        break

                    # Compare the first token of the line.  The previous
                    # test used split()[:0], which is always an empty list,
                    # so a missing \end_preamble was never detected.
                    tokens = line.split()
                    if tokens and tokens[0] in ("\\layout",
                                                "\\begin_layout",
                                                "\\begin_body"):

                        self.warning("Malformed LyX file: "
                                     "Missing '\\end_preamble'."
                                     "\nAdding it now and hoping "
                                     "for the best.")
                        # End the preamble here; the line itself is handled
                        # below as the first body line.
                        break

                    self.preamble.append(line)

            if check_token(line, '\\end_preamble'):
                continue

            line = line.strip()
            if not line:
                continue

            if line.split()[0] in ("\\layout", "\\begin_layout",
                                   "\\begin_body", "\\begin_deeper"):
                self.body.append(line)
                break

            self.header.append(line)

        i = find_token(self.header, '\\textclass', 0)
        if i == -1:
            self.warning("Malformed LyX file: Missing '\\textclass'.")
            # Insert the default class right after the \lyxformat line.
            i = find_token(self.header, '\\lyxformat', 0) + 1
            self.header[i:i] = ['\\textclass article']

        self.textclass = get_value(self.header, "\\textclass", 0)
        self.backend = get_backend(self.textclass)
        self.format  = self.read_format()
        self.language = get_value(self.header, "\\language", 0,
                                  default = "english")
        self.inputencoding = get_value(self.header, "\\inputencoding",
                                       0, default = "auto")
        self.encoding = get_encoding(self.language,
                                     self.inputencoding, self.format,
                                     self.cjk_encoding)
        self.initial_version = self.read_version()

        # Second pass over header and preamble, now we know the file encoding
        # Do not forget the textclass (Debian bug #700828)
        self.textclass = self.textclass.decode(self.encoding)
        for i in range(len(self.header)):
            self.header[i] = self.header[i].decode(self.encoding)
        for i in range(len(self.preamble)):
            self.preamble[i] = self.preamble[i].decode(self.encoding)

        # Read document body
        while 1:
            line = self.input.readline().decode(self.encoding)
            if not line:
                break
            self.body.append(trim_eol(line))
Beispiel #17
0
def revert_separator(document):
    """Revert separator insets to layout separators.

    Every ``\\begin_inset Separator`` found in ``document.body`` is replaced
    in place.  A "plain" separator becomes an ERT inset; any other kind is
    turned into paragraph structure (a separator layout, a new Standard
    paragraph and/or a ``\\begin_deeper`` ... ``\\end_deeper`` pair), or is
    simply deleted, depending on what surrounds it.  An inset with no
    containing layout is reported with a warning and skipped.
    """

    # Beamer classes call the separator layout "Separator"; every other
    # class uses "--Separator--".
    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass in beamer_classes:
        beglaysep = "\\begin_layout Separator"
    else:
        beglaysep = "\\begin_layout --Separator--"

    # An empty separator paragraph.
    parsep = [beglaysep, "", "\\end_layout", ""]
    # Collapsed ERT insets holding "%" (comert) or a single space (empert).
    comert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", "%", "\\end_layout",
              "", "\\end_inset", ""]
    empert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", " ", "\\end_layout",
              "", "\\end_inset", ""]

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Separator", i)
        if i == -1:
            return

        lay = get_containing_layout(document.body, i)
        if lay == False:
            document.warning("Malformed LyX document: Can't convert separator inset at line " + str(i))
            i = i + 1
            continue

        # lay is indexed below as (layout name, first line, last line);
        # presumably the span of the enclosing paragraph -- confirm against
        # get_containing_layout.
        layoutname = lay[0]
        beg = lay[1]
        end = lay[2]
        kind = get_value(document.body, "\\begin_inset Separator", i, i+1, "plain").split()[1]
        # Lines of the enclosing layout before and after the inset; the
        # "something_*" flags are true when at least one of them is non-empty.
        before = document.body[beg+1:i]
        something_before = len(before) > 0 and len("".join(before)) > 0
        j = find_end_of_inset(document.body, i)
        after = document.body[j+1:end]
        something_after = len(after) > 0 and len("".join(after)) > 0
        if kind == "plain":
            # Only the inset itself (from line i on) will be replaced.
            beg = beg + len(before) + 1
        elif something_before:
            # Close the current paragraph right before the inset and shift
            # every tracked index by the two inserted lines.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2
            j = j + 2
            beg = i
            end = end + 2

        if kind == "plain":
            # Blank ERT when content follows, "%" ERT otherwise.
            if something_after:
                document.body[beg:j+1] = empert
                i = i + len(empert)
            else:
                document.body[beg:j+1] = comert
                i = i + len(comert)
        else:
            if something_after:
                if layoutname == "Standard":
                    if not something_before:
                        # Nothing before the inset: emit an empty separator
                        # paragraph, then start a new Standard paragraph.
                        document.body[beg:j+1] = parsep
                        i = i + len(parsep)
                        document.body[i:i] = ["", "\\begin_layout Standard"]
                        i = i + 2
                    else:
                        document.body[beg:j+1] = ["\\begin_layout Standard"]
                        i = i + 1
                else:
                    # Non-Standard layout: the following content is nested
                    # in a \begin_deeper ... \end_deeper pair.
                    document.body[beg:j+1] = ["\\begin_deeper"]
                    i = i + 1
                    # Account for the lines removed by the replacement above.
                    end = end + 1 - (j + 1 - beg)
                    if not something_before:
                        document.body[i:i] = parsep
                        i = i + len(parsep)
                        end = end + len(parsep)
                    document.body[i:i] = ["\\begin_layout Standard"]
                    document.body[end+2:end+2] = ["", "\\end_deeper", ""]
                    i = i + 4
            else:
                # Nothing follows the inset in this paragraph: look at the
                # first non-empty line after the paragraph.
                next_par_is_aligned = False
                k = find_nonempty_line(document.body, end+1)
                if k != -1 and check_token(document.body[k], "\\begin_layout"):
                    lay = get_containing_layout(document.body, k)
                    next_par_is_aligned = lay != False and \
                            find_token(document.body, "\\align", lay[1], lay[2]) != -1
                if k != -1 and not next_par_is_aligned \
                        and not check_token(document.body[k], "\\end_deeper") \
                        and not check_token(document.body[k], "\\begin_deeper"):
                    if layoutname == "Standard":
                        document.body[beg:j+1] = [beglaysep]
                        i = i + 1
                    else:
                        document.body[beg:j+1] = ["\\begin_deeper", beglaysep]
                        end = end + 2 - (j + 1 - beg)
                        document.body[end+1:end+1] = ["", "\\end_deeper", ""]
                        i = i + 3
                else:
                    # No layout separator is written in this case; the inset
                    # lines are removed instead.
                    if something_before:
                        del document.body[i:end+1]
                    else:
                        del document.body[i:end-1]

        i = i + 1
Beispiel #18
0
def set_paragraph_properties(lines, prop_dict):
    """Set paragraph properties.

    ``lines`` holds the lines of one paragraph and ``prop_dict`` the current
    value of each character property.  Property lines at the start of the
    paragraph update ``prop_dict`` and are replaced by one line per property
    whose value is not ``default``.  Trailing empty lines and trailing
    property lines are deleted from ``lines`` itself, the latter updating
    ``prop_dict`` as well.  Returns the new list of lines, or ``[]`` when
    nothing is left after the leading empty lines.
    """
    # we need to preserve the order of options
    properties = [
        "family", "series", "shape", "size", "emph", "bar", "noun", "latex",
        "color"
    ]

    total = len(lines)

    # skip empty lines
    start = 0
    while start < total and lines[start] == "":
        start += 1

    # catch open char properties
    end = start
    while end < total and lines[end][:1] == "\\":
        match = prop_exp.match(lines[end])
        # sys.stderr.write(lines[end]+"\n")
        name = match.group(1)
        if name not in properties:
            break
        prop_dict[name] = match.group(2)
        end += 1

    aux = []
    insert = 0
    for name in properties:
        value = prop_dict[name]
        if value == 'default':
            continue
        insert = 1
        if name == "color":
            aux.append("\\%s %s" % (name, value))
        elif name != "family" or value != "roman":
            aux.append("\\%s %s " % (name, value))

    # remove final char properties (always working on the last line)
    changed_prop = []
    while lines:
        tail = lines[-1]
        if not tail:
            del lines[-1]
            continue

        if tail[:1] == '\\':
            match = prop_exp.match(tail)
            name = match.group(1)
            if name in properties:
                changed_prop.append(name)
                prop_dict[name] = match.group(2)
                del lines[-1]
                continue

            if check_token(tail, '\\end_inset'):
                # ensure proper newlines after inset end
                lines.extend(['', ''])
        break

    # properties set inside the paragraph carry over, unless a trailing
    # property line already changed them
    for line in lines[end:]:
        if line[:1] != '\\':
            continue
        match = prop_exp.match(line)
        name = match.group(1)
        if name in properties and name not in changed_prop:
            prop_dict[name] = match.group(2)

    if not lines[start:] and not lines[end:]:
        return []

    result = lines[:start] + aux + lines[end:]
    if insert and result[0] != '':
        result = [''] + result
    return result
Beispiel #19
0
def revert_separator(document):
    """Revert separator insets to layout separators.

    Each ``\\begin_inset Separator`` in ``document.body`` is rewritten in
    place: "plain" separators are replaced by an ERT inset, the other kinds
    by paragraph structure (a separator layout, a fresh Standard paragraph
    and/or a ``\\begin_deeper`` ... ``\\end_deeper`` pair) or by nothing at
    all, depending on the surrounding lines.  An inset that is not inside a
    layout triggers a warning and is left alone.
    """

    # The separator layout is named "Separator" in beamer classes and
    # "--Separator--" everywhere else.
    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass in beamer_classes:
        beglaysep = "\\begin_layout Separator"
    else:
        beglaysep = "\\begin_layout --Separator--"

    # An empty separator paragraph.
    parsep = [beglaysep, "", "\\end_layout", ""]
    # Collapsed ERT inset whose content is "%".
    comert = [
        "\\begin_inset ERT", "status collapsed", "",
        "\\begin_layout Plain Layout", "%", "\\end_layout", "", "\\end_inset",
        ""
    ]
    # Collapsed ERT inset whose content is a single space.
    empert = [
        "\\begin_inset ERT", "status collapsed", "",
        "\\begin_layout Plain Layout", " ", "\\end_layout", "", "\\end_inset",
        ""
    ]

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Separator", i)
        if i == -1:
            return

        lay = get_containing_layout(document.body, i)
        if lay == False:
            document.warning(
                "Malformed LyX document: Can't convert separator inset at line "
                + str(i))
            i = i + 1
            continue

        # lay is used as (layout name, first line, last line); presumably
        # the extent of the enclosing paragraph -- TODO confirm against
        # get_containing_layout.
        layoutname = lay[0]
        beg = lay[1]
        end = lay[2]
        kind = get_value(document.body, "\\begin_inset Separator", i, i + 1,
                         "plain").split()[1]
        # Content of the enclosing layout on either side of the inset; a
        # side counts as "something" when it has at least one non-empty line.
        before = document.body[beg + 1:i]
        something_before = len(before) > 0 and len("".join(before)) > 0
        j = find_end_of_inset(document.body, i)
        after = document.body[j + 1:end]
        something_after = len(after) > 0 and len("".join(after)) > 0
        if kind == "plain":
            # Move beg to the inset line: only the inset gets replaced.
            beg = beg + len(before) + 1
        elif something_before:
            # End the current paragraph just before the inset; all tracked
            # indices move down by the two lines inserted here.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2
            j = j + 2
            beg = i
            end = end + 2

        if kind == "plain":
            # ERT with a space if content follows, ERT with "%" otherwise.
            if something_after:
                document.body[beg:j + 1] = empert
                i = i + len(empert)
            else:
                document.body[beg:j + 1] = comert
                i = i + len(comert)
        else:
            if something_after:
                if layoutname == "Standard":
                    if not something_before:
                        # Empty separator paragraph followed by a new
                        # Standard paragraph for the remaining content.
                        document.body[beg:j + 1] = parsep
                        i = i + len(parsep)
                        document.body[i:i] = ["", "\\begin_layout Standard"]
                        i = i + 2
                    else:
                        document.body[beg:j + 1] = ["\\begin_layout Standard"]
                        i = i + 1
                else:
                    # Other layouts: wrap what follows in
                    # \begin_deeper ... \end_deeper.
                    document.body[beg:j + 1] = ["\\begin_deeper"]
                    i = i + 1
                    # Correct end for the lines dropped by the replacement.
                    end = end + 1 - (j + 1 - beg)
                    if not something_before:
                        document.body[i:i] = parsep
                        i = i + len(parsep)
                        end = end + len(parsep)
                    document.body[i:i] = ["\\begin_layout Standard"]
                    document.body[end + 2:end + 2] = ["", "\\end_deeper", ""]
                    i = i + 4
            else:
                # The inset ends its paragraph: decide from the first
                # non-empty line after the paragraph.
                next_par_is_aligned = False
                k = find_nonempty_line(document.body, end + 1)
                if k != -1 and check_token(document.body[k], "\\begin_layout"):
                    lay = get_containing_layout(document.body, k)
                    next_par_is_aligned = lay != False and \
                            find_token(document.body, "\\align", lay[1], lay[2]) != -1
                if k != -1 and not next_par_is_aligned \
                        and not check_token(document.body[k], "\\end_deeper") \
                        and not check_token(document.body[k], "\\begin_deeper"):
                    if layoutname == "Standard":
                        document.body[beg:j + 1] = [beglaysep]
                        i = i + 1
                    else:
                        document.body[beg:j +
                                      1] = ["\\begin_deeper", beglaysep]
                        end = end + 2 - (j + 1 - beg)
                        document.body[end + 1:end +
                                      1] = ["", "\\end_deeper", ""]
                        i = i + 3
                else:
                    # No layout separator is emitted here; the inset lines
                    # are deleted.
                    if something_before:
                        del document.body[i:end + 1]
                    else:
                        del document.body[i:end - 1]

        i = i + 1
Beispiel #20
0
def set_paragraph_properties(lines, prop_dict):
    """Set paragraph properties.

    Takes the lines of one paragraph and the running character-property
    values in ``prop_dict``.  Leading property lines are folded into
    ``prop_dict`` and replaced by one line for every property that is not
    ``default``; trailing empty lines and trailing property lines are
    removed from ``lines`` in place (the latter also update ``prop_dict``).
    The rebuilt paragraph is returned as a new list; it is ``[]`` when
    nothing remains after the leading empty lines.
    """
    # we need to preserve the order of options
    properties = ["family", "series", "shape", "size",
                  "emph", "bar", "noun", "latex", "color"]

    count = len(lines)
    pos = 0

    # skip empty lines
    while pos < count and lines[pos] == "":
        pos = pos + 1
    start = pos

    # catch open char properties
    while pos < count and lines[pos][:1] == "\\":
        found = prop_exp.match(lines[pos])
        # sys.stderr.write(lines[pos]+"\n")
        key = found.group(1)
        if key not in properties:
            break
        prop_dict[key] = found.group(2)
        pos = pos + 1
    end = pos

    aux = []
    insert = 0
    for key in properties:
        if prop_dict[key] == 'default':
            continue
        insert = 1
        if key == "color":
            aux.append("\\%s %s" % (key, prop_dict[key]))
        elif not (key == "family" and prop_dict[key] == "roman"):
            aux.append("\\%s %s " % (key, prop_dict[key]))

    # remove final char properties; the line examined is always the last one
    changed_prop = []
    while len(lines) > 0:
        last = lines[-1]
        if not last:
            del lines[-1]
            continue

        if last[:1] == '\\':
            found = prop_exp.match(last)
            key = found.group(1)
            if key in properties:
                changed_prop.append(key)
                prop_dict[key] = found.group(2)
                del lines[-1]
                continue

            if check_token(last, '\\end_inset'):
                # ensure proper newlines after inset end
                lines.append('')
                lines.append('')
        break

    # remember properties set in the middle of the paragraph, except those
    # a trailing property line has already changed
    for line in lines[end:]:
        if line[:1] == '\\':
            found = prop_exp.match(line)
            key = found.group(1)
            if key in properties and key not in changed_prop:
                prop_dict[key] = found.group(2)

    if not lines[start:] and not lines[end:]:
        return []

    rebuilt = lines[:start] + aux + lines[end:]
    if insert and rebuilt[0] != '':
        return [''] + rebuilt

    return rebuilt
Beispiel #21
0
def remove_pextra(document):
    r"""Remove \pextra tokens from the document body.

    Every line of ``document.body`` matched by ``pextra_type2_rexp`` is
    rewritten in place:

    * when group 1 of ``pextra_rexp`` is "1" (indented paragraph) the
      token is replaced by ``\leftindent <width>``;
    * otherwise (minipage) the token is dropped and the enclosing
      paragraph, plus any following paragraphs collected by the inner
      loop, is wrapped in ``\begin_inset Minipage`` ... ``\end_inset``.

    Nothing is returned; ``document.body`` is modified.
    """
    lines = document.body
    i = 0
    # Set when the collecting loop below stops at a token whose group 7
    # is "1"; the next minipage then gets no "\layout" line of its own.
    flag = 0
    while True:
        i = find_re(lines, pextra_type2_rexp, i)
        if i == -1:
            break

        # Sometimes the \pextra_widthp argument comes on its own
        # line. If that happens insert it back in this line.
        # The bounds check covers a token sitting on the very last line.
        if i + 1 < len(lines) and pextra_widthp.search(lines[i + 1]):
            lines[i] = lines[i] + ' ' + lines[i + 1]
            del lines[i + 1]

        mo = pextra_rexp.search(lines[i])
        width = get_width(mo)

        if mo.group(1) == "1":
            # handle \pextra_type 1 (indented paragraph)
            # The replacement is passed as a callable so that it is
            # inserted literally: as a template string, "\l" is an
            # unknown escape, which re.sub rejects on Python >= 3.7.
            indent_token = "\\leftindent " + width + " "
            lines[i] = re.sub(pextra_rexp, lambda _mo: indent_token,
                              lines[i])
            i = i + 1
            continue

        # handle \pextra_type 2 (minipage)
        position = mo.group(3)
        hfill = mo.group(5)
        lines[i] = re.sub(pextra_rexp, "", lines[i])

        start = [
            "\\begin_inset Minipage", "position " + position,
            "inner_position 0", 'height "0pt"',
            'width "%s"' % width, "collapsed false"
        ]
        if flag:
            flag = 0
            if hfill:
                start = ["", "\\hfill", ""] + start
        else:
            start = ['\\layout %s' % document.default_layout, ''] + start

        # j0: start of the paragraph holding the token; j: start of the
        # next paragraph (or -1), i.e. the current end of the minipage.
        j0 = find_token_backwards(lines, "\\layout", i - 1)
        j = get_next_paragraph(lines, i, document.format + 1)

        while True:
            # collect more paragraphs to the minipage
            if j == -1 or not check_token(lines[j], "\\layout"):
                break
            i = find_re(lines, pextra_type2_rexp2, j + 1)
            if i == -1:
                break
            mo = pextra_rexp.search(lines[i])
            if not mo:
                break
            if mo.group(7) == "1":
                flag = 1
                break
            lines[i] = re.sub(pextra_rexp, "", lines[i])
            j = find_tokens(lines, ["\\layout", "\\end_float"], i + 1)

        mid = lines[j0:j]
        end = ["\\end_inset "]

        lines[j0:j] = start + mid + end
        i = i + 1
Beispiel #22
0
def remove_oldfloat(document):
    r"""Change \begin_float .. \end_float into \begin_inset Float .. \end_inset.

    ``document.body`` is rewritten in place.  The float type is the second
    word of the ``\begin_float`` line; an unknown type is reported through
    ``document.warning`` and treated as "fig".  A float that contains a
    line matched by ``pextra_type3_rexp`` becomes a
    ``\begin_inset Wrap figure`` instead.  Nothing is returned.
    """
    lines = document.body
    i = 0
    while True:
        i = find_token(lines, "\\begin_float", i)
        if i == -1:
            break
        # There are no nested floats, so finding the end of the float is simple
        j = find_token(lines, "\\end_float", i + 1)
        if j == -1:
            # Without a closing token the splice at the bottom would insert
            # the new inset without removing the "\begin_float" line, which
            # the next search would then find again, so stop here instead.
            document.warning("Malformed LyX document: Missing '\\end_float'.")
            break

        floattype = lines[i].split()[1]
        if floattype not in floats:
            document.warning("Error! Unknown float type " + floattype)
            floattype = "fig"

        # skip \end_deeper tokens
        i2 = i + 1
        while check_token(lines[i2], "\\end_deeper"):
            i2 = i2 + 1
        if i2 > i + 1:
            # Re-insert the skipped tokens at the start of the paragraph
            # that follows the float.
            j2 = get_next_paragraph(lines, j + 1, document.format + 1)
            lines[j2:j2] = ["\\end_deeper "] * (i2 - (i + 1))

        new = floats[floattype] + [""]

        # Check if the float is floatingfigure
        k = find_re(lines, pextra_type3_rexp, i, j)
        if k != -1:
            mo = pextra_rexp.search(lines[k])
            width = get_width(mo)
            lines[k] = re.sub(pextra_rexp, "", lines[k])
            new = [
                "\\begin_inset Wrap figure",
                'width "%s"' % width, "collapsed false", ""
            ]

        new = new + lines[i2:j] + ["\\end_inset ", ""]

        # After a float, all font attributes are reset.
        # We need to output '\foo default' for every attribute foo
        # whose value is not default before the float.
        # The check here is not accurate, but it doesn't matter
        # as extra '\foo default' commands are ignored.
        # In fact, it might be safer to output '\foo default' for all
        # font attributes.
        k = get_paragraph(lines, i, document.format + 1)
        flag = 0
        for token in font_tokens:
            if find_token(lines, token, k, i) != -1:
                if not flag:
                    # This is not necessary, but we want the output to be
                    # as similar as possible to the lyx format
                    flag = 1
                    new.append("")
                if token == "\\lang":
                    new.append(token + " " + document.language)
                else:
                    new.append(token + " default ")

        lines[i:j + 1] = new
        i = i + 1
Beispiel #23
0
def remove_oldert(document):
    r"""Remove old ERT insets.

    Each region of ``document.body`` that starts at a ``\latex latex``
    token or a ``\layout LaTeX`` paragraph is rewritten as one or more
    ``\begin_inset ERT`` ... ``\end_inset`` blocks.  Nested insets,
    ``\hfill`` and the special characters matched by ``spchar_rexp`` are
    emitted outside the ERT inset, and lines matched by ``move_rexp`` are
    moved after it.  Finally every remaining ``\latex ...`` token is
    deleted.  The body is modified in place; nothing is returned.
    """
    ert_begin = [
        "\\begin_inset ERT", "status Collapsed", "",
        '\\layout %s' % document.default_layout, ""
    ]
    lines = document.body
    i = 0
    while True:
        i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
        if i == -1:
            break
        # Find j, the line that terminates the ERT region starting at i.
        j = i + 1
        while True:
            # \end_inset is for ert inside a tabular cell. The other tokens
            # are obvious.
            j = find_tokens(lines, [
                "\\latex default", "\\layout", "\\begin_inset", "\\end_inset",
                "\\end_float", "\\the_end"
            ], j)
            if check_token(lines[j], "\\begin_inset"):
                # Nested insets belong to the region; continue after them.
                j = find_end_of_inset(lines, j) + 1
            else:
                break

        if check_token(lines[j], "\\layout"):
            # Keep any "\begin_deeper" lines directly above the next
            # paragraph outside of the region.
            while j - 1 >= 0 and check_token(lines[j - 1], "\\begin_deeper"):
                j = j - 1

        # We need to remove insets, special chars & font commands from ERT text
        new = []   # replacement for lines[i:j + 1]
        new2 = []  # lines moved after the ERT inset
        if check_token(lines[i], "\\layout LaTeX"):
            # Written with an escaped backslash: "\l" is not a valid
            # string escape sequence.
            new = ['\\layout %s' % document.default_layout, "", ""]

        k = i + 1
        while True:
            # Process the region piecewise: lines[k:k2] is plain ERT text,
            # lines[k2] (if any) is an inset, \hfill or special character.
            k2 = find_re(lines, ert_rexp, k, j)
            inset = hfill = specialchar = 0
            if k2 == -1:
                k2 = j
            elif check_token(lines[k2], "\\begin_inset"):
                inset = 1
            elif check_token(lines[k2], "\\hfill"):
                hfill = 1
                del lines[k2]
                j = j - 1
            else:
                specialchar = 1
                mo = spchar_rexp.match(lines[k2])
                lines[k2] = mo.group(1)
                specialchar_str = mo.group(2)
                k2 = k2 + 1

            tmp = []
            for line in lines[k:k2]:
                # Move some lines outside the ERT inset:
                if move_rexp.match(line):
                    if new2 == []:
                        # This is not necessary, but we want the output to be
                        # as similar as possible to the lyx format
                        new2 = [""]
                    new2.append(line)
                elif not check_token(line, "\\latex"):
                    tmp.append(line)

            if is_empty(tmp):
                if [x for x in tmp if x != ""] != []:
                    if new == []:
                        # This is not necessary, but we want the output to be
                        # as similar as possible to the lyx format
                        lines[i - 1] = lines[i - 1] + " "
                    else:
                        new = new + [" "]
            else:
                new = new + ert_begin + tmp + ["\\end_inset ", ""]

            if inset:
                k3 = find_end_of_inset(lines, k2)
                new = new + [""] + lines[k2:k3 + 1] + [
                    ""
                ]  # Put an empty line after \end_inset
                k = k3 + 1
                # Skip the empty line after \end_inset
                if not is_nonempty_line(lines[k]):
                    k = k + 1
                    new.append("")
            elif hfill:
                new = new + ["\\hfill", ""]
                k = k2
            elif specialchar:
                if new == []:
                    # This is not necessary, but we want the output to be
                    # as similar as possible to the lyx format
                    lines[i - 1] = lines[i - 1] + specialchar_str
                    new = [""]
                else:
                    new = new + [specialchar_str, ""]
                k = k2
            else:
                break

        new = new + new2
        if not check_token(lines[j], "\\latex "):
            # The terminating line is not a "\latex" token: keep it.
            new = new + [""] + [lines[j]]
        lines[i:j + 1] = new
        i = i + 1

    # Delete remaining "\latex xxx" tokens
    i = 0
    while True:
        i = find_token(lines, "\\latex ", i)
        if i == -1:
            break
        del lines[i]
Beispiel #24
0
def remove_oldert(document):
    r"""Remove old ERT insets.

    Each region of ``document.body`` that starts at a ``\latex latex``
    token or a ``\layout LaTeX`` paragraph is rewritten as one or more
    ``\begin_inset ERT`` ... ``\end_inset`` blocks.  Nested insets,
    ``\hfill`` and the special characters matched by ``spchar_rexp`` are
    emitted outside the ERT inset, and lines matched by ``move_rexp`` are
    moved after it.  Finally every remaining ``\latex ...`` token is
    deleted.  The body is modified in place; nothing is returned.
    """
    ert_begin = ["\\begin_inset ERT",
                 "status Collapsed",
                 "",
                 '\\layout %s' % document.default_layout,
                 ""]
    lines = document.body
    i = 0
    while True:
        i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
        if i == -1:
            break
        # Find j, the line that terminates the ERT region starting at i.
        j = i+1
        while True:
            # \end_inset is for ert inside a tabular cell. The other tokens
            # are obvious.
            j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"],
                            j)
            if check_token(lines[j], "\\begin_inset"):
                # Nested insets belong to the region; continue after them.
                j = find_end_of_inset(lines, j)+1
            else:
                break

        if check_token(lines[j], "\\layout"):
            # Keep any "\begin_deeper" lines directly above the next
            # paragraph outside of the region.
            while j-1 >= 0 and check_token(lines[j-1], "\\begin_deeper"):
                j = j-1

        # We need to remove insets, special chars & font commands from ERT text
        new = []   # replacement for lines[i:j+1]
        new2 = []  # lines moved after the ERT inset
        if check_token(lines[i], "\\layout LaTeX"):
            # Written with an escaped backslash: "\l" is not a valid
            # string escape sequence.
            new = ['\\layout %s' % document.default_layout, "", ""]

        k = i+1
        while True:
            # Process the region piecewise: lines[k:k2] is plain ERT text,
            # lines[k2] (if any) is an inset, \hfill or special character.
            k2 = find_re(lines, ert_rexp, k, j)
            inset = hfill = specialchar = 0
            if k2 == -1:
                k2 = j
            elif check_token(lines[k2], "\\begin_inset"):
                inset = 1
            elif check_token(lines[k2], "\\hfill"):
                hfill = 1
                del lines[k2]
                j = j-1
            else:
                specialchar = 1
                mo = spchar_rexp.match(lines[k2])
                lines[k2] = mo.group(1)
                specialchar_str = mo.group(2)
                k2 = k2+1

            tmp = []
            for line in lines[k:k2]:
                # Move some lines outside the ERT inset:
                if move_rexp.match(line):
                    if new2 == []:
                        # This is not necessary, but we want the output to be
                        # as similar as possible to the lyx format
                        new2 = [""]
                    new2.append(line)
                elif not check_token(line, "\\latex"):
                    tmp.append(line)

            if is_empty(tmp):
                # any() instead of comparing filter(...) with []: on
                # Python 3 filter returns an iterator, which never equals
                # a list, so the old test was always true there.
                if any(x != "" for x in tmp):
                    if new == []:
                        # This is not necessary, but we want the output to be
                        # as similar as possible to the lyx format
                        lines[i-1] = lines[i-1]+" "
                    else:
                        new = new+[" "]
            else:
                new = new+ert_begin+tmp+["\\end_inset ", ""]

            if inset:
                k3 = find_end_of_inset(lines, k2)
                new = new+[""]+lines[k2:k3+1]+[""] # Put an empty line after \end_inset
                k = k3+1
                # Skip the empty line after \end_inset
                if not is_nonempty_line(lines[k]):
                    k = k+1
                    new.append("")
            elif hfill:
                new = new + ["\\hfill", ""]
                k = k2
            elif specialchar:
                if new == []:
                    # This is not necessary, but we want the output to be
                    # as similar as possible to the lyx format
                    lines[i-1] = lines[i-1]+specialchar_str
                    new = [""]
                else:
                    new = new+[specialchar_str, ""]
                k = k2
            else:
                break

        new = new+new2
        if not check_token(lines[j], "\\latex "):
            # The terminating line is not a "\latex" token: keep it.
            new = new+[""]+[lines[j]]
        lines[i:j+1] = new
        i = i+1

    # Delete remaining "\latex xxx" tokens
    i = 0
    while True:
        i = find_token(lines, "\\latex ", i)
        if i == -1:
            break
        del lines[i]
Beispiel #25
0
def remove_pextra(document):
    r"""Remove \pextra tokens from the document body.

    Every line of ``document.body`` matched by ``pextra_type2_rexp`` is
    rewritten in place:

    * when group 1 of ``pextra_rexp`` is "1" (indented paragraph) the
      token is replaced by ``\leftindent <width>``;
    * otherwise (minipage) the token is dropped and the enclosing
      paragraph, plus any following paragraphs collected by the inner
      loop, is wrapped in ``\begin_inset Minipage`` ... ``\end_inset``.

    Nothing is returned; ``document.body`` is modified.
    """
    lines = document.body
    i = 0
    # Set when the collecting loop below stops at a token whose group 7
    # is "1"; the next minipage then gets no "\layout" line of its own.
    flag = 0
    while True:
        i = find_re(lines, pextra_type2_rexp, i)
        if i == -1:
            break

        # Sometimes the \pextra_widthp argument comes on its own
        # line. If that happens insert it back in this line.
        # The bounds check covers a token sitting on the very last line.
        if i+1 < len(lines) and pextra_widthp.search(lines[i+1]):
            lines[i] = lines[i] + ' ' + lines[i+1]
            del lines[i+1]

        mo = pextra_rexp.search(lines[i])
        width = get_width(mo)

        if mo.group(1) == "1":
            # handle \pextra_type 1 (indented paragraph)
            # The replacement is passed as a callable so that it is
            # inserted literally: as a template string, "\l" is an
            # unknown escape, which re.sub rejects on Python >= 3.7.
            indent_token = "\\leftindent "+width+" "
            lines[i] = re.sub(pextra_rexp, lambda _mo: indent_token, lines[i])
            i = i+1
            continue

        # handle \pextra_type 2 (minipage)
        position = mo.group(3)
        hfill = mo.group(5)
        lines[i] = re.sub(pextra_rexp, "", lines[i])

        start = ["\\begin_inset Minipage",
                 "position " + position,
                 "inner_position 0",
                 'height "0pt"',
                 'width "%s"' % width,
                 "collapsed false"
                 ]
        if flag:
            flag = 0
            if hfill:
                start = ["","\\hfill",""]+start
        else:
            start = ['\\layout %s' % document.default_layout,''] + start

        # j0: start of the paragraph holding the token; j: start of the
        # next paragraph (or -1), i.e. the current end of the minipage.
        j0 = find_token_backwards(lines,"\\layout", i-1)
        j = get_next_paragraph(lines, i, document.format + 1)

        while True:
            # collect more paragraphs to the minipage
            if j == -1 or not check_token(lines[j], "\\layout"):
                break
            i = find_re(lines, pextra_type2_rexp2, j+1)
            if i == -1:
                break
            mo = pextra_rexp.search(lines[i])
            if not mo:
                break
            if mo.group(7) == "1":
                flag = 1
                break
            lines[i] = re.sub(pextra_rexp, "", lines[i])
            j = find_tokens(lines, ["\\layout", "\\end_float"], i+1)

        mid = lines[j0:j]
        end = ["\\end_inset "]

        lines[j0:j] = start+mid+end
        i = i+1
Beispiel #26
0
def remove_oldfloat(document):
    r"""Change \begin_float .. \end_float into \begin_inset Float .. \end_inset.

    ``document.body`` is rewritten in place.  The float type is the second
    word of the ``\begin_float`` line; an unknown type is reported through
    ``document.warning`` and treated as "fig".  A float that contains a
    line matched by ``pextra_type3_rexp`` becomes a
    ``\begin_inset Wrap figure`` instead.  Nothing is returned.
    """
    lines = document.body
    i = 0
    while True:
        i = find_token(lines, "\\begin_float", i)
        if i == -1:
            break
        # There are no nested floats, so finding the end of the float is simple
        j = find_token(lines, "\\end_float", i+1)
        if j == -1:
            # Without a closing token the splice at the bottom would insert
            # the new inset without removing the "\begin_float" line, which
            # the next search would then find again, so stop here instead.
            document.warning("Malformed LyX document: Missing '\\end_float'.")
            break

        floattype = lines[i].split()[1]
        # "in" replaces dict.has_key(), which no longer exists in Python 3.
        if floattype not in floats:
            document.warning("Error! Unknown float type " + floattype)
            floattype = "fig"

        # skip \end_deeper tokens
        i2 = i+1
        while check_token(lines[i2], "\\end_deeper"):
            i2 = i2+1
        if i2 > i+1:
            # Re-insert the skipped tokens at the start of the paragraph
            # that follows the float.
            j2 = get_next_paragraph(lines, j + 1, document.format + 1)
            lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1))

        new = floats[floattype]+[""]

        # Check if the float is floatingfigure
        k = find_re(lines, pextra_type3_rexp, i, j)
        if k != -1:
            mo = pextra_rexp.search(lines[k])
            width = get_width(mo)
            lines[k] = re.sub(pextra_rexp, "", lines[k])
            new = ["\\begin_inset Wrap figure",
                   'width "%s"' % width,
                   "collapsed false",
                   ""]

        new = new+lines[i2:j]+["\\end_inset ", ""]

        # After a float, all font attributes are reset.
        # We need to output '\foo default' for every attribute foo
        # whose value is not default before the float.
        # The check here is not accurate, but it doesn't matter
        # as extra '\foo default' commands are ignored.
        # In fact, it might be safer to output '\foo default' for all
        # font attributes.
        k = get_paragraph(lines, i, document.format + 1)
        flag = 0
        for token in font_tokens:
            if find_token(lines, token, k, i) != -1:
                if not flag:
                    # This is not necessary, but we want the output to be
                    # as similar as possible to the lyx format
                    flag = 1
                    new.append("")
                if token == "\\lang":
                    new.append(token+" "+ document.language)
                else:
                    new.append(token+" default ")

        lines[i:j+1] = new
        i = i+1
Beispiel #27
0
    def read(self):
        """Read self.input into self.header, self.preamble and self.body.

        The header is read first as raw lines so that the file encoding
        can be determined from it; header, preamble and the first body
        line are then decoded with that encoding, and the remaining body
        lines are read and decoded.  Also sets self.textclass,
        self.language, self.inputencoding, self.format,
        self.initial_format, self.encoding, self.initial_version and
        self.backend.

        self.error is called when the input ends before a body token is
        found; self.warning is called for a missing '\\\\end_preamble' or
        '\\\\textclass'.
        """

        # First pass: Read header to determine file encoding
        # If we are running under python3 then all strings are binary in this
        # pass. In some cases we need to convert binary to unicode in order to
        # use our parser tools. Since we do not know the true encoding yet we
        # use latin1. This works since a) the parts we are interested in are
        # pure ASCII (subset of latin1) and b) in contrast to pure ascii or
        # utf8, one can decode any 8byte string using latin1.
        first_line = True
        while True:
            line = self.input.readline()
            if not line:
                # eof found before end of header
                # NOTE(review): assumes self.error does not return.
                self.error("Invalid LyX file: Missing body.")

            if first_line:
                # Remove UTF8 BOM marker if present
                if line.startswith(codecs.BOM_UTF8):
                    line = line[len(codecs.BOM_UTF8):]

                first_line = False

            if PY2:
                line = trim_eol(line)
                decoded = line
            else:
                line = trim_eol_binary(line)
                decoded = line.decode('latin1')
            if check_token(decoded, '\\begin_preamble'):
                while True:
                    line = self.input.readline()
                    if not line:
                        # eof found before end of header
                        self.error("Invalid LyX file: Missing body.")

                    if PY2:
                        line = trim_eol(line)
                        decoded = line
                    else:
                        line = trim_eol_binary(line)
                        decoded = line.decode('latin1')
                    if check_token(decoded, '\\end_preamble'):
                        break

                    # Compare the first word of the line (the previous
                    # "split()[:0]" was always an empty list, so this
                    # recovery could never trigger).
                    tokens = decoded.split()
                    if tokens and tokens[0] in ("\\layout", "\\begin_layout",
                                                "\\begin_body"):

                        self.warning("Malformed LyX file: "
                                     "Missing '\\end_preamble'."
                                     "\nAdding it now and hoping "
                                     "for the best.")
                        # End the preamble here; the current line is then
                        # handled as the start of the body below.
                        break

                    self.preamble.append(line)

            if check_token(decoded, '\\end_preamble'):
                continue

            line = line.rstrip()
            if not line:
                continue

            if decoded.split()[0] in ("\\layout", "\\begin_layout",
                                      "\\begin_body", "\\begin_deeper"):
                self.body.append(line)
                break

            self.header.append(line)

        if PY2:
            i = find_token(self.header, '\\textclass', 0)
        else:
            i = find_token(self.header, b'\\textclass', 0)
        if i == -1:
            self.warning("Malformed LyX file: Missing '\\textclass'.")
            # Insert a default text class right after the \lyxformat line.
            if PY2:
                i = find_token(self.header, '\\lyxformat', 0) + 1
                self.header[i:i] = ['\\textclass article']
            else:
                i = find_token(self.header, b'\\lyxformat', 0) + 1
                self.header[i:i] = [b'\\textclass article']

        if PY2:
            self.textclass = get_value(self.header,
                                       "\\textclass",
                                       0,
                                       default="")
            self.language = get_value(self.header,
                                      "\\language",
                                      0,
                                      default="english")
            self.inputencoding = get_value(self.header,
                                           "\\inputencoding",
                                           0,
                                           default="auto")
        else:
            self.textclass = get_value(self.header,
                                       b"\\textclass",
                                       0,
                                       default=b"")
            self.language = get_value(self.header,
                                      b"\\language",
                                      0,
                                      default=b"english").decode('ascii')
            self.inputencoding = get_value(self.header,
                                           b"\\inputencoding",
                                           0,
                                           default=b"auto").decode('ascii')
        self.format = self.read_format()
        self.initial_format = self.format
        self.encoding = get_encoding(self.language, self.inputencoding,
                                     self.format, self.cjk_encoding)
        self.initial_version = self.read_version()

        # Second pass over header and preamble, now we know the file encoding
        # Do not forget the textclass (Debian bug #700828)
        self.textclass = self.textclass.decode(self.encoding)
        self.backend = get_backend(self.textclass)
        for i, entry in enumerate(self.header):
            self.header[i] = entry.decode(self.encoding)
        for i, entry in enumerate(self.preamble):
            self.preamble[i] = entry.decode(self.encoding)
        for i, entry in enumerate(self.body):
            self.body[i] = entry.decode(self.encoding)

        # Read document body
        while True:
            line = self.input.readline().decode(self.encoding)
            if not line:
                break
            self.body.append(trim_eol(line))
Beispiel #28
0
    def read(self):
        """Read self.input into self.header, self.preamble and self.body.

        The header and preamble are read first as raw lines so that the
        file encoding can be determined; they are then decoded with that
        encoding, and the remaining body lines are read and decoded.
        Also sets self.textclass, self.backend, self.format,
        self.language, self.inputencoding, self.encoding and
        self.initial_version.

        self.error is called when the input ends before a body token is
        found; self.warning is called for a missing '\\\\end_preamble' or
        '\\\\textclass'.
        """

        while True:
            line = self.input.readline()
            if not line:
                # NOTE(review): assumes self.error does not return.
                self.error("Invalid LyX file.")

            line = trim_eol(line)
            if check_token(line, '\\begin_preamble'):
                while True:
                    line = self.input.readline()
                    if not line:
                        self.error("Invalid LyX file.")

                    line = trim_eol(line)
                    if check_token(line, '\\end_preamble'):
                        break

                    # Compare the first word of the line (the previous
                    # "split()[:0]" was always an empty list, so this
                    # recovery could never trigger).
                    tokens = line.split()
                    if tokens and tokens[0] in ("\\layout", "\\begin_layout",
                                                "\\begin_body"):

                        self.warning("Malformed LyX file: "
                                     "Missing '\\end_preamble'."
                                     "\nAdding it now and hoping "
                                     "for the best.")
                        # End the preamble here; the current line is then
                        # handled as the start of the body below.
                        break

                    self.preamble.append(line)

            if check_token(line, '\\end_preamble'):
                continue

            line = line.strip()
            if not line:
                continue

            if line.split()[0] in ("\\layout", "\\begin_layout",
                                   "\\begin_body", "\\begin_deeper"):
                self.body.append(line)
                break

            self.header.append(line)

        i = find_token(self.header, '\\textclass', 0)
        if i == -1:
            self.warning("Malformed LyX file: Missing '\\textclass'.")
            # Insert a default text class right after the \lyxformat line.
            i = find_token(self.header, '\\lyxformat', 0) + 1
            self.header[i:i] = ['\\textclass article']

        self.textclass = get_value(self.header, "\\textclass", 0)
        self.backend = get_backend(self.textclass)
        self.format = self.read_format()
        self.language = get_value(self.header,
                                  "\\language",
                                  0,
                                  default="english")
        self.inputencoding = get_value(self.header,
                                       "\\inputencoding",
                                       0,
                                       default="auto")
        self.encoding = get_encoding(self.language, self.inputencoding,
                                     self.format, self.cjk_encoding)
        self.initial_version = self.read_version()

        # Second pass over header and preamble, now we know the file encoding
        # Do not forget the textclass (Debian bug #700828)
        self.textclass = self.textclass.decode(self.encoding)
        for i, entry in enumerate(self.header):
            self.header[i] = entry.decode(self.encoding)
        for i, entry in enumerate(self.preamble):
            self.preamble[i] = entry.decode(self.encoding)
        # The first body line was stored undecoded by the loop above;
        # decode it too so that the whole body has one string type.
        for i, entry in enumerate(self.body):
            self.body[i] = entry.decode(self.encoding)

        # Read document body
        while True:
            line = self.input.readline().decode(self.encoding)
            if not line:
                break
            self.body.append(trim_eol(line))
Beispiel #29
0
    def read(self):
        """Read self.input into self.header, self.preamble and self.body.

        The header is read first as raw lines so that the file encoding
        can be determined from it; header, preamble and the first body
        line are then decoded with that encoding, and the remaining body
        lines are read and decoded.  Also sets self.textclass,
        self.language, self.inputencoding, self.format,
        self.initial_format, self.encoding, self.initial_version and
        self.backend.

        self.error is called when the input ends before a body token is
        found; self.warning is called for a missing '\\\\end_preamble' or
        '\\\\textclass'.
        """

        # First pass: Read header to determine file encoding
        # If we are running under python3 then all strings are binary in this
        # pass. In some cases we need to convert binary to unicode in order to
        # use our parser tools. Since we do not know the true encoding yet we
        # use latin1. This works since a) the parts we are interested in are
        # pure ASCII (subset of latin1) and b) in contrast to pure ascii or
        # utf8, one can decode any 8byte string using latin1.
        first_line = True
        while True:
            line = self.input.readline()
            if not line:
                # eof found before end of header
                # NOTE(review): assumes self.error does not return.
                self.error("Invalid LyX file: Missing body.")

            if first_line:
                # Remove UTF8 BOM marker if present
                if line.startswith(codecs.BOM_UTF8):
                    line = line[len(codecs.BOM_UTF8):]

                first_line = False

            if PY2:
                line = trim_eol(line)
                decoded = line
            else:
                line = trim_eol_binary(line)
                decoded = line.decode('latin1')
            if check_token(decoded, '\\begin_preamble'):
                while True:
                    line = self.input.readline()
                    if not line:
                        # eof found before end of header
                        self.error("Invalid LyX file: Missing body.")

                    if PY2:
                        line = trim_eol(line)
                        decoded = line
                    else:
                        line = trim_eol_binary(line)
                        decoded = line.decode('latin1')
                    if check_token(decoded, '\\end_preamble'):
                        break

                    # Compare the first word of the line (the previous
                    # "split()[:0]" was always an empty list, so this
                    # recovery could never trigger).
                    tokens = decoded.split()
                    if tokens and tokens[0] in ("\\layout",
                                                "\\begin_layout", "\\begin_body"):

                        self.warning("Malformed LyX file: "
                                     "Missing '\\end_preamble'."
                                     "\nAdding it now and hoping "
                                     "for the best.")
                        # End the preamble here; the current line is then
                        # handled as the start of the body below.
                        break

                    self.preamble.append(line)

            if check_token(decoded, '\\end_preamble'):
                continue

            line = line.rstrip()
            if not line:
                continue

            if decoded.split()[0] in ("\\layout", "\\begin_layout",
                                   "\\begin_body", "\\begin_deeper"):
                self.body.append(line)
                break

            self.header.append(line)

        if PY2:
            i = find_token(self.header, '\\textclass', 0)
        else:
            i = find_token(self.header, b'\\textclass', 0)
        if i == -1:
            self.warning("Malformed LyX file: Missing '\\textclass'.")
            # Insert a default text class right after the \lyxformat line.
            if PY2:
                i = find_token(self.header, '\\lyxformat', 0) + 1
                self.header[i:i] = ['\\textclass article']
            else:
                i = find_token(self.header, b'\\lyxformat', 0) + 1
                self.header[i:i] = [b'\\textclass article']

        if PY2:
            self.textclass = get_value(self.header, "\\textclass", 0,
                                       default = "")
            self.language = get_value(self.header, "\\language", 0,
                                      default = "english")
            self.inputencoding = get_value(self.header, "\\inputencoding", 0,
                                           default = "auto")
        else:
            self.textclass = get_value(self.header, b"\\textclass", 0,
                                       default = b"")
            self.language = get_value(self.header, b"\\language", 0,
                                      default = b"english").decode('ascii')
            self.inputencoding = get_value(self.header, b"\\inputencoding", 0,
                                           default = b"auto").decode('ascii')
        self.format = self.read_format()
        self.initial_format = self.format
        self.encoding = get_encoding(self.language,
                                     self.inputencoding, self.format,
                                     self.cjk_encoding)
        self.initial_version = self.read_version()

        # Second pass over header and preamble, now we know the file encoding
        # Do not forget the textclass (Debian bug #700828)
        self.textclass = self.textclass.decode(self.encoding)
        self.backend = get_backend(self.textclass)
        for i, entry in enumerate(self.header):
            self.header[i] = entry.decode(self.encoding)
        for i, entry in enumerate(self.preamble):
            self.preamble[i] = entry.decode(self.encoding)
        for i, entry in enumerate(self.body):
            self.body[i] = entry.decode(self.encoding)

        # Read document body
        while True:
            line = self.input.readline().decode(self.encoding)
            if not line:
                break
            self.body.append(trim_eol(line))