def write_esis(doc, ofp, knownempty):
    for node in doc.childNodes:
        nodeType = node.nodeType
        if nodeType == ELEMENT:
            gi = node.tagName
            if knownempty(gi):
                if node.hasChildNodes():
                    raise ValueError, \
                          "declared-empty node <%s> has children" % gi
                ofp.write("e\n")
            for k, value in node.attributes.items():
                if _token_rx.match(value):
                    dtype = "TOKEN"
                else:
                    dtype = "CDATA"
                ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
            ofp.write("(%s\n" % gi)
            write_esis(node, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        elif nodeType == TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        elif nodeType == ENTITY_REFERENCE:
            ofp.write("&%s\n" % node.nodeName)
        else:
            raise RuntimeError, "unsupported node type: %s" % nodeType
 def dump_attr(self, pentry, value):
     if not (pentry.name and value):
         return
     if _token_rx.match(value):
         dtype = "TOKEN"
     else:
         dtype = "CDATA"
     self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
Exemple #3
0
 def dump_attr(self, pentry, value):
     if not (pentry.name and value):
         return
     if _token_rx.match(value):
         dtype = "TOKEN"
     else:
         dtype = "CDATA"
     self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
    def subconvert(self, endchar=None, depth=0):
        #
        # Parses content, including sub-structures, until the character
        # 'endchar' is found (with no open structures), or until the end
        # of the input data is endchar is None.
        #
        stack = new_stack()
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                name = m.group(1)
                entry = self.get_env_entry(name)
                # re-write to use the macro handler
                line = r"\%s %s" % (name, line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                entry = self.get_entry(envname)
                while stack and envname != stack[-1] \
                      and stack[-1] in entry.endcloses:
                    self.write(")%s\n" % stack.pop())
                if stack and envname == stack[-1]:
                    self.write(")%s\n" % entry.outputname)
                    del stack[-1]
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "c":
                    # Ugh!  This is a combining character...
                    endpos = m.end()
                    self.combining_char("c", line[endpos])
                    line = line[endpos + 1:]
                    continue
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!
                    pos = line.find("\\end{%s}" % macroname)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len("\\end{%s}" % macroname):]
                    continue
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                if entry.outputname and entry.empty:
                    self.write("e\n")
                #
                params, optional, empty = self.start_macro(macroname)
                # rip off the macroname
                if params:
                    line = line[m.end(1):]
                elif empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m and entry.outputname:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text and entry.outputname:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %r"
                                    % (pentry.name, macroname, line[:100]))
                            if entry.outputname:
                                self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            self.line = skip_white(line)[1:]
                            line = self.subconvert(
                                "}", len(stack) + depth + 1)[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            if line[0] != "{":
                                raise LaTeXFormatError(
                                    "missing content for " + macroname)
                            self.line = line[1:]
                            line = self.subconvert("}", len(stack) + depth + 1)
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text" and pentry.text:
                        if entry.outputname and not opened:
                            opened = 1
                            stack.append(entry.name)
                            self.write("(%s\n" % entry.outputname)
                        #dbgmsg("--- text: %r" % pentry.text)
                        self.write("-%s\n" % encode(pentry.text))
                    elif pentry.type == "entityref":
                        self.write("&%s\n" % pentry.name)
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    if not implied_content:
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                continue
            if line[0] == endchar and not stack:
                self.line = line[1:]
                return self.line
            if line[0] == "}":
                # end of macro or group
                macroname = stack[-1]
                if macroname:
                    conversion = self.table[macroname]
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "~":
                # don't worry about the "tie" aspect of this command
                line = line[1:]
                self.write("- \n")
                continue
            if line[0] == "{":
                stack.append("")
                line = line[1:]
                continue
            if line[0] == "\\" and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            if line[:2] == r"\_":
                line = "_" + line[2:]
                continue
            if line[:2] in (r"\'", r'\"'):
                # combining characters...
                self.combining_char(line[1], line[2])
                line = line[3:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %r%s"
                                   % (line[:100], extra))
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + ", ".join(stack))
Exemple #5
0
    def subconvert(self, endchar=None, depth=0):
        #
        # Parses content, including sub-structures, until the character
        # 'endchar' is found (with no open structures), or until the end
        # of the input data is endchar is None.
        #
        stack = new_stack()
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" %
                               encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                name = m.group(1)
                entry = self.get_env_entry(name)
                # re-write to use the macro handler
                line = r"\%s %s" % (name, line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                entry = self.get_entry(envname)
                while stack and envname != stack[-1] \
                      and stack[-1] in entry.endcloses:
                    self.write(")%s\n" % stack.pop())
                if stack and envname == stack[-1]:
                    self.write(")%s\n" % entry.outputname)
                    del stack[-1]
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "c":
                    # Ugh!  This is a combining character...
                    endpos = m.end()
                    self.combining_char("c", line[endpos])
                    line = line[endpos + 1:]
                    continue
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!
                    pos = line.find("\\end{%s}" % macroname)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len("\\end{%s}" % macroname):]
                    continue
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                if entry.outputname and entry.empty:
                    self.write("e\n")
                #
                params, optional, empty = self.start_macro(macroname)
                # rip off the macroname
                if params:
                    line = line[m.end(1):]
                elif empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m and entry.outputname:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text and entry.outputname:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %r"
                                    % (pentry.name, macroname, line[:100]))
                            if entry.outputname:
                                self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            self.line = skip_white(line)[1:]
                            line = self.subconvert("}",
                                                   len(stack) + depth + 1)[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            if line[0] != "{":
                                raise LaTeXFormatError("missing content for " +
                                                       macroname)
                            self.line = line[1:]
                            line = self.subconvert("}", len(stack) + depth + 1)
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text" and pentry.text:
                        if entry.outputname and not opened:
                            opened = 1
                            stack.append(entry.name)
                            self.write("(%s\n" % entry.outputname)
                        #dbgmsg("--- text: %r" % pentry.text)
                        self.write("-%s\n" % encode(pentry.text))
                    elif pentry.type == "entityref":
                        self.write("&%s\n" % pentry.name)
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    if not implied_content:
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                continue
            if line[0] == endchar and not stack:
                self.line = line[1:]
                return self.line
            if line[0] == "}":
                # end of macro or group
                macroname = stack[-1]
                if macroname:
                    conversion = self.table[macroname]
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "~":
                # don't worry about the "tie" aspect of this command
                line = line[1:]
                self.write("- \n")
                continue
            if line[0] == "{":
                stack.append("")
                line = line[1:]
                continue
            if line[0] == "\\" and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            if line[:2] == r"\_":
                line = "_" + line[2:]
                continue
            if line[:2] in (r"\'", r'\"'):
                # combining characters...
                self.combining_char(line[1], line[2])
                line = line[3:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %r%s" %
                                   (line[:100], extra))
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: " +
                                   ", ".join(stack))