Python ITextNode Examples, kataja.parser.INodes.ITextNode Python Examples

Example #1

0

Show file

    def parse_curlies(self):
        """ Parse one curly-brace group from self.feed into an ITextNode.

        Whitespace does not end the node here: characters are collected until
        the closing curly brace (or until the feed runs out). Nested groups
        are parsed recursively and appended as child nodes.

        The last item of self.feed is expected to be the opening "{"; it is
        consumed, as is the closing "}" when one is found.
            :return: ITextNode holding the contents of the group
        """
        node = ITextNode()

        self.feed.pop()  # eat first "{"

        while self.feed:
            c = self.feed[-1]
            if c == '{':
                # Nested group: the recursive call consumes it (braces
                # included) and its result must be kept -- previously the
                # result was dropped and an unbound/stale name was appended.
                new_node = self.parse_curlies()
                node.append(new_node)
            elif c == '}':
                # closing brace ends this group
                self.feed.pop()
                break
            elif c == '\\':
                new_node = self.parse_command()
                node.append(new_node)
            elif c == '$':
                self.toggle_math_mode()
            elif c in one_character_commands:
                new_node = self.parse_one_character_command()
                node.append(new_node)
            else:
                # ordinary character, spaces included
                self.feed.pop()
                node.append(c)
        return node

Example #2

0

Show file

    def process(self, text):
        """ Lightweight parse: turn the value of a text element into INodes
        (intermediary nodes). The result is an ITextNode that may hold further
        ITextNodes and ICommandNodes, a single parsed node, or "" when there
        is nothing to parse.
            :param text: string to parse.
        """
        self.math_mode = False
        self.node = None
        stripped = text.strip() if text else text
        if not stripped:
            return ""

        # The feed is consumed from its end, so store the characters reversed.
        self.feed = list(reversed(stripped))
        collected = []
        while self.feed:
            size_before = len(self.feed)
            parsed = self.parse_word()
            if len(self.feed) >= size_before:
                # parse_word consumed nothing: drop one char so that we
                # cannot get stuck in an endless loop
                self.feed.pop()
                continue
            collected.append(parsed)

        if not collected:
            return ""
        if len(collected) == 1:
            return collected[0]
        return ITextNode(parts=collected).tidy()

Example #3

0

Show file

    def handle_startendtag(self, tag, attrs):
        """ Handle self-closing tags; only 'br' has any effect.

        In rows mode the node built so far is stored as a finished row and a
        fresh ITextNode (with an emptied stack) is started. In both modes the
        command mapped to the tag is then appended to the current node.
        """
        if tag != 'br':
            return
        if self.rows_mode:
            self.rows.append(self.current)
            # you may want to start again all currently open tags for the next line
            self.stack = []
            self.current = ITextNode()
        self.current.append(ICommandNode(html_to_command[tag]))

Example #4

0

Show file

 def continue_on_new_line(stack):
     """ Build a fresh stack for the next line that re-opens every command
     found in `stack`.

     The new stack starts with an empty ITextNode; each ICommandNode in
     `stack` is recreated from its command alone (without parts), nested
     inside the previously recreated node and pushed on the new stack.
     """
     rebuilt = [ITextNode()]
     open_commands = (entry for entry in stack if isinstance(entry, ICommandNode))
     for entry in open_commands:
         clone = ICommandNode(entry.command)
         # rebuilt is never empty, so its last item is always the parent
         rebuilt[-1].append(clone)
         rebuilt.append(clone)
     return rebuilt

Example #5

0

Show file

    def parse_command(self):
        """ Turn text into ICommandNodes. These are best understood as tags, where
         the tag is the command, and parts of the node are the scope of the tag.
         Reads a word from self.feed and stores it as a command, and then depending
         how the word ends, either ends the command node or reads the following
         curly-brace group as the content of the ICommandNode.

         The last item of self.feed is expected to be the leading backslash; it is
         consumed. A terminating space, '<', '>', '&', '}' or a backslash that
         starts the next command is left in the feed for the caller.
            :return: ICommandNode if a command word was read, ITextNode if only a
             curly-brace group was read, otherwise ''
        """
        parts = []
        command = ''
        self.feed.pop()  # this is the beginning "\"

        while self.feed:
            c = self.feed[-1]
            if c == '{':
                # the braced group is the scope of the command
                parts.append(self.parse_curlies())
                break
            elif c == '}':
                print('odd ending curly')
                break
            elif c == '\\':
                if not command:
                    # Two backslashes in a row are a LaTeX line break, not two
                    # command words: the second backslash is the command name.
                    self.feed.pop()
                    command += c
                # otherwise the backslash starts the next command; leave it
                break
            elif c == ' ' or c in ('<', '>', '&'):
                # these characters end the command word and are not consumed
                break
            elif c == '$':
                self.toggle_math_mode()
            else:
                self.feed.pop()
                command += c

        if command:
            # translate known LaTeX names, keep unknown ones as they are
            if command in latex_to_command:
                command = latex_to_command[command]
            return ICommandNode(command=command, parts=parts)
        if parts:
            return ITextNode(parts=parts)
        return ''

Example #6

0

Show file

class HTMLToINode(HTMLParser):
    """  Convert HTML to ICommandNodes and ITextNodes to use as field values. Doesn't handle
    brackets or tree parsing, only fields.

    With rows_mode=True, process() returns a list of nodes, one per row, where
    rows are separated by self-closing 'br' tags; otherwise it returns a single
    node.
    """
    def error(self, message):
        print('HTML parse error: ', message)

    def __init__(self, rows_mode=False):
        """
        :param rows_mode: if True, split the result into rows at 'br' tags
        """
        super().__init__(convert_charrefs=True)
        self.current = ITextNode()  # node that receives new content
        self.stack = []  # parents of self.current, outermost first
        self.rows_mode = rows_mode
        self.rows = []  # rows finished so far (rows mode only)

    def handle_starttag(self, tag, attrs):
        """ Open a command scope for tags that have a mapping in html_to_command;
        other tags are ignored. """
        if tag in html_to_command:
            command = html_to_command[tag]
            self.stack.append(self.current)
            new = ICommandNode(command)
            self.current.append(new)
            self.current = new

    def handle_endtag(self, tag):
        """ Close the innermost command scope.

        Only tags that handle_starttag pushed a scope for may pop one;
        otherwise the end tag of an ignored element would close the scope of
        an enclosing, still open command.
        """
        if tag in html_to_command and self.stack:
            self.current = self.stack.pop()

    def handle_startendtag(self, tag, attrs):
        """ Handle self-closing tags; only 'br' has any effect. """
        if tag == 'br':
            if self.rows_mode:
                self.rows.append(self.current)
                # you may want to start again all currently open tags for the next line
                self.stack = []
                self.current = ITextNode()

            command = html_to_command[tag]
            self.current.append(ICommandNode(command))

    def handle_data(self, data):
        """ Brackets and other tree defining structures are only found within data objects.
        If we are entering data to field, just take whatever it is here.
        :param data: text found between tags
        :return: None
        """
        self.current.append(data)

    def process(self, string):
        """ Parse `string` and return the resulting node(s).

        :param string: HTML source to convert
        :return: list of row nodes in rows mode, otherwise the current node
        """
        self.feed(string)
        # feed() may hold back trailing text (e.g. text ending in something
        # that could be an unfinished character reference); close() forces it
        # through handle_data before reset() would discard it.
        self.close()
        result = self.current
        self.reset()
        if self.rows_mode:
            # hand the rows over and start a new list, so that a later call
            # neither repeats these rows nor mutates the returned list
            rows, self.rows = self.rows, []
            rows.append(result)
            return rows
        else:
            return result

    def reset(self):
        """ Start from an empty node and stack, and reset the parser state.
        Also called by HTMLParser.__init__. """
        self.current = ITextNode()
        self.stack = []
        super().reset()

Example #7

0

Show file

    def process(self, doc):
        """ Convert the text blocks of `doc` into INodes, expressing character
        format changes as nested ICommandNodes.

        Walks the blocks from doc.firstBlock() up to doc.end(). Each block
        starts a new row; within a block every format range is compared with
        the previously seen format values, and a change opens a new
        ICommandNode or removes the matching one from the stack before the
        text of the range is appended.

        :param doc: object providing firstBlock() and end(); presumably a Qt
            QTextDocument -- TODO confirm
        :return: if any row is an ITextNode, an ITextNode whose parts are the
            rows, each followed by a newline string; otherwise the rows
            joined with newlines into one string
        """

        def removed(stack, command):
            """ Remove the last ICommandNode in `stack` whose command equals
            `command` (if there is one) and return the new top of the stack. """
            for item in reversed(stack):
                if isinstance(item, ICommandNode) and item.command == command:
                    stack.remove(item)
                    break
            return stack[-1]

        def continue_on_new_line(stack):
            """ Return a new stack: an empty ITextNode followed by fresh,
            nested copies (command only) of the ICommandNodes in `stack`. """
            new_stack = [ITextNode()]
            for item in stack:
                if isinstance(item, ICommandNode):
                    new_command = ICommandNode(item.command)
                    if new_stack:
                        new_stack[-1].append(new_command)
                    new_stack.append(new_command)
            return new_stack

        b = doc.firstBlock()
        end = doc.end()
        count = 0
        rows = []
        while b:
            # result is the node currently receiving text; stack[0] is the
            # root node of the row being built
            result = ITextNode()
            stack = [result]
            # format values of the block itself are the baseline against which
            # the first format range is compared
            cf = b.charFormat()
            #b.blockFormat().alignment(),
            caps = cf.fontCapitalization()
            family = cf.fontFamily()
            italic = cf.fontItalic()
            overline = cf.fontOverline()
            strikeout = cf.fontStrikeOut()
            weight = cf.fontWeight()
            underline = cf.fontUnderline()
            vertalign = cf.verticalAlignment()
            for frange in b.textFormats():
                #print('---- block %s ----' % count)
                #print(b.text())
                #print(frange.start, frange.length)
                cf = frange.format
                if caps != cf.fontCapitalization():
                    caps = cf.fontCapitalization()
                    # NOTE(review): 3 is presumably Qt's QFont.SmallCaps -- confirm
                    if caps == 3:
                        command = ICommandNode('smallcaps')
                        stack.append(command)
                        result.append(command)
                        result = command
                        #print('cap: ', caps)
                    else:
                        result = removed(stack, 'smallcaps')
                if family != cf.fontFamily():
                    # family changes are only tracked, no node is created
                    family = cf.fontFamily()
                    #command = ICommandNode('paa')
                    #stack.append(command)
                    #result.append(command)
                    #result = command
                    #print('family: ', family)
                if italic != cf.fontItalic():
                    italic = cf.fontItalic()
                    if italic:
                        command = ICommandNode('italic')
                        stack.append(command)
                        result.append(command)
                        result = command
                        #print('italic: ', cf.fontItalic())
                    else:
                        # italics may be on the stack as 'emph' or 'italic'
                        result = removed(stack, 'emph')
                        result = removed(stack, 'italic')
                if overline != cf.fontOverline():
                    # overline is ignored; the tracked value is not updated
                    pass
                    #command = ICommandNode('overline')
                    #stack.append(command)
                    #result.append(command)
                    #result = command
                    #print('overline: ', overline)
                if strikeout != cf.fontStrikeOut():
                    strikeout = cf.fontStrikeOut()
                    if strikeout:
                        command = ICommandNode('strikeout')
                        stack.append(command)
                        result.append(command)
                        result = command
                        #print('strikeout: ', strikeout)
                    else:
                        result = removed(stack, 'strikeout')
                if weight != cf.fontWeight():
                    weight = cf.fontWeight()
                    # NOTE(review): 50 is presumably Qt's normal font weight,
                    # so anything heavier counts as bold -- confirm
                    if weight > 50:
                        command = ICommandNode('bold')
                        stack.append(command)
                        result.append(command)
                        result = command
                        #print('weight: ', weight)
                    else:
                        result = removed(stack, 'bold')
                if underline != cf.fontUnderline():
                    underline = cf.fontUnderline()
                    if underline:
                        command = ICommandNode('underline')
                        stack.append(command)
                        result.append(command)
                        result = command
                        #print('underline: ', underline)
                    else:
                        result = removed(stack, 'underline')
                if vertalign != cf.verticalAlignment():
                    # first close the scope of the previous alignment
                    # (1 is treated as superscript, 2 as subscript) ...
                    if vertalign == 1:
                        result = removed(stack, 'sup')
                    elif vertalign == 2:
                        result = removed(stack, 'sub')
                    # ... then open a scope for the new one
                    vertalign = cf.verticalAlignment()
                    if vertalign == 1:
                        command = ICommandNode('sup')
                        stack.append(command)
                        result.append(command)
                        result = command
                        #print('vertical align: ', vertalign)
                    elif vertalign == 2:
                        command = ICommandNode('sub')
                        stack.append(command)
                        result.append(command)
                        result = command
                        #print('vertical align: ', vertalign)
                # text covered by this format range
                text_piece = b.text()[frange.start:frange.start + frange.length]
                lines = text_piece.splitlines()
                if len(lines) > 1:
                    # the range spans several lines: every line finishes a
                    # row, and the open commands are re-created for the next
                    for text_piece in lines:
                        result.append(text_piece)
                        base = stack[0].tidy(keep_node=False)
                        stack = continue_on_new_line(stack)

                        result = stack[-1]
                        rows.append(base)
                else:
                    result.append(text_piece)

            # the block is done: store what was built from its root node
            base = stack[0].tidy(keep_node=False)
            if base or b != end: # omit last empty line
                rows.append(base)
            if b == end:
                b = None
            else:
                b = b.next()
                count += 1
        # Return one node or one string instead of rows
        has_nodes = False
        for row in rows:
            if isinstance(row, ITextNode):
                has_nodes = True
                break
        if has_nodes:
            parts = []
            for row in rows:
                parts.append(row)
                parts.append('\n')
            return ITextNode(parts=parts)
        else:
            return '\n'.join(rows)

Example #8

0

Show file

    def parse_word(self, end_on_space=False, return_rows=False):
        """ Read characters from self.feed into an ITextNode. Special input
        (commands, curly braces, one character commands) is handed to the
        matching parser and the resulting node is appended.

        Parsing stops when the feed is empty, at an unmatched "}" (which is
        left in the feed), or -- if end_on_space is set -- at whitespace
        (which is consumed).
            :param end_on_space: stop at the first whitespace character
            :param return_rows: return a list of rows, split at 'br' commands,
             instead of a single node
        """
        current = ITextNode()
        finished_rows = []

        while self.feed:
            char = self.feed[-1]
            if char == '}':
                break
            if char == '{':
                current.append(self.parse_curlies())
                continue
            if char == '\\':
                parsed = self.parse_command()
                starts_new_row = (
                    return_rows
                    and isinstance(parsed, ICommandNode)
                    and parsed.command == 'br'
                )
                if starts_new_row:
                    # fixme: doesn't handle if some style scope continues across line break
                    finished_rows.append(current)
                    current = ITextNode()
                else:
                    current.append(parsed)
                continue
            if char == '$':
                self.toggle_math_mode()
                continue
            if char in one_character_commands:
                current.append(self.parse_one_character_command())
                continue
            if end_on_space and char.isspace():
                self.feed.pop()
                break
            # ordinary character
            current.append(self.feed.pop())

        current = current.tidy(keep_node=False)
        if not return_rows:
            return current
        if current:
            finished_rows.append(current)
        return finished_rows

Example #9

0

Show file

 def reset(self):
     """ Start again from an empty ITextNode and an empty stack, then let the
     parent class reset its own state. """
     self.current, self.stack = ITextNode(), []
     super().reset()

Example #10

0

Show file

 def __init__(self, rows_mode=False):
     """ Set up the parent parser with convert_charrefs enabled and start from
     an empty node, an empty stack and no rows.

     :param rows_mode: stored as self.rows_mode
     """
     super().__init__(convert_charrefs=True)
     self.current, self.stack = ITextNode(), []
     self.rows_mode, self.rows = rows_mode, []