コード例 #1
0
ファイル: blocks.py プロジェクト: Python3pkg/CommonMark-py
class Parser(object):
    def __init__(self, options=None):
        """Create a block parser holding a fresh, empty document.

        ``options`` is passed to the inline parser and kept on the instance
        as ``self.options``.  When it is omitted, each parser gets its own
        new empty dict.
        """
        # A literal ``{}`` default is evaluated once and would be shared (and
        # mutated in common) by every parser created without arguments.
        if options is None:
            options = {}
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.block_starts = BlockStarts()
        # Block currently receiving content, and its value at the start of
        # the line being processed.
        self.tip = self.doc
        self.oldtip = self.doc
        # Per-line cursor state.
        self.current_line = ''
        self.line_number = 0
        self.offset = 0
        self.column = 0
        self.next_nonspace = 0
        self.next_nonspace_column = 0
        self.indent = 0
        self.indented = False
        self.blank = False
        self.partially_consumed_tab = False
        self.all_closed = True
        self.last_matched_container = self.doc
        self.refmap = {}
        self.last_line_length = 0
        self.inline_parser = InlineParser(options)
        self.options = options

    def add_line(self):
        """ Add a line to the block at the tip.  We assume the tip
        can accept lines -- that check should be done before calling this."""
        if self.partially_consumed_tab:
            # Skip over tab
            self.offset += 1
            # Add space characters
            chars_to_tab = 4 - (self.column % 4)
            self.tip.string_content += (' ' * chars_to_tab)
        self.tip.string_content += (self.current_line[self.offset:] + '\n')

    def add_child(self, tag, offset):
        """Create a ``tag`` block under the nearest block that may hold it.

        Starting at the tip, every block whose class reports that it cannot
        contain ``tag`` is finalized in turn.  The new node is then appended
        to the tip, becomes the tip itself, and is returned.
        """
        while True:
            handler = getattr(import_module('CommonMark.blocks'),
                              to_camel_case(self.tip.t))
            if handler.can_contain(tag):
                break
            self.finalize(self.tip, self.line_number - 1)

        # Source positions are 1-based, offsets are 0-based.
        start = [self.line_number, offset + 1]
        child = Node(tag, [start, [0, 0]])
        child.string_content = ''
        self.tip.append_child(child)
        self.tip = child
        return child

    def close_unmatched_blocks(self):
        """Finalize and close any unmatched blocks."""
        if not self.all_closed:
            while self.oldtip != self.last_matched_container:
                parent = self.oldtip.parent
                self.finalize(self.oldtip, self.line_number - 1)
                self.oldtip = parent
            self.all_closed = True

    def find_next_nonspace(self):
        """Locate the first character after leading spaces and tabs.

        Scans ``current_line`` from ``offset`` and records the index found
        (``next_nonspace``), its column with tabs expanded to 4-column stops
        (``next_nonspace_column``), whether the rest of the line is blank,
        the indent relative to ``column``, and whether that indent reaches
        ``CODE_INDENT``.
        """
        line = self.current_line

        def char_at(index):
            # Past the end of the line there is no character: report ''.
            try:
                return line[index]
            except IndexError:
                return ''

        pos = self.offset
        col = self.column
        ch = char_at(pos)
        while ch == ' ' or ch == '\t':
            # A space is one column wide; a tab runs to the next tab stop.
            col += 1 if ch == ' ' else 4 - (col % 4)
            pos += 1
            ch = char_at(pos)

        self.blank = ch in ('\n', '\r', '')
        self.next_nonspace = pos
        self.next_nonspace_column = col
        self.indent = col - self.column
        self.indented = self.indent >= CODE_INDENT

    def advance_next_nonspace(self):
        self.offset = self.next_nonspace
        self.column = self.next_nonspace_column
        self.partially_consumed_tab = False

    def advance_offset(self, count, columns):
        current_line = self.current_line
        try:
            c = current_line[self.offset]
        except IndexError:
            c = None
        while count > 0 and c is not None:
            if c == '\t':
                chars_to_tab = 4 - (self.column % 4)
                if columns:
                    self.partially_consumed_tab = chars_to_tab > count
                    chars_to_advance = min(count, chars_to_tab)
                    self.column += chars_to_advance
                    self.offset += 0 if self.partially_consumed_tab else 1
                    count -= chars_to_advance
                else:
                    self.partially_consumed_tab = False
                    self.column += chars_to_tab
                    self.offset += 1
                    count -= 1
            else:
                self.partially_consumed_tab = False
                self.offset += 1
                # assume ascii; block starts are ascii
                self.column += 1
                count -= 1
            try:
                c = current_line[self.offset]
            except IndexError:
                c = None

    def incorporate_line(self, ln):
        """Analyze a line of text and update the document appropriately.

        We parse markdown text by calling this on each line of input,
        then finalizing the document.

        The line is handled in three phases:

        1. walk down the chain of open blocks and let each one try to
           continue on this line;
        2. try to start new blocks inside the deepest matched container;
        3. add whatever text remains to the appropriate block.

        ``ln`` is one line of input.  Nothing is returned; the outcome is
        recorded in the parser state and in the document tree.
        """
        all_matched = True

        # Reset the per-line cursor state and remember where the tip was
        # before this line was looked at.
        container = self.doc
        self.oldtip = self.tip
        self.offset = 0
        self.column = 0
        self.blank = False
        self.partially_consumed_tab = False
        self.line_number += 1

        # replace NUL characters for security
        if re.search(r'\u0000', ln) is not None:
            ln = re.sub(r'\0', '\uFFFD', ln)

        self.current_line = ln

        # For each containing block, try to parse the associated line start.
        # Bail out on failure: container will point to the last matching block.
        # Set all_matched to false if not all containers match.
        last_child = container.last_child
        while last_child and last_child.is_open:
            container = last_child

            self.find_next_nonspace()
            block_class = getattr(
                import_module('CommonMark.blocks'),
                to_camel_case(container.t))
            rv = block_class.continue_(self, container)
            if rv == 0:
                # we've matched, keep going
                pass
            elif rv == 1:
                # we've failed to match a block
                all_matched = False
            elif rv == 2:
                # we've hit end of line for fenced code close and can return
                self.last_line_length = len(ln)
                return
            else:
                raise ValueError('returned illegal value, must be 0, 1, or 2')

            if not all_matched:
                # back up to last matching block
                container = container.parent
                break

            last_child = container.last_child

        # If the deepest matched container is the old tip, there is nothing
        # left open below it that would need closing.
        self.all_closed = (container == self.oldtip)
        self.last_matched_container = container

        block_class = getattr(import_module('CommonMark.blocks'),
                              to_camel_case(container.t))
        # A matched non-paragraph block that accepts lines takes the rest of
        # the line as it is, so the block-start loop below is skipped.
        matched_leaf = container.t != 'paragraph' and block_class.accepts_lines
        starts = self.block_starts
        starts_len = len(starts.METHODS)
        # Unless last matched container is a code block, try new container
        # starts, adding children to the last matched container:
        while not matched_leaf:
            self.find_next_nonspace()

            # this is a little performance optimization:
            if not self.indented and \
               not re.search(reMaybeSpecial, ln[self.next_nonspace:]):
                self.advance_next_nonspace()
                break

            # Try each block-start method in order until one reports a match.
            i = 0
            while i < starts_len:
                res = getattr(starts, starts.METHODS[i])(self, container)
                if res == 1:
                    # A block was started; descend into it and look for
                    # further block starts on the rest of the line.
                    container = self.tip
                    break
                elif res == 2:
                    # A block was started and no further starts are tried
                    # on this line.
                    container = self.tip
                    matched_leaf = True
                    break
                else:
                    i += 1

            if i == starts_len:
                # nothing matched
                self.advance_next_nonspace()
                break

        # What remains at the offset is a text line. Add the text to the
        # appropriate container.
        if not self.all_closed and not self.blank and \
           self.tip.t == 'paragraph':
            # lazy paragraph continuation
            self.add_line()
        else:
            # not a lazy continuation
            # finalize any blocks not matched
            self.close_unmatched_blocks()
            if self.blank and container.last_child:
                container.last_child.last_line_blank = True

            t = container.t

            # Block quote lines are never blank as they start with >
            # and we don't count blanks in fenced code for purposes of
            # tight/loose lists or breaking out of lists.  We also
            # don't set last_line_blank on an empty list item, or if we
            # just closed a fenced block.
            last_line_blank = self.blank and \
                not (t == 'block_quote' or
                     (t == 'code_block' and container.is_fenced) or
                     (t == 'item' and
                      not container.first_child and
                      container.sourcepos[0][0] == self.line_number))

            # propagate last_line_blank up through parents:
            cont = container
            while cont:
                cont.last_line_blank = last_line_blank
                cont = cont.parent

            block_class = getattr(import_module('CommonMark.blocks'),
                                  to_camel_case(t))
            if block_class.accepts_lines:
                self.add_line()
                # if HtmlBlock, check for end condition
                if t == 'html_block' and \
                   container.html_block_type >= 1 and \
                   container.html_block_type <= 5 and \
                   re.search(
                       reHtmlBlockClose[container.html_block_type],
                       self.current_line[self.offset:]):
                    self.finalize(container, self.line_number)
            elif self.offset < len(ln) and not self.blank:
                # create a paragraph container for one line
                container = self.add_child('paragraph', self.offset)
                self.advance_next_nonspace()
                self.add_line()

        # Remembered so that finalize() can record the end column of a block.
        self.last_line_length = len(ln)

    def finalize(self, block, line_number):
        """Close ``block`` and hand the tip back to its parent.

        The block is marked closed, its end position is recorded as
        ``[line_number, last_line_length]``, and the type-specific
        postprocessing is delegated to the ``finalize`` of the matching
        class in ``CommonMark.blocks``.
        """
        parent = block.parent
        block.is_open = False
        block.sourcepos[1] = [line_number, self.last_line_length]

        blocks_module = import_module('CommonMark.blocks')
        handler = getattr(blocks_module, to_camel_case(block.t))
        handler.finalize(self, block)

        self.tip = parent

    def process_inlines(self, block):
        """
        Walk through a block & children recursively, parsing string content
        into inline content where appropriate.
        """
        walker = block.walker()
        self.inline_parser.refmap = self.refmap
        self.inline_parser.options = self.options
        event = walker.nxt()
        while event is not None:
            node = event['node']
            t = node.t
            if not event['entering'] and (t == 'paragraph' or t == 'heading'):
                self.inline_parser.parse(node)
            event = walker.nxt()

    def parse(self, my_input):
        """Parse ``my_input`` and return the root node of the resulting AST.

        Parser state is reset, every line is fed to ``incorporate_line``,
        all blocks still open are finalized, and inline content is parsed.
        """
        root = Node('document', [[1, 1], [0, 0]])
        self.doc = root
        self.tip = root
        self.refmap = {}
        self.line_number = 0
        self.last_line_length = 0
        self.offset = 0
        self.column = 0
        self.last_matched_container = root
        self.current_line = ''

        lines = re.split(reLineEnding, my_input)
        if my_input and my_input[-1] == '\n':
            # A final newline leaves an empty trailing entry, not a line.
            lines.pop()
        line_count = len(lines)

        for line in lines:
            self.incorporate_line(line)
        while self.tip:
            self.finalize(self.tip, line_count)
        self.process_inlines(self.doc)
        return self.doc
コード例 #2
0
ファイル: blocks.py プロジェクト: brechtm/CommonMark-py
class Parser:

    def __init__(self, subject=None, pos=0):
        """Set up an empty document and store ``subject`` and ``pos``."""
        root = Node.makeNode("Document", 1, 1)
        self.doc = root
        self.subject, self.pos = subject, pos
        # The tip is the block currently receiving content.
        self.tip = root
        self.refmap = {}
        self.inlineParser = InlineParser()

    def acceptsLines(self, block_type):
        """ Returns true if block type can accept lines of text."""
        return block_type == 'Paragraph' or \
            block_type == 'IndentedCode' or \
            block_type == 'FencedCode' or \
            block_type == 'HtmlBlock'

    def endsWithBlankLine(self, block):
        """ Returns true if block ends with a blank line,
        descending if needed into lists and sublists."""
        if block.last_line_blank:
            return True
        if (block.t == "List" or block.t == "Item") and \
           len(block.children) > 0:
            return self.endsWithBlankLine(block.children[-1])
        else:
            return False

    def breakOutOfLists(self, block, line_number):
        """ Break out of all containing lists, resetting the tip of the
        document to the parent of the highest list, and finalizing
        all the lists.  (This is used to implement the "two blank lines
        break out of all lists" feature.)"""
        b = block
        last_list = None
        while True:
            if (b.t == "List"):
                last_list = b
            b = b.parent
            if not b:
                break

        if (last_list):
            while block != last_list:
                self.finalize(block, line_number)
                block = block.parent
            self.finalize(last_list, line_number)
            self.tip = last_list.parent

    def addLine(self, ln, offset):
        """ Add a line to the block at the tip.  We assume the tip
        can accept lines -- that check should be done before calling this."""
        s = ln[offset:]
        if not self.tip.is_open:
            raise Exception(
                "Attempted to add line (" + ln + ") to closed container.")
        self.tip.strings.append(s)

    def addChild(self, tag, line_number, offset):
        """Create a ``tag`` node under the nearest block that may hold it.

        While the tip is not a Document, BlockQuote or Item (or a List
        receiving an Item), it is finalized so that its parent is tried
        next.  The new node becomes the tip and is returned.
        """
        always_containers = ("Document", "BlockQuote", "Item")

        def tip_accepts_child():
            kind = self.tip.t
            if kind in always_containers:
                return True
            return kind == "List" and tag == "Item"

        while not tip_accepts_child():
            self.finalize(self.tip, line_number - 1)

        # Columns are 1-based, offsets 0-based.
        child = Node.makeNode(tag, line_number, offset + 1)
        child.parent = self.tip
        self.tip.children.append(child)
        self.tip = child
        return child

    def listsMatch(self, list_data, item_data):
        """ Returns true if the two list items are of the same type,
        with the same delimiter and bullet character.  This is used
        in agglomerating list items into lists."""
        return (list_data.get("type", None) ==
                item_data.get("type", None) and
                list_data.get("delimiter", None) ==
                item_data.get("delimiter", None) and
                list_data.get("bullet_char", None) ==
                item_data.get("bullet_char", None))

    def parseListMarker(self, ln, offset):
        """Parse a list marker at ``ln[offset:]``.

        Returns None when the text is a horizontal rule or does not start
        with a bullet or ordered list marker.  Otherwise returns a dict
        with:

        * "type": 'Bullet' or 'Ordered'
        * "bullet_char" (bullets) or "start" and "delimiter" (ordered)
        * "padding": the width of the marker including the spaces after
          it, or the marker plus a single space when it is followed by no
          space, by five or more spaces, or by nothing but spaces.
        """
        rest = ln[offset:]
        if re.match(reHrule, rest):
            return None

        data = {}
        match = re.search(reBulletListMarker, rest)
        if match:
            spaces_after_marker = len(match.group(1))
            data['type'] = 'Bullet'
            data['bullet_char'] = match.group(0)[0]
        else:
            match = re.search(reOrderedListMarker, rest)
            if not match:
                return None
            spaces_after_marker = len(match.group(3))
            data['type'] = 'Ordered'
            data['start'] = int(match.group(1))
            data['delimiter'] = match.group(2)

        marker = match.group(0)
        # The item is blank when the marker (with its trailing spaces) is
        # all there is.  The lengths must be compared: comparing the matched
        # text itself with an integer is always False.
        blank_item = len(marker) == len(rest)
        if spaces_after_marker >= 5 or spaces_after_marker < 1 or blank_item:
            data['padding'] = len(marker) - spaces_after_marker + 1
        else:
            data['padding'] = len(marker)
        return data

    def parseIAL(self, ln):
        values = []
        css_class = re.findall(r"\.(\w+) *", ln)
        if css_class:
            values.append(("class", " ".join(css_class)))
        css_id = re.findall(r"\#.(\w+) *", ln)
        if css_id:
            values.append(("id", css_id[0]))
        keyed_values = re.findall(r"(\w+)(?:=(\w+))? *", ln)
        if keyed_values:
            values += keyed_values

        return dict(values)

    def incorporateLine(self, ln, line_number):
        """ Analyze a line of text and update the document appropriately.
        We parse markdown text by calling this on each line of input,
        then finalizing the document.

        ``ln`` is the text of the line and ``line_number`` its 1-based
        number.  The line is handled in three phases: match it against
        the chain of open blocks, try to open new blocks, then add the
        remaining text to the right block.  Nothing is returned."""
        all_matched = True
        offset = 0
        CODE_INDENT = 4
        blank = None
        already_done = False

        container = self.doc
        oldtip = self.tip

        ln = detabLine(ln)

        # Phase 1: descend through the open blocks and check that each one
        # continues on this line.  On failure, container ends up pointing at
        # the last block that did match.
        while len(container.children) > 0:
            last_child = container.children[-1]
            if not last_child.is_open:
                break
            container = last_child

            match = matchAt(r"[^ ]", ln, offset)
            if match is None:
                first_nonspace = len(ln)
                blank = True
            else:
                first_nonspace = match
                blank = False
            indent = first_nonspace - offset
            if container.t == "BlockQuote":
                matched = bool()
                if len(ln) > first_nonspace and len(ln) > 0:
                    matched = ln[first_nonspace] == ">"
                matched = indent <= 3 and matched
                if matched:
                    offset = first_nonspace + 1
                    # One optional space after '>' belongs to the marker.
                    try:
                        if ln[offset] == " ":
                            offset += 1
                    except IndexError:
                        pass
                else:
                    all_matched = False
            elif container.t == "Item":
                if (indent >= container.list_data['marker_offset'] +
                   container.list_data['padding']):
                    offset += container.list_data[
                        'marker_offset'] + container.list_data['padding']
                elif blank:
                    offset = first_nonspace
                else:
                    all_matched = False
            elif container.t == "IndentedCode":
                if indent >= CODE_INDENT:
                    offset += CODE_INDENT
                elif blank:
                    offset = first_nonspace
                else:
                    all_matched = False
            elif container.t in ["ATXHeader",
                                 "SetextHeader",
                                 "HorizontalRule"]:
                # Single-line blocks never continue onto another line.
                all_matched = False
            elif container.t == "FencedCode":
                # Skip at most as many spaces as the opening fence had.
                i = container.fence_offset
                while i > 0 and len(ln) > offset and ln[offset] == " ":
                    offset += 1
                    i -= 1
            elif container.t == "HtmlBlock":
                if blank:
                    all_matched = False
            elif container.t == "Paragraph":
                if blank:
                    container.last_line_blank = True
                    all_matched = False
            if not all_matched:
                container = container.parent
                break
        last_matched_container = container

        def closeUnmatchedBlocks(self, already_done, oldtip):
            """ This function is used to finalize and close any unmatched
            blocks.  We aren't ready to do this now, because we might
            have a lazy paragraph continuation, in which case we don't
            want to close unmatched blocks.  So we store this closure for
            use later, when we have more information."""
            while not already_done and not oldtip == last_matched_container:
                self.finalize(oldtip, line_number)
                oldtip = oldtip.parent
            return True, oldtip

        if blank and container.last_line_blank:
            self.breakOutOfLists(container, line_number)

        # Phase 2: unless the matched container holds literal text, try to
        # start new blocks at the current offset.
        while container.t != "ExtensionBlock" and \
                container.t != "FencedCode" and \
                container.t != "IndentedCode" and \
                container.t != "HtmlBlock" and \
                matchAt(r"^[ #`~*+_=<>0-9-{]", ln, offset) is not None:
            match = matchAt("[^ ]", ln, offset)
            if match is None:
                first_nonspace = len(ln)
                blank = True
            else:
                first_nonspace = match
                blank = False
            ATXmatch = re.search(reATXHeaderMarker, ln[first_nonspace:])
            FENmatch = re.search(reCodeFence, ln[first_nonspace:])
            PARmatch = re.search(reSetextHeaderLine, ln[first_nonspace:])
            IALmatch = re.search(r"^{:((\}|[^}])*)} *$", ln[first_nonspace:])
            EXTmatch = re.search(r"^{::((\\\}|[^\\}])*)/?} *$",
                                 ln[first_nonspace:])
            data = self.parseListMarker(ln, first_nonspace)

            indent = first_nonspace - offset
            if data:
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                data['marker_offset'] = indent
                offset = first_nonspace + data['padding']
                # Open a new list unless the item fits the current one.
                if not container.t == "List" or not self.listsMatch(
                   container.list_data, data):
                    container = self.addChild(
                        "List", line_number, first_nonspace)
                    container.list_data = data
                container = self.addChild(
                    "Item", line_number, first_nonspace)
                container.list_data = data
            elif indent >= CODE_INDENT:
                # Indented code cannot interrupt a paragraph.
                if not self.tip.t == "Paragraph" and not blank:
                    offset += CODE_INDENT
                    already_done, oldtip = closeUnmatchedBlocks(
                        self, already_done, oldtip)
                    container = self.addChild(
                        'IndentedCode', line_number, offset)
                else:
                    break
            elif len(ln) > first_nonspace and ln[first_nonspace] == ">":
                offset = first_nonspace + 1
                try:
                    if ln[offset] == " ":
                        offset += 1
                except IndexError:
                    pass
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild("BlockQuote", line_number, offset)
            elif EXTmatch:
                args = EXTmatch.group(1)
                keyed_values = re.findall(r"(\w+)(?:=(\w+))? *", args)
                offset = first_nonspace + len(EXTmatch.group(0))
                # NOTE(review): this print and the ones below look like
                # leftover debug output written to stdout - confirm whether
                # they can be removed.
                print("EXT {} {}".format(args, offset))
                already_done, oldtip = closeUnmatchedBlocks(self,
                                                            already_done,
                                                            oldtip)
                container = self.addChild("ExtensionBlock", line_number,
                                          first_nonspace)
                # The first word names the extension, the rest are attributes.
                container.title = keyed_values.pop(0)[0]
                container.attributes = dict(keyed_values)
                print(EXTmatch.group(0))
                print(args)
                # A '/' just before the closing brace makes the block
                # self-closing.
                if (EXTmatch.group(0)[-2] == '/'):
                    self.finalize(container, line_number)

                break
            elif IALmatch:
                offset = first_nonspace + len(IALmatch.group(0))
                print("Found {}".format(IALmatch.group(0)))
                print("blank {}".format(blank))
                print("container {} {}".format(
                    self.tip.t,
                    container.last_line_blank))
                if blank:
                    # FIXME
                    # attributes.update(self.parseIAL(IALmatch.group(1)))
                    pass
                else:
                    self.tip.attributes = self.parseIAL(IALmatch.group(1))
                break
            elif ATXmatch:
                offset = first_nonspace + len(ATXmatch.group(0))
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild(
                    "ATXHeader", line_number, first_nonspace)
                container.level = len(ATXmatch.group(0).strip())
                # Strip the closing '#' run; an escaped '\#' is kept.  The
                # replacement is a raw string so that '\g' is not an invalid
                # string escape; its value is unchanged.
                if not re.search(r'\\#', ln[offset:]) is None:
                    container.strings = [
                        re.sub(r'(?:(\\#) *#*| *#+) *$', r'\g<1>', ln[offset:])]
                else:
                    container.strings = [
                        re.sub(r'(?:(\\#) *#*| *#+) *$', '', ln[offset:])]
                break
            elif FENmatch:
                fence_length = len(FENmatch.group(0))
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild(
                    "FencedCode", line_number, first_nonspace)
                container.fence_length = fence_length
                container.fence_char = FENmatch.group(0)[0]
                container.fence_offset = first_nonspace - offset
                offset = first_nonspace + fence_length
                break
            elif not matchAt(reHtmlBlockOpen, ln, first_nonspace) is None:
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild(
                    'HtmlBlock', line_number, first_nonspace)
                break
            elif container.t == "Paragraph" and \
                    len(container.strings) == 1 and PARmatch:
                # A one-line paragraph followed by an underline becomes a
                # setext header.
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container.t = "SetextHeader"
                container.level = 1 if PARmatch.group(0)[0] == '=' else 2
                offset = len(ln)
            elif not matchAt(reHrule, ln, first_nonspace) is None:
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild(
                    "HorizontalRule", line_number, first_nonspace)
                offset = len(ln) - 1
                break
            else:
                break
            if self.acceptsLines(container.t):
                break

        # Phase 3: what remains at the offset is text; add it to the
        # appropriate block.
        match = matchAt(r"[^ ]", ln, offset)
        if match is None:
            first_nonspace = len(ln)
            blank = True
        else:
            first_nonspace = match
            blank = False
        indent = first_nonspace - offset

        if not self.tip == last_matched_container and \
           not blank and self.tip.t == "Paragraph" and \
           len(self.tip.strings) > 0:
            # Lazy paragraph continuation.
            self.last_line_blank = False
            self.addLine(ln, offset)
        else:
            already_done, oldtip = closeUnmatchedBlocks(
                self, already_done, oldtip)
            container.last_line_blank = \
                blank and \
                not (container.t == "BlockQuote" or
                     container.t == "FencedCode" or
                     (container.t == "Item" and
                      len(container.children) == 0 and
                      container.start_line == line_number))
            cont = container
            while cont.parent:
                cont.parent.last_line_blank = False
                cont = cont.parent
            # The branches below form ONE dispatch chain.  The ExtensionBlock
            # test used to be a separate ``if``, which let IndentedCode fall
            # through to the final ``else`` and add every line a second time.
            if container.t == "IndentedCode" or container.t == "HtmlBlock":
                self.addLine(ln, offset)
            elif container.t == "ExtensionBlock":
                # A closing '{:/...}' tag ends the block; anything else is
                # content.
                EXTmatch = re.search(r"^{:/((\\\}|[^\\}])*)} *$",
                                     ln[first_nonspace:])
                if EXTmatch:
                    self.finalize(container, line_number)
                else:
                    self.addLine(ln, offset)
            elif container.t == "FencedCode":
                # Does this line close the fence?
                match = bool()
                if len(ln) > 0:
                    match = len(ln) > first_nonspace and \
                        ln[first_nonspace] == container.fence_char and \
                        re.match(
                            r"^(?:`{3,}|~{3,})(?= *$)",
                            ln[first_nonspace:])
                match = indent <= 3 and match
                FENmatch = re.search(
                    r"^(?:`{3,}|~{3,})(?= *$)", ln[first_nonspace:])
                if match and len(FENmatch.group(0)) >= container.fence_length:
                    self.finalize(container, line_number)
                else:
                    self.addLine(ln, offset)
            elif container.t in ["ATXHeader", "SetextHeader", "HtmlBlock"]:
                # nothing to do; we already added the contents.
                pass
            else:
                if self.acceptsLines(container.t):
                    self.addLine(ln, first_nonspace)
                elif blank:
                    pass
                elif container.t != "HorizontalRule" and \
                        container.t != "SetextHeader":
                    container = self.addChild(
                        "Paragraph", line_number, first_nonspace)
                    self.addLine(ln, first_nonspace)
                else:
                    # print("Line " + str(line_number) +
                    #       " with container type " +
                    #       container.t + " did not match any condition.")
                    pass

    def finalize(self, block, line_number):
        """ Finalize a block.  Close it and do any necessary postprocessing,
        e.g. creating string_content from strings, setting the 'tight'
        or 'loose' status of a list, and parsing the beginnings
        of paragraphs for reference definitions.  Reset the tip to the
        parent of the closed block."""
        if (not block.is_open):
            return 0

        block.is_open = False
        if (line_number > block.start_line):
            block.end_line = line_number - 1
        else:
            block.end_line = line_number

        if (block.t == "Paragraph"):
            block.string_content = ""
            for i, line in enumerate(block.strings):
                block.strings[i] = re.sub(r'^  *', '', line, re.MULTILINE)
            block.string_content = '\n'.join(block.strings)

            pos = self.inlineParser.parseReference(
                block.string_content, self.refmap)
            while (block.string_content[0] == "[" and pos):
                block.string_content = block.string_content[pos:]
                if (isBlank(block.string_content)):
                    block.t = "ReferenceDef"
                    break
                pos = self.inlineParser.parseReference(
                    block.string_content, self.refmap)
        elif (block.t in ["ATXHeader", "SetextHeader", "HtmlBlock"]):
            block.string_content = "\n".join(block.strings)
        elif (block.t == "IndentedCode"):
            block.string_content = re.sub(
                r"(\n *)*$", "\n", "\n".join(block.strings))
        elif (block.t == "FencedCode"):
            block.info = unescape(block.strings[0].strip())
            if (len(block.strings) == 1):
                block.string_content = ""
            else:
                block.string_content = "\n".join(block.strings[1:]) + "\n"
        elif (block.t == "List"):
            block.tight = True

            numitems = len(block.children)
            i = 0
            while (i < numitems):
                item = block.children[i]
                last_item = (i == numitems-1)
                if (self.endsWithBlankLine(item) and not last_item):
                    block.tight = False
                    break
                numsubitems = len(item.children)
                j = 0
                while (j < numsubitems):
                    subitem = item.children[j]
                    last_subitem = j == (numsubitems - 1)
                    if (self.endsWithBlankLine(subitem) and
                       not (last_item and last_subitem)):
                        block.tight = False
                        break
                    j += 1
                i += 1
        else:
            pass

        self.tip = block.parent

    def processInlines(self, block):
        """ Walk through a block & children recursively, parsing string content
        into inline content where appropriate."""
        if block.t in ["ATXHeader", "Paragraph", "SetextHeader"]:
            block.inline_content = self.inlineParser.parse(
                block.string_content.strip(), self.refmap)
            block.string_content = ""

        if block.children:
            for i in block.children:
                self.processInlines(i)

    def parse(self, my_input):
        """ Parse ``my_input`` and return the root of the document AST.

        The document is reset, every line is fed to ``incorporateLine``
        with its 1-based number, blocks still open are finalized, and
        inline content is processed."""
        root = Node.makeNode("Document", 1, 1)
        self.doc = root
        self.tip = root
        self.refmap = {}

        # A single trailing newline does not start another line.
        text = re.sub(r'\n$', '', my_input)
        lines = re.split(reLineEnding, text)
        for number, line in enumerate(lines, 1):
            self.incorporateLine(line, number)

        total = len(lines)
        while self.tip:
            self.finalize(self.tip, total)
        self.processInlines(self.doc)
        return self.doc
コード例 #3
0
ファイル: blocks.py プロジェクト: ashang/CommonMark-py
class Parser(object):
    def __init__(self, options=None):
        """Create a block parser holding an empty document.

        :param options: optional dict of parser options.  It is stored on
            the instance and handed to the InlineParser.  When omitted, a
            fresh empty dict is created for this instance.
        """
        # A literal ``{}`` default is evaluated once at definition time and
        # would be shared by every Parser created without arguments.
        if options is None:
            options = {}

        # Document tree and the currently open ("tip") block.
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.block_starts = BlockStarts()
        self.tip = self.doc
        self.oldtip = self.doc

        # Position within the line currently being processed.
        self.current_line = ''
        self.line_number = 0
        self.offset = 0
        self.column = 0
        self.next_nonspace = 0
        self.next_nonspace_column = 0
        self.indent = 0
        self.indented = False
        self.blank = False
        self.partially_consumed_tab = False

        # Bookkeeping carried from line to line.
        self.all_closed = True
        self.last_matched_container = self.doc
        self.refmap = {}
        self.last_line_length = 0

        self.inline_parser = InlineParser(options)
        self.options = options

    def break_out_of_lists(self, block):
        """
        Close every list that encloses `block`.

        `block` and its ancestors are scanned for the outermost node of
        type 'list'.  If one exists, all blocks from `block` up to and
        including that list are finalized with the current line number and
        the tip is set to the list's parent; otherwise nothing changes.
        (This implements "two blank lines break out of all lists".)
        """
        # Find the highest 'list' on the path from `block` to the root.
        node = block
        outermost_list = node if node.t == 'list' else None
        node = node.parent
        while node:
            if node.t == 'list':
                outermost_list = node
            node = node.parent

        if not outermost_list:
            return

        current = block
        while current != outermost_list:
            self.finalize(current, self.line_number)
            current = current.parent
        self.finalize(outermost_list, self.line_number)
        self.tip = outermost_list.parent

    def add_line(self):
        """Append the rest of the current line to the tip's string content.

        The text from ``self.offset`` to the end of ``self.current_line``
        is added, followed by a newline.  When a tab was only partially
        consumed, the tab character is skipped and replaced by the number
        of spaces needed to reach the next multiple-of-4 column.  The
        caller is responsible for checking that the tip accepts lines.
        """
        tip = self.tip
        if self.partially_consumed_tab:
            # Step over the tab character itself ...
            self.offset += 1
            # ... and stand in spaces for the part of it not yet consumed.
            tip.string_content += ' ' * (4 - (self.column % 4))
        remainder = self.current_line[self.offset:]
        tip.string_content += remainder + '\n'

    def add_child(self, tag, offset):
        """Create a block of type `tag` under the nearest block that can
        hold it and make it the new tip.

        Starting at the tip, blocks whose class (looked up in
        ``CommonMark.blocks`` by camel-cased type name) reports that it
        cannot contain `tag` are finalized with the previous line number.
        The new node starts at the current line and at column
        ``offset + 1``, with empty string content.  Returns the new node.
        """
        while True:
            block_class = getattr(import_module('CommonMark.blocks'),
                                  to_camel_case(self.tip.t))
            if block_class.can_contain(tag):
                break
            self.finalize(self.tip, self.line_number - 1)

        start_position = [self.line_number, offset + 1]
        child = Node(tag, [start_position, [0, 0]])
        child.string_content = ''
        self.tip.append_child(child)
        self.tip = child
        return child

    def close_unmatched_blocks(self):
        """Finalize the open blocks that the current line did not match.

        Does nothing when ``all_closed`` is already set.  Otherwise walks
        from ``oldtip`` up through its parents, finalizing each block with
        the previous line's number, until ``last_matched_container`` is
        reached, and then sets ``all_closed``.
        """
        if self.all_closed:
            return
        while self.oldtip != self.last_matched_container:
            closing = self.oldtip
            # Read the parent before finalizing the block.
            next_up = closing.parent
            self.finalize(closing, self.line_number - 1)
            self.oldtip = next_up
        self.all_closed = True

    def find_next_nonspace(self):
        """Locate the first non-space character at or after ``self.offset``.

        Spaces and tabs are skipped (a tab advances the column to the next
        multiple of 4) and the following attributes are updated:

        * ``next_nonspace`` / ``next_nonspace_column`` -- index and column
          at which the scan stopped;
        * ``blank`` -- true when the scan stopped on a newline, a carriage
          return, or the end of the line;
        * ``indent`` -- number of columns skipped, relative to
          ``self.column``;
        * ``indented`` -- whether ``indent`` is at least ``CODE_INDENT``.
        """
        line = self.current_line
        line_length = len(line)
        pos = self.offset
        col = self.column

        # Remains true unless the scan stops on a character other than an
        # end-of-line marker.
        is_blank = True
        while pos < line_length:
            ch = line[pos]
            if ch == ' ':
                col += 1
            elif ch == '\t':
                col += 4 - (col % 4)
            else:
                is_blank = ch == '\n' or ch == '\r'
                break
            pos += 1

        self.blank = is_blank
        self.next_nonspace = pos
        self.next_nonspace_column = col
        self.indent = col - self.column
        self.indented = self.indent >= CODE_INDENT

    def advance_next_nonspace(self):
        """Move the current position to the recorded next non-space spot.

        Copies ``next_nonspace`` and ``next_nonspace_column`` into
        ``offset`` and ``column`` and clears the partially-consumed-tab
        flag.
        """
        self.partially_consumed_tab = False
        self.offset, self.column = (
            self.next_nonspace, self.next_nonspace_column)

    def advance_offset(self, count, columns):
        """Advance ``offset`` and ``column`` within the current line.

        :param count: how far to advance.  Every non-tab character uses up
            one unit.  A tab uses up one unit when `columns` is false, and
            as many units as columns it covers when `columns` is true.
        :param columns: when true, a tab may be consumed only in part: if
            `count` runs out before the next multiple-of-4 column,
            ``column`` moves but ``offset`` stays on the tab and
            ``partially_consumed_tab`` is set.

        Advancing stops early at the end of the line.
        """
        current_line = self.current_line
        line_length = len(current_line)
        while count > 0 and self.offset < line_length:
            if current_line[self.offset] == '\t':
                chars_to_tab = 4 - (self.column % 4)
                if columns:
                    self.partially_consumed_tab = chars_to_tab > count
                    chars_to_advance = min(count, chars_to_tab)
                    self.column += chars_to_advance
                    # Stay on the tab while part of it is still unconsumed.
                    if not self.partially_consumed_tab:
                        self.offset += 1
                    count -= chars_to_advance
                else:
                    self.partially_consumed_tab = False
                    self.column += chars_to_tab
                    self.offset += 1
                    # `count` is the local budget, not an attribute; the
                    # former ``self.count -= 1`` raised AttributeError.
                    count -= 1
            else:
                self.partially_consumed_tab = False
                self.offset += 1
                # assume ascii; block starts are ascii
                self.column += 1
                count -= 1

    def incorporate_line(self, ln):
        """Analyze a line of text and update the document appropriately.

        We parse markdown text by calling this on each line of input,
        then finalizing the document.

        The work happens in three phases:

        1. Walk down the chain of open blocks (last child of last child
           ...) and let each block's class decide, via ``continue_``,
           whether it continues on this line.
        2. Unless the deepest matched block takes raw lines, try the
           block-start methods repeatedly to open new blocks.
        3. Add the remaining text either as a lazy paragraph continuation
           or to the final container (creating a paragraph if needed).

        :param ln: one line of input; NUL characters are replaced with
            U+FFFD before processing.
        :raises ValueError: if a block's ``continue_`` returns something
            other than 0, 1 or 2.
        """
        all_matched = True

        container = self.doc
        # Remember the tip from before this line; close_unmatched_blocks()
        # walks up from it.
        self.oldtip = self.tip
        self.offset = 0
        self.column = 0
        self.blank = False
        self.partially_consumed_tab = False
        self.line_number += 1

        # replace NUL characters for security
        if re.search(r'\u0000', ln) is not None:
            ln = re.sub(r'\0', '\uFFFD', ln)

        self.current_line = ln

        # For each containing block, try to parse the associated line start.
        # Bail out on failure: container will point to the last matching block.
        # Set all_matched to false if not all containers match.
        last_child = container.last_child
        while last_child and last_child.is_open:
            container = last_child

            self.find_next_nonspace()
            # The handler class is looked up by the block's type name.
            block_class = getattr(
                import_module('CommonMark.blocks'),
                to_camel_case(container.t))
            rv = block_class.continue_(self, container)
            if rv == 0:
                # we've matched, keep going
                pass
            elif rv == 1:
                # we've failed to match a block
                all_matched = False
            elif rv == 2:
                # we've hit end of line for fenced code close and can return
                self.last_line_length = len(ln)
                return
            else:
                raise ValueError('returned illegal value, must be 0, 1, or 2')

            if not all_matched:
                # back up to last matching block
                container = container.parent
                break

            last_child = container.last_child

        # Nothing needs closing when the deepest matched block is the
        # previous tip.
        self.all_closed = (container == self.oldtip)
        self.last_matched_container = container

        # Check to see if we've hit 2nd blank line; if so break out of list:
        if self.blank and container.last_line_blank:
            self.break_out_of_lists(container)
            container = self.tip

        block_class = getattr(import_module('CommonMark.blocks'),
                              to_camel_case(container.t))
        # A non-paragraph block that accepts lines takes the rest of the
        # line verbatim, so no block starts are tried inside it.
        matched_leaf = container.t != 'paragraph' and block_class.accepts_lines
        starts = self.block_starts
        starts_len = len(starts.METHODS)
        # Unless last matched container is a code block, try new container
        # starts, adding children to the last matched container:
        while not matched_leaf:
            self.find_next_nonspace()

            # this is a little performance optimization:
            if not self.indented and \
               not re.search(reMaybeSpecial, ln[self.next_nonspace:]):
                self.advance_next_nonspace()
                break

            # Try each block-start method in order.  A result of 1 or 2
            # switches `container` to the new tip; 2 additionally ends the
            # search for further starts on this line.  Any other result
            # moves on to the next method.
            i = 0
            while i < starts_len:
                res = getattr(starts, starts.METHODS[i])(self, container)
                if res == 1:
                    container = self.tip
                    break
                elif res == 2:
                    container = self.tip
                    matched_leaf = True
                    break
                else:
                    i += 1

            if i == starts_len:
                # nothing matched
                self.advance_next_nonspace()
                break

        # What remains at the offset is a text line. Add the text to the
        # appropriate container.
        if not self.all_closed and not self.blank and \
           self.tip.t == 'paragraph':
            # lazy paragraph continuation
            self.add_line()
        else:
            # not a lazy continuation
            # finalize any blocks not matched
            self.close_unmatched_blocks()
            if self.blank and container.last_child:
                container.last_child.last_line_blank = True

            t = container.t

            # Block quote lines are never blank as they start with >
            # and we don't count blanks in fenced code for purposes of
            # tight/loose lists or breaking out of lists.  We also
            # don't set last_line_blank on an empty list item, or if we
            # just closed a fenced block.
            last_line_blank = self.blank and \
                not (t == 'block_quote' or
                     (t == 'code_block' and container.is_fenced) or
                     (t == 'item' and
                      not container.first_child and
                      container.sourcepos[0][0] == self.line_number))

            # propagate last_line_blank up through parents:
            cont = container
            while cont:
                cont.last_line_blank = last_line_blank
                cont = cont.parent

            block_class = getattr(import_module('CommonMark.blocks'),
                                  to_camel_case(t))
            if block_class.accepts_lines:
                self.add_line()
                # if HtmlBlock, check for end condition
                if t == 'html_block' and \
                   container.html_block_type >= 1 and \
                   container.html_block_type <= 5 and \
                   re.search(
                       reHtmlBlockClose[container.html_block_type],
                       self.current_line[self.offset:]):
                    self.finalize(container, self.line_number)
            elif self.offset < len(ln) and not self.blank:
                # create a paragraph container for one line
                container = self.add_child('paragraph', self.offset)
                self.advance_next_nonspace()
                self.add_line()

        # Recorded for finalize(), which uses it as the end column.
        self.last_line_length = len(ln)

    def finalize(self, block, line_number):
        """Close `block` and run its type-specific finalizer.

        The block is marked closed and its end source position is set to
        [`line_number`, length of the last processed line].  The
        ``finalize`` hook of the matching class in ``CommonMark.blocks``
        (looked up by camel-cased type name) does any postprocessing, and
        the tip is then reset to the block's parent.
        """
        # Read the parent before the type-specific hook runs.
        parent_block = block.parent

        block.is_open = False
        end_position = [line_number, self.last_line_length]
        block.sourcepos[1] = end_position

        blocks_module = import_module('CommonMark.blocks')
        handler = getattr(blocks_module, to_camel_case(block.t))
        handler.finalize(self, block)

        self.tip = parent_block

    def process_inlines(self, block):
        """
        Run the inline parser over every paragraph and heading under `block`.

        The inline parser is given the current reference map and options,
        and the tree is traversed with ``block.walker()``.  Each node of
        type 'paragraph' or 'heading' is parsed on the event that leaves
        it.
        """
        walker = block.walker()
        inline_parser = self.inline_parser
        inline_parser.refmap = self.refmap
        inline_parser.options = self.options

        while True:
            event = walker.nxt()
            if event is None:
                break
            node = event['node']
            node_type = node.t
            if event['entering']:
                continue
            if node_type in ('paragraph', 'heading'):
                inline_parser.parse(node)

    def parse(self, my_input):
        """Parse Markdown text and return the root of the resulting AST.

        All per-document state is reset, the input is split on
        `reLineEnding`, and each line is passed to `incorporate_line`.
        Blocks still open afterwards are finalized and inline content is
        parsed last.
        """
        root = Node('document', [[1, 1], [0, 0]])
        self.doc = root
        self.tip = root
        self.last_matched_container = root
        self.refmap = {}
        self.current_line = ''
        self.line_number = 0
        self.last_line_length = 0
        self.offset = 0
        self.column = 0

        lines = re.split(reLineEnding, my_input)
        length = len(lines)
        if my_input[-1:] == '\n':
            # ignore last blank line created by final newline
            length -= 1

        for text in lines[:length]:
            self.incorporate_line(text)

        # Keep finalizing until no block is left at the tip.
        while self.tip:
            self.finalize(self.tip, length)

        self.process_inlines(self.doc)
        return self.doc
コード例 #4
0
class Parser:
    def __init__(self, subject=None, pos=0):
        """Set up a parser with a fresh, empty Document node.

        `subject` and `pos` are stored as given.  The tip starts at the
        document root, the reference map starts empty, and a new
        InlineParser is created.
        """
        root = Node.makeNode("Document", 1, 1)
        self.doc = root
        self.tip = root
        self.subject, self.pos = subject, pos
        self.refmap = {}
        self.inlineParser = InlineParser()

    def acceptsLines(self, block_type):
        """Tell whether blocks of `block_type` take raw lines of text.

        True for 'Paragraph', 'IndentedCode', 'FencedCode' and 'HtmlBlock';
        False for every other type name.
        """
        line_types = ('Paragraph', 'IndentedCode', 'FencedCode', 'HtmlBlock')
        return block_type in line_types

    def endsWithBlankLine(self, block):
        """Tell whether `block` ends with a blank line.

        A block counts as ending blank when its ``last_line_blank`` flag is
        set.  For a List or Item that is not flagged and has children, the
        answer is taken from its last child, descending as far as needed.
        """
        node = block
        while True:
            if node.last_line_blank:
                return True
            is_list_like = node.t == "List" or node.t == "Item"
            if not (is_list_like and len(node.children) > 0):
                return False
            # Only the final child can end the enclosing list or item.
            node = node.children[-1]

    def breakOutOfLists(self, block, line_number):
        """Close every list that encloses `block`.

        `block` and its ancestors are scanned for the outermost node of
        type "List".  If one exists, all blocks from `block` up to and
        including that list are finalized with `line_number` and the tip is
        set to the list's parent; otherwise nothing changes.  (This
        implements "two blank lines break out of all lists".)
        """
        # Find the highest "List" on the path from `block` to the root.
        node = block
        outermost_list = node if node.t == "List" else None
        node = node.parent
        while node:
            if node.t == "List":
                outermost_list = node
            node = node.parent

        if not outermost_list:
            return

        current = block
        while current != outermost_list:
            self.finalize(current, line_number)
            current = current.parent
        self.finalize(outermost_list, line_number)
        self.tip = outermost_list.parent

    def addLine(self, ln, offset):
        """Append ``ln[offset:]`` to the string list of the tip.

        The caller is expected to have checked that the tip accepts lines.
        Raises Exception if the tip is no longer open.
        """
        remainder = ln[offset:]
        tip = self.tip
        if tip.is_open:
            tip.strings.append(remainder)
            return
        raise Exception("Attempted to add line (" + ln +
                        ") to closed container.")

    def addChild(self, tag, line_number, offset):
        """Create a block of type `tag` under the nearest block that can
        hold it and make it the new tip.

        Document, BlockQuote and Item tips accept any child; a List tip
        accepts only an Item.  Any other tip is finalized (with the
        previous line number) and the check is repeated on the resulting
        tip.  The new node starts at `line_number`, column ``offset + 1``.
        Returns the new node.
        """
        any_child_containers = ("Document", "BlockQuote", "Item")
        while True:
            tip_type = self.tip.t
            if tip_type in any_child_containers:
                break
            if tip_type == "List" and tag == "Item":
                break
            self.finalize(self.tip, line_number - 1)

        child = Node.makeNode(tag, line_number, offset + 1)
        self.tip.children.append(child)
        child.parent = self.tip
        self.tip = child
        return child

    def listsMatch(self, list_data, item_data):
        """Tell whether two list-data dicts describe the same kind of list.

        The dicts match when their "type", "delimiter" and "bullet_char"
        entries are pairwise equal, a missing entry counting as None.
        Used when deciding whether a new item belongs to the current list.
        """
        compared_keys = ("type", "delimiter", "bullet_char")
        return all(list_data.get(key, None) == item_data.get(key, None)
                   for key in compared_keys)

    def parseListMarker(self, ln, offset):
        """Parse a list marker at ``ln[offset:]``.

        Returns None when the text is a horizontal rule or does not start
        with a bullet or ordered list marker.  Otherwise returns a dict
        with:

        * ``type`` -- 'Bullet' or 'Ordered';
        * ``bullet_char`` (bullet lists) -- first character of the marker;
        * ``start`` and ``delimiter`` (ordered lists) -- the number as an
          int and the delimiter captured after it;
        * ``padding`` -- width of the marker including the spaces after
          it.  When fewer than 1 or at least 5 spaces follow, or when
          nothing but the marker is on the line, only one of those spaces
          is counted.
        """
        rest = ln[offset:]
        if re.match(reHrule, rest):
            return None

        data = {}
        marker = re.search(reBulletListMarker, rest)
        if marker:
            spaces_after_marker = len(marker.group(1))
            data['type'] = 'Bullet'
            data['bullet_char'] = marker.group(0)[0]
        else:
            marker = re.search(reOrderedListMarker, rest)
            if not marker:
                return None
            spaces_after_marker = len(marker.group(3))
            data['type'] = 'Ordered'
            data['start'] = int(marker.group(1))
            data['delimiter'] = marker.group(2)

        marker_length = len(marker.group(0))
        # The item is blank when the marker (with its trailing spaces) is
        # the whole rest of the line.  Lengths must be compared here:
        # comparing the matched text itself to ``len(rest)`` is never true.
        blank_item = marker_length == len(rest)
        if spaces_after_marker >= 5 or spaces_after_marker < 1 or blank_item:
            data['padding'] = marker_length - spaces_after_marker + 1
        else:
            data['padding'] = marker_length
        return data

    def parseIAL(self, ln):
        """Parse the body of an inline attribute list into a dict.

        * every ``.name`` contributes to ``"class"`` (all names joined by
          single spaces);
        * the first ``#name`` becomes ``"id"``;
        * every word, optionally followed by ``=value``, becomes a key
          whose value is the given value or ``''``.

        NOTE(review): the key/value pattern also matches the class and id
        names themselves, so they show up as extra keys with empty
        values.  Later pairs override earlier ones, so an explicit
        ``class=...`` or ``id=...`` replaces the ``.name`` / ``#name``
        result.
        """
        values = []
        css_class = re.findall(r"\.(\w+) *", ln)
        if css_class:
            values.append(("class", " ".join(css_class)))
        # No wildcard between '#' and the name: the former pattern
        # ``\#.(\w+)`` swallowed the first character of the id and never
        # matched one-character ids.
        css_id = re.findall(r"\#(\w+) *", ln)
        if css_id:
            values.append(("id", css_id[0]))
        keyed_values = re.findall(r"(\w+)(?:=(\w+))? *", ln)
        if keyed_values:
            values += keyed_values

        return dict(values)

    def incorporateLine(self, ln, line_number):
        """ Analyze a line of text and update the document appropriately.
        We parse markdown text by calling this on each line of input,
        then finalizing the document.

        The work happens in three phases:

        1. Walk down the chain of open blocks (last child of last child
           ...) and check whether each one continues on this line.
        2. Unless the matched container takes raw text, repeatedly try to
           open new blocks (list items, indented/fenced code, block
           quotes, extension blocks, attribute lists, headers, HTML
           blocks, horizontal rules).
        3. Add the remaining text either as a lazy paragraph continuation
           or to the final container, creating a paragraph if needed.

        :param ln: one line of input text.
        :param line_number: line number of `ln`, used for the positions of
            new blocks and when finalizing blocks.
        """
        all_matched = True
        offset = 0
        CODE_INDENT = 4
        blank = None
        already_done = False

        container = self.doc
        oldtip = self.tip

        # detabLine is defined elsewhere; presumably it expands tabs to
        # spaces, since the code below only ever looks for ' '.
        ln = detabLine(ln)

        # Phase 1: descend through the open blocks and check that each one
        # continues on this line.  `container` ends up as the deepest block
        # that matched.
        while len(container.children) > 0:
            last_child = container.children[-1]
            if not last_child.is_open:
                break
            container = last_child

            match = matchAt(r"[^ ]", ln, offset)
            if match is None:
                first_nonspace = len(ln)
                blank = True
            else:
                first_nonspace = match
                blank = False
            indent = first_nonspace - offset
            if container.t == "BlockQuote":
                # Continues only with a '>' indented by at most 3 spaces;
                # one space after the '>' is consumed as well.
                matched = bool()
                if len(ln) > first_nonspace and len(ln) > 0:
                    matched = ln[first_nonspace] == ">"
                matched = indent <= 3 and matched
                if matched:
                    offset = first_nonspace + 1
                    try:
                        if ln[offset] == " ":
                            offset += 1
                    except IndexError:
                        pass
                else:
                    all_matched = False
            elif container.t == "Item":
                # Continues when indented at least to the item's content
                # column, or when the line is blank.
                if (indent >= container.list_data['marker_offset'] +
                        container.list_data['padding']):
                    offset += container.list_data[
                        'marker_offset'] + container.list_data['padding']
                elif blank:
                    offset = first_nonspace
                else:
                    all_matched = False
            elif container.t == "IndentedCode":
                if indent >= CODE_INDENT:
                    offset += CODE_INDENT
                elif blank:
                    offset = first_nonspace
                else:
                    all_matched = False
            elif container.t in [
                    "ATXHeader", "SetextHeader", "HorizontalRule"
            ]:
                # Single-line blocks never continue.
                all_matched = False
            elif container.t == "FencedCode":
                # Skip up to as many leading spaces as the opening fence
                # was indented by.
                i = container.fence_offset
                while i > 0 and len(ln) > offset and ln[offset] == " ":
                    offset += 1
                    i -= 1
            elif container.t == "HtmlBlock":
                if blank:
                    all_matched = False
            elif container.t == "Paragraph":
                if blank:
                    container.last_line_blank = True
                    all_matched = False
            if not all_matched:
                # Back up to the last block that did match.
                container = container.parent
                break
        last_matched_container = container

        def closeUnmatchedBlocks(self, already_done, oldtip):
            """ This function is used to finalize and close any unmatched
            blocks.  We aren't ready to do this now, because we might
            have a lazy paragraph continuation, in which case we don't
            want to close unmatched blocks.  So we store this closure for
            use later, when we have more information.

            Returns the new ``(already_done, oldtip)`` pair for the caller
            to store; ``already_done`` is always True afterwards."""
            while not already_done and not oldtip == last_matched_container:
                self.finalize(oldtip, line_number)
                oldtip = oldtip.parent
            return True, oldtip

        # A second blank line in a row breaks out of all enclosing lists.
        if blank and container.last_line_blank:
            self.breakOutOfLists(container, line_number)

        # Phase 2: unless the container takes raw text, try to open new
        # blocks for as long as the line could start one.
        while container.t != "ExtensionBlock" and \
                container.t != "FencedCode" and \
                container.t != "IndentedCode" and \
                container.t != "HtmlBlock" and \
                matchAt(r"^[ #`~*+_=<>0-9-{]", ln, offset) is not None:
            match = matchAt("[^ ]", ln, offset)
            if match is None:
                first_nonspace = len(ln)
                blank = True
            else:
                first_nonspace = match
                blank = False
            ATXmatch = re.search(reATXHeaderMarker, ln[first_nonspace:])
            FENmatch = re.search(reCodeFence, ln[first_nonspace:])
            PARmatch = re.search(reSetextHeaderLine, ln[first_nonspace:])
            IALmatch = re.search(r"^{:((\}|[^}])*)} *$", ln[first_nonspace:])
            EXTmatch = re.search(r"^{::((\\\}|[^\\}])*)/?} *$",
                                 ln[first_nonspace:])
            data = self.parseListMarker(ln, first_nonspace)

            indent = first_nonspace - offset
            if data:
                # List item; start a new List first unless the container is
                # a List of the same kind.
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                data['marker_offset'] = indent
                offset = first_nonspace + data['padding']
                if not container.t == "List" or not self.listsMatch(
                        container.list_data, data):
                    container = self.addChild("List", line_number,
                                              first_nonspace)
                    container.list_data = data
                container = self.addChild("Item", line_number, first_nonspace)
                container.list_data = data
            elif indent >= CODE_INDENT:
                # Indented code cannot interrupt a paragraph.
                if not self.tip.t == "Paragraph" and not blank:
                    offset += CODE_INDENT
                    already_done, oldtip = closeUnmatchedBlocks(
                        self, already_done, oldtip)
                    container = self.addChild('IndentedCode', line_number,
                                              offset)
                else:
                    break
            elif len(ln) > first_nonspace and ln[first_nonspace] == ">":
                # Block quote: consume '>' and one optional space.
                offset = first_nonspace + 1
                try:
                    if ln[offset] == " ":
                        offset += 1
                except IndexError:
                    pass
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild("BlockQuote", line_number, offset)
            elif EXTmatch:
                # Extension block opener "{::name key=value ...}"; the
                # first word becomes the title, the rest the attributes.
                args = EXTmatch.group(1)
                keyed_values = re.findall(r"(\w+)(?:=(\w+))? *", args)
                offset = first_nonspace + len(EXTmatch.group(0))
                # NOTE(review): this and the prints below look like
                # leftover debugging output; kept so stdout is unchanged.
                print("EXT {} {}".format(args, offset))
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild("ExtensionBlock", line_number,
                                          first_nonspace)
                container.title = keyed_values.pop(0)[0]
                container.attributes = dict(keyed_values)
                print(EXTmatch.group(0))
                print(args)
                # A '/' before the closing brace closes the block at once.
                if (EXTmatch.group(0)[-2] == '/'):
                    self.finalize(container, line_number)

                break
            elif IALmatch:
                # Inline attribute list "{: ...}": attach the parsed
                # attributes to the current tip.
                offset = first_nonspace + len(IALmatch.group(0))
                print("Found {}".format(IALmatch.group(0)))
                print("blank {}".format(blank))
                print("container {} {}".format(self.tip.t,
                                               container.last_line_blank))
                if blank:
                    # FIXME
                    # attributes.update(self.parseIAL(IALmatch.group(1)))
                    pass
                else:
                    self.tip.attributes = self.parseIAL(IALmatch.group(1))
                break
            elif ATXmatch:
                offset = first_nonspace + len(ATXmatch.group(0))
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild("ATXHeader", line_number,
                                          first_nonspace)
                container.level = len(ATXmatch.group(0).strip())
                # Strip the closing '#' run; when the text contains an
                # escaped '\#', the escaped one is kept.
                if not re.search(r'\\#', ln[offset:]) is None:
                    container.strings = [
                        re.sub(r'(?:(\\#) *#*| *#+) *$', r'\g<1>', ln[offset:])
                    ]
                else:
                    container.strings = [
                        re.sub(r'(?:(\\#) *#*| *#+) *$', '', ln[offset:])
                    ]
                break
            elif FENmatch:
                fence_length = len(FENmatch.group(0))
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild("FencedCode", line_number,
                                          first_nonspace)
                container.fence_length = fence_length
                container.fence_char = FENmatch.group(0)[0]
                container.fence_offset = first_nonspace - offset
                offset = first_nonspace + fence_length
                break
            elif not matchAt(reHtmlBlockOpen, ln, first_nonspace) is None:
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild('HtmlBlock', line_number,
                                          first_nonspace)
                break
            elif container.t == "Paragraph" and \
                    len(container.strings) == 1 and PARmatch:
                # A setext underline turns a one-line paragraph into a
                # header.
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container.t = "SetextHeader"
                container.level = 1 if PARmatch.group(0)[0] == '=' else 2
                offset = len(ln)
            elif not matchAt(reHrule, ln, first_nonspace) is None:
                already_done, oldtip = closeUnmatchedBlocks(
                    self, already_done, oldtip)
                container = self.addChild("HorizontalRule", line_number,
                                          first_nonspace)
                offset = len(ln) - 1
                break
            else:
                break
            if self.acceptsLines(container.t):
                break

        # Phase 3: what remains at `offset` is text for some block.
        match = matchAt(r"[^ ]", ln, offset)
        if match is None:
            first_nonspace = len(ln)
            blank = True
        else:
            first_nonspace = match
            blank = False
        indent = first_nonspace - offset

        if not self.tip == last_matched_container and \
           not blank and self.tip.t == "Paragraph" and \
           len(self.tip.strings) > 0:
            # Lazy paragraph continuation.
            # NOTE(review): this sets the flag on the parser itself, not on
            # a block -- confirm that is intended.
            self.last_line_blank = False
            self.addLine(ln, offset)
        else:
            already_done, oldtip = closeUnmatchedBlocks(
                self, already_done, oldtip)
            # Blank lines are not recorded for block quotes, for fenced
            # code, or for an empty list item that starts on this line.
            container.last_line_blank = \
                blank and \
                not (container.t == "BlockQuote" or
                     container.t == "FencedCode" or
                     (container.t == "Item" and
                      len(container.children) == 0 and
                      container.start_line == line_number))
            # Clear the flag on every ancestor.
            cont = container
            while cont.parent:
                cont.parent.last_line_blank = False
                cont = cont.parent
            if container.t == "IndentedCode" or container.t == "HtmlBlock":
                self.addLine(ln, offset)
            # This must be `elif`: as a separate `if`, an IndentedCode line
            # fell through to the final `else` below and was added a second
            # time via acceptsLines().
            elif container.t == "ExtensionBlock":
                # A "{:/...}" line closes the extension block; any other
                # line is part of its content.
                EXTmatch = re.search(r"^{:/((\\\}|[^\\}])*)} *$",
                                     ln[first_nonspace:])
                if EXTmatch:
                    self.finalize(container, line_number)
                else:
                    self.addLine(ln, offset)
            elif container.t == "FencedCode":
                # A closing fence uses the opening fence character, is
                # indented at most 3 spaces and is at least as long as the
                # opening fence.
                match = bool()
                if len(ln) > 0:
                    match = len(ln) > first_nonspace and \
                        ln[first_nonspace] == container.fence_char and \
                        re.match(
                            r"^(?:`{3,}|~{3,})(?= *$)",
                            ln[first_nonspace:])
                match = indent <= 3 and match
                FENmatch = re.search(r"^(?:`{3,}|~{3,})(?= *$)",
                                     ln[first_nonspace:])
                if match and len(FENmatch.group(0)) >= container.fence_length:
                    self.finalize(container, line_number)
                else:
                    self.addLine(ln, offset)
            elif container.t in ["ATXHeader", "SetextHeader", "HtmlBlock"]:
                # nothing to do; we already added the contents.
                pass
            else:
                if self.acceptsLines(container.t):
                    self.addLine(ln, first_nonspace)
                elif blank:
                    pass
                elif container.t != "HorizontalRule" and \
                        container.t != "SetextHeader":
                    # Plain text in a container block starts a paragraph.
                    container = self.addChild("Paragraph", line_number,
                                              first_nonspace)
                    self.addLine(ln, first_nonspace)
                else:
                    # print("Line " + str(line_number) +
                    #       " with container type " +
                    #       container.t + " did not match any condition.")
                    pass

    def finalize(self, block, line_number):
        """ Finalize a block.  Close it and do any necessary postprocessing,
        e.g. creating string_content from strings, setting the 'tight'
        or 'loose' status of a list, and parsing the beginnings
        of paragraphs for reference definitions.  Reset the tip to the
        parent of the closed block.

        block is the node to close and line_number the line at which it
        is being closed.  Returns 0, leaving everything untouched, when
        the block is already closed; returns None otherwise."""
        if not block.is_open:
            return 0

        block.is_open = False
        # A block closed by a later line ends on the line before that one.
        if line_number > block.start_line:
            block.end_line = line_number - 1
        else:
            block.end_line = line_number

        if block.t == "Paragraph":
            # Strip leading spaces from every stored line (in place).
            # The flag has to be given by keyword: the fourth positional
            # parameter of re.sub is `count`, not `flags`.
            block.strings[:] = [
                re.sub(r'^  *', '', line, flags=re.MULTILINE)
                for line in block.strings]
            block.string_content = '\n'.join(block.strings)

            # Peel leading reference definitions off the paragraph; pos is
            # used as the length of the text consumed by the reference.
            pos = self.inlineParser.parseReference(block.string_content,
                                                   self.refmap)
            # startswith() instead of [0] so that an empty paragraph does
            # not raise IndexError.
            while block.string_content.startswith("[") and pos:
                block.string_content = block.string_content[pos:]
                if isBlank(block.string_content):
                    # Nothing but reference definitions was left.
                    block.t = "ReferenceDef"
                    break
                pos = self.inlineParser.parseReference(block.string_content,
                                                       self.refmap)
        elif block.t in ["ATXHeader", "SetextHeader", "HtmlBlock"]:
            block.string_content = "\n".join(block.strings)
        elif block.t == "IndentedCode":
            # Collapse trailing blank lines into a single newline.
            # count=1 is required: the pattern can also match the empty
            # string at the very end, and since Python 3.7 that empty match
            # is replaced as well, which appended a second newline.
            block.string_content = re.sub(r"(\n *)*$", "\n",
                                          "\n".join(block.strings),
                                          count=1)
        elif block.t == "FencedCode":
            # The first stored line is the info string; the rest is content.
            block.info = unescape(block.strings[0].strip())
            if len(block.strings) == 1:
                block.string_content = ""
            else:
                block.string_content = "\n".join(block.strings[1:]) + "\n"
        elif block.t == "List":
            # Tight by default; loose as soon as a blank line separates two
            # items or two children of an item.
            block.tight = True
            last_item_index = len(block.children) - 1
            for i, item in enumerate(block.children):
                last_item = (i == last_item_index)
                if self.endsWithBlankLine(item) and not last_item:
                    block.tight = False
                    break
                last_subitem_index = len(item.children) - 1
                for j, subitem in enumerate(item.children):
                    last_subitem = (j == last_subitem_index)
                    # A blank line after the very last child of the very
                    # last item does not make the list loose.
                    if (self.endsWithBlankLine(subitem)
                            and not (last_item and last_subitem)):
                        block.tight = False
                        break

        self.tip = block.parent

    def processInlines(self, block):
        """ Recursively convert raw string content into inline content.

        Headers and paragraphs have their stripped string_content run
        through the inline parser (together with the reference map); the
        result is stored in inline_content and string_content is cleared.
        Every child of the block is then processed the same way."""
        inline_bearing = ("ATXHeader", "Paragraph", "SetextHeader")
        if block.t in inline_bearing:
            raw_text = block.string_content.strip()
            block.inline_content = self.inlineParser.parse(raw_text,
                                                           self.refmap)
            block.string_content = ""

        # `or ()` keeps the original guard: a missing or empty children
        # collection simply means there is nothing to descend into.
        for child in block.children or ():
            self.processInlines(child)

    def parse(self, my_input):
        """ The main parsing function.  Returns a parsed document AST.

        my_input is the complete source text.  The document node, the tip
        and the reference map are reset before parsing starts.  Each line
        is fed to incorporateLine with its 1-based line number, every
        block still open afterwards is finalized, and finally the inline
        content of the whole tree is parsed."""
        self.doc = Node.makeNode("Document", 1, 1)
        self.tip = self.doc
        self.refmap = {}
        # Drop exactly one trailing newline so the split below does not
        # yield a spurious empty last line.  \Z anchors only at the very
        # end of the string; '$' also matches just before a final newline,
        # so the former r'\n$' removed two newlines from input ending in
        # "\n\n" and silently lost the last blank line.
        lines = re.split(reLineEnding, re.sub(r'\n\Z', '', my_input))
        for line_number, line in enumerate(lines, 1):
            self.incorporateLine(line, line_number)
        # finalize() moves the tip to the parent of the block it closes,
        # so this walks up the chain until no tip is left.
        length = len(lines)
        while self.tip:
            self.finalize(self.tip, length)
        self.processInlines(self.doc)
        return self.doc