Esempio n. 1
0
def parseHeaders(source):
    """ Collect the headers of `source` to construct the Table Of Contents.

        Hash headers ("# title") and setext headers (a line underlined
        with '=' or '-') are both recognised.
        return: [(level, text, position, anchor)]
        position/anchor is the header position in notesEdit/notesView
    """
    hash_re = re.compile(r'^(#+)(.+)', re.MULTILINE)
    setext_re = re.compile(r'(.+)\n([=-]+[ ]*)(\n|$)', re.MULTILINE)

    # (position, level, text) triples: sorting them orders by position.
    found = [(m.start(), len(m.group(1)), m.group(2))
             for m in hash_re.finditer(source)]
    for m in setext_re.finditer(source):
        # An underline starting with '=' is level 1, anything else level 2.
        depth = 1 if m.group(2).startswith('=') else 2
        found.append((m.start(), depth, m.group(1)))

    seen_ids = set()  # shared with unique() in case headers share a name
    toc = []
    for position, depth, title in sorted(found):
        anchor = unique(slugify(title, '-'), seen_ids)
        toc.append((depth, title, position, anchor))
    return toc
Esempio n. 2
0
def parseHeaders(source):
    """ Scan `source` for headers used to build the Table Of Contents.

        return: [(level, text, position, anchor)]
        position/anchor is the header position in notesEdit/notesView
    """
    matches = []

    # hash headers: the level is the number of leading '#' characters
    for m in re.finditer(r'^(#+)(.+)', source, re.MULTILINE):
        marks, title = m.groups()
        matches.append((m.start(), len(marks), title))

    # setext headers: '=' underline means level 1, otherwise level 2
    for m in re.finditer(r'(.+)\n([=-]+[ ]*)(\n|$)', source, re.MULTILINE):
        title, underline = m.group(1), m.group(2)
        if underline.startswith('='):
            rank = 1
        else:
            rank = 2
        matches.append((m.start(), rank, title))

    # Tuples start with the position, so this puts headers in source order.
    matches.sort()

    taken = set()  # In case there are headers with the same name.
    result = []
    for offset, rank, title in matches:
        result.append((rank, title, offset,
                       unique(slugify(title, '-'), taken)))
    return result
Esempio n. 3
0
def parseHeaders(source):
    ''' Basic parser for hash ("#") headers
        The result is used to construct the Table Of Contents

        return: [(hdrLevel, hdrText, hdrPosition, hdrAnchor)]
        hdrLevel is the matched run of '#' characters (a string),
        hdrPosition is the offset of the match in source.
    '''
    seen = set()  # shared with unique() in case headers share a name
    toc = []
    for match in re.finditer(r'^(#+)(.+)', source, re.MULTILINE):
        hashes, title = match.groups()
        toc.append((hashes, title, match.start(),
                    unique(slugify(title, '-'), seen)))
    return toc
Esempio n. 4
0
 def testUniqueFunc(self):
     """ 'unique' must rename a clashing id and record the new one. """
     from markdown.extensions.headerid import unique
     seen = {'foo'}
     renamed = unique('foo', seen)
     self.assertEqual(renamed, 'foo_1')
     self.assertEqual(seen, {'foo', 'foo_1'})
Esempio n. 5
0
 def testUniqueFunc(self):
     """ Exercise 'unique' with an id that is already taken. """
     from markdown.extensions.headerid import unique
     taken = {'foo'}
     expected_after = {'foo', 'foo_1'}
     result = unique('foo', taken)
     self.assertEqual(result, 'foo_1')
     # The set passed in is updated with the newly generated id.
     self.assertEqual(taken, expected_after)
Esempio n. 6
0
    def run(self, doc):
        """Give every header in ``doc`` an id and build the table of contents.

        Each (parent, child) pair yielded by ``self.iterparent(doc)`` is
        inspected:

        * a child whose text is exactly the configured marker (and which is
          neither a header nor ``pre``/``code``) is replaced in its parent by
          the TOC ``<div class="toc">``;
        * each ``h1``-``h6`` child receives an ``id`` (pre-existing ids are
          kept), is recorded for the TOC and handed to ``self.add_anchor``.

        Only when a marker was found is the nested TOC built into the div,
        serialized, passed through the postprocessors and stored on
        ``self.markdown.toc``.
        """
        div = etree.Element("div")
        div.attrib["class"] = "toc"
        header_rgx = re.compile("[Hh][123456]")

        self.use_anchors = self.config["anchorlink"] in [1, '1', True, 'True', 'true']

        # Get a list of id attributes.  Element.iter() is used because
        # getiterator() was removed from ElementTree in Python 3.9.
        used_ids = set()
        for c in doc.iter():
            if "id" in c.attrib:
                used_ids.add(c.attrib["id"])

        toc_list = []
        marker_found = False
        for (p, c) in self.iterparent(doc):
            # Flattened text of the element including its inline children.
            text = ''.join(itertext(c)).strip()
            if not text:
                continue

            # To keep the output from screwing up the
            # validation by putting a <div> inside of a <p>
            # we actually replace the <p> in its entirety.
            # We do not allow the marker inside a header as that
            # would cause an endless loop of placing a new TOC
            # inside a previously generated TOC.
            if c.text and c.text.strip() == self.config["marker"] and \
               not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
                for i in range(len(p)):
                    if p[i] == c:
                        p[i] = div
                        break
                marker_found = True

            if header_rgx.match(c.tag):

                # Do not override pre-existing ids
                if "id" not in c.attrib:
                    elem_id = unique(self.config["slugify"](text, '-'), used_ids)
                    c.attrib["id"] = elem_id
                else:
                    elem_id = c.attrib["id"]

                # The tag's last character is the header level digit.
                tag_level = int(c.tag[-1])

                # Use the flattened text rather than c.text: c.text only
                # holds the text before the first inline child, so it is
                # None or truncated for headers containing inline markup.
                toc_list.append({
                    'level': tag_level,
                    'id': elem_id,
                    'name': text})

                self.add_anchor(c, elem_id)

        if marker_found:
            toc_list_nested = order_toc_list(toc_list)
            self.build_toc_etree(div, toc_list_nested)
            # serialize and attach to markdown instance.
            prettify = self.markdown.treeprocessors.get('prettify')
            if prettify:
                prettify.run(div)
            toc = self.markdown.serializer(div)
            for pp in self.markdown.postprocessors.values():
                toc = pp.run(toc)
            self.markdown.toc = toc
Esempio n. 7
0
    def run(self, doc):
        """Build the table of contents for ``doc`` and link its headers.

        Each (parent, child) pair yielded by ``self.iterparent(doc)`` is
        inspected:

        * a child whose text is exactly the configured marker (and which is
          neither a header nor ``pre``/``code``) is replaced in its parent by
          the TOC ``<div class="toc">``;
        * each ``h1``-``h6`` child receives an ``id`` (pre-existing ids are
          kept) and a ``<li><a href="#id">`` entry in the nested TOC list
          (``<ol>`` or ``<ul>`` depending on ``self.config['ordered']``);
          when ``anchorlink`` is enabled its content is wrapped in an
          ``<a class="toclink">``.

        If no marker was found, the TOC is serialized, passed through the
        postprocessors and stored on ``self.markdown.toc``.
        """
        marker_found = False

        div = etree.Element("div")
        div.attrib["class"] = "toc"
        last_li = None

        # Add title to the div
        if self.config["title"]:
            header = etree.SubElement(div, "span")
            header.attrib["class"] = "toctitle"
            header.text = self.config["title"]

        level = 0
        list_stack = [div]
        header_rgx = re.compile("[Hh][123456]")

        # Get a list of id attributes.  Element.iter() is used because
        # getiterator() was removed from ElementTree in Python 3.9.
        used_ids = []
        for c in doc.iter():
            if "id" in c.attrib:
                used_ids.append(c.attrib["id"])

        for (p, c) in self.iterparent(doc):
            text = ''.join(itertext(c)).strip()
            if not text:
                continue

            # To keep the output from screwing up the
            # validation by putting a <div> inside of a <p>
            # we actually replace the <p> in its entirety.
            # We do not allow the marker inside a header as that
            # would cause an endless loop of placing a new TOC
            # inside a previously generated TOC.
            if c.text and c.text.strip() == self.config["marker"] and \
               not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
                for i in range(len(p)):
                    if p[i] == c:
                        p[i] = div
                        break
                marker_found = True

            if header_rgx.match(c.tag):
                # The tag's last character is the header level digit.
                tag_level = int(c.tag[-1])

                # Close the lists of deeper levels.
                while tag_level < level:
                    list_stack.pop()
                    level -= 1

                # Open a new nested list for a deeper header.
                if tag_level > level:
                    if self.config['ordered']:
                        newlist = etree.Element("ol")
                    else:
                        newlist = etree.Element("ul")

                    # Compare with None explicitly: the truth value of an
                    # Element reflects whether it has children and testing
                    # it is deprecated.
                    if last_li is not None:
                        last_li.append(newlist)
                    else:
                        list_stack[-1].append(newlist)
                    list_stack.append(newlist)
                    if level == 0:
                        level = tag_level
                    else:
                        level += 1

                # Do not override pre-existing ids
                if "id" not in c.attrib:
                    elem_id = unique(self.config["slugify"](text, '-'), used_ids)
                    c.attrib["id"] = elem_id
                else:
                    elem_id = c.attrib["id"]

                # List item link, to be inserted into the toc div
                last_li = etree.Element("li")
                link = etree.SubElement(last_li, "a")
                link.text = text
                link.attrib["href"] = '#' + elem_id

                if int(self.config["anchorlink"]):
                    anchor = etree.Element("a")
                    anchor.text = c.text
                    anchor.attrib["href"] = "#" + elem_id
                    anchor.attrib["class"] = "toclink"
                    c.text = ""
                    # Iterate over a snapshot of the children: removing
                    # from ``c`` while walking its live child list would
                    # skip elements.
                    for elem in list(c):
                        anchor.append(elem)
                        c.remove(elem)
                    c.append(anchor)

                list_stack[-1].append(last_li)
        if not marker_found:
            # serialize and attach to markdown instance.
            prettify = self.markdown.treeprocessors.get('prettify')
            if prettify:
                prettify.run(div)
            toc = self.markdown.serializer(div)
            for pp in self.markdown.postprocessors.values():
                toc = pp.run(toc)
            self.markdown.toc = toc
Esempio n. 8
0
    def run(self, doc):
        """Build the table of contents for ``doc`` and link its headers.

        Each (parent, child) pair yielded by ``self.iterparent(doc)`` is
        inspected:

        * a child whose text is exactly the configured marker (and which is
          neither a header nor ``pre``/``code``) is replaced in its parent by
          the TOC ``<div class="toc">``;
        * each ``h1``-``h6`` child receives an ``id`` (pre-existing ids are
          kept) and a ``<li><a href="#id">`` entry in the nested ``<ul>``
          TOC; when ``anchorlink`` is enabled its content is wrapped in an
          ``<a class="toclink">``.

        An ``IndexError`` raised while handling a header is swallowed and
        processing continues with the next element.  If no marker was found,
        the TOC is serialized, passed through the postprocessors and stored
        on ``self.markdown.toc``.
        """
        marker_found = False

        div = etree.Element("div")
        div.attrib["class"] = "toc"
        last_li = None

        # Add title to the div
        if self.config["title"]:
            header = etree.SubElement(div, "span")
            header.attrib["class"] = "toctitle"
            header.text = self.config["title"]

        level = 0
        list_stack = [div]
        header_rgx = re.compile("[Hh][123456]")

        # Get a list of id attributes.  Element.iter() is used because
        # getiterator() was removed from ElementTree in Python 3.9.
        used_ids = []
        for c in doc.iter():
            if "id" in c.attrib:
                used_ids.append(c.attrib["id"])

        for (p, c) in self.iterparent(doc):
            text = ''.join(itertext(c)).strip()
            if not text:
                continue

            # To keep the output from screwing up the
            # validation by putting a <div> inside of a <p>
            # we actually replace the <p> in its entirety.
            # We do not allow the marker inside a header as that
            # would cause an endless loop of placing a new TOC
            # inside a previously generated TOC.
            if c.text and c.text.strip() == self.config["marker"] and \
               not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
                for i in range(len(p)):
                    if p[i] == c:
                        p[i] = div
                        break
                marker_found = True

            if header_rgx.match(c.tag):
                try:
                    # The tag's last character is the header level digit.
                    tag_level = int(c.tag[-1])

                    # Close the lists of deeper levels.
                    while tag_level < level:
                        list_stack.pop()
                        level -= 1

                    # Open a new nested list for a deeper header.
                    if tag_level > level:
                        newlist = etree.Element("ul")
                        # Compare with None explicitly: the truth value of
                        # an Element reflects whether it has children and
                        # testing it is deprecated.
                        if last_li is not None:
                            last_li.append(newlist)
                        else:
                            list_stack[-1].append(newlist)
                        list_stack.append(newlist)
                        if level == 0:
                            level = tag_level
                        else:
                            level += 1

                    # Do not override pre-existing ids
                    if "id" not in c.attrib:
                        elem_id = unique(self.config["slugify"](text, '-'),
                                         used_ids)
                        c.attrib["id"] = elem_id
                    else:
                        elem_id = c.attrib["id"]

                    # List item link, to be inserted into the toc div
                    last_li = etree.Element("li")
                    link = etree.SubElement(last_li, "a")
                    link.text = text
                    link.attrib["href"] = '#' + elem_id

                    if self.config["anchorlink"] in [
                            1, '1', True, 'True', 'true'
                    ]:
                        anchor = etree.Element("a")
                        anchor.text = c.text
                        anchor.attrib["href"] = "#" + elem_id
                        anchor.attrib["class"] = "toclink"
                        c.text = ""
                        # Iterate over a snapshot of the children: removing
                        # from ``c`` while walking its live child list
                        # would skip elements.
                        for elem in list(c):
                            anchor.append(elem)
                            c.remove(elem)
                        c.append(anchor)

                    list_stack[-1].append(last_li)
                except IndexError:
                    # We have bad ordering of headers. Just move on.
                    pass
        if not marker_found:
            # serialize and attach to markdown instance.
            prettify = self.markdown.treeprocessors.get('prettify')
            if prettify:
                prettify.run(div)
            toc = self.markdown.serializer(div)
            for pp in self.markdown.postprocessors.values():
                toc = pp.run(toc)
            self.markdown.toc = toc
Esempio n. 9
0
    def run(self, doc):
        """Build the table of contents for ``doc`` and link its headers.

        Each (parent, child) pair yielded by ``self.iterparent(doc)`` is
        inspected:

        * a child whose text is exactly the configured marker (and which is
          neither a header nor ``pre``/``code``) is replaced in its parent by
          the TOC ``<div class="toc">``;
        * each ``h1``-``h6`` child receives an ``id`` (pre-existing ids are
          kept) and a ``<li><a href="#id">`` entry in the nested ``<ul>``
          TOC; when ``anchorlink`` is enabled its content is wrapped in an
          ``<a class="toclink">``.

        An ``IndexError`` raised while handling a header is swallowed and
        processing continues with the next element.  If no marker was found,
        the TOC is serialized, passed through the postprocessors and stored
        on ``self.markdown.toc``.
        """
        marker_found = False
        div = etree.Element('div')
        div.attrib['class'] = 'toc'
        last_li = None

        # Optional title shown at the top of the TOC div.
        if self.config['title']:
            header = etree.SubElement(div, 'span')
            header.attrib['class'] = 'toctitle'
            header.text = self.config['title']
        level = 0
        list_stack = [div]
        header_rgx = re.compile('[Hh][123456]')

        # Ids already present in the document.  Element.iter() is used
        # because getiterator() was removed from ElementTree in Python 3.9.
        used_ids = []
        for c in doc.iter():
            if 'id' in c.attrib:
                used_ids.append(c.attrib['id'])

        for p, c in self.iterparent(doc):
            text = ''.join(itertext(c)).strip()
            if not text:
                continue

            # Replace the element holding the marker with the TOC div.
            # Headers and pre/code elements are never treated as markers.
            if c.text and c.text.strip() == self.config['marker'] and \
               not header_rgx.match(c.tag) and c.tag not in ('pre', 'code'):
                for i in range(len(p)):
                    if p[i] == c:
                        p[i] = div
                        break
                marker_found = True

            if header_rgx.match(c.tag):
                try:
                    # The tag's last character is the header level digit.
                    tag_level = int(c.tag[-1])

                    # Close the lists of deeper levels.
                    while tag_level < level:
                        list_stack.pop()
                        level -= 1

                    # Open a new nested list for a deeper header.
                    if tag_level > level:
                        newlist = etree.Element('ul')
                        # Compare with None explicitly: the truth value of
                        # an Element reflects whether it has children and
                        # testing it is deprecated.
                        if last_li is not None:
                            last_li.append(newlist)
                        else:
                            list_stack[-1].append(newlist)
                        list_stack.append(newlist)
                        if level == 0:
                            level = tag_level
                        else:
                            level += 1

                    # Do not override pre-existing ids
                    if 'id' not in c.attrib:
                        elem_id = unique(self.config['slugify'](text, '-'),
                                         used_ids)
                        c.attrib['id'] = elem_id
                    else:
                        elem_id = c.attrib['id']

                    # List item link, to be inserted into the toc div
                    last_li = etree.Element('li')
                    link = etree.SubElement(last_li, 'a')
                    link.text = text
                    link.attrib['href'] = '#' + elem_id

                    if self.config['anchorlink'] in [1, '1', True,
                                                     'True', 'true']:
                        anchor = etree.Element('a')
                        anchor.text = c.text
                        anchor.attrib['href'] = '#' + elem_id
                        anchor.attrib['class'] = 'toclink'
                        c.text = ''
                        # Iterate over a snapshot obtained through the
                        # public API: walking the private ``_children``
                        # list while removing from it skips elements, and
                        # the attribute does not exist on the C
                        # implementation of Element.
                        for elem in list(c):
                            anchor.append(elem)
                            c.remove(elem)

                        c.append(anchor)
                    list_stack[-1].append(last_li)
                except IndexError:
                    # Bad ordering of headers: skip this one and move on.
                    pass

        if not marker_found:
            # serialize and attach to markdown instance.
            prettify = self.markdown.treeprocessors.get('prettify')
            if prettify:
                prettify.run(div)
            toc = self.markdown.serializer(div)
            for pp in self.markdown.postprocessors.values():
                toc = pp.run(toc)

            self.markdown.toc = toc
Esempio n. 10
0
    def run(self, doc):
        """Build the table of contents for ``doc`` and link its headers.

        Each (parent, child) pair yielded by ``self.iterparent(doc)`` is
        inspected:

        * a child whose text is exactly the configured marker (and which is
          neither a header nor ``pre``/``code``) is replaced in its parent by
          the TOC ``<div class="toc">``;
        * each ``h1``-``h6`` child receives an ``id`` (pre-existing ids are
          kept) and a ``<li><a href="#id">`` entry in the nested ``<ul>``
          TOC; when ``anchorlink`` is enabled its content is wrapped in an
          ``<a class="toclink">``.

        An ``IndexError`` raised while handling a header is swallowed and
        processing continues with the next element.  If no marker was found,
        the TOC is serialized, passed through the postprocessors and stored
        on ``self.markdown.toc``.
        """
        marker_found = False
        div = etree.Element('div')
        div.attrib['class'] = 'toc'
        last_li = None

        # Optional title shown at the top of the TOC div.
        if self.config['title']:
            header = etree.SubElement(div, 'span')
            header.attrib['class'] = 'toctitle'
            header.text = self.config['title']
        level = 0
        list_stack = [div]
        header_rgx = re.compile('[Hh][123456]')

        # Ids already present in the document.  Element.iter() is used
        # because getiterator() was removed from ElementTree in Python 3.9.
        used_ids = []
        for c in doc.iter():
            if 'id' in c.attrib:
                used_ids.append(c.attrib['id'])

        for p, c in self.iterparent(doc):
            text = ''.join(itertext(c)).strip()
            if not text:
                continue

            # Replace the element holding the marker with the TOC div.
            # Headers and pre/code elements are never treated as markers.
            is_marker = (c.text
                         and c.text.strip() == self.config['marker']
                         and not header_rgx.match(c.tag)
                         and c.tag not in ('pre', 'code'))
            if is_marker:
                for i in range(len(p)):
                    if p[i] == c:
                        p[i] = div
                        break
                marker_found = True

            if header_rgx.match(c.tag):
                try:
                    # The tag's last character is the header level digit.
                    tag_level = int(c.tag[-1])

                    # Close the lists of deeper levels.
                    while tag_level < level:
                        list_stack.pop()
                        level -= 1

                    # Open a new nested list for a deeper header.
                    if tag_level > level:
                        newlist = etree.Element('ul')
                        # Compare with None explicitly: the truth value of
                        # an Element reflects whether it has children and
                        # testing it is deprecated.
                        if last_li is not None:
                            last_li.append(newlist)
                        else:
                            list_stack[-1].append(newlist)
                        list_stack.append(newlist)
                        if level == 0:
                            level = tag_level
                        else:
                            level += 1

                    # Do not override pre-existing ids
                    if 'id' not in c.attrib:
                        elem_id = unique(self.config['slugify'](text, '-'),
                                         used_ids)
                        c.attrib['id'] = elem_id
                    else:
                        elem_id = c.attrib['id']

                    # List item link, to be inserted into the toc div
                    last_li = etree.Element('li')
                    link = etree.SubElement(last_li, 'a')
                    link.text = text
                    link.attrib['href'] = '#' + elem_id

                    if self.config['anchorlink'] in [
                            1, '1', True, 'True', 'true'
                    ]:
                        anchor = etree.Element('a')
                        anchor.text = c.text
                        anchor.attrib['href'] = '#' + elem_id
                        anchor.attrib['class'] = 'toclink'
                        c.text = ''
                        # Iterate over a snapshot obtained through the
                        # public API: walking the private ``_children``
                        # list while removing from it skips elements, and
                        # the attribute does not exist on the C
                        # implementation of Element.
                        for elem in list(c):
                            anchor.append(elem)
                            c.remove(elem)

                        c.append(anchor)
                    list_stack[-1].append(last_li)
                except IndexError:
                    # Bad ordering of headers: skip this one and move on.
                    pass

        if not marker_found:
            # serialize and attach to markdown instance.
            prettify = self.markdown.treeprocessors.get('prettify')
            if prettify:
                prettify.run(div)
            toc = self.markdown.serializer(div)
            for pp in self.markdown.postprocessors.values():
                toc = pp.run(toc)

            self.markdown.toc = toc
Esempio n. 11
0
    def run(self, doc):
        """Build a table of contents for ``doc`` and decorate its headers.

        Each (parent, child) pair yielded by ``self.iterparent(doc)`` is
        inspected:

        * a child whose text is exactly the configured marker (and which is
          neither a header nor ``pre``/``code``) is replaced in its parent by
          the TOC ``<div class="toc">``;
        * for each ``h1``-``h6`` child a dotted section number is computed,
          a ``<li><a href="#id">`` entry is added to the nested ``<ul>``
          TOC, a stashed pilcrow "headerlink" anchor is appended to the
          header text and the header gets ``class="header"``; when
          ``anchorlink`` is enabled its content is wrapped in an
          ``<a class="toclink">``.

        An ``IndexError`` raised while handling a header is swallowed and
        processing continues with the next element.  If no marker was found,
        the TOC is serialized, passed through the postprocessors and stored
        on ``self.markdown.toc``.
        """
        marker_found = False

        div = etree.Element("div")
        div.attrib["class"] = "toc"
        last_li = None

        # Add title to the div
        if self.config["title"]:
            header = etree.SubElement(div, "span")
            header.attrib["class"] = "toctitle"
            header.text = self.config["title"]

        level = 0
        list_stack = [div]
        header_rgx = re.compile("[Hh][123456]")

        # Get a list of id attributes.  Element.iter() replaces the
        # deprecated getiterator().
        used_ids = []
        for c in doc.iter():
            if "id" in c.attrib:
                used_ids.append(c.attrib["id"])

        # Section numbering state: one counter per nesting depth, plus the
        # tag level and counter index of the previously seen header.
        last_numbers = [0] * 5
        last_tag_level = 1
        last_level = -1

        for (p, c) in self.iterparent(doc):
            text = ''.join(itertext(c)).strip()
            if not text:
                continue

            # To keep the output from screwing up the
            # validation by putting a <div> inside of a <p>
            # we actually replace the <p> in its entirety.
            # We do not allow the marker inside a header as that
            # would cause an endless loop of placing a new TOC
            # inside a previously generated TOC.

            if c.text and c.text.strip() == self.config["marker"] and \
               not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
                for i in range(len(p)):
                    if p[i] == c:
                        p[i] = div
                        break
                marker_found = True

            if header_rgx.match(c.tag):
                try:
                    # The tag's last character is the header level digit.
                    tag_level = int(c.tag[-1])
                    level_diff = last_tag_level - tag_level

                    if tag_level > last_tag_level:
                        this_level = last_level + 1
                        # Mimic the behaviour of the table of contents:
                        # level_diff is negative in this branch, so a jump
                        # of several header levels counts as a single step.
                        if level_diff < 1:
                            tag_level = last_tag_level + 1
                    elif tag_level < last_tag_level:
                        # If the tag difference is MORE than one, go up that many
                        if level_diff > 1:
                            this_level = last_level - level_diff
                        else:
                            this_level = last_level - 1
                        # Restart the counters of all deeper levels.
                        for i in xrange(this_level + 1, len(last_numbers)):
                            last_numbers[i] = 0
                    else:
                        this_level = last_level

                    last_numbers[this_level] += 1
                    section_number = '.'.join(
                        map(unicode, last_numbers[:this_level + 1]))
                    # NOTE(review): the returned placeholder is not used
                    # further down in this method - confirm whether this
                    # stash entry is still needed.
                    placeholder = self.markdown.htmlStash.store(
                        u'<span>%s</span>' % section_number, safe=True)
                    last_tag_level = tag_level
                    last_level = this_level

                    # Close the lists of deeper levels.
                    while tag_level < level:
                        list_stack.pop()
                        level -= 1

                    # Open a new nested list for a deeper header.
                    if tag_level > level:
                        newlist = etree.Element("ul")
                        # Compare with None explicitly: the truth value of
                        # an Element reflects whether it has children and
                        # testing it is deprecated.
                        if last_li is not None:
                            last_li.append(newlist)
                        else:
                            list_stack[-1].append(newlist)
                        list_stack.append(newlist)
                        if level == 0:
                            level = tag_level
                        else:
                            level += 1

                    elem_id = unique(self.config["slugify"](text, '-'),
                                     used_ids)

                    # List item link, to be inserted into the toc div.
                    # The flattened text is used because c.text is None or
                    # truncated when the header contains inline elements.
                    last_li = etree.Element("li")
                    link = etree.SubElement(last_li, "a")
                    link.text = text
                    link.attrib["href"] = '#' + elem_id
                    pilcrow_html = u'<a class="headerlink" name="%(id)s" href="#%(id)s">&para;</a>' % {
                        'id': elem_id
                    }
                    header_link = self.markdown.htmlStash.store(pilcrow_html,
                                                                safe=True)

                    # c.text is None when the header starts with an inline
                    # element; fall back to an empty string so the
                    # concatenation cannot raise TypeError.
                    c.text = (c.text or u'') + header_link
                    c.attrib['class'] = 'header'

                    if self.config["anchorlink"] in [
                            1, '1', True, 'True', 'true'
                    ]:
                        anchor = etree.Element("a")
                        anchor.text = c.text
                        anchor.attrib["href"] = "#" + elem_id
                        anchor.attrib["class"] = "toclink"
                        c.text = ""
                        # Iterate over a snapshot of the children: removing
                        # from ``c`` while walking its live child list
                        # would skip elements.
                        for elem in list(c):
                            anchor.append(elem)
                            c.remove(elem)
                        c.append(anchor)

                    list_stack[-1].append(last_li)
                except IndexError:
                    # We have bad ordering of headers. Just move on.
                    pass
        if not marker_found:
            # serialize and attach to markdown instance.
            prettify = self.markdown.treeprocessors.get('prettify')
            if prettify:
                prettify.run(div)
            toc = self.markdown.serializer(div)
            for pp in self.markdown.postprocessors.values():
                toc = pp.run(toc)
            self.markdown.toc = toc
Esempio n. 12
0
    def run(self, doc):
        """Build a numbered table of contents for ``doc`` and rewrite headers.

        Each (parent, child) pair yielded by ``self.iterparent(doc)`` is
        inspected:

        * a child whose text is exactly the configured marker (and which is
          neither a header nor ``pre``/``code``) is replaced in its parent by
          the TOC ``<div class="toc">``;
        * for each ``h1``-``h6`` child a dotted section number is computed
          and a ``<li><a href="#id">number text</a>`` entry is added to the
          nested ``<ul>`` TOC.  The header itself is cleared and rebuilt as
          the stashed section number followed by its flattened text, a
          pilcrow "headerlink" anchor and ``class="header"``; when
          ``anchorlink`` is enabled its content is wrapped in an
          ``<a class="toclink">``.

        An ``IndexError`` raised while handling a header is swallowed and
        processing continues with the next element.  If no marker was found,
        the TOC is serialized, passed through the postprocessors and stored
        on ``self.markdown.toc``.
        """
        marker_found = False

        div = etree.Element("div")
        div.attrib["class"] = "toc"
        last_li = None

        # Add title to the div
        if self.config["title"]:
            header = etree.SubElement(div, "span")
            header.attrib["class"] = "toctitle"
            header.text = self.config["title"]

        level = 0
        list_stack = [div]
        header_rgx = re.compile("[Hh][123456]")

        # Get a list of id attributes.  Element.iter() replaces the
        # deprecated getiterator().
        used_ids = []
        for c in doc.iter():
            if "id" in c.attrib:
                used_ids.append(c.attrib["id"])

        # Section numbering state: one counter per nesting depth, plus the
        # tag level and counter index of the previously seen header.
        last_numbers = [0] * 5
        last_tag_level = 0
        last_level = -1

        for (p, c) in self.iterparent(doc):
            text = ''.join(itertext(c)).strip()
            if not text:
                continue

            # To keep the output from screwing up the
            # validation by putting a <div> inside of a <p>
            # we actually replace the <p> in its entirety.
            # We do not allow the marker inside a header as that
            # would cause an endless loop of placing a new TOC
            # inside a previously generated TOC.

            if c.text and c.text.strip() == self.config["marker"] and \
               not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
                for i in range(len(p)):
                    if p[i] == c:
                        p[i] = div
                        break
                marker_found = True

            if header_rgx.match(c.tag):
                try:
                    # The tag's last character is the header level digit.
                    tag_level = int(c.tag[-1])
                    level_diff = last_tag_level - tag_level

                    if tag_level > last_tag_level:
                        this_level = last_level + 1
                        # Mimic the behaviour of the table of contents:
                        # level_diff is negative in this branch, so a jump
                        # of several header levels counts as a single step.
                        if level_diff < 1:
                            tag_level = last_tag_level + 1
                    elif tag_level < last_tag_level:
                        # If the tag difference is MORE than one, go up that many
                        if level_diff > 1:
                            this_level = last_level - level_diff
                        else:
                            this_level = last_level - 1
                        # Restart the counters of all deeper levels.
                        for i in xrange(this_level + 1, len(last_numbers)):
                            last_numbers[i] = 0
                    else:
                        this_level = last_level

                    last_numbers[this_level] += 1
                    section_number = '.'.join(
                        map(unicode, last_numbers[:this_level + 1]))
                    placeholder = self.markdown.htmlStash.store(
                        u'<i>%s</i>' % section_number, safe=True)
                    last_tag_level = tag_level
                    last_level = this_level

                    # Close the lists of deeper levels.
                    while tag_level < level:
                        list_stack.pop()
                        level -= 1

                    # Open a new nested list for a deeper header.
                    if tag_level > level:
                        newlist = etree.Element("ul")
                        # Compare with None explicitly: the truth value of
                        # an Element reflects whether it has children and
                        # testing it is deprecated.
                        if last_li is not None:
                            last_li.append(newlist)
                        else:
                            list_stack[-1].append(newlist)
                        list_stack.append(newlist)
                        if level == 0:
                            level = tag_level
                        else:
                            level += 1

                    elem_id = unique(self.config["slugify"](text, '-'),
                                     used_ids)

                    # List item link, to be inserted into the toc div
                    last_li = etree.Element("li")
                    link = etree.SubElement(last_li, "a")
                    link.text = '%s %s' % (section_number, text)
                    link.attrib["href"] = '#' + elem_id

                    # Needed in case the header contains subelements.
                    # Element.clear() drops children, text, tail and
                    # attributes, so the header is rebuilt from scratch.
                    c.clear()
                    c.text = placeholder + text
                    pilcrow = etree.SubElement(c, "a")
                    pilcrow.set('class', 'headerlink')
                    pilcrow.set('name', elem_id)
                    pilcrow.set('href', '#%s' % elem_id)
                    pilcrow.text = self.markdown.htmlStash.store('&para;', safe=True)
                    c.attrib['class'] = 'header'

                    if self.config["anchorlink"] in [1, '1', True, 'True', 'true']:
                        anchor = etree.Element("a")
                        anchor.text = c.text
                        anchor.attrib["href"] = "#" + elem_id
                        anchor.attrib["class"] = "toclink"
                        c.text = ""
                        # Iterate over a snapshot of the children: removing
                        # from ``c`` while walking its live child list
                        # would skip elements.
                        for elem in list(c):
                            anchor.append(elem)
                            c.remove(elem)
                        c.append(anchor)

                    list_stack[-1].append(last_li)
                except IndexError:
                    # We have bad ordering of headers. Just move on.
                    pass
        if not marker_found:
            # serialize and attach to markdown instance.
            prettify = self.markdown.treeprocessors.get('prettify')
            if prettify:
                prettify.run(div)
            toc = self.markdown.serializer(div)
            for pp in self.markdown.postprocessors.values():
                toc = pp.run(toc)
            self.markdown.toc = toc
Esempio n. 13
0
def parseHeaders(source, strip_fenced_block=False, strip_ascii_math=False):
    """ Collect the headers of a Markdown text for a Table Of Contents
        source: the Markdown text to scan
        strip_fenced_block: blank out fenced code blocks before scanning
        strip_ascii_math: blank out ``$$ ... $$`` spans before scanning
        return: [(level, text, position, anchor)] sorted by position
        position/anchor is the header position in notesEdit/notesView
    """

    # copied from the asciimathml so we don't have to have a hard dependency to strip
    math_re = re.compile(r'^(.*)\$\$([^\$]*)\$\$(.*)$', re.M)

    # copied from the fenced block code
    fence_re = re.compile(
        r'''
(?P<fence>^(?:~{3,}|`{3,}))[ ]*         # Opening ``` or ~~~
(\{?\.?(?P<lang>[a-zA-Z0-9_+-]*))?[ ]*  # Optional {, and lang
# Optional highlight lines, single- or double-quote-delimited
(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?[ ]*
}?[ ]*\n                                # Optional closing }
(?P<code>.*?)(?<=\n)
(?P=fence)[ ]*$''', re.MULTILINE | re.DOTALL | re.VERBOSE)

    def blank_out(pattern, text):
        # Overwrite one match at a time with as many newlines as the match
        # had characters, so every offset outside the match is preserved.
        # The text is rescanned from the start after each replacement.
        hit = pattern.search(text)
        while hit is not None:
            padding = "\n" * (hit.end() - hit.start())
            text = pattern.sub(padding, text, count=1)
            hit = pattern.search(text)
        return text

    # if applicable, strip out any trouble text before looking for headers
    text = source
    if strip_fenced_block:
        text = blank_out(fence_re, text)
    if strip_ascii_math:
        text = blank_out(math_re, text)

    # hash headers: the level is the number of leading '#' characters
    hash_re = re.compile(r'^(#+)(.+)', re.MULTILINE)
    found = [(m.start(), len(m.group(1)), m.group(2))
             for m in hash_re.finditer(text)]

    # setext headers: '=' underline is level 1, '-' underline is level 2
    setext_re = re.compile(r'(.+)\n([=-]+[ ]*)(\n|$)', re.MULTILINE)
    found.extend(
        (m.start(), 1 if m.group(2).startswith('=') else 2, m.group(1))
        for m in setext_re.finditer(text))

    # Order by position; anchors are made unique in that same order.
    found.sort()
    seen_slugs = set()  # In case there are headers with the same name.
    return [(depth, title, offset, unique(slugify(title, '-'), seen_slugs))
            for (offset, depth, title) in found]
Esempio n. 14
0
File: md_toc.py Progetto: plaes/Clay
    def run(self, doc):
        """Build a nested table of contents from the headers in ``doc``.

        Every (parent, child) pair yielded by ``self.iterparent(doc)`` is
        inspected.  Each ``h1``-``h6`` element with non-empty text gets an
        ``id`` attribute (a pre-existing one is kept) and an ``<li><a>``
        entry linking to that id inside a nested ``<ul>`` structure.  When
        the ``anchorlink`` config value is truthy, the header's own content
        is additionally wrapped in ``<a class="toclink">``.

        The first ``<ul>`` is given ``class="toc"``, serialized, run through
        every postprocessor and stored in ``self.markdown.toc``.  If no list
        with entries was produced, ``self.markdown.toc`` is set to ``''``.

        Returns ``None``; ``doc`` is modified in place.
        """
        div = etree.Element("div")
        last_li = None

        level = 0
        list_stack = [div]
        header_rgx = re.compile("[Hh][123456]")

        # Get a list of id attributes already in use.  ``Element.iter()`` is
        # used because ``getiterator()`` was removed in Python 3.9.
        used_ids = [c.attrib["id"] for c in doc.iter() if "id" in c.attrib]

        for (p, c) in self.iterparent(doc):
            text = ''.join(itertext(c)).strip()
            if not text:
                continue

            if header_rgx.match(c.tag):
                try:
                    tag_level = int(c.tag[-1])

                    # Climb back out of deeper lists for a shallower header.
                    while tag_level < level:
                        list_stack.pop()
                        level -= 1

                    if tag_level > level:
                        newlist = etree.Element("ul")
                        # Compare against None explicitly: truth-testing an
                        # Element is deprecated and depends on child count.
                        if last_li is not None:
                            last_li.append(newlist)
                        else:
                            list_stack[-1].append(newlist)
                        list_stack.append(newlist)

                        # The first header fixes the base level; afterwards
                        # nesting grows by one list per deeper header.
                        if level == 0:
                            level = tag_level
                        else:
                            level += 1

                    # Do not override pre-existing ids
                    if "id" not in c.attrib:
                        header_id = unique(self.config["slugify"](text, '-'),
                                           used_ids)
                        c.attrib["id"] = header_id
                    else:
                        header_id = c.attrib["id"]

                    # List item link, to be inserted into the toc
                    last_li = etree.Element("li")
                    link = etree.SubElement(last_li, "a")
                    link.text = text
                    link.attrib["href"] = '#' + header_id

                    if self.config["anchorlink"]:
                        anchor = etree.Element("a")
                        anchor.text = c.text
                        anchor.attrib["href"] = "#" + header_id
                        anchor.attrib["class"] = "toclink"
                        c.text = ""
                        # Iterate over a snapshot: children are removed from
                        # ``c`` inside the loop, and ``getchildren()`` was
                        # removed in Python 3.9.
                        for elem in list(c):
                            anchor.append(elem)
                            c.remove(elem)
                        c.append(anchor)

                    list_stack[-1].append(last_li)
                except IndexError:
                    # We have bad ordering of headers. Just move on.
                    pass

        # serialize and attach to markdown instance.
        ul = div.find('ul')
        # A missing list and a list without entries both mean "no TOC".
        if ul is None or len(ul) == 0:
            self.markdown.toc = ''
            return

        ul.attrib["class"] = "toc"
        toc = self.markdown.serializer(ul)
        for pp in self.markdown.postprocessors.values():
            toc = pp.run(toc)
        self.markdown.toc = toc
Esempio n. 15
0
    def run(self, doc):
        """Generate the table of contents for ``doc`` and attach or embed it.

        A ``<div>`` titled "Contents" is created as the TOC container.  Each
        (parent, child) pair from ``self.iterparent(doc)`` with non-empty
        text is examined:

        * an element whose text equals the ``marker`` config value (and that
          is not a header, ``pre`` or ``code``) is replaced in its parent by
          the TOC ``<div>``;
        * an ``h1``-``h6`` element gets an ``id`` (a pre-existing one is
          kept), is recorded as ``{'level', 'id', 'name'}`` and is passed to
          ``self.add_anchor``.

        The recorded headers are nested with ``order_toc_list`` and rendered
        into the ``<div>`` by ``self.build_toc_etree``.  Only when no marker
        was found is the ``<div>`` serialized, run through every
        postprocessor and stored in ``self.markdown.toc``.

        Also sets ``self.use_anchors`` from the ``anchorlink`` config value.
        Returns ``None``; ``doc`` is modified in place.
        """
        div = etree.Element("div")
        p_title = etree.Element("p")
        p_title.attrib["class"] = "first sidebar-title"
        p_title.text = "Contents"
        div.append(p_title)
        div.attrib["class"] = "contents alert alert-info pull-left topic"
        header_rgx = re.compile("[Hh][123456]")

        self.use_anchors = self.config["anchorlink"] in [1, '1', True, 'True', 'true']

        # Get the set of id attributes already in use.  ``Element.iter()``
        # is used because ``getiterator()`` was removed in Python 3.9.
        used_ids = set()
        for c in doc.iter():
            if "id" in c.attrib:
                used_ids.add(c.attrib["id"])

        toc_list = []
        marker_found = False
        for (p, c) in self.iterparent(doc):
            text = ''.join(itertext(c)).strip()
            if not text:
                continue

            # To keep the output from breaking validation by putting a
            # <div> inside of a <p>, we replace the <p> in its entirety.
            # We do not allow the marker inside a header, as that would
            # cause an endless loop of placing a new TOC inside the
            # previously generated TOC.
            if c.text and c.text.strip() == self.config["marker"] and \
               not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
                for i, child in enumerate(p):
                    if child == c:
                        p[i] = div
                        break
                marker_found = True

            if header_rgx.match(c.tag):

                # Do not override pre-existing ids
                if "id" not in c.attrib:
                    elem_id = unique(self.config["slugify"](text, '-'), used_ids)
                    c.attrib["id"] = elem_id
                else:
                    elem_id = c.attrib["id"]

                tag_level = int(c.tag[-1])

                toc_list.append({'level': tag_level,
                                 'id': elem_id,
                                 'name': text})

                self.add_anchor(c, elem_id)

        toc_list_nested = order_toc_list(toc_list)
        self.build_toc_etree(div, toc_list_nested)
        prettify = self.markdown.treeprocessors.get('prettify')
        if prettify:
            prettify.run(div)
        if not marker_found:
            # serialize and attach to markdown instance.
            toc = self.markdown.serializer(div)
            for pp in self.markdown.postprocessors.values():
                toc = pp.run(toc)
            self.markdown.toc = toc
Esempio n. 16
0
def parseHeaders(source, strip_fenced_block=False, strip_ascii_math=False):
    """ Find the headers in Markdown text to construct a Table Of Contents
        Hash headers and setext (underlined) headers are both detected.
        With strip_fenced_block / strip_ascii_math set, fenced code blocks /
        ``$$ ... $$`` spans are replaced by newlines first, so their content
        is not scanned while all other offsets stay the same.
        return: [(level, text, position, anchor)] in order of position
        position/anchor is the header position in notesEdit/notesView
    """

    # copied from the asciimathml so we don't have to have a hard dependency to strip
    ASCIIMATHML_RE = re.compile(r'^(.*)\$\$([^\$]*)\$\$(.*)$', re.M)

    # copied from the fenced block code
    FENCED_BLOCK_RE = re.compile(r'''
(?P<fence>^(?:~{3,}|`{3,}))[ ]*         # Opening ``` or ~~~
(\{?\.?(?P<lang>[a-zA-Z0-9_+-]*))?[ ]*  # Optional {, and lang
# Optional highlight lines, single- or double-quote-delimited
(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?[ ]*
}?[ ]*\n                                # Optional closing }
(?P<code>.*?)(?<=\n)
(?P=fence)[ ]*$''', re.MULTILINE | re.DOTALL | re.VERBOSE)

    # Requested strippers, applied in this order: fenced blocks, then math.
    strippers = (
        (strip_fenced_block, FENCED_BLOCK_RE),
        (strip_ascii_math, ASCIIMATHML_RE),
    )

    cleaned = source
    for enabled, pattern in strippers:
        if not enabled:
            continue
        # Blank out the first match, then search again from the beginning,
        # until the pattern no longer occurs.
        while True:
            span = pattern.search(cleaned)
            if span is None:
                break
            filler = "\n" * (span.end() - span.start())
            cleaned = pattern.sub(filler, cleaned, count=1)

    entries = []

    # hash headers
    for match in re.finditer(r'^(#+)(.+)', cleaned, re.MULTILINE):
        hashes, title = match.groups()
        entries.append((match.start(), len(hashes), title))

    # setext headers
    for match in re.finditer(r'(.+)\n([=-]+[ ]*)(\n|$)', cleaned, re.MULTILINE):
        title, underline = match.group(1, 2)
        depth = 1 if underline.startswith('=') else 2
        entries.append((match.start(), depth, title))

    toc = []
    taken = set()           # In case there are headers with the same name.
    for offset, depth, title in sorted(entries):
        toc.append((depth, title, offset, unique(slugify(title, '-'), taken)))
    return toc