コード例 #1
0
ファイル: section.py プロジェクト: TrueBrain/wikitexthtml
def replace(instance: WikiTextHtml, wikitext: wikitextparser.WikiText):
    slugs = defaultdict(int)  # type: Dict[str, int]

    # Calculate how many times we see the same slug.
    # We want the sections to be numbered from top to bottom. So we have to do
    # this in a separate step, as we run the sections in reverse while
    # modifying them.
    for section in wikitext.get_sections():
        if not section or not section.title:
            continue

        slugs[_slugify(section.title)] += 1

    for section in reversed(wikitext.get_sections()):
        if not section:
            continue

        if section.title:
            slug = _slugify(section.title)
            if slugs[slug] != 1:
                slugs[slug] -= 1
                slug = f"{slug}_{slugs[slug] + 1}"

            title = section.title.strip()

            content = f"<h{section.level}>"
            content += f'<a class="anchor" href="#{slug}"></a>'
            content += f'<span class="mw-headline" id="{slug}">{title}</span>'
            content += f"</h{section.level}>\n"
        else:
            content = ""

        section.string = content + section.contents
コード例 #2
0
ファイル: article_parser.py プロジェクト: nheist/CaLiGraph
def _extract_sections(wiki_text: WikiText) -> list:
    sections = []
    for section_idx, section in enumerate(
            wiki_text.get_sections(include_subsections=False)):
        #markup_without_lists = _remove_listing_markup(section)
        #text, entities = _convert_markup(markup_without_lists)
        enums = [
            _extract_enum(l) for l in section.get_lists(VALID_ENUM_PATTERNS)
        ]
        tables = [_extract_table(t) for t in section.get_tables()]
        sections.append({
            'index':
            section_idx,
            'name':
            section.title.strip()
            if section.title and section.title.strip() else 'Main',
            'level':
            section.level,
            #'text': text,
            #'entities': entities,
            'enums': [e for e in enums if len(e) >= 2],
            'tables': [t for t in tables if t]
        })
    return sections
コード例 #3
0
ファイル: section.py プロジェクト: TrueBrain/wikitexthtml
def toc(instance: WikiTextHtml, wikitext: wikitextparser.WikiText) -> str:
    body = wikitext.string

    # FORCETOC will always win over NOTOC
    if "__NOTOC__" in wikitext.string and "__FORCETOC__" not in wikitext.string:
        body = body.replace("__NOTOC__", "")
        body = body.replace("__FORCETOC__", "")
        return body

    index = body.find("__TOC__")
    if index == -1:
        # Not forced and not more than 3 sections? No ToC. See fot details:
        # https://www.mediawiki.org/wiki/Manual:Table_of_contents
        # The get_sections() always returns an additional section, which is
        # the head of the page.
        if len(wikitext.get_sections()) - 1 <= 3:
            body = body.replace("__NOTOC__", "")
            body = body.replace("__FORCETOC__", "")
            return body

        index = wikitext.get_sections()[1].span[0]

    # If there is only 1 section, there is nothing to put in a TOC.
    if len(wikitext.get_sections()) == 1:
        body = body.replace("__NOTOC__", "")
        body = body.replace("__TOC__", "")
        body = body.replace("__FORCETOC__", "")
        return body

    toc = '<table id="toc">\n'
    toc += "<tbody>\n"
    toc += "<tr><td>\n"

    toc += '<div id="toctitle">\n'
    toc += "<h2>Contents</h2>\n"
    toc += "</div>\n"

    last_level = 0
    skipped_levels = []
    toc_level = 0
    toc_number = []
    slugs = defaultdict(int)  # type: Dict[str, int]
    for section in wikitext.get_sections():
        if not section or not section.title:
            continue

        slug = _slugify(section.title)
        slugs[slug] += 1
        if slugs[slug] != 1:
            slug += f"_{slugs[slug]}"

        title = section.title.strip()
        title = _remove_known_tags(title)
        current_level = section.level

        if current_level > last_level:
            # Keep track how much level we went up in one time; the TOC
            # level always goes up by one.
            skipped_levels.append(current_level - last_level)

            toc_level += 1
            toc_number.append(1)

            toc += "<ul>\n"
        elif current_level < last_level:
            change = last_level - current_level

            # Check how many levels we jump back. This means you can skip
            # levels between chapters, but the TOC won't show an empty
            # chapter in between.
            level_change = 0
            while skipped_levels[-1] <= change:
                change -= skipped_levels[-1]
                skipped_levels.pop()
                level_change += 1
            skipped_levels[-1] -= change

            # For the amount of levels we changed, close the lists.
            if level_change > 0:
                for _ in range(level_change):
                    toc += "</li>\n"
                    toc += "</ul>\n"
                    toc_number.pop()
                    toc_level -= 1

            toc_number[-1] += 1
            toc += "</li>\n"
        else:
            # Level was the same, so continue to the next.
            toc_number[-1] += 1
            toc += "</li>\n"

        toc += f'<li class="toclevel-{toc_level}">\n'
        toc += f'<a href="#{slug}">\n'
        toc += f'<span class="tocnumber">{".".join([str(t) for t in toc_number])}</span>\n'
        toc += f'<span class="toctext">{title}</span>\n'
        toc += "</a>\n"

        last_level = current_level

    # Close all remaining lists.
    for _ in range(toc_level):
        toc += "</li>\n"
        toc += "</ul>\n"

    toc += "</td></tr>\n"
    toc += "</tbody>\n"
    toc += "</table>\n"

    body = body[:index] + toc + body[index:]
    body = body.replace("__TOC__", "")
    body = body.replace("__NOTOC__", "")
    body = body.replace("__FORCETOC__", "")
    return body