Beispiel #1
0
def _strip_tags(tree: mwparserfromhell.wikicode.Wikicode,
                threshold: int = 200):
    """Unwrap attribute-less tags and drop short ``ref`` tags, in place.

    The first pass replaces every tag that is not a ``ref`` and carries no
    attributes with its own contents. The second pass removes every ``ref``
    tag whose contents are shorter than ``threshold``.

    :param tree: Parsed wikitext; modified in place.
    :param threshold: Exclusive upper bound on ``len(tag.contents)`` for a
        ``ref`` tag to be removed.
    """
    for node in tree.filter_tags():
        if node.tag == 'ref' or node.attributes:
            continue
        tree.replace(node, node.contents)

    # The tags are queried again because the first pass rewrote the tree.
    for node in tree.filter_tags():
        if node.tag != 'ref':
            continue
        # Length is checked at visit time, after any earlier removals.
        if len(node.contents) < threshold:
            _tree_remove(tree, node)
Beispiel #2
0
def remove_no_include(
    wikicode: mwparserfromhell.wikicode.Wikicode
) -> mwparserfromhell.wikicode.Wikicode:
    """Remove every ``noinclude`` tag node from ``wikicode`` and return it.

    The input object is modified in place; the same object is returned.
    """
    for node in wikicode.filter_tags():
        if not node.tag.matches('noinclude'):
            continue
        try:
            wikicode.remove(node)
        except ValueError:
            # Presumably the node is no longer in the tree (e.g. it was
            # nested in a tag removed earlier) -- nothing left to do.
            continue
    return wikicode
 def naiveStrip(self, wikiCode:mwparserfromhell.wikicode.Wikicode):
     """Return the result of stripping the wiki markup from ``wikiCode``.

     Works on whole pages as well as on single sections.

     Arguments:
         wikiCode {mwparserfromhell.wikicode.Wikicode} -- wikicode for a page or a section.

     Returns:
         The value produced by ``wikiCode.strip_code()``.
     """
     plain_text = wikiCode.strip_code()
     return plain_text
Beispiel #4
0
def _strip_ext_links(tree: mwparserfromhell.wikicode.Wikicode,
                     threshold: int = 250,
                     url_threshold: int = 1000):
    """Remove short external links from ``tree`` in place.

    A link is removed when its URL is shorter than ``url_threshold`` and it
    either has no title or its title is shorter than ``threshold``.

    :param tree: Parsed wikitext; modified in place.
    :param threshold: Exclusive upper bound on the title length.
    :param url_threshold: Exclusive upper bound on the URL length.
    """
    for ext_link in tree.filter_external_links():
        if len(ext_link.url) >= url_threshold:
            continue
        caption = ext_link.title
        if caption is None or len(caption) < threshold:
            _tree_remove(tree, ext_link)
Beispiel #5
0
    def replace_gallery_files(
            self, wikicode: mwparserfromhell.wikicode.Wikicode) -> None:
        """
        Replace files in <gallery>.

        Every line of a gallery is split at the first ``|`` into a file
        title and a caption. When the title resolves to a ``BSiconPage``
        that has an entry in ``self.opt.bsicons_map``, the line is rewritten
        with the new title and the replacement is recorded on
        ``self.current_page.replacements``.

        A gallery's contents are written back only when at least one of its
        own lines changed, so galleries without a replaced file are left
        byte-for-byte untouched even if earlier replacements were already
        recorded for the page.

        :param wikicode: Parsed wikitext
        """
        for tag in wikicode.ifilter_tags():
            if tag.tag.lower() != "gallery":
                continue
            lines = str(tag.contents).splitlines()
            changed = False  # tracks replacements made in THIS gallery only
            for i, line in enumerate(lines):
                title, sep, caption = removeDisabledParts(line).partition("|")
                if not title:
                    continue
                try:
                    current_icon = BSiconPage(self.current_page.site, title)
                    # Called for its side effect: raises on an invalid title.
                    current_icon.title()
                except (pywikibot.exceptions.Error, ValueError):
                    continue
                new_icon = self.opt.bsicons_map.get(current_icon, None)
                if new_icon:
                    lines[i] = f"{new_icon.title()}{sep}{caption}"
                    self.current_page.replacements.add(
                        Replacement(current_icon, new_icon))
                    changed = True
            if changed:
                tag.contents = "\n".join(lines) + "\n"
Beispiel #6
0
def _strip_templates(tree: mwparserfromhell.wikicode.Wikicode,
                     threshold: int = 30):
    """Drop short template parameters and then short, empty templates.

    For every template returned by
    ``tree.filter_templates(recursive=tree.RECURSE_OTHERS)``, each parameter
    whose name and value are both shorter than ``threshold`` is removed.
    A template left without parameters and with a name shorter than
    ``threshold`` is removed from ``tree``.

    :param tree: Parsed wikitext; modified in place.
    :param threshold: Exclusive length bound applied to parameter names,
        parameter values and template names.
    """
    for tmpl in tree.filter_templates(recursive=tree.RECURSE_OTHERS):
        # Walk backwards so removals do not disturb the remaining iteration.
        for arg in reversed(tmpl.params):
            if len(arg.name) >= threshold or len(arg.value) >= threshold:
                continue
            tmpl.remove(arg)
        if tmpl.params:
            continue
        if len(tmpl.name) < threshold:
            _tree_remove(tree, tmpl)
Beispiel #7
0
def _parse_case_page(
        index: str, name: str,
        tree: mwparserfromhell.wikicode.Wikicode) -> Optional[CasePage]:
    """Parse one case page into a ``CasePage``.

    The page must contain exactly one level-2 section whose heading matches
    ``name``. Every other section must either lie inside that section,
    match ``_IGNORED_HEADINGS``, or lie inside a section that does;
    anything else trips an assertion. Each level-3 subsection of the matched
    section must be titled ``Pages <a> through <b>`` and is stored under
    the key ``"<a>-<b>"``.

    :param index: Identifier passed through unchanged to ``CasePage``.
    :param name: Heading of the section to parse.
    :param tree: Parsed wikitext of the page.
    :return: The parsed page, or ``None`` when no matching section exists
        and the page carries the courtesy-blanked marker comment.
    """
    diff_sections = tree.get_sections(
        matches=lambda title: title.matches(name))
    if not diff_sections and '<!-- Template:Courtesy blanked -->' in tree:
        return None
    assert len(diff_sections) == 1, tree.filter_headings()
    diff_section = diff_sections[0]
    assert diff_section.nodes[0].level == 2

    excluded = [diff_section]
    unknown = []
    for section in tree.get_sections(include_lead=False):
        if any(section in excl for excl in excluded):
            continue
        if section.nodes[0].title.matches(_IGNORED_HEADINGS):
            excluded.append(section)
        else:
            unknown.append(section)

    # Re-check: a section may have been seen before the ignored section
    # that contains it was added to ``excluded``.
    unknown = [
        section for section in unknown
        if not any(section in excl for excl in excluded)
    ]
    assert not unknown, [section.nodes[0] for section in unknown]

    sections = {}
    for section in diff_section.get_sections(levels=[3]):
        title = section.nodes[0].title.strip()
        match = re.match(r'^Pages (\d+) through (\d+)$', title)
        assert match, title
        # Use a separate name here: reusing ``index`` would clobber the
        # parameter that is handed to ``CasePage`` below.
        page_range = f'{match.group(1)}-{match.group(2)}'
        # Skip the heading line and drop blank lines.
        lines = [
            line.strip() for line in section.splitlines()[1:] if line.strip()
        ]
        if _is_collapsed(lines):
            continue
        pages = [_parse_line(line) for line in lines]
        pages = [page for page in pages if page]
        if not pages:
            continue
        sections[page_range] = Section(title, pages)

    return CasePage(index, sections)
        def parse_family(wikicode: mwp.wikicode.Wikicode):
            """Extract the family name from ``wikicode``.

            After ``parse_markup``, wikilinks whose lowercased text contains
            ``"category"`` are ignored. With exactly one remaining link its
            stripped title is returned. With several, each is replaced by
            its stripped title and the stripped text of the whole value is
            returned. With none, ``None`` is returned. The zero and
            several-link cases print a diagnostic.
            """
            wikicode = parse_markup(wikicode)

            family_links = []
            for candidate in wikicode.filter_wikilinks():
                if "category" in candidate.lower():
                    continue
                family_links.append(candidate)

            if not family_links:
                print("unable to find family", wikicode)
                return None

            if len(family_links) == 1:
                return family_links[0].title.strip_code().strip()

            print("unexpected number of wikilinks", wikicode)
            for extra in family_links:
                wikicode.replace(extra, extra.title.strip_code().strip())
            return wikicode.strip_code().strip()
Beispiel #9
0
def keep_only_includes(
    wikicode: mwparserfromhell.wikicode.Wikicode
) -> mwparserfromhell.wikicode.Wikicode:
    """Drop other top-level tags when an ``onlyinclude`` tag is present.

    Only the outermost tags are inspected. If at least one of them is an
    ``onlyinclude`` tag, every other outermost tag is removed. In all cases
    the text is then passed through ``re.sub(onlyinclude_tag, '', ...)``
    and re-parsed, so a new ``Wikicode`` object is returned.
    """
    # recursive=False: select only the most external tags
    outer_tags = wikicode.filter_tags(recursive=False)
    discardable = [
        node for node in outer_tags if not node.tag.matches('onlyinclude')
    ]
    # Fewer discardable tags than outer tags means an onlyinclude was seen.
    if len(discardable) != len(outer_tags):
        for node in discardable:
            try:
                wikicode.remove(node)
            except ValueError:
                continue
    # NOTE(review): ``onlyinclude_tag`` is defined outside this block;
    # presumably it matches the onlyinclude markers themselves -- confirm.
    stripped = re.sub(onlyinclude_tag, '', str(wikicode))
    return mwparserfromhell.parse(stripped)
        def parse_names(wikicode: mwp.wikicode.Wikicode):
            """Return the comma-separated names in ``wikicode`` as a list.

            After ``parse_markup``, a template with one parameter is
            replaced by that parameter. A template with several parameters
            is rewritten only when its first parameter contains
            ``"highprince"`` or ``"army"`` (case-insensitive). The stripped
            text is then split on commas; empty entries are dropped.
            """
            wikicode = parse_markup(wikicode)
            for tmpl in wikicode.filter(forcetype=Template):
                args = tmpl.params
                if len(args) == 1:
                    wikicode.replace(tmpl, args[0])
                    continue
                if len(args) <= 1:
                    # No parameters at all: leave the template untouched.
                    continue
                if "highprince" in args[0].lower():
                    wikicode.replace(tmpl, "{0} of {1}".format(*args))
                elif "army" in args[0].lower():
                    wikicode.replace(tmpl, "{1} {0}".format(*args))

            pieces = (piece.strip() for piece in wikicode.strip_code().split(","))
            return [name for name in pieces if name]
        def parse_markup(wikicode: mwp.wikicode.Wikicode):
            """Remove reference templates and simplify wikilinks in place.

            Templates whose name contains ``"ref"`` or ``"wob"`` are
            removed. Every wikilink is replaced by a new ``Wikilink`` built
            from its display text, or from its title when it has no text.

            Both node lists are collected before anything is modified, so a
            node nested inside an already-removed template is no longer part
            of the tree when its turn comes. The resulting ``ValueError`` is
            treated as "nothing left to do" rather than aborting the parse.

            Returns the same ``wikicode`` object that was passed in.
            """
            templates = wikicode.filter_templates()
            links = wikicode.filter_wikilinks()
            for template in templates:
                # remove references
                if "ref" in template.name or "wob" in template.name:
                    try:
                        wikicode.remove(template)
                    except ValueError:
                        # Already gone together with an enclosing template.
                        continue
            for link in links:
                # simplify links
                text = link.text if link.text else link.title
                try:
                    wikicode.replace(link, Wikilink(text))
                except ValueError:
                    # The link lived inside a template removed above.
                    continue

            return wikicode
        def parse_abilities(wikicode: mwp.wikicode.Wikicode):
            """Collect ability names from the templates and links in ``wikicode``.

            Wikilinks contribute their lowercased, stripped title. Templates
            are considered only when their name contains ``"tag"``. Their
            parameters (the name for keyed parameters, the value otherwise)
            are lowercased after dropping entries equal to ``"cat"``. One
            parameter is taken as-is. Several are interpreted by the first
            one (``shard``/``vessel``/``splinter`` or ``squire``). Anything
            else is reported with ``print``.
            """
            wikicode = parse_markup(wikicode)
            abilities = []
            for node in wikicode.filter(forcetype=(Template, Wikilink)):
                if isinstance(node, Wikilink):
                    abilities.append(node.title.strip_code().lower())
                    continue
                if not isinstance(node, Template) or "tag" not in node.name:
                    continue

                raw_values = [
                    arg.name.strip_code()
                    if arg.showkey is True
                    else arg.value.strip_code()
                    for arg in node.params
                ]
                # The "cat" filter runs before lowercasing, as in the data.
                params = tuple(
                    value.lower() for value in raw_values if value != "cat"
                )

                if len(params) == 1:
                    abilities.append(params[0])
                    continue
                if len(params) < 2:
                    continue

                if params[0] in ("shard", "vessel", "splinter"):
                    abilities.extend((params[0], f"{params[0]} of {params[1]}"))
                elif params[0] == "squire":
                    abilities.append(f"squire ({params[1].split()[-1]})")
                else:
                    print("unknown ability while parsing character: ", params)

            return abilities
Beispiel #13
0
    def replace_template_files(
            self, wikicode: mwparserfromhell.wikicode.Wikicode) -> None:
        """
        Replace files in templates.

        Each template name is resolved to a page in the Template namespace
        of the current page's site. Templates whose name cannot be resolved
        are skipped. The others are handed to the routemap, railway-track
        or generic BS handler, depending on which ``site_config`` collection
        contains the page.

        :param wikicode: Parsed wikitext
        """
        assert self.site_config is not None
        for node in wikicode.ifilter_templates():
            try:
                site = self.current_page.site
                template_page = Page(
                    site,
                    removeDisabledParts(str(node.name)),
                    ns=site.namespaces.TEMPLATE,
                )
                # Called for its side effect: raises on an invalid title.
                template_page.title()
            except (pywikibot.exceptions.Error, ValueError):
                continue

            if template_page in self.site_config.routemap_templates:
                self._replace_routemap_files(node)
                continue
            if template_page in self.site_config.railway_track_templates:
                self._replace_rt_template_files(node)
                continue
            self._replace_bs_template_files(node, template_page)
 def parse_species(wikicode: mwp.wikicode.Wikicode):
     """Look up the species for ``wikicode`` in the outer ``species`` mapping.

     The lookup key is the stripped, lowercased text of ``wikicode``.
     Returns ``None`` when there is no entry. When ``val`` mentions
     ``"spren"`` or ``"cryptic"``, the looked-up value is stored as
     ``char_info["subspecies"]`` and ``"Spren"`` is returned instead.
     """
     lookup_key = wikicode.strip_code().lower().strip()
     spec = species.get(lookup_key, None)
     if spec is None:
         return spec
     # NOTE(review): ``val`` is neither a parameter nor a local here; it has
     # to come from an enclosing scope. Confirm it is the same value that
     # was passed in as ``wikicode``.
     for marker in ("spren", "cryptic"):
         if marker in val.lower():
             char_info["subspecies"] = spec
             return "Spren"
     return spec
        def parse_ethnicity(wikicode: mwp.wikicode.Wikicode):
            """Return the title-cased ethnicity text found in ``wikicode``.

            After ``parse_markup``, `` and `` is replaced by ``, ``.
            Wikilinks are replaced by their stripped title. Templates are
            replaced by their single parameter, by ``"Noble"`` when any
            parameter is ``"Noble"``/``"noble"``, or by the tuple of their
            parameters otherwise (entries equal to ``"cat"`` are ignored).
            The result ``"Skaa, Noble"`` is mapped to ``"Half-Skaa"``.
            """
            wikicode = parse_markup(wikicode)
            # Collected before the " and " rewrite below, as the nodes found
            # here are the ones replaced afterwards.
            markup_nodes = wikicode.filter(forcetype=(Template, Wikilink))
            if " and " in wikicode:
                wikicode.replace(" and ", ", ")

            for node in markup_nodes:
                if isinstance(node, Wikilink):
                    wikicode.replace(node, node.title.strip_code().strip())
                elif isinstance(node, Template):
                    values = [
                        arg.name.strip_code()
                        if arg.showkey is True
                        else arg.value.strip_code()
                        for arg in node.params
                    ]
                    params = tuple(value for value in values if value != "cat")
                    if len(params) == 1:
                        replacement = params[0]
                    elif "Noble" in params or "noble" in params:
                        replacement = "Noble"
                    else:
                        replacement = params
                    wikicode.replace(node, replacement)

            # With no markup nodes the loop is a no-op and only this runs.
            ethnicity = wikicode.strip_code().strip().title()
            return "Half-Skaa" if ethnicity == "Skaa, Noble" else ethnicity
        def parse_residence(wikicode: mwp.wikicode.Wikicode):
            """Return the residence text found in ``wikicode``.

            After ``parse_markup``, ``<br>`` is replaced by ``, ``.
            Wikilinks are replaced by their stripped, title-cased title.
            Templates are replaced by their single parameter, or by the
            tuple of their parameters otherwise (entries equal to ``"cat"``
            are ignored). Parenthesised word groups are then removed from
            the stripped text, and one special-cased address is mapped to
            ``"Elendel"``.
            """
            wikicode = parse_markup(wikicode)
            # Collected before the "<br>" rewrite below.
            markup_nodes = wikicode.filter(forcetype=(Template, Wikilink))
            if "<br>" in wikicode:
                wikicode.replace("<br>", ", ")

            for node in markup_nodes:
                if isinstance(node, Wikilink):
                    wikicode.replace(node, node.title.strip_code().strip().title())
                elif isinstance(node, Template):
                    values = [
                        arg.name.strip_code()
                        if arg.showkey is True
                        else arg.value.strip_code()
                        for arg in node.params
                    ]
                    params = tuple(value for value in values if value != "cat")
                    replacement = params[0] if len(params) == 1 else params
                    wikicode.replace(node, replacement)

            # With no markup nodes the loop is a no-op and only this runs.
            residence = wikicode.strip_code().strip()
            residence = re.sub(r"\s?\([\w\s]+\)", "", residence)
            # special cases
            if residence.startswith("15 Stranat Place"):
                return "Elendel"
            return residence
 def parse_nation(wikicode: mwp.wikicode.Wikicode):
     """Look up ``wikicode`` in the outer ``demonyms`` mapping.

     The lookup key is the stripped, lowercased text of ``wikicode``.
     Returns ``None`` when there is no entry.
     """
     lookup_key = wikicode.strip_code().lower().strip()
     return demonyms.get(lookup_key, None)
 def parse_profession(wikicode: mwp.wikicode.Wikicode):
     """Return the stripped text of ``wikicode``, lowercased and trimmed."""
     # todo: parse with simple NLP
     normalized = wikicode.strip_code().lower().strip()
     return str(normalized)
Beispiel #19
0
def _tree_remove(tree: mwparserfromhell.wikicode.Wikicode,
                 node: mwparserfromhell.nodes.Node):
    """Remove ``node`` from ``tree``, ignoring a ``ValueError`` from the removal.

    :param tree: Parsed wikitext; modified in place.
    :param node: Node to remove.
    """
    try:
        tree.remove(node)
    except ValueError:
        # Best-effort removal: a failed lookup is not an error for callers.
        return
Beispiel #20
0
def _strip_wikilinks(tree: mwparserfromhell.wikicode.Wikicode,
                     threshold: int = 50):
    """Remove short wikilinks from ``tree`` in place.

    A link is removed when its title is shorter than ``threshold`` and it
    either has no display text or its text is shorter than ``threshold``.

    :param tree: Parsed wikitext; modified in place.
    :param threshold: Exclusive upper bound on title and text length.
    """
    for wikilink in tree.filter_wikilinks():
        if len(wikilink.title) >= threshold:
            continue
        label = wikilink.text
        if label is None or len(label) < threshold:
            _tree_remove(tree, wikilink)