def update_page(title, text, langlinks, weak_update=True):
    """
    :param str title: title of the page
    :param str text: wikitext of the page
    :param langlinks: a sorted list of ``(tag, title)`` tuples as obtained
        from :py:meth:`self.get_langlinks`
    :param weak_update:
        When ``True``, the langlinks present on the page are mixed with those
        suggested by ``family_titles``. This is necessary only when there are
        multiple "intersecting" families, in which case the intersection
        should be preserved and solved manually. This is reported in
        _merge_families.
    :returns: updated wikicode
    """
    # temporarily skip main pages until the behavior switches
    # (__NOTOC__ etc.) can be parsed by mwparserfromhell
    # NOTE: handling whitespace right will be hard:
    # https://wiki.archlinux.org/index.php?title=Main_page&diff=383144&oldid=382787
    if re.search("__NOTOC__|__NOEDITSECTION__", text):
        logger.warning("Skipping page '{}' (contains behavior switch(es))".format(title))
        return text

    # format langlinks, in the prefix form
    # (e.g. "cs:Some title" for title="Some title" and tag="cs")
    langlinks = ["[[{}:{}]]".format(tag, title) for tag, title in langlinks]

    logger.info("Parsing page [[{}]] ...".format(title))
    wikicode = mwparserfromhell.parse(text)
    if weak_update is True:
        parent, magics, cats, langlinks = header.get_header_parts(wikicode, langlinks=langlinks, remove_from_parent=True)
    else:
        # drop the extracted langlinks
        parent, magics, cats, _ = header.get_header_parts(wikicode, remove_from_parent=True)
    header.build_header(wikicode, parent, magics, cats, langlinks)
    return wikicode
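
# Usage sketch (illustrative only, not part of the module): the sample
# wikitext and langlink pairs below are made-up assumptions. update_page
# returns a mwparserfromhell Wikicode object, so the result is stringified
# before it could be saved back through the API.
def _demo_update_page():
    text = "Some article text.\n\n[[Category:English]]"
    langlinks = sorted([("cs", "Nějaký název"), ("es", "Algún título")])
    return str(update_page("Some title", text, langlinks, weak_update=True))
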
def fix_page(title, text_old):
    """
    Re-tags categories whose language does not match the language detected
    from the page title.

    :param str title: title of the page
    :param str text_old: wikitext of the page
    :returns: updated wikicode
    """
    langname = lang.detect_language(title)[1]
    wikicode = mwparserfromhell.parse(text_old)
    parent, magics, cats, langlinks = get_header_parts(wikicode, remove_from_parent=True)
    for cat in cats:
        # get_header_parts returns a list of wikicode objects, each with one node
        cat = cat.nodes[0]
        pure, ln = lang.detect_language(str(cat.title))
        if ln != langname:
            cat.title = lang.format_title(pure, langname)
    build_header(wikicode, parent, magics, cats, langlinks)
    return wikicode
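
# Usage sketch (illustrative only): the page title and its language suffix
# are assumptions about the lang helpers' naming convention, not values taken
# from this module. On a non-English page, any category in a different
# language gets re-tagged with the page's language via lang.format_title.
def _demo_fix_page():
    text = "Obsah stránky.\n\n[[Category:Some category]]"
    return str(fix_page("Some title (Čeština)", text))
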
def decategorize(title, text_old):
    """
    Removes all categories from the page header, keeping the magic words and
    langlinks intact.

    :param str title: title of the page (unused)
    :param str text_old: wikitext of the page
    :returns: updated wikicode
    """
    wikicode = mwparserfromhell.parse(text_old)
    parent, magics, cats, langlinks = get_header_parts(wikicode, remove_from_parent=True)
    build_header(wikicode, parent, magics, [], langlinks)
    return wikicode
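
# Usage sketch (illustrative only; the sample wikitext is made up): the
# category is dropped while the langlink survives in the rebuilt header.
def _demo_decategorize():
    text = "Page text.\n\n[[Category:English]]\n[[cs:Nějaký název]]"
    return str(decategorize("Some title", text))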