Ejemplo n.º 1
0
    def rename_non_english(self):
        """
        Interactively rename localized category pages to match the English title.

        Every page in ``self.allpages`` that is detected as English and whose
        title starts with ``Category:`` is inspected.  For each of its internal
        langlinks whose localized title differs from the English title, a move
        from ``"<localized title> (<Language>)"`` to
        ``"<English title> (<Language>)"`` is offered to the user.  When the
        target already exists, only a warning is logged.
        """
        # NOTE(review): presumably ``allpages`` is a lazily evaluated attribute
        # and deleting it forces a fresh fetch on the next access -- confirm.
        del self.allpages

        # FIXME: starting with English pages is not very good:
        # - some pages are omitted (e.g. when two pages link to the same English page, at least warning should be printed)
        # - it suggests to move e.g. Xfce (Česky) to Xfwm (Česky) because multiple English pages link to it
        # Therefore we limit it only to categories...
        for page in self.allpages:
            title = page["title"]
            if lang.detect_language(title)[1] != "English" or not title.startswith("Category:"):
                continue

            for tag, localized_title in self.get_langlinks(title):
                logger.info("Checking [[{}:{}]] for renaming...".format(
                    tag, localized_title))
                if not lang.is_internal_tag(tag) or localized_title == title:
                    continue

                # both titles carry the same "(Language)" suffix
                langname = lang.langname_for_tag(tag)
                source = "{} ({})".format(localized_title, langname)
                target = "{} ({})".format(title, langname)

                if self._page_exists(target):
                    logger.warning(
                        "Cannot move page [[{}]] to [[{}]]: target page already exists"
                        .format(source, target))
                    continue

                # interactive mode is necessary because this assumes that all English pages are named correctly
                if ask_yesno("Move page [[{}]] to [[{}]]?".format(source, target)) is True:
                    summary = "comply with [[Help:I18n#Page titles]] and match the title of the English page"
                    self.api.move(source, target, summary)
Ejemplo n.º 2
0
 def _is_valid_internal(self, tag, title):
     """
     Check whether a langlink points to a page listed in ``self.wrapped_titles``.

     For titles containing ``/`` the title formatted with
     ``augment_all_subpage_parts=False`` is tried first; the default
     formatting is used as the fallback.

     :param tag: language tag of the link
     :param title: title of the link, formatted with the language name before the lookup
     :returns: ``False`` when ``tag`` is not an internal tag, otherwise
         whether one of the formatted titles is in ``self.wrapped_titles``
     """
     if not lang.is_internal_tag(tag):
         return False

     langname = lang.langname_for_tag(tag)
     if "/" in title:
         partially_augmented = lang.format_title(
             title, langname, augment_all_subpage_parts=False)
         if partially_augmented in self.wrapped_titles:
             return True
     return lang.format_title(title, langname) in self.wrapped_titles
 def _is_valid_internal(self, tag, title):
     """
     Check whether a langlink points to a page listed in ``self.wrapped_titles``.

     :param tag: language tag of the link
     :param title: title of the link without the ``(Language)`` suffix
     :returns: ``False`` when ``tag`` is not an internal tag, otherwise
         whether the full title is in ``self.wrapped_titles``
     """
     if not lang.is_internal_tag(tag):
         return False
     # titles tagged "en" are used as they are, all others get the "(Language)" suffix
     full_title = title if tag == "en" else "{} ({})".format(
         title, lang.langname_for_tag(tag))
     return full_title in self.wrapped_titles
Ejemplo n.º 4
0
 def _title_from_langlink(self, langlink):
     """
     Turn a langlink record into a full page title.

     :param langlink: mapping with the language tag under ``"lang"`` and
         the linked title under ``"*"``
     :returns: the title formatted with the language name; for internal
         tags it is additionally canonicalized and, when it is a redirect,
         replaced by the redirect target without the section fragment
     """
     tag = langlink["lang"]
     langname = lang.langname_for_tag(tag)
     title = lang.format_title(langlink["*"], langname)

     # external links are returned untouched
     if not lang.is_internal_tag(tag):
         return title

     title = canonicalize(title)
     # resolve redirects
     target = self.api.redirects.resolve(title)
     if target is None:
         return title
     # drop the "#section" part of the redirect target
     return target.split("#", maxsplit=1)[0]
Ejemplo n.º 5
0
 def _title_from_langlink(self, langlink):
     """
     Build the full page title described by a langlink record.

     :param langlink: mapping holding the language tag (``"lang"``) and the
         linked title (``"*"``)
     :returns: the formatted title; titles with an internal tag are
         canonicalized and followed through redirects, keeping only the
         part of the target before ``#``
     """
     language_tag = langlink["lang"]
     full_title = lang.format_title(
         langlink["*"] if False else langlink["*"],
         lang.langname_for_tag(language_tag),
     ) if False else lang.format_title.__call__(
         *_langlink_format_args(langlink, language_tag)
     ) if False else _format_langlink_title(langlink, language_tag)
     if lang.is_internal_tag(language_tag):
         full_title = canonicalize(full_title)
         # resolve redirects
         redirect_target = self.api.redirects.resolve(full_title)
         if redirect_target is not None:
             # keep only the page name, not the section anchor
             full_title = redirect_target.partition("#")[0]
     return full_title


 def _format_langlink_title(langlink, language_tag):
     """Format the linked title with the language name of ``language_tag``."""
     langname = lang.langname_for_tag(language_tag)
     return lang.format_title(langlink["*"], langname)
Ejemplo n.º 6
0
    def run(self):
        """
        Render the table of contents for every page in ``self.titles``.

        The category graph is built once, the content of all target pages is
        fetched at once, and each page is then processed in turn: its ToC
        table is parsed, the category tree (one language) or the comparison
        of two trees is formatted, and the result is either printed or saved
        into the page.  Titles missing from the fetched contents are skipped.

        :raises NotImplementedError: when neither printing nor saving is selected
        """
        if not self.cliargs.anonymous:
            require_login(self.api)

        # build category graph
        category_graph = CategoryGraph(self.api)
        graph_parents, graph_subcats, info = category_graph.build_graph()

        # detect target pages, fetch content at once
        contents, timestamps, pageids = self.get_pages_contents(self.titles)

        for title in self.titles:
            if title not in contents:
                continue

            text = contents[title]
            wikicode = mwparserfromhell.parse(text)
            toc_table, columns, dictionary = self.parse_toc_table(title, wikicode)

            if toc_table is None:
                # without the table there is nowhere to save the result
                if self.cliargs.save is True:
                    logger.error(
                            "The wiki page '{}' does not contain the ToC table. "
                            "Create the following entry point manually: "
                            "{{| id=\"wiki-scripts-toc-table\"\n|}}".format(title))
                    continue
                logger.warning(
                        "The wiki page '{}' does not contain the ToC table, "
                        "so there will be no translations.".format(title))

            # printing takes precedence over saving
            if self.cliargs.print:
                formatter = PlainFormatter(graph_parents, info, dictionary)
            elif self.cliargs.save:
                formatter = MediaWikiFormatter(graph_parents, info, dictionary, include_opening_closing_tokens=False)
            else:
                raise NotImplementedError("unknown output action: {}".format(self.cliargs.save))

            roots = []
            for column in columns:
                roots.append("Category:{}".format(lang.langname_for_tag(column)))
            formatter.format_root(roots)

            if len(roots) not in (1, 2):
                logger.error("Cannot compare more than 2 languages at once. Requested: {}".format(columns))
                continue
            if len(roots) == 1:
                for item in category_graph.walk(graph_subcats, roots[0]):
                    formatter.format_row(item)
            else:
                for result in category_graph.compare_components(graph_subcats, *roots):
                    formatter.format_row(*result)

            if self.cliargs.print:
                print("== {} ==\n".format(title))
                print(formatter)
            elif self.cliargs.save:
                # the parsed wikicode is serialized after the table was replaced
                toc_table.contents = str(formatter)
                self.save_page(title, pageids[title], text, str(wikicode), timestamps[title])
 def _title_from_langlink(self, langlink):
     """
     Turn a langlink record into a full page title.

     :param langlink: mapping with the language tag under ``"lang"`` and
         the linked title under ``"*"``
     :returns: the linked title, suffixed with ``(Language)`` unless the
         language is English; for internal tags the title is canonicalized
         and, when present in ``self.redirects``, replaced by the redirect
         target without the section fragment
     """
     tag = langlink["lang"]
     langname = lang.langname_for_tag(tag)
     title = (langlink["*"] if langname == "English"
              else "{} ({})".format(langlink["*"], langname))

     # external links are returned untouched
     if not lang.is_internal_tag(tag):
         return title

     title = canonicalize(title)
     # resolve redirects
     if title in self.redirects:
         target = self.redirects[title]
         title = target.split("#", maxsplit=1)[0]
     return title
Ejemplo n.º 8
0
 def from_argparser(klass, args, api=None):
     """
     Create an instance of ``klass`` from parsed command line arguments.

     :param args: namespace providing ``lang``, ``interactive``, ``dry_run``,
         ``first`` and ``title``
     :param api: API object to use; built from ``args`` when ``None``
     :returns: the new ``klass`` instance
     :raises Exception: when ``args.lang`` contains a tag that is not among
         the internal language tags
     """
     if api is None:
         api = API.from_argparser(args)

     langnames = set()
     if args.lang:
         tags = args.lang.split(",")
         internal_tags = lang.get_internal_tags()
         # report the first invalid tag, in the order given by the user
         invalid = [tag for tag in tags if tag not in internal_tags]
         if invalid:
             # FIXME: more elegant solution
             raise Exception("{} is not a valid language tag".format(invalid[0]))
         langnames.update(lang.langname_for_tag(tag) for tag in tags)

     # classes may force the interactive mode regardless of the command line
     if klass.force_interactive is False:
         interactive = args.interactive
     else:
         interactive = True
     return klass(api, interactive=interactive, dry_run=args.dry_run, first=args.first, title=args.title, langnames=langnames)
Ejemplo n.º 9
0
 def from_argparser(klass, args, api=None, db=None):
     """
     Create an instance of ``klass`` from parsed command line arguments.

     :param args: namespace providing ``lang``, ``interactive``, ``dry_run``,
         ``first``, ``title``, ``connection_timeout`` and
         ``connection_max_retries``
     :param api: API object to use; built from ``args`` when ``None``
     :param db: database object to use; built from ``args`` when ``None``
     :returns: the new ``klass`` instance
     :raises Exception: when ``args.lang`` contains a tag that is not among
         the internal language tags
     """
     if api is None:
         api = API.from_argparser(args)
     if db is None:
         db = Database.from_argparser(args)

     langnames = set()
     if args.lang:
         tags = args.lang.split(",")
         # validate everything before any tag is translated to a language name
         for candidate in tags:
             if candidate in lang.get_internal_tags():
                 continue
             # FIXME: more elegant solution
             raise Exception("{} is not a valid language tag".format(candidate))
         for tag in tags:
             langnames.add(lang.langname_for_tag(tag))

     return klass(
         api,
         db,
         interactive=args.interactive,
         dry_run=args.dry_run,
         first=args.first,
         title=args.title,
         langnames=langnames,
         connection_timeout=args.connection_timeout,
         max_retries=args.connection_max_retries,
     )
Ejemplo n.º 10
0
    def rename_non_english(self):
        """
        Offer to rename localized categories whose title differs from the English one.

        Only pages of ``self.allpages`` that are detected as English and start
        with ``Category:`` are taken as the starting point.  Each internal
        langlink with a different localized title yields a proposed move to
        ``"<English title> (<Language>)"``, which the user has to confirm.
        An already existing target is reported with a warning instead.
        """
        # NOTE(review): presumably this invalidates a cached/lazy ``allpages``
        # attribute so that the loop below works on fresh data -- confirm.
        del self.allpages

        # FIXME: starting with English pages is not very good:
        # - some pages are omitted (e.g. when two pages link to the same English page, at least warning should be printed)
        # - it suggests to move e.g. Xfce (Česky) to Xfwm (Česky) because multiple English pages link to it
        # Therefore we limit it only to categories...
        for page in self.allpages:
            english_title = page["title"]
            is_english = lang.detect_language(english_title)[1] == "English"
            if not (is_english and english_title.startswith("Category:")):
                continue

            for tag, localized_title in self.get_langlinks(english_title):
                logger.info("Checking [[{}:{}]] for renaming...".format(tag, localized_title))
                if lang.is_internal_tag(tag) and localized_title != english_title:
                    langname = lang.langname_for_tag(tag)
                    source, target = [
                        "{} ({})".format(name, langname)
                        for name in (localized_title, english_title)
                    ]
                    if not self._page_exists(target):
                        # interactive mode is necessary because this assumes that all English pages are named correctly
                        confirmed = ask_yesno("Move page [[{}]] to [[{}]]?".format(source, target))
                        if confirmed is True:
                            summary = "comply with [[Help:I18n#Page titles]] and match the title of the English page"
                            self.api.move(source, target, summary)
                    else:
                        logger.warning("Cannot move page [[{}]] to [[{}]]: target page already exists".format(source, target))
Ejemplo n.º 11
0
 def from_argparser(klass, args, api=None, db=None):
     """
     Create an instance of ``klass`` from parsed command line arguments.

     :param args: namespace providing ``lang``, ``first``, ``title``,
         ``connection_timeout`` and ``connection_max_retries``
     :param api: API object to use; built from ``args`` when ``None``
     :param db: database object to use; built from ``args`` when ``None``
     :returns: the new ``klass`` instance
     :raises Exception: when ``args.lang`` contains a tag that is not among
         the internal language tags
     """
     api = API.from_argparser(args) if api is None else api
     db = Database.from_argparser(args) if db is None else db

     # no --lang means no restriction, i.e. an empty set of language names
     tags = args.lang.split(",") if args.lang else []
     for tag in tags:
         if tag not in lang.get_internal_tags():
             # FIXME: more elegant solution
             raise Exception("{} is not a valid language tag".format(tag))
     langnames = {lang.langname_for_tag(tag) for tag in tags}

     options = dict(
         first=args.first,
         title=args.title,
         langnames=langnames,
         connection_timeout=args.connection_timeout,
         max_retries=args.connection_max_retries,
     )
     return klass(api, db, **options)
Ejemplo n.º 12
0
    def __init__(self, api, cliargs):
        """
        Store the API object and the arguments and determine the target pages.

        The arguments are normalized in place: printing becomes the default
        action when neither ``save`` nor ``print`` is set, the single value
        ``"all"`` in ``toc_languages`` is expanded to all internal language
        tags, and ``toc_page`` is canonicalized and stripped of its language
        suffix.  ``self.titles`` then holds one page title per requested
        language, ordered by the language tag.

        :param api: object stored as ``self.api``
        :param cliargs: namespace with ``save``, ``print``, ``toc_languages``
            and ``toc_page`` attributes
        """
        self.api = api
        self.cliargs = cliargs
        args = self.cliargs

        if args.save is False and args.print is False:
            args.print = True

        languages = args.toc_languages
        if len(languages) == 1 and languages[0] == "all":
            args.toc_languages = lang.get_internal_tags()
        # strip "(Language)" suffix
        args.toc_page = lang.detect_language(canonicalize(args.toc_page))[0]

        # detect page titles: the local language has no suffix, the others get "(Language)"
        self.titles = [
            args.toc_page
            if ln == lang.tag_for_langname(lang.get_local_language())
            else "{} ({})".format(args.toc_page, lang.langname_for_tag(ln))
            for ln in sorted(args.toc_languages)
        ]
Ejemplo n.º 13
0
    def __init__(self, api, cliargs):
        """
        Keep the API object and the arguments and compute ``self.titles``.

        Side effects on ``cliargs``: ``print`` is switched on when both
        ``save`` and ``print`` are ``False``; ``toc_languages`` equal to the
        single item ``"all"`` is replaced by all internal language tags;
        ``toc_page`` is canonicalized and its language suffix removed.

        :param api: object stored as ``self.api``
        :param cliargs: namespace with ``save``, ``print``, ``toc_languages``
            and ``toc_page`` attributes
        """
        self.api = api
        self.cliargs = cliargs

        nothing_selected = self.cliargs.save is False and self.cliargs.print is False
        if nothing_selected:
            # printing is the fallback action
            self.cliargs.print = True

        requested = self.cliargs.toc_languages
        if len(requested) == 1 and requested[0] == "all":
            self.cliargs.toc_languages = lang.get_internal_tags()

        # strip "(Language)" suffix
        canonical_page = canonicalize(self.cliargs.toc_page)
        self.cliargs.toc_page = lang.detect_language(canonical_page)[0]

        # detect page titles
        self.titles = []
        for tag in sorted(self.cliargs.toc_languages):
            local_tag = lang.tag_for_langname(lang.get_local_language())
            if tag == local_tag:
                # the page in the local language carries no suffix
                page_title = self.cliargs.toc_page
            else:
                page_title = "{} ({})".format(self.cliargs.toc_page, lang.langname_for_tag(tag))
            self.titles.append(page_title)
Ejemplo n.º 14
0
    def get_langlinks(self, full_title):
        """
        Build the list of langlinks for the page ``full_title``.

        The family of the page is taken from :py:meth:`self.titles_in_family`.
        The entry of the page itself is dropped, as are links to
        ``ArchWiki:Archive`` (unless the page itself is that page), and the
        remaining entries are ordered by the language tag.

        :param full_title: title of the page whose langlinks are wanted
        :returns: a list of ``(tag, title)`` tuples
        :raises KeyError: when the page itself is not part of its own family
        """
        archive_title = "ArchWiki:Archive"

        # get all titles in the family
        family_tags, family_titles = self.titles_in_family(full_title)
        langlinks = set(zip(family_tags, family_titles))

        own_title, own_langname = lang.detect_language(full_title)
        own_tag = lang.tag_for_langname(own_langname)

        # remove links to ArchWiki:Archive and translations
        if own_title != archive_title:
            archived = [pair for pair in langlinks if pair[1] == archive_title]
            langlinks.difference_update(archived)

        # remove title of the page to be updated
        langlinks.remove((own_tag, own_title))

        # conversion back-and-forth is necessary to add the "(Language)" suffix into all subpage parts
        result = []
        for link_tag, link_title in sorted(langlinks, key=lambda pair: pair[0]):
            suffixed = lang.format_title(link_title, lang.langname_for_tag(link_tag))
            # do it only when the suffixed title exists, otherwise the title without "(Language)" in
            # all subpage parts is still valid as per Help:i18n
            if self._page_exists(suffixed):
                link_title = lang.detect_language(
                    suffixed, strip_all_subpage_parts=False)[0]
            result.append((link_tag, link_title))

        return result
Ejemplo n.º 15
0
    def run(self):
        """
        Regenerate the table of contents on every page in ``self.titles``.

        When saving is requested, the ``Categorization`` and
        ``Decategorization`` fixes are run over all pages first and the wanted
        categories are initialized in the category graph.  Each target page is
        then processed: the table with ``id="wiki-scripts-toc-table"`` is
        looked up, the category tree (one language) or the comparison of two
        trees is formatted, and the result is either printed or written into
        the table and saved.

        Pages that were not fetched are skipped.  A page is saved only with
        ``--force`` or when it has not been updated during the current UTC
        day.  A failed save is logged and does not stop the processing of the
        remaining pages.

        :raises NotImplementedError: when neither printing nor saving is selected
        """
        if not self.cliargs.anonymous:
            require_login(self.api)

        # if we are going to save, make sure that the categories are correct first
        if self.cliargs.save is True:
            cat = Categorization(self.api)
            cat.fix_allpages()
            decat = Decategorization(self.api)
            decat.fix_allpages()

        # build category graph
        graph = CategoryGraph(self.api)

        # if we are going to save, init wanted categories
        if self.cliargs.save is True:
            graph.init_wanted_categories()

        # detect target pages, fetch content at once
        page = AutoPage(self.api, fetch_titles=self.titles)

        for title in self.titles:
            try:
                page.set_title(title)
            except ValueError:
                # page not fetched
                continue

            toc_table = page.get_tag_by_id(tag="table", id="wiki-scripts-toc-table")
            columns, category_names, alsoin = self.parse_toc_table(title, toc_table)

            if toc_table is None:
                if self.cliargs.save is True:
                    # without the table there is nowhere to save the result
                    logger.error(
                        "The wiki page [[{}]] does not contain the ToC table. "
                        "Create the following entry point manually:\n"
                        "{{| id=\"wiki-scripts-toc-table\"\n...\n|}}".format(title))
                    continue
                else:
                    logger.warning(
                        "The wiki page [[{}]] does not contain the ToC table, "
                        "so there will be no translations.".format(title))

            # printing takes precedence over saving
            if self.cliargs.print:
                ff = PlainFormatter(graph.parents, graph.info, category_names, alsoin)
            elif self.cliargs.save:
                ff = MediaWikiFormatter(graph.parents, graph.info, category_names, alsoin, include_opening_closing_tokens=False)
            else:
                raise NotImplementedError("unknown output action: {}".format(self.cliargs.save))

            roots = ["Category:{}".format(lang.langname_for_tag(c)) for c in columns]
            ff.format_root(roots)
            if len(roots) == 1:
                for item in graph.walk(graph.subcats, roots[0]):
                    ff.format_row(item)
            elif len(roots) == 2:
                for result in graph.compare_components(graph.subcats, *roots):
                    ff.format_row(*result)
            else:
                logger.error("Cannot compare more than 2 languages at once. Requested: {}".format(columns))
                continue

            if self.cliargs.print:
                print("== {} ==\n".format(title))
                print(ff)
            elif self.cliargs.save:
                toc_table.contents = str(ff)
                if self.cliargs.force or page.is_old_enough(min_interval=datetime.timedelta(days=1), strip_time=True):
                    try:
                        page.save(self.cliargs.summary)
                    except APIError:
                        # keep going with the remaining pages, but do not hide the failure
                        logger.exception("Failed to save the page [[{}]]".format(title))
                else:
                    logger.info("The page [[{}]] has already been updated this UTC day.".format(title))
Ejemplo n.º 16
0
 def _is_valid_internal(self, tag, title):
     """
     Check whether a langlink points to a page listed in ``self.wrapped_titles``.

     :param tag: language tag of the link
     :param title: title of the link, formatted with the language name before the lookup
     :returns: ``False`` when ``tag`` is not an internal tag, otherwise
         whether the formatted title is in ``self.wrapped_titles``
     """
     if lang.is_internal_tag(tag):
         langname = lang.langname_for_tag(tag)
         return lang.format_title(title, langname) in self.wrapped_titles
     return False
Ejemplo n.º 17
0
 def _is_valid_internal(self, tag, title):
     """
     Tell whether the page a langlink refers to is known in ``self.wrapped_titles``.

     :param tag: language tag of the link
     :param title: title of the link, formatted with the language name before the lookup
     :returns: ``False`` for tags that are not internal, otherwise the
         result of the membership test of the formatted title
     """
     # only internal tags can refer to the wrapped titles
     return (
         lang.format_title(title, lang.langname_for_tag(tag)) in self.wrapped_titles
         if lang.is_internal_tag(tag)
         else False
     )
Ejemplo n.º 18
0
    def run(self):
        """
        Regenerate the table of contents on every page in ``self.titles``.

        When saving is requested, the ``Categorization`` and
        ``Decategorization`` fixes are run over all pages first and the wanted
        categories are initialized in the category graph.  Each target page is
        then processed: the table with ``id="wiki-scripts-toc-table"`` is
        looked up, the category tree (one language) or the comparison of two
        trees is formatted, and the result is either printed or written into
        the table and saved.

        Pages that were not fetched are skipped.  A page is saved only with
        ``--force`` or when it has not been updated during the current UTC
        day.  A failed save is logged and does not stop the processing of the
        remaining pages.

        :raises NotImplementedError: when neither printing nor saving is selected
        """
        if not self.cliargs.anonymous:
            require_login(self.api)

        # if we are going to save, make sure that the categories are correct first
        if self.cliargs.save is True:
            cat = Categorization(self.api)
            cat.fix_allpages()
            decat = Decategorization(self.api)
            decat.fix_allpages()

        # build category graph
        graph = CategoryGraph(self.api)

        # if we are going to save, init wanted categories
        if self.cliargs.save is True:
            graph.init_wanted_categories()

        # detect target pages, fetch content at once
        page = AutoPage(self.api, fetch_titles=self.titles)

        for title in self.titles:
            try:
                page.set_title(title)
            except ValueError:
                # page not fetched
                continue

            toc_table = page.get_tag_by_id(tag="table",
                                           id="wiki-scripts-toc-table")
            columns, category_names, alsoin = self.parse_toc_table(
                title, toc_table)

            if toc_table is None:
                if self.cliargs.save is True:
                    # without the table there is nowhere to save the result
                    logger.error(
                        "The wiki page [[{}]] does not contain the ToC table. "
                        "Create the following entry point manually:\n"
                        "{{| id=\"wiki-scripts-toc-table\"\n...\n|}}".format(
                            title))
                    continue
                else:
                    logger.warning(
                        "The wiki page [[{}]] does not contain the ToC table, "
                        "so there will be no translations.".format(title))

            # printing takes precedence over saving
            if self.cliargs.print:
                ff = PlainFormatter(graph.parents, graph.info, category_names,
                                    alsoin)
            elif self.cliargs.save:
                ff = MediaWikiFormatter(graph.parents,
                                        graph.info,
                                        category_names,
                                        alsoin,
                                        include_opening_closing_tokens=False)
            else:
                raise NotImplementedError("unknown output action: {}".format(
                    self.cliargs.save))

            roots = [
                "Category:{}".format(lang.langname_for_tag(c)) for c in columns
            ]
            ff.format_root(roots)
            if len(roots) == 1:
                for item in graph.walk(graph.subcats, roots[0]):
                    ff.format_row(item)
            elif len(roots) == 2:
                for result in graph.compare_components(graph.subcats, *roots):
                    ff.format_row(*result)
            else:
                logger.error(
                    "Cannot compare more than 2 languages at once. Requested: {}"
                    .format(columns))
                continue

            if self.cliargs.print:
                print("== {} ==\n".format(title))
                print(ff)
            elif self.cliargs.save:
                toc_table.contents = str(ff)
                if self.cliargs.force or page.is_old_enough(
                        min_interval=datetime.timedelta(days=1),
                        strip_time=True):
                    try:
                        page.save(self.cliargs.summary)
                    except APIError:
                        # keep going with the remaining pages, but do not hide the failure
                        logger.exception(
                            "Failed to save the page [[{}]]".format(title))
                else:
                    logger.info(
                        "The page [[{}]] has already been updated this UTC day."
                        .format(title))