def recategorize_over_redirect(self, category_namespace=14):
    """
    Interactively move pages out of categories that are redirects.

    The redirects returned by ``self.api.redirects_map()`` are narrowed down
    to those whose source title has the namespace "Category".  For each of
    them the user is asked to confirm; an answer of ``False`` skips the
    category.  Otherwise every category member that comes with revisions is
    handed to ``self.recategorize_page(page, source, target)``.  The members
    are then listed again: if there are none, ``self.flag_for_deletion`` is
    called on the source category, otherwise a warning with the remaining
    titles is logged and the run pauses until Enter is pressed.  A reminder
    about follow-up work is printed at the very end.

    :param category_namespace:
        not referenced by the body; the filtering compares the namespace
        name against the literal "Category" instead
    """
    # FIXME: the source_namespace parameter of redirects_map does not work,
    #        so we need to do manual filtering
    all_redirects = self.api.redirects_map()
    category_redirects = {
        origin: destination
        for origin, destination in all_redirects.items()
        if Title(self.api, origin).namespace == "Category"
    }

    for source, target in category_redirects.items():
        question = "Recategorize pages from '{}' to '{}'?".format(source, target)
        if ask_yesno(question) is False:
            continue

        members = self.api.generator(generator="categorymembers", gcmtitle=source, gcmlimit="max", prop="revisions", rvprop="content|timestamp")
        for page in members:
            # a page can show up more than once in the results, so only the
            # results that actually carry revisions are processed
            if "revisions" in page:
                self.recategorize_page(page, source, target)

        # query the members once more to find out whether the category is empty now
        remaining = list(self.api.list(list="categorymembers", cmtitle=source, cmlimit="max"))
        if not remaining:
            self.flag_for_deletion(source)
            continue

        remaining_titles = sorted(member["title"] for member in remaining)
        logger.warning("'{}' is still not empty: {}".format(source, remaining_titles))
        input("Press Enter to continue...")

    # the message starts and ends with a newline, print() appends one more
    print(
        "\n"
        "Recategorization complete. Before deleting the unused categories, make sure to "
        "update interlanguage links. The unused categories are still redirects and are "
        "not listed under Special:UnusedCategories, but they can be found in "
        "Special:WhatLinksHere/Template:Deletion.\n"
    )
Esempio n. 2
0
    def rename_non_english(self):
        """
        Interactively rename localized category pages after their English page.

        The ``allpages`` attribute is deleted first and then read again
        (presumably to invalidate a cached page list -- verify against the
        class definition).  For every page whose title is detected as English
        and starts with "Category:", the langlinks are checked: when the tag is
        internal and the localized title differs from the English one, a move
        from "<localized title> (<language>)" to "<English title> (<language>)"
        is offered.  Existing targets are only reported with a warning; the
        move is performed only when the user answers ``True``.
        """
        del self.allpages

        # FIXME: starting with English pages is not very good:
        # - some pages are omitted (e.g. when two pages link to the same English page, at least warning should be printed)
        # - it suggests to move e.g. Xfce (Česky) to Xfwm (Česky) because multiple English pages link to it
        # Therefore we limit it only to categories...
        for page in self.allpages:
            title = page["title"]
            if lang.detect_language(title)[1] != "English" or not title.startswith("Category:"):
                continue

            for tag, localized_title in self.get_langlinks(title):
                logger.info("Checking [[{}:{}]] for renaming...".format(tag, localized_title))
                if not lang.is_internal_tag(tag) or localized_title == title:
                    continue

                # both titles carry the language name as a suffix in parentheses
                source, target = (
                    "{} ({})".format(base, lang.langname_for_tag(tag))
                    for base in (localized_title, title)
                )

                if self._page_exists(target):
                    logger.warning("Cannot move page [[{}]] to [[{}]]: target page already exists".format(source, target))
                    continue

                # interactive mode is necessary because this assumes that all English pages are named correctly
                confirmed = ask_yesno("Move page [[{}]] to [[{}]]?".format(source, target))
                if confirmed is True:
                    summary = "comply with [[Help:I18n#Page titles]] and match the title of the English page"
                    self.api.move(source, target, summary)
Esempio n. 3
0
    def filter_pages(self, pages):
        """
        Check the given pages for spam and act on the detected ones.

        Pages without a "revisions" key or with an empty revisions list are
        skipped.  For the others, the content (``"*"``) of the first listed
        revision is passed together with the page title to ``self.is_spam``.
        A negative result is just reported on stdout.  For a positive result
        the details are printed and, when ``self.interactive`` is set, the user
        is asked to confirm; an answer of ``False`` skips the page.  Otherwise
        the revision's user is blocked and, if the revision's ``parentid`` is
        0, the whole page is deleted.

        :param pages: iterable of page dictionaries
        """
        for page in pages:
            # empty prop in generator due to continuation
            if "revisions" not in page:
                continue
            revisions = page["revisions"]
            # skip truncated results (due to PHP's 8MiB limit)
            if len(revisions) == 0:
                continue
            rev = revisions[0]

            content = rev["*"]
            title = page["title"]
            if not self.is_spam(title, content):
                print("Page '{}' is not a spam.".format(title))
                continue

            print("Detected spam:")
            pprint({"title": title, "content": content, "timestamp": rev["timestamp"]})
            if self.interactive and ask_yesno("Proceed with user account blocking and deletion?") is False:
                continue

            # block the account
            block_user(self.api, rev["user"])
            if rev["parentid"] != 0:
                # TODO: if all revisions of the page are spam, delete the whole page
                print("Warning: deletion of individual revisions is not implemented!")
            else:
                # first revision, delete whole page
                delete_page(self.api, title, page["pageid"])
    def recategorize_over_redirect(self, category_namespace=14):
        # FIXME: the source_namespace parameter of redirects.fetch does not work, so we need to do manual filtering
        redirects = self.api.redirects.fetch()
        catredirs = dict((key, value) for key, value in redirects.items()
                         if self.api.Title(key).namespace == "Category")
        for source, target in catredirs.items():
            ans = ask_yesno("Recategorize pages from [[{}]] to [[{}]]?".format(
                source, target))
            if ans is False:
                continue

            catmembers = self.api.generator(generator="categorymembers",
                                            gcmtitle=source,
                                            gcmlimit="max",
                                            prop="revisions",
                                            rvprop="content|timestamp",
                                            rvslots="main")
            for page in catmembers:
                # the same page might be yielded multiple times
                if "revisions" in page:
                    self.recategorize_page(page, source, target)
            # check again to see if the category is empty
            catmembers = list(
                self.api.list(list="categorymembers",
                              cmtitle=source,
                              cmlimit="max"))
            if len(catmembers) == 0:
                self.flag_for_deletion(source)
            else:
                logger.warning("Page [[{}]] is still not empty: {}".format(
                    source, sorted(page["title"] for page in catmembers)))
                input("Press Enter to continue...")
        print("""
Recategorization complete. Before deleting the unused categories, make sure to \
update interlanguage links. The unused categories are still redirects and are \
not listed under Special:UnusedCategories, but they can be found in \
Special:WhatLinksHere/Template:Deletion.
""")