Example 1
    def __init__(self,
                 api,
                 db,
                 interactive=False,
                 dry_run=False,
                 first=None,
                 title=None,
                 langnames=None,
                 connection_timeout=30,
                 max_retries=3):
        if not dry_run:
            # ensure that we are authenticated
            require_login(api)

        # init inherited
        ExtlinkRules.__init__(self)
        WikilinkRules.__init__(self, api, db, interactive=interactive)
        ManTemplateRules.__init__(self, connection_timeout, max_retries)

        self.api = api
        self.db = db
        self.interactive = interactive
        self.dry_run = dry_run

        # parameters for self.run()
        self.first = first
        self.title = title
        self.langnames = langnames

        self.db.sync_with_api(api)
        self.db.sync_revisions_content(api, mode="latest")
        self.db.update_parser_cache()

    def check_allpages(self):
        if not self.finder.refresh():
            raise Exception("Failed to refresh package information.")

        # ensure that we are authenticated
        require_login(self.api)

        namespaces = [0, 4, 14, 3000]
        for ns in namespaces:
            for page in self.api.generator(generator="allpages", gaplimit="100", gapfilterredir="nonredirects", gapnamespace=ns,
                                           prop="revisions", rvprop="content|timestamp", rvslots="main"):
                title = page["title"]
                if title in self.blacklist_pages:
                    logger.info("skipping blacklisted page [[{}]]".format(title))
                    continue
                timestamp = page["revisions"][0]["timestamp"]
                text_old = page["revisions"][0]["slots"]["main"]["*"]
                text_new = self.update_page(title, text_old)
                if text_old != text_new:
                    try:
                        if self.interactive:
                            edit_interactive(self.api, title, page["pageid"], text_old, text_new, timestamp, self.edit_summary, bot="")
                        else:
                            self.api.edit(title, page["pageid"], text_new, timestamp, self.edit_summary, bot="")
                    except APIError:
                        pass
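
Every example in this collection gates its edits behind require_login(api). The real helper lives in the wiki-scripts project and is not shown in these excerpts; purely as a rough sketch, with api.user.is_loggedin and api.login as assumed names, such a guard could look like this:

import getpass

def require_login(api):
    # Hypothetical sketch: return if the session is already authenticated,
    # otherwise prompt for credentials and log in.
    if getattr(api.user, "is_loggedin", False):
        return
    username = input("Username: ")
    password = getpass.getpass("Password: ")
    api.login(username, password)  # assumed method on the API wrapper
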
Example 3
    def __init__(self,
                 api,
                 interactive=False,
                 dry_run=False,
                 first=None,
                 title=None,
                 langnames=None):
        if not dry_run:
            # ensure that we are authenticated
            require_login(api)
        self.api = api

        self.interactive = interactive if self.force_interactive is False else True
        self.dry_run = dry_run

        # parameters for the selection of page titles
        self.first = first
        self.title = title
        self.langnames = langnames

        self.namespaces = [0, 4, 14, 3000]
        if self.interactive is True:
            self.namespaces.append(12)

        # mapping of mwparserfromhell node types to lists of checker objects
        self.checkers = {}
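
The checkers attribute maps mwparserfromhell node types to lists of checker objects. As an illustration of how such a dispatch table can be consumed while walking a parsed page (the checker interface below is a hypothetical stand-in, not the project's actual API):

import mwparserfromhell

def run_checkers(checkers, text):
    wikicode = mwparserfromhell.parse(text)
    for node in wikicode.ifilter(recursive=True):
        # dispatch on the concrete node type, e.g. Wikilink or ExternalLink
        for checker in checkers.get(type(node), []):
            checker.handle(node)  # assumed checker method
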
Example 4
    def main_loop(self):
        require_login(self.api)
        if "block" not in self.api.user_rights:
            print("Your account does not have the 'block' right.")
            return False
        if "delete" not in self.api.user_rights:
            print("Your account does not have the 'delete' right.")
            return False

        start = datetime.datetime.utcnow() - datetime.timedelta(days=1)

        while True:
            # drop microseconds (included in the default format string, but MediaWiki does not like it)
            start -= datetime.timedelta(microseconds=start.microsecond)

            try:
                start2 = datetime.datetime.utcnow()
                pages = self.api.generator(generator="recentchanges", grcstart=start, grcdir="newer", grcshow="unpatrolled", grclimit="max", prop="revisions", rvprop="ids|timestamp|user|comment|content")
                self.filter_pages(pages)
            except (rexc.ConnectionError, rexc.Timeout) as e:
                # query failed, set short timeout and retry from the previous timestamp
                timeout = 30
                # FIXME: better representation of the exception as str()
                print("Catched {} exception, sleeping {} seconds before retrying...".format(repr(e), timeout))
            else:
                # query succeeded, shift start timestamp and set timeout
                start = start2
                timeout = self.timeout_func()
                print("{}  Sleeping for {:.3g} seconds...".format(start, timeout))

            try:
                time.sleep(timeout)
            except KeyboardInterrupt:
                try:
                    # short timeout to allow interruption of the main loop
                    time.sleep(0.5)
                except KeyboardInterrupt as e:
                    raise e from None

#        # go through recently deleted revisions, detect spam and block the remaining users
#        logs = api.list(list="logevents", letype="delete", lelimit="max", ledir="newer", lestart=start)
#        titles = [log["title"] for log in logs if log["comment"].lower().startswith("spam")]
#        for chunk in list_chunks(titles, 50):
#            result = api.call_api(action="query", titles="|".join(chunk), prop="deletedrevisions", drvprop="ids|timestamp|user|comment|content", rawcontinue="")
#            pages = result["pages"].values()
#            for page in pages:
#                if "deletedrevisions" not in page:
#                    # empty prop in generator due to continuation
#                    continue
#                rev = page["deletedrevisions"][0]
#                if not is_blocked(api, rev["user"]):
#                    ans = ask_yesno("Block user '{}' who created page '{}'?")
#                    if ans is True:
#                        block_user(api, rev["user"])

        return True
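
The loop above retries with a fixed 30-second pause after every connection error. A common alternative, sketched generically here (not part of the original script), is exponential backoff:

def backoff_delays(base=30, factor=2, maximum=600):
    # yields 30, 60, 120, ... seconds, capped at `maximum`
    delay = base
    while True:
        yield delay
        delay = min(delay * factor, maximum)

On each failed query the except branch would take timeout = next(delays), and a fresh generator would be created after the next success.
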
Example 5
    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        # build category graph
        category_graph = CategoryGraph(self.api)
        graph_parents, graph_subcats, info = category_graph.build_graph()

        # detect target pages, fetch content at once
        contents, timestamps, pageids = self.get_pages_contents(self.titles)

        for title in self.titles:
            if title not in contents:
                continue

            wikicode = mwparserfromhell.parse(contents[title])
            toc_table, columns, dictionary = self.parse_toc_table(title, wikicode)

            if toc_table is None:
                if self.cliargs.save is True:
                    logger.error(
                            "The wiki page '{}' does not contain the ToC table. "
                            "Create the following entry point manually: "
                            "{{| id=\"wiki-scripts-toc-table\"\n|}}".format(title))
                    continue
                else:
                    logger.warning(
                            "The wiki page '{}' does not contain the ToC table, "
                            "so there will be no translations.".format(title))

            if self.cliargs.print:
                ff = PlainFormatter(graph_parents, info, dictionary)
            elif self.cliargs.save:
                ff = MediaWikiFormatter(graph_parents, info, dictionary, include_opening_closing_tokens=False)
            else:
                raise NotImplementedError("unknown output action: {}".format(self.cliargs.save))

            roots = ["Category:{}".format(lang.langname_for_tag(c)) for c in columns]
            ff.format_root(roots)
            if len(roots) == 1:
                for item in category_graph.walk(graph_subcats, roots[0]):
                    ff.format_row(item)
            elif len(roots) == 2:
                for result in category_graph.compare_components(graph_subcats, *roots):
                    ff.format_row(*result)
            else:
                logger.error("Cannot compare more than 2 languages at once. Requested: {}".format(columns))
                continue

            if self.cliargs.print:
                print("== {} ==\n".format(title))
                print(ff)
            elif self.cliargs.save:
                toc_table.contents = str(ff)
                self.save_page(title, pageids[title], contents[title], str(wikicode), timestamps[title])
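
category_graph.walk and compare_components belong to the project's CategoryGraph class, whose internals are not shown in this excerpt. For orientation, a generic depth-first walk over a subcategory mapping might look like the following (the names and the shape of the yielded items are assumptions):

def walk(subcats, root, level=0, seen=None):
    # depth-first traversal yielding (category, nesting level) pairs
    seen = set() if seen is None else seen
    for child in subcats.get(root, []):
        if child not in seen:
            seen.add(child)
            yield child, level
            yield from walk(subcats, child, level + 1, seen)
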
Example 6
    def main_loop(self):
        require_login(self.api)
        if "block" not in self.api.user.rights:
            print("Your account does not have the 'block' right.")
            return False
        if "delete" not in self.api.user.rights:
            print("Your account does not have the 'delete' right.")
            return False

        start = datetime.datetime.utcnow() - datetime.timedelta(days=1)

        while True:
            try:
                start2 = datetime.datetime.utcnow()
                pages = self.api.generator(generator="recentchanges", grcstart=start, grcdir="newer", grcshow="unpatrolled", grclimit="max", prop="revisions", rvprop="ids|timestamp|user|comment|content")
                self.filter_pages(pages)
            except (rexc.ConnectionError, rexc.Timeout) as e:
                # query failed, set short timeout and retry from the previous timestamp
                timeout = 30
                # FIXME: better representation of the exception as str()
                print("Catched {} exception, sleeping {} seconds before retrying...".format(repr(e), timeout))
            else:
                # query succeeded, shift start timestamp and set timeout
                start = start2
                timeout = self.timeout_func()
                print("{}  Sleeping for {:.3g} seconds...".format(start, timeout))

            try:
                time.sleep(timeout)
            except KeyboardInterrupt:
                try:
                    # short timeout to allow interruption of the main loop
                    time.sleep(0.5)
                except KeyboardInterrupt as e:
                    raise e from None

#        # go through recently deleted revisions, detect spam and block the remaining users
#        logs = api.list(list="logevents", letype="delete", lelimit="max", ledir="newer", lestart=start)
#        titles = [log["title"] for log in logs if log["comment"].lower().startswith("spam")]
#        for chunk in list_chunks(titles, 50):
#            result = api.call_api(action="query", titles="|".join(chunk), prop="deletedrevisions", drvprop="ids|timestamp|user|comment|content", rawcontinue="")
#            pages = result["pages"].values()
#            for page in pages:
#                if "deletedrevisions" not in page:
#                    # empty prop in generator due to continuation
#                    continue
#                rev = page["deletedrevisions"][0]
#                if not is_blocked(api, rev["user"]):
#                    ans = ask_yesno("Block user '{}' who created page '{}'?")
#                    if ans is True:
#                        block_user(api, rev["user"])

        return True

    def run(self):
        require_login(self.api)

        # synchronize the database
        self.db.sync_with_api(self.api)

        try:
            page = AutoPage(self.api, self.pagename)
        except ValueError:
            logger.error(
                "The page [[{}]] currently does not exist. It must be "
                "created manually before the script can update it.".format(
                    self.pagename))
            return

        tables = page.wikicode.filter_tags(
            matches=lambda node: node.tag == "table",
            recursive=page.wikicode.RECURSE_OTHERS)
        assert len(tables) == 2
        table_active, table_inactive = tables

        # extract rows
        rows = self.extract_rows(table_active)
        rows += self.extract_rows(table_inactive)

        # sort
        def sort_key(row):
            return self._get_editcount(row), self._get_last_edit_timestamp(row)

        rows.sort(key=sort_key, reverse=True)

        # split
        rows_active = [
            row for row in rows if self._get_editcount(row) >= self.min_edits
        ]
        rows_inactive = [
            row for row in rows if self._get_editcount(row) < self.min_edits
        ]

        # assemble
        for row in rows_active:
            table_active.contents.append(row)
        for row in rows_inactive:
            table_inactive.contents.append(row)

        # save
        page.save(self.edit_summary, minor="1")
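
sort_key returns an (edit count, last edit timestamp) tuple, so reverse=True orders rows by edit count first and breaks ties by most recent activity. In miniature:

rows = [("alice", 50, "2024-01-01"), ("bob", 50, "2024-06-01"), ("carol", 10, "2024-03-01")]
rows.sort(key=lambda r: (r[1], r[2]), reverse=True)
# -> bob (50, newer), alice (50, older), carol (10)
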
Example 8
    def _output_page(self):
        ret = 0

        if self.cliargs.save:
            require_login(self.api)

            try:
                result = self.api.edit(self.cliargs.statistics_page, self.pageid,
                        self.text, self.timestamp, self.cliargs.summary, bot="1",
                        minor="1")
            except APIError as err:
                ret |= 1
            else:
                if result["result"].lower() != "success":
                    logger.exception("The page was not saved correctly")
                    ret |= 1
                else:
                    logger.info("The page has been saved: do not forget to "
                                                    "double-check the diff")
                    ret |= 2

        if self.cliargs.clipboard or ret == 0:
            if Tk:
                w = Tk()
                w.withdraw()
                w.clipboard_clear()
                w.clipboard_append(self.text)
                # The copied text is lost once the script terminates
                input("The updated page text has been copied to the "
                        "clipboard: paste it in the browser, then press Enter "
                        "to continue")
                w.destroy()

                ret |= 2
            else:
                logger.error("It has not been possible to copy the updated "
                             "text to the clipboard")
                ret |= 1

        # If no other action was chosen, always print the output, so that all
        # the effort doesn't go wasted
        if self.cliargs.print or ret == 0:
            print(self.text)

        return ret & 1
Example 9
    def __init__(self, api, db, first=None, title=None, langnames=None, connection_timeout=60, max_retries=3):
        # init inherited
        ExtlinkStatusChecker.__init__(self, connection_timeout, max_retries)

        # ensure that we are authenticated
        require_login(api)

        self.api = api
        self.db = db

        # parameters for self.run()
        self.first = first
        self.title = title
        self.langnames = langnames

        self.db.sync_with_api(api)
        self.db.sync_revisions_content(api, mode="latest")
        self.db.update_parser_cache()
Example 10
    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        try:
            self.page = AutoPage(self.api, self.cliargs.statistics_page)
        except ValueError:
            logger.error("The page [[{}]] currently does not exist. It must be "
                  "created manually before the script can update it."
                  .format(self.cliargs.statistics_page))
            return 1

        if self.cliargs.force or self.page.is_old_enough(min_interval=datetime.timedelta(days=1), strip_time=True):
            self._compose_page()
            return self._output_page()
        else:
            logger.info("The page has already been updated this UTC day")
            return 1
Example 11
    def __init__(self,
                 api,
                 first=None,
                 title=None,
                 langnames=None,
                 connection_timeout=60,
                 max_retries=3):
        # init inherited
        ExtlinkStatusChecker.__init__(self, connection_timeout, max_retries)

        # ensure that we are authenticated
        require_login(api)

        self.api = api

        # parameters for self.run()
        self.first = first
        self.title = title
        self.langnames = langnames
Example 12
def update_page_language(api):
    # ensure that we are authenticated
    require_login(api)

    namespaces = [0, 4, 10, 12, 14]
    for ns in namespaces:
        for page in api.generator(generator="allpages",
                                  gapnamespace=ns,
                                  gaplimit="max",
                                  prop="info"):
            title = page["title"]
            pagelanguage = page["pagelanguage"]

            pure, langname = lang.detect_language(title)
            langtag = lang.tag_for_langname(langname)

            if pagelanguage != langtag:
                api.set_page_language(
                    title, langtag, "update language based on the page title")
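
lang.detect_language splits the ArchWiki-style language suffix out of a page title, and lang.tag_for_langname maps the language name to a tag. A simplified standalone approximation of the first helper (the real module handles more cases):

def detect_language(title):
    # "Main page (Italiano)" -> ("Main page", "Italiano")
    if title.endswith(")") and " (" in title:
        pure, _, langname = title.rpartition(" (")
        return pure, langname[:-1]
    return title, "English"  # assumed default for unsuffixed titles
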
Example 13
def main(api):
    require_login(api)

    # check for necessary rights
    if "deletedhistory" not in api.user.rights:
        print(
            "The current user does not have the 'deletedhistory' right, which is necessary to use this script. Sorry."
        )
        sys.exit(1)

    pages = api.list(list="alldeletedrevisions", adrlimit="max")

    pages_counts = {}
    users_counts = {}

    for page in pages:
        title = page["title"]
        pages_counts.setdefault(title, 0)
        for r in page.get("revisions", []):
            # print revision
            pprint(r)
            # increment counters
            pages_counts[title] += 1
            user = r["user"]
            users_counts.setdefault(user, 0)
            users_counts[user] += 1

    # total count of deleted revisions
    total_count = sum(count for _, count in pages_counts.items())
    # count of pages with non-zero number of deleted revisions
    pages_count = len([1 for _, count in pages_counts.items() if count > 0])
    # count of users whose at least one revision has been deleted
    users_count = len(users_counts.keys())

    print("{} deleted revisions on {} pages by {} users".format(
        total_count, pages_count, users_count))

    # print top 20 users with most deleted revisions
    for user, count in sorted(users_counts.items(),
                              key=lambda t: t[1],
                              reverse=True)[:20]:
        print(user, count)
Example 14
    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        try:
            self.page = AutoPage(self.api, self.cliargs.statistics_page)
        except ValueError:
            logger.error(
                "The page [[{}]] currently does not exist. It must be "
                "created manually before the script can update it.".format(
                    self.cliargs.statistics_page))
            return 1

        if self.cliargs.force or self.page.is_old_enough(
                min_interval=datetime.timedelta(days=1), strip_time=True):
            self._compose_page()
            return self._output_page()
        else:
            logger.info("The page has already been updated this UTC day")
            return 1
Example 15
    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        try:
            self._parse_page()

            if not self.cliargs.force and \
                                datetime.datetime.utcnow().date() <= \
                                parse_date(self.timestamp).date():
                logger.info("The page has already been updated this UTC day")
                return 1

            self._compose_page()
            return self._output_page()
        except MissingPageError:
            logger.error("The page '{}' currently does not exist. It must be "
                  "created manually before the script can update it.".format(
                                        self.cliargs.statistics_page))
        return 1

def main(api):
    require_login(api)

    # check for necessary rights
    if "unwatchedpages" not in api.user.rights:
        print(
            "The current user does not have the 'unwatchedpages' right, which is necessary to use this script. Sorry."
        )
        sys.exit(1)

    # get list of unwatched pages
    query_unwatched = {
        "action": "query",
        "list": "querypage",
        "qppage": "Unwatchedpages",
        "qplimit": "max",
        "continue": "",
    }

    # list flattening, limit to the Main namespace
    unwatched = (page for snippet in api.query_continue(query_unwatched)
                 for page in snippet["querypage"]["results"]
                 if page["ns"] == 0)

    # split into sections by language
    by_language = {}
    for page in unwatched:
        title = page["title"]
        lang = detect_language(title)[1]
        if lang not in by_language:
            by_language[lang] = []
        by_language[lang].append(title)

    # print wikitext
    for lang in sorted(by_language.keys()):
        print("== %s ==" % lang)
        print()
        for title in by_language[lang]:
            print("* %s" % title)
        print()
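
api.query_continue drives MediaWiki's continuation protocol, yielding one "query" snippet per request. A rough sketch of the pattern, built on the call_api method that appears elsewhere in these examples:

def query_continue(api, params):
    params = dict(params)  # do not mutate the caller's dict
    while True:
        result = api.call_api(**params)
        yield result["query"]
        if "continue" not in result:
            break
        params.update(result["continue"])
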
Example 17
    def _output_page(self):
        ret = 0

        if self.cliargs.save:
            require_login(self.api)

            try:
                self.page.save(self.cliargs.summary, minor="1")
                logger.info("The page has been saved: do not forget to "
                            "double-check the diff")
                ret |= 2
            except APIError as err:
                ret |= 1

        if self.cliargs.clipboard or ret == 0:
            if Tk:
                w = Tk()
                w.withdraw()
                w.clipboard_clear()
                w.clipboard_append(self.page.wikicode)
                # The copied text is lost once the script terminates
                input(
                    "The updated page text has been copied to the clipboard: "
                    "paste it in the browser, then press Enter to continue")
                w.destroy()

                ret |= 2
            else:
                logger.error("It has not been possible to copy the updated "
                             "text to the clipboard")
                ret |= 1

        # If no other action was chosen, always print the output, so that all
        # the effort doesn't go wasted
        if self.cliargs.print or ret == 0:
            print(self.page.wikicode)

        return ret & 1
Example 18
    def __init__(self, api, db, interactive=False, dry_run=False, first=None, title=None, langnames=None, connection_timeout=30, max_retries=3):
        if not dry_run:
            # ensure that we are authenticated
            require_login(api)

        # init inherited
        ExtlinkRules.__init__(self)
        WikilinkRules.__init__(self, api, db, interactive=interactive)
        ManTemplateRules.__init__(self, connection_timeout, max_retries)

        self.api = api
        self.db = db
        self.interactive = interactive
        self.dry_run = dry_run

        # parameters for self.run()
        self.first = first
        self.title = title
        self.langnames = langnames

        self.db.sync_with_api(api)
        self.db.sync_latest_revisions_content(api)
        self.db.update_parser_cache()

def main(api):
    require_login(api)

    # check for necessary rights
    if "deletedhistory" not in api.user.rights:
        print("The current user does not have the 'deletedhistory' right, which is necessary to use this script. Sorry.")
        sys.exit(1)

    pages = api.list(list="alldeletedrevisions", adrlimit="max")

    pages_counts = {}
    users_counts = {}

    for page in pages:
        title = page["title"]
        pages_counts.setdefault(title, 0)
        for r in page.get("revisions", []):
            # print revision
            pprint(r)
            # increment counters
            pages_counts[title] += 1
            user = r["user"]
            users_counts.setdefault(user, 0)
            users_counts[user] += 1

    # total count of deleted revisions
    total_count = sum(count for _, count in pages_counts.items())
    # count of pages with non-zero number of deleted revisions
    pages_count = len([1 for _, count in pages_counts.items() if count > 0])
    # count of users whose at least one revision has been deleted
    users_count = len(users_counts.keys())

    print("{} deleted revisions on {} pages by {} users".format(total_count, pages_count, users_count))

    # print top 20 users with most deleted revisions
    for user, count in sorted(users_counts.items(), key=lambda t: t[1], reverse=True)[:20]:
        print(user, count)
Example 20
    def _output_page(self):
        ret = 0

        if self.cliargs.save:
            require_login(self.api)

            try:
                self.page.save(self.cliargs.summary, minor="1")
                logger.info("The page has been saved: do not forget to "
                            "double-check the diff")
                ret |= 2
            except APIError as err:
                ret |= 1

        if self.cliargs.clipboard or ret == 0:
            if Tk:
                w = Tk()
                w.withdraw()
                w.clipboard_clear()
                w.clipboard_append(self.page.wikicode)
                # The copied text is lost once the script terminates
                input("The updated page text has been copied to the clipboard: "
                      "paste it in the browser, then press Enter to continue")
                w.destroy()

                ret |= 2
            else:
                logger.error("It has not been possible to copy the updated "
                             "text to the clipboard")
                ret |= 1

        # If no other action was chosen, always print the output, so that all
        # the effort doesn't go wasted
        if self.cliargs.print or ret == 0:
            print(self.page.wikicode)

        return ret & 1
Example 21
    def run(self):
        require_login(self.api)

        try:
            page = AutoPage(self.api, self.pagename)
        except ValueError:
            logger.error("The page [[{}]] currently does not exist. It must be "
                  "created manually before the script can update it."
                  .format(self.pagename))
            return

        tables = page.wikicode.filter_tags(matches=lambda node: node.tag == "table", recursive=page.wikicode.RECURSE_OTHERS)
        assert len(tables) == 2
        table_active, table_inactive = tables

        # extract rows
        rows = self.extract_rows(table_active)
        rows += self.extract_rows(table_inactive)

        # sort
        def sort_key(row):
            return self._get_editcount(row), self._get_last_edit_timestamp(row)
        rows.sort(key=sort_key, reverse=True)

        # split
        rows_active = [row for row in rows if self._get_editcount(row) >= self.min_edits]
        rows_inactive = [row for row in rows if self._get_editcount(row) < self.min_edits]

        # assemble
        for row in rows_active:
            table_active.contents.append(row)
        for row in rows_inactive:
            table_inactive.contents.append(row)

        # save
        page.save(self.edit_summary, minor="1")

def main(api):
    require_login(api)

    # check for necessary rights
    if "unwatchedpages" not in api.user.rights:
        print("The current user does not have the 'unwatchedpages' right, which is necessary to use this script. Sorry.")
        sys.exit(1)

    # get list of unwatched pages
    query_unwatched = {
        "action": "query",
        "list": "querypage",
        "qppage": "Unwatchedpages",
        "qplimit": "max",
        "continue": "",
    }

    # list flattening, limit to the Main namespace
    unwatched = (page for snippet in api.query_continue(query_unwatched) for page in snippet["querypage"]["results"] if page["ns"] == 0)

    # split into sections by language
    by_language = {}
    for page in unwatched:
        title = page["title"]
        lang = detect_language(title)[1]
        if lang not in by_language:
            by_language[lang] = []
        by_language[lang].append(title)

    # print wikitext
    for lang in sorted(by_language.keys()):
        print("== %s ==" % lang)
        print()
        for title in by_language[lang]:
            print("* %s" % title)
        print()
Example 23
        il = InterlanguageLinks(api)
        il.update_allpages()
    elif args.mode == "orphans":
        il = InterlanguageLinks(api)
        for title in il.find_orphans():
            print("* [[{}]]".format(title))
    elif args.mode == "rename":
        il = InterlanguageLinks(api)
        il.rename_non_english()
    else:
        raise Exception("Unknown mode: {}".format(args.mode))

if __name__ == "__main__":
    import ws.config
    import ws.logging

    argparser = ws.config.getArgParser(description="Update interlanguage links", epilog=modes_description)
    API.set_argparser(argparser)
    _group = argparser.add_argument_group("interlanguage")
    _group.add_argument("--mode", choices=modes, default="update", help="operation mode of the script")

    args = argparser.parse_args()

    # set up logging
    ws.logging.init(args)

    api = API.from_argparser(args)
    require_login(api)

    main(args, api)
Example 24
def plot_save(fname):
    plt.savefig(fname, dpi=192)


if __name__ == "__main__":
    import ws.config

    argparser = ws.config.getArgParser()
    API.set_argparser(argparser)
    Database.set_argparser(argparser)

    args = ws.config.parse_args(argparser)

    api = API.from_argparser(args)
    require_login(api)
    db = Database.from_argparser(args)

    users = [
        "Alad", "Fengchao", "Indigo", "Kynikos", "Lahwaacz", "Lonaowna",
        "Nl6720"
    ]

    all_logs = list(db.query(list="logevents", letype="rights", ledir="newer"))

    ax = plot_setup()
    lines = []
    for user in users:
        revs = list(
            db.query(list="allrevisions",
                     arvlimit="max",
Example 25
    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        # if we are going to save, make sure that the categories are correct first
        if self.cliargs.save is True:
            cat = Categorization(self.api)
            cat.fix_allpages()
            decat = Decategorization(self.api)
            decat.fix_allpages()

        # build category graph
        graph = CategoryGraph(self.api)

        # if we are going to save, init wanted categories
        if self.cliargs.save is True:
            graph.init_wanted_categories()

        # detect target pages, fetch content at once
        page = AutoPage(self.api, fetch_titles=self.titles)

        for title in self.titles:
            try:
                page.set_title(title)
            except ValueError:
                # page not fetched
                continue

            toc_table = page.get_tag_by_id(tag="table", id="wiki-scripts-toc-table")
            columns, category_names, alsoin = self.parse_toc_table(title, toc_table)

            if toc_table is None:
                if self.cliargs.save is True:
                    logger.error(
                        "The wiki page [[{}]] does not contain the ToC table. "
                        "Create the following entry point manually:\n"
                        "{{| id=\"wiki-scripts-toc-table\"\n...\n|}}".format(title))
                    continue
                else:
                    logger.warning(
                        "The wiki page [[{}]] does not contain the ToC table, "
                        "so there will be no translations.".format(title))

            if self.cliargs.print:
                ff = PlainFormatter(graph.parents, graph.info, category_names, alsoin)
            elif self.cliargs.save:
                ff = MediaWikiFormatter(graph.parents, graph.info, category_names, alsoin, include_opening_closing_tokens=False)
            else:
                raise NotImplementedError("unknown output action: {}".format(self.cliargs.save))

            roots = ["Category:{}".format(lang.langname_for_tag(c)) for c in columns]
            ff.format_root(roots)
            if len(roots) == 1:
                for item in graph.walk(graph.subcats, roots[0]):
                    ff.format_row(item)
            elif len(roots) == 2:
                for result in graph.compare_components(graph.subcats, *roots):
                    ff.format_row(*result)
            else:
                logger.error("Cannot compare more than 2 languages at once. Requested: {}".format(columns))
                continue

            if self.cliargs.print:
                print("== {} ==\n".format(title))
                print(ff)
            elif self.cliargs.save:
                toc_table.contents = str(ff)
                if self.cliargs.force or page.is_old_enough(min_interval=datetime.timedelta(days=1), strip_time=True):
                    try:
                        page.save(self.cliargs.summary)
                    except APIError:
                        pass
                else:
                    logger.info("The page [[{}]] has already been updated this UTC day.".format(title))
Example 26
    def __init__(self, api):
        self.api = api

        # ensure that we are authenticated
        require_login(self.api)
Example 27
    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        # if we are going to save, make sure that the categories are correct first
        if self.cliargs.save is True:
            cat = Categorization(self.api)
            cat.fix_allpages()
            decat = Decategorization(self.api)
            decat.fix_allpages()

        # build category graph
        graph = CategoryGraph(self.api)

        # if we are going to save, init wanted categories
        if self.cliargs.save is True:
            graph.init_wanted_categories()

        # detect target pages, fetch content at once
        page = AutoPage(self.api, fetch_titles=self.titles)

        for title in self.titles:
            try:
                page.set_title(title)
            except ValueError:
                # page not fetched
                continue

            toc_table = page.get_tag_by_id(tag="table",
                                           id="wiki-scripts-toc-table")
            columns, category_names, alsoin = self.parse_toc_table(
                title, toc_table)

            if toc_table is None:
                if self.cliargs.save is True:
                    logger.error(
                        "The wiki page [[{}]] does not contain the ToC table. "
                        "Create the following entry point manually:\n"
                        "{{| id=\"wiki-scripts-toc-table\"\n...\n|}}".format(
                            title))
                    continue
                else:
                    logger.warning(
                        "The wiki page [[{}]] does not contain the ToC table, "
                        "so there will be no translations.".format(title))

            if self.cliargs.print:
                ff = PlainFormatter(graph.parents, graph.info, category_names,
                                    alsoin)
            elif self.cliargs.save:
                ff = MediaWikiFormatter(graph.parents,
                                        graph.info,
                                        category_names,
                                        alsoin,
                                        include_opening_closing_tokens=False)
            else:
                raise NotImplementedError("unknown output action: {}".format(
                    self.cliargs.save))

            roots = [
                "Category:{}".format(lang.langname_for_tag(c)) for c in columns
            ]
            ff.format_root(roots)
            if len(roots) == 1:
                for item in graph.walk(graph.subcats, roots[0]):
                    ff.format_row(item)
            elif len(roots) == 2:
                for result in graph.compare_components(graph.subcats, *roots):
                    ff.format_row(*result)
            else:
                logger.error(
                    "Cannot compare more than 2 languages at once. Requested: {}"
                    .format(columns))
                continue

            if self.cliargs.print:
                print("== {} ==\n".format(title))
                print(ff)
            elif self.cliargs.save:
                toc_table.contents = str(ff)
                if self.cliargs.force or page.is_old_enough(
                        min_interval=datetime.timedelta(days=1),
                        strip_time=True):
                    try:
                        page.save(self.cliargs.summary)
                    except APIError:
                        pass
                else:
                    logger.info(
                        "The page [[{}]] has already been updated this UTC day."
                        .format(title))