def rename_non_english(self):
    """
    Offer to rename localized category pages so they match the English title.

    ``self.allpages`` is deleted and then read again (presumably to force a
    refresh of a lazily computed attribute -- TODO confirm). For every page
    detected as English whose title starts with ``Category:``, each langlink
    with an internal language tag and a different title is proposed for a
    move from ``<localized title> (<Language>)`` to
    ``<English title> (<Language>)``. The move is skipped with a warning when
    the target page exists, otherwise the user is asked for confirmation.
    """
    # FIXME: starting with English pages is not very good:
    #   - some pages are omitted (e.g. when two pages link to the same English
    #     page, at least a warning should be printed)
    #   - it suggests moving e.g. Xfce (Česky) to Xfwm (Česky) because
    #     multiple English pages link to it
    # Therefore only categories are processed.
    del self.allpages

    for entry in self.allpages:
        english_title = entry["title"]
        if lang.detect_language(english_title)[1] != "English":
            continue
        if not english_title.startswith("Category:"):
            continue

        for tag, local_title in self.get_langlinks(english_title):
            logger.info("Checking [[{}:{}]] for renaming...".format(tag, local_title))
            if not lang.is_internal_tag(tag) or local_title == english_title:
                continue

            source = "{} ({})".format(local_title, lang.langname_for_tag(tag))
            target = "{} ({})".format(english_title, lang.langname_for_tag(tag))

            if self._page_exists(target):
                logger.warning("Cannot move page [[{}]] to [[{}]]: target page already exists".format(source, target))
                continue

            # Confirmation is required: the proposal assumes that all English
            # pages are named correctly.
            if ask_yesno("Move page [[{}]] to [[{}]]?".format(source, target)) is True:
                self.api.move(
                    source,
                    target,
                    "comply with [[Help:I18n#Page titles]] and match the title of the English page",
                )
def _is_valid_internal(self, tag, title):
    """
    Tell whether a langlink points to a page known to this wiki.

    :param tag: language tag of the langlink
    :param title: title of the langlink (without the language suffix)
    :returns: ``False`` for non-internal tags; otherwise whether one of the
        formatted titles is contained in ``self.wrapped_titles``
    """
    if not lang.is_internal_tag(tag):
        return False

    known_titles = self.wrapped_titles

    # Titles with subpages are first tried with
    # ``augment_all_subpage_parts=False`` (presumably the variant where the
    # language suffix is not added to every subpage part -- TODO confirm).
    if "/" in title:
        partially_suffixed = lang.format_title(
            title,
            lang.langname_for_tag(tag),
            augment_all_subpage_parts=False,
        )
        if partially_suffixed in known_titles:
            return True

    # Fall back to the default formatting of the title.
    fully_suffixed = lang.format_title(title, lang.langname_for_tag(tag))
    return fully_suffixed in known_titles
def _is_valid_internal(self, tag, title):
    """
    Tell whether a langlink points to a page known to this wiki.

    :param tag: language tag of the langlink
    :param title: title of the langlink (without the language suffix)
    :returns: ``False`` for non-internal tags; otherwise whether the full
        title is contained in ``self.wrapped_titles``
    """
    if lang.is_internal_tag(tag):
        # Pages tagged "en" carry no "(Language)" suffix.
        if tag == "en":
            candidate = title
        else:
            candidate = "{} ({})".format(title, lang.langname_for_tag(tag))
        return candidate in self.wrapped_titles
    return False
def _title_from_langlink(self, langlink):
    """
    Convert a langlink record into a full page title.

    :param langlink: mapping with the keys ``"lang"`` (language tag) and
        ``"*"`` (title without the language suffix)
    :returns: the title formatted by ``lang.format_title``; for internal
        tags it is additionally canonicalized and, if it resolves as a
        redirect, replaced by the redirect target without its ``#fragment``
    """
    tag = langlink["lang"]
    full_title = lang.format_title(langlink["*"], lang.langname_for_tag(tag))

    # External langlinks are returned as formatted.
    if not lang.is_internal_tag(tag):
        return full_title

    full_title = canonicalize(full_title)

    # resolve redirects
    redirect_target = self.api.redirects.resolve(full_title)
    if redirect_target is None:
        return full_title
    return redirect_target.split("#", maxsplit=1)[0]
def run(self):
    """
    Generate the table of contents for every target page.

    Logs in unless ``self.cliargs.anonymous`` is set, builds the category
    graph, fetches the contents of ``self.titles`` and, for each fetched
    page, formats the category tree for the languages given by its ToC
    table. The result is printed (``self.cliargs.print``) or written into
    the table and saved (``self.cliargs.save``).

    :raises NotImplementedError: if neither output action is selected
    """
    if not self.cliargs.anonymous:
        require_login(self.api)

    # build the category graph
    category_graph = CategoryGraph(self.api)
    graph_parents, graph_subcats, info = category_graph.build_graph()

    # fetch the content of all target pages at once
    contents, timestamps, pageids = self.get_pages_contents(self.titles)

    for title in self.titles:
        if title not in contents:
            continue

        wikicode = mwparserfromhell.parse(contents[title])
        toc_table, columns, dictionary = self.parse_toc_table(title, wikicode)

        if toc_table is None:
            if self.cliargs.save is True:
                # nowhere to write the result, skip the page
                logger.error(
                    "The wiki page '{}' does not contain the ToC table. "
                    "Create the following entry point manually: "
                    "{{| id=\"wiki-scripts-toc-table\"\n|}}".format(title))
                continue
            logger.warning(
                "The wiki page '{}' does not contain the ToC table, "
                "so there will be no translations.".format(title))

        # choose the formatter according to the requested output action
        if self.cliargs.print:
            ff = PlainFormatter(graph_parents, info, dictionary)
        elif self.cliargs.save:
            ff = MediaWikiFormatter(graph_parents, info, dictionary, include_opening_closing_tokens=False)
        else:
            raise NotImplementedError("unknown output action: {}".format(self.cliargs.save))

        roots = ["Category:{}".format(lang.langname_for_tag(column)) for column in columns]
        ff.format_root(roots)

        # only one language or a side-by-side comparison of two is supported
        if len(roots) not in (1, 2):
            logger.error("Cannot compare more than 2 languages at once. Requested: {}".format(columns))
            continue
        if len(roots) == 1:
            for item in category_graph.walk(graph_subcats, roots[0]):
                ff.format_row(item)
        else:
            for result in category_graph.compare_components(graph_subcats, *roots):
                ff.format_row(*result)

        if self.cliargs.print:
            print("== {} ==\n".format(title))
            print(ff)
        elif self.cliargs.save:
            # the table is part of ``wikicode``, so the new text is serialized afterwards
            toc_table.contents = str(ff)
            self.save_page(title, pageids[title], contents[title], str(wikicode), timestamps[title])
def _title_from_langlink(self, langlink):
    """
    Convert a langlink record into a full page title.

    :param langlink: mapping with the keys ``"lang"`` (language tag) and
        ``"*"`` (title without the language suffix)
    :returns: the bare title for English, ``"<title> (<Language>)"``
        otherwise; for internal tags it is additionally canonicalized and,
        if listed in ``self.redirects``, replaced by the redirect target
        without its ``#fragment``
    """
    tag = langlink["lang"]
    langname = lang.langname_for_tag(tag)

    # English pages carry no "(Language)" suffix.
    bare_title = langlink["*"]
    full_title = bare_title if langname == "English" else "{} ({})".format(bare_title, langname)

    if not lang.is_internal_tag(tag):
        return full_title

    full_title = canonicalize(full_title)

    # resolve redirects
    if title_is_redirect := (full_title in self.redirects):
        full_title = self.redirects[full_title].split("#", maxsplit=1)[0]
    return full_title
def from_argparser(klass, args, api=None):
    """
    Build an instance of ``klass`` from parsed command-line arguments.

    :param klass: the class to instantiate (presumably this is used as a
        classmethod -- the decorator is not visible here)
    :param args: parsed arguments; ``lang``, ``interactive``, ``dry_run``,
        ``first`` and ``title`` are read
    :param api: API object to use; created with ``API.from_argparser(args)``
        when ``None``
    :returns: the new ``klass`` instance
    :raises ValueError: if ``args.lang`` contains a tag that is not an
        internal language tag
    """
    if api is None:
        api = API.from_argparser(args)

    langnames = set()
    if args.lang:
        tags = args.lang.split(",")
        # Look the valid tags up once instead of once per checked tag.
        internal_tags = lang.get_internal_tags()
        for tag in tags:
            if tag not in internal_tags:
                # ValueError is a subclass of Exception, so handlers written
                # for the previously raised bare Exception keep working.
                raise ValueError("{} is not a valid language tag".format(tag))
        langnames = {lang.langname_for_tag(tag) for tag in tags}

    # The class may force interactive mode regardless of the command line.
    interactive = args.interactive if klass.force_interactive is False else True

    return klass(
        api,
        interactive=interactive,
        dry_run=args.dry_run,
        first=args.first,
        title=args.title,
        langnames=langnames,
    )
def from_argparser(klass, args, api=None, db=None):
    """
    Build an instance of ``klass`` from parsed command-line arguments.

    :param klass: the class to instantiate (presumably this is used as a
        classmethod -- the decorator is not visible here)
    :param args: parsed arguments; ``lang``, ``interactive``, ``dry_run``,
        ``first``, ``title``, ``connection_timeout`` and
        ``connection_max_retries`` are read
    :param api: API object to use; created with ``API.from_argparser(args)``
        when ``None``
    :param db: database object to use; created with
        ``Database.from_argparser(args)`` when ``None``
    :returns: the new ``klass`` instance
    :raises ValueError: if ``args.lang`` contains a tag that is not an
        internal language tag
    """
    if api is None:
        api = API.from_argparser(args)
    if db is None:
        db = Database.from_argparser(args)

    langnames = set()
    if args.lang:
        tags = args.lang.split(",")
        # Look the valid tags up once instead of once per checked tag.
        internal_tags = lang.get_internal_tags()
        for tag in tags:
            if tag not in internal_tags:
                # ValueError is a subclass of Exception, so handlers written
                # for the previously raised bare Exception keep working.
                raise ValueError("{} is not a valid language tag".format(tag))
        langnames = {lang.langname_for_tag(tag) for tag in tags}

    return klass(
        api,
        db,
        interactive=args.interactive,
        dry_run=args.dry_run,
        first=args.first,
        title=args.title,
        langnames=langnames,
        connection_timeout=args.connection_timeout,
        # note the different attribute name on the arguments namespace
        max_retries=args.connection_max_retries,
    )
def rename_non_english(self):
    """
    Propose moves of localized category pages to the English-based title.

    ``self.allpages`` is deleted and read again (presumably a lazily
    computed attribute that is refreshed this way -- TODO confirm). Each
    page detected as English in the ``Category:`` namespace is inspected:
    every langlink with an internal tag and a title different from the
    English one yields a proposal to move ``<localized title> (<Language>)``
    to ``<English title> (<Language>)``. Existing targets are reported with a
    warning; other moves are performed only after interactive confirmation.
    """
    # FIXME: starting with English pages is not very good:
    #   - some pages are omitted (e.g. when two pages link to the same English
    #     page, at least a warning should be printed)
    #   - it suggests moving e.g. Xfce (Česky) to Xfwm (Česky) because
    #     multiple English pages link to it
    # Therefore only categories are processed.
    del self.allpages

    # lazily filtered, so pages are still examined one at a time
    english_categories = (
        page["title"]
        for page in self.allpages
        if lang.detect_language(page["title"])[1] == "English"
        and page["title"].startswith("Category:")
    )

    summary = "comply with [[Help:I18n#Page titles]] and match the title of the English page"

    for title in english_categories:
        for tag, localized_title in self.get_langlinks(title):
            logger.info("Checking [[{}:{}]] for renaming...".format(tag, localized_title))

            needs_rename = lang.is_internal_tag(tag) and localized_title != title
            if not needs_rename:
                continue

            source = "{} ({})".format(localized_title, lang.langname_for_tag(tag))
            target = "{} ({})".format(title, lang.langname_for_tag(tag))

            if self._page_exists(target):
                logger.warning("Cannot move page [[{}]] to [[{}]]: target page already exists".format(source, target))
            else:
                # Asking is necessary: the proposal assumes that all English
                # pages are named correctly.
                confirmed = ask_yesno("Move page [[{}]] to [[{}]]?".format(source, target))
                if confirmed is True:
                    self.api.move(source, target, summary)
def from_argparser(klass, args, api=None, db=None):
    """
    Build an instance of ``klass`` from parsed command-line arguments.

    :param klass: the class to instantiate (presumably this is used as a
        classmethod -- the decorator is not visible here)
    :param args: parsed arguments; ``lang``, ``first``, ``title``,
        ``connection_timeout`` and ``connection_max_retries`` are read
    :param api: API object to use; created with ``API.from_argparser(args)``
        when ``None``
    :param db: database object to use; created with
        ``Database.from_argparser(args)`` when ``None``
    :returns: the new ``klass`` instance
    :raises ValueError: if ``args.lang`` contains a tag that is not an
        internal language tag
    """
    if api is None:
        api = API.from_argparser(args)
    if db is None:
        db = Database.from_argparser(args)

    langnames = set()
    if args.lang:
        tags = args.lang.split(",")
        # Look the valid tags up once instead of once per checked tag.
        internal_tags = lang.get_internal_tags()
        for tag in tags:
            if tag not in internal_tags:
                # ValueError is a subclass of Exception, so handlers written
                # for the previously raised bare Exception keep working.
                raise ValueError("{} is not a valid language tag".format(tag))
        langnames = {lang.langname_for_tag(tag) for tag in tags}

    return klass(
        api,
        db,
        first=args.first,
        title=args.title,
        langnames=langnames,
        connection_timeout=args.connection_timeout,
        # note the different attribute name on the arguments namespace
        max_retries=args.connection_max_retries,
    )
def __init__(self, api, cliargs):
    """
    Store the API object and normalize the command-line arguments.

    ``cliargs`` is modified in place: ``print`` is enabled when neither
    ``save`` nor ``print`` was requested, ``toc_languages == ["all"]`` is
    expanded to all internal language tags, and ``toc_page`` is
    canonicalized and reduced to the first item returned by
    ``lang.detect_language``. The list of target pages is stored in
    ``self.titles``.

    :param api: API object used by the other methods
    :param cliargs: parsed command-line arguments
    """
    self.api = api
    self.cliargs = cliargs

    # printing is the default output action
    if self.cliargs.save is False and self.cliargs.print is False:
        self.cliargs.print = True

    # the single value "all" stands for every internal language
    requested = self.cliargs.toc_languages
    if len(requested) == 1 and requested[0] == "all":
        self.cliargs.toc_languages = lang.get_internal_tags()

    # strip "(Language)" suffix
    canonical_page = canonicalize(self.cliargs.toc_page)
    self.cliargs.toc_page = lang.detect_language(canonical_page)[0]

    # One title per requested language, in sorted tag order; only the page
    # in the local language has no "(Language)" suffix.
    self.titles = []
    for tag in sorted(self.cliargs.toc_languages):
        is_local = tag == lang.tag_for_langname(lang.get_local_language())
        if is_local:
            page_title = self.cliargs.toc_page
        else:
            page_title = "{} ({})".format(self.cliargs.toc_page, lang.langname_for_tag(tag))
        self.titles.append(page_title)
def get_langlinks(self, full_title):
    """
    Build the langlinks for the page ``full_title``.

    The titles of all pages in the family are obtained from
    :py:meth:`self.titles_in_family`; the entry of the passed page itself is
    removed and the rest is sorted by the language tag.

    :param full_title: full title of the page whose langlinks are wanted
    :returns: a list of ``(tag, title)`` tuples
    :raises KeyError: if the page itself is not among the family members
    """
    archive_title = "ArchWiki:Archive"

    # all (tag, title) pairs in the family
    family_tags, family_titles = self.titles_in_family(full_title)
    family = set(zip(family_tags, family_titles))

    own_title, own_langname = lang.detect_language(full_title)
    own_tag = lang.tag_for_langname(own_langname)

    # Links to ArchWiki:Archive and its translations are dropped, unless the
    # archive page itself is being processed.
    if own_title != archive_title:
        for member in list(family):
            if member[1] == archive_title:
                family.remove(member)

    # a page does not link to itself
    family.remove((own_tag, own_title))

    # The title is converted back and forth to get the "(Language)" suffix
    # into all subpage parts.
    result = []
    for tag, title in sorted(family, key=lambda member: member[0]):
        suffixed = lang.format_title(title, lang.langname_for_tag(tag))
        # Done only when the suffixed page exists; a title without
        # "(Language)" in all subpage parts is still valid as per Help:i18n.
        if self._page_exists(suffixed):
            title = lang.detect_language(suffixed, strip_all_subpage_parts=False)[0]
        result.append((tag, title))
    return result
def run(self):
    """
    Generate the table of contents for every target page.

    Logs in unless ``self.cliargs.anonymous`` is set. In save mode the
    categorization of all pages is fixed first and the wanted categories of
    the graph are initialized. For each fetched page in ``self.titles`` the
    category tree is formatted for the languages given by its ToC table and
    either printed (``self.cliargs.print``) or written into the table and
    saved (``self.cliargs.save``). A page is saved only when
    ``self.cliargs.force`` is set or ``page.is_old_enough`` accepts a
    one-day interval; a failed save is logged and the remaining pages are
    still processed.

    :raises NotImplementedError: if neither output action is selected
    """
    if not self.cliargs.anonymous:
        require_login(self.api)

    # if we are going to save, make sure that the categories are correct first
    if self.cliargs.save is True:
        cat = Categorization(self.api)
        cat.fix_allpages()
        decat = Decategorization(self.api)
        decat.fix_allpages()

    # build category graph
    graph = CategoryGraph(self.api)

    # if we are going to save, init wanted categories
    if self.cliargs.save is True:
        graph.init_wanted_categories()

    # detect target pages, fetch content at once
    page = AutoPage(self.api, fetch_titles=self.titles)

    for title in self.titles:
        try:
            page.set_title(title)
        except ValueError:
            # page not fetched
            continue

        toc_table = page.get_tag_by_id(tag="table", id="wiki-scripts-toc-table")
        # NOTE: called before the None check below, so parse_toc_table is
        # expected to cope with a missing table.
        columns, category_names, alsoin = self.parse_toc_table(title, toc_table)

        if toc_table is None:
            if self.cliargs.save is True:
                # nowhere to write the result, skip the page
                logger.error(
                    "The wiki page [[{}]] does not contain the ToC table. "
                    "Create the following entry point manually:\n"
                    "{{| id=\"wiki-scripts-toc-table\"\n...\n|}}".format(title))
                continue
            else:
                logger.warning(
                    "The wiki page [[{}]] does not contain the ToC table, "
                    "so there will be no translations.".format(title))

        if self.cliargs.print:
            ff = PlainFormatter(graph.parents, graph.info, category_names, alsoin)
        elif self.cliargs.save:
            ff = MediaWikiFormatter(graph.parents, graph.info, category_names, alsoin, include_opening_closing_tokens=False)
        else:
            raise NotImplementedError("unknown output action: {}".format(self.cliargs.save))

        roots = ["Category:{}".format(lang.langname_for_tag(c)) for c in columns]
        ff.format_root(roots)
        if len(roots) == 1:
            for item in graph.walk(graph.subcats, roots[0]):
                ff.format_row(item)
        elif len(roots) == 2:
            for result in graph.compare_components(graph.subcats, *roots):
                ff.format_row(*result)
        else:
            logger.error("Cannot compare more than 2 languages at once. Requested: {}".format(columns))
            continue

        if self.cliargs.print:
            print("== {} ==\n".format(title))
            print(ff)
        elif self.cliargs.save:
            toc_table.contents = str(ff)
            if self.cliargs.force or page.is_old_enough(min_interval=datetime.timedelta(days=1), strip_time=True):
                try:
                    page.save(self.cliargs.summary)
                except APIError as e:
                    # Saving stays best-effort so the remaining pages are
                    # still processed, but the failure must not go unnoticed.
                    logger.error("Failed to save the page [[{}]]: {}".format(title, e))
            else:
                logger.info("The page [[{}]] has already been updated this UTC day.".format(title))
def _is_valid_internal(self, tag, title):
    """
    Tell whether a langlink points to a page known to this wiki.

    :param tag: language tag of the langlink
    :param title: title of the langlink (without the language suffix)
    :returns: ``False`` for non-internal tags; otherwise whether the title
        formatted by ``lang.format_title`` is in ``self.wrapped_titles``
    """
    if lang.is_internal_tag(tag):
        langname = lang.langname_for_tag(tag)
        return lang.format_title(title, langname) in self.wrapped_titles
    return False
def run(self):
    """
    Generate the table of contents for every target page.

    Logs in unless ``self.cliargs.anonymous`` is set. In save mode the
    categorization of all pages is fixed first and the wanted categories of
    the graph are initialized. For each fetched page in ``self.titles`` the
    category tree is formatted for the languages given by its ToC table and
    either printed (``self.cliargs.print``) or written into the table and
    saved (``self.cliargs.save``). A page is saved only when
    ``self.cliargs.force`` is set or ``page.is_old_enough`` accepts a
    one-day interval; a failed save is logged and the remaining pages are
    still processed.

    :raises NotImplementedError: if neither output action is selected
    """
    saving = self.cliargs.save is True

    if not self.cliargs.anonymous:
        require_login(self.api)

    # if we are going to save, make sure that the categories are correct first
    if saving:
        Categorization(self.api).fix_allpages()
        Decategorization(self.api).fix_allpages()

    # build category graph
    graph = CategoryGraph(self.api)

    # if we are going to save, init wanted categories
    if saving:
        graph.init_wanted_categories()

    # detect target pages, fetch content at once
    page = AutoPage(self.api, fetch_titles=self.titles)

    for title in self.titles:
        try:
            page.set_title(title)
        except ValueError:
            # page not fetched
            continue

        toc_table = page.get_tag_by_id(tag="table", id="wiki-scripts-toc-table")
        # NOTE: called before the None check below, so parse_toc_table is
        # expected to cope with a missing table.
        columns, category_names, alsoin = self.parse_toc_table(title, toc_table)

        if toc_table is None:
            if self.cliargs.save is True:
                # nowhere to write the result, skip the page
                logger.error(
                    "The wiki page [[{}]] does not contain the ToC table. "
                    "Create the following entry point manually:\n"
                    "{{| id=\"wiki-scripts-toc-table\"\n...\n|}}".format(title))
                continue
            logger.warning(
                "The wiki page [[{}]] does not contain the ToC table, "
                "so there will be no translations.".format(title))

        if self.cliargs.print:
            ff = PlainFormatter(graph.parents, graph.info, category_names, alsoin)
        elif self.cliargs.save:
            ff = MediaWikiFormatter(
                graph.parents,
                graph.info,
                category_names,
                alsoin,
                include_opening_closing_tokens=False,
            )
        else:
            raise NotImplementedError("unknown output action: {}".format(self.cliargs.save))

        roots = ["Category:{}".format(lang.langname_for_tag(c)) for c in columns]
        ff.format_root(roots)
        if len(roots) == 1:
            for item in graph.walk(graph.subcats, roots[0]):
                ff.format_row(item)
        elif len(roots) == 2:
            for result in graph.compare_components(graph.subcats, *roots):
                ff.format_row(*result)
        else:
            logger.error("Cannot compare more than 2 languages at once. Requested: {}".format(columns))
            continue

        if self.cliargs.print:
            print("== {} ==\n".format(title))
            print(ff)
        elif self.cliargs.save:
            toc_table.contents = str(ff)
            if self.cliargs.force or page.is_old_enough(min_interval=datetime.timedelta(days=1), strip_time=True):
                try:
                    page.save(self.cliargs.summary)
                except APIError as e:
                    # Saving stays best-effort so the remaining pages are
                    # still processed, but the failure must not go unnoticed.
                    logger.error("Failed to save the page [[{}]]: {}".format(title, e))
            else:
                logger.info("The page [[{}]] has already been updated this UTC day.".format(title))