def run(self): """Run the bot.""" if not all((self.opt.action, self.generator)): return catmode = (self.opt.action == 'categories') for page in self.generator: try: self.current_page = page commons = page.site.image_repository() commonspage = getattr(pywikibot, ('Page', 'Category')[catmode])(commons, page.title()) try: commonspage.get(get_redirect=True) pagetitle = commonspage.title(with_ns=not catmode) if page.title() == pagetitle: old_text = page.get() text = old_text # for Commons/Commonscat template s = self.findTemplate.search(text) s2 = getattr( self, 'findTemplate{}'.format( (2, 3)[catmode]).search(text)) if s or s2: pywikibot.output('** Already done.') else: cats = textlib.getCategoryLinks(text, site=page.site) text = textlib.replaceCategoryLinks( '%s{{commons%s|%s}}' % (text, ('', 'cat')[catmode], pagetitle), cats, site=page.site) comment = i18n.twtranslate( page.site, 'commons_link{}-template-added'.format( ('', '-cat')[catmode])) try: self.userPut(page, old_text, text, summary=comment) except EditConflictError: pywikibot.output( 'Skipping {} because of edit conflict'. format(page.title())) except NoPageError: pywikibot.output('{} does not exist in Commons'.format( page.__class__.__name__)) except NoPageError: pywikibot.output('Page {} does not exist'.format(page.title())) except IsRedirectPageError: pywikibot.output('Page {} is a redirect; skipping.'.format( page.title())) except LockedPageError: pywikibot.output('Page {} is locked'.format(page.title()))
def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links and categories are put to the correct
    position and into the right order. This combines the old instances
    standardizeInterwiki and standardizeCategories.

    The page footer has the following sections in that sequence:
    1. categories
    2. ## TODO: template beyond categories ##
    3. additional information depending on local site policy
    4. interwiki links
    """
    categories = None
    interwikiLinks = None

    # Pywikibot is no longer allowed to touch categories on the
    # German Wikipedia. See
    # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
    # ignoring nn-wiki because of the comment line above the iw section
    if not self.template and '{{Personendaten' not in text and \
       '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
       self.site.code not in ('et', 'it', 'bg', 'ru'):
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:
        subpage = False
        if self.template:
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl
            except KeyError:
                loc = None
            if loc is not None and loc in self.title:
                subpage = True
        interwikiLinks = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # Removing the interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)

    # Adding categories
    if categories:
        # TODO: Sorting categories in alphabetic order.
        # e.g. using categories.sort()
        # TODO: Taking main cats to top
        text = textlib.replaceCategoryLinks(text, categories,
                                            site=self.site)
    # Adding the interwiki
    if interwikiLinks:
        text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)
    return text
def apply(self, page, summaries=[], *args):
    result = super().apply(page, summaries, *args)
    if result:
        categories = textlib.getCategoryLinks(page.text, site=self.site)
        categories.sort(key=self.sortCategories)
        page.text = textlib.replaceCategoryLinks(page.text, categories,
                                                 self.site)
    return result
def run(self): """Run the bot.""" if not all((self.getOption('action'), self.generator)): return catmode = (self.getOption('action') == 'categories') for page in self.generator: try: self.current_page = page commons = page.site.image_repository() commonspage = getattr(pywikibot, ('Page', 'Category')[catmode])(commons, page.title()) try: commonspage.get(get_redirect=True) pagetitle = commonspage.title(withNamespace=not catmode) if page.title() == pagetitle: oldText = page.get() text = oldText # for Commons/Commonscat template s = self.findTemplate.search(text) s2 = getattr(self, 'findTemplate%d' % (2, 3)[catmode]).search(text) if s or s2: pywikibot.output(u'** Already done.') else: cats = textlib.getCategoryLinks(text, site=page.site) text = textlib.replaceCategoryLinks( u'%s{{commons%s|%s}}' % (text, ('', 'cat')[catmode], pagetitle), cats, site=page.site) comment = i18n.twtranslate( page.site, 'commons_link%s-template-added' % ('', '-cat')[catmode]) try: self.userPut(page, oldText, text, summary=comment) except pywikibot.EditConflict: pywikibot.output( u'Skipping %s because of edit conflict' % page.title()) except pywikibot.NoPage: pywikibot.output(u'%s does not exist in Commons' % page.__class__.__name__) except pywikibot.NoPage: pywikibot.output(u'Page %s does not exist' % page.title()) except pywikibot.IsRedirectPage: pywikibot.output(u'Page %s is a redirect; skipping.' % page.title()) except pywikibot.LockedPage: pywikibot.output(u'Page %s is locked' % page.title())
def test_adjoining_links(self):
    old = self.old.replace(config.LS, '')
    cats = textlib.getCategoryLinks(old, site=self.site)
    self.assertEqual(self.cats, cats)
    sep = config.LS
    config.line_separator = ''  # use an empty separator temporarily
    new = textlib.replaceCategoryLinks(old, cats, site=self.site)
    self.assertEqual(old, new)
    config.line_separator = sep  # restore the default separator
def make_categories(page, list, site=None): if site is None: site = pywikibot.Site() pllist = [] for p in list: cattitle = "%s:%s" % (site.namespaces.CATEGORY, p) pllist.append(pywikibot.Page(site, cattitle)) page.put_async(textlib.replaceCategoryLinks(page.get(), pllist, site=page.site), summary=i18n.twtranslate(site, 'catall-changing'))
def make_categories(page, list, site=None): if site is None: site = pywikibot.Site() pllist = [] for p in list: cattitle = "%s:%s" % (site.category_namespace(), p) pllist.append(pywikibot.Page(site, cattitle)) page.put_async(textlib.replaceCategoryLinks(page.get(), pllist, site=page.site), summary=i18n.twtranslate(site, 'catall-changing'))
def make_categories(page, list, site=None): """Make categories.""" if site is None: site = pywikibot.Site() pllist = [] for p in list: pllist.append(pywikibot.Page(site, 'Category:' + p)) page.put(textlib.replaceCategoryLinks(page.get(), pllist, site=page.site), asynchronous=True, summary=i18n.twtranslate(site, 'catall-changing'))
def test_adjoining_links(self):
    cats_std = textlib.getCategoryLinks(self.old, site=self.site)
    old = self.old.replace(config.LS, "")
    cats = textlib.getCategoryLinks(old, site=self.site)
    self.assertEqual(cats_std, cats)
    sep = config.LS
    config.line_separator = ""  # use an empty separator temporarily
    new = textlib.replaceCategoryLinks(old, cats, site=self.site)
    # Restore the default separator.
    config.line_separator = sep
    self.assertEqual(old, new)
def run(self): """Run the bot.""" if not all((self.getOption('action'), self.generator)): return catmode = (self.getOption('action') == 'categories') for page in self.generator: try: self.current_page = page commons = page.site.image_repository() commonspage = getattr(pywikibot, ('Page', 'Category')[catmode] )(commons, page.title()) try: commonspage.get(get_redirect=True) pagetitle = commonspage.title(withNamespace=not catmode) if page.title() == pagetitle: oldText = page.get() text = oldText # for Commons/Commonscat template s = self.findTemplate.search(text) s2 = getattr(self, 'findTemplate%d' % (2, 3)[catmode]).search(text) if s or s2: pywikibot.output(u'** Already done.') else: cats = textlib.getCategoryLinks(text, site=page.site) text = textlib.replaceCategoryLinks( u'%s{{commons%s|%s}}' % (text, ('', 'cat')[catmode], pagetitle), cats, site=page.site) comment = i18n.twtranslate( page.site, 'commons_link%s-template-added' % ('', '-cat')[catmode]) try: self.userPut(page, oldText, text, summary=comment) except pywikibot.EditConflict: pywikibot.output( u'Skipping %s because of edit conflict' % page.title()) except pywikibot.NoPage: pywikibot.output(u'%s does not exist in Commons' % page.__class__.__name__) except pywikibot.NoPage: pywikibot.output(u'Page %s does not exist' % page.title()) except pywikibot.IsRedirectPage: pywikibot.output(u'Page %s is a redirect; skipping.' % page.title()) except pywikibot.LockedPage: pywikibot.output(u'Page %s is locked' % page.title())
def make_categories(page, list, site=None): """Make categories.""" if site is None: site = pywikibot.Site() pllist = [] for p in list: cattitle = "%s:%s" % (site.namespaces.CATEGORY, p) pllist.append(pywikibot.Page(site, cattitle)) page.put(textlib.replaceCategoryLinks(page.get(), pllist, site=page.site), asynchronous=True, summary=i18n.twtranslate(site, 'catall-changing'))
def test_adjoining_links(self): """Test getting and replacing adjacent categories.""" cats_std = textlib.getCategoryLinks(self.old, site=self.site) old = self.old.replace(config.LS, '') cats = textlib.getCategoryLinks(old, site=self.site) self.assertEqual(cats_std, cats) sep = config.LS config.line_separator = '' # use an empty separator temporarily new = textlib.replaceCategoryLinks(old, cats, site=self.site) # Restore the default separator. config.line_separator = sep self.assertEqual(old, new)
def replace_default_cat_with_new_categories_in_image_text(
        old_text, base_category, new_categories):
    """Add new categories to page text and remove any base_category."""
    if not new_categories:
        # No categories to add; we do not want to remove the base one either.
        raise NoCategoryToAddException()
    # Remove base category
    page_text_without_base_category = textlib.replaceCategoryInPlace(
        old_text, base_category, None)
    final_text = textlib.replaceCategoryLinks(
        page_text_without_base_category, new_categories, addOnly=True)
    return final_text
def include(self, pl, checklinks=True, realinclude=True, linkterm=None,
            summary=''):
    """Include the current page to the working category."""
    global workingcat, parentcats
    global checked, tocheck
    cl = checklinks
    mysite = self.site
    if linkterm:
        actualworkingcat = pywikibot.Category(mysite, workingcat.title(),
                                              sort_key=linkterm)
    else:
        actualworkingcat = workingcat
    if realinclude:
        try:
            text = pl.get()
        except pywikibot.NoPage:
            pass
        except pywikibot.IsRedirectPage:
            cl = True
        else:
            cats = [x for x in pl.categories()]
            if workingcat not in cats:
                cats = [x for x in pl.categories()]
                for c in cats:
                    if c in parentcats:
                        if self.removeparent:
                            pl.change_category(actualworkingcat,
                                               summary=summary)
                            break
                else:
                    pl.put(textlib.replaceCategoryLinks(
                        text, cats + [actualworkingcat], site=pl.site),
                        summary=summary)
    if cl:
        if self.checkforward:
            for page2 in pl.linkedPages():
                if self.needcheck(page2):
                    tocheck.append(page2)
                    checked[page2] = page2
        if self.checkbackward:
            for ref_page in pl.getReferences():
                if self.needcheck(ref_page):
                    tocheck.append(ref_page)
                    checked[ref_page] = ref_page
def make_categories(page, list: list, site=None):
    """Make categories.

    :param page: The page to update and save
    :type page: pywikibot.page.BasePage
    :param list: The list which contains categories
    """
    if site is None:
        site = pywikibot.Site()
    pllist = []
    for p in list:
        pllist.append(pywikibot.Page(site, 'Category:' + p))
    page.put(textlib.replaceCategoryLinks(page.get(), pllist,
                                          site=page.site),
             asynchronous=True,
             summary=i18n.twtranslate(site, 'catall-changing'))
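A minimal usage sketch for the make_categories() helper above. The page title and category names here are hypothetical placeholders, and the call assumes a configured pywikibot site and login.

import pywikibot

# Hypothetical example; titles and category names are not from the source.
site = pywikibot.Site()
page = pywikibot.Page(site, 'Example page')  # assumed page title
# Replaces the page's category links with the given categories and saves
# the page with the translated 'catall-changing' summary.
make_categories(page, ['Maps', 'Maps of Europe'], site=site)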
def addCategory(site, page, cat):
    old_text = page.text
    cats = textlib.getCategoryLinks(old_text)
    catpl = pywikibot.Category(site, cat)
    if catpl not in cats:
        print("\t'" + cat + "' not in page categories. Adding")
        cats.append(catpl)
        text = textlib.replaceCategoryLinks(page.text, cats, site=site)
        userPut(page, old_text, text, minor=True, botflag=True)
        return True
    else:
        print("\t'" + cat + "' already in page categories")
        return False
def treat(self, page): """Process one page.""" if page.isRedirectPage(): # if it's a redirect use the redirect target instead redirTarget = page.getRedirectTarget() if self.follow_redirects: self.current_page = redirTarget else: pywikibot.warning( "Page %s is a redirect to %s; skipping." % (page.title(asLink=True), redirTarget.title(asLink=True)) ) # loading it will throw an error if we don't jump out before return else: self.current_page = page if self.current_page.exists(): # Load the page text = self.current_page.text elif self.create: pywikibot.output("Page %s doesn't exist yet; creating." % (self.current_page.title(asLink=True))) text = "" else: pywikibot.output("Page %s does not exist; skipping." % self.current_page.title(asLink=True)) return # store old text, so we don't have reload it every time old_text = text cats = textlib.getCategoryLinks(text) pywikibot.output("Current categories:") for cat in cats: pywikibot.output("* %s" % cat.title()) catpl = pywikibot.Category(self.current_page.site, self.newcat) if catpl in cats: pywikibot.output("%s is already in %s." % (self.current_page.title(), catpl.title())) else: if self.sort: catpl = self.sorted_by_last_name(catpl, self.current_page) pywikibot.output("Adding %s" % catpl.title(asLink=True)) cats.append(catpl) text = textlib.replaceCategoryLinks(text, cats, site=self.current_page.site) comment = self.comment if not comment: comment = i18n.twtranslate( self.current_page.site, "category-adding", {"newcat": catpl.title(withNamespace=False)} ) try: self.userPut(self.current_page, old_text, text, summary=comment, minor=True, botflag=True) except pywikibot.PageSaveRelatedError as error: pywikibot.output("Page %s not saved: %s" % (self.current_page.title(asLink=True), error))
def remove_categories():
    site = pywikibot.Site()
    category = pywikibot.Category(site, "Category:Files")
    generator = pagegenerators.PreloadingGenerator(
        pagegenerators.CategorizedPageGenerator(category=category))
    for page in generator:
        pywikibot.output("")
        pywikibot.output(vars(page))
        pywikibot.output(page.text)
        page.text = textlib.replaceCategoryLinks(page.text, [],
                                                 site=page.site)
        page.save(summary="Remove categories.",
                  # asynchronous=True,
                  )
def set_category_status(site, page, cat, status):
    old_text = page.text
    cats = textlib.getCategoryLinks(old_text)
    catpl = pywikibot.Category(site, cat)
    if status:
        if catpl not in cats:
            cats.append(catpl)
    else:
        if catpl in cats:
            cats.remove(catpl)
    text = textlib.replaceCategoryLinks(page.text, cats, site=site)
    if old_text != text:
        page.text = text
        page.save(minor=True, botflag=True)
        return True
    return False
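A hedged example of how set_category_status() above might be called; the page and category names are assumptions for illustration only.

import pywikibot

site = pywikibot.Site()
page = pywikibot.Page(site, 'Example')  # hypothetical page title
# Ensure the category is present (True) or absent (False); the helper
# saves only when the wikitext actually changes.
changed = set_category_status(site, page, 'Checked files', True)
if changed:
    print('Category list updated')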
def apply(self, page, *args):
    # remove empty list items
    page.text = re.sub(r'^\* *\n', '', page.text, flags=re.M)

    # sort categories
    categories = textlib.getCategoryLinks(page.text, site=page.site)
    category_living = pywikibot.Category(page.site, 'Žijící lidé')
    if category_living in categories:
        if any(cat.title(with_ns=False).startswith('Úmrtí ')
               for cat in categories):
            categories.remove(category_living)
    page.text = textlib.replaceCategoryLinks(page.text, categories,
                                             page.site)
    page.text = re.sub(
        r'(\{\{ *(?:%s)[^}]+\}\}\n)\n(\[\[(?:%s))'
        % ('|'.join(map(re.escape,
                        self.site.getmagicwords('defaultsort'))),
           '|'.join(self.site.namespaces[14])),
        r'\1\2', page.text)
def harvestSortKey(self, match):
    text = match.group()
    if self.defaultsortR.search(text):
        return text
    keys = {}
    categories = textlib.getCategoryLinks(text, site=self.site)
    if not any(category.title(with_ns=False) in ('Muži', 'Žijící lidé',
                                                 'Ženy')
               for category in categories):
        return text
    for category in categories:
        key = category.sortKey
        if key:
            key = self.tidy_sortkey(key)
            if not key.strip():
                continue
            keys.setdefault(key, 0.0)
            keys[key] += 1
    if len(keys) > 1:
        return text
    if not keys:
        return text
    if sum(keys.values()) < 4:
        return text
    key = list(keys.keys()).pop()
    for category in categories:
        if category.sortKey is not None:
            if self.tidy_sortkey(category.sortKey) == key:
                category.sortKey = None
    categories.sort(key=self.sort_category)
    text = textlib.removeCategoryLinks(text, self.site)
    text += '\n\n{{DEFAULTSORT:%s}}' % key
    before, _, after = textlib.replaceCategoryLinks(
        text, categories, self.site).rpartition('\n\n')  # fixme: safer
    return before + '\n' + after
def include(pl, checklinks=True, realinclude=True, linkterm=None):
    cl = checklinks
    if linkterm:
        actualworkingcat = pywikibot.Category(mysite, workingcat.title(),
                                              sortKey=linkterm)
    else:
        actualworkingcat = workingcat
    if realinclude:
        try:
            text = pl.get()
        except pywikibot.NoPage:
            pass
        except pywikibot.IsRedirectPage:
            cl = True
            pass
        else:
            cats = [x for x in pl.categories()]
            if workingcat not in cats:
                cats = [x for x in pl.categories()]
                for c in cats:
                    if c in parentcats:
                        if removeparent:
                            pl.change_category(actualworkingcat)
                            break
                else:
                    pl.put(
                        textlib.replaceCategoryLinks(
                            text, cats + [actualworkingcat], site=pl.site))
    if cl:
        if checkforward:
            for page2 in pl.linkedPages():
                if needcheck(page2):
                    tocheck.append(page2)
                    checked[page2] = page2
        if checkbackward:
            for refPage in pl.getReferences():
                if needcheck(refPage):
                    tocheck.append(refPage)
                    checked[refPage] = refPage
def include(self, page, checklinks=True, realinclude=True, linkterm=None):
    """Include the current page to the working category."""
    global workingcat, parentcats
    global checked
    actualworkingcat = workingcat
    if linkterm:
        actualworkingcat.sortKey = linkterm
    if realinclude and page.exists():
        if page.isRedirectPage():
            checklinks = True
        else:
            cats = list(page.categories())
            if workingcat not in cats \
               and not self.change_category(page, cats):
                newtext = textlib.replaceCategoryLinks(
                    page.text, cats + [actualworkingcat], site=page.site)
                page.put(newtext, summary=self.opt.summary)
    if checklinks:
        self.checklinks(page)
def duplicateSortKey(self, match):
    text = match.group()
    matches = list(self.defaultsortR.finditer(text))
    if not matches:
        return text
    defaultsort = matches.pop().group(1).strip()
    categories = textlib.getCategoryLinks(text, site=self.site)
    changed = False
    for category in categories:
        if self.tidy_sortkey(category.sortKey) == defaultsort:
            category.sortKey = None
            changed = True
    if changed:
        categories.sort(key=self.sort_category)
        before, _, after = textlib.replaceCategoryLinks(
            text, categories, self.site).rpartition('\n\n')  # fixme: safer
        return before + '\n' + after
    else:
        return text
def include(pl, checklinks=True, realinclude=True, linkterm=None,
            summary=''):
    cl = checklinks
    if linkterm:
        actualworkingcat = pywikibot.Category(mysite, workingcat.title(),
                                              sortKey=linkterm)
    else:
        actualworkingcat = workingcat
    if realinclude:
        try:
            text = pl.get()
        except pywikibot.NoPage:
            pass
        except pywikibot.IsRedirectPage:
            cl = True
            pass
        else:
            cats = [x for x in pl.categories()]
            if workingcat not in cats:
                cats = [x for x in pl.categories()]
                for c in cats:
                    if c in parentcats:
                        if removeparent:
                            pl.change_category(actualworkingcat,
                                               summary=summary)
                            break
                else:
                    pl.put(textlib.replaceCategoryLinks(
                        text, cats + [actualworkingcat], site=pl.site),
                        summary=summary)
    if cl:
        if checkforward:
            for page2 in pl.linkedPages():
                if needcheck(page2):
                    tocheck.append(page2)
                    checked[page2] = page2
        if checkbackward:
            for refPage in pl.getReferences():
                if needcheck(refPage):
                    tocheck.append(refPage)
                    checked[refPage] = refPage
def categorize_file_page(site, page, p=0):
    status = {"f": 0, "p": 1, "w": 0, "e": 0}
    pywikibot.output(f"Page {p + 1}:")
    is_file_page = page.is_filepage()
    file_page = None
    file_info = None
    if is_file_page:
        file_page = pywikibot.FilePage(page)
        try:
            file_info = file_page.latest_file_info
        except (NoPage, PageRelatedError):
            is_file_page = False
    if not is_file_page:
        pywikibot.error("Page \"" + page.title(as_link=True)
                        + "\" is not a file page. Skipping page...")
        pywikibot.output("")
        status["e"] += 1
        return status

    uri = file_page.full_url()
    mime = file_info.mime.lower()
    pywikibot.output(" Title: " + file_page.title(as_link=True))
    pywikibot.output(" URI: " + uri)
    pywikibot.output(" MIME type: " + mime)

    # Find a matching category for the file's MIME type or extension.
    mime_categories = flattened_categories["mime_categories"]
    mime_pattern_category_regexes = flattened_categories[
        "mime_pattern_category_regexes"]
    found_category = None
    if mime in mime_categories:
        found_category = mime_categories[mime]
    else:
        pywikibot.warning(
            f"Unrecognized MIME type \"{mime}\". Attempting to search by regex...")
        status["w"] += 1
        for pattern_category_regex in mime_pattern_category_regexes:
            regex = pattern_category_regex["regex"]
            if regex.search(mime) is not None:
                category = pattern_category_regex["category"]
                pywikibot.warning(
                    f"Found category \"{category}\" for unrecognized MIME type \"{mime}\".")
                status["w"] += 1
                found_category = category
                break
    if found_category is None:
        pywikibot.warning(
            f"No category found for MIME type \"{mime}\". Attempting to search by file extension...")
        status["w"] += 1
        file_extension = os.path.splitext(uri)[1][1:].strip().lower()
        if len(file_extension) > 0:
            extension_categories = flattened_categories[
                "extension_categories"]
            extension_pattern_category_regexes = flattened_categories[
                "extension_pattern_category_regexes"]
            if file_extension in extension_categories:
                found_category = extension_categories[file_extension]
            else:
                pywikibot.warning(
                    f"Unrecognized file extension \"{file_extension}\". Attempting to search by regex...")
                status["w"] += 1
                for pattern_category_regex in extension_pattern_category_regexes:
                    regex = pattern_category_regex["regex"]
                    if regex.search(file_extension) is not None:
                        category = pattern_category_regex["category"]
                        pywikibot.warning(
                            f"Found category \"{category}\" for unrecognized file extension \"{file_extension}\".")
                        status["w"] += 1
                        found_category = category
                        break
    if found_category is None:
        pywikibot.error(
            f"No category found for MIME type \"{mime}\" or file extension \"{file_extension}\". Skipping file page...")
        pywikibot.output("")
        status["e"] += 1
        return status

    # Build categories, and add them to the file page.
    categories = build_categories(found_category)
    pywikibot.output(" Add categories: " + ", ".join(categories))
    page_categories = []
    category_wikilinks = []
    for category in categories:
        page_category = pywikibot.Page(site, "Category:" + category)
        page_categories.append(page_category)
        category_wikilink = "[[Category:{0}|{0}]]".format(category)
        category_wikilinks.append(category_wikilink)
    file_page.text = textlib.replaceCategoryLinks(file_page.text,
                                                  page_categories,
                                                  site=file_page.site,
                                                  addOnly=True)
    summary = "Add the {0} {1}.".format(
        "categor" + ("y" if len(category_wikilinks) <= 1 else "ies"),
        ", ".join(category_wikilinks))
    file_page.save(summary=summary, minor=False)
    status["f"] += 1
    return status
def standardizePageFooter(self, text): """ Standardize page footer. Makes sure that interwiki links and categories are put into the correct position and into the right order. This combines the old instances of standardizeInterwiki and standardizeCategories. The page footer consists of the following parts in that sequence: 1. categories 2. additional information depending on the local site policy 3. interwiki """ categories = [] interwiki_links = [] # get categories if not self.template: categories = textlib.getCategoryLinks(text, site=self.site) if not self.talkpage: subpage = False if self.template: try: tmpl, loc = moved_links[self.site.code] del tmpl except KeyError: loc = None if loc is not None and loc in self.title: subpage = True # get interwiki interwiki_links = textlib.getLanguageLinks( text, insite=self.site, template_subpage=subpage) # remove interwiki text = textlib.removeLanguageLinks(text, site=self.site) # add categories, main to top if categories: # TODO: Sort categories in alphabetic order, e.g. using # categories.sort()? (T100265) # TODO: Get main categories from Wikidata? main = pywikibot.Category(self.site, 'Category:' + self.title, sort_key=' ') if main in categories: categories.pop(categories.index(main)) categories.insert(0, main) text = textlib.replaceCategoryLinks(text, categories, site=self.site) # add interwiki if interwiki_links: text = textlib.replaceLanguageLinks(text, interwiki_links, site=self.site, template=self.template, template_subpage=subpage) return text
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside, site,
                                                   True)
            # Dealing the stars' issue
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(
                    '(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext,
                                                   interwikiInside, site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone loads it as a module, maybe it's not so useful to put
        # the text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n',
                    automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s'
                                     % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
def run(self): """Start the bot.""" # Run the generator which will yield Pages which might need to be # changed. for page in self.generator: if self.isTitleExcepted(page.title()): pywikibot.output( 'Skipping {0} because the title is on the exceptions list.' .format(page.title(as_link=True))) continue try: # Load the page's text from the wiki original_text = page.get(get_redirect=True) if not page.canBeEdited(): pywikibot.output("You can't edit page " + page.title(as_link=True)) continue except pywikibot.NoPage: pywikibot.output('Page {0} not found'.format( page.title(as_link=True))) continue applied = set() new_text = original_text last_text = None context = 0 while True: if self.isTextExcepted(new_text): pywikibot.output('Skipping {0} because it contains text ' 'that is on the exceptions list.'.format( page.title(as_link=True))) break while new_text != last_text: last_text = new_text new_text = self.apply_replacements(last_text, applied, page) if not self.recursive: break if new_text == original_text: pywikibot.output('No changes were necessary in ' + page.title(as_link=True)) break if hasattr(self, 'addedCat'): # Fetch only categories in wikitext, otherwise the others # will be explicitly added. cats = textlib.getCategoryLinks(new_text, site=page.site) if self.addedCat not in cats: cats.append(self.addedCat) new_text = textlib.replaceCategoryLinks(new_text, cats, site=page.site) # Show the title of the page we're working on. # Highlight the title in purple. pywikibot.output( color_format('\n\n>>> {lightpurple}{0}{default} <<<', page.title())) pywikibot.showDiff(original_text, new_text, context=context) if self.getOption('always'): break choice = pywikibot.input_choice( 'Do you want to accept these changes?', [('Yes', 'y'), ('No', 'n'), ('Edit original', 'e'), ('edit Latest', 'l'), ('open in Browser', 'b'), ('More context', 'm'), ('All', 'a')], default='N') if choice == 'm': context = context * 3 if context else 3 continue if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(original_text) # if user didn't press Cancel if as_edited and as_edited != new_text: new_text = as_edited continue if choice == 'l': editor = editarticle.TextEditor() as_edited = editor.edit(new_text) # if user didn't press Cancel if as_edited and as_edited != new_text: new_text = as_edited # prevent changes from being applied again last_text = new_text continue if choice == 'b': pywikibot.bot.open_webbrowser(page) try: original_text = page.get(get_redirect=True, force=True) except pywikibot.NoPage: pywikibot.output('Page {0} has been deleted.'.format( page.title())) break new_text = original_text last_text = None continue if choice == 'a': self.options['always'] = True if choice == 'y': page.text = new_text page.save(summary=self.generate_summary(applied), asynchronous=True, callback=self._replace_async_callback, quiet=True) while not self._pending_processed_titles.empty(): proc_title, res = self._pending_processed_titles.get() pywikibot.output('Page {0}{1} saved'.format( proc_title, '' if res else ' not')) # choice must be 'N' break if self.getOption('always') and new_text != original_text: try: page.text = new_text page.save(summary=self.generate_summary(applied), callback=self._replace_sync_callback, quiet=True) except pywikibot.EditConflict: pywikibot.output( 'Skipping {0} because of edit conflict'.format( page.title(), )) except pywikibot.SpamfilterError as e: pywikibot.output( 'Cannot change {0} because of blacklist entry {1}'. 
format(page.title(), e.url)) except pywikibot.LockedPage: pywikibot.output('Skipping {0} (locked page)'.format( page.title(), )) except pywikibot.PageNotSaved as error: pywikibot.output('Error putting page: {0}'.format( error.args, )) if self._pending_processed_titles.qsize() > 50: while not self._pending_processed_titles.empty(): proc_title, res = self._pending_processed_titles.get() pywikibot.output('Page {0}{1} saved'.format( proc_title, '' if res else ' not'))
def test_standard_links(self):
    cats = textlib.getCategoryLinks(self.old, site=self.site)
    new = textlib.replaceCategoryLinks(self.old, cats, site=self.site)
    self.assertEqual(self.old, new)
def assertRoundtripCategory(self, text, catcount):
    cats = textlib.getCategoryLinks(text)
    self.assertEqual(len(cats), catcount)
    self.assertEqual(text,
                     textlib.replaceCategoryLinks(text, cats,
                                                  site=self.site))
def run(self): """Start the bot.""" # Run the generator which will yield Pages which might need to be # changed. for page in self.generator: if self.isTitleExcepted(page.title()): pywikibot.output( u'Skipping %s because the title is on the exceptions list.' % page.title(asLink=True)) continue try: # Load the page's text from the wiki original_text = page.get(get_redirect=True) if not page.canBeEdited(): pywikibot.output(u"You can't edit page %s" % page.title(asLink=True)) continue except pywikibot.NoPage: pywikibot.output(u'Page %s not found' % page.title(asLink=True)) continue applied = set() new_text = original_text while True: if self.isTextExcepted(new_text): pywikibot.output(u'Skipping %s because it contains text ' u'that is on the exceptions list.' % page.title(asLink=True)) break last_text = None while new_text != last_text: last_text = new_text new_text = self.apply_replacements(last_text, applied) if not self.recursive: break if new_text == original_text: pywikibot.output(u'No changes were necessary in %s' % page.title(asLink=True)) break if hasattr(self, 'addedCat'): # Fetch only categories in wikitext, otherwise the others will # be explicitly added. cats = textlib.getCategoryLinks(new_text, site=page.site) if self.addedCat not in cats: cats.append(self.addedCat) new_text = textlib.replaceCategoryLinks(new_text, cats, site=page.site) # Show the title of the page we're working on. # Highlight the title in purple. pywikibot.output( u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title()) pywikibot.showDiff(original_text, new_text) if self.getOption('always'): break choice = pywikibot.input_choice( u'Do you want to accept these changes?', [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'), ('open in Browser', 'b'), ('all', 'a')], default='N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(original_text) # if user didn't press Cancel if as_edited and as_edited != new_text: new_text = as_edited continue if choice == 'b': pywikibot.bot.open_webbrowser(page) try: original_text = page.get(get_redirect=True, force=True) except pywikibot.NoPage: pywikibot.output(u'Page %s has been deleted.' % page.title()) break new_text = original_text continue if choice == 'a': self.options['always'] = True if choice == 'y': page.put_async(new_text, self.generate_summary(applied), callback=self.count_changes) # choice must be 'N' break if self.getOption('always') and new_text != original_text: try: page.put(new_text, self.generate_summary(applied), callback=self.count_changes) except pywikibot.EditConflict: pywikibot.output(u'Skipping %s because of edit conflict' % (page.title(), )) except pywikibot.SpamfilterError as e: pywikibot.output( u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url)) except pywikibot.LockedPage: pywikibot.output(u'Skipping %s (locked page)' % (page.title(), )) except pywikibot.PageNotSaved as error: pywikibot.output(u'Error putting page: %s' % (error.args, ))
def standardizePageFooter(self, text): """ Standardize page footer. Makes sure that interwiki links and categories are put to the correct position and into the right order. This combines the old instances standardizeInterwiki and standardizeCategories. The page footer has the following section in that sequence: 1. categories 2. ## TODO: template beyond categories ## 3. additional information depending on local site policy 4. interwiki links """ categories = None interwikiLinks = None # Pywikibot is no longer allowed to touch categories on the # German Wikipedia. See # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22 # ignoring nn-wiki of cause of the comment line above iw section if not self.template and '{{Personendaten' not in text and \ '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \ self.site.code not in ('et', 'it', 'bg', 'ru'): categories = textlib.getCategoryLinks(text, site=self.site) if not self.talkpage: # and pywikibot.calledModuleName() <> 'interwiki': subpage = False if self.template: loc = None try: tmpl, loc = moved_links[self.site.code] del tmpl except KeyError: pass if loc is not None and loc in self.title: subpage = True interwikiLinks = textlib.getLanguageLinks( text, insite=self.site, template_subpage=subpage) # Removing the interwiki text = textlib.removeLanguageLinks(text, site=self.site) # Adding categories if categories: # TODO: Sorting categories in alphabetic order. # e.g. using categories.sort() # TODO: Taking main cats to top # for name in categories: # if (re.search(u"(.+?)\|(.{,1}?)",name.title()) or # name.title() == name.title().split(":")[0] + title): # categories.remove(name) # categories.insert(0, name) text = textlib.replaceCategoryLinks(text, categories, site=self.site) # Adding the interwiki if interwikiLinks: text = textlib.replaceLanguageLinks(text, interwikiLinks, site=self.site, template=self.template, template_subpage=subpage) return text
def add_text(page, addText, summary=None, regexSkip=None, regexSkipUrl=None, always=False, up=False, putText=True, oldTextGiven=None, reorderEnabled=True, create=False): """ Add text to a page. @rtype: tuple of (text, newtext, always) """ site = page.site if not summary: summary = i18n.twtranslate(site, 'add_text-adding', {'adding': addText[:200]}) if putText: pywikibot.output('Loading {}...'.format(page.title())) text = get_text(page, oldTextGiven, create) if text is None: return (False, False, always) # Understand if the bot has to skip the page or not # In this way you can use both -except and -excepturl if regexSkipUrl is not None: url = page.full_url() result = re.findall(regexSkipUrl, site.getUrl(url)) if result != []: pywikibot.output('Exception! regex (or word) used with -exceptUrl ' 'is in the page. Skip!\n' 'Match was: {}'.format(result)) return (False, False, always) if regexSkip is not None: result = re.findall(regexSkip, text) if result != []: pywikibot.output('Exception! regex (or word) used with -except ' 'is in the page. Skip!\n' 'Match was: {}'.format(result)) return (False, False, always) # If not up, text put below if not up: newtext = text # Translating the \\n into binary \n addText = addText.replace('\\n', config.line_separator) if (reorderEnabled): # Getting the categories categoriesInside = textlib.getCategoryLinks(newtext, site) # Deleting the categories newtext = textlib.removeCategoryLinks(newtext, site) # Getting the interwiki interwikiInside = textlib.getLanguageLinks(newtext, site) # Removing the interwiki newtext = textlib.removeLanguageLinks(newtext, site) # Adding the text newtext += '{}{}'.format(config.line_separator, addText) # Reputting the categories newtext = textlib.replaceCategoryLinks(newtext, categoriesInside, site, True) # Adding the interwiki newtext = textlib.replaceLanguageLinks(newtext, interwikiInside, site) else: newtext += '{}{}'.format(config.line_separator, addText) else: newtext = addText + config.line_separator + text if putText and text != newtext: pywikibot.output( color_format('\n\n>>> {lightpurple}{0}{default} <<<', page.title())) pywikibot.showDiff(text, newtext) # Let's put the changes. error_count = 0 while True: # If someone load it as module, maybe it's not so useful to put the # text in the page if not putText: return (text, newtext, always) if not always: try: choice = pywikibot.input_choice( 'Do you want to accept these changes?', [('Yes', 'y'), ('No', 'n'), ('All', 'a'), ('open in Browser', 'b')], 'n') except QuitKeyboardInterrupt: sys.exit('User quit bot run.') if choice == 'a': always = True elif choice == 'n': return (False, False, always) elif choice == 'b': pywikibot.bot.open_webbrowser(page) if always or choice == 'y': result = put_text(page, newtext, summary, error_count, asynchronous=not always) if result is not None: return (result, result, always) error_count += 1
def treat(self, page): """Work on each page retrieved from generator.""" original_text = page.text applied = set() new_text = original_text last_text = None context = 0 while True: if self.isTextExcepted(new_text): pywikibot.output( 'Skipping {} because it contains text ' 'that is on the exceptions list.'.format(page)) return while new_text != last_text: last_text = new_text new_text = self.apply_replacements(last_text, applied, page) if not self.opt.recursive: break if new_text == original_text: pywikibot.output('No changes were necessary in ' + page.title(as_link=True)) return if self.opt.addcat: # Fetch only categories in wikitext, otherwise the others # will be explicitly added. cats = textlib.getCategoryLinks(new_text, site=page.site) if self.opt.addcat not in cats: cats.append(self.opt.addcat) new_text = textlib.replaceCategoryLinks(new_text, cats, site=page.site) # Show the title of the page we're working on. # Highlight the title in purple. self.current_page = page pywikibot.showDiff(original_text, new_text, context=context) if self.opt.always: break choice = pywikibot.input_choice( 'Do you want to accept these changes?', [('Yes', 'y'), ('No', 'n'), ('Edit original', 'e'), ('edit Latest', 'l'), ('open in Browser', 'b'), ('More context', 'm'), ('All', 'a')], default='N') if choice == 'm': context = context * 3 if context else 3 continue if choice in ('e', 'l'): text_editor = editor.TextEditor() edit_text = original_text if choice == 'e' else new_text as_edited = text_editor.edit(edit_text) # if user didn't press Cancel if as_edited and as_edited != new_text: new_text = as_edited if choice == 'l': # prevent changes from being applied again last_text = new_text continue if choice == 'b': pywikibot.bot.open_webbrowser(page) try: original_text = page.get(get_redirect=True, force=True) except pywikibot.NoPage: pywikibot.output('Page {0} has been deleted.'.format( page.title())) break new_text = original_text last_text = None continue if choice == 'a': self.opt.always = True if choice == 'y': self.save(page, original_text, new_text, applied, show_diff=False, quiet=True, callback=self._replace_async_callback, asynchronous=True) while not self._pending_processed_titles.empty(): proc_title, res = self._pending_processed_titles.get() pywikibot.output('Page {0}{1} saved'.format( proc_title, '' if res else ' not')) # choice must be 'N' break if self.opt.always and new_text != original_text: self.save(page, original_text, new_text, applied, show_diff=False, asynchronous=False)
def standardizePageFooter(self, text): """ Standardize page footer. Makes sure that interwiki links, categories and star templates are put to the correct position and into the right order. This combines the old instances standardizeInterwiki and standardizeCategories The page footer has the following section in that sequence: 1. categories 2. ## TODO: template beyond categories ## 3. additional information depending on local site policy 4. stars templates for featured and good articles 5. interwiki links """ starsList = [ u'bueno', u'bom interwiki', u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed', u'destacado', u'destaca[tu]', u'enllaç[ _]ad', u'enllaz[ _]ad', u'leam[ _]vdc', u'legătură[ _]a[bcf]', u'liamm[ _]pub', u'lien[ _]adq', u'lien[ _]ba', u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt', u'liên[ _]kết[ _]chọn[ _]lọc', u'ligam[ _]adq', u'ligazón[ _]a[bd]', u'ligoelstara', u'ligoleginda', u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km', u'link[ _]sm', u'linkfa', u'na[ _]lotura', u'nasc[ _]ar', u'tengill[ _][úg]g', u'ua', u'yüm yg', u'רא', u'وصلة مقالة جيدة', u'وصلة مقالة مختارة', ] categories = None interwikiLinks = None allstars = [] # The PyWikipediaBot is no longer allowed to touch categories on the # German Wikipedia. See # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22 # ignoring nn-wiki of cause of the comment line above iw section if not self.template and '{{Personendaten' not in text and \ '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \ self.site.code not in ('et', 'it', 'bg', 'ru'): categories = textlib.getCategoryLinks(text, site=self.site) if not self.talkpage: # and pywikibot.calledModuleName() <> 'interwiki': subpage = False if self.template: loc = None try: tmpl, loc = moved_links[self.site.code] del tmpl except KeyError: pass if loc is not None and loc in self.title: subpage = True interwikiLinks = textlib.getLanguageLinks( text, insite=self.site, template_subpage=subpage) # Removing the interwiki text = textlib.removeLanguageLinks(text, site=self.site) # Removing the stars' issue starstext = textlib.removeDisabledParts(text) for star in starsList: regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I) found = regex.findall(starstext) if found != []: text = regex.sub('', text) allstars += found # Adding categories if categories: # TODO: Sorting categories in alphabetic order. # e.g. using categories.sort() # TODO: Taking main cats to top # for name in categories: # if re.search(u"(.+?)\|(.{,1}?)",name.title()) or name.title()==name.title().split(":")[0]+title: # categories.remove(name) # categories.insert(0, name) text = textlib.replaceCategoryLinks(text, categories, site=self.site) # Adding stars templates if allstars: text = text.strip() + self.site.family.interwiki_text_separator allstars.sort() for element in allstars: text += '%s%s' % (element.strip(), config.line_separator) pywikibot.log(u'%s' % element.strip()) # Adding the interwiki if interwikiLinks: text = textlib.replaceLanguageLinks(text, interwikiLinks, site=self.site, template=self.template, template_subpage=subpage) return text
def run(self): """Start the bot.""" # Run the generator which will yield Pages which might need to be # changed. for page in self.generator: if self.isTitleExcepted(page.title()): pywikibot.output( u'Skipping {0!s} because the title is on the exceptions list.'.format(page.title(asLink=True))) continue try: # Load the page's text from the wiki original_text = page.get(get_redirect=True) if not page.canBeEdited(): pywikibot.output(u"You can't edit page {0!s}".format(page.title(asLink=True))) continue except pywikibot.NoPage: pywikibot.output(u'Page {0!s} not found'.format(page.title(asLink=True))) continue applied = set() new_text = original_text while True: if self.isTextExcepted(new_text): pywikibot.output(u'Skipping %s because it contains text ' u'that is on the exceptions list.' % page.title(asLink=True)) break last_text = None while new_text != last_text: last_text = new_text new_text = self.apply_replacements(last_text, applied, page) if not self.recursive: break if new_text == original_text: pywikibot.output(u'No changes were necessary in {0!s}'.format(page.title(asLink=True))) break if hasattr(self, 'addedCat'): # Fetch only categories in wikitext, otherwise the others will # be explicitly added. cats = textlib.getCategoryLinks(new_text, site=page.site) if self.addedCat not in cats: cats.append(self.addedCat) new_text = textlib.replaceCategoryLinks(new_text, cats, site=page.site) # Show the title of the page we're working on. # Highlight the title in purple. pywikibot.output(color_format( '\n\n>>> {lightpurple}{0}{default} <<<', page.title())) pywikibot.showDiff(original_text, new_text) if self.getOption('always'): break choice = pywikibot.input_choice( u'Do you want to accept these changes?', [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'), ('open in Browser', 'b'), ('all', 'a')], default='N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(original_text) # if user didn't press Cancel if as_edited and as_edited != new_text: new_text = as_edited continue if choice == 'b': pywikibot.bot.open_webbrowser(page) try: original_text = page.get(get_redirect=True, force=True) except pywikibot.NoPage: pywikibot.output(u'Page {0!s} has been deleted.'.format(page.title())) break new_text = original_text continue if choice == 'a': self.options['always'] = True if choice == 'y': page.text = new_text page.save(summary=self.generate_summary(applied), async=True, callback=self._count_changes, quiet=True) while not self._pending_processed_titles.empty(): proc_title, res = self._pending_processed_titles.get() pywikibot.output('Page {0!s}{1!s} saved'.format(proc_title, '' if res else ' not')) # choice must be 'N' break if self.getOption('always') and new_text != original_text: try: page.text = new_text page.save(summary=self.generate_summary(applied), callback=self._count_changes, quiet=True) except pywikibot.EditConflict: pywikibot.output(u'Skipping {0!s} because of edit conflict'.format(page.title())) except pywikibot.SpamfilterError as e: pywikibot.output( u'Cannot change {0!s} because of blacklist entry {1!s}'.format(page.title(), e.url)) except pywikibot.LockedPage: pywikibot.output(u'Skipping {0!s} (locked page)'.format(page.title())) except pywikibot.PageNotSaved as error: pywikibot.output(u'Error putting page: {0!s}'.format(error.args)) if self._pending_processed_titles.qsize() > 50: while not self._pending_processed_titles.empty(): proc_title, res = self._pending_processed_titles.get() pywikibot.output('Page {0!s}{1!s} saved'.format(proc_title, '' if res else ' not'))
def run(self): """Start the bot.""" # Run the generator which will yield Pages which might need to be # changed. for page in self.generator: if self.isTitleExcepted(page.title()): pywikibot.output( u'Skipping %s because the title is on the exceptions list.' % page.title(asLink=True)) continue try: # Load the page's text from the wiki original_text = page.get(get_redirect=True) if not page.canBeEdited(): pywikibot.output(u"You can't edit page %s" % page.title(asLink=True)) continue except pywikibot.NoPage: pywikibot.output(u'Page %s not found' % page.title(asLink=True)) continue applied = set() new_text = original_text while True: if self.isTextExcepted(new_text): pywikibot.output(u'Skipping %s because it contains text ' u'that is on the exceptions list.' % page.title(asLink=True)) break last_text = None while new_text != last_text: last_text = new_text new_text = self.apply_replacements(last_text, applied) if not self.recursive: break if new_text == original_text: pywikibot.output(u'No changes were necessary in %s' % page.title(asLink=True)) break if hasattr(self, "addedCat"): cats = page.categories(nofollow_redirects=True) if self.addedCat not in cats: cats.append(self.addedCat) new_text = textlib.replaceCategoryLinks(new_text, cats, site=page.site) # Show the title of the page we're working on. # Highlight the title in purple. pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title()) pywikibot.showDiff(original_text, new_text) if self.acceptall: break choice = pywikibot.input_choice( u'Do you want to accept these changes?', [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'), ('open in Browser', 'b'), ('all', 'a')], default='N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(original_text) # if user didn't press Cancel if as_edited and as_edited != new_text: new_text = as_edited continue if choice == 'b': webbrowser.open("http://%s%s" % ( page.site.hostname(), page.site.nice_get_address(page.title(asUrl=True)) )) i18n.input('pywikibot-enter-finished-browser') try: original_text = page.get(get_redirect=True, force=True) except pywikibot.NoPage: pywikibot.output(u'Page %s has been deleted.' % page.title()) break new_text = original_text continue if choice == 'a': self.acceptall = True if choice == 'y': page.put_async(new_text, self.generate_summary(applied), callback=self.count_changes) # choice must be 'N' break if self.acceptall and new_text != original_text: try: page.put(new_text, self.generate_summary(applied), callback=self.count_changes) except pywikibot.EditConflict: pywikibot.output(u'Skipping %s because of edit conflict' % (page.title(),)) except pywikibot.SpamfilterError as e: pywikibot.output( u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url)) except pywikibot.LockedPage: pywikibot.output(u'Skipping %s (locked page)' % (page.title(),)) except pywikibot.PageNotSaved as error: pywikibot.output(u'Error putting page: %s' % (error.args,))
def run(self): """Start the bot.""" # Run the generator which will yield Pages which might need to be # changed. for page in self.generator: if self.isTitleExcepted(page.title()): pywikibot.output("Skipping %s because the title is on the exceptions list." % page.title(asLink=True)) continue try: # Load the page's text from the wiki original_text = page.get(get_redirect=True) if not page.canBeEdited(): pywikibot.output("You can't edit page %s" % page.title(asLink=True)) continue except pywikibot.NoPage: pywikibot.output("Page %s not found" % page.title(asLink=True)) continue applied = set() new_text = original_text while True: if self.isTextExcepted(new_text): pywikibot.output( "Skipping %s because it contains text " "that is on the exceptions list." % page.title(asLink=True) ) break last_text = None while new_text != last_text: last_text = new_text new_text = self.apply_replacements(last_text, applied, page) if not self.recursive: break if new_text == original_text: pywikibot.output("No changes were necessary in %s" % page.title(asLink=True)) break if hasattr(self, "addedCat"): # Fetch only categories in wikitext, otherwise the others will # be explicitly added. cats = textlib.getCategoryLinks(new_text, site=page.site) if self.addedCat not in cats: cats.append(self.addedCat) new_text = textlib.replaceCategoryLinks(new_text, cats, site=page.site) # Show the title of the page we're working on. # Highlight the title in purple. pywikibot.output(color_format("\n\n>>> {lightpurple}{0}{default} <<<", page.title())) pywikibot.showDiff(original_text, new_text) if self.getOption("always"): break choice = pywikibot.input_choice( "Do you want to accept these changes?", [("Yes", "y"), ("No", "n"), ("Edit", "e"), ("open in Browser", "b"), ("all", "a")], default="N", ) if choice == "e": editor = editarticle.TextEditor() as_edited = editor.edit(original_text) # if user didn't press Cancel if as_edited and as_edited != new_text: new_text = as_edited continue if choice == "b": pywikibot.bot.open_webbrowser(page) try: original_text = page.get(get_redirect=True, force=True) except pywikibot.NoPage: pywikibot.output("Page %s has been deleted." % page.title()) break new_text = original_text continue if choice == "a": self.options["always"] = True if choice == "y": page.text = new_text page.save( summary=self.generate_summary(applied), async=True, callback=self._count_changes, quiet=True ) while not self._pending_processed_titles.empty(): proc_title, res = self._pending_processed_titles.get() pywikibot.output("Page %s%s saved" % (proc_title, "" if res else " not")) # choice must be 'N' break if self.getOption("always") and new_text != original_text: try: page.text = new_text page.save(summary=self.generate_summary(applied), callback=self._count_changes, quiet=True) except pywikibot.EditConflict: pywikibot.output("Skipping %s because of edit conflict" % (page.title(),)) except pywikibot.SpamfilterError as e: pywikibot.output("Cannot change %s because of blacklist entry %s" % (page.title(), e.url)) except pywikibot.LockedPage: pywikibot.output("Skipping %s (locked page)" % (page.title(),)) except pywikibot.PageNotSaved as error: pywikibot.output("Error putting page: %s" % (error.args,)) if self._pending_processed_titles.qsize() > 50: while not self._pending_processed_titles.empty(): proc_title, res = self._pending_processed_titles.get() pywikibot.output("Page %s%s saved" % (proc_title, "" if res else " not"))
def test_standard_links(self): """Test getting and replacing categories.""" cats = textlib.getCategoryLinks(self.old, site=self.site) new = textlib.replaceCategoryLinks(self.old, cats, site=self.site) self.assertEqual(self.old, new)
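A small stand-alone sketch of the round trip the tests above exercise: extracting category links and re-inserting them should leave the category set unchanged. The site code and page title below are assumed placeholders, not taken from the source.

import pywikibot
from pywikibot import textlib

# Hypothetical example site and page title.
site = pywikibot.Site('en', 'wikipedia')
page = pywikibot.Page(site, 'Example')
text = page.text
cats = textlib.getCategoryLinks(text, site=site)
# Re-inserting the same categories normalises their placement at the page
# footer but keeps the category set identical.
new_text = textlib.replaceCategoryLinks(text, cats, site=site)
assert textlib.getCategoryLinks(new_text, site=site) == cats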
def add_text( page, addText: str, summary: Optional[str] = None, regexSkip: Optional[str] = None, regexSkipUrl: Optional[str] = None, always: bool = False, up: bool = False, putText: bool = True, oldTextGiven: Optional[str] = None, reorderEnabled: bool = True, create: bool = False ) -> Union[Tuple[bool, bool, bool], Tuple[str, str, bool]]: """ Add text to a page. @param page: The page to add text to @type page: pywikibot.page.BasePage @param addText: Text to add @param summary: Summary of changes. If None, beginning of addText is used. @param regexSkip: Abort if text on page matches @param regexSkipUrl: Abort if full url matches @param always: Always add text without user confirmation @param up: If True, add text to top of page, else add at bottom. @param putText: If True, save changes to the page, else return (_, newtext, _) @param oldTextGiven: If None fetch page text, else use this text @param reorderEnabled: If True place text above categories and interwiki, else place at page bottom. No effect if up = False. @param create: Create page if it does not exist @return: If putText=True: (success, success, always) else: (_, newtext, _) """ site = page.site if not summary: summary = i18n.twtranslate(site, 'add_text-adding', {'adding': addText[:200]}) if putText: pywikibot.output('Loading {}...'.format(page.title())) text = get_text(page, oldTextGiven, create) if text is None: return (False, False, always) # Understand if the bot has to skip the page or not # In this way you can use both -except and -excepturl if regexSkipUrl is not None: url = page.full_url() result = re.findall(regexSkipUrl, site.getUrl(url)) if result != []: pywikibot.output('Exception! regex (or word) used with -exceptUrl ' 'is in the page. Skip!\n' 'Match was: {}'.format(result)) return (False, False, always) if regexSkip is not None: result = re.findall(regexSkip, text) if result != []: pywikibot.output('Exception! regex (or word) used with -except ' 'is in the page. Skip!\n' 'Match was: {}'.format(result)) return (False, False, always) # If not up, text put below if not up: newtext = text # Translating the \\n into binary \n addText = addText.replace('\\n', '\n') if reorderEnabled: # Getting the categories categoriesInside = textlib.getCategoryLinks(newtext, site) # Deleting the categories newtext = textlib.removeCategoryLinks(newtext, site) # Getting the interwiki interwikiInside = textlib.getLanguageLinks(newtext, site) # Removing the interwiki newtext = textlib.removeLanguageLinks(newtext, site) # Adding the text newtext += '\n' + addText # Reputting the categories newtext = textlib.replaceCategoryLinks(newtext, categoriesInside, site, True) # Adding the interwiki newtext = textlib.replaceLanguageLinks(newtext, interwikiInside, site) else: newtext += '\n' + addText else: newtext = addText + '\n' + text if not putText: # If someone load it as module, maybe it's not so useful to put the # text in the page return (text, newtext, always) if text != newtext: pywikibot.output( color_format('\n\n>>> {lightpurple}{0}{default} <<<', page.title())) pywikibot.showDiff(text, newtext) # Let's put the changes. 
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')

            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)
                continue

        # either always or choice == 'y' is selected
        result = put_text(page, newtext, summary, error_count,
                          asynchronous=not always)
        if result is not None:
            return (result, result, always)
        error_count += 1
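# A minimal usage sketch for the add_text() helper defined above, assuming a
# working pywikibot user-config and that the target page exists; the site,
# page title and template are illustrative only. With putText=False the call
# is a dry run that returns the old and new text instead of saving.
import pywikibot

site = pywikibot.Site('en', 'wikipedia')
page = pywikibot.Page(site, 'Project:Sandbox')
oldtext, newtext, _ = add_text(page, '{{Commons|Sandbox}}', putText=False)
pywikibot.showDiff(oldtext, newtext)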
def apply(self, text, page):
    categories = textlib.getCategoryLinks(text)
    if len(categories) > len(set(categories)):
        deduplicate(categories)
        text = textlib.replaceCategoryLinks(text, categories, page.site)
    return text
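# deduplicate() is not defined in the snippet above; this is a hypothetical,
# order-preserving sketch of what such an in-place helper could look like.
def deduplicate(items):
    """Remove repeated entries from a list in place, keeping first occurrences."""
    seen = set()
    index = 0
    while index < len(items):
        if items[index] in seen:
            del items[index]
        else:
            seen.add(items[index])
            index += 1


cats = ['Alpha', 'Beta', 'Alpha']
deduplicate(cats)
print(cats)  # ['Alpha', 'Beta']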
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in its
    # interwiki links. starsList (defined elsewhere in the script) holds all
    # the templates used (in regex format) to make those stars appear.
    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven

    # Decide whether the bot has to skip the page or not.
    # In this way you can use both -except and -excepturl.
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output('Exception! regex (or word) used with '
                             '-exceptUrl is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)

    # If up is False, the text is put below the existing text
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Dealing with the stars' issue (a standalone sketch of this
            # regex follows after this function).
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If this is loaded as a module (putText=False), the caller gets the
        # text back instead of having it saved to the page.
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n',
                    automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
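# A standalone sketch of the star-template handling in add_text() above; the
# starsList entry and the sample wikitext are illustrative assumptions, and
# only the regular expression is exercised, so no wiki access is needed.
import re

starsList = ['link[ _]fa']
sample = 'Body text.\n{{Link FA|de}}\n[[de:Beispiel]]\n'
allstars = []
cleaned = sample
for star in starsList:
    regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
    found = regex.findall(sample)
    if found:
        cleaned = regex.sub('', cleaned)
        allstars += found

print(allstars)  # ['{{Link FA|de}}\n']
print(cleaned)   # the star template is removed; add_text() re-appends it just before the interwiki links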