def test_in_place_replace(self):
    """Test in-place category change is reversible."""
    dummy = pywikibot.Category(self.site, 'foo')
    dummy.sortKey = 'bah'

    cats = textlib.getCategoryLinks(self.old, site=self.site)

    # Sanity checking: replacing each category with a dummy and then
    # back again must round-trip to the original wikitext.  The loop
    # replaces the four copy-pasted blocks of the original.
    for cat in cats:
        temp = textlib.replaceCategoryInPlace(self.old, cat, dummy,
                                              site=self.site)
        self.assertNotEqual(temp, self.old)
        new = textlib.replaceCategoryInPlace(temp, dummy, cat,
                                             site=self.site)
        self.assertEqual(self.old, new)

    # The category list extracted from the final round-trip result
    # must match the original list.
    new_cats = textlib.getCategoryLinks(new, site=self.site)
    self.assertEqual(cats, new_cats)
def test_adjoining_links(self):
    """Test getting and replacing adjacent category links."""
    cats_std = textlib.getCategoryLinks(self.old, site=self.site)
    old = self.old.replace(config.LS, '')
    cats = textlib.getCategoryLinks(old, site=self.site)
    self.assertEqual(cats_std, cats)
    sep = config.LS
    config.line_separator = ''  # use an empty separator temporarily
    try:
        new = textlib.replaceCategoryLinks(old, cats, site=self.site)
    finally:
        # Restore the default separator even if the call or an
        # assertion fails; otherwise later tests see the empty one.
        config.line_separator = sep
    self.assertEqual(old, new)
def test_adjoining_links(self):
    """Test getting and replacing adjacent category links."""
    cats_std = textlib.getCategoryLinks(self.old, site=self.site)
    old = self.old.replace(config.LS, "")
    cats = textlib.getCategoryLinks(old, site=self.site)
    self.assertEqual(cats_std, cats)
    sep = config.LS
    config.line_separator = ""  # use an empty separator temporarily
    try:
        new = textlib.replaceCategoryLinks(old, cats, site=self.site)
    finally:
        # Restore the default separator even on failure; otherwise a
        # raised exception leaks the empty separator into other tests.
        config.line_separator = sep
    self.assertEqual(old, new)
def test_adjoining_links(self):
    """Test getting and replacing adjacent categories."""
    cats_std = textlib.getCategoryLinks(self.old, site=self.site)
    old = self.old.replace(config.LS, '')
    cats = textlib.getCategoryLinks(old, site=self.site)
    self.assertEqual(cats_std, cats)
    sep = config.LS
    config.line_separator = ''  # use an empty separator temporarily
    try:
        new = textlib.replaceCategoryLinks(old, cats, site=self.site)
    finally:
        # Restore the default separator even if replaceCategoryLinks
        # raises, so the global config is never left modified.
        config.line_separator = sep
    self.assertEqual(old, new)
def test_in_place_retain_sort(self):
    """Test in-place category change does not alter the sortkey."""
    # A replacement category without a sort key must inherit the
    # sort key of the category it replaces.
    replacement = pywikibot.Category(self.site, 'foo')
    self.assertIsNone(replacement.sortKey)
    cats = textlib.getCategoryLinks(self.old, site=self.site)
    original_key = cats[3].sortKey
    self.assertEqual(original_key, 'key')
    changed = textlib.replaceCategoryInPlace(
        self.old, cats[3], replacement, site=self.site)
    self.assertNotEqual(self.old, changed)
    inherited = textlib.getCategoryLinks(changed, site=self.site)[3]
    self.assertIsNotNone(inherited.sortKey)
    self.assertEqual(original_key, inherited.sortKey)
def run(self):
    """Run the bot."""
    if not all((self.opt.action, self.generator)):
        return
    catmode = (self.opt.action == 'categories')
    for page in self.generator:
        try:
            self.current_page = page
            commons = page.site.image_repository()
            # Work on a Page in page mode, a Category in category mode.
            commonspage = getattr(pywikibot,
                                  ('Page', 'Category')[catmode])(
                                      commons, page.title())
            try:
                commonspage.get(get_redirect=True)
                pagetitle = commonspage.title(with_ns=not catmode)
                if page.title() == pagetitle:
                    old_text = page.get()
                    text = old_text

                    # for Commons/Commonscat template
                    s = self.findTemplate.search(text)
                    # BUG FIX: the closing parenthesis of getattr()
                    # was misplaced so '.search(text)' was applied to
                    # the attribute *name* string inside the getattr
                    # call, raising AttributeError.  Fetch the compiled
                    # pattern first, then search.
                    s2 = getattr(self, 'findTemplate{}'.format(
                        (2, 3)[catmode])).search(text)
                    if s or s2:
                        pywikibot.output('** Already done.')
                    else:
                        cats = textlib.getCategoryLinks(
                            text, site=page.site)
                        # Append the template and re-add the category
                        # links after it.
                        text = textlib.replaceCategoryLinks(
                            '%s{{commons%s|%s}}'
                            % (text, ('', 'cat')[catmode], pagetitle),
                            cats, site=page.site)
                        comment = i18n.twtranslate(
                            page.site,
                            'commons_link{}-template-added'.format(
                                ('', '-cat')[catmode]))
                        try:
                            self.userPut(page, old_text, text,
                                         summary=comment)
                        except EditConflictError:
                            pywikibot.output(
                                'Skipping {} because of edit conflict'
                                .format(page.title()))
            except NoPageError:
                pywikibot.output('{} does not exist in Commons'.format(
                    page.__class__.__name__))
        except NoPageError:
            pywikibot.output('Page {} does not exist'.format(
                page.title()))
        except IsRedirectPageError:
            pywikibot.output('Page {} is a redirect; skipping.'.format(
                page.title()))
        except LockedPageError:
            pywikibot.output('Page {} is locked'.format(page.title()))
def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links and categories are put to the
    correct position and into the right order. This combines the old
    instances standardizeInterwiki and standardizeCategories.

    The page footer has the following section in that sequence:
    1. categories
    2. ## TODO: template beyond categories ##
    3. additional information depending on local site policy
    4. interwiki links
    """
    categories = None
    interwikiLinks = None

    # Pywikibot is no longer allowed to touch categories on the
    # German Wikipedia. See
    # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
    # ignoring nn-wiki of cause of the comment line above iw section
    if not self.template and '{{Personendaten' not in text and \
       '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
       self.site.code not in ('et', 'it', 'bg', 'ru'):
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:
        subpage = False
        if self.template:
            # moved_links may map this site to (template, location
            # marker); only the marker is used, so the template name
            # is discarded immediately.
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl
            except KeyError:
                loc = None
            if loc is not None and loc in self.title:
                subpage = True
        interwikiLinks = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # Removing the interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)

    # Adding categories
    if categories:
        # TODO: Sorting categories in alphabetic order.
        # e.g. using categories.sort()
        # TODO: Taking main cats to top
        text = textlib.replaceCategoryLinks(text, categories,
                                            site=self.site)
    # Adding the interwiki
    if interwikiLinks:
        text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)
    return text
def setUpClass(cls):
    """Set up the class with a shared wikitext fixture."""
    super(TestCategoryRearrangement, cls).setUpClass()
    cls.site = cls.get_site()
    # Four category links, separated by the configured line separator.
    cls.old = config.LS.join(
        ('[[Category:Cat1]]', '[[Category:Cat2|]]',
         '[[Category:Cat1| ]]', '[[Category:Cat2|key]]'))
    cls.cats = textlib.getCategoryLinks(cls.old, site=cls.site)
def get_final_categories(self):
    """
    Return final categories to keep.

    We keep any categories added after the first revision plus any
    categories in the new text which were not also present in the
    first revision and later removed.

    :return: list of pywikibot.Category
    """
    get_cats = textlib.getCategoryLinks
    # can't use page.categories() since some cats are embedded by templates
    first_cats = set(get_cats(self.first_text, self.site))
    last_cats = set(get_cats(self.last_text, self.site))
    new_cats = set(get_cats(self.new_text, self.site))
    # Categories present in the new text, minus those that were
    # deliberately removed since the first revision ...
    kept = new_cats - (first_cats - last_cats)
    # ... plus those added after the first revision.
    added_later = last_cats - first_cats
    return list(kept | added_later)
def apply(self, page, summaries=None, *args):
    """Apply the fix and re-sort the page's category links.

    BUG FIX: the original used a mutable default argument
    (``summaries=[]``), which is shared across calls; any mutation
    inside ``super().apply`` would leak between invocations.  The
    ``None`` sentinel is backward-compatible.
    """
    if summaries is None:
        summaries = []
    result = super().apply(page, summaries, *args)
    if result:
        # Only reorder categories when the parent fix changed something.
        categories = textlib.getCategoryLinks(page.text, site=self.site)
        categories.sort(key=self.sortCategories)
        page.text = textlib.replaceCategoryLinks(page.text, categories,
                                                 self.site)
    return result
def run(self):
    """Run the bot."""
    if not all((self.getOption('action'), self.generator)):
        return
    catmode = (self.getOption('action') == 'categories')
    for page in self.generator:
        try:
            self.current_page = page
            commons = page.site.image_repository()
            # Work on a Page in page mode, a Category in category mode.
            commonspage = getattr(pywikibot,
                                  ('Page', 'Category')[catmode])(
                commons, page.title())
            try:
                commonspage.get(get_redirect=True)
                pagetitle = commonspage.title(withNamespace=not catmode)
                if page.title() == pagetitle:
                    oldText = page.get()
                    text = oldText

                    # for Commons/Commonscat template
                    s = self.findTemplate.search(text)
                    s2 = getattr(self, 'findTemplate%d'
                                 % (2, 3)[catmode]).search(text)
                    if s or s2:
                        pywikibot.output(u'** Already done.')
                    else:
                        cats = textlib.getCategoryLinks(text,
                                                        site=page.site)
                        # Append the {{commons}}/{{commonscat}}
                        # template and re-add the categories after it.
                        text = textlib.replaceCategoryLinks(
                            u'%s{{commons%s|%s}}'
                            % (text, ('', 'cat')[catmode], pagetitle),
                            cats, site=page.site)
                        comment = i18n.twtranslate(
                            page.site, 'commons_link%s-template-added'
                            % ('', '-cat')[catmode])
                        try:
                            self.userPut(page, oldText, text,
                                         summary=comment)
                        except pywikibot.EditConflict:
                            pywikibot.output(
                                u'Skipping %s because of edit conflict'
                                % page.title())
            except pywikibot.NoPage:
                pywikibot.output(u'%s does not exist in Commons'
                                 % page.__class__.__name__)
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s does not exist' % page.title())
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'Page %s is a redirect; skipping.'
                             % page.title())
        except pywikibot.LockedPage:
            pywikibot.output(u'Page %s is locked' % page.title())
def test_adjoining_links(self):
    """Test getting and replacing adjacent category links."""
    old = self.old.replace(config.LS, '')
    cats = textlib.getCategoryLinks(old, site=self.site)
    self.assertEqual(self.cats, cats)
    sep = config.LS
    config.line_separator = ''  # use an empty separator temporarily
    try:
        new = textlib.replaceCategoryLinks(old, cats, site=self.site)
        self.assertEqual(old, new)
    finally:
        # Restore the default separator even when the assertion fails;
        # the original restored it only on success, leaking the empty
        # separator into subsequent tests.
        config.line_separator = sep
def get_category_status(site, page, cat):
    """Return whether *page* currently contains category *cat*.

    :param site: site the category name belongs to
    :param page: page whose wikitext is inspected
    :param cat: category name (without namespace prefix handling
        beyond what pywikibot.Category performs)
    :return: True if the category link is present, else False
    """
    cats = textlib.getCategoryLinks(page.text)
    catpl = pywikibot.Category(site, cat)
    # any() short-circuits on the first match instead of scanning the
    # whole list with a manual flag variable.
    return any(c.title() == catpl.title() for c in cats)
def run(self):
    """Run the bot."""
    if not all((self.getOption('action'), self.generator)):
        return
    catmode = (self.getOption('action') == 'categories')
    for page in self.generator:
        try:
            self.current_page = page
            commons = page.site.image_repository()
            # Page object in page mode, Category object in category
            # mode.
            commonspage = getattr(pywikibot,
                                  ('Page', 'Category')[catmode]
                                  )(commons, page.title())
            try:
                commonspage.get(get_redirect=True)
                pagetitle = commonspage.title(withNamespace=not catmode)
                if page.title() == pagetitle:
                    oldText = page.get()
                    text = oldText

                    # for Commons/Commonscat template
                    s = self.findTemplate.search(text)
                    s2 = getattr(self, 'findTemplate%d'
                                 % (2, 3)[catmode]).search(text)
                    if s or s2:
                        pywikibot.output(u'** Already done.')
                    else:
                        cats = textlib.getCategoryLinks(text,
                                                        site=page.site)
                        # Append the template and re-add the category
                        # links after it.
                        text = textlib.replaceCategoryLinks(
                            u'%s{{commons%s|%s}}'
                            % (text, ('', 'cat')[catmode], pagetitle),
                            cats, site=page.site)
                        comment = i18n.twtranslate(
                            page.site, 'commons_link%s-template-added'
                            % ('', '-cat')[catmode])
                        try:
                            self.userPut(page, oldText, text,
                                         summary=comment)
                        except pywikibot.EditConflict:
                            pywikibot.output(
                                u'Skipping %s because of edit conflict'
                                % page.title())
            except pywikibot.NoPage:
                pywikibot.output(u'%s does not exist in Commons'
                                 % page.__class__.__name__)
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s does not exist' % page.title())
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'Page %s is a redirect; skipping.'
                             % page.title())
        except pywikibot.LockedPage:
            pywikibot.output(u'Page %s is locked' % page.title())
def test_templates(self):
    """Test normal templates inside category links."""
    self.site = self.get_site()
    self.assertEqual(
        textlib.getCategoryLinks("[[Category:{{P1|Foo}}]]", self.site),
        [pywikibot.page.Category(self.site, "Foo")]
    )
    self.assertEqual(
        textlib.getCategoryLinks("[[Category:{{P1|Foo}}|bar]]",
                                 self.site),
        [pywikibot.page.Category(self.site, "Foo", sortKey="bar")],
    )
    self.assertEqual(
        textlib.getCategoryLinks("[[Category:{{P1|{{P2|L33t|Foo}}}}|bar]]",
                                 self.site),
        [pywikibot.page.Category(self.site, "Foo", sortKey="bar")],
    )
    # {{!}} is a literal pipe, so it separates title and sort key.
    self.assertEqual(
        textlib.getCategoryLinks("[[Category:Foo{{!}}bar]]", self.site),
        [pywikibot.page.Category(self.site, "Foo", sortKey="bar")],
    )
    self.assertEqual(
        textlib.getCategoryLinks(
            "[[Category:Foo{{!}}bar]][[Category:Wiki{{P2||pedia}}]]",
            self.site),
        [pywikibot.page.Category(self.site, "Foo", sortKey="bar"),
         pywikibot.page.Category(self.site, "Wikipedia")],
    )
    # Only the first pipe splits; later ones stay in the sort key.
    self.assertEqual(
        textlib.getCategoryLinks("[[Category:Foo{{!}}and{{!}}bar]]",
                                 self.site),
        [pywikibot.page.Category(self.site, "Foo", sortKey="and|bar")],
    )
    # An unbalanced template argument marker must be rejected.
    self.assertRaises(pywikibot.InvalidTitle, textlib.getCategoryLinks,
                      "[[Category:nasty{{{!}}]]", self.site)
def test_templates(self):
    """Test normal templates inside category links."""
    self.site = self.get_site()
    self.assertEqual(
        textlib.getCategoryLinks('[[Category:{{P1|Foo}}]]', self.site),
        [pywikibot.page.Category(self.site, 'Foo')])
    self.assertEqual(
        textlib.getCategoryLinks('[[Category:{{P1|Foo}}|bar]]',
                                 self.site),
        [pywikibot.page.Category(self.site, 'Foo', sortKey='bar')])
    self.assertEqual(
        textlib.getCategoryLinks('[[Category:{{P1|{{P2|L33t|Foo}}}}|bar]]',
                                 self.site),
        [pywikibot.page.Category(self.site, 'Foo', sortKey='bar')])
    # {{!}} is a literal pipe, so it separates title and sort key.
    self.assertEqual(
        textlib.getCategoryLinks('[[Category:Foo{{!}}bar]]', self.site),
        [pywikibot.page.Category(self.site, 'Foo', sortKey='bar')])
    self.assertEqual(
        textlib.getCategoryLinks(
            '[[Category:Foo{{!}}bar]][[Category:Wiki{{P2||pedia}}]]',
            self.site),
        [
            pywikibot.page.Category(self.site, 'Foo', sortKey='bar'),
            pywikibot.page.Category(self.site, 'Wikipedia')
        ])
    # Only the first pipe splits; later ones stay in the sort key.
    self.assertEqual(
        textlib.getCategoryLinks('[[Category:Foo{{!}}and{{!}}bar]]',
                                 self.site),
        [pywikibot.page.Category(self.site, 'Foo', sortKey='and|bar')])
    # An unbalanced template argument marker must be rejected.
    self.assertRaises(pywikibot.InvalidTitle, textlib.getCategoryLinks,
                      '[[Category:nasty{{{!}}]]', self.site)
def test_templates(self):
    """Test normal templates inside category links."""
    self.site = self.get_site()
    # Table of (wikitext, expected category list) pairs, checked in
    # order; replaces the original chain of individual assertEqual
    # calls with identical inputs and expectations.
    cases = [
        ('[[Category:{{P1|Foo}}]]',
         [pywikibot.page.Category(self.site, 'Foo')]),
        ('[[Category:{{P1|Foo}}|bar]]',
         [pywikibot.page.Category(self.site, 'Foo', sortKey='bar')]),
        ('[[Category:{{P1|{{P2|L33t|Foo}}}}|bar]]',
         [pywikibot.page.Category(self.site, 'Foo', sortKey='bar')]),
        ('[[Category:Foo{{!}}bar]]',
         [pywikibot.page.Category(self.site, 'Foo', sortKey='bar')]),
        ('[[Category:Foo{{!}}bar]][[Category:Wiki{{P2||pedia}}]]',
         [pywikibot.page.Category(self.site, 'Foo', sortKey='bar'),
          pywikibot.page.Category(self.site, 'Wikipedia')]),
        ('[[Category:Foo{{!}}and{{!}}bar]]',
         [pywikibot.page.Category(self.site, 'Foo',
                                  sortKey='and|bar')]),
    ]
    for wikitext, expected in cases:
        self.assertEqual(
            textlib.getCategoryLinks(wikitext, self.site), expected)
    # An unbalanced template argument marker must be rejected.
    with self.assertRaises(pywikibot.InvalidTitle):
        textlib.getCategoryLinks('[[Category:nasty{{{!}}]]', self.site)
def treat_page(self):
    """Search list articles linking here and append the death date.

    Reads the birth-year category of the current page, searches for
    list articles that link to the page with only a birth date in
    parentheses, and rewrites the first such occurrence via
    ``replace_pattern``.
    """
    page = self.current_page
    categories = textlib.getCategoryLinks(page.text, site=self.site)
    # Generator expression instead of map(lambda ...) — same values,
    # clearer and lazier.
    titles = (cat.title(with_ns=False, with_section=False,
                        allow_interwiki=False, insite=self.site)
              for cat in categories)
    matches = [match for match in map(self.categoryR.fullmatch, titles)
               if match]
    if not matches:
        pywikibot.output('No birthdate category found')
        return
    fullmatch = matches.pop()
    if matches:
        pywikibot.output('Multiple birthdate categories found')
        return
    birth_date = fullmatch.group(1)
    # Search for pages linking here and mentioning only the birth
    # date, e.g. "(* 1950)".
    search_query = 'linksto:"%s"' % page.title()
    search_query += r' insource:/\[\[[^\[\]]+\]\]'
    search_query += r' +\(\* *\[*%s\]*\)/' % birth_date
    search_query += ' -intitle:"Seznam"'
    # Match a link to this page or any of its mainspace redirects,
    # followed by the parenthesized birth date.
    pattern = r'\[\[((?:%s)(?:\|[^\[\]]+)?)\]\]' % '|'.join(
        re.escape(p.title())
        for p in chain([page],
                       page.backlinks(followRedirects=False,
                                      filterRedirects=True,
                                      namespaces=[0])))
    pattern += r' +\(\* *(\[\[)?(%s)(\]\])?\)' % birth_date
    regex = re.compile(pattern)
    for ref_page in PreloadingGenerator(
            SearchPageGenerator(search_query, namespaces=[0],
                                site=self.site)):
        text = ref_page.text
        # todo: multiple matches
        match = regex.search(text)
        if not match:
            continue
        inside, left, year1, right = match.groups('')
        new_text = text[:match.start()]
        new_text += replace_pattern.format(inside=inside, left=left,
                                           right=right, year1=year1,
                                           year2=self.year)
        new_text += text[match.end():]
        self.userPut(ref_page, ref_page.text, new_text,
                     summary='doplnění data úmrtí')
def addCategory(site, page, cat):
    """Add category *cat* to *page* unless it is already present.

    Saves the page when the category was added and returns True;
    returns False when the category was already there.
    """
    old_text = page.text
    cats = textlib.getCategoryLinks(old_text)
    catpl = pywikibot.Category(site, cat)
    # Guard clause: bail out early when nothing needs to change.
    if catpl in cats:
        print("\t'" + cat + "' already in page categories")
        return False
    print("\t'" + cat + "' not in page categories. Adding")
    cats.append(catpl)
    text = textlib.replaceCategoryLinks(page.text, cats, site=site)
    userPut(page, old_text, text, minor=True, botflag=True)
    return True
def treat(self, page):
    """Process one page."""
    if page.isRedirectPage():
        # if it's a redirect use the redirect target instead
        redirTarget = page.getRedirectTarget()
        if self.follow_redirects:
            self.current_page = redirTarget
        else:
            pywikibot.warning(
                "Page %s is a redirect to %s; skipping."
                % (page.title(asLink=True),
                   redirTarget.title(asLink=True)))
            # loading it will throw an error if we don't jump out before
            return
    else:
        self.current_page = page
    if self.current_page.exists():
        # Load the page
        text = self.current_page.text
    elif self.create:
        pywikibot.output("Page %s doesn't exist yet; creating."
                         % (self.current_page.title(asLink=True)))
        text = ""
    else:
        pywikibot.output("Page %s does not exist; skipping."
                         % self.current_page.title(asLink=True))
        return
    # store old text, so we don't have reload it every time
    old_text = text
    cats = textlib.getCategoryLinks(text)
    pywikibot.output("Current categories:")
    for cat in cats:
        pywikibot.output("* %s" % cat.title())
    catpl = pywikibot.Category(self.current_page.site, self.newcat)
    if catpl in cats:
        pywikibot.output("%s is already in %s."
                         % (self.current_page.title(), catpl.title()))
    else:
        if self.sort:
            # Optionally convert the category into a sorted-by-last-
            # name variant before adding it.
            catpl = self.sorted_by_last_name(catpl, self.current_page)
        pywikibot.output("Adding %s" % catpl.title(asLink=True))
        cats.append(catpl)
        text = textlib.replaceCategoryLinks(
            text, cats, site=self.current_page.site)
        comment = self.comment
        if not comment:
            # Fall back to a localized default edit summary.
            comment = i18n.twtranslate(
                self.current_page.site, "category-adding",
                {"newcat": catpl.title(withNamespace=False)})
        try:
            self.userPut(self.current_page, old_text, text,
                         summary=comment, minor=True, botflag=True)
        except pywikibot.PageSaveRelatedError as error:
            pywikibot.output("Page %s not saved: %s"
                             % (self.current_page.title(asLink=True),
                                error))
def set_category_status(site, page, cat, status):
    """Add or remove category *cat* on *page* to match *status*.

    Saves the page only when its text actually changed; returns True
    in that case, False otherwise.
    """
    old_text = page.text
    cats = textlib.getCategoryLinks(old_text)
    catpl = pywikibot.Category(site, cat)
    present = catpl in cats
    if status and not present:
        cats.append(catpl)
    elif not status and present:
        cats.remove(catpl)
    text = textlib.replaceCategoryLinks(page.text, cats, site=site)
    if text == old_text:
        return False
    page.text = text
    page.save(minor=True, botflag=True)
    return True
def apply(self, page, *args):
    """Clean up list items and category links on *page*.

    Removes empty bullet items, drops the "living people" category
    when a death category is present, and joins a DEFAULTSORT-style
    template with the following category block.
    """
    # remove empty list items
    page.text = re.sub(r'^\* *\n', '', page.text, flags=re.M)

    # sort categories
    categories = textlib.getCategoryLinks(page.text, site=page.site)
    # 'Žijící lidé' = "Living people" (Czech).
    category_living = pywikibot.Category(page.site, 'Žijící lidé')
    if category_living in categories:
        # A death category ('Úmrtí ...' = "Deaths ...") makes the
        # living-people category obsolete.
        if any(cat.title(with_ns=False).startswith('Úmrtí ')
               for cat in categories):
            categories.remove(category_living)
            page.text = textlib.replaceCategoryLinks(
                page.text, categories, page.site)

    # Remove the blank line between a {{DEFAULTSORT}}-like template
    # (any local magic word alias) and the first category link.
    page.text = re.sub(
        r'(\{\{ *(?:%s)[^}]+\}\}\n)\n(\[\[(?:%s))'
        % ('|'.join(map(re.escape,
                        self.site.getmagicwords('defaultsort'))),
           '|'.join(self.site.namespaces[14])),
        r'\1\2', page.text)
def harvestSortKey(self, match):
    """Promote a dominant category sort key to {{DEFAULTSORT}}.

    *match* covers a page's category block.  Returns the rewritten
    text, or the original text unchanged when harvesting is not safe.
    """
    text = match.group()
    # Nothing to do when the page already sets a default sort key.
    if self.defaultsortR.search(text):
        return text
    keys = {}
    categories = textlib.getCategoryLinks(text, site=self.site)
    # Only biographic articles are harvested — 'Muži'/'Ženy'/'Žijící
    # lidé' are the Czech men/women/living-people categories.
    if not any(category.title(with_ns=False) in (
            'Muži', 'Žijící lidé', 'Ženy')
            for category in categories):
        return text
    # Count occurrences of each (tidied) per-category sort key.
    for category in categories:
        key = category.sortKey
        if key:
            key = self.tidy_sortkey(key)
            if not key.strip():
                continue
            keys.setdefault(key, 0.0)
            keys[key] += 1
    if len(keys) > 1:
        # Conflicting sort keys; do not touch the page.
        return text
    if not keys:
        return text
    if sum(keys.values()) < 4:
        # Too few occurrences to be confident it is the default.
        return text
    key = list(keys.keys()).pop()
    # Drop per-category sort keys now covered by the default.
    for category in categories:
        if category.sortKey is not None:
            if self.tidy_sortkey(category.sortKey) == key:
                category.sortKey = None
    categories.sort(key=self.sort_category)
    text = textlib.removeCategoryLinks(text, self.site)
    text += '\n\n{{DEFAULTSORT:%s}}' % key
    # Squeeze the blank line replaceCategoryLinks leaves before the
    # category block back to a single newline.
    before, _, after = textlib.replaceCategoryLinks(
        text, categories, self.site).rpartition('\n\n')  # fixme: safer
    return before + '\n' + after
def duplicateSortKey(self, match):
    """Remove category sort keys that duplicate {{DEFAULTSORT}}.

    *match* covers a page's category block.  Returns the rewritten
    text, or the original text when nothing changed.
    """
    text = match.group()
    matches = list(self.defaultsortR.finditer(text))
    if not matches:
        return text
    # Only the last DEFAULTSORT occurrence is authoritative.
    defaultsort = matches.pop().group(1).strip()
    categories = textlib.getCategoryLinks(text, site=self.site)
    changed = False
    for category in categories:
        if self.tidy_sortkey(category.sortKey) == defaultsort:
            # Redundant with DEFAULTSORT — drop the explicit key.
            category.sortKey = None
            changed = True
    if changed:
        categories.sort(key=self.sort_category)
        # Squeeze the blank line replaceCategoryLinks leaves before
        # the category block back to a single newline.
        before, _, after = textlib.replaceCategoryLinks(
            text, categories, self.site).rpartition('\n\n')  # fixme: safer
        return before + '\n' + after
    else:
        return text
def has_valid_content(self) -> bool:
    """Test page only contains a single call to the index template."""
    text = self.text
    if not text.startswith('{{' + self.INDEX_TEMPLATE):
        return False
    # Strip trailing category links so only the template call remains.
    for cat in textlib.getCategoryLinks(text, self.site):
        text = text.replace('\n' + cat.title(as_link=True), '')
    if not text.endswith('}}'):
        return False
    # Discard all inner templates as only top-level ones matter
    templates = textlib.extract_templates_and_params_regex_simple(text)
    if len(templates) != 1:
        # Only a single call to the INDEX_TEMPLATE is allowed
        return False
    return templates[0][0] == self.INDEX_TEMPLATE
def treat_page(self):
    """Add the death date to list entries linking the current page.

    Reads the page's birthdate category, then searches list articles
    that link here with only a birth date in parentheses and rewrites
    them via ``self.replace_callback``.
    """
    page = self.current_page
    categories = textlib.getCategoryLinks(page.text, site=self.site)
    titles = (cat.title(with_ns=False, with_section=False,
                        allow_interwiki=False, insite=self.site)
              for cat in categories)
    matches = list(filter(bool, map(self.categoryR.fullmatch, titles)))
    if not matches:
        pywikibot.output('No birthdate category found')
        return
    fullmatch = matches.pop()
    if matches:
        pywikibot.output('Multiple birthdate categories found')
        return
    birth_date = fullmatch.group(1)
    # Find pages linking here that mention only the birth date,
    # e.g. "(* 1950)"; 'Seznam' = "List" (Czech) is excluded by title.
    search_query = 'linksto:"%s"' % page.title()  # todo: sanitize?
    search_query += r' insource:/\[\[[^\[\]]+\]\]'
    search_query += r' +\(\* *\[*%s\]*\)/' % birth_date
    search_query += ' -intitle:"Seznam"'
    # Match a link to this page or any of its mainspace redirects,
    # followed by the parenthesized birth date.
    pattern = r'\[\[((?:%s)(?:\|[^\[\]]+)?)\]\]' % '|'.join(
        re.escape(p.title())
        for p in chain([page],
                       page.backlinks(followRedirects=False,
                                      filterRedirects=True,
                                      namespaces=[0])))
    pattern += r' +\(\* *(\[\[)?(%s)(\]\])?\)' % birth_date
    regex = re.compile(pattern)
    for ref_page in PreloadingGenerator(
            SearchPageGenerator(search_query, namespaces=[0],
                                site=self.site)):
        new_text, num = regex.subn(self.replace_callback,
                                   ref_page.text)
        if num:
            # Summary: "adding date of death" (Czech).
            self.userPut(ref_page, ref_page.text, new_text,
                         summary='doplnění data úmrtí')
def categoricAncestor(page, depth):
    """Return the *categoric ancestor* of the page.

    That is the highest-level category which has a sideicon related
    to it, if it exists and is unique, and False otherwise.  Results
    are memoized in the module-level ``ancestorMemo``.
    """
    if page in ancestorMemo:
        return ancestorMemo[page]
    MAX_RECURSION_DEPTH = 20
    found = set()
    for cat in textlib.getCategoryLinks(page.text):
        title = cat.title()[9:]  # strip the 'Category:' prefix
        if title in codenames:
            found.add(cat)
        elif depth < MAX_RECURSION_DEPTH:
            ancestor = categoricAncestor(cat, depth + 1)
            if ancestor:
                found.add(ancestor)
    # Unique ancestor → memoize and return it; otherwise record False.
    result = next(iter(found)) if len(found) == 1 else False
    ancestorMemo[page] = result
    return result
def treat(self, page):
    """Work on each page retrieved from generator."""
    original_text = page.text
    applied = set()
    new_text = original_text
    last_text = None
    context = 0
    while True:
        if self.isTextExcepted(new_text):
            pywikibot.output('Skipping {} because it contains text '
                             'that is on the exceptions list.'
                             .format(page))
            return
        # Apply replacements repeatedly until a fixed point is
        # reached (only once unless -recursive is set).
        while new_text != last_text:
            last_text = new_text
            new_text = self.apply_replacements(last_text, applied,
                                               page)
            if not self.opt.recursive:
                break
        if new_text == original_text:
            pywikibot.output('No changes were necessary in '
                             + page.title(as_link=True))
            return
        if self.opt.addcat:
            # Fetch only categories in wikitext, otherwise the others
            # will be explicitly added.
            cats = textlib.getCategoryLinks(new_text, site=page.site)
            if self.opt.addcat not in cats:
                cats.append(self.opt.addcat)
                new_text = textlib.replaceCategoryLinks(new_text,
                                                        cats,
                                                        site=page.site)
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        self.current_page = page
        pywikibot.showDiff(original_text, new_text, context=context)
        if self.opt.always:
            break
        choice = pywikibot.input_choice(
            'Do you want to accept these changes?',
            [('Yes', 'y'), ('No', 'n'), ('Edit original', 'e'),
             ('edit Latest', 'l'), ('open in Browser', 'b'),
             ('More context', 'm'), ('All', 'a')],
            default='N')
        if choice == 'm':
            # Triple the diff context each time 'm' is chosen.
            context = context * 3 if context else 3
            continue
        if choice in ('e', 'l'):
            text_editor = editor.TextEditor()
            edit_text = original_text if choice == 'e' else new_text
            as_edited = text_editor.edit(edit_text)
            # if user didn't press Cancel
            if as_edited and as_edited != new_text:
                new_text = as_edited
                if choice == 'l':
                    # prevent changes from being applied again
                    last_text = new_text
            continue
        if choice == 'b':
            pywikibot.bot.open_webbrowser(page)
            try:
                # Reload in case the user edited it in the browser.
                original_text = page.get(get_redirect=True, force=True)
            except pywikibot.NoPage:
                pywikibot.output('Page {0} has been deleted.'.format(
                    page.title()))
                break
            new_text = original_text
            last_text = None
            continue
        if choice == 'a':
            self.opt.always = True
        if choice == 'y':
            self.save(page, original_text, new_text, applied,
                      show_diff=False, quiet=True,
                      callback=self._replace_async_callback,
                      asynchronous=True)
            while not self._pending_processed_titles.empty():
                proc_title, res = self._pending_processed_titles.get()
                pywikibot.output('Page {0}{1} saved'.format(
                    proc_title, '' if res else ' not'))
        # choice must be 'N'
        break
    if self.opt.always and new_text != original_text:
        self.save(page, original_text, new_text, applied,
                  show_diff=False, asynchronous=False)
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @param page: the page to add text to
    @param addText: text to add; '\\n' sequences are expanded to the
        configured line separator
    @param summary: edit summary; defaults to a localized message
        built from the beginning of addText
    @param regexSkip: skip the page if this regex matches its text
    @param regexSkipUrl: skip the page if this regex matches the
        fetched page URL content
    @param always: save without asking for confirmation
    @param up: if True prepend the text, else append it
    @param putText: if True save the page, else only return the texts
    @param oldTextGiven: use this text instead of fetching the page
    @param reorderEnabled: if appending, keep the added text above
        categories and interwiki links
    @param create: create the page if it does not exist
    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: {}'.format(result))
            return (False, False, always)

    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += '{}{}'.format(config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext,
                                                   interwikiInside,
                                                   site)
        else:
            newtext += '{}{}'.format(config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output(color_format(
            '\n\n>>> {lightpurple}{0}{default} <<<', page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        # If someone load it as module, maybe it's not so useful to
        # put the text in the page
        if not putText:
            return (text, newtext, always)
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')
            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)
        if always or choice == 'y':
            result = put_text(page, newtext, summary, error_count,
                              asynchronous=not always)
            if result is not None:
                return (result, result, always)
            # put_text returned None: retry, counting the failure.
            error_count += 1
def add_text(
    page,
    addText: str,
    summary: Optional[str] = None,
    regexSkip: Optional[str] = None,
    regexSkipUrl: Optional[str] = None,
    always: bool = False,
    up: bool = False,
    putText: bool = True,
    oldTextGiven: Optional[str] = None,
    reorderEnabled: bool = True,
    create: bool = False
) -> Union[Tuple[bool, bool, bool], Tuple[str, str, bool]]:
    """
    Add text to a page.

    @param page: The page to add text to
    @type page: pywikibot.page.BasePage
    @param addText: Text to add
    @param summary: Summary of changes. If None, beginning of addText
        is used.
    @param regexSkip: Abort if text on page matches
    @param regexSkipUrl: Abort if full url matches
    @param always: Always add text without user confirmation
    @param up: If True, add text to top of page, else add at bottom.
    @param putText: If True, save changes to the page, else return
        (_, newtext, _)
    @param oldTextGiven: If None fetch page text, else use this text
    @param reorderEnabled: If True place text above categories and
        interwiki, else place at page bottom. No effect if up = False.
    @param create: Create page if it does not exist
    @return: If putText=True: (success, success, always)
        else: (_, newtext, _)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: {}'.format(result))
            return (False, False, always)

    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', '\n')
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += '\n' + addText
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext,
                                                   interwikiInside,
                                                   site)
        else:
            newtext += '\n' + addText
    else:
        newtext = addText + '\n' + text

    if not putText:
        # If someone load it as module, maybe it's not so useful to
        # put the text in the page
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(color_format(
            '\n\n>>> {lightpurple}{0}{default} <<<', page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')
            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)
                continue
        # either always or choice == 'y' is selected
        result = put_text(page, newtext, summary, error_count,
                          asynchronous=not always)
        if result is not None:
            return (result, result, always)
        # put_text returned None: retry, counting the failure.
        error_count += 1
def run(self):
    """Start the bot.

    Iterate over the page generator, apply the configured replacements
    repeatedly (when ``self.recursive``) until the text stabilizes, add
    ``self.addedCat`` when configured, then interactively or
    automatically save the result.
    """
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        if self.isTitleExcepted(page.title()):
            pywikibot.output(
                "Skipping %s because the title is on the exceptions list."
                % page.title(asLink=True))
            continue
        try:
            # Load the page's text from the wiki
            original_text = page.get(get_redirect=True)
            if not page.canBeEdited():
                pywikibot.output("You can't edit page %s"
                                 % page.title(asLink=True))
                continue
        except pywikibot.NoPage:
            pywikibot.output("Page %s not found" % page.title(asLink=True))
            continue
        applied = set()
        new_text = original_text
        while True:
            if self.isTextExcepted(new_text):
                pywikibot.output(
                    "Skipping %s because it contains text "
                    "that is on the exceptions list."
                    % page.title(asLink=True))
                break
            # Apply replacements until a fixed point is reached (or once,
            # when not recursive).
            last_text = None
            while new_text != last_text:
                last_text = new_text
                new_text = self.apply_replacements(last_text, applied, page)
                if not self.recursive:
                    break
            if new_text == original_text:
                pywikibot.output("No changes were necessary in %s"
                                 % page.title(asLink=True))
                break
            if hasattr(self, "addedCat"):
                # Fetch only categories in wikitext, otherwise the others will
                # be explicitly added.
                cats = textlib.getCategoryLinks(new_text, site=page.site)
                if self.addedCat not in cats:
                    cats.append(self.addedCat)
                    new_text = textlib.replaceCategoryLinks(new_text, cats,
                                                            site=page.site)
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(color_format(
                "\n\n>>> {lightpurple}{0}{default} <<<", page.title()))
            pywikibot.showDiff(original_text, new_text)
            if self.getOption("always"):
                break
            choice = pywikibot.input_choice(
                "Do you want to accept these changes?",
                [("Yes", "y"), ("No", "n"), ("Edit", "e"),
                 ("open in Browser", "b"), ("all", "a")],
                default="N",
            )
            if choice == "e":
                editor = editarticle.TextEditor()
                as_edited = editor.edit(original_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                continue
            if choice == "b":
                pywikibot.bot.open_webbrowser(page)
                try:
                    original_text = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output("Page %s has been deleted."
                                     % page.title())
                    break
                new_text = original_text
                continue
            if choice == "a":
                self.options["always"] = True
            if choice == "y":
                page.text = new_text
                # NOTE: the keyword was `async=True`, which is a syntax
                # error since Python 3.7 (`async` became a reserved
                # keyword); pywikibot renamed the parameter to
                # `asynchronous`.
                page.save(
                    summary=self.generate_summary(applied),
                    asynchronous=True, callback=self._count_changes,
                    quiet=True
                )
                while not self._pending_processed_titles.empty():
                    proc_title, res = self._pending_processed_titles.get()
                    pywikibot.output("Page %s%s saved"
                                     % (proc_title, "" if res else " not"))
            # choice must be 'N'
            break
        if self.getOption("always") and new_text != original_text:
            try:
                page.text = new_text
                page.save(summary=self.generate_summary(applied),
                          callback=self._count_changes, quiet=True)
            except pywikibot.EditConflict:
                pywikibot.output("Skipping %s because of edit conflict"
                                 % (page.title(),))
            except pywikibot.SpamfilterError as e:
                pywikibot.output(
                    "Cannot change %s because of blacklist entry %s"
                    % (page.title(), e.url))
            except pywikibot.LockedPage:
                pywikibot.output("Skipping %s (locked page)"
                                 % (page.title(),))
            except pywikibot.PageNotSaved as error:
                pywikibot.output("Error putting page: %s" % (error.args,))
            # Drain the async-save result queue periodically so it does not
            # grow without bound during long runs.
            if self._pending_processed_titles.qsize() > 50:
                while not self._pending_processed_titles.empty():
                    proc_title, res = self._pending_processed_titles.get()
                    pywikibot.output("Page %s%s saved"
                                     % (proc_title, "" if res else " not"))
def put_cats(page, new_cats, summary=None, always=False):
    """Add categories and templates from *new_cats* to *page*.

    Entries starting with ``category:`` are appended as category links at
    the bottom; entries starting with ``{{`` are prepended as templates at
    the top. Entries already present on the page are skipped. The user is
    shown a diff and asked to confirm unless *always* is True.

    @param page: page to modify
    @param new_cats: iterable of category titles / template wikitext
    @param summary: edit summary; a default is used when None
    @param always: save without asking for confirmation
    """
    line_sep = pywikibot.config.line_separator
    if not summary:
        summary = "Adding categories using catfiles"
    oldtext = page.get()
    old_cats = textlib.getCategoryLinks(oldtext)
    old_templates = textlib.extract_templates_and_params(oldtext)
    old_template_titles = [i[0].lower() for i in old_templates]
    templates, cats = [], []
    for val in new_cats:
        if val.lower().startswith('category:'):
            tmp_cat = pywikibot.Category(pywikibot.Link(val, page.site))
            if tmp_cat not in old_cats:
                cats.append(tmp_cat)
        elif val.lower().startswith('{{'):
            tmp_templates = textlib.extract_templates_and_params(val)
            if len(tmp_templates) != 1:
                # logging.warn is deprecated; use logging.warning.
                logging.warning("There was an error when parsing the template "
                                "'{0}'. Contact the developer, skipping it "
                                "for now.".format(val))
                # Actually skip the malformed entry: without this,
                # tmp_templates[0] raises IndexError when the parse
                # returned no templates.
                continue
            tmp_template = tmp_templates[0]
            if tmp_template[0].lower() not in old_template_titles:
                templates.append(val)

    # Add templates to the top, and the categories to the bottom.
    newtext = oldtext
    if len(templates) > 0:
        newtext = line_sep.join(templates) + line_sep + newtext
    if len(cats) > 0:
        newtext = (newtext + line_sep
                   + line_sep.join(c.title(asLink=True, underscore=False)
                                   for c in cats))
    if oldtext == newtext:
        pywikibot.output("No changes to the page need to be made.")
        return

    while True:
        # Show the diff that has been created
        pywikibot.output(color_format(
            '\n\n>>> {lightpurple}{0}{default} <<<',
            page.title(underscore=False)))
        pywikibot.showDiff(oldtext, newtext)
        if always:
            choice = 'y'
        else:
            # Ask user whether to accept
            choice = pywikibot.input_choice(
                'Do you want to accept these changes?',
                [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'),
                 ('Open browser', 'b')], 'n', automatic_quit=False)
        # Apply the choice from above
        if choice == 'n':
            break
        elif choice == 'b':
            pywikibot.bot.open_webbrowser(page)
        elif choice == 'e':
            editor = pywikibot.editor.TextEditor()
            as_edited = editor.edit(newtext)
            if as_edited and as_edited != newtext:
                newtext = as_edited
        elif choice == 'y':
            try:
                page.put_async(newtext, summary)
            except pywikibot.EditConflict:
                pywikibot.output('Edit conflict! Skipping')
            except pywikibot.ServerError:
                pywikibot.output('Server Error! Skipping')
            except pywikibot.SpamfilterError as e:
                pywikibot.output(
                    'Cannot change %s because of blacklist entry %s'
                    % (page.title(), e.url))
            except pywikibot.LockedPage:
                pywikibot.output('Skipping %s (locked page)' % page.title())
            except pywikibot.PageNotSaved as error:
                pywikibot.output('Error putting page: %s' % error.args)
            break
def treat_page(self):
    """Load the given page, do some changes, and save it.

    Counts outgoing links, categories (direct and via category-adding
    templates), infoboxes and <ref> tags on the current page; if the
    page lacks links or categories and has no {{Dopracować}} (cleanup)
    template yet, one is prepended with the matching parameters.
    """
    refR = re.compile(r'(?P<all><ref.*?</ref>)')
    # clenaupR = re.compile(r'(?i){{dopracować.*?}}')
    text = self.current_page.text
    # Counters for page features; 'dopracować' flags an existing cleanup
    # template.
    links = {'links': 0, 'cat': 0, 'template': 0, 'infobox': 0,
             'refs': 0, 'dopracować': False}
    # cleanupTmpl = False
    summary = []
    if self.current_page.isRedirectPage():
        pywikibot.output(u'Page %s is REDIRECT!'
                         % self.current_page.title())
        return
    elif self.current_page.isDisambig():
        pywikibot.output(u'Page %s is DISAMBIG!'
                         % self.current_page.title())
        return
    else:
        if self.opt.test:
            pywikibot.output(u'Title:%s' % self.current_page.title())
            pywikibot.output(u'Depth:%s' % self.current_page.depth)
        # Count outgoing article-namespace links.
        for l in self.current_page.linkedPages(namespaces=0):
            if self.opt.test:
                pywikibot.output(u'Links to:[[%s]]' % l.title())
            links['links'] += 1
        # pywikibot.output(u'Links:%s' % len(list(self.current_page.linkedPages(namespaces=0))))
        for t, p in textlib.extract_templates_and_params(
                text, remove_disabled_parts=True):
            if self.opt.test:
                pywikibot.output('Template:[[%s]]' % t)
            links['template'] += 1
            if 'infobox' in t:
                links['infobox'] += 1
            if 'dopracować' in t.lower():
                links['dopracować'] = True
            if t.lower() in tmplcat:  # check for category adding templates
                links['cat'] += 1
                if self.opt.test:
                    pywikibot.output('Current cat#%i' % links['cat'])
            # cleanupTmpl = (t, p)
            # if 'rok w' in t or 'Rok w' in t:
            #     links['cat'] += 1
        for c in textlib.getCategoryLinks(text):
            if self.opt.test:
                pywikibot.output('Category:%s' % c)
            links['cat'] += 1
            if self.opt.test:
                pywikibot.output('Current cat#%i' % links['cat'])
        for r in refR.finditer(text):
            if self.opt.test:
                pywikibot.output('Ref:%s' % r.group('all'))
            links['refs'] += 1
        if self.opt.test:
            pywikibot.output('Links=%s' % links)
        # pywikibot.output('Cleanup=%s' % re.sub('\n','',textlib.glue_template_and_params(cleanupTmpl)))

        if links['dopracować']:
            # A cleanup template is already present — nothing to add.
            if self.opt.test:
                pywikibot.output('Cleanup Tmpl FOUND')
        else:
            # add {{Dopracować}}
            t = 'Dopracować'  # template title
            p = {}  # template params
            today = datetime.now()
            datestr = today.strftime('%Y-%m')
            if self.opt.test:
                pywikibot.output('Date:%s' % datestr)
            # Only flag the aspects that are actually missing.
            if not (links['links'] and links['cat']):
                if not links['links']:
                    p['linki'] = datestr
                    summary.append('linki')
                if not links['cat']:
                    p['kategoria'] = datestr
                    summary.append('kategorie')
            # if not links['refs']:
            #     p['przypisy'] = datestr
            #     summary.append('przypisy')
            cleanupTmpl = (t, p)
            if not p:
                # No missing aspects — do not tag the page.
                if self.opt.test:
                    pywikibot.output('Nothing to add')
                return
            if self.opt.test:
                pywikibot.output('Cleanup Tmpl TO ADD')
                pywikibot.output('summary:%s' % summary)
                pywikibot.output('params:%s' % p)
            # Prepend the rendered cleanup template (flattened to one line).
            text = re.sub('\n', '',
                          textlib.glue_template_and_params(cleanupTmpl)) \
                + '\n' + text
            # if summary option is None, it takes the default i18n summary from
            # i18n subdirectory with summary_key as summary key.
            self.put_current(
                text,
                summary='Sprawdzanie nowych stron, '
                        'w artykule należy dopracować: %s'
                        % ','.join(summary))
def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links, categories and star templates are
    put to the correct position and into the right order. This
    combines the old instances standardizeInterwiki and
    standardizeCategories The page footer has the following section
    in that sequence:
    1. categories
    2. ## TODO: template beyond categories ##
    3. additional information depending on local site policy
    4. stars templates for featured and good articles
    5. interwiki links
    """
    # Regex fragments matching the per-language "article star" templates
    # (featured/good-article interwiki markers).
    starsList = [
        u'bueno',
        u'bom interwiki',
        u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
        u'destacado', u'destaca[tu]',
        u'enllaç[ _]ad',
        u'enllaz[ _]ad',
        u'leam[ _]vdc',
        u'legătură[ _]a[bcf]',
        u'liamm[ _]pub',
        u'lien[ _]adq',
        u'lien[ _]ba',
        u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
        u'liên[ _]kết[ _]chọn[ _]lọc',
        u'ligam[ _]adq',
        u'ligazón[ _]a[bd]',
        u'ligoelstara',
        u'ligoleginda',
        u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]',
        u'link[ _]km', u'link[ _]sm', u'linkfa',
        u'na[ _]lotura',
        u'nasc[ _]ar',
        u'tengill[ _][úg]g',
        u'ua',
        u'yüm yg',
        u'רא',
        u'وصلة مقالة جيدة',
        u'وصلة مقالة مختارة',
    ]

    categories = None
    interwikiLinks = None
    allstars = []

    # The PyWikipediaBot is no longer allowed to touch categories on the
    # German Wikipedia. See
    # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
    # ignoring nn-wiki of cause of the comment line above iw section
    if not self.template and '{{Personendaten' not in text and \
       '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
       self.site.code not in ('et', 'it', 'bg', 'ru'):
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
        subpage = False
        if self.template:
            loc = None
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl  # only the location part is needed here
            except KeyError:
                pass
            if loc is not None and loc in self.title:
                subpage = True
        interwikiLinks = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # Removing the interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)
        # Removing the stars' issue
        # Match stars on text with comments/nowiki stripped, but remove
        # them from the real text.
        starstext = textlib.removeDisabledParts(text)
        for star in starsList:
            regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                               % star, re.I)
            found = regex.findall(starstext)
            if found != []:
                text = regex.sub('', text)
                allstars += found

    # Adding categories
    if categories:
        # TODO: Sorting categories in alphabetic order.
        # e.g. using categories.sort()
        # TODO: Taking main cats to top
        # for name in categories:
        #     if re.search(u"(.+?)\|(.{,1}?)",name.title()) or name.title()==name.title().split(":")[0]+title:
        #         categories.remove(name)
        #         categories.insert(0, name)
        text = textlib.replaceCategoryLinks(text, categories,
                                            site=self.site)
    # Adding stars templates
    if allstars:
        text = text.strip() + self.site.family.interwiki_text_separator
        allstars.sort()
        for element in allstars:
            text += '%s%s' % (element.strip(), config.line_separator)
            pywikibot.log(u'%s' % element.strip())
    # Adding the interwiki
    if interwikiLinks:
        text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)
    return text
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @param page: the page to modify
    @param addText: text to add
    @param summary: edit summary; an i18n default is built when None
    @param regexSkip: skip the page if this regex matches its text
    @param regexSkipUrl: skip the page if this regex matches its raw URL
        content
    @param always: save without asking for confirmation
    @param up: add the text at the top instead of the bottom
    @param putText: if False, do not save; return the proposed text
    @param oldTextGiven: use this text instead of fetching the page
    @param reorderEnabled: place the text above categories/interwiki
    @param create: create the page if it does not exist
    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.

    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)

    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside, site,
                                                   True)
            # Dealing the stars' issue
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                # Raw string: the original non-raw literal contained
                # invalid escape sequences (\{, \|, \}), which emit
                # DeprecationWarnings on modern Python.
                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n', automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        # NOTE(review): message hard-codes "Fifth" while the
                        # threshold is config.max_retries.
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
def assertRoundtripCategory(self, text, catcount):
    """Assert that extracting then re-inserting categories is lossless."""
    extracted = textlib.getCategoryLinks(text)
    self.assertEqual(len(extracted), catcount)
    rebuilt = textlib.replaceCategoryLinks(text, extracted, site=self.site)
    self.assertEqual(text, rebuilt)
def test_standard_links(self):
    """Test that replacing the extracted categories is a no-op."""
    cats = textlib.getCategoryLinks(self.old, site=self.site)
    new = textlib.replaceCategoryLinks(self.old, cats, site=self.site)
    self.assertEqual(self.old, new)
def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links and categories are put to the
    correct position and into the right order. This combines the old
    instances standardizeInterwiki and standardizeCategories.

    The page footer has the following section in that sequence:
    1. categories
    2. ## TODO: template beyond categories ##
    3. additional information depending on local site policy
    4. interwiki links
    """
    categories = None
    interwikiLinks = None

    # Pywikibot is no longer allowed to touch categories on the
    # German Wikipedia. See
    # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
    # ignoring nn-wiki of cause of the comment line above iw section
    if not self.template and '{{Personendaten' not in text and \
       '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
       self.site.code not in ('et', 'it', 'bg', 'ru'):
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
        subpage = False
        if self.template:
            loc = None
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl  # only the location part is used below
            except KeyError:
                pass
            if loc is not None and loc in self.title:
                subpage = True
        interwikiLinks = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # Removing the interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)

    # Adding categories
    if categories:
        # TODO: Sorting categories in alphabetic order.
        # e.g. using categories.sort()
        # TODO: Taking main cats to top
        # for name in categories:
        #     if (re.search(u"(.+?)\|(.{,1}?)",name.title()) or
        #             name.title() == name.title().split(":")[0] + title):
        #         categories.remove(name)
        #         categories.insert(0, name)
        text = textlib.replaceCategoryLinks(text, categories,
                                            site=self.site)
    # Adding the interwiki
    if interwikiLinks:
        text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)
    return text
def run(self):
    """Start the bot.

    For every generated page: apply the replacements (repeatedly when
    ``self.recursive``), optionally append ``self.addedCat``, then ask
    the user to confirm before saving (or save directly in 'always'
    mode).
    """
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        if self.isTitleExcepted(page.title()):
            pywikibot.output(
                u'Skipping %s because the title is on the exceptions list.'
                % page.title(asLink=True))
            continue
        try:
            # Load the page's text from the wiki
            original_text = page.get(get_redirect=True)
            if not page.canBeEdited():
                pywikibot.output(u"You can't edit page %s"
                                 % page.title(asLink=True))
                continue
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s not found'
                             % page.title(asLink=True))
            continue
        applied = set()
        new_text = original_text
        while True:
            if self.isTextExcepted(new_text):
                pywikibot.output(u'Skipping %s because it contains text '
                                 u'that is on the exceptions list.'
                                 % page.title(asLink=True))
                break
            # Re-apply replacements until the text stops changing
            # (single pass when not recursive).
            last_text = None
            while new_text != last_text:
                last_text = new_text
                new_text = self.apply_replacements(last_text, applied)
                if not self.recursive:
                    break
            if new_text == original_text:
                pywikibot.output(u'No changes were necessary in %s'
                                 % page.title(asLink=True))
                break
            if hasattr(self, 'addedCat'):
                # Fetch only categories in wikitext, otherwise the others will
                # be explicitly added.
                cats = textlib.getCategoryLinks(new_text, site=page.site)
                if self.addedCat not in cats:
                    cats.append(self.addedCat)
                    new_text = textlib.replaceCategoryLinks(new_text,
                                                            cats,
                                                            site=page.site)
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(
                u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                % page.title())
            pywikibot.showDiff(original_text, new_text)
            if self.getOption('always'):
                break
            choice = pywikibot.input_choice(
                u'Do you want to accept these changes?',
                [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'),
                 ('open in Browser', 'b'), ('all', 'a')],
                default='N')
            if choice == 'e':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(original_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                continue
            if choice == 'b':
                # Let the user inspect the live page, then reload it and
                # restart from the fresh text.
                pywikibot.bot.open_webbrowser(page)
                try:
                    original_text = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output(u'Page %s has been deleted.'
                                     % page.title())
                    break
                new_text = original_text
                continue
            if choice == 'a':
                self.options['always'] = True
            if choice == 'y':
                page.put_async(new_text, self.generate_summary(applied),
                               callback=self.count_changes)
            # choice must be 'N'
            break
        if self.getOption('always') and new_text != original_text:
            try:
                page.put(new_text, self.generate_summary(applied),
                         callback=self.count_changes)
            except pywikibot.EditConflict:
                pywikibot.output(u'Skipping %s because of edit conflict'
                                 % (page.title(),))
            except pywikibot.SpamfilterError as e:
                pywikibot.output(
                    u'Cannot change %s because of blacklist entry %s'
                    % (page.title(), e.url))
            except pywikibot.LockedPage:
                pywikibot.output(u'Skipping %s (locked page)'
                                 % (page.title(),))
            except pywikibot.PageNotSaved as error:
                pywikibot.output(u'Error putting page: %s'
                                 % (error.args,))
def run(self):
    """Start the bot.

    For every generated page: apply the replacements (repeatedly when
    ``self.recursive``), optionally append ``self.addedCat``, show a
    diff (with growing context on request) and save after confirmation
    or automatically in 'always' mode.
    """
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        if self.isTitleExcepted(page.title()):
            pywikibot.output(
                'Skipping {0} because the title is on the exceptions list.'
                .format(page.title(as_link=True)))
            continue
        try:
            # Load the page's text from the wiki
            original_text = page.get(get_redirect=True)
            if not page.canBeEdited():
                pywikibot.output("You can't edit page "
                                 + page.title(as_link=True))
                continue
        except pywikibot.NoPage:
            pywikibot.output('Page {0} not found'.format(
                page.title(as_link=True)))
            continue
        applied = set()
        new_text = original_text
        last_text = None
        context = 0  # number of context lines shown around each diff hunk
        while True:
            if self.isTextExcepted(new_text):
                pywikibot.output('Skipping {0} because it contains text '
                                 'that is on the exceptions list.'.format(
                                     page.title(as_link=True)))
                break
            # Re-apply replacements until the text stops changing
            # (single pass when not recursive).
            while new_text != last_text:
                last_text = new_text
                new_text = self.apply_replacements(last_text, applied,
                                                   page)
                if not self.recursive:
                    break
            if new_text == original_text:
                pywikibot.output('No changes were necessary in '
                                 + page.title(as_link=True))
                break
            if hasattr(self, 'addedCat'):
                # Fetch only categories in wikitext, otherwise the others
                # will be explicitly added.
                cats = textlib.getCategoryLinks(new_text, site=page.site)
                if self.addedCat not in cats:
                    cats.append(self.addedCat)
                    new_text = textlib.replaceCategoryLinks(new_text,
                                                            cats,
                                                            site=page.site)
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(
                color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                             page.title()))
            pywikibot.showDiff(original_text, new_text, context=context)
            if self.getOption('always'):
                break
            choice = pywikibot.input_choice(
                'Do you want to accept these changes?',
                [('Yes', 'y'), ('No', 'n'), ('Edit original', 'e'),
                 ('edit Latest', 'l'), ('open in Browser', 'b'),
                 ('More context', 'm'), ('All', 'a')],
                default='N')
            if choice == 'm':
                # Triple the diff context each time (starting at 3).
                context = context * 3 if context else 3
                continue
            if choice == 'e':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(original_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                continue
            if choice == 'l':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(new_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                    # prevent changes from being applied again
                    last_text = new_text
                continue
            if choice == 'b':
                # Let the user inspect the live page, then reload it and
                # restart from the fresh text.
                pywikibot.bot.open_webbrowser(page)
                try:
                    original_text = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output('Page {0} has been deleted.'.format(
                        page.title()))
                    break
                new_text = original_text
                last_text = None
                continue
            if choice == 'a':
                self.options['always'] = True
            if choice == 'y':
                page.text = new_text
                page.save(summary=self.generate_summary(applied),
                          asynchronous=True,
                          callback=self._replace_async_callback,
                          quiet=True)
                while not self._pending_processed_titles.empty():
                    proc_title, res = self._pending_processed_titles.get()
                    pywikibot.output('Page {0}{1} saved'.format(
                        proc_title, '' if res else ' not'))
            # choice must be 'N'
            break
        if self.getOption('always') and new_text != original_text:
            try:
                page.text = new_text
                page.save(summary=self.generate_summary(applied),
                          callback=self._replace_sync_callback, quiet=True)
            except pywikibot.EditConflict:
                pywikibot.output(
                    'Skipping {0} because of edit conflict'.format(
                        page.title(), ))
            except pywikibot.SpamfilterError as e:
                pywikibot.output(
                    'Cannot change {0} because of blacklist entry {1}'.
                    format(page.title(), e.url))
            except pywikibot.LockedPage:
                pywikibot.output('Skipping {0} (locked page)'.format(
                    page.title(), ))
            except pywikibot.PageNotSaved as error:
                pywikibot.output('Error putting page: {0}'.format(
                    error.args, ))
            # Drain the async-save result queue periodically so it does
            # not grow without bound during long runs.
            if self._pending_processed_titles.qsize() > 50:
                while not self._pending_processed_titles.empty():
                    proc_title, res = self._pending_processed_titles.get()
                    pywikibot.output('Page {0}{1} saved'.format(
                        proc_title, '' if res else ' not'))
def test_standard_links(self):
    """Test getting and replacing categories."""
    extracted = textlib.getCategoryLinks(self.old, site=self.site)
    rebuilt = textlib.replaceCategoryLinks(self.old, extracted,
                                           site=self.site)
    self.assertEqual(self.old, rebuilt)
def run(self):
    """Start the bot.

    Iterate over the page generator, apply the configured replacements
    repeatedly (when ``self.recursive``) until the text stabilizes, add
    ``self.addedCat`` when configured, then interactively or
    automatically save the result.
    """
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        if self.isTitleExcepted(page.title()):
            pywikibot.output(
                u'Skipping {0!s} because the title is on the exceptions '
                u'list.'.format(page.title(asLink=True)))
            continue
        try:
            # Load the page's text from the wiki
            original_text = page.get(get_redirect=True)
            if not page.canBeEdited():
                pywikibot.output(u"You can't edit page {0!s}".format(
                    page.title(asLink=True)))
                continue
        except pywikibot.NoPage:
            pywikibot.output(u'Page {0!s} not found'.format(
                page.title(asLink=True)))
            continue
        applied = set()
        new_text = original_text
        while True:
            if self.isTextExcepted(new_text):
                pywikibot.output(u'Skipping %s because it contains text '
                                 u'that is on the exceptions list.'
                                 % page.title(asLink=True))
                break
            # Re-apply replacements until the text stops changing
            # (single pass when not recursive).
            last_text = None
            while new_text != last_text:
                last_text = new_text
                new_text = self.apply_replacements(last_text, applied,
                                                   page)
                if not self.recursive:
                    break
            if new_text == original_text:
                pywikibot.output(
                    u'No changes were necessary in {0!s}'.format(
                        page.title(asLink=True)))
                break
            if hasattr(self, 'addedCat'):
                # Fetch only categories in wikitext, otherwise the others will
                # be explicitly added.
                cats = textlib.getCategoryLinks(new_text, site=page.site)
                if self.addedCat not in cats:
                    cats.append(self.addedCat)
                    new_text = textlib.replaceCategoryLinks(new_text,
                                                            cats,
                                                            site=page.site)
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(color_format(
                '\n\n>>> {lightpurple}{0}{default} <<<', page.title()))
            pywikibot.showDiff(original_text, new_text)
            if self.getOption('always'):
                break
            choice = pywikibot.input_choice(
                u'Do you want to accept these changes?',
                [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'),
                 ('open in Browser', 'b'), ('all', 'a')],
                default='N')
            if choice == 'e':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(original_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                continue
            if choice == 'b':
                pywikibot.bot.open_webbrowser(page)
                try:
                    original_text = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output(u'Page {0!s} has been deleted.'.format(
                        page.title()))
                    break
                new_text = original_text
                continue
            if choice == 'a':
                self.options['always'] = True
            if choice == 'y':
                page.text = new_text
                # NOTE: the keyword was `async=True`, which is a syntax
                # error since Python 3.7 (`async` became a reserved
                # keyword); pywikibot renamed the parameter to
                # `asynchronous`.
                page.save(summary=self.generate_summary(applied),
                          asynchronous=True, callback=self._count_changes,
                          quiet=True)
                while not self._pending_processed_titles.empty():
                    proc_title, res = self._pending_processed_titles.get()
                    pywikibot.output('Page {0!s}{1!s} saved'.format(
                        proc_title, '' if res else ' not'))
            # choice must be 'N'
            break
        if self.getOption('always') and new_text != original_text:
            try:
                page.text = new_text
                page.save(summary=self.generate_summary(applied),
                          callback=self._count_changes, quiet=True)
            except pywikibot.EditConflict:
                pywikibot.output(
                    u'Skipping {0!s} because of edit conflict'.format(
                        page.title()))
            except pywikibot.SpamfilterError as e:
                pywikibot.output(
                    u'Cannot change {0!s} because of blacklist entry '
                    u'{1!s}'.format(page.title(), e.url))
            except pywikibot.LockedPage:
                pywikibot.output(u'Skipping {0!s} (locked page)'.format(
                    page.title()))
            except pywikibot.PageNotSaved as error:
                pywikibot.output(u'Error putting page: {0!s}'.format(
                    error.args))
            # Drain the async-save result queue periodically so it does
            # not grow without bound during long runs.
            if self._pending_processed_titles.qsize() > 50:
                while not self._pending_processed_titles.empty():
                    proc_title, res = self._pending_processed_titles.get()
                    pywikibot.output('Page {0!s}{1!s} saved'.format(
                        proc_title, '' if res else ' not'))
def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links and categories are put into the
    correct position and into the right order. This combines the old
    instances of standardizeInterwiki and standardizeCategories.

    The page footer consists of the following parts in that sequence:
    1. categories
    2. additional information depending on the local site policy
    3. interwiki
    """
    categories = []
    interwiki_links = []

    # get categories
    if not self.template:
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:
        subpage = False
        if self.template:
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl  # only the location part is used below
            except KeyError:
                loc = None
            if loc is not None and loc in self.title:
                subpage = True

        # get interwiki
        interwiki_links = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # remove interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)

    # add categories, main to top
    if categories:
        # TODO: Sort categories in alphabetic order, e.g. using
        # categories.sort()? (T100265)
        # TODO: Get main categories from Wikidata?
        # The category named after the page itself is treated as the main
        # category and moved to the front with an empty-ish sort key.
        main = pywikibot.Category(self.site, 'Category:' + self.title,
                                  sort_key=' ')
        if main in categories:
            categories.pop(categories.index(main))
            categories.insert(0, main)
        text = textlib.replaceCategoryLinks(text, categories,
                                            site=self.site)

    # add interwiki
    if interwiki_links:
        text = textlib.replaceLanguageLinks(text, interwiki_links,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)

    return text
def apply(self, text, page):
    """Return *text* with duplicate category links removed, if any."""
    cats = textlib.getCategoryLinks(text)
    has_duplicates = len(set(cats)) < len(cats)
    if has_duplicates:
        deduplicate(cats)
        text = textlib.replaceCategoryLinks(text, cats, page.site)
    return text
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @param page: the page to edit
    @param addText: the wikitext to append (or prepend when up is True)
    @param summary: edit summary; a default i18n summary is used when None
    @param regexSkip: skip the page when this regex matches the page text
    @param regexSkipUrl: skip the page when this regex matches the raw
        page HTML fetched from the server
    @param always: save without asking for confirmation
    @param up: if True, prepend the text instead of appending it
    @param putText: if True, save the page; otherwise only return the texts
    @param oldTextGiven: use this text instead of loading it from the wiki
    @param reorderEnabled: keep categories, interwiki links and "star"
        templates at the page footer in their canonical order
    @param create: create the page when it does not exist yet
    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. starsList (module level) is the list of all the
    # templates used (in regex format) to make the stars appear.
    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven

    # Understand if the bot has to skip the page or not.
    # In this way you can use both -except and -excepturl.
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)

    # If not up, text put below.
    if not up:
        newtext = text
        # Translating the \\n into binary \n.
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories.
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories.
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki.
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki.
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text.
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories.
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Dealing the stars' issue: strip star templates and re-add
            # them, sorted, just before the interwiki links.
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                # Raw string: braces and pipe are literal regex escapes.
                regex = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki.
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n', automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            # 'b' falls through here without saving and re-prompts.
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)