def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links and categories are put into the
    correct position and into the right order. This combines the old
    instances of standardizeInterwiki and standardizeCategories.

    The page footer consists of the following parts in that sequence:
    1. categories
    2. additional information depending on the local site policy
    3. interwiki
    """
    categories = []
    interwiki_links = []

    # get categories
    if not self.template:
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:
        subpage = False
        if self.template:
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl
            except KeyError:
                loc = None
            if loc is not None and loc in self.title:
                subpage = True

        # get interwiki
        interwiki_links = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # remove interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)

    # add categories, main to top
    if categories:
        # TODO: Sort categories in alphabetic order, e.g. using
        # categories.sort()? (T100265)
        # TODO: Get main categories from Wikidata?
        main = pywikibot.Category(self.site, 'Category:' + self.title,
                                  sort_key=' ')
        if main in categories:
            categories.pop(categories.index(main))
            categories.insert(0, main)

        text = textlib.replaceCategoryLinks(text, categories, site=self.site)

    # add interwiki
    if interwiki_links:
        text = textlib.replaceLanguageLinks(text, interwiki_links,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)

    return text
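A minimal, self-contained sketch (not part of the original scripts) of the extract-and-reinsert pattern the footer standardization above is built on; the example site and wikitext are assumptions made purely for illustration.

import pywikibot
from pywikibot import textlib

site = pywikibot.Site('en', 'wikipedia')            # assumed example site
text = '[[Category:Example]]\n[[de:Beispiel]]\n'    # assumed example wikitext

categories = textlib.getCategoryLinks(text, site=site)    # extract categories
interwiki = textlib.getLanguageLinks(text, insite=site)   # extract interwiki
text = textlib.removeLanguageLinks(text, site=site)       # strip interwiki
text = textlib.replaceCategoryLinks(text, categories, site=site)  # re-add cats
text = textlib.replaceLanguageLinks(text, interwiki, site=site)   # interwiki last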
def add_template(self, source, dest, task, fromsite):
    """Place or remove the Link_GA/FA template on/from a page."""
    def compile_link(site, templates):
        """Compile one link template list."""
        findtemplate = '(%s)' % '|'.join(templates)
        return re.compile(r"\{\{%s\|%s\}\}"
                          % (findtemplate.replace(u' ', u'[ _]'),
                             site.code), re.IGNORECASE)

    tosite = dest.site
    add_tl, remove_tl = self.getTemplateList(tosite.code, task)
    re_Link_add = compile_link(fromsite, add_tl)
    re_Link_remove = compile_link(fromsite, remove_tl)

    text = dest.text
    m1 = add_tl and re_Link_add.search(text)
    m2 = remove_tl and re_Link_remove.search(text)
    changed = False
    interactive = self.getOption('interactive')
    if add_tl:
        if m1:
            pywikibot.output(u"(already added)")
        else:
            # insert just before interwiki
            if (not interactive or
                    pywikibot.input_yn(
                        u'Connecting %s -> %s. Proceed?'
                        % (source.title(), dest.title()),
                        default=False, automatic_quit=False)):
                if self.getOption('side'):
                    # Placing {{Link FA|xx}} right next to
                    # corresponding interwiki
                    text = (text[:m1.end()] +
                            u" {{%s|%s}}" % (add_tl[0], fromsite.code) +
                            text[m1.end():])
                else:
                    # Moving {{Link FA|xx}} to top of interwikis
                    iw = textlib.getLanguageLinks(text, tosite)
                    text = textlib.removeLanguageLinks(text, tosite)
                    text += u"%s{{%s|%s}}%s" % (config.LS, add_tl[0],
                                                fromsite.code, config.LS)
                    text = textlib.replaceLanguageLinks(text, iw, tosite)
                changed = True

    if remove_tl:
        if m2:
            if (changed or  # Don't force the user to say "Y" twice
                    not interactive or
                    pywikibot.input_yn(
                        u'Connecting %s -> %s. Proceed?'
                        % (source.title(), dest.title()),
                        default=False, automatic_quit=False)):
                text = re.sub(re_Link_remove, '', text)
                changed = True
        elif task == 'former':
            pywikibot.output(u"(already removed)")

    if changed:
        comment = i18n.twtranslate(tosite, 'featured-' + task,
                                   {'page': source})
        try:
            dest.put(text, comment)
            self._save_counter += 1
        except pywikibot.LockedPage:
            pywikibot.output(u'Page %s is locked!' % dest.title())
        except pywikibot.PageNotSaved:
            pywikibot.output(u"Page not saved")
def assertFailedRoundtripInterwiki(self, text):
    old_interwikis = textlib.getLanguageLinks(text, self.site)
    new_text = textlib.replaceLanguageLinks(text, old_interwikis,
                                            site=self.site)
    self.assertNotEqual(text, new_text)
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)

    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output(color_format(
            '\n\n>>> {lightpurple}{0}{default} <<<', page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                try:
                    choice = pywikibot.input_choice(
                        'Do you want to accept these changes?',
                        [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                         ('open in Browser', 'b')], 'n')
                except QuitKeyboardInterrupt:
                    sys.exit('User quit bot run.')
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
def assertRoundtripInterwiki(self, text, count):
    old_interwikis = textlib.getLanguageLinks(text, self.site)
    new_text = textlib.replaceLanguageLinks(text, old_interwikis,
                                            site=self.site)
    self.assertEqual(len(old_interwikis), count)
    self.assertEqual(text, new_text)
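A hedged usage sketch for the two round-trip helpers above: it only illustrates what the assertions compare, since whether a given snippet survives the rewrite unchanged depends on how closely it already follows the target wiki's canonical footer layout. The site and sample wikitext are assumptions for illustration.

import pywikibot
from pywikibot import textlib

site = pywikibot.Site('en', 'wikipedia')  # assumed example site
text = 'Some article text.\n\n[[de:Beispiel]]\n[[fr:Exemple]]\n'  # assumed input

old_interwikis = textlib.getLanguageLinks(text, site)
new_text = textlib.replaceLanguageLinks(text, old_interwikis, site=site)
# assertRoundtripInterwiki expects text == new_text and a matching link count;
# assertFailedRoundtripInterwiki expects the rewritten text to differ.
print(len(old_interwikis), text == new_text)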
def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links and categories are put into the
    correct position and into the right order. This combines the old
    instances of standardizeInterwiki and standardizeCategories.

    The page footer has the following sections in that sequence:
    1. categories
    2. ## TODO: template beyond categories ##
    3. additional information depending on local site policy
    4. interwiki links
    """
    categories = None
    interwikiLinks = None

    # Pywikibot is no longer allowed to touch categories on the
    # German Wikipedia. See
    # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
    # ignoring nn-wiki because of the comment line above the iw section
    if not self.template and '{{Personendaten' not in text and \
       '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
       self.site.code not in ('et', 'it', 'bg', 'ru'):
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
        subpage = False
        if self.template:
            loc = None
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl
            except KeyError:
                pass
            if loc is not None and loc in self.title:
                subpage = True
        interwikiLinks = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # Removing the interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)

    # Adding categories
    if categories:
        # TODO: Sorting categories in alphabetic order,
        # e.g. using categories.sort()
        # TODO: Taking main cats to top
        # for name in categories:
        #     if (re.search(u"(.+?)\|(.{,1}?)", name.title()) or
        #             name.title() == name.title().split(":")[0] + title):
        #         categories.remove(name)
        #         categories.insert(0, name)
        text = textlib.replaceCategoryLinks(text, categories, site=self.site)

    # Adding the interwiki
    if interwikiLinks:
        text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)
    return text
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False
             ) -> Union[Tuple[bool, bool, bool], Tuple[str, str, bool]]:
    """
    Add text to a page.

    @param page: The page to add text to
    @type page: pywikibot.page.BasePage
    @param addText: Text to add
    @type addText: str
    @param summary: Summary of changes. If None, beginning of addText is used.
    @type summary: str
    @param regexSkip: Abort if text on page matches
    @type regexSkip: str
    @param regexSkipUrl: Abort if full url matches
    @type regexSkipUrl: str
    @param always: Always add text without user confirmation
    @type always: bool
    @param up: If True, add text to top of page, else add at bottom.
    @type up: bool
    @param putText: If True, save changes to the page, else return
        (text, newtext, always)
    @type putText: bool
    @param oldTextGiven: If None fetch page text, else use this text
    @type oldTextGiven: str
    @param reorderEnabled: If True place text above categories and
        interwiki, else place at page bottom. No effect if up = False.
    @type reorderEnabled: bool
    @param create: Create page if it does not exist
    @type create: bool
    @return: If putText=True: (success, success, always)
        else: (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)

    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', '\n')
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += '\n' + addText
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += '\n' + addText
    else:
        newtext = addText + '\n' + text

    if not putText:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')
            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)
                continue

        # either always or choice == 'y' is selected
        result = put_text(page, newtext, summary, error_count,
                          asynchronous=not always)
        if result is not None:
            return (result, result, always)
        error_count += 1
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = site.nice_get_address(page.title(asUrl=True))
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                u'''Exception! regex (or word) used with -exceptUrl is in the page. Skip!
Match was: %s''' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                u'''Exception! regex (or word) used with -except is in the page. Skip!
Match was: %s''' % result)
            return (False, False, always)

    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Dealing the stars' issue
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n', automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
def add_template(self, source, dest, task, fromsite):
    """Place or remove the Link_GA/FA template on/from a page."""
    def compile_link(site, templates):
        """Compile one link template list."""
        findtemplate = '(%s)' % '|'.join(templates)
        return re.compile(
            r"\{\{%s\|%s\}\}" % (findtemplate.replace(u' ', u'[ _]'),
                                 site.code), re.IGNORECASE)

    tosite = dest.site
    add_tl, remove_tl = self.getTemplateList(tosite.code, task)
    re_Link_add = compile_link(fromsite, add_tl)
    re_Link_remove = compile_link(fromsite, remove_tl)

    text = dest.text
    m1 = add_tl and re_Link_add.search(text)
    m2 = remove_tl and re_Link_remove.search(text)
    changed = False
    interactive = self.getOption('interactive')
    if add_tl:
        if m1:
            pywikibot.output(u"(already added)")
        else:
            # insert just before interwiki
            if (not interactive or
                    pywikibot.input_yn(u'Connecting %s -> %s. Proceed?'
                                       % (source.title(), dest.title()),
                                       default=False,
                                       automatic_quit=False)):
                if self.getOption('side'):
                    # Placing {{Link FA|xx}} right next to
                    # corresponding interwiki
                    text = (text[:m1.end()] +
                            u" {{%s|%s}}" % (add_tl[0], fromsite.code) +
                            text[m1.end():])
                else:
                    # Moving {{Link FA|xx}} to top of interwikis
                    iw = textlib.getLanguageLinks(text, tosite)
                    text = textlib.removeLanguageLinks(text, tosite)
                    text += u"%s{{%s|%s}}%s" % (LS, add_tl[0],
                                                fromsite.code, LS)
                    text = textlib.replaceLanguageLinks(text, iw, tosite)
                changed = True

    if remove_tl:
        if m2:
            if (changed or  # Don't force the user to say "Y" twice
                    not interactive or
                    pywikibot.input_yn(u'Connecting %s -> %s. Proceed?'
                                       % (source.title(), dest.title()),
                                       default=False,
                                       automatic_quit=False)):
                text = re.sub(re_Link_remove, '', text)
                changed = True
        elif task == 'former':
            pywikibot.output(u"(already removed)")

    if changed:
        comment = i18n.twtranslate(tosite, 'featured-' + task,
                                   {'page': unicode(source)})
        try:
            dest.put(text, comment)
        except pywikibot.LockedPage:
            pywikibot.output(u'Page %s is locked!' % dest.title())
        except pywikibot.PageNotSaved:
            pywikibot.output(u"Page not saved")
    text = page.get(force=True)
except Exception, e:
    pywikibot.warning(e)
    return

toRemove = []
importInto = pywikibot.ItemPage.fromPage(page)
items = None
if importInto.exists():
    importInto.get(force=True)
else:
    # possible targets of link import
    items = {}

# import interlanguage links
langlinks = textlib.getLanguageLinks(page.text, insite=page.site)
toImport = langlinks.copy()
toImport[page.site] = page

# import interproject links
iws = list(set(interProjectLinks(page)))
for iw in iws:
    if True not in [iw != i and i.site == iw.site for i in iws]:
        toImport[iw.site] = iw

for site, link in toImport.iteritems():
    if link.isRedirectPage():
        toImport[site] = link.getRedirectTarget()

if items is not None:
    for site, link in toImport.iteritems():
def main():
    stuff = cgi.FieldStorage()
    try:
        qid = stuff['id'].value.lower()
    except KeyError:
        print bootstrap.main(tool='checker.py', stuff=form,
                             title='checker.py')
        return
    try:
        site_lang = stuff['site'].value.lower()
    except KeyError:
        site_lang = 'en'
    site_lang = site_lang.replace('_', '-')
    site = pywikibot.Site(site_lang, 'wikipedia')
    repo = site.data_repository()
    #qid = 'Q1'
    sitelinks = repo.get_sitelinks(qid)
    for lang in sitelinks:
        if '_' in lang:
            newlang = lang.replace('_', '-')
            sitelinks[newlang] = {'title': sitelinks[lang]['title']}
            del sitelinks[lang]
    #print sitelinks.keys()
    #pull the enwiki link
    enwiki = sitelinks['{0}wiki'.format(site_lang)]['title']
    pg = pywikibot.Page(site, enwiki)
    enwiki_text = pg.get()
    local = textlib.getLanguageLinks(enwiki_text, insite=site)
    if not local and pg.namespace() == 10:
        try:
            enwiki_text = pywikibot.Page(site, pg.title() + '/doc').get()
            local = textlib.getLanguageLinks(enwiki_text, insite=site,
                                             template_subpage=True)
        except pywikibot.NoPage:
            pass
    all_langs = union(sitelinks.keys(), local.keys())
    header = """
<table class="table table-bordered">
  <thead>
    <tr>
      <th>Language</th>
      <th>Local</th>
      <th>Wikidata</th>
    </tr>
  </thead>
  <tbody>
"""
    footer = """
  </tbody>
</table>
"""
    text = ''
    allgood = True
    for lang in all_langs:
        row = ''
        prefix = lang.replace('wiki', '').replace('_', '-')
        row += '<td><a href="//{1}.wikipedia.org/wiki/{0}:">{0}wiki</a></td>'.format(prefix, site_lang)
        l = None
        d = None
        if lang in local:
            row += '<td><a href="//{2}.wikipedia.org/wiki/{0}:{1}">{0}:{1}</a></td>'.format(prefix, local[lang], site_lang)
            l = local[lang]
        else:
            row += '<td class=muted>----</td>'
        if lang in sitelinks:
            row += '<td><a href="//{2}.wikipedia.org/wiki/{0}:{1}">{0}:{1}</a></td>'.format(prefix, sitelinks[lang]['title'], site_lang)
            d = sitelinks[lang]['title']
        else:
            row += '<td class=muted>----</td>'
        if (l and d) and (l == d):
            row = '<tr class="done">' + row
        elif d and not l:
            row = '<tr class="done">' + row
        else:
            #lets see if its a redirect.
            checked = False
            if l and d:
                s = pywikibot.Site(prefix, 'wikipedia')
                l_p = pywikibot.Page(s, l)
                d_p = pywikibot.Page(s, d)
                if l_p.isRedirectPage():
                    if d_p == l_p.getRedirectTarget():
                        row = '<tr class="already">' + row
                        checked = True
                elif d_p.isRedirectPage():
                    if l_p == d_p.getRedirectTarget():
                        row = '<tr class="already">' + row
                        checked = True
            if not checked:
                row = '<tr class="not">' + row
                allgood = False
        row += '</tr>\n'
        text += row
    msg = ''
    if allgood and local:
        msg = '<p><center><a href="//{1}.wikipedia.org/wiki/{0}">{0}</a> can have its interwiki links removed.</center></p>'.format(enwiki, site_lang)
    elif allgood and not local:
        msg = '<p><center><a href="//{2}.wikipedia.org/wiki/{0}">{0}</a> (<a href="//www.wikidata.org/wiki/{1}">{1}</a>) has successfully been migrated to Wikidata.</center></p>'.format(enwiki, qid.upper(), site_lang)
    else:
        msg = '<p><center>Status of <a href="//{2}.wikipedia.org/wiki/{0}">{0}</a> (<a href="//www.wikidata.org/wiki/{1}">{1}</a>):</center></p>'.format(enwiki, qid.upper(), site_lang)
    text = msg + header + text + footer
    print bootstrap.main(tool='checker.py', stuff=text, title='checker.py')
def process(self, object):
    #object = {'revid', 'content', 'title', 'id'}
    if 'noexternallanglinks' in object['content'].lower():
        print 'noextlanglinks'
        return
    qid = self.repo.get_id(g_lang + 'wiki', object['title'])
    if not self.override:
        print qid
    id = int(qid.lower().replace('q', ''))  #int-ify
    created = False
    if id == -1:
        #print 'Item doesn\'t exist, creating now.'
        #try:
        #    qid = wikidata_create.create_item('en', object['title'],
        #                                      token=self.token, check=False)
        #    created = True
        #    time.sleep(2)
        #except pywikibot.data.api.APIError:
        return  #self.logger.error(object, 'Item does not exist.', None)
    #if we just created, that means the links are exact.
    #if created:
    #    self.remove_links(object, qid)
    if self.override:
        self.remove_links(object, qid, '??')
        return
    locallanglinks = textlib.getLanguageLinks(object['content'],
                                              insite=self.enwp)
    #print locallanglinks
    if not locallanglinks:
        print 'no local langlinks'
        return
    #fetch foreign links
    sitelinks = self.repo.get_sitelinks(qid)
    #lets reformat sitelinks
    wd_links = {}
    for lang in sitelinks:
        wd_links[lang.replace('_', '-')] = sitelinks[lang]['title']
    sitelinks = wd_links
    all_langs = union(sitelinks.keys(), locallanglinks.keys())
    #print len(sitelinks.keys())
    to_add = {}
    allgood = True
    errors = list()
    for lang in all_langs:
        ok = True
        prefix = lang.replace('wiki', '')
        l = locallanglinks.get(lang, None)
        if l:
            l = l.strip()
        f = sitelinks.get(lang, None)
        if f:
            f = f.strip()
        if l == f:
            continue
        elif l and not f:
            to_add[lang] = l
        elif l and f:
            #they both exist and arent equal
            checked = False
            s = pywikibot.Site(prefix, 'wikipedia')
            l_p = pywikibot.Page(s, l)
            try:
                if not l_p.exists():
                    #safe to remove anyways i guess.
                    continue
            except AttributeError:
                #AttributeError: 'Page' object has no attribute '_pageid'
                #not sure what causes this exactly
                errors.append(lang)
                continue
            f_p = pywikibot.Page(s, f)
            if l_p.isRedirectPage():
                if f_p == l_p.getRedirectTarget():
                    checked = True
            elif f_p.isRedirectPage():
                if l_p == f_p.getRedirectTarget():
                    checked = True
            if not checked:
                allgood = False
                ok = False
        if not ok:
            print 'dont match'
            errors.append(lang)
    for langwiki in to_add:
        try:
            data = add_link(qid, langwiki.replace('-', '_'),
                            to_add[langwiki], showerror=True, source=g_lang)
        except pywikibot.data.api.APIError, e:
            return self.logger.error(object, unicode(e).encode('utf-8'), qid)
        except TypeError:
            #weird pywikibot error
            return self.logger.error(
                object,
                'Error while adding [[:{0}:{1}]] to wikidata.'.format(
                    langwiki.replace('wiki', ''), to_add[langwiki]), qid)
def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links, categories and star templates are
    put into the correct position and into the right order. This
    combines the old instances of standardizeInterwiki and
    standardizeCategories.

    The page footer has the following sections in that sequence:
    1. categories
    2. ## TODO: template beyond categories ##
    3. additional information depending on local site policy
    4. stars templates for featured and good articles
    5. interwiki links
    """
    starsList = [
        u'bueno',
        u'bom interwiki',
        u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
        u'destacado', u'destaca[tu]',
        u'enllaç[ _]ad',
        u'enllaz[ _]ad',
        u'leam[ _]vdc',
        u'legătură[ _]a[bcf]',
        u'liamm[ _]pub',
        u'lien[ _]adq',
        u'lien[ _]ba',
        u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
        u'liên[ _]kết[ _]chọn[ _]lọc',
        u'ligam[ _]adq',
        u'ligazón[ _]a[bd]',
        u'ligoelstara',
        u'ligoleginda',
        u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]',
        u'link[ _]km', u'link[ _]sm', u'linkfa',
        u'na[ _]lotura',
        u'nasc[ _]ar',
        u'tengill[ _][úg]g',
        u'ua',
        u'yüm yg',
        u'רא',
        u'وصلة مقالة جيدة',
        u'وصلة مقالة مختارة',
    ]

    categories = None
    interwikiLinks = None
    allstars = []

    # Pywikibot is no longer allowed to touch categories on the
    # German Wikipedia. See
    # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
    # ignoring nn-wiki because of the comment line above the iw section
    if not self.template and '{{Personendaten' not in text and \
       '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
       self.site.code not in ('et', 'it', 'bg', 'ru'):
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
        subpage = False
        if self.template:
            loc = None
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl
            except KeyError:
                pass
            if loc is not None and loc in self.title:
                subpage = True
        interwikiLinks = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # Removing the interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)
        # Removing the stars' issue
        starstext = textlib.removeDisabledParts(text)
        for star in starsList:
            regex = re.compile(
                r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
            found = regex.findall(starstext)
            if found != []:
                text = regex.sub('', text)
                allstars += found

    # Adding categories
    if categories:
        # TODO: Sorting categories in alphabetic order,
        # e.g. using categories.sort()
        # TODO: Taking main cats to top
        # for name in categories:
        #     if re.search(u"(.+?)\|(.{,1}?)", name.title()) or name.title() == name.title().split(":")[0] + title:
        #         categories.remove(name)
        #         categories.insert(0, name)
        text = textlib.replaceCategoryLinks(text, categories, site=self.site)

    # Adding stars templates
    if allstars:
        text = text.strip() + self.site.family.interwiki_text_separator
        allstars.sort()
        for element in allstars:
            text += '%s%s' % (element.strip(), config.line_separator)
            pywikibot.log(u'%s' % element.strip())

    # Adding the interwiki
    if interwikiLinks:
        text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)
    return text
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)

    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Dealing the stars' issue
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n', automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
def main():
    stuff = cgi.FieldStorage()
    try:
        qid = stuff['id'].value.lower()
    except KeyError:
        print bootstrap.main(tool='checker.py', stuff=form,
                             title='checker.py')
        return
    try:
        site_lang = stuff['site'].value.lower()
    except KeyError:
        site_lang = 'en'
    site_lang = site_lang.replace('_', '-')
    site = pywikibot.Site(site_lang, 'wikipedia')
    repo = site.data_repository()
    #qid = 'Q1'
    item = pywikibot.ItemPage(repo, qid)
    sitelinks = item.get().get('sitelinks')
    # sitelinks = repo.get_sitelinks(qid)
    for lang in sitelinks:
        if '_' in lang:
            newlang = lang.replace('_', '-')
            sitelinks[newlang] = {'title': sitelinks[lang]['title']}
            del sitelinks[lang]
    #print sitelinks.keys()
    #pull the enwiki link
    enwiki = sitelinks['{0}wiki'.format(site_lang)]['title']
    pg = pywikibot.Page(site, enwiki)
    enwiki_text = pg.get()
    local = textlib.getLanguageLinks(enwiki_text, insite=site)
    if not local and pg.namespace() == 10:
        try:
            enwiki_text = pywikibot.Page(site, pg.title() + '/doc').get()
            local = textlib.getLanguageLinks(enwiki_text, insite=site,
                                             template_subpage=True)
        except pywikibot.NoPage:
            pass
    all_langs = union(sitelinks.keys(), local.keys())
    header = """
<table class="table table-bordered">
  <thead>
    <tr>
      <th>Language</th>
      <th>Local</th>
      <th>Wikidata</th>
    </tr>
  </thead>
  <tbody>
"""
    footer = """
  </tbody>
</table>
"""
    text = ''
    allgood = True
    for lang in all_langs:
        row = ''
        prefix = lang.replace('wiki', '').replace('_', '-')
        row += '<td><a href="//{1}.wikipedia.org/wiki/{0}:">{0}wiki</a></td>'.format(prefix, site_lang)
        l = None
        d = None
        if lang in local:
            row += '<td><a href="//{2}.wikipedia.org/wiki/{0}:{1}">{0}:{1}</a></td>'.format(prefix, local[lang], site_lang)
            l = local[lang]
        else:
            row += '<td class=muted>----</td>'
        if lang in sitelinks:
            row += '<td><a href="//{2}.wikipedia.org/wiki/{0}:{1}">{0}:{1}</a></td>'.format(prefix, sitelinks[lang]['title'], site_lang)
            d = sitelinks[lang]['title']
        else:
            row += '<td class=muted>----</td>'
        if (l and d) and (l == d):
            row = '<tr class="done">' + row
        elif d and not l:
            row = '<tr class="done">' + row
        else:
            #lets see if its a redirect.
            checked = False
            if l and d:
                s = pywikibot.Site(prefix, 'wikipedia')
                l_p = pywikibot.Page(s, l)
                d_p = pywikibot.Page(s, d)
                if l_p.isRedirectPage():
                    if d_p == l_p.getRedirectTarget():
                        row = '<tr class="already">' + row
                        checked = True
                elif d_p.isRedirectPage():
                    if l_p == d_p.getRedirectTarget():
                        row = '<tr class="already">' + row
                        checked = True
            if not checked:
                row = '<tr class="not">' + row
                allgood = False
        row += '</tr>\n'
        text += row
    msg = ''
    if allgood and local:
        msg = '<p><center><a href="//{1}.wikipedia.org/wiki/{0}">{0}</a> can have its interwiki links removed.</center></p>'.format(enwiki, site_lang)
    elif allgood and not local:
        msg = '<p><center><a href="//{2}.wikipedia.org/wiki/{0}">{0}</a> (<a href="//www.wikidata.org/wiki/{1}">{1}</a>) has successfully been migrated to Wikidata.</center></p>'.format(enwiki, qid.upper(), site_lang)
    else:
        msg = '<p><center>Status of <a href="//{2}.wikipedia.org/wiki/{0}">{0}</a> (<a href="//www.wikidata.org/wiki/{1}">{1}</a>):</center></p>'.format(enwiki, qid.upper(), site_lang)
    text = msg + header + text + footer
    print bootstrap.main(tool='checker.py', stuff=text, title='checker.py')
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)

    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output(color_format(
            '\n\n>>> {lightpurple}{0}{default} <<<', page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if not putText:
            return (text, newtext, always)
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')
            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)
        if always or choice == 'y':
            result = put_text(page, newtext, summary, error_count,
                              asynchronous=not always)
            if result is not None:
                return (result, result, always)
            error_count += 1