def add_text(generator):
    """For each main-namespace page from *generator*, create redirect
    variants of its title (Arabic vs Persian letters, ZWNJ/space forms,
    punctuation spacing, digits, common word swaps) via creat_redirect().

    NOTE(review): several ``u""`` literals below were almost certainly a
    zero-width non-joiner (U+200C) lost when this file was re-encoded.
    As written, ``u"" in pagetitle`` is always True and
    ``.replace(u"", u" ")`` inserts a space between every character —
    restore the ZWNJ before running this.
    """
    # Newline-separated list of existing fa-wiki titles; used to skip
    # redirects that already exist. Updated by creat_redirect().
    fa_page_title_list = get_query()
    for page in generator:
        # Skip pages already handled in this run (module-level cache).
        if _cache.get(tuple([page.title(), 'add_text'])):
            wikipedia.output(
                u'\03{lightred}>>> Page ' + page.title() +
                u' was checked before so it will pass\03{default}')
            continue
        original_text = u''
        if page.namespace() != 0:  # articles only
            continue
        try:
            pagetitle_source = page.title()
            original_text = page.get()
            pagetitle = pagetitle_source
            redirection = 0
        except wikipedia.NoPage:
            wikipedia.output(u"%s doesn't exist, skip!" % page.title())
            continue
        except wikipedia.IsRedirectPage:
            # Redirect: build variants of the redirect's own title but make
            # them point at the redirect target (pagetitle_source).
            wikipedia.output(u"%s is a redirect, skip!" % page.title())
            pagemain = page.getRedirectTarget()
            try:
                original_text = pagemain.get()
            except:  # NOTE(review): bare except hides real errors
                wikipedia.output(u"%s doesn't exist, skip!" % pagemain.title())
                continue
            pagetitle_source = pagemain.title()
            pagetitle = page.title()
            redirection = 1
        except:  # NOTE(review): bare except hides real errors
            continue
        # Skip any title containing Latin letters.
        pagetitle3 = re.sub(
            ur'[qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]',
            ur"", pagetitle)
        if pagetitle3 != pagetitle:
            continue
        # Skip "being edited" pages.
        if pagetitle.find(u'در حال ویرایش') != -1:
            continue
        _cache[tuple([page.title(), 'add_text'])] = 1
        if original_text:
            if redirection == 0:
                # Titles containing Arabic diacritics or wrong characters
                # are reported on a maintenance page for manual moving.
                wrong_words = ur'ًٌٍَُِّْٔ' + u'يٰك' + u"@#$%^&*'~`"
                pagetitle2 = re.sub(ur'[' + wrong_words + ur']', ur"",
                                    pagetitle)
                passp = redirect_find(pagetitle)
                if not passp:
                    if pagetitle != pagetitle2:
                        # Report the first offending character found.
                        for vowel in wrong_words:
                            if vowel in pagetitle:
                                break
                        passport = False
                        text = u'{| class="wikitable plainlinks"\n|-\n'
                        # NOTE(review): rebinding ``page`` clobbers the loop
                        # variable for the rest of this iteration.
                        page = wikipedia.Page(
                            fasite, u"user:fawikibot/movearticles2")
                        text_fa = page.get()
                        if not pagetitle in text_fa:
                            text += (u"|[[" + pagetitle + u"]] ||«" + vowel +
                                     u"»\n|-\n")
                            passport = True
                        text += u'\n|}\n'
                        if passport:
                            page.put(text_fa + u'\n' + text,
                                     u"ربات:مقالهها برای انتقال")
            try:
                #-------------------------1----------------------------------
                # Persian yeh/kaf -> Arabic yeh/kaf variant.
                if u"ی" in pagetitle or u"ک" in pagetitle:
                    # NOTE(review): lost ZWNJ — as written this condition is
                    # always False, so the else branch always runs.
                    if not u"" in pagetitle:
                        New_redirect_name = pagetitle.replace(
                            u"ی", u"ي").replace(u"ک", u"ك")
                        if fa_page_title_list.find(
                                u'\n' + New_redirect_name.strip() +
                                u'\n') == -1:
                            msg = u"ربات:تغییرمسیر از ی و ک عربی به ی و ک فارسی (" + botVersion + u")"
                            fa_page_title_list = creat_redirect(
                                fa_page_title_list, New_redirect_name,
                                pagetitle_source, msg)
                    else:
                        # NOTE(review): .replace(u"", u" ") — lost ZWNJ.
                        New_redirect_name = pagetitle.replace(
                            u"ی", u"ي").replace(u"ک", u"ك").replace(u"", u" ")
                        if fa_page_title_list.find(
                                u'\n' + New_redirect_name.strip() +
                                u'\n') == -1:
                            msg = u"ربات:تغییرمسیر از ی و ک عربی به ی و ک فارسی و فاصله به فاصلهٔ مجازی (" + botVersion + u")"
                            fa_page_title_list = creat_redirect(
                                fa_page_title_list, New_redirect_name,
                                pagetitle_source, msg)
                #-------------------------2----------------------------------
                # ZWNJ -> plain space variant. NOTE(review): lost ZWNJ in
                # both literals below.
                if u"" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"", u" ")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از فاصله به فاصلهٔ مجازی (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3----------------------------------
                # alef-madda -> alef variant.
                if u"آ" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"آ", u"ا")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از ا به آ (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3----------------------------------
                # alef-hamza -> alef variant.
                if u"أ" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"أ", u"ا")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از ا به أ (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3.5----------------------------------
                # Drop standalone hamza.
                if u"ء" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"ء", u"")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از ء به (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3.5----------------------------------
                # NOTE(review): BUG — ``(a or b or c) in x`` only tests the
                # first operand (u"," in pagetitle); the other punctuation
                # never triggers this branch. Same bug in the next section.
                if (u"," or u"،" or u"(") in pagetitle:
                    # NOTE(review): the final .replace(u" ", u" ") is a
                    # no-op as displayed — presumably double-space -> space
                    # before the paste mangled it.
                    New_redirect_name = pagetitle.replace(u",", u" ").replace(
                        u"،", u" ").replace(u")", u" ").replace(
                        u"(", u" ").replace(u" ", u" ").strip()
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از ,()، به (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3.5----------------------------------
                # Normalised punctuation spacing variant (same or-bug).
                if (u"(" or u"،" or u",") in pagetitle:
                    New_redirect_name = pagetitle.replace(
                        u" ،", u"،").replace(u" ,", u",").replace(
                        u"،", u"، ").replace(u",", u", ").replace(
                        u"(", u" (").replace(u")", u") ").replace(
                        u" ", u" ").strip()
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر فاصله برای سجاوندی (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3.5----------------------------------
                # teh-marbuta -> heh variant.
                if u"ة" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"ة", u"ه")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از ه به ة (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------4----------------------------------
                # Remove space before Persian comma.
                if u"،" in pagetitle:
                    New_redirect_name = pagetitle.replace(u" ،", u"،")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر سجاوندی درست برای ویرگول(" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------5----------------------------------
                # Latin comma -> Persian comma variant.
                if u"," in pagetitle:
                    New_redirect_name = pagetitle.replace(u",", u"،")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر سجاوندی درست برای ویرگول غیرفارسی (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------5.5----------------------------------
                # Swap common ordinal synonyms (whole-word, hence the space
                # padding around the title before replace).
                b = -1
                sources = [
                    u'اول', u'یکم', u'ثانی', u'ثالث', u'نخستین', u'اولین'
                ]
                targets = [u'یکم', u'اول', u'دوم', u'سوم', u'اولین', u'نخستین']
                for i in sources:
                    b += 1
                    j = targets[b]
                    if i in pagetitle:
                        New_redirect_name = (u' ' + pagetitle + u' ').replace(
                            u' ' + i + u' ', u' ' + j + u' ').strip()
                        if fa_page_title_list.find(
                                u'\n' + New_redirect_name.strip() +
                                u'\n') == -1:
                            msg = u"ربات:تغییرمسیر از " + j + u" به " + i + u" (" + botVersion + u")"
                            fa_page_title_list = creat_redirect(
                                fa_page_title_list, New_redirect_name,
                                pagetitle_source, msg)
                #-------------------------6----------------------------------
                # waw-hamza -> waw variant.
                if u"ؤ" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"ؤ", u"و")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر و به ؤ (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------6----------------------------------
                # "computer" -> "rayaneh" word variant (lost ZWNJ in the
                # plural/adjective replacements).
                if u"کامپیوتر" in pagetitle:
                    New_redirect_name = pagetitle.replace(
                        u"کامپیوترها", u"رایانهها").replace(
                        u"کامپیوتری", u"رایانهای").replace(
                        u"کامپیوتر", u"رایانه")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر رایانه به کامپیوتر (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------7----------------------------------
                # heh+yeh -> heh variant (lost ZWNJ likely).
                if u"هی" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"هی", u"ه")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر ه به هی (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------8----------------------------------
                # "ه " -> "هٔ " (ezafe) variant, excluding common words that
                # legitimately end in heh+space.
                if u"ه " in pagetitle and redirection == 0 and pagetitle.find(
                        u'كه ') == -1 and pagetitle.find(
                        u'اه ') == -1 and pagetitle.find(
                        u'ه اي ') == -1 and pagetitle.find(
                        u'ه ای ') == -1 and pagetitle.find(
                        u'که ') == -1 and pagetitle.find(
                        u'راه ') == -1 and pagetitle.find(
                        u'ه با ') == -1 and pagetitle.find(
                        u'گروه ') == -1 and pagetitle.find(
                        u'ه که ') == -1 and pagetitle.find(
                        u'ه كه ') == -1 and pagetitle.find(
                        u'ه در ') == -1 and pagetitle.find(
                        u'ه براي ') == -1 and pagetitle.find(
                        u'ه برای ') == -1 and pagetitle.find(
                        u'ه از ') == -1 and pagetitle.find(
                        u'ه ;') == -1 and pagetitle.find(
                        u'علیه ') == -1 and pagetitle.find(
                        u'عليه ') == -1 and pagetitle.find(
                        u'ه و ') == -1 and pagetitle.find(
                        u'ه :') == -1 and pagetitle.find(
                        u'شاه ') == -1 and pagetitle.find(
                        u'به ') == -1 and pagetitle.find(
                        u'الله ') == -1 and pagetitle.find(
                        u'ه (') == -1 and pagetitle.find(
                        u'گه ') == -1 and pagetitle.find(
                        u'ه -') == -1 and pagetitle.find(u'ه-') == -1:
                    New_redirect_name = pagetitle.replace(u"ه ", u"هٔ ")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر هٔ به ه (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------9----------------------------------
                # "ها " -> "های " variant, with similar word exclusions.
                if u"ها " in pagetitle and redirection == 0 and pagetitle.find(
                        u'ها (') == -1 and pagetitle.find(
                        u'ها براي ') == -1 and pagetitle.find(
                        u'ها برای ') == -1 and pagetitle.find(
                        u'ها با ') == -1 and pagetitle.find(
                        u'ها در ') == -1 and pagetitle.find(
                        u'ها از ') == -1 and pagetitle.find(
                        u'ها كه ') == -1 and pagetitle.find(
                        u'ها که ') == -1 and pagetitle.find(
                        u'ها و ') == -1 and pagetitle.find(
                        u'ها :') == -1 and pagetitle.find(u'ها ;') == -1:
                    New_redirect_name = pagetitle.replace(u"ها ", u"های ")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر های به ها (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------10----------------------------------
                # NOTE(review): lost ZWNJ — both literals display identically
                # so this replace is a no-op as written.
                if u"ها" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"ها", u"ها")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر فاصلهٔ مجازی+ها به ها (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------10.5----------------------------------
                # ZWNJ+ha -> space+ha variant (lost ZWNJ again).
                if u"ها" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"ها", u" ها")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر فاصلهٔ مجازی+ها به فاصله+ها (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------11----------------------------------
                # mi+ZWNJ -> mi variant (lost ZWNJ; replaces look like
                # no-ops as displayed).
                if u"می" in pagetitle or u"مي" in pagetitle:
                    New_redirect_name = pagetitle.replace(
                        u"می", u"می").replace(u"مي", u"مي")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر می+فاصلهٔ مجازی به می (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------12----------------------------------
                # Persian digits -> Latin digits variant.
                farsinum = [
                    u'۰', u'۱', u'۲', u'۳', u'۴', u'۵', u'۶', u'۷', u'۸', u'۹'
                ]
                counters = -1
                pagetitle2 = pagetitle
                for num in farsinum:
                    counters += 1
                    pagetitle2 = pagetitle2.replace(num, str(counters))
                if pagetitle2 != pagetitle:
                    # NOTE(review): almost certainly a BUG — this checks and
                    # creates a redirect from the stale New_redirect_name of
                    # an earlier section instead of pagetitle2.
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر عدد لاتین به عدد فارسی (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------13----------------------------------
                # "United States of America" -> "United States" variant.
                if u"ایالات متحده آمریکا" in pagetitle:
                    New_redirect_name = pagetitle.replace(
                        u"ایالات متحده آمریکا", u"ایالات متحده")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                elif u"آمریکا" in pagetitle:
                    # Skip the adjective form ("American").
                    if not u"آمریکایی" in pagetitle:
                        New_redirect_name = pagetitle.replace(
                            u"آمریکا", u"ایالات متحده")
                        if fa_page_title_list.find(
                                u'\n' + New_redirect_name.strip() +
                                u'\n') == -1:
                            msg = u"ربات:تغییرمسیر (" + botVersion + u")"
                            fa_page_title_list = creat_redirect(
                                fa_page_title_list, New_redirect_name,
                                pagetitle_source, msg)
                # Reset per-page scratch variables.
                pagetitle_source, New_redirect_name, msg = u'', u'', u''
            except:  # NOTE(review): bare except swallows all section errors
                continue
# it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import wikipedia, re, catlib essite = wikipedia.Site('es', 'wikipedia') selfcat = wikipedia.Page( essite, u'Wikipedia:Informes automáticos/Categorías autocontenidas') spam = u"Usando: [[Wikipedia:Informes automáticos/Categorías autocontenidas]]" m = re.compile(ur"(?i)\[\[:(C[^\]].*?)\]\]").finditer(selfcat.get()) for i in m: cattitle = i.group(1) catpage = catlib.Category(essite, cattitle) if catpage.exists( ) and not catpage.isRedirectPage() and not catpage.isDisambig(): cattitleWithout = catpage.titleWithoutNamespace() cattext = catpage.get() wikipedia.output(catpage.title()) #marcamos para destruir las que no tienen artículos ni subcategorías if len(cattext) >= len(cattitle) + 4 and len(
# You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import datetime import os, re, wikipedia site=wikipedia.Site("es", "wikipedia") #discusiones mas activas os.system('mysql -h sql-s3 -e "use eswiki_p;select count(*) as count, rc_title from recentchanges where rc_timestamp>=date_add(now(), interval -3 day) and rc_namespace=1 group by rc_title order by count desc limit 25;" > /home/emijrp/temporal/tarea006data') f=open('/home/emijrp/temporal/tarea006data', 'r') sql=unicode(f.read(), 'utf-8') m=re.compile(ur"(\d+)\s+(.*)").finditer(sql) page=wikipedia.Page(site, "Template:DiscusionesActivas") s=u"<div class='plainlinks'>\n{| class='wikitable' style='width: {{{ancho|275px}}};clear: right;float: right;margin: 0 0 1em 1em;text-align: center;'\n! Discusiones más activas [[Image:FireIcon.svg|18px]]\n! Ediciones\n" c=1 ss="" for i in m: ed=str(i.group(1)) art_=i.group(2) art=re.sub("_", " ", art_) if not re.search(u"Candidatura a destacado", art): if c<=5: ss+=u"|-\n| [[Discusión:%s|%s]] || [http://es.wikipedia.org/w/index.php?title=Discusión:%s&action=history %s] \n" % (art,art,art_,ed) c+=1 s+=ss s+=u"|-\n| colspan='2' | <small>Actualizado: {{subst:CURRENTTIME}} (UTC) del {{subst:CURRENTDAY}} de {{subst:CURRENTMONTHNAME}} de {{subst:CURRENTYEAR}}</small>\n" s+=u"|}\n</div>" wikipedia.output(s)
def main():
    """Update [[User:AVBOT/Últimos días]] with a per-day table of AVBOT's
    main-namespace edits over the last ``limit`` days.

    Reads the eswiki recent-changes replica with MySQLdb, aggregates edit
    counts per YYYYMMDD day, and writes a wikitable (most recent day
    first) back to the wiki.
    """
    limit = 14  # days of history to report
    conn = MySQLdb.connect(host='sql-s3',
                           db='eswiki_p',
                           read_default_file='~/.my.cnf',
                           use_unicode=True)
    cursor = conn.cursor()
    cursor.execute(
        "SELECT rc_timestamp from recentchanges where rc_user_text='AVBOT' and rc_namespace=0 and rc_deleted=0 and rc_timestamp>=date_add(now(), interval -%d day);"
        % limit)
    result = cursor.fetchall()
    days = {}
    c = 0
    for row in result:
        if len(row) == 1:
            # rc_timestamp is YYYYMMDDHHMMSS; keep the YYYYMMDD day part.
            rc_timestamp = row[0][:8]
            # ``in`` instead of the deprecated/removed dict.has_key().
            if rc_timestamp in days:
                days[rc_timestamp] += 1
            else:
                days[rc_timestamp] = 1
            c += 1
            percent(c)  # progress indicator
    # Sort by day descending (tuple ordering == the old list-of-lists
    # sort() + reverse()).
    l = sorted(days.items(), reverse=True)
    site = wikipedia.Site('es', 'wikipedia')
    weekday = {
        0: u'lunes',
        1: u'martes',
        2: u'miércoles',
        3: u'jueves',
        4: u'viernes',
        5: u'sábado',
        6: u'domingo'
    }
    monthname = {
        1: u'enero',
        2: u'febrero',
        3: u'marzo',
        4: u'abril',
        5: u'mayo',
        6: u'junio',
        7: u'julio',
        8: u'agosto',
        9: u'septiembre',
        10: u'octubre',
        11: u'noviembre',
        12: u'diciembre'
    }
    output = u"{| class='wikitable sortable' align='right' style='text-align: center' \n! Día !! Ediciones "
    for day, edits in l:
        date = datetime.datetime(year=int(day[0:4]),
                                 month=int(day[4:6]),
                                 day=int(day[6:8]))
        output += u"\n|-\n| %s, [[%d de %s]] || %d " % (
            weekday[date.weekday()], date.day, monthname[date.month], edits)
    output += u"\n|-\n| colspan=2 | <small>''Esta tabla recoge la actividad de AVBOT<br/>en los últimos días<br/>* La tasa ha disminuido gracias a<br/>la nueva herramienta [[Special:AbuseFilter|AbuseFilter]]''</small>\n|}"
    wii = wikipedia.Page(site, u"User:AVBOT/Últimos días")
    wii.put(output, u"BOT - Actualizando plantilla")
def doImage(self, image):
    """Mirror one image across the wiki network.

    *image* is a '|'-separated spec:
    ``name|newName|sourceWiki|exclusionMode|exclusionInfo`` — only the
    first field is required. The image is downloaded from the source wiki
    and re-uploaded (with a {{networkMirror}}-tagged description page) to
    every other wiki selected by the exclusion mode.

    Returns False on error, None on success.
    """
    data = image.split(u'|')
    imageName = data[0]
    newImageName = data[0]
    # Optional second field: rename at the destination.
    if len(data) >= 2 and data[1].strip():
        newImageName = data[1]
    sourceWiki = u'anime'
    if len(data) >= 3:
        sourceWiki = data[2]
    exclusionMode = u'normal'
    if len(data) >= 4:
        exclusionMode = data[3]
    exclusionInfo = u''
    if len(data) >= 5:
        exclusionInfo = data[4]
    sourceSite = None
    outputSites = []
    wikipedia.output(u'Doing Image %s' % imageName)
    # Select source site and destination sites.
    for site in self.siteList:
        if site.family.name == sourceWiki:
            sourceSite = site
        if exclusionMode == u'normal':
            outputSites.append(site)
        elif exclusionMode == u'include':
            includes = exclusionInfo.split(u',')
            if site.family.name in includes:
                outputSites.append(site)
        elif exclusionMode == u'exclude':
            excludes = exclusionInfo.split(u',')
            # BUG FIX: original tested ``site.family.name not in includes``,
            # a variable from the other branch (NameError or wrong list);
            # the exclusion list is what must be consulted here.
            if site.family.name not in excludes:
                outputSites.append(site)
        else:
            wikipedia.output(u'Unknown exclusion mode. Skiping %s.' % imageName)
            return False
    if sourceSite is None:
        wikipedia.output(u'No source site found. Skiping %s.' % imageName)
        return False
    try:
        sourceDescriptionPage = wikipedia.Page(sourceSite, imageName, None,
                                               6)  # 6 = Image namespace
        sourceImagePage = wikipedia.ImagePage(sourceSite,
                                              sourceDescriptionPage.title())
    except wikipedia.NoPage:
        wikipedia.output(u'No source page found. Skiping %s.' % imageName)
        return False
    sourceURL = sourceImagePage.fileUrl()
    if '://' not in sourceURL:
        sourceURL = u'http://%s%s' % (sourceSite.hostname(), sourceURL)
    # Download the source image bytes.
    uo = wikipedia.MyURLopener()
    sourceFile = uo.open(sourceURL, "rb")
    wikipedia.output(u'Reading file %s' % sourceURL)
    sourceContents = sourceFile.read()
    if sourceContents.find(
            "The requested URL was not found on this server.") != -1:
        wikipedia.output("Couldn't download the image. Skiping.")
        return False
    sourceFile.close()
    # Build the mirrored description page text.
    pageDescription = sourceDescriptionPage.get()
    r = re.compile(u'== Summary ==\n?')
    if re.search(r, pageDescription):
        pageDescription = re.sub(r, u'', pageDescription)
    mirrorText = u'{{networkMirror|%s|%s}}' % (imageName,
                                               sourceSite.family.name)
    comm = re.compile(u'({{commons(\|[^{}]*)?}})', re.IGNORECASE)
    if re.search(comm, pageDescription):
        # Keep an existing {{commons}} tag first, mirror tag right after.
        pageDescription = re.sub(comm, u'\\1\n%s' % mirrorText,
                                 pageDescription)
    else:
        pageDescription = u'%s%s' % (mirrorText, pageDescription)
    pageDescription = u'== Summary ==\n%s' % pageDescription
    for site in outputSites:
        # Never re-upload onto the source itself unless the name changes.
        if sourceSite.family.name != site.family.name or imageName != newImageName:
            doUpload = False
            doDescription = False
            try:
                siteDescriptionPage = wikipedia.Page(site, newImageName,
                                                     None, 6)
                siteImagePage = wikipedia.ImagePage(
                    site, siteDescriptionPage.title())
                siteURL = siteImagePage.fileUrl()
                if '://' not in siteURL:
                    siteURL = u'http://%s%s' % (site.hostname(), siteURL)
                uo2 = wikipedia.MyURLopener()
                siteFile = uo2.open(siteURL, "rb")
                wikipedia.output(u'Reading file %s' % siteURL)
                siteContents = siteFile.read()
                # BUG FIX: original re-tested sourceContents here, so a
                # missing file at the destination was never detected.
                if siteContents.find(
                        "The requested URL was not found on this server."
                ) != -1:
                    wikipedia.output(
                        "Couldn't download the image at new location.")
                    doUpload = True
                    # NOTE(review): kept from the original — this break
                    # aborts the whole site loop (and skips the upload just
                    # scheduled); probably a further latent bug.
                    break
                siteFile.close()
                if siteContents != sourceContents:
                    doUpload = True
                if siteDescriptionPage.get() != pageDescription:
                    doDescription = True
            except wikipedia.NoPage:
                doUpload = True
                doDescription = True
            if doUpload:
                bot = upload.UploadRobot(url=sourceURL,
                                         useFilename=newImageName,
                                         keepFilename=True,
                                         verifyDescription=False,
                                         description=msg['en'],
                                         targetSite=site,
                                         urlEncoding=sourceSite.encoding())
                bot.run()
            if doDescription:
                siteDescriptionPage.put(pageDescription)
# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import wikipedia,re site=wikipedia.Site("es", "wikipedia") page=wikipedia.Page(site, u"Wikipedia:Candidaturas a bibliotecario/Tabla") #{{CandidaturaBibliotecario|Racso|Tomatejc|18/01/2008|03/04/2007|4402|color=#FFFFCC}} m=re.compile(ur"\{\{CandidaturaBibliotecario\|(?P<candidato>[^\|]+)\|(?P<propuesto>[^\|]+)\|").finditer(page.get()) s=u"{| class='wikitable' width='500px' style='font-size: 90%s;text-align: center;'\n! colspan=7 | Candidaturas a bibliotecario \n|-\n! # !! Usuario !! Propuesto por !! A favor !! En contra !! %s !! Estado" % ("%", "%") raw=u"" c=0 send=False limite=0 for i in m: limite+=1 if limite>3: break cafavor=0 cencontra=0 estado=u"Abierta"
def main(): quietMode = False # use -quiet to get less output # if the -file argument is used, page titles are stored in this array. # otherwise it will only contain one page. articles = [] # if -file is not used, this temporary array is used to read the page title. page_title = [] # Which namespaces should be processed? # default to [] which means all namespaces will be processed namespaces = [] xmlfilename = None gen = None # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() for arg in pywikibot.handleArgs(): if arg.startswith('-xml'): if len(arg) == 4: xmlfilename = pywikibot.input( u'Please enter the XML dump\'s filename:') else: xmlfilename = arg[5:] gen = TableXmlDumpPageGenerator(xmlfilename) elif arg == '-sql': query = u""" SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) WHERE old_text LIKE '%<table%' LIMIT 200""" gen = pagegenerators.MySQLPageGenerator(query) elif arg.startswith('-namespace:'): try: namespaces.append(int(arg[11:])) except ValueError: namespaces.append(arg[11:]) elif arg.startswith('-skip:'): articles = articles[articles.index(arg[6:]):] elif arg.startswith('-auto'): config.table2wikiAskOnlyWarnings = True config.table2wikiSkipWarnings = True print "Automatic mode!\n" elif arg.startswith('-quiet'): quietMode = True else: if not genFactory.handleArg(arg): page_title.append(arg) # if the page is given as a command line argument, # connect the title's parts with spaces if page_title != []: page_title = ' '.join(page_title) page = pywikibot.Page(pywikibot.getSite(), page_title) gen = iter([page]) if not gen: gen = genFactory.getCombinedGenerator() if gen: if namespaces != []: gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = Table2WikiRobot(preloadingGen, quietMode) bot.run() else: 
pywikibot.showHelp('table2wiki')
def _parseCategory(self, purge=False, startFrom=None):
    """
    Yields all articles and subcategories that are in this category.

    Set purge to True to instruct MediaWiki not to serve a cached version.

    Set startFrom to a string which is the title of the page to start from.

    Yielded results are tuples in the form (tag, page) where tag is one of the
    constants ARTICLE and SUBCATEGORY, and title is the Page or Category
    object.

    Note that results of this method need not be unique.

    This should not be used outside of this module.
    """
    # The category-listing HTML changed between MediaWiki releases, so the
    # scraping regexes are chosen by version number.
    if self.site().versionnumber() < 4:
        Rtitle = re.compile('title\s?=\s?\"([^\"]*)\"')
    elif self.site().versionnumber() < 8:
        # FIXME seems to parse all links
        Rtitle = re.compile('/\S*(?: title\s?=\s?)?\"([^\"]*)\"')
    else:
        Rtitle = re.compile(
            '<li>(?:<span.*?>)?<a href=\".*?\"\s?title\s?=\s?\"([^\"]*)\"\>\+?[^\<\+]'
        )
    # 1.8+ renders subcategories (CategoryTree) and images (gallery)
    # differently from plain article links.
    if self.site().versionnumber() < 8:
        Rsubcat = None
        Rimage = None
    else:
        Rsubcat = re.compile(
            'CategoryTreeLabelCategory\"\s?href=\".+?\">(.+?)</a>')
        Rimage = re.compile(
            '<div class\s?=\s?\"thumb\"\sstyle=\"[^\"]*\">(?:<div style=\"[^\"]*\">)?<a href=\".*?\"(?:\sclass="image")?\stitle\s?=\s?\"([^\"]*)\"'
        )
    # NOTE(review): ns is computed but never used below.
    ns = self.site().category_namespaces()
    # regular expression matching the "(next 200)" link
    RLinkToNextPage = re.compile('&from=(.*?)" title="')
    if startFrom:
        currentPageOffset = urllib.quote(
            startFrom.encode(self.site().encoding()))
    else:
        currentPageOffset = None
    # Fetch listing pages one at a time, following "(next 200)" links.
    while True:
        path = self.site().get_address(self.urlname())
        if purge:
            path += '&action=purge'
        if currentPageOffset:
            path += '&from=' + currentPageOffset
            wikipedia.output(
                'Getting [[%s]] starting at %s...'
                % (self.title(),
                   wikipedia.url2link(currentPageOffset, self.site(),
                                      self.site())))
        else:
            wikipedia.output('Getting [[%s]]...' % self.title())
        wikipedia.get_throttle()
        txt = self.site().getUrl(path)
        # index where subcategory listing begins
        if self.site().versionnumber() >= 9:
            # These IDs were introduced in 1.9
            if '<div id="mw-subcategories">' in txt:
                ibegin = txt.index('<div id="mw-subcategories">')
            elif '<div id="mw-pages">' in txt:
                ibegin = txt.index('<div id="mw-pages">')
            elif '<div id="mw-category-media">' in txt:
                ibegin = txt.index('<div id="mw-category-media">')
            else:
                # No pages
                return
        else:
            ibegin = txt.index('<!-- start content -->'
                               )  # does not work for cats without text
            # TODO: This parses category text and may think they are
            # pages in category! Check for versions before 1.9
        # index where article listing ends
        if '<div class="printfooter">' in txt:
            iend = txt.index('<div class="printfooter">')
        elif '<div class="catlinks">' in txt:
            iend = txt.index('<div class="catlinks">')
        else:
            iend = txt.index('<!-- end content -->')
        # Restrict scraping to the listing body only.
        txt = txt[ibegin:iend]
        for title in Rtitle.findall(txt):
            if title == self.title():
                # This is only a link to "previous 200" or "next 200".
                # Ignore it.
                pass
            # For MediaWiki versions where subcats look like articles
            elif isCatTitle(title, self.site()):
                ncat = Category(self.site(), title)
                yield SUBCATEGORY, ncat
            else:
                yield ARTICLE, wikipedia.Page(self.site(), title)
        if Rsubcat:
            # For MediaWiki versions where subcats look differently
            for titleWithoutNamespace in Rsubcat.findall(txt):
                title = 'Category:%s' % titleWithoutNamespace
                ncat = Category(self.site(), title)
                yield SUBCATEGORY, ncat
        if Rimage:
            # For MediaWiki versions where images work through galleries
            for title in Rimage.findall(txt):
                # In some MediaWiki versions, the titles contain the namespace,
                # but they don't in other (newer) versions. Use the ImagePage's
                # defaultNamespace feature to get everything correctly.
                yield ARTICLE, wikipedia.ImagePage(self.site(), title)
        # try to find a link to the next list page
        matchObj = RLinkToNextPage.search(txt)
        if matchObj:
            currentPageOffset = matchObj.group(1)
        else:
            break
def treat(text, linkedPage, targetPage):
    """
    Based on the method of the same name in solve_disambiguation.py

    Rewrite every wikilink in *text* that points to *linkedPage* so it
    points to *targetPage* instead, preserving the visible link text,
    sections and trailing characters where possible. Returns the
    modified wikitext.
    """
    mysite = pywikibot.getSite()
    linktrail = mysite.linktrail()
    # make a backup of the original text so we can show the changes later
    linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>'
        + linktrail + ')')
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        # ignore interwiki links and links to sections of the same page
        if m.group('title').strip() == '' or \
           mysite.isInterwikiLink(m.group('title')):
            continue
        else:
            actualLinkPage = pywikibot.Page(targetPage.site(),
                                            m.group('title'))
            # Check whether the link found is to page.
            if actualLinkPage != linkedPage:
                continue
        # how many bytes should be displayed around the current link
        context = 15
        # at the beginning of the link, start red color.
        # at the end of the link, reset the color to default
        #pywikibot.output(text[max(0, m.start() - context) : m.start()] + '\03{lightred}' + text[m.start() : m.end()] + '\03{default}' + text[m.end() : m.end() + context])
        # NOTE(review): choice is hard-wired to 'y', so the interactive
        # unlink ("uU") and replace ("rR") branches below are dead code.
        choice = 'y'
        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')
        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        if m.group('section') == None:
            section = ''
        else:
            section = m.group('section')
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars
        if choice in "uU":
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            continue
        replaceit = choice in "rR"
        # remove preleading ":"
        if link_text[0] == ':':
            link_text = link_text[1:]
        # Preserve the capitalisation style of the original link text.
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = targetPage.title()[0].lower() + \
                             targetPage.title()[1:]
        # remove preleading ":"
        if new_page_title[0] == ':':
            new_page_title = new_page_title[1:]
        if replaceit and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section,
                                      trailing_chars)
        elif replaceit or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif len(new_page_title) <= len(link_text) and \
             firstcap(link_text[:len(new_page_title)]) == \
             firstcap(new_page_title) and \
             re.sub(re.compile(linktrail), '',
                    link_text[len(new_page_title):]) == '' and not section:
            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                    link_text[len(new_page_title):])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
        continue
    return text
def main():
    """Scrape the latest Totocalcio draw from calcio.sisal.it and publish
    it as an it-wikinews article built from modello_totocalcio.txt.

    Flags: -always (save without asking), -force (overwrite an existing
    article for the same draw).
    """
    args = wikipedia.handleArgs()
    all = False
    force = False
    for currentArgument in args:
        if currentArgument.startswith("-always"):
            all = True
        if currentArgument.startswith("-force"):
            force = True
    # Read the page template.
    templateFile = codecs.open("modello_totocalcio.txt", "r", "utf-8")
    modelloVoce = templateFile.read()
    templateFile.close()
    urlo = "http://www.calcio.sisal.it/pages/totocalcio/ultimo.xwb"
    wikipedia.output(u'Prendo la pagina dal server...')
    # Fetch the page; on a server error wait 10 seconds and retry once.
    try:
        htmlText = pageText(urlo)
    except urllib2.HTTPError:
        try:
            wikipedia.output(
                u"Errore del server. Aspetto 10 secondi... " +
                time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()))
            time.sleep(10)
            htmlText = pageText(urlo)
        except urllib2.HTTPError:
            wikipedia.output(u"Errore del server. Chiudo.")
            return
    # Draw header: number, weekday, day, month, year.
    concorso = re.search(
        "<h2>Concorso n. (\d+) di (.*?) (\d+) (.*?) (\d+)</h2>", htmlText)
    # Prize pool for the main game: draw amount, jackpot carry-over, total.
    montepremi = re.search(
        "<tr>\s*<th[^>]*>Il Montepremi</th>\s*</tr>\s*<tr>\s*<TD[^>]*>\s*Del Concorso \(premi a punteggio\)\s*</TD>\s*<TD>\s*EUR\s*</TD>\s*<TD>\s*(.*?)\s*</TD>\s*</tr>\s*<tr>\s*<TD[^>]*>\s*Riporto Jackpot conc\. precedente\s*</TD>\s*<TD>\s*EUR\s*</TD>\s*<TD[^>]*>\s*(.*?)\s*</TD>\s*</tr>\s*<tr\s*>\s*<TD[^>]*>\s*Montepremi totale del concorso\s*</TD>\s*<TD>\s*EUR\s*</TD>\s*<TD[^>]*>\s*(.*?)\s*</TD>\s*</tr>",
        htmlText, re.I)
    # Same three figures for the "Il9" side game.
    montepremi9 = re.search(
        "<tr>\s*<th[^>]*>Il Montepremi \"Il9\"</th>\s*</tr>\s*<tr>\s*<TD[^>]*>\s*Del Concorso \(premi a punteggio\)\s*</TD>\s*<TD>\s*EUR\s*</TD>\s*<TD>\s*(.*?)\s*</TD>\s*</tr>\s*<tr>\s*<TD[^>]*>\s*Riporto Jackpot conc\. precedente\s*</TD>\s*<TD>\s*EUR\s*</TD>\s*<TD[^>]*>\s*(.*?)\s*</TD>\s*</tr>\s*<tr>\s*<TD[^>]*>\s*Montepremi totale del concorso 9\s*</TD>\s*<TD>\s*EUR\s*</td>\s*<TD[^>]*>\s*(.*?)\s*</TD>\s*</tr>",
        htmlText, re.I)
    # Regex templates (filled with %): winners/payout row per score class,
    # and one match row (teams, goals, 1X2 sign).
    bloccoQuote = "<tr>\s*<td>\s*(.*?)\s*</td>\s*<td>\"%s\"</td>\s*<td[^>]*>\s*(.*?)\s*</td>\s*</tr>"
    bloccoPartita = "<tr[^>]*>\s*<td[^>]*>\s*<b>\s*%s\s*</b>\s*</td>\s*<td[^>]*>\s*(.*?)\s*</td>\s*<td[^>]*>\s*(.*?)\s*</td>\s*<td[^>]*>\s*<b>\s*(\d+)\s*-\s*(\d+) \s*</b>\s*</td>\s*<td[^>]*>\s*<b>\s*([12X]) \s*</b>\s*</td>\s*</tr>"
    # Next draw's jackpots (main game and "Il9").
    jackpotFuturi = re.search(
        "<table.*?>\s*<tr>\s*<th[^>]*>\s*Jackpot prossimo concorso\s*</th>\s*</tr>\s*<tr>\s*<th[^>]*>.*?</th>\s*</tr>\s*<tr>\s*<td[^>]*><h2><b>14</b></h2></td>\s*<td[^>]*><h1>(.*?)<font[^>]*>.*?</font></h1></td>\s*</tr>\s*</table>",
        htmlText, re.I)
    page = wikipedia.Page(
        wikipedia.getSite(code='it', fam='wikinews'),
        "Italia: concorso n. " + concorso.group(1) + "/" +
        concorso.group(5) + " del Totocalcio")
    # Already published and not forced: nothing new to do.
    if page.exists() and not force:
        wikipedia.output("Nessuna nuova estrazione. Mi fermo.")
        return
    # Substitutions applied to the template placeholders.
    elencoSostituzioni = {
        '#super-id': concorso.group(1),
        # NOTE(review): mojibake repair of the accented weekday char.
        '#dow': concorso.group(2).replace('ì', u'ì'),
        '#giorno': concorso.group(3),
        '#mese': concorso.group(4),
        '#anno': concorso.group(5),
        '#montepremi-parz': montepremi.group(1),
        '#jackpot': montepremi.group(2),
        '#montepremi-tot': montepremi.group(3),
        '#9-montepremi-parz': montepremi9.group(1),
        '#9-jackpot': montepremi9.group(2),
        '#9-montepremi-tot': montepremi9.group(3),
    }
    # Future jackpots may be absent from the page.
    try:
        elencoSostituzioni['#futuro-jackpot'] = jackpotFuturi.group(1)
    except:
        elencoSostituzioni['#futuro-jackpot'] = '-'
    try:
        elencoSostituzioni['#9-futuro-jackpot'] = jackpotFuturi.group(2)
    except:
        elencoSostituzioni['#9-futuro-jackpot'] = '-'
    # The 14 matches of the draw.
    partite = range(1, 15)
    for p in partite:
        match = re.search(bloccoPartita % p, htmlText, re.I)
        elencoSostituzioni['#sq-' + str(p) + 'a'] = match.group(1).capitalize()
        elencoSostituzioni['#sq-' + str(p) + 'b'] = match.group(2).capitalize()
        elencoSostituzioni['#res-' + str(p) + 'a'] = match.group(3)
        elencoSostituzioni['#res-' + str(p) + 'b'] = match.group(4)
        elencoSostituzioni['#ok-' + str(p)] = match.group(5)
    # Winner counts and payouts per score class.
    quotes = [9, 12, 13, 14]
    for c in quotes:
        match = re.search(bloccoQuote % c, htmlText, re.I)
        elencoSostituzioni['#vincitori-' + str(c)] = match.group(1).replace(
            'nessuna', '0')
        elencoSostituzioni['#euro-' + str(c)] = match.group(2).replace(
            '-', '0')
    nuovoTesto = massiveReplace(elencoSostituzioni, modelloVoce)
    #page = wikipedia.Page(wikipedia.getSite(code='it', fam='wikinews'), "Utente:BimBot/Sandbox") #DEBUG
    wikipedia.output(">>>>> " + page.title() + " <<<<<")
    try:
        vecchioTesto = page.get()
    except wikipedia.NoPage:
        vecchioTesto = ''
    wikipedia.showDiff(vecchioTesto, nuovoTesto)
    # Interactive confirmation unless -always was given.
    if not all:
        choice = wikipedia.inputChoice(u"Modificare?", ['Yes', 'No', 'All'],
                                       ['y', 'N', 'a'], 'N')
    else:
        choice = 'y'
    if choice in ['A', 'a']:
        all = True
        choice = 'y'
    if choice in ['Y', 'y']:
        page.put(nuovoTesto,
                 u"Bot: Inserisco nuova estrazione del Totocalcio")
def main():
    """Parse command-line options and drive the page-move bot.

    Supported options:
      -pairs[:file]   read (old, new) title pairs from a link file
      -from:X -to:Y   move a single page X to Y (repeatable)
      -prefix[:p]     prefix to prepend to the new titles
      -noredirect     do NOT leave a redirect behind (default is to leave one)
      -always         never ask for confirmation
      -skipredirects  skip pages that are redirects
      -summary[:s]    edit summary to use
    Any other argument is handed to the page-generator factory.

    Side effects: performs page moves via MovePagesBot; prints warnings for
    unpaired -from:/-to: options and odd-length pair files.
    """
    gen = None
    prefix = None
    oldName = None
    noredirect = True
    always = False
    skipredirects = False
    summary = None
    fromToPairs = []
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-pairs'):
            if len(arg) == len('-pairs'):
                filename = wikipedia.input(
                    u'Enter the name of the file containing pairs:')
            else:
                filename = arg[len('-pairs:'):]
            # Titles in the file alternate old/new; pair them up.
            oldName1 = None
            for page in pagegenerators.TextfilePageGenerator(filename):
                if oldName1:
                    fromToPairs.append([oldName1, page.title()])
                    oldName1 = None
                else:
                    oldName1 = page.title()
            if oldName1:
                wikipedia.output(
                    u'WARNING: file %s contains odd number of links' % filename)
        elif arg == '-noredirect':
            noredirect = False
        elif arg == '-always':
            always = True
        elif arg == '-skipredirects':
            skipredirects = True
        elif arg.startswith('-from:'):
            if oldName:
                # Previous -from: was never matched by a -to:.
                wikipedia.output(u'WARNING: -from:%s without -to:' % oldName)
            oldName = arg[len('-from:'):]
        elif arg.startswith('-to:'):
            if oldName:
                fromToPairs.append([oldName, arg[len('-to:'):]])
                oldName = None
            else:
                wikipedia.output(u'WARNING: %s without -from' % arg)
        elif arg.startswith('-prefix'):
            if len(arg) == len('-prefix'):
                prefix = wikipedia.input(u'Enter the prefix:')
            else:
                prefix = arg[len('-prefix:'):]
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = wikipedia.input(u'Enter the summary:')
            else:
                summary = arg[len('-summary:'):]
        else:
            genFactory.handleArg(arg)
    if oldName:
        # Trailing -from: with no matching -to:.
        wikipedia.output(u'WARNING: -from:%s without -to:' % oldName)
    # Explicit pairs are moved one by one, outside the generator run.
    for pair in fromToPairs:
        page = wikipedia.Page(wikipedia.getSite(), pair[0])
        bot = MovePagesBot(None, prefix, noredirect, always, skipredirects,
                           summary)
        bot.moveOne(page, pair[1])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = MovePagesBot(preloadingGen, prefix, noredirect, always,
                           skipredirects, summary)
        bot.run()
    elif not fromToPairs:
        wikipedia.showHelp('movepages')
l = l.strip() if l: if l[0] in [';', ':', '|', '{', '}', '<', '[', ']', '!', '#', '*', ' ']: continue else: if pagetitle.lower() in l.lower(): abstract = compensatehtmlcomments(l) break #capturar imagenes images = re.findall(ur"(?im)(?:(?:Archivo|File|Image)\s*\:|(?:image[ _]?skyline|picture|photo|photography|imagen?|foto|fotograf[íi]a)\s*=)\s*([^\|\[\]]+?\.(?:jpe?g))", revtext) selectedimage = '' caption = '' if images and images[0]: selectedimage = images[0] commonspage = wikipedia.Page(commonssite, u'File:%s' % (selectedimage)) if commonspage.exists(): caption = revtext.split(selectedimage)[1].strip() if caption.startswith('|thumb') or caption.startswith('|left') or caption.startswith('|right'): m = re.findall(ur'(?im)^\s*\|\s*(?:thumb|thumbnail|frame|(?:(?:up)?(?:left|right|center)(?:\s*=?\s*\d*\.?\d*)?))([^\[\]]*?)\]\]', caption) if m: caption = m[0].strip().lstrip('|') else: brackets = 2 c = 0 while len(caption) > c and c <= 500 and brackets != 0: if caption[c] == '[': brackets += 1 elif caption[c] == ']': brackets -= 1 c += 1
def subTemplate(self, content, param):
    """Substitute the template tags in content according to param.

    @param content: Content with tags to substitute.
    @type  content: string
    @param param: Param with data how to substitute tags.
    @type  param: dict

    Returns a tuple containing the new content with tags substituted,
    a list of those substituted tags, and a metadata dict.
    """
    substed_tags = []  # DRTRIGON-73
    # Pseudo-data always available for substitution.
    metadata = {
        'mw-signature': u'~~~~',
        'mw-timestamp': u'~~~~~',
    }  # DRTRIGON-132

    # 0.2.) check for 'simple' mode and get additional params
    if param['simple']:
        p = self.site.getExpandedString(param['simple'])
        param.update(pywikibot.extract_templates_and_params(p)[0][1])

    # 0.5.) check cron/date
    if param['cron']:
        # [min] [hour] [day of month] [month] [day of week]
        # (date supported only, thus [min] and [hour] dropped)
        if not (param['cron'][0] == '@'):
            param['cron'] = '* * ' + param['cron']
        entry = crontab.CronTab(param['cron'])
        # find the delay from midnight (does not return 0.0 - but next)
        delay = entry.next(datetime.datetime.now().replace(hour=0,
                                                           minute=0,
                                                           second=0,
                                                           microsecond=0) -
                           datetime.timedelta(microseconds=1))
        pywikibot.output(u'CRON delay for execution: %.3f (<= %i)'
                         % (delay, self._bot_config['CRONMaxDelay']))
        # Not scheduled for today: bail out with nothing substituted.
        if not (delay <= self._bot_config['CRONMaxDelay']):
            return (content, substed_tags, metadata)

    # 1.) getUrl or wiki text
    # (security: check url not to point to a local file on the server,
    #  e.g. 'file://' - same as used in xsalt.py)
    secure = False
    for item in [u'http://', u'https://', u'mail://', u'local://',
                 u'wiki://']:
        secure = secure or (param['url'][:len(item)] == item)
    param['zip'] = ast.literal_eval(param['zip'])
    if not secure:
        return (content, substed_tags, metadata)
    if param['url'][:7] == u'wiki://':
        # Source is another wiki page.
        url = param['url'][7:].strip('[]')  # enable wiki-links
        if ast.literal_eval(param['expandtemplates']):
            # DRTRIGON-93 (only with 'wiki://')
            external_buffer = pywikibot.Page(self.site,
                                             url).get(expandtemplates=True)
        else:
            external_buffer = self.load(pywikibot.Page(self.site, url))
    elif (param['url'][:7] == u'mail://'):  # DRTRIGON-101
        # Source is a message in the bot's mailbox.
        url = param['url'].replace(u'{{@}}', u'@')  # e.g. nlwiki
        mbox = SubsterMailbox(
            pywikibot.config.datafilepath(self._bot_config['data_path'],
                                          self._bot_config['mbox_file'], ''))
        external_buffer = mbox.find_data(url)
        mbox.close()
    elif (param['url'][:8] == u'local://'):  # DRTRIGON-131
        # Source is local bot state; only one hard-coded file is exposed.
        if (param['url'][8:] == u'cache/state_bots'):
            # filename hard-coded
            d = shelve.open(
                pywikibot.config.datafilepath('cache', 'state_bots'))
            external_buffer = pprint.pformat(
                ast.literal_eval(pprint.pformat(d)))
            d.close()
        else:
            external_buffer = u'n/a'
    else:
        # Plain http(s) fetch.
        # consider using 'expires', 'last-modified', 'etag' in order to
        # make the updating data requests more efficient! use those stored
        # on page, if the user placed them, else use the conventional mode.
        # http://www.diveintopython.net/http_web_services/etags.html
        f_url, external_buffer = http.request(self.site, param['url'],
                                              no_hostname=True,
                                              back_response=True)
        headers = f_url.headers  # same like 'f_url.info()'
        #if param['zip']:
        if ('text/' not in headers['content-type']):
            pywikibot.output(u'Source is of non-text content-type, '
                             u'using raw data instead.')
            external_buffer = f_url.read()
        del f_url  # free some memory (no need to keep copy)
        # Propagate useful HTTP caching headers into the metadata.
        for h in ['content-length', 'date', 'last-modified', 'expires']:
            if h in headers:
                metadata['url-%s' % h] = headers[h]

    # some intermediate processing (unzip, xlsx2csv, ...)
    if param['zip']:  # 'application/zip', ...
        # True means "first member"; an int selects a 1-based member index.
        fileno = 0 if (param['zip'] is True) else (param['zip'] - 1)
        external_buffer = self.unzip(external_buffer, fileno)
    if param['xlsx']:
        # 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        external_buffer = self.xlsx2csv(external_buffer, param['xlsx'])
    if param['ods']:  # 'application/vnd.oasis.opendocument.spreadsheet'
        external_buffer = self.ods2csv(external_buffer, param['ods'])

    if not ast.literal_eval(param['beautifulsoup']):  # DRTRIGON-88
        # 2.) regexp
        #for subitem in param['regex']:
        subitem = param['regex']
        regex = re.compile(subitem, re.S | re.I)

        # 3.) subst in content
        external_data = regex.search(external_buffer)
        external_data_dict = {}
        if external_data:  # not None
            external_data = external_data.groups()
            pywikibot.output(u'Groups found by regex: %i'
                             % len(external_data))
            # DRTRIGON-114: Support for named groups in regexs
            if regex.groupindex:
                for item in regex.groupindex:
                    external_data_dict[
                        u'%s-%s' % (param['value'], item)] = external_data[
                            regex.groupindex[item] - 1]
            elif (len(external_data) == 1):
                external_data_dict = {param['value']: external_data[0]}
            else:
                external_data_dict = {param['value']: str(external_data)}
        pywikibot.debug(str(external_data_dict))

        # SECURITY NOTE(review): 'postproc' comes from wiki-editable template
        # params and is passed to eval/exec below — any wiki editor can run
        # arbitrary code. The safer ast.literal_eval variant is noted but
        # disabled; needs coordinated changes on-wiki.
        param['postproc'] = eval(param['postproc'])
        # should be secured as given below, but needs code changes in wiki too
        #param['postproc'] = ast.literal_eval(param['postproc'])
        for value in external_data_dict:
            external_data = external_data_dict[value]
            # 4.) postprocessing
            func = param['postproc'][0]  # needed by exec call of self._code
            DATA = [external_data]      # mutated in place by the exec'd code
            args = param['postproc'][1:]
            scope = {}                  # (scope to run in)
            scope.update(locals())      # (add DATA, *args, ...)
            scope.update(globals())     # (add imports and else)
            if func:
                exec(self._code + (self._bot_config['CodeTemplate'] % func),
                     scope, scope)
                external_data = DATA[0]
            pywikibot.debug(external_data)

            # 5.) subst content
            (content, tags) = self.subTag(content, value, external_data,
                                          int(param['count']))
            substed_tags += tags
    else:
        # DRTRIGON-105: Support for multiple BS template configurations
        value = param['value']
        if value:
            value += u'-'
        # DRTRIGON-88: Enable Beautiful Soup power for Subster
        BS_tags = self.get_BS_regex(value).findall(content)
        pywikibot.output(u'BeautifulSoup tags found by regex: %i'
                         % len(BS_tags))
        prev_content = content
        BS = BeautifulSoup.BeautifulSoup(external_buffer)
        for item in BS_tags:
            # SECURITY NOTE(review): item[1] originates from page content and
            # is eval'd as an attribute path on the soup object.
            external_data = eval('BS.%s' % item[1])
            external_data = self._BS_regex_str % {
                'var1': value + 'BS:' + item[1],
                'var2': value,
                'cont': external_data}
            content = content.replace(item[0], external_data, 1)
        if (content != prev_content):
            substed_tags.append(value + 'BS')

    metadata['bot-timestamp'] = pywikibot.Timestamp.now().isoformat(' ')

    return (content, substed_tags, metadata)
def _parseCategory(self, purge=False, startFrom=None, sortby=None,
                   sortdir=None):
    """
    Yields all articles and subcategories that are in this category by API.

    Set startFrom to a string which is the title of the page to start from.

    Yielded results are tuples in the form (tag, page) where tag is one of
    the constants ARTICLE and SUBCATEGORY, and title is the Page or Category
    object.

    Note that results of this method need not be unique.

    This should not be used outside of this module.
    """
    # Fall back to screen-scraping for wikis without a usable API.
    if not self.site().has_api() or self.site().versionnumber() < 11:
        for tag, page in self._oldParseCategory(purge, startFrom):
            yield tag, page
        return
    currentPageOffset = None
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': self.title(),
        'cmprop': ['title', 'ids', 'sortkey', 'timestamp'],
        #'': '',
    }
    if sortby:
        params['cmsort'] = sortby
    if sortdir:
        params['cmdir'] = sortdir
    while True:
        # API caps categorymembers at 500 per request for normal users.
        if wikipedia.config.special_page_limit > 500:
            params['cmlimit'] = 500
        else:
            params['cmlimit'] = wikipedia.config.special_page_limit
        if currentPageOffset:
            # Continue a previous request from the server-supplied offset.
            # (popitem is only for the progress message; params already
            # carries the offset via update()).
            params.update(currentPageOffset)
            wikipedia.output('Getting [[%s]] list from %s...'
                             % (self.title(),
                                "%s=%s" % currentPageOffset.popitem()))
        elif startFrom:
            startFrom = startFrom.upper()  # category sort keys are uppercase
            params['cmstartsortkey'] = startFrom
            wikipedia.output('Getting [[%s]] list starting at %s...'
                             % (self.title(), startFrom))
        else:
            wikipedia.output('Getting [[%s]]...' % self.title())
        wikipedia.get_throttle()
        data = query.GetData(params, self.site())
        if 'error' in data:
            raise RuntimeError("%s" % data['error'])
        count = 0
        for memb in data['query']['categorymembers']:
            count += 1
            # For MediaWiki versions where subcats look like articles
            if memb['ns'] == 14:
                # Namespace 14 = Category.
                yield SUBCATEGORY, Category(self.site(), memb['title'],
                                            sortKey=memb['sortkey'])
            elif memb['ns'] == 6:
                # Namespace 6 = File/Image.
                yield ARTICLE, wikipedia.ImagePage(self.site(), memb['title'])
            else:
                yield ARTICLE, wikipedia.Page(self.site(), memb['title'],
                                              defaultNamespace=memb['ns'])
            if count >= params['cmlimit']:
                break
        # try to find a link to the next list page
        # NOTE(review): continuation is only followed when the batch came
        # back short of cmlimit — presumably to respect special_page_limit
        # as a total cap; verify this is intended.
        if 'query-continue' in data and count < params['cmlimit']:
            currentPageOffset = data['query-continue']['categorymembers']
        else:
            break
def run(self):
    """ Starts the robot.

    For each generated talk page: load its text, read the archiving
    settings from the page, split it into sections, move sections whose
    dates are old enough to per-date archive pages, and save both the
    trimmed page and the archive pages (asking the operator unless
    acceptall is set). Sections without a date get a hidden date comment
    appended so they can be aged on a later run.
    """
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        wikipedia.output(u'\n>>> %s <<<' % page.title())
        # Current time (for the per-page timing report at the end).
        sectiont0 = time.time()
        try:
            # Load the page's text from the wiki.
            original_text = page.get()
            if not page.canBeEdited():
                wikipedia.output(
                    u'Pagina %s wordt overgeslagen, deze pagina is beveiligd.'
                    % page.title())
                continue
        # No page, so ignore
        except wikipedia.NoPage:
            wikipedia.output(u'Pagina %s bestaat niet.' % page.title())
            continue
        # Get the archiving settings.
        settings = self.loadConfig(original_text)
        # No settings were found, leave a message on the page.
        if not settings:
            wikipedia.output(
                u'Er kunnen geen instellingen worden gevonden op %s. Er wordt een bericht achtergelaten.'
                % page.title())
            page.put(original_text + self.nosettingscomment,
                     self.commentsummary, minorEdit=False)
            continue
        # Incorrect magicwords settings were found, leave a message on the
        # page.
        # NOTE(review): loadConfig's result is checked via the local
        # `settings`, but all subsequent reads go through `self.settings` —
        # presumably loadConfig also stores the dict on self; confirm.
        if not self.settings[
                'magicwords'] == u'oudste' and not self.settings[
                    'magicwords'] == u'recentste':
            wikipedia.output(
                u'Pagina %s wordt overgeslagen, er zijn geen of foute magicwords instellingen opgegeven, opgegeven was %s. Er wordt een bericht achtergelaten.'
                % (page.title(), self.settings['magicwords']))
            page.put(original_text + self.nomagicwordscomment,
                     self.commentsummary, minorEdit=False)
            continue
        # Get the number of days after which a section should be archived.
        self.settings['dagen'] = int(self.settings['dagen'])
        # Get the template for the archive page, some variables still have
        # to be replaced using the section's oldest or most recent date.
        # Make it a subpage of the current page.
        archive_titletemplate = page.title() + '/' + self.doDateReplacements(
            self.settings['archief'].strip())
        # Get a datetime object for the current date and time to compare
        # other dates.
        todaydt = datetime.datetime.today()
        # Split the text into sections
        sections = self.resection.split(original_text)
        # The text before the first section won't be checked.
        new_text = sections[0]
        # A dictionary containing the archive page as key and the text as
        # item.
        archives_dictionary = {}
        # The archiving target to be used in summaries.
        archive_target = 'n.v.t.'
        # The number of sections that will be archived.
        numberofsections = 0
        # A dictionary containing the archive page as key and the number of
        # sections that will be archived to that page as item.
        nos_dictionary = {}
        # Check all sections. The split alternates header (odd index) and
        # body (even index), so step by 2 over the bodies.
        for i in range(2, len(sections), 2):
            archive_text = ''  # NOTE(review): assigned but never read.
            section_text = sections[i]
            # Check if the page shouldn't be archived.
            if self.renoarchive.search(section_text):
                # Ignore this section.
                new_text += sections[i - 1] + section_text
                continue
            # A list of the dates in wikisyntax.
            dates = self.redate.findall(section_text)
            if dates:
                # A list of the dates as datetimeobjects.
                datesdt = []
                # A list of the difference in seconds between the date and
                # now.
                differences = {}
                j = 0
                # Create datetime objects from all found dates.
                for date in dates:
                    datematch = self.redatematch.match(date[0])
                    try:
                        datedt = datetime.datetime(
                            int(datematch.group(3)),
                            self.monthn[datematch.group(2)],
                            int(datematch.group(1)),
                            int(datematch.group(4)),
                            int(datematch.group(5)))
                    except:
                        wikipedia.output(
                            u'Could not create a datetime object, skipping date'
                        )
                        continue
                    datesdt.append(datedt)
                    differencedt = todaydt - datedt
                    differences[
                        j] = differencedt.days * 86400 + differencedt.seconds
                    j += 1
                try:
                    # Keys sorted so index 0 is the most recent date.
                    diferences_sortedkeys = self.sort_by_value(differences)
                    difference = todaydt - datesdt[diferences_sortedkeys[0]]
                except:
                    wikipedia.output(
                        u'Could not get the difference, probably because of skipping a date.'
                    )
                    # Add daylight saving time
                    if time.daylight == 1:
                        dst = 'CEST'
                    else:
                        dst = 'CET'
                    # Stamp the section with a hidden date so a later run
                    # can age it.
                    section_text += '\n<!-- %s %s %s -->' % (
                        time.strftime('%d'),
                        self.month[int(time.strftime('%m'))],
                        time.strftime('%Y %H:%M (%Z)'))
                    new_text += sections[i - 1] + section_text
                    continue
                # Check if a section should be archived using the most
                # recent date.
                if difference.days >= self.settings['dagen']:
                    if self.settings['magicwords'] == 'recentste':
                        archive_title = self.doTitleReplacements(
                            archive_titletemplate,
                            datesdt[diferences_sortedkeys[0]])
                    else:
                        archive_title = self.doTitleReplacements(
                            archive_titletemplate,
                            datesdt[diferences_sortedkeys[
                                len(diferences_sortedkeys) - 1]])
                    # Add section to archive.
                    numberofsections += 1
                    # Add the text and number of sections to the
                    # corresponding dictionaries.
                    if archives_dictionary.has_key(archive_title):
                        archives_dictionary[archive_title] += sections[
                            i - 1] + section_text
                        nos_dictionary[archive_title] += 1
                    else:
                        archives_dictionary[archive_title] = sections[
                            i - 1] + section_text
                        nos_dictionary[archive_title] = 1
                    # Add archive_title to archive_target
                    archive_target = '[[%s]]' % archive_title
                else:
                    new_text += sections[i - 1] + section_text
            else:
                # No date was found, add one.
                # We have to fill in the date ourselves because MediaWiki
                # ignores <!-- ~~~~~ -->.
                # Add daylight saving time
                if time.daylight == 1:
                    dst = 'CEST'
                else:
                    dst = 'CET'
                section_text += '\n<!-- %s %s %s (%s) -->' % (
                    time.strftime('%d'),
                    self.month[int(time.strftime('%m'))],
                    time.strftime('%Y %H:%M'), dst)
                new_text += sections[i - 1] + section_text
        # Check if there are multiple archive pages
        if len(archives_dictionary) > 1:
            archive_target = '%i archiefpagina\'s' % len(archives_dictionary)
        if not original_text == new_text:
            if page.isRedirectPage() or not page.canBeEdited():
                wikipedia.output(u'Can not edit %s. Aborting.' % page.title())
                continue
            # Make sure every target archive page is editable before
            # touching anything.
            abort = False
            for title in archives_dictionary.keys():
                ap = wikipedia.Page(self.site, title)
                if ap.isRedirectPage() or not ap.canBeEdited():
                    wikipedia.output(u'Can not edit %s. Aborting.'
                                     % ap.title())
                    try:
                        page.put(page.get() + self.cantedit % ap.title(),
                                 self.canteditsummary, minorEdit=False)
                    except wikipedia.EditConflict:
                        wikipedia.output(
                            u'Pagina %s wordt overgeslagen vanwege een bewerkingsconflict.'
                            % (page.title()))
                    abort = True
                    break
            if abort:
                continue
        if not original_text == new_text:
            diff = len(original_text) - len(new_text)
            # NOTE(review): under Python 2 this is integer division — the
            # reported percentage is almost always 0; confirm intent.
            reduction = (diff / len(original_text)) * 100
            wikipedia.output(
                u'Er worden %d onderwerpen gearchiveerd ouder dan %d dagen. In totaal worden %d tekens aangepast, een reductie van %d procent.'
                % (numberofsections, self.settings['dagen'], diff, reduction))
            wikipedia.output(
                u'Deze onderwerpen worden gearchiveerd naar %d verschillende archiefpagina\'s.'
                % (len(archives_dictionary)))
            if not self.acceptall:
                cview = wikipedia.inputChoice(
                    u'Wilt u deze wijzigingen bekijken?', ['Yes', 'No'],
                    ['y', 'N'], 'N')
                if cview in ['y', 'Y']:
                    wikipedia.showDiff(original_text, new_text)
                choice = wikipedia.inputChoice(
                    u'Wilt u deze wijzigingen doorvoeren?',
                    ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['a', 'A']:
                    self.acceptall = True
            # Archive the page.
            # NOTE(review): `choice` is only bound in the branch above;
            # the short-circuit on self.acceptall keeps this from raising.
            if self.acceptall or choice in ['y', 'Y']:
                if numberofsections:
                    wikipedia.setAction(
                        'nlwikibots: [[Gebruiker:Erwin85/Bot/Archivering|Archivering]] van %i %s ouder dan %i dagen naar %s.'
                        % (numberofsections,
                           self.plural(numberofsections, 'onderwerp',
                                       'onderwerpen'),
                           self.settings['dagen'], archive_target))
                else:
                    wikipedia.setAction(
                        'nlwikibots: Datum toegevoegd in verband met [[Gebruiker:Erwin85/Bot/Archivering|archivering]].'
                    )
                try:
                    page.put(new_text)
                except wikipedia.EditConflict:
                    wikipedia.output(
                        u'Pagina %s wordt overgeslagen vanwege een bewerkingsconflict.'
                        % (page.title()), toStdout=True)
                    continue
                except wikipedia.LockedPage:
                    wikipedia.output(u'Pagina %s is beveiligd.'
                                     % (page.title()), toStdout=True)
                    continue
                # Append the collected sections to each archive page.
                for archive_title, archivetext in archives_dictionary.items(
                ):
                    redirect = False
                    if numberofsections:
                        wikipedia.setAction(
                            'nlwikibots: [[Gebruiker:Erwin85/Bot/Archivering|Archivering]] van %i %s ouder dan %i dagen van [[%s]].'
                            % (nos_dictionary[archive_title],
                               self.plural(nos_dictionary[archive_title],
                                           'onderwerp', 'onderwerpen'),
                               self.settings['dagen'], page.title()))
                    try:
                        archivepage = wikipedia.Page(self.site, archive_title)
                        # Load the page's text from the wiki
                        original_archivetext = archivepage.get()
                        # NOTE(review): this checks `page`, not
                        # `archivepage` — looks like it should test the
                        # archive page; confirm.
                        if not page.canBeEdited():
                            wikipedia.output(
                                u'Pagina %s wordt overgeslagen, deze pagina is beveiligd.'
                                % archive_title)
                            continue
                    except wikipedia.NoPage:
                        wikipedia.output(u'Pagina %s bestaat niet.'
                                         % archive_title)
                        original_archivetext = ''
                    except wikipedia.IsRedirectPage:
                        wikipedia.output(u'Pagina %s is een doorverwijzing.'
                                         % archive_title)
                        redirect = True
                    if not redirect:
                        if original_archivetext:
                            archivetext = original_archivetext + '\n' + \
                                          archivetext
                        else:
                            # New archive page: optionally start it from a
                            # substituted template.
                            if self.settings['sjabloon']:
                                archivetext = '{{subst:%s}}\n' % self.settings[
                                    'sjabloon'] + archivetext
                        try:
                            archivepage.put(archivetext)
                        except wikipedia.EditConflict:
                            wikipedia.output(
                                u'Pagina %s wordt overgeslagen vanwege een bewerkingsconflict.'
                                % (archive_title))
                    else:
                        wikipedia.output(
                            u'Leaving message informing that archive page is a redirect.'
                        )
                        try:
                            page.put(page.get() + self.cantedit
                                     % archive_title, self.canteditsummary,
                                     minorEdit=False)
                        except wikipedia.EditConflict:
                            wikipedia.output(
                                u'Pagina %s wordt overgeslagen vanwege een bewerkingsconflict.'
                                % (page.title()))
        else:
            # No need for archiving.
            wikipedia.output(u'Archivering is niet nodig.')
        # Execution time for this section.
        sectiontimediff = time.time() - sectiont0
        wikipedia.output(u'Executiontime: %ss.' % str(sectiontimediff))
    # Total execution time.
    timediff = time.time() - self.t0
    wikipedia.output(u'Total executiontime: %ss.' % str(timediff))
def main():
    """Scrape meteoam.it station pages and update the it.wikinews weather
    template.

    Reads the page model from modello_meteo.txt, fetches the report for
    every ICAO code in codiciStazioni (retrying once after 10 s on an HTTP
    error), extracts sky/wind/temperature data via regexes, substitutes
    them into the model, shows a diff and saves unless the operator
    declines. The -always flag skips confirmation.
    """
    args = wikipedia.handleArgs()
    accept_all = False
    for currentArgument in args:
        if currentArgument.startswith("-always"):
            accept_all = True
    templateFile = codecs.open("modello_meteo.txt", "r", "utf-8")
    modelloVoce = templateFile.read()  # Read the page model.
    templateFile.close()
    urlo = "http://www.meteoam.it/modules/tempoInAtto/infoStazione.php?icao=%s"
    replacements = {}
    for i in codiciStazioni:
        try:
            htmlText = pageText(urlo % i)
        except urllib2.HTTPError:
            try:
                wikipedia.output(
                    u"Errore del server. Aspetto 10 secondi... "
                    + time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()))
                time.sleep(10)
                # BUGFIX: the retry used pageText(urlo) — the raw "%s"
                # template URL — instead of the station-specific URL.
                htmlText = pageText(urlo % i)
            except urllib2.HTTPError:
                wikipedia.output(u"Errore del server. Chiudo.")
                return
        match = re.search(
            '<div class="titolo">Informazioni meteorologiche/climatologiche per (.*?)</div>',
            htmlText)
        nomeLocalita = unicode(match.group(1))
        wikipedia.output(nomeLocalita + " (" + i + ")")
        # Cloud cover: taken from the icon's alt attribute.
        nuvoloMatch = re.search(
            u'<b>Nuvolosità</b></font></td>\s*<td.*?>\s*.*?\s*</td>\s*<td .*?>\s*<p .*?><img .*? alt\s*="(.*?)" .*?>\s*</td>',
            htmlText)
        # Wind: group(1) strength, optional group(3) direction.
        ventoMatch = re.search(
            u'<b>Vento</b></font></td>\s*<td.*?>\s*.*?\s*</td>\s*<td .*?>\s*<p .*?><img .*? alt\s*="Vento (.*?)( Direzione (.*?))?" .*?>\s*</td>',
            htmlText)
        maxMatch = re.search(
            u'<td .*?>\s*<font .*?><b>Temperatura</b></font>\s*<p><font .*?><b> Max</b></font></td>\s*<td.*?>\s*.*?\s*</td>\s*<td .*?><b>\s*<font .*?>([+-]?\s*\d+)</font></b></td>',
            htmlText)
        minMatch = re.search(
            u'<td .*?>\s*<font .*?><b>Temperatura</b></font>\s*<p><font .*?><b> Min</b></font></td>\s*<td.*?>\s*.*?\s*</td>\s*<td .*?><b>\s*<font .*?>([+-]?\s*\d+)</font></b></td>',
            htmlText)
        if nuvoloMatch is not None:
            replacements['#tempo-' + nomeLocalita + '#'] = iconaTempo(
                nuvoloMatch.group(1))
        else:
            replacements['#tempo-' + nomeLocalita + '#'] = nd
        if ventoMatch is not None:
            replacements['#intensita-' + nomeLocalita
                         + '#'] = ventoMatch.group(1)
            if ventoMatch.group(2) is not None:
                replacements['#vento-' + nomeLocalita
                             + '#'] = ventoMatch.group(3)
            elif ventoMatch.group(2) is None and ventoMatch.group(
                    1) == "variabile":
                # Variable wind has no direction; report a weak intensity.
                replacements['#vento-' + nomeLocalita + '#'] = "variabile"
                replacements['#intensita-' + nomeLocalita + '#'] = "debole"
            else:
                replacements['#vento-' + nomeLocalita + '#'] = nd
        else:
            replacements['#vento-' + nomeLocalita + '#'] = nd
            replacements['#intensita-' + nomeLocalita + '#'] = nd
        if maxMatch is not None:
            replacements['#max-' + nomeLocalita + '#'] = maxMatch.group(1)
        else:
            replacements['#max-' + nomeLocalita + '#'] = nd
        if minMatch is not None:
            replacements['#min-' + nomeLocalita + '#'] = minMatch.group(1)
        else:
            replacements['#min-' + nomeLocalita + '#'] = nd
    nuovoTesto = massiveReplace(replacements, modelloVoce)
    page = wikipedia.Page(wikipedia.Site('it', 'wikinews'),
                          'Template:Pagina principale/Secondo piano/Meteo')
    vecchioTesto = page.get()
    wikipedia.showDiff(vecchioTesto, nuovoTesto)
    if not accept_all:
        choice = wikipedia.inputChoice(u"Modificare?", ['Yes', 'No', 'All'],
                                       ['y', 'N', 'a'], 'N')
    else:
        choice = 'y'
    if choice in ['A', 'a']:
        accept_all = True
        choice = 'y'
    if choice in ['Y', 'y']:
        page.put(nuovoTesto, u"Bot: Aggiorno meteo")
def replace_image(self, image, site, page_title, summary, replacement=None):
    """ The actual replacement. Giving None as argument for replacement
    will delink instead of replace.

    Returns 'ok' on success, 'failed' on unrecoverable errors, 'skipped'
    when a before_save hook vetoes the edit, and None when the page's
    namespace is not configured for delinking.
    """
    page = wikipedia.Page(site, page_title)

    hook = None

    # TODO: Per site config.
    if page.namespace(
    ) in self.CommonsDelinker.config['delink_namespaces']:
        try:
            text = page.get(get_redirect=True)
        except wikipedia.NoPage:
            return 'failed'
        new_text = text

        # Hooks may rewrite the image name / replacement before we start.
        m_image = ImmutableByReference(image)
        m_replacement = ImmutableByReference(replacement)
        self.CommonsDelinker.exec_hook(
            'before_replace', (page, summary, m_image, m_replacement))
        image = m_image.get()
        replacement = m_replacement.get()

        def create_regex(s):
            # First letter case-insensitive (MediaWiki title semantics).
            first, other = re.escape(s[0]), re.escape(s[1:])
            return ur'(?:[%s%s]%s)' % (first.upper(), first.lower(), other)

        def create_regex_i(s):
            # Fully case-insensitive version of s.
            return ur'(?:%s)' % u''.join(
                [u'[%s%s]' % (c.upper(), c.lower()) for c in s])

        # File and Media namespace aliases, in any case/spacing.
        namespaces = site.namespace(6, all=True) + site.namespace(-2,
                                                                  all=True)
        r_namespace = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(
            map(create_regex_i, namespaces))
        # Note that this regex creates a group!
        r_image = u'(%s)' % create_regex(image).replace(r'\_', '[ _]')

        def simple_replacer(match):
            # re.sub callback shared by all passes; `hook` selects which
            # per-pass hook gets a chance to veto or rewrite.
            m_replacement = ImmutableByReference(replacement)
            groups = list(match.groups())
            if hook:
                if False is self.CommonsDelinker.exec_hook(
                        '%s_replace' % hook,
                        (page, summary, image, m_replacement, match,
                         groups)):
                    return u''.join(groups)
            if m_replacement.get() is None:
                return u''
            else:
                groups[1] = m_replacement.get()
                return u''.join(groups)

        # Previously links in image descriptions will cause
        # unexpected behaviour: [[Image:image.jpg|thumb|[[link]] in description]]
        # will truncate at the first occurence of ]]. This cannot be
        # fixed using one regular expression.
        # This means that all ]] after the start of the image
        # must be located. If it then does not have an associated
        # [[, this one is the closure of the image.
        r_simple_s = u'(\[\[%s)%s' % (r_namespace, r_image)
        r_s = '\[\['
        r_e = '\]\]'
        # First determine where wikilinks start and end
        image_starts = [
            match.start() for match in re.finditer(r_simple_s, text)
        ]
        link_starts = [match.start() for match in re.finditer(r_s, text)]
        link_ends = [match.end() for match in re.finditer(r_e, text)]
        r_simple = u'(\[\[%s)%s(.*)' % (r_namespace, r_image)
        hook = 'simple'
        replacements = []
        for image_start in image_starts:
            # Bracket-match forward from the image start to find the ]]
            # that actually closes this [[Image:...]] construct.
            current_link_starts = [
                link_start for link_start in link_starts
                if link_start > image_start
            ]
            current_link_ends = [
                link_end for link_end in link_ends if link_end > image_start
            ]
            end = image_start
            if current_link_ends:
                end = current_link_ends[0]
            while current_link_starts and current_link_ends:
                start = current_link_starts.pop(0)
                end = current_link_ends.pop(0)
                if end <= start and end > image_start:
                    # Found the end of the image
                    break
            # Check whether this image is the first one on the line
            if image_start == 0:
                prev = ''
            else:
                prev = new_text[image_start - 1]
            if prev in ('', '\r', '\n') and replacement is None:
                # Kill all spaces after end
                while (end + 1) < len(new_text):
                    if new_text[end + 1] in WHITESPACE:
                        end += 1
                    else:
                        break
            # Add the replacement to the todo list. Doing the
            # replacement right know would alter the indices.
            replacements.append(
                (new_text[image_start:end],
                 re.sub(r_simple, simple_replacer,
                        new_text[image_start:end])))
        # Perform the replacements
        for old, new in replacements:
            if old:
                new_text = new_text.replace(old, new)

        # Remove the image from galleries
        hook = 'gallery'
        r_galleries = ur'(?s)(\<%s\>)(.*?)(\<\/%s\>)' % (
            create_regex_i('gallery'), create_regex_i('gallery'))
        r_gallery = ur'(?m)^((?:%s)?)%s(\s*(?:\|.*?)?\s*$)' % (r_namespace,
                                                               r_image)

        def gallery_replacer(match):
            return ur'%s%s%s' % (match.group(1),
                                 re.sub(r_gallery, simple_replacer,
                                        match.group(2)), match.group(3))

        new_text = re.sub(r_galleries, gallery_replacer, new_text)

        if text == new_text or self.CommonsDelinker.config.get(
                'force_complex', False):
            # All previous steps did not work, so the image is
            # likely embedded in a complicated template.
            hook = 'complex'
            r_templates = ur'(?s)(\{\{.*?\}\})'
            # Also strips invisible direction/space marks around the name.
            r_complicated = u'(?s)(?<=[|{=])[\s\u200E\uFEFF\u200B\u200C]*((?:%s)?)%s[\u200E\uFEFF\u200B\u200C]*' % (
                r_namespace, r_image)

            def template_replacer(match):
                return re.sub(r_complicated, simple_replacer,
                              match.group(1))

            new_text = re.sub(r_templates, template_replacer, text)

        if text != new_text:
            # Save to the wiki
            # Code for checking user page existance has been moved
            # to summary() code, to avoid checking the user page
            # for each removal.
            new_text = ImmutableByReference(new_text)
            m_summary = ImmutableByReference(summary)
            if False is self.CommonsDelinker.exec_hook(
                    'before_save', (page, text, new_text, m_summary)):
                return 'skipped'

            is_retry = False
            # NOTE(review): ServerError retries are unbounded — only
            # PageNotSaved is limited to a single retry; confirm intended.
            while True:
                try:
                    if self.CommonsDelinker.config.get('edit', True) and not \
                            ((self.CommonsDelinker.site.lang == 'commons') ^ \
                            (config.usernames.get('commons', {}).get(
                                'commons') == 'CommonsDelinker')):
                        page.put(new_text.get(), m_summary.get())
                        return 'ok'
                except wikipedia.ServerError, e:
                    output(u'Warning! ServerError: %s' % str(e))
                except wikipedia.EditConflict:
                    # Try again
                    output(u'Got EditConflict trying to remove %s from %s:%s.'
                           % \
                           (image, site, page_title))
                    return self.replace_image(image, site, page_title,
                                              summary, replacement=None)
                except wikipedia.PageNotSaved:
                    if is_retry:
                        return 'failed'
                    is_retry = True
                except wikipedia.LockedPage:
                    return 'failed'
if newpages: for (page, date, length, loggedIn, user, comment) in pywikibot.getSite().newpages(1000): checkPage(page, checknames, knownonly) elif start: for page in pagegenerators.PreloadingGenerator(pagegenerators.AllpagesPageGenerator(start=start,includeredirects=False)): checkPage(page, checknames, knownonly) if longpages: for (page, length) in pywikibot.getSite().longpages(500): checkPage(page, checknames, knownonly) else: title = ' '.join(title) while title != '': try: page = pywikibot.Page(mysite,title) text = page.get() except pywikibot.NoPage: print "Page does not exist." except pywikibot.IsRedirectPage: print "Page is a redirect page" else: checkPage(page, knownonly=knownonly) title = pywikibot.input(u"Which page to check now? (enter to stop)") finally: pywikibot.stopme() filename = pywikibot.config.datafilepath('externals/spelling', 'spelling-' + checklang + '.txt') if rebuild: list = knownwords.keys() list.sort()
def main():
    """Create "Town, ST" redirects to "Town, Statename" articles.

    Walks all pages from ``-start:``, and for every title containing
    ``%2C_<state name>`` creates (after confirmation, or automatically with
    ``-force``) a redirect whose state name is replaced by its two-letter
    postal abbreviation.

    Fixes vs. the previous revision:
    * ``wikipeda.ouput`` -> ``pywikibot.output`` (NameError typo).
    * The ``IsNotRedirectPage`` message used ``goal``, which is unbound in
      that branch (``getRedirectTarget`` raised before assigning it); it now
      reports ``pl.title()``.
    * ``Connecticut`` was missing from the otherwise complete 50-state table.
    """
    start = '0'
    force = False
    # Edit summary, per content language.
    msg = {'en': 'Creating state abbreviation redirect',
           'ar': u'إنشاء تحويلة اختصار الولاية',
           'fa': u'ایجاد تغییرمسیر برای نام اختصاری ایالت',
           'he': u'יוצר הפניה מראשי התיבות של המדינה',
           }
    # Full state name -> USPS two-letter abbreviation.
    abbrev = {
        'Alabama': 'AL',
        'Alaska': 'AK',
        'Arizona': 'AZ',
        'Arkansas': 'AR',
        'California': 'CA',
        'Colorado': 'CO',
        'Connecticut': 'CT',  # was missing; all other 49 states were listed
        'Delaware': 'DE',
        'Florida': 'FL',
        'Georgia': 'GA',
        'Hawaii': 'HI',
        'Idaho': 'ID',
        'Illinois': 'IL',
        'Indiana': 'IN',
        'Iowa': 'IA',
        'Kansas': 'KS',
        'Kentucky': 'KY',
        'Louisiana': 'LA',
        'Maine': 'ME',
        'Maryland': 'MD',
        'Massachusetts': 'MA',
        'Michigan': 'MI',
        'Minnesota': 'MN',
        'Mississippi': 'MS',
        'Missouri': 'MO',
        'Montana': 'MT',
        'North Carolina': 'NC',
        'North Dakota': 'ND',
        'Nebraska': 'NE',
        'Nevada': 'NV',
        'New Hampshire': 'NH',
        'New Jersey': 'NJ',
        'New Mexico': 'NM',
        'New York': 'NY',
        'Ohio': 'OH',
        'Oklahoma': 'OK',
        'Oregon': 'OR',
        'Pennsylvania': 'PA',
        'Rhode Island': 'RI',
        'South Carolina': 'SC',
        'South Dakota': 'SD',
        'Tennessee': 'TN',
        'Texas': 'TX',
        'Utah': 'UT',
        'Vermont': 'VT',
        'Virginia': 'VA',
        'Washington': 'WA',
        'West Virginia': 'WV',
        'Wisconsin': 'WI',
        'Wyoming': 'WY'
    }
    for arg in pywikibot.handleArgs():
        if arg.startswith('-start:'):
            start = arg[7:]
        elif arg == '-force':
            force = True
        else:
            pywikibot.output(
                u'Warning: argument "%s" not understood; ignoring.' % arg)
    mysite = pywikibot.getSite()
    for p in mysite.allpages(start=start):
        for sn in abbrev:
            # Titles come URL-ish encoded: the comma is "%2C" and spaces "_".
            R = re.compile('[^[]]*' + '\%2C_' + sn)
            for res in R.findall(p.title()):
                pl = pywikibot.Page(mysite, p.title().replace(sn, abbrev[sn]))
                # A bit hacking here - the real work is done in the
                # 'except pywikibot.NoPage' part rather than the 'try'.
                try:
                    goal = pl.getRedirectTarget().title()
                    if pywikibot.Page(mysite, goal):
                        pywikibot.output(
                            u"Not creating %s - redirect already exists."
                            % goal)
                    else:
                        pywikibot.output(
                            u"WARNING!!! %s already exists but redirects elsewhere!"
                            % goal)
                except pywikibot.IsNotRedirectPage:
                    # BUG FIX: 'goal' is unbound here (getRedirectTarget
                    # raised before assigning it); report the page we probed.
                    pywikibot.output(
                        u"WARNING!!! Page %s already exists and is not a redirect. Please check page!"
                        % pl.title())
                except pywikibot.NoPage:
                    # Abbreviated title does not exist yet: create it.
                    change = ''
                    if p.isRedirectPage():
                        p2 = p.getRedirectTarget()
                        # BUG FIX: was 'wikipeda.ouput' (NameError).
                        pywikibot.output(
                            u'Note: goal page is redirect. Creating redirect to "%s" to avoid double redirect.'
                            % p2.title().replace("%2C", ",").replace("_", " "))
                    else:
                        p2 = p
                    if force:
                        change = 'y'
                    else:
                        while not change in ['y', 'n']:
                            pywikibot.output(
                                u"Create redirect %s"
                                % pl.title().replace("%2C", ",").replace("_", " "))
                            change = raw_input("(y/n)? ")
                    if change == 'y':
                        text = ('#REDIRECT [['
                                + p2.title().replace("%2C", ",").replace("_", " ")
                                + ']]')
                        pl.put(text,
                               comment=pywikibot.translate(mysite, msg),
                               minorEdit='0')
def main():
    """Scrape the latest SuperEnalotto draw from sisal.it and publish it
    as an it.wikinews article built from ``modello_superenalotto.txt``.

    Flags: ``-always`` saves without asking; ``-force`` re-creates the
    article even if it already exists.
    """
    args = wikipedia.handleArgs()
    all = False
    force = False
    for currentArgument in args:
        if currentArgument.startswith("-always"):
            all = True
        if currentArgument.startswith("-force"):
            force = True
    # Read the article template from disk (UTF-8).
    templateFile = codecs.open("modello_superenalotto.txt", "r", "utf-8")
    modelloVoce = templateFile.read()  # Reads the page template
    templateFile.close()
    now = datetime.datetime.utcnow()
    urlo = "http://www.sisal.it/se/se_main/1,4136,se_Default,00.html"
    wikipedia.output(u'Prendo la pagina dal server...')
    # Fetch the lottery page; on an HTTP error wait 10 s and retry once,
    # then give up.
    try:
        htmlText = pageText(urlo)
    except urllib2.HTTPError:
        try:
            wikipedia.output(
                u"Errore del server. Aspetto 10 secondi... "
                + time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()))
            time.sleep(10)
            htmlText = pageText(urlo)
        except urllib2.HTTPError:
            wikipedia.output(u"Errore del server. Chiudo.")
            return
    # Screen-scrape the fixed HTML layout. NOTE(review): each re.search
    # result is used without a None check below; any site redesign makes
    # these .group() calls raise AttributeError.
    # The six drawn numbers.
    numeri = re.search(
        "<TABLE[^>]*>\s*<tr>\s*<td[^>]*>\s*<a[^>]*><nobr><font[^>]*>\s*(\d+) - \s*(\d+) - \s*(\d+) - \s*(\d+) - \s*(\d+) - \s*(\d+)\s*</font>\s*</nobr>\s*</a>\s*</td>\s*</tr>\s*</table>",
        htmlText)
    # "Jolly" bonus number.
    jolly = re.search(
        "<td[^>]*background=\"/giochi/se2006/hp2009/img/BTN_JOLLY.gif\"[^>]*>\s*<a[^>]*><font[^>]*><b>(\d+)</b></font></a>\s*</td>",
        htmlText)
    # "SuperStar" number.
    superstar = re.search(
        "<td[^>]*background=\"/giochi/se2006/hp2009/img/BTN_SUPERSTAR.gif\"[^>]*>\s*<a[^>]*><font[^>]*><b>(\d+)</b></font></a>\s*</td>",
        htmlText)
    # Draw number, weekday and date (groups: id, dow, day, month, year).
    concorso = re.search(
        "<font[^>]*><font[^>]*><b>Concorso n. (\d+) di (.*?) (\d+)/(\d+)/(\d+)</b></font></a>",
        htmlText)
    # Prize pool of this draw.
    montepremiparz = re.search(
        "<td><a[^>]*><font class=testo8[^>]*>Del Concorso</a></td>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*>(.*?) euro</font></a></td>",
        htmlText)
    # Jackpot carried over from previous draws.
    jackpot = re.search(
        "<td><a[^>]*><font class=testo8[^>]*>\s*Riporto Jackpot</a>\s*</td>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*>(.*?) euro</font></a></td>",
        htmlText)
    # Total prize pool.
    montepremitot = re.search(
        "<td><a[^>]*><font class=testo8[^>]*><b>Totale</a></td>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*><b>(.*?) euro</font></a></td>",
        htmlText)
    # Per-category row pattern; '%s' is substituted with each entry of
    # 'quotes' below (group 1 = winners, group 2 = payout).
    # NOTE(review): the quoting around %s appears garbled by text
    # extraction ("%s" inside a double-quoted literal is not valid
    # Python) -- confirm against the original source file.
    bloccoQuote = "<tr[^>]*>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*>(.*?)</font></a></td>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*>"%s"</a></td>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*>(.*?)</font></a></td>\s*</tr>"
    page = wikipedia.Page(
        wikipedia.getSite(code='it', fam='wikinews'),
        "Italia: concorso n. " + concorso.group(1) + "/" + concorso.group(5)
        + " del SuperEnalotto")
    # Without -force, an existing article means there is no new draw.
    if page.exists() and not force:
        wikipedia.output("Nessuna nuova estrazione. Mi fermo.")
        return
    elencoSostituzioni = {  # Substitutes the variables in the template
        '#super-id': concorso.group(1),
        '#dow': concorso.group(2).replace('ì', u'ì'),
        '#giorno': concorso.group(3),
        '#mese': concorso.group(4),
        '#anno': concorso.group(5),
        '#num-1': numeri.group(1),
        '#num-2': numeri.group(2),
        '#num-3': numeri.group(3),
        '#num-4': numeri.group(4),
        '#num-5': numeri.group(5),
        '#num-6': numeri.group(6),
        '#num-jolly': jolly.group(1),
        '#num-superstar': superstar.group(1),
        '#montepremi-parz': montepremiparz.group(1),
        '#jackpot': jackpot.group(1),
        '#montepremi-tot': montepremitot.group(1),
    }
    # Winning categories; each becomes a '#<category>#' and
    # '#vincitori-<category>#' placeholder pair in the template.
    quotes = [
        'punti 6', 'punti 5\+', 'punti 5', 'punti 4', 'punti 3', '5 stella',
        '4 stella', '3 stella', '2 stella', '1 stella', '0 stella'
    ]
    for c in quotes:
        match = re.search(bloccoQuote % c, htmlText)
        elencoSostituzioni['#' + c.lower().replace(' ', '-') + '#'] = \
            match.group(2).replace('nessuna', '0')
        elencoSostituzioni['#vincitori-' + c.lower().replace(' ', '-') + '#'] = \
            match.group(1).replace('nessuna', '0')
    nuovoTesto = massiveReplace(elencoSostituzioni, modelloVoce)
    #page = wikipedia.Page(wikipedia.getSite(code='it', fam='wikinews'), "Utente:BimBot/Sandbox") #DEBUG
    wikipedia.output(">>>>> " + page.title() + " <<<<<")
    try:
        vecchioTesto = page.get()
    except wikipedia.NoPage:
        vecchioTesto = ''
    wikipedia.showDiff(vecchioTesto, nuovoTesto)
    # Ask for confirmation unless -always was given; 'All' switches to
    # automatic mode for the rest of the run.
    if not all:
        choice = wikipedia.inputChoice(u"Modificare?", ['Yes', 'No', 'All'],
                                       ['y', 'N', 'a'], 'N')
    else:
        choice = 'y'
    if choice in ['A', 'a']:
        all = True
        choice = 'y'
    if choice in ['Y', 'y']:
        page.put(nuovoTesto,
                 u"Bot: Inserisco nuova estrazione del SuperEnalotto")
def __iter__(self): tableTagR = re.compile('<table', re.IGNORECASE) for entry in self.xmldump.parse(): if tableTagR.search(entry.text): yield pywikibot.Page(pywikibot.getSite(), entry.title)
'fecha': fecha, 'titulo': titulo, 'textoexp': textoexp, 'titulosub': titulosub, 'textosub': textosub, 'asentimiento': asentimiento, 'presentes': presentes, 'afavor': afavor, 'encontra': encontra, 'abstenciones': abstenciones, 'novotan': novotan, 'votos': votos, }) p = wikipedia.Page( wikipedia.Site('15mpedia', '15mpedia'), u'Lista de votaciones del Congreso de los Diputados/%s/Sesión %s/Votación %s' % (legislatura, sesion, numerovotacion)) p.put(output, u'BOT - Creando página de votación del Congreso de los Diputados') votaciones = u'' votacionesids.sort() for votacionid in votacionesids: votaciones += u""" === Votación %s === {{main|Lista de votaciones del Congreso de los Diputados/%s/Sesión %s/Votación %s}} {{:Lista de votaciones del Congreso de los Diputados/%s/Sesión %s/Votación %s}} """ % (votacionid, legislatura, sesion, votacionid, legislatura, sesion, votacionid) output = string.Template(
wikipediaen=wikipedia.Site('en', 'wikipedia') gen=pagegenerators.AllpagesPageGenerator(start=st, namespace=0, includeredirects=False, site=commons) preloadingGen=pagegenerators.PreloadingGenerator(gen, pageNumber=100, lookahead=100) for page in preloadingGen: if page.isRedirectPage() or page.isDisambig(): continue else: if not getAllInterwikis(page.get()): wtitle=page.title() wtext=newtext=page.get() summary="BOT -" eniw=getEnglishInterwiki(newtext) wikipedia.output("=== %s ===" % wtitle) wikipedia.output("La galería NO tiene interwikis") enpage=wikipedia.Page(ensite, wtitle) if enpage.exists() and not enpage.isRedirectPage() and not enpage.isDisambig(): commonsimages=getImageTitles(wtitle, commonssite) enimages=getImageTitles(wtitle, ensite) for image in enimages: if commonsimages.count(image)!=0: #con que una imagen coincida, ya vale eniws=enpage.interwiki() eniws.append(enpage) eniws.sort() iws_="" for iw in eniws: iws_+="[[%s:%s]]\n" % (iw.site().lang, iw.title()) page.put(u"%s\n\n%s" % (wtext, iws_), u"BOT - Adding %d interwiki(s) from [[:en:%s]]" % (len(eniws), enpage.title())) break continue else:
def run(self):
    """Transfer ``self.imagePage`` to Wikimedia Commons as ``self.newname``.

    Asks the toolserver CommonsHelper for a Commons-ready description,
    uploads the file, then tags the local image with {{NowCommons}} and,
    if the name changed, rewrites all local usages via ImageRobot.
    """
    # POST form for Magnus' CommonsHelper; field names are what the tool
    # expects ('doit' is Dutch for "execute").
    tosend = {'language': self.imagePage.site().language().encode('utf-8'),
              'image': self.imagePage.titleWithoutNamespace().encode('utf-8'),
              'newname': self.newname.encode('utf-8'),
              'project': self.imagePage.site().family.name.encode('utf-8'),
              'username': '',
              'commonsense': '1',
              'remove_categories': '1',
              'ignorewarnings': '1',
              'doit': 'Uitvoeren'
              }
    tosend = urllib.urlencode(tosend)
    print tosend
    CH = pageTextPost('http://www.toolserver.org/~magnus/commonshelper.php',
                      tosend)
    print 'Got CH desc.'
    # The generated description sits inside the page's <textarea>;
    # capture its attribute block so the closing split matches exactly.
    tablock = CH.split('<textarea ')[1].split('>')[0]
    CH = CH.split('<textarea ' + tablock + '>')[1].split('</textarea>')[0]
    # NOTE(review): this replace looks like it should map the HTML entity
    # to the character, but both arguments render identically here --
    # confirm against the original source.
    CH = CH.replace(u'×', u'×')
    CH = self.fixAuthor(CH)
    pywikibot.output(CH);
    # I want every picture to be tagged with the bottemplate so i can check my contributions later.
    CH = u'\n\n{{BotMoveToCommons|' + self.imagePage.site().language() + \
         '.' + self.imagePage.site().family.name + \
         '|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}' + CH
    if self.category:
        # Drop the "uncategorized" placeholder and append the chosen category.
        CH = CH.replace(
            u'{{subst:Unc}} <!-- Remove this line once you have added categories -->',
            u'')
        CH = CH + u'[[Category:' + self.category + u']]'
    bot = UploadRobot(url=self.imagePage.fileUrl(), description=CH,
                      useFilename=self.newname, keepFilename=True,
                      verifyDescription=False, ignoreWarning=True,
                      targetSite=pywikibot.getSite('commons', 'commons'))
    bot.run()
    # Should check if the image actually was uploaded
    if pywikibot.Page(pywikibot.getSite('commons', 'commons'),
                      u'Image:' + self.newname).exists():
        # Get a fresh copy, force to get the page so we dont run into edit conflicts
        imtxt = self.imagePage.get(force=True)
        # Remove the move to commons templates
        if self.imagePage.site().language() in moveToCommonsTemplate:
            for moveTemplate in \
                    moveToCommonsTemplate[self.imagePage.site().language()]:
                imtxt = re.sub(u'(?i)\{\{' + moveTemplate + u'[^\}]*\}\}',
                               u'', imtxt)
        # add {{NowCommons}}
        if self.imagePage.site().language() in nowCommonsTemplate:
            addTemplate = \
                nowCommonsTemplate[self.imagePage.site().language()] \
                % self.newname
        else:
            addTemplate = nowCommonsTemplate['_default'] % self.newname
        if self.imagePage.site().language() in nowCommonsMessage:
            commentText = nowCommonsMessage[self.imagePage.site().language()]
        else:
            commentText = nowCommonsMessage['_default']
        pywikibot.showDiff(self.imagePage.get(), imtxt + addTemplate)
        self.imagePage.put(imtxt + addTemplate, comment=commentText)
        # Pages that embed the image, preloaded for the rename pass below.
        self.gen = pagegenerators.FileLinksGenerator(self.imagePage)
        self.preloadingGen = pagegenerators.PreloadingGenerator(self.gen)
        # If the image is uploaded under a different name, replace all instances
        if self.imagePage.titleWithoutNamespace() != self.newname:
            if self.imagePage.site().language() in imageMoveMessage:
                moveSummary = \
                    imageMoveMessage[self.imagePage.site().language()] \
                    % (self.imagePage.titleWithoutNamespace(), self.newname)
            else:
                moveSummary = imageMoveMessage['_default'] \
                    % (self.imagePage.titleWithoutNamespace(), self.newname)
            imagebot = ImageRobot(generator=self.preloadingGen,
                                  oldImage=self.imagePage.titleWithoutNamespace(),
                                  newImage=self.newname,
                                  summary=moveSummary, always=True, loose=True)
            imagebot.run()
    return
def __init__(self):
    """Bootstrap the image-mirror bot and mirror the image categories.

    Reads the wiki list from [[Bots/Wiki]] and the image list from
    [[Bots/ImageMirror/Images]] on the anime wiki, then pushes the
    Images category tree to every listed site.  ``self.runOk`` is True
    only when both bootstrap pages were read successfully; on failure
    the constructor returns early with ``runOk`` still False.

    BUG FIX: the previous revision did ``return False`` inside
    ``__init__``, which raises ``TypeError: __init__() should return
    None``.  A bare ``return`` keeps the intended "failed" signal in
    ``self.runOk``.
    """
    self.runOk = False
    # Setup Familys for Wikia Involved
    self.anime = wikipedia.getSite(code=u'en', fam=u'anime')
    wikipedia.setAction(wikipedia.translate(self.anime, msg))
    self.siteList = []
    self.imageList = []
    # Get Project Wiki Listing
    wikiaIds = []
    page = wikipedia.Page(self.anime, u'Bots/Wiki', None, 4)  # 4=Project Namespace
    try:
        text = page.get()
        # Strip everything outside the <!-- ||START|| --> / ||END|| markers.
        r = re.compile(u'^.*<!-- \|\|START\|\| -->\n?',
                       re.UNICODE | re.DOTALL)
        text = re.sub(r, u'', text)
        r = re.compile(u'\n?<!-- \|\|END\|\| -->.*$',
                       re.UNICODE | re.DOTALL)
        text = re.sub(r, u'', text)
        r = re.compile(u'\n', re.UNICODE | re.DOTALL)
        wikilist = re.split(r, text)
        # Skip comment lines (#...), blanks and wiki links ([...).
        r = re.compile(u'^#|^\s*$|^\[',
                       re.UNICODE | re.MULTILINE | re.DOTALL)
        for wiki in wikilist:
            if not re.match(r, wiki):
                wikiaIds.append(wiki)
    except wikipedia.NoPage:
        return  # was 'return False': TypeError in __init__
    for wiki in wikiaIds:
        self.siteList.append(wikipedia.getSite(code=u'en', fam=wiki))
    # Get Image Info List
    page = wikipedia.Page(self.anime, u'Bots/ImageMirror/Images', None, 4)  # 4=Project Namespace
    try:
        text = page.get()
        # Same marker-stripping dance as for the wiki list above.
        r = re.compile(u'^.*<!-- \|\|START\|\| -->\n?',
                       re.UNICODE | re.DOTALL)
        text = re.sub(r, u'', text)
        r = re.compile(u'\n?<!-- \|\|END\|\| -->.*$',
                       re.UNICODE | re.DOTALL)
        text = re.sub(r, u'', text)
        r = re.compile(u'\n', re.UNICODE | re.DOTALL)
        images = re.split(r, text)
        r = re.compile(u'^#|^\s*$', re.UNICODE | re.MULTILINE | re.DOTALL)
        for image in images:
            if not re.match(r, image):
                self.imageList.append(image)
    except wikipedia.NoPage:
        return  # was 'return False': TypeError in __init__
    self.runOk = True
    # Mirror the Images category and all subcategorys to all the wiki.
    ImageCategorys = []
    cat = catlib.Category(self.anime, u'Category:Images')
    ImageCategorys.append(cat)
    catlist = cat.subcategories(True)
    for category in catlist:
        ImageCategorys.append(category)
    for category in ImageCategorys:
        categorySource = u'{{networkMirror|%s|anime|category}}\n%s' % (
            category.title(), category.get())
        if categorySource != u'':
            for site in self.siteList:
                siteCategory = catlib.Category(site, category.title())
                siteSource = u''
                try:
                    siteSource = siteCategory.get()
                except wikipedia.NoPage:
                    wikipedia.output(
                        u'Site %s has no %s category, creating it'
                        % (site, category.title()))
                if siteSource != categorySource:
                    wikipedia.output(
                        u'Site \'%s\' category status: Needs Updating' % site)
                    wikipedia.output(u'Updating category on %s' % site)
                    siteCategory.put(categorySource)
                else:
                    wikipedia.output(u'Site \'%s\' category status: Ok'
                                     % site)
        else:
            wikipedia.output(u'Category %s is blank, skipping category'
                             % category.title())
    # Anime should only be in the list after categorys have been done.
    self.siteList.append(self.anime)
def main(args):
    """Drive the image-to-Commons transfer over a page generator.

    For every local image page (namespace 6) that passes the autoskip
    filter, either reuses the local name (``-always``) or asks via a
    Tk dialog for a new name, then starts an ``imageTransfer`` thread.
    ``-cc:<cat>`` sets the Commons category for the uploads.
    """
    generator = None;
    #newname = "";
    imagepage = None;
    always = False
    category = u''
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg.startswith('-cc:'):
            category = arg[len('-cc:'):]
        else:
            genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            'You have to specify the generator you want to use for the script!')
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    for page in pregenerator:
        skip = False
        # Only existing, non-redirect image pages (namespace 6) qualify.
        if page.exists() and (page.namespace() == 6) and \
           (not page.isRedirectPage()):
            imagepage = pywikibot.ImagePage(page.site(), page.title())
            # First do autoskip.
            if doiskip(imagepage.get()):
                pywikibot.output("Skipping " + page.title())
                skip = True
            else:
                # The first upload is last in the list.
                try:
                    username = imagepage.getLatestUploader()[0]
                except NotImplementedError:
                    # No API, using the page file instead
                    (datetime, username, resolution, size,
                     comment) = imagepage.getFileVersionHistory().pop()
                if always:
                    # Non-interactive: keep the local name, but skip if a
                    # Commons file with that name already exists.
                    newname = imagepage.titleWithoutNamespace()
                    CommonsPage = pywikibot.Page(
                        pywikibot.getSite('commons', 'commons'),
                        u'File:' + newname)
                    if CommonsPage.exists():
                        skip = True
                else:
                    # Interactive: loop until a free Commons name is chosen
                    # or the user skips this image.
                    while True:
                        # Do the Tkdialog to accept/reject and change te name
                        (newname, skip) = Tkdialog(
                            imagepage.titleWithoutNamespace(),
                            imagepage.get(), username,
                            imagepage.permalink(),
                            imagepage.templates()).getnewname()
                        if skip:
                            pywikibot.output('Skipping this image')
                            break
                        # Did we enter a new name?
                        if len(newname) == 0:
                            # Take the old name
                            newname = imagepage.titleWithoutNamespace()
                        else:
                            newname = newname.decode('utf-8')
                        # Check if the image already exists
                        CommonsPage = pywikibot.Page(
                            pywikibot.getSite('commons', 'commons'),
                            u'File:' + newname)
                        if not CommonsPage.exists():
                            break
                        else:
                            pywikibot.output(
                                'Image already exists, pick another name or skip this image')
                        # We dont overwrite images, pick another name, go to the start of the loop
            if not skip:
                # Each transfer runs in its own thread.
                imageTransfer(imagepage, newname, category).start()
    pywikibot.output(u'Still ' + str(threading.activeCount())
                     + u' active threads, lets wait')
    for openthread in threading.enumerate():
        if openthread != threading.currentThread():
            openthread.join()
    pywikibot.output(u'All threads are done')
def main():
    """Tag orphan (lonely) pages with the project's orphan template.

    Optionally gated by an enable page (``-enable``), reports disambig
    orphans to a collection page (``-disambig``), and supports
    ``-limit``, ``-newpages`` and ``-always``.

    BUG FIX: both redirect handlers referenced the misspelled
    ``wikiepedia.IsRedirect`` -- a NameError as soon as the exception
    fired; the correct class is ``wikipedia.IsRedirectPage``.  Also
    fixed the "esist" typos in the user-facing messages.
    """
    # Load the configurations in the function namespace
    global commento
    global Template
    global disambigPage
    global commenttodisambig
    global exception
    enablePage = None  # Check if someone set an enablePage or not
    limit = 50000  # All the pages! (I hope that there aren't so many lonely pages in a project..)
    generator = None  # Check if the bot should use the default generator or not
    genFactory = pagegenerators.GeneratorFactory()  # Load all the default generators!
    nwpages = False  # Check variable for newpages
    always = False  # Check variable for always
    disambigPage = None  # If no disambigPage given, not use it.
    # Arguments!
    for arg in wikipedia.handleArgs():
        if arg.startswith('-enable'):
            if len(arg) == 7:
                enablePage = wikipedia.input(
                    u'Would you like to check if the bot should run or not?')
            else:
                enablePage = arg[8:]
        if arg.startswith('-disambig'):
            if len(arg) == 9:
                disambigPage = wikipedia.input(
                    u'In which page should the bot save the disambig pages?')
            else:
                disambigPage = arg[10:]
        elif arg.startswith('-limit'):
            if len(arg) == 6:
                limit = int(
                    wikipedia.input(u'How many pages do you want to check?'))
            else:
                limit = int(arg[7:])
        elif arg.startswith('-newpages'):
            if len(arg) == 9:
                nwlimit = 50  # Default: 50 pages
            else:
                nwlimit = int(arg[10:])
            generator = wikipedia.getSite().newpages(number=nwlimit)
            nwpages = True
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    # Retrive the site
    wikiSite = wikipedia.getSite()
    if not generator:
        generator = genFactory.getCombinedGenerator()
    # If the generator is not given, use the default one
    if not generator:
        generator = wikiSite.lonelypages(repeat=True, number=limit)
    # Take the configurations according to our project
    comment = wikipedia.translate(wikiSite, commento)
    commentdisambig = wikipedia.translate(wikiSite, commenttodisambig)
    template = wikipedia.translate(wikiSite, Template)
    exception = wikipedia.translate(wikiSite, exception)
    # EnablePage part
    if enablePage != None:
        # Define the Page Object
        enable = wikipedia.Page(wikiSite, enablePage)
        # Loading the page's data
        try:
            getenable = enable.get()
        except wikipedia.NoPage:
            wikipedia.output(
                u"%s doesn't exist, I use the page as if it was blank!"
                % enable.title())
            getenable = ''
        except wikipedia.IsRedirectPage:  # was: wikiepedia.IsRedirect (NameError)
            wikipedia.output(u"%s is a redirect, skip!" % enable.title())
            getenable = ''
        # If the enable page is set to disable, turn off the bot
        # (useful when the bot is run on a server)
        if getenable != 'enable':
            wikipedia.output('The bot is disabled')
            return
    # DisambigPage part
    if disambigPage != None:
        disambigpage = wikipedia.Page(wikiSite, disambigPage)
        try:
            disambigtext = disambigpage.get()
        except wikipedia.NoPage:
            wikipedia.output(u"%s doesn't exist, skip!"
                             % disambigpage.title())
            disambigtext = ''
        except wikipedia.IsRedirectPage:  # was: wikiepedia.IsRedirect (NameError)
            wikipedia.output(u"%s is a redirect, don't use it!"
                             % disambigpage.title())
            disambigPage = None
    # Main Loop
    for page in generator:
        if nwpages == True:
            page = page[0]  # The newpages generator returns a tuple, not a Page object.
        wikipedia.output(u"Checking %s..." % page.title())
        if page.isRedirectPage():  # If redirect, skip!
            wikipedia.output(u'%s is a redirect! Skip...' % page.title())
            continue
        # refs is not a list, it's a generator while refsList... is a list, yes.
        refs = page.getReferences()
        refsList = list()
        for j in refs:
            if j == None:
                # We have to find out why the function returns that value
                wikipedia.output(u'Error: 1 --> Skip page')
                continue
            refsList.append(j)
        # This isn't possible with a generator
        if refsList != []:
            wikipedia.output(u"%s isn't orphan! Skip..." % page.title())
            continue
        # Never understood how a list can turn in "None", but it happened :-S
        elif refsList == None:
            # We have to find out why the function returns that value
            wikipedia.output(u'Error: 2 --> Skip page')
            continue
        else:
            # Ok, no refs, no redirect...
            # let's check if there's already the template
            try:
                oldtxt = page.get()
            except wikipedia.NoPage:
                wikipedia.output(u"%s doesn't exist! Skip..." % page.title())
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output(u"%s is a redirect! Skip..." % page.title())
                continue
            # I've used a loop in a loop. If I use continue in the second
            # loop, it won't do anything in the first. So let's create a
            # variable to avoid this problem.
            Find = False
            for regexp in exception:
                res = re.findall(regexp, oldtxt.lower())
                # Found a template! Let's skip the page!
                if res != []:
                    wikipedia.output(
                        u'Your regex has found something in %s, skipping...'
                        % page.title())
                    Find = True
                    break
            # Skip the page..
            if Find:
                continue
            # Is the page a disambig?
            if page.isDisambig() and disambigPage != None:
                wikipedia.output(u'%s is a disambig page, report..'
                                 % page.title())
                if not page.title().lower() in disambigtext.lower():
                    disambigtext = u"%s\n*[[%s]]" % (disambigtext,
                                                     page.title())
                    disambigpage.put(disambigtext, commentdisambig)
                continue
            # Is the page a disambig but there's not disambigPage? Skip!
            elif page.isDisambig():
                wikipedia.output(u'%s is a disambig page, skip...'
                                 % page.title())
                continue
            else:
                # Ok, the page need the template. Let's put it there!
                newtxt = u"%s\n%s" % (template, oldtxt)  # Adding the template in the text
                wikipedia.output(u"\t\t>>> %s <<<" % page.title())  # Showing the title
                wikipedia.showDiff(oldtxt, newtxt)  # Showing the changes
                choice = 'y'  # Default answer
                if not always:
                    choice = wikipedia.inputChoice(
                        u'Orphan page found, shall I add the template?',
                        ['Yes', 'No', 'All'], ['y', 'n', 'a'])
                if choice == 'a':
                    always = True
                    choice = 'y'
                if choice == 'y':
                    try:
                        page.put(newtxt, comment)
                    except wikipedia.EditConflict:
                        wikipedia.output(u'Edit Conflict! Skip...')
                        continue
import os import wikipedia import re import datetime commonssite=wikipedia.Site("commons", "commons") username="******" password="" f=open("/home/emijrp/.my.cnf2", "r") raw=f.read() f.close() m=re.findall(ur'%s = *"(.*)"' % username, raw) password=m[0] today=datetime.date.today() page=wikipedia.Page(commonssite, u"Template:Potd/%s" % today.isoformat()) m=re.findall(ur'(?i)\{\{ *potd filename *\| *1? *=? *([^\|]*?) *\|', page.get()) imagename=m[0] page=wikipedia.Page(commonssite, u"Template:Potd/%s (en)" % today.isoformat()) m=re.findall(ur'(?i)\{\{ *potd description *\| *1? *=? *(.*?) *\| *2? *=? *en *\|', page.get()) imagedesc=m[0] imagedesc=re.sub(ur'(?i)\[\[([^\|]*?)\|(?P<label>[^\]]*?)\]\]', ur'\g<label>', imagedesc) imagedesc=re.sub(ur'(?i)[\[\]]', ur'', imagedesc) if len(imagedesc)>40: imagedesc=u'%s...' % (imagedesc[:40]) imagename_=re.sub(" ", "_", imagename) msg=u'%s → http://commons.wikimedia.org/wiki/File:%s #commons #photos #wikipedia' % (imagedesc, imagename_) orden='curl -u %s:%s -d status="%s" http://twitter.com/statuses/update.json' % (username, password, msg.encode("utf-8")) os.system(orden)
u'eilimit': 'max', u'format': 'json' } counts, destmap, catmap = {}, {}, {} catlist, catpages, nonemptypages = [], [], [] target = self.cat_redirect_cat[self.site.family.name][self.site.lang] # get a list of all members of the category-redirect category for result in self.query_results( generator=u'categorymembers', gcmtitle=target, gcmnamespace=u'14', # CATEGORY gcmlimit=u'max', prop='info|categoryinfo'): for catdata in result['pages'].values(): thispage = pywikibot.Page(self.site, catdata['title']) catpages.append(thispage) if 'categoryinfo' in catdata \ and catdata['categoryinfo']['size'] != "0": # save those categories that have contents nonemptypages.append(thispage) # preload the category pages for redirected categories pywikibot.output(u"") pywikibot.output(u"Preloading %s category redirect pages" % len(catpages)) for cat in pagegenerators.PreloadingGenerator(catpages, 120): cat_title = cat.titleWithoutNamespace() if "category redirect" in cat_title: self.log_text.append(u"* Ignoring %s" % cat.title(asLink=True, textlink=True))
def run(self):
    """Create or overwrite the wiki page for this host from iLO data.

    Fills a fixed hardware/instance template with values fetched from
    the machine's iLO at ``self.ip`` and saves it as page ``self.name``.
    """
    wikipedia.setAction(u'Robot: hardware import')
    page = wikipedia.Page(wikipedia.getSite(), self.name)
    # Skeleton page: a hardware template followed by an instance
    # template.  The per-line startswith() rewrites below depend on
    # this exact line layout.
    tmpl = """{{MODEL|
|NAME=
|LOCATION=
|OWNER=
|SN=
|PN=
|OOBIP=
|OOBMAC=
|RPSUSED=1
|NICUSED=2
|NIC1=eth0
|NIC2=eth1
|NICMAC1=
|NICMAC2=
|CPUUSED=2x UnknownCPU
|RAMUSED=2x UnknownMB
|DISKSUSED=2x 146GB
|CONTRACT=
}}
{{Instance
|USAGE=[[OpenVZ HN]]
|OS=lenny
|ARCH=x86_64
|AUTH=ldap
|LDAPGROUP=hostgroup-admin-ssh
|ETH=
}}
"""
    lines = list()
    oldlines = tmpl.split("\n")
    # Hardware facts scraped from the iLO board.
    data = self.fetchIloData(self.ip)
    print data
    # now replace the values.  Consumed keys are deleted from 'data' so
    # each value is written at most once.
    for line in oldlines:
        if line.startswith("{{MODEL"):
            # e.g. 'ProLiant DL360 G5' -> '{{HP_DL360_G5|'
            line = "{{HP_" + data['productname'].replace(
                'ProLiant', '').replace(' ', '_') + '|'
        if line.startswith("|NAME"):
            line = "|NAME=" + self.name
        if line.startswith("|OOBIP"):
            line = "|OOBIP=" + self.ip
        if line.startswith("|LOCATION"):
            # Location is derived from the first two dash-separated
            # parts of the host name.
            line = "|LOCATION=" + (
                '%s-%s' % (self.name.split('-')[0],
                           self.name.split('-')[1]))
        if line.startswith("|SN=") and data.has_key('serialnumber'):
            line = "|SN=" + data['serialnumber']
            del data['serialnumber']
        if line.startswith("|PN=") and data.has_key('skunumber'):
            line = "|PN=" + data['skunumber']
            del data['skunumber']
        if line.startswith("|OOBMAC") and data.has_key('oobmac'):
            line = "|OOBMAC=" + data['oobmac']
            del data['oobmac']
        if line.startswith("|RPSUSED") and data.has_key('rpsused'):
            line = "|RPSUSED=" + str(data['rpsused'])
            del data['rpsused']
        if line.startswith("|NICMAC1") and data.has_key('nicmac1'):
            line = "|NICMAC1=" + str(data['nicmac1'])
            del data['nicmac1']
        if line.startswith("|NICMAC2") and data.has_key('nicmac2'):
            line = "|NICMAC2=" + str(data['nicmac2'])
            del data['nicmac2']
        if line.startswith("}}"):
            # hardware template is over, ensure that no other changes are made
            data = dict()
        lines.append(line)
    pagetext = "\r\n".join(lines)
    # Save the page
    try:
        page.put(pagetext)
    except wikipedia.LockedPage:
        wikipedia.output(u"Page %s is locked; skipping."
                         % page.aslink())
    except wikipedia.EditConflict:
        wikipedia.output(u'Skipping %s because of edit conflict'
                         % (page.title()))
    except wikipedia.SpamfilterError, error:
        wikipedia.output(
            u'Cannot change %s because of spam blacklist entry %s'
            % (page.title(), error.url))