def main():
    # Build today's picture-of-the-day (POTD) data: read the image name from
    # Commons' Template:Potd/<date>, then look for a localized description
    # subpage, preferring Spanish over English.
    date = datetime.date.today().isoformat()
    commonssite = wikipedia.Site("commons", "commons")
    eswikisite = wikipedia.Site("es", "wikipedia")  # NOTE(review): unused in this chunk -- presumably used further down
    page = wikipedia.Page(commonssite, u"Template:Potd/%s" % date)
    file = u""
    if page.exists() and not page.isRedirectPage() and not page.isDisambig():
        file = page.get()
        # The template body looks like "{{...|File name.jpg|...}}"; keep the
        # text between the first and second pipe.
        file = file.split("|")[1].split("|")[0]
    else:
        # No POTD template for today: nothing to do.
        sys.exit()
    description = u""
    for lang in ['es', 'en']:
        page = wikipedia.Page(commonssite, u"Template:Potd/%s (%s)" % (date, lang))
        if page.exists() and not page.isRedirectPage() and not page.isDisambig():
            description = page.get()
            # Extract the description from {{Potd description|1=...|2=...}},
            # falling back to positional-parameter form.
            if re.search(ur"(?i)(\{\{ *Potd description *\|1=)", description):
                description = description.split("|1=")[1].split("|2=")[0]
            elif re.search(ur"(?i)(\{\{ *Potd description *\|[^ \d])", description):
                description = description.split("|")[1].split("|")[0]
            elif not re.search(ur"(?i)\{\{", description):
                # Plain text description (no template): use it as-is.
                pass
    # NOTE(review): the function appears truncated here -- no use of `file`,
    # `description` or `eswikisite` is visible in this chunk.
def main():
    """ Update a projects list """
    # Pull every non-redirect page in namespace 100 (Portal) whose title has
    # no subpage slash from the eswiki database replica, then publish the
    # list and its count as wiki pages.
    site = wikipedia.Site("es", "wikipedia")
    conn = MySQLdb.connect(host='sql-s3', db='eswiki_p', read_default_file='~/.my.cnf', use_unicode=True)
    cursor = conn.cursor()
    cursor.execute('SELECT distinct page_title from page where page_namespace=100 and page_is_redirect=0 and page_title not regexp "/";')
    result = cursor.fetchall()
    portales = []
    for row in result:
        if len(row) == 1:
            # Titles come back underscore-separated; display form uses spaces.
            portal = re.sub("_", " ", unicode(row[0], "utf-8"))
            portales.append(portal)
    page = wikipedia.Page(site, u"Wikiproyecto:Portales/Lista")
    output = u"Portales que existen en [[Wikipedia en español]]:"
    for portal in portales:
        output += u"\n# [[Portal:%s|%s]]" % (portal, portal)
    # Only save when the content actually changed (avoid null edits).
    if output != page.get():
        page.put(output, u"BOT - Actualizando lista de portales [%s]" % len(portales))
    page = wikipedia.Page(site, u"Wikiproyecto:Portales/Número")
    output = u"%s<noinclude>{{documentación}}</noinclude>" % len(portales)
    if output != page.get():
        page.put(output, u"BOT - Actualizando número de portales [%s]" % len(portales))
    cursor.close()
    conn.close()
def secession_page_gen():
    """Yield a Commons file page for every title listed in
    wlm_secession_title.txt (one bare file name per line)."""
    listing = codecs.open('wlm_secession_title.txt', 'rt', 'utf-8')
    commons = pywikibot.Site('commons', 'commons')
    for raw_line in listing:
        yield pywikibot.Page(commons, 'File:' + raw_line.strip())
def invite(editors):
    # Post a WikiLoves Earth Brasil thank-you message on each editor's
    # Commons talk page.  `editors` maps editor name -> dict holding at
    # least an 'images' upload count.  Relies on module-level `head`, `msg`,
    # `firstName` and `saveCSV`.
    s = wp.Site('', 'commons')
    remove = []  # editors successfully notified; persisted at the end
    try:
        for editor in editors:
            p = wp.Page(s, "User talk:" + quote(editor))
            pageContent = u''
            try:
                pageContent += p.get()
            except:
                # Talk page missing (or any fetch error): seed it with a
                # welcome template.  NOTE(review): bare except also hides
                # network errors -- confirm this is intended best-effort.
                pageContent += "{{Welcome}} - ~~~~"
            pageContent += head
            if editors[editor]['images'] < 2:
                pageContent += msg.format(firstName(editor), u"por enviar uma foto")
            else:
                pageContent += msg.format(
                    firstName(editor), u"pelas {0} fotos envidas".format(
                        editors[editor]['images']))
            p.put(
                pageContent,
                u'Mensagem do Concurso WikiLoves Earth - [[pt:Wikipédia:Wiki_Loves_Earth_2014/Brasil|WLE Brasil]]'
            )
            # Random pause between edits to stay under rate limits.
            sleepTime = randint(19, 40)
            print(editor + "foi notificado, esperar " + str(sleepTime))
            remove.append(editor)
            sleep(sleepTime)
    except UnicodeDecodeError as e:
        # Dump every public attribute of the exception for debugging.
        for p in dir(e):
            if not p.startswith('_'):
                print '%s=%r' % (p, getattr(e, p))
    finally:
        # Always persist progress so already-notified editors are skipped
        # on the next run.
        saveCSV(editors, remove, "teste1.csv")
def getSoftwareRedirect(lang, page):
    # Look up `page`'s title case-insensitively in a cached on-disk dump of
    # all page titles for `lang`; return the matching Page when found,
    # otherwise the original `page` unchanged.
    wtitle = page.title()
    filepagetitles = "/home/emijrp/temporal/tarea037-%s-pagetitles.txt" % lang
    if not os.path.exists(filepagetitles):
        loadPageTitles(lang)  # (re)generate the cache file
    try:
        f = codecs.open(filepagetitles, mode="r", encoding="utf-8")
    except:
        print "Error al cargar pagetitles"
        sys.exit()
    l = f.readline()
    if not l:  # empty (0-byte) file? regenerate the cache and reopen
        f.close()
        loadPageTitles(lang)
        try:
            f = codecs.open(filepagetitles, mode="r", encoding="utf-8")
        except:
            print "Error al cargar pagetitles"
            sys.exit()
        # NOTE(review): `l` is not re-read after reopening, so the scan loop
        # below never runs in this branch -- confirm whether a
        # `l = f.readline()` is missing here.
    c = 0
    while l:
        l = l[:-1]  # strip the trailing newline
        c += 1
        if c % 250000 == 0:
            print "lower", c  # progress marker for very large title lists
        if wtitle.strip().lower() == l.strip().lower():
            # It would be rare for two distinct articles to differ only in
            # case with one of them heavily visited; if this returns a
            # redirect, the caller later resolves it to its target.
            f.close()
            return wikipedia.Page(wikipedia.Site(lang, "wikipedia"), l)
        l = f.readline()
    f.close()
    return page
def main():
    """ Update a list of newbies """
    # Lists users who registered within the last `dias` days, made at least
    # `limediciones` mainspace edits, are not blocked, and whose talk page
    # does not exist yet (i.e. nobody has welcomed them).
    eswiki = wikipedia.Site('es', 'wikipedia')
    dias = 7  # look-back window, in days
    limediciones = 3  # minimum edit count to be listed
    conn = MySQLdb.connect(host='sql-s3', db='eswiki_p', read_default_file='~/.my.cnf', use_unicode=True)
    cursor = conn.cursor()
    # Recent mainspace edits/creations by recently-registered, non-blocked
    # users, grouped per user, most active first.
    cursor.execute("SELECT rc_user_text, count(*) as count from recentchanges where (rc_type=0 or rc_type=1) and rc_namespace=0 and rc_timestamp>=date_add(now(), interval -%d day) and rc_user_text in (select user_name from user where user_registration>=date_add(now(), interval -%d day)) and rc_user_text not in (select ipb_address from ipblocks) group by rc_user_text order by count desc;" % (dias, dias))
    result = cursor.fetchall()
    users = []
    for row in result:
        user_name = unicode(row[0], "utf-8")
        edits = int(row[1])
        users.append([user_name, edits])
    output = u"La siguiente es una lista con los usuarios que han llegado en los últimos %d días y han editado algunas cosas, pero todavía nadie les ha saludado. Es posible que interese darles la bienvenida después de revisar sus contribuciones. Ejemplo de bienvenida: <code>{<nowiki></nowiki>{su<nowiki></nowiki>bst:Usuario:Emijrp/Bienvenida.css}<nowiki></nowiki>} --~~<nowiki></nowiki>~~</code>\n" % (dias)
    for user_name, edits in users:
        if edits < limediciones:
            continue
        talk = wikipedia.Page(eswiki, u"User talk:%s" % user_name)
        if not talk.exists():  # only list users nobody has greeted yet
            output += u"* (%d) [[Usuario:%s|%s]] ([[Usuario Discusión:%s|discusión]] · [[Special:Contributions/%s|contribuciones]])\n" % (edits, user_name, user_name, user_name, user_name)
    novatos = wikipedia.Page(eswiki, u"User:Emijrp/Recién llegados")
    novatos.put(output, u"BOT - Actualizando lista de recién llegados")
def main():
    ''' Esta é a def onde o nosso script vai estar '''
    # Tutorial script: list the articles of pt.wikipedia's "Ambiente"
    # category and print the total count.
    site = wikipedia.Site("pt", "wikipedia")  # the target site is pt.wikipedia
    '''De seguida, definimos a categoria Ambiente e obtemos a listagem dos títulos dos artigos. Na demonstração o código está por extenso para mais fácil percepção, na prática, bastaria pages = catlib.Category(site, u"Ambiente").articles() para se obter a listagem '''
    cat = catlib.Category(site, u"Ambiente")  # define the "Ambiente" category
    catList = cat.articlesList()
    '''Agora que temos uma listagem, e antes de contar os elementos, vamos ver os títulos que constam na catList. Esta abordagem serve bem para ilustrar este exemplo, caso fosse para interagir directamente com os artigos, como veremos noutro post, há abordagens mais eficientes. O primeiro print, ou seja, no caso o objecto page, é um objecto python, enquanto que o segundo print, o do page.title(), já tem o formato de unicode. '''
    for page in catList:
        print u"página (objecto):", page
        print u"Título da página: ", page.title()  # print the article title
    ''' Por fim, fazemos a contagem dos artigos '''
    print u"\n Nº de artigos na categoria: ", len(catList)
def rcAPI():
    # Poll enwiki's recent changes every 3 seconds forever, handing each
    # previously-unseen change to fetchedEdit() on its own thread.
    site = wikipedia.Site("en", "wikipedia")
    rctimestamp = ""
    rcs = site.recentchanges(number=1)
    for rc in rcs:
        rctimestamp = rc[1]  # seed the cursor with the newest change's timestamp
    rcdir = "newer"
    rchistory = []  # recently-seen changes, kept for de-duplication
    while True:
        rcs = site.recentchanges(
            number=100, rcstart=rctimestamp, rcdir=rcdir
        )  # FIXME: this does not return the oldids; better to build my own wikipedia.query
        for rc in rcs:
            # rc tuple layout (as used here): (page, timestamp, user, comment)
            rcsimple = [rc[0].title(), rc[1], rc[2], rc[3]]
            if rcsimple not in rchistory:
                rchistory = rchistory[-1000:]  # cap the de-dup memory
                rchistory.append(rcsimple)
                edit_props = {
                    'page': rc[0],
                    'title': rc[0].title(),
                    'timestamp': rc[1],
                    'user': rc[2],
                    'comment': rc[3],
                }
                thread.start_new_thread(fetchedEdit, (edit_props, ))
            # Advance the polling cursor to the last change seen.
            # NOTE(review): original indentation was lost -- this assignment
            # may have been inside the `if` above; placed at loop level so the
            # cursor also advances past duplicates.
            rctimestamp = rc[1]
        time.sleep(3)
def getLines(page): p = wikipedia.Page(wikipedia.Site('15mpedia', '15mpedia'), page) raw = p.get() raw = re.sub(ur"(?im)^\*\s*", ur"", raw) rss = [] for l in raw.splitlines(): if not l.startswith('#'): rss.append(l) return rss
def login(self):
    """Attempt to log in on the site described by this class.

    Returns a pywikipedia site object."""
    self.site = wikipedia.Site(code=self.lang, fam=self.family, user=self.user)
    loginManager = login.LoginManager(password=self.password, site=self.site, username=self.user)
    loginManager.login()
    return self.site
def main():
    """Transfer images to another wiki site (Commons by default).

    Recognised arguments: -interwiki, -keepname, -tolang:xx, -tofamily:yy,
    -file[:name]; any other argument is treated as (part of) a page title.
    """
    # if -file is not used, this temporary array is used to read the page title.
    pageTitle = []
    page = None
    gen = None
    interwiki = False
    keep_name = False
    targetLang = None
    targetFamily = None
    for arg in pywikibot.handleArgs():
        if arg == '-interwiki':
            interwiki = True
        elif arg.startswith('-keepname'):
            # BUG FIX: this previously assigned a fresh variable `keepname`,
            # so the -keepname option never reached the bot below.
            keep_name = True
        elif arg.startswith('-tolang:'):
            targetLang = arg[8:]
        elif arg.startswith('-tofamily:'):
            targetFamily = arg[10:]
        elif arg.startswith('-file'):
            if len(arg) == 5:
                filename = pywikibot.input(u'Please enter the list\'s filename: ')
            else:
                filename = arg[6:]
            gen = pagegenerators.TextfilePageGenerator(filename)
        else:
            pageTitle.append(arg)
    if not gen:
        # if the page title is given as a command line argument,
        # connect the title's parts with spaces
        if pageTitle != []:
            pageTitle = ' '.join(pageTitle)
            page = pywikibot.Page(pywikibot.getSite(), pageTitle)
        # if no page title was given as an argument, and none was
        # read from a file, query the user
        if not page:
            pageTitle = pywikibot.input(u'Which page to check:')
            page = pywikibot.Page(pywikibot.getSite(), pageTitle)
        # generator which will yield only a single Page
        gen = iter([page])
    if not targetLang and not targetFamily:
        targetSite = pywikibot.getSite('commons', 'commons')
    else:
        if not targetLang:
            # BUG FIX: `.language` without the call parentheses assigned the
            # bound method object instead of the language code string.
            targetLang = pywikibot.getSite().language()
        if not targetFamily:
            targetFamily = pywikibot.getSite().family
        targetSite = pywikibot.Site(targetLang, targetFamily)
    bot = ImageTransferBot(gen, interwiki=interwiki, targetSite=targetSite,
                           keep_name=keep_name)
    bot.run()
def main(): family = 'wikipedia' for lang in tarea000.getLangsByFamily(family): try: if lang == 'en-simple': lang = 'simple' site = wikipedia.Site(lang, family) tarea000.insertBOTijoInfo(site) except: print "Hubo un error en: ", lang
def printContent(l, source=''):
    # Publish "actualizaciones en las redes" (social-network updates) on
    # 15Mpedia for today and the previous three days, one template subpage
    # per day.  `l` holds [updated, sitetitle, title, url] rows (ISO dates);
    # `source` names the feed and becomes part of the target page title.
    day0 = datetime.datetime.now().strftime('%Y-%m-%d')
    day1 = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    day2 = (datetime.datetime.now() - datetime.timedelta(days=2)).strftime('%Y-%m-%d')
    day3 = (datetime.datetime.now() - datetime.timedelta(days=3)).strftime('%Y-%m-%d')
    # Each day's page body starts with the header template.
    day0_stuff = u"<noinclude>{{actualizaciones en las redes/inicio}}</noinclude>\n"
    day1_stuff = u"<noinclude>{{actualizaciones en las redes/inicio}}</noinclude>\n"
    day2_stuff = u"<noinclude>{{actualizaciones en las redes/inicio}}</noinclude>\n"
    day3_stuff = u"<noinclude>{{actualizaciones en las redes/inicio}}</noinclude>\n"
    for ll in l:
        [updated, sitetitle, title, url] = ll
        if updated == day0:
            day0_stuff += u"* {{actualización|titular=%s|enlace=%s|fuente=%s|fecha=%s}}\n" % (
                title, url, sitetitle, updated)
        if updated == day1:
            day1_stuff += u"* {{actualización|titular=%s|enlace=%s|fuente=%s|fecha=%s}}\n" % (
                title, url, sitetitle, updated)
        if updated == day2:
            day2_stuff += u"* {{actualización|titular=%s|enlace=%s|fuente=%s|fecha=%s}}\n" % (
                title, url, sitetitle, updated)
        if updated == day3:
            day3_stuff += u"* {{actualización|titular=%s|enlace=%s|fuente=%s|fecha=%s}}\n" % (
                title, url, sitetitle, updated)
    day0_stuff += u"<noinclude>{{actualizaciones en las redes/fin}}</noinclude>"
    day1_stuff += u"<noinclude>{{actualizaciones en las redes/fin}}</noinclude>"
    day2_stuff += u"<noinclude>{{actualizaciones en las redes/fin}}</noinclude>"
    day3_stuff += u"<noinclude>{{actualizaciones en las redes/fin}}</noinclude>"
    for k, v in [
        [day0, day0_stuff],
        [day1, day1_stuff],
        [day2, day2_stuff],
        [day3, day3_stuff],
    ]:
        # NOTE(review): `v` is always truthy (pre-seeded with the header),
        # so this check never skips a day.
        if v:
            page = wikipedia.Page(
                wikipedia.Site('15mpedia', '15mpedia'),
                u'Plantilla:Actualizaciones en las redes/%s/%s' % (source, k))
            # The edit summary reports per-day item counts (newline count
            # minus the header line).
            page.put(
                v,
                u"BOT - Añadiendo actualizaciones: %s [%d], %s [%d], %s [%d], %s [%d]"
                % (
                    day0, len(re.findall(ur'\n', day0_stuff)) - 1,
                    day1, len(re.findall(ur'\n', day1_stuff)) - 1,
                    day2, len(re.findall(ur'\n', day2_stuff)) - 1,
                    day3, len(re.findall(ur'\n', day3_stuff)) - 1,
                ))
def magicInterwiki(page, resumen, idioma):
    """ Buscar interwikis que pueden venirle bien al artículo """
    """ Check for userful interwikis """
    # Heuristic: if the same-titled article on the `idioma` wiki shares at
    # least half of this article's outgoing links (mapped through
    # interwikis), copy that article's interwiki set onto this article.
    wtext = page.get()
    wtitle = page.title()
    pex = wikipedia.Page(wikipedia.Site(idioma, "wikipedia"), wtitle)
    if pex.exists() and not pex.isRedirectPage() and not pex.isDisambig():
        # Discard articles whose foreign counterpart already links to eswiki.
        iws = pex.interwiki()
        for iw in iws:
            if iw.site().lang == 'es':
                return wtext, resumen
        linked = page.linkedPages()
        linkedex = pex.linkedPages()
        aux = []
        for link in linkedex:
            aux.append(link.title())
        linkedex = aux
        cont = 0   # shared links (via interwiki mapping)
        total = 0  # links with an `idioma` interwiki at all
        for link in linked:
            if link.exists(
            ) and not link.isRedirectPage() and not link.isDisambig():
                linkiws = link.interwiki()
                for linkiw in linkiws:
                    if linkiw.site().lang == idioma:
                        total += 1
                        if linkedex.count(linkiw.title()) != 0:
                            cont += 1
        #wikipedia.output(u"Total=%s | Contador=%s" % (str(total), str(cont)))
        if cont >= total / 2 and cont > 0:  # 50% margin
            iws = pex.interwiki()
            iws.append(pex)
            iws.sort()
            nuevo = u"%s\n" % wtext
            for iw in iws:
                nuevo += u"\n[[%s:%s]]" % (iw.site().lang, iw.title())
            if len(nuevo) > len(wtext) + 5:
                #wikipedia.showDiff(wtext, nuevo)
                return nuevo, u"%s interwikis mágicos," % resumen
            # NOTE(review): the recursive fallback calls below discard their
            # results, so they can never affect the returned value -- they
            # probably should be `return magicInterwiki(...)`.  Also, several
            # paths fall through returning None; confirm call sites tolerate
            # that.
            if idioma == 'en':
                magicInterwiki(page, resumen, 'de')
            elif idioma == 'de':
                magicInterwiki(page, resumen, 'fr')
            elif idioma == 'fr':
                magicInterwiki(page, resumen, 'pt')
            else:
                return nuevo, resumen
def login(self):
    """Attempt to log in on the site described by this class.

    Returns a pywikipedia site object."""
    self.site = pywikibot.Site(code=self.lang, fam=self.family, user=self.user)
    manager = login.LoginManager(
        password=self.password, site=self.site, username=self.user)
    manager.login()
    return self.site
def main(): site = wikipedia.Site("pt", "wikipedia") # definimos que o site é a pt.wp cat = catlib.Category(site, u"Ambiente") # Aqui definimos a categoria Ambiente. catList = cat.articlesList() for page in catList: print u"página (objecto):", page print u"Título da página: ", page.title() # mostra o título do artigo print u"\n Nº de artigos na categoria: ", len(catList)
def getLanglinks(self, page, min=0, step=50, sort=""):
    # Yield one Page per interlanguage link recorded for `page` in the
    # wiki's `langlinks` database table, fetched through self._generate in
    # batches of `step` starting at offset `min`.  `sort` is appended
    # verbatim to the SQL (e.g. an ORDER BY clause).
    # NOTE(review): original layout of the triple-quoted SQL was lost in the
    # flattened source; the statement text below is equivalent SQL.
    q = """
        SELECT ll_lang, ll_title
        FROM %s.langlinks
        WHERE ll_from=(
            SELECT page_id
            FROM %s.page
            WHERE page_title=%%s
            AND page_namespace=%%s)
        """ % ((page.site().dbName(), ) * 2)
    q += sort
    for row in self._generate(
            q, min, step,
            (page.titleWithoutNamespace(True).encode('utf-8'),
             page.namespace())):
        yield wikipedia.Page(wikipedia.Site(row['ll_lang']),
                             row['ll_title'].decode('utf-8'), page.site())
def translatecat(cat, lang): global cattranslations if cattranslations.has_key(cat): return cattranslations[cat] else: catpage = wikipedia.Page(wikipedia.Site(lang, 'wikipedia'), "Category:%s" % (cat)) if catpage.exists() and not catpage.isRedirectPage(): cattext = catpage.get() m = re.compile( ur"(?im)\[\[\s*%s\s*:\s*(%s)\s*:\s*(?P<catiw>[^\[\]]+?)\s*\]\]" % (targetlang, '|'.join(catsnm.values()))).finditer(cattext) for i in m: cattranslations[cat] = i.group('catiw') return i.group('catiw') return ''
def main():
    """ Bot searches for titles with endashes and creates a redirect from hyphens """
    for lang, family, host, db, footer, summary in projects:
        wiki = wikipedia.Site(lang, family)
        conn = MySQLdb.connect(host=host, db=db, read_default_file='~/.my.cnf', use_unicode=True)
        cursor = conn.cursor()
        # All mainspace titles containing an en dash or a hyphen.
        cursor.execute("SELECT page_title from page where page_namespace=0 and page_title regexp \".*[–-].*\";")
        row = cursor.fetchone()
        endashes = sets.Set()  # titles with an en dash, no hyphen/parentheses
        hyphens = sets.Set()   # titles with a hyphen and no en dash
        while row:
            pagetitle = re.sub(ur"_", ur" ", unicode(row[0], "utf-8"))
            if re.search(ur"–", pagetitle) and not re.search(ur"[-\(\)]", pagetitle):
                # discard titles with parentheses: "(canción)", "(desambiguación)"...
                endashes.add(pagetitle)
            if not re.search(ur"–", pagetitle) and re.search(ur"-", pagetitle):
                hyphens.add(pagetitle)
            row = cursor.fetchone()
        # NOTE(review): the redirect-creation step announced in the docstring
        # is not visible here -- the function appears truncated in this chunk.
def main():
    # Create redirects for simplified spellings of every mainspace title on
    # the given wiki: a lowercased variant plus all combinations of the
    # remove1/remove2/removeaccute normalisers.
    # Usage: python script.py wikifamily [skiptopage]
    skip = u''
    if len(sys.argv) > 1:
        site = wikipedia.Site(sys.argv[1], sys.argv[1])
    else:
        print 'python script.py wikifamily [skiptopage]'
        sys.exit()
    if len(sys.argv) > 2:
        skip = sys.argv[2]
    gen = pagegenerators.AllpagesPageGenerator(start=skip, namespace=0, site=site)
    pre = pagegenerators.PreloadingGenerator(gen, pageNumber=250)
    alltitles = []
    for page in pre:
        if not page.exists(
        ):  #do not put .isRedirectPage() or it will never find redirects when checking below before creating
            continue
        alltitles.append(page.title())
        print page.title()
    for wtitle in alltitles:
        if len(wtitle) > 1:
            # Variant with everything after the first character lowercased.
            wtitle_ = wtitle[0] + wtitle[1:].lower()
            redirects = set()
            for t in [wtitle, wtitle_]:
                # All combinations of the three normalisers.
                redirects.add(t)
                redirects.add(remove1(t))
                redirects.add(remove2(t))
                redirects.add(removeaccute(t))
                redirects.add(remove1(remove2(t)))
                redirects.add(remove1(removeaccute(t)))
                redirects.add(remove2(removeaccute(t)))
                redirects.add(remove1(remove2(removeaccute(t))))
            print redirects
            for redirect in redirects:
                redirect = redirect.strip()
                # Skip empty variants, the title itself, and variants that
                # are already real pages on the wiki.
                if redirect and redirect != wtitle and not redirect in alltitles:
                    red = wikipedia.Page(site, redirect)
                    if not red.exists():
                        output = u"#REDIRECT [[%s]]" % (wtitle)
                        msg = u"BOT - Creating redirect to [[%s]]" % (wtitle)
                        red.put(output, msg)
def main():
    # Tutorial script: walk pt.wikipedia's "!Robótica" category and show,
    # per namespace, different page properties.
    site = wikipedia.Site("pt", "wikipedia")
    cat = catlib.Category(site, u"!Robótica")
    ''' Como explicado anteriormente, temos definido o site e a categoria podendo então passar-mos a obter a listagem das páginas, onde desta vez usaremos o pagegenerators. Uma opção mais rápida será usar igualmente o preloadingGen, bastando para isso fazer algo como pages = pagegenerators.PreloadingGenerator(pagegenerators.CategorizedPageGenerator(cat)) Isto faz com que as páginas sejam carregadas no início, ao contrário do script actual, que carrega à medida que forem necessárias. '''
    pages = pagegenerators.CategorizedPageGenerator(cat)
    for page in pages:
        '''Agora que temos a iteração vamos primeiro obter o título '''
        print page.title()
        ''' Com o page.namespace() obtemos o namespace da página embora no formato canonico, ou seja, número. Para obter o nome do namespace, fazemos o site.namespace(). Para fazer tudo junto, basta substituir as duas linhas por namespace = site.namespace(page.namespace()) '''
        pageNamespaceNumber = page.namespace()
        namespace = site.namespace(pageNamespaceNumber)
        if namespace == u"Ajuda":
            ''' Aqui filtramos as páginas que pertencem ao namespace Ajuda e obteremos o nome do namespace, assim como as predefinições contidas nas páginas. '''
            print len(page.get())
            print u"namespace: ", site.namespace(page.namespace())
            print u"templates: ", page.templates()
        elif namespace == u"Wikipédia":
            ''' Neste bloco, apenas os artigos do namespace wikipédia são filtrados, e obteremos o namespage e o título do artigo, sem namespace ou subpáginas (resumidamente, o título do artigo principal) '''
            print u"namespace: ", site.namespace(page.namespace())
            print u"Página principal (título sem subpágina): ", page.sectionFreeTitle(
            )
            print u"Página principal sem título nem namespace: ", page.title(
                withNamespace=False)
def process(img, row):
    """Build this row's target file title, record it on the row, and print
    an HTML status line (red-styled link when the Commons page is missing)."""
    title = (title_template_first if row['num'] == 1 else title_template) % row
    row['title'] = title  # later template interpolation reads the title back
    text = text_template % row
    filename = title + '.jpg'
    # Upload step kept disabled:
    # bot = upload.UploadRobot(url = img, description = text, targetSite = pywikibot.Site('commons', 'commons'), useFilename = filename, keepFilename = True, verifyDescription = False)
    # if not bot.urlOK():
    #     wikipedia.output('wrong filename: %s' % filename)
    #     return
    # return bot.upload_image()
    commons = pywikibot.Site('commons', 'commons')
    page = pywikibot.Page(commons, 'File:' + title + '.jpg')
    style = '' if page.exists() else 'style="color: red"'
    print (
        u'%s: <a %s href="http://commons.wikimedia.org/wiki/%s">%s</a><br />' %
        (row['id'], style, page.urlname(), title)).encode('utf-8')
def main(): """ Update a template with my user accounts edits """ eswiki = wikipedia.Site('es', 'wikipedia') users = [ 'AVBOT', 'BOTijo', 'Emijrp', 'Emijrpbot', 'Poc-oban', 'Toolserver' ] path = "http://toolserver.org/~vvv/sulutil.php?user="******"{{#switch:{{{1|}}}" total = 0 oldtotal = 0 #total anterior for user in users: url = path + user f = urllib.urlopen(url, 'r') raw = f.read() m = re.compile( ur"Total editcount: <b>(?P<useredits>\d+)</b>").finditer(raw) for j in m: salida += u"\n|%s=%s" % (user, j.group("useredits")) edits = j.group("useredits") total += int(edits) f.close() print user, edits time.sleep(5) salida += u"\n|Total=%d\n|%d}}" % (total, total) editcount = wikipedia.Page(eswiki, u"User:Emijrp/Editcount") #evitamos regresiones en el contador oldtotal = int(editcount.get().split("Total=")[1].split("\n")[0]) print "Total:", total, "Oldtotal:", oldtotal if total > oldtotal: oldpage = wikipedia.Page(eswiki, u"User:Emijrp/Editcount/Old") oldpage.put( editcount.get(), u"BOT - Datos de la versión anterior de [[User:Emijrp/Editcount]]") editcount.put( salida, u"BOT - Actualizando ediciones globales de %s: %d" % (", ".join(users), total))
def checkBlockInEnglishWikipedia(editData):
    # If the edit's author is an IP address, scrape enwiki's
    # Special:BlockList for an active block; build a Spanish notice in
    # `comment` and flag open proxies in `isProxy`.
    comment = ""
    isProxy = False
    if re.search(avbotglobals.parserRegexps['ip'], editData['author']):  #Is it an IP?
        enwiki = wikipedia.Site('en', 'wikipedia')
        data = enwiki.getUrl("/w/index.php?title=Special:BlockList&ip=%s" % editData['author'])
        # Crude HTML scraping: isolate the content area, then take the first
        # <li> item (the first active block, if any).
        data = data.split('<!-- start content -->')
        data = data[1].split('<!-- end content -->')[0]
        data = data.split('<li>')
        if len(data) > 1:
            m = re.compile(
                ur"</span> *\((?P<expires>[^<]*?)\) *<span class=\"comment\">\((?P<comment>[^<]*?)\)</span>"
            ).finditer(data[1])
            for i in m:
                comment = u"''Bloqueado en Wikipedia en inglés ([http://en.wikipedia.org/w/index.php?title=Special:BlockList&ip=%s bloqueo vigente], [http://en.wikipedia.org/w/index.php?title=Special:Log&type=block&page=User:%s historial de bloqueos]): %s''" % (
                    editData['author'], editData['author'], i.group("expires"))
                if re.search(ur'(?i)proxy', i.group('comment')):
                    isProxy = True
                break  # the first match is enough
    # NOTE(review): no return statement is visible in this chunk -- the
    # function appears truncated here (callers presumably expect
    # comment/isProxy back).
def main(): site = wikipedia.Site("pt", "wikipedia") cat = catlib.Category(site, u"!Robótica") pages = pagegenerators.CategorizedPageGenerator(cat) for page in pages: print page.title() pageNamespaceNumber = page.namespace() namespace = site.namespace(pageNamespaceNumber) if namespace == u"Ajuda": print len(page.get()) print u"namespace: ", site.namespace(page.namespace()) print u"templates: ", page.templates() elif namespace == u"Wikipédia": print u"namespace: ", site.namespace(page.namespace()) print u"Página principal (título sem subpágina): ", page.sectionFreeTitle( ) print u"Página principal sem título nem namespace: ", page.title( withNamespace=False)
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import wikipedia, time import tarea000 #w, wikt, b, q, s, n, v botijoinfo = wikipedia.Page(wikipedia.Site("en", "wikipedia"), u"User:Emijrp/BOTijoInfo.css").get() for family in ['wikipedia', 'wiktionary']: #añadir al user-config.py los credenciales langswiki = [] for lang in tarea000.getLangsByFamily(family): if lang == 'en-simple': langswiki.append('simple') else: langswiki.append(lang) for lang in langswiki: time.sleep(0.1) print lang, family
    # NOTE(review): this chunk starts inside a function (its `def main():`
    # header is outside this view) and ends with the script entry guard.
    """Copy some stats from s23.org and paste in a page in Wikipedia for log purposes """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.error, msg:
        print msg
        print "for help use --help"
        sys.exit(2)
    for o, a in opts:
        if o in ("-h", "--help"):
            # The docstring above doubles as the --help text.
            print main.__doc__
            sys.exit(0)
    ## @var url
    # URL to statistics page
    url = 'http://s23.org/wikistats/wikipedias_wiki.php'
    f = urllib.urlopen(url, 'r')
    text = f.read()
    text = re.sub(ur'(?im)[\n\r]*</?pre>[\n\r]*', ur'', text)  #cleaning...
    text = u'Lista de Wikipedias extraida de %s\n\n%s' % (url, text)
    ## @var p
    # Page where to save
    p = wikipedia.Page(wikipedia.Site('es', 'wikipedia'), u'User:Emijrp/Lista de Wikipedias')
    p.put(text, u'BOT - Updating from %s' % url)
    f.close()


if __name__ == "__main__":
    main()
# it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import wikipedia import MySQLdb site = wikipedia.Site('es', 'wikipedia') conn = MySQLdb.connect(host='sql-s3', db='eswiki_p', read_default_file='~/.my.cnf') cursor = conn.cursor() cursor.execute(''' /* SLOW_OK */SELECT CONCAT("| [[:Category:",page_title,"]]\n|-") FROM page JOIN categorylinks ON cl_to = page_title WHERE page_id = cl_from AND page_namespace = 14; ''') query = [unicode(row[0], 'utf-8') for row in cursor.fetchall()] page_list = []
    # NOTE(review): this chunk begins mid-function -- the distance helper's
    # header and the branch computing `a` are outside this view.
    else:
        b = (punto4['lat'] - punto1['lat']) * conv
    if a != 0 and b != 0:
        # Euclidean distance over the (converted) coordinate deltas.
        return math.sqrt(a**2 + b**2)
    else:
        return 999999  # sentinel: "effectively infinite" distance


lang = "es"
if len(sys.argv) > 1:
    lang = sys.argv[1]
objs = {}    # object name -> {'lat': ..., 'lon': ...}
paises = {}  # object name -> country
site = wikipedia.Site("es", "wikipedia")
objspage = wikipedia.Page(
    site, u"Wikipedia:Imágenes requeridas por zona/Objetivos conocidos")
l = []
for line in objspage.get().splitlines():
    l.append(line)
    t = line.strip().split(";")
    if len(t) == 4:  # expected line format: name;country;lat;lon
        objname = t[0].strip()
        objcountry = t[1].strip()
        paises[objname] = objcountry
        objlat = float(t[2].strip())
        objlon = float(t[3].strip())
        objs[objname] = {'lat': objlat, 'lon': objlon}
l.sort()
output3 = "\n".join(l)  # the page content, sorted line-by-line
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import catlib import pagegenerators import sys import wikipedia start = sys.argv[1] lfsite = wikipedia.Site('librefind', 'librefind') ensite = wikipedia.Site('en', 'wikipedia') cat = catlib.Category(lfsite, u"Category:All the searches") gen = pagegenerators.CategorizedPageGenerator(cat, start=start) pre = pagegenerators.PreloadingGenerator(gen) for page in pre: title = page.title() enpage = wikipedia.Page(ensite, title) if enpage.exists( ) and not enpage.isRedirectPage() and not enpage.isDisambig(): redirects = enpage.getReferences(redirectsOnly=True) for redirect in redirects: if redirect.namespace( ) != 0: #skiping redirects from userpages etc