Example #1
def main():
    date=datetime.date.today().isoformat()

    commonssite=wikipedia.Site("commons", "commons")
    eswikisite=wikipedia.Site("es", "wikipedia")

    page=wikipedia.Page(commonssite, u"Template:Potd/%s" % date)
    file=u""
    if page.exists() and not page.isRedirectPage() and not page.isDisambig():
        file=page.get()
        file=file.split("|")[1].split("|")[0]
    else:
        sys.exit()

    description=u""
    for lang in ['es', 'en']:
        page=wikipedia.Page(commonssite, u"Template:Potd/%s (%s)" % (date, lang))
        if page.exists() and not page.isRedirectPage() and not page.isDisambig():
            description=page.get()
            if re.search(ur"(?i)(\{\{ *Potd description *\|1=)", description):
                description=description.split("|1=")[1].split("|2=")[0]
            elif re.search(ur"(?i)(\{\{ *Potd description *\|[^ \d])", description):
                description=description.split("|")[1].split("|")[0]
            elif not re.search(ur"(?i)\{\{", description):
                pass
Example #2
def main():
    """ Update a projects list """
    site = wikipedia.Site("es", "wikipedia")

    conn = MySQLdb.connect(host='sql-s3', db='eswiki_p', read_default_file='~/.my.cnf', use_unicode=True)
    cursor = conn.cursor()
    cursor.execute('SELECT distinct page_title from page where page_namespace=100 and page_is_redirect=0 and page_title not regexp "/";')
    result = cursor.fetchall()
    portales = []
    for row in result:
        if len(row) == 1:
            portal = re.sub("_", " ", unicode(row[0], "utf-8"))
            portales.append(portal)

    page = wikipedia.Page(site, u"Wikiproyecto:Portales/Lista")
    output = u"Portales que existen en [[Wikipedia en español]]:"
    for portal in portales:
        output += u"\n# [[Portal:%s|%s]]" % (portal, portal)
    if output != page.get():
        page.put(output, u"BOT - Actualizando lista de portales [%s]" % len(portales))
    page = wikipedia.Page(site, u"Wikiproyecto:Portales/Número")
    output = u"%s<noinclude>{{documentación}}</noinclude>" % len(portales)
    if output != page.get():
        page.put(output, u"BOT - Actualizando número de portales [%s]" % len(portales))

    cursor.close()
    conn.close()
Example #3
def secession_page_gen():
    file = codecs.open('wlm_secession_title.txt', 'rt', 'utf-8')
    site = pywikibot.Site('commons', 'commons')
    for line in file:
        title = 'File:' + line.strip()
        page = pywikibot.Page(site, title)
        yield page
Example #4
def invite(editors):
    s = wp.Site('', 'commons')
    remove = []
    try:
        for editor in editors:
            p = wp.Page(s, "User talk:" + quote(editor))
            pageContent = u''
            try:
                pageContent += p.get()
            except:
                pageContent += "{{Welcome}} - ~~~~"
            pageContent += head

            if editors[editor]['images'] < 2:
                pageContent += msg.format(firstName(editor),
                                          u"por enviar uma foto")
            else:
                pageContent += msg.format(
                    firstName(editor), u"pelas {0} fotos envidas".format(
                        editors[editor]['images']))

            p.put(
                pageContent,
                u'Mensagem do Concurso WikiLoves Earth - [[pt:Wikipédia:Wiki_Loves_Earth_2014/Brasil|WLE Brasil]]'
            )
            sleepTime = randint(19, 40)
            print(editor + "foi notificado, esperar " + str(sleepTime))
            remove.append(editor)
            sleep(sleepTime)
    except UnicodeDecodeError as e:
        for p in dir(e):
            if not p.startswith('_'):
                print '%s=%r' % (p, getattr(e, p))
    finally:
        saveCSV(editors, remove, "teste1.csv")
Example #5
def getSoftwareRedirect(lang, page):
    wtitle = page.title()
    filepagetitles = "/home/emijrp/temporal/tarea037-%s-pagetitles.txt" % lang
    if not os.path.exists(filepagetitles):
        loadPageTitles(lang)
    try:
        f = codecs.open(filepagetitles, mode="r", encoding="utf-8")
    except:
        print "Error al cargar pagetitles"
        sys.exit()
    l = f.readline()
    if not l:  # 0 bytes in the file?
        f.close()
        loadPageTitles(lang)
        try:
            f = codecs.open(filepagetitles, mode="r", encoding="utf-8")
        except:
            print "Error al cargar pagetitles"
            sys.exit()
    c = 0
    while l:
        l = l[:-1]
        c += 1
        if c % 250000 == 0:
            print "lower", c
        if wtitle.strip().lower() == l.strip().lower():
            # it would be unusual for two distinct articles to differ only in capitalisation
            # and for one of them to be heavily visited;
            # if we end up returning a redirect, the code later takes care of using its target
            f.close()
            return wikipedia.Page(wikipedia.Site(lang, "wikipedia"), l)
        l = f.readline()
    f.close()
    return page
Example #6
def main():
    """ Update a list of newbies """
    
    eswiki = wikipedia.Site('es', 'wikipedia')
    dias = 7
    limediciones = 3
    conn = MySQLdb.connect(host='sql-s3', db='eswiki_p', read_default_file='~/.my.cnf', use_unicode=True)
    cursor = conn.cursor()
    cursor.execute("SELECT rc_user_text, count(*) as count from recentchanges where (rc_type=0 or rc_type=1) and rc_namespace=0 and rc_timestamp>=date_add(now(), interval -%d day) and rc_user_text in (select user_name from user where user_registration>=date_add(now(), interval -%d day)) and rc_user_text not in (select ipb_address from ipblocks) group by rc_user_text order by count desc;" % (dias, dias))
    result=cursor.fetchall()
    users = []
    for row in result:
        user_name = unicode(row[0], "utf-8")
        edits = int(row[1])
        users.append([user_name, edits])
    
    output = u"La siguiente es una lista con los usuarios que han llegado en los últimos %d días y han editado algunas cosas, pero todavía nadie les ha saludado. Es posible que interese darles la bienvenida después de revisar sus contribuciones. Ejemplo de bienvenida: <code>{<nowiki></nowiki>{su<nowiki></nowiki>bst:Usuario:Emijrp/Bienvenida.css}<nowiki></nowiki>} --~~<nowiki></nowiki>~~</code>\n" % (dias)
    for user_name, edits in users:
        if edits<limediciones:
            continue
        talk = wikipedia.Page(eswiki, u"User talk:%s" % user_name)
        if not talk.exists():
            output += u"* (%d) [[Usuario:%s|%s]] ([[Usuario Discusión:%s|discusión]] · [[Special:Contributions/%s|contribuciones]])\n" % (edits, user_name, user_name, user_name, user_name)
    novatos = wikipedia.Page(eswiki, u"User:Emijrp/Recién llegados")
    novatos.put(output, u"BOT - Actualizando lista de recién llegados")
Example #7
def main():
    ''' This is the def where our script will live '''

    site = wikipedia.Site("pt", "wikipedia")  # definimos que o site é a pt.wp
    '''Next, we define the Ambiente category
     and obtain the list of article titles.
     In this demonstration the code is written out in full
     for easier understanding; in practice,
     pages = catlib.Category(site, u"Ambiente").articles()
     would be enough to obtain the list.
    '''
    cat = catlib.Category(site,
                          u"Ambiente")  # Aqui definimos a categoria Ambiente.
    catList = cat.articlesList()
    '''Now that we have a list,
     and before counting its elements,
     let us look at the titles contained in catList.

     This approach works well to illustrate this example;
     if the goal were to interact directly with the articles,
     as we will see in another post, there are more efficient
     approaches (see the sketch after this example).

     The first print, i.e. the page object here,
     is a python object, whereas the second print,
     the one of page.title(), is already in unicode format.
    '''

    for page in catList:
        print u"página (objecto):", page
        print u"Título da página: ", page.title()  # mostra o título do artigo
    ''' Finally, we count the articles '''

    print u"\n Nº de artigos na categoria: ", len(catList)
Example #8
def rcAPI():
    site = wikipedia.Site("en", "wikipedia")

    rctimestamp = ""
    rcs = site.recentchanges(number=1)
    for rc in rcs:
        rctimestamp = rc[1]

    rcdir = "newer"
    rchistory = []
    while True:
        rcs = site.recentchanges(
            number=100, rcstart=rctimestamp, rcdir=rcdir
        )  # FIXME: this does not return the oldids; better to write my own wikipedia.query

        for rc in rcs:
            rcsimple = [rc[0].title(), rc[1], rc[2], rc[3]]
            if rcsimple not in rchistory:
                rchistory = rchistory[-1000:]
                rchistory.append(rcsimple)
                edit_props = {
                    'page': rc[0],
                    'title': rc[0].title(),
                    'timestamp': rc[1],
                    'user': rc[2],
                    'comment': rc[3],
                }
                thread.start_new_thread(fetchedEdit, (edit_props, ))
            rctimestamp = rc[1]
        time.sleep(3)
Example #9
def getLines(page):
    p = wikipedia.Page(wikipedia.Site('15mpedia', '15mpedia'), page)
    raw = p.get()
    raw = re.sub(ur"(?im)^\*\s*", ur"", raw)
    rss = []
    for l in raw.splitlines():
        if not l.startswith('#'):
            rss.append(l)
    return rss
Example #10
 def login(self):
     self.site = wikipedia.Site(code=self.lang,
                                fam=self.family,
                                user=self.user)
     loginManager = login.LoginManager(password=self.password,
                                       site=self.site,
                                       username=self.user)
     loginManager.login()
     return self.site
Example #11
def main():
    # if -file is not used, this temporary array is used to read the page title.
    pageTitle = []
    page = None
    gen = None
    interwiki = False
    keep_name = False
    targetLang = None
    targetFamily = None

    for arg in pywikibot.handleArgs():
        if arg == '-interwiki':
            interwiki = True
        elif arg.startswith('-keepname'):
            keep_name = True
        elif arg.startswith('-tolang:'):
            targetLang = arg[8:]
        elif arg.startswith('-tofamily:'):
            targetFamily = arg[10:]
        elif arg.startswith('-file'):
            if len(arg) == 5:
                filename = pywikibot.input(
                    u'Please enter the list\'s filename: ')
            else:
                filename = arg[6:]
            gen = pagegenerators.TextfilePageGenerator(filename)
        else:
            pageTitle.append(arg)

    if not gen:
        # if the page title is given as a command line argument,
        # connect the title's parts with spaces
        if pageTitle != []:
            pageTitle = ' '.join(pageTitle)
            page = pywikibot.Page(pywikibot.getSite(), pageTitle)
        # if no page title was given as an argument, and none was
        # read from a file, query the user
        if not page:
            pageTitle = pywikibot.input(u'Which page to check:')
            page = pywikibot.Page(pywikibot.getSite(), pageTitle)
            # generator which will yield only a single Page
        gen = iter([page])

    if not targetLang and not targetFamily:
        targetSite = pywikibot.getSite('commons', 'commons')
    else:
        if not targetLang:
            targetLang = pywikibot.getSite().language()
        if not targetFamily:
            targetFamily = pywikibot.getSite().family
        targetSite = pywikibot.Site(targetLang, targetFamily)
    bot = ImageTransferBot(gen,
                           interwiki=interwiki,
                           targetSite=targetSite,
                           keep_name=keep_name)
    bot.run()
Example #12
def main():
    family = 'wikipedia'
    for lang in tarea000.getLangsByFamily(family):
        try:
            if lang == 'en-simple':
                lang = 'simple'
            site = wikipedia.Site(lang, family)
            tarea000.insertBOTijoInfo(site)
        except:
            print "Hubo un error en: ", lang
Example #13
def printContent(l, source=''):
    day0 = datetime.datetime.now().strftime('%Y-%m-%d')
    day1 = (datetime.datetime.now() -
            datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    day2 = (datetime.datetime.now() -
            datetime.timedelta(days=2)).strftime('%Y-%m-%d')
    day3 = (datetime.datetime.now() -
            datetime.timedelta(days=3)).strftime('%Y-%m-%d')

    day0_stuff = u"<noinclude>{{actualizaciones en las redes/inicio}}</noinclude>\n"
    day1_stuff = u"<noinclude>{{actualizaciones en las redes/inicio}}</noinclude>\n"
    day2_stuff = u"<noinclude>{{actualizaciones en las redes/inicio}}</noinclude>\n"
    day3_stuff = u"<noinclude>{{actualizaciones en las redes/inicio}}</noinclude>\n"
    for ll in l:
        [updated, sitetitle, title, url] = ll
        if updated == day0:
            day0_stuff += u"* {{actualización|titular=%s|enlace=%s|fuente=%s|fecha=%s}}\n" % (
                title, url, sitetitle, updated)
        if updated == day1:
            day1_stuff += u"* {{actualización|titular=%s|enlace=%s|fuente=%s|fecha=%s}}\n" % (
                title, url, sitetitle, updated)
        if updated == day2:
            day2_stuff += u"* {{actualización|titular=%s|enlace=%s|fuente=%s|fecha=%s}}\n" % (
                title, url, sitetitle, updated)
        if updated == day3:
            day3_stuff += u"* {{actualización|titular=%s|enlace=%s|fuente=%s|fecha=%s}}\n" % (
                title, url, sitetitle, updated)
    day0_stuff += u"<noinclude>{{actualizaciones en las redes/fin}}</noinclude>"
    day1_stuff += u"<noinclude>{{actualizaciones en las redes/fin}}</noinclude>"
    day2_stuff += u"<noinclude>{{actualizaciones en las redes/fin}}</noinclude>"
    day3_stuff += u"<noinclude>{{actualizaciones en las redes/fin}}</noinclude>"

    for k, v in [
        [day0, day0_stuff],
        [day1, day1_stuff],
        [day2, day2_stuff],
        [day3, day3_stuff],
    ]:
        if v:
            page = wikipedia.Page(
                wikipedia.Site('15mpedia', '15mpedia'),
                u'Plantilla:Actualizaciones en las redes/%s/%s' % (source, k))
            page.put(
                v,
                u"BOT - Añadiendo actualizaciones: %s [%d], %s [%d], %s [%d], %s [%d]"
                % (
                    day0,
                    len(re.findall(ur'\n', day0_stuff)) - 1,
                    day1,
                    len(re.findall(ur'\n', day1_stuff)) - 1,
                    day2,
                    len(re.findall(ur'\n', day2_stuff)) - 1,
                    day3,
                    len(re.findall(ur'\n', day3_stuff)) - 1,
                ))
Example #14
def magicInterwiki(page, resumen, idioma):
    """ Buscar interwikis que pueden venirle bien al artículo """
    """ Check for userful interwikis """
    wtext = page.get()
    wtitle = page.title()

    pex = wikipedia.Page(wikipedia.Site(idioma, "wikipedia"), wtitle)

    if pex.exists() and not pex.isRedirectPage() and not pex.isDisambig():
        # discard articles that already have an interwiki to the Spanish Wikipedia
        iws = pex.interwiki()
        for iw in iws:
            if iw.site().lang == 'es':
                return wtext, resumen

        linked = page.linkedPages()
        linkedex = pex.linkedPages()

        aux = []
        for link in linkedex:
            aux.append(link.title())
        linkedex = aux

        cont = 0
        total = 0
        for link in linked:
        if link.exists() and not link.isRedirectPage() and not link.isDisambig():
                linkiws = link.interwiki()
                for linkiw in linkiws:
                    if linkiw.site().lang == idioma:
                        total += 1
                        if linkedex.count(linkiw.title()) != 0:
                            cont += 1
        #wikipedia.output(u"Total=%s | Contador=%s" % (str(total), str(cont)))

    if cont >= total / 2 and cont > 0:  # 50% threshold
            iws = pex.interwiki()
            iws.append(pex)
            iws.sort()
            nuevo = u"%s\n" % wtext
            for iw in iws:
                nuevo += u"\n[[%s:%s]]" % (iw.site().lang, iw.title())
            if len(nuevo) > len(wtext) + 5:
                #wikipedia.showDiff(wtext, nuevo)
                return nuevo, u"%s interwikis mágicos," % resumen

    # fall back to other languages, returning the recursive result to the caller
    if idioma == 'en':
        return magicInterwiki(page, resumen, 'de')
    elif idioma == 'de':
        return magicInterwiki(page, resumen, 'fr')
    elif idioma == 'fr':
        return magicInterwiki(page, resumen, 'pt')
    else:
        return wtext, resumen
Example #15
 def login(self):
     """Attempt to log in on the site described
     by this class. Returns a pywikipedia site object"""
     self.site = pywikibot.Site(code=self.lang,
                                fam=self.family,
                                user=self.user)
     loginManager = login.LoginManager(password=self.password,
                                       site=self.site,
                                       username=self.user)
     loginManager.login()
     return self.site
Example #16
def main():

    site = wikipedia.Site("pt", "wikipedia")  # definimos que o site é a pt.wp

    cat = catlib.Category(site,
                          u"Ambiente")  # Aqui definimos a categoria Ambiente.
    catList = cat.articlesList()

    for page in catList:
        print u"página (objecto):", page
        print u"Título da página: ", page.title()  # mostra o título do artigo

    print u"\n Nº de artigos na categoria: ", len(catList)
Example #17
 def getLanglinks(self, page, min=0, step=50, sort=""):
     q = """ SELECT ll_lang, ll_title
             FROM %s.langlinks
             WHERE ll_from=(
                 SELECT page_id
                 FROM %s.page
                 WHERE page_title=%%s AND page_namespace=%%s) """ % (
         (page.site().dbName(), ) * 2)
     q += sort
     for row in self._generate(
             q, min,
             step, (page.titleWithoutNamespace(True).encode('utf-8'),
                    page.namespace())):
         yield wikipedia.Page(wikipedia.Site(row['ll_lang']),
                              row['ll_title'].decode('utf-8'), page.site())
Example #18
def translatecat(cat, lang):
    global cattranslations

    if cattranslations.has_key(cat):
        return cattranslations[cat]
    else:
        catpage = wikipedia.Page(wikipedia.Site(lang, 'wikipedia'),
                                 "Category:%s" % (cat))
        if catpage.exists() and not catpage.isRedirectPage():
            cattext = catpage.get()
            m = re.compile(
                ur"(?im)\[\[\s*%s\s*:\s*(%s)\s*:\s*(?P<catiw>[^\[\]]+?)\s*\]\]"
                % (targetlang, '|'.join(catsnm.values()))).finditer(cattext)
            for i in m:
                cattranslations[cat] = i.group('catiw')
                return i.group('catiw')
    return ''
Example #19
def main():
    """ Bot searches for titles with endashes and creates a redirect from hyphens """

    for lang, family, host, db, footer, summary in projects:
        wiki = wikipedia.Site(lang, family)
        conn = MySQLdb.connect(host=host, db=db, read_default_file='~/.my.cnf', use_unicode=True)
        cursor = conn.cursor()
        cursor.execute("SELECT page_title from page where page_namespace=0 and page_title regexp \".*[–-].*\";")
        row = cursor.fetchone()
        endashes = sets.Set()
        hyphens = sets.Set()
        while row:
            pagetitle = re.sub(ur"_", ur" ", unicode(row[0], "utf-8"))
            if re.search(ur"–", pagetitle) and not re.search(ur"[-\(\)]", pagetitle): #descartamos las que tienen paréntesis, (cación) (desambiguación)...
                endashes.add(pagetitle)
            if not re.search(ur"–", pagetitle) and re.search(ur"-", pagetitle):
                hyphens.add(pagetitle)
            row = cursor.fetchone()    
Example #20
def main():
    skip = u''
    if len(sys.argv) > 1:
        site = wikipedia.Site(sys.argv[1], sys.argv[1])
    else:
        print 'python script.py wikifamily [skiptopage]'
        sys.exit()
    if len(sys.argv) > 2:
        skip = sys.argv[2]
    gen = pagegenerators.AllpagesPageGenerator(start=skip,
                                               namespace=0,
                                               site=site)
    pre = pagegenerators.PreloadingGenerator(gen, pageNumber=250)
    alltitles = []
    for page in pre:
        # do not put .isRedirectPage() here or it will never find redirects when checking below before creating
        if not page.exists():
            continue
        alltitles.append(page.title())
        print page.title()

    for wtitle in alltitles:
        if len(wtitle) > 1:
            wtitle_ = wtitle[0] + wtitle[1:].lower()
            redirects = set()
            for t in [wtitle, wtitle_]:
                redirects.add(t)
                redirects.add(remove1(t))
                redirects.add(remove2(t))
                redirects.add(removeaccute(t))
                redirects.add(remove1(remove2(t)))
                redirects.add(remove1(removeaccute(t)))
                redirects.add(remove2(removeaccute(t)))
                redirects.add(remove1(remove2(removeaccute(t))))

            print redirects
            for redirect in redirects:
                redirect = redirect.strip()
                if redirect and redirect != wtitle and not redirect in alltitles:
                    red = wikipedia.Page(site, redirect)
                    if not red.exists():
                        output = u"#REDIRECT [[%s]]" % (wtitle)
                        msg = u"BOT - Creating redirect to [[%s]]" % (wtitle)
                        red.put(output, msg)
Example #21
def main():
    site = wikipedia.Site("pt", "wikipedia")
    cat = catlib.Category(site, u"!Robótica")
    ''' As explained before, we have defined the site and the category,
      so we can now move on to getting the list of pages,
      this time using pagegenerators. A faster option is
      to also use the preloading generator, which only requires
      something like
      pages = pagegenerators.PreloadingGenerator(pagegenerators.CategorizedPageGenerator(cat))
      This makes the pages load up front, unlike
      the current script, which loads them as they are needed.
    '''
    pages = pagegenerators.CategorizedPageGenerator(cat)
    for page in pages:
        '''Now that we have the iteration, let us first get the title
        '''
        print page.title()
        ''' With page.namespace() we get the page's namespace,
          although in canonical form, i.e. as a number. To get
          the namespace name, we use site.namespace().
          To do it all in one step, just replace the two lines with
          namespace = site.namespace(page.namespace())
        '''

        pageNamespaceNumber = page.namespace()
        namespace = site.namespace(pageNamespaceNumber)
        if namespace == u"Ajuda":
            ''' Here we filter the pages that belong to the Ajuda namespace
              and get the namespace name, as well as the templates
              contained in the pages. '''
            print len(page.get())
            print u"namespace: ", site.namespace(page.namespace())
            print u"templates: ", page.templates()
        elif namespace == u"Wikipédia":
            ''' In this block, only pages in the Wikipédia namespace are filtered,
              and we get the namespace and the page title, without namespace or subpages
              (in short, the title of the main page)
            '''
            print u"namespace: ", site.namespace(page.namespace())
            print u"Página principal (título sem subpágina): ", page.sectionFreeTitle(
            )
            print u"Página principal sem título nem namespace: ", page.title(
                withNamespace=False)
Example #22
def process(img, row):
    if row['num'] == 1:
        title = title_template_first % row
    else:
        title = title_template % row
    row['title'] = title
    text = text_template % row
    filename = title + '.jpg'
    # bot = upload.UploadRobot(url = img, description = text, targetSite = pywikibot.Site('commons', 'commons'), useFilename = filename, keepFilename = True, verifyDescription = False)
    # if not bot.urlOK():
    # wikipedia.output('wrong filename: %s' % filename)
    # return
    # return bot.upload_image()
    page = pywikibot.Page(pywikibot.Site('commons', 'commons'),
                          'File:' + title + '.jpg')
    if not page.exists():
        style = 'style="color: red"'
    else:
        style = ''
    print(
        u'%s: <a %s href="http://commons.wikimedia.org/wiki/%s">%s</a><br />' %
        (row['id'], style, page.urlname(), title)).encode('utf-8')
Example #23
def main():
    """ Update a template with my user accounts edits """

    eswiki = wikipedia.Site('es', 'wikipedia')
    users = [
        'AVBOT', 'BOTijo', 'Emijrp', 'Emijrpbot', 'Poc-oban', 'Toolserver'
    ]
    path = "http://toolserver.org/~vvv/sulutil.php?user="******"{{#switch:{{{1|}}}"
    total = 0
    oldtotal = 0  # previous total
    for user in users:
        url = path + user
        f = urllib.urlopen(url, 'r')
        raw = f.read()
        m = re.compile(
            ur"Total editcount: <b>(?P<useredits>\d+)</b>").finditer(raw)
        for j in m:
            salida += u"\n|%s=%s" % (user, j.group("useredits"))
            edits = j.group("useredits")
            total += int(edits)
        f.close()
        print user, edits
        time.sleep(5)
    salida += u"\n|Total=%d\n|%d}}" % (total, total)

    editcount = wikipedia.Page(eswiki, u"User:Emijrp/Editcount")
    # avoid regressions in the counter
    oldtotal = int(editcount.get().split("Total=")[1].split("\n")[0])

    print "Total:", total, "Oldtotal:", oldtotal
    if total > oldtotal:
        oldpage = wikipedia.Page(eswiki, u"User:Emijrp/Editcount/Old")
        oldpage.put(
            editcount.get(),
            u"BOT - Datos de la versión anterior de [[User:Emijrp/Editcount]]")
        editcount.put(
            salida, u"BOT - Actualizando ediciones globales de %s: %d" %
            (", ".join(users), total))
Example #24
def checkBlockInEnglishWikipedia(editData):
    comment = ""
    isProxy = False
    if re.search(avbotglobals.parserRegexps['ip'],
                 editData['author']):  #Is it an IP?
        enwiki = wikipedia.Site('en', 'wikipedia')

        data = enwiki.getUrl("/w/index.php?title=Special:BlockList&ip=%s" %
                             editData['author'])
        data = data.split('<!-- start content -->')
        data = data[1].split('<!-- end content -->')[0]

        data = data.split('<li>')
        if len(data) > 1:
            m = re.compile(
                ur"</span> *\((?P<expires>[^<]*?)\) *<span class=\"comment\">\((?P<comment>[^<]*?)\)</span>"
            ).finditer(data[1])
            for i in m:
                comment = u"''Bloqueado en Wikipedia en inglés ([http://en.wikipedia.org/w/index.php?title=Special:BlockList&ip=%s bloqueo vigente], [http://en.wikipedia.org/w/index.php?title=Special:Log&type=block&page=User:%s historial de bloqueos]): %s''" % (
                    editData['author'], editData['author'], i.group("expires"))
                if re.search(ur'(?i)proxy', i.group('comment')):
                    isProxy = True
                break  # the first match is enough
Example #25
def main():
    site = wikipedia.Site("pt", "wikipedia")
    cat = catlib.Category(site, u"!Robótica")

    pages = pagegenerators.CategorizedPageGenerator(cat)
    for page in pages:

        print page.title()

        pageNamespaceNumber = page.namespace()
        namespace = site.namespace(pageNamespaceNumber)
        if namespace == u"Ajuda":

            print len(page.get())
            print u"namespace: ", site.namespace(page.namespace())
            print u"templates: ", page.templates()

        elif namespace == u"Wikipédia":

            print u"namespace: ", site.namespace(page.namespace())
            print u"Página principal (título sem subpágina): ", page.sectionFreeTitle(
            )
            print u"Página principal sem título nem namespace: ", page.title(
                withNamespace=False)
Example #26
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import wikipedia, time

import tarea000

#w, wikt, b, q, s, n, v

botijoinfo = wikipedia.Page(wikipedia.Site("en", "wikipedia"),
                            u"User:Emijrp/BOTijoInfo.css").get()

for family in ['wikipedia',
               'wiktionary']:  # add the credentials to user-config.py
    langswiki = []

    for lang in tarea000.getLangsByFamily(family):
        if lang == 'en-simple':
            langswiki.append('simple')
        else:
            langswiki.append(lang)

    for lang in langswiki:
        time.sleep(0.1)
        print lang, family
Example #27
    """Copy some stats from s23.org and paste in a page in Wikipedia for log purposes
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.error, msg:
        print msg
        print "for help use --help"
        sys.exit(2)
    for o, a in opts:
        if o in ("-h", "--help"):
            print main.__doc__
            sys.exit(0)

    ## @var url
    # URL to statistics page
    url = 'http://s23.org/wikistats/wikipedias_wiki.php'
    f = urllib.urlopen(url, 'r')
    text = f.read()
    text = re.sub(ur'(?im)[\n\r]*</?pre>[\n\r]*', ur'', text)  #cleaning...
    text = u'Lista de Wikipedias extraida de %s\n\n%s' % (url, text)
    ## @var p
    # Page where to save
    p = wikipedia.Page(wikipedia.Site('es', 'wikipedia'),
                       u'User:Emijrp/Lista de Wikipedias')
    p.put(text, u'BOT - Updating from %s' % url)
    f.close()


if __name__ == "__main__":
    main()
Example #28
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import wikipedia
import MySQLdb

site = wikipedia.Site('es', 'wikipedia')

conn = MySQLdb.connect(host='sql-s3',
                       db='eswiki_p',
                       read_default_file='~/.my.cnf')
cursor = conn.cursor()
cursor.execute('''
/* SLOW_OK */SELECT CONCAT("| [[:Category:",page_title,"]]\n|-")
FROM page
JOIN categorylinks ON cl_to = page_title
WHERE page_id = cl_from
AND page_namespace = 14;
''')

query = [unicode(row[0], 'utf-8') for row in cursor.fetchall()]
page_list = []
Example #29
    else:
        b = (punto4['lat'] - punto1['lat']) * conv

    if a != 0 and b != 0:
        return math.sqrt(a**2 + b**2)
    else:
        return 999999


lang = "es"
if len(sys.argv) > 1:
    lang = sys.argv[1]

objs = {}
paises = {}
site = wikipedia.Site("es", "wikipedia")
objspage = wikipedia.Page(
    site, u"Wikipedia:Imágenes requeridas por zona/Objetivos conocidos")
l = []
for line in objspage.get().splitlines():
    l.append(line)
    t = line.strip().split(";")
    if len(t) == 4:
        objname = t[0].strip()
        objcountry = t[1].strip()
        paises[objname] = objcountry
        objlat = float(t[2].strip())
        objlon = float(t[3].strip())
        objs[objname] = {'lat': objlat, 'lon': objlon}
l.sort()
output3 = "\n".join(l)
Example #30
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import catlib
import pagegenerators
import sys
import wikipedia

start = sys.argv[1]
lfsite = wikipedia.Site('librefind', 'librefind')
ensite = wikipedia.Site('en', 'wikipedia')

cat = catlib.Category(lfsite, u"Category:All the searches")
gen = pagegenerators.CategorizedPageGenerator(cat, start=start)
pre = pagegenerators.PreloadingGenerator(gen)

for page in pre:
    title = page.title()
    enpage = wikipedia.Page(ensite, title)
    if enpage.exists() and not enpage.isRedirectPage() and not enpage.isDisambig():
        redirects = enpage.getReferences(redirectsOnly=True)
        for redirect in redirects:
            if redirect.namespace() != 0:  # skipping redirects from userpages etc