def add_text(generator):
    """For each main-namespace page from *generator*, create redirect
    variants of its title (Arabic vs Persian letters, ZWNJ/space forms,
    punctuation spacing, digits, common word swaps) via creat_redirect().

    NOTE(review): several ``u""`` literals below were almost certainly a
    zero-width non-joiner (U+200C) lost when this file was re-encoded.
    As written, ``u"" in pagetitle`` is always True and
    ``.replace(u"", u" ")`` inserts a space between every character —
    restore the ZWNJ before running this.
    """
    # Newline-separated list of existing fa-wiki titles; used to skip
    # redirects that already exist. Updated by creat_redirect().
    fa_page_title_list = get_query()
    for page in generator:
        # Skip pages already handled in this run (module-level cache).
        if _cache.get(tuple([page.title(), 'add_text'])):
            wikipedia.output(
                u'\03{lightred}>>> Page ' + page.title() +
                u' was checked before so it will pass\03{default}')
            continue
        original_text = u''
        if page.namespace() != 0:  # articles only
            continue
        try:
            pagetitle_source = page.title()
            original_text = page.get()
            pagetitle = pagetitle_source
            redirection = 0
        except wikipedia.NoPage:
            wikipedia.output(u"%s doesn't exist, skip!" % page.title())
            continue
        except wikipedia.IsRedirectPage:
            # Redirect: build variants of the redirect's own title but make
            # them point at the redirect target (pagetitle_source).
            wikipedia.output(u"%s is a redirect, skip!" % page.title())
            pagemain = page.getRedirectTarget()
            try:
                original_text = pagemain.get()
            except:  # NOTE(review): bare except hides real errors
                wikipedia.output(u"%s doesn't exist, skip!" % pagemain.title())
                continue
            pagetitle_source = pagemain.title()
            pagetitle = page.title()
            redirection = 1
        except:  # NOTE(review): bare except hides real errors
            continue
        # Skip any title containing Latin letters.
        pagetitle3 = re.sub(
            ur'[qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]',
            ur"", pagetitle)
        if pagetitle3 != pagetitle:
            continue
        # Skip "being edited" pages.
        if pagetitle.find(u'در حال ویرایش') != -1:
            continue
        _cache[tuple([page.title(), 'add_text'])] = 1
        if original_text:
            if redirection == 0:
                # Titles containing Arabic diacritics or wrong characters
                # are reported on a maintenance page for manual moving.
                wrong_words = ur'ًٌٍَُِّْٔ' + u'يٰك' + u"@#$%^&*'~`"
                pagetitle2 = re.sub(ur'[' + wrong_words + ur']', ur"",
                                    pagetitle)
                passp = redirect_find(pagetitle)
                if not passp:
                    if pagetitle != pagetitle2:
                        # Report the first offending character found.
                        for vowel in wrong_words:
                            if vowel in pagetitle:
                                break
                        passport = False
                        text = u'{| class="wikitable plainlinks"\n|-\n'
                        # NOTE(review): rebinding ``page`` clobbers the loop
                        # variable for the rest of this iteration.
                        page = wikipedia.Page(
                            fasite, u"user:fawikibot/movearticles2")
                        text_fa = page.get()
                        if not pagetitle in text_fa:
                            text += (u"|[[" + pagetitle + u"]] ||«" + vowel +
                                     u"»\n|-\n")
                            passport = True
                        text += u'\n|}\n'
                        if passport:
                            page.put(text_fa + u'\n' + text,
                                     u"ربات:مقالهها برای انتقال")
            try:
                #-------------------------1----------------------------------
                # Persian yeh/kaf -> Arabic yeh/kaf variant.
                if u"ی" in pagetitle or u"ک" in pagetitle:
                    # NOTE(review): lost ZWNJ — as written this condition is
                    # always False, so the else branch always runs.
                    if not u"" in pagetitle:
                        New_redirect_name = pagetitle.replace(
                            u"ی", u"ي").replace(u"ک", u"ك")
                        if fa_page_title_list.find(
                                u'\n' + New_redirect_name.strip() +
                                u'\n') == -1:
                            msg = u"ربات:تغییرمسیر از ی و ک عربی به ی و ک فارسی (" + botVersion + u")"
                            fa_page_title_list = creat_redirect(
                                fa_page_title_list, New_redirect_name,
                                pagetitle_source, msg)
                    else:
                        # NOTE(review): .replace(u"", u" ") — lost ZWNJ.
                        New_redirect_name = pagetitle.replace(
                            u"ی", u"ي").replace(u"ک", u"ك").replace(u"", u" ")
                        if fa_page_title_list.find(
                                u'\n' + New_redirect_name.strip() +
                                u'\n') == -1:
                            msg = u"ربات:تغییرمسیر از ی و ک عربی به ی و ک فارسی و فاصله به فاصلهٔ مجازی (" + botVersion + u")"
                            fa_page_title_list = creat_redirect(
                                fa_page_title_list, New_redirect_name,
                                pagetitle_source, msg)
                #-------------------------2----------------------------------
                # ZWNJ -> plain space variant. NOTE(review): lost ZWNJ in
                # both literals below.
                if u"" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"", u" ")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از فاصله به فاصلهٔ مجازی (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3----------------------------------
                # alef-madda -> alef variant.
                if u"آ" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"آ", u"ا")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از ا به آ (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3----------------------------------
                # alef-hamza -> alef variant.
                if u"أ" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"أ", u"ا")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از ا به أ (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3.5----------------------------------
                # Drop standalone hamza.
                if u"ء" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"ء", u"")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از ء به (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3.5----------------------------------
                # NOTE(review): BUG — ``(a or b or c) in x`` only tests the
                # first operand (u"," in pagetitle); the other punctuation
                # never triggers this branch. Same bug in the next section.
                if (u"," or u"،" or u"(") in pagetitle:
                    # NOTE(review): the final .replace(u" ", u" ") is a
                    # no-op as displayed — presumably double-space -> space
                    # before the paste mangled it.
                    New_redirect_name = pagetitle.replace(u",", u" ").replace(
                        u"،", u" ").replace(u")", u" ").replace(
                        u"(", u" ").replace(u" ", u" ").strip()
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از ,()، به (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3.5----------------------------------
                # Normalised punctuation spacing variant (same or-bug).
                if (u"(" or u"،" or u",") in pagetitle:
                    New_redirect_name = pagetitle.replace(
                        u" ،", u"،").replace(u" ,", u",").replace(
                        u"،", u"، ").replace(u",", u", ").replace(
                        u"(", u" (").replace(u")", u") ").replace(
                        u" ", u" ").strip()
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر فاصله برای سجاوندی (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------3.5----------------------------------
                # teh-marbuta -> heh variant.
                if u"ة" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"ة", u"ه")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر از ه به ة (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------4----------------------------------
                # Remove space before Persian comma.
                if u"،" in pagetitle:
                    New_redirect_name = pagetitle.replace(u" ،", u"،")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر سجاوندی درست برای ویرگول(" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------5----------------------------------
                # Latin comma -> Persian comma variant.
                if u"," in pagetitle:
                    New_redirect_name = pagetitle.replace(u",", u"،")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر سجاوندی درست برای ویرگول غیرفارسی (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------5.5----------------------------------
                # Swap common ordinal synonyms (whole-word, hence the space
                # padding around the title before replace).
                b = -1
                sources = [
                    u'اول', u'یکم', u'ثانی', u'ثالث', u'نخستین', u'اولین'
                ]
                targets = [u'یکم', u'اول', u'دوم', u'سوم', u'اولین', u'نخستین']
                for i in sources:
                    b += 1
                    j = targets[b]
                    if i in pagetitle:
                        New_redirect_name = (u' ' + pagetitle + u' ').replace(
                            u' ' + i + u' ', u' ' + j + u' ').strip()
                        if fa_page_title_list.find(
                                u'\n' + New_redirect_name.strip() +
                                u'\n') == -1:
                            msg = u"ربات:تغییرمسیر از " + j + u" به " + i + u" (" + botVersion + u")"
                            fa_page_title_list = creat_redirect(
                                fa_page_title_list, New_redirect_name,
                                pagetitle_source, msg)
                #-------------------------6----------------------------------
                # waw-hamza -> waw variant.
                if u"ؤ" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"ؤ", u"و")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر و به ؤ (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------6----------------------------------
                # "computer" -> "rayaneh" word variant (lost ZWNJ in the
                # plural/adjective replacements).
                if u"کامپیوتر" in pagetitle:
                    New_redirect_name = pagetitle.replace(
                        u"کامپیوترها", u"رایانهها").replace(
                        u"کامپیوتری", u"رایانهای").replace(
                        u"کامپیوتر", u"رایانه")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر رایانه به کامپیوتر (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------7----------------------------------
                # heh+yeh -> heh variant (lost ZWNJ likely).
                if u"هی" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"هی", u"ه")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر ه به هی (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------8----------------------------------
                # "ه " -> "هٔ " (ezafe) variant, excluding common words that
                # legitimately end in heh+space.
                if u"ه " in pagetitle and redirection == 0 and pagetitle.find(
                        u'كه ') == -1 and pagetitle.find(
                        u'اه ') == -1 and pagetitle.find(
                        u'ه اي ') == -1 and pagetitle.find(
                        u'ه ای ') == -1 and pagetitle.find(
                        u'که ') == -1 and pagetitle.find(
                        u'راه ') == -1 and pagetitle.find(
                        u'ه با ') == -1 and pagetitle.find(
                        u'گروه ') == -1 and pagetitle.find(
                        u'ه که ') == -1 and pagetitle.find(
                        u'ه كه ') == -1 and pagetitle.find(
                        u'ه در ') == -1 and pagetitle.find(
                        u'ه براي ') == -1 and pagetitle.find(
                        u'ه برای ') == -1 and pagetitle.find(
                        u'ه از ') == -1 and pagetitle.find(
                        u'ه ;') == -1 and pagetitle.find(
                        u'علیه ') == -1 and pagetitle.find(
                        u'عليه ') == -1 and pagetitle.find(
                        u'ه و ') == -1 and pagetitle.find(
                        u'ه :') == -1 and pagetitle.find(
                        u'شاه ') == -1 and pagetitle.find(
                        u'به ') == -1 and pagetitle.find(
                        u'الله ') == -1 and pagetitle.find(
                        u'ه (') == -1 and pagetitle.find(
                        u'گه ') == -1 and pagetitle.find(
                        u'ه -') == -1 and pagetitle.find(u'ه-') == -1:
                    New_redirect_name = pagetitle.replace(u"ه ", u"هٔ ")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر هٔ به ه (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------9----------------------------------
                # "ها " -> "های " variant, with similar word exclusions.
                if u"ها " in pagetitle and redirection == 0 and pagetitle.find(
                        u'ها (') == -1 and pagetitle.find(
                        u'ها براي ') == -1 and pagetitle.find(
                        u'ها برای ') == -1 and pagetitle.find(
                        u'ها با ') == -1 and pagetitle.find(
                        u'ها در ') == -1 and pagetitle.find(
                        u'ها از ') == -1 and pagetitle.find(
                        u'ها كه ') == -1 and pagetitle.find(
                        u'ها که ') == -1 and pagetitle.find(
                        u'ها و ') == -1 and pagetitle.find(
                        u'ها :') == -1 and pagetitle.find(u'ها ;') == -1:
                    New_redirect_name = pagetitle.replace(u"ها ", u"های ")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر های به ها (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------10----------------------------------
                # NOTE(review): lost ZWNJ — both literals display identically
                # so this replace is a no-op as written.
                if u"ها" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"ها", u"ها")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر فاصلهٔ مجازی+ها به ها (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------10.5----------------------------------
                # ZWNJ+ha -> space+ha variant (lost ZWNJ again).
                if u"ها" in pagetitle:
                    New_redirect_name = pagetitle.replace(u"ها", u" ها")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر فاصلهٔ مجازی+ها به فاصله+ها (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------11----------------------------------
                # mi+ZWNJ -> mi variant (lost ZWNJ; replaces look like
                # no-ops as displayed).
                if u"می" in pagetitle or u"مي" in pagetitle:
                    New_redirect_name = pagetitle.replace(
                        u"می", u"می").replace(u"مي", u"مي")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر می+فاصلهٔ مجازی به می (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------12----------------------------------
                # Persian digits -> Latin digits variant.
                farsinum = [
                    u'۰', u'۱', u'۲', u'۳', u'۴', u'۵', u'۶', u'۷', u'۸', u'۹'
                ]
                counters = -1
                pagetitle2 = pagetitle
                for num in farsinum:
                    counters += 1
                    pagetitle2 = pagetitle2.replace(num, str(counters))
                if pagetitle2 != pagetitle:
                    # NOTE(review): almost certainly a BUG — this checks and
                    # creates a redirect from the stale New_redirect_name of
                    # an earlier section instead of pagetitle2.
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر عدد لاتین به عدد فارسی (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                #-------------------------13----------------------------------
                # "United States of America" -> "United States" variant.
                if u"ایالات متحده آمریکا" in pagetitle:
                    New_redirect_name = pagetitle.replace(
                        u"ایالات متحده آمریکا", u"ایالات متحده")
                    if fa_page_title_list.find(
                            u'\n' + New_redirect_name.strip() + u'\n') == -1:
                        msg = u"ربات:تغییرمسیر (" + botVersion + u")"
                        fa_page_title_list = creat_redirect(
                            fa_page_title_list, New_redirect_name,
                            pagetitle_source, msg)
                elif u"آمریکا" in pagetitle:
                    # Skip the adjective form ("American").
                    if not u"آمریکایی" in pagetitle:
                        New_redirect_name = pagetitle.replace(
                            u"آمریکا", u"ایالات متحده")
                        if fa_page_title_list.find(
                                u'\n' + New_redirect_name.strip() +
                                u'\n') == -1:
                            msg = u"ربات:تغییرمسیر (" + botVersion + u")"
                            fa_page_title_list = creat_redirect(
                                fa_page_title_list, New_redirect_name,
                                pagetitle_source, msg)
                # Reset per-page scratch variables.
                pagetitle_source, New_redirect_name, msg = u'', u'', u''
            except:  # NOTE(review): bare except swallows all section errors
                continue
# it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import wikipedia, re, catlib essite = wikipedia.Site('es', 'wikipedia') selfcat = wikipedia.Page( essite, u'Wikipedia:Informes automáticos/Categorías autocontenidas') spam = u"Usando: [[Wikipedia:Informes automáticos/Categorías autocontenidas]]" m = re.compile(ur"(?i)\[\[:(C[^\]].*?)\]\]").finditer(selfcat.get()) for i in m: cattitle = i.group(1) catpage = catlib.Category(essite, cattitle) if catpage.exists( ) and not catpage.isRedirectPage() and not catpage.isDisambig(): cattitleWithout = catpage.titleWithoutNamespace() cattext = catpage.get() wikipedia.output(catpage.title()) #marcamos para destruir las que no tienen artículos ni subcategorías if len(cattext) >= len(cattitle) + 4 and len(
# You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import datetime import os, re, wikipedia site=wikipedia.Site("es", "wikipedia") #discusiones mas activas os.system('mysql -h sql-s3 -e "use eswiki_p;select count(*) as count, rc_title from recentchanges where rc_timestamp>=date_add(now(), interval -3 day) and rc_namespace=1 group by rc_title order by count desc limit 25;" > /home/emijrp/temporal/tarea006data') f=open('/home/emijrp/temporal/tarea006data', 'r') sql=unicode(f.read(), 'utf-8') m=re.compile(ur"(\d+)\s+(.*)").finditer(sql) page=wikipedia.Page(site, "Template:DiscusionesActivas") s=u"<div class='plainlinks'>\n{| class='wikitable' style='width: {{{ancho|275px}}};clear: right;float: right;margin: 0 0 1em 1em;text-align: center;'\n! Discusiones más activas [[Image:FireIcon.svg|18px]]\n! Ediciones\n" c=1 ss="" for i in m: ed=str(i.group(1)) art_=i.group(2) art=re.sub("_", " ", art_) if not re.search(u"Candidatura a destacado", art): if c<=5: ss+=u"|-\n| [[Discusión:%s|%s]] || [http://es.wikipedia.org/w/index.php?title=Discusión:%s&action=history %s] \n" % (art,art,art_,ed) c+=1 s+=ss s+=u"|-\n| colspan='2' | <small>Actualizado: {{subst:CURRENTTIME}} (UTC) del {{subst:CURRENTDAY}} de {{subst:CURRENTMONTHNAME}} de {{subst:CURRENTYEAR}}</small>\n" s+=u"|}\n</div>" wikipedia.output(s)
def main():
    """Update [[User:AVBOT/Últimos días]] with a per-day table of AVBOT's
    main-namespace edits over the last ``limit`` days.

    Reads the eswiki recent-changes replica with MySQLdb, aggregates edit
    counts per YYYYMMDD day, and writes a wikitable (most recent day
    first) back to the wiki.
    """
    limit = 14  # days of history to report
    conn = MySQLdb.connect(host='sql-s3',
                           db='eswiki_p',
                           read_default_file='~/.my.cnf',
                           use_unicode=True)
    cursor = conn.cursor()
    cursor.execute(
        "SELECT rc_timestamp from recentchanges where rc_user_text='AVBOT' and rc_namespace=0 and rc_deleted=0 and rc_timestamp>=date_add(now(), interval -%d day);"
        % limit)
    result = cursor.fetchall()
    days = {}
    c = 0
    for row in result:
        if len(row) == 1:
            # rc_timestamp is YYYYMMDDHHMMSS; keep the YYYYMMDD day part.
            rc_timestamp = row[0][:8]
            # ``in`` instead of the deprecated/removed dict.has_key().
            if rc_timestamp in days:
                days[rc_timestamp] += 1
            else:
                days[rc_timestamp] = 1
            c += 1
            percent(c)  # progress indicator
    # Sort by day descending (tuple ordering == the old list-of-lists
    # sort() + reverse()).
    l = sorted(days.items(), reverse=True)
    site = wikipedia.Site('es', 'wikipedia')
    weekday = {
        0: u'lunes',
        1: u'martes',
        2: u'miércoles',
        3: u'jueves',
        4: u'viernes',
        5: u'sábado',
        6: u'domingo'
    }
    monthname = {
        1: u'enero',
        2: u'febrero',
        3: u'marzo',
        4: u'abril',
        5: u'mayo',
        6: u'junio',
        7: u'julio',
        8: u'agosto',
        9: u'septiembre',
        10: u'octubre',
        11: u'noviembre',
        12: u'diciembre'
    }
    output = u"{| class='wikitable sortable' align='right' style='text-align: center' \n! Día !! Ediciones "
    for day, edits in l:
        date = datetime.datetime(year=int(day[0:4]),
                                 month=int(day[4:6]),
                                 day=int(day[6:8]))
        output += u"\n|-\n| %s, [[%d de %s]] || %d " % (
            weekday[date.weekday()], date.day, monthname[date.month], edits)
    output += u"\n|-\n| colspan=2 | <small>''Esta tabla recoge la actividad de AVBOT<br/>en los últimos días<br/>* La tasa ha disminuido gracias a<br/>la nueva herramienta [[Special:AbuseFilter|AbuseFilter]]''</small>\n|}"
    wii = wikipedia.Page(site, u"User:AVBOT/Últimos días")
    wii.put(output, u"BOT - Actualizando plantilla")
def doImage(self, image):
    """Mirror one image across the wiki network.

    *image* is a '|'-separated spec:
    ``name|newName|sourceWiki|exclusionMode|exclusionInfo`` — only the
    first field is required. The image is downloaded from the source wiki
    and re-uploaded (with a {{networkMirror}}-tagged description page) to
    every other wiki selected by the exclusion mode.

    Returns False on error, None on success.
    """
    data = image.split(u'|')
    imageName = data[0]
    newImageName = data[0]
    # Optional second field: rename at the destination.
    if len(data) >= 2 and data[1].strip():
        newImageName = data[1]
    sourceWiki = u'anime'
    if len(data) >= 3:
        sourceWiki = data[2]
    exclusionMode = u'normal'
    if len(data) >= 4:
        exclusionMode = data[3]
    exclusionInfo = u''
    if len(data) >= 5:
        exclusionInfo = data[4]
    sourceSite = None
    outputSites = []
    wikipedia.output(u'Doing Image %s' % imageName)
    # Select source site and destination sites.
    for site in self.siteList:
        if site.family.name == sourceWiki:
            sourceSite = site
        if exclusionMode == u'normal':
            outputSites.append(site)
        elif exclusionMode == u'include':
            includes = exclusionInfo.split(u',')
            if site.family.name in includes:
                outputSites.append(site)
        elif exclusionMode == u'exclude':
            excludes = exclusionInfo.split(u',')
            # BUG FIX: original tested ``site.family.name not in includes``,
            # a variable from the other branch (NameError or wrong list);
            # the exclusion list is what must be consulted here.
            if site.family.name not in excludes:
                outputSites.append(site)
        else:
            wikipedia.output(u'Unknown exclusion mode. Skiping %s.' % imageName)
            return False
    if sourceSite is None:
        wikipedia.output(u'No source site found. Skiping %s.' % imageName)
        return False
    try:
        sourceDescriptionPage = wikipedia.Page(sourceSite, imageName, None,
                                               6)  # 6 = Image namespace
        sourceImagePage = wikipedia.ImagePage(sourceSite,
                                              sourceDescriptionPage.title())
    except wikipedia.NoPage:
        wikipedia.output(u'No source page found. Skiping %s.' % imageName)
        return False
    sourceURL = sourceImagePage.fileUrl()
    if '://' not in sourceURL:
        sourceURL = u'http://%s%s' % (sourceSite.hostname(), sourceURL)
    # Download the source image bytes.
    uo = wikipedia.MyURLopener()
    sourceFile = uo.open(sourceURL, "rb")
    wikipedia.output(u'Reading file %s' % sourceURL)
    sourceContents = sourceFile.read()
    if sourceContents.find(
            "The requested URL was not found on this server.") != -1:
        wikipedia.output("Couldn't download the image. Skiping.")
        return False
    sourceFile.close()
    # Build the mirrored description page text.
    pageDescription = sourceDescriptionPage.get()
    r = re.compile(u'== Summary ==\n?')
    if re.search(r, pageDescription):
        pageDescription = re.sub(r, u'', pageDescription)
    mirrorText = u'{{networkMirror|%s|%s}}' % (imageName,
                                               sourceSite.family.name)
    comm = re.compile(u'({{commons(\|[^{}]*)?}})', re.IGNORECASE)
    if re.search(comm, pageDescription):
        # Keep an existing {{commons}} tag first, mirror tag right after.
        pageDescription = re.sub(comm, u'\\1\n%s' % mirrorText,
                                 pageDescription)
    else:
        pageDescription = u'%s%s' % (mirrorText, pageDescription)
    pageDescription = u'== Summary ==\n%s' % pageDescription
    for site in outputSites:
        # Never re-upload onto the source itself unless the name changes.
        if sourceSite.family.name != site.family.name or imageName != newImageName:
            doUpload = False
            doDescription = False
            try:
                siteDescriptionPage = wikipedia.Page(site, newImageName,
                                                     None, 6)
                siteImagePage = wikipedia.ImagePage(
                    site, siteDescriptionPage.title())
                siteURL = siteImagePage.fileUrl()
                if '://' not in siteURL:
                    siteURL = u'http://%s%s' % (site.hostname(), siteURL)
                uo2 = wikipedia.MyURLopener()
                siteFile = uo2.open(siteURL, "rb")
                wikipedia.output(u'Reading file %s' % siteURL)
                siteContents = siteFile.read()
                # BUG FIX: original re-tested sourceContents here, so a
                # missing file at the destination was never detected.
                if siteContents.find(
                        "The requested URL was not found on this server."
                ) != -1:
                    wikipedia.output(
                        "Couldn't download the image at new location.")
                    doUpload = True
                    # NOTE(review): kept from the original — this break
                    # aborts the whole site loop (and skips the upload just
                    # scheduled); probably a further latent bug.
                    break
                siteFile.close()
                if siteContents != sourceContents:
                    doUpload = True
                if siteDescriptionPage.get() != pageDescription:
                    doDescription = True
            except wikipedia.NoPage:
                doUpload = True
                doDescription = True
            if doUpload:
                bot = upload.UploadRobot(url=sourceURL,
                                         useFilename=newImageName,
                                         keepFilename=True,
                                         verifyDescription=False,
                                         description=msg['en'],
                                         targetSite=site,
                                         urlEncoding=sourceSite.encoding())
                bot.run()
            if doDescription:
                siteDescriptionPage.put(pageDescription)
# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import wikipedia,re site=wikipedia.Site("es", "wikipedia") page=wikipedia.Page(site, u"Wikipedia:Candidaturas a bibliotecario/Tabla") #{{CandidaturaBibliotecario|Racso|Tomatejc|18/01/2008|03/04/2007|4402|color=#FFFFCC}} m=re.compile(ur"\{\{CandidaturaBibliotecario\|(?P<candidato>[^\|]+)\|(?P<propuesto>[^\|]+)\|").finditer(page.get()) s=u"{| class='wikitable' width='500px' style='font-size: 90%s;text-align: center;'\n! colspan=7 | Candidaturas a bibliotecario \n|-\n! # !! Usuario !! Propuesto por !! A favor !! En contra !! %s !! Estado" % ("%", "%") raw=u"" c=0 send=False limite=0 for i in m: limite+=1 if limite>3: break cafavor=0 cencontra=0 estado=u"Abierta"
def main(): quietMode = False # use -quiet to get less output # if the -file argument is used, page titles are stored in this array. # otherwise it will only contain one page. articles = [] # if -file is not used, this temporary array is used to read the page title. page_title = [] # Which namespaces should be processed? # default to [] which means all namespaces will be processed namespaces = [] xmlfilename = None gen = None # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() for arg in pywikibot.handleArgs(): if arg.startswith('-xml'): if len(arg) == 4: xmlfilename = pywikibot.input( u'Please enter the XML dump\'s filename:') else: xmlfilename = arg[5:] gen = TableXmlDumpPageGenerator(xmlfilename) elif arg == '-sql': query = u""" SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) WHERE old_text LIKE '%<table%' LIMIT 200""" gen = pagegenerators.MySQLPageGenerator(query) elif arg.startswith('-namespace:'): try: namespaces.append(int(arg[11:])) except ValueError: namespaces.append(arg[11:]) elif arg.startswith('-skip:'): articles = articles[articles.index(arg[6:]):] elif arg.startswith('-auto'): config.table2wikiAskOnlyWarnings = True config.table2wikiSkipWarnings = True print "Automatic mode!\n" elif arg.startswith('-quiet'): quietMode = True else: if not genFactory.handleArg(arg): page_title.append(arg) # if the page is given as a command line argument, # connect the title's parts with spaces if page_title != []: page_title = ' '.join(page_title) page = pywikibot.Page(pywikibot.getSite(), page_title) gen = iter([page]) if not gen: gen = genFactory.getCombinedGenerator() if gen: if namespaces != []: gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = Table2WikiRobot(preloadingGen, quietMode) bot.run() else: 
pywikibot.showHelp('table2wiki')
def _parseCategory(self, purge=False, startFrom=None):
    """
    Yields all articles and subcategories that are in this category.

    Set purge to True to instruct MediaWiki not to serve a cached version.

    Set startFrom to a string which is the title of the page to start from.

    Yielded results are tuples in the form (tag, page) where tag is one of the
    constants ARTICLE and SUBCATEGORY, and title is the Page or Category
    object.

    Note that results of this method need not be unique.

    This should not be used outside of this module.
    """
    # The category-listing HTML changed between MediaWiki releases, so the
    # scraping regexes are chosen by version number.
    if self.site().versionnumber() < 4:
        Rtitle = re.compile('title\s?=\s?\"([^\"]*)\"')
    elif self.site().versionnumber() < 8:
        # FIXME seems to parse all links
        Rtitle = re.compile('/\S*(?: title\s?=\s?)?\"([^\"]*)\"')
    else:
        Rtitle = re.compile(
            '<li>(?:<span.*?>)?<a href=\".*?\"\s?title\s?=\s?\"([^\"]*)\"\>\+?[^\<\+]'
        )
    # 1.8+ renders subcategories (CategoryTree) and images (gallery)
    # differently from plain article links.
    if self.site().versionnumber() < 8:
        Rsubcat = None
        Rimage = None
    else:
        Rsubcat = re.compile(
            'CategoryTreeLabelCategory\"\s?href=\".+?\">(.+?)</a>')
        Rimage = re.compile(
            '<div class\s?=\s?\"thumb\"\sstyle=\"[^\"]*\">(?:<div style=\"[^\"]*\">)?<a href=\".*?\"(?:\sclass="image")?\stitle\s?=\s?\"([^\"]*)\"'
        )
    # NOTE(review): ns is computed but never used below.
    ns = self.site().category_namespaces()
    # regular expression matching the "(next 200)" link
    RLinkToNextPage = re.compile('&from=(.*?)" title="')
    if startFrom:
        currentPageOffset = urllib.quote(
            startFrom.encode(self.site().encoding()))
    else:
        currentPageOffset = None
    # Fetch listing pages one at a time, following "(next 200)" links.
    while True:
        path = self.site().get_address(self.urlname())
        if purge:
            path += '&action=purge'
        if currentPageOffset:
            path += '&from=' + currentPageOffset
            wikipedia.output(
                'Getting [[%s]] starting at %s...'
                % (self.title(),
                   wikipedia.url2link(currentPageOffset, self.site(),
                                      self.site())))
        else:
            wikipedia.output('Getting [[%s]]...' % self.title())
        wikipedia.get_throttle()
        txt = self.site().getUrl(path)
        # index where subcategory listing begins
        if self.site().versionnumber() >= 9:
            # These IDs were introduced in 1.9
            if '<div id="mw-subcategories">' in txt:
                ibegin = txt.index('<div id="mw-subcategories">')
            elif '<div id="mw-pages">' in txt:
                ibegin = txt.index('<div id="mw-pages">')
            elif '<div id="mw-category-media">' in txt:
                ibegin = txt.index('<div id="mw-category-media">')
            else:
                # No pages
                return
        else:
            ibegin = txt.index('<!-- start content -->'
                               )  # does not work for cats without text
            # TODO: This parses category text and may think they are
            # pages in category! Check for versions before 1.9
        # index where article listing ends
        if '<div class="printfooter">' in txt:
            iend = txt.index('<div class="printfooter">')
        elif '<div class="catlinks">' in txt:
            iend = txt.index('<div class="catlinks">')
        else:
            iend = txt.index('<!-- end content -->')
        # Restrict scraping to the listing body only.
        txt = txt[ibegin:iend]
        for title in Rtitle.findall(txt):
            if title == self.title():
                # This is only a link to "previous 200" or "next 200".
                # Ignore it.
                pass
            # For MediaWiki versions where subcats look like articles
            elif isCatTitle(title, self.site()):
                ncat = Category(self.site(), title)
                yield SUBCATEGORY, ncat
            else:
                yield ARTICLE, wikipedia.Page(self.site(), title)
        if Rsubcat:
            # For MediaWiki versions where subcats look differently
            for titleWithoutNamespace in Rsubcat.findall(txt):
                title = 'Category:%s' % titleWithoutNamespace
                ncat = Category(self.site(), title)
                yield SUBCATEGORY, ncat
        if Rimage:
            # For MediaWiki versions where images work through galleries
            for title in Rimage.findall(txt):
                # In some MediaWiki versions, the titles contain the namespace,
                # but they don't in other (newer) versions. Use the ImagePage's
                # defaultNamespace feature to get everything correctly.
                yield ARTICLE, wikipedia.ImagePage(self.site(), title)
        # try to find a link to the next list page
        matchObj = RLinkToNextPage.search(txt)
        if matchObj:
            currentPageOffset = matchObj.group(1)
        else:
            break
def treat(text, linkedPage, targetPage):
    """
    Based on the method of the same name in solve_disambiguation.py

    Rewrite every wikilink in *text* that points to *linkedPage* so it
    points to *targetPage* instead, preserving the visible link text,
    sections and trailing characters where possible. Returns the
    modified wikitext.
    """
    mysite = pywikibot.getSite()
    linktrail = mysite.linktrail()
    # make a backup of the original text so we can show the changes later
    linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>'
        + linktrail + ')')
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        # ignore interwiki links and links to sections of the same page
        if m.group('title').strip() == '' or \
           mysite.isInterwikiLink(m.group('title')):
            continue
        else:
            actualLinkPage = pywikibot.Page(targetPage.site(),
                                            m.group('title'))
            # Check whether the link found is to page.
            if actualLinkPage != linkedPage:
                continue
        # how many bytes should be displayed around the current link
        context = 15
        # at the beginning of the link, start red color.
        # at the end of the link, reset the color to default
        #pywikibot.output(text[max(0, m.start() - context) : m.start()] + '\03{lightred}' + text[m.start() : m.end()] + '\03{default}' + text[m.end() : m.end() + context])
        # NOTE(review): choice is hard-wired to 'y', so the interactive
        # unlink ("uU") and replace ("rR") branches below are dead code.
        choice = 'y'
        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')
        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        if m.group('section') == None:
            section = ''
        else:
            section = m.group('section')
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars
        if choice in "uU":
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            continue
        replaceit = choice in "rR"
        # remove preleading ":"
        if link_text[0] == ':':
            link_text = link_text[1:]
        # Preserve the capitalisation style of the original link text.
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = targetPage.title()[0].lower() + \
                             targetPage.title()[1:]
        # remove preleading ":"
        if new_page_title[0] == ':':
            new_page_title = new_page_title[1:]
        if replaceit and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section,
                                      trailing_chars)
        elif replaceit or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif len(new_page_title) <= len(link_text) and \
             firstcap(link_text[:len(new_page_title)]) == \
             firstcap(new_page_title) and \
             re.sub(re.compile(linktrail), '',
                    link_text[len(new_page_title):]) == '' and not section:
            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                    link_text[len(new_page_title):])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
        continue
    return text
def main():
    """Scrape the latest Totocalcio draw from calcio.sisal.it and publish
    it as an it-wikinews article built from modello_totocalcio.txt.

    Flags: -always (save without asking), -force (overwrite an existing
    article for the same draw).
    """
    args = wikipedia.handleArgs()
    all = False
    force = False
    for currentArgument in args:
        if currentArgument.startswith("-always"):
            all = True
        if currentArgument.startswith("-force"):
            force = True
    # Read the page template.
    templateFile = codecs.open("modello_totocalcio.txt", "r", "utf-8")
    modelloVoce = templateFile.read()
    templateFile.close()
    urlo = "http://www.calcio.sisal.it/pages/totocalcio/ultimo.xwb"
    wikipedia.output(u'Prendo la pagina dal server...')
    # Fetch the page; on a server error wait 10 seconds and retry once.
    try:
        htmlText = pageText(urlo)
    except urllib2.HTTPError:
        try:
            wikipedia.output(
                u"Errore del server. Aspetto 10 secondi... " +
                time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()))
            time.sleep(10)
            htmlText = pageText(urlo)
        except urllib2.HTTPError:
            wikipedia.output(u"Errore del server. Chiudo.")
            return
    # Draw header: number, weekday, day, month, year.
    concorso = re.search(
        "<h2>Concorso n. (\d+) di (.*?) (\d+) (.*?) (\d+)</h2>", htmlText)
    # Prize pool for the main game: draw amount, jackpot carry-over, total.
    montepremi = re.search(
        "<tr>\s*<th[^>]*>Il Montepremi</th>\s*</tr>\s*<tr>\s*<TD[^>]*>\s*Del Concorso \(premi a punteggio\)\s*</TD>\s*<TD>\s*EUR\s*</TD>\s*<TD>\s*(.*?)\s*</TD>\s*</tr>\s*<tr>\s*<TD[^>]*>\s*Riporto Jackpot conc\. precedente\s*</TD>\s*<TD>\s*EUR\s*</TD>\s*<TD[^>]*>\s*(.*?)\s*</TD>\s*</tr>\s*<tr\s*>\s*<TD[^>]*>\s*Montepremi totale del concorso\s*</TD>\s*<TD>\s*EUR\s*</TD>\s*<TD[^>]*>\s*(.*?)\s*</TD>\s*</tr>",
        htmlText, re.I)
    # Same three figures for the "Il9" side game.
    montepremi9 = re.search(
        "<tr>\s*<th[^>]*>Il Montepremi \"Il9\"</th>\s*</tr>\s*<tr>\s*<TD[^>]*>\s*Del Concorso \(premi a punteggio\)\s*</TD>\s*<TD>\s*EUR\s*</TD>\s*<TD>\s*(.*?)\s*</TD>\s*</tr>\s*<tr>\s*<TD[^>]*>\s*Riporto Jackpot conc\. precedente\s*</TD>\s*<TD>\s*EUR\s*</TD>\s*<TD[^>]*>\s*(.*?)\s*</TD>\s*</tr>\s*<tr>\s*<TD[^>]*>\s*Montepremi totale del concorso 9\s*</TD>\s*<TD>\s*EUR\s*</td>\s*<TD[^>]*>\s*(.*?)\s*</TD>\s*</tr>",
        htmlText, re.I)
    # Regex templates (filled with %): winners/payout row per score class,
    # and one match row (teams, goals, 1X2 sign).
    bloccoQuote = "<tr>\s*<td>\s*(.*?)\s*</td>\s*<td>\"%s\"</td>\s*<td[^>]*>\s*(.*?)\s*</td>\s*</tr>"
    bloccoPartita = "<tr[^>]*>\s*<td[^>]*>\s*<b>\s*%s\s*</b>\s*</td>\s*<td[^>]*>\s*(.*?)\s*</td>\s*<td[^>]*>\s*(.*?)\s*</td>\s*<td[^>]*>\s*<b>\s*(\d+)\s*-\s*(\d+) \s*</b>\s*</td>\s*<td[^>]*>\s*<b>\s*([12X]) \s*</b>\s*</td>\s*</tr>"
    # Next draw's jackpots (main game and "Il9").
    jackpotFuturi = re.search(
        "<table.*?>\s*<tr>\s*<th[^>]*>\s*Jackpot prossimo concorso\s*</th>\s*</tr>\s*<tr>\s*<th[^>]*>.*?</th>\s*</tr>\s*<tr>\s*<td[^>]*><h2><b>14</b></h2></td>\s*<td[^>]*><h1>(.*?)<font[^>]*>.*?</font></h1></td>\s*</tr>\s*</table>",
        htmlText, re.I)
    page = wikipedia.Page(
        wikipedia.getSite(code='it', fam='wikinews'),
        "Italia: concorso n. " + concorso.group(1) + "/" +
        concorso.group(5) + " del Totocalcio")
    # Already published and not forced: nothing new to do.
    if page.exists() and not force:
        wikipedia.output("Nessuna nuova estrazione. Mi fermo.")
        return
    # Substitutions applied to the template placeholders.
    elencoSostituzioni = {
        '#super-id': concorso.group(1),
        # NOTE(review): mojibake repair of the accented weekday char.
        '#dow': concorso.group(2).replace('ì', u'ì'),
        '#giorno': concorso.group(3),
        '#mese': concorso.group(4),
        '#anno': concorso.group(5),
        '#montepremi-parz': montepremi.group(1),
        '#jackpot': montepremi.group(2),
        '#montepremi-tot': montepremi.group(3),
        '#9-montepremi-parz': montepremi9.group(1),
        '#9-jackpot': montepremi9.group(2),
        '#9-montepremi-tot': montepremi9.group(3),
    }
    # Future jackpots may be absent from the page.
    try:
        elencoSostituzioni['#futuro-jackpot'] = jackpotFuturi.group(1)
    except:
        elencoSostituzioni['#futuro-jackpot'] = '-'
    try:
        elencoSostituzioni['#9-futuro-jackpot'] = jackpotFuturi.group(2)
    except:
        elencoSostituzioni['#9-futuro-jackpot'] = '-'
    # The 14 matches of the draw.
    partite = range(1, 15)
    for p in partite:
        match = re.search(bloccoPartita % p, htmlText, re.I)
        elencoSostituzioni['#sq-' + str(p) + 'a'] = match.group(1).capitalize()
        elencoSostituzioni['#sq-' + str(p) + 'b'] = match.group(2).capitalize()
        elencoSostituzioni['#res-' + str(p) + 'a'] = match.group(3)
        elencoSostituzioni['#res-' + str(p) + 'b'] = match.group(4)
        elencoSostituzioni['#ok-' + str(p)] = match.group(5)
    # Winner counts and payouts per score class.
    quotes = [9, 12, 13, 14]
    for c in quotes:
        match = re.search(bloccoQuote % c, htmlText, re.I)
        elencoSostituzioni['#vincitori-' + str(c)] = match.group(1).replace(
            'nessuna', '0')
        elencoSostituzioni['#euro-' + str(c)] = match.group(2).replace(
            '-', '0')
    nuovoTesto = massiveReplace(elencoSostituzioni, modelloVoce)
    #page = wikipedia.Page(wikipedia.getSite(code='it', fam='wikinews'), "Utente:BimBot/Sandbox") #DEBUG
    wikipedia.output(">>>>> " + page.title() + " <<<<<")
    try:
        vecchioTesto = page.get()
    except wikipedia.NoPage:
        vecchioTesto = ''
    wikipedia.showDiff(vecchioTesto, nuovoTesto)
    # Interactive confirmation unless -always was given.
    if not all:
        choice = wikipedia.inputChoice(u"Modificare?", ['Yes', 'No', 'All'],
                                       ['y', 'N', 'a'], 'N')
    else:
        choice = 'y'
    if choice in ['A', 'a']:
        all = True
        choice = 'y'
    if choice in ['Y', 'y']:
        page.put(nuovoTesto,
                 u"Bot: Inserisco nuova estrazione del Totocalcio")
def main():
    """Parse command-line options and drive the page-move bot.

    Supported options:
      -pairs[:file]   read (old, new) title pairs from a link file
      -from:X -to:Y   move a single page X to Y (repeatable)
      -prefix[:p]     prefix to prepend to the new titles
      -noredirect     do NOT leave a redirect behind (default is to leave one)
      -always         never ask for confirmation
      -skipredirects  skip pages that are redirects
      -summary[:s]    edit summary to use
    Any other argument is handed to the page-generator factory.

    Side effects: performs page moves via MovePagesBot; prints warnings for
    unpaired -from:/-to: options and odd-length pair files.
    """
    gen = None
    prefix = None
    oldName = None
    noredirect = True
    always = False
    skipredirects = False
    summary = None
    fromToPairs = []
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-pairs'):
            if len(arg) == len('-pairs'):
                filename = wikipedia.input(
                    u'Enter the name of the file containing pairs:')
            else:
                filename = arg[len('-pairs:'):]
            # Titles in the file alternate old/new; pair them up.
            oldName1 = None
            for page in pagegenerators.TextfilePageGenerator(filename):
                if oldName1:
                    fromToPairs.append([oldName1, page.title()])
                    oldName1 = None
                else:
                    oldName1 = page.title()
            if oldName1:
                wikipedia.output(
                    u'WARNING: file %s contains odd number of links' % filename)
        elif arg == '-noredirect':
            noredirect = False
        elif arg == '-always':
            always = True
        elif arg == '-skipredirects':
            skipredirects = True
        elif arg.startswith('-from:'):
            if oldName:
                # Previous -from: was never matched by a -to:.
                wikipedia.output(u'WARNING: -from:%s without -to:' % oldName)
            oldName = arg[len('-from:'):]
        elif arg.startswith('-to:'):
            if oldName:
                fromToPairs.append([oldName, arg[len('-to:'):]])
                oldName = None
            else:
                wikipedia.output(u'WARNING: %s without -from' % arg)
        elif arg.startswith('-prefix'):
            if len(arg) == len('-prefix'):
                prefix = wikipedia.input(u'Enter the prefix:')
            else:
                prefix = arg[len('-prefix:'):]
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = wikipedia.input(u'Enter the summary:')
            else:
                summary = arg[len('-summary:'):]
        else:
            genFactory.handleArg(arg)
    if oldName:
        # Trailing -from: with no matching -to:.
        wikipedia.output(u'WARNING: -from:%s without -to:' % oldName)
    # Explicit pairs are moved one by one, outside the generator run.
    for pair in fromToPairs:
        page = wikipedia.Page(wikipedia.getSite(), pair[0])
        bot = MovePagesBot(None, prefix, noredirect, always, skipredirects,
                           summary)
        bot.moveOne(page, pair[1])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = MovePagesBot(preloadingGen, prefix, noredirect, always,
                           skipredirects, summary)
        bot.run()
    elif not fromToPairs:
        wikipedia.showHelp('movepages')
l = l.strip() if l: if l[0] in [';', ':', '|', '{', '}', '<', '[', ']', '!', '#', '*', ' ']: continue else: if pagetitle.lower() in l.lower(): abstract = compensatehtmlcomments(l) break #capturar imagenes images = re.findall(ur"(?im)(?:(?:Archivo|File|Image)\s*\:|(?:image[ _]?skyline|picture|photo|photography|imagen?|foto|fotograf[íi]a)\s*=)\s*([^\|\[\]]+?\.(?:jpe?g))", revtext) selectedimage = '' caption = '' if images and images[0]: selectedimage = images[0] commonspage = wikipedia.Page(commonssite, u'File:%s' % (selectedimage)) if commonspage.exists(): caption = revtext.split(selectedimage)[1].strip() if caption.startswith('|thumb') or caption.startswith('|left') or caption.startswith('|right'): m = re.findall(ur'(?im)^\s*\|\s*(?:thumb|thumbnail|frame|(?:(?:up)?(?:left|right|center)(?:\s*=?\s*\d*\.?\d*)?))([^\[\]]*?)\]\]', caption) if m: caption = m[0].strip().lstrip('|') else: brackets = 2 c = 0 while len(caption) > c and c <= 500 and brackets != 0: if caption[c] == '[': brackets += 1 elif caption[c] == ']': brackets -= 1 c += 1
def subTemplate(self, content, param):
    """Substitute the template tags in content according to param.

    @param content: Content with tags to substitute.
    @type  content: string
    @param param: Param with data how to substitute tags.
    @type  param: dict

    Returns a tuple containing the new content with tags substituted,
    a list of those substituted tags, and a metadata dict.
    """
    substed_tags = []  # DRTRIGON-73
    # Pseudo-data always available for substitution.
    metadata = {
        'mw-signature': u'~~~~',
        'mw-timestamp': u'~~~~~',
    }  # DRTRIGON-132

    # 0.2.) check for 'simple' mode and get additional params
    if param['simple']:
        p = self.site.getExpandedString(param['simple'])
        param.update(pywikibot.extract_templates_and_params(p)[0][1])

    # 0.5.) check cron/date
    if param['cron']:
        # [min] [hour] [day of month] [month] [day of week]
        # (date supported only, thus [min] and [hour] dropped)
        if not (param['cron'][0] == '@'):
            param['cron'] = '* * ' + param['cron']
        entry = crontab.CronTab(param['cron'])
        # find the delay from midnight (does not return 0.0 - but next)
        delay = entry.next(datetime.datetime.now().replace(hour=0,
                                                           minute=0,
                                                           second=0,
                                                           microsecond=0) -
                           datetime.timedelta(microseconds=1))
        pywikibot.output(u'CRON delay for execution: %.3f (<= %i)'
                         % (delay, self._bot_config['CRONMaxDelay']))
        # Not scheduled for today: bail out with nothing substituted.
        if not (delay <= self._bot_config['CRONMaxDelay']):
            return (content, substed_tags, metadata)

    # 1.) getUrl or wiki text
    # (security: check url not to point to a local file on the server,
    #  e.g. 'file://' - same as used in xsalt.py)
    secure = False
    for item in [u'http://', u'https://', u'mail://', u'local://',
                 u'wiki://']:
        secure = secure or (param['url'][:len(item)] == item)
    param['zip'] = ast.literal_eval(param['zip'])
    if not secure:
        return (content, substed_tags, metadata)
    if param['url'][:7] == u'wiki://':
        # Source is another wiki page.
        url = param['url'][7:].strip('[]')  # enable wiki-links
        if ast.literal_eval(param['expandtemplates']):
            # DRTRIGON-93 (only with 'wiki://')
            external_buffer = pywikibot.Page(self.site,
                                             url).get(expandtemplates=True)
        else:
            external_buffer = self.load(pywikibot.Page(self.site, url))
    elif (param['url'][:7] == u'mail://'):  # DRTRIGON-101
        # Source is a message in the bot's mailbox.
        url = param['url'].replace(u'{{@}}', u'@')  # e.g. nlwiki
        mbox = SubsterMailbox(
            pywikibot.config.datafilepath(self._bot_config['data_path'],
                                          self._bot_config['mbox_file'], ''))
        external_buffer = mbox.find_data(url)
        mbox.close()
    elif (param['url'][:8] == u'local://'):  # DRTRIGON-131
        # Source is local bot state; only one hard-coded file is exposed.
        if (param['url'][8:] == u'cache/state_bots'):
            # filename hard-coded
            d = shelve.open(
                pywikibot.config.datafilepath('cache', 'state_bots'))
            external_buffer = pprint.pformat(
                ast.literal_eval(pprint.pformat(d)))
            d.close()
        else:
            external_buffer = u'n/a'
    else:
        # Plain http(s) fetch.
        # consider using 'expires', 'last-modified', 'etag' in order to
        # make the updating data requests more efficient! use those stored
        # on page, if the user placed them, else use the conventional mode.
        # http://www.diveintopython.net/http_web_services/etags.html
        f_url, external_buffer = http.request(self.site, param['url'],
                                              no_hostname=True,
                                              back_response=True)
        headers = f_url.headers  # same like 'f_url.info()'
        #if param['zip']:
        if ('text/' not in headers['content-type']):
            pywikibot.output(u'Source is of non-text content-type, '
                             u'using raw data instead.')
            external_buffer = f_url.read()
        del f_url  # free some memory (no need to keep copy)
        # Propagate useful HTTP caching headers into the metadata.
        for h in ['content-length', 'date', 'last-modified', 'expires']:
            if h in headers:
                metadata['url-%s' % h] = headers[h]

    # some intermediate processing (unzip, xlsx2csv, ...)
    if param['zip']:  # 'application/zip', ...
        # True means "first member"; an int selects a 1-based member index.
        fileno = 0 if (param['zip'] is True) else (param['zip'] - 1)
        external_buffer = self.unzip(external_buffer, fileno)
    if param['xlsx']:
        # 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        external_buffer = self.xlsx2csv(external_buffer, param['xlsx'])
    if param['ods']:  # 'application/vnd.oasis.opendocument.spreadsheet'
        external_buffer = self.ods2csv(external_buffer, param['ods'])

    if not ast.literal_eval(param['beautifulsoup']):  # DRTRIGON-88
        # 2.) regexp
        #for subitem in param['regex']:
        subitem = param['regex']
        regex = re.compile(subitem, re.S | re.I)

        # 3.) subst in content
        external_data = regex.search(external_buffer)
        external_data_dict = {}
        if external_data:  # not None
            external_data = external_data.groups()
            pywikibot.output(u'Groups found by regex: %i'
                             % len(external_data))
            # DRTRIGON-114: Support for named groups in regexs
            if regex.groupindex:
                for item in regex.groupindex:
                    external_data_dict[
                        u'%s-%s' % (param['value'], item)] = external_data[
                            regex.groupindex[item] - 1]
            elif (len(external_data) == 1):
                external_data_dict = {param['value']: external_data[0]}
            else:
                external_data_dict = {param['value']: str(external_data)}
        pywikibot.debug(str(external_data_dict))

        # SECURITY NOTE(review): 'postproc' comes from wiki-editable template
        # params and is passed to eval/exec below — any wiki editor can run
        # arbitrary code. The safer ast.literal_eval variant is noted but
        # disabled; needs coordinated changes on-wiki.
        param['postproc'] = eval(param['postproc'])
        # should be secured as given below, but needs code changes in wiki too
        #param['postproc'] = ast.literal_eval(param['postproc'])
        for value in external_data_dict:
            external_data = external_data_dict[value]
            # 4.) postprocessing
            func = param['postproc'][0]  # needed by exec call of self._code
            DATA = [external_data]      # mutated in place by the exec'd code
            args = param['postproc'][1:]
            scope = {}                  # (scope to run in)
            scope.update(locals())      # (add DATA, *args, ...)
            scope.update(globals())     # (add imports and else)
            if func:
                exec(self._code + (self._bot_config['CodeTemplate'] % func),
                     scope, scope)
                external_data = DATA[0]
            pywikibot.debug(external_data)

            # 5.) subst content
            (content, tags) = self.subTag(content, value, external_data,
                                          int(param['count']))
            substed_tags += tags
    else:
        # DRTRIGON-105: Support for multiple BS template configurations
        value = param['value']
        if value:
            value += u'-'
        # DRTRIGON-88: Enable Beautiful Soup power for Subster
        BS_tags = self.get_BS_regex(value).findall(content)
        pywikibot.output(u'BeautifulSoup tags found by regex: %i'
                         % len(BS_tags))
        prev_content = content
        BS = BeautifulSoup.BeautifulSoup(external_buffer)
        for item in BS_tags:
            # SECURITY NOTE(review): item[1] originates from page content and
            # is eval'd as an attribute path on the soup object.
            external_data = eval('BS.%s' % item[1])
            external_data = self._BS_regex_str % {
                'var1': value + 'BS:' + item[1],
                'var2': value,
                'cont': external_data}
            content = content.replace(item[0], external_data, 1)
        if (content != prev_content):
            substed_tags.append(value + 'BS')

    metadata['bot-timestamp'] = pywikibot.Timestamp.now().isoformat(' ')

    return (content, substed_tags, metadata)
def _parseCategory(self, purge=False, startFrom=None, sortby=None,
                   sortdir=None):
    """
    Yields all articles and subcategories that are in this category by API.

    Set startFrom to a string which is the title of the page to start from.

    Yielded results are tuples in the form (tag, page) where tag is one of
    the constants ARTICLE and SUBCATEGORY, and title is the Page or Category
    object.

    Note that results of this method need not be unique.

    This should not be used outside of this module.
    """
    # Fall back to screen-scraping for wikis without a usable API.
    if not self.site().has_api() or self.site().versionnumber() < 11:
        for tag, page in self._oldParseCategory(purge, startFrom):
            yield tag, page
        return
    currentPageOffset = None
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': self.title(),
        'cmprop': ['title', 'ids', 'sortkey', 'timestamp'],
        #'': '',
    }
    if sortby:
        params['cmsort'] = sortby
    if sortdir:
        params['cmdir'] = sortdir
    while True:
        # API caps categorymembers at 500 per request for normal users.
        if wikipedia.config.special_page_limit > 500:
            params['cmlimit'] = 500
        else:
            params['cmlimit'] = wikipedia.config.special_page_limit
        if currentPageOffset:
            # Continue a previous request from the server-supplied offset.
            # (popitem is only for the progress message; params already
            # carries the offset via update()).
            params.update(currentPageOffset)
            wikipedia.output('Getting [[%s]] list from %s...'
                             % (self.title(),
                                "%s=%s" % currentPageOffset.popitem()))
        elif startFrom:
            startFrom = startFrom.upper()  # category sort keys are uppercase
            params['cmstartsortkey'] = startFrom
            wikipedia.output('Getting [[%s]] list starting at %s...'
                             % (self.title(), startFrom))
        else:
            wikipedia.output('Getting [[%s]]...' % self.title())
        wikipedia.get_throttle()
        data = query.GetData(params, self.site())
        if 'error' in data:
            raise RuntimeError("%s" % data['error'])
        count = 0
        for memb in data['query']['categorymembers']:
            count += 1
            # For MediaWiki versions where subcats look like articles
            if memb['ns'] == 14:
                # Namespace 14 = Category.
                yield SUBCATEGORY, Category(self.site(), memb['title'],
                                            sortKey=memb['sortkey'])
            elif memb['ns'] == 6:
                # Namespace 6 = File/Image.
                yield ARTICLE, wikipedia.ImagePage(self.site(), memb['title'])
            else:
                yield ARTICLE, wikipedia.Page(self.site(), memb['title'],
                                              defaultNamespace=memb['ns'])
            if count >= params['cmlimit']:
                break
        # try to find a link to the next list page
        # NOTE(review): continuation is only followed when the batch came
        # back short of cmlimit — presumably to respect special_page_limit
        # as a total cap; verify this is intended.
        if 'query-continue' in data and count < params['cmlimit']:
            currentPageOffset = data['query-continue']['categorymembers']
        else:
            break
def run(self):
    """ Starts the robot.

    For each generated talk page: load its text, read the archiving
    settings from the page, split it into sections, move sections whose
    dates are old enough to per-date archive pages, and save both the
    trimmed page and the archive pages (asking the operator unless
    acceptall is set). Sections without a date get a hidden date comment
    appended so they can be aged on a later run.
    """
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        wikipedia.output(u'\n>>> %s <<<' % page.title())
        # Current time (for the per-page timing report at the end).
        sectiont0 = time.time()
        try:
            # Load the page's text from the wiki.
            original_text = page.get()
            if not page.canBeEdited():
                wikipedia.output(
                    u'Pagina %s wordt overgeslagen, deze pagina is beveiligd.'
                    % page.title())
                continue
        # No page, so ignore
        except wikipedia.NoPage:
            wikipedia.output(u'Pagina %s bestaat niet.' % page.title())
            continue
        # Get the archiving settings.
        settings = self.loadConfig(original_text)
        # No settings were found, leave a message on the page.
        if not settings:
            wikipedia.output(
                u'Er kunnen geen instellingen worden gevonden op %s. Er wordt een bericht achtergelaten.'
                % page.title())
            page.put(original_text + self.nosettingscomment,
                     self.commentsummary, minorEdit=False)
            continue
        # Incorrect magicwords settings were found, leave a message on the
        # page.
        # NOTE(review): loadConfig's result is checked via the local
        # `settings`, but all subsequent reads go through `self.settings` —
        # presumably loadConfig also stores the dict on self; confirm.
        if not self.settings[
                'magicwords'] == u'oudste' and not self.settings[
                    'magicwords'] == u'recentste':
            wikipedia.output(
                u'Pagina %s wordt overgeslagen, er zijn geen of foute magicwords instellingen opgegeven, opgegeven was %s. Er wordt een bericht achtergelaten.'
                % (page.title(), self.settings['magicwords']))
            page.put(original_text + self.nomagicwordscomment,
                     self.commentsummary, minorEdit=False)
            continue
        # Get the number of days after which a section should be archived.
        self.settings['dagen'] = int(self.settings['dagen'])
        # Get the template for the archive page, some variables still have
        # to be replaced using the section's oldest or most recent date.
        # Make it a subpage of the current page.
        archive_titletemplate = page.title() + '/' + self.doDateReplacements(
            self.settings['archief'].strip())
        # Get a datetime object for the current date and time to compare
        # other dates.
        todaydt = datetime.datetime.today()
        # Split the text into sections
        sections = self.resection.split(original_text)
        # The text before the first section won't be checked.
        new_text = sections[0]
        # A dictionary containing the archive page as key and the text as
        # item.
        archives_dictionary = {}
        # The archiving target to be used in summaries.
        archive_target = 'n.v.t.'
        # The number of sections that will be archived.
        numberofsections = 0
        # A dictionary containing the archive page as key and the number of
        # sections that will be archived to that page as item.
        nos_dictionary = {}
        # Check all sections. The split alternates header (odd index) and
        # body (even index), so step by 2 over the bodies.
        for i in range(2, len(sections), 2):
            archive_text = ''  # NOTE(review): assigned but never read.
            section_text = sections[i]
            # Check if the page shouldn't be archived.
            if self.renoarchive.search(section_text):
                # Ignore this section.
                new_text += sections[i - 1] + section_text
                continue
            # A list of the dates in wikisyntax.
            dates = self.redate.findall(section_text)
            if dates:
                # A list of the dates as datetimeobjects.
                datesdt = []
                # A list of the difference in seconds between the date and
                # now.
                differences = {}
                j = 0
                # Create datetime objects from all found dates.
                for date in dates:
                    datematch = self.redatematch.match(date[0])
                    try:
                        datedt = datetime.datetime(
                            int(datematch.group(3)),
                            self.monthn[datematch.group(2)],
                            int(datematch.group(1)),
                            int(datematch.group(4)),
                            int(datematch.group(5)))
                    except:
                        wikipedia.output(
                            u'Could not create a datetime object, skipping date'
                        )
                        continue
                    datesdt.append(datedt)
                    differencedt = todaydt - datedt
                    differences[
                        j] = differencedt.days * 86400 + differencedt.seconds
                    j += 1
                try:
                    # Keys sorted so index 0 is the most recent date.
                    diferences_sortedkeys = self.sort_by_value(differences)
                    difference = todaydt - datesdt[diferences_sortedkeys[0]]
                except:
                    wikipedia.output(
                        u'Could not get the difference, probably because of skipping a date.'
                    )
                    # Add daylight saving time
                    if time.daylight == 1:
                        dst = 'CEST'
                    else:
                        dst = 'CET'
                    # Stamp the section with a hidden date so a later run
                    # can age it.
                    section_text += '\n<!-- %s %s %s -->' % (
                        time.strftime('%d'),
                        self.month[int(time.strftime('%m'))],
                        time.strftime('%Y %H:%M (%Z)'))
                    new_text += sections[i - 1] + section_text
                    continue
                # Check if a section should be archived using the most
                # recent date.
                if difference.days >= self.settings['dagen']:
                    if self.settings['magicwords'] == 'recentste':
                        archive_title = self.doTitleReplacements(
                            archive_titletemplate,
                            datesdt[diferences_sortedkeys[0]])
                    else:
                        archive_title = self.doTitleReplacements(
                            archive_titletemplate,
                            datesdt[diferences_sortedkeys[
                                len(diferences_sortedkeys) - 1]])
                    # Add section to archive.
                    numberofsections += 1
                    # Add the text and number of sections to the
                    # corresponding dictionaries.
                    if archives_dictionary.has_key(archive_title):
                        archives_dictionary[archive_title] += sections[
                            i - 1] + section_text
                        nos_dictionary[archive_title] += 1
                    else:
                        archives_dictionary[archive_title] = sections[
                            i - 1] + section_text
                        nos_dictionary[archive_title] = 1
                    # Add archive_title to archive_target
                    archive_target = '[[%s]]' % archive_title
                else:
                    new_text += sections[i - 1] + section_text
            else:
                # No date was found, add one.
                # We have to fill in the date ourselves because MediaWiki
                # ignores <!-- ~~~~~ -->.
                # Add daylight saving time
                if time.daylight == 1:
                    dst = 'CEST'
                else:
                    dst = 'CET'
                section_text += '\n<!-- %s %s %s (%s) -->' % (
                    time.strftime('%d'),
                    self.month[int(time.strftime('%m'))],
                    time.strftime('%Y %H:%M'), dst)
                new_text += sections[i - 1] + section_text
        # Check if there are multiple archive pages
        if len(archives_dictionary) > 1:
            archive_target = '%i archiefpagina\'s' % len(archives_dictionary)
        if not original_text == new_text:
            if page.isRedirectPage() or not page.canBeEdited():
                wikipedia.output(u'Can not edit %s. Aborting.' % page.title())
                continue
            # Make sure every target archive page is editable before
            # touching anything.
            abort = False
            for title in archives_dictionary.keys():
                ap = wikipedia.Page(self.site, title)
                if ap.isRedirectPage() or not ap.canBeEdited():
                    wikipedia.output(u'Can not edit %s. Aborting.'
                                     % ap.title())
                    try:
                        page.put(page.get() + self.cantedit % ap.title(),
                                 self.canteditsummary, minorEdit=False)
                    except wikipedia.EditConflict:
                        wikipedia.output(
                            u'Pagina %s wordt overgeslagen vanwege een bewerkingsconflict.'
                            % (page.title()))
                    abort = True
                    break
            if abort:
                continue
        if not original_text == new_text:
            diff = len(original_text) - len(new_text)
            # NOTE(review): under Python 2 this is integer division — the
            # reported percentage is almost always 0; confirm intent.
            reduction = (diff / len(original_text)) * 100
            wikipedia.output(
                u'Er worden %d onderwerpen gearchiveerd ouder dan %d dagen. In totaal worden %d tekens aangepast, een reductie van %d procent.'
                % (numberofsections, self.settings['dagen'], diff, reduction))
            wikipedia.output(
                u'Deze onderwerpen worden gearchiveerd naar %d verschillende archiefpagina\'s.'
                % (len(archives_dictionary)))
            if not self.acceptall:
                cview = wikipedia.inputChoice(
                    u'Wilt u deze wijzigingen bekijken?', ['Yes', 'No'],
                    ['y', 'N'], 'N')
                if cview in ['y', 'Y']:
                    wikipedia.showDiff(original_text, new_text)
                choice = wikipedia.inputChoice(
                    u'Wilt u deze wijzigingen doorvoeren?',
                    ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['a', 'A']:
                    self.acceptall = True
            # Archive the page.
            # NOTE(review): `choice` is only bound in the branch above;
            # the short-circuit on self.acceptall keeps this from raising.
            if self.acceptall or choice in ['y', 'Y']:
                if numberofsections:
                    wikipedia.setAction(
                        'nlwikibots: [[Gebruiker:Erwin85/Bot/Archivering|Archivering]] van %i %s ouder dan %i dagen naar %s.'
                        % (numberofsections,
                           self.plural(numberofsections, 'onderwerp',
                                       'onderwerpen'),
                           self.settings['dagen'], archive_target))
                else:
                    wikipedia.setAction(
                        'nlwikibots: Datum toegevoegd in verband met [[Gebruiker:Erwin85/Bot/Archivering|archivering]].'
                    )
                try:
                    page.put(new_text)
                except wikipedia.EditConflict:
                    wikipedia.output(
                        u'Pagina %s wordt overgeslagen vanwege een bewerkingsconflict.'
                        % (page.title()), toStdout=True)
                    continue
                except wikipedia.LockedPage:
                    wikipedia.output(u'Pagina %s is beveiligd.'
                                     % (page.title()), toStdout=True)
                    continue
                # Append the collected sections to each archive page.
                for archive_title, archivetext in archives_dictionary.items(
                ):
                    redirect = False
                    if numberofsections:
                        wikipedia.setAction(
                            'nlwikibots: [[Gebruiker:Erwin85/Bot/Archivering|Archivering]] van %i %s ouder dan %i dagen van [[%s]].'
                            % (nos_dictionary[archive_title],
                               self.plural(nos_dictionary[archive_title],
                                           'onderwerp', 'onderwerpen'),
                               self.settings['dagen'], page.title()))
                    try:
                        archivepage = wikipedia.Page(self.site, archive_title)
                        # Load the page's text from the wiki
                        original_archivetext = archivepage.get()
                        # NOTE(review): this checks `page`, not
                        # `archivepage` — looks like it should test the
                        # archive page; confirm.
                        if not page.canBeEdited():
                            wikipedia.output(
                                u'Pagina %s wordt overgeslagen, deze pagina is beveiligd.'
                                % archive_title)
                            continue
                    except wikipedia.NoPage:
                        wikipedia.output(u'Pagina %s bestaat niet.'
                                         % archive_title)
                        original_archivetext = ''
                    except wikipedia.IsRedirectPage:
                        wikipedia.output(u'Pagina %s is een doorverwijzing.'
                                         % archive_title)
                        redirect = True
                    if not redirect:
                        if original_archivetext:
                            archivetext = original_archivetext + '\n' + \
                                          archivetext
                        else:
                            # New archive page: optionally start it from a
                            # substituted template.
                            if self.settings['sjabloon']:
                                archivetext = '{{subst:%s}}\n' % self.settings[
                                    'sjabloon'] + archivetext
                        try:
                            archivepage.put(archivetext)
                        except wikipedia.EditConflict:
                            wikipedia.output(
                                u'Pagina %s wordt overgeslagen vanwege een bewerkingsconflict.'
                                % (archive_title))
                    else:
                        wikipedia.output(
                            u'Leaving message informing that archive page is a redirect.'
                        )
                        try:
                            page.put(page.get() + self.cantedit
                                     % archive_title, self.canteditsummary,
                                     minorEdit=False)
                        except wikipedia.EditConflict:
                            wikipedia.output(
                                u'Pagina %s wordt overgeslagen vanwege een bewerkingsconflict.'
                                % (page.title()))
        else:
            # No need for archiving.
            wikipedia.output(u'Archivering is niet nodig.')
        # Execution time for this section.
        sectiontimediff = time.time() - sectiont0
        wikipedia.output(u'Executiontime: %ss.' % str(sectiontimediff))
    # Total execution time.
    timediff = time.time() - self.t0
    wikipedia.output(u'Total executiontime: %ss.' % str(timediff))
def main():
    """Scrape meteoam.it station pages and update the it.wikinews weather
    template.

    Reads the page model from modello_meteo.txt, fetches the report for
    every ICAO code in codiciStazioni (retrying once after 10 s on an HTTP
    error), extracts sky/wind/temperature data via regexes, substitutes
    them into the model, shows a diff and saves unless the operator
    declines. The -always flag skips confirmation.
    """
    args = wikipedia.handleArgs()
    accept_all = False
    for currentArgument in args:
        if currentArgument.startswith("-always"):
            accept_all = True
    templateFile = codecs.open("modello_meteo.txt", "r", "utf-8")
    modelloVoce = templateFile.read()  # Read the page model.
    templateFile.close()
    urlo = "http://www.meteoam.it/modules/tempoInAtto/infoStazione.php?icao=%s"
    replacements = {}
    for i in codiciStazioni:
        try:
            htmlText = pageText(urlo % i)
        except urllib2.HTTPError:
            try:
                wikipedia.output(
                    u"Errore del server. Aspetto 10 secondi... "
                    + time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()))
                time.sleep(10)
                # BUGFIX: the retry used pageText(urlo) — the raw "%s"
                # template URL — instead of the station-specific URL.
                htmlText = pageText(urlo % i)
            except urllib2.HTTPError:
                wikipedia.output(u"Errore del server. Chiudo.")
                return
        match = re.search(
            '<div class="titolo">Informazioni meteorologiche/climatologiche per (.*?)</div>',
            htmlText)
        nomeLocalita = unicode(match.group(1))
        wikipedia.output(nomeLocalita + " (" + i + ")")
        # Cloud cover: taken from the icon's alt attribute.
        nuvoloMatch = re.search(
            u'<b>Nuvolosità</b></font></td>\s*<td.*?>\s*.*?\s*</td>\s*<td .*?>\s*<p .*?><img .*? alt\s*="(.*?)" .*?>\s*</td>',
            htmlText)
        # Wind: group(1) strength, optional group(3) direction.
        ventoMatch = re.search(
            u'<b>Vento</b></font></td>\s*<td.*?>\s*.*?\s*</td>\s*<td .*?>\s*<p .*?><img .*? alt\s*="Vento (.*?)( Direzione (.*?))?" .*?>\s*</td>',
            htmlText)
        maxMatch = re.search(
            u'<td .*?>\s*<font .*?><b>Temperatura</b></font>\s*<p><font .*?><b> Max</b></font></td>\s*<td.*?>\s*.*?\s*</td>\s*<td .*?><b>\s*<font .*?>([+-]?\s*\d+)</font></b></td>',
            htmlText)
        minMatch = re.search(
            u'<td .*?>\s*<font .*?><b>Temperatura</b></font>\s*<p><font .*?><b> Min</b></font></td>\s*<td.*?>\s*.*?\s*</td>\s*<td .*?><b>\s*<font .*?>([+-]?\s*\d+)</font></b></td>',
            htmlText)
        if nuvoloMatch is not None:
            replacements['#tempo-' + nomeLocalita + '#'] = iconaTempo(
                nuvoloMatch.group(1))
        else:
            replacements['#tempo-' + nomeLocalita + '#'] = nd
        if ventoMatch is not None:
            replacements['#intensita-' + nomeLocalita
                         + '#'] = ventoMatch.group(1)
            if ventoMatch.group(2) is not None:
                replacements['#vento-' + nomeLocalita
                             + '#'] = ventoMatch.group(3)
            elif ventoMatch.group(2) is None and ventoMatch.group(
                    1) == "variabile":
                # Variable wind has no direction; report a weak intensity.
                replacements['#vento-' + nomeLocalita + '#'] = "variabile"
                replacements['#intensita-' + nomeLocalita + '#'] = "debole"
            else:
                replacements['#vento-' + nomeLocalita + '#'] = nd
        else:
            replacements['#vento-' + nomeLocalita + '#'] = nd
            replacements['#intensita-' + nomeLocalita + '#'] = nd
        if maxMatch is not None:
            replacements['#max-' + nomeLocalita + '#'] = maxMatch.group(1)
        else:
            replacements['#max-' + nomeLocalita + '#'] = nd
        if minMatch is not None:
            replacements['#min-' + nomeLocalita + '#'] = minMatch.group(1)
        else:
            replacements['#min-' + nomeLocalita + '#'] = nd
    nuovoTesto = massiveReplace(replacements, modelloVoce)
    page = wikipedia.Page(wikipedia.Site('it', 'wikinews'),
                          'Template:Pagina principale/Secondo piano/Meteo')
    vecchioTesto = page.get()
    wikipedia.showDiff(vecchioTesto, nuovoTesto)
    if not accept_all:
        choice = wikipedia.inputChoice(u"Modificare?", ['Yes', 'No', 'All'],
                                       ['y', 'N', 'a'], 'N')
    else:
        choice = 'y'
    if choice in ['A', 'a']:
        accept_all = True
        choice = 'y'
    if choice in ['Y', 'y']:
        page.put(nuovoTesto, u"Bot: Aggiorno meteo")
def replace_image(self, image, site, page_title, summary, replacement=None):
    """ The actual replacement. Giving None as argument for replacement
    will delink instead of replace.

    Returns 'ok' on success, 'failed' on unrecoverable errors, 'skipped'
    when a before_save hook vetoes the edit, and None when the page's
    namespace is not configured for delinking.
    """
    page = wikipedia.Page(site, page_title)

    hook = None

    # TODO: Per site config.
    if page.namespace(
    ) in self.CommonsDelinker.config['delink_namespaces']:
        try:
            text = page.get(get_redirect=True)
        except wikipedia.NoPage:
            return 'failed'
        new_text = text

        # Hooks may rewrite the image name / replacement before we start.
        m_image = ImmutableByReference(image)
        m_replacement = ImmutableByReference(replacement)
        self.CommonsDelinker.exec_hook(
            'before_replace', (page, summary, m_image, m_replacement))
        image = m_image.get()
        replacement = m_replacement.get()

        def create_regex(s):
            # First letter case-insensitive (MediaWiki title semantics).
            first, other = re.escape(s[0]), re.escape(s[1:])
            return ur'(?:[%s%s]%s)' % (first.upper(), first.lower(), other)

        def create_regex_i(s):
            # Fully case-insensitive version of s.
            return ur'(?:%s)' % u''.join(
                [u'[%s%s]' % (c.upper(), c.lower()) for c in s])

        # File and Media namespace aliases, in any case/spacing.
        namespaces = site.namespace(6, all=True) + site.namespace(-2,
                                                                  all=True)
        r_namespace = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(
            map(create_regex_i, namespaces))
        # Note that this regex creates a group!
        r_image = u'(%s)' % create_regex(image).replace(r'\_', '[ _]')

        def simple_replacer(match):
            # re.sub callback shared by all passes; `hook` selects which
            # per-pass hook gets a chance to veto or rewrite.
            m_replacement = ImmutableByReference(replacement)
            groups = list(match.groups())
            if hook:
                if False is self.CommonsDelinker.exec_hook(
                        '%s_replace' % hook,
                        (page, summary, image, m_replacement, match,
                         groups)):
                    return u''.join(groups)
            if m_replacement.get() is None:
                return u''
            else:
                groups[1] = m_replacement.get()
                return u''.join(groups)

        # Previously links in image descriptions will cause
        # unexpected behaviour: [[Image:image.jpg|thumb|[[link]] in description]]
        # will truncate at the first occurence of ]]. This cannot be
        # fixed using one regular expression.
        # This means that all ]] after the start of the image
        # must be located. If it then does not have an associated
        # [[, this one is the closure of the image.
        r_simple_s = u'(\[\[%s)%s' % (r_namespace, r_image)
        r_s = '\[\['
        r_e = '\]\]'
        # First determine where wikilinks start and end
        image_starts = [
            match.start() for match in re.finditer(r_simple_s, text)
        ]
        link_starts = [match.start() for match in re.finditer(r_s, text)]
        link_ends = [match.end() for match in re.finditer(r_e, text)]
        r_simple = u'(\[\[%s)%s(.*)' % (r_namespace, r_image)
        hook = 'simple'
        replacements = []
        for image_start in image_starts:
            # Bracket-match forward from the image start to find the ]]
            # that actually closes this [[Image:...]] construct.
            current_link_starts = [
                link_start for link_start in link_starts
                if link_start > image_start
            ]
            current_link_ends = [
                link_end for link_end in link_ends if link_end > image_start
            ]
            end = image_start
            if current_link_ends:
                end = current_link_ends[0]
            while current_link_starts and current_link_ends:
                start = current_link_starts.pop(0)
                end = current_link_ends.pop(0)
                if end <= start and end > image_start:
                    # Found the end of the image
                    break
            # Check whether this image is the first one on the line
            if image_start == 0:
                prev = ''
            else:
                prev = new_text[image_start - 1]
            if prev in ('', '\r', '\n') and replacement is None:
                # Kill all spaces after end
                while (end + 1) < len(new_text):
                    if new_text[end + 1] in WHITESPACE:
                        end += 1
                    else:
                        break
            # Add the replacement to the todo list. Doing the
            # replacement right know would alter the indices.
            replacements.append(
                (new_text[image_start:end],
                 re.sub(r_simple, simple_replacer,
                        new_text[image_start:end])))
        # Perform the replacements
        for old, new in replacements:
            if old:
                new_text = new_text.replace(old, new)

        # Remove the image from galleries
        hook = 'gallery'
        r_galleries = ur'(?s)(\<%s\>)(.*?)(\<\/%s\>)' % (
            create_regex_i('gallery'), create_regex_i('gallery'))
        r_gallery = ur'(?m)^((?:%s)?)%s(\s*(?:\|.*?)?\s*$)' % (r_namespace,
                                                               r_image)

        def gallery_replacer(match):
            return ur'%s%s%s' % (match.group(1),
                                 re.sub(r_gallery, simple_replacer,
                                        match.group(2)), match.group(3))

        new_text = re.sub(r_galleries, gallery_replacer, new_text)

        if text == new_text or self.CommonsDelinker.config.get(
                'force_complex', False):
            # All previous steps did not work, so the image is
            # likely embedded in a complicated template.
            hook = 'complex'
            r_templates = ur'(?s)(\{\{.*?\}\})'
            # Also strips invisible direction/space marks around the name.
            r_complicated = u'(?s)(?<=[|{=])[\s\u200E\uFEFF\u200B\u200C]*((?:%s)?)%s[\u200E\uFEFF\u200B\u200C]*' % (
                r_namespace, r_image)

            def template_replacer(match):
                return re.sub(r_complicated, simple_replacer,
                              match.group(1))

            new_text = re.sub(r_templates, template_replacer, text)

        if text != new_text:
            # Save to the wiki
            # Code for checking user page existance has been moved
            # to summary() code, to avoid checking the user page
            # for each removal.
            new_text = ImmutableByReference(new_text)
            m_summary = ImmutableByReference(summary)
            if False is self.CommonsDelinker.exec_hook(
                    'before_save', (page, text, new_text, m_summary)):
                return 'skipped'

            is_retry = False
            # NOTE(review): ServerError retries are unbounded — only
            # PageNotSaved is limited to a single retry; confirm intended.
            while True:
                try:
                    if self.CommonsDelinker.config.get('edit', True) and not \
                            ((self.CommonsDelinker.site.lang == 'commons') ^ \
                            (config.usernames.get('commons', {}).get(
                                'commons') == 'CommonsDelinker')):
                        page.put(new_text.get(), m_summary.get())
                        return 'ok'
                except wikipedia.ServerError, e:
                    output(u'Warning! ServerError: %s' % str(e))
                except wikipedia.EditConflict:
                    # Try again
                    output(u'Got EditConflict trying to remove %s from %s:%s.'
                           % \
                           (image, site, page_title))
                    return self.replace_image(image, site, page_title,
                                              summary, replacement=None)
                except wikipedia.PageNotSaved:
                    if is_retry:
                        return 'failed'
                    is_retry = True
                except wikipedia.LockedPage:
                    return 'failed'
if newpages: for (page, date, length, loggedIn, user, comment) in pywikibot.getSite().newpages(1000): checkPage(page, checknames, knownonly) elif start: for page in pagegenerators.PreloadingGenerator(pagegenerators.AllpagesPageGenerator(start=start,includeredirects=False)): checkPage(page, checknames, knownonly) if longpages: for (page, length) in pywikibot.getSite().longpages(500): checkPage(page, checknames, knownonly) else: title = ' '.join(title) while title != '': try: page = pywikibot.Page(mysite,title) text = page.get() except pywikibot.NoPage: print "Page does not exist." except pywikibot.IsRedirectPage: print "Page is a redirect page" else: checkPage(page, knownonly=knownonly) title = pywikibot.input(u"Which page to check now? (enter to stop)") finally: pywikibot.stopme() filename = pywikibot.config.datafilepath('externals/spelling', 'spelling-' + checklang + '.txt') if rebuild: list = knownwords.keys() list.sort()
def main():
    """Create "Town, ST" redirects to "Town, Statename" articles.

    Walks all pages from ``-start:``, and for every title containing
    ``%2C_<state name>`` creates (after confirmation, or automatically with
    ``-force``) a redirect whose state name is replaced by its two-letter
    postal abbreviation.

    Fixes vs. the previous revision:
    * ``wikipeda.ouput`` -> ``pywikibot.output`` (NameError typo).
    * The ``IsNotRedirectPage`` message used ``goal``, which is unbound in
      that branch (``getRedirectTarget`` raised before assigning it); it now
      reports ``pl.title()``.
    * ``Connecticut`` was missing from the otherwise complete 50-state table.
    """
    start = '0'
    force = False
    # Edit summary, per content language.
    msg = {'en': 'Creating state abbreviation redirect',
           'ar': u'إنشاء تحويلة اختصار الولاية',
           'fa': u'ایجاد تغییرمسیر برای نام اختصاری ایالت',
           'he': u'יוצר הפניה מראשי התיבות של המדינה',
           }
    # Full state name -> USPS two-letter abbreviation.
    abbrev = {
        'Alabama': 'AL',
        'Alaska': 'AK',
        'Arizona': 'AZ',
        'Arkansas': 'AR',
        'California': 'CA',
        'Colorado': 'CO',
        'Connecticut': 'CT',  # was missing; all other 49 states were listed
        'Delaware': 'DE',
        'Florida': 'FL',
        'Georgia': 'GA',
        'Hawaii': 'HI',
        'Idaho': 'ID',
        'Illinois': 'IL',
        'Indiana': 'IN',
        'Iowa': 'IA',
        'Kansas': 'KS',
        'Kentucky': 'KY',
        'Louisiana': 'LA',
        'Maine': 'ME',
        'Maryland': 'MD',
        'Massachusetts': 'MA',
        'Michigan': 'MI',
        'Minnesota': 'MN',
        'Mississippi': 'MS',
        'Missouri': 'MO',
        'Montana': 'MT',
        'North Carolina': 'NC',
        'North Dakota': 'ND',
        'Nebraska': 'NE',
        'Nevada': 'NV',
        'New Hampshire': 'NH',
        'New Jersey': 'NJ',
        'New Mexico': 'NM',
        'New York': 'NY',
        'Ohio': 'OH',
        'Oklahoma': 'OK',
        'Oregon': 'OR',
        'Pennsylvania': 'PA',
        'Rhode Island': 'RI',
        'South Carolina': 'SC',
        'South Dakota': 'SD',
        'Tennessee': 'TN',
        'Texas': 'TX',
        'Utah': 'UT',
        'Vermont': 'VT',
        'Virginia': 'VA',
        'Washington': 'WA',
        'West Virginia': 'WV',
        'Wisconsin': 'WI',
        'Wyoming': 'WY'
    }
    for arg in pywikibot.handleArgs():
        if arg.startswith('-start:'):
            start = arg[7:]
        elif arg == '-force':
            force = True
        else:
            pywikibot.output(
                u'Warning: argument "%s" not understood; ignoring.' % arg)
    mysite = pywikibot.getSite()
    for p in mysite.allpages(start=start):
        for sn in abbrev:
            # Titles come URL-ish encoded: the comma is "%2C" and spaces "_".
            R = re.compile('[^[]]*' + '\%2C_' + sn)
            for res in R.findall(p.title()):
                pl = pywikibot.Page(mysite, p.title().replace(sn, abbrev[sn]))
                # A bit hacking here - the real work is done in the
                # 'except pywikibot.NoPage' part rather than the 'try'.
                try:
                    goal = pl.getRedirectTarget().title()
                    if pywikibot.Page(mysite, goal):
                        pywikibot.output(
                            u"Not creating %s - redirect already exists."
                            % goal)
                    else:
                        pywikibot.output(
                            u"WARNING!!! %s already exists but redirects elsewhere!"
                            % goal)
                except pywikibot.IsNotRedirectPage:
                    # BUG FIX: 'goal' is unbound here (getRedirectTarget
                    # raised before assigning it); report the page we probed.
                    pywikibot.output(
                        u"WARNING!!! Page %s already exists and is not a redirect. Please check page!"
                        % pl.title())
                except pywikibot.NoPage:
                    # Abbreviated title does not exist yet: create it.
                    change = ''
                    if p.isRedirectPage():
                        p2 = p.getRedirectTarget()
                        # BUG FIX: was 'wikipeda.ouput' (NameError).
                        pywikibot.output(
                            u'Note: goal page is redirect. Creating redirect to "%s" to avoid double redirect.'
                            % p2.title().replace("%2C", ",").replace("_", " "))
                    else:
                        p2 = p
                    if force:
                        change = 'y'
                    else:
                        while not change in ['y', 'n']:
                            pywikibot.output(
                                u"Create redirect %s"
                                % pl.title().replace("%2C", ",").replace("_", " "))
                            change = raw_input("(y/n)? ")
                    if change == 'y':
                        text = ('#REDIRECT [['
                                + p2.title().replace("%2C", ",").replace("_", " ")
                                + ']]')
                        pl.put(text,
                               comment=pywikibot.translate(mysite, msg),
                               minorEdit='0')
def main():
    """Scrape the latest SuperEnalotto draw from sisal.it and publish it
    as an it.wikinews article built from ``modello_superenalotto.txt``.

    Flags: ``-always`` saves without asking; ``-force`` re-creates the
    article even if it already exists.
    """
    args = wikipedia.handleArgs()
    all = False
    force = False
    for currentArgument in args:
        if currentArgument.startswith("-always"):
            all = True
        if currentArgument.startswith("-force"):
            force = True
    # Read the article template from disk (UTF-8).
    templateFile = codecs.open("modello_superenalotto.txt", "r", "utf-8")
    modelloVoce = templateFile.read()  # Reads the page template
    templateFile.close()
    now = datetime.datetime.utcnow()
    urlo = "http://www.sisal.it/se/se_main/1,4136,se_Default,00.html"
    wikipedia.output(u'Prendo la pagina dal server...')
    # Fetch the lottery page; on an HTTP error wait 10 s and retry once,
    # then give up.
    try:
        htmlText = pageText(urlo)
    except urllib2.HTTPError:
        try:
            wikipedia.output(
                u"Errore del server. Aspetto 10 secondi... "
                + time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()))
            time.sleep(10)
            htmlText = pageText(urlo)
        except urllib2.HTTPError:
            wikipedia.output(u"Errore del server. Chiudo.")
            return
    # Screen-scrape the fixed HTML layout. NOTE(review): each re.search
    # result is used without a None check below; any site redesign makes
    # these .group() calls raise AttributeError.
    # The six drawn numbers.
    numeri = re.search(
        "<TABLE[^>]*>\s*<tr>\s*<td[^>]*>\s*<a[^>]*><nobr><font[^>]*>\s*(\d+) - \s*(\d+) - \s*(\d+) - \s*(\d+) - \s*(\d+) - \s*(\d+)\s*</font>\s*</nobr>\s*</a>\s*</td>\s*</tr>\s*</table>",
        htmlText)
    # "Jolly" bonus number.
    jolly = re.search(
        "<td[^>]*background=\"/giochi/se2006/hp2009/img/BTN_JOLLY.gif\"[^>]*>\s*<a[^>]*><font[^>]*><b>(\d+)</b></font></a>\s*</td>",
        htmlText)
    # "SuperStar" number.
    superstar = re.search(
        "<td[^>]*background=\"/giochi/se2006/hp2009/img/BTN_SUPERSTAR.gif\"[^>]*>\s*<a[^>]*><font[^>]*><b>(\d+)</b></font></a>\s*</td>",
        htmlText)
    # Draw number, weekday and date (groups: id, dow, day, month, year).
    concorso = re.search(
        "<font[^>]*><font[^>]*><b>Concorso n. (\d+) di (.*?) (\d+)/(\d+)/(\d+)</b></font></a>",
        htmlText)
    # Prize pool of this draw.
    montepremiparz = re.search(
        "<td><a[^>]*><font class=testo8[^>]*>Del Concorso</a></td>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*>(.*?) euro</font></a></td>",
        htmlText)
    # Jackpot carried over from previous draws.
    jackpot = re.search(
        "<td><a[^>]*><font class=testo8[^>]*>\s*Riporto Jackpot</a>\s*</td>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*>(.*?) euro</font></a></td>",
        htmlText)
    # Total prize pool.
    montepremitot = re.search(
        "<td><a[^>]*><font class=testo8[^>]*><b>Totale</a></td>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*><b>(.*?) euro</font></a></td>",
        htmlText)
    # Per-category row pattern; '%s' is substituted with each entry of
    # 'quotes' below (group 1 = winners, group 2 = payout).
    # NOTE(review): the quoting around %s appears garbled by text
    # extraction ("%s" inside a double-quoted literal is not valid
    # Python) -- confirm against the original source file.
    bloccoQuote = "<tr[^>]*>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*>(.*?)</font></a></td>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*>"%s"</a></td>\s*<td[^>]*><a[^>]*><font class=testo8[^>]*>(.*?)</font></a></td>\s*</tr>"
    page = wikipedia.Page(
        wikipedia.getSite(code='it', fam='wikinews'),
        "Italia: concorso n. " + concorso.group(1) + "/" + concorso.group(5)
        + " del SuperEnalotto")
    # Without -force, an existing article means there is no new draw.
    if page.exists() and not force:
        wikipedia.output("Nessuna nuova estrazione. Mi fermo.")
        return
    elencoSostituzioni = {  # Substitutes the variables in the template
        '#super-id': concorso.group(1),
        '#dow': concorso.group(2).replace('ì', u'ì'),
        '#giorno': concorso.group(3),
        '#mese': concorso.group(4),
        '#anno': concorso.group(5),
        '#num-1': numeri.group(1),
        '#num-2': numeri.group(2),
        '#num-3': numeri.group(3),
        '#num-4': numeri.group(4),
        '#num-5': numeri.group(5),
        '#num-6': numeri.group(6),
        '#num-jolly': jolly.group(1),
        '#num-superstar': superstar.group(1),
        '#montepremi-parz': montepremiparz.group(1),
        '#jackpot': jackpot.group(1),
        '#montepremi-tot': montepremitot.group(1),
    }
    # Winning categories; each becomes a '#<category>#' and
    # '#vincitori-<category>#' placeholder pair in the template.
    quotes = [
        'punti 6', 'punti 5\+', 'punti 5', 'punti 4', 'punti 3', '5 stella',
        '4 stella', '3 stella', '2 stella', '1 stella', '0 stella'
    ]
    for c in quotes:
        match = re.search(bloccoQuote % c, htmlText)
        elencoSostituzioni['#' + c.lower().replace(' ', '-') + '#'] = \
            match.group(2).replace('nessuna', '0')
        elencoSostituzioni['#vincitori-' + c.lower().replace(' ', '-') + '#'] = \
            match.group(1).replace('nessuna', '0')
    nuovoTesto = massiveReplace(elencoSostituzioni, modelloVoce)
    #page = wikipedia.Page(wikipedia.getSite(code='it', fam='wikinews'), "Utente:BimBot/Sandbox") #DEBUG
    wikipedia.output(">>>>> " + page.title() + " <<<<<")
    try:
        vecchioTesto = page.get()
    except wikipedia.NoPage:
        vecchioTesto = ''
    wikipedia.showDiff(vecchioTesto, nuovoTesto)
    # Ask for confirmation unless -always was given; 'All' switches to
    # automatic mode for the rest of the run.
    if not all:
        choice = wikipedia.inputChoice(u"Modificare?", ['Yes', 'No', 'All'],
                                       ['y', 'N', 'a'], 'N')
    else:
        choice = 'y'
    if choice in ['A', 'a']:
        all = True
        choice = 'y'
    if choice in ['Y', 'y']:
        page.put(nuovoTesto,
                 u"Bot: Inserisco nuova estrazione del SuperEnalotto")
def __iter__(self): tableTagR = re.compile('<table', re.IGNORECASE) for entry in self.xmldump.parse(): if tableTagR.search(entry.text): yield pywikibot.Page(pywikibot.getSite(), entry.title)
'fecha': fecha, 'titulo': titulo, 'textoexp': textoexp, 'titulosub': titulosub, 'textosub': textosub, 'asentimiento': asentimiento, 'presentes': presentes, 'afavor': afavor, 'encontra': encontra, 'abstenciones': abstenciones, 'novotan': novotan, 'votos': votos, }) p = wikipedia.Page( wikipedia.Site('15mpedia', '15mpedia'), u'Lista de votaciones del Congreso de los Diputados/%s/Sesión %s/Votación %s' % (legislatura, sesion, numerovotacion)) p.put(output, u'BOT - Creando página de votación del Congreso de los Diputados') votaciones = u'' votacionesids.sort() for votacionid in votacionesids: votaciones += u""" === Votación %s === {{main|Lista de votaciones del Congreso de los Diputados/%s/Sesión %s/Votación %s}} {{:Lista de votaciones del Congreso de los Diputados/%s/Sesión %s/Votación %s}} """ % (votacionid, legislatura, sesion, votacionid, legislatura, sesion, votacionid) output = string.Template(
wikipediaen=wikipedia.Site('en', 'wikipedia') gen=pagegenerators.AllpagesPageGenerator(start=st, namespace=0, includeredirects=False, site=commons) preloadingGen=pagegenerators.PreloadingGenerator(gen, pageNumber=100, lookahead=100) for page in preloadingGen: if page.isRedirectPage() or page.isDisambig(): continue else: if not getAllInterwikis(page.get()): wtitle=page.title() wtext=newtext=page.get() summary="BOT -" eniw=getEnglishInterwiki(newtext) wikipedia.output("=== %s ===" % wtitle) wikipedia.output("La galería NO tiene interwikis") enpage=wikipedia.Page(ensite, wtitle) if enpage.exists() and not enpage.isRedirectPage() and not enpage.isDisambig(): commonsimages=getImageTitles(wtitle, commonssite) enimages=getImageTitles(wtitle, ensite) for image in enimages: if commonsimages.count(image)!=0: #con que una imagen coincida, ya vale eniws=enpage.interwiki() eniws.append(enpage) eniws.sort() iws_="" for iw in eniws: iws_+="[[%s:%s]]\n" % (iw.site().lang, iw.title()) page.put(u"%s\n\n%s" % (wtext, iws_), u"BOT - Adding %d interwiki(s) from [[:en:%s]]" % (len(eniws), enpage.title())) break continue else:
def run(self):
    """Transfer ``self.imagePage`` to Wikimedia Commons as ``self.newname``.

    Asks the toolserver CommonsHelper for a Commons-ready description,
    uploads the file, then tags the local image with {{NowCommons}} and,
    if the name changed, rewrites all local usages via ImageRobot.
    """
    # POST form for Magnus' CommonsHelper; field names are what the tool
    # expects ('doit' is Dutch for "execute").
    tosend = {'language': self.imagePage.site().language().encode('utf-8'),
              'image': self.imagePage.titleWithoutNamespace().encode('utf-8'),
              'newname': self.newname.encode('utf-8'),
              'project': self.imagePage.site().family.name.encode('utf-8'),
              'username': '',
              'commonsense': '1',
              'remove_categories': '1',
              'ignorewarnings': '1',
              'doit': 'Uitvoeren'
              }
    tosend = urllib.urlencode(tosend)
    print tosend
    CH = pageTextPost('http://www.toolserver.org/~magnus/commonshelper.php',
                      tosend)
    print 'Got CH desc.'
    # The generated description sits inside the page's <textarea>;
    # capture its attribute block so the closing split matches exactly.
    tablock = CH.split('<textarea ')[1].split('>')[0]
    CH = CH.split('<textarea ' + tablock + '>')[1].split('</textarea>')[0]
    # NOTE(review): this replace looks like it should map the HTML entity
    # to the character, but both arguments render identically here --
    # confirm against the original source.
    CH = CH.replace(u'×', u'×')
    CH = self.fixAuthor(CH)
    pywikibot.output(CH);
    # I want every picture to be tagged with the bottemplate so i can check my contributions later.
    CH = u'\n\n{{BotMoveToCommons|' + self.imagePage.site().language() + \
         '.' + self.imagePage.site().family.name + \
         '|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}' + CH
    if self.category:
        # Drop the "uncategorized" placeholder and append the chosen category.
        CH = CH.replace(
            u'{{subst:Unc}} <!-- Remove this line once you have added categories -->',
            u'')
        CH = CH + u'[[Category:' + self.category + u']]'
    bot = UploadRobot(url=self.imagePage.fileUrl(), description=CH,
                      useFilename=self.newname, keepFilename=True,
                      verifyDescription=False, ignoreWarning=True,
                      targetSite=pywikibot.getSite('commons', 'commons'))
    bot.run()
    # Should check if the image actually was uploaded
    if pywikibot.Page(pywikibot.getSite('commons', 'commons'),
                      u'Image:' + self.newname).exists():
        # Get a fresh copy, force to get the page so we dont run into edit conflicts
        imtxt = self.imagePage.get(force=True)
        # Remove the move to commons templates
        if self.imagePage.site().language() in moveToCommonsTemplate:
            for moveTemplate in \
                    moveToCommonsTemplate[self.imagePage.site().language()]:
                imtxt = re.sub(u'(?i)\{\{' + moveTemplate + u'[^\}]*\}\}',
                               u'', imtxt)
        # add {{NowCommons}}
        if self.imagePage.site().language() in nowCommonsTemplate:
            addTemplate = \
                nowCommonsTemplate[self.imagePage.site().language()] \
                % self.newname
        else:
            addTemplate = nowCommonsTemplate['_default'] % self.newname
        if self.imagePage.site().language() in nowCommonsMessage:
            commentText = nowCommonsMessage[self.imagePage.site().language()]
        else:
            commentText = nowCommonsMessage['_default']
        pywikibot.showDiff(self.imagePage.get(), imtxt + addTemplate)
        self.imagePage.put(imtxt + addTemplate, comment=commentText)
        # Pages that embed the image, preloaded for the rename pass below.
        self.gen = pagegenerators.FileLinksGenerator(self.imagePage)
        self.preloadingGen = pagegenerators.PreloadingGenerator(self.gen)
        # If the image is uploaded under a different name, replace all instances
        if self.imagePage.titleWithoutNamespace() != self.newname:
            if self.imagePage.site().language() in imageMoveMessage:
                moveSummary = \
                    imageMoveMessage[self.imagePage.site().language()] \
                    % (self.imagePage.titleWithoutNamespace(), self.newname)
            else:
                moveSummary = imageMoveMessage['_default'] \
                    % (self.imagePage.titleWithoutNamespace(), self.newname)
            imagebot = ImageRobot(generator=self.preloadingGen,
                                  oldImage=self.imagePage.titleWithoutNamespace(),
                                  newImage=self.newname,
                                  summary=moveSummary, always=True, loose=True)
            imagebot.run()
    return
def __init__(self):
    """Bootstrap the image-mirror bot and mirror the image categories.

    Reads the wiki list from [[Bots/Wiki]] and the image list from
    [[Bots/ImageMirror/Images]] on the anime wiki, then pushes the
    Images category tree to every listed site.  ``self.runOk`` is True
    only when both bootstrap pages were read successfully; on failure
    the constructor returns early with ``runOk`` still False.

    BUG FIX: the previous revision did ``return False`` inside
    ``__init__``, which raises ``TypeError: __init__() should return
    None``.  A bare ``return`` keeps the intended "failed" signal in
    ``self.runOk``.
    """
    self.runOk = False
    # Setup Familys for Wikia Involved
    self.anime = wikipedia.getSite(code=u'en', fam=u'anime')
    wikipedia.setAction(wikipedia.translate(self.anime, msg))
    self.siteList = []
    self.imageList = []
    # Get Project Wiki Listing
    wikiaIds = []
    page = wikipedia.Page(self.anime, u'Bots/Wiki', None, 4)  # 4=Project Namespace
    try:
        text = page.get()
        # Strip everything outside the <!-- ||START|| --> / ||END|| markers.
        r = re.compile(u'^.*<!-- \|\|START\|\| -->\n?',
                       re.UNICODE | re.DOTALL)
        text = re.sub(r, u'', text)
        r = re.compile(u'\n?<!-- \|\|END\|\| -->.*$',
                       re.UNICODE | re.DOTALL)
        text = re.sub(r, u'', text)
        r = re.compile(u'\n', re.UNICODE | re.DOTALL)
        wikilist = re.split(r, text)
        # Skip comment lines (#...), blanks and wiki links ([...).
        r = re.compile(u'^#|^\s*$|^\[',
                       re.UNICODE | re.MULTILINE | re.DOTALL)
        for wiki in wikilist:
            if not re.match(r, wiki):
                wikiaIds.append(wiki)
    except wikipedia.NoPage:
        return  # was 'return False': TypeError in __init__
    for wiki in wikiaIds:
        self.siteList.append(wikipedia.getSite(code=u'en', fam=wiki))
    # Get Image Info List
    page = wikipedia.Page(self.anime, u'Bots/ImageMirror/Images', None, 4)  # 4=Project Namespace
    try:
        text = page.get()
        # Same marker-stripping dance as for the wiki list above.
        r = re.compile(u'^.*<!-- \|\|START\|\| -->\n?',
                       re.UNICODE | re.DOTALL)
        text = re.sub(r, u'', text)
        r = re.compile(u'\n?<!-- \|\|END\|\| -->.*$',
                       re.UNICODE | re.DOTALL)
        text = re.sub(r, u'', text)
        r = re.compile(u'\n', re.UNICODE | re.DOTALL)
        images = re.split(r, text)
        r = re.compile(u'^#|^\s*$', re.UNICODE | re.MULTILINE | re.DOTALL)
        for image in images:
            if not re.match(r, image):
                self.imageList.append(image)
    except wikipedia.NoPage:
        return  # was 'return False': TypeError in __init__
    self.runOk = True
    # Mirror the Images category and all subcategorys to all the wiki.
    ImageCategorys = []
    cat = catlib.Category(self.anime, u'Category:Images')
    ImageCategorys.append(cat)
    catlist = cat.subcategories(True)
    for category in catlist:
        ImageCategorys.append(category)
    for category in ImageCategorys:
        categorySource = u'{{networkMirror|%s|anime|category}}\n%s' % (
            category.title(), category.get())
        if categorySource != u'':
            for site in self.siteList:
                siteCategory = catlib.Category(site, category.title())
                siteSource = u''
                try:
                    siteSource = siteCategory.get()
                except wikipedia.NoPage:
                    wikipedia.output(
                        u'Site %s has no %s category, creating it'
                        % (site, category.title()))
                if siteSource != categorySource:
                    wikipedia.output(
                        u'Site \'%s\' category status: Needs Updating' % site)
                    wikipedia.output(u'Updating category on %s' % site)
                    siteCategory.put(categorySource)
                else:
                    wikipedia.output(u'Site \'%s\' category status: Ok'
                                     % site)
        else:
            wikipedia.output(u'Category %s is blank, skipping category'
                             % category.title())
    # Anime should only be in the list after categorys have been done.
    self.siteList.append(self.anime)
def main(args):
    """Drive the image-to-Commons transfer over a page generator.

    For every local image page (namespace 6) that passes the autoskip
    filter, either reuses the local name (``-always``) or asks via a
    Tk dialog for a new name, then starts an ``imageTransfer`` thread.
    ``-cc:<cat>`` sets the Commons category for the uploads.
    """
    generator = None;
    #newname = "";
    imagepage = None;
    always = False
    category = u''
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg.startswith('-cc:'):
            category = arg[len('-cc:'):]
        else:
            genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            'You have to specify the generator you want to use for the script!')
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    for page in pregenerator:
        skip = False
        # Only existing, non-redirect image pages (namespace 6) qualify.
        if page.exists() and (page.namespace() == 6) and \
           (not page.isRedirectPage()):
            imagepage = pywikibot.ImagePage(page.site(), page.title())
            # First do autoskip.
            if doiskip(imagepage.get()):
                pywikibot.output("Skipping " + page.title())
                skip = True
            else:
                # The first upload is last in the list.
                try:
                    username = imagepage.getLatestUploader()[0]
                except NotImplementedError:
                    # No API, using the page file instead
                    (datetime, username, resolution, size,
                     comment) = imagepage.getFileVersionHistory().pop()
                if always:
                    # Non-interactive: keep the local name, but skip if a
                    # Commons file with that name already exists.
                    newname = imagepage.titleWithoutNamespace()
                    CommonsPage = pywikibot.Page(
                        pywikibot.getSite('commons', 'commons'),
                        u'File:' + newname)
                    if CommonsPage.exists():
                        skip = True
                else:
                    # Interactive: loop until a free Commons name is chosen
                    # or the user skips this image.
                    while True:
                        # Do the Tkdialog to accept/reject and change te name
                        (newname, skip) = Tkdialog(
                            imagepage.titleWithoutNamespace(),
                            imagepage.get(), username,
                            imagepage.permalink(),
                            imagepage.templates()).getnewname()
                        if skip:
                            pywikibot.output('Skipping this image')
                            break
                        # Did we enter a new name?
                        if len(newname) == 0:
                            # Take the old name
                            newname = imagepage.titleWithoutNamespace()
                        else:
                            newname = newname.decode('utf-8')
                        # Check if the image already exists
                        CommonsPage = pywikibot.Page(
                            pywikibot.getSite('commons', 'commons'),
                            u'File:' + newname)
                        if not CommonsPage.exists():
                            break
                        else:
                            pywikibot.output(
                                'Image already exists, pick another name or skip this image')
                        # We dont overwrite images, pick another name, go to the start of the loop
            if not skip:
                # Each transfer runs in its own thread.
                imageTransfer(imagepage, newname, category).start()
    pywikibot.output(u'Still ' + str(threading.activeCount())
                     + u' active threads, lets wait')
    for openthread in threading.enumerate():
        if openthread != threading.currentThread():
            openthread.join()
    pywikibot.output(u'All threads are done')
def main():
    """Tag orphan (lonely) pages with the project's orphan template.

    Optionally gated by an enable page (``-enable``), reports disambig
    orphans to a collection page (``-disambig``), and supports
    ``-limit``, ``-newpages`` and ``-always``.

    BUG FIX: both redirect handlers referenced the misspelled
    ``wikiepedia.IsRedirect`` -- a NameError as soon as the exception
    fired; the correct class is ``wikipedia.IsRedirectPage``.  Also
    fixed the "esist" typos in the user-facing messages.
    """
    # Load the configurations in the function namespace
    global commento
    global Template
    global disambigPage
    global commenttodisambig
    global exception
    enablePage = None  # Check if someone set an enablePage or not
    limit = 50000  # All the pages! (I hope that there aren't so many lonely pages in a project..)
    generator = None  # Check if the bot should use the default generator or not
    genFactory = pagegenerators.GeneratorFactory()  # Load all the default generators!
    nwpages = False  # Check variable for newpages
    always = False  # Check variable for always
    disambigPage = None  # If no disambigPage given, not use it.
    # Arguments!
    for arg in wikipedia.handleArgs():
        if arg.startswith('-enable'):
            if len(arg) == 7:
                enablePage = wikipedia.input(
                    u'Would you like to check if the bot should run or not?')
            else:
                enablePage = arg[8:]
        if arg.startswith('-disambig'):
            if len(arg) == 9:
                disambigPage = wikipedia.input(
                    u'In which page should the bot save the disambig pages?')
            else:
                disambigPage = arg[10:]
        elif arg.startswith('-limit'):
            if len(arg) == 6:
                limit = int(
                    wikipedia.input(u'How many pages do you want to check?'))
            else:
                limit = int(arg[7:])
        elif arg.startswith('-newpages'):
            if len(arg) == 9:
                nwlimit = 50  # Default: 50 pages
            else:
                nwlimit = int(arg[10:])
            generator = wikipedia.getSite().newpages(number=nwlimit)
            nwpages = True
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    # Retrive the site
    wikiSite = wikipedia.getSite()
    if not generator:
        generator = genFactory.getCombinedGenerator()
    # If the generator is not given, use the default one
    if not generator:
        generator = wikiSite.lonelypages(repeat=True, number=limit)
    # Take the configurations according to our project
    comment = wikipedia.translate(wikiSite, commento)
    commentdisambig = wikipedia.translate(wikiSite, commenttodisambig)
    template = wikipedia.translate(wikiSite, Template)
    exception = wikipedia.translate(wikiSite, exception)
    # EnablePage part
    if enablePage != None:
        # Define the Page Object
        enable = wikipedia.Page(wikiSite, enablePage)
        # Loading the page's data
        try:
            getenable = enable.get()
        except wikipedia.NoPage:
            wikipedia.output(
                u"%s doesn't exist, I use the page as if it was blank!"
                % enable.title())
            getenable = ''
        except wikipedia.IsRedirectPage:  # was: wikiepedia.IsRedirect (NameError)
            wikipedia.output(u"%s is a redirect, skip!" % enable.title())
            getenable = ''
        # If the enable page is set to disable, turn off the bot
        # (useful when the bot is run on a server)
        if getenable != 'enable':
            wikipedia.output('The bot is disabled')
            return
    # DisambigPage part
    if disambigPage != None:
        disambigpage = wikipedia.Page(wikiSite, disambigPage)
        try:
            disambigtext = disambigpage.get()
        except wikipedia.NoPage:
            wikipedia.output(u"%s doesn't exist, skip!"
                             % disambigpage.title())
            disambigtext = ''
        except wikipedia.IsRedirectPage:  # was: wikiepedia.IsRedirect (NameError)
            wikipedia.output(u"%s is a redirect, don't use it!"
                             % disambigpage.title())
            disambigPage = None
    # Main Loop
    for page in generator:
        if nwpages == True:
            page = page[0]  # The newpages generator returns a tuple, not a Page object.
        wikipedia.output(u"Checking %s..." % page.title())
        if page.isRedirectPage():  # If redirect, skip!
            wikipedia.output(u'%s is a redirect! Skip...' % page.title())
            continue
        # refs is not a list, it's a generator while refsList... is a list, yes.
        refs = page.getReferences()
        refsList = list()
        for j in refs:
            if j == None:
                # We have to find out why the function returns that value
                wikipedia.output(u'Error: 1 --> Skip page')
                continue
            refsList.append(j)
        # This isn't possible with a generator
        if refsList != []:
            wikipedia.output(u"%s isn't orphan! Skip..." % page.title())
            continue
        # Never understood how a list can turn in "None", but it happened :-S
        elif refsList == None:
            # We have to find out why the function returns that value
            wikipedia.output(u'Error: 2 --> Skip page')
            continue
        else:
            # Ok, no refs, no redirect...
            # let's check if there's already the template
            try:
                oldtxt = page.get()
            except wikipedia.NoPage:
                wikipedia.output(u"%s doesn't exist! Skip..." % page.title())
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output(u"%s is a redirect! Skip..." % page.title())
                continue
            # I've used a loop in a loop. If I use continue in the second
            # loop, it won't do anything in the first. So let's create a
            # variable to avoid this problem.
            Find = False
            for regexp in exception:
                res = re.findall(regexp, oldtxt.lower())
                # Found a template! Let's skip the page!
                if res != []:
                    wikipedia.output(
                        u'Your regex has found something in %s, skipping...'
                        % page.title())
                    Find = True
                    break
            # Skip the page..
            if Find:
                continue
            # Is the page a disambig?
            if page.isDisambig() and disambigPage != None:
                wikipedia.output(u'%s is a disambig page, report..'
                                 % page.title())
                if not page.title().lower() in disambigtext.lower():
                    disambigtext = u"%s\n*[[%s]]" % (disambigtext,
                                                     page.title())
                    disambigpage.put(disambigtext, commentdisambig)
                continue
            # Is the page a disambig but there's not disambigPage? Skip!
            elif page.isDisambig():
                wikipedia.output(u'%s is a disambig page, skip...'
                                 % page.title())
                continue
            else:
                # Ok, the page need the template. Let's put it there!
                newtxt = u"%s\n%s" % (template, oldtxt)  # Adding the template in the text
                wikipedia.output(u"\t\t>>> %s <<<" % page.title())  # Showing the title
                wikipedia.showDiff(oldtxt, newtxt)  # Showing the changes
                choice = 'y'  # Default answer
                if not always:
                    choice = wikipedia.inputChoice(
                        u'Orphan page found, shall I add the template?',
                        ['Yes', 'No', 'All'], ['y', 'n', 'a'])
                if choice == 'a':
                    always = True
                    choice = 'y'
                if choice == 'y':
                    try:
                        page.put(newtxt, comment)
                    except wikipedia.EditConflict:
                        wikipedia.output(u'Edit Conflict! Skip...')
                        continue
import os import wikipedia import re import datetime commonssite=wikipedia.Site("commons", "commons") username="******" password="" f=open("/home/emijrp/.my.cnf2", "r") raw=f.read() f.close() m=re.findall(ur'%s = *"(.*)"' % username, raw) password=m[0] today=datetime.date.today() page=wikipedia.Page(commonssite, u"Template:Potd/%s" % today.isoformat()) m=re.findall(ur'(?i)\{\{ *potd filename *\| *1? *=? *([^\|]*?) *\|', page.get()) imagename=m[0] page=wikipedia.Page(commonssite, u"Template:Potd/%s (en)" % today.isoformat()) m=re.findall(ur'(?i)\{\{ *potd description *\| *1? *=? *(.*?) *\| *2? *=? *en *\|', page.get()) imagedesc=m[0] imagedesc=re.sub(ur'(?i)\[\[([^\|]*?)\|(?P<label>[^\]]*?)\]\]', ur'\g<label>', imagedesc) imagedesc=re.sub(ur'(?i)[\[\]]', ur'', imagedesc) if len(imagedesc)>40: imagedesc=u'%s...' % (imagedesc[:40]) imagename_=re.sub(" ", "_", imagename) msg=u'%s → http://commons.wikimedia.org/wiki/File:%s #commons #photos #wikipedia' % (imagedesc, imagename_) orden='curl -u %s:%s -d status="%s" http://twitter.com/statuses/update.json' % (username, password, msg.encode("utf-8")) os.system(orden)
u'eilimit': 'max', u'format': 'json' } counts, destmap, catmap = {}, {}, {} catlist, catpages, nonemptypages = [], [], [] target = self.cat_redirect_cat[self.site.family.name][self.site.lang] # get a list of all members of the category-redirect category for result in self.query_results( generator=u'categorymembers', gcmtitle=target, gcmnamespace=u'14', # CATEGORY gcmlimit=u'max', prop='info|categoryinfo'): for catdata in result['pages'].values(): thispage = pywikibot.Page(self.site, catdata['title']) catpages.append(thispage) if 'categoryinfo' in catdata \ and catdata['categoryinfo']['size'] != "0": # save those categories that have contents nonemptypages.append(thispage) # preload the category pages for redirected categories pywikibot.output(u"") pywikibot.output(u"Preloading %s category redirect pages" % len(catpages)) for cat in pagegenerators.PreloadingGenerator(catpages, 120): cat_title = cat.titleWithoutNamespace() if "category redirect" in cat_title: self.log_text.append(u"* Ignoring %s" % cat.title(asLink=True, textlink=True))
def run(self):
    """Create or overwrite the wiki page for this host from iLO data.

    Fills a fixed hardware/instance template with values fetched from
    the machine's iLO at ``self.ip`` and saves it as page ``self.name``.
    """
    wikipedia.setAction(u'Robot: hardware import')
    page = wikipedia.Page(wikipedia.getSite(), self.name)
    # Skeleton page: a hardware template followed by an instance
    # template.  The per-line startswith() rewrites below depend on
    # this exact line layout.
    tmpl = """{{MODEL|
|NAME=
|LOCATION=
|OWNER=
|SN=
|PN=
|OOBIP=
|OOBMAC=
|RPSUSED=1
|NICUSED=2
|NIC1=eth0
|NIC2=eth1
|NICMAC1=
|NICMAC2=
|CPUUSED=2x UnknownCPU
|RAMUSED=2x UnknownMB
|DISKSUSED=2x 146GB
|CONTRACT=
}}
{{Instance
|USAGE=[[OpenVZ HN]]
|OS=lenny
|ARCH=x86_64
|AUTH=ldap
|LDAPGROUP=hostgroup-admin-ssh
|ETH=
}}
"""
    lines = list()
    oldlines = tmpl.split("\n")
    # Hardware facts scraped from the iLO board.
    data = self.fetchIloData(self.ip)
    print data
    # now replace the values.  Consumed keys are deleted from 'data' so
    # each value is written at most once.
    for line in oldlines:
        if line.startswith("{{MODEL"):
            # e.g. 'ProLiant DL360 G5' -> '{{HP_DL360_G5|'
            line = "{{HP_" + data['productname'].replace(
                'ProLiant', '').replace(' ', '_') + '|'
        if line.startswith("|NAME"):
            line = "|NAME=" + self.name
        if line.startswith("|OOBIP"):
            line = "|OOBIP=" + self.ip
        if line.startswith("|LOCATION"):
            # Location is derived from the first two dash-separated
            # parts of the host name.
            line = "|LOCATION=" + (
                '%s-%s' % (self.name.split('-')[0],
                           self.name.split('-')[1]))
        if line.startswith("|SN=") and data.has_key('serialnumber'):
            line = "|SN=" + data['serialnumber']
            del data['serialnumber']
        if line.startswith("|PN=") and data.has_key('skunumber'):
            line = "|PN=" + data['skunumber']
            del data['skunumber']
        if line.startswith("|OOBMAC") and data.has_key('oobmac'):
            line = "|OOBMAC=" + data['oobmac']
            del data['oobmac']
        if line.startswith("|RPSUSED") and data.has_key('rpsused'):
            line = "|RPSUSED=" + str(data['rpsused'])
            del data['rpsused']
        if line.startswith("|NICMAC1") and data.has_key('nicmac1'):
            line = "|NICMAC1=" + str(data['nicmac1'])
            del data['nicmac1']
        if line.startswith("|NICMAC2") and data.has_key('nicmac2'):
            line = "|NICMAC2=" + str(data['nicmac2'])
            del data['nicmac2']
        if line.startswith("}}"):
            # hardware template is over, ensure that no other changes are made
            data = dict()
        lines.append(line)
    pagetext = "\r\n".join(lines)
    # Save the page
    try:
        page.put(pagetext)
    except wikipedia.LockedPage:
        wikipedia.output(u"Page %s is locked; skipping."
                         % page.aslink())
    except wikipedia.EditConflict:
        wikipedia.output(u'Skipping %s because of edit conflict'
                         % (page.title()))
    except wikipedia.SpamfilterError, error:
        wikipedia.output(
            u'Cannot change %s because of spam blacklist entry %s'
            % (page.title(), error.url))