Example #1
	def abort(self, status = u'Ok', msg = None):
		log = u'Bot aborted with status: '+status
		if msg:
			log += u' and message: '+msg
		self.log(log)
		wikipedia.stopme()
		exit(1)
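Most of these snippets share one convention; here is a minimal sketch of that idiom, assuming the compat-era wikipedia module (the bot body is a hypothetical placeholder):

# Minimal sketch: always call stopme() on the way out, so the framework
# releases its throttle slot even if the bot body raises.
import wikipedia  # compat-era pywikibot core

def main():
    pass  # hypothetical bot work

if __name__ == '__main__':
    try:
        main()
    finally:
        wikipedia.stopme()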
Example #2
def checktalk():
	site = wikipedia.getSite()
	path = site.put_address('Non-existing_page')
	text = site.getUrl(path)
	if '<div class="usermessage">' in text:
		wikipedia.output(u'NOTE: You have unread messages on %s' % site)
		wikipedia.stopme()
		sys.exit()
Example #3
 def Import(self, target, project = 'w', crono = '1', namespace = '', prompt = True):
     """Import the page from the wiki. Requires administrator status.
     If prompt is True, asks the user whether to import the page.
     """
     # Fixing the crono value...
     if crono == True:
         crono = '1'
     elif crono == False:
         crono = '0'
     elif crono == '0':
         pass
     elif crono == '1':
         pass
     else:
         wikipedia.output(u'Invalid crono value.')
         wikipedia.stopme()
     # Fixing namespace's value.
     if namespace == '0':
         namespace = ''
     answer = 'y'
     if prompt:
         answer = wikipedia.inputChoice(u'Do you want to import %s?' % target, ['Yes', 'No'], ['y', 'N'], 'N')
     if answer in ['y', 'Y']:
         host = self.site().hostname()
         address = '/w/index.php?title=%s&action=submit' % self.urlname()
         # You need to be a sysop for the import.
         self.site().forceLogin(sysop = True)
         # Getting the token.
         token = self.site().getToken(self, sysop = True)
         # Defining the predata.
         predata = {
             'action' : 'submit',
             'source' : 'interwiki',
             # from what project do you want to import the page?
             'interwiki' : project,
             # What is the page that you want to import?
             'frompage' : target,
             # The entire history... or not?
             'interwikiHistory' : crono,
             # What namespace do you want?
             'namespace': namespace,
         }
         if self.site().hostname() in config.authenticate.keys():
             predata['Content-type'] = 'application/x-www-form-urlencoded'
             predata['User-agent'] = useragent
             data = self.site().urlEncode(predata)
             response = urllib2.urlopen(urllib2.Request('http://' + self.site().hostname() + address, data))
             data = u''
         else:
             response, data = self.site().postForm(address, predata, sysop = True)
         if data:
             wikipedia.output(u'Page imported, checking...')
             if wikipedia.Page(self.site(), target).exists():
                 wikipedia.output(u'Import success!')
                 return True
             else:
                 wikipedia.output(u'Import failed!')
                 return False
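A hedged usage sketch of the Import method above; the titles are hypothetical, the method is assumed to be bound to the page class as defined above, the logged-in account must be a sysop, and the interwiki prefix 'w' is assumed to resolve to the source wiki:

# Hypothetical call to Page.Import() as defined above (sysop required).
import wikipedia  # compat-era pywikibot core

site = wikipedia.getSite('es', 'wikipedia')
page = wikipedia.Page(site, u'Some title')
# Import the page with its full history (crono='1') from project 'w'.
ok = page.Import(u'Some title', project='w', crono='1', prompt=True)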
Example #4
def main():
    summary_commandline, template, gen = None, [], None
    exceptions,PageTitles,namespaces = [],[],[]
    cat=''
    autoText,autoTitle = False,False
    genFactory = pagegenerators.GeneratorFactory()
    arg = False  # Leave False to parse command-line arguments; set True to be prompted for a page instead.
    if arg==False:
        for arg in wikipedia.handleArgs():
            if arg == '-autotitle':
                autoTitle = True
            elif arg == '-autotext':
                autoText = True
            elif arg.startswith( '-page:' ):
                if len(arg) == 6:
                    PageTitles.append(wikipedia.input(u'Which page do you want to change?'))
                else:
                    PageTitles.append(arg[6:])
            elif arg.startswith( '-cat:' ):
                if len(arg) == 5:
                    cat = wikipedia.input(u'Which category do you want to change?')
                else:
                    cat='Category:'+arg[5:]
            elif arg.startswith( '-template:' ):
                if len(arg) == 10:
                    template.append(wikipedia.input(u'Which template do you want to change?'))
                else:
                    template.append('Template:'+arg[10:])
            elif arg.startswith('-except:'):
                exceptions.append(arg[8:])
            elif arg.startswith( '-namespace:' ):
                namespaces.append( int( arg[11:] ) )
            elif arg.startswith( '-ns:' ):
                namespaces.append( int( arg[4:] ) )    
            elif arg.startswith( '-summary:' ):
                wikipedia.setAction( arg[9:] )
                summary_commandline = True
            else:
                generator = genFactory.handleArg(arg)
                if generator:
                    gen = generator
    else:
        PageTitles = [raw_input(u'Page:> ').decode('utf-8')]
    if cat!='':
        facatfalist=facatlist(cat)
        if facatfalist!=False:
            run(facatfalist)    
    if PageTitles:
        pages = [wikipedia.Page(faSite,PageTitle) for PageTitle in PageTitles]
        gen = iter( pages )
    if not gen:
        wikipedia.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator( gen,namespaces )
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)  # number of pages to load at a time
    run(preloadingGen)
Example #5
def main():
    site = pywikibot.getSite('en', 'wikipedia')
    prefix = 'Uw-'
    ns = 10

    for p in site.prefixindex(prefix, namespace=ns):
        print p.title()

    pywikibot.stopme()
Example #6
def delTestPage(pagename):
	myuserpage = u"ഉപയോക്താവ്:" + 'Manubot'
	mypage = myuserpage + "/BotLabs/test" + pagename

	# doing the job
	site = wikipedia.getSite('ml','wikipedia')
	page = wikipedia.Page(site,mypage)
	page.delete(reason='Deleting Test pages', prompt=False, throttle=True, mark=True)	
	wikipedia.stopme()	
Example #7
 def shutoffcheck(self):
     return # Not implemented
     print u'Checking emergency shutoff page %s.' % self.shutoffpage.title(asLink=True)
     self.shutoffpagetext = self.shutoffpage.get()
     if unicode(self.shutoffpagetext.strip()) != u'enable':
         print u'Emergency shutoff enabled; stopping.'
         pywikibot.stopme()
         exit()
     else:
         print u'Emergency shutoff disabled; continuing.'
Example #8
def udate2wiki(pagename=u'ഉപയോക്താവ്:Manubot/sandbox',towiki=True):
	global gpageData
	if towiki:
		site = wikipedia.getSite('ml','wikipedia')
		page = wikipedia.Page(site,pagename)
		page.put(gpageData,u'ബോട്ടിന്റെ കൂന്തി വിളയാട്ടം')		
		wikipedia.stopme()
	else:
		f = codecs.open(pagename+u'.txt',encoding='utf-8', mode='w')
		f.write(gpageData)
		f.close()	
Example #9
def shutoffcheck():
	site = wikipedia.getSite()
	pagename = "User:Hazard-Bot/Check/Wikiproject"
	page = wikipedia.Page(site, pagename)
	print "Checking [[" + pagename + "]] for emergency shutoff."
	text = page.get()
	if text.lower() != 'enable':
		print "Emergency shutoff enabled; stopping."
		wikipedia.stopme()
		exit()
	print "Emergency shutoff disabled; continuing."
Example #10
def main(*args):
    global bot
    try:
        a = pywikibot.handleArgs(*args)
        if len(a) == 1:
            raise RuntimeError('Unrecognized argument "%s"' % a[0])
        elif a:
            raise RuntimeError("Unrecognized arguments: " + " ".join(('"%s"' % arg) for arg in a))
        bot = CategoryRedirectBot()
        bot.run()
    finally:
        pywikibot.stopme()
Example #11
def main():
    summary_commandline,gen,template = None,None,None
    namespaces,PageTitles,exceptions = [],[],[]    
    encat=''
    autoText,autoTitle = False,False
    recentcat,newcat=False,False
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith( '-except:' ):
            exceptions.append( arg[8:] )
            
        elif arg.startswith('-start'):
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = wikipedia.input(
                    u'At which page do you want to start?')
            firstPageTitle = wikipedia.Page(fasite,firstPageTitle).title(withNamespace=False)
            gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, 0,
                                        includeredirects=True)    
        elif arg.startswith( '-template:' ):
            template = arg[10:]
        elif arg.startswith( '-namespace:' ):
            namespaces.append( int( arg[11:] ) )
        elif arg.startswith( '-summary:' ):
            wikipedia.setAction( arg[9:] )
            summary_commandline = True
        else:
            generator = genFactory.handleArg( arg )
            if generator:
                gen = generator
    if not gen:
        wikipedia.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
        preloadingGen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    else:
        preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    _cache,last_timestamp=get_cache()
    add_text(preloadingGen)

    now = str(datetime.now())
    # Approximate day number: day + month*30 + (year-2000)*365
    todaynum = int(now.split('-')[2].split(' ')[0]) + int(now.split('-')[1])*30 + (int(now.split('-')[0])-2000)*365

    if last_timestamp+3 < todaynum:
        put_cache(_cache,todaynum)
    else:
        put_cache({},0)
Example #12
def getInfobox(film):
	info_box_data = []
	if type(film).__name__ == 'str' or type(film).__name__ == 'unicode':
		site = wikipedia.getSite('en','wikipedia') # the English Wikipedia
		page = wikipedia.Page(site, film) # Calling the constructor
		if page.isRedirectPage():
			page = page.getRedirectTarget()	
	else:
		page = film
	page_data = page.get()
	#print page_data
	page_data = page_data.split(u'\n')
	info_box = 0
	#remove the |
	r = re.compile(r'^\s*\|\s*',re.UNICODE)

	info_re = re.compile(r'\s*{{\s*Infobox\s*film\s*',re.IGNORECASE|re.UNICODE)
	#remove spaces
	r1 = re.compile(r'\s*=\s*',re.UNICODE)
	#remove comments
	#r2 = re.compile(r'<!--.*-->')


	#Get the info box data
	for line in page_data:
		if len(line) == 0:
			continue
		if info_re.search(line) and info_box == 0:
			print 'Found infobox'
			info_box = 1
		elif (line == u'}}' or line == u'|}}') and info_box == 1:
			info_box = 0
			break
		elif info_box == 1:
			# remove unnecessary data
			line = r.sub('',line)
			#line = r2.sub('',line) 
			line = r1.sub('=',line)
			print line
			info_box_data.append(line)
		else:
			pass

	#update in dictionary 
	for i in info_box_data:
		info_box_dict[i.split(u'=',1)[0].strip()] = i.split(u'=',1)[1].strip()
			
	#print info_box_data
	#print info_box_dict
	wikipedia.stopme()
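A hedged driver for getInfobox() above; the film title is hypothetical, and info_box_dict is the module-level dict the function populates:

# Hypothetical usage: collect the infobox fields of one film article.
info_box_dict = {}

getInfobox(u'Inception')
for field, value in info_box_dict.items():
	print field, ':', value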
Example #13
def post(unlock = True):
    """
    This function removes the throttle file. It also removes the lockfile,
    unless the unlock argument is set to False.
    """
    if unlock and lockfile:
        try:
            os.remove(lockfile)
        except OSError:
            error(u"Unable to remove lockfile.")

    pywikibot.output(u"The script " + fullname + u" stopped at " + getTime())
    pywikibot.stopme()
    sys.exit()
Example #14
def udate2wiki(pagename=u'',towiki=True):
	global data
	if towiki:
		myuserpage = u"ഉപയോക്താവ്:" + 'Manubot'
		mypage = myuserpage + "/BotLabs/" + pagename

		# doing the job
		site = wikipedia.getSite('ml','wikipedia')
		page = wikipedia.Page(site,mypage)
		page.put(data,u'ബോട്ടിന്റെ കൂന്തി വിളയാട്ടം')		
		wikipedia.stopme()
	else:
		f = codecs.open(pagename+u'.txt',encoding='utf-8', mode='w')
		f.write(data)
		f.close()
Example #15
def main(*args):
    try:
        genFactory = GeneratorFactory()
        for arg in pywikibot.handleArgs():
            if not genFactory.handleArg(arg):
                pywikibot.showHelp('pagegenerators')
                break
        else:
            gen = genFactory.getCombinedGenerator()
            if gen:
                i = 0
                for page in gen:
                    i+=1
                    pywikibot.output("%4d: %s" % (i, page.title()), toStdout = True)
            else:
                pywikibot.showHelp('pagegenerators')
    finally:
        pywikibot.stopme()
Example #16
def main(*args):
    try:
        list_page = pywikibot.Page(pywikibot.getSite(),
            'User:Petrb/Weird pages'
        )
        page_text = list_page.get()
        lines = page_text.split('\n')
        list_elems = lines[1:-2]
        summary = u"[[User:HasteurBot|HasteurBot Task 5]]: Adding maint " +\
            u"category to identified page"
        page_match = re.compile('\{\{AFC submission\|')
        page_match2 = re.compile('\{\{AfC submission\|')
        cat_match = re.compile('\[\[Category\:AfC submissions with missing AfC template\]\]')
        limiter = 50 - 14
        for elem in list_elems:
            if excluded_page(elem):
                continue
            ind_page = pywikibot.Page(pywikibot.getSite(),
                u'Wikipedia talk:'+elem
            )
            if not ind_page.exists():
                continue
            if ind_page.isRedirectPage():
                ind_page = ind_page.getRedirectTarget()
            if ind_page.namespace() != 5:
                continue
            page_text = ind_page.get()
            if page_match.match(page_text) is None \
                and \
                page_match2.match(page_text) is None \
                and \
                'AfC_submissions_with_missing_AfC_template' not in page_text \
                and \
                'AfC submissions with missing AfC template' not in page_text:
                limiter = limiter - 1
                print elem
                imp_text = page_text + \
                    '\n[[Category:AfC submissions with missing AfC template]]'
                ind_page.put(imp_text, comment=summary)
            if limiter == 0:
                break

    finally:
        pywikibot.stopme()
Example #17
def main():
	gen = None
	PageTitles = []
	
	# Read commandline parameters.
	for arg in wikipedia.handleArgs():
		if arg.startswith('-page'):
			if len(arg) == 5:
				PageTitles.append(wikipedia.input(u'Which page do you want to change?'))
			else:
				PageTitles.append(arg[6:])

	return PageTitles

if __name__ == "__main__":
	try:
		main()
	finally:
		wikipedia.stopme()
Example #18
	def log(self, msg = u''):
		self.setLogHeader()
		text = None
		try:
			text = self.logPage.get(force=True)
		except wikipedia.NoPage:
			print "ERROR: No log page found."
			wikipedia.stopme()
			exit(1)
		loc = text.find(self.header)
		if loc == -1:
			print "ERROR: No header found on log page."
			wikipedia.stopme()
			exit(1)
		loc += len(self.header)+1
		log = '\n'+strftime(u'%H:%M:%S - ')+msg+u'<br />'
		text = text[0:loc] + log + text[loc:]
		print "logging: "+log[1:]
		self.logPage.put(text)
Example #19
def main():
    global CD
    output(u'Running ' + __version__)
    CD = CommonsDelinker()
    output(u'This bot runs from: ' + str(CD.site))

    # Shrink the regex module's compiled-pattern cache (private API).
    re._MAXCACHE = 4

    args = wikipedia.handleArgs()
    if '-since' in args:
        # NOTE: Untested
        ts_format = '%Y-%m-%d %H:%M:%S'
        try:
            since = time.strptime(
                args[args.index('-since') + 1],
                ts_format)
        except ValueError:
            if args[args.index('-since') + 1][0] == '[' and \
                    len(args) != args.index('-since') + 2:
                # The timestamp was split into two arguments by the shell;
                # rejoin it before parsing.
                since = time.strptime(
                    '%s %s' % (args[args.index('-since') + 1],
                               args[args.index('-since') + 2]),
                    '[%s]' % ts_format)
            else:
                raise ValueError('Incorrect time format!')
        output(u'Reading deletion log since [%s]' %\
            time.strftime(ts_format, since))
        CD.last_check = time.mktime(since)

    try:
        try:
            CD.start()
        except Exception, e:
            if type(e) not in (SystemExit, KeyboardInterrupt):
                output('An exception occurred in the main thread!', False)
                traceback.print_exc(file = sys.stderr)
                threadpool.terminate()
    finally:
        output(u'Stopping CommonsDelinker')
        wikipedia.stopme()
        # Flush the standard streams
        sys.stdout.flush()
        sys.stderr.flush()
Example #20
def main():    
    gen = None
    genFactory = pagegenerators.GeneratorFactory()    
    for arg in wikipedia.handleArgs():
        if arg.startswith('-newtem'):    
            arg=arg.replace(':','')
            if len(arg) == 7:
                genfa = pagegenerators.NewpagesPageGenerator(100, False, None,10)
            else:
                genfa = pagegenerators.NewpagesPageGenerator(int(arg[8:]), False, None,10)
            gen = pagegenerators.PreloadingGenerator( genfa,60)
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
 
    if not gen:
        wikipedia.stopme()
        sys.exit()
    preloadingGen = pagegenerators.PreloadingGenerator(gen,pageNumber = 60)    
    #preloadingGen = pagegenerators.NamespaceFilterPageGenerator(gen,10)
    add_nav(preloadingGen)
Example #21
def main():
    global R

    import sys, traceback
    wikipedia.handleArgs()
    output(u'Running ' + __version__)

    try:
        try:
            # FIXME: Add support for single-process replacer.
            R = Replacer()
            output(u'This bot runs from: ' + str(R.site))
            R.start()
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception, e:
            output('A critical error has occurred! Aborting!')
            traceback.print_exc(file = sys.stderr)
    finally:
        output('Exiting replacer')
        wikipedia.stopme()
Example #22
def main():
    gen=None
    #what we will add to the beginning of the title
    pre=""
    #what we will add to the end of the title
    post=""
    #what we will remove from the title
    rem=[]
    
    # Read commandline parameters.
    for arg in wikipedia.handleArgs():
        if arg.startswith('-append'):
            if len(arg) >= 8:
                post = arg[8:]
            else: 
                post = wikipedia.input(u'Please enter the text to append:')
        elif arg.startswith('-file'):
            if len(arg) >= 6:
                textfilename = arg[6:]
                gen = pagegenerators.TextfilePageGenerator(textfilename)
        elif arg.startswith('-prefix'):
            if len(arg) >= 8:
                pre = arg[8:]
            else: 
                pre = wikipedia.input(u'Please enter the text to prefix with:')
        elif arg.startswith('-remove'):
            if len(arg) >= 8:
                rem.append(arg[8:])
            else: 
                rem.append(wikipedia.input(u'Please enter the text to remove:'))
    
    if not gen:
        # syntax error, show help text from the top of this file
        wikipedia.output(__doc__, 'utf-8')
        wikipedia.stopme()
        sys.exit()
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20)
    bot = RenameRobot(preloadingGen, pre, post, rem)
    bot.run()
Example #23
def main(*args):
    try:
        genFactory = pagegenerators.GeneratorFactory()
        for arg in pywikibot.handleArgs(*args):
            if not genFactory.handleArg(arg):
                pywikibot.showHelp()
                break
        else:
            gen = genFactory.getCombinedGenerator()
            if gen:
              page_match = re.compile('\{\{AFC submission\|')
              summary = u"[[User:HasteurBot|HasteurBot Task 3]]: Removing " + \
                u"maint category that does not apply"
              disclude_list = [
                u'Wikipedia talk:WikiProject Articles for creation',
                u'Wikipedia talk:WikiProject Articles for creation/2013 5',
                u'Wikipedia talk:WikiProject Articles for creation/2011',
              ]
              for article in gen:
                if article.title() in disclude_list:
                  continue
                art_text = article.get()
                if page_match.match(art_text) is not None:
                  print article 
                  art_1 = re.sub(
                    '\\\n\[\[\:Category\:AfC_submissions_with_missing_AfC_template\]\]',
                    '',
                    art_text
                  )
                  art_2 = re.sub(
                    '\\\n\[\[\:Category\:AfC submissions with missing AfC template\]\]',
                    '',
                    art_1
                  )
                  article.put(art_2,comment=summary)
            else:
                pywikibot.showHelp()
    finally:
        pywikibot.stopme()
Example #24
def pre(name, lock = False):
    """
    Return the argument list, site object, and configuration of the script.
    This function also handles default arguments, generates a lockfile, and
    halts the script if a lockfile already exists.
    """
    global fullname, lockfile
    pywikibot.handleArgs("-log")
    fullname = name
    pywikibot.output(u"The script " + fullname + u" started at " + getTime())
    if lock:
        lockfile = os.path.abspath("../../tmp/" + basescript + ".py")
        if os.path.exists(lockfile):
            error(u"Lockfile found. Unable to execute the script.")
            pywikibot.stopme()
            sys.exit()
        open(lockfile, 'w').close()

    confpath = os.path.abspath("../../conf/" + basescript + ".py")
    if os.path.exists(confpath):
        module = imp.load_source("conf", confpath)
    else:
        module = None
    return pywikibot.handleArgs(), pywikibot.getSite(), module
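A hedged sketch of how pre() above and post() (Example #13) might bracket a script; the script name and body are hypothetical:

# Hypothetical skeleton pairing pre() with post() from Example #13.
def run():
    args, site, conf = pre("examplebot", lock=True)  # log start, take lockfile
    try:
        pass  # the actual bot work would go here
    finally:
        post()  # drop lockfile, log stop, stopme() and exit

if __name__ == "__main__":
    run()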
Example #25
                namespaces.append(arg[len('-namespace:'):])
        else:
            argsList.append(arg)

    if operation == None:
        pywikibot.showHelp('templatecount')
    else:
        robot = TemplateCountRobot()
        if not argsList:
            argsList = templates
        choice = ''
        if 'reflist' in argsList:
            pywikibot.output(
                u'NOTE: it will take a long time to count "reflist".')
            choice = pywikibot.inputChoice(
                u'Proceed anyway?', ['yes', 'no', 'skip'], ['y', 'n', 's'], 'y')
            if choice == 's':
                argsList.remove('reflist')
        if choice == 'n':
            return
        elif operation == "Count":
            robot.countTemplates(argsList, namespaces)
        elif operation == "List":
            robot.listTemplates(argsList, namespaces)

if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()
Example #26
    wikipedia.output(u'')
    wikipedia.output(u'(For checkboxes: An empty string evaluates to False; all others to True)')
    wikipedia.output(u'')

    while True:
        keys, values = [], []
        while True:
            try:
                keys.append(wikipedia.input(u'Which preference do you wish to set?'))
            except KeyboardInterrupt:
                return
            values.append(wikipedia.input(u"To what value do you wish to set '%s'?" % keys[-1]))
            if wikipedia.inputChoice(u"Set more preferences?",
                    ['no', 'yes'], ['n', 'y'], 'n') == 'n': break

        if wikipedia.inputChoice(u"Set %s?" % u', '.join((u'%s:%s' % (key, value)
                for key, value in zip(keys, values))),
                ['yes', 'no'], ['y', 'n'], 'n') == 'y':
            set_all(keys, values, verbose = True)
            wikipedia.output(u"Preferences have been set on all wikis.")

if __name__ == '__main__':
    import sys
    sys.path.insert(1, '..')
    import wikipedia
    try:
        wikipedia.handleArgs()
        main()
    finally:
        wikipedia.stopme()
Example #27
# -*- coding: utf-8  -*-
"""Splits a interwiki.log file into chunks of warnings separated by language"""
#
# (C) Rob W.W. Hooft, 2003
# (C) Pywikipedia bot team, 2004-2011
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: splitwarning.py 9482 2011-08-29 16:32:37Z xqt $'
#

import wikipedia as pywikibot
import codecs
import re

pywikibot.stopme(
)  # No need to have me on the stack - I don't contact the wiki
files = {}
count = {}

# TODO: Variable log filename
fn = pywikibot.config.datafilepath("logs", "interwiki.log")
logFile = codecs.open(fn, 'r', 'utf-8')
rWarning = re.compile('WARNING: (?P<family>.+?): \[\[(?P<code>.+?):.*')
for line in logFile:
    m = rWarning.match(line)
    if m:
        family = m.group('family')
        code = m.group('code')
        if code in pywikibot.getSite().languages():
            if not code in files:
                files[code] = codecs.open(
Example #28
# -*- coding: utf-8  -*-
"""Splits a interwiki.log file into chunks of warnings separated by language"""
#
# (C) Rob W.W. Hooft, 2003
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
#

import wikipedia
import codecs
import re

wikipedia.stopme(
)  # No need to have me on the stack - I don't contact the wiki
files = {}
count = {}

# TODO: Variable log filename
fn = wikipedia.config.datafilepath("logs", "interwiki.log")
logFile = codecs.open(fn, 'r', 'utf-8')
rWarning = re.compile('WARNING: (?P<family>.+?): \[\[(?P<code>.+?):.*')
for line in logFile:
    m = rWarning.match(line)
    if m:
        family = m.group('family')
        code = m.group('code')
        if code in wikipedia.getSite().languages():
            if not code in files:
                files[code] = codecs.open(
Example #29
    def __init__(self):

        for arg in pywikibot.handleArgs():
            if arg.startswith('-from'):
                if arg.startswith('-from:'):
                    self.apfrom = arg[6:]
                else:
                    self.apfrom = pywikibot.input(
                        u'Which page to start from: ')
            elif arg.startswith('-reqsize:'):
                self.aplimit = int(arg[9:])
            elif arg == '-links':
                self.links = True
            elif arg == '-linksonly':
                self.links = True
                self.titles = False
            elif arg == '-replace':
                self.replace = True
            elif arg == '-redir':
                self.filterredir = 'all'
            elif arg == '-redironly':
                self.filterredir = 'redirects'
            elif arg.startswith('-limit:'):
                self.stopAfter = int(arg[7:])
            elif arg == '-autonomous' or arg == '-a':
                self.autonomous = True
            elif arg.startswith('-ns:'):
                self.namespaces.append(int(arg[4:]))
            elif arg.startswith('-wikilog:'):
                self.wikilogfile = arg[9:]
            elif arg.startswith('-failedlog:'):
                self.failedTitles = arg[11:]
            elif arg == '-failed':
                self.doFailed = True
            else:
                pywikibot.output(u'Unknown argument %s.' % arg)
                pywikibot.showHelp()
                pywikibot.stopme()
                sys.exit()

        if self.namespaces == [] and not self.doFailed:
            if self.apfrom == u'':
                # 0 should be after templates ns
                self.namespaces = [14, 10, 12, 0]
            else:
                self.namespaces = [0]

        if self.aplimit is None:
            self.aplimit = 200 if self.links else 'max'

        if not self.doFailed:
            self.queryParams = {
                'action': 'query',
                'generator': 'allpages',
                'gaplimit': self.aplimit,
                'gapfilterredir': self.filterredir
            }
        else:
            self.queryParams = {'action': 'query'}
            if self.apfrom != u'':
                pywikibot.output(u'Argument "-from" is ignored with "-failed"')

        propParam = 'info'
        if self.links:
            propParam += '|links|categories'
            self.queryParams['pllimit'] = 'max'
            self.queryParams['cllimit'] = 'max'

        self.queryParams['prop'] = propParam

        self.site = pywikibot.getSite()

        if len(self.localSuspects) != len(self.latinSuspects):
            raise ValueError(u'Suspects must be the same size')
        if len(self.localKeyboard) != len(self.latinKeyboard):
            raise ValueError(u'Keyboard info must be the same size')

        if not os.path.isabs(self.wikilogfile):
            self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile)
        self.wikilog = self.OpenLogFile(self.wikilogfile)

        if not os.path.isabs(self.failedTitles):
            self.failedTitles = pywikibot.config.datafilepath(
                self.failedTitles)

        if self.doFailed:
            with codecs.open(self.failedTitles, 'r', 'utf-8') as f:
                self.titleList = [self.Page(t) for t in f]
            self.failedTitles += '.failed'

        self.lclToLatDict = dict([(ord(self.localSuspects[i]),
                                   self.latinSuspects[i])
                                  for i in xrange(len(self.localSuspects))])
        self.latToLclDict = dict([(ord(self.latinSuspects[i]),
                                   self.localSuspects[i])
                                  for i in xrange(len(self.localSuspects))])

        if self.localKeyboard is not None:
            self.lclToLatKeybDict = dict([
                (ord(self.localKeyboard[i]), self.latinKeyboard[i])
                for i in xrange(len(self.localKeyboard))
            ])
            self.latToLclKeybDict = dict([
                (ord(self.latinKeyboard[i]), self.localKeyboard[i])
                for i in xrange(len(self.localKeyboard))
            ])
        else:
            self.lclToLatKeybDict = {}
            self.latToLclKeybDict = {}

        badPtrnStr = u'([%s][%s]|[%s][%s])' \
                     % (self.latLtr, self.localLtr, self.localLtr, self.latLtr)
        self.badWordPtrn = re.compile(u'[%s%s]*%s[%s%s]*' %
                                      (self.latLtr, self.localLtr, badPtrnStr,
                                       self.latLtr, self.localLtr))

        # Get whitelist
        self.knownWords = set()
        self.seenUnresolvedLinks = set()

        # TODO: handle "continue"
        if self.site.lang in self.whitelists:
            wlpage = self.whitelists[self.site.lang]
            pywikibot.output(u'Loading whitelist from %s' % wlpage)
            wlparams = {
                'action': 'query',
                'prop': 'links',
                'titles': wlpage,
                'redirects': '',
                'indexpageids': '',
                'pllimit': 'max',
            }

            data = query.GetData(wlparams)
            if len(data['query']['pageids']) == 1:
                pageid = data['query']['pageids'][0]
                links = data['query']['pages'][pageid]['links']

                allWords = [
                    nn for n in links for nn in self.FindBadWords(n['title'])
                ]

                self.knownWords = set(allWords)


#                kw = set()
#                for w in allWords:
#                    if len(self.ProcessTitle(w)[1]) > 0:
#                        kw.add(w)
#                self.knownWords = kw

            else:
                raise ValueError(u'The number of pageids is not 1')

            pywikibot.output(u'Loaded whitelist with %i items' %
                             len(self.knownWords))
            if pywikibot.verbose and len(self.knownWords) > 0:
                pywikibot.output(u'Whitelist: %s' % u', '.join(
                    [self.MakeLink(i, False) for i in self.knownWords]))
        else:
            pywikibot.output(u'Whitelist is not known for language %s' %
                             self.site.lang)
Example #30
def doarticle(page):
	key = "4ZK9WURM"
	try:
		wikitext = page.get()
	except wikipedia.IsRedirectPage:
		print 'Skipping %s as it is a redirect' %str(page)
		return
	state0 = wikitext
	import urllib
	m=re.findall(r'\<ref\>ISBN(:| )(.*)\</ref\>', wikitext)
	if len(m) == 0:
		print 'No changes needed in %s' %str(page)
		return
	else:
		print 'Checking %s' %str(page)
	print m
	try:
		m=string.join(m,'')
		m=m[1:]
	except TypeError:
		m=m[0][1]
	isbn = m
	f = urllib.urlopen("http://isbndb.com/api/books.xml?access_key="+key+"&results=authors&index1=isbn&value1=" + m)
	s = f.read()
	f.close()
	print s
	realisbn = re.findall(r'<BookData (.*) isbn="(.*)" (.*)">', s)
	try:
		realisbn=string.join(realisbn,'')
		realisbn=realisbn[1:]
	except TypeError:
		realisbn=realisbn[0][1]
	print 'The official ISBN is: ' +realisbn
	
	title=re.findall(r'\<Title\>(.*)\</Title\>', s)
	try:
		title=string.join(title,'')
	except TypeError:
		title=title[0][1]
	print 'Title is: ' + title
	auth=re.findall(r'\<Person person_id="(.*)"\>(.*)\</Person\>', s)
	print auth
	if len(auth) == 1:
		auth=auth[0][1]
	elif len(auth) == 2:
		# Two authors: keep both names.
		auth2 = auth[1][1]
		auth = auth[0][1]
	elif len(auth) >= 3:
		print 'Too many authors... skipping'
		return
	authsplit = re.split(',', auth)
	last = authsplit[0]
	first = authsplit[1]
	pubinfo=re.findall(r'\<PublisherText publisher_id="(.*)"\>(.*)</PublisherText\>', s)
	print pubinfo[0][0]
	a = urllib.urlopen("http://isbndb.com/api/publishers.xml?access_key="+key+"&results=details&index1=publisher_id&value1=" + pubinfo[0][0])
	a1=a.read()
	a.close()
	a=a1
	print a
	pub=re.findall(r'\<Name\>(.*)\</Name\>', a)
	print pub[0]
	loc=re.findall(r'\<Details location="(.*)\((.*)\)" /\>', a)
	print loc[0][0]
	temp = u'<ref name="'+title+'">{{cite book |title= '+title+'|last= '+last+'|first= '+first+'|isbn= '+realisbn+'|publisher='+pub[0]+'|location='+loc[0][0]+'}} <!--ISBN Converted by Bot --></ref>'
	wikitext = re.sub('\<ref\>ISBN(:| )(.*)\</ref\>', temp, wikitext)
	wikipedia.showDiff(state0, wikitext)
	done = False
	try:
		prompt = raw_input('Would you like to accept the changes? [y][n][q] ')
	except KeyboardInterrupt:
		print '\n'
		wikipedia.stopme()
		sys.exit()
	if prompt == 'y':
		page.put(wikitext, 'Fixing raw ISBN')
		done = True
	elif prompt == 'n':
		return False
	elif prompt == 'q':
		wikipedia.stopme()
		sys.exit()
	else:
		print 'Error: Invalid choice, skipping %s' %(str(page))
	return done
Example #31
    def __init__(self):

        for arg in pywikibot.handleArgs():
            if arg.startswith('-from'):
                if arg.startswith('-from:'):
                    self.apfrom = arg[6:]
                else:
                    self.apfrom = pywikibot.input(u'Which page to start from: ')
            elif arg.startswith('-reqsize:'):
                self.aplimit = int(arg[9:])
            elif arg == '-links':
                self.links = True
            elif arg == '-linksonly':
                self.links = True
                self.titles = False
            elif arg == '-replace':
                self.replace = True
            elif arg == '-redir':
                self.filterredir = 'all'
            elif arg == '-redironly':
                self.filterredir = 'redirects'
            elif arg.startswith('-limit:'):
                self.stopAfter = int(arg[7:])
            elif arg == '-autonomous' or arg == '-a':
                self.autonomous = True
            elif arg.startswith('-ns:'):
                self.namespaces.append( int(arg[4:]) )
            elif arg.startswith('-wikilog:'):
                self.wikilogfile = arg[9:]
            elif arg.startswith('-failedlog:'):
                self.failedTitles = arg[11:]
            elif arg == '-failed':
                self.doFailed = True
            else:
                pywikibot.output(u'Unknown argument %s.' % arg)
                pywikibot.showHelp()
                pywikibot.stopme()
                sys.exit()

        if self.namespaces == [] and not self.doFailed:
            if self.apfrom == u'':
                # 0 should be after templates ns
                self.namespaces = [14, 10, 12, 0]
            else:
                self.namespaces = [0]

        if self.aplimit is None:
            self.aplimit = 200 if self.links else 'max'

        if not self.doFailed:
            self.queryParams = { 'action'        : 'query',
                                 'generator'     : 'allpages',
                                 'gaplimit'      : self.aplimit,
                                 'gapfilterredir': self.filterredir}
        else:
            self.queryParams = { 'action'        : 'query' }
            if self.apfrom != u'': pywikibot.output(u'Argument "-from" is ignored with "-failed"')

        propParam = 'info'
        if self.links:
            propParam += '|links|categories'
            self.queryParams['pllimit'] = 'max'
            self.queryParams['cllimit'] = 'max'

        self.queryParams['prop'] = propParam

        self.site = pywikibot.getSite()

        if len(self.localSuspects) != len(self.latinSuspects):
            raise ValueError(u'Suspects must be the same size')
        if len(self.localKeyboard) != len(self.latinKeyboard):
            raise ValueError(u'Keyboard info must be the same size')

        if not os.path.isabs(self.wikilogfile):
            self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile)
        self.wikilog = self.OpenLogFile(self.wikilogfile)

        if not os.path.isabs(self.failedTitles):
            self.failedTitles = pywikibot.config.datafilepath(self.failedTitles)

        if self.doFailed:
            with codecs.open(self.failedTitles, 'r', 'utf-8') as f:
                self.titleList = [self.Page(t) for t in f]
            self.failedTitles += '.failed'

        self.lclToLatDict = dict([(ord(self.localSuspects[i]),
                                   self.latinSuspects[i])
                                  for i in xrange(len(self.localSuspects))])
        self.latToLclDict = dict([(ord(self.latinSuspects[i]),
                                   self.localSuspects[i])
                                  for i in xrange(len(self.localSuspects))])

        if self.localKeyboard is not None:
            self.lclToLatKeybDict = dict(
                [(ord(self.localKeyboard[i]),
                  self.latinKeyboard[i])
                 for i in xrange(len(self.localKeyboard))])
            self.latToLclKeybDict = dict(
                [(ord(self.latinKeyboard[i]),
                  self.localKeyboard[i])
                 for i in xrange(len(self.localKeyboard))])
        else:
            self.lclToLatKeybDict = {}
            self.latToLclKeybDict = {}

        badPtrnStr = u'([%s][%s]|[%s][%s])' \
                     % (self.latLtr, self.localLtr, self.localLtr, self.latLtr)
        self.badWordPtrn = re.compile(u'[%s%s]*%s[%s%s]*'
                                      % (self.latLtr, self.localLtr,
                                         badPtrnStr, self.latLtr,
                                         self.localLtr))

        # Get whitelist
        self.knownWords = set()
        self.seenUnresolvedLinks = set()

        # TODO: handle "continue"
        if self.site.lang in self.whitelists:
            wlpage = self.whitelists[self.site.lang]
            pywikibot.output(u'Loading whitelist from %s' % wlpage)
            wlparams = {
                'action'    : 'query',
                'prop'      : 'links',
                'titles'    : wlpage,
                'redirects' : '',
                'indexpageids' : '',
                'pllimit'   : 'max',
            }

            data = query.GetData(wlparams)
            if len(data['query']['pageids']) == 1:
                pageid = data['query']['pageids'][0]
                links = data['query']['pages'][pageid]['links']

                allWords = [nn for n in links for nn in self.FindBadWords(n['title'])]

                self.knownWords = set(allWords)
#                kw = set()
#                for w in allWords:
#                    if len(self.ProcessTitle(w)[1]) > 0:
#                        kw.add(w)
#                self.knownWords = kw

            else:
                raise ValueError(u'The number of pageids is not 1')

            pywikibot.output(u'Loaded whitelist with %i items' % len(self.knownWords))
            if pywikibot.verbose and len(self.knownWords) > 0:
                pywikibot.output(u'Whitelist: %s' % u', '.join([self.MakeLink(i, False) for i in self.knownWords]))
        else:
            pywikibot.output(u'Whitelist is not known for language %s'
                             % self.site.lang)
Example #32
        for entry in sqldump.query_find(keyword):
            yield entry
    elif action == 'findr':
        keyword = wikipedia.input(u'Search for:')
        for entry in sqldump.query_findr(keyword):
            yield entry
    elif action == 'unmountedcats':
        for entry in sqldump.query_unmountedcats():
            yield entry
    elif action == 'baddisambiguation':
        for entry in sqldump.entries():
            if entry.namespace == 0 and entry.title.endswith(')') and entry.text.startswith("''") and not entry.text.startswith("'''"):
                yield entry

if __name__=="__main__":
    wikipedia.stopme() # No need to have me on the stack, as I'm not contacting the wiki
    import sys
    action = None
    filename = None
    for arg in sys.argv[1:]:
        arg = wikipedia.argHandler(arg, 'sqldump')
        if arg:
            if arg.startswith('-sql'):
                if len(arg) == 4:
                    filename = wikipedia.input(u'Please enter the SQL dump\'s filename: ')
                else:
                    filename = arg[5:]
            else:
                action = arg
    if not filename or not action:
        wikipedia.output(__doc__, 'utf-8')
Example #33
                                '--dry-run',
                                action='store_true',
                                dest='dry',
                                default=False,
                                help='do not make any changes')
        optionParser.add_option('-v',
                                '--verbose',
                                action='store_true',
                                dest='verbose',
                                default=False,
                                help='be verbose')
        (self.opts, _) = optionParser.parse_args()
        if not self.opts.file:
            optionParser.print_help()
            return

        # Parse the XML file
        (compId, namespace, langs, meanings) = self.parseFile()
        langs = self.dropInactive(langs)
        for meaning in meanings:
            # Update pages in the given cluster
            self.processMeaning(compId, namespace, langs, meaning)


if __name__ == "__main__":
    try:
        iac = InterwikiAnalysisCommit()
        iac.main()
    finally:
        wikipedia.stopme()
Example #34
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('selflink')
    else:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = SelflinkBot(preloadingGen, always)
        bot.run()

if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()
Example #35
# -*- coding: utf-8  -*-
"""Splits a interwiki.log file into chunks of warnings separated by language"""
#
# (C) Rob W.W. Hooft, 2003
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
#

import wikipedia
import codecs
import re

wikipedia.stopme() # No need to have me on the stack - I don't contact the wiki
files={}
count={}

# TODO: Variable log filename
fn = wikipedia.config.datafilepath("logs", "interwiki.log")
logFile = codecs.open(fn, 'r', 'utf-8')
rWarning = re.compile('WARNING: (?P<family>.+?): \[\[(?P<code>.+?):.*')
for line in logFile:
    m = rWarning.match(line)
    if m:
        family = m.group('family')
        code = m.group('code')
        if code in wikipedia.getSite().languages():
            if not code in files:
                files[code] = codecs.open(
                                  wikipedia.config.datafilepath('logs',