def main():
    '''
    The main loop
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()

    imagerecat.initLists()
    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    mark = True

    for arg in wikipedia.handleArgs():
        if arg.startswith('-dontmark'):
            mark = False
        elif arg.startswith('-page'):
            if len(arg) == 5:
                generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-yesterday'):
            generator = [wikipedia.Page(wikipedia.getSite(), u'Category:Media_needing_categories_as_of_' + getYesterday())]
        else:
            generator = genFactory.handleArg(arg)
    if generator:
        for page in generator:
            if page.namespace() == 14 and page.title().startswith(u'Category:Media needing categories as of'):
                wikipedia.output(u'Working on ' + page.title())
                for (image, gals, cats) in getImagesToCategorize(cursor, page.titleWithoutNamespace()):
                    categorizeImage(image, gals, imagerecat.applyAllFilters(cats))
                if mark:
                    categoriesChecked(page.title())
Example #2
    def processImage(self, fields):
        '''
        Work on a single image
        '''
        if self.autonomous:
            # Check if the image already exists. Do nothing if the name is already taken.
            CommonsPage = pywikibot.Page(pywikibot.getSite('commons', 'commons'), u'File:' + fields.get('filename'))
            if CommonsPage.exists():
                return False
        else:
            while True:
                # Show the Tkdialog to accept/reject and change the name
                fields = Tkdialog(fields).getnewmetadata()

                if fields.get('skip'):
                    pywikibot.output(u'Skipping %s : User pressed skip.' % fields.get('imagepage').title())
                    return False

                # Check if the image already exists
                CommonsPage = pywikibot.Page(pywikibot.getSite('commons', 'commons'), u'File:' + fields.get('filename'))
                if not CommonsPage.exists():
                    break
                else:
                    pywikibot.output('Image already exists, pick another name or skip this image')
                    # We don't overwrite images; pick another name and go back to the start of the loop.

        # Put the fields in the queue to be uploaded
        self.uploadQueue.put(fields)
 def run(self):
     for page in self.generator:
         if page.isRedirectPage():
             page = page.getRedirectTarget()
         page_t = page.title()
         # Show the title of the page we're working on.
         # Highlight the title in purple.
         wikipedia.output(u"\n>>> \03{lightpurple}%s\03{default} <<<"
                          % page_t)
         page_cap = wikipedia.Page(wikipedia.getSite(), page_t.capitalize())
         if not page_cap.exists():
             wikipedia.output(u'%s doesn\'t exist' % page_cap.title())
             if not self.acceptall:
                 choice = wikipedia.inputChoice(
                         u'Do you want to create a redirect?',
                         ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                 if choice == 'a':
                     self.acceptall = True
             if self.acceptall or choice == 'y':
                 try:
                     wikipedia.setAction(
                         wikipedia.translate(wikipedia.getSite(), msg)
                         % page_t)
                     page_cap.put(u"#REDIRECT [[%s]]" % page_t)
                     print
                 except:
                     wikipedia.output(
                         u"An error occurred. Retrying in 15 seconds...")
                     time.sleep(15)
                     continue
         else:
             wikipedia.output(u'%s already exists, skipping...\n'
                              % page_t)
Example #4
    def __iter__(self):
        """
        Yield page objects until the entire XML dump has been read.
        """
        import xmlreader

        mysite = pywikibot.getSite()
        dump = xmlreader.XmlDump(self.xmlfilename)
        # regular expression to find the original template.
        # {{vfd}} does the same thing as {{Vfd}}, so both will be found.
        # The old syntax, {{msg:vfd}}, will also be found.
        # TODO: check site.nocapitalize()
        templatePatterns = []
        for template in self.templates:
            templatePattern = template.title(withNamespace=False)
            if not pywikibot.getSite().nocapitalize:
                templatePattern = (
                    "[" + templatePattern[0].upper() + templatePattern[0].lower() + "]" + templatePattern[1:]
                )
            templatePattern = re.sub(" ", "[_ ]", templatePattern)
            templatePatterns.append(templatePattern)
        templateRegex = re.compile(
            r"\{\{ *([mM][sS][gG]:)?(?:%s) *(?P<parameters>\|[^}]+|) *}}" % "|".join(templatePatterns)
        )

        for entry in dump.parse():
            if templateRegex.search(entry.text):
                page = pywikibot.Page(mysite, entry.title)
                yield page
def main():
    '''
    The main loop
    '''
    wikipedia.setSite(wikipedia.getSite(u'nl', u'wikipedia'))
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()

    items = getNumberOfItems(cursor)
    images = getNumberOfImages(cursor)
    addresses = {}
    names = {} 
    pages = list(set(items.keys() + images.keys()))
    pages.sort()

    for key in pages:
        print key
        page = wikipedia.Page(wikipedia.getSite(), key)
        text = page.get()
        addresses[key] = getNumberOfAddresses(text)
        names[key] = getNumberOfNames(text)
        #print key + u' - ' + str(addresses[key]) + u' - ' + str(names[key])
    updateStats(pages, items, addresses, names, images)
def main():
    all = False
    language = None
    fam = None
    wikimedia = False
    for arg in pywikibot.handleArgs():
        if arg == "-all":
            all = True
        elif arg[0:7] == "-langs:":
            language = arg[7:]
        elif arg[0:10] == "-families:":
            fam = arg[10:]
        elif arg[0:10] == "-wikimedia":
            wikimedia = True

    mySite = pywikibot.getSite()
    if wikimedia:
        families = [
            "commons",
            "incubator",
            "mediawiki",
            "meta",
            "species",
            "test",
            "wikibooks",
            "wikidata",
            "wikinews",
            "wikipedia",
            "wikiquote",
            "wikisource",
            "wikiversity",
            "wikivoyage",
            "wiktionary",
        ]
    elif fam is not None:
        families = fam.split(",")
    else:
        families = [mySite.family.name]

    for family in families:
        try:
            fam = pywikibot.Family(family)
        except ValueError:
            pywikibot.output(u"No such family %s" % family)
            continue
        if all:
            for lang in fam.langs.iterkeys():
                testSite(pywikibot.getSite(lang, family))
        elif language is None:
            lang = mySite.lang
            if lang not in fam.langs.keys():
                lang = fam.langs.keys()[-1]
            testSite(pywikibot.getSite(lang, family))
        else:
            languages = language.split(",")
            for lang in languages:
                try:
                    testSite(pywikibot.getSite(lang, family))
                except pywikibot.NoSuchSite:
                    pywikibot.output(u"No such language %s in family %s" % (lang, family))
Example #7
 def __init__(self, pageToUnlink, namespaces, always):
     self.pageToUnlink = pageToUnlink
     gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
     if namespaces != []:
         gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
     self.generator = pagegenerators.PreloadingGenerator(gen)
     linktrail = pywikibot.getSite().linktrail()
     # The regular expression which finds links. Results consist of four
     # groups:
     #
     # group title is the target page title, that is, everything
     # before | or ].
     #
     # group section is the page section.
     # It'll include the # to make life easier for us.
     #
     # group label is the alternative link title, that's everything
     # between | and ].
     #
     # group linktrail is the link trail, that's letters after ]] which are
     # part of the word.
     # note that the definition of 'letter' varies from language to language.
     self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
                             % linktrail)
     self.always = always
     self.done = False
     self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                     self.pageToUnlink.title())
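A quick sketch of what the four named groups capture, on a hypothetical snippet of wikitext and assuming a simple English-style linktrail of [a-z]*:

# Minimal demonstration of the link regex above (hypothetical input text):
import re
linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>[a-z]*)')
m = linkR.search(u'See [[Dog#Breeds|dogs]]ies for details.')
print m.group('title')      # u'Dog'
print m.group('section')    # u'#Breeds'
print m.group('label')      # u'dogs'
print m.group('linktrail')  # u'ies'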
Example #8
 def __init__(self, generator, img, info, imdb):
     """
     Constructor. Parameters:
         @param generator: The page generator that determines on which pages
                           to work.
         @type generator: generator.
     """
     self.generator = generator
     # Set the edit summary message
     self.summary = i18n.twtranslate(pywikibot.getSite(), 'basic-changing')
     
     self.chrome = filmsettings.getChrome()
     self.img = img
     self.info = info
     self.imdb = imdb
     self.imdbNum = 0
     self.templateRegex = re.compile(r"{{.*}}")  # This is how templates look in wikitext
     self.referenceRegex = re.compile(r"(<ref.*?/(ref)?>)+")
     self.commentRegex = re.compile(r"<!--.*?-->")
     self.wikilinkRegex = re.compile(r"\[\[.*\|.*\]\]")
     
     infoTemp = pywikibot.Page(pywikibot.getSite(), "Template:Infobox_film/doc").get()
     infoTempStart = infoTemp.find("{{Infobox film") + 2
     bracketCount = 2
     infoTempEnd = infoTempStart
     while bracketCount != 0:
         infoTempEnd += 1
         if infoTemp[infoTempEnd:infoTempEnd + 1] == "{":
             bracketCount += 1
         elif infoTemp[infoTempEnd:infoTempEnd + 1] == "}":
             bracketCount -= 1
     self.infoboxTemplate = re.sub(self.commentRegex, "", infoTemp[infoTempStart - 2:infoTempEnd + 1])
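The while-loop above is a plain brace counter; a minimal standalone sketch of the same idea, on hypothetical text:

# Brace-counting sketch (hypothetical input; starts with count=2 for the
# opening '{{' and walks forward until the braces balance):
text = u'{{Infobox film | name = X {{nested}} }} trailing text'
start = text.find(u'{{') + 2
count = 2
end = start
while count != 0:
    end += 1
    if text[end] == u'{':
        count += 1
    elif text[end] == u'}':
        count -= 1
print text[start - 2:end + 1]  # the complete, balanced {{Infobox film ...}}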
def englishdictionry(link, firstsite, secondsite):
    link = link.replace(u' ', u'_')
    total_cache = dict(_cache_old, **_cache)
    if total_cache.get((link, 'englishdictionry')):
        return total_cache[(link, 'englishdictionry')]
    if link == u'':
        _cache[(link, 'englishdictionry')] = u''
        return u''
    site = wikipedia.getSite(firstsite)
    sitesecond = wikipedia.getSite(secondsite)
    params = {
        'action': 'query',
        'prop': 'langlinks',
        'titles': link,
        'redirects': 1,
        'lllimit': 500,
    }
    try:
        categoryname = query.GetData(params, site)
        for item in categoryname[u'query'][u'pages']:
            case = categoryname[u'query'][u'pages'][item][u'langlinks']
        for item in case:
            if item[u'lang'] == secondsite:
                intersec = item[u'*']
                break
        result = intersec
        if result.find('#') != -1:
            _cache[(link, 'englishdictionry')] = u''
            return u''
        _cache[(link, 'englishdictionry')] = result
        return result
    except:
        _cache[(link, 'englishdictionry')] = u''
        return u''
def main():
    #page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except KeyError:
            pass
        else:
            import catlib
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site, "%s:%s" % (site.category_namespace(),
                                                   cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
def addCoords(sourceWiki, lang, article, lat, lon, region, type, dim):
    '''
    Add the coordinates to article.
    '''

    if (article and lang and type):
        coordTemplate = 'Coordinate'
        site = wikipedia.getSite(lang, 'wikipedia')

        page = wikipedia.Page(site, article)
        try:
            text = page.get()
        except wikipedia.NoPage: # First except, prevent empty pages
            logging.warning('Page empty: %s', article)
            return False
        except wikipedia.IsRedirectPage: # second except, prevent redirect
            logging.warning('Page is redirect: %s', article)
            wikipedia.output(u'%s is a redirect!' % article)
            return False
        except wikipedia.Error: # third exception, take the problem and print
            logging.warning('Some error: %s', article)
            wikipedia.output(u"Some error, skipping..")
            return False       
    
        if coordTemplate in page.templates():
            logging.info('Already has Coordinate template: %s', article)
            return False

        if 'Linn' in page.templates():
            logging.info('Linn template without coords: %s', article)
            return False
            
        newtext = text
        replCount = 1
        coordText = u'{{Coordinate |NS=%s |EW=%s |type=%s |region=%s' % (lat, lon, type, region)
        if (dim):
            coordText += u' |dim=%s' % ( int(dim),)
        coordText += '}}'
        localCatName = wikipedia.getSite().namespace(WP_CATEGORY_NS)
        catStart = r'\[\[(' + localCatName + '|Category):'
        catStartPlain = u'[[' + localCatName + ':'
        replacementText = coordText + '\n\n' + catStartPlain
    
        # insert coordinate template before categories
        newtext = re.sub(catStart, replacementText, newtext, replCount, flags=re.IGNORECASE)

        if text != newtext:
            logging.info('Adding coords to: %s', article)
            comment = u'lisan artikli koordinaadid %s.wikist' % (sourceWiki)
            wikipedia.showDiff(text, newtext)
            modPage = wikipedia.input(u'Modify page: %s ([y]/n) ?' % (article))
            if modPage.lower() == 'y' or modPage == '':
                page.put(newtext, comment)
            return True
        else:
            logging.info('Nothing to change: %s', article)
            return False
    else:
        return False
Example #12
def checkWait():
    newlist = ""  # Blank variable for later
    site = wikipedia.getSite()
    pagename = localconfig.waitlist
    page = wikipedia.Page(site, pagename)
    waiters = page.get()
    waiters = waiters.replace("}}", "")
    waiters = waiters.replace("*{{User|", "")
    waiters = waiters.split("\n")
    for waiter in waiters:
        if waiter == "":
            continue  # Non-existent user
        if checkRegisterTime(waiter, 7, False):
            continue
        if checkBlocked(waiter):
            continue  # If the user is blocked, skip putting them back on the list.
        if getEditCount(waiter) == True:  # If they edited, send them to UAA
            checkUser(waiter, False, False)
            continue
        if waiter in newlist:
            continue  # User already in the list, in case duplicates run over
        # Continue if none of the other checks have issues with the conditions
        # for staying on the waitlist
        newlist = newlist + "\n*{{User|" + waiter + "}}"
        #print "\n*{{User|" + waiter + "}}"
    summary = localconfig.editsumwait
    site = wikipedia.getSite()
    pagename = localconfig.waitlist
    page = wikipedia.Page(site, pagename)
    pagetxt = page.get()
    newlist = newlist.replace("\n*{{User|}}", "")
    page.put(newlist, comment=summary)
    def store_wikipedia(self):

        s = ''
        for k in sorted(self.replace.keys()):
            s += '* %s : %s\n' % (k, self.replace[k])
        mypage = pywikibot.Page(pywikibot.getSite(), '%s/replaced' % self.prefix)
        mypage.put_async(s)

        s = ''
        for k in sorted(self.correctPerPage.keys()):
            vlist = self.correctPerPage[k]
            for v in sorted(vlist):
                s += '* %s : %s\n' % (k, v)
        mypage = pywikibot.Page(pywikibot.getSite(), '%s/correctPerPage' % self.prefix)
        mypage.put_async(s)

        s = ''
        for k in sorted(self.noall):
            s += '* %s \n' % (k)
        mypage = pywikibot.Page(pywikibot.getSite(), '%s/correct' % self.prefix)
        mypage.put_async(s)

        s = ''
        for k in sorted(self.rcount.keys()):
            if self.rcount[k] > 0:
                s += '* %s : %s\n' % (k, self.rcount[k])
        mypage = pywikibot.Page(pywikibot.getSite(), '%s/replacCount' % self.prefix)
        mypage.put_async(s)
        s = ''
def main():
    countrycode = u''

    # Connect database, we need that
    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2()

    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-countrycode:'):
            countrycode = arg[len('-countrycode:'):]

    lang = wikipedia.getSite().language()
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    if countrycode:
        if not mconfig.countries.get((countrycode, lang)):
            wikipedia.output(u'I have no config for countrycode "%s" in language "%s"' % (countrycode, lang))
            return False
        wikipedia.output(u'Working on countrycode "%s" in language "%s"' % (countrycode, lang))
        locateCountry(countrycode, lang, mconfig.countries.get((countrycode, lang)), conn, cursor, conn2, cursor2)
    else:
        for (countrycode, lang), countryconfig in mconfig.countries.iteritems():
            if not countryconfig.get('autoGeocode'):
                wikipedia.output(u'"%s" in language "%s" is not supported in auto geocode mode (yet).' % (countrycode, lang))
            else:
                wikipedia.output(u'Working on countrycode "%s" in language "%s"' % (countrycode, lang))
                locateCountry(countrycode, lang, countryconfig, conn, cursor, conn2, cursor2)
def main():
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    bigcategory = u''
    target = u''

    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-bigcat'):
            if len(arg) == 7:
                bigcategory = wikipedia.input(u'What category do you want to split out?')
            else:
                bigcategory = arg[8:]
        elif arg.startswith('-target'):
            if len(arg) == 7:
                target = wikipedia.input(u'What category is the target category?')
            else:
                target = arg[8:]

    if bigcategory != u'':
        splitOutCategory(bigcategory, target)
    else:
        if not generator:
            generator = pagegenerators.NamespaceFilterPageGenerator(pagegenerators.ReferringPageGenerator(wikipedia.Page(wikipedia.getSite(), u'Template:Intersect categories'), onlyTemplateInclusion=True), [14])
        for cat in generator:
            intersectCategories(cat)
Example #16
def main():
    featured = False
    gen = None

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg == '-featured':
            featured = True
        else:
            genFactory.handleArg(arg)

    mysite = pywikibot.getSite()
    if mysite.sitename() == 'wikipedia:nl':
        pywikibot.output(
            u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}')
        sys.exit()

    if featured:
        featuredList = pywikibot.translate(mysite, featured_articles)
        ref = pywikibot.Page(pywikibot.getSite(), featuredList)
        gen = pagegenerators.ReferringPageGenerator(ref)
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        for page in pagegenerators.PreloadingGenerator(gen):
            workon(page)
    else:
        pywikibot.showHelp('fixing_redirects')
def cleanUpCategories(description=''):
    '''
    Filter the categories in the description using the functions in imagerecat
    '''
    # Get the list of current categories
    categoryPages = wikipedia.getCategoryLinks(description, wikipedia.getSite())

    # Make it a list of strings (not page objects)
    categories = []
    for cat in categoryPages:
        categories.append(cat.titleWithoutNamespace())

    # Strip the categories from the current description
    description = wikipedia.removeCategoryLinks(description, wikipedia.getSite())

    # Filter the list of categories
    categories = imagerecat.applyAllFilters(categories)

    # If we have categories left, remove the uncat template
    if categories:
        description = description.replace(u'{{subst:unc}}', u'')

    # Add the categories to the description again
    description = description + u'\n'
    for category in categories:
        #print u'Category : ' + category
        description = description + u'[[Category:' + category + u']]\n'
    return description
Example #18
def main():
    import os
    index = None
    djvu = None
    pages = None
    # If dry is True, don't save anything; only show what would have been changed.
    dry = False
    ask = False
    overwrite = 'ask'

    # Parse command line arguments
    for arg in pywikibot.handleArgs():
        if arg.startswith("-dry"):
            dry = True
        elif arg.startswith("-ask"):
            ask = True
        elif arg.startswith("-overwrite:"):
            overwrite = arg[11:12]
            if overwrite != 'y' and overwrite != 'n':
                pywikibot.output(u"Unknown argument %s; will ask before overwriting" % arg)
                overwrite = 'ask'
        elif arg.startswith("-djvu:"):
            djvu = arg[6:]
        elif arg.startswith("-index:"):
            index = arg[7:]
        elif arg.startswith("-pages:"):
            pages = arg[7:]
        else:
            pywikibot.output(u"Unknown argument %s" % arg)

    # Check the djvu file exists
    if djvu:
        os.stat(djvu)

        if not index:
            import os.path
            index = os.path.basename(djvu)

    if djvu and index:
        site = pywikibot.getSite()
        index_page = pywikibot.Page(site, index)

        if site.family.name != 'wikisource':
            raise pywikibot.PageNotFound(u"Found family '%s'; Wikisource required." % site.family.name)

        if not index_page.exists() and index_page.namespace() == 0:
            index_namespace = site.mediawiki_message('Proofreadpage index namespace')

            index_page = pywikibot.Page(pywikibot.getSite(),
                                        u"%s:%s" % (index_namespace, index))
        if not index_page.exists():
            raise pywikibot.NoPage(u"Page '%s' does not exist" % index)
        pywikibot.output(u"uploading text from %s to %s"
                         % (djvu, index_page.title(asLink=True)) )
        bot = DjVuTextBot(djvu, index, pages, ask, overwrite, dry)
        if not bot.has_text():
            raise ValueError("No text layer in djvu file")
        bot.run()
    else:
        pywikibot.showHelp()
Example #19
def tagNowCommons(wImage, cImage, timestamp):
    site = wikipedia.getSite()
    language = site.language()
    family = site.family.name

    imagepage = wikipedia.ImagePage(wikipedia.getSite(), wImage)
    if not imagepage.exists() or imagepage.isRedirectPage():
        return

    if skips.get(family) and skips.get(family).get(language):
        localskips = skips.get(family).get(language)
    else:
        localskips = skips.get('_default')

    for template in imagepage.templates():
        title = template.replace(u'_', u' ').strip()
        if title in localskips:
            return
    text = imagepage.get()
    oldtext = text

    text = u'{{NowCommons|File:%s|date=%s|bot=~~~}}\n' % (cImage.replace(u'_', u' '), timestamp) + text
    comment = u'File is available on Wikimedia Commons.'
    wikipedia.showDiff(oldtext, text)
    try:
        imagepage.put(text, comment)
        #print u'put'
    except wikipedia.LockedPage:
        return
def tagUncategorized(templateTitle):
    site = wikipedia.getSite()
    language = site.language()
    family = site.family.name

    page = wikipedia.Page(wikipedia.getSite(), u'Template:%s' % (templateTitle,))

    if not page.exists() or page.isRedirectPage():
        return False

    text = page.get()
    oldtext = text

    text = text + u'<noinclude>\n\n%s\n</noinclude>' % (uncategorizedTemplate.get(family).get(language), )

    wikipedia.showDiff(oldtext, text)
    try:
        wikipedia.output(page.title())
        page.put(text, editComment.get(family).get(language), maxTries=1)
    except wikipedia.LockedPage:
        return
    except wikipedia.MaxTriesExceededError:
        return
    except wikipedia.EditConflict:
        return
Example #21
 def __init__(self, page, filename, summary, dry, always):
     self.page = pywikibot.Page(pywikibot.getSite(), page)
     self.filename = filename
     self.summary = summary
     if not self.summary:
         self.summary = pywikibot.translate(pywikibot.getSite(), self.msg)
     pywikibot.setAction(self.summary)
def loadPagesWiki(wr, correctWords_page, ignorePages_page):
    """
    Load list of correct words and ignored pages
    """
    # Load correct words
    mypage = pywikibot.Page(pywikibot.getSite(), correctWords_page)
    text = mypage.get()
    lines = text.split('* ')[1:]
    correctWords = {}
    for l in lines:
        spl = l.split(' : ')
        tmp = correctWords.get(spl[0], [])
        tmp.append(spl[1].strip())
        correctWords[spl[0]] = tmp

    print "loaded %s correct words" % len(correctWords)

    # Load ignore pages
    mypage = pywikibot.Page(pywikibot.getSite(), ignorePages_page)
    text = mypage.get()
    lines = text.split('* ')[1:]
    ignorePages = []
    for l in lines:
        ignorePages.append(l.strip())

    print "loaded %s ignored pages " % len(ignorePages)

    wr.ignorePages = ignorePages
    wr.ignorePerPages = correctWords
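For reference, a comment-only sketch of the page layout these loaders expect, inferred from the split('* ') / split(' : ') parsing above (hypothetical entries):

# Hypothetical contents of correctWords_page:
#   * teh : the
#   * teh : tea
#   * recieve : receive
# which parses into {'teh': ['the', 'tea'], 'recieve': ['receive']}.
# ignorePages_page is a plain list, one '* Title' entry per page.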
Example #23
def main():
    global always
    always = False

    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True

    mysite = pywikibot.getSite()
    # If anything needs to be prepared, you can do it here
    template_image = pywikibot.translate(pywikibot.getSite(),
                                         template_to_the_image)
    template_user = pywikibot.translate(pywikibot.getSite(),
                                        template_to_the_user)
    except_text_translated = pywikibot.translate(pywikibot.getSite(),
                                                 except_text)
    basicgenerator = pagegenerators.UnusedFilesGenerator()
    generator = pagegenerators.PreloadingGenerator(basicgenerator)
    for page in generator:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        if except_text_translated not in page.getImagePageHtml() and \
           'http://' not in page.get():
            pywikibot.output(u'\n' + page.title())
            if template_image in page.get():
                pywikibot.output(u"%s done already" % page.aslink())
                continue
            appendtext(page, u"\n\n"+template_image)
            uploader = page.getFileVersionHistory().pop()[1]
            usertalkname = u'User Talk:%s' % uploader
            usertalkpage = pywikibot.Page(mysite, usertalkname)
            msg2uploader = template_user % {'title': page.title()}
            appendtext(usertalkpage, msg2uploader)
Example #24
    def aslink(self, forceInterwiki=False, textlink=False, noInterwiki=False):
        """A string representation in the form of a link.

        This method is different from Page.aslink() as the sortkey may have
        to be included.

        """
        if self.sortKey:
            titleWithSortKey = '%s|%s' % (self.title(savetitle=True),
                                          self.sortKey)
        else:
            titleWithSortKey = self.title(savetitle=True)
        if not noInterwiki and (forceInterwiki
                                or self.site() != pywikibot.getSite()):
            if self.site().family != pywikibot.getSite().family \
                    and self.site().family.name != self.site().lang:
                return '[[%s:%s:%s]]' % (self.site().family.name,
                                         self.site().lang,
                                         self.title(savetitle=True))
            else:
                return '[[%s:%s]]' % (self.site().lang,
                                      self.title(savetitle=True))
        elif textlink:
            return '[[:%s]]' % self.title(savetitle=True)
        else:
            return '[[%s]]' % titleWithSortKey
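A comment-only sketch of the shapes this returns, for a hypothetical Category:Dogs with sortKey u'*' on en.wikipedia:

# Hypothetical return values, per the branches above:
#   other family (or forceInterwiki) : '[[wikipedia:en:Category:Dogs]]'
#   same family, other language      : '[[en:Category:Dogs]]'
#   textlink                         : '[[:Category:Dogs]]'
#   plain local link with sortkey    : '[[Category:Dogs|*]]'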
Example #25
    def __init__(self, url, urlEncoding=None, description=u'',
                 useFilename=None, keepFilename=False,
                 verifyDescription=True, ignoreWarning=False,
                 targetSite=None, uploadByUrl=False):
        """
        @param ignoreWarning: Set this to True if you want to upload even if
            another file would be overwritten or another mistake would be
            risked.

        """
        self._retrieved = False
        self.url = url
        self.urlEncoding = urlEncoding
        self.description = description
        self.useFilename = useFilename
        self.keepFilename = keepFilename
        self.verifyDescription = verifyDescription
        self.ignoreWarning = ignoreWarning
        if config.upload_to_commons:
            self.targetSite = targetSite or pywikibot.getSite('commons',
                                                              'commons')
        else:
            self.targetSite = targetSite or pywikibot.getSite()
        self.targetSite.forceLogin()
        self.uploadByUrl = uploadByUrl
def main():
    # The generator gives the pages that should be worked upon.
    gen = None
    # If debug is True, don't make any real changes; only show
    # what would have been changed.
    debug = False
    wantHelp = False


    # Parse command line arguments
    for arg in wikipedia.handleArgs():
        if arg.startswith("-debug"):
            debug = True
        else:
            wantHelp = True

    if not wantHelp:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.

        cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % 'IP-Host')
        hosts_gen = pagegenerators.CategorizedPageGenerator(cat, start=None, recurse=False)
        hosts_gen = pagegenerators.PreloadingGenerator(hosts_gen)

        cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % 'IP-Network')
        nets_gen = pagegenerators.CategorizedPageGenerator(cat, start=None, recurse=False)
        nets_gen = pagegenerators.PreloadingGenerator(nets_gen)

        bot = IpNetworkBot(nets_gen, hosts_gen, debug)
        bot.run()
    else:
        wikipedia.showHelp()
    def setUp(self):
        self.site = pywikibot.getSite('en', 'wikipedia')
        self.data = [catlib.Category(self.site, 'Category:Cat1'),
                     catlib.Category(self.site, 'Category:Cat2')]

        self.site_de = pywikibot.getSite('de', 'wikipedia')
        self.site_fr = pywikibot.getSite('fr', 'wikipedia')
def facatlist(facat):
    wikipedia.config.put_throttle = 0
    wikipedia.put_throttle.setDelay()
    count = 0
    listenpageTitle = []
    PageTitle = facat.replace(u'[[', u'').replace(u']]', u'').strip()
    language = 'fa'
    PageTitles = [PageTitle]
    for PageTitle in PageTitles:
        cat = catlib.Category(wikipedia.getSite(language), PageTitle)
        listacategory = [cat]
        listacategory = categorydown(listacategory)
        for enpageTitle in listacategory:
            enpageTitle = str(enpageTitle).split(u'|')[0].split(u']]')[0].replace(u'[[', u'').strip()
            cat = catlib.Category(wikipedia.getSite(language), enpageTitle)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            for pagework in gent:
                count += 1
                try:
                    link = str(pagework).split(u'|')[0].split(u']]')[0].replace(u'[[', u'').strip()
                except:
                    pagework = unicode(str(pagework), 'UTF-8')
                    link = pagework.split(u'|')[0].split(u']]')[0].replace(u'[[', u'').strip()
                wikipedia.output(link)
                fapagetitle = link
                wikipedia.output(u'adding ' + fapagetitle + u' to fapage lists')
                listenpageTitle.append(fapagetitle)
    if listenpageTitle == []:
        return False
    return listenpageTitle
Example #29
 def __init__(self, page, filename, summary, overwrite):
     self.page = pywikibot.Page( pywikibot.getSite(), page )
     self.filename = filename
     self.summary = summary
     self.overwrite = overwrite
     if not self.summary:
         self.summary = pywikibot.translate(pywikibot.getSite(), self.msg)
     pywikibot.setAction( self.summary )
Example #30
 def __init__(self, catTitle, listTitle, editSummary, overwrite=False, showImages=False, subCats=False, talkPages=False, recurse=False):
     self.editSummary = editSummary
     self.overwrite = overwrite
     self.showImages = showImages
     self.cat = catlib.Category(wikipedia.getSite(), 'Category:' + catTitle)
     self.list = wikipedia.Page(wikipedia.getSite(), listTitle)
     self.subCats = subCats
     self.talkPages = talkPages
     self.recurse = recurse
Example #31
def updateInterwiki (wikipediaPage = None, commonsPage = None):
    '''
    Update the interwikis at Commons from a Wikipedia page. The bot just replaces the interwiki links on the Commons page with the interwikis from the Wikipedia page.
    This should probably be more intelligent. We could add all the interwikis and remove duplicates, or only remove language links when multiple links to the same language exist.

    This function is disabled for the moment until I figure out what the best way is to update the interwikis.
    '''
    interwikis = {}
    comment= u''
    interwikilist = wikipediaPage.interwiki()
    interwikilist.append(wikipediaPage)

    for interwikiPage in interwikilist:
        interwikis[interwikiPage.site()]=interwikiPage
    oldtext = commonsPage.get()
    # The commonssite object doesn't work with interwikis
    newtext = wikipedia.replaceLanguageLinks(oldtext, interwikis, wikipedia.getSite(u'nl'))
    comment = u'Updating interwiki\'s from [[' + wikipediaPage.site().language()  + u':' + wikipediaPage.title() + u']]'

    if newtext != oldtext:
        # This doesn't seem to work; newtext has some trailing whitespace
        wikipedia.showDiff(oldtext, newtext)
        commonsPage.put(newtext=newtext, comment=comment)
Example #32
def removeCategoryLinks(text, site=None, marker=''):
    """Return text with all category links removed.

    Put the string marker after the last replacement (at the end of the text
    if there is no replacement).

    """
    # This regular expression will find every category link, plus trailing
    # whitespace. The category namespace name is grouped.
    if site is None:
        site = pywikibot.getSite()
    catNamespace = '|'.join(site.category_namespaces())
    categoryR = re.compile(r'\[\[\s*(%s)\s*:.*?\]\]\s*' % catNamespace, re.I)
    text = replaceExcept(text, categoryR, '',
                         ['nowiki', 'comment', 'math', 'pre', 'source'],
                         marker=marker)
    if marker:
        # Avoid having multiple linefeeds at the end of the text
        text = re.sub(r'\s*%s' % re.escape(marker), '\r\n' + marker,
                      text.strip())
    return text.strip()
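A minimal usage sketch (hypothetical wikitext; assumes 'Category' is among the site's category namespaces; results follow the logic above):

# Hypothetical input and results:
text = u'Some article text.\n[[Category:Dogs]]\n[[Category:Pets]]\n'
# removeCategoryLinks(text)
#     -> u'Some article text.'
# removeCategoryLinks(text, marker='<!--cats-->')
#     -> u'Some article text.\r\n<!--cats-->'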
Example #33
 def __init__(self, hours, no_repeat, delay, user):
     self.hours = hours
     self.no_repeat = no_repeat
     if delay is None:
         self.delay = min(15, max(5, int(self.hours * 60)))
     else:
         self.delay = max(5, delay)
     self.user = user
     self.site = pywikibot.getSite()
     if self.user:
         localSandboxTitle = pywikibot.translate(self.site,
                                                 user_sandboxTemplate,
                                                 fallback=False)
         localSandbox = pywikibot.Page(self.site, localSandboxTitle)
         content.update(user_content)
         sandboxTitle[self.site.lang] = [
             item.title() for item in localSandbox.getReferences(
                 onlyTemplateInclusion=True)
         ]
         if self.site.lang not in user_sandboxTemplate:
             sandboxTitle[self.site.code] = []
             pywikibot.output(
                 u'Not properly set-up to run in user namespace!')
def getFilename(photoInfo=None,
                site=pywikibot.getSite(u'commons', u'commons'),
                project=u'Flickr'):
    ''' Build a good filename for the upload based on the username and the
    title. Prevents naming collisions.

    '''
    username = photoInfo.find('photo').find('owner').attrib['username']
    title = photoInfo.find('photo').find('title').text
    if title:
        title = cleanUpTitle(title)
    else:
        title = u''

    if title == u'':
        description = photoInfo.find('photo').find('description').text
        if description:
            if len(description) > 120:
                description = description[0:120]
            title = cleanUpTitle(description)
        else:
            title = u''
            # Should probably have the id of the photo as last resort.

    if pywikibot.Page(site, u'File:%s - %s - %s.jpg' %
                      (title, project, username)).exists():
        i = 1
        while True:
            if (pywikibot.Page(
                    site, u'File:%s - %s - %s (%s).jpg' %
                (title, project, username, str(i))).exists()):
                i = i + 1
            else:
                return u'%s - %s - %s (%s).jpg' % (title, project, username,
                                                   str(i))
    else:
        return u'%s - %s - %s.jpg' % (title, project, username)
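A comment-only sketch of the names this yields (hypothetical title and owner; the numbered suffix is only used once the plain name is taken):

# Hypothetical generated filenames:
#   free name          : u'Sunset - Flickr - alice.jpg'
#   name already taken : u'Sunset - Flickr - alice (1).jpg'
#   that one taken too : u'Sunset - Flickr - alice (2).jpg'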
def TextfilePageGenerator(filename=None, site=None):
    """Iterate pages from a list in a text file.

    The file must contain page links between double square brackets or,
    alternatively, one page title per line. The generator will yield each
    corresponding Page object.

    @param filename: the name of the file that should be read. If no name is
                     given, the generator prompts the user.
    @param site: the default Site for which Page objects should be created

    """
    if filename is None:
        filename = pywikibot.input(u'Please enter the filename:')
    if site is None:
        site = pywikibot.getSite()
    f = codecs.open(filename, 'r', config.textfile_encoding)

    # title ends either before | or before ]]
    R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)')
    pageTitle = None
    for pageTitle in R.findall(f.read()):
        # If the link is in interwiki format, the Page object may reside
        # on a different Site than the default.
        # This makes it possible to work on different wikis using a single
        # text file, but also could be dangerous because you might
        # inadvertently change pages on another wiki!
        yield pywikibot.Page(site, pageTitle)
    if pageTitle is None:
        f.seek(0)
        for title in f:
            title = title.strip()
            if '|' in title:
                title = title[:title.index('|')]
            if title:
                yield pywikibot.Page(site, title)
    f.close()
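A short usage sketch (hypothetical file name and contents):

# Hypothetical pages.txt -- either bracketed links anywhere in the file:
#   [[Foo]] some text [[Talk:Bar|label]]
# or, if no [[...]] links are found at all, one bare title per line.
for page in TextfilePageGenerator(u'pages.txt'):
    pywikibot.output(page.title())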
Example #36
 def __init__(self,
              password=None,
              sysop=False,
              site=None,
              username=None,
              verbose=False):
     self.site = site or pywikibot.getSite()
     self.sysop = sysop
     if username:
         self.username = username
         # perform writeback.
         if self.site.family.name not in config.usernames:
             config.usernames[self.site.family.name] = {}
         config.usernames[self.site.family.name][self.site.lang] = username
     else:
         if sysop:
             try:
                 self.username = config.sysopnames\
                                 [self.site.family.name][self.site.lang]
             except:
                 raise pywikibot.NoUsername(
                     u'ERROR: Sysop username for %s:%s is undefined.\nIf you have a sysop account for that site, please add such a line to user-config.py:\n\nsysopnames[\'%s\'][\'%s\'] = \'myUsername\''
                     % (self.site.family.name, self.site.lang,
                        self.site.family.name, self.site.lang))
         else:
             try:
                 self.username = config.usernames[self.site.family.name][
                     self.site.lang]
             except:
                 raise pywikibot.NoUsername(
                     u'ERROR: Username for %s:%s is undefined.\nIf you have an account for that site, please add such a line to user-config.py:\n\nusernames[\'%s\'][\'%s\'] = \'myUsername\''
                     % (self.site.family.name, self.site.lang,
                        self.site.family.name, self.site.lang))
     self.password = password
     self.verbose = verbose
     if getattr(config, 'password_file', ''):
         self.readPassword()
Example #37
 def getHints(self):
     print "Parsing warnfile..."
     R = re.compile(
         r'WARNING: (?P<family>.+?): \[\[(?P<locallang>.+?):(?P<localtitle>.+?)\]\](?P<warningtype>.+?)\[\[(?P<targetlang>.+?):(?P<targettitle>.+?)\]\]'
     )
     import codecs
     f = codecs.open(self.filename, 'r', 'utf-8')
     hints = {}
     removeHints = {}
     mysite = wikipedia.getSite()
     for line in f.readlines():
         m = R.search(line)
         if m:
             #print "DBG>",line
             if m.group('locallang') == mysite.lang and m.group(
                     'family') == mysite.family.name:
                 #wikipedia.output(u' '.join([m.group('locallang'), m.group('localtitle'), m.group('warningtype'), m.group('targetsite'), m.group('targettitle')]))
                 #print m.group(3)
                 page = wikipedia.Page(mysite, m.group('localtitle'))
                 removing = (
                     m.group('warningtype') == ' links to incorrect ')
                 try:
                     targetSite = mysite.getSite(code=m.group('targetlang'))
                     targetPage = wikipedia.Page(targetSite,
                                                 m.group('targettitle'))
                     if removing:
                         if page not in removeHints:
                             removeHints[page] = []
                         removeHints[page].append(targetPage)
                     else:
                         if page not in hints:
                             hints[page] = []
                         hints[page].append(targetPage)
                 except wikipedia.Error:
                     print "DBG> Failed to add", line
     f.close()
     return hints, removeHints
Example #38
def main():
    oldImage = None
    newImage = None
    summary = ''
    always = False
    loose = False
    # read command line parameters
    for arg in wikipedia.handleArgs():
        if arg == '-always':
            always = True
        elif arg == '-loose':
            loose = True
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = wikipedia.input(u'Choose an edit summary: ')
            else:
                summary = arg[len('-summary:'):]
        else:
            if oldImage:
                newImage = arg
            else:
                oldImage = arg

    if not oldImage:
        wikipedia.showHelp('image')
    else:
        mysite = wikipedia.getSite()
        ns = mysite.image_namespace()

        oldImagePage = wikipedia.ImagePage(mysite, ns + ':' + oldImage)

        gen = pagegenerators.FileLinksGenerator(oldImagePage)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)

        bot = ImageRobot(preloadingGen, oldImage, newImage, summary, always,
                         loose)
        bot.run()
Example #39
    def __init__(self):
        self.wiki = self.coreWiki = wikipedia.getSite(code=u'en', fam=u'naruto')
        wikipedia.setAction(wikipedia.translate(self.wiki, self.getSummaries()))
        # This factory is responsible for processing command line arguments
        # that are also used by other scripts and that determine on which pages
        # to work on.
        genFactory = pagegenerators.GeneratorFactory()
        gen = None
        PageTitles = []
        for arg in wikipedia.handleArgs():
            if arg.startswith('-page'):
                if len(arg) == 5:
                    PageTitles.append(wikipedia.input(u'\03{lightblue}Which page do you want to change?\03{default}'))
                elif len(arg) > 6:
                    PageTitles.append(arg[6:])
            else:
                generator = genFactory.handleArg(arg)
                if generator:
                    gen = generator
        if not gen and PageTitles:
            pages = [wikipedia.Page(self.wiki, PageTitle) for PageTitle in PageTitles]
            gen = iter(pages)

        self.generator = gen
Example #40
def updateStats(date, uncatCount, checkCount, totalCount):
    '''
    Update the stats
    '''
    page = wikipedia.Page(wikipedia.getSite(),
                          u'User:Multichill/Categorization_stats')

    newstats = u'|-\n|' + str(date) + u'\n|' + str(uncatCount) + u'\n|' + str(
        checkCount) + u'\n|' + str(totalCount) + u'\n'
    newtext = page.get()

    if newtext.find(new_marker) == -1:
        wikipedia.output(u'No marker found!')
        newtext = newtext + newstats + new_marker
    else:
        newtext = newtext.replace(new_marker, newstats + new_marker)

    comment = u'Updating stats: ' + str(
        uncatCount) + u' uncategorized files, ' + str(
            checkCount) + u' files to be checked, ' + str(
                totalCount) + u' files in total'
    wikipedia.output(comment)
    wikipedia.showDiff(page.get(), newtext)
    page.put(newtext=newtext, comment=comment)
Example #41
def put(title, contents):
    mysite = pywikibot.getSite()
    page = pywikibot.Page(mysite, title)
    # Show the title of the page we're working on.
    # Highlight the title in purple.
    pywikibot.output(u">>> \03{lightpurple}%s\03{default} <<<" % page.title())
    # Check if it exists:
    #    if page.exists():
    #        print "EXISTS!"
    #        return
    #    else:
    #        print "DOES NOT EXIST!"
    # Post it:
    comment = "Import from spreadsheet via script."
    try:
        page.put(contents, comment=comment, minorEdit=False)
    except pywikibot.LockedPage:
        pywikibot.output(u"Page %s is locked; skipping." % title)
    except pywikibot.EditConflict:
        pywikibot.output(u'Skipping %s because of edit conflict' % title)
    except pywikibot.SpamfilterError, error:
        pywikibot.output(
            u'Cannot change %s because of spam blacklist entry %s' %
            (title, error.url))
def NamespaceFilterPageGenerator(generator, namespaces, site=None):
    """
    Wraps around another generator. Yields only those pages that are in one
    of the given namespaces.

    The namespace list can contain both integers (namespace numbers) and
    strings/unicode strings (namespace names). Namespace may also be a single
    number or a single string.
    """
    # convert namespace names to namespace numbers
    if site is None:
        site = pywikibot.getSite()
    if isinstance(namespaces, (int, basestring)):
        namespaces = [namespaces]
    for i in xrange(len(namespaces)):
        ns = namespaces[i]
        if isinstance(ns, basestring):
            index = site.getNamespaceIndex(ns)
            if index is None:
                raise ValueError(u'Unknown namespace: %s' % ns)
            namespaces[i] = index
    for page in generator:
        if page.namespace() in namespaces:
            yield page
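A short usage sketch (hypothetical source generator; namespace names are resolved through the site, as above):

# Hypothetical usage: keep only main-namespace pages and their talk pages.
gen = pagegenerators.AllpagesPageGenerator()  # any page generator will do
for page in NamespaceFilterPageGenerator(gen, [0, 'Talk']):
    pywikibot.output(page.title())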
Example #43
def main():
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # Connect database, we need that
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()

    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()

    if generator:
        # Get a preloading generator with only images
        pgenerator = pagegenerators.PreloadingGenerator(
            pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
        for page in pgenerator:
            rijksmonumentid = getRijksmonumentid(page, conn, cursor)
            if rijksmonumentid:
                addRijksmonumentid(page, rijksmonumentid)
def renew_optinhash(db, optin):
    optinPage = wikipedia.Page(wikipedia.getSite(), optin)
    text = optinPage.get()
    # TODO
    text = text.replace("'", "\\'")
    text = text.replace('"', '\\"')
    text = text.replace(";", "")
    names = text.split('\n')
    names = [n for n in names if n != '']
    # Now build the opt-in hash
    query = """
    select user_id, user_name from dewiki_p.user
    where user_name in ('%s')
    """ % "', '".join(names)
    c = db.cursor()
    c.execute(query.encode('utf-8'))
    lines = c.fetchall()
    f = open(optinhashfile, 'w')
    f.write('# -*- coding: utf-8  -*-\n')
    f.write('optinhash = {\n')
    for l in lines:
        f.write("%s : '%s',\n" % (l[0], l[1].replace("'", "\\'")))
    f.write("'dummy' : -1}")
    f.close()
Example #45
    def run(self):
        site = pywikibot.getSite()
        newCat = catlib.Category(site, self.newCatTitle)
        # set edit summary message
        if not self.editSummary:
            self.editSummary = i18n.twtranslate(site, 'category-changing') \
                               % {'oldcat':self.oldCat.title(),
                                  'newcat':newCat.title()}

        if self.useSummaryForDeletion and self.editSummary:
            reason = self.editSummary
        else:
            reason = i18n.twtranslate(site, deletion_reason_move) \
                     % {'newcat': self.newCatTitle, 'title': self.newCatTitle}

        # Copy the category contents to the new category page
        copied = False
        oldMovedTalk = None
        if self.oldCat.exists() and self.moveCatPage:
            copied = self.oldCat.copyAndKeep(
                self.newCatTitle, pywikibot.translate(site, cfd_templates))
            # Also move the talk page
            if copied:
                oldTalk = self.oldCat.toggleTalkPage()
                if oldTalk.exists():
                    newTalkTitle = newCat.toggleTalkPage().title()
                    try:
                        talkMoved = oldTalk.move(newTalkTitle, reason)
                    except (pywikibot.NoPage, pywikibot.PageNotSaved), e:
                        #in order :
                        #Source talk does not exist, or
                        #Target talk already exists
                        pywikibot.output(e.message)
                    else:
                        if talkMoved:
                            oldMovedTalk = oldTalk
Example #46
def processPhoto(photo_id):
    '''
    Work on a single photo at 
    http://www.photolibrary.fema.gov/photolibrary/photo_details.do?id=<photo_id>    
    get the metadata, check for dupes, build description, upload the image
    '''
    print "Working on: " + str(photo_id)
    # Get all the metadata
    metadata = getMetadata(photo_id)
    if not metadata:
        print "Didn't find metadata at http://www.photolibrary.fema.gov/photolibrary/photo_details.do?id=" + str(
            photo_id)
        #Incorrect photo_id
        return

    photoUrl = u'http://www.fema.gov/photodata/original/' + str(
        photo_id) + '.jpg'
    photo = downloadPhoto(photoUrl)

    duplicates = findDuplicateImages(photo)
    # We don't want to upload dupes
    if duplicates:
        wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        return

    title = buildTitle(photo_id, metadata)
    description = buildDescription(photo_id, metadata)

    bot = upload.UploadRobot(photoUrl,
                             description=description,
                             useFilename=title,
                             keepFilename=True,
                             verifyDescription=False,
                             targetSite=wikipedia.getSite(
                                 'commons', 'commons'))
    bot.upload_image(debug=False)
Example #47
def removeLanguageLinks(text, site=None, marker=''):
    """Return text with all interlanguage links removed.

    If a link to an unknown language is encountered, a warning is printed.
    If a marker is defined, that string is placed at the location of the
    last occurrence of an interwiki link (at the end if there are no
    interwiki links).

    """
    if site is None:
        site = pywikibot.getSite()
    if not site.validLanguageLinks():
        return text
    # This regular expression will find every interwiki link, plus trailing
    # whitespace.
    languages = '|'.join(site.validLanguageLinks() +
                         site.family.obsolete.keys())
    interwikiR = re.compile(r'\[\[(%s)\s?:[^\[\]\n]*\]\][\s]*' % languages,
                            re.IGNORECASE)
    text = replaceExcept(text,
                         interwikiR,
                         '', ['nowiki', 'comment', 'math', 'pre', 'source'],
                         marker=marker)
    return text.strip()
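A minimal usage sketch (hypothetical wikitext; assumes 'de' and 'fr' are in site.validLanguageLinks()):

# Hypothetical input and result:
# removeLanguageLinks(u'Some text.\n[[de:Hunde]]\n[[fr:Chiens]]\n')
#     -> u'Some text.'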
Example #48
def getRefnum(article):
    page = wikipedia.Page(wikipedia.getSite(), article)

    if page.exists() and (page.namespace() == 0) and not page.isRedirectPage():
        refnum = u''
        templates = page.templatesWithParams()

        for (template, params) in templates:
            if template.lower().replace(u'_', u' ') == u'infobox nrhp':
                for param in params:
                    #Split at =
                    (field, sep, value) = param.partition(u'=')
                    # Remove leading or trailing spaces
                    field = field.strip()
                    value = value.split("<ref")[0].strip()

                    #Check first that field is not empty
                    if field:
                        if field == u'refnum':
                            refnum = value
                            return refnum.strip().lstrip(u'#')

    # We didn't find anything so return empty string
    return u''
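# Usage sketch (example article title): print the NRHP reference number taken
# from the article's {{Infobox NRHP}} template, or an empty string if there
# is none.
if __name__ == "__main__":
    print getRefnum(u'Empire State Building')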
Exemple #49
0
    def __init__(self, site, name):
        """
        Initializer for a User object.

        Parameters:
        site - a wikipedia.Site object
        name - name of the user, without the trailing User:
        """
        if type(site) in [str, unicode]:
            self._site = wikipedia.getSite(site)
        else:
            self._site = site
        self._name = name
        self._blocked = None  # None means not loaded
        self._groups = None  # None means not loaded
        # self._editcount = -1  # -1 means not loaded
        self._registrationTime = -1
        #if self.site().versionnumber() >= 16:
        #    self._urToken = None
        if name[0] == '#':
            # This user is probably being queried for the purpose of lifting
            # an autoblock.
            wikipedia.output(
                "This is an autoblock ID; you can only use it to unblock.")
Exemple #50
0
def buildDescription(flinfoDescription=u'',
                     flickrreview=False,
                     reviewer=u'',
                     addCategory=u'',
                     removeCategories=False,
                     rijksmonumentid=1):
    '''
    Build the final description for the image, based on the flinfo output
    with some site-specific improvements.
    '''
    description = flinfoDescription

    description = description.replace(
        u'\n|Source=[http://www.flickr.com/',
        u'\n{{Rijksmonument|%s}}\n|Source=[http://www.flickr.com/' %
        (rijksmonumentid, ))

    if removeCategories:
        description = wikipedia.removeCategoryLinks(
            description, wikipedia.getSite('commons', 'commons'))

    # Add template
    description = description.replace(
        u'{{cc-by', u'{{Wiki Loves Monuments 2011|nl}}\n{{cc-by')

    if flickrreview:
        if reviewer:
            description = description.replace(
                u'{{flickrreview}}', u'{{flickrreview|' + reviewer +
                '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}'
            )

    if addCategory:
        description = description.replace(u'{{subst:unc}}\n', u'')
        description = description + u'\n[[Category:' + addCategory + ']]\n'
    description = description.replace(u'\r\n', u'\n')
    return description
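# Usage sketch with a toy flinfo description; the reviewer name, category and
# rijksmonument id below are hypothetical. Each replacement in
# buildDescription() finds its anchor string in this sample.
if __name__ == "__main__":
    sample = (u'{{Information\n'
              u'|Source=[http://www.flickr.com/photos/example/1/ Flickr]\n'
              u'}}\n'
              u'{{cc-by-sa-2.0}}\n'
              u'{{flickrreview}}\n'
              u'{{subst:unc}}\n')
    print buildDescription(sample,
                           flickrreview=True,
                           reviewer=u'ExampleReviewer',
                           addCategory=u'Rijksmonumenten',
                           rijksmonumentid=12345)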
Exemple #51
0
def main(args):
    '''
    Main loop. Get a generator and options. Work on all images in the generator.
    '''
    generator = None
    onlyFilter = False
    onlyUncat = False
    genFactory = pagegenerators.GeneratorFactory()

    global search_wikis
    global hint_wiki

    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    for arg in wikipedia.handleArgs():
        if arg == '-onlyfilter':
            onlyFilter = True
        elif arg == '-onlyuncat':
            onlyUncat = True
        elif arg.startswith('-hint:'):
            hint_wiki = arg[len('-hint:'):]
        elif arg.startswith('-onlyhint:'):
            search_wikis = arg[len('-onlyhint:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = pagegenerators.CategorizedPageGenerator(catlib.Category(
            site, u'Category:Media needing categories'),
                                                            recurse=True)

    initLists()
    categorizeImages(generator, onlyFilter, onlyUncat)

    wikipedia.output(u'All done')
Exemple #52
0
                wikipedia.output(u'Page %s is locked?!' % page.title())


if __name__ == "__main__":
    singlepage = []
    gen = None
    start = None
    try:
        action = None
        for arg in wikipedia.handleArgs():
            if arg == 'pages':
                action = 'pages'
            elif arg == 'categories':
                action = 'categories'
            elif arg.startswith('-start:'):
                start = wikipedia.Page(wikipedia.getSite(), arg[7:])
                gen = pagegenerators.AllpagesPageGenerator(
                    start.titleWithoutNamespace(),
                    namespace=start.namespace(),
                    includeredirects=False)
            elif arg.startswith('-cat:'):
                cat = catlib.Category(wikipedia.getSite(),
                                      'Category:%s' % arg[5:])
                gen = pagegenerators.CategorizedPageGenerator(cat)
            elif arg.startswith('-ref:'):
                ref = wikipedia.Page(wikipedia.getSite(), arg[5:])
                gen = pagegenerators.ReferringPageGenerator(ref)
            elif arg.startswith('-link:'):
                link = wikipedia.Page(wikipedia.getSite(), arg[6:])
                gen = pagegenerators.LinkedPageGenerator(link)
            elif arg.startswith('-page:'):
Exemple #53
0
        # put the list of alternatives into the listbox
        self.list = list
        # find the required area
        laenge = len(list)
        maxbreite = 0
        for i in range(laenge):
            # cycle through all list items to find the maximum length
            if len(list[i]) + len(str(i)) > maxbreite:
                maxbreite = len(list[i]) + len(str(i))
            # show the list as formerly in the DOS window
            self.listbox.insert(END, str(i) + ' - ' + list[i])
        # set optimized height & width
        self.listbox.config(height=laenge, width=maxbreite + 2)
        # wait for user to push a button which will destroy (close) the window
        return self.list


if __name__ == "__main__":
    import wikipedia as pywikibot
    try:
        root = Tk()
        root.resizable(width=FALSE, height=FALSE)
        root.title("Pywikipediabot GUI")
        page = pywikibot.Page(pywikibot.getSite(), u'Wiki')
        content = page.get()
        myapp = EditBoxWindow(root)
        myapp.bind("<Control-d>", myapp.debug)
        v = myapp.edit(content, highlight=page.title())
    finally:
        pywikibot.stopme()
Exemple #54
0
        elif arg.startswith("-html"):
            correct_html_codes = True
        elif arg.startswith("-rebuild"):
            rebuild = True
        elif arg.startswith("-noname"):
            checknames = False
        elif arg.startswith("-checklang:"):
            checklang = arg[11:]
        elif arg.startswith("-knownonly"):
            knownonly = True
        elif arg.startswith("-knownplus"):
            knownonly = 'plus'
        else:
            title.append(arg)

    mysite = pywikibot.getSite()
    if not checklang:
        checklang = mysite.language()
    filename = pywikibot.config.datafilepath('externals/spelling',
                                             'spelling-' + checklang + '.txt')
    print "Getting wordlist"
    try:
        f = codecs.open(filename, 'r', encoding=mysite.encoding())
        for line in f.readlines():
            # remove trailing newlines and carriage returns
            try:
                while line[-1] in ['\n', '\r']:
                    line = line[:-1]
            except IndexError:
                pass
            # skip empty lines
Exemple #55
0
def asktoadd(pl):
    if pl.site != mysite:
        return
    if pl.isRedirectPage():
        pl2 = pl.getRedirectTarget()
        if needcheck(pl2):
            tocheck.append(pl2)
            checked[pl2] = pl2
        return
    ctoshow = 500
    pywikibot.output(u'')
    pywikibot.output(u"==%s==" % pl.title())
    while True:
        answer = raw_input("y(es)/n(o)/i(gnore)/(o)ther options? ")
        if answer == 'y':
            include(pl)
            break
        if answer == 'c':
            include(pl, realinclude=False)
            break
        if answer == 'z':
            if pl.exists():
                if not pl.isRedirectPage():
                    linkterm = pywikibot.input(
                        u"In what manner should it be alphabetized?")
                    include(pl, linkterm=linkterm)
                    break
            include(pl)
            break
        elif answer == 'n':
            exclude(pl)
            break
        elif answer == 'i':
            exclude(pl, real_exclude=False)
            break
        elif answer == 'o':
            pywikibot.output(u"t: Give the beginning of the text of the page")
            pywikibot.output(
                u"z: Add under another title (as [[Category|Title]])")
            pywikibot.output(
                u"x: Add the page, but do not check links to and from it")
            pywikibot.output(u"c: Do not add the page, but do check links")
            pywikibot.output(u"a: Add another page")
            pywikibot.output(u"l: Give a list of the pages to check")
        elif answer == 'a':
            pagetitle = raw_input("Specify page to add:")
            page = pywikibot.Page(pywikibot.getSite(), pagetitle)
            if page not in checked.keys():
                include(page)
        elif answer == 'x':
            if pl.exists():
                if pl.isRedirectPage():
                    pywikibot.output(
                        u"Redirect page. Will be included normally.")
                    include(pl, realinclude=False)
                else:
                    include(pl, checklinks=False)
            else:
                pywikibot.output(u"Page does not exist; not added.")
                exclude(pl, real_exclude=False)
            break
        elif answer == 'l':
            pywikibot.output(u"Number of pages still to check: %s" %
                             len(tocheck))
            pywikibot.output(u"Pages to be checked:")
            pywikibot.output(u" - ".join(page.title() for page in tocheck))
            pywikibot.output(u"==%s==" % pl.title())
        elif answer == 't':
            pywikibot.output(u"==%s==" % pl.title())
            try:
                pywikibot.output(u'' + pl.get(get_redirect=True)[0:ctoshow])
            except pywikibot.NoPage:
                pywikibot.output(u"Page does not exist.")
            ctoshow += 500
        else:
            pywikibot.output(u"Not understood.")
Exemple #56
0
def isdate(s):
    """Return True if s is a date or a year."""
    fmt, val = date.getAutoFormat(pywikibot.getSite().language(), s)
    return fmt is not None
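# Usage sketch; the result depends on the configured site language (English
# assumed here, with the compat 'date' module imported at the top of the file).
if __name__ == "__main__":
    for s in [u'January 1', u'1984', u'not a date']:
        print s, isdate(s)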
Exemple #57
0
#!/usr/bin/python
# -*- coding: utf-8 -*-
# By: Reza (رضا) (User:reza1615 on fa.wikipedia)
# Distributed under the terms of the CC-BY-SA 3.0.
import wikipedia
import pagegenerators, query, sys
import fa_cosmetic_changes
import re, os, codecs, catlib, login
wikipedia.config.put_throttle = 0
wikipedia.put_throttle.setDelay()
secondwiki = 'en'
faSite = wikipedia.getSite('fa')
enSite = wikipedia.getSite(secondwiki)
txtTmp = ''
faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیًٌٍَُِّْٓيك'
msg = u'ربات: افزودن نگارخانهٔ آزاد به مقاله'  # "Bot: adding a free gallery to the article"
usernames = u'Fatranslator'
_cache = {}


def login_fa(usernames):
    try:
        password_fa = open("/data/project/rezabot/pywikipedia/passfile2", 'r')
    except IOError:
        password_fa = open("/home/reza/compat/passfile2", 'r')

    password_fa = password_fa.read().replace('"', '').strip()
    passwords = password_fa.split('(')[1].split(',')[1].split(')')[0].strip()
    #-------------------------------------------
    botlog = login.LoginManager(password=passwords,
                                username=usernames,
Exemple #58
0
def main(*args):

    print "ARGS:%s\n" % sys.argv

    genFactory = pagegenerators.GeneratorFactory()
    # If xmlfilename is None, references will be loaded from the live wiki.
    xmlfilename = None
    user = None
    skip = False
    timestamp = None
    # read command line parameters
    for arg in pywikibot.handleArgs(*args):
        xmlfilename = arg
    print xmlfilename
    insite = pywikibot.getSite("en", "wikipedia")
    importsite = "speedydeletion"
    outsite = pywikibot.getSite("en", importsite)
    outsite.forceLogin()

    try:
        print "try to open %s\n" % xmlfilename
        with open(xmlfilename) as f:
            pass
    except IOError:
        print "cannot open %s\n" % xmlfilename
        sys.exit(1)

    if sys.argv[1] == "--validate":
        tempfile = "%s.tmp" % xmlfilename
        status = subprocess.call("xmllint --recover  %s -o %s" %
                                 (xmlfilename, tempfile),
                                 shell=True)
        print "status %d\n" % status
    else:
        tempfile = xmlfilename

    dump = xmlreader.XmlDump(tempfile)
    count = 0

    for entry in dump.parse():
        #        print  file_store[entry.title]
        title = entry.title.encode("utf8", "ignore")

        if re.search("^User:"******"^Wikipedia:", entry.title):
            #            pywikibot.output(u'skipping %s' % entry.title)
            continue
#        if re.search("^User:", entry.title) or re.search("^User Talk:", entry.title):
#            pywikibot.output(u'skipping %s' % entry.title)
#            continue
        if re.search(".css$", entry.title):
            #            pywikibot.output(u'skipping %s' % entry.title)
            continue
        if re.search("^Main Page", entry.title):
            #            pywikibot.output(u'skipping %s' % entry.title)
            continue


#        pywikibot.output(u'Considering %s' % entry.title)
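        # Normalize the title into a key for the local file_store cache
        # (assumption: file_store maps sanitized titles to a seen flag).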
        title = title.replace(":", "_")
        title = title.replace("!", "_")
        title = title.replace("/", "_")
        title = title.replace("\\", "_")
        title = decode(title)
        try:
            if len(title) < 1:
                pywikibot.output(u'empty title:%s' % entry.title)
                continue

            if (file_store[title]):
                count = count + 1
            else:
                pywikibot.output(u'not exists %s' % entry.title)
        except KeyError:
            try:
                outpage = pywikibot.Page(site=outsite,
                                         title=entry.title,
                                         insite=outsite)

                exists = False
                try:
                    exists = outpage.exists()
                except:
                    pywikibot.output(
                        u'key error exiting article %s transformed to %s' %
                        (entry.title, title))

                if exists:
                    #pywikibot.output(u'there is an article %s' % entry.title)
                    try:
                        file_store[title] = 1
                    except KeyError:
                        pywikibot.output(
                            u'key error saving article %s transformed to %s' %
                            (entry.title, title))

                else:
                    pywikibot.output(u'is not there, adding  %s' % entry.title)
                    contents = entry.text
                    usernames = entry.username
                    if re.search('Template:', title):
                        contents = contents + "<noinclude>{{wikipedia-template|%s}}</noinclude>" % usernames
                    else:
                        contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames
                    outpage._site = outsite
                    try:
                        outpage.put(contents)
                    except:
                        pywikibot.output(u'cannot put article %s / %s' %
                                         (entry.title, title))
                try:
                    file_store[title] = 1
                except KeyboardInterrupt:
                    print "Bye"
                    sys.exit()

                except KeyError:
                    pywikibot.output(
                        u'could not save %s! to the list of article' %
                        entry.title)

            except KeyboardInterrupt:
                print "Bye"
                sys.exit()
            except KeyError:
                pywikibot.output(u'problem with  %s! ' % entry.title)

            finally:
                count = count + 1

        except KeyboardInterrupt:
            print "Bye"
            sys.exit()
        except KeyError:
            pywikibot.output(u'problem2 with  %s! ' % entry.title)

        finally:
            count = count + 1
Exemple #59
0
def main():
    # No script-specific arguments; handleArgs() only consumes the global ones.
    for arg in pywikibot.handleArgs():
        continue
    bot = myRevertBot(site=pywikibot.getSite())
    bot.revert_contribs()
Exemple #60
0
class AfDBot:
    # Edit summary message that should be used.
    msg = {
        'en':
        u'New section: /* [[Wikipedia:Articles for deletion|AfD]] nomination */ Notification',
    }

    def __init__(self, AfDlog, always, debug=False):
        """
        Constructor. Parameters:
            * AfDlog        - The AfD log to be treated.
            * always        - If True, the user won't be prompted before
                              changes are made.
            * debug         - If True, don't edit pages. Only show proposed
                             edits.
        """
        self.AfDlog = AfDlog
        self.always = always
        self.debug = debug
        self.site = AfDlog.site()
        self.db = None
        self.replag = None

        locale.setlocale(locale.LC_ALL, 'nl_NL.UTF-8')
        os.environ['TZ'] = 'Europe/Amsterdam'

    def run(self):
        # Set up database access
        try:
            self.db = querier.querier(host="nlwiki.labsdb")
        except Exception, error:
            wikipedia.output(u'Could not connect to database: %s.' % error,
                             toStdout=False)

        # Dictionaries of users with page_title and AfD_title tuple.
        self.contributors = {}

        if self.db:
            # Get replag
            sql = """
                    SELECT time_to_sec(timediff(now()+0,CAST(rev_timestamp AS int))) AS replag
                    FROM nlwiki_p.revision
                    ORDER BY rev_timestamp DESC
                    LIMIT 1;"""
            result = self.db.do(sql)

            if not result:
                wikipedia.output(
                    u'Could not get replag. Assuming it\'s infinite (= 1 month).'
                )
                self.replag = 30 * 24 * 3600
            else:
                self.replag = int(result[0]['replag'])
                wikipedia.output(u'Replag: %is.' % self.replag)

        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        try:
            # Load the page
            text = self.AfDlog.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping." %
                             self.AfDlog.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping." %
                             self.AfDlog.aslink())
            return

        # Find AfD's
        pageR = re.compile(r'^\*[ ]*?\[\[(?P<page>.*?)(?:\|.*?\]\]|\]\])')
        timestampR = re.compile(r'(\d{1,2}) (.{3}) (\d{4}) (\d{2}):(\d{2})')
        userR = re.compile(
            r'\[\[(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)(?:\|.*?\]\]|\]\])')
        strictTemplateR = re.compile(
            r'\{\{(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)\/[Hh]andtekening\}\}')
        templateR = re.compile(
            r'\{\{(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)\/.*?\}\}')
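        # Example of an AfD log line these patterns are meant to match
        # (an assumption; note the Dutch month abbreviation from the
        # nl_NL locale set in the constructor):
        # * [[Some article]] - [[Gebruiker:Example|Example]] 12 jan 2011 13:37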
        pages = []
        lines = text.splitlines()
        for line in lines:
            mPage = pageR.search(line)
            mTimestamp = timestampR.search(line)
            if mTimestamp:
                t = time.strftime(
                    '%Y%m%d%H%M%S',
                    time.gmtime(
                        time.mktime(
                            time.strptime(mTimestamp.group(),
                                          '%d %b %Y %H:%M'))))
            else:
                t = None
            if mPage and userR.search(line):
                pages.append(
                    (mPage.group('page'), userR.search(line).group('user'), t))
                continue
            elif mPage and strictTemplateR.search(line):
                pages.append((mPage.group('page'),
                              strictTemplateR.search(line).group('user'), t))
                continue
            elif mPage and templateR.search(line):
                pages.append((mPage.group('page'),
                              templateR.search(line).group('user'), t))
                continue
            elif mPage:
                pages.append((mPage.group('page'), None, t))
                continue
        wikipedia.output(u'Found %i AfD\'s.' % len(pages))

        # Treat AfD's
        for p in pages:
            page = wikipedia.Page(self.site, p[0])
            nominator = p[1]
            timestamp = p[2]
            page_contributors = self.getcontributors(page, timestamp)

            for contributor in page_contributors:
                if contributor not in self.contributors:
                    self.contributors[contributor] = [(page.title(), nominator)]
                else:
                    self.contributors[contributor].append(
                        (page.title(), nominator))

        # Treat users
        wikipedia.output(u'\n\nFound %i unique users.' %
                         len(self.contributors))
        pages = []  # User talk pages
        for user in self.contributors.keys():
            pages.append(u'%s:%s' % (self.site.namespace(3), user))

        gen = pagegenerators.PagesFromTitlesGenerator(pages, site=self.site)
        gen = pagegenerators.PreloadingGenerator(gen)

        for page in gen:
            self.treatUser(page)