def main():
    ''' The main loop '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()
    imagerecat.initLists()
    generator = None
    genFactory = pagegenerators.GeneratorFactory()
    mark = True

    for arg in wikipedia.handleArgs():
        if arg.startswith('-dontmark'):
            mark = False
        elif arg.startswith('-page'):
            if len(arg) == 5:
                generator = [wikipedia.Page(wikipedia.getSite(),
                                            wikipedia.input(u'What page do you want to use?'))]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-yesterday'):
            generator = [wikipedia.Page(wikipedia.getSite(),
                                        u'Category:Media_needing_categories_as_of_' + getYesterday())]
        else:
            generator = genFactory.handleArg(arg)

    if generator:
        for page in generator:
            if (page.namespace() == 14) and \
               page.title().startswith(u'Category:Media needing categories as of'):
                wikipedia.output(u'Working on ' + page.title())
                for (image, gals, cats) in getImagesToCategorize(cursor,
                                                                 page.titleWithoutNamespace()):
                    categorizeImage(image, gals, imagerecat.applyAllFilters(cats))
                if mark:
                    categoriesChecked(page.title())
def processImage(self, fields):
    ''' Work on a single image '''
    if self.autonomous:
        # Check if the image already exists. Do nothing if the name is already taken.
        CommonsPage = pywikibot.Page(pywikibot.getSite('commons', 'commons'),
                                     u'File:' + fields.get('filename'))
        if CommonsPage.exists():
            return False
    else:
        while True:
            # Do the Tkdialog to accept/reject and change the name
            fields = Tkdialog(fields).getnewmetadata()

            if fields.get('skip'):
                pywikibot.output(u'Skipping %s : User pressed skip.'
                                 % fields.get('imagepage').title())
                return False

            # Check if the image already exists
            CommonsPage = pywikibot.Page(pywikibot.getSite('commons', 'commons'),
                                         u'File:' + fields.get('filename'))
            if not CommonsPage.exists():
                break
            else:
                pywikibot.output('Image already exists, pick another name or skip this image')
                # We don't overwrite images; pick another name, go to the start of the loop

    # Put the fields in the queue to be uploaded
    self.uploadQueue.put(fields)
def run(self):
    for page in self.generator:
        if page.isRedirectPage():
            page = page.getRedirectTarget()
        page_t = page.title()
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        wikipedia.output(u"\n>>> \03{lightpurple}%s\03{default} <<<" % page_t)
        page_cap = wikipedia.Page(wikipedia.getSite(), page_t.capitalize())
        if not page_cap.exists():
            wikipedia.output(u'%s doesn\'t exist' % page_cap.title())
            if not self.acceptall:
                choice = wikipedia.inputChoice(
                    u'Do you want to create a redirect?',
                    ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice == 'a':
                    self.acceptall = True
            if self.acceptall or choice == 'y':
                try:
                    wikipedia.setAction(
                        wikipedia.translate(wikipedia.getSite(), msg) % page_t)
                    page_cap.put(u"#REDIRECT [[%s]]" % page_t)
                    print
                except:
                    wikipedia.output(
                        u"An error occurred. Retrying in 15 seconds...")
                    time.sleep(15)
                    continue
        else:
            wikipedia.output(u'%s already exists, skipping...\n'
                             % page_cap.title())
def __iter__(self):
    """
    Yield page objects until the entire XML dump has been read.
    """
    import xmlreader
    mysite = pywikibot.getSite()
    dump = xmlreader.XmlDump(self.xmlfilename)
    # regular expression to find the original template.
    # {{vfd}} does the same thing as {{Vfd}}, so both will be found.
    # The old syntax, {{msg:vfd}}, will also be found.
    # TODO: check site.nocapitalize()
    templatePatterns = []
    for template in self.templates:
        templatePattern = template.title(withNamespace=False)
        if not pywikibot.getSite().nocapitalize:
            templatePattern = (
                "[" + templatePattern[0].upper() + templatePattern[0].lower() + "]"
                + templatePattern[1:]
            )
        templatePattern = re.sub(" ", "[_ ]", templatePattern)
        templatePatterns.append(templatePattern)
    templateRegex = re.compile(
        r"\{\{ *([mM][sS][gG]:)?(?:%s) *(?P<parameters>\|[^}]+|) *}}"
        % "|".join(templatePatterns)
    )
    for entry in dump.parse():
        if templateRegex.search(entry.text):
            page = pywikibot.Page(mysite, entry.title)
            yield page
def main():
    ''' The main loop '''
    wikipedia.setSite(wikipedia.getSite(u'nl', u'wikipedia'))
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()

    items = getNumberOfItems(cursor)
    images = getNumberOfImages(cursor)
    addresses = {}
    names = {}

    pages = list(set(items.keys() + images.keys()))
    pages.sort()

    for key in pages:
        print key
        page = wikipedia.Page(wikipedia.getSite(), key)
        text = page.get()
        addresses[key] = getNumberOfAddresses(text)
        names[key] = getNumberOfNames(text)
        #print key + u' - ' + str(addresses[key]) + u' - ' + str(names[key])

    updateStats(pages, items, addresses, names, images)
def main():
    all = False
    language = None
    fam = None
    wikimedia = False
    for arg in pywikibot.handleArgs():
        if arg == "-all":
            all = True
        elif arg[0:7] == "-langs:":
            language = arg[7:]
        elif arg[0:10] == "-families:":
            fam = arg[10:]
        elif arg[0:10] == "-wikimedia":
            wikimedia = True
    mySite = pywikibot.getSite()
    if wikimedia:
        families = ["commons", "incubator", "mediawiki", "meta", "species",
                    "test", "wikibooks", "wikidata", "wikinews", "wikipedia",
                    "wikiquote", "wikisource", "wikiversity", "wikivoyage",
                    "wiktionary"]
    elif fam is not None:
        families = fam.split(",")
    else:
        families = [mySite.family.name]
    for family in families:
        try:
            fam = pywikibot.Family(family)
        except ValueError:
            pywikibot.output(u"No such family %s" % family)
            continue
        if all:
            for lang in fam.langs.iterkeys():
                testSite(pywikibot.getSite(lang, family))
        elif language is None:
            lang = mySite.lang
            if not lang in fam.langs.keys():
                lang = fam.langs.keys()[-1]
            testSite(pywikibot.getSite(lang, family))
        else:
            languages = language.split(",")
            for lang in languages:
                try:
                    testSite(pywikibot.getSite(lang, family))
                except pywikibot.NoSuchSite:
                    pywikibot.output(u"No such language %s in family %s"
                                     % (lang, family))
def __init__(self, pageToUnlink, namespaces, always):
    self.pageToUnlink = pageToUnlink
    gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    self.generator = pagegenerators.PreloadingGenerator(gen)
    linktrail = pywikibot.getSite().linktrail()
    # The regular expression which finds links. Results consist of four groups:
    #
    # group title is the target page title, that is, everything before | or ].
    #
    # group section is the page section. It'll include the # to make life
    # easier for us.
    #
    # group label is the alternative link title, that's everything between | and ].
    #
    # group linktrail is the link trail, that's letters after ]] which are
    # part of the word.
    # Note that the definition of 'letter' varies from language to language.
    self.linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
        % linktrail)
    self.always = always
    self.done = False
    self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                    self.pageToUnlink.title())
def __init__(self, generator, img, info, imdb):
    """
    Constructor. Parameters:
        @param generator: The page generator that determines on which pages
                          to work.
        @type generator: generator.
    """
    self.generator = generator
    # Set the edit summary message
    self.summary = i18n.twtranslate(pywikibot.getSite(), 'basic-changing')
    self.chrome = filmsettings.getChrome()
    self.img = img
    self.info = info
    self.imdb = imdb
    self.imdbNum = 0
    self.templateRegex = re.compile("{{.*}}")  # This is how templates are in wikipedia
    self.referenceRegex = re.compile("(<ref.*?/(ref)?>)+")
    self.commentRegex = re.compile("<!--.*?-->")
    self.wikilinkRegex = re.compile("\[\[.*\|.*\]\]")
    infoTemp = pywikibot.Page(pywikibot.getSite(),
                              "Template:Infobox_film/doc").get()
    infoTempStart = infoTemp.find("{{Infobox film") + 2
    bracketCount = 2
    infoTempEnd = infoTempStart
    # Walk forward until the matching closing braces are found
    while bracketCount != 0:
        infoTempEnd += 1
        if infoTemp[infoTempEnd:infoTempEnd + 1] == "{":
            bracketCount += 1
        elif infoTemp[infoTempEnd:infoTempEnd + 1] == "}":
            bracketCount -= 1
    self.infoboxTemplate = re.sub(self.commentRegex, "",
                                  infoTemp[infoTempStart - 2:infoTempEnd + 1])
def englishdictionry(link, firstsite, secondsite):
    link = link.replace(u' ', u'_')
    total_cache = dict(_cache_old, **_cache)
    if total_cache.get(tuple([link, 'englishdictionry'])):
        return total_cache[tuple([link, 'englishdictionry'])]
    if link == u'':
        _cache[tuple([link, 'englishdictionry'])] = u''
        return u''
    site = wikipedia.getSite(firstsite)
    sitesecond = wikipedia.getSite(secondsite)
    params = {
        'action': 'query',
        'prop': 'langlinks',
        'titles': link,
        'redirects': 1,
        'lllimit': 500,
    }
    try:
        categoryname = query.GetData(params, site)
        for item in categoryname[u'query'][u'pages']:
            case = categoryname[u'query'][u'pages'][item][u'langlinks']
            for item in case:
                if item[u'lang'] == secondsite:
                    intersec = item[u'*']
                    break
        result = intersec
        if result.find('#') != -1:
            _cache[tuple([link, 'englishdictionry'])] = u''
            return u''
        _cache[tuple([link, 'englishdictionry'])] = result
        return result
    except:
        _cache[tuple([link, 'englishdictionry'])] = u''
        return u''
def main():
    # page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # Default to [], which means all namespaces will be processed.
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except:
            pass
        else:
            import catlib
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site, "%s:%s" % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
def addCoords(sourceWiki, lang, article, lat, lon, region, type, dim):
    ''' Add the coordinates to article. '''
    if (article and lang and type):
        coordTemplate = 'Coordinate'
        site = wikipedia.getSite(lang, 'wikipedia')
        page = wikipedia.Page(site, article)
        try:
            text = page.get()
        except wikipedia.NoPage:  # First except, prevent empty pages
            logging.warning('Page empty: %s', article)
            return False
        except wikipedia.IsRedirectPage:  # Second except, prevent redirects
            logging.warning('Page is redirect: %s', article)
            wikipedia.output(u'%s is a redirect!' % article)
            return False
        except wikipedia.Error:  # Third exception, take the problem and print
            logging.warning('Some error: %s', article)
            wikipedia.output(u"Some error, skipping..")
            return False

        if coordTemplate in page.templates():
            logging.info('Already has Coordinate template: %s', article)
            return False
        if 'Linn' in page.templates():
            logging.info('Linn template without coords: %s', article)
            return False

        newtext = text
        replCount = 1
        coordText = u'{{Coordinate |NS=%s |EW=%s |type=%s |region=%s' % (lat, lon, type, region)
        if (dim):
            coordText += u' |dim=%s' % (int(dim),)
        coordText += '}}'
        localCatName = wikipedia.getSite().namespace(WP_CATEGORY_NS)
        catStart = r'\[\[(' + localCatName + '|Category):'
        catStartPlain = u'[[' + localCatName + ':'
        replacementText = coordText + '\n\n' + catStartPlain
        # insert coordinate template before categories
        newtext = re.sub(catStart, replacementText, newtext, replCount,
                         flags=re.IGNORECASE)

        if text != newtext:
            logging.info('Adding coords to: %s', article)
            comment = u'lisan artikli koordinaadid %s.wikist' % (sourceWiki)
            wikipedia.showDiff(text, newtext)
            modPage = wikipedia.input(u'Modify page: %s ([y]/n) ?' % (article))
            if (modPage.lower() == 'y' or modPage == ''):
                page.put(newtext, comment)
            return True
        else:
            logging.info('Nothing to change: %s', article)
            return False
    else:
        return False
def checkWait():
    newlist = ""  # blank variable for later
    site = wikipedia.getSite()
    pagename = localconfig.waitlist
    page = wikipedia.Page(site, pagename)
    waiters = page.get()
    waiters = waiters.replace("}}", "")
    waiters = waiters.replace("*{{User|", "")
    waiters = waiters.split("\n")
    for waiter in waiters:
        if waiter == "":
            continue  # Non-existent user
        if checkRegisterTime(waiter, 7, False):
            continue
        if checkBlocked(waiter):
            continue  # If user is blocked, skip putting them back on the list.
        if getEditCount(waiter) == True:  # If edited, send them to UAA
            checkUser(waiter, False, False)
            continue
        if waiter in newlist:
            continue  # If user is already in the list, in case duplicates run over
        # Continue if none of the other checks have issues with the conditions
        # for staying on the waitlist
        newlist = newlist + "\n*{{User|" + waiter + "}}"
        #print "\n*{{User|" + waiter + "}}"
    summary = localconfig.editsumwait
    site = wikipedia.getSite()
    pagename = localconfig.waitlist
    page = wikipedia.Page(site, pagename)
    pagetxt = page.get()
    newlist = newlist.replace("\n*{{User|}}", "")
    page.put(newlist, comment=summary)
def store_wikipedia(self):
    s = ''
    for k in sorted(self.replace.keys()):
        s += '* %s : %s\n' % (k, self.replace[k])
    mypage = pywikibot.Page(pywikibot.getSite(), '%s/replaced' % self.prefix)
    mypage.put_async(s)

    s = ''
    for k in sorted(self.correctPerPage.keys()):
        vlist = self.correctPerPage[k]
        for v in sorted(vlist):
            s += '* %s : %s\n' % (k, v)
    mypage = pywikibot.Page(pywikibot.getSite(), '%s/correctPerPage' % self.prefix)
    mypage.put_async(s)

    s = ''
    for k in sorted(self.noall):
        s += '* %s \n' % (k)
    mypage = pywikibot.Page(pywikibot.getSite(), '%s/correct' % self.prefix)
    mypage.put_async(s)

    s = ''
    for k in sorted(self.rcount.keys()):
        if self.rcount[k] > 0:
            s += '* %s : %s\n' % (k, self.rcount[k])
    mypage = pywikibot.Page(pywikibot.getSite(), '%s/replacCount' % self.prefix)
    mypage.put_async(s)

    s = ''
def main():
    countrycode = u''

    # Connect database, we need that
    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2()

    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-countrycode:'):
            countrycode = arg[len('-countrycode:'):]

    lang = wikipedia.getSite().language()
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    if countrycode:
        if not mconfig.countries.get((countrycode, lang)):
            wikipedia.output(u'I have no config for countrycode "%s" in language "%s"'
                             % (countrycode, lang))
            return False
        wikipedia.output(u'Working on countrycode "%s" in language "%s"'
                         % (countrycode, lang))
        locateCountry(countrycode, lang,
                      mconfig.countries.get((countrycode, lang)),
                      conn, cursor, conn2, cursor2)
    else:
        for (countrycode, lang), countryconfig in mconfig.countries.iteritems():
            if not countryconfig.get('autoGeocode'):
                wikipedia.output(u'"%s" in language "%s" is not supported in auto geocode mode (yet).'
                                 % (countrycode, lang))
            else:
                wikipedia.output(u'Working on countrycode "%s" in language "%s"'
                                 % (countrycode, lang))
                locateCountry(countrycode, lang, countryconfig,
                              conn, cursor, conn2, cursor2)
def main():
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    bigcategory = u''
    target = u''

    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                generator = [wikipedia.Page(wikipedia.getSite(),
                                            wikipedia.input(u'What page do you want to use?'))]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-bigcat'):
            if len(arg) == 7:
                bigcategory = wikipedia.input(u'What category do you want to split out?')
            else:
                bigcategory = arg[8:]
        elif arg.startswith('-target'):
            if len(arg) == 7:
                target = wikipedia.input(u'What category is the target category?')
            else:
                target = arg[8:]

    if not bigcategory == u'':
        splitOutCategory(bigcategory, target)
    else:
        if not generator:
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    wikipedia.Page(wikipedia.getSite(),
                                   u'Template:Intersect categories'),
                    onlyTemplateInclusion=True),
                [14])
        for cat in generator:
            intersectCategories(cat)
def main():
    featured = False
    gen = None

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg == '-featured':
            featured = True
        else:
            genFactory.handleArg(arg)

    mysite = pywikibot.getSite()
    if mysite.sitename() == 'wikipedia:nl':
        pywikibot.output(
            u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}')
        sys.exit()

    if featured:
        featuredList = pywikibot.translate(mysite, featured_articles)
        ref = pywikibot.Page(pywikibot.getSite(), featuredList)
        gen = pagegenerators.ReferringPageGenerator(ref)
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        for page in pagegenerators.PreloadingGenerator(gen):
            workon(page)
    else:
        pywikibot.showHelp('fixing_redirects')
def cleanUpCategories(description=''):
    ''' Filter the categories in the description using the functions in imagerecat '''
    # Get the list of current categories
    categoryPages = wikipedia.getCategoryLinks(description, wikipedia.getSite())
    # Make it a list of strings (not page objects)
    categories = []
    for cat in categoryPages:
        categories.append(cat.titleWithoutNamespace())
    # Strip the categories from the current description
    description = wikipedia.removeCategoryLinks(description, wikipedia.getSite())
    # Filter the list of categories
    categories = imagerecat.applyAllFilters(categories)
    # If we have a category, remove the uncat template
    if not (categories == ''):
        description = description.replace(u'{{subst:unc}}', u'')
    # Add the categories to the description again
    description = description + u'\n'
    for category in categories:
        #print u'Category : ' + category
        description = description + u'[[Category:' + category + u']]\n'
    return description
def main():
    import os
    index = None
    djvu = None
    pages = None
    # what would have been changed.
    dry = False
    ask = False
    overwrite = 'ask'

    # Parse command line arguments
    for arg in pywikibot.handleArgs():
        if arg.startswith("-dry"):
            dry = True
        elif arg.startswith("-ask"):
            ask = True
        elif arg.startswith("-overwrite:"):
            overwrite = arg[11:12]
            if overwrite != 'y' and overwrite != 'n':
                pywikibot.output(u"Unknown argument %s; will ask before overwriting" % arg)
                overwrite = 'ask'
        elif arg.startswith("-djvu:"):
            djvu = arg[6:]
        elif arg.startswith("-index:"):
            index = arg[7:]
        elif arg.startswith("-pages:"):
            pages = arg[7:]
        else:
            pywikibot.output(u"Unknown argument %s" % arg)

    # Check the djvu file exists
    if djvu:
        os.stat(djvu)
        if not index:
            import os.path
            index = os.path.basename(djvu)

    if djvu and index:
        site = pywikibot.getSite()
        index_page = pywikibot.Page(site, index)
        if site.family.name != 'wikisource':
            raise pywikibot.PageNotFound(
                u"Found family '%s'; Wikisource required." % site.family.name)
        if not index_page.exists() and index_page.namespace() == 0:
            index_namespace = site.mediawiki_message('Proofreadpage index namespace')
            index_page = pywikibot.Page(pywikibot.getSite(),
                                        u"%s:%s" % (index_namespace, index))
        if not index_page.exists():
            raise pywikibot.NoPage(u"Page '%s' does not exist" % index)
        pywikibot.output(u"uploading text from %s to %s"
                         % (djvu, index_page.title(asLink=True)))
        bot = DjVuTextBot(djvu, index, pages, ask, overwrite, dry)
        if not bot.has_text():
            raise ValueError("No text layer in djvu file")
        bot.run()
    else:
        pywikibot.showHelp()
def tagNowCommons(wImage, cImage, timestamp):
    site = wikipedia.getSite()
    language = site.language()
    family = site.family.name
    imagepage = wikipedia.ImagePage(wikipedia.getSite(), wImage)
    if not imagepage.exists() or imagepage.isRedirectPage():
        return
    if skips.get(family) and skips.get(family).get(language):
        localskips = skips.get(family).get(language)
    else:
        localskips = skips.get('_default')
    for template in imagepage.templates():
        title = template.replace(u'_', u' ').strip()
        if title in localskips:
            return
    text = imagepage.get()
    oldtext = text
    text = u'{{NowCommons|File:%s|date=%s|bot=~~~}}\n' % (cImage.replace(u'_', u' '), timestamp) + text
    comment = u'File is available on Wikimedia Commons.'
    wikipedia.showDiff(oldtext, text)
    try:
        imagepage.put(text, comment)
        #print u'put'
    except wikipedia.LockedPage:
        return
def tagUncategorized(templateTitle):
    site = wikipedia.getSite()
    language = site.language()
    family = site.family.name
    page = wikipedia.Page(wikipedia.getSite(), u'Template:%s' % (templateTitle,))
    if not page.exists() or page.isRedirectPage():
        return False
    text = page.get()
    oldtext = text
    text = text + u'<noinclude>\n\n%s\n</noinclude>' % (uncategorizedTemplate.get(family).get(language),)
    wikipedia.showDiff(oldtext, text)
    try:
        wikipedia.output(page.title())
        page.put(text, editComment.get(family).get(language), maxTries=1)
    except wikipedia.LockedPage:
        return
    except wikipedia.MaxTriesExceededError:
        return
    except wikipedia.EditConflict:
        return
def __init__(self, page, filename, summary, dry, always):
    self.page = pywikibot.Page(pywikibot.getSite(), page)
    self.filename = filename
    self.summary = summary
    if not self.summary:
        self.summary = pywikibot.translate(pywikibot.getSite(), self.msg)
    pywikibot.setAction(self.summary)
def loadPagesWiki(wr, correctWords_page, ignorePages_page):
    """ Load list of correct words and ignored pages """
    # Load correct words
    mypage = pywikibot.Page(pywikibot.getSite(), correctWords_page)
    text = mypage.get()
    lines = text.split('* ')[1:]
    correctWords = {}
    for l in lines:
        spl = l.split(' : ')
        tmp = correctWords.get(spl[0], [])
        tmp.append(spl[1].strip())
        correctWords[spl[0]] = tmp
    print "loaded %s correct words" % len(correctWords)

    # Load ignore pages
    mypage = pywikibot.Page(pywikibot.getSite(), ignorePages_page)
    text = mypage.get()
    lines = text.split('* ')[1:]
    ignorePages = []
    for l in lines:
        ignorePages.append(l.strip())
    print "loaded %s ignored pages " % len(ignorePages)

    wr.ignorePages = ignorePages
    wr.ignorePerPages = correctWords
def main():
    global always
    always = False

    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True

    mysite = pywikibot.getSite()
    # If anything needs to be prepared, you can do it here
    template_image = pywikibot.translate(pywikibot.getSite(), template_to_the_image)
    template_user = pywikibot.translate(pywikibot.getSite(), template_to_the_user)
    except_text_translated = pywikibot.translate(pywikibot.getSite(), except_text)
    basicgenerator = pagegenerators.UnusedFilesGenerator()
    generator = pagegenerators.PreloadingGenerator(basicgenerator)
    for page in generator:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
        if except_text_translated not in page.getImagePageHtml() and \
           'http://' not in page.get():
            pywikibot.output(u'\n' + page.title())
            if template_image in page.get():
                pywikibot.output(u"%s done already" % page.aslink())
                continue
            appendtext(page, u"\n\n" + template_image)
            uploader = page.getFileVersionHistory().pop()[1]
            usertalkname = u'User Talk:%s' % uploader
            usertalkpage = pywikibot.Page(mysite, usertalkname)
            msg2uploader = template_user % {'title': page.title()}
            appendtext(usertalkpage, msg2uploader)
def aslink(self, forceInterwiki=False, textlink=False, noInterwiki=False):
    """A string representation in the form of a link.

    This method is different from Page.aslink() as the sortkey may have
    to be included.
    """
    if self.sortKey:
        titleWithSortKey = '%s|%s' % (self.title(savetitle=True), self.sortKey)
    else:
        titleWithSortKey = self.title(savetitle=True)
    if not noInterwiki and (forceInterwiki or self.site() != pywikibot.getSite()):
        if self.site().family != pywikibot.getSite().family \
           and self.site().family.name != self.site().lang:
            return '[[%s:%s:%s]]' % (self.site().family.name,
                                     self.site().lang,
                                     self.title(savetitle=True))
        else:
            return '[[%s:%s]]' % (self.site().lang, self.title(savetitle=True))
    elif textlink:
        return '[[:%s]]' % self.title(savetitle=True)
    else:
        return '[[%s]]' % titleWithSortKey
def __init__(self, url, urlEncoding=None, description=u'', useFilename=None,
             keepFilename=False, verifyDescription=True, ignoreWarning=False,
             targetSite=None, uploadByUrl=False):
    """
    @param ignoreWarning: Set this to True if you want to upload even if
        another file would be overwritten or another mistake would be risked.
    """
    self._retrieved = False
    self.url = url
    self.urlEncoding = urlEncoding
    self.description = description
    self.useFilename = useFilename
    self.keepFilename = keepFilename
    self.verifyDescription = verifyDescription
    self.ignoreWarning = ignoreWarning
    if config.upload_to_commons:
        self.targetSite = targetSite or pywikibot.getSite('commons', 'commons')
    else:
        self.targetSite = targetSite or pywikibot.getSite()
    self.targetSite.forceLogin()
    self.uploadByUrl = uploadByUrl
def main():
    # The generator gives the pages that should be worked upon.
    gen = None
    # If debug is True, doesn't do any real changes, but only show
    # what would have been changed.
    debug = False
    wantHelp = False

    # Parse command line arguments
    for arg in wikipedia.handleArgs():
        if arg.startswith("-debug"):
            debug = True
        else:
            wantHelp = True

    if not wantHelp:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % 'IP-Host')
        hosts_gen = pagegenerators.CategorizedPageGenerator(cat, start=None, recurse=False)
        hosts_gen = pagegenerators.PreloadingGenerator(hosts_gen)

        cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % 'IP-Network')
        nets_gen = pagegenerators.CategorizedPageGenerator(cat, start=None, recurse=False)
        nets_gen = pagegenerators.PreloadingGenerator(nets_gen)

        bot = IpNetworkBot(nets_gen, hosts_gen, debug)
        bot.run()
    else:
        wikipedia.showHelp()
def setUp(self):
    self.site = pywikibot.getSite('en', 'wikipedia')
    self.data = [catlib.Category(self.site, 'Category:Cat1'),
                 catlib.Category(self.site, 'Category:Cat2')]
    self.site_de = pywikibot.getSite('de', 'wikipedia')
    self.site_fr = pywikibot.getSite('fr', 'wikipedia')
def facatlist(facat):
    wikipedia.config.put_throttle = 0
    wikipedia.put_throttle.setDelay()
    count = 0
    listenpageTitle = []
    PageTitle = facat.replace(u'[[', u'').replace(u']]', u'').strip()
    language = 'fa'
    PageTitles = [PageTitle]
    for PageTitle in PageTitles:
        cat = catlib.Category(wikipedia.getSite(language), PageTitle)
        listacategory = [cat]
        listacategory = categorydown(listacategory)
        for enpageTitle in listacategory:
            enpageTitle = str(enpageTitle).split(u'|')[0].split(u']]')[0].replace(u'[[', u'').strip()
            cat = catlib.Category(wikipedia.getSite(language), enpageTitle)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            for pagework in gent:
                count += 1
                try:
                    link = str(pagework).split(u'|')[0].split(u']]')[0].replace(u'[[', u'').strip()
                except:
                    pagework = unicode(str(pagework), 'UTF-8')
                    link = pagework.split(u'|')[0].split(u']]')[0].replace(u'[[', u'').strip()
                wikipedia.output(link)
                fapagetitle = link
                wikipedia.output(u'adding ' + fapagetitle + u' to fapage lists')
                listenpageTitle.append(fapagetitle)
    if listenpageTitle == []:
        return False
    return listenpageTitle
def __init__(self, page, filename, summary, overwrite):
    self.page = pywikibot.Page(pywikibot.getSite(), page)
    self.filename = filename
    self.summary = summary
    self.overwrite = overwrite
    if not self.summary:
        self.summary = pywikibot.translate(pywikibot.getSite(), self.msg)
    pywikibot.setAction(self.summary)
def __init__(self, catTitle, listTitle, editSummary, overwrite=False,
             showImages=False, subCats=False, talkPages=False, recurse=False):
    self.editSummary = editSummary
    self.overwrite = overwrite
    self.showImages = showImages
    self.cat = catlib.Category(wikipedia.getSite(), 'Category:' + catTitle)
    self.list = wikipedia.Page(wikipedia.getSite(), listTitle)
    self.subCats = subCats
    self.talkPages = talkPages
    self.recurse = recurse
def updateInterwiki(wikipediaPage=None, commonsPage=None):
    '''
    Update the interwiki links at Commons from a Wikipedia page.

    The bot just replaces the interwiki links at the Commons page with the
    interwiki links from the Wikipedia page. This should probably be more
    intelligent: we could add all the interwiki links and remove duplicates,
    or only remove language links if multiple language links to the same
    language exist.

    This function is disabled for the moment until I figure out what the
    best way is to update the interwiki links.
    '''
    interwikis = {}
    comment = u''
    interwikilist = wikipediaPage.interwiki()
    interwikilist.append(wikipediaPage)

    for interwikiPage in interwikilist:
        interwikis[interwikiPage.site()] = interwikiPage
    oldtext = commonsPage.get()
    # The commons site object doesn't work with interwiki links
    newtext = wikipedia.replaceLanguageLinks(oldtext, interwikis,
                                             wikipedia.getSite(u'nl'))
    comment = u'Updating interwiki\'s from [[' + wikipediaPage.site().language() \
              + u':' + wikipediaPage.title() + u']]'

    if newtext != oldtext:
        # This doesn't seem to work. Newtext has some trailing whitespace.
        wikipedia.showDiff(oldtext, newtext)
        commonsPage.put(newtext=newtext, comment=comment)
def removeCategoryLinks(text, site=None, marker=''):
    """Return text with all category links removed.

    Put the string marker after the last replacement (at the end of the text
    if there is no replacement).
    """
    # This regular expression will find every category link, plus trailing
    # whitespace. The category namespace name is grouped.
    if site is None:
        site = pywikibot.getSite()
    catNamespace = '|'.join(site.category_namespaces())
    categoryR = re.compile(r'\[\[\s*(%s)\s*:.*?\]\]\s*' % catNamespace, re.I)
    text = replaceExcept(text, categoryR, '',
                         ['nowiki', 'comment', 'math', 'pre', 'source'],
                         marker=marker)
    if marker:
        # avoid having multiple linefeeds at the end of the text
        text = re.sub('\s*%s' % re.escape(marker), '\r\n' + marker,
                      text.strip())
    return text.strip()
def __init__(self, hours, no_repeat, delay, user):
    self.hours = hours
    self.no_repeat = no_repeat
    if delay is None:
        self.delay = min(15, max(5, int(self.hours * 60)))
    else:
        self.delay = max(5, delay)
    self.user = user
    self.site = pywikibot.getSite()
    if self.user:
        localSandboxTitle = pywikibot.translate(self.site, user_sandboxTemplate,
                                                fallback=False)
        localSandbox = pywikibot.Page(self.site, localSandboxTitle)
        content.update(user_content)
        sandboxTitle[self.site.lang] = [
            item.title()
            for item in localSandbox.getReferences(onlyTemplateInclusion=True)
        ]
        if self.site.lang not in user_sandboxTemplate:
            sandboxTitle[self.site.code] = []
            pywikibot.output(
                u'Not properly set-up to run in user namespace!')
def getFilename(photoInfo=None, site=pywikibot.getSite(u'commons', u'commons'),
                project=u'Flickr'):
    ''' Build a good filename for the upload based on the username and the
    title. Prevents naming collisions.
    '''
    username = photoInfo.find('photo').find('owner').attrib['username']
    title = photoInfo.find('photo').find('title').text
    if title:
        title = cleanUpTitle(title)
    else:
        title = u''

    if title == u'':
        description = photoInfo.find('photo').find('description').text
        if description:
            if len(description) > 120:
                description = description[0:120]
            title = cleanUpTitle(description)
        else:
            title = u''
        # Should probably have the id of the photo as last resort.

    if pywikibot.Page(site, u'File:%s - %s - %s.jpg'
                      % (title, project, username)).exists():
        i = 1
        while True:
            if (pywikibot.Page(site, u'File:%s - %s - %s (%s).jpg'
                               % (title, project, username, str(i))).exists()):
                i = i + 1
            else:
                return u'%s - %s - %s (%s).jpg' % (title, project, username, str(i))
    else:
        return u'%s - %s - %s.jpg' % (title, project, username)
def TextfilePageGenerator(filename=None, site=None):
    """Iterate pages from a list in a text file.

    The file must contain page links between double-square-brackets or, in
    alternative, separated by newlines. The generator will yield each
    corresponding Page object.

    @param filename: the name of the file that should be read. If no name is
                     given, the generator prompts the user.
    @param site: the default Site for which Page objects should be created

    """
    if filename is None:
        filename = pywikibot.input(u'Please enter the filename:')
    if site is None:
        site = pywikibot.getSite()
    f = codecs.open(filename, 'r', config.textfile_encoding)
    # title ends either before | or before ]]
    R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)')
    pageTitle = None
    for pageTitle in R.findall(f.read()):
        # If the link is in interwiki format, the Page object may reside
        # on a different Site than the default.
        # This makes it possible to work on different wikis using a single
        # text file, but also could be dangerous because you might
        # inadvertently change pages on another wiki!
        yield pywikibot.Page(site, pageTitle)
    if pageTitle is None:
        f.seek(0)
        for title in f:
            title = title.strip()
            if '|' in title:
                title = title[:title.index('|')]
            if title:
                yield pywikibot.Page(site, title)
    f.close()
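# Usage sketch for TextfilePageGenerator above (illustrative only, not part of
# the original module): it assumes a local file named 'titles.txt'
# (hypothetical name) containing either [[Title]] links or one page title per
# line, and that a default site is configured in user-config.py.
def _demo_textfile_generator(filename='titles.txt'):
    import pywikibot
    for page in TextfilePageGenerator(filename):
        pywikibot.output(page.title())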
def __init__(self, password=None, sysop=False, site=None, username=None,
             verbose=False):
    self.site = site or pywikibot.getSite()
    self.sysop = sysop
    if username:
        self.username = username
        # perform writeback.
        if site.family.name not in config.usernames:
            config.usernames[site.family.name] = {}
        config.usernames[site.family.name][self.site.lang] = username
    else:
        if sysop:
            try:
                self.username = config.sysopnames\
                                [self.site.family.name][self.site.lang]
            except:
                raise pywikibot.NoUsername(
                    u'ERROR: Sysop username for %s:%s is undefined.\n'
                    u'If you have a sysop account for that site, please add '
                    u'such a line to user-config.py:\n\n'
                    u'sysopnames[\'%s\'][\'%s\'] = \'myUsername\''
                    % (self.site.family.name, self.site.lang,
                       self.site.family.name, self.site.lang))
        else:
            try:
                self.username = config.usernames[self.site.family.name][
                    self.site.lang]
            except:
                raise pywikibot.NoUsername(
                    u'ERROR: Username for %s:%s is undefined.\n'
                    u'If you have an account for that site, please add such '
                    u'a line to user-config.py:\n\n'
                    u'usernames[\'%s\'][\'%s\'] = \'myUsername\''
                    % (self.site.family.name, self.site.lang,
                       self.site.family.name, self.site.lang))
    self.password = password
    self.verbose = verbose
    if getattr(config, 'password_file', ''):
        self.readPassword()
def getHints(self):
    print "Parsing warnfile..."
    R = re.compile(
        r'WARNING: (?P<family>.+?): \[\[(?P<locallang>.+?):(?P<localtitle>.+?)\]\](?P<warningtype>.+?)\[\[(?P<targetlang>.+?):(?P<targettitle>.+?)\]\]')
    import codecs
    f = codecs.open(self.filename, 'r', 'utf-8')
    hints = {}
    removeHints = {}
    mysite = wikipedia.getSite()
    for line in f.readlines():
        m = R.search(line)
        if m:
            #print "DBG>", line
            if m.group('locallang') == mysite.lang and \
               m.group('family') == mysite.family.name:
                #wikipedia.output(u' '.join([m.group('locallang'), m.group('localtitle'), m.group('warningtype'), m.group('targetsite'), m.group('targettitle')]))
                #print m.group(3)
                page = wikipedia.Page(mysite, m.group('localtitle'))
                removing = (m.group('warningtype') == ' links to incorrect ')
                try:
                    targetSite = mysite.getSite(code=m.group('targetlang'))
                    targetPage = wikipedia.Page(targetSite, m.group('targettitle'))
                    if removing:
                        if page not in removeHints:
                            removeHints[page] = []
                        removeHints[page].append(targetPage)
                    else:
                        if page not in hints:
                            hints[page] = []
                        hints[page].append(targetPage)
                except wikipedia.Error:
                    print "DBG> Failed to add", line
    f.close()
    return hints, removeHints
def main():
    oldImage = None
    newImage = None
    summary = ''
    always = False
    loose = False

    # read command line parameters
    for arg in wikipedia.handleArgs():
        if arg == '-always':
            always = True
        elif arg == '-loose':
            loose = True
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = wikipedia.input(u'Choose an edit summary: ')
            else:
                summary = arg[len('-summary:'):]
        else:
            if oldImage:
                newImage = arg
            else:
                oldImage = arg

    if not oldImage:
        wikipedia.showHelp('image')
    else:
        mysite = wikipedia.getSite()
        ns = mysite.image_namespace()
        oldImagePage = wikipedia.ImagePage(mysite, ns + ':' + oldImage)
        gen = pagegenerators.FileLinksGenerator(oldImagePage)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = ImageRobot(preloadingGen, oldImage, newImage, summary, always, loose)
        bot.run()
def __init__(self):
    self.wiki = self.coreWiki = wikipedia.getSite(code=u'en', fam=u'naruto')
    wikipedia.setAction(wikipedia.translate(self.wiki, self.getSummaries()))

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    gen = None
    PageTitles = []
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(wikipedia.input(
                    u'\03{lightblue}Which page do you want to change?\03{default}'))
            elif len(arg) > 6:
                PageTitles.append(arg[6:])
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
    if not gen and PageTitles:
        pages = [wikipedia.Page(self.wiki, PageTitle) for PageTitle in PageTitles]
        gen = iter(pages)
    self.generator = gen
def updateStats(date, uncatCount, checkCount, totalCount):
    ''' Update the stats '''
    page = wikipedia.Page(wikipedia.getSite(),
                          u'User:Multichill/Categorization_stats')
    newstats = u'|-\n|' + str(date) + u'\n|' + str(uncatCount) + u'\n|' + \
               str(checkCount) + u'\n|' + str(totalCount) + u'\n'

    newtext = page.get()
    if newtext.find(new_marker) == -1:
        wikipedia.output(u'No marker found!')
        newtext = newtext + newstats + new_marker
    else:
        newtext = newtext.replace(new_marker, newstats + new_marker)

    comment = u'Updating stats: ' + str(uncatCount) + \
              u' uncategorized files, ' + str(checkCount) + \
              u' files to be checked, ' + str(totalCount) + u' files in total'
    wikipedia.output(comment)
    wikipedia.showDiff(page.get(), newtext)
    page.put(newtext=newtext, comment=comment)
def put(title, contents):
    mysite = pywikibot.getSite()
    page = pywikibot.Page(mysite, title)
    # Show the title of the page we're working on.
    # Highlight the title in purple.
    pywikibot.output(u">>> \03{lightpurple}%s\03{default} <<<" % page.title())

    # Check if it exists:
    # if page.exists():
    #     print "EXISTS!"
    #     return
    # else:
    #     print "DOES NOT EXIST!"

    # Post it:
    comment = "Import from spreadsheet via script."
    try:
        page.put(contents, comment=comment, minorEdit=False)
    except pywikibot.LockedPage:
        pywikibot.output(u"Page %s is locked; skipping." % title)
    except pywikibot.EditConflict:
        pywikibot.output(u'Skipping %s because of edit conflict' % title)
    except pywikibot.SpamfilterError, error:
        pywikibot.output(
            u'Cannot change %s because of spam blacklist entry %s'
            % (title, error.url))
def NamespaceFilterPageGenerator(generator, namespaces, site=None):
    """
    Wraps around another generator. Yields only those pages that are in one of
    the given namespaces.

    The namespace list can contain both integers (namespace numbers) and
    strings/unicode strings (namespace names). Namespace may also be a single
    number or a single string.
    """
    # convert namespace names to namespace numbers
    if site is None:
        site = pywikibot.getSite()
    if isinstance(namespaces, (int, basestring)):
        namespaces = [namespaces]
    for i in xrange(len(namespaces)):
        ns = namespaces[i]
        if isinstance(ns, basestring):
            index = site.getNamespaceIndex(ns)
            if index is None:
                raise ValueError(u'Unknown namespace: %s' % ns)
            namespaces[i] = index
    for page in generator:
        if page.namespace() in namespaces:
            yield page
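# Usage sketch for NamespaceFilterPageGenerator above (illustrative only, not
# part of the original module): it wraps an arbitrary page generator so that
# only main-namespace (0) pages are yielded; the page titles here are made up
# for the example and a configured default site is assumed.
def _demo_namespace_filter():
    import pywikibot
    site = pywikibot.getSite()
    pages = iter([pywikibot.Page(site, u'Example'),
                  pywikibot.Page(site, u'Talk:Example')])
    for page in NamespaceFilterPageGenerator(pages, [0], site=site):
        pywikibot.output(page.title())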
def main():
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # Connect database, we need that
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()

    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if generator:
        # Get a preloading generator with only images
        pgenerator = pagegenerators.PreloadingGenerator(
            pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
        for page in pgenerator:
            rijksmonumentid = getRijksmonumentid(page, conn, cursor)
            if rijksmonumentid:
                addRijksmonumentid(page, rijksmonumentid)
def renew_optinhash(db, optin):
    optinPage = wikipedia.Page(wikipedia.getSite(), optin)
    text = optinPage.get()
    # TODO
    text = text.replace("'", "\\'")
    text = text.replace('"', '\\"')
    text = text.replace(";", "")
    names = text.split('\n')
    names = [n for n in names if n != '']

    # now do the optin-hash query
    query = """
        select user_id, user_name
        from dewiki_p.user
        where user_name in ('%s')
    """ % "', '".join(names)
    c = db.cursor()
    c.execute(query.encode('utf-8'))
    lines = c.fetchall()

    f = open(optinhashfile, 'w')
    f.write('# -*- coding: utf-8 -*-\n')
    f.write('optinhash = {\n')
    for l in lines:
        f.write("%s : '%s',\n" % (l[0], l[1].replace("'", "\\'")))
    f.write("'dummy' : -1}")
    f.close()
def run(self):
    site = pywikibot.getSite()
    newCat = catlib.Category(site, self.newCatTitle)
    # set edit summary message
    if not self.editSummary:
        self.editSummary = i18n.twtranslate(site, 'category-changing') \
                           % {'oldcat': self.oldCat.title(),
                              'newcat': newCat.title()}
    if self.useSummaryForDeletion and self.editSummary:
        reason = self.editSummary
    else:
        reason = i18n.twtranslate(site, deletion_reason_move) \
                 % {'newcat': self.newCatTitle, 'title': self.newCatTitle}

    # Copy the category contents to the new category page
    copied = False
    oldMovedTalk = None
    if self.oldCat.exists() and self.moveCatPage:
        copied = self.oldCat.copyAndKeep(
            self.newCatTitle, pywikibot.translate(site, cfd_templates))
        # Also move the talk page
        if copied:
            oldTalk = self.oldCat.toggleTalkPage()
            if oldTalk.exists():
                newTalkTitle = newCat.toggleTalkPage().title()
                try:
                    talkMoved = oldTalk.move(newTalkTitle, reason)
                except (pywikibot.NoPage, pywikibot.PageNotSaved), e:
                    # in order:
                    # Source talk does not exist, or
                    # Target talk already exists
                    pywikibot.output(e.message)
                else:
                    if talkMoved:
                        oldMovedTalk = oldTalk
def processPhoto(photo_id):
    '''
    Work on a single photo at
    http://www.photolibrary.fema.gov/photolibrary/photo_details.do?id=<photo_id>
    Get the metadata, check for dupes, build the description, upload the image.
    '''
    print "Working on: " + str(photo_id)
    # Get all the metadata
    metadata = getMetadata(photo_id)
    if not metadata:
        # Incorrect photo_id
        print "Didn't find metadata at http://www.photolibrary.fema.gov/photolibrary/photo_details.do?id=" + str(photo_id)
        return

    photoUrl = u'http://www.fema.gov/photodata/original/' + str(photo_id) + '.jpg'
    photo = downloadPhoto(photoUrl)
    duplicates = findDuplicateImages(photo)
    # We don't want to upload dupes
    if duplicates:
        wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        return

    title = buildTitle(photo_id, metadata)
    description = buildDescription(photo_id, metadata)

    bot = upload.UploadRobot(photoUrl, description=description,
                             useFilename=title, keepFilename=True,
                             verifyDescription=False,
                             targetSite=wikipedia.getSite('commons', 'commons'))
    bot.upload_image(debug=False)
def removeLanguageLinks(text, site=None, marker=''):
    """Return text with all interlanguage links removed.

    If a link to an unknown language is encountered, a warning is printed.
    If a marker is defined, that string is placed at the location of the
    last occurrence of an interwiki link (at the end if there are no
    interwiki links).
    """
    if site is None:
        site = pywikibot.getSite()
    if not site.validLanguageLinks():
        return text
    # This regular expression will find every interwiki link, plus trailing
    # whitespace.
    languages = '|'.join(site.validLanguageLinks()
                         + site.family.obsolete.keys())
    interwikiR = re.compile(r'\[\[(%s)\s?:[^\[\]\n]*\]\][\s]*' % languages,
                            re.IGNORECASE)
    text = replaceExcept(text, interwikiR, '',
                         ['nowiki', 'comment', 'math', 'pre', 'source'],
                         marker=marker)
    return text.strip()
def getRefnum(article):
    page = wikipedia.Page(wikipedia.getSite(), article)
    if page.exists() and (page.namespace() == 0) and not page.isRedirectPage():
        refnum = u''
        templates = page.templatesWithParams()
        for (template, params) in templates:
            if template.lower().replace(u'_', u' ') == u'infobox nrhp':
                for param in params:
                    # Split at =
                    (field, sep, value) = param.partition(u'=')
                    # Remove leading or trailing spaces
                    field = field.strip()
                    value = value.split("<ref")[0].strip()
                    # Check first that field is not empty
                    if field:
                        if field == u'refnum':
                            refnum = value
                            return refnum.strip().lstrip(u'#')
    # We didn't find anything so return empty string
    return u''
def __init__(self, site, name):
    """
    Initializer for a User object.

    Parameters:
    site - a wikipedia.Site object
    name - name of the user, without the trailing User:
    """
    if type(site) in [str, unicode]:
        self._site = wikipedia.getSite(site)
    else:
        self._site = site
    self._name = name
    self._blocked = None  # None means not loaded
    self._groups = None  # None means not loaded
    #self._editcount = -1  # -1 means not loaded
    self._registrationTime = -1
    #if self.site().versionnumber() >= 16:
    #    self._urToken = None
    if name[0] == '#':
        # This user is probably being queried for purpose of lifting an
        # autoblock.
        wikipedia.output(
            "This is an autoblock ID, you can only use it to unblock.")
def buildDescription(flinfoDescription=u'', flickrreview=False, reviewer=u'',
                     addCategory=u'', removeCategories=False,
                     rijksmonumentid=1):
    '''
    Build the final description for the image. The description is based on
    the info from flickrinfo and improved.
    '''
    description = flinfoDescription
    description = description.replace(
        u'\n|Source=[http://www.flickr.com/',
        u'\n{{Rijksmonument|%s}}\n|Source=[http://www.flickr.com/' % (rijksmonumentid,))

    if removeCategories:
        description = wikipedia.removeCategoryLinks(
            description, wikipedia.getSite('commons', 'commons'))

    # Add template
    description = description.replace(
        u'{{cc-by',
        u'{{Wiki Loves Monuments 2011|nl}}\n{{cc-by')

    if flickrreview:
        if reviewer:
            description = description.replace(
                u'{{flickrreview}}',
                u'{{flickrreview|' + reviewer +
                '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}')
    if addCategory:
        description = description.replace(u'{{subst:unc}}\n', u'')
        description = description + u'\n[[Category:' + addCategory + ']]\n'
    description = description.replace(u'\r\n', u'\n')
    return description
def main(args):
    '''
    Main loop. Get a generator and options. Work on all images in the
    generator.
    '''
    generator = None
    onlyFilter = False
    onlyUncat = False
    genFactory = pagegenerators.GeneratorFactory()

    global search_wikis
    global hint_wiki

    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    for arg in wikipedia.handleArgs():
        if arg == '-onlyfilter':
            onlyFilter = True
        elif arg == '-onlyuncat':
            onlyUncat = True
        elif arg.startswith('-hint:'):
            hint_wiki = arg[len('-hint:'):]
        elif arg.startswith('-onlyhint'):
            search_wikis = arg[len('-onlyhint:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = pagegenerators.CategorizedPageGenerator(
            catlib.Category(site, u'Category:Media needing categories'),
            recurse=True)

    initLists()
    categorizeImages(generator, onlyFilter, onlyUncat)
    wikipedia.output(u'All done')
        wikipedia.output(u'Page %s is locked?!' % page.title())


if __name__ == "__main__":
    singlepage = []
    gen = None
    start = None
    try:
        action = None
        for arg in wikipedia.handleArgs():
            if arg == ('pages'):
                action = 'pages'
            elif arg == ('categories'):
                action = 'categories'
            elif arg.startswith('-start:'):
                start = wikipedia.Page(wikipedia.getSite(), arg[7:])
                gen = pagegenerators.AllpagesPageGenerator(
                    start.titleWithoutNamespace(),
                    namespace=start.namespace(),
                    includeredirects=False)
            elif arg.startswith('-cat:'):
                cat = catlib.Category(wikipedia.getSite(),
                                      'Category:%s' % arg[5:])
                gen = pagegenerators.CategorizedPageGenerator(cat)
            elif arg.startswith('-ref:'):
                ref = wikipedia.Page(wikipedia.getSite(), arg[5:])
                gen = pagegenerators.ReferringPageGenerator(ref)
            elif arg.startswith('-link:'):
                link = wikipedia.Page(wikipedia.getSite(), arg[6:])
                gen = pagegenerators.LinkedPageGenerator(link)
            elif arg.startswith('-page:'):
        # put list of alternatives into listbox
        self.list = list
        # find required area
        laenge = len(list)
        maxbreite = 0
        for i in range(laenge):
            # cycle through all listitems to find maxlength
            if len(list[i]) + len(str(i)) > maxbreite:
                maxbreite = len(list[i]) + len(str(i))
            # show list as formerly in DOS-window
            self.listbox.insert(END, str(i) + ' - ' + list[i])
        # set optimized height & width
        self.listbox.config(height=laenge, width=maxbreite + 2)
        # wait for user to push a button which will destroy (close) the window
        return self.list


if __name__ == "__main__":
    import wikipedia as pywikibot
    try:
        root = Tk()
        root.resizable(width=FALSE, height=FALSE)
        root.title("Pywikipediabot GUI")
        page = pywikibot.Page(pywikibot.getSite(), u'Wiki')
        content = page.get()
        myapp = EditBoxWindow(root)
        myapp.bind("<Control-d>", myapp.debug)
        v = myapp.edit(content, highlight=page.title())
    finally:
        pywikibot.stopme()
        elif arg.startswith("-html"):
            correct_html_codes = True
        elif arg.startswith("-rebuild"):
            rebuild = True
        elif arg.startswith("-noname"):
            checknames = False
        elif arg.startswith("-checklang:"):
            checklang = arg[11:]
        elif arg.startswith("-knownonly"):
            knownonly = True
        elif arg.startswith("-knownplus"):
            knownonly = 'plus'
        else:
            title.append(arg)

    mysite = pywikibot.getSite()
    if not checklang:
        checklang = mysite.language()
    filename = pywikibot.config.datafilepath('externals/spelling',
                                             'spelling-' + checklang + '.txt')
    print "Getting wordlist"
    try:
        f = codecs.open(filename, 'r', encoding=mysite.encoding())
        for line in f.readlines():
            # remove trailing newlines and carriage returns
            try:
                while line[-1] in ['\n', '\r']:
                    line = line[:-1]
            except IndexError:
                pass
            # skip empty lines
def asktoadd(pl):
    if pl.site != mysite:
        return
    if pl.isRedirectPage():
        pl2 = pl.getRedirectTarget()
        if needcheck(pl2):
            tocheck.append(pl2)
            checked[pl2] = pl2
        return
    ctoshow = 500
    pywikibot.output(u'')
    pywikibot.output(u"==%s==" % pl.title())
    while 1:
        answer = raw_input("y(es)/n(o)/i(gnore)/(o)ther options? ")
        if answer == 'y':
            include(pl)
            break
        if answer == 'c':
            include(pl, realinclude=False)
            break
        if answer == 'z':
            if pl.exists():
                if not pl.isRedirectPage():
                    linkterm = pywikibot.input(
                        u"In what manner should it be alphabetized?")
                    include(pl, linkterm=linkterm)
                    break
            include(pl)
            break
        elif answer == 'n':
            exclude(pl)
            break
        elif answer == 'i':
            exclude(pl, real_exclude=False)
            break
        elif answer == 'o':
            pywikibot.output(u"t: Give the beginning of the text of the page")
            pywikibot.output(
                u"z: Add under another title (as [[Category|Title]])")
            pywikibot.output(
                u"x: Add the page, but do not check links to and from it")
            pywikibot.output(u"c: Do not add the page, but do check links")
            pywikibot.output(u"a: Add another page")
            pywikibot.output(u"l: Give a list of the pages to check")
        elif answer == 'a':
            pagetitle = raw_input("Specify page to add:")
            page = pywikibot.Page(pywikibot.getSite(), pagetitle)
            if page not in checked.keys():
                include(page)
        elif answer == 'x':
            if pl.exists():
                if pl.isRedirectPage():
                    pywikibot.output(
                        u"Redirect page. Will be included normally.")
                    include(pl, realinclude=False)
                else:
                    include(pl, checklinks=False)
            else:
                pywikibot.output(u"Page does not exist; not added.")
                exclude(pl, real_exclude=False)
            break
        elif answer == 'l':
            pywikibot.output(u"Number of pages still to check: %s"
                             % len(tocheck))
            pywikibot.output(u"Pages to be checked:")
            pywikibot.output(u" - ".join(page.title() for page in tocheck))
            pywikibot.output(u"==%s==" % pl.title())
        elif answer == 't':
            pywikibot.output(u"==%s==" % pl.title())
            try:
                pywikibot.output(u'' + pl.get(get_redirect=True)[0:ctoshow])
            except pywikibot.NoPage:
                pywikibot.output(u"Page does not exist.")
            ctoshow += 500
        else:
            pywikibot.output(u"Not understood.")
def isdate(s):
    """returns true iff s is a date or year"""
    dict, val = date.getAutoFormat(pywikibot.getSite().language(), s)
    return dict is not None
#!/usr/bin/python
# -*- coding: utf-8 -*-
# BY: رضا (User:reza1615 on fa.wikipedia)
# Distributed under the terms of the CC-BY-SA 3.0.
import wikipedia
import pagegenerators, query, sys
import fa_cosmetic_changes
import re, os, codecs, catlib, login

wikipedia.config.put_throttle = 0
wikipedia.put_throttle.setDelay()

secondwiki = 'en'
faSite = wikipedia.getSite('fa')
enSite = wikipedia.getSite(secondwiki)
txtTmp = ''
faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیًٌٍَُِّْٓيك'
# Edit summary (Persian): "Robot: adding a free gallery to the article"
msg = u'ربات: افزودن نگارخانهٔ آزاد به مقاله'
usernames = u'Fatranslator'
_cache = {}


def login_fa(usernames):
    try:
        password_fa = open("/data/project/rezabot/pywikipedia/passfile2", 'r')
    except:
        password_fa = open("/home/reza/compat/passfile2", 'r')
    password_fa = password_fa.read().replace('"', '').strip()
    passwords = password_fa.split('(')[1].split(',')[1].split(')')[0].strip()
    #-------------------------------------------
    botlog = login.LoginManager(password=passwords, username=usernames,
def main(*args):
    print "ARGS:%s\n" % sys.argv
    genFactory = pagegenerators.GeneratorFactory()
    # If xmlfilename is None, references will be loaded from the live wiki.
    xmlfilename = None
    user = None
    skip = False
    timestamp = None

    # read command line parameters
    for arg in pywikibot.handleArgs(*args):
        xmlfilename = arg
        print xmlfilename

    insite = pywikibot.getSite("en", "wikipedia")
    importsite = "speedydeletion"
    outsite = pywikibot.getSite("en", importsite)
    outsite.forceLogin()

    try:
        print "try to open %s\n" % xmlfilename
        with open(xmlfilename) as f:
            pass
    except:
        print "cannot open %s\n" % xmlfilename
        exit(0)

    if sys.argv[1] == "--validate":
        tempfile = "%s.tmp" % xmlfilename
        status = subprocess.call("xmllint --recover %s -o %s"
                                 % (xmlfilename, tempfile), shell=True)
        print "status %d\n" % status
    else:
        tempfile = xmlfilename

    dump = xmlreader.XmlDump(tempfile)
    count = 0
    for entry in dump.parse():
        # print file_store[entry.title]
        title = entry.title.encode("utf8", "ignore")
        if re.search("^User:", entry.title) or re.search("^Wikipedia:", entry.title):
            # pywikibot.output(u'skipping %s' % entry.title)
            continue
        # if re.search("^User:", entry.title) or re.search("^User Talk:", entry.title):
        #     pywikibot.output(u'skipping %s' % entry.title)
        #     continue
        if re.search(".css$", entry.title):
            # pywikibot.output(u'skipping %s' % entry.title)
            continue
        if re.search("^Main Page", entry.title):
            # pywikibot.output(u'skipping %s' % entry.title)
            continue
        # pywikibot.output(u'Considering %s' % entry.title)
        title = title.replace(":", "_")
        title = title.replace("!", "_")
        title = title.replace("/", "_")
        title = title.replace("\\", "_")
        title = decode(title)
        try:
            if (len(title) < 1):
                pywikibot.output(u'empty title:%s' % entry.title)
                continue
            if (file_store[title]):
                count = count + 1
            else:
                pywikibot.output(u'not exists %s' % entry.title)
        except KeyError:
            try:
                outpage = pywikibot.Page(site=outsite, title=entry.title,
                                         insite=outsite)
                exists = False
                try:
                    exists = outpage.exists()
                except:
                    pywikibot.output(
                        u'key error exiting article %s transformed to %s'
                        % (entry.title, title))
                if exists:
                    #pywikibot.output(u'there is an article %s' % entry.title)
                    try:
                        file_store[title] = 1
                    except KeyError:
                        pywikibot.output(
                            u'key error saving article %s transformed to %s'
                            % (entry.title, title))
                else:
                    pywikibot.output(u'is not there, adding %s' % entry.title)
                    contents = entry.text
                    usernames = entry.username
                    if re.search('Template:', title):
                        contents = contents + "<noinclude>{{wikipedia-template|%s}}</noinclude>" % usernames
                    else:
                        contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames
                    outpage._site = outsite
                    try:
                        outpage.put(contents)
                    except:
                        pywikibot.output(u'cannot put article %s / %s'
                                         % (entry.title, title))
                    try:
                        file_store[title] = 1
                    except KeyboardInterrupt:
                        print "Bye"
                        sys.exit()
                    except KeyError:
                        pywikibot.output(
                            u'could not save %s! to the list of article'
                            % entry.title)
            except KeyboardInterrupt:
                print "Bye"
                sys.exit()
            except KeyError:
                pywikibot.output(u'problem with %s! ' % entry.title)
            finally:
                count = count + 1
        except KeyboardInterrupt:
            print "Bye"
            sys.exit()
        except KeyError:
            pywikibot.output(u'problem2 with %s! ' % entry.title)
        finally:
            count = count + 1
def main():
    item = None
    for arg in pywikibot.handleArgs():
        continue
    bot = myRevertBot(site=pywikibot.getSite())
    bot.revert_contribs()
class AfDBot:
    # Edit summary message that should be used.
    msg = {
        'en': u'New section: /* [[Wikipedia:Articles for deletion|AfD]] nomination */ Notification',
    }

    def __init__(self, AfDlog, always, debug=False):
        """
        Constructor. Parameters:
            * AfDlog - The AfD log to be treated.
            * always - If True, the user won't be prompted before changes are
                       made.
            * debug  - If True, don't edit pages. Only show proposed edits.
        """
        self.AfDlog = AfDlog
        self.always = always
        self.debug = debug
        self.site = AfDlog.site()
        self.db = None
        self.replag = None
        locale.setlocale(locale.LC_ALL, 'nl_NL.UTF-8')
        os.environ['TZ'] = 'Europe/Amsterdam'

    def run(self):
        # Set up database access
        try:
            self.db = querier.querier(host="nlwiki.labsdb")
        except Exception, error:
            wikipedia.output(u'Could not connect to database: %s.' % error,
                             toStdout=False)

        # Dictionaries of users with page_title and AfD_title tuple.
        self.contributors = {}

        if self.db:
            # Get replag
            sql = """
                SELECT time_to_sec(timediff(now()+0,CAST(rev_timestamp AS int))) AS replag
                FROM nlwiki_p.revision
                ORDER BY rev_timestamp DESC
                LIMIT 1;"""
            result = self.db.do(sql)
            if not result:
                wikipedia.output(
                    u'Could not get replag. Assuming it\'s infinite (= 1 month).')
                self.replag = 30 * 25 * 3600
            else:
                self.replag = int(result[0]['replag'])
                wikipedia.output(u'Replag: %is.' % self.replag)

        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        try:
            # Load the page
            text = self.AfDlog.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping."
                             % self.AfDlog.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping."
                             % self.AfDlog.aslink())
            return

        # Find AfD's
        pageR = re.compile(r'^\*[ ]*?\[\[(?P<page>.*?)(?:\|.*?\]\]|\]\])')
        timestampR = re.compile('(\d{1,2}) (.{3}) (\d{4}) (\d{2}):(\d{2})')
        userR = re.compile(
            r'\[\[(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)(?:\|.*?\]\]|\]\])')
        strictTemplateR = re.compile(
            r'\{\{(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)\/[Hh]andtekening\}\}')
        templateR = re.compile(
            r'\{\{(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)\/.*?\}\}')

        pages = []
        lines = text.splitlines()
        for line in lines:
            mPage = pageR.search(line)
            mTimestamp = timestampR.search(line)
            if mTimestamp:
                t = time.strftime(
                    '%Y%m%d%H%M%S',
                    time.gmtime(time.mktime(time.strptime(mTimestamp.group(),
                                                          '%d %b %Y %H:%M'))))
            else:
                t = None
            if mPage and userR.search(line):
                pages.append((mPage.group('page'),
                              userR.search(line).group('user'), t))
                continue
            elif mPage and strictTemplateR.search(line):
                pages.append((mPage.group('page'),
                              strictTemplateR.search(line).group('user'), t))
                continue
            elif mPage and templateR.search(line):
                pages.append((mPage.group('page'),
                              templateR.search(line).group('user'), t))
                continue
            elif mPage:
                pages.append((mPage.group('page'), None, t))
                continue
        wikipedia.output(u'Found %i AfD\'s.' % len(pages))

        # Treat AfD's
        for p in pages:
            page = wikipedia.Page(self.site, p[0])
            nominator = p[1]
            timestamp = p[2]
            page_contributors = self.getcontributors(page, timestamp)
            for contributor in page_contributors:
                if not self.contributors.has_key(contributor):
                    self.contributors[contributor] = [(page.title(), nominator)]
                else:
                    self.contributors[contributor].append(
                        (page.title(), nominator))

        # Treat users
        wikipedia.output(u'\n\nFound %i unique users.'
                         % len(self.contributors))
        pages = []
        # User talk pages
        for user in self.contributors.keys():
            pages.append(u'%s:%s' % (self.site.namespace(3), user))
        gen = pagegenerators.PagesFromTitlesGenerator(pages, site=self.site)
        gen = pagegenerators.PreloadingGenerator(gen)
        for page in gen:
            self.treatUser(page)