def copyAndKeep(self, catname, cfdTemplates):
    """Returns true if copying was successful, false if target page
    already existed.

    """
    catname = self.site().category_namespace() + ':' + catname
    targetCat = wikipedia.Page(self.site(), catname)
    if targetCat.exists():
        wikipedia.output('Target page %s already exists!'
                         % targetCat.title())
        return False
    else:
        wikipedia.output('Moving text from %s to %s.'
                         % (self.title(), targetCat.title()))
        authors = ', '.join(self.contributingUsers())
        creationSummary = wikipedia.translate(
            wikipedia.getSite(), msg_created_for_renaming) \
            % (self.title(), authors)
        newtext = self.get()
        for regexName in cfdTemplates:
            matchcfd = re.compile(r"{{%s.*?}}" % regexName, re.IGNORECASE)
            newtext = matchcfd.sub('', newtext)
            matchcomment = re.compile(
                r"<!--BEGIN CFD TEMPLATE-->.*<!--END CFD TEMPLATE-->",
                re.IGNORECASE | re.MULTILINE | re.DOTALL)
            newtext = matchcomment.sub('', newtext)
            pos = 0
            while newtext[pos:pos + 1] == "\n":
                pos = pos + 1
            newtext = newtext[pos:]
        targetCat.put(newtext, creationSummary)
        return True
def listTemplates(self, templates, namespaces):
    mysite = pywikibot.getSite()
    count = 0
    # The names of the templates are the keys, and lists of pages
    # transcluding templates are the values.
    templateDict = {}
    finalText = [u'', u'List of pages transcluding templates:']
    for template in templates:
        finalText.append(u'* %s' % template)
    finalText.append(u'-' * 36)
    for template in templates:
        transcludingArray = []
        gen = pagegenerators.ReferringPageGenerator(
            pywikibot.Page(mysite,
                           mysite.template_namespace() + ':' + template),
            onlyTemplateInclusion=True)
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen,
                                                              namespaces)
        for page in gen:
            finalText.append(u'%s' % page.title())
            count += 1
            transcludingArray.append(page)
        templateDict[template] = transcludingArray
    finalText.append(u'Total page count: %d' % count)
    for line in finalText:
        pywikibot.output(line, toStdout=True)
    pywikibot.output(u'Report generated on %s'
                     % datetime.datetime.utcnow().isoformat(),
                     toStdout=True)
    return templateDict
def process_children(obj, current_user):
    if pywikibot.debug:
        pywikibot.output(u'parsing node: %s' % obj)
    for c in obj.children:
        temp = process_node(c, current_user)
        if temp and not current_user:
            current_user = temp
def main():
    # If debug is True, don't edit pages, but only show what would have
    # been changed.
    debug = False
    # The AfD log that should be treated.
    date = None
    # Whether to confirm edits.
    always = False

    # Parse command line arguments
    for arg in wikipedia.handleArgs():
        if arg.startswith('-debug'):
            wikipedia.output(u'Debug mode.')
            debug = True
        elif arg.startswith('-date'):
            if len(arg) == 5:
                date = wikipedia.input(
                    u'Please enter the date of the log that should be '
                    u'treated (yyyymmdd):')
            else:
                date = arg[6:]
        elif arg.startswith('-always'):
            always = True

    if date:
        page_title = u'Wikipedia:Te verwijderen pagina\'s/Toegevoegd %s' \
                     % date
    else:
        page_title = u'Wikipedia:Te verwijderen pagina\'s/Toegevoegd %s' \
                     % time.strftime("%Y%m%d",
                                     time.localtime(time.time()
                                                    - 60 * 60 * 24))
    wikipedia.output(u'Checking: %s.' % page_title)
    page = wikipedia.Page(wikipedia.getSite(code='nl', fam='wikipedia'),
                          page_title)
    bot = AfDBot(page, always, debug)
    bot.run()
def countTemplates(self, templates, namespaces):
    mysite = pywikibot.getSite()
    total = 0
    # The names of the templates are the keys, and the numbers of
    # transclusions are the values.
    templateDict = {}
    pg = pagegenerators
    getall = templates
    mytpl = mysite.template_namespace() + ':'
    for template in getall:
        gen = pg.ReferringPageGenerator(
            pywikibot.Page(mysite, mytpl + template),
            onlyTemplateInclusion=True)
        if namespaces:
            gen = pg.NamespaceFilterPageGenerator(gen, namespaces)
        count = 0
        for page in gen:
            count += 1
        templateDict[template] = count
        total += count
    pywikibot.output(u'\nNumber of transclusions per template',
                     toStdout=True)
    pywikibot.output(u'-' * 36, toStdout=True)
    for key in templateDict.keys():
        pywikibot.output(u'%-10s: %5d' % (key, templateDict[key]),
                         toStdout=True)
    pywikibot.output(u'TOTAL : %5d' % total, toStdout=True)
    pywikibot.output(u'Report generated on %s'
                     % datetime.datetime.utcnow().isoformat(),
                     toStdout=True)
    return templateDict
def processImage(self, fields):
    '''Work on a single image.'''
    if self.autonomous:
        # Check if the image already exists. Do nothing if the name is
        # already taken.
        CommonsPage = pywikibot.Page(
            pywikibot.getSite('commons', 'commons'),
            u'File:' + fields.get('filename'))
        if CommonsPage.exists():
            return False
    else:
        while True:
            # Show the Tkdialog to accept/reject and change the name
            fields = Tkdialog(fields).getnewmetadata()
            if fields.get('skip'):
                pywikibot.output(u'Skipping %s : User pressed skip.'
                                 % fields.get('imagepage').title())
                return False
            # Check if the image already exists
            CommonsPage = pywikibot.Page(
                pywikibot.getSite('commons', 'commons'),
                u'File:' + fields.get('filename'))
            if not CommonsPage.exists():
                break
            else:
                pywikibot.output(
                    'Image already exists, pick another name or skip '
                    'this image')
                # We don't overwrite images; pick another name and go to
                # the start of the loop
    # Put the fields in the queue to be uploaded
    self.uploadQueue.put(fields)
def in_list(self, pagelist, title, lazyload=True):
    if pywikibot.verbose:
        pywikibot.output(u'Checking whitelist for: %s' % title)
    # quick check for exact match
    if title in pagelist:
        return title
    # quick check for wildcard
    if '' in pagelist:
        if pywikibot.verbose:
            pywikibot.output(u"wildcarded")
        return '.*'
    for item in pagelist:
        if pywikibot.verbose:
            pywikibot.output(u"checking against whitelist item = %s"
                             % item)
        if isinstance(item, PatrolRule):
            if pywikibot.verbose:
                pywikibot.output(u"invoking programmed rule")
            if item.match(title):
                return item
        elif title_match(item, title):
            return item
    if pywikibot.verbose:
        pywikibot.output(u'not found')
def showImageList(self, imagelist):
    for i in range(len(imagelist)):
        image = imagelist[i]
        #sourceSite = sourceImagePage.site()
        print "-" * 60
        pywikibot.output(u"%s. Found image: %s"
                         % (i, image.title(asLink=True)))
        try:
            # Show the image description page's contents
            pywikibot.output(image.get(throttle=False))
            # look if page already exists with this name.
            # TODO: consider removing this: a different image of the same
            # name may exist on the target wiki, and the bot user may want
            # to upload anyway, using another name.
            try:
                # Maybe the image is on the target site already
                targetTitle = '%s:%s' % (self.targetSite.image_namespace(),
                                         image.title().split(':', 1)[1])
                targetImage = pywikibot.Page(self.targetSite, targetTitle)
                targetImage.get(throttle=False)
                pywikibot.output(u"Image with this name is already on %s."
                                 % self.targetSite)
                print "-" * 60
                pywikibot.output(targetImage.get(throttle=False))
                sys.exit()
            except pywikibot.NoPage:
                # That's the normal case
                pass
            except pywikibot.IsRedirectPage:
                pywikibot.output(
                    u"Description page on target wiki is redirect?!")
        except pywikibot.NoPage:
            break
    print "=" * 60
def processImage(self, page):
    '''Work on a single image.'''
    if page.exists() and (page.namespace() == 6) and \
       (not page.isRedirectPage()):
        imagepage = pywikibot.ImagePage(page.site(), page.title())

        # First do autoskip.
        if self.doiskip(imagepage):
            pywikibot.output(
                u'Skipping %s : Got a template on the skip list.'
                % page.title())
            return False

        text = imagepage.get()
        foundMatch = False
        for (regex, replacement) in licenseTemplates[page.site().language()]:
            match = re.search(regex, text, flags=re.IGNORECASE)
            if match:
                foundMatch = True
        if not foundMatch:
            pywikibot.output(
                u'Skipping %s : No suitable license template was found.'
                % page.title())
            return False
        self.prefetchQueue.put(self.getNewFields(imagepage))
def __iter__(self):
    try:
        # this array will contain up to pageNumber pages and will be
        # flushed after these pages have been preloaded and yielded.
        somePages = []
        for page in self.wrapped_gen:
##            if self.finished.isSet():
##                return
            somePages.append(page)
            # We don't want to load too many pages at once using XML
            # export. We only get a maximum number at a time.
            if len(somePages) >= self.pageNumber:
                for loaded_page in self.preload(somePages):
                    yield loaded_page
                somePages = []
        if somePages:
            # wrapped generator is exhausted but some pages still unloaded
            # preload remaining pages
            for loaded_page in self.preload(somePages):
                yield loaded_page
    except GeneratorExit:
        pass
    except Exception, e:
        traceback.print_exc()
        pywikibot.output(unicode(e))
def MySQLPageGenerator(query, site=None):
    import MySQLdb as mysqldb
    if site is None:
        site = pywikibot.getSite()
    conn = mysqldb.connect(config.db_hostname, db=site.dbName(),
                           user=config.db_username,
                           passwd=config.db_password)
    cursor = conn.cursor()
    pywikibot.output(u'Executing query:\n%s' % query)
    query = query.encode(site.encoding())
    cursor.execute(query)
    while True:
        try:
            namespaceNumber, pageName = cursor.fetchone()
            print namespaceNumber, pageName
        except TypeError:
            # Limit reached or no more results
            break
        #print pageName
        if pageName:
            namespace = site.namespace(namespaceNumber)
            pageName = unicode(pageName, site.encoding())
            if namespace:
                pageTitle = '%s:%s' % (namespace, pageName)
            else:
                pageTitle = pageName
            page = pywikibot.Page(site, pageTitle)
            yield page
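# Hedged usage sketch (not part of the original module): the query must
# return (page_namespace, page_title) pairs, since the generator unpacks
# exactly two columns per row. Wrapped in a helper so nothing runs on
# import; the query text is illustrative only.
def _exampleMySQLPageGenerator():
    sampleQuery = u"""SELECT page_namespace, page_title
                      FROM page
                      WHERE page_is_redirect = 0
                      LIMIT 10"""
    for page in MySQLPageGenerator(sampleQuery):
        pywikibot.output(page.title())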
def save(self, text, page, comment, minorEdit=False, botflag=False):
    # only save if something was changed
    if text != page.get():
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        # show what was changed
        pywikibot.showDiff(page.get(), text)
        pywikibot.output(u'Comment: %s' % comment)
        choice = pywikibot.inputChoice(
            u'Do you want to accept these changes?',
            ['Yes', 'No'], ['y', 'N'], 'N')
        if choice == 'y':
            try:
                # Save the page
                page.put(text, comment=comment, minorEdit=minorEdit,
                         botflag=botflag)
            except pywikibot.LockedPage:
                pywikibot.output(u"Page %s is locked; skipping."
                                 % page.title(asLink=True))
            except pywikibot.EditConflict:
                pywikibot.output(u'Skipping %s because of edit conflict'
                                 % page.title())
            except pywikibot.SpamfilterError, error:
                pywikibot.output(
                    u'Cannot change %s because of spam blacklist entry %s'
                    % (page.title(), error.url))
            else:
                return True
def PageTitleFilterPageGenerator(generator, ignoreList):
    """Wraps around another generator. Yields only those pages that are
    not listed in the ignore list.

    The ignoreList is a dictionary. Family names are mapped to
    dictionaries in which language codes are mapped to lists of
    page titles.

    """
    def isIgnored(page):
        if not (page.site().family.name in ignoreList and
                page.site().lang in ignoreList[page.site().family.name]):
            return False
        for ig in ignoreList[page.site().family.name][page.site().lang]:
            if re.match(ig, page.title()):
                return True
        return False

    for page in generator:
        if isIgnored(page):
            if pywikibot.verbose:
                pywikibot.output('Ignoring page %s' % page.title())
        else:
            yield page
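# Hedged example (not in the original source): ignoreList maps a family
# name to a dict of language codes, each holding a list of title regexes;
# any page whose title matches one of the regexes is dropped.
def _examplePageTitleFilter(generator):
    ignoreList = {
        'wikipedia': {
            'en': [u'Talk:.*', u'User:.*'],
        },
    }
    for page in PageTitleFilterPageGenerator(generator, ignoreList):
        pywikibot.output(page.title())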
def load_word_function(raw):
    """Load the badword list and the whitelist."""
    page = re.compile(r"(?:\"|\')(.*?)(?:\"|\')(?:, |\))", re.UNICODE)
    list_loaded = page.findall(raw)
    if len(list_loaded) == 0:
        pywikibot.output(u'There was no input on the real-time page.')
    return list_loaded
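# Hedged example (not part of the original script): the regex collects
# single- or double-quoted items followed by ", " or ")", so raw page
# text like the tuple below yields three entries.
def _exampleLoadWordFunction():
    raw = u"badwords = ('spam', \"casino\", 'viagra')"
    return load_word_function(raw)  # [u'spam', u'casino', u'viagra']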
def _catlib_Category__parseCategory(self, recurse=False, purge=False,
                                    startFrom=None):
    if not startFrom:
        startFrom = 0
    ns = self.site().category_namespaces()
    catsdone = []
    catstodo = [(self, recurse)]

    # Get subcats and articles
    for (cat, recurselevel) in catstodo:
        if type(recurselevel) == type(1):
            newrecurselevel = recurselevel - 1
        else:
            newrecurselevel = recurselevel
        catsdone.append(cat)
        wikipedia.output("Getting [[%s]] from %s..."
                         % (cat.title(), cat.site().dbName()))
        for page in toolserver.Generators.getCategoryMembers(cat,
                                                             startFrom):
            if type(page) == catlib.Category:
                if recurselevel and page not in catsdone:
                    catstodo.append((page, newrecurselevel))
                yield catlib.SUBCATEGORY, page.title()
            else:
                yield catlib.ARTICLE, page.title()

    # Get supercats
    for supercat in toolserver.Generators.getCategories(self):
        yield catlib.SUPERCATEGORY, supercat.title()
def copyTo(self, catname):
    """Returns True if copying was successful, False if the target page
    already existed.

    """
    catname = self.site().category_namespace() + ":" + catname
    targetCat = pywikibot.Page(self.site(), catname)
    if targetCat.exists():
        pywikibot.output("Target page %s already exists!"
                         % targetCat.title())
        return False
    else:
        pywikibot.output("Moving text from %s to %s."
                         % (self.title(), targetCat.title()))
        authors = ", ".join(self.contributingUsers())
        creationSummary = pywikibot.translate(
            pywikibot.getSite(), msg_created_for_renaming) \
            % (self.title(), authors)
        # Sometimes the summary is longer than 200 characters and thus
        # will not be shown completely. To avoid a copyright violation,
        # the bot must then list the authors elsewhere (on the talk page).
        if len(creationSummary) > 200:
            talkpage = targetCat.toggleTalkPage()
            try:
                talktext = talkpage.get()
            except pywikibot.NoPage:
                talkpage.put(u"==Authors==\n%s-~~~~" % authors,
                             u"Bot:Listifying authors")
            else:
                talkpage.put(talktext + u"\n==Authors==\n%s-~~~~" % authors,
                             u"Bot:Listifying authors")
        targetCat.put(self.get(), creationSummary)
        return True
def categoryAllElementsAPI(CatName, cmlimit=5000, categories_parsed=None,
                           site=None):
    """Load all the elements in a category using the API.
    Limit: 5000 elements.

    """
    # Use None as the default to avoid sharing one mutable list between
    # independent calls.
    if categories_parsed is None:
        categories_parsed = []
    pywikibot.output("Loading %s..." % CatName)

    # action=query&list=categorymembers&cmlimit=500&cmtitle=Category:License_tags
    params = {
        "action": "query",
        "list": "categorymembers",
        "cmlimit": cmlimit,
        "cmtitle": CatName,
    }
    data = query.GetData(params, site)
    categories_parsed.append(CatName)
    try:
        members = data["query"]["categorymembers"]
    except KeyError:
        if int(cmlimit) != 500:
            pywikibot.output(u"An error occurred, trying to reload the "
                             u"category.")
            return categoryAllElementsAPI(CatName, cmlimit=500,
                                          categories_parsed=categories_parsed,
                                          site=site)
        else:
            raise pywikibot.Error(data)
    if len(members) == int(cmlimit):
        raise pywikibot.Error(
            u"The category selected has >= %s elements, limit reached."
            % cmlimit)
    allmembers = members
    results = list()
    for subcat in members:
        ns = subcat["ns"]
        title = subcat["title"]
        if ns == 14:
            if title not in categories_parsed:
                categories_parsed.append(title)
                (results_part, categories_parsed) = categoryAllElementsAPI(
                    title, 5000, categories_parsed, site)
                allmembers.extend(results_part)
    for member in allmembers:
        results.append(member)
    return (results, categories_parsed)
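# Hedged usage sketch (not in the original module): fetch every member of
# a category, subcategory members included; the category name here is
# illustrative.
def _exampleCategoryAllElements():
    members, parsed = categoryAllElementsAPI(u'Category:License tags')
    for member in members:
        pywikibot.output(member['title'])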
def revert(self, item):
    predata = {
        'action': 'query',
        'titles': item['title'],
        'prop': 'revisions',
        'rvprop': 'ids|timestamp|user|content',
        'rvlimit': '2',
        'rvstart': item['timestamp'],
    }
    data = query.GetData(predata, self.site)
    if 'error' in data:
        raise RuntimeError(data['error'])
    pages = data['query'].get('pages', ())
    if not pages:
        return False
    page = pages.itervalues().next()
    if len(page.get('revisions', ())) != 2:
        return False
    rev = page['revisions'][1]
    comment = u'Reverted to revision %s by %s on %s' \
              % (rev['revid'], rev['user'], rev['timestamp'])
    if self.comment:
        comment += ': ' + self.comment

    page = pywikibot.Page(self.site, item['title'])
    pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                     % page.aslink(True, True))
    old = page.get()
    new = rev['*']
    pywikibot.showDiff(old, new)
    page.put(new, comment)
    return comment
def main(args):
    '''Main loop. Get a generator and options. Work on all images in the
    generator.

    '''
    generator = None
    onlyFilter = False
    onlyUncat = False
    genFactory = pagegenerators.GeneratorFactory()

    global search_wikis
    global hint_wiki

    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    for arg in pywikibot.handleArgs():
        if arg == '-onlyfilter':
            onlyFilter = True
        elif arg == '-onlyuncat':
            onlyUncat = True
        elif arg.startswith('-hint:'):
            hint_wiki = arg[len('-hint:'):]
        elif arg.startswith('-onlyhint'):
            search_wikis = arg[len('-onlyhint:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = pagegenerators.CategorizedPageGenerator(
            catlib.Category(site, u'Category:Media needing categories'),
            recurse=True)
    initLists()
    categorizeImages(generator, onlyFilter, onlyUncat)
    pywikibot.output(u'All done')
def UserEditFilterGenerator(generator, username, timestamp=None, skip=False):
    """Generator which yields pages depending on whether user:username is
    an author of the page (only looks at the last 100 editors).

    If timestamp is set in MediaWiki format JJJJMMDDhhmmss, older edits
    are ignored.
    If skip is set, pages edited by the given user are ignored; otherwise
    only pages edited by this user are yielded.

    """
    if timestamp:
        ts = pywikibot.Timestamp.fromtimestampformat(timestamp)
    for page in generator:
        editors = page.getLatestEditors(limit=100)
        found = False
        for ed in editors:
            uts = pywikibot.Timestamp.fromISOformat(ed['timestamp'])
            if not timestamp or uts >= ts:
                if username == ed['user']:
                    found = True
                    break
            else:
                break
        if found and not skip or not found and skip:
            yield page
        else:
            pywikibot.output(u'Skipping %s' % page.title(asLink=True))
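# Hedged example (not from the original source): yield only pages that
# user "Example" edited on or after 1 January 2012 (MediaWiki timestamp
# format); pass skip=True to invert the filter.
def _exampleUserEditFilter(generator):
    for page in UserEditFilterGenerator(generator, u'Example',
                                        timestamp='20120101000000'):
        pywikibot.output(page.title())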
def getPoisonedLinks(pl):
    """Returns a list of known corrupted links that should be removed if
    seen.

    """
    result = []
    pywikibot.output(u'getting poisoned links for %s' % pl.title())
    dictName, value = date.getAutoFormat(pl.site().language(), pl.title())
    if dictName is not None:
        pywikibot.output(u'date found in %s' % dictName)
        # errors in year BC
        if dictName in date.bcFormats:
            for fmt in bcDateErrors:
                result.append(fmt % value)
        # i guess this is like friday the 13th for the years
        if value == 398 and dictName == 'yearsBC':
            appendFormatedDates(result, dictName, 399)
        if dictName == 'yearsBC':
            appendFormatedDates(result, 'decadesBC', value)
            appendFormatedDates(result, 'yearsAD', value)
        if dictName == 'yearsAD':
            appendFormatedDates(result, 'decadesAD', value)
            appendFormatedDates(result, 'yearsBC', value)
        if dictName == 'centuriesBC':
            appendFormatedDates(result, 'decadesBC', value * 100 + 1)
        if dictName == 'centuriesAD':
            appendFormatedDates(result, 'decadesAD', value * 100 + 1)
    return result
def getPhotos(photoset=u'', start_id='', end_id='', interval=100):
    '''Loop over a set of Panoramio photos.'''
    i = 0
    has_more = True
    url = u'http://www.panoramio.com/map/get_panoramas.php?set=%s&from=%s&to=%s&size=original'
    while has_more:
        gotInfo = False
        maxtries = 10
        tries = 0
        while not gotInfo:
            try:
                if tries < maxtries:
                    tries += 1
                    panoramioApiPage = urllib2.urlopen(
                        url % (photoset, i, i + interval))
                    contents = panoramioApiPage.read().decode('utf-8')
                    gotInfo = True
                    i += interval
                else:
                    break
            except IOError:
                pywikibot.output(u'Got an IOError, let\'s try again')
            except socket.timeout:
                pywikibot.output(u'Got a timeout, let\'s try again')
        metadata = json.loads(contents)
        count = metadata.get(u'count')  # Useless?
        photos = metadata.get(u'photos')
        for photo in photos:
            yield photo
        has_more = metadata.get(u'has_more')
    return
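# Hedged usage sketch (not in the original script): iterate over one of
# the Panoramio sets and print each photo id; the set name and the
# 'photo_id' key reflect the public API as documented, not this module.
def _exampleGetPhotos():
    for photo in getPhotos(photoset=u'public'):
        pywikibot.output(unicode(photo.get(u'photo_id')))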
def dump(self, filename='category.dump.bz2'):
    '''Saves the contents of the dictionaries superclassDB and
    catContentDB to disk.

    '''
    if not os.path.isabs(filename):
        filename = pywikibot.config.datafilepath(filename)
    if self.catContentDB or self.superclassDB:
        pywikibot.output(u'Dumping to %s, please wait...'
                         % pywikibot.config.shortpath(filename))
        f = bz2.BZ2File(filename, 'w')
        databases = {
            'catContentDB': self.catContentDB,
            'superclassDB': self.superclassDB
        }
        # store dump to disk in binary format
        try:
            pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL)
        except pickle.PicklingError:
            pass
        f.close()
    else:
        try:
            os.remove(filename)
        except EnvironmentError:
            pass
        else:
            pywikibot.output(u'Database is empty. %s removed'
                             % pywikibot.config.shortpath(filename))
def getExtendedFindNearby(lat, lng):
    '''Get the result from http://ws.geonames.org/extendedFindNearby
    and put it in a list of dictionaries to play around with.

    '''
    result = []
    gotInfo = False
    parameters = urllib.urlencode({'lat': lat, 'lng': lng})
    while not gotInfo:
        try:
            page = urllib.urlopen(
                "http://ws.geonames.org/extendedFindNearby?%s"
                % parameters)
            et = xml.etree.ElementTree.parse(page)
            gotInfo = True
        except IOError:
            wikipedia.output(u'Got an IOError, let\'s try again')
            time.sleep(30)
        except socket.timeout:
            wikipedia.output(u'Got a timeout, let\'s try again')
            time.sleep(30)
    for geoname in et.getroot().getchildren():
        geonamedict = {}
        if geoname.tag == 'geoname':
            for element in geoname.getchildren():
                geonamedict[element.tag] = element.text
            result.append(geonamedict)
    #print result
    return result
def main():
    pywikibot.warning("this script should not be run manually/directly, "
                      "but automatically by maintainer.py")
    if len(sys.argv) == 1:
        pywikibot.output("Usage: censure.py <article title>")
        sys.exit(1)
    del sys.argv[0]
    checkPage(" ".join(sys.argv).decode("utf-8"))
def _refreshOld(site, sysop=False):
    # get watchlist special page's URL
    path = site.watchlist_address()
    pywikibot.output(u'Retrieving watchlist for %s' % repr(site))
    #pywikibot.put_throttle()  # It actually is a get, but a heavy one.
    watchlistHTML = site.getUrl(path, sysop=sysop)

    pywikibot.output(u'Parsing watchlist')
    watchlist = []
    for itemR in [
            re.compile(r'<li><input type="checkbox" name="id\[\]" value="(.+?)" />'),
            re.compile(r'<li><input name="titles\[\]" type="checkbox" value="(.+?)" />')]:
        for m in itemR.finditer(watchlistHTML):
            pageName = m.group(1)
            watchlist.append(pageName)

    # Save the watchlist to disk
    # The file is stored in the watchlists subdir. Create if necessary.
    if sysop:
        f = open(pywikibot.config.datafilepath(
            'watchlists',
            'watchlist-%s-%s-sysop.dat' % (site.family.name, site.lang)),
            'w')
    else:
        f = open(pywikibot.config.datafilepath(
            'watchlists',
            'watchlist-%s-%s.dat' % (site.family.name, site.lang)), 'w')
    pickle.dump(watchlist, f)
    f.close()
def get(site=None):
    if site is None:
        site = pywikibot.getSite()
    if site in cache:
        # Use cached copy if it exists.
        watchlist = cache[site]
    else:
        fn = pywikibot.config.datafilepath(
            'watchlists',
            'watchlist-%s-%s.dat' % (site.family.name, site.lang))
        try:
            # find out how old our saved dump is (in seconds)
            file_age = time.time() - os.path.getmtime(fn)
            # if it's older than 1 month, reload it
            if file_age > 30 * 24 * 60 * 60:
                pywikibot.output(
                    u'Copy of watchlist is one month old, reloading')
                refresh(site)
        except OSError:
            # no saved watchlist exists yet, retrieve one
            refresh(site)
        f = open(fn, 'r')
        watchlist = pickle.load(f)
        f.close()
        # create cached copy
        cache[site] = watchlist
    return watchlist
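# Hedged example (not part of the original module): the cached watchlist
# is a plain list of page titles, so a membership test is enough to check
# whether a page is watched on the default site.
def _exampleIsWatched(title):
    return title in get()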
def writeMainFreeUploads(subpages):
    site = wikipedia.getSite(u'en', u'wikipedia')
    page = wikipedia.Page(site, u'User:Multichill/Free_uploads')
    oldtext = page.get()

    text = u'__TOC__\n'
    #text = text + u'== Links to day pages ==\n'
    #text = text + u'{{Special:PrefixIndex/User:Multichill/Free uploads/20}}\n'
    text = text + u'== This week ==\n'
    i = 0
    limit = 7
    # From new to old
    subpages.reverse()
    for subpage in subpages:
        date = subpage.replace(u'User:Multichill/Free uploads/', u'')
        if i < limit:
            text = text + u'===[[%s|%s]]===\n' % (subpage, date)
            text = text + u'{{%s}}\n' % (subpage,)
        elif i == limit:
            text = text + u'== Older ==\n'
            text = text + u'* [[%s|%s]]\n' % (subpage, date)
        else:
            text = text + u'* [[%s|%s]]\n' % (subpage, date)
        i = i + 1

    comment = u'Updating list, %d subpages contain images' \
              % (len(subpages),)
    wikipedia.showDiff(oldtext, text)
    wikipedia.output(comment)
    page.put(text, comment)
def output(message, toStdout=True):
    message = time.strftime('[%Y-%m-%d %H:%M:%S] ') + message
    wikipedia.output(message, toStdout=toStdout)
    if toStdout:
        sys.stdout.flush()
    else:
        sys.stderr.flush()
def makeStatistics(mconfig, totals):
    text = u'{| class="wikitable sortable"\n'
    text = text + u'! country !! lang !! total !! page !! row template !! Commons template\n'
    totalImages = 0
    for ((countrycode, lang), countryconfig) in \
            sorted(mconfig.countries.items()):
        if countryconfig.get('unusedImagesPage') and \
           countryconfig.get('commonsTemplate'):
            text = text + u'|-\n'
            text = text + u'| %s ' % countrycode
            text = text + u'|| %s ' % lang
            text = text + u'|| %s ' % totals.get((countrycode, lang))
            totalImages = totalImages + totals.get((countrycode, lang))
            text = text + u'|| [[:%s:%s|%s]] ' % (
                lang, countryconfig.get('unusedImagesPage'),
                countryconfig.get('unusedImagesPage'))
            text = text + u'|| [[:%s:Template:%s|%s]] ' % (
                lang, countryconfig.get('rowTemplate'),
                countryconfig.get('rowTemplate'))
            text = text + u'|| {{tl|%s}}\n' \
                   % countryconfig.get('commonsTemplate')
    text = text + u'|-\n'
    text = text + u'| || || %s \n' % totalImages
    text = text + u'|}\n'

    site = wikipedia.getSite('commons', 'commons')
    page = wikipedia.Page(
        site, u'Commons:Monuments database/Unused images/Statistics')
    comment = u'Updating unused image statistics. Total unused images: %s' \
              % totalImages
    wikipedia.output(text)
    page.put(newtext=text, comment=comment)
class AfDBot:
    # Edit summary message that should be used.
    msg = {
        'en': u'New section: /* [[Wikipedia:Articles for deletion|AfD]] nomination */ Notification',
    }

    def __init__(self, AfDlog, always, debug=False):
        """Constructor.

        Parameters:
        * AfDlog - The AfD log to be treated.
        * always - If True, the user won't be prompted before changes
                   are made.
        * debug  - If True, don't edit pages. Only show proposed edits.

        """
        self.AfDlog = AfDlog
        self.always = always
        self.debug = debug
        self.site = AfDlog.site()
        self.db = None
        self.replag = None
        #locale.setlocale(locale.LC_ALL, 'nl_NL')
        os.environ['TZ'] = 'Europe/Amsterdam'

    def run(self):
        # Set up database access
        try:
            self.db = querier.querier(host="nlwiki.labsdb")
        except Exception, error:
            wikipedia.output(u'Could not connect to database: %s.' % error,
                             toStdout=False)
        # Dictionary of users with (page_title, nominator) tuples.
        self.contributors = {}
        if self.db:
            # Get replag
            sql = """
                SELECT time_to_sec(timediff(now()+0,
                       CAST(rev_timestamp AS int))) AS replag
                FROM nlwiki_p.revision
                ORDER BY rev_timestamp DESC
                LIMIT 1;"""
            result = self.db.do(sql)
            if not result:
                wikipedia.output(u'Could not get replag. Assuming it\'s '
                                 u'infinite (= 1 month).')
                self.replag = 30 * 25 * 3600
            else:
                self.replag = int(result[0]['replag'])
                wikipedia.output(u'Replag: %is.' % self.replag)
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(),
                                                self.msg))
        try:
            # Load the page
            text = self.AfDlog.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping."
                             % self.AfDlog.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping."
                             % self.AfDlog.aslink())
            return

        # Find AfD's
        pageR = re.compile(r'^\*[ ]*?\[\[(?P<page>.*?)(?:\|.*?\]\]|\]\])')
        timestampR = re.compile(
            r'(\d{1,2}) (.{3}) (\d{4}) (\d{2}):(\d{2})')
        userR = re.compile(
            r'\[\[(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)(?:\|.*?\]\]|\]\])')
        strictTemplateR = re.compile(
            r'\{\{(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)\/[Hh]andtekening\}\}')
        templateR = re.compile(
            r'\{\{(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)\/.*?\}\}')
        pages = []
        lines = text.splitlines()
        for line in lines:
            mPage = pageR.search(line)
            mTimestamp = timestampR.search(line)
            if mTimestamp:
                t = time.strftime(
                    '%Y%m%d%H%M%S',
                    time.gmtime(time.mktime(time.strptime(
                        mTimestamp.group(), '%d %b %Y %H:%M'))))
            else:
                t = None

            if mPage and userR.search(line):
                pages.append((mPage.group('page'),
                              userR.search(line).group('user'), t))
                continue
            elif mPage and strictTemplateR.search(line):
                pages.append((mPage.group('page'),
                              strictTemplateR.search(line).group('user'),
                              t))
                continue
            elif mPage and templateR.search(line):
                pages.append((mPage.group('page'),
                              templateR.search(line).group('user'), t))
                continue
            elif mPage:
                pages.append((mPage.group('page'), None, t))
                continue
        wikipedia.output(u'Found %i AfD\'s.' % len(pages))

        # Treat AfD's
        for p in pages:
            page = wikipedia.Page(self.site, p[0])
            nominator = p[1]
            timestamp = p[2]
            page_contributors = self.getcontributors(page, timestamp)
            for contributor in page_contributors:
                if not self.contributors.has_key(contributor):
                    self.contributors[contributor] = [(page.title(),
                                                       nominator)]
                else:
                    self.contributors[contributor].append((page.title(),
                                                           nominator))

        # Treat users
        wikipedia.output(u'\n\nFound %i unique users.'
                         % len(self.contributors))
        pages = []
        # User talk pages
        for user in self.contributors.keys():
            pages.append(u'%s:%s' % (self.site.namespace(3), user))
        gen = pagegenerators.PagesFromTitlesGenerator(pages,
                                                      site=self.site)
        gen = pagegenerators.PreloadingGenerator(gen)
        for page in gen:
            self.treatUser(page)
def outputall(self):
    # Output the counts in alphabetical order of name.
    names = self.dict.keys()
    names.sort()
    for name in names:
        pywikibot.output("There are " + str(self.dict[name]) + " " + name)
    for id in pageobjs['query']['badrevids']:
        if id == int(revid):
            # print rv
            pywikibot.output('* ' + revid)
            return False
    return True

cat = catlib.Category(
    pywikibot.getSite(),
    'Category:%s' % pywikibot.translate(pywikibot.getSite(), reports_cat))
gen = pagegenerators.CategorizedPageGenerator(cat, recurse=True)
for page in gen:
    data = page.get()
    pywikibot.output(page.title(asLink=True))
    output = ''

    #
    # Preserve text before the sections
    #
    m = re.search("(?m)^==\s*[^=]*?\s*==", data)
    if m:
        output = data[:m.end() + 1]
    else:
        m = re.search("(?m)^===\s*[^=]*?", data)
        if not m:
            continue
        output = data[:m.start()]
savetext = u"{{#switch:{{{1|ur}}}"
# sql part
for lang in ["ur", "fa", "ar", "ro", "tr", "en", "fr", "de", "hi", "az",
             "id", "pnb", "hu", "he"]:
    site = wikipedia.getSite(lang)
    query = "select /* SLOW_OK */ count(rc_title),0 from recentchanges join page on rc_cur_id=page_id where rc_new=1 and rc_namespace=0 and page_is_redirect=0 and page.page_len>70 and rc_deleted=0 and DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 1 DAY)<rc_timestamp;"
    conn = mysqldb.connect(lang + "wiki.labsdb", db=site.dbName(),
                           user=config.db_username,
                           passwd=config.db_password)
    cursor = conn.cursor()
    wikipedia.output(u'Executing query:\n%s' % query)
    query = query.encode(site.encoding())
    cursor.execute(query)
    wikinum, nunum = cursor.fetchone()
    if wikinum:
        savetext = savetext + u"|" + lang + u"=" + numbertopersian(wikinum)

# pywikipedia part
savetext = savetext + "}}"
wikipedia.output(savetext)
site = wikipedia.getSite()
page = wikipedia.Page(site, u"سانچہ:شماریات گذشتہ 24/شمار")
page.put(savetext, u"(روبالہ:تجديد شماريات")
def pages(self):
    for page in self.generator:
        try:
            pywikibot.output(u'\n>>>> %s <<<<' % page.title())
            commons = pywikibot.getSite('commons', 'commons')
            commonspage = pywikibot.Page(commons, page.title())
            try:
                getcommons = commonspage.get(get_redirect=True)
                if page.title() == commonspage.title():
                    oldText = page.get()
                    text = oldText

                    # for commons template
                    findTemplate = re.compile(ur'\{\{[Cc]ommonscat')
                    s = findTemplate.search(text)
                    findTemplate2 = re.compile(ur'\{\{[Ss]isterlinks')
                    s2 = findTemplate2.search(text)
                    if s or s2:
                        pywikibot.output(u'** Already done.')
                    else:
                        text = pywikibot.replaceCategoryLinks(
                            text + u'{{commons|%s}}' % commonspage.title(),
                            page.categories())
                        if oldText != text:
                            pywikibot.showDiff(oldText, text)
                            if not self.acceptall:
                                choice = pywikibot.inputChoice(
                                    u'Do you want to accept these '
                                    u'changes?',
                                    ['Yes', 'No', 'All'],
                                    ['y', 'N', 'a'], 'N')
                                if choice == 'a':
                                    self.acceptall = True
                            if self.acceptall or choice == 'y':
                                try:
                                    msg = pywikibot.translate(
                                        pywikibot.getSite(), comment1)
                                    page.put(text, msg)
                                except pywikibot.EditConflict:
                                    pywikibot.output(
                                        u'Skipping %s because of edit '
                                        u'conflict' % page.title())
            except pywikibot.NoPage:
                pywikibot.output(u'Page does not exist in Commons!')
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s does not exist?!' % page.title())
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'Page %s is a redirect; skipping.'
                             % page.title())
        except pywikibot.LockedPage:
            pywikibot.output(u'Page %s is locked?!' % page.title())
def asktoadd(pl):
    if pl.site() != mysite:
        return
    if pl.isRedirectPage():
        pl2 = pl.getRedirectTarget()
        if needcheck(pl2):
            tocheck.append(pl2)
            checked[pl2] = pl2
        return
    ctoshow = 500
    pywikibot.output(u'')
    pywikibot.output(u"==%s==" % pl.title())
    while 1:
        answer = raw_input("y(es)/n(o)/i(gnore)/(o)ther options? ")
        if answer == 'y':
            include(pl)
            break
        if answer == 'c':
            include(pl, realinclude=False)
            break
        if answer == 'z':
            if pl.exists():
                if not pl.isRedirectPage():
                    linkterm = pywikibot.input(
                        u"In what manner should it be alphabetized?")
                    include(pl, linkterm=linkterm)
                    break
            include(pl)
            break
        elif answer == 'n':
            exclude(pl)
            break
        elif answer == 'i':
            exclude(pl, real_exclude=False)
            break
        elif answer == 'o':
            pywikibot.output(
                u"t: Give the beginning of the text of the page")
            pywikibot.output(
                u"z: Add under another title (as [[Category|Title]])")
            pywikibot.output(
                u"x: Add the page, but do not check links to and from it")
            pywikibot.output(u"c: Do not add the page, but do check links")
            pywikibot.output(u"a: Add another page")
            pywikibot.output(u"l: Give a list of the pages to check")
        elif answer == 'a':
            pagetitle = raw_input("Specify page to add:")
            page = pywikibot.Page(pywikibot.getSite(), pagetitle)
            if not page in checked.keys():
                include(page)
        elif answer == 'x':
            if pl.exists():
                if pl.isRedirectPage():
                    pywikibot.output(
                        u"Redirect page. Will be included normally.")
                    include(pl, realinclude=False)
                else:
                    include(pl, checklinks=False)
            else:
                pywikibot.output(u"Page does not exist; not added.")
                exclude(pl, real_exclude=False)
            break
        elif answer == 'l':
            pywikibot.output(u"Number of pages still to check: %s"
                             % len(tocheck))
            pywikibot.output(u"Pages to be checked:")
            pywikibot.output(u" - ".join(page.title()
                                         for page in tocheck))
            pywikibot.output(u"==%s==" % pl.title())
        elif answer == 't':
            pywikibot.output(u"==%s==" % pl.title())
            try:
                pywikibot.output(u''
                                 + pl.get(get_redirect=True)[0:ctoshow])
            except pywikibot.NoPage:
                pywikibot.output(u"Page does not exist.")
            ctoshow += 500
        else:
            pywikibot.output(u"Not understood.")
pywikibot.getall(mysite, subcatlist)
for cat in subcatlist:
    list = cat.articlesList()
    for page in list:
        exclude(page.title(), real_exclude=False)
        checked[page] = page
list = workingcat.articlesList()
if list:
    for pl in list:
        checked[pl] = pl
    pywikibot.getall(mysite, list)
    for pl in list:
        include(pl)
else:
    pywikibot.output(
        u"Category %s does not exist or is empty. Which page to start "
        u"with?" % workingcatname)
    answer = pywikibot.input(u"(Default is [[%s]]):" % workingcatname)
    if not answer:
        answer = workingcatname
    pywikibot.output(u'' + answer)
    pl = pywikibot.Page(mysite, answer)
    tocheck = []
    checked[pl] = pl
    include(pl)
loaded = 0
while tocheck:
    if loaded == 0:
        if len(tocheck) < 50:
            loaded = len(tocheck)
        else:
def getcontributors(self, page, timestamp):
    """Return a page's major contributors."""
    wikipedia.output(u'\n>>> %s <<<' % page.title())
    if page.isRedirectPage():
        wikipedia.output(u'Page is a redirect.')
        if self.db:
            sql = """
                SELECT 1
                FROM nlwiki_p.logging
                WHERE log_namespace = %s
                AND log_title = %s
                AND log_timestamp > %s
                AND log_type = 'move'
                ORDER BY log_timestamp ASC
                LIMIT 1;"""
            args = (page.namespace(),
                    self.sqltitle(page.titleWithoutNamespace()),
                    timestamp)
            result = self.db.do(sql, args)
            if result:
                page = page.getRedirectTarget()
                wikipedia.output(
                    u'Page was moved after the nomination. Checking '
                    u'target: %s.' % page.aslink())

    # Get first author of article
    if self.site.versionnumber() >= 12:
        # API mode
        params = {
            'action': 'query',
            'titles': self.sqltitle(page.title()),
            'prop': 'revisions',
            'rvdir': 'newer',
            'rvlimit': 1,
            'rvprop': 'timestamp|user',
        }
        datas = query.GetData(params, self.site)
        try:
            users = [datas['query']['pages'][page_id]['revisions'][0]['user']
                     for page_id in datas['query']['pages'].keys()]
            creator = users[0]
        except:
            wikipedia.output(
                u'Could not get first author from api for %s. The page '
                u'has probably been deleted. Ignoring.' % page.title(),
                toStdout=True)
            return set()
    elif self.db:
        wikipedia.output(
            u'Can not use api for version history. Trying database.')
        sql = """
            SELECT *
            FROM nlwiki_p.revision
            LEFT JOIN nlwiki_p.page ON page_id = rev_page
            WHERE page_namespace = %s
            AND page_title = %s
            ORDER BY rev_timestamp ASC
            LIMIT 1;"""
        args = (page.namespace(), self.sqltitle(page.title()))
        result = self.db.do(sql, args)
        if result:
            creator = result[0]['rev_user_text']
        else:
            creator = None
    else:
        wikipedia.output(
            u'Both api and database are unavailable. Aborting.',
            toStdout=False)

    # Get authors with more than 5 major edits.
    # FIXME: It's actually faster to select * than rev_user_text. Don't
    # know why.
    if self.db:
        sql = """
            SELECT *
            FROM nlwiki_p.revision
            LEFT JOIN nlwiki_p.page ON page_id = rev_page
            WHERE page_namespace = %s
            AND page_title = %s
            AND rev_timestamp < %s
            AND rev_minor_edit = 0
            GROUP BY rev_user_text
            HAVING COUNT(1) > 5;"""
        args = (page.namespace(), self.sqltitle(page.title()), timestamp)
        results = self.db.do(sql, args)
        try:
            contributors = set([unicode(result['rev_user_text'], 'utf8')
                                for result in results])
        except Exception, error:
            wikipedia.output(u'Could not get contributors.')
            print error
def run(self):
    """Starts the robot's action."""
    keepGoing = True
    startFromBeginning = True
    while keepGoing:
        if startFromBeginning:
            self.savedProgress = None
        self.refreshGenerator()
        count = 0
        for page in self.preloadingGen:
            try:
                pageText = page.get(get_redirect=True).split("\n")
                count += 1
            except pywikibot.NoPage:
                pywikibot.output(u'Page %s does not exist or has already '
                                 u'been deleted, skipping.'
                                 % page.title(asLink=True))
                continue
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            pywikibot.output(u'- - - - - - - - - ')
            if len(pageText) > 75:
                pywikibot.output(u'The page has too many lines; only the '
                                 u'first 50 lines are shown:')
                pywikibot.output(u'- ' * 9)
                pywikibot.output(u'\n'.join(pageText[:50]))
            else:
                pywikibot.output(u'\n'.join(pageText))
            pywikibot.output(u'- - - - - - - - - ')
            choice = pywikibot.inputChoice(
                u'Input action?', ['delete', 'skip', 'update', 'quit'],
                ['d', 'S', 'u', 'q'], 'S')
            if choice == 'q':
                keepGoing = False
                break
            elif choice == 'u':
                pywikibot.output(u'Updating from CSD category.')
                self.savedProgress = page.title()
                startFromBeginning = False
                break
            elif choice == 'd':
                reason = self.getReasonForDeletion(page)
                pywikibot.output(
                    u'The chosen reason is: \03{lightred}%s\03{default}'
                    % reason)
                page.delete(reason, prompt=False)
            else:
                pywikibot.output(u'Skipping page %s' % page.title())
            startFromBeginning = True
        if count == 0:
            if startFromBeginning:
                pywikibot.output(
                    u'There are no pages to delete.\n'
                    u'Waiting for 30 seconds or press Ctrl+C to quit...')
                try:
                    time.sleep(30)
                except KeyboardInterrupt:
                    keepGoing = False
            else:
                startFromBeginning = True
    pywikibot.output(u'Quitting program.')
def treatUser(self, page):
    """Leave a message for the user."""
    wikipedia.output(u'\n>>> %s <<<' % page.title())
    user = page.titleWithoutNamespace()
    welcomeUser = False
    afds = []
    try:
        # Load the page
        original_text = page.get()
    except wikipedia.NoPage:
        wikipedia.output(u"Page %s does not exist." % page.aslink())
        original_text = ''
        welcomeUser = True
    except wikipedia.IsRedirectPage:
        wikipedia.output(u"Page %s is a redirect. Skipping."
                         % page.aslink())
        return

    if not user in self.contributors.keys():
        wikipedia.output(
            u'Could not find AfD information for this user. Skipping.')
        return
    else:
        for page_title, nominator in self.contributors[user]:
            if nominator == page.title():
                # Page was created and nominated by the same user.
                wikipedia.output(
                    u'* [[%s]]: Article has been nominated for deletion '
                    u'by its author.' % page_title)
                continue
            # Try to find links to the page using the replicated database.
            if self.db and self.replag < 600:
                # FIXME: pl_namespace should not be fixed at 0.
                sql = """
                    SELECT 1
                    FROM nlwiki_p.page
                    LEFT JOIN nlwiki_p.pagelinks ON pl_from = page_id
                    WHERE page_namespace = 3
                    AND page_title = %s
                    AND pl_namespace = 0
                    AND pl_title = %s
                    LIMIT 1;"""
                args = (self.sqltitle(user), self.sqltitle(page_title))
                result = self.db.do(sql, args)
                if result:
                    wikipedia.output(u'* [[%s]]: Found link in database.'
                                     % page_title)
                    continue
            else:
                if re.search(
                        r'\[\[\:{0,1}%s(?:.*?|)\]\]'
                        % re.escape(page_title).replace('\\ ', '[_ ]'),
                        original_text):
                    wikipedia.output(
                        u'* [[%s]]: Found a link in text. Ignoring.'
                        % page_title)
                    continue
                elif re.search(
                        r'\{\{vvn\|%s.*?\}\}'
                        % re.escape(page_title).replace('\\ ', '[_ ]'),
                        original_text):
                    wikipedia.output(u'* [[%s]]: {{vvn}} found.'
                                     % page_title)
                    continue
            wikipedia.output(u'* [[%s]]: Leaving message.' % page_title)
            afds.append((page_title, nominator))

    if len(afds) == 0:
        wikipedia.output(u'User has been notified of all AfD\'s.')
        return
    if len(afds) == 1:
        header = u'Beoordelingsnominatie [[%s]]' % afds[0][0]
        if afds[0][1]:
            titles = (u'Het gaat om [[%s]] dat is genomineerd door '
                      u'[[Gebruiker:%s|%s]].'
                      % (afds[0][0], afds[0][1], afds[0][1]))
        else:
            titles = u'Het gaat om [[%s]].' % (afds[0][0])
    elif len(afds) > 1:
        header = u'Beoordelingsnominatie van o.a. [[%s]]' % afds[0][0]
        titles = u'De genomineerde artikelen zijn: '
        for page_title, nominator in afds:
            if nominator:
                titles += (u'[[%s]] door [[Gebruiker:%s|%s]], '
                           % (page_title, nominator, nominator))
            else:
                titles += (u'[[%s]] door een onbekende gebruiker, '
                           % page_title)
        titles = u'%s.' % titles[:-2]

    comment = (u'Nieuw onderwerp: /* %s */ Automatische melding van '
               u'beoordelingsnominatie' % header)
    AfDMessage = (u'{{subst:Gebruiker:Erwin/Bot/Verwijderbericht/SPagina'
                  u'|%s|%s|%s}} --~~~~'
                  % (header, titles, self.AfDlog.title()))
    if welcomeUser:
        comment = u'Welkom op Wikipedia!; %s' % comment
        text = u'{{welkomstbericht}}' + u'\n\n' + AfDMessage
    else:
        text = original_text + u'\n\n' + AfDMessage
    text = text.strip()

    # only save if something was changed
    if text != original_text:
        # show what was changed
        if not self.always or self.debug:
            wikipedia.showDiff(original_text, text)
        if not self.debug:
            if not self.always:
                choice = wikipedia.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No'], ['y', 'N'], 'N')
            else:
                choice = 'y'
            if choice == 'y':
                try:
                    # Save the page
                    page.put(text, comment=comment, minorEdit=False)
                except wikipedia.LockedPage:
                    wikipedia.output(u"Page %s is locked; skipping."
                                     % page.aslink())
                except wikipedia.EditConflict:
                    wikipedia.output(
                        u'Skipping %s because of edit conflict'
                        % page.title())
                except wikipedia.SpamfilterError, error:
                    wikipedia.output(
                        u'Cannot change %s because of spam blacklist '
                        u'entry %s' % (page.title(), error.url))
                except wikipedia.PageNotSaved:
                    wikipedia.output(
                        u'Page %s could not be saved; skipping.'
                        % page.aslink())
def put(self, title, contents):
    mysite = pywikibot.getSite()
    page = pywikibot.Page(mysite, title)
    # Show the title of the page we're working on.
    # Highlight the title in purple.
    pywikibot.output(u">>> \03{lightpurple}%s\03{default} <<<"
                     % page.title())
    if self.summary:
        comment = self.summary
    else:
        comment = pywikibot.translate(mysite, self.msg)
    comment_top = comment + " - " + pywikibot.translate(mysite,
                                                        self.msg_top)
    comment_bottom = comment + " - " + pywikibot.translate(mysite,
                                                           self.msg_bottom)
    comment_force = comment + " *** " \
                    + pywikibot.translate(mysite, self.msg_force) + " ***"

    # Remove leading newlines (they cause trouble when creating redirects)
    contents = re.sub('^[\r\n]*', '', contents)
    if page.exists():
        if self.append == "Top":
            pywikibot.output(u"Page %s already exists, appending on top!"
                             % title)
            contents = contents + page.get()
            comment = comment_top
        elif self.append == "Bottom":
            pywikibot.output(u"Page %s already exists, appending on "
                             u"bottom!" % title)
            contents = page.get() + contents
            comment = comment_bottom
        elif self.force:
            pywikibot.output(u"Page %s already exists, ***overwriting!"
                             % title)
            comment = comment_force
        else:
            pywikibot.output(u"Page %s already exists, not adding!"
                             % title)
            return
    else:
        if self.autosummary:
            comment = ''
            pywikibot.setAction('')

    if self.dry:
        pywikibot.output("*** Dry mode ***\n"
                         + "\03{lightpurple}title\03{default}: "
                         + title + "\n"
                         + "\03{lightpurple}contents\03{default}:\n"
                         + contents + "\n"
                         + "\03{lightpurple}comment\03{default}: "
                         + comment + "\n")
        return
    try:
        page.put(contents, comment=comment, minorEdit=self.minor)
    except pywikibot.LockedPage:
        pywikibot.output(u"Page %s is locked; skipping." % title)
    except pywikibot.EditConflict:
        pywikibot.output(u'Skipping %s because of edit conflict' % title)
    except pywikibot.SpamfilterError, error:
        pywikibot.output(
            u'Cannot change %s because of spam blacklist entry %s'
            % (title, error.url))
        try:
            contributors = set([unicode(result['rev_user_text'], 'utf8')
                                for result in results])
        except Exception, error:
            wikipedia.output(u'Could not get contributors.')
            print error
    else:
        contributors = set()
    if creator:
        contributors.add(creator)
    wikipedia.output(u'Found %i contributors: %s.'
                     % (len(contributors), u', '.join(contributors)))
    return contributors

def sqltitle(self, page_title):
    """Return a MySQL style title."""
    return page_title.replace(' ', '_').encode('utf8')

def treatUser(self, page):
    """Leave a message for the user."""
    wikipedia.output(u'\n>>> %s <<<' % page.title())
    user = page.titleWithoutNamespace()
    welcomeUser = False
def badNameFilter(self, name, force=False):
    if not globalvar.filtBadName:
        return False

    # initialize blacklist
    if not hasattr(self, '_blacklist') or force:
        elenco = [
            ' ano', ' anus', 'anal ', 'babies', 'baldracca', 'balle',
            'bastardo', 'bestiali', 'bestiale', 'bastarda', 'b.i.t.c.h.',
            'bitch', 'boobie', 'bordello', 'breast', 'cacata', 'cacca',
            'cachapera', 'cagata', 'cane', 'cazz', 'cazzo', 'cazzata',
            'chiavare', 'chiavata', 'chick', 'christ ', 'cristo',
            'clitoride', 'coione', 'cojdioonear', 'cojones', 'cojo',
            'coglione', 'coglioni', 'cornuto', 'cula', 'culatone',
            'culattone', 'culo', 'deficiente', 'deficente', 'dio', 'die ',
            'died ', 'ditalino', 'ejackulate', 'enculer', 'eroticunt',
            'fanculo', 'f******o', 'fica ', 'ficken', 'figa', 'sfiga',
            'fottere', 'fotter', 'fottuto', 'f**k', 'f.u.c.k.', 'funkyass',
            'gay', 'hentai.com', 'horne', 'horney', 'virgin', 'hotties',
            'idiot', '@alice.it', 'incest', 'jesus', 'gesu', 'gesù',
            'kazzo', 'kill', 'leccaculo', 'lesbian', 'lesbica', 'lesbo',
            'masturbazione', 'masturbare', 'masturbo', 'merda', 'merdata',
            'merdoso', 'mignotta', 'minchia', 'minkia', 'minchione',
            'mona', 'nudo', 'nuda', 'nudi', 'oral', 'sex', 'orgasmso',
            'porc', 'pompa', 'pompino', 'porno', 'puttana', 'puzza',
            'puzzone', 'racchia', 'sborone', 'sborrone', 'sborata',
            'sborolata', 'sboro', 'scopata', 'scopare', 'scroto',
            'scrotum', 'sega', 'sesso', 'shit', 'shiz', 's.h.i.t.',
            'sadomaso', 'sodomist', 'stronzata', 'stronzo', 'succhiamelo',
            'succhiacazzi', 'testicol', 'troia', 'universetoday.net',
            'vaffanculo', 'v****a', 'vibrator', 'vacca', 'yiddiot',
            'zoccola',
        ]
        elenco_others = [
            '@', '.com', '.sex', '.org', '.uk', '.en', '.it', 'admin',
            'administrator', 'amministratore', '@yahoo.com', '@alice.com',
            'amministratrice', 'burocrate', 'checkuser', 'developer',
            'http://', 'jimbo', 'mediawiki', 'on wheals', 'on wheal',
            'on wheel', 'planante', 'razinger', 'sysop', 'troll',
            'vandal', ' v.f. ', 'v. fighter', 'vandal f.',
            'vandal fighter', 'wales jimmy', 'wheels', 'wales', 'www.',
        ]
        # blacklist from wikipage
        badword_page = pywikibot.Page(
            self.site, pywikibot.translate(self.site, bad_pag))
        list_loaded = list()
        if badword_page.exists():
            pywikibot.output(u'\nLoading the bad words list from %s...'
                             % self.site)
            list_loaded = load_word_function(badword_page.get())
        else:
            showStatus(4)
            pywikibot.output(u'The bad word page doesn\'t exist!')
        self._blacklist = elenco + elenco_others + list_loaded
        del elenco, elenco_others, list_loaded

    if not hasattr(self, '_whitelist') or force:
        # initialize whitelist
        whitelist_default = ['emiliano']
        wtlpg = pywikibot.translate(self.site, whitelist_pg)
        list_white = list()
        if wtlpg:
            whitelist_page = pywikibot.Page(self.site, wtlpg)
            if whitelist_page.exists():
                pywikibot.output(u'\nLoading the whitelist from %s...'
                                 % self.site)
                list_white = load_word_function(whitelist_page.get())
            else:
                showStatus(4)
                pywikibot.output(u"The whitelist's page doesn't exist!")
        else:
            showStatus(4)
            pywikibot.output(u"WARNING: The whitelist hasn't been set!")
        # Join the whitelist words.
        self._whitelist = list_white + whitelist_default
        del list_white, whitelist_default

    try:
        for wname in self._whitelist:
            if wname.lower() in str(name).lower():
                name = name.lower().replace(wname.lower(), '')
                for bname in self._blacklist:
                    self.bname[name] = bname
                    return bname.lower() in name.lower()
    except UnicodeEncodeError:
        pass
    try:
        for bname in self._blacklist:
            if bname.lower() in str(name).lower():
                # bad name positive
                self.bname[name] = bname
                return True
    except UnicodeEncodeError:
        pass
    return False
def askAlternative(word, context=None):
    correct = None
    wikipedia.output(u"=" * 60)
    wikipedia.output(u"Found unknown word '%s'" % word)
    if context:
        wikipedia.output(u"Context:")
        wikipedia.output(u"" + context)
    wikipedia.output(u"-" * 60)
    while not correct:
        for i in xrange(len(Word(word).getAlternatives())):
            wikipedia.output(
                u"%s: Replace by '%s'"
                % (i + 1,
                   Word(word).getAlternatives()[i].replace('_', ' ')))
        wikipedia.output(u"a: Add '%s' as correct" % word)
        if word[0].isupper():
            wikipedia.output(u"c: Add '%s' as correct" % (uncap(word)))
        wikipedia.output(u"i: Ignore once (default)")
        wikipedia.output(u"p: Ignore on this page")
        wikipedia.output(u"r: Replace text")
        wikipedia.output(u"s: Replace text, but do not save as alternative")
        wikipedia.output(u"g: Guess (give me a list of similar words)")
        wikipedia.output(u"*: Edit by hand")
        wikipedia.output(u"x: Do not check the rest of this page")
        answer = wikipedia.input(u":")
        if answer == "":
            answer = "i"
        if answer in "aAiIpP":
            correct = word
            if answer in "aA":
                knownwords[word] = word
                newwords.append(word)
            elif answer in "pP":
                pageskip.append(word)
        elif answer in "rRsS":
            correct = wikipedia.input(u"What should I replace it by?")
            if answer in "rR":
                if correct_html_codes:
                    correct = removeHTML(correct)
                if correct != cap(word) and correct != uncap(word) and \
                   correct != word:
                    try:
                        knownwords[word] += [correct.replace(' ', '_')]
                    except KeyError:
                        knownwords[word] = [correct.replace(' ', '_')]
                    newwords.append(word)
                knownwords[correct] = correct
                newwords.append(correct)
        elif answer in "cC" and word[0].isupper():
            correct = word
            knownwords[uncap(word)] = uncap(word)
            newwords.append(uncap(word))
        elif answer in "gG":
            possible = getalternatives(word)
            if possible:
                print "Found alternatives:"
                for pos in possible:
                    wikipedia.output("  %s" % pos)
            else:
                print "No similar words found."
        elif answer == "*":
            correct = edit
        elif answer == "x":
            correct = endpage
        else:
            for i in xrange(len(Word(word).getAlternatives())):
                if answer == str(i + 1):
                    correct = Word(word).getAlternatives()[i].replace('_',
                                                                      ' ')
    return correct
def addReferences(self, oldText):
    """Tries to add a references tag into an existing section where it
    fits into. If there is no such section, creates a new section
    containing the references tag.

    * Returns: The modified pagetext

    """
    # Is there an existing section where we can add the references tag?
    for section in pywikibot.translate(self.site, referencesSections):
        sectionR = re.compile(r'\r\n=+ *%s *=+ *\r\n' % section)
        index = 0
        while index < len(oldText):
            match = sectionR.search(oldText, index)
            if match:
                if pywikibot.isDisabled(oldText, match.start()):
                    pywikibot.output(
                        'Existing %s section is commented out, skipping.'
                        % section)
                    index = match.end()
                else:
                    pywikibot.output(
                        u'Adding references tag to existing %s section...\n'
                        % section)
                    newText = (oldText[:match.end()] + u'\n'
                               + self.referencesText + u'\n'
                               + oldText[match.end():])
                    return newText
            else:
                break

    # Create a new section for the references tag
    for section in pywikibot.translate(self.site, placeBeforeSections):
        # Find out where to place the new section
        sectionR = re.compile(r'\r\n(?P<ident>=+) *%s *(?P=ident) *\r\n'
                              % section)
        index = 0
        while index < len(oldText):
            match = sectionR.search(oldText, index)
            if match:
                if pywikibot.isDisabled(oldText, match.start()):
                    pywikibot.output(
                        'Existing %s section is commented out, won\'t '
                        'add the references in front of it.' % section)
                    index = match.end()
                else:
                    pywikibot.output(
                        u'Adding references section before %s section...\n'
                        % section)
                    index = match.start()
                    ident = match.group('ident')
                    return self.createReferenceSection(oldText, index,
                                                       ident)
            else:
                break

    # This gets complicated: we want to place the new references
    # section over the interwiki links and categories, but also
    # over all navigation bars, persondata, and other templates
    # that are at the bottom of the page. So we need some advanced
    # regex magic.
    # The strategy is: create a temporary copy of the text. From that,
    # keep removing interwiki links, templates etc. from the bottom.
    # At the end, look at the length of the temp text. That's the position
    # where we'll insert the references section.
    catNamespaces = '|'.join(self.site.category_namespaces())
    categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
    interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
    # won't work with nested templates
    # the negative lookahead assures that we'll match the last template
    # occurrence in the temp text.
    ### fix me:
    ### {{commons}} or {{commonscat}} are part of Weblinks section
    ### * {{template}} is mostly part of a section
    ### so templatePattern must be fixed
    templatePattern = r'\r\n{{((?!}}).)+?}}\s*'
    commentPattern = r'<!--((?!-->).)*?-->\s*'
    metadataR = re.compile(r'(\r\n)?(%s|%s|%s|%s)$'
                           % (categoryPattern, interwikiPattern,
                              templatePattern, commentPattern), re.DOTALL)
    tmpText = oldText
    while True:
        match = metadataR.search(tmpText)
        if match:
            tmpText = tmpText[:match.start()]
        else:
            break
    pywikibot.output(
        u'Found no section that can be preceded by a new references '
        u'section.\nPlacing it before interwiki links, categories, and '
        u'bottom templates.')
    index = len(tmpText)
    return self.createReferenceSection(oldText, index)
def run(self):
    while True:
        welcomed_count = 0
        if globalvar.quick and self.site.has_api():
            us = [x for x in self.parseNewUserLog()]
            showStatus()
            try:
                userlib.getall(self.site, us)
            except NotImplementedError:
                globalvar.quick = False
                us = self._parseNewUserLogOld()
        else:
            us = self.parseNewUserLog()

        for users in us:
            if users.isBlocked():
                showStatus(3)
                pywikibot.output(u'%s has been blocked!' % users.name())
                continue
            if 'bot' in users.groups():
                showStatus(3)
                pywikibot.output(u'%s is a bot!' % users.name())
                continue
            if 'bot' in users.name().lower():
                showStatus(3)
                pywikibot.output(u'%s might be a global bot!'
                                 % users.name())
                continue
            #if globalvar.offset != 0 and time.strptime(users.registrationTime(), "%Y-%m-%dT%H:%M:%SZ") >= globalvar.offset:
            #
            if users.editCount() >= globalvar.attachEditCount:
                showStatus(2)
                pywikibot.output(u'%s has enough edits to be welcomed.'
                                 % users.name())
                ustp = users.getUserTalkPage()
                if ustp.exists():
                    showStatus(3)
                    pywikibot.output(u'%s has been already welcomed.'
                                     % users.name())
                    continue
                else:
                    if self.badNameFilter(users.name()):
                        self.reportBadAccount(users.name())
                        continue
                    welcome_text = pywikibot.translate(self.site, netext)
                    if globalvar.randomSign:
                        if self.site.family != 'wikinews':
                            welcome_text = welcome_text \
                                           % choice(self.defineSign())
                        if self.site.family == 'wiktionary' and \
                           self.site.lang == 'it':
                            pass
                        else:
                            welcome_text += timeselected
                    elif (self.site.family != 'wikinews' and
                          self.site.lang != 'it'):
                        welcome_text = welcome_text % globalvar.defaultSign
                    if self.site.lang in final_new_text_additions:
                        welcome_text += pywikibot.translate(
                            self.site, final_new_text_additions)
                    welcome_comment = i18n.twtranslate(self.site,
                                                       'welcome-welcome')
                    try:
                        # append welcomed, welcome_count++
                        ustp.put(welcome_text, welcome_comment,
                                 minorEdit=False)
                        welcomed_count += 1
                        self._totallyCount += 1
                        self.welcomed_users.append(users)
                    except pywikibot.EditConflict:
                        showStatus(4)
                        pywikibot.output(u'An edit conflict has occurred, '
                                         u'skipping this user.')
                    if globalvar.makeWelcomeLog and \
                       pywikibot.translate(self.site, logbook):
                        showStatus(5)
                        if welcomed_count == 1:
                            pywikibot.output(u'One user has been welcomed.')
                        elif welcomed_count == 0:
                            pywikibot.output(u'No users have been '
                                             u'welcomed.')
                        else:
                            pywikibot.output(u'%s users have been welcomed.'
                                             % welcomed_count)
                        if welcomed_count >= globalvar.dumpToLog:
                            if self.makelogpage(self.welcomed_users):
                                self.welcomed_users = list()
                                welcomed_count = 0
                            else:
                                continue
                    # If we don't have to report, do nothing.
            else:
                if users.editCount() == 0:
                    if not globalvar.quiet:
                        showStatus(1)
                        pywikibot.output(u'%s has no contributions.'
                                         % users.name())
                else:
                    showStatus(1)
                    pywikibot.output(u'%s has only %d contributions.'
                                     % (users.name(), users.editCount()))
                # That user mustn't be welcomed.
                continue

        if globalvar.makeWelcomeLog and \
           pywikibot.translate(self.site, logbook) and welcomed_count > 0:
            showStatus()
            if welcomed_count == 1:
                pywikibot.output(u'Putting the log of the latest user...')
            else:
                pywikibot.output(
                    u'Putting the log of the latest %d users...'
                    % welcomed_count)
            if self.makelogpage(self.welcomed_users):
                self.welcomed_users = list()
            else:
                continue
            self.welcomed_users = list()
        if hasattr(self, '_BAQueue'):
            showStatus()
            pywikibot.output("Putting bad name to report page....")
            self.reportBadAccount(None, final=True)
        try:
            if globalvar.recursive:
                showStatus()
                if locale.getlocale()[1]:
                    strfstr = unicode(
                        time.strftime(u"%d %b %Y %H:%M:%S (UTC)",
                                      time.gmtime()),
                        locale.getlocale()[1])
                else:
                    strfstr = unicode(
                        time.strftime(u"%d %b %Y %H:%M:%S (UTC)",
                                      time.gmtime()))
                pywikibot.output(u'Sleeping %d seconds before rerun. %s'
                                 % (globalvar.timeRecur, strfstr))
                time.sleep(globalvar.timeRecur)
            else:
                raise KeyboardInterrupt
        except KeyboardInterrupt:
            #if globalvar.makeWelcomeLog and len(self.welcomed_users) > 0:
            #    pywikibot.output("Update log before quitting script.")
            #    self.makelogpage(self.welcomed_users)
            #if hasattr(self, '_BAQueue') and len(self._BAQueue) > 0 and globalvar.filtBadName:
            #    self.reportBadAccount(None, final=True)
            break
    def showpageinfo(self):
        pywikibot.output(u'[[%s]] %s ' % (self.page.title(), self.date))
        pywikibot.output(u'Length: %i bytes' % self.length)
        pywikibot.output(u'User : %s' % self.user)
                ))
            else:
                globalvar.dumpToLog = int(arg[11:])
        elif arg == '-quiet':
            globalvar.quiet = True
        elif arg == '-quick':
            globalvar.quick = True

    # Filename and pywikipedia path:
    # file where the random signature index is stored.
    filename = pywikibot.config.datafilepath(
        'welcome-%s-%s.data' % (pywikibot.default_family,
                                pywikibot.default_code))
    if globalvar.offset and globalvar.timeoffset:
        pywikibot.output(
            'WARNING: both -offset and -timeoffset were provided, '
            'ignoring -offset')
        globalvar.offset = 0
    bot = WelcomeBot()
    try:
        bot.run()
    except KeyboardInterrupt:
        if bot.welcomed_users:
            showStatus()
            pywikibot.output("Saving welcomed users before quitting...")
            bot.makelogpage(bot.welcomed_users)
        pywikibot.output("\nQuitting...")
    finally:
        # If sign-index saving is enabled, the script must store the index
        # of the last signature used.
        if globalvar.randomSign and globalvar.saveSignIndex and \
           bot.welcomed_users:
            import cPickle
    pre, noinclude, includeonly, tags1, tags2 = \
        u'\n', u'\n', u'\n', u'\n', u'\n'
    for entry in dump.new_parse():
        if entry.ns == '0':
            # Normalize stray spaces inside tags, e.g. '< pre >' -> '<pre>'.
            text = entry.text.replace(u' /', u'/').replace(u'/ ', u'/')\
                             .replace(u'< ', u'<').replace(u' >', u'>')
            if u'<noinclude>' in text or u'</noinclude>' in text:
                noinclude += u"#[[%s]]\n" % entry.title
            elif u'<includeonly>' in text or u'</includeonly>' in text:
                includeonly += u"#[[%s]]\n" % entry.title
            elif u'<pre>' in text or u'</pre>' in text:
                pre += u"#[[%s]]\n" % entry.title
            elif u'__NOGALLERY__' in text:
                tags1 += u"#[[%s]]\n" % entry.title
            elif u'__NOEDITSECTION__' in text:
                tags2 += u"#[[%s]]\n" % entry.title
            else:
                continue
            wikipedia.output(entry.title)
    my_text = u'\n== pre ==\n' + pre + \
              u'\n== noinclude ==\n' + noinclude + \
              u'\n== includeonly ==\n' + includeonly + \
              u'\n== NOGALLERY ==\n' + tags1 + \
              u'\n== NOEDITSECTION ==\n' + tags2
    f = codecs.open(bot_adress + "zztages.txt", "w", "utf-8")
    f.write(my_text)
    f.close()
    #os.system("rm " + bot_adress +
    #          "fawiki-%s-pages-meta-current.xml.bz2" % (TheDay))
    site = wikipedia.getSite('fa')
    # fa: "Wikipedia:Database report/Articles that contain template tags"
    page = wikipedia.Page(
        site, u"ویکی‌پدیا:گزارش دیتابیس/مقالاتی که تگ الگو دارند")
    # fa: "The following articles may use templates incorrectly."
    my_text = (u'مقالات زیر ممکن است الگو درون آنها به اشتباه '
               u'استفاده شده باشد \n') + my_text
    # fa edit summary: "Robot: updating statistics of other wikis."
    page.put(my_text, u"ربات: به‌روز رسانی آمار دیگر ویکی‌ها")
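# The classifier above depends on the whitespace normalization: '< pre >' in
# dump text must collapse to '<pre>' before the substring tests fire. A small
# illustrative check of that normalization (plain Python, no wiki access;
# the sample string is invented):
sample = u'Some article text with < pre >code</ pre > inside.'
normalized = sample.replace(u' /', u'/').replace(u'/ ', u'/')\
                   .replace(u'< ', u'<').replace(u' >', u'>')
assert u'<pre>' in normalized and u'</pre>' in normalized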
    def handlebadpage(self):
        try:
            self.content = self.page.get()
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'Already redirected, skipping.')
            return
        except pywikibot.NoPage:
            pywikibot.output(u'Already deleted')
            return

        for d in pywikibot.translate(pywikibot.getSite(), done):
            if d in self.content:
                pywikibot.output(
                    u'Found: "%s" in content, nothing necessary' % d)
                return
        print "---- Start content ----------------"
        pywikibot.output(u"%s" % self.content)
        print "---- End of content ---------------"

        # Loop until the user has given a valid answer.
        answered = False
        while not answered:
            answer = pywikibot.input(question)
            if answer == 'q':
                sys.exit("Exiting")
            if answer == 'd':
                pywikibot.output(u'Trying to delete page [[%s]].'
                                 % self.page.title())
                self.page.delete()
                return
            if answer == 'e':
                oldText = self.page.get()
                text = oldText
                editor = editarticle.TextEditor()
                text = editor.edit(self.page.get())
                if oldText != text:
                    pywikibot.showDiff(oldText, text)
                    msg = pywikibot.input(u'Summary message:')
                    self.page.put(text, msg)
                return
            if answer == 'b':
                pywikibot.output(u'Blanking page [[%s]].'
                                 % self.page.title())
                try:
                    self.page.put('',
                                  comment=pywikibot.translate(
                                      pywikibot.getSite(), blanking)
                                  % self.content)
                except pywikibot.EditConflict:
                    print "An edit conflict occurred! Automatically retrying."
                    self.handlebadpage()
                return
            if answer == '':
                print 'Page correct! Proceeding with next pages.'
                return
            # Check user input:
            if answer[0] == 'u':
                # Answer entered as a utf8 string
                try:
                    choices = answer[1:].split(',')
                except ValueError:
                    # User entered wrong value
                    pywikibot.error(u'"%s" is not valid' % answer)
                    continue
            else:
                try:
                    choices = answer.split(',')
                except ValueError:
                    # User entered wrong value
                    pywikibot.error(u'"%s" is not valid' % answer)
                    continue
            # Test the input: every choice must be a number within the
            # questionlist range.
            for choice in choices:
                try:
                    x = int(choice)
                except ValueError:
                    break
                else:
                    answered = x in range(1, len(questionlist) + 1)
            if not answered:
                pywikibot.error(u'"%s" is not valid' % answer)
                continue
        summary = u''
        for choice in choices:
            answer = int(choice)
            # grab the template parameters
            tpl = pywikibot.translate(pywikibot.getSite(),
                                      templates)[questionlist[answer]]
            if tpl['pos'] == 'top':
                pywikibot.output(u'prepending %s...' % questionlist[answer])
                self.content = questionlist[answer] + '\n' + self.content
            elif tpl['pos'] == 'bottom':
                pywikibot.output(u'appending %s...' % questionlist[answer])
                self.content += '\n' + questionlist[answer]
            else:
                pywikibot.error(
                    u'"pos" should be "top" or "bottom" for template '
                    u'%s. Contact a developer.' % questionlist[answer])
                sys.exit("Exiting")
            summary += tpl['msg'] + ' '
            pywikibot.output(u'Probably added %s' % questionlist[answer])
        # pywikibot.output(newcontent) bug #2986247
        self.page.put(self.content, comment=summary)
        pywikibot.output(u'with comment %s\n' % summary)
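# handlebadpage() expects the translated 'templates' mapping to provide, for
# each template name in questionlist, a dict with a 'pos' key ('top' or
# 'bottom') and a 'msg' edit-summary fragment. A hypothetical entry, for
# illustration only; the real mapping lives in the script's i18n data:
templates_example = {
    'en': {
        u'{{delete}}': {
            'pos': 'top',                          # prepend to the page
            'msg': u'This page should be deleted',  # edit-summary fragment
        },
    },
}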
    def standardizePageFooter(self, text):
        """
        Makes sure that interwiki links, categories and star templates are
        put into the correct position and into the right order.

        This combines the old methods standardizeInterwiki and
        standardizeCategories.

        The page footer has the following sections in this sequence:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. stars templates for featured and good articles
        5. interwiki links
        """
        starsList = [
            u'bueno',
            u'bom interwiki',
            u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
            u'destacado', u'destaca[tu]',
            u'enllaç[ _]ad',
            u'enllaz[ _]ad',
            u'leam[ _]vdc',
            u'legătură[ _]a[bcf]',
            u'liamm[ _]pub',
            u'lien[ _]adq',
            u'lien[ _]ba',
            u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
            u'liên[ _]kết[ _]chọn[ _]lọc',
            u'ligam[ _]adq',
            u'ligoelstara',
            u'ligoleginda',
            u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]',
            u'link[ _]km', u'link[ _]sm', u'linkfa',
            u'na[ _]lotura',
            u'nasc[ _]ar',
            u'tengill[ _][úg]g',
            u'ua',
            u'yüm yg',
            u'רא',
            u'وصلة مقالة جيدة',
            u'وصلة مقالة مختارة',
        ]

        categories = None
        interwikiLinks = None
        allstars = []

        # The PyWikipediaBot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ignoring nn-wiki because of the comment line above the iw section
        if not self.template and not '{{Personendaten' in text and \
           not '{{SORTIERUNG' in text and not '{{DEFAULTSORT' in text and \
           not self.site.lang in ('et', 'it', 'bg', 'ru'):
            try:
                categories = pywikibot.getCategoryLinks(text, site=self.site)
            # there are categories like [[category:Foo {{#time:Y...}}]]
            except InvalidTitle:
                pass

        if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
            subpage = False
            if self.template:
                loc = None
                try:
                    tmpl, loc = moved_links[self.site.lang]
                    del tmpl
                except KeyError:
                    pass
                if loc is not None and loc in self.title:
                    subpage = True
            interwikiLinks = pywikibot.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Removing the interwiki
            text = pywikibot.removeLanguageLinks(text, site=self.site)
            # Removing the stars' issue
            starstext = pywikibot.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    text = regex.sub('', text)
                    allstars += found

        # Adding categories
        if categories:
            ## Sorting categories in alphabetic order; beta test only on the
            ## Persian Wikipedia. TODO: fix bug for sorting
            #if self.site.language() == 'fa':
            #    categories.sort()
            ## Taking main cats to top
            #for name in categories:
            #    if re.search(u"(.+?)\|(.{,1}?)", name.title()) or \
            #       name.title() == name.title().split(":")[0] + title:
            #        categories.remove(name)
            #        categories.insert(0, name)
            text = pywikibot.replaceCategoryLinks(text, categories,
                                                  site=self.site)
        # Adding stars templates
        if allstars:
            text = text.strip() + self.site.family.interwiki_text_separator
            allstars.sort()
            for element in allstars:
                text += '%s\r\n' % element.strip()
                if pywikibot.verbose:
                    pywikibot.output(u'%s' % element.strip())
        # Adding the interwiki
        if interwikiLinks:
            text = pywikibot.replaceLanguageLinks(text, interwikiLinks,
                                                  site=self.site,
                                                  template=self.template,
                                                  template_subpage=subpage)
        return text
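# The star-template extraction above builds one regex per entry in starsList.
# For example, the entry u'link[ _][afgu]a' matches {{Link FA|...}} badges.
# A minimal, self-contained check of that pattern's behavior (the sample
# wikitext is invented):
import re

star = u'link[ _][afgu]a'
regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
text = u'Article text.\n{{Link FA|en}}\n[[en:Foo]]'
found = regex.findall(text)   # [u'{{Link FA|en}}\n']
text = regex.sub('', text)    # badge removed here, re-appended later in order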
['y', 'n'], 'y') == 'y' if email: preferences.set_all( ['wpUserEmail', 'wpEmailFlag', 'wpOpenotifusertalkpages'], [email, True, False], verbose=True) if ssl: pop = poplib.POP3_SSL(host, port) else: pop = poplib.POP3(host, port) pop.user(username) pop.pass_(password) wikipedia.output(unicode(pop.getwelcome())) messages = [i.split(' ', 1)[0] for i in pop.list()[1]] for i in messages: msg = pop.retr(i) confirmed = False for line in msg[1]: if r_mail.search(line): confirmed = True link = r_mail.search(line).group(1) wikipedia.output(u'Confirming %s.' % link) confirm(link) if not confirmed: wikipedia.output(u'Unconfirmed mail!') elif do_delete: pop.dele(i)
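# The loop above assumes two names defined earlier in the script: r_mail, a
# compiled regex whose group(1) captures the confirmation URL from a line of
# the message body, and confirm(), which fetches that URL. Minimal
# hypothetical sketches of both; the exact pattern and the fetch mechanism
# are assumptions for illustration, not the script's canonical definitions:
import re
import urllib2

r_mail = re.compile(r'(http://\S*?Special:ConfirmEmail/\S+)')

def confirm(link):
    # Visiting the tokenized link is what actually confirms the address.
    urllib2.urlopen(link).read()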
    def treat(self, page):
        try:
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            ccToolkit = CosmeticChangesToolkit(page.site, debug=True,
                                               namespace=page.namespace(),
                                               pageTitle=page.title())
            changedText = ccToolkit.change(page.get())
            if changedText.strip() != page.get().strip():
                if not self.acceptall:
                    choice = pywikibot.inputChoice(
                        u'Do you want to accept these changes?',
                        ['Yes', 'No', 'All', 'Quit'],
                        ['y', 'n', 'a', 'q'], 'n')
                    if choice == 'a':
                        self.acceptall = True
                    elif choice == 'q':
                        self.done = True
                        return
                if self.acceptall or choice == 'y':
                    if self.async:
                        page.put_async(changedText, comment=self.comment)
                    else:
                        page.put(changedText, comment=self.comment)
            else:
                pywikibot.output('No changes were necessary in %s'
                                 % page.title())
        except pywikibot.NoPage:
            pywikibot.output("Page %s does not exist?!"
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output("Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        except pywikibot.LockedPage:
            pywikibot.output("Page %s is locked?!"
                             % page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output("An edit conflict has occurred at %s."
                             % page.title(asLink=True))
def treat(self, page): text = self.load(page) if text is None: return cats = page.categories() # Show the title of the page we're working on. # Highlight the title in purple. pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title()) pywikibot.output(u"Current categories:") for cat in cats: pywikibot.output(u"* %s" % cat.title()) catpl = pywikibot.Page(self.site, self.newcatTitle, defaultNamespace=14) if catpl in cats: pywikibot.output(u"%s is already in %s." % (page.title(), catpl.title())) else: if self.sort: catpl = self.sorted_by_last_name(catpl, page) pywikibot.output(u'Adding %s' % catpl.title(asLink=True)) cats.append(catpl) text = pywikibot.replaceCategoryLinks(text, cats) if not self.save(text, page, self.editSummary): pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))
def move_to_category(self, article, original_cat, current_cat): ''' Given an article which is in category original_cat, ask the user if it should be moved to one of original_cat's subcategories. Recursively run through subcategories' subcategories. NOTE: current_cat is only used for internal recursion. You should always use current_cat = original_cat. ''' pywikibot.output(u'') # Show the title of the page where the link was found. # Highlight the title in purple. pywikibot.output( u'Treating page \03{lightpurple}%s\03{default}, currently in \03{lightpurple}%s\03{default}' % (article.title(), current_cat.title())) # Determine a reasonable amount of context to print try: full_text = article.get(get_redirect=True) except pywikibot.NoPage: pywikibot.output(u'Page %s not found.' % article.title()) return try: contextLength = full_text.index('\n\n') except ValueError: # substring not found contextLength = 500 if full_text.startswith(u'[['): # probably an image # Add extra paragraph. contextLength = full_text.find('\n\n', contextLength + 2) if contextLength > 1000 or contextLength < 0: contextLength = 500 print pywikibot.output(full_text[:contextLength]) print subcatlist = self.catDB.getSubcats(current_cat) supercatlist = self.catDB.getSupercats(current_cat) alternatives = u'\n' if len(subcatlist) == 0: alternatives += u'This category has no subcategories.\n\n' if len(supercatlist) == 0: alternatives += u'This category has no supercategories.\n\n' # show subcategories as possible choices (with numbers) for i in range(len(supercatlist)): # layout: we don't expect a cat to have more than 10 supercats alternatives += (u"u%d - Move up to %s\n" % (i, supercatlist[i].title())) for i in range(len(subcatlist)): # layout: we don't expect a cat to have more than 100 subcats alternatives += (u"%2d - Move down to %s\n" % (i, subcatlist[i].title())) alternatives += u" j - Jump to another category\n" alternatives += u" s - Skip this article\n" alternatives += u" r - Remove this category tag\n" alternatives += u" l - list these options again\n" alternatives += u" m - more context\n" alternatives += (u"Enter - Save category as %s\n" % current_cat.title()) flag = False longchoice = True while not flag: if longchoice: longchoice = False pywikibot.output(alternatives) choice = pywikibot.input(u"Option:") else: choice = pywikibot.input( u"Option (#, [j]ump, [s]kip, [r]emove, [l]ist, [m]ore context, [RETURN]):" ) if choice in ['s', 'S']: flag = True elif choice == '': pywikibot.output(u'Saving category as %s' % current_cat.title()) if current_cat == original_cat: print 'No changes necessary.' 
else: newcat = u'[[:%s|%s]]' % (current_cat.title( savetitle=True), current_cat.title( withNamespace=False)) editsum = i18n.twtranslate( pywikibot.getSite(), 'category-replacing', { 'oldcat': original_cat.title(withNamespace=False), 'newcat': newcat }) if pywikibot.getSite().family.name == "commons": if original_cat.title(withNamespace=False).startswith( "Media needing categories as of"): parts = original_cat.title().split() catstring = u"{{Uncategorized|year=%s|month=%s|day=%s}}" % ( parts[-1], parts[-2], parts[-3]) if catstring in article.get(): article.put(article.get().replace( catstring, u"[[%s]]" % current_cat.title(savetitle=True)), comment=editsum) flag = True if not flag: catlib.change_category(article, original_cat, current_cat, comment=editsum) flag = True elif choice in ['j', 'J']: newCatTitle = pywikibot.input( u'Please enter the category the article should be moved to:' ) newCat = catlib.Category(pywikibot.getSite(), 'Category:' + newCatTitle) # recurse into chosen category self.move_to_category(article, original_cat, newCat) flag = True elif choice in ['r', 'R']: # remove the category tag catlib.change_category(article, original_cat, None, comment=self.editSummary) flag = True elif choice in ['l', 'L']: longchoice = True elif choice in ['m', 'M', '?']: contextLength += 500 print pywikibot.output(full_text[:contextLength]) print # if categories possibly weren't visible, show them additionally # (maybe this should always be shown?) if len(full_text) > contextLength: print '' print 'Original categories: ' for cat in article.categories(): pywikibot.output(u'* %s' % cat.title()) elif choice[0] == 'u': try: choice = int(choice[1:]) except ValueError: # user pressed an unknown command. Prompt him again. continue self.move_to_category(article, original_cat, supercatlist[choice]) flag = True else: try: choice = int(choice) except ValueError: # user pressed an unknown command. Prompt him again. continue # recurse into subcategory self.move_to_category(article, original_cat, subcatlist[choice]) flag = True
def getPDFTitle(self, ref, f): """ Use pdfinfo to retrieve title from a PDF. Unix-only, I'm afraid. """ pywikibot.output(u'PDF file.') fd, infile = tempfile.mkstemp() urlobj = os.fdopen(fd, 'r+w') urlobj.write(f.read()) try: pdfinfo_out = subprocess.Popen([r"pdfinfo", "/dev/stdin"], stdin=urlobj, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False).communicate()[0] for aline in pdfinfo_out.splitlines(): if aline.lower().startswith('title'): ref.title = aline.split(None)[1:] ref.title = ' '.join(ref.title) if ref.title != '': pywikibot.output(u'title: %s' % ref.title) pywikibot.output(u'PDF done.') except ValueError: pywikibot.output(u'pdfinfo value error.') except OSError: pywikibot.output(u'pdfinfo OS error.') except: # Ignore errors pywikibot.output(u'PDF processing error.') pass finally: urlobj.close() os.unlink(infile)
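# getPDFTitle() shells out to the pdfinfo utility (from poppler/xpdf). The
# relevant part of its stdout typically looks like
#
#   Title:          Some document title
#   Pages:          12
#
# which is why the parser above keyword-matches lines starting with 'title'
# and joins everything after the first whitespace run back together.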
class CategoryMoveRobot: """Robot to move pages from one category to another.""" def __init__(self, oldCatTitle, newCatTitle, batchMode=False, editSummary='', inPlace=False, moveCatPage=True, deleteEmptySourceCat=True, titleRegex=None, useSummaryForDeletion=True, withHistory=False): site = pywikibot.getSite() self.editSummary = editSummary self.oldCat = catlib.Category(site, oldCatTitle) self.newCatTitle = newCatTitle self.inPlace = inPlace self.moveCatPage = moveCatPage self.batchMode = batchMode self.deleteEmptySourceCat = deleteEmptySourceCat self.titleRegex = titleRegex self.useSummaryForDeletion = useSummaryForDeletion self.withHistory = withHistory def run(self): site = pywikibot.getSite() newCat = catlib.Category(site, self.newCatTitle) # set edit summary message if not self.editSummary: self.editSummary = i18n.twtranslate(site, 'category-changing') \ % {'oldcat':self.oldCat.title(), 'newcat':newCat.title()} if self.useSummaryForDeletion and self.editSummary: reason = self.editSummary else: reason = i18n.twtranslate(site, deletion_reason_move) \ % {'newcat': self.newCatTitle, 'title': self.newCatTitle} # Copy the category contents to the new category page copied = False oldMovedTalk = None if self.oldCat.exists() and self.moveCatPage: copied = self.oldCat.copyAndKeep( self.newCatTitle, pywikibot.translate(site, cfd_templates)) # Also move the talk page if copied: oldTalk = self.oldCat.toggleTalkPage() if oldTalk.exists(): newTalkTitle = newCat.toggleTalkPage().title() try: talkMoved = oldTalk.move(newTalkTitle, reason) except (pywikibot.NoPage, pywikibot.PageNotSaved), e: #in order : #Source talk does not exist, or #Target talk already exists pywikibot.output(e.message) else: if talkMoved: oldMovedTalk = oldTalk if self.withHistory: # Whether or not there was an old talk page, we write # the page history to the new talk page history = self.oldCat.getVersionHistoryTable() # Set the section title for the old cat's history on the new # cat's talk page. 
            sectionTitle = i18n.twtranslate(site, 'category-section-title') \
                           % {'oldcat': self.oldCat.title()}
            # Should be OK, we are within "if self.oldCat.exists()"
            historySection = u'\n== %s ==\n%s' % (sectionTitle, history)
            try:
                text = newCat.toggleTalkPage().get() + historySection
            except pywikibot.NoPage:
                text = historySection
            try:
                newCat.toggleTalkPage().put(
                    text, i18n.twtranslate(site, 'category-version-history')
                    % {'oldcat': self.oldCat.title()})
            except:
                pywikibot.output(
                    'History of the category has not been saved to the new '
                    'talk page')
                #TODO: some nicer exception handling (not too important)

        # First move the pages, then tag the version history.
        # Move articles
        gen = pagegenerators.CategorizedPageGenerator(self.oldCat,
                                                      recurse=False)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        for article in preloadingGen:
            if not self.titleRegex or re.search(self.titleRegex,
                                                article.title()):
                catlib.change_category(article, self.oldCat, newCat,
                                       comment=self.editSummary,
                                       inPlace=self.inPlace)

        # Move subcategories
        gen = pagegenerators.SubCategoriesPageGenerator(self.oldCat,
                                                        recurse=False)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        for subcategory in preloadingGen:
            if not self.titleRegex or re.search(self.titleRegex,
                                                subcategory.title()):
                catlib.change_category(subcategory, self.oldCat, newCat,
                                       comment=self.editSummary,
                                       inPlace=self.inPlace)

        # Delete the old category and its moved talk page
        if copied and self.deleteEmptySourceCat:
            if self.oldCat.isEmptyCategory():
                confirm = not self.batchMode
                self.oldCat.delete(reason, confirm, mark=True)
                if oldMovedTalk is not None:
                    oldMovedTalk.delete(reason, confirm, mark=True)
            else:
                pywikibot.output('Couldn\'t delete %s - not empty.'
                                 % self.oldCat.title())
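# A minimal usage sketch for the class above: move one category, keeping the
# page text and its version history, without per-page confirmation prompts.
# The two category titles are placeholders; the keyword arguments are the
# constructor's own, with their documented defaults spelled out:
bot = CategoryMoveRobot(u'Old category name',
                        u'New category name',
                        batchMode=True,          # no delete confirmation
                        editSummary=u'',         # fall back to i18n summary
                        inPlace=False,
                        moveCatPage=True,
                        deleteEmptySourceCat=True,
                        withHistory=True)        # copy history to talk page
bot.run()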
def httpError(self, err_num, link, pagetitleaslink): """Log HTTP Error""" pywikibot.output(u'HTTP error (%s) for %s on %s' % (err_num, link, pagetitleaslink), toStdout=True)
    def run(self):
        """ Runs the Bot """
        pywikibot.setAction(pywikibot.translate(self.site, msg))
        try:
            deadLinks = codecs.open(listof404pages, 'r', 'latin_1').read()
        except IOError:
            pywikibot.output(
                'You need to download '
                'http://www.twoevils.org/files/wikipedia/404-links.txt.gz '
                'and ungzip it in the same directory')
            raise
        socket.setdefaulttimeout(30)
        editedpages = 0
        for page in self.generator:
            try:
                # Load the page's text from the wiki
                new_text = page.get()
                if not page.canBeEdited():
                    pywikibot.output(u"You can't edit page %s"
                                     % page.title(asLink=True))
                    continue
            except pywikibot.NoPage:
                pywikibot.output(u'Page %s not found'
                                 % page.title(asLink=True))
                continue
            except pywikibot.IsRedirectPage:
                pywikibot.output(u'Page %s is a redirect'
                                 % page.title(asLink=True))
                continue

            # for each link to change
            for match in linksInRef.finditer(
                    pywikibot.removeDisabledParts(page.get())):

                link = match.group(u'url')
                # debugging purpose
                #print link
                if u'jstor.org' in link:
                    #TODO: Clean URL blacklist
                    continue

                ref = RefLink(link, match.group('name'))
                f = None
                try:
                    socket.setdefaulttimeout(20)
                    try:
                        f = urllib2.urlopen(ref.url.decode("utf8"))
                    except UnicodeError:
                        ref.url = urllib2.quote(ref.url.encode("utf8"), "://")
                        f = urllib2.urlopen(ref.url)
                    # Try to get Content-Type from server
                    headers = f.info()
                    contentType = headers.getheader('Content-Type')
                    if contentType and not self.MIME.search(contentType):
                        if ref.link.lower().endswith('.pdf') and \
                           not self.ignorepdf:
                            # If file has a PDF suffix
                            self.getPDFTitle(ref, f)
                        else:
                            pywikibot.output(
                                u'\03{lightyellow}WARNING\03{default} : '
                                u'media : %s ' % ref.link)
                        if ref.title:
                            if not re.match(
                                    '(?i) *microsoft (word|excel|visio)',
                                    ref.title):
                                ref.transform(ispdf=True)
                                repl = ref.refTitle()
                            else:
                                pywikibot.output(
                                    '\03{lightyellow}WARNING\03{default} : '
                                    'PDF title blacklisted : %s ' % ref.title)
                                repl = ref.refLink()
                        else:
                            repl = ref.refLink()
                        new_text = new_text.replace(match.group(), repl)
                        continue
                    # Get the real url where we end (http redirects !)
                    redir = f.geturl()
                    if redir != ref.link and \
                       domain.findall(redir) == domain.findall(link):
                        if soft404.search(redir) and \
                           not soft404.search(ref.link):
                            pywikibot.output(
                                u'\03{lightyellow}WARNING\03{default} : '
                                u'Redirect 404 : %s ' % ref.link)
                            continue
                        if dirIndex.match(redir) and \
                           not dirIndex.match(ref.link):
                            pywikibot.output(
                                u'\03{lightyellow}WARNING\03{default} : '
                                u'Redirect to root : %s ' % ref.link)
                            continue

                    # uncompress if necessary
                    if headers.get('Content-Encoding') in ('gzip', 'x-gzip'):
                        # XXX: small issue here: the whole page is downloaded
                        # through f.read(). It might fetch big files/pages.
                        # However, truncating an encoded gzipped stream is not
                        # an option, for unzipping will fail.
compressed = StringIO.StringIO(f.read()) f = gzip.GzipFile(fileobj=compressed) # Read the first 1,000,000 bytes (0.95 MB) linkedpagetext = f.read(1000000) socket.setdefaulttimeout(None) except UnicodeError: #example : http://www.adminet.com/jo/20010615¦/ECOC0100037D.html # in [[fr:Cyanure]] pywikibot.output( u'\03{lightred}Bad link\03{default} : %s in %s' % (ref.url, page.title(asLink=True))) continue except urllib2.HTTPError, e: pywikibot.output( u'HTTP error (%s) for %s on %s' % (e.code, ref.url, page.title(asLink=True)), toStdout=True) # 410 Gone, indicates that the resource has been purposely # removed if e.code == 410 or \ (e.code == 404 and (u'\t%s\t' % ref.url in deadLinks)): repl = ref.refDead() new_text = new_text.replace(match.group(), repl) continue except (urllib2.URLError, socket.error, IOError, httplib.error), e: #except (urllib2.URLError, socket.timeout, ftplib.error, httplib.error, socket.error), e: pywikibot.output(u'Can\'t retrieve page %s : %s' % (ref.url, e)) continue except ValueError: #Known bug of httplib, google for : #"httplib raises ValueError reading chunked content" continue
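# The gzip branch above buffers the whole response body because a gzip stream
# cannot safely be truncated mid-stream before decompression. A minimal,
# self-contained illustration of the same decompress-from-memory pattern the
# bot uses (StringIO plus gzip.GzipFile):
import gzip
import StringIO

def gunzip_bytes(raw):
    """Decompress an in-memory gzip payload, as the bot does with f.read()."""
    return gzip.GzipFile(fileobj=StringIO.StringIO(raw)).read()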