def getToken(self, type):
    """Get a token

    For wikis with MW 1.24 or newer:
    type (string) - csrf, deleteglobalaccount, patrol, rollback,
    setglobalaccountstatus, userrights, watch

    For older wiki versions, only csrf (edit, move, etc.) tokens are supported
    """
    if self.newtoken:
        # Modern token API (MW 1.24+): meta=tokens handles every type.
        params = {
            'action': 'query',
            'meta': 'tokens',
            'type': type,
        }
        req = api.APIRequest(self, params)
        response = req.query(False)
        token = response['query']['tokens'][type + 'token']
    else:
        if type not in ['edit', 'delete', 'protect', 'move', 'block',
                        'unblock', 'email', 'csrf']:
            raise WikiError('Token type unavailable')
        # Legacy API: all these token types are interchangeable with the
        # edit token, so request intoken=edit on an arbitrary title.
        params = {
            'action': 'query',
            'prop': 'info',
            'intoken': 'edit',
            'titles': '1'
        }
        req = api.APIRequest(self, params)
        response = req.query(False)
        # BUG FIX: the API response has no 'data' wrapper; the page id lives
        # directly under response['query']['pages'], matching the token
        # lookup on the following line.
        pid = response['query']['pages'].keys()[0]
        token = response['query']['pages'][pid]['edittoken']
    return token
def unblock(self, reason=False):
    """Unblock the user

    reason - reason for the log
    """
    # First request: obtain an unblock token via the legacy gettoken param.
    params = {
        'action': 'unblock',
        'user': self.name,
        'gettoken': ''
    }
    req = api.APIRequest(self.site, params)
    res = req.query()
    token = res['unblock']['unblocktoken']
    # Second request: perform the actual unblock.
    params = {
        'action': 'unblock',
        'user': self.name,
        'token': token
    }
    if reason:
        params['reason'] = reason
    # BUG FIX: unblocking modifies the wiki, so the request must be marked
    # as a write action (was write=False), consistent with move/protect.
    req = api.APIRequest(self.site, params, write=True)
    res = req.query()
    if 'unblock' in res:
        self.blocked = False
    return res
def block(self, reason=False, expiry=False, anononly=False, nocreate=False,
          autoblock=False, noemail=False, hidename=False,
          allowusertalk=False, reblock=False):
    """Block the user

    Params are the same as the API
    reason - block reason
    expiry - block expiration
    anononly - block anonymous users only
    nocreate - disable account creation
    autoblock - block IP addresses used by the user
    noemail - block user from sending email through the site
    hidename - hide the username from the log (requires hideuser right)
    allowusertalk - allow the user to edit their talk page
    reblock - overwrite existing block
    """
    # First request: obtain a block token via the legacy gettoken param.
    params = {'action': 'block', 'user': self.name, 'gettoken': ''}
    req = api.APIRequest(self.site, params)
    res = req.query()
    token = res['block']['blocktoken']
    # Second request: perform the actual block.
    params = {'action': 'block', 'user': self.name, 'token': token}
    if reason:
        params['reason'] = reason
    if expiry:
        params['expiry'] = expiry
    # Boolean flags are passed as empty-valued parameters, per the API.
    if anononly:
        params['anononly'] = ''
    if nocreate:
        params['nocreate'] = ''
    if autoblock:
        params['autoblock'] = ''
    if noemail:
        params['noemail'] = ''
    if hidename:
        params['hidename'] = ''
    if allowusertalk:
        params['allowusertalk'] = ''
    if reblock:
        params['reblock'] = ''
    # BUG FIX: blocking modifies the wiki, so the request must be marked
    # as a write action (was write=False), consistent with move/protect.
    req = api.APIRequest(self.site, params, write=True)
    res = req.query()
    if 'block' in res:
        self.blocked = True
    return res
def setPageInfo(self):
    """Sets basic page info, required for almost everything"""
    followRedir = self.followRedir
    params = {'action': 'query'}
    if self.pageid:
        params['pageids'] = self.pageid
    else:
        params['titles'] = self.title
    if followRedir:
        params['redirects'] = ''
    req = api.APIRequest(self.site, params)
    response = req.query()
    # NOTE(review): the key is a string here; the str > int comparison below
    # is a Python 2 idiom (strings always compare greater than ints).
    self.pageid = response['query']['pages'].keys()[0]
    if self.pageid > 0:
        self.exists = True
    if 'missing' in response['query']['pages'][str(self.pageid)]:
        if not self.title:
            # Pageids are never recycled, so a bad pageid with no title will never work
            raise wiki.WikiError("Bad pageid given with no title")
        self.exists = False
    if 'invalid' in response['query']['pages'][str(self.pageid)]:
        raise BadTitle(self.title)
    if 'title' in response['query']['pages'][str(self.pageid)]:
        self.title = response['query']['pages'][str(
            self.pageid)]['title'].encode('utf-8')
        self.namespace = int(response['query']['pages'][str(
            self.pageid)]['ns'])
        # BUG FIX: 'is not 0' tests object identity and only works by
        # accident of CPython small-int caching; use a value comparison.
        if self.namespace != 0:
            self.unprefixedtitle = self.title.split(':', 1)[1]
        else:
            self.unprefixedtitle = self.title
    self.pageid = int(self.pageid)
    if self.pageid < 0:
        self.pageid = 0
    return self
def getLinks(self, force=False):
    """Gets a list of all the internal links *on* the page

    force - load the list even if we already loaded it before
    """
    if self.links and not force:
        return self.links
    if self.pageid == 0 and not self.title:
        self.setPageInfo()
    if not self.exists:
        raise NoPage
    params = {
        'action': 'query',
        'prop': 'links',
        'pllimit': self.site.limit,
    }
    if self.pageid > 0:
        params['pageids'] = self.pageid
    else:
        params['titles'] = self.title
    request = api.APIRequest(self.site, params)
    response = request.query()
    self.links = []
    # A list response means the query was continued across several API
    # calls; merge each chunk. There shouldn't be more than 5000 links
    # on a page...
    if isinstance(response, list):
        for chunk in response:
            self.links.extend(self.__extractToList(chunk, 'links'))
    else:
        self.links = self.__extractToList(response, 'links')
    return self.links
def getProtection(self, force=False):
    """Returns the current protection status of the page"""
    if self.protection and not force:
        return self.protection
    if self.pageid == 0 and not self.title:
        self.setPageInfo()
    params = {
        'action': 'query',
        'prop': 'info',
        'inprop': 'protection',
    }
    if not self.exists or self.pageid <= 0:
        params['titles'] = self.title
    else:
        # BUG FIX: both branches used to set params['titles']; for an
        # existing page with a known id, query by pageid (the title may
        # not have been set when the object was built from a pageid).
        params['pageids'] = self.pageid
    req = api.APIRequest(self.site, params)
    response = req.query()
    for pr in response['query'].values()[0].values()[0]['protection']:
        if pr['level']:
            if pr['expiry'] == 'infinity':
                expiry = 'infinity'
            else:
                expiry = datetime.datetime.strptime(
                    pr['expiry'], '%Y-%m-%dT%H:%M:%SZ')
            self.protection[pr['type']] = {
                'expiry': expiry,
                'level': pr['level']
            }
    return self.protection
def getCategories(self, force=False):
    """Gets all list of all the categories on the page

    force - load the list even if we already loaded it before
    """
    if self.categories and not force:
        return self.categories
    if self.pageid == 0 and not self.title:
        self.setPageInfo()
    if not self.exists:
        raise NoPage
    params = {
        'action': 'query',
        'prop': 'categories',
        'cllimit': self.site.limit,
    }
    if self.pageid:
        params['pageids'] = self.pageid
    else:
        params['titles'] = self.title
    request = api.APIRequest(self.site, params)
    response = request.query()
    self.categories = []
    # A list response means the query was split across multiple API calls.
    if isinstance(response, list):
        for chunk in response:
            self.categories.extend(self.__extractToList(chunk, 'categories'))
    else:
        self.categories = self.__extractToList(response, 'categories')
    return self.categories
def getToken(self, type):
    """Get a token for everything except rollbacks

    type (string) - edit, delete, protect, move, block, unblock, email
    Currently all the tokens are interchangeable, but this may change in
    the future
    """
    if self.pageid == 0 and not self.title:
        self.setPageInfo()
    if not self.exists and type != 'edit':
        raise NoPage
    params = {
        'action': 'query',
        'prop': 'info',
        'intoken': type,
    }
    if self.exists and self.pageid:
        params['pageids'] = self.pageid
    else:
        params['titles'] = self.title
    request = api.APIRequest(self.site, params)
    response = request.query()
    if self.pageid == 0:
        self.pageid = response['query']['pages'].keys()[0]
    pagedata = response['query']['pages'][str(self.pageid)]
    return pagedata[type + 'token']
def setPageInfo(self):
    """Sets basic page info, required for almost everything"""
    followRedir = self.followRedir
    params = {'action': 'query'}
    if self.pageid:
        params['pageids'] = self.pageid
    else:
        params['titles'] = self.title
    if followRedir:
        params['redirects'] = ''
    req = api.APIRequest(self.site, params)
    response = req.query()
    # NOTE(review): the key is a string here; the str > int comparison below
    # is a Python 2 idiom (strings always compare greater than ints).
    self.pageid = response['query']['pages'].keys()[0]
    if self.pageid > 0:
        self.exists = True
    if 'missing' in response['query']['pages'][str(self.pageid)]:
        self.exists = False
    if 'invalid' in response['query']['pages'][str(self.pageid)]:
        raise BadTitle(self.title)
    if 'title' in response['query']['pages'][str(self.pageid)]:
        self.title = response['query']['pages'][str(
            self.pageid)]['title'].encode('utf-8')
        self.namespace = int(response['query']['pages'][str(
            self.pageid)]['ns'])
    # BUG FIX: removed a second, duplicated 'invalid' check that re-ran the
    # identical test performed a few lines above.
    self.pageid = int(self.pageid)
    if self.pageid < 0:
        self.pageid = 0
    return self
def setSiteinfo(self): """Retrieves basic siteinfo Called when constructing, or after login if the first call failed """ params = { 'action': 'query', 'meta': 'siteinfo', 'siprop': 'general|namespaces', } if self.maxlag < 120: params['maxlag'] = 120 req = api.APIRequest(self, params) info = req.query() sidata = info['query']['general'] for item in sidata: self.siteinfo[item] = sidata[item] nsdata = info['query']['namespaces'] for ns in nsdata: nsinfo = nsdata[ns] self.namespaces[nsinfo['id']] = nsinfo if not 'writeapi' in sidata: print "WARNING: Write-API not enabled, you will not be able to edit" version = re.search("\d\.(\d\d)", self.siteinfo['generator']) if not int(version.group(1)) >= 13: # Will this even work on 13? print "WARNING: Some features may not work on older versions of MediaWiki" return self
def getTemplates(self, force=False):
    """Gets all list of all the templates on the page

    force - load the list even if we already loaded it before
    """
    if self.templates and not force:
        return self.templates
    if self.pageid == 0 and not self.title:
        self.setPageInfo()
    if not self.exists:
        raise NoPage
    params = {
        'action': 'query',
        'prop': 'templates',
        'tllimit': self.site.limit,
    }
    if self.pageid:
        params['pageids'] = self.pageid
    else:
        params['titles'] = self.title
    request = api.APIRequest(self.site, params)
    # queryGen yields each continuation chunk; collect them all.
    collected = []
    for chunk in request.queryGen():
        collected.extend(self.__extractToList(chunk, 'templates'))
    self.templates = collected
    return self.templates
def protect(self, restrictions=None, expirations=None, reason=False,
            cascade=False):
    """Protect a page

    Restrictions and expirations are dictionaries of protection
    level/expiry settings, e.g., {'edit':'sysop'} and {'move':'3 days'}.
    expirations can also be a string to set all levels to the same
    expiration

    reason - summary for log
    cascade - apply protection to all pages transcluded on the page
    """
    # BUG FIX: the defaults were mutable dicts ({}), shared across calls;
    # None sentinels preserve the caller-visible behavior safely.
    if restrictions is None:
        restrictions = {}
    if expirations is None:
        expirations = {}
    if not self.title:
        self.setPageInfo()
    if not restrictions:
        raise ProtectError("No protection levels given")
    if len(expirations) > len(restrictions):
        raise ProtectError("More expirations than restrictions given")
    token = self.site.getToken('csrf')
    protections = ''
    expiry = ''
    if isinstance(expirations, str):
        # A bare string applies the same expiry to every level.
        expiry = expirations
    for prtype in restrictions:  # renamed from 'type' (shadowed builtin)
        if protections:
            protections += "|"
        protections += prtype + "=" + restrictions[prtype]
        if isinstance(expirations, dict) and prtype in expirations:
            if expiry:
                expiry += "|"
            expiry += expirations[prtype]
        elif isinstance(expirations, dict):
            # No expiry given for this level: protect indefinitely.
            if expiry:
                expiry += "|"
            expiry += 'indefinite'
    params = {
        'action': 'protect',
        'title': self.title,
        'token': token,
        'protections': protections
    }
    if expiry:
        params['expiry'] = expiry
    if reason:
        params['reason'] = reason
    if cascade:
        params['cascade'] = ''
    req = api.APIRequest(self.site, params, write=True)
    result = req.query()
    if 'protect' in result:
        # Invalidate the cached protection info.
        self.protection = {}
    return result
def move(self, mvto, reason=False, movetalk=False, noredirect=False,
         watch=False, unwatch=False):
    """Move the page

    Params are the same as the API:
    mvto - page title to move to, the only required param
    reason - summary for the log
    movetalk - move the corresponding talk page
    noredirect - don't create a redirect at the previous title
    watch - add the page to your watchlist
    unwatch - remove the page from your watchlist
    """
    if not self.title and self.pageid == 0:
        self.setPageInfo()
    if not self.exists:
        raise NoPage
    token = self.getToken('move')
    params = {
        'action': 'move',
        'to': mvto,
        'token': token,
    }
    if self.pageid:
        params['fromid'] = self.pageid
    else:
        params['from'] = self.title
    if reason:
        params['reason'] = reason.encode('utf-8')
    # Boolean API flags, passed as '1' when enabled.
    for enabled, flag in ((movetalk, 'movetalk'), (noredirect, 'noredirect'),
                          (watch, 'watch'), (unwatch, 'unwatch')):
        if enabled:
            params[flag] = '1'
    req = api.APIRequest(self.site, params, write=True)
    result = req.query()
    if 'move' in result:
        self.title = result['move']['to']
        if not isinstance(self.title, unicode):
            self.title = unicode(self.title, 'utf-8')
    # Refresh the URL-encoded form of the (possibly new) title; this ran
    # identically in both branches of the original code.
    self.urltitle = urllib.quote(self.title.encode('utf-8')).replace(
        '%20', '_').replace('%2F', '/')
    return result
def upload(self, fileobj=None, comment='', url=None, ignorewarnings=False,
           watch=False):
    """Upload a file, requires the "poster" module

    fileobj - A file object opened for reading
    comment - The log comment, used as the inital page content if the
    file doesn't already exist on the wiki
    url - A URL to upload the file from, if allowed on the wiki
    ignorewarnings - Ignore warnings about duplicate files, etc.
    watch - Add the page to your watchlist
    """
    # Validate the source arguments: exactly one of fileobj/url.
    if not api.canupload and fileobj:
        raise UploadError("The poster module is required for file uploading")
    if not fileobj and not url:
        raise UploadError("Must give either a file object or a URL")
    if fileobj and url:
        raise UploadError("Cannot give a file and a URL")
    if fileobj:
        if not isinstance(fileobj, file):
            raise UploadError(
                'If uploading from a file, a file object must be passed')
        if fileobj.mode not in ['r', 'rb', 'r+']:
            raise UploadError('File must be readable')
        fileobj.seek(0)
    params = {
        'action': 'upload',
        'comment': comment,
        'filename': self.unprefixedtitle,
        'token': self.getToken('edit')  # There's no specific "upload" token
    }
    if url:
        params['url'] = url
    else:
        params['file'] = fileobj
    for enabled, flag in ((ignorewarnings, 'ignorewarnings'),
                          (watch, 'watch')):
        if enabled:
            params[flag] = ''
    request = api.APIRequest(self.site, params, write=True,
                             multipart=bool(fileobj))
    res = request.query()
    if 'upload' in res and res['upload']['result'] == 'Success':
        # The file on the wiki changed; drop the cached page data.
        self.wikitext = ''
        self.links = []
        self.templates = []
        self.exists = True
    return res
def setSiteinfo(self):
    """Retrieves basic siteinfo

    Called when constructing, or after login if the first call failed
    """
    params = {
        'action': 'query',
        'meta': 'siteinfo|tokens',
        'siprop': 'general|namespaces|namespacealiases',
    }
    # Use a high maxlag for this metadata request so it doesn't fail just
    # because the wiki is momentarily lagged.
    if self.maxlag < 120:
        params['maxlag'] = 120
    req = api.APIRequest(self, params)
    info = req.query(False)
    sidata = info['query']['general']
    for item in sidata:
        self.siteinfo[item] = sidata[item]
    nsdata = info['query']['namespaces']
    for ns in nsdata:
        nsinfo = nsdata[ns]
        self.namespaces[nsinfo['id']] = nsinfo
        if ns != "0":
            try:
                attr = "NS_%s" % (nsdata[ns]['canonical'].replace(
                    ' ', '_').upper())
            except KeyError:
                # Namespace has no canonical name; use the local name.
                attr = "NS_%s" % (nsdata[ns]['*'].replace(' ', '_').upper())
        else:
            attr = "NS_MAIN"
        setattr(self, attr.encode('utf8'), Namespace(ns.encode('utf8')))
    nsaliasdata = info['query']['namespacealiases']
    if nsaliasdata:
        for ns in nsaliasdata:
            self.NSaliases[ns['*']] = ns['id']
    if not 'writeapi' in sidata:
        # BUG FIX: warnings.warn takes (message, category); the arguments
        # were reversed, which raises TypeError instead of warning.
        warnings.warn(
            "WARNING: Write-API not enabled, you will not be able to edit",
            UserWarning)
    # FIX: raw string for the regex (avoids invalid escape sequences).
    version = re.search(r"\d\.(\d\d)", self.siteinfo['generator'])
    if int(version.group(1)) < 13:  # Will this even work on 13?
        warnings.warn(
            "WARNING: Some features may not work on older versions of MediaWiki",
            UserWarning)
    if 'tokens' in info['query'].keys():
        self.newtoken = True
    return self
def getToken(self, type):
    """Get a token

    type (string) - csrf, deleteglobalaccount, patrol, rollback,
    setglobalaccountstatus, userrights, watch
    """
    query = {
        'action': 'query',
        'meta': 'tokens',
        'type': type,
    }
    request = api.APIRequest(self, query)
    result = request.query()
    return result['query']['tokens'][type + 'token']
def isBlocked(self, force=False):
    """Determine if a user is blocked"""
    if self.blocked is not None and not force:
        return self.blocked
    params = {
        'action': 'query',
        'list': 'blocks',
        'bkusers': self.name,
        'bkprop': 'id'
    }
    request = api.APIRequest(self.site, params)
    res = request.query(False)
    # Any active block entry means the user is blocked.
    self.blocked = len(res['query']['blocks']) > 0
    return self.blocked
def __getSection(self, section):
    # Resolve a section heading name or anchor to its numeric section
    # index via action=parse.
    if not self.title:
        self.setPageInfo()
    params = {'action': 'parse', 'page': self.title, 'prop': 'sections'}
    request = api.APIRequest(self.site, params)
    response = request.query()
    number = False
    for sec in response['parse']['sections']:
        if section != sec['line'] and section != sec['anchor']:
            continue
        if sec['index'].startswith('T'):
            # Heading comes from a transcluded template ("T-" index).
            # TODO: It would be cool if it set the page title to the
            # template in this case
            continue
        number = sec['index']
        break
    return number
def listFromTitles(site, titles, check=True, followRedir=False):
    """Create a list of page objects from a list of titles

    check and followRedir have the same meaning as in page.Page
    """
    ret = []
    if not check:
        # Trust the caller: build Page objects without querying the API.
        for title in titles:
            title = page.Page(site, title=title, check=False)
            ret.append(title)
    else:
        querylist = []
        limit = int(site.limit)
        if len(titles) > limit / 10:
            # Split the titles into batches of roughly limit/10 per query.
            # NOTE: this relies on Python 2 integer division; the exact
            # expressions (x * limit / 10) must not be refactored, as
            # x * (limit // 10) gives different bounds when limit is not
            # a multiple of 10.
            iters = int(math.ceil(float(len(titles)) / (limit / 10)))
            for x in range(0, iters):
                lower = x * limit / 10
                upper = (x + 1) * limit / 10
                querylist.append(titles[lower:upper])
        else:
            querylist.append(titles)
        response = False
        for item in querylist:
            tlist = '|'.join(item)
            if not isinstance(tlist, unicode):
                tlist = unicode(tlist, 'utf8')
            params = {
                'action': 'query',
                'titles': tlist,
            }
            if followRedir:
                params['redirects'] = ''
            req = api.APIRequest(site, params)
            res = req.query(False)
            if not response:
                response = res
            else:
                # This breaks on non-existent titles, the api gives them negative numbers
                # resultCombine doesn't account for this and ignores or overwrites the
                # duplicate pageids
                response = api.resultCombine('', response, res)
        # Build a Page object from each returned page stub.
        for key in response['query']['pages'].keys():
            res = response['query']['pages'][key]
            item = makePage(key, res, site)
            ret.append(item)
    return ret
def __getUsageInternal(self, namespaces=False):
    # Generator yielding a Page for every page that uses this file,
    # following query-continue until the API stops returning one.
    params = {
        'action': 'query',
        'list': 'imageusage',
        'iutitle': self.title,
        'iulimit': self.site.limit,
    }
    if namespaces is not False:
        params['iunamespace'] = '|'.join([str(ns) for ns in namespaces])
    while True:
        req = api.APIRequest(self.site, params)
        data = req.query(False)
        for item in data['query']['imageusage']:
            yield page.Page(self.site, item['title'], check=False,
                            followRedir=False)
        try:
            params['iucontinue'] = data['query-continue']['imageusage']['iucontinue']
        # BUG FIX: bare 'except:' swallowed every error (including
        # KeyboardInterrupt); only a missing continue key should end the loop.
        except KeyError:
            break
def __getMembersInternal(self, namespaces=False):
    # Generator yielding a Page for every member of this category,
    # following query-continue until the API stops returning one.
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': self.title,
        'cmlimit': self.site.limit,
        'cmprop': 'title'
    }
    if namespaces is not False:
        params['cmnamespace'] = '|'.join([str(ns) for ns in namespaces])
    while True:
        req = api.APIRequest(self.site, params)
        data = req.query(False)
        for item in data['query']['categorymembers']:
            yield page.Page(self.site, item['title'], check=False,
                            followRedir=False)
        try:
            params['cmcontinue'] = data['query-continue']['categorymembers']['cmcontinue']
        # BUG FIX: bare 'except:' swallowed every error (including
        # KeyboardInterrupt); only a missing continue key should end the loop.
        except KeyError:
            break
def download(self, width=False, height=False, location=False):
    """Download the image to a local file

    width/height - set width OR height of the downloaded image
    location - set the filename to save to. If not set, the page title
    minus the namespace prefix will be used and saved to the current
    directory
    """
    if self.pageid == 0:
        self.setPageInfo()
    params = {'action': 'query',
        'prop': 'imageinfo',
        'iiprop': 'url'
    }
    if width and height:
        raise DimensionError("Can't specify both width and height")
    if width:
        params['iiurlwidth'] = width
    if height:
        params['iiurlheight'] = height
    if self.pageid != 0:
        params['pageids'] = self.pageid
    elif self.title:
        params['titles'] = self.title
    else:
        self.setPageInfo()
        if not self.exists:  # Non-existant files may be on a shared repo (e.g. commons)
            params['titles'] = self.title
        else:
            params['pageids'] = self.pageid
    req = api.APIRequest(self.site, params)
    res = req.query(False)
    # The (possibly scaled) URL comes back under the page's imageinfo.
    key = res['query']['pages'].keys()[0]
    url = res['query']['pages'][key]['imageinfo'][0]['url']
    if not location:
        # Default to the title minus the namespace prefix.
        location = self.title.split(':', 1)[1]
    # Fetch with the site's cookies so private wikis work.
    opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(self.site.cookies))
    headers = {"User-agent": self.site.useragent}
    request = urllib2.Request(url, None, headers)
    data = opener.open(request)
    # Unbuffered binary write of the downloaded bytes.
    f = open(location, 'wb', 0)
    f.write(data.read())
    f.close()
    return location
def __getSection(self, section):
    # Resolve a section heading to its 1-based position by parsing a
    # transclusion of the page.
    if not self.title:
        self.setPageInfo()
    params = {
        'action': 'parse',
        'text': '{{:' + self.title + '}}__TOC__',
        'title': self.title,
        'prop': 'sections'
    }
    request = api.APIRequest(self.site, params)
    response = request.query()
    number = False
    for position, sec in enumerate(response['parse']['sections'], 1):
        if section == sec['line']:
            number = position
            break
    return number
def listFromPageids(site, pageids, check=True, followRedir=False):
    """Create a list of page objects from a list of pageids

    check and followRedir have the same meaning as in page.Page
    """
    ret = []
    if not check:
        # Trust the caller: build Page objects without querying the API.
        for id in pageids:
            title = page.Page(site, pageid=id, check=False)
            ret.append(title)
    else:
        querylist = []
        limit = int(site.limit)
        if len(pageids) > limit / 10:
            # Split the ids into batches of roughly limit/10 per query.
            # NOTE: this relies on Python 2 integer division; the exact
            # expressions (x * limit / 10) must not be refactored, as
            # x * (limit // 10) gives different bounds when limit is not
            # a multiple of 10.
            iters = int(math.ceil(float(len(pageids)) / (limit / 10)))
            for x in range(0, iters):
                lower = x * limit / 10
                upper = (x + 1) * limit / 10
                querylist.append(pageids[lower:upper])
        else:
            querylist.append(pageids)
        response = False
        for item in querylist:
            ids = [str(id) for id in item]
            idlist = '|'.join(ids)
            params = {
                'action': 'query',
                'pageids': idlist,
            }
            if followRedir:
                params['redirects'] = ''
            req = api.APIRequest(site, params)
            res = req.query()
            if not response:
                response = res
            else:
                response = api.resultCombine('', response, res)
        # Build a Page object from each returned page stub.
        for key in response['query']['pages'].keys():
            res = response['query']['pages'][key]
            item = makePage(key, res, site)
            ret.append(item)
    return ret
def logout(self):
    """Log out of the wiki, delete stored cookies and reset session state."""
    params = {'action': 'logout'}
    if self.maxlag < 120:
        params['maxlag'] = 120
    cookiefile = self.cookiepath + str(
        hash(self.username + ' - ' + self.apibase)) + '.cookies'
    try:
        os.remove(cookiefile)
    # BUG FIX: bare 'except:' swallowed every exception; only file-system
    # errors (e.g. the cookie file never existed) should be ignored.
    except OSError:
        pass
    req = api.APIRequest(self, params, write=True)
    # action=logout returns absolutely nothing, which json.loads() treats as False
    # causing APIRequest.query() to get stuck in a loop
    req.opener.open(req.request)
    # Reset session state to defaults.
    self.cookies = WikiCookieJar()
    self.username = ''
    self.maxlag = 5
    self.useragent = "python-wikitools/%s" % VERSION
    self.limit = 500
    return True
def getHistory(self, force=False):
    """Return the file's imageinfo history (cached unless force is True)."""
    if self.history and not force:
        return self.history
    if self.pageid == 0 and not self.title:
        self.setPageInfo()
    if not self.exists:
        raise NoPage
    params = {
        'action': 'query',
        'prop': 'imageinfo',
        'iilimit': self.site.limit,
    }
    if self.pageid > 0:
        params['pageids'] = self.pageid
    else:
        params['titles'] = self.title
    request = api.APIRequest(self.site, params)
    response = request.query()
    # NOTE(review): indexing by str(self.pageid) assumes the pageid is
    # known here - confirm for objects built from a title only.
    self.history = response['query']['pages'][str(self.pageid)]['imageinfo']
    return self.history
def setSiteinfo(self): """Retrieves basic siteinfo Called when constructing, or after login if the first call failed """ params = { 'action': 'query', 'meta': 'siteinfo', 'siprop': 'general|namespaces|namespacealiases', } if self.maxlag < 120: params['maxlag'] = 120 req = api.APIRequest(self, params) info = req.query() sidata = info['query']['general'] for item in sidata: self.siteinfo[item] = sidata[item] nsdata = info['query']['namespaces'] for ns in nsdata: nsinfo = nsdata[ns] self.namespaces[nsinfo['id']] = nsinfo if ns != "0": attr = "NS_%s" % (nsdata[ns]['canonical'].replace(' ', '_').upper()) else: attr = "NS_MAIN" #print 'attr='+attr #print 'ns='+ns setattr(self, attr.encode('windows-1251'), Namespace(ns)) # N.B. nsaliasdata = info['query']['namespacealiases'] if nsaliasdata: for ns in nsaliasdata: self.NSaliases[ns['*']] = ns['id'] if not 'writeapi' in sidata: print "WARNING: Write-API not enabled, you will not be able to edit" version = re.search("\d\.(\d\d)", self.siteinfo['generator']) if not int(version.group(1)) >= 13: # Will this even work on 13? print "WARNING: Some features may not work on older versions of MediaWiki" return self
def isLoggedIn(self, username=False):
    """Verify that we are a logged in user

    username - specify a username to check against
    """
    data = {
        "action": "query",
        "meta": "userinfo",
    }
    if self.maxlag < 120:
        data['maxlag'] = 120
    request = api.APIRequest(self, data)
    info = request.query(False)
    userinfo = info['query']['userinfo']
    # An anonymous session reports user id 0.
    if userinfo['id'] == 0:
        return False
    if username and userinfo['name'] != username:
        return False
    return True
def getFileHistory(self, force=False):
    """Return the full upload history of the file (cached unless force)."""
    if self.filehistory and not force:
        return self.filehistory
    if self.pageid == 0 and not self.title:
        self.setPageInfo()
    params = {
        'action': 'query',
        'prop': 'imageinfo',
        'iilimit': self.site.limit,
    }
    if self.pageid > 0:
        params['pageids'] = self.pageid
    else:
        params['titles'] = self.title
    request = api.APIRequest(self.site, params)
    history = []
    # queryGen yields each continuation chunk; accumulate every revision.
    for chunk in request.queryGen():
        pid = chunk['query']['pages'].keys()[0]
        history.extend(chunk['query']['pages'][pid]['imageinfo'])
    self.filehistory = history
    return self.filehistory
def isRedir(self):
    """Is the page a redirect?"""
    params = {'action': 'query', 'redirects': ''}
    if not self.exists:
        raise NoPage
    if self.pageid != 0 and self.exists:
        params['pageids'] = self.pageid
    elif self.title:
        params['titles'] = self.title
    else:
        self.setPageInfo()
        if self.pageid != 0 and self.exists:
            params['pageids'] = self.pageid
        else:
            raise NoPage
    request = api.APIRequest(self.site, params)
    res = request.query()
    # The API reports a 'redirects' entry only when the page redirects.
    return 'redirects' in res['query']