def purgquery(falink):
    falink = falink.replace(u' ', u'_')
    total_cache = dict(_cache_old, **_cache)
    if total_cache.get(tuple([falink, 'purgquery'])):
        return total_cache[tuple([falink, 'purgquery'])]
    if falink:
        params = {'action': 'purge', 'titles': falink, 'forcelinkupdate': 1}
        try:
            categoryname = query.GetData(params, faSite)
            for item in categoryname[u'purge']:
                templateha = item[u'title']
                break
        except:
            pass
        enlink = englishdictionry(falink, 'fa', 'en')
        if enlink:
            enlink = enlink.replace(u' ', u'_')
            params = {'action': 'purge', 'titles': enlink,
                      'forcelinkupdate': 1}
            try:
                categoryname = query.GetData(params, enSite)
                for item in categoryname[u'purge']:
                    templateha = item[u'title']
                    break
            except:
                pass
    _cache[tuple([falink, 'purgquery'])] = True
def getUserInfo(user):
    editcount = 0
    if not isIP(user):
        if user in users:
            editcount = users[user]['editcount']
            if editcount > preferences['newbie']:
                if not random.randint(0, 20):
                    # avoid updating non-newbie users too often
                    return editcount
        params = {
            'action': 'query',
            'list': 'users',
            'ususers': user,
            'usprop': 'editcount|groups',
        }
        data = query.GetData(params, site=preferences['site'])
        if 'error' not in data.keys():
            # reuse the response instead of querying the API again
            userdata = data['query']['users'][0]
            editcount = 0
            if 'editcount' in userdata.keys():
                editcount = int(userdata['editcount'])
            groups = []
            if 'groups' in userdata.keys():
                groups = userdata['groups']
            users[user] = {
                'editcount': editcount,
                'groups': groups,
            }
            saveUserInfo()
def loadGroups():
    # Info about groups: http://www.mediawiki.org/wiki/Manual:User_rights
    global groups
    groups = []
    params = {
        'action': 'query',
        'meta': 'siteinfo',
        'siprop': 'usergroups',
    }
    data = query.GetData(params, site=preferences['site'])
    if 'error' not in data.keys():
        # reuse the response instead of querying the API a second time
        for item in data['query']['usergroups']:
            groups.append(item['name'])
def query(self):
    result = query.GetData(self.params, site=self.site)
    # TODO: handle possible errors (they will cause KeyError at this time)
    blocklist = result['query']['blocks']
    while 'query-continue' in result:
        self.params.update(result['query-continue']['blocks'])
        result = query.GetData(self.params, site=self.site)
        blocklist += result['query']['blocks']
    # Finally we remove possible duplicates. This piece of code may be
    # removed after successful closing of
    # https://bugzilla.wikimedia.org/show_bug.cgi?id=34029
    for b in blocklist:
        if blocklist.count(b) > 1:
            blocklist.pop(blocklist.index(b))
    return blocklist
def queryMonths(self):
    months_long = ['january', 'february', 'march', 'april', 'may_long',
                   'june', 'july', 'august', 'september', 'october',
                   'november', 'december']
    months_short = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
                    'sep', 'oct', 'nov', 'dec']

    # One query instead of multiple queries using site.mediawiki_message();
    # can be refactored to use site.mediawiki_message()
    params = {
        'action': 'query',
        'meta': 'allmessages',
        'ammessages': '|'.join(months_long) + '|' + '|'.join(months_short),
        'amlang': self.site.lang,
    }
    monthsDict = query.GetData(params)['query']['allmessages']

    monthNum2origNames = dict((i, {'short': '', 'long': ''})
                              for i in range(1, 13))
    origNames2monthNum = dict()
    for el in monthsDict:
        orig, eng = el['*'], el['name']
        try:
            month_num = months_long.index(eng) + 1
            monthNum2origNames[month_num]['long'] = orig
        except ValueError:
            month_num = months_short.index(eng) + 1
            monthNum2origNames[month_num]['short'] = orig
        origNames2monthNum[orig] = month_num
    return monthNum2origNames, origNames2monthNum
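# Hedged usage sketch (not part of the original script): "bot" is assumed to
# be an instance of the class that defines queryMonths() above. The second
# returned mapping translates a localized long or short month name back to
# its number 1..12, which is handy when parsing on-wiki timestamps.
def month_number(bot, localized_name):
    _, origNames2monthNum = bot.queryMonths()
    return origNames2monthNum.get(localized_name)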
def query_api(data):
    predata = {
        'action': 'query',
        'prop': 'revisions',
    }
    predata = query.CombineParams(predata, data)
    return query.GetData(predata)
def englishdictionry(enlink, firstsite, secondsite):
    try:
        enlink = unicode(str(enlink), 'UTF-8').replace(u'[[', u'').replace(
            u']]', u'').replace(u'en:', u'').replace(u'fa:', u'')
    except:
        enlink = enlink.replace(u'[[', u'').replace(u']]', u'').replace(
            u'en:', u'').replace(u'fa:', u'')
    enlink = enlink.split(u'#')[0].strip()
    enlink = enlink.replace(u' ', u'_')
    if _cache.get(tuple([enlink, 'englishdictionry'])):
        return _cache[tuple([enlink, 'englishdictionry'])]
    if enlink == u'':
        _cache[tuple([enlink, 'englishdictionry'])] = False
        return False
    site = wikipedia.getSite(firstsite)
    sitesecond = wikipedia.getSite(secondsite)
    params = {
        'action': 'query',
        'prop': 'langlinks',
        'titles': enlink,
        'redirects': 1,
        'lllimit': 500,
    }
    try:
        categoryname = query.GetData(params, site)
        for item in categoryname[u'query'][u'pages']:
            case = categoryname[u'query'][u'pages'][item][u'langlinks']
        for item in case:
            if item[u'lang'] == secondsite:
                intersec = item[u'*']
                break
        result = intersec
        _cache[tuple([enlink, 'englishdictionry'])] = result
        return result
    except:
        # no interlanguage link found or the API call failed:
        # cache and return False, matching the empty-title case above
        _cache[tuple([enlink, 'englishdictionry'])] = False
        return False
def getdata(self):
    # getdata() returns a dictionary of the query to
    # api.php?action=query&meta=siteinfo&siprop=statistics
    # View the data with:
    # api.php?action=query&meta=siteinfo&siprop=statistics&format=jsonfm
    params = {
        'action': 'query',
        'meta': 'siteinfo',
        'siprop': 'statistics',
    }
    data = None
    pywikibot.output("\nQuerying api for json-formatted data...")
    try:
        data = query.GetData(params, self.site, encodeTitle=False)
    except:
        url = self.site.protocol() + '://' + self.site.hostname() + \
              self.site.api_address()
        pywikibot.output(
            "The query has failed. Have you checked the API? Are cookies "
            "working?")
        pywikibot.output(u"\n>> \03{lightpurple}%s\03{default} <<" % url)
    if data is not None:
        pywikibot.output("Extracting statistics...")
        data = data['query']       # "query" entry of data.
        return data['statistics']  # "statistics" entry of the "query" dict.
def RunQuery(self, params):
    while True:
        # Get data
        data = query.GetData(params)

        # Process received data
        yield data

        # Clear any continuations first
        if 'clcontinue' in params:
            del params['clcontinue']
        if 'plcontinue' in params:
            del params['plcontinue']

        if 'query-continue' not in data:
            if 'gapcontinue' in params:
                del params['gapcontinue']
            break

        qc = data['query-continue']
        # First continue properties only, once done, continue with allpages
        if 'categories' in qc or 'links' in qc:
            if 'categories' in qc:
                params.update(qc['categories'])
            if 'links' in qc:
                params.update(qc['links'])
        elif 'allpages' in qc:
            params.update(qc['allpages'])
        else:
            raise ValueError(u'Unexpected query-continue values: %s' % qc)
        continue
def AddNoSuggestionTitle(self, title):
    if title in self.seenUnresolvedLinks:
        return True
    self.seenUnresolvedLinks.add(title)

    params = {
        'action': 'query',
        'list': 'backlinks',
        'bltitle': title,
        'bllimit': '50',
    }
    data = query.GetData(params)
    cl = 0
    redirs = 0
    if 'backlinks' in data['query']:
        bl = data['query']['backlinks']
        cl = len(bl)
        redirs = len([i for i in bl if 'redirect' in i])

    if cl > 0 and 'query-continue' in data:
        count = '50+'
    else:
        count = str(cl if cl > 0 else 'no backlinks')

    self.AppendLineToLog(self.nosuggestions, u'* %s (%s%s)'
                         % (self.MakeLink(title), count,
                            u', %d redirects' % redirs if redirs > 0 else u''))
    return False
def get_contributions(self, max=-1, ns=None):
    predata = {
        'action': 'query',
        'list': 'usercontribs',
        'uclimit': '500',
        'ucuser': self.site.username(),
    }
    if ns:
        predata['ucnamespace'] = ns
    if max < 500 and max != -1:
        predata['uclimit'] = str(max)

    count = 0
    iterator = iter(xrange(0))
    never_continue = False
    while count != max or never_continue:
        try:
            item = iterator.next()
        except StopIteration:
            self.log(u'Fetching new batch of contributions')
            data = query.GetData(predata, self.site)
            if 'error' in data:
                raise RuntimeError(data['error'])
            if 'query-continue' in data:
                predata['uccontinue'] = data['query-continue']['usercontribs']
            else:
                never_continue = True
            iterator = iter(data['query']['usercontribs'])
        else:
            count += 1
            yield item
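# Hedged usage sketch (not part of the original script): "bot" is assumed to
# be an instance of the class defining get_contributions() above, i.e. it has
# a "site" attribute and a log() method. Each yielded item is a raw
# usercontribs dict as returned by the API.
def print_recent_contribs(bot, limit=50):
    for contrib in bot.get_contributions(max=limit, ns='0'):
        print contrib['title'], contrib['timestamp']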
def uploadedYesterday(site=None):
    '''
    Return a pagegenerator containing all the pictures uploaded yesterday.
    Should probably be copied to somewhere else.
    '''
    result = []
    dateformat = "%Y-%m-%dT00:00:00Z"
    today = datetime.utcnow()
    yesterday = today + timedelta(days=-1)

    params = {
        'action': 'query',
        'list': 'logevents',
        'leprop': 'title',
        'letype': 'upload',
        'ledir': 'newer',
        'lelimit': '5000',
        'lestart': yesterday.strftime(dateformat),
        'leend': today.strftime(dateformat),
    }

    data = query.GetData(params, site)
    try:
        for item in data['query']['logevents']:
            result.append(item['title'])
    except (IndexError, KeyError):
        raise NoPage(u'API Error, nothing found in the APIs')

    return pagegenerators.PagesFromTitlesGenerator(result, site)
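# Hedged usage sketch (not part of the original script): assuming a "commons"
# family is configured for the framework, the generator returned by
# uploadedYesterday() yields Page objects for yesterday's uploads.
def list_yesterdays_uploads():
    site = wikipedia.getSite(u'commons', u'commons')
    for page in uploadedYesterday(site):
        wikipedia.output(page.title())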
def revert(self, item):
    predata = {
        'action': 'query',
        'titles': item['title'],
        'prop': 'revisions',
        'rvprop': 'ids|timestamp|user|content',
        'rvlimit': '2',
        'rvstart': item['timestamp'],
    }
    data = query.GetData(predata, self.site)

    if 'error' in data:
        raise RuntimeError(data['error'])

    pages = data['query'].get('pages', ())
    if not pages:
        return False
    page = pages.itervalues().next()
    if len(page.get('revisions', ())) != 2:
        return False
    rev = page['revisions'][1]

    comment = u'Reverted to revision %s by %s on %s' % (rev['revid'],
                                                        rev['user'],
                                                        rev['timestamp'])
    if self.comment:
        comment += ': ' + self.comment

    page = pywikibot.Page(self.site, item['title'])
    pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                     % page.aslink(True, True))
    old = page.get()
    new = rev['*']
    pywikibot.showDiff(old, new)
    page.put(new, comment)
    return comment
def sendMail(self, subject=u'', text=u'', ccMe=False):
    if not hasattr(self, '_mailable'):
        self._load()
    if not self._mailable:
        raise UserActionRefuse("This user is not mailable")
    if not self.site().isAllowed('sendemail'):
        raise UserActionRefuse("You don't have permission to send mail")
    if not self.site().has_api() or self.site().versionnumber() < 14:
        return self.sendMailOld(subject, text, ccMe)

    params = {
        'action': 'emailuser',
        'target': self.name(),
        'token': self.site().getToken(),
        'subject': subject,
        'text': text,
    }
    if ccMe:
        params['ccme'] = 1
    result = query.GetData(params, self.site())
    if 'error' in result:
        code = result['error']['code']
        if code == 'usermaildisabled':
            wikipedia.output("User mail has been disabled")
        #elif code == '':
        #
    elif 'emailuser' in result:
        if result['emailuser']['result'] == 'Success':
            wikipedia.output(u'Email sent.')
            return True
    return False
def get_redirect_pageids_via_api(self):
    """Return generator that yields page IDs of Pages that are redirects."""
    params = {
        'action': 'query',
        'list': 'allpages',
        'apfilterredir': 'redirects',
        'aplimit': self.api_number,
        'apdir': 'ascending',
    }
    for ns in self.namespaces:
        params['apnamespace'] = ns
        if self.api_start:
            params['apfrom'] = self.api_start
        done = False
        while not done:
            pywikibot.output(u'\nRetrieving pages...', newline=False)
            data = query.GetData(params, self.site)
            if 'error' in data:
                raise RuntimeError("API query error: %s" % data['error'])
            if "limits" in data:  # process aplimit = max
                params['aplimit'] = int(data['limits']['allpages'])
            for x in data['query']['allpages']:
                done = self.api_until and x['title'] >= self.api_until
                if done:
                    break
                yield x['pageid']
            if not done and 'query-continue' in data:
                params['apfrom'] = data['query-continue']['allpages'][
                    'apfrom']
            else:
                break
def englishdictionry(link, firstsite, secondsite):
    link = link.replace(u' ', u'_')
    total_cache = dict(_cache_old, **_cache)
    if total_cache.get(tuple([link, 'englishdictionry'])):
        return total_cache[tuple([link, 'englishdictionry'])]
    if link == u'':
        _cache[tuple([link, 'englishdictionry'])] = u''
        return u''
    site = wikipedia.getSite(firstsite)
    sitesecond = wikipedia.getSite(secondsite)
    params = {
        'action': 'query',
        'prop': 'langlinks',
        'titles': link,
        'redirects': 1,
        'lllimit': 500,
    }
    try:
        categoryname = query.GetData(params, site)
        for item in categoryname[u'query'][u'pages']:
            case = categoryname[u'query'][u'pages'][item][u'langlinks']
        for item in case:
            if item[u'lang'] == secondsite:
                intersec = item[u'*']
                break
        result = intersec
        if result.find('#') != -1:
            _cache[tuple([link, 'englishdictionry'])] = u''
            return u''
        _cache[tuple([link, 'englishdictionry'])] = result
        return result
    except:
        # no interlanguage link found or the API call failed:
        # cache and return the empty result, matching the empty-title case
        _cache[tuple([link, 'englishdictionry'])] = u''
        return u''
def namespacefinder(enlink, firstsite):
    try:
        enlink = unicode(str(enlink), 'UTF-8').replace(u'[[', u'').replace(
            u']]', u'').replace(u'en:', u'').replace(u'fa:', u'')
    except:
        enlink = enlink.replace(u'[[', u'').replace(u']]', u'').replace(
            u'en:', u'').replace(u'fa:', u'')
    enlink = enlink.replace(u' ', u'_')
    if _cache.get(tuple([enlink, firstsite, 'namespacefinder'])):
        return _cache[tuple([enlink, firstsite, 'namespacefinder'])]
    site = wikipedia.getSite(firstsite)
    params = {
        'action': 'query',
        'prop': 'langlinks',
        'titles': enlink,
        'redirects': 1,
        'lllimit': 500,
    }
    a = 1
    if a:  # always true; the else branch below is effectively dead code
        categoryname = query.GetData(params, site)
        for item in categoryname[u'query'][u'pages']:
            fanamespace = categoryname[u'query'][u'pages'][item]['ns']
        _cache[tuple([enlink, firstsite, 'namespacefinder'])] = fanamespace
        return fanamespace
    else:
        _cache[tuple([enlink, firstsite, 'namespacefinder'])] = False
        return False
def englishdictionry(enlink, firstsite='fa', secondsite='en'):
    try:
        enlink = unicode(str(enlink), 'UTF-8').replace(u'[[', u'').replace(
            u']]', u'').replace(u'en:', u'').replace(u'fa:', u'')
    except:
        enlink = enlink.replace(u'[[', u'').replace(u']]', u'').replace(
            u'en:', u'').replace(u'fa:', u'')
    if enlink.find('#') != -1:
        return False
    if enlink == u'':
        return False
    enlink = enlink.replace(u' ', u'_')
    site = wikipedia.getSite(firstsite)
    sitesecond = wikipedia.getSite(secondsite)
    params = {
        'action': 'query',
        'prop': 'langlinks',
        'titles': enlink,
        'redirects': 1,
        'lllimit': 500,
    }
    try:
        categoryname = query.GetData(params, site)
        for item in categoryname[u'query'][u'pages']:
            case = categoryname[u'query'][u'pages'][item][u'langlinks']
        for item in case:
            if item[u'lang'] == secondsite:
                intersec = item[u'*']
                break
        result = intersec
        return result
    except:
        # no interlanguage link available or the API call failed
        return False
def findDuplicateImages(filename):
    '''
    Takes the photo, calculates the SHA1 hash and asks the MediaWiki API
    for a list of duplicates.

    TODO: Add exception handling, fix site thing
    '''
    f = open(filename, 'rb')
    result = []
    hashObject = hashlib.sha1()
    hashObject.update(f.read(-1))
    #f.close()
    sha1Hash = base64.b16encode(hashObject.digest())

    params = {
        'action': 'query',
        'list': 'allimages',
        'aisha1': sha1Hash,
        'aiprop': '',
    }
    data = query.GetData(params, site=wikipedia.getSite(),
                         useAPI=True, encodeTitle=False)
    for image in data['query']['allimages']:
        result.append(image['name'])
    return result
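# Hedged usage sketch (not part of the original script): "photo.jpg" style
# paths are assumed to be local files; the allimages 'name' field is expected
# to hold the file name as reported by the API.
def report_duplicates(path):
    duplicates = findDuplicateImages(path)
    if duplicates:
        wikipedia.output(u'Already uploaded as: %s' % u', '.join(duplicates))
    else:
        wikipedia.output(u'No duplicates found for %s' % path)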
def check_user_admin(username):
    username = username.replace(u' ', u'_')
    total_cache = dict(_cache_old, **_cache)
    if total_cache.get(tuple([username, 'check_user_admin'])):
        return total_cache[tuple([username, 'check_user_admin'])]
    params = {
        'action': 'query',
        'list': 'users',
        'ususers': username,
        'usprop': 'groups'
    }
    try:
        usernamequery = query.GetData(params, faSite)
        if not 'sysop' in usernamequery[u'query'][u'users'][0][u'groups']:
            _cache[tuple([username, 'check_user_admin'])] = True
            return True
        else:
            if not check_user(username):
                # sysop bots like user:Rezabot don't matter here
                _cache[tuple([username, 'check_user_admin'])] = True
                return True
        _cache[tuple([username, 'check_user_admin'])] = False
        return False
    except:
        _cache[tuple([username, 'check_user_admin'])] = True
        return True
def get_interwikis(link):
    link = link.replace(u' ', u'_')
    total_cache = dict(_cache_old, **_cache)
    if total_cache.get(tuple([link, 'get_interwikis'])):
        return total_cache[tuple([link, 'get_interwikis'])]
    if link.find('#') != -1 or link == u'':
        _cache[tuple([link, 'get_interwikis'])] = []
        return []
    case = []
    try:
        params = {
            'action': 'query',
            'prop': 'langlinks',
            'titles': link,
            'redirects': 1,
            'lllimit': 500,
        }
        pagename = query.GetData(params, faSite)
        for item in pagename[u'query'][u'pages']:
            case = pagename[u'query'][u'pages'][item][u'langlinks']
        _cache[tuple([link, 'get_interwikis'])] = case
        return case
    except:
        _cache[tuple([link, 'get_interwikis'])] = []
        return []
def loadUsersFromGroup(group):
    global users
    users[group] = {}
    aufrom = '!'
    while aufrom:
        params = {
            'action': 'query',
            'list': 'allusers',
            'augroup': group,
            'aulimit': '500',
            'aufrom': aufrom,
        }
        data = query.GetData(params, site=preferences['site'])
        if 'error' not in data.keys():
            for item in data['query']['allusers']:
                user = item['name']
                users[group][user] = {
                    'editcount': getUserEditcount(user),
                    'groups': getUserGroups(user),
                }
                print user, users[group][user]
        if 'query-continue' in data.keys():
            aufrom = data['query-continue']['allusers']['aufrom']
        else:
            aufrom = ''
def recentChanges(site=None, delay=0, block=70):
    """
    Return a pagegenerator containing all the images edited in a certain
    timespan.

    The delay is the number of minutes to wait and the block is the timespan
    to return images in. Should probably be copied to somewhere else.
    """
    result = []
    dateformat = "%Y-%m-%dT%H:%M:%SZ"
    rcstart = datetime.utcnow() + timedelta(minutes=-delay - block)
    rcend = datetime.utcnow() + timedelta(minutes=-delay)

    params = {
        'action': 'query',
        'list': 'recentchanges',
        'rcstart': rcstart.strftime(dateformat),
        'rcend': rcend.strftime(dateformat),
        'rcdir': 'newer',
        'rcnamespace': '6',
        'rcprop': 'title',
        'rcshow': '!bot',
        'rclimit': '5000',
        'rctype': 'edit|log',
    }

    data = query.GetData(params, site)
    try:
        for item in data['query']['recentchanges']:
            result.append(item['title'])
    except (IndexError, KeyError):
        raise NoPage(u'API Error, nothing found in the APIs')

    return pagegenerators.PagesFromTitlesGenerator(result, site)
def query_results(self, **data):
    """Iterate results from API action=query, using data as parameters."""
    querydata = {'action': 'query',
                 'maxlag': str(pywikibot.config.maxlag)}
    querydata = query.CombineParams(querydata, data)
    if "action" not in querydata or querydata['action'] != 'query':
        raise ValueError(
            "query_results: 'action' set to value other than 'query'")
    waited = 0
    while True:
        try:
            result = query.GetData(querydata, self.site)
            #if data.startswith(u"unknown_action"):
            #    e = {'code': data[:14], 'info': data[16:]}
            #    raise APIError(e)
        except pywikibot.ServerError:
            pywikibot.output(u"Wikimedia Server Error; retrying...")
            time.sleep(5)
            continue
        except ValueError:
            # If the result isn't valid JSON, there must be a server
            # problem. Wait a few seconds and try again.
            # WARNING: if the server is down, this could
            # cause an infinite loop.
            pywikibot.output(u"Invalid API response received; retrying...")
            time.sleep(5)
            continue
        if type(result) is dict and "error" in result:
            if result['error']['code'] == "maxlag":
                print "Pausing due to server lag.\r",
                time.sleep(5)
                waited += 5
                if waited % 30 == 0:
                    pywikibot.output(
                        u"(Waited %i seconds due to server lag.)" % waited)
                continue
            else:
                raise APIError(result['error'])
        waited = 0
        if type(result) is list:
            # query returned no results
            return
        assert type(result) is dict, (
            "Unexpected result of type '%s' received." % type(result))
        if "query" not in result:
            # query returned no results
            return
        yield result['query']
        if 'query-continue' in result:
            assert len(result['query-continue'].keys()) == 1, (
                "More than one query-continue key returned: %s"
                % result['query-continue'].keys())
            query_type = result['query-continue'].keys()[0]
            assert (query_type in querydata.keys()
                    or query_type in querydata.values()), \
                "Site returned unknown query-continue type '%s'" % query_type
            querydata.update(result['query-continue'][query_type])
        else:
            return
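# Hedged usage sketch (not part of the original script): "api" is assumed to
# be an instance of the class defining query_results() above. The generator
# hides query-continue paging, so each yielded block is one 'query' dict and
# the standard allpages list/aplimit parameters can be passed as keywords.
def iter_all_page_titles(api):
    for block in api.query_results(list='allpages', aplimit='500'):
        for page in block['allpages']:
            yield page['title']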
def query_old_api(data):
    predata = {
        'what': 'revisions',
        'rvlimit': '1',
    }
    predata = query.CombineParams(predata, data)
    return query.GetData(predata, useAPI=False)
def Query_Abus(pasttime, abusnum):
    # http://en.wikipedia.org/w/api.php?action=query&list=abuselog&aflprop=details|ids&format=json&afllimit=1000
    params = {
        'action': 'query',
        'list': 'abuselog',
        'aflprop': 'details|ids',
        'format': 'json',
        'afllimit': 1000,
        'aflend': pasttime
    }
    abus_dict = {}
    abuslog = query.GetData(params, faSite)
    for item in abuslog[u'query'][u'abuselog']:
        abus_id = item['filter_id']
        log_id = str(item['id'])
        item = item['details']
        if abus_id == str(abusnum):  # abuse filter number
            #wikipedia.output(u'-----------------------------------------')
            #wikipedia.output(str(item))
            user_editcount = item["user_editcount"]
            if not user_editcount.strip():
                user_editcount = 0
            else:
                user_editcount = int(user_editcount)
            user_name = item["user_name"].replace(u'\r', u'')
            user_age = 1000  # int(item["user_age"])
            user_groups = item["user_groups"]
            user_mobile = item["user_mobile"]
            article_articleid = item["article_articleid"]
            article_namespace = int(item["article_namespace"])
            article_text = item["article_text"].replace(u'\r', u'')
            article_prefixedtext = item["article_prefixedtext"]
            article_recent_contributors = item[
                "article_recent_contributors"].replace(u'\r',
                                                       u'').split(u'\n')
            action = item["action"]
            summary = item["summary"].replace(u'\r', u'')
            old_wikitext = item["old_wikitext"].replace(u'\r', u'')
            new_wikitext = item["new_wikitext"].replace(u'\r', u'')
            edit_diff = item["edit_diff"].replace(u'\r', u'')
            new_size = int(item["new_size"])
            old_size = int(item["old_size"])
            edit_delta = int(item["edit_delta"])
            added_lines = item["added_lines"].replace(u'\r', u'')
            removed_lines = item["removed_lines"].replace(u'\r', u'')
            tor_exit_node = item["tor_exit_node"]
            timestamp = int(item["timestamp"])
            abus_dict[log_id] = [
                user_editcount, user_name, user_age, user_groups,
                user_mobile, article_articleid, article_namespace,
                article_text, article_prefixedtext,
                article_recent_contributors, action, summary, old_wikitext,
                new_wikitext, edit_diff, new_size, old_size, edit_delta,
                added_lines, removed_lines, tor_exit_node, timestamp, abus_id
            ]
    return abus_dict
def contributions(self, limit=500, namespace=[]):
    """
    Yield tuples describing this user's edits, with an upper bound of
    'limit'. Each tuple is composed of a pywikibot.Page object, the
    revision id (int), the edit timestamp and the comment (unicode).
    Pages returned are not guaranteed to be unique.

    @param limit: limit result to this number of pages
    @type limit: int
    @param namespace: only iterate links in these namespaces
    @type namespace: list
    """
    if not self.site().has_api():
        raise NotImplementedError

    params = {
        'action': 'query',
        'list': 'usercontribs',
        'ucuser': self.name(),
        'ucprop': ['ids', 'title', 'timestamp', 'comment'],
        # 'size', 'flags'],
        'uclimit': limit,
        'ucdir': 'older',
    }
    if limit > pywikibot.config.special_page_limit:
        params['uclimit'] = pywikibot.config.special_page_limit
        if limit > 5000 and self.site().isAllowed('apihighlimits'):
            params['uclimit'] = 5000
    if namespace:
        params['ucnamespace'] = namespace

    # A user is likely to contribute on several pages,
    # keeping track of titles
    nbresults = 0
    while True:
        pywikibot.output(u'Retrieving %s user contributions from %s...'
                         % (params['uclimit'], self.site()))
        result = query.GetData(params, self.site())
        if 'error' in result:
            pywikibot.output('%s' % result)
            raise pywikibot.Error
        for contrib in result['query']['usercontribs']:
            ts = pywikibot.parsetime2stamp(contrib['timestamp'])
            yield (pywikibot.Page(self.site(), contrib['title'],
                                  defaultNamespace=contrib['ns']),
                   contrib['revid'], ts, contrib.get('comment', None))
            nbresults += 1
            if nbresults >= limit:
                break
        if 'query-continue' in result and nbresults < limit:
            params['ucstart'] = result['query-continue']['usercontribs'][
                'ucstart']
        else:
            break
    return
def Check_Page_Exists(page_link):
    page_link = page_link.replace(u' ', u'_')
    params = {'action': 'query', 'prop': 'info', 'titles': page_link}
    query_page = query.GetData(params, faSite)
    try:
        for i in query_page[u'query'][u'pages']:
            redirect_link = query_page[u'query'][u'pages'][i]['pageid']
            return False  # page exists
    except:
        return True  # page does not exist
def getRollbackToken():
    params = {'action': 'query', 'meta': 'tokens', 'type': 'rollback'}
    try:
        data = query.GetData(params)
        if 'error' in data:
            raise RuntimeError('%s' % data['error'])
        elif 'warnings' in data:
            raise RuntimeError('%s' % data['warnings'])
        return data['query']['tokens']['rollbacktoken']
    except KeyError:
        raise ServerError("The APIs don't return data, the site may be down")
def redirect_find(page_link):
    page_link = page_link.replace(u' ', u'_')
    params = {'action': 'query', 'redirects': "", 'titles': page_link}
    query_page = query.GetData(params, faSite)
    try:
        redirect_link = query_page[u'query'][u'redirects'][0]['to']
        return True
    except:
        if 'missing=""' in str(query_page):
            return True
        else:
            return False