def localblock(bot, trigger, username, password, Site):
    # Assumes module-level config: `stewards` and `cvt` (trusted account
    # lists), `ua` (user agent string), and a `noaccount()` helper.
    if trigger.account in stewards or trigger.account in cvt:
        options = trigger.group(2).split(" ")
        if len(options) == 2:
            wiki = options[0]
            target = options[1]
            expiry = '3 days'  # default block length when no time is given
        elif 2 < len(options) < 5:
            wiki = options[0]
            target = options[1]
            expiry = options[2]
        else:
            bot.reply('Syntax is .block <wiki> <target> <time>', trigger.sender)
            return
        site = Site(wiki + '.miraheze.org', clients_useragent=ua)
        site.login(username, password)
        # Fetch a CSRF token, then issue the block.
        result = site.api('query', http_method='POST', format='json',
                          meta='tokens')
        token = result['query']['tokens']['csrftoken']
        site.api('block', http_method='POST', format='json', user=target,
                 expiry=expiry, nocreate=1, autoblock=1, token=token)
    else:
        if trigger.account == '':
            noaccount()
        else:
            bot.say('Access Denied: ' + trigger.account + ' ('
                    + trigger.hostmask + ') is not in the trusted list. '
                    'This incident will be reported.', trigger.sender)
            bot.say('Security Alert: ' + trigger.account + ' ('
                    + trigger.hostmask + ') attempted to use CVT on '
                    + trigger.sender, '#ExamBot-logs')
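
# A minimal sketch of wiring localblock into a Sopel bot. The decorator is
# Sopel's long-standing sopel.module.commands; Sopel passes only (bot,
# trigger) to callables, so the credentials and Site class are bound here.
# BOT_USERNAME / BOT_PASSWORD are placeholders, not values from the original.
import mwclient
from sopel import module


@module.commands('block')
def block_command(bot, trigger):
    localblock(bot, trigger, 'BOT_USERNAME', 'BOT_PASSWORD', mwclient.Site)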
import time

import mwclient


def getTransclusions(site: mwclient.Site, page: str,
                     sleep_duration: int = None) -> list:
    """
    Get template transclusions. These occur when a template is present on a
    page, so this is essentially a list of the pages that a specific
    template appears on.

    :param site: mwclient.Site object for the site you are currently on
    :param page: template to get the transclusions of
    :param sleep_duration: time in seconds to sleep between requests
    :return: list of pages that transclude the template
    """
    cont = None
    pages = []
    while True:
        result = site.api('query', list='embeddedin', eititle=str(page),
                          eicontinue=cont, eilimit=500, format='json')
        if sleep_duration is not None:
            time.sleep(sleep_duration)
        for res in result['query']['embeddedin']:
            print('append ' + res['title'])
            pages.append(res['title'])
        try:
            # The 'continue' block is only present while results remain.
            cont = result['continue']['eicontinue']
        except KeyError:
            return pages
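
# Example usage, as a sketch: the wiki, template name, and user agent below
# are illustrative placeholders, not values from the original script.
if __name__ == '__main__':
    site = mwclient.Site('en.wikipedia.org',
                         clients_useragent='TransclusionLister/0.1 (example)')
    transclusions = getTransclusions(site, 'Template:Stub', sleep_duration=1)
    print('%d pages transclude the template' % len(transclusions))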
from typing import Generator

from mwclient import Site


def namespace_list(sitio: Site) -> Generator[Namespace, None, None]:
    # Namespace is a project-local class (assumed defined elsewhere) with a
    # from_dict(id, info) loader.
    salida = sitio.api('query', meta='siteinfo', siprop='namespaces')
    namespaces_dict = salida['query']['namespaces']
    for ns_id, namespace_info in namespaces_dict.items():
        namespace = Namespace()
        namespace.from_dict(ns_id, namespace_info)
        yield namespace
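
# Example usage, as a sketch (the target wiki is illustrative; what prints
# depends on how the project-local Namespace class renders itself):
if __name__ == '__main__':
    wiki = Site('es.wikipedia.org')
    for ns in namespace_list(wiki):
        print(ns)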
import logging

from mwclient import Site

logging.basicConfig(level=logging.WARNING)

# The reading list id of the list to be updated. To get this value, run this
# file once.
list_id = 2016297

ua = 'LocationReadingList/0.1 (User:Cxbrx)'

print("About to call site")
site = Site('en.wikipedia.org', clients_useragent=ua)

# print("About to get authmanagerinfo")
# result = site.api('query', meta='authmanagerinfo', amirequestsfor='login')
# print(result)

print("About to get login token")
result = site.api('query', meta='tokens', type='login', format='json')
login_token = result['query']['tokens']['logintoken']

# Get the username and password from the first line of user-config.txt,
# formatted as username:password.
# FIXME: This assumes there is no ':' in the username (the password may
# contain one, since the split stops after the first colon).
with open('user-config.txt') as f:
    pieces = [x.strip().split(':', 1) for x in f]
username = pieces[0][0]
password = pieces[0][1]

print("About to call login")
result = site.api('clientlogin', username=username, password=password,
                  logintoken=login_token,
                  loginreturnurl='https://en.wikipedia.org/')
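
# The clientlogin response carries a status field worth checking before the
# script goes on; this follow-up is a sketch (the field names come from the
# MediaWiki action API, not from the original file):
if result['clientlogin']['status'] != 'PASS':
    raise RuntimeError('Login failed: ' + str(result['clientlogin']))
print("Logged in as " + username)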
import pandas as pd
import wikipedia as wk  # assumed: the `wikipedia` package behind wk.page()/wk.summary()
from mwclient import Site
from tqdm import tqdm

# APIError and PageNotExists are assumed to be project-local exception classes.


class Wiki:
    def __init__(self, host='en.wikipedia.org',
                 user_agent='Wnow?/1.0 ([email protected])'):
        self.site = Site(host, clients_useragent=user_agent)

    def get_id(self, title):
        return wk.page(title=title).pageid

    def get_title(self, pageid):
        return wk.page(pageid=pageid).title

    # This method returns the summary provided by wk.summary().
    # **kwargs can be either the title of the page or its pageid.
    def get_summary(self, **kwargs) -> str:
        try:
            if 'title' in kwargs:
                return wk.summary(title=kwargs['title'])
            if 'pageid' in kwargs:
                return wk.page(pageid=kwargs['pageid']).summary
        except Exception:
            print('\tSummary not available')
            raise APIError

    # This method returns the content provided by wk.page().content.
    # **kwargs can be either the title of the page or its pageid.
    def get_content(self, **kwargs) -> str:
        try:
            if 'title' in kwargs:
                return wk.page(title=kwargs['title']).content
            if 'pageid' in kwargs:
                return wk.page(pageid=kwargs['pageid']).content
        except Exception:
            print('\tContent not available')
            return 'Content not available'

    # This method returns an mwclient.page.Page object.
    # **kwargs can be either the title of the page or its pageid.
    def get_page(self, **kwargs):
        try:
            if 'title' in kwargs:
                return self.site.pages[kwargs['title']]
            if 'pageid' in kwargs:
                return self.site.pages[kwargs['pageid']]
        except Exception:
            raise APIError

    # This method builds the URL of a page given its title.
    def get_page_link(self, title) -> str:
        return 'en.wikipedia.org/wiki/' + title.replace(' ', '%20')

    # This method gets the recent changes list using mwclient.Site.api().
    # It filters pages in namespace 0 and keeps only pages created or edited.
    def __recentchanges_list(self, limit, start, end) -> pd.DataFrame:
        try:
            rc = self.site.api('query', list='recentchanges', rclimit=limit,
                               rcstart=start, rcend=end, rctype='new|edit',
                               rcnamespace='0')
        except Exception:
            raise APIError
        r = pd.DataFrame(data=rc['query']['recentchanges'])
        r.drop(columns=['ns', 'revid', 'old_revid', 'rcid', 'timestamp'],
               inplace=True)
        return r

    # This method gets the recent changes by calling __recentchanges_list().
    # Attribute rclimit sets the maximum number of recent changes to fetch;
    # to use the maximum value permitted by the MediaWiki API, pass 'max'.
    # Attributes rcstart and rcend set the time range from which to get
    # recent changes; rcstart must be greater than rcend.
    def recentchanges(self, rclimit, rcstart, rcend) -> pd.DataFrame:
        images = []
        summaries = []
        links = []
        try:
            result = self.__recentchanges_list(limit=rclimit, start=rcstart,
                                               end=rcend)
        except Exception:
            print('\tAn API error occurred while retrieving recent changes')
            raise APIError
        for pageid in result['pageid']:
            try:
                # Get the page from the pageid provided.
                page = self.get_page(pageid=pageid)
                if not page.exists:
                    raise PageNotExists
            except APIError:
                print('\tAn API error occurred while retrieving a single page')
                # Remove the pageid that caused the error from the recent
                # changes list.
                result.query('pageid != ' + str(pageid), inplace=True)
                continue
            except PageNotExists:
                result.query('pageid != ' + str(pageid), inplace=True)
                continue
            try:
                # Get the summary of the page given the pageid.
                summary = self.get_summary(pageid=pageid)
                if not summary:
                    # The page has no summary.
                    raise PageNotExists
                summaries.append(summary)
            except Exception:
                result.query('pageid != ' + str(pageid), inplace=True)
                continue
            try:
                # Get the first image URL from the page via
                # mwclient.page.Page.images().
                images.append(next(page.images(generator=True)).imageinfo['url'])
            except Exception:
                # Fall back to a default image (the Wikipedia logo).
                images.append('https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Wikipedia_logo_v3.svg/1024px-Wikipedia_logo_v3.svg.png')
            try:
                links.append(self.get_page_link(page.name))  # build the page link
            except Exception:
                links.append('en.wikipedia.org/wiki/Main_Page')  # default link
        result.insert(3, column='image', value=images)
        result.insert(4, column='link', value=links)
        result.insert(5, column='summary', value=summaries)
        return result

    # This method returns a dictionary containing pages from the category
    # provided. Per the MediaWiki API's syntax, category must look like
    # 'Category:mycategory'. Attribute pages_num caps the number of pages
    # returned.
    def get_raw_category_pages(self, category, pages_num):
        # List of subcategories found recursively in the category.
        search_list = [category]
        page_set = []
        with tqdm(total=pages_num, desc=category) as cbar:  # progress bar
            # Keep going while there are categories left to search and fewer
            # pages than requested.
            while search_list and len(page_set) <= pages_num:
                query_result = self.site.api('query', list='categorymembers',
                                             cmtitle=search_list.pop(0),
                                             cmprop='title',
                                             cmtype='page|subcat',
                                             cmsort='timestamp',
                                             cmlimit='max')
                for element in query_result['query']['categorymembers']:
                    if len(page_set) >= pages_num:
                        # We already have as many pages as requested.
                        break
                    elif 'Category:' in element['title']:
                        # The element is a subcategory: queue it for searching.
                        search_list.append(element['title'])
                    else:
                        # The element is a page: request its summary.
                        try:
                            summary = wk.summary(element['title'], sentences=3)
                            if summary:  # skip pages with an empty summary
                                page_set.append(summary)
                                cbar.update(1)  # increment the progress bar
                        except Exception:
                            # If an error occurs while querying the API for
                            # the summary, skip the page.
                            continue
        # Strip the 'Category:' prefix from the label that is returned.
        category = category.replace('Category:', '')
        # Return a dictionary of all page summaries plus the category label.
        return {'text': page_set, 'category': category}
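
# Example usage, as a sketch (timestamps and the limit are illustrative).
# rcstart must be the more recent timestamp, since results run newest first.
if __name__ == '__main__':
    wiki = Wiki()
    changes = wiki.recentchanges(rclimit='max',
                                 rcstart='2021-01-02T00:00:00Z',
                                 rcend='2021-01-01T00:00:00Z')
    print(changes[['title', 'link', 'summary']].head())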
import datetime

from mwclient import Site

site = Site('https://lol.gamepedia.com', path="/")  # Set wiki
site.login('RheingoldRiver@BotPasswordName', 'smldrgsrthmldyhj')

limit = -1
now = datetime.datetime.utcnow()
now_timestamp = now.isoformat()
then = now - datetime.timedelta(hours=4)  # change hours if needed
last_timestamp = then.isoformat()

revisions = site.api('query', format='json', list='recentchanges',
                     rcstart=now_timestamp, rcend=last_timestamp,
                     rcprop='title|ids', rclimit='max', rcdir='older')

pages = []
pages_used = {}
revs = {}
failed_pages = []
for revision in revisions['query']['recentchanges']:
    revs[revision['revid']] = True
    if revision['title'] not in pages_used:
        pages.append(revision['title'])
        # Record the title so later revisions of the same page are skipped.
        pages_used[revision['title']] = True
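
# A sketch of one common follow-up: fetch the current wikitext of each
# changed page. The loop body is illustrative (the original stops after
# collecting titles); failed_pages gathers any title that errors out.
for title in pages:
    try:
        text = site.pages[title].text()
        print('%s: %d bytes' % (title, len(text)))
    except Exception:
        failed_pages.append(title)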