class GetRevisionContent(GetCurrentContent):
    """Fetch full content for a specific revision, addressed by revision ID.

    Reuses GetCurrentContent's extract_results() and 'rv' field_prefix;
    only the input changes from page titles to revision IDs.
    """
    # 'revids' is a top-level API parameter (no 'rv' prefix applied);
    # input objects supply the value via their .rev_id attribute.
    input_field = SingleParam('revids', key_prefix=False, attr='rev_id')
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS + '|content'),
              SingleParam('parse', False)]
    examples = [OperationExample('539916351')]
class GetPageHTML(Operation):
    """Fetch the rendered HTML of a single page by title, using the wiki's
    web (non-API) interface rather than the action API.
    """
    input_field = SingleParam('title')
    examples = [OperationExample('Africa', limit=1)]
    output_type = Operation
    _limit = 1

    def __init__(self, *a, **kw):
        super(GetPageHTML, self).__init__(*a, **kw)
        self.web_client = getattr(self.client, 'web_client',
                                  DEFAULT_WEB_CLIENT)
        # NOTE(review): 'raise_exc' is popped *after* kw was already passed
        # to the superclass __init__ above -- confirm Operation.__init__
        # tolerates (or consumes) the extra keyword.
        self.raise_exc = kw.pop('raise_exc', True)
        source_info = getattr(self.client, 'source_info', None)
        if source_info:
            # Derive the site root by stripping the main page title off the
            # end of its URL (e.g. ".../wiki/Main_Page" -> ".../wiki/").
            main_title = source_info.mainpage
            main_url = source_info.base
            self.base_url = main_url[:-len(main_title)]
        else:
            self.base_url = DEFAULT_BASE_URL
        self.url = self.base_url + self.input_param
        self.results = {}

    def process(self):
        # Single-shot fetch: store the page text keyed by URL, then signal
        # completion via NoMoreResults.
        try:
            resp = self.web_client.get(self.url)
        except Exception as e:
            self.exception = e
            if self.raise_exc:
                raise
            return self
        self.results[self.url] = resp.text
        raise NoMoreResults()
class GetExternalLinks(QueryOperation):
    """Fetch a page's outgoing links to URLs outside the source wiki."""
    field_prefix = 'el'
    input_field = SingleParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'extlinks')]
    output_type = [ExternalLink]
    examples = [OperationExample('Croatian War of Independence')]

    def extract_results(self, query_resp):
        # Produce one ExternalLink per (page, extlink) pair.
        links = []
        for page_dict in query_resp.get('pages', {}).values():
            for ext in page_dict.get('extlinks', []):
                merged = dict(page_dict)
                merged['source'] = self.source
                merged['url'] = ext.get('*')
                links.append(ExternalLink.from_query(merged))
        return links

    def prepare_params(self, **kw):
        params = super(GetExternalLinks, self).prepare_params(**kw)
        # The extlinks module paginates via an offset rather than the usual
        # continue token; translate when one is present.
        if params.get('elcontinue'):
            params['eloffset'] = params.pop('elcontinue')
        return params
class WebRequestOperation(Operation):
    """Perform an arbitrary single HTTP request (default GET) against a URL,
    storing the response body in self.results keyed by URL.
    """
    input_field = SingleParam('url')
    output_type = Operation
    _limit = 1

    def __init__(self, input_param, **kw):
        self.client = kw.pop('client', None)
        self.web_client = getattr(self.client, 'web_client',
                                  DEFAULT_WEB_CLIENT)
        self.action = kw.pop('action', 'get')
        self.raise_exc = kw.pop('raise_exc', True)
        # Any keys not consumed above are rejected outright.
        if kw:
            raise ValueError('got unexpected keyword arguments: %r'
                             % kw.keys())
        self.set_input_param(input_param)
        self.url = self._input_param
        # NOTE(review): kw is necessarily empty here (the guard above raised
        # otherwise), so self.kwargs is always {} -- confirm intent.
        self.kwargs = kw
        self.results = {}

    def process(self):
        resp = None
        try:
            resp = self.web_client.req(self.action, self.url)
        except Exception as e:
            self.exception = e
            if self.raise_exc:
                raise
            return self
        self.results[self.url] = resp.text
        # Single-shot operation: signal completion after one request.
        raise NoMoreResults()
class GeoSearch(QueryOperation):
    """Search for pages near a geographic coordinate."""
    field_prefix = 'gs'
    input_field = MultiParam('coord')
    fields = [StaticParam('list', 'geosearch'),
              SingleParam('radius', 10000),  # meters; API accepts 10-10000
              # SingleParam('maxdim', 1000),  # does not work?
              SingleParam('globe', 'earth'),  # which planet? donno...
              SingleParam('namespace'),
              StaticParam('gsprop', DEFAULT_COORD_PROPS)]
    output_type = [CoordinateIdentifier]
    examples = [OperationExample(('37.8197', '-122.479'), 1)]

    def extract_results(self, query_resp):
        # Wrap each geosearch hit as a CoordinateIdentifier tied to its page.
        results = []
        for res_dict in query_resp['geosearch']:
            page = PageIdentifier.from_query(res_dict, source=self.source)
            results.append(CoordinateIdentifier(res_dict, page))
        return results
class GetCurrentContent(QueryOperation):
    """Fetch full content for the current (top) revision of pages."""
    input_field = MultiParam('titles', key_prefix=False, attr='title')
    field_prefix = 'rv'
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS + '|content'),
              SingleParam('parse', False),
              SingleParam('redirects', True, key_prefix=False)]
    examples = [OperationExample('This page does not exist'),
                OperationExample('Coffee')]
    output_type = Revision

    def extract_results(self, query_resp):
        # TODO: map the response's 'redirects' list back onto request titles
        req_title = self.input_param
        parsed = self.kwargs.get('rvparse', False)
        revisions = []
        for page_id, page_dict in query_resp.get('pages', {}).items():
            if int(page_id) < 0:
                # Negative page IDs mark missing/invalid titles; skip them.
                continue
            merged = dict(page_dict)
            merged.update(page_dict['revisions'][0])
            rev = Revision.from_query(merged,
                                      source=self.source,
                                      is_parsed=parsed)
            rev.req_title = req_title
            revisions.append(rev)
        return revisions
class GetFeedbackV5(QueryOperation):
    """Fetch Article Feedback v5 entries for a page.

    This API is nonstandard in two ways: the v5 modules use differing
    prefixes (af/afvf), and results are not nested under 'query', which
    is why post_process_response() is overridden here.
    """
    field_prefix = 'afvf'
    input_field = SingleParam('pageid')
    fields = [StaticParam('list', 'articlefeedbackv5-view-feedback'),
              SingleParam('filter', default='featured')]
    output_type = list
    examples = [OperationExample('604727')]

    def post_process_response(self, response):
        # Results live at the response's top level, not under 'query'.
        if not response.results:
            return {}
        return dict(response.results)

    def extract_results(self, query_resp):
        # Only the count is surfaced for now; entries themselves are TODO.
        feedback = query_resp['articlefeedbackv5-view-feedback']
        return ['TODO'] * int(feedback['count'])
class GetTranscludes(QueryOperation):
    """Fetch pages that transclude (embed) a given template."""
    input_field = SingleParam('title', val_prefix='Template:')
    field_prefix = 'gei'
    fields = [StaticParam('generator', 'embeddedin'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection')]
    output_type = [PageInfo]
    examples = [OperationExample('Template:ArticleHistory')]

    def extract_results(self, query_resp):
        # Page IDs (the dict keys) are not needed; only the page dicts are.
        pages = query_resp.get('pages', {})
        return [PageInfo.from_query(page_dict, source=self.source)
                for page_dict in pages.values()]
class GetUserContribs(QueryOperation):
    """Fetch a user's contributions (revisions they authored)."""
    field_prefix = 'uc'
    input_field = SingleParam('user')
    fields = [StaticParam('list', 'usercontribs'),
              StaticParam('ucprop', DEFAULT_PROPS)]
    output_type = [RevisionInfo]
    examples = [OperationExample('Jimbo Wales')]

    def extract_results(self, query_resp):
        # Each usercontribs entry maps directly to one RevisionInfo.
        return [RevisionInfo.from_query(contrib, source=self.source)
                for contrib in query_resp.get('usercontribs', [])]
class GetCoordinates(QueryOperation):
    """Fetch geographic coordinates attached to pages.

    Yields one CoordinateIdentifier per (coordinate, page) pair; a page
    may carry several coordinates (primary and secondary).
    """
    field_prefix = 'co'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'coordinates'),
              SingleParam('primary', 'all'),  # primary, secondary, all
              MultiParam('prop', DEFAULT_COORD_PROPS)]
    output_type = [CoordinateIdentifier]
    examples = [OperationExample(['White House', 'Mount Everest'])]

    def extract_results(self, query_resp):
        ret = []
        # .get() guards (matching sibling operations): pages that have no
        # coordinates come back without a 'coordinates' key, and an empty
        # result set may omit 'pages' entirely -- the old direct indexing
        # raised KeyError in both cases. iteritems() also replaced with
        # values() (keys were unused) for py3 compatibility.
        for pid_dict in query_resp.get('pages', {}).values():
            page_ident = PageIdentifier.from_query(pid_dict,
                                                   source=self.source)
            for coord in pid_dict.get('coordinates', []):
                ret.append(CoordinateIdentifier(coord, page_ident))
        return ret
class GetBacklinks(QueryOperation):
    """Fetch a page's incoming links from other pages on the source wiki."""
    field_prefix = 'gbl'
    input_field = SingleParam('title')
    fields = [StaticParam('generator', 'backlinks'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection')]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        # Page IDs (dict keys) are unused; convert each page dict directly.
        pages = query_resp.get('pages', {})
        return [PageInfo.from_query(page_dict, source=self.source)
                for page_dict in pages.values()]
class GetLinks(QueryOperation):
    """Fetch a page's outgoing links to other pages on the source wiki."""
    field_prefix = 'gpl'
    input_field = SingleParam('titles', key_prefix=False)
    fields = [StaticParam('generator', 'links'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection'),
              MultiParam('namespace')]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee'), OperationExample('Aabach')]

    def extract_results(self, query_resp):
        # Keys of 'pages' are page IDs and go unused here.
        return [PageInfo.from_query(page_dict, source=self.source)
                for page_dict in query_resp['pages'].values()]
class GetInterwikiLinks(QueryOperation):
    """Fetch pages' interwiki links."""
    field_prefix = 'iw'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'iwlinks'), SingleParam('url', True)]
    output_type = [InterwikiLink]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        # One InterwikiLink per (page, iwlink) pair.
        links = []
        for page_dict in query_resp.get('pages', {}).values():
            for iw in page_dict.get('iwlinks', []):
                merged = dict(page_dict)
                merged['source'] = self.source
                merged['url'] = iw.get('url')
                merged['prefix'] = iw.get('prefix')
                links.append(InterwikiLink.from_query(merged))
        return links
class GetCategory(QueryOperation):
    """Fetch the members of a category."""
    field_prefix = 'gcm'
    input_field = SingleParam('title', val_prefix='Category:')
    fields = [StaticParam('generator', 'categorymembers'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection'),
              MultiParam('namespace')]
    output_type = [PageInfo]
    examples = [OperationExample('Featured_articles')]

    def extract_results(self, query_resp):
        # Keys of 'pages' (page IDs) are not needed for conversion.
        return [PageInfo.from_query(page_dict, source=self.source)
                for page_dict in query_resp['pages'].values()]
class GetCategoryList(QueryOperation):
    """Fetch the categories containing the given pages."""
    field_prefix = 'gcl'
    # NOTE(review): 'gclshow' already carries the 'gcl' prefix while the
    # class also sets field_prefix='gcl' -- confirm SingleParam does not
    # double-prefix this parameter.
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('generator', 'categories'),
              StaticParam('prop', 'categoryinfo'),
              SingleParam('gclshow', '')]  # hidden, !hidden
    output_type = [CategoryInfo]
    examples = [OperationExample('Physics')]

    def extract_results(self, query_resp):
        infos = []
        for page_dict in query_resp['pages'].values():
            info = CategoryInfo.from_query(page_dict, source=self.source)
            # Negative page IDs mark missing/invalid pages; drop them.
            if info.page_id >= 0:
                infos.append(info)
        return infos
class GetQueryPage(QueryOperation):
    """Fetch results of a built-in QueryPage-based special page
    (e.g. Ancientpages, Deadendpages).
    """
    field_prefix = 'gqp'
    input_field = SingleParam('page')
    fields = [StaticParam('generator', 'querypage'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection')]
    output_type = PageInfo

    def extract_results(self, query_resp):
        # Keys of 'pages' are page IDs and are unused here.
        ret = []
        for k, pid_dict in query_resp['pages'].iteritems():
            page = PageInfo.from_query(pid_dict, source=self.source)
            ret.append(page)
        return ret

    def prepare_params(self, **kw):
        params = super(GetQueryPage, self).prepare_params(**kw)
        if params.get('gqpcontinue'):
            # querypage paginates via an offset, not a continue token.
            # BUGFIX: previously popped the misspelled key 'ggqpcontinue'
            # (double 'g'), raising KeyError on every continuation.
            params['gqpoffset'] = params.pop('gqpcontinue')
        return params
class GetSubcategoryInfos(QueryOperation):
    """Fetch CategoryInfo for a category's subcategories, used to count
    the members of each subcategory.
    """
    field_prefix = 'gcm'
    input_field = SingleParam('title', val_prefix='Category:')
    fields = [StaticParam('generator', 'categorymembers'),
              StaticParam('prop', 'categoryinfo'),
              StaticParam('gcmtype', 'subcat')]
    output_type = [CategoryInfo]
    examples = [OperationExample('FA-Class_articles')]

    def extract_results(self, query_resp):
        infos = []
        for page_dict in query_resp['pages'].values():
            # Flatten the nested 'categoryinfo' block into the page dict so
            # from_query() sees a single flat mapping.
            page_dict.update(page_dict.get('categoryinfo', {}))
            info = CategoryInfo.from_query(page_dict, source=self.source)
            # Negative page IDs mark missing/invalid pages; drop them.
            if info.page_id >= 0:
                infos.append(info)
        return infos
class GetLanguageLinks(QueryOperation):
    """Fetch pages' interlanguage links ("Language Links" in the MediaWiki
    API), which point at the corresponding pages on other-language wikis.
    Mostly useful on wikis with a family of multilingual sister projects,
    such as the Wikipedias.
    """
    field_prefix = 'll'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'langlinks'), SingleParam('url', True)]
    output_type = [LanguageLink]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        # One LanguageLink per (page, langlink) pair.
        links = []
        for page_dict in query_resp.get('pages', {}).values():
            for ll in page_dict.get('langlinks', []):
                merged = dict(page_dict)
                merged['source'] = self.source
                merged['url'] = ll.get('*')
                merged['language'] = ll.get('lang')
                links.append(LanguageLink.from_query(merged))
        return links
class GetRandom(QueryOperation):
    """Fetch random pages using MediaWiki's Special:Random."""
    field_prefix = 'grn'
    input_field = None  # no input: the generator picks the pages
    fields = [StaticParam('generator', 'random'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection'),
              SingleParam('namespace', default='', coerce=coerce_namespace)]
    output_type = [PageInfo]
    per_query_limit = QueryLimit(10, 20)
    examples = [OperationExample(doc='basic random')]

    def extract_results(self, query_resp):
        # Keys of 'pages' (page IDs) are unused.
        return [PageInfo.from_query(page_dict, source=self.source)
                for page_dict in query_resp['pages'].values()]

    def get_cont_str(self, *a, **kw):
        # Random generation has no meaningful continue cursor.
        return ''
class MediaWikiCall(Operation):
    """
    Sets up actual API HTTP request, makes the request, encapsulates
    error handling, and stores results.
    """
    input_field = SingleParam('url_params')  # param_type=dict)
    output_type = Operation
    _limit = 1

    def __init__(self, params, **kw):
        # These settings will all go on the WapitiClient
        self.raise_exc = kw.pop('raise_exc', True)
        self.raise_err = kw.pop('raise_err', True)
        self.raise_warn = kw.pop('raise_warn', False)
        self.client = kw.pop('client')
        self.web_client = getattr(self.client, 'web_client',
                                  DEFAULT_WEB_CLIENT)
        if kw:
            raise ValueError('got unexpected keyword arguments: %r'
                             % kw.keys())
        self.api_url = self.client.api_url
        params = params or {}
        # Caller params are layered over the base API params.
        self.params = dict(BASE_API_PARAMS)
        self.params.update(params)
        self.action = params['action']
        self.url = ''
        self.results = None
        self.servedby = None
        self.exception = None
        self.error = None
        self.error_code = None
        self.warnings = []
        self._input_param = params

    def process(self):
        """Issue the request, parse the JSON body, and record servedby,
        error, and warning info; raises per the raise_* settings.
        """
        # TODO: add URL to all exceptions
        resp = None
        try:
            resp = self.web_client.get(self.api_url, self.params)
        except Exception as e:
            # TODO: log
            self.exception = e  # TODO: wrap
            if self.raise_exc:
                raise
            return self
        finally:
            # Record the final URL even on failure (resp stays None then).
            self.url = getattr(resp, 'url', '')
        try:
            self.results = json.loads(resp.text)
        except Exception as e:
            self.exception = e  # TODO: wrap
            if self.raise_exc:
                raise
            return self
        self.servedby = self.results.get('servedby')
        error = self.results.get('error')
        if error:
            self.error = error.get('info')
            self.error_code = error.get('code')
        warnings = self.results.get('warnings', {})
        for mod_name, warn_dict in warnings.items():
            # Warning text usually lives under '*'; fall back to the dict.
            warn_str = '%s: %s' % (mod_name, warn_dict.get('*', warn_dict))
            self.warnings.append(warn_str)
        if self.error and self.raise_err:
            raise WapitiException(self.error_code)
        if self.warnings and self.raise_warn:
            raise WapitiException('warnings: %r' % self.warnings)
        return self

    @property
    def notices(self):
        # Aggregated view of everything noteworthy that happened.
        ret = []
        if self.exception:
            ret.append(self.exception)
        if self.error:
            ret.append(self.error)
        if self.warnings:
            ret.extend(self.warnings)
        return ret

    @property
    def remaining(self):
        # Single-shot call: one unit of work until done.
        if self.done:
            return 0
        return 1