Exemple #1
0
class GetAllImageInfos(GetImageInfos):
    """
    Variant of `GetImageInfos` driven by the ``allimages`` generator
    rather than by input titles (``input_field`` is ``None``).
    """
    field_prefix = 'gai'
    input_field = None
    fields = [
        StaticParam('generator', 'allimages'),
        StaticParam('prop', 'imageinfo'),
        StaticParam('gaiprop', DEFAULT_IMAGE_PROPS),
    ]
    examples = [OperationExample()]
Exemple #2
0
class GetAllCategoryInfos(GetSubcategoryInfos):
    """
    Fetch all categories on the source wiki.

    Uses the ``allcategories`` generator and takes no input parameter;
    result extraction is inherited from `GetSubcategoryInfos`.
    """
    field_prefix = 'gac'
    input_field = None
    fields = [StaticParam('generator', 'allcategories'),
              StaticParam('prop', 'categoryinfo')]
    examples = [OperationExample(doc='basic allcats')]
Exemple #3
0
class GetPageRevisionInfos(QueryOperation):
    """
    Fetch revisions for pages.

    Produces one `RevisionInfo` per revision entry found on each
    returned page that is not flagged as missing.
    """
    field_prefix = 'rv'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS)]
    output_type = [RevisionInfo]
    examples = [OperationExample('Coffee', 10)]

    def extract_results(self, query_resp):
        """
        Build a list of `RevisionInfo` objects from *query_resp*.

        Each revision dict is overlaid on a copy of its page dict, so
        revision keys win over page keys of the same name.
        """
        rev_infos = []
        for page in query_resp.get('pages', {}).values():
            # Pages flagged as missing carry no revisions to report.
            if 'missing' in page:
                continue
            for revision in page.get('revisions', []):
                merged = dict(page)
                merged.update(revision)
                rev_infos.append(
                    RevisionInfo.from_query(merged, source=self.source))
        return rev_infos
Exemple #4
0
class GetProtections(QueryOperation):
    """
    Fetch the ``protection`` entry of ``prop=info`` for pages, wrapping
    each one in a `ProtectionInfo`.
    """
    field_prefix = 'in'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'info'), StaticParam('inprop', 'protection')]
    output_type = ProtectionInfo
    examples = [
        OperationExample('Coffee'),
        OperationExample('Coffee|House'),
        OperationExample(['Coffee', 'House'])
    ]

    def extract_results(self, query_resp):
        """
        Return one `ProtectionInfo` per page in *query_resp*.

        A response without a ``'pages'`` key yields an empty list.
        Raises `KeyError` if a page dict has no ``'protection'`` key.
        """
        # .values() works on both Python 2 and 3 (iteritems() is 2-only),
        # and the page-id key was never used.
        pages = query_resp.get('pages', {})
        return [ProtectionInfo(page['protection'])
                for page in pages.values()]
Exemple #5
0
class GetExternalLinks(QueryOperation):
    """
    Fetch page outgoing links to URLs outside of source wiki.
    """
    field_prefix = 'el'
    input_field = SingleParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'extlinks'),
    ]
    output_type = [ExternalLink]
    examples = [OperationExample('Croatian War of Independence')]

    def extract_results(self, query_resp):
        """
        Return an `ExternalLink` for every ``extlinks`` entry of every
        page in *query_resp*.

        Each link is built from a copy of the page dict with ``source``
        and ``url`` (the entry's ``'*'`` value) set on it.
        """
        links = []
        for page in query_resp.get('pages', {}).values():
            for ext in page.get('extlinks', []):
                link_dict = dict(page, source=self.source, url=ext.get('*'))
                links.append(ExternalLink.from_query(link_dict))
        return links

    def prepare_params(self, **kw):
        """
        Prepare request parameters via the parent class, then move a
        truthy ``elcontinue`` value over to ``eloffset``.
        """
        params = super(GetExternalLinks, self).prepare_params(**kw)
        if not params.get('elcontinue'):
            return params
        params['eloffset'] = params.pop('elcontinue')
        return params
Exemple #6
0
class GetTranscludes(QueryOperation):
    """
    Fetch `PageInfo` for pages returned by the ``embeddedin`` generator
    for a given title (input values are prefixed with ``Template:``).
    """
    input_field = SingleParam('title', val_prefix='Template:')
    field_prefix = 'gei'
    fields = [StaticParam('generator', 'embeddedin'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection')]
    output_type = [PageInfo]
    examples = [OperationExample('Template:ArticleHistory')]

    def extract_results(self, query_resp):
        """
        Return one `PageInfo` per page dict in *query_resp*; a response
        without ``'pages'`` yields an empty list.
        """
        page_dicts = query_resp.get('pages', {}).values()
        return [PageInfo.from_query(page, source=self.source)
                for page in page_dicts]
Exemple #7
0
class GetTemplates(QueryOperation):
    """
    Fetch `PageInfo` for pages returned by the ``templates`` generator
    for the given titles.
    """
    field_prefix = 'gtl'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('generator', 'templates'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection')
    ]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Return one `PageInfo` per page dict in *query_resp*.

        A response without a ``'pages'`` key yields an empty list
        instead of raising `KeyError`.
        """
        # .values() is available on both Python 2 and 3; iteritems() is not.
        pages = query_resp.get('pages', {})
        return [PageInfo.from_query(pid_dict, source=self.source)
                for pid_dict in pages.values()]
Exemple #8
0
class GetRevisionContent(GetCurrentContent):
    """
    Variant of `GetCurrentContent` whose input is a single ``revids``
    value (read from the ``rev_id`` attribute) instead of page titles.
    """
    input_field = SingleParam('revids', key_prefix=False, attr='rev_id')
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS + '|content'),
              SingleParam('parse', False)]
    examples = [OperationExample('539916351')]
Exemple #9
0
class GetUserContribs(QueryOperation):
    """
    Fetch a user's contributions (``list=usercontribs``) as
    `RevisionInfo` objects.
    """
    field_prefix = 'uc'
    input_field = SingleParam('user')
    fields = [StaticParam('list', 'usercontribs'),
              StaticParam('ucprop', DEFAULT_PROPS)]
    output_type = [RevisionInfo]
    examples = [OperationExample('Jimbo Wales')]

    def extract_results(self, query_resp):
        """
        Return one `RevisionInfo` per entry under ``'usercontribs'`` in
        *query_resp*; a missing key yields an empty list.
        """
        contribs = query_resp.get('usercontribs', [])
        return [RevisionInfo.from_query(contrib, source=self.source)
                for contrib in contribs]
Exemple #10
0
class GetRecentChanges(QueryOperation):
    """
    Fetch `PageInfo` for pages returned by the ``recentchanges``
    generator. Takes no input parameter.
    """
    field_prefix = 'grc'
    input_field = None
    fields = [
        StaticParam('generator', 'recentchanges'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection')
    ]
    output_type = [PageInfo]
    examples = [OperationExample()]

    def extract_results(self, query_resp):
        """
        Return one `PageInfo` per page in *query_resp*, skipping entries
        whose page-id key starts with ``'-'``.

        A response without a ``'pages'`` key yields an empty list
        instead of raising `KeyError`.
        """
        ret = []
        # .items() is available on both Python 2 and 3; iteritems() is not.
        for pid, pid_dict in query_resp.get('pages', {}).items():
            if pid.startswith('-'):
                continue
            page_ident = PageInfo.from_query(pid_dict, source=self.source)
            ret.append(page_ident)
        return ret
Exemple #11
0
class GetBacklinks(QueryOperation):
    """
    Fetch page's incoming links from other pages on source wiki.
    """
    field_prefix = 'gbl'
    input_field = SingleParam('title')
    fields = [
        StaticParam('generator', 'backlinks'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection')
    ]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Return one `PageInfo` per page dict in *query_resp*; a response
        without ``'pages'`` yields an empty list.
        """
        # .values() is available on both Python 2 and 3; iteritems() is
        # not, and the page-id key was unused.
        pages = query_resp.get('pages', {})
        return [PageInfo.from_query(pid_dict, source=self.source)
                for pid_dict in pages.values()]
Exemple #12
0
class GetImages(QueryOperation):
    """
    Fetch the images embedded on pages.
    """
    field_prefix = 'gim'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('generator', 'images'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection')]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Return one `PageInfo` per page dict in *query_resp*.

        Entries whose page-id key starts with ``'-'`` get their
        ``'pageid'`` set to ``None`` (in place) before conversion. A
        response without a ``'pages'`` key yields an empty list instead
        of raising `KeyError`.
        """
        ret = []
        # .items() is available on both Python 2 and 3; iteritems() is not.
        for pid, pid_dict in query_resp.get('pages', {}).items():
            if pid.startswith('-'):
                pid_dict['pageid'] = None  # TODO: breaks consistency :/
            page_ident = PageInfo.from_query(pid_dict,
                                             source=self.source)
            ret.append(page_ident)
        return ret
Exemple #13
0
class GetCategory(QueryOperation):
    """
    Fetch the members in category.
    """
    field_prefix = 'gcm'
    input_field = SingleParam('title', val_prefix='Category:')
    fields = [
        StaticParam('generator', 'categorymembers'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection'),
        MultiParam('namespace')
    ]
    output_type = [PageInfo]
    examples = [OperationExample('Featured_articles')]

    def extract_results(self, query_resp):
        """
        Return one `PageInfo` per page dict in *query_resp*.

        A response without a ``'pages'`` key yields an empty list
        instead of raising `KeyError`.
        """
        # .values() is available on both Python 2 and 3; iteritems() is not.
        pages = query_resp.get('pages', {})
        return [PageInfo.from_query(pid_dict, source=self.source)
                for pid_dict in pages.values()]
Exemple #14
0
class GetLinks(QueryOperation):
    """
    Fetch page's outgoing links to other pages on source wiki.
    """
    field_prefix = 'gpl'
    input_field = SingleParam('titles', key_prefix=False)
    fields = [
        StaticParam('generator', 'links'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection'),
        MultiParam('namespace')
    ]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee'), OperationExample('Aabach')]

    def extract_results(self, query_resp):
        """
        Return one `PageInfo` per page dict in *query_resp*.

        A response without a ``'pages'`` key yields an empty list
        instead of raising `KeyError`.
        """
        # .values() is available on both Python 2 and 3; iteritems() is not.
        pages = query_resp.get('pages', {})
        return [PageInfo.from_query(pid_dict, source=self.source)
                for pid_dict in pages.values()]
Exemple #15
0
class GetCategoryList(QueryOperation):
    """
    Fetch the categories containing pages.
    """
    field_prefix = 'gcl'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('generator', 'categories'),
        StaticParam('prop', 'categoryinfo'),
        SingleParam('gclshow', '')
    ]  # hidden, !hidden
    output_type = [CategoryInfo]
    examples = [OperationExample('Physics')]

    def extract_results(self, query_resp):
        """
        Return a `CategoryInfo` for each page dict in *query_resp*,
        dropping those whose ``page_id`` is negative.

        A response without a ``'pages'`` key yields an empty list
        instead of raising `KeyError`.
        """
        ret = []
        # .values() is available on both Python 2 and 3; iteritems() is not.
        for pid_dict in query_resp.get('pages', {}).values():
            cat_info = CategoryInfo.from_query(pid_dict, source=self.source)
            if cat_info.page_id < 0:
                continue
            ret.append(cat_info)
        return ret
Exemple #16
0
class GetQueryPage(QueryOperation):
    """
    Fetch `PageInfo` for pages returned by the ``querypage`` generator
    for the special page named by the ``page`` input.
    """
    field_prefix = 'gqp'
    input_field = SingleParam('page')
    fields = [
        StaticParam('generator', 'querypage'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection')
    ]
    output_type = PageInfo

    def extract_results(self, query_resp):
        """
        Return one `PageInfo` per page dict in *query_resp*.

        A response without a ``'pages'`` key yields an empty list
        instead of raising `KeyError`.
        """
        # .values() is available on both Python 2 and 3; iteritems() is not.
        pages = query_resp.get('pages', {})
        return [PageInfo.from_query(pid_dict, source=self.source)
                for pid_dict in pages.values()]

    def prepare_params(self, **kw):
        """
        Prepare request parameters via the parent class, then move a
        truthy ``gqpcontinue`` value over to ``gqpoffset``.
        """
        params = super(GetQueryPage, self).prepare_params(**kw)
        if params.get('gqpcontinue'):
            # Pop the same key that was just tested; the previous
            # 'ggqpcontinue' spelling raised KeyError on continuation.
            params['gqpoffset'] = params.pop('gqpcontinue')
        return params
Exemple #17
0
class GeoSearch(QueryOperation):
    """
    Run a ``list=geosearch`` query around the given coordinate and
    return the hits as `CoordinateIdentifier` objects.
    """
    field_prefix = 'gs'
    input_field = MultiParam('coord')
    fields = [StaticParam('list', 'geosearch'),
              SingleParam('radius', 10000),  # must be within 10 and 10000
              #SingleParam('maxdim', 1000),  # does not work?
              SingleParam('globe', 'earth'),  # which planet? donno...
              SingleParam('namespace'),
              StaticParam('gsprop', DEFAULT_COORD_PROPS)]
    output_type = [CoordinateIdentifier]
    examples = [OperationExample(('37.8197', '-122.479'), 1)]

    def extract_results(self, query_resp):
        """
        Return one `CoordinateIdentifier` per entry under
        ``'geosearch'`` in *query_resp*.

        Each entry is used both to build the `PageIdentifier` and as the
        coordinate data. Raises `KeyError` if ``'geosearch'`` is absent.
        """
        coords = []
        for hit in query_resp['geosearch']:
            page = PageIdentifier.from_query(hit, source=self.source)
            coords.append(CoordinateIdentifier(hit, page))
        return coords
Exemple #18
0
class GetImageInfos(QueryOperation):
    """
    Fetch ``prop=imageinfo`` for the given titles as `ImageInfo`
    objects.
    """
    field_prefix = 'ii'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'imageinfo'),
              StaticParam('iiprop', IMAGE_INFO_PROPS)]
    output_type = [ImageInfo]

    def extract_results(self, query_resp):
        """
        Return one `ImageInfo` per page dict in *query_resp*.

        Entries with a negative page-id key whose ``imagerepository``
        is not ``'local'`` get ``'pageid'`` and ``'revid'`` set to
        ``'shared'``. The first ``imageinfo`` entry (if any) is merged
        into the page dict before conversion. Pages for which
        conversion raises `ValueError` are printed and skipped.

        A response without a ``'pages'`` key yields an empty list
        instead of raising `KeyError`.
        """
        ret = []
        # .items() is available on both Python 2 and 3; iteritems() is not.
        for k, pid_dict in query_resp.get('pages', {}).items():
            if int(k) < 0 and pid_dict['imagerepository'] != 'local':
                pid_dict['pageid'] = 'shared'
                pid_dict['revid'] = 'shared'
            try:
                pid_dict.update(pid_dict.get('imageinfo', [{}])[0])
                image_info = ImageInfo.from_query(pid_dict,
                                                  source=self.source)
            except ValueError as e:
                # Parenthesised form prints the same text on Python 2
                # and is valid syntax on Python 3.
                print(e)
                continue
            ret.append(image_info)
        return ret
Exemple #19
0
class GetRandom(QueryOperation):
    """
    Fetch random pages using MediaWiki's Special:Random.
    """
    field_prefix = 'grn'
    fields = [StaticParam('generator', 'random'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection'),
              SingleParam('namespace', default='', coerce=coerce_namespace)]
    input_field = None
    output_type = [PageInfo]
    per_query_limit = QueryLimit(10, 20)
    examples = [OperationExample(doc='basic random')]

    def extract_results(self, query_resp):
        """
        Return one `PageInfo` per page dict in *query_resp*.

        A response without a ``'pages'`` key yields an empty list
        instead of raising `KeyError`.
        """
        # .values() is available on both Python 2 and 3; iteritems() is not.
        pages = query_resp.get('pages', {})
        return [PageInfo.from_query(pid_dict, source=self.source)
                for pid_dict in pages.values()]

    def get_cont_str(self, *a, **kw):
        """Always return an empty continuation string."""
        return ''
Exemple #20
0
class GetSubcategoryInfos(QueryOperation):
    """
    Fetch `CategoryInfo` for category, used to count the members of
    sub-categories.
    """
    field_prefix = 'gcm'
    input_field = SingleParam('title', val_prefix='Category:')
    fields = [
        StaticParam('generator', 'categorymembers'),
        StaticParam('prop', 'categoryinfo'),
        StaticParam('gcmtype', 'subcat')
    ]
    output_type = [CategoryInfo]
    examples = [OperationExample('FA-Class_articles')]

    def extract_results(self, query_resp):
        """
        Return a `CategoryInfo` for each page dict in *query_resp*,
        dropping those whose ``page_id`` is negative.

        Each page dict's nested ``'categoryinfo'`` mapping (if present)
        is merged into the page dict in place before conversion. A
        response without a ``'pages'`` key yields an empty list instead
        of raising `KeyError`.
        """
        ret = []
        # .values() is available on both Python 2 and 3; iteritems() is not.
        for pid_dict in query_resp.get('pages', {}).values():
            pid_dict.update(pid_dict.get('categoryinfo', {}))
            cat_info = CategoryInfo.from_query(pid_dict, source=self.source)
            if cat_info.page_id < 0:
                continue
            ret.append(cat_info)
        return ret
Exemple #21
0
class GetPageInfo(QueryOperation):
    """
    Fetch ``prop=info`` for the given titles as `PageInfo` objects.
    """
    field_prefix = 'in'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'info'),
        MultiParam('prop', 'subjectid|talkid|protection')
    ]
    output_type = PageInfo
    examples = [OperationExample(['Coffee', 'Category:Africa'])]

    def extract_results(self, query_resp):
        """
        Return one `PageInfo` per page dict in *query_resp*.

        A response without a ``'pages'`` key yields an empty list
        instead of raising `KeyError`.
        """
        # .values() is available on both Python 2 and 3; iteritems() is not.
        pages = query_resp.get('pages', {})
        return [PageInfo.from_query(pid_dict, source=self.source)
                for pid_dict in pages.values()]
Exemple #22
0
class GetCoordinates(QueryOperation):
    """
    Fetch ``prop=coordinates`` for the given titles as
    `CoordinateIdentifier` objects.
    """
    field_prefix = 'co'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'coordinates'),
        SingleParam('primary', 'all'),  # primary, secondary, all
        MultiParam('prop', DEFAULT_COORD_PROPS)
    ]
    output_type = [CoordinateIdentifier]
    examples = [OperationExample(['White House', 'Mount Everest'])]

    def extract_results(self, query_resp):
        """
        Return one `CoordinateIdentifier` per coordinate entry of every
        page in *query_resp*.

        Pages without a ``'coordinates'`` key contribute nothing, and a
        response without a ``'pages'`` key yields an empty list.
        """
        ret = []
        # .values() is available on both Python 2 and 3; iteritems() is not.
        for pid_dict in query_resp.get('pages', {}).values():
            page_ident = PageIdentifier.from_query(pid_dict,
                                                   source=self.source)
            # Append inside the inner loop so every coordinate is kept.
            # Appending after it kept only the last one per page and
            # reused a stale value for pages with no coordinates.
            for coord in pid_dict.get('coordinates', []):
                ret.append(CoordinateIdentifier(coord, page_ident))
        return ret
Exemple #23
0
class GetSourceInfo(QueryOperation):
    """
    Fetch meta site information about the source wiki.

    The default properties include:

    - General source information: Main Page, base, sitename, generator,
      phpversion, phpsapi, dbtype, dbversion, case, rights, lang, fallback,
      fallback8bitEncoding, writeapi, timezone, timeoffset, articlepath,
      scriptpath, script, variantarticlepath, server, wikiid, time, misermode,
      maxuploadsize
    - Namespace map
    - Interwiki map
    - Statistics: pages, articles, edits, images, users, activeusers, admins,
      jobs
    """
    field_prefix = 'si'
    input_field = None
    fields = [StaticParam('meta', 'siteinfo'),
              MultiParam('prop', DEFAULT_PROPS)]
    output_type = SourceInfo

    def extract_results(self, query_resp):
        """
        Build a single `SourceInfo` from *query_resp* and return it in a
        one-element list.

        The keyword arguments passed to `SourceInfo` are the
        ``'general'`` mapping, plus ``namespace_map`` and
        ``interwiki_map`` tuples, plus the ``'statistics'`` mapping.
        Raises `KeyError` if ``'general'`` or ``'statistics'`` is absent.
        """
        # Work on a copy so the caller's response dict is not mutated.
        ret = dict(query_resp['general'])
        namespaces = query_resp.get('namespaces', {})
        interwikis = query_resp.get('interwikimap', {})
        # .values() is available on both Python 2 and 3; iteritems() is
        # not, and the namespace key itself was unused.
        ns_map = [NamespaceDescriptor(ns_dict.get('id'),
                                      ns_dict.get('*'),
                                      ns_dict.get('canonical'))
                  for ns_dict in namespaces.values()]
        iw_map = [InterwikiDescriptor(iw.get('prefix'),
                                      iw.get('url'),
                                      iw.get('language'))
                  for iw in interwikis]
        ret['namespace_map'] = tuple(ns_map)
        ret['interwiki_map'] = tuple(iw_map)
        ret.update(query_resp['statistics'])
        source_info = SourceInfo(**ret)
        return [source_info]
Exemple #24
0
class GetInterwikiLinks(QueryOperation):
    """
    Fetch pages' interwiki links.
    """
    field_prefix = 'iw'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'iwlinks'),
        SingleParam('url', True),
    ]
    output_type = [InterwikiLink]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Return an `InterwikiLink` for every ``iwlinks`` entry of every
        page in *query_resp*.

        Each link is built from a copy of the page dict with ``source``,
        ``url`` and ``prefix`` set on it.
        """
        links = []
        for page in query_resp.get('pages', {}).values():
            for entry in page.get('iwlinks', []):
                link_dict = dict(page,
                                 source=self.source,
                                 url=entry.get('url'),
                                 prefix=entry.get('prefix'))
                links.append(InterwikiLink.from_query(link_dict))
        return links
Exemple #25
0
class GetCurrentContent(QueryOperation):
    """
    Fetch full content for current (top) revision.
    """
    input_field = MultiParam('titles', key_prefix=False, attr='title')
    field_prefix = 'rv'
    fields = [
        StaticParam('prop', 'revisions'),
        MultiParam('prop', DEFAULT_PROPS + '|content'),
        SingleParam('parse', False),
        SingleParam('redirects', True, key_prefix=False)
    ]
    examples = [
        OperationExample('This page does not exist'),
        OperationExample('Coffee')
    ]
    output_type = Revision

    def extract_results(self, query_resp):
        """
        Return one `Revision` per page in *query_resp*, built from the
        page dict overlaid with its first ``revisions`` entry.

        Pages whose id key is negative are skipped. Each revision has
        ``req_title`` set to this operation's ``input_param``.
        """
        ret = []
        #redirect_list = query_resp.get('redirects', [])  # TODO
        #redirects = dict([(r['from'], r['to']) for r in redirect_list])
        requested_title = self.input_param
        is_parsed = self.kwargs.get('rvparse', False)

        pages = query_resp.get('pages', {})
        # .items() is available on both Python 2 and 3; iteritems() is not.
        for page_id, pid_dict in pages.items():
            if int(page_id) < 0:
                continue
            rev_dict = dict(pid_dict)
            rev_dict.update(pid_dict['revisions'][0])
            revision = Revision.from_query(rev_dict,
                                           source=self.source,
                                           is_parsed=is_parsed)
            revision.req_title = requested_title
            ret.append(revision)
        return ret
Exemple #26
0
class GetFeedbackV5(QueryOperation):
    """
    article feedback v5 breaks standards in a couple ways.
      * the various v5 APIs use different prefixes (af/afvf)
      * it doesn't put its results under 'query', requiring a custom
      post_process_response()
    """
    field_prefix = 'afvf'
    input_field = SingleParam('pageid')
    fields = [StaticParam('list', 'articlefeedbackv5-view-feedback'),
              SingleParam('filter', default='featured')]
    output_type = list
    examples = [OperationExample('604727')]

    def post_process_response(self, response):
        """
        Return ``response.results`` as a plain dict, or an empty dict
        when the results are falsy.
        """
        if response.results:
            return dict(response.results)
        return {}

    def extract_results(self, query_resp):
        """
        Return a placeholder list holding ``'TODO'`` once per reported
        feedback item (the ``count`` of the view-feedback entry).
        """
        feedback = query_resp['articlefeedbackv5-view-feedback']
        total = int(feedback['count'])
        return ['TODO'] * total
Exemple #27
0
class GetLanguageLinks(QueryOperation):
    """
    Fetch pages' interlanguage links (aka "Language Links" in the MediaWiki
    API). Interlanguage links should correspond to pages on another language
    wiki. Mostly useful on a source wiki with a family of similar multilingual
    projects, such as Wikipedias.
    """
    field_prefix = 'll'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'langlinks'),
        SingleParam('url', True),
    ]
    output_type = [LanguageLink]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Return a `LanguageLink` for every ``langlinks`` entry of every
        page in *query_resp*.

        Each link is built from a copy of the page dict with ``source``,
        ``url`` (the entry's ``'*'`` value) and ``language`` (the
        entry's ``'lang'`` value) set on it.
        """
        links = []
        for page in query_resp.get('pages', {}).values():
            for entry in page.get('langlinks', []):
                link_dict = dict(page,
                                 source=self.source,
                                 url=entry.get('*'),
                                 language=entry.get('lang'))
                links.append(LanguageLink.from_query(link_dict))
        return links
Exemple #28
0
class GetCategoryArticles(GetCategory):
    """
    Fetch the members of category, adding a static ``gcmnamespace``
    parameter of ``'0'`` to the `GetCategory` fields.

    NOTE(review): this docstring used to say "namespace 0 or 1", but the
    static parameter only lists ``'0'`` -- confirm which is intended.
    """
    fields = GetCategory.fields + [
        StaticParam('gcmnamespace', '0'),
    ]
    examples = [OperationExample('Featured_articles')]
Exemple #29
0
class GetMostLinkedCategories(GetQueryPage):
    """
    `GetQueryPage` preset for the ``Mostlinkedcategories`` query page;
    takes no input parameter.
    """
    input_field = None
    fields = GetQueryPage.fields + [StaticParam('gqppage',
                                                'Mostlinkedcategories')]
Exemple #30
0
class GetMostLinkedTemplates(GetQueryPage):
    """
    `GetQueryPage` preset for the ``Mostlinkedtemplates`` query page;
    takes no input parameter.
    """
    input_field = None
    fields = GetQueryPage.fields + [StaticParam('gqppage',
                                                'Mostlinkedtemplates')]