Beispiel #1
0
class GetPageRevisionInfos(QueryOperation):
    """
    Fetch revisions for pages.

    Every revision listed under a non-missing page is merged with that
    page's own data and turned into a ``RevisionInfo``.
    """
    field_prefix = 'rv'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS)]
    output_type = [RevisionInfo]
    examples = [OperationExample('Coffee', 10)]

    def extract_results(self, query_resp):
        """
        Return a list with one ``RevisionInfo`` per revision found in
        ``query_resp``.

        Pages carrying a ``missing`` key are skipped; pages without a
        ``revisions`` list contribute nothing.
        """
        results = []
        for page in query_resp.get('pages', {}).values():
            if 'missing' in page:
                continue
            for revision in page.get('revisions', []):
                # Start from the page-level data, then let the revision's
                # own keys override any page-level key of the same name.
                merged = dict(page)
                merged.update(revision)
                results.append(RevisionInfo.from_query(merged,
                                                       source=self.source))
        return results
Beispiel #2
0
class GetPageInfo(QueryOperation):
    """
    Fetch page info (requesting subject ID, talk ID and protection
    properties) for pages given by title.
    """
    field_prefix = 'in'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'info'),
              MultiParam('prop', 'subjectid|talkid|protection')]
    output_type = PageInfo
    examples = [OperationExample(['Coffee', 'Category:Africa'])]

    def extract_results(self, query_resp):
        """
        Return a list of ``PageInfo``, one per entry under
        ``query_resp['pages']``.
        """
        # The page-ID keys are not needed; only the page dicts are used.
        return [PageInfo.from_query(page, source=self.source)
                for page in query_resp['pages'].values()]
Beispiel #3
0
class GetRevisionContent(GetCurrentContent):
    """
    Fetch revision content, looked up by revision ID (``revids``) rather
    than by page title.
    """
    input_field = SingleParam('revids', key_prefix=False, attr='rev_id')
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS + '|content'),
              SingleParam('parse', False)]
    examples = [OperationExample('539916351')]
Beispiel #4
0
class GetCurrentTalkContent(GetCurrentContent):
    """
    Variant of ``GetCurrentContent`` whose input titles are prefixed with
    ``Talk:``.

    Because the prefix is applied naively, this operation only works for
    namespace 0 pages. Don't rely on this operation being around for long.
    """
    input_field = MultiParam('titles',
                             val_prefix='Talk:',
                             key_prefix=False)
    examples = [OperationExample('This page does not exist'),
                OperationExample('Coffee')]
Beispiel #5
0
class GetCoordinates(QueryOperation):
    """
    Fetch the coordinates attached to pages given by title.
    """
    field_prefix = 'co'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'coordinates'),
        SingleParam('primary', 'all'),  # primary, secondary, all
        MultiParam('prop', DEFAULT_COORD_PROPS)
    ]
    output_type = [CoordinateIdentifier]
    examples = [OperationExample(['White House', 'Mount Everest'])]

    def extract_results(self, query_resp):
        """
        Return a list with one ``CoordinateIdentifier`` per coordinate of
        every page under ``query_resp['pages']``.

        Pages that carry no ``coordinates`` entry contribute nothing.
        """
        ret = []
        for pid_dict in query_resp['pages'].values():
            page_ident = PageIdentifier.from_query(pid_dict,
                                                   source=self.source)
            # Append inside the loop: a page may have several coordinates
            # (the default 'primary' setting is 'all'), and a page with
            # none must not re-append a previous page's coordinate.
            for coord in pid_dict.get('coordinates', []):
                ret.append(CoordinateIdentifier(coord, page_ident))
        return ret
Beispiel #6
0
class GetRevisionInfos(GetPageRevisionInfos):
    """
    Fetch information about specific revisions, looked up by revision ID.
    """
    input_field = MultiParam('revids', attr='rev_id', key_prefix=False)
    output_type = RevisionInfo
    examples = [OperationExample(['538903663', '539916351', '531458383'])]

    def prepare_params(self, *a, **kw):
        """
        Build the request parameters as the parent class does, then drop
        the ``<field_prefix>limit`` entry.

        All positional and keyword arguments are forwarded to the parent
        implementation unchanged.
        """
        # Forward the caller's arguments instead of silently dropping them.
        ret = super(GetRevisionInfos, self).prepare_params(*a, **kw)
        # NOTE(review): presumably the API does not accept a limit when
        # querying by revision ID -- confirm against the MediaWiki docs.
        ret.pop(self.field_prefix + 'limit', None)
        return ret
Beispiel #7
0
class GetCurrentContent(QueryOperation):
    """
    Fetch full content for current (top) revision.
    """
    input_field = MultiParam('titles', key_prefix=False, attr='title')
    field_prefix = 'rv'
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS + '|content'),
              SingleParam('parse', False),
              SingleParam('redirects', True, key_prefix=False)]
    examples = [OperationExample('This page does not exist'),
                OperationExample('Coffee')]
    output_type = Revision

    def extract_results(self, query_resp):
        """
        Return a list of ``Revision`` objects, one per page in
        ``query_resp`` whose page ID is non-negative.

        Each revision is built from the page data merged with the first
        entry of the page's ``revisions`` list, and is tagged with the
        operation's input parameter as ``req_title``.
        """
        # TODO: make use of query_resp.get('redirects', []), mapping each
        # entry's 'from' title to its 'to' title.
        req_title = self.input_param
        parsed = self.kwargs.get('rvparse', False)

        revisions = []
        for page_id, page in query_resp.get('pages', {}).items():
            # Entries with a negative page ID are skipped.
            if int(page_id) < 0:
                continue
            merged = dict(page)
            merged.update(page['revisions'][0])
            rev = Revision.from_query(merged,
                                      source=self.source,
                                      is_parsed=parsed)
            rev.req_title = req_title
            revisions.append(rev)
        return revisions
Beispiel #8
0
class GetProtections(QueryOperation):
    """
    Fetch protection information for pages given by title.
    """
    field_prefix = 'in'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'protection'),
    ]
    output_type = ProtectionInfo
    examples = [OperationExample('Coffee'),
                OperationExample('Coffee|House'),
                OperationExample(['Coffee', 'House'])]

    def extract_results(self, query_resp):
        """
        Return a list of ``ProtectionInfo``, one per entry under
        ``query_resp['pages']``, built from that page's ``protection``
        value.
        """
        return [ProtectionInfo(page['protection'])
                for page in query_resp['pages'].values()]
Beispiel #9
0
class GetTemplates(QueryOperation):
    """
    Fetch page info for the templates used on pages given by title.
    """
    field_prefix = 'gtl'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('generator', 'templates'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection')]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Return a list of ``PageInfo``, one per entry under
        ``query_resp['pages']``.
        """
        return [PageInfo.from_query(page, source=self.source)
                for page in query_resp['pages'].values()]
Beispiel #10
0
class GetSourceInfo(QueryOperation):
    """
    Fetch meta site information about the source wiki.

    The default properties include:

    - General source information: Main Page, base, sitename, generator,
      phpversion,  phpsapi, dbtype, dbversion, case, rights, lang, fallback,
      fallback8bitEncoding, writeapi, timezone, timeoffset, articlepath,
      scriptpath, script, variantarticlepath, server, wikiid, time, misermode,
      maxuploadsize
    - Namespace map
    - Interwiki map
    - Statistics: pages, articles, edits, images, users, activeusers, admins,
      jobs
    """
    field_prefix = 'si'
    input_field = None
    fields = [StaticParam('meta', 'siteinfo'),
              MultiParam('prop', DEFAULT_PROPS)]
    output_type = SourceInfo

    def extract_results(self, query_resp):
        """
        Return a single-element list holding a ``SourceInfo`` built from
        ``query_resp``.

        The ``general`` section supplies the base keyword arguments. A
        ``namespace_map`` tuple of ``NamespaceDescriptor`` and an
        ``interwiki_map`` tuple of ``InterwikiDescriptor`` are added, and
        the ``statistics`` section (when present) is merged in last.

        Raises ``KeyError`` if ``query_resp`` has no ``general`` section.
        """
        # Work on a copy so the caller's response dict is left untouched.
        ret = dict(query_resp['general'])
        namespaces = query_resp.get('namespaces', {})
        interwikis = query_resp.get('interwikimap', [])

        ns_map = []
        for ns_dict in namespaces.values():
            ns_map.append(NamespaceDescriptor(ns_dict.get('id'),
                                              ns_dict.get('*'),
                                              ns_dict.get('canonical')))
        iw_map = []
        for iw in interwikis:
            iw_map.append(InterwikiDescriptor(iw.get('prefix'),
                                              iw.get('url'),
                                              iw.get('language')))
        ret['namespace_map'] = tuple(ns_map)
        ret['interwiki_map'] = tuple(iw_map)
        # Treat statistics as optional, like namespaces and the interwiki
        # map, since the requested properties can be overridden.
        ret.update(query_resp.get('statistics', {}))
        source_info = SourceInfo(**ret)
        return [source_info]
Beispiel #11
0
class GetLinks(QueryOperation):
    """
    Fetch page's outgoing links to other pages on source wiki.
    """
    field_prefix = 'gpl'
    input_field = SingleParam('titles', key_prefix=False)
    fields = [StaticParam('generator', 'links'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection'),
              MultiParam('namespace')]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee'), OperationExample('Aabach')]

    def extract_results(self, query_resp):
        """
        Return a list of ``PageInfo``, one per entry under
        ``query_resp['pages']``.
        """
        return [PageInfo.from_query(page, source=self.source)
                for page in query_resp['pages'].values()]
Beispiel #12
0
class GetImages(QueryOperation):
    """
    Fetch the images embedded on pages.
    """
    field_prefix = 'gim'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('generator', 'images'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection'),
    ]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Return a list of ``PageInfo``, one per entry under
        ``query_resp['pages']``.

        Entries whose key starts with ``-`` get their ``pageid`` set to
        ``None`` (in place, on the response dict) before conversion.
        """
        results = []
        for page_id, page in query_resp['pages'].items():
            if page_id.startswith('-'):
                # TODO: breaks consistency :/
                page['pageid'] = None
            results.append(PageInfo.from_query(page, source=self.source))
        return results
Beispiel #13
0
class GetCategory(QueryOperation):
    """
    Fetch the members in category.
    """
    field_prefix = 'gcm'
    input_field = SingleParam('title', val_prefix='Category:')
    fields = [StaticParam('generator', 'categorymembers'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection'),
              MultiParam('namespace')]
    output_type = [PageInfo]
    examples = [OperationExample('Featured_articles')]

    def extract_results(self, query_resp):
        """
        Return a list of ``PageInfo``, one per entry under
        ``query_resp['pages']``.
        """
        return [PageInfo.from_query(member, source=self.source)
                for member in query_resp['pages'].values()]
Beispiel #14
0
class GetInterwikiLinks(QueryOperation):
    """
    Fetch pages' interwiki links.
    """
    field_prefix = 'iw'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'iwlinks'),
        SingleParam('url', True),
    ]
    output_type = [InterwikiLink]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Return a list with one ``InterwikiLink`` per ``iwlinks`` entry of
        every page in ``query_resp``.

        Each link is built from a copy of its page's data with ``source``,
        ``url`` and ``prefix`` overridden.
        """
        links = []
        pages = query_resp.get('pages', {})
        for page in pages.values():
            for iw_entry in page.get('iwlinks', []):
                # Copy the page data and overlay the link-specific keys.
                link_data = dict(page,
                                 source=self.source,
                                 url=iw_entry.get('url'),
                                 prefix=iw_entry.get('prefix'))
                links.append(InterwikiLink.from_query(link_data))
        return links
Beispiel #15
0
class GetImageInfos(QueryOperation):
    """
    Fetch image info for pages given by title.
    """
    field_prefix = 'ii'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'imageinfo'),
        StaticParam('iiprop', IMAGE_INFO_PROPS),
    ]
    output_type = [ImageInfo]

    def extract_results(self, query_resp):
        """
        Return a list of ``ImageInfo``, one per entry under
        ``query_resp['pages']`` that could be converted.

        Entries with a negative key whose ``imagerepository`` is not
        ``'local'`` get ``pageid`` and ``revid`` set to ``'shared'``. The
        first ``imageinfo`` element (if any) is merged into the page dict.
        Entries raising ``ValueError`` during the merge or conversion are
        printed and skipped.
        """
        results = []
        for page_id, page in query_resp['pages'].items():
            is_remote = (int(page_id) < 0
                         and page['imagerepository'] != 'local')
            if is_remote:
                page['pageid'] = 'shared'
                page['revid'] = 'shared'
            try:
                first_info = page.get('imageinfo', [{}])[0]
                page.update(first_info)
                info = ImageInfo.from_query(page, source=self.source)
            except ValueError as e:
                # Best effort: report the problem and move on.
                print(e)
            else:
                results.append(info)
        return results
Beispiel #16
0
class GetCategoryList(QueryOperation):
    """
    Fetch the categories containing pages.
    """
    field_prefix = 'gcl'
    input_field = MultiParam('titles', key_prefix=False)
    # NOTE(review): 'gclshow' already includes the 'gcl' field prefix; if
    # SingleParam keys get prefixed by default, the emitted key may end up
    # doubled -- confirm against the Param implementation.
    fields = [StaticParam('generator', 'categories'),
              StaticParam('prop', 'categoryinfo'),
              SingleParam('gclshow', '')]  # hidden, !hidden
    output_type = [CategoryInfo]
    examples = [OperationExample('Physics')]

    def extract_results(self, query_resp):
        """
        Return a list of ``CategoryInfo``, one per entry under
        ``query_resp['pages']``, leaving out those whose ``page_id`` is
        negative.
        """
        categories = []
        for page in query_resp['pages'].values():
            info = CategoryInfo.from_query(page, source=self.source)
            if not info.page_id < 0:
                categories.append(info)
        return categories
Beispiel #17
0
class GeoSearch(QueryOperation):
    """
    Search for pages with coordinates near a given coordinate.
    """
    field_prefix = 'gs'
    input_field = MultiParam('coord')
    fields = [StaticParam('list', 'geosearch'),
              SingleParam('radius', 10000),  # must be within 10 and 10000
              #SingleParam('maxdim', 1000),  # does not work?
              SingleParam('globe', 'earth'),  # which planet? donno...
              SingleParam('namespace'),
              StaticParam('gsprop', DEFAULT_COORD_PROPS)]
    output_type = [CoordinateIdentifier]
    examples = [OperationExample(('37.8197', '-122.479'), 1)]

    def extract_results(self, query_resp):
        """
        Return a list of ``CoordinateIdentifier``, one per entry of
        ``query_resp['geosearch']``.

        Each entry serves both as the source of the page identifier and as
        the coordinate data.
        """
        return [
            CoordinateIdentifier(
                hit, PageIdentifier.from_query(hit, source=self.source))
            for hit in query_resp['geosearch']
        ]
Beispiel #18
0
class GetLanguageLinks(QueryOperation):
    """
    Fetch pages' interlanguage links (aka "Language Links" in the MediaWiki
    API). Interlanguage links should correspond to pages on another language
    wiki. Mostly useful on a source wiki with a family of similar multilingual
    projects, such as Wikipedias.
    """
    field_prefix = 'll'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'langlinks'),
        SingleParam('url', True),
    ]
    output_type = [LanguageLink]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Return a list with one ``LanguageLink`` per ``langlinks`` entry of
        every page in ``query_resp``.

        Each link is built from a copy of its page's data with ``source``,
        ``url`` (taken from the entry's ``*`` value) and ``language``
        (taken from the entry's ``lang`` value) overridden.
        """
        links = []
        pages = query_resp.get('pages', {})
        for page in pages.values():
            for lang_entry in page.get('langlinks', []):
                # Copy the page data and overlay the link-specific keys.
                link_data = dict(page,
                                 source=self.source,
                                 url=lang_entry.get('*'),
                                 language=lang_entry.get('lang'))
                links.append(LanguageLink.from_query(link_data))
        return links