Example #1
    def test_many_continuations_limited(self):
        """Test PropertyGenerator with many limited props."""
        mainpage = self.get_mainpage()
        links = list(self.site.pagelinks(mainpage, total=30))
        titles = [l.title(with_section=False) for l in links]
        params = {
            'rvprop': 'ids|flags|timestamp|user|comment|content',
            'titles': '|'.join(titles)
        }
        if self.site.version() >= MediaWikiVersion('1.32'):
            params['rvslots'] = 'main'
        gen = api.PropertyGenerator(
            site=self.site,
            prop='revisions|info|categoryinfo|langlinks|templates',
            parameters=params)

        # An APIError is raised if set_maximum_items is not called.
        gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
        # Force the generator into continuation mode
        gen.set_query_increment(5)

        count = 0
        for pagedata in gen:
            self.assertIsInstance(pagedata, dict)
            self.assertIn('pageid', pagedata)
            count += 1
        self.assertEqual(len(links), count)
Example #2
    def test_info(self):
        """Test PropertyGenerator with prop 'info'."""
        mainpage = self.get_mainpage()
        links = list(self.site.pagelinks(mainpage, total=10))
        titles = [l.title(withSection=False) for l in links]
        gen = api.PropertyGenerator(site=self.site,
                                    prop="info",
                                    titles='|'.join(titles))

        count = 0
        for pagedata in gen:
            self.assertIsInstance(pagedata, dict)
            self.assertIn('pageid', pagedata)
            self.assertIn('lastrevid', pagedata)
            count += 1
        self.assertEqual(len(links), count)
Example #3
    def test_info(self):
        """Test PropertyGenerator with prop 'info'."""
        mainpage = self.get_mainpage()
        links = list(self.site.pagelinks(mainpage, total=10))
        titles = [link.title(with_section=False) for link in links]
        gen = api.PropertyGenerator(site=self.site,
                                    prop='info',
                                    parameters={'titles': '|'.join(titles)})

        count = 0
        for pagedata in gen:
            self.assertIsInstance(pagedata, dict)
            self.assertIn('pageid', pagedata)
            self.assertIn('lastrevid', pagedata)
            count += 1
        self.assertLength(links, count)
Example #4
    def test_two_continuations(self):
        """Test PropertyGenerator with prop 'revisions' and 'coordinates'."""
        mainpage = self.get_mainpage()
        links = list(self.site.pagelinks(mainpage, total=10))
        titles = [l.title(withSection=False) for l in links]
        gen = api.PropertyGenerator(site=self.site,
                                    prop="revisions|coordinates",
                                    titles='|'.join(titles))
        gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter

        count = 0
        for pagedata in gen:
            self.assertIsInstance(pagedata, dict)
            self.assertIn('pageid', pagedata)
            self.assertIn('revisions', pagedata)
            self.assertIn('revid', pagedata['revisions'][0])
            count += 1
        self.assertEqual(len(links), count)
Example #5
    def test_two_continuations_limited(self):
        """Test PropertyGenerator with many limited props and continuations."""
        mainpage = self.get_mainpage()
        links = list(self.site.pagelinks(mainpage, total=30))
        titles = [link.title(with_section=False) for link in links]
        gen = api.PropertyGenerator(
            site=self.site, prop='info|categoryinfo|langlinks|templates',
            parameters={'titles': '|'.join(titles)})
        # Force the generator into continuation mode
        gen.set_query_increment(5)

        count = 0
        for pagedata in gen:
            self.assertIsInstance(pagedata, dict)
            self.assertIn('pageid', pagedata)
            count += 1
        self.assertLength(links, count)
Example #6
    def _test_two_continuations_limited_long_test(self):
        """Long duration test, with total & step that are a real scenario."""
        mainpage = self.get_mainpage()
        links = list(mainpage.backlinks(total=300))
        titles = [l.title(with_section=False) for l in links]
        gen = api.PropertyGenerator(
            site=self.site,
            prop='info|categoryinfo|langlinks|templates',
            parameters={'titles': '|'.join(titles)})
        # Force the generator into continuation mode
        gen.set_query_increment(50)

        count = 0
        for pagedata in gen:
            self.assertIsInstance(pagedata, dict)
            self.assertIn('pageid', pagedata)
            count += 1
        self.assertEqual(len(links), count)
Example #7
    def test_two_continuations_limited(self):
        """Test PropertyGenerator with many limited props and continuations."""
        # FIXME: test fails
        mainpage = self.get_mainpage()
        links = list(self.site.pagelinks(mainpage, total=30))
        titles = [l.title(withSection=False) for l in links]
        gen = api.PropertyGenerator(
            site=self.site,
            prop="info|categoryinfo|langlinks|templates",
            titles='|'.join(titles))
        # Force the generator into continuation mode
        gen.set_query_increment(5)

        count = 0
        for pagedata in gen:
            self.assertIsInstance(pagedata, dict)
            self.assertIn('pageid', pagedata)
            count += 1
        self.assertEqual(len(links), count)
Example #8
    def test_two_continuations(self):
        """Test PropertyGenerator with prop 'revisions' and 'coordinates'."""
        mainpage = self.get_mainpage()
        links = list(self.site.pagelinks(mainpage, total=10))
        titles = [link.title(with_section=False) for link in links]
        gen = api.PropertyGenerator(site=self.site,
                                    prop='revisions|coordinates',
                                    parameters={'titles': '|'.join(titles)})
        gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter

        count = 0
        for pagedata in gen:
            self.assertIsInstance(pagedata, dict)
            self.assertIn('pageid', pagedata)
            self.assertIn('revisions', pagedata)
            self.assertIn('revid', pagedata['revisions'][0])
            count += 1
        self.assertLength(links, count)
Example #9
    def langlinks(self):
        # Return format is like
        # {'nlwikivoyage': 'title', 'eswikivoyage': 'title'}.
        # Will also include an object for the current site.
        if hasattr(self, '_data'):
            return self._data
        self._data = []
        gen = api.PropertyGenerator(
            'langlinks',
            titles=self.title(),
            lllimit='max',
            site=self.site,
        )
        for pg in gen:
            if 'langlinks' in pg:
                for obj in pg['langlinks']:
                    self._data.append(
                        Link(source=self, lang=obj['lang'], title=obj['*']))
        return self._data
Example #10
    def test_many_continuations_limited(self):
        """Test PropertyGenerator with many limited props."""
        mainpage = self.get_mainpage()
        links = list(self.site.pagelinks(mainpage, total=30))
        titles = [l.title(withSection=False) for l in links]
        gen = api.PropertyGenerator(
            site=self.site,
            prop="revisions|info|categoryinfo|langlinks|templates",
            rvprop="ids|flags|timestamp|user|comment|content",
            titles='|'.join(titles))

        # An APIError is raised if set_maximum_items is not called.
        gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
        # Force the generator into continuation mode
        gen.set_query_increment(5)

        count = 0
        for pagedata in gen:
            self.assertIsInstance(pagedata, dict)
            self.assertIn('pageid', pagedata)
            count += 1
        self.assertEqual(len(links), count)
Example #11
File: page.py  Project: sema0703/suggestbot
def PageRevIdGenerator(site, pagelist, step=50):
    """
    Generate page objects with their most recent revision ID.
    
    This generator is a modified version of `preloadpages` in pywikibot.site.

    :param site: site we're requesting page IDs from
    :param pagelist: an iterable that returns Page objects
    :param step: how many Pages to query at a time
    :type step: int
    """
    for sublist in itergroup(pagelist, step):
        pageids = [
            str(p._pageid) for p in sublist
            if hasattr(p, "_pageid") and p._pageid > 0
        ]
        cache = dict((p.title(withSection=False), p) for p in sublist)
        props = "revisions|info|categoryinfo"
        rvgen = api.PropertyGenerator(props, site=site)
        rvgen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
        if len(pageids) == len(sublist):
            # only use pageids if all pages have them
            rvgen.request["pageids"] = "|".join(pageids)
        else:
            rvgen.request["titles"] = "|".join(list(cache.keys()))
        rvgen.request[u"rvprop"] = u"ids|flags|timestamp|user|comment"

        logging.debug(u"Retrieving {n} pages from {s}.".format(n=len(cache),
                                                               s=site))
        for pagedata in rvgen:
            logging.debug(u"Preloading {0}".format(pagedata))
            try:
                if pagedata['title'] not in cache:
                    # API always returns a "normalized" title which is
                    # usually the same as the canonical form returned by
                    # page.title(), but sometimes not (e.g., gender-specific
                    # localizations of "User" namespace). This checks to see
                    # if there is a normalized title in the response that
                    # corresponds to the canonical form used in the query.
                    for key in cache:
                        if site.sametitle(key, pagedata['title']):
                            cache[pagedata['title']] = cache[key]
                            break
                    else:
                        logging.warning(
                            u"preloadpages: Query returned unexpected title"
                            u"'%s'" % pagedata['title'])
                        continue
            except KeyError:
                logging.debug(u"No 'title' in %s" % pagedata)
                logging.debug(u"pageids=%s" % pageids)
                logging.debug(u"titles=%s" % list(cache.keys()))
                continue
            page = cache[pagedata['title']]
            api.update_page(page, pagedata)

        # Since we're not loading content and the pages are already in
        # memory, let's yield the pages in the same order as they were
        # received in case that's important.
        for page in sublist:
            yield page
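
The docstring above spells out the expected arguments; what follows is a minimal, hedged usage sketch rather than code from the suggestbot project. The wiki, the page titles, and the import of PageRevIdGenerator are placeholders, and reading latest_revision_id assumes a reasonably recent pywikibot in which api.update_page() has already attached the revision ID.

import pywikibot

# from page import PageRevIdGenerator  # import path depends on the project layout

site = pywikibot.Site('en', 'wikipedia')    # placeholder wiki
pages = [pywikibot.Page(site, title)        # placeholder titles
         for title in ('Main Page', 'Wikipedia')]

# Pages are yielded in the same order they were passed in, with revision
# data already attached by api.update_page() inside the generator.
for page in PageRevIdGenerator(site, pages, step=50):
    print(page.title(), page.latest_revision_id)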