def test_many_continuations_limited(self):
    """Test PropertyGenerator with many limited props.

    Queries five props for the titles of up to 30 links taken from the
    main page, using a query increment of 5, and checks that every
    yielded item is a dict with a 'pageid' key and that the number of
    yielded items equals the number of links.
    """
    mainpage = self.get_mainpage()
    links = list(self.site.pagelinks(mainpage, total=30))
    titles = [link.title(with_section=False) for link in links]

    params = {
        'rvprop': 'ids|flags|timestamp|user|comment|content',
        'titles': '|'.join(titles),
    }
    if self.site.version() >= MediaWikiVersion('1.32'):
        params['rvslots'] = 'main'

    gen = api.PropertyGenerator(
        site=self.site,
        prop='revisions|info|categoryinfo|langlinks|templates',
        parameters=params)
    # An APIError is raised if set_maximum_items is not called.
    gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
    # Force the generator into continuation mode
    gen.set_query_increment(5)

    # Stays 0 when the generator yields nothing at all.
    count = 0
    for count, pagedata in enumerate(gen, 1):
        self.assertIsInstance(pagedata, dict)
        self.assertIn('pageid', pagedata)
    self.assertEqual(len(links), count)
def test_info(self):
    """Test PropertyGenerator with prop 'info'.

    Every yielded item must be a dict containing 'pageid' and
    'lastrevid', and the number of yielded items must equal the number
    of links whose titles were requested.
    """
    mainpage = self.get_mainpage()
    links = list(self.site.pagelinks(mainpage, total=10))
    titles = [link.title(withSection=False) for link in links]
    joined_titles = '|'.join(titles)
    gen = api.PropertyGenerator(site=self.site,
                                prop="info",
                                titles=joined_titles)

    # Stays 0 when the generator yields nothing at all.
    count = 0
    for count, pagedata in enumerate(gen, 1):
        self.assertIsInstance(pagedata, dict)
        for key in ('pageid', 'lastrevid'):
            self.assertIn(key, pagedata)
    self.assertEqual(len(links), count)
def test_info(self):
    """Test PropertyGenerator with prop 'info'.

    Checks that each yielded item is a dict holding 'pageid' and
    'lastrevid', and that as many items are yielded as there are links.
    """
    mainpage = self.get_mainpage()
    links = list(self.site.pagelinks(mainpage, total=10))
    params = {
        'titles': '|'.join(
            [link.title(with_section=False) for link in links]),
    }
    gen = api.PropertyGenerator(site=self.site, prop='info',
                                parameters=params)

    seen = 0
    for pagedata in gen:
        self.assertIsInstance(pagedata, dict)
        self.assertIn('pageid', pagedata)
        self.assertIn('lastrevid', pagedata)
        seen += 1
    self.assertLength(links, seen)
def test_two_continuations(self):
    """Test PropertyGenerator with prop 'revisions' and 'coordinates'.

    Every yielded item must be a dict with 'pageid' and 'revisions',
    its first revision must carry a 'revid', and the number of yielded
    items must equal the number of links whose titles were requested.
    """
    mainpage = self.get_mainpage()
    links = list(self.site.pagelinks(mainpage, total=10))
    titles = [link.title(withSection=False) for link in links]
    gen = api.PropertyGenerator(
        site=self.site,
        prop="revisions|coordinates",
        titles='|'.join(titles),
    )
    gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter

    # Stays 0 when the generator yields nothing at all.
    count = 0
    for count, pagedata in enumerate(gen, 1):
        self.assertIsInstance(pagedata, dict)
        self.assertIn('pageid', pagedata)
        self.assertIn('revisions', pagedata)
        first_revision = pagedata['revisions'][0]
        self.assertIn('revid', first_revision)
    self.assertEqual(len(links), count)
def test_two_continuations_limited(self):
    """Test PropertyGenerator with many limited props and continuations.

    Uses a query increment of 5 over the titles of up to 30 main page
    links and checks that one dict with a 'pageid' key is yielded for
    each link.
    """
    mainpage = self.get_mainpage()
    links = list(self.site.pagelinks(mainpage, total=30))
    joined_titles = '|'.join(
        [link.title(with_section=False) for link in links])
    gen = api.PropertyGenerator(
        site=self.site,
        prop='info|categoryinfo|langlinks|templates',
        parameters={'titles': joined_titles})
    # Force the generator into continuation mode
    gen.set_query_increment(5)

    # Stays 0 when the generator yields nothing at all.
    count = 0
    for count, pagedata in enumerate(gen, 1):
        self.assertIsInstance(pagedata, dict)
        self.assertIn('pageid', pagedata)
    self.assertLength(links, count)
def _test_two_continuations_limited_long_test(self):
    """Long duration test, with total & step that are a real scenario.

    Requests props for the titles of up to 300 backlinks of the main
    page with a query increment of 50, and checks that the number of
    yielded dicts (each with a 'pageid' key) equals the number of
    backlinks.
    """
    mainpage = self.get_mainpage()
    links = list(mainpage.backlinks(total=300))
    titles = [link.title(with_section=False) for link in links]
    params = {'titles': '|'.join(titles)}
    gen = api.PropertyGenerator(
        site=self.site,
        prop='info|categoryinfo|langlinks|templates',
        parameters=params)
    # Force the generator into continuation mode
    gen.set_query_increment(50)

    seen = 0
    for pagedata in gen:
        self.assertIsInstance(pagedata, dict)
        self.assertIn('pageid', pagedata)
        seen += 1
    self.assertEqual(len(links), seen)
def test_two_continuations_limited(self):
    """Test PropertyGenerator with several props and a small increment.

    Checks that one dict with a 'pageid' key is yielded for each of the
    up to 30 main page links whose titles are requested.
    """
    # FIXME: test fails
    mainpage = self.get_mainpage()
    links = list(self.site.pagelinks(mainpage, total=30))
    titles = [link.title(withSection=False) for link in links]
    joined_titles = '|'.join(titles)
    gen = api.PropertyGenerator(
        site=self.site,
        prop="info|categoryinfo|langlinks|templates",
        titles=joined_titles)
    # Force the generator into continuation mode
    gen.set_query_increment(5)

    # Stays 0 when the generator yields nothing at all.
    count = 0
    for count, pagedata in enumerate(gen, 1):
        self.assertIsInstance(pagedata, dict)
        self.assertIn('pageid', pagedata)
    self.assertEqual(len(links), count)
def test_two_continuations(self):
    """Test PropertyGenerator with prop 'revisions' and 'coordinates'.

    Each yielded item must be a dict with 'pageid' and 'revisions'
    whose first revision has a 'revid'; as many items must be yielded
    as there are links.
    """
    mainpage = self.get_mainpage()
    links = list(self.site.pagelinks(mainpage, total=10))
    titles = [link.title(with_section=False) for link in links]
    params = {'titles': '|'.join(titles)}
    gen = api.PropertyGenerator(site=self.site,
                                prop='revisions|coordinates',
                                parameters=params)
    gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter

    seen = 0
    for pagedata in gen:
        self.assertIsInstance(pagedata, dict)
        self.assertIn('pageid', pagedata)
        self.assertIn('revisions', pagedata)
        first_revision = pagedata['revisions'][0]
        self.assertIn('revid', first_revision)
        seen += 1
    self.assertLength(links, seen)
def langlinks(self):
    """Return the language links of this page as a list of Link objects.

    The 'langlinks' property is queried for ``self.title()`` on
    ``self.site`` with ``lllimit='max'``. One ``Link`` is created per
    returned entry, using the entry's 'lang' value as language and its
    '*' value as title, with this object as source.

    The list is cached on the instance as ``_data``; later calls return
    the cached list without issuing another query.

    NOTE(review): the previous comment described the return value as a
    dict like {'nlwikivoyage': 'title', 'eswikivoyage': 'title'} that
    also includes an entry for the current site. The code builds a list
    of Link objects; whether the current site is included depends on the
    API response -- confirm against callers.
    """
    if hasattr(self, '_data'):
        return self._data

    gen = api.PropertyGenerator(
        'langlinks',
        titles=self.title(),
        lllimit='max',
        site=self.site,
    )
    # Collect into a local list and publish it only after the query has
    # been consumed completely; otherwise an exception part-way through
    # would leave a truncated list cached for every later call.
    links = []
    for pg in gen:
        if 'langlinks' in pg:
            links.extend(
                Link(source=self, lang=obj['lang'], title=obj['*'])
                for obj in pg['langlinks'])
    self._data = links
    return self._data
def test_many_continuations_limited(self):
    """Test PropertyGenerator with many props and a small increment.

    Requests five props, including revision content, for the titles of
    up to 30 main page links and checks that the number of yielded
    dicts (each with a 'pageid' key) equals the number of links.
    """
    mainpage = self.get_mainpage()
    links = list(self.site.pagelinks(mainpage, total=30))
    titles = [link.title(withSection=False) for link in links]
    joined_titles = '|'.join(titles)
    gen = api.PropertyGenerator(
        site=self.site,
        prop="revisions|info|categoryinfo|langlinks|templates",
        rvprop="ids|flags|timestamp|user|comment|content",
        titles=joined_titles)
    # An APIError is raised if set_maximum_items is not called.
    gen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
    # Force the generator into continuation mode
    gen.set_query_increment(5)

    seen = 0
    for pagedata in gen:
        self.assertIsInstance(pagedata, dict)
        self.assertIn('pageid', pagedata)
        seen += 1
    self.assertEqual(len(links), seen)
def PageRevIdGenerator(site, pagelist, step=50):
    """
    Generate page objects with their most recent revision ID.

    This generator is a modified version of `preloadpages` in pywikibot.site.

    The pages are processed in the groups produced by
    ``itergroup(pagelist, step)``. For each group one property query
    ('revisions|info|categoryinfo') is issued and every result is applied
    to the matching page with ``api.update_page``. Afterwards all pages
    of the group are yielded in their input order, including pages for
    which the query returned no usable result.

    :param site: site we're requesting page IDs from
    :param pagelist: an iterable that returns Page objects
    :param step: how many Pages to query at a time
    :type step: int
    """
    for sublist in itergroup(pagelist, step):
        pageids = [
            str(p._pageid)
            for p in sublist
            if hasattr(p, "_pageid") and p._pageid > 0
        ]
        # Maps the title used in the query to its Page object.
        cache = dict((p.title(withSection=False), p) for p in sublist)

        props = "revisions|info|categoryinfo"
        rvgen = api.PropertyGenerator(props, site=site)
        rvgen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
        if len(pageids) == len(sublist):
            # only use pageids if all pages have them
            rvgen.request["pageids"] = "|".join(pageids)
        else:
            rvgen.request["titles"] = "|".join(cache)
        rvgen.request[u"rvprop"] = u"ids|flags|timestamp|user|comment"

        # Arguments are passed separately so the messages are only
        # formatted when the log level is actually enabled.
        logging.debug(u"Retrieving %s pages from %s.", len(cache), site)
        for pagedata in rvgen:
            logging.debug(u"Preloading %s", pagedata)
            try:
                title = pagedata['title']
                if title not in cache:
                    # API always returns a "normalized" title which is
                    # usually the same as the canonical form returned by
                    # page.title(), but sometimes not (e.g.,
                    # gender-specific localizations of "User" namespace).
                    # This checks to see if there is a normalized title in
                    # the response that corresponds to the canonical form
                    # used in the query.
                    for key in cache:
                        if site.sametitle(key, title):
                            # Safe while iterating: we break right away.
                            cache[title] = cache[key]
                            break
                    else:
                        logging.warning(
                            u"preloadpages: Query returned unexpected "
                            u"title '%s'", title)
                        continue
            except KeyError:
                logging.debug(u"No 'title' in %s", pagedata)
                logging.debug(u"pageids=%s", pageids)
                logging.debug(u"titles=%s", list(cache))
                continue
            api.update_page(cache[title], pagedata)

        # Since we're not loading content and the pages are already in
        # memory, let's yield the pages in the same order as they were
        # received in case that's important.
        for page in sublist:
            yield page