def test_get_page_number(self, key):
    """Test IndexPage label lookups and the page/number mappings.

    Every ``(label, expected)`` pair of the ``get_number`` and ``get_page``
    datasets in ``self.sites[key]`` is looked up with the label as given
    and as ``str(label)``. An unknown label must raise KeyError. Finally
    the private ``_numbers_from_page`` and ``_page_from_numbers`` mappings
    are checked against each other, using the last page set and the last
    number set taken from the datasets.

    @param key: key of the dataset in ``self.sites``
    """
    data = self.sites[key]
    index_page = IndexPage(self.site, self.sites[key]["index"])

    # Fallback so the consistency loop below is a no-op (instead of a
    # NameError) when the 'get_number' dataset is empty.
    num_set = []

    # Test get_page_number_from_label.
    for label, num_set in data["get_number"]:
        # Get set of numbers from label with label as int or str.
        self.assertEqual(index_page.get_page_number_from_label(label),
                         num_set)
        self.assertEqual(index_page.get_page_number_from_label(str(label)),
                         num_set)

    # Error if label does not exist.
    # NOTE: num_set is deliberately NOT reset here; the consistency check
    # at the end needs the last number set of the dataset.
    self.assertRaises(KeyError, index_page.get_page_number_from_label,
                      "dummy label")

    # Test get_page_from_label.
    for label, page_set in data["get_page"]:
        # Get set of pages from label with label as int or str.
        self.assertEqual(index_page.get_page_from_label(label), page_set)
        self.assertEqual(index_page.get_page_from_label(str(label)),
                         page_set)

    # Error if label does not exist.
    self.assertRaises(KeyError, index_page.get_page_from_label,
                      "dummy label")

    # Test consistency of the page/number mappings on the last page_set
    # and num_set used.
    for p in page_set:
        n = index_page._numbers_from_page[p]
        self.assertEqual(index_page._page_from_numbers[n], p)

    for n in num_set:
        # Map the number to its page, then back to the number.
        p = index_page._page_from_numbers[n]
        self.assertEqual(index_page._numbers_from_page[p], n)
def test_valid_link_as_source(self):
    """Check that an IndexPage can be built from a valid Link.

    The resulting page must report the same title (without namespace)
    and the same namespace as the Link it was created from.
    """
    link = pywikibot.Link(
        self.valid_index_title,
        source=self.site,
        defaultNamespace=self.site.proofread_page_ns)
    index_page = IndexPage(link)

    bare_title = index_page.title(withNamespace=False)
    self.assertEqual(bare_title, link.title)

    page_ns = index_page.namespace()
    self.assertEqual(page_ns, link.namespace)
def test_page_number_mapping(self, key):
    """Check label lookup by page number and by page object.

    The expected ``(num, title_num, label)`` triple is read from the
    ``get_label`` entry of ``self.sites[key]``.

    @param key: key of the dataset in ``self.sites``
    """
    site_data = self.sites[key]
    num, title_num, label = site_data["get_label"]

    index_page = IndexPage(self.site, site_data["index"])
    title = site_data["page"].format(title_num)
    proofread_page = ProofreadPage(self.site, title)

    # Lookup by number; an unknown number must raise KeyError.
    label_by_number = index_page.get_label_from_page_number(num)
    self.assertEqual(label_by_number, label)
    with self.assertRaises(KeyError):
        index_page.get_label_from_page_number(-1)

    # Lookup by page; an unknown page (None) must raise KeyError.
    label_by_page = index_page.get_label_from_page(proofread_page)
    self.assertEqual(label_by_page, label)
    with self.assertRaises(KeyError):
        index_page.get_label_from_page(None)
def test_get_labels(self, key):
    """Exercise the IndexPage get_label_from_* methods.

    The expected ``(num, title_num, label)`` triple is read from the
    ``get_label`` entry of ``self.sites[key]``.

    @param key: key of the dataset in ``self.sites``
    """
    site_data = self.sites[key]
    num, title_num, label = site_data['get_label']

    index_page = IndexPage(self.site, site_data['index'])
    title = site_data['page'].format(title_num)
    proofread_page = ProofreadPage(self.site, title)

    # Lookup by number; an unknown number must raise KeyError.
    label_by_number = index_page.get_label_from_page_number(num)
    self.assertEqual(label_by_number, label)
    with self.assertRaises(KeyError):
        index_page.get_label_from_page_number(-1)

    # Lookup by page; an unknown page (None) must raise KeyError.
    label_by_page = index_page.get_label_from_page(proofread_page)
    self.assertEqual(label_by_page, label)
    with self.assertRaises(KeyError):
        index_page.get_label_from_page(None)
def test_page_gen(self, key):
    """Exercise IndexPage.page_gen limits and quality filters.

    Invalid start/end combinations must raise ValueError. For the single
    page number taken from the ``get_label`` dataset, the generator must
    yield exactly that page with ``filter_ql=range(5)`` and nothing with
    ``filter_ql=[0]``.

    @param key: key of the dataset in ``self.sites``
    """
    site_data = self.sites[key]
    num, title_num, label = site_data['get_label']

    index_page = IndexPage(self.site, site_data['index'])
    title = site_data['page'].format(title_num)
    proofread_page = ProofreadPage(self.site, title)

    # Check start/end limits.
    invalid_ranges = ((-1, 2), (1, -1), (2, 1))
    for start, end in invalid_ranges:
        self.assertRaises(ValueError, index_page.page_gen, start, end)

    # Check quality filters.
    all_levels = list(index_page.page_gen(num, num, filter_ql=range(5)))
    self.assertEqual(all_levels, [proofread_page])

    level_zero = list(index_page.page_gen(num, num, filter_ql=[0]))
    self.assertEqual(level_zero, [])
def test_get_page_and_number(self, key):
    """Test IndexPage label lookups and get_page/get_number round trips.

    Every ``(label, expected)`` pair of the ``get_number`` and ``get_page``
    datasets in ``self.sites[key]`` is looked up with the label as given
    and as ``str(label)``. An unknown label must raise KeyError. Finally
    ``get_page`` and ``get_number`` are checked to be inverse of each
    other on the last number set and the last page set of the datasets.

    @param key: key of the dataset in ``self.sites``
    """
    data = self.sites[key]
    index_page = IndexPage(self.site, self.sites[key]['index'])

    # Fallback so the get_page loop below is a no-op (instead of a
    # NameError) when the 'get_number' dataset is empty.
    num_set = []

    # Test get_page_number_from_label.
    for label, num_set in data['get_number']:
        # Get set of numbers from label with label as int or str.
        self.assertEqual(index_page.get_page_number_from_label(label),
                         num_set)
        self.assertEqual(index_page.get_page_number_from_label(str(label)),
                         num_set)

    # Error if label does not exist.
    # NOTE: num_set is deliberately NOT reset here; resetting it to an
    # empty list would turn the "Test get_page" loop into dead code.
    self.assertRaises(KeyError, index_page.get_page_number_from_label,
                      'dummy label')

    # Test get_page_from_label.
    for label, page_set in data['get_page']:
        # Get set of pages from label with label as int or str.
        self.assertEqual(index_page.get_page_from_label(label), page_set)
        self.assertEqual(index_page.get_page_from_label(str(label)),
                         page_set)

    # Error if label does not exist.
    self.assertRaises(KeyError, index_page.get_page_from_label,
                      'dummy label')

    # Test get_page: number -> page -> number.
    for n in num_set:
        p = index_page.get_page(n)
        self.assertEqual(index_page.get_number(p), n)

    # Test get_number: page -> number -> page.
    for p in page_set:
        n = index_page.get_number(p)
        self.assertEqual(index_page.get_page(n), p)
def test_check_if_cached(self, key):
    """Check that the label cache is loaded on demand and can be reloaded.

    A fresh IndexPage must have ``_cached`` set to False; the first label
    lookup must flip it to True. A value planted in the cache is returned
    as long as ``_cached`` is True, and is replaced by the real label once
    ``_cached`` is reset to False.

    @param key: key of the dataset in ``self.sites``
    """
    site_data = self.sites[key]
    index_page = IndexPage(self.site, site_data['index'])
    num, title_num, label = site_data['get_label']

    # The first lookup populates the cache.
    self.assertIs(index_page._cached, False)
    fetched_label = index_page.get_label_from_page_number(num)
    self.assertIs(index_page._cached, True)
    self.assertEqual(label, fetched_label)

    # Check if cache is refreshed.
    planted = 'wrong cached value'
    index_page._labels_from_page_number[num] = planted
    self.assertEqual(index_page.get_label_from_page_number(num), planted)

    index_page._cached = False
    reloaded_label = index_page.get_label_from_page_number(num)
    self.assertEqual(reloaded_label, label)
def test_valid_site_as_source(self):
    """Check that an IndexPage built from a Site is in the index namespace."""
    index_page = IndexPage(self.site, 'Index:dummy test page')
    actual_ns = index_page.namespace()
    self.assertEqual(actual_ns, self.site.proofread_index_ns)
def main(*args):
    """
    Parse the command line, build the page generator and run the bot.

    If args is an empty list, sys.argv is used.

    Returns False when '-index' is missing, when the site lacks the
    ProofreadPage extension or when the index page does not exist.

    @param args: command line arguments
    @type args: list of unicode
    """
    index = None
    pages = '1-'
    options = {}

    # Parse command line arguments.
    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        if name == '-index':
            index = value
        elif name == '-pages':
            pages = value
        elif name == '-summary':
            options['summary'] = value
        elif name == '-always':
            options['always'] = True
        elif name == '-showdiff':
            # Option is only reported as deprecated; it sets nothing.
            issue_deprecation_warning('The usage of -showdiff option',
                                      None, 0)
        elif name == '-force':
            # Option is only reported as deprecated; it sets nothing.
            issue_deprecation_warning('The usage of -force option',
                                      None, 0)
        else:
            pywikibot.output('Unknown argument %s' % name)

    # index is mandatory.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error('Site %s must have ProofreadPage extension.' % site)
        return False

    index = IndexPage(site, index)
    if not index.exists():
        pywikibot.error("Page %s doesn't exist." % index)
        return False

    # Parse pages param into a list of (start, end) tuples.
    intervals = []
    for chunk in pages.split(','):
        start, sep, end = chunk.partition('-')
        start = int(start) if start else 1
        if sep:
            # Open-ended range: run up to the last page of the index.
            end = int(end) if end else index.num_pages
        else:
            # Single page number.
            end = start
        intervals.append((start, end))

    generators = [
        index.page_gen(start=start, end=end, filter_ql=[1], content=False)
        for start, end in sorted(intervals)
    ]
    gen = itertools.chain(*generators)

    pywikibot.output('\nUploading text to %s\n' % index.title(asLink=True))

    bot = UploadTextBot(gen, site=index.site, **options)
    bot.run()
def setUpClass(cls):
    """Run the parent class set-up, then create the shared IndexPage."""
    super(TestIndexPageHasValidContent, cls).setUpClass()
    index_page = IndexPage(cls.site, cls.index_name)
    cls.index = index_page
def main(*args):
    """
    Parse the command line, build the page generator and run the bot.

    If args is an empty list, sys.argv is used.

    Returns False when '-index' is missing, when '-force' is given
    without '-ocr', when the site lacks the ProofreadPage extension or
    when the index page does not exist.

    @param args: command line arguments
    @type args: list of unicode
    """
    index = None
    pages = '1-'
    options = {}

    # Switches that simply set the named option to True.
    flag_options = {
        '-showdiff': 'showdiff',
        '-ocr': 'ocr',
        '-force': 'force',
        '-always': 'always',
    }

    # Parse command line arguments.
    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        if name == '-index':
            index = value
        elif name == '-pages':
            pages = value
        elif name == '-summary':
            options['summary'] = value
        elif name in flag_options:
            options[flag_options[name]] = True
        else:
            pywikibot.output('Unknown argument %s' % name)

    # index is mandatory.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    # '-force' can be used with '-ocr' only.
    if 'force' in options and 'ocr' not in options:
        pywikibot.error("'-force' can be used with '-ocr' option only.")
        return False

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error('Site %s must have ProofreadPage extension.' % site)
        return False

    index = IndexPage(site, index)
    if not index.exists():
        pywikibot.error("Page %s doesn't exist." % index)
        return False

    # Parse pages param into a list of (start, end) tuples.
    intervals = []
    for chunk in pages.split(','):
        start, sep, end = chunk.partition('-')
        start = int(start) if start else 1
        if sep:
            # Open-ended range: run up to the last page of the index.
            end = int(end) if end else index.num_pages
        else:
            # Single page number.
            end = start
        intervals.append((start, end))

    # gen yields ProofreadPage objects.
    generators = [
        index.page_gen(start=start, end=end, filter_ql=[1], content=False)
        for start, end in sorted(intervals)
    ]
    gen = itertools.chain(*generators)

    pywikibot.output('\nUploading text to %s\n' % index.title(asLink=True))

    bot = UploadTextBot(gen, site=index.site, **options)
    bot.run()
def setUp(self):
    """Create the IndexPage under test, then run the parent set-up."""
    index_title = 'Index:Popular Science Monthly Volume 1.djvu'
    # The page is assigned before the parent setUp is called.
    self._page = IndexPage(self.site, index_title)
    super().setUp()
def test_invalid_link_as_source(self):
    """Check that IndexPage rejects an invalid Link with ValueError."""
    link = pywikibot.Link(self.not_existing_invalid_title,
                          source=self.site)
    self.assertRaises(ValueError, IndexPage, link)
def test_invalid_not_existing_page_as_source(self):
    """Check that IndexPage rejects a Page built from the invalid title.

    The Page is created from ``self.not_existing_invalid_title``; passing
    it to IndexPage must raise ValueError.
    """
    invalid_page = pywikibot.Page(self.site,
                                  self.not_existing_invalid_title)
    self.assertRaises(ValueError, IndexPage, invalid_page)
def setUpClass(cls):
    """Run the parent class set-up, then create the shared IndexPage."""
    super().setUpClass()
    index_page = IndexPage(cls.site, cls.index_name)
    cls.index = index_page
def test_invalid_site_source(self):
    """Check that IndexPage raises UnknownExtensionError for this site."""
    self.assertRaises(UnknownExtensionError, IndexPage, self.site, 'title')
def setUpClass(cls):
    """Run the parent class set-up, then create the page fixtures.

    Sets ``cls.index`` (IndexPage), ``cls.pages`` (one ProofreadPage per
    entry of ``cls.page_names``) and ``cls.missing`` (ProofreadPage for
    ``cls.missing_name``).
    """
    super(TestIndexPageMappingsRedlinks, cls).setUpClass()
    site = cls.site
    cls.index = IndexPage(site, cls.index_name)
    cls.pages = [ProofreadPage(cls.site, title)
                 for title in cls.page_names]
    cls.missing = ProofreadPage(cls.site, cls.missing_name)
def test_num_pages(self, key):
    """Check the num_pages property against the dataset value.

    @param key: key of the dataset in ``self.sites``
    """
    index_title = self.sites[key]['index']
    index_page = IndexPage(self.site, index_title)
    actual = index_page.num_pages
    self.assertEqual(actual, self.sites[key]['num_pages'])
def setUp(self):
    """Create the IndexPage under test, then run the parent set-up."""
    index_title = 'Index:Popular Science Monthly Volume 1.djvu'
    # The page is assigned before the parent setUp is called.
    self._page = IndexPage(self.site, index_title)
    super(TestLoadRevisionsCachingIndexPage, self).setUp()
def main(*args):
    """
    Parse the command line, build the page generator and run the bot.

    If args is an empty list, sys.argv is used.

    Returns False when '-index' is missing, when '-force' is given
    without '-ocr', when the site lacks the ProofreadPage extension or
    when the index page does not exist.

    @param args: command line arguments
    @type args: str
    """
    index = None
    pages = '1-'
    options = {}

    # Switches that simply set the named option to True.
    flag_options = {
        '-showdiff': 'showdiff',
        '-force': 'force',
        '-always': 'always',
    }

    # Parse command line arguments.
    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        if name == '-index':
            index = value
        elif name == '-pages':
            pages = value
        elif name == '-summary':
            options['summary'] = value
        elif name == '-ocr':
            # An empty value falls back to 'phetools'.
            options['ocr'] = value or 'phetools'
        elif name == '-threads':
            options['threads'] = int(value)
        elif name in flag_options:
            options[flag_options[name]] = True
        else:
            pywikibot.output('Unknown argument ' + name)

    # index is mandatory.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    # '-force' can be used with '-ocr' only.
    if 'force' in options and 'ocr' not in options:
        pywikibot.error("'-force' can be used with '-ocr' option only.")
        return False

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error(
            'Site {} must have ProofreadPage extension.'.format(site))
        return False

    index = IndexPage(site, index)
    if not index.exists():
        pywikibot.error("Page {} doesn't exist.".format(index))
        return False

    # Parse pages param into a list of (start, end) tuples.
    intervals = []
    for chunk in pages.split(','):
        start, sep, end = chunk.partition('-')
        start = int(start) if start else 1
        if sep:
            # Open-ended range: run up to the last page of the index.
            end = int(end) if end else index.num_pages
        else:
            # Single page number.
            end = start
        intervals.append((start, end))

    # gen yields ProofreadPage objects.
    generators = [
        index.page_gen(start=start, end=end, filter_ql=[1], content=True)
        for start, end in sorted(intervals)
    ]
    gen = itertools.chain(*generators)

    pywikibot.output('\nUploading text to {}\n'.format(
        index.title(as_link=True)))

    bot = UploadTextBot(gen, site=index.site, **options)
    bot.run()
def test_invalid_existing_page_as_source(self):
    """Check that IndexPage rejects a Page built from the invalid title.

    The Page is created from ``self.existing_invalid_title``; passing it
    to IndexPage must raise ValueError.
    """
    invalid_page = pywikibot.Page(self.site, self.existing_invalid_title)
    self.assertRaises(ValueError, IndexPage, invalid_page)
def test_get_page_and_number(self, key):
    """Test IndexPage label lookups and get_page/get_number round trips.

    Every ``(label, expected)`` pair of the ``get_number`` and ``get_page``
    datasets in ``self.sites[key]`` is looked up with the label as given
    and as ``str(label)``. An unknown label must raise KeyError. Finally
    ``get_page`` and ``get_number`` are checked to be inverse of each
    other on the last number set and the last page set of the datasets.

    @param key: key of the dataset in ``self.sites``
    """
    data = self.sites[key]
    index_page = IndexPage(self.site, self.sites[key]['index'])

    # Fallback so the get_page loop below is a no-op (instead of a
    # NameError) when the 'get_number' dataset is empty.
    num_set = []

    # Test get_page_number_from_label.
    for label, num_set in data['get_number']:
        # Get set of numbers from label with label as int or str.
        self.assertEqual(index_page.get_page_number_from_label(label),
                         num_set)
        self.assertEqual(index_page.get_page_number_from_label(str(label)),
                         num_set)

    # Error if label does not exist.
    # NOTE: num_set is deliberately NOT reset here; resetting it to an
    # empty list would turn the "Test get_page" loop into dead code.
    with self.assertRaises(KeyError):
        index_page.get_page_number_from_label('dummy label')

    # Test get_page_from_label.
    for label, page_set in data['get_page']:
        # Get set of pages from label with label as int or str.
        self.assertEqual(index_page.get_page_from_label(label), page_set)
        self.assertEqual(index_page.get_page_from_label(str(label)),
                         page_set)

    # Error if label does not exist.
    with self.assertRaises(KeyError):
        index_page.get_page_from_label('dummy label')

    # Test get_page: number -> page -> number.
    for n in num_set:
        p = index_page.get_page(n)
        self.assertEqual(index_page.get_number(p), n)

    # Test get_number: page -> number -> page.
    for p in page_set:
        n = index_page.get_number(p)
        self.assertEqual(index_page.get_page(n), p)