def test_get_page_number(self, key):
        """Test IndexPage label lookups and the page <-> number mappings.

        For the test data selected by ``key`` this checks that every label
        (given as int or as str) resolves to the expected page numbers and
        pages, that an unknown label raises KeyError, and that the private
        ``_numbers_from_page`` and ``_page_from_numbers`` mappings are
        inverses of each other.

        @param key: key into ``self.sites`` selecting the test data
        """
        data = self.sites[key]
        index_page = IndexPage(self.site, data["index"])

        # Test get_page_number_from_label.
        for label, num_set in data["get_number"]:
            # Get set of numbers from label with label as int or str.
            self.assertEqual(index_page.get_page_number_from_label(label), num_set)
            self.assertEqual(index_page.get_page_number_from_label(str(label)), num_set)

        # Error if label does not exist.
        # NOTE(review): num_set is reset to an empty list here, so the
        # number -> page consistency loop at the end iterates over nothing.
        # Kept as in the original; confirm whether that is intended.
        label, num_set = "dummy label", []
        self.assertRaises(KeyError, index_page.get_page_number_from_label, "dummy label")

        # Test get_page_from_label.
        for label, page_set in data["get_page"]:
            # Get set of pages from label with label as int or str.
            self.assertEqual(index_page.get_page_from_label(label), page_set)
            self.assertEqual(index_page.get_page_from_label(str(label)), page_set)

        # Error if label does not exist.
        self.assertRaises(KeyError, index_page.get_page_from_label, "dummy label")

        # Test consistency of page <-> numbers mapping on last page_set and
        # num_set used.
        for p in page_set:
            n = index_page._numbers_from_page[p]
            self.assertEqual(index_page._page_from_numbers[n], p)
        for n in num_set:
            # Look the page up from the number (the previous code indexed
            # with a stale ``p`` and clobbered the loop variable ``n``).
            p = index_page._page_from_numbers[n]
            self.assertEqual(index_page._numbers_from_page[p], n)
 def test_valid_link_as_source(self):
     """Check that an IndexPage built from a valid Link mirrors that Link."""
     link = pywikibot.Link(
         self.valid_index_title,
         source=self.site,
         defaultNamespace=self.site.proofread_page_ns,
     )
     index_page = IndexPage(link)

     # The page must report the link's bare title and its namespace.
     bare_title = index_page.title(withNamespace=False)
     self.assertEqual(bare_title, link.title)
     self.assertEqual(index_page.namespace(), link.namespace)
    def test_page_number_mapping(self, key):
        """Check label lookups by page number and by page object.

        @param key: key into ``self.sites`` selecting the test data
        """
        site_data = self.sites[key]
        page_num, title_num, expected_label = site_data["get_label"]

        index_page = IndexPage(self.site, site_data["index"])
        proofread_page = ProofreadPage(
            self.site, site_data["page"].format(title_num))

        # A known page number yields its label; -1 must raise KeyError.
        self.assertEqual(
            index_page.get_label_from_page_number(page_num), expected_label)
        with self.assertRaises(KeyError):
            index_page.get_label_from_page_number(-1)

        # The page object yields the same label; None must raise KeyError.
        self.assertEqual(
            index_page.get_label_from_page(proofread_page), expected_label)
        with self.assertRaises(KeyError):
            index_page.get_label_from_page(None)
    def test_get_labels(self, key):
        """Exercise the IndexPage get_label_from_* lookups.

        @param key: key into ``self.sites`` selecting the test data
        """
        entry = self.sites[key]
        number, title_number, wanted = entry['get_label']

        index_page = IndexPage(self.site, entry['index'])
        title = entry['page'].format(title_number)
        page = ProofreadPage(self.site, title)

        # Lookup by page number: expected label, KeyError for -1.
        found = index_page.get_label_from_page_number(number)
        self.assertEqual(found, wanted)
        with self.assertRaises(KeyError):
            index_page.get_label_from_page_number(-1)

        # Lookup by page object: expected label, KeyError for None.
        found = index_page.get_label_from_page(page)
        self.assertEqual(found, wanted)
        with self.assertRaises(KeyError):
            index_page.get_label_from_page(None)
    def test_page_gen(self, key):
        """Check page_gen argument validation and the filter_ql filter.

        @param key: key into ``self.sites`` selecting the test data
        """
        site_data = self.sites[key]
        page_num, title_num, _label = site_data['get_label']

        index_page = IndexPage(self.site, site_data['index'])
        expected_page = ProofreadPage(
            self.site, site_data['page'].format(title_num))

        # Each of these (start, end) pairs must be rejected with ValueError.
        for start, end in ((-1, 2), (1, -1), (2, 1)):
            self.assertRaises(ValueError, index_page.page_gen, start, end)

        # With filter_ql=range(5) the single requested page is expected ...
        pages = index_page.page_gen(page_num, page_num, filter_ql=range(5))
        self.assertEqual(list(pages), [expected_page])

        # ... while filter_ql=[0] is expected to yield nothing.
        pages = index_page.page_gen(page_num, page_num, filter_ql=[0])
        self.assertEqual(list(pages), [])
    def test_get_page_and_number(self, key):
        """Check label -> numbers/pages lookups and get_page/get_number.

        @param key: key into ``self.sites`` selecting the test data
        """
        site_data = self.sites[key]
        index_page = IndexPage(self.site, site_data['index'])
        unknown = 'dummy label'

        # Labels resolve to the same set of numbers as int and as str.
        for label, numbers in site_data['get_number']:
            for variant in (label, str(label)):
                self.assertEqual(
                    index_page.get_page_number_from_label(variant), numbers)

        # An unknown label raises KeyError.
        # NOTE(review): ``numbers`` is reset to an empty list here, so the
        # get_page loop below iterates over nothing; kept as in the original.
        label, numbers = unknown, []
        with self.assertRaises(KeyError):
            index_page.get_page_number_from_label('dummy label')

        # Labels resolve to the same set of pages as int and as str.
        for label, pages in site_data['get_page']:
            for variant in (label, str(label)):
                self.assertEqual(
                    index_page.get_page_from_label(variant), pages)

        # An unknown label raises KeyError.
        with self.assertRaises(KeyError):
            index_page.get_page_from_label('dummy label')

        # number -> page -> number round trip.
        for number in numbers:
            page = index_page.get_page(number)
            self.assertEqual(index_page.get_number(page), number)

        # page -> number -> page round trip on the last set of pages.
        for page in pages:
            number = index_page.get_number(page)
            self.assertEqual(index_page.get_page(number), page)
    def test_check_if_cached(self, key):
        """Verify that label lookups set and honour the ``_cached`` flag.

        @param key: key into ``self.sites`` selecting the test data
        """
        site_data = self.sites[key]
        index_page = IndexPage(self.site, site_data['index'])
        page_num, _title_num, expected = site_data['get_label']

        # A fresh instance is not cached; the first lookup flips the flag.
        self.assertIs(index_page._cached, False)
        fetched = index_page.get_label_from_page_number(page_num)
        self.assertIs(index_page._cached, True)
        self.assertEqual(expected, fetched)

        # While flagged as cached, a tampered entry is returned unchanged.
        tampered = 'wrong cached value'
        index_page._labels_from_page_number[page_num] = tampered
        self.assertEqual(
            index_page.get_label_from_page_number(page_num), tampered)

        # After clearing the flag the real label is expected again.
        index_page._cached = False
        self.assertEqual(
            index_page.get_label_from_page_number(page_num), expected)
Example #8
0
 def test_valid_site_as_source(self):
     """Check the namespace of an IndexPage built from a Site and a title."""
     index_page = IndexPage(self.site, 'Index:dummy test page')
     actual_ns = index_page.namespace()
     self.assertEqual(actual_ns, self.site.proofread_index_ns)
Example #9
0
def main(*args):
    """
    Parse the command line and run UploadTextBot over pages of an Index.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    @return: False if -index is missing, the site lacks the ProofreadPage
        extension or the Index page does not exist; otherwise None
    """
    index_title = None
    page_spec = '1-'
    options = {}

    # Each local argument has the form "-name" or "-name:value".
    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        if name == '-index':
            index_title = value
        elif name == '-pages':
            page_spec = value
        elif name == '-showdiff':
            issue_deprecation_warning('The usage of -showdiff option', None, 0)
        elif name == '-summary':
            options['summary'] = value
        elif name == '-force':
            issue_deprecation_warning('The usage of -force option', None, 0)
        elif name == '-always':
            options['always'] = True
        else:
            pywikibot.output('Unknown argument %s' % name)

    # Without an index title there is nothing to work on.
    if not index_title:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error('Site %s must have ProofreadPage extension.' % site)
        return False

    index_page = IndexPage(site, index_title)
    if not index_page.exists():
        pywikibot.error("Page %s doesn't exist." % index_page)
        return False

    # Turn "a-b,c,d-" into a list of (start, end) tuples.
    intervals = []
    for chunk in page_spec.split(','):
        first, dash, last = chunk.partition('-')
        first = int(first) if first else 1
        if dash:
            # An open-ended "N-" runs up to the last page of the index.
            last = int(last) if last else index_page.num_pages
        else:
            # A single number denotes a one-page interval.
            last = first
        intervals.append((first, last))

    # Create every generator up front, in ascending interval order.
    generators = [
        index_page.page_gen(start=first, end=last,
                            filter_ql=[1], content=False)
        for first, last in sorted(intervals)
    ]
    gen = itertools.chain.from_iterable(generators)

    pywikibot.output(
        '\nUploading text to %s\n' % index_page.title(asLink=True))

    bot = UploadTextBot(gen, site=index_page.site, **options)
    bot.run()
 def test_valid_site_as_source(self):
     """Check the namespace of an IndexPage built from a Site and a title."""
     title = 'Index:dummy test page'
     index_page = IndexPage(self.site, title)
     self.assertEqual(index_page.namespace(), self.site.proofread_index_ns)
Example #11
0
 def setUpClass(cls):
     """Run the parent class set-up, then build the shared IndexPage."""
     super(TestIndexPageHasValidContent, cls).setUpClass()
     # ``cls.index`` is built from the class's site and index_name.
     index_page = IndexPage(cls.site, cls.index_name)
     cls.index = index_page
def main(*args):
    """
    Parse the command line and run UploadTextBot over pages of an Index.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    @return: False if -index is missing, -force is given without -ocr,
        the site lacks the ProofreadPage extension or the Index page does
        not exist; otherwise None
    """
    index_title = None
    page_spec = '1-'
    options = {}

    # Each local argument has the form "-name" or "-name:value".
    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        if name == '-index':
            index_title = value
        elif name == '-pages':
            page_spec = value
        elif name == '-showdiff':
            options['showdiff'] = True
        elif name == '-summary':
            options['summary'] = value
        elif name == '-ocr':
            options['ocr'] = True
        elif name == '-force':
            options['force'] = True
        elif name == '-always':
            options['always'] = True
        else:
            pywikibot.output('Unknown argument %s' % name)

    # Without an index title there is nothing to work on.
    if not index_title:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    # '-force' is only accepted together with '-ocr'.
    if 'force' in options and 'ocr' not in options:
        pywikibot.error("'-force' can be used with '-ocr' option only.")
        return False

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error('Site %s must have ProofreadPage extension.' % site)
        return False

    index_page = IndexPage(site, index_title)
    if not index_page.exists():
        pywikibot.error("Page %s doesn't exist." % index_page)
        return False

    # Turn "a-b,c,d-" into a list of (start, end) tuples.
    intervals = []
    for chunk in page_spec.split(','):
        first, dash, last = chunk.partition('-')
        first = int(first) if first else 1
        if dash:
            # An open-ended "N-" runs up to the last page of the index.
            last = int(last) if last else index_page.num_pages
        else:
            # A single number denotes a one-page interval.
            last = first
        intervals.append((first, last))

    # One page generator per interval, created up front in ascending order.
    generators = [
        index_page.page_gen(start=first, end=last,
                            filter_ql=[1], content=False)
        for first, last in sorted(intervals)
    ]
    gen = itertools.chain.from_iterable(generators)

    pywikibot.output(
        '\nUploading text to %s\n' % index_page.title(asLink=True))

    bot = UploadTextBot(gen, site=index_page.site, **options)
    bot.run()
Example #13
0
 def setUp(self):
     """Create the IndexPage under test, then run the parent set-up."""
     index_title = 'Index:Popular Science Monthly Volume 1.djvu'
     self._page = IndexPage(self.site, index_title)
     super().setUp()
Example #14
0
 def test_invalid_link_as_source(self):
     """Check that an invalid Link as source raises ValueError."""
     bad_link = pywikibot.Link(
         self.not_existing_invalid_title, source=self.site)
     # Constructing the IndexPage itself is the call expected to raise.
     self.assertRaises(ValueError, IndexPage, bad_link)
Example #15
0
 def test_invalid_not_existing_page_as_source(self):
     """Check that a Page built from the invalid title raises ValueError."""
     bad_page = pywikibot.Page(self.site, self.not_existing_invalid_title)
     # Constructing the IndexPage itself is the call expected to raise.
     self.assertRaises(ValueError, IndexPage, bad_page)
Example #16
0
 def setUpClass(cls):
     """Run the parent class set-up, then build the shared IndexPage."""
     super().setUpClass()
     # ``cls.index`` is built from the class's site and index_name.
     index_page = IndexPage(cls.site, cls.index_name)
     cls.index = index_page
Example #17
0
 def test_invalid_site_source(self):
     """Check that building an IndexPage here raises UnknownExtensionError."""
     title = 'title'
     with self.assertRaises(UnknownExtensionError):
         IndexPage(self.site, title)
 def setUpClass(cls):
     """Run the parent set-up, then build the index and page fixtures."""
     super(TestIndexPageMappingsRedlinks, cls).setUpClass()
     cls.index = IndexPage(cls.site, cls.index_name)

     # One ProofreadPage per configured page name, in the given order.
     proofread_pages = []
     for page_name in cls.page_names:
         proofread_pages.append(ProofreadPage(cls.site, page_name))
     cls.pages = proofread_pages

     cls.missing = ProofreadPage(cls.site, cls.missing_name)
 def test_num_pages(self, key):
     """Check that num_pages equals the count recorded in the test data."""
     site_data = self.sites[key]
     index_page = IndexPage(self.site, site_data['index'])
     self.assertEqual(index_page.num_pages, site_data['num_pages'])
 def setUp(self):
     """Create the IndexPage under test, then run the parent set-up."""
     index_title = 'Index:Popular Science Monthly Volume 1.djvu'
     self._page = IndexPage(self.site, index_title)
     super(TestLoadRevisionsCachingIndexPage, self).setUp()
Example #21
0
def main(*args):
    """
    Parse the command line and run UploadTextBot over pages of an Index.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    @return: False if -index is missing, -force is given without -ocr,
        the site lacks the ProofreadPage extension or the Index page does
        not exist; otherwise None
    """
    index_title = None
    page_spec = '1-'
    options = {}

    # Each local argument has the form "-name" or "-name:value".
    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        if name == '-index':
            index_title = value
        elif name == '-pages':
            page_spec = value
        elif name == '-showdiff':
            options['showdiff'] = True
        elif name == '-summary':
            options['summary'] = value
        elif name == '-ocr':
            # A bare "-ocr" falls back to 'phetools'.
            options['ocr'] = value or 'phetools'
        elif name == '-threads':
            options['threads'] = int(value)
        elif name == '-force':
            options['force'] = True
        elif name == '-always':
            options['always'] = True
        else:
            pywikibot.output('Unknown argument ' + name)

    # Without an index title there is nothing to work on.
    if not index_title:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    # '-force' is only accepted together with '-ocr'.
    if 'force' in options and 'ocr' not in options:
        pywikibot.error("'-force' can be used with '-ocr' option only.")
        return False

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error(
            'Site {} must have ProofreadPage extension.'.format(site))
        return False

    index_page = IndexPage(site, index_title)
    if not index_page.exists():
        pywikibot.error("Page {} doesn't exist.".format(index_page))
        return False

    # Turn "a-b,c,d-" into a list of (start, end) tuples.
    intervals = []
    for chunk in page_spec.split(','):
        first, dash, last = chunk.partition('-')
        first = int(first) if first else 1
        if dash:
            # An open-ended "N-" runs up to the last page of the index.
            last = int(last) if last else index_page.num_pages
        else:
            # A single number denotes a one-page interval.
            last = first
        intervals.append((first, last))

    # One page generator per interval, created up front in ascending order.
    generators = [
        index_page.page_gen(start=first, end=last, filter_ql=[1],
                            content=True)
        for first, last in sorted(intervals)
    ]
    gen = itertools.chain.from_iterable(generators)

    pywikibot.output('\nUploading text to {}\n'.format(
        index_page.title(as_link=True)))

    bot = UploadTextBot(gen, site=index_page.site, **options)
    bot.run()
Example #22
0
 def test_invalid_existing_page_as_source(self):
     """Check that a Page built from the invalid title raises ValueError."""
     bad_page = pywikibot.Page(self.site, self.existing_invalid_title)
     # Constructing the IndexPage itself is the call expected to raise.
     self.assertRaises(ValueError, IndexPage, bad_page)
Example #23
0
    def test_get_page_and_number(self, key):
        """Check label -> numbers/pages lookups and get_page/get_number.

        @param key: key into ``self.sites`` selecting the test data
        """
        site_data = self.sites[key]
        index_page = IndexPage(self.site, site_data['index'])

        # Labels resolve to the same set of numbers as int and as str.
        for label, numbers in site_data['get_number']:
            by_raw = index_page.get_page_number_from_label(label)
            self.assertEqual(by_raw, numbers)
            by_str = index_page.get_page_number_from_label(str(label))
            self.assertEqual(by_str, numbers)

        # An unknown label raises KeyError.
        # NOTE(review): ``numbers`` is reset to an empty list here, so the
        # get_page loop below iterates over nothing; kept as in the original.
        label, numbers = 'dummy label', []
        self.assertRaises(
            KeyError, index_page.get_page_number_from_label, 'dummy label')

        # Labels resolve to the same set of pages as int and as str.
        for label, pages in site_data['get_page']:
            by_raw = index_page.get_page_from_label(label)
            self.assertEqual(by_raw, pages)
            by_str = index_page.get_page_from_label(str(label))
            self.assertEqual(by_str, pages)

        # An unknown label raises KeyError.
        self.assertRaises(
            KeyError, index_page.get_page_from_label, 'dummy label')

        # number -> page -> number round trip.
        for number in numbers:
            page = index_page.get_page(number)
            self.assertEqual(index_page.get_number(page), number)

        # page -> number -> page round trip on the last set of pages.
        for page in pages:
            number = index_page.get_number(page)
            self.assertEqual(index_page.get_page(number), page)