def test_match_any(self):
        """Find a page when any one of several templates matches.

        The same check is run for both orderings of the two templates.
        """
        stack_begin = pywikibot.Page(self.site, 'Template:stack begin')
        foobar = pywikibot.Page(self.site, 'Template:foobar')
        builder = _MultiTemplateMatchBuilder(self.site)

        # The second entry is the first one with the templates reordered.
        orderings = ([stack_begin, foobar], [foobar, stack_begin])
        for templates in orderings:
            text_predicate = builder.search_any_predicate(templates)
            found = list(XMLDumpPageGenerator(
                filename=join_xml_data_path('article-pear-0.10.xml'),
                site=self.site,
                text_predicate=text_predicate))
            self.assertEqual(len(found), 1)
            self.assertPagelistTitles(found, ['Pear'], site=self.site)
    def test_match_any(self):
        """Find a page when any one of several templates matches.

        The same check is run for both orderings of the two templates.
        """
        stack_begin = pywikibot.Page(self.site, 'Template:stack begin')
        foobar = pywikibot.Page(self.site, 'Template:foobar')
        builder = _MultiTemplateMatchBuilder(self.site)

        # The second entry is the first one with the templates reordered.
        orderings = ([stack_begin, foobar], [foobar, stack_begin])
        for templates in orderings:
            text_predicate = builder.search_any_predicate(templates)
            found = list(XMLDumpPageGenerator(
                filename=join_xml_data_path('article-pear-0.10.xml'),
                site=self.site,
                text_predicate=text_predicate))
            self.assertEqual(len(found), 1)
            self.assertPagelistTitles(found, ['Pear'], site=self.site)
Exemple #3
0
 def _get_entries(self, filename, **kwargs):
     """Return a list of everything XmlDump parses from a data file.

     ``filename`` is resolved with ``join_xml_data_path``; any keyword
     arguments are forwarded unchanged to ``xmlreader.XmlDump``.
     """
     dump = xmlreader.XmlDump(join_xml_data_path(filename), **kwargs)
     return list(dump.parse())
 def test_xml_multiple_namespace_ids_2(self):
     """Run with two namespace ids given in a single -namespace option."""
     dump_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
     main(dump_option, '-namespace:0,1', '-xmlstart:Fake page')
     page_gen = self.constructor_args[0]
     expected_titles = [u'Fake page', u'Talk:Fake page']
     self.assertPageTitlesCountEqual(page_gen, expected_titles,
                                     site=self.get_site())
Exemple #5
0
 def test_xml_multiple_namespace_ids_2(self):
     """Run with two namespace ids given in a single -namespace option."""
     dump_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
     main(dump_option, '-namespace:0,1', '-xmlstart:Fake page')
     page_gen = self.constructor_args[0]
     expected_titles = ['Fake page', 'Talk:Fake page']
     self.assertPageTitlesCountEqual(page_gen, expected_titles,
                                     site=self.get_site())
 def test_xml_namespace_name(self):
     """Run with the namespace given by name rather than by id."""
     dump_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
     main(dump_option, '-namespace:Talk', '-xmlstart:Fake page')
     found = list(self.constructor_args[0])
     self.assertPagelistTitles(found, [u'Talk:Fake page'],
                               site=self.get_site())
Exemple #7
0
 def test_xml_start_prefix(self):
     """Run with only a prefix of the page title as the start page."""
     dump_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
     main(dump_option, '-namespace:1', '-xmlstart:Fake')
     found = list(self.constructor_kwargs['generator'])
     self.assertPageTitlesEqual(found, ['Talk:Fake page'],
                                site=self.get_site())
Exemple #8
0
 def test_xml_one_namespace(self):
     """Run with a single namespace id and no start page."""
     dump_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
     main(dump_option, '-namespace:1')
     found = list(self.constructor_args[0])
     self.assertPagelistTitles(found, [u'Talk:Fake page'],
                               site=self.get_site())
Exemple #9
0
 def test_xml_start_underscore(self):
     """Run with a start page title that contains an underscore."""
     dump_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
     main(dump_option, '-namespace:1', '-xmlstart:Fake_page')
     found = list(self.constructor_args[0])
     self.assertPagelistTitles(found, [u'Talk:Fake page'],
                               site=self.get_site())
Exemple #10
0
 def test_xml_namespace_name(self):
     """Run with the namespace given by name rather than by id."""
     dump_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
     main(dump_option, '-namespace:Talk', '-xmlstart:Fake page')
     found = list(self.constructor_args[0])
     self.assertPageTitlesEqual(found, ['Talk:Fake page'],
                                site=self.get_site())
Exemple #11
0
 def test_XmlDumpRedirect(self):
     """Test XmlDump correctly parsing whether a page is a redirect.

     The dump is parsed twice: once with ``allrevisions=True`` and once
     with the default arguments. Only the entries of the default parse
     are inspected.
     """
     # The result of this parse is not inspected; the call only has to
     # complete without raising.
     self._get_entries('article-pyrus.xml', allrevisions=True)
     pages = list(
         xmlreader.XmlDump(join_xml_data_path('article-pyrus.xml')).parse())
     self.assertTrue(pages[0].isredirect)
 def generator(self, title, xml='article-pear-0.10.xml'):
     """Return the XMLDumpPageGenerator results for one template as a list.

     ``title`` is turned into a page with ``ns=10`` and used as the only
     template of the text predicate; ``xml`` names the dump file and is
     resolved with ``join_xml_data_path``.
     """
     template_page = pywikibot.Page(self.site, title, ns=10)
     text_predicate = _MultiTemplateMatchBuilder(
         self.site).search_any_predicate([template_page])
     return list(XMLDumpPageGenerator(filename=join_xml_data_path(xml),
                                      site=self.site,
                                      text_predicate=text_predicate))
Exemple #13
0
 def test_non_bare_ref_urls(self):
     """Check that pages without bare references are not yielded."""
     dump_path = join_xml_data_path('article-pear-0.10.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Pear',
                                       namespaces=[0, 1],
                                       site=self.get_site()))
     self.assertEqual(len(found), 0)
 def test_non_bare_ref_urls(self):
     """Check that pages without bare references are not yielded."""
     dump_path = join_xml_data_path('article-pear-0.10.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Pear',
                                       namespaces=[0, 1],
                                       site=self.get_site()))
     self.assertEqual(len(found), 0)
Exemple #15
0
 def test_namespace_names(self):
     """Pass the namespace by name instead of by numeric id."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Fake page',
                                       namespaces=['Talk'],
                                       site=self.site))
     self.assertPageTitlesEqual(found, ['Talk:Fake page'], site=self.site)
Exemple #16
0
 def test_xml_multiple_namespace_ids(self):
     """Run with each namespace id given in its own -namespace option."""
     with suppress_warnings(WARN_SITE_OBJ, category=UserWarning):
         dump_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
         main(dump_option,
              '-namespace:0', '-namespace:1', '-xmlstart:Fake page')
         page_gen = self.constructor_kwargs['generator']
         expected_titles = ['Fake page', 'Talk:Fake page']
         self.assertPageTitlesCountEqual(page_gen, expected_titles,
                                         site=self.get_site())
Exemple #17
0
 def test_namespace_names(self):
     """Pass the namespace by name instead of by numeric id."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Fake page',
                                       namespaces=["Talk"],
                                       site=self.get_site()))
     expected_titles = (u'Talk:Fake page', )
     self.assertPagelistTitles(found, expected_titles,
                               site=self.get_site())
 def test_simple_bare_refs(self):
     """Find pages with simple bare references across two namespaces."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Fake page',
                                       namespaces=[0, 1],
                                       site=self.get_site()))
     expected_titles = (u'Fake page', u'Talk:Fake page')
     self.assertPagelistTitles(found, expected_titles,
                               site=self.get_site())
Exemple #19
0
 def test_start_with_underscore(self):
     """Use a start page title written with an underscore."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Fake_page',
                                       namespaces=[0, 1],
                                       site=self.get_site()))
     expected_titles = (u'Fake page', u'Talk:Fake page')
     self.assertPagelistTitles(found, expected_titles,
                               site=self.get_site())
 def test_namespace_empty_list(self):
     """Check that namespaces=[] yields pages from all namespaces."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start=u'Fake page',
                                       namespaces=[],
                                       site=self.get_site()))
     expected_titles = (u'Fake page', u'Talk:Fake page')
     self.assertPagelistTitles(found, expected_titles,
                               site=self.get_site())
Exemple #21
0
 def test_without_start(self):
     """Run the generator with start=None."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start=None,
                                       namespaces=[0, 1],
                                       site=self.get_site()))
     expected_titles = ('Fake page', 'Talk:Fake page')
     self.assertPageTitlesEqual(found, expected_titles,
                                site=self.get_site())
 def test_start_with_underscore(self):
     """Use a start page title written with an underscore."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Fake_page',
                                       namespaces=[0, 1],
                                       site=self.get_site()))
     expected_titles = (u'Fake page', u'Talk:Fake page')
     self.assertPagelistTitles(found, expected_titles,
                               site=self.get_site())
Exemple #23
0
 def test_namespace_empty_list(self):
     """Check that namespaces=[] yields pages from all namespaces."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start=u'Fake page',
                                       namespaces=[],
                                       site=self.get_site()))
     expected_titles = (u'Fake page', u'Talk:Fake page')
     self.assertPagelistTitles(found, expected_titles,
                               site=self.get_site())
Exemple #24
0
 def test_simple_bare_refs(self):
     """Find pages with simple bare references across two namespaces."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Fake page',
                                       namespaces=[0, 1],
                                       site=self.get_site()))
     expected_titles = ('Fake page', 'Talk:Fake page')
     self.assertPageTitlesEqual(found, expected_titles,
                                site=self.get_site())
Exemple #25
0
 def test_namespace_string_ids(self):
     """Pass the namespace ids as strings instead of integers."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Fake page',
                                       namespaces=['0', '1'],
                                       site=self.get_site()))
     expected_titles = ('Fake page', 'Talk:Fake page')
     self.assertPageTitlesEqual(found, expected_titles,
                                site=self.get_site())
Exemple #26
0
 def test_namespace_None(self):
     """Check that namespaces=None yields pages from all namespaces."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Fake page',
                                       namespaces=None,
                                       site=self.get_site()))
     expected_titles = ('Fake page', 'Talk:Fake page')
     self.assertPageTitlesEqual(found, expected_titles,
                                site=self.get_site())
Exemple #27
0
 def test_start_prefix(self):
     """Use only a prefix of the page title as the start page."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Fake',
                                       namespaces=[0, 1],
                                       site=self.get_site()))
     expected_titles = ('Fake page', 'Talk:Fake page')
     self.assertPagelistTitles(found, expected_titles,
                               site=self.get_site())
 def test_namespace_names(self):
     """Pass the namespace by name instead of by numeric id."""
     dump_path = join_xml_data_path('dummy-reflinks.xml')
     found = list(XmlDumpPageGenerator(filename=dump_path,
                                       start='Fake page',
                                       namespaces=["Talk"],
                                       site=self.get_site()))
     expected_titles = (u'Talk:Fake page', )
     self.assertPagelistTitles(found, expected_titles,
                               site=self.get_site())
Exemple #29
0
 def test_no_match(self):
     """Expect no pages when none of the wanted templates is matched."""
     foobar = pywikibot.Page(self.site, 'Template:foobar')
     text_predicate = _MultiTemplateMatchBuilder(
         self.site).search_any_predicate([foobar])
     found = list(XMLDumpPageGenerator(
         filename=join_xml_data_path('article-pear-0.10.xml'),
         site=self.site,
         text_predicate=text_predicate))
     self.assertEqual(len(found), 0)
 def test_no_match(self):
     """Expect no pages when none of the wanted templates is matched."""
     foobar = pywikibot.Page(self.site, 'Template:foobar')
     text_predicate = _MultiTemplateMatchBuilder(
         self.site).search_any_predicate([foobar])
     found = list(XMLDumpPageGenerator(
         filename=join_xml_data_path('article-pear-0.10.xml'),
         site=self.site,
         text_predicate=text_predicate))
     self.assertEqual(len(found), 0)
Exemple #31
0
 def test_match_with_params(self):
     """Find the single page that uses the template with parameters."""
     taxobox = pywikibot.Page(self.site, 'Template:Taxobox')
     text_predicate = _MultiTemplateMatchBuilder(
         self.site).search_any_predicate([taxobox])
     found = list(XMLDumpPageGenerator(
         filename=join_xml_data_path('article-pear-0.10.xml'),
         site=self.site,
         text_predicate=text_predicate))
     self.assertEqual(len(found), 1)
     self.assertPagelistTitles(found, ['Pear'], site=self.site)
 def test_match_with_params(self):
     """Find the single page that uses the template with parameters."""
     taxobox = pywikibot.Page(self.site, 'Template:Taxobox')
     text_predicate = _MultiTemplateMatchBuilder(
         self.site).search_any_predicate([taxobox])
     found = list(XMLDumpPageGenerator(
         filename=join_xml_data_path('article-pear-0.10.xml'),
         site=self.site,
         text_predicate=text_predicate))
     self.assertEqual(len(found), 1)
     self.assertPagelistTitles(found, ['Pear'], site=self.site)
Exemple #33
0
 def test_nested_match(self):
     """Find the single page whose match sits inside another template."""
     boo = pywikibot.Page(self.site, 'Template:boo')
     text_predicate = _MultiTemplateMatchBuilder(
         self.site).search_any_predicate([boo])
     found = list(XMLDumpPageGenerator(
         filename=join_xml_data_path('dummy-template.xml'),
         site=self.site,
         text_predicate=text_predicate))
     self.assertEqual(len(found), 1)
     expected_titles = ['Fake page with nested template']
     self.assertPagelistTitles(found, expected_titles, site=self.site)
    def test_match_msg(self):
        """Find the single page that uses the {{msg:..}} form."""
        foo = pywikibot.Page(self.site, 'Template:Foo')
        text_predicate = _MultiTemplateMatchBuilder(
            self.site).search_any_predicate([foo])
        found = list(XMLDumpPageGenerator(
            filename=join_xml_data_path('dummy-template.xml'),
            site=self.site,
            text_predicate=text_predicate))
        self.assertEqual(len(found), 1)
        expected_titles = ['Fake page with msg']
        self.assertPagelistTitles(found, expected_titles, site=self.site)
    def test_match_msg(self):
        """Find the single page that uses the {{msg:..}} form."""
        foo = pywikibot.Page(self.site, 'Template:Foo')
        text_predicate = _MultiTemplateMatchBuilder(
            self.site).search_any_predicate([foo])
        found = list(XMLDumpPageGenerator(
            filename=join_xml_data_path('dummy-template.xml'),
            site=self.site,
            text_predicate=text_predicate))
        self.assertEqual(len(found), 1)
        expected_titles = ['Fake page with msg']
        self.assertPageTitlesEqual(found, expected_titles, site=self.site)
Exemple #36
0
    def test_simple_bare_refs(self):
        """Find bare references for several namespaces option forms."""
        dump_path = join_xml_data_path('dummy-reflinks.xml')
        expected_titles = ('Fake page', 'Talk:Fake page')

        # Each variant runs in its own sub-test.
        for namespaces in (None, [], [0, 1], ['0', '1']):
            with self.subTest(namespaces=namespaces):
                found = list(XmlDumpPageGenerator(filename=dump_path,
                                                  start='Fake page',
                                                  namespaces=namespaces,
                                                  site=self.site))
                self.assertPageTitlesEqual(found, expected_titles,
                                           site=self.site)
Exemple #37
0
    def test_match_unnecessary_template_prefix(self):
        """Find the single page that uses the {{template:..}} form."""
        bar = pywikibot.Page(self.site, 'Template:Bar')
        text_predicate = _MultiTemplateMatchBuilder(
            self.site).search_any_predicate([bar])
        found = list(XMLDumpPageGenerator(
            filename=join_xml_data_path('dummy-template.xml'),
            site=self.site,
            text_predicate=text_predicate))
        self.assertEqual(len(found), 1)
        expected_titles = ['Fake page with unnecessary template prefix']
        self.assertPagelistTitles(found, expected_titles, site=self.site)
Exemple #38
0
class TestFileShaCalculator(TestCase):
    """Test calculator of sha of a file.

    Every test hashes the same data file with ``tools.compute_file_hash``
    and compares the result with a fixed hex digest, either for the whole
    file or for ``bytes_to_read=1024``.
    """

    net = False

    # File that every test in this class hashes.
    filename = join_xml_data_path('article-pear-0.10.xml')

    def setUp(self):
        """Setup tests."""
        super(TestFileShaCalculator, self).setUp()

    def test_md5_complete_calculation(self):
        """Test md5 of complete file."""
        res = tools.compute_file_hash(self.filename, sha='md5')
        self.assertEqual(res, '5d7265e290e6733e1e2020630262a6f3')

    def test_md5_partial_calculation(self):
        """Test md5 of partial file (1024 bytes)."""
        res = tools.compute_file_hash(self.filename,
                                      sha='md5',
                                      bytes_to_read=1024)
        self.assertEqual(res, 'edf6e1accead082b6b831a0a600704bc')

    def test_sha1_complete_calculation(self):
        """Test sha1 of complete file."""
        res = tools.compute_file_hash(self.filename, sha='sha1')
        self.assertEqual(res, '1c12696e1119493a625aa818a35c41916ce32d0c')

    def test_sha1_partial_calculation(self):
        """Test sha1 of partial file (1024 bytes)."""
        res = tools.compute_file_hash(self.filename,
                                      sha='sha1',
                                      bytes_to_read=1024)
        self.assertEqual(res, 'e56fa7bd5cfdf6bb7e2d8649dd9216c03e7271e6')

    def test_sha224_complete_calculation(self):
        """Test sha224 of complete file."""
        res = tools.compute_file_hash(self.filename, sha='sha224')
        self.assertEqual(
            res, '3d350d9d9eca074bd299cb5ffe1b325a9f589b2bcd7ba1c033ab4d33')

    def test_sha224_partial_calculation(self):
        """Test sha224 of partial file (1024 bytes)."""
        res = tools.compute_file_hash(self.filename,
                                      sha='sha224',
                                      bytes_to_read=1024)
        self.assertEqual(
            res, 'affa8cb79656a9b6244a079f8af91c9271e382aa9d5aa412b599e169')
Exemple #39
0
    def test_xml_start_variants(self):
        """Run the generator with several spellings of the start page."""
        dump_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
        xmlstart_options = (
            '-xmlstart:Fake page',  # title
            '-xmlstart:Fake_page',  # underscore
            '-xmlstart:Fake',  # prefix
        )

        # Each variant runs in its own sub-test.
        for xmlstart in xmlstart_options:
            with self.subTest(xmlstart=xmlstart):
                with suppress_warnings(WARN_SITE_OBJ, category=UserWarning):
                    main(dump_option, '-namespace:1', xmlstart)
                    found = list(self.constructor_kwargs['generator'])
                    self.assertPageTitlesEqual(found, ['Talk:Fake page'],
                                               site=self.site)
Exemple #40
0
    def test_start_variants(self):
        """Run the generator with several forms of the start title."""
        dump_path = join_xml_data_path('dummy-reflinks.xml')
        expected_titles = ('Fake page', 'Talk:Fake page')
        start_titles = (
            None,  # None
            'Fake',  # prefix
            'Fake_page',  # underscore
        )

        # Each variant runs in its own sub-test.
        for start in start_titles:
            with self.subTest(start=start):
                found = list(XmlDumpPageGenerator(filename=dump_path,
                                                  start=start,
                                                  namespaces=[0, 1],
                                                  site=self.site))
                self.assertPageTitlesEqual(found, expected_titles,
                                           site=self.site)
Exemple #41
0
class TestFileShaCalculator(TestCase):
    r"""Check the file hash calculator against known digests.

    Each test accepts either of two digests. The second one is for
    files with windows line endings (\r\n).

    """

    net = False

    filename = join_xml_data_path('article-pear-0.10.xml')

    def setUp(self):
        """Run the inherited test setup."""
        super().setUp()

    def test_md5_complete_calculation(self):
        """Hash the complete file with md5."""
        digest = tools.compute_file_hash(self.filename, sha='md5')
        accepted = (
            '5d7265e290e6733e1e2020630262a6f3',
            '2c941f2fa7e6e629d165708eb02b67f7',
        )
        self.assertIn(digest, accepted)

    def test_md5_partial_calculation(self):
        """Hash the first 1024 bytes of the file with md5."""
        digest = tools.compute_file_hash(
            self.filename, sha='md5', bytes_to_read=1024)
        accepted = (
            'edf6e1accead082b6b831a0a600704bc',
            'be0227b6d490baa49e6d7e131c7f596b',
        )
        self.assertIn(digest, accepted)

    def test_sha1_complete_calculation(self):
        """Hash the complete file with sha1."""
        digest = tools.compute_file_hash(self.filename, sha='sha1')
        accepted = (
            '1c12696e1119493a625aa818a35c41916ce32d0c',
            '146121e6d0461916c9a0fab00dc718acdb6a6b14',
        )
        self.assertIn(digest, accepted)

    def test_sha1_partial_calculation(self):
        """Hash the first 1024 bytes of the file with sha1."""
        digest = tools.compute_file_hash(
            self.filename, sha='sha1', bytes_to_read=1024)
        accepted = (
            'e56fa7bd5cfdf6bb7e2d8649dd9216c03e7271e6',
            '617ce7d539848885b52355ed597a042dae1e726f',
        )
        self.assertIn(digest, accepted)

    def test_sha224_complete_calculation(self):
        """Hash the complete file with sha224."""
        digest = tools.compute_file_hash(self.filename, sha='sha224')
        accepted = (
            '3d350d9d9eca074bd299cb5ffe1b325a9f589b2bcd7ba1c033ab4d33',
            '4a2cf33b7da01f7b0530b2cc624e1180c8651b20198e9387aee0c767',
        )
        self.assertIn(digest, accepted)

    def test_sha224_partial_calculation(self):
        """Hash the first 1024 bytes of the file with sha224."""
        digest = tools.compute_file_hash(
            self.filename, sha='sha224', bytes_to_read=1024)
        accepted = (
            'affa8cb79656a9b6244a079f8af91c9271e382aa9d5aa412b599e169',
            '486467144e683aefd420d576250c4cc984e6d7bf10c85d36e3d249d2',
        )
        self.assertIn(digest, accepted)
 def test_xml_simple(self):
     """Run with only the -xml option, without namespace or start page."""
     dump_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
     main(dump_option)
     page_gen = self.constructor_args[0]
     expected_titles = [u'Fake page', u'Talk:Fake page']
     self.assertPageTitlesCountEqual(page_gen, expected_titles,
                                     site=self.get_site())
 def _get_entries(self, filename, **kwargs):
     """Return a list of everything XmlDump parses from a data file.

     ``filename`` is resolved with ``join_xml_data_path``; any keyword
     arguments are forwarded unchanged to ``xmlreader.XmlDump``.
     """
     dump = xmlreader.XmlDump(join_xml_data_path(filename), **kwargs)
     return list(dump.parse())
 def test_XmlDumpRedirect(self):
     """Test XmlDump correctly parsing whether a page is a redirect.

     The dump is parsed twice: once with ``allrevisions=True`` and once
     with the default arguments. Only the entries of the default parse
     are inspected.
     """
     # The result of this parse is not inspected; the call only has to
     # complete without raising.
     self._get_entries('article-pyrus.xml', allrevisions=True)
     pages = list(
         xmlreader.XmlDump(join_xml_data_path('article-pyrus.xml')).parse())
     self.assertTrue(pages[0].isredirect)
 def setUpClass(cls):
     """Store the base file path and its raw bytes on the class."""
     super(OpenArchiveTestCase, cls).setUpClass()
     pyrus_path = join_xml_data_path('article-pyrus.xml')
     cls.base_file = pyrus_path
     # Read in binary mode so the content is kept as bytes.
     with open(pyrus_path, 'rb') as stream:
         cls.original_content = stream.read()