def test_match_any(self):
    """Check matching when any one of several templates may be present.

    The predicate is built from the same two templates in both orders;
    each time exactly one page, titled 'Pear', is expected from
    'article-pear-0.10.xml'.
    """
    stack_begin = pywikibot.Page(self.site, 'Template:stack begin')
    foobar = pywikibot.Page(self.site, 'Template:foobar')
    builder = _MultiTemplateMatchBuilder(self.site)

    # Second entry is the reordered template list.
    for templates in ([stack_begin, foobar], [foobar, stack_begin]):
        predicate = builder.search_any_predicate(templates)
        pages = list(XMLDumpPageGenerator(
            filename=join_xml_data_path('article-pear-0.10.xml'),
            site=self.site,
            text_predicate=predicate))
        self.assertEqual(len(pages), 1)
        self.assertPagelistTitles(pages, ['Pear'], site=self.site)
def _get_entries(self, filename, **kwargs):
    """Return every entry parsed by XmlDump from a test data file.

    ``filename`` is resolved through ``join_xml_data_path``; any extra
    keyword arguments are forwarded unchanged to ``xmlreader.XmlDump``.
    """
    dump = xmlreader.XmlDump(join_xml_data_path(filename), **kwargs)
    return list(dump.parse())
def test_xml_multiple_namespace_ids_2(self):
    """Check that '-namespace:0,1' selects both namespaces in one option."""
    xml_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
    main(xml_option, '-namespace:0,1', '-xmlstart:Fake page')

    expected_titles = [u'Fake page', u'Talk:Fake page']
    self.assertPageTitlesCountEqual(self.constructor_args[0],
                                    expected_titles,
                                    site=self.get_site())
def test_xml_multiple_namespace_ids_2(self):
    """Run main() with a comma-separated namespace list and check titles."""
    dump_argument = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
    main(dump_argument, '-namespace:0,1', '-xmlstart:Fake page')

    page_gen = self.constructor_args[0]
    wanted = ['Fake page', 'Talk:Fake page']
    self.assertPageTitlesCountEqual(page_gen, wanted, site=self.get_site())
def test_xml_namespace_name(self):
    """Check that a namespace given by name ('Talk') is honoured."""
    xml_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
    main(xml_option, '-namespace:Talk', '-xmlstart:Fake page')

    found = list(self.constructor_args[0])
    self.assertPagelistTitles(found, [u'Talk:Fake page'],
                              site=self.get_site())
def test_xml_start_prefix(self):
    """Check that '-xmlstart' accepts only the beginning of a title."""
    xml_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
    main(xml_option, '-namespace:1', '-xmlstart:Fake')

    found = list(self.constructor_kwargs['generator'])
    self.assertPageTitlesEqual(found, ['Talk:Fake page'],
                               site=self.get_site())
def test_xml_one_namespace(self):
    """Check the generator when a single namespace id is given."""
    xml_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
    main(xml_option, '-namespace:1')

    found = list(self.constructor_args[0])
    self.assertPagelistTitles(found, [u'Talk:Fake page'],
                              site=self.get_site())
def test_xml_start_underscore(self):
    """Check that '-xmlstart' accepts a title written with an underscore."""
    xml_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
    main(xml_option, '-namespace:1', '-xmlstart:Fake_page')

    found = list(self.constructor_args[0])
    self.assertPagelistTitles(found, [u'Talk:Fake page'],
                              site=self.get_site())
def test_xml_namespace_name(self):
    """Run main() with '-namespace:Talk' and check the resulting titles."""
    dump_argument = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
    main(dump_argument, '-namespace:Talk', '-xmlstart:Fake page')

    page_gen = self.constructor_args[0]
    titles_source = list(page_gen)
    self.assertPageTitlesEqual(titles_source, ['Talk:Fake page'],
                               site=self.get_site())
def test_XmlDumpRedirect(self):
    """Test XmlDump correctly parsing whether a page is a redirect.

    'article-pyrus.xml' is first parsed with ``allrevisions=True``; that
    result is not inspected, the parse only has to complete. The
    ``isredirect`` flag is then asserted on the first entry of a parse
    with default options.
    """
    # Result deliberately not bound: it used to be stored in ``pages``
    # and overwritten on the very next line without being read.
    self._get_entries('article-pyrus.xml', allrevisions=True)

    # Same parse as before, via the shared helper instead of a
    # hand-written copy of it.
    pages = self._get_entries('article-pyrus.xml')
    self.assertTrue(pages[0].isredirect)
def generator(self, title, xml='article-pear-0.10.xml'):
    """Return the pages of an XML dump matched by one template title.

    ``title`` is turned into a page in namespace 10 on ``self.site`` and
    used to build a text predicate; ``xml`` names the dump file inside
    the test data directory. The result is a list, not a lazy generator.
    """
    wanted = pywikibot.Page(self.site, title, ns=10)
    matcher = _MultiTemplateMatchBuilder(self.site).search_any_predicate(
        [wanted])
    return list(XMLDumpPageGenerator(filename=join_xml_data_path(xml),
                                     site=self.site,
                                     text_predicate=matcher))
def test_non_bare_ref_urls(self):
    """Check that no page is yielded from 'article-pear-0.10.xml'."""
    xml_path = join_xml_data_path('article-pear-0.10.xml')
    found = list(XmlDumpPageGenerator(filename=xml_path,
                                      start='Pear',
                                      namespaces=[0, 1],
                                      site=self.get_site()))
    self.assertEqual(len(found), 0)
def test_namespace_names(self):
    """Check that a namespace may be given by its name ('Talk')."""
    xml_path = join_xml_data_path('dummy-reflinks.xml')
    found = list(XmlDumpPageGenerator(filename=xml_path,
                                      start='Fake page',
                                      namespaces=['Talk'],
                                      site=self.site))
    self.assertPageTitlesEqual(found, ['Talk:Fake page'], site=self.site)
def test_xml_multiple_namespace_ids(self):
    """Check that repeated '-namespace' options are combined."""
    xml_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
    with suppress_warnings(WARN_SITE_OBJ, category=UserWarning):
        main(xml_option, '-namespace:0', '-namespace:1',
             '-xmlstart:Fake page')

    expected_titles = ['Fake page', 'Talk:Fake page']
    self.assertPageTitlesCountEqual(self.constructor_kwargs['generator'],
                                    expected_titles,
                                    site=self.get_site())
def test_namespace_names(self):
    """Restrict the dump generator to the namespace named 'Talk'."""
    dump_file = join_xml_data_path('dummy-reflinks.xml')
    page_gen = XmlDumpPageGenerator(filename=dump_file,
                                    start='Fake page',
                                    namespaces=["Talk"],
                                    site=self.get_site())
    yielded = list(page_gen)
    wanted = (u'Talk:Fake page', )
    self.assertPagelistTitles(yielded, wanted, site=self.get_site())
def test_simple_bare_refs(self):
    """Check that pages from namespaces 0 and 1 are both yielded."""
    xml_path = join_xml_data_path('dummy-reflinks.xml')
    found = list(XmlDumpPageGenerator(filename=xml_path,
                                      start='Fake page',
                                      namespaces=[0, 1],
                                      site=self.get_site()))
    expected_titles = (u'Fake page', u'Talk:Fake page')
    self.assertPagelistTitles(found, expected_titles,
                              site=self.get_site())
def test_start_with_underscore(self):
    """Check that the start title may be written with an underscore."""
    xml_path = join_xml_data_path('dummy-reflinks.xml')
    found = list(XmlDumpPageGenerator(filename=xml_path,
                                      start='Fake_page',
                                      namespaces=[0, 1],
                                      site=self.get_site()))
    expected_titles = (u'Fake page', u'Talk:Fake page')
    self.assertPagelistTitles(found, expected_titles,
                              site=self.get_site())
def test_namespace_empty_list(self):
    """Check that ``namespaces=[]`` yields pages from every namespace."""
    xml_path = join_xml_data_path('dummy-reflinks.xml')
    found = list(XmlDumpPageGenerator(filename=xml_path,
                                      start=u'Fake page',
                                      namespaces=[],
                                      site=self.get_site()))
    expected_titles = (u'Fake page', u'Talk:Fake page')
    self.assertPagelistTitles(found, expected_titles,
                              site=self.get_site())
def test_without_start(self):
    """Check that ``start=None`` still yields both expected pages."""
    xml_path = join_xml_data_path('dummy-reflinks.xml')
    found = list(XmlDumpPageGenerator(filename=xml_path,
                                      start=None,
                                      namespaces=[0, 1],
                                      site=self.get_site()))
    expected_titles = ('Fake page', 'Talk:Fake page')
    self.assertPageTitlesEqual(found, expected_titles,
                               site=self.get_site())
def test_simple_bare_refs(self):
    """Read 'dummy-reflinks.xml' across namespaces 0 and 1."""
    dump_file = join_xml_data_path('dummy-reflinks.xml')
    page_gen = XmlDumpPageGenerator(filename=dump_file,
                                    start='Fake page',
                                    namespaces=[0, 1],
                                    site=self.get_site())
    yielded = list(page_gen)
    wanted = ('Fake page', 'Talk:Fake page')
    self.assertPageTitlesEqual(yielded, wanted, site=self.get_site())
def test_namespace_string_ids(self):
    """Check that namespace ids may be passed as strings ('0', '1')."""
    xml_path = join_xml_data_path('dummy-reflinks.xml')
    found = list(XmlDumpPageGenerator(filename=xml_path,
                                      start='Fake page',
                                      namespaces=['0', '1'],
                                      site=self.get_site()))
    expected_titles = ('Fake page', 'Talk:Fake page')
    self.assertPageTitlesEqual(found, expected_titles,
                               site=self.get_site())
def test_namespace_None(self):
    """Check that ``namespaces=None`` yields pages from every namespace."""
    xml_path = join_xml_data_path('dummy-reflinks.xml')
    found = list(XmlDumpPageGenerator(filename=xml_path,
                                      start='Fake page',
                                      namespaces=None,
                                      site=self.get_site()))
    expected_titles = ('Fake page', 'Talk:Fake page')
    self.assertPageTitlesEqual(found, expected_titles,
                               site=self.get_site())
def test_start_prefix(self):
    """Check that the start title may be only a prefix ('Fake')."""
    xml_path = join_xml_data_path('dummy-reflinks.xml')
    found = list(XmlDumpPageGenerator(filename=xml_path,
                                      start='Fake',
                                      namespaces=[0, 1],
                                      site=self.get_site()))
    expected_titles = ('Fake page', 'Talk:Fake page')
    self.assertPagelistTitles(found, expected_titles,
                              site=self.get_site())
def test_no_match(self):
    """Check that no page is yielded for 'Template:foobar'."""
    wanted = pywikibot.Page(self.site, 'Template:foobar')
    matcher = _MultiTemplateMatchBuilder(self.site).search_any_predicate(
        [wanted])
    found = list(XMLDumpPageGenerator(
        filename=join_xml_data_path('article-pear-0.10.xml'),
        site=self.site,
        text_predicate=matcher))
    self.assertEqual(len(found), 0)
def test_match_with_params(self):
    """Check that 'Template:Taxobox' matches exactly the page 'Pear'."""
    wanted = pywikibot.Page(self.site, 'Template:Taxobox')
    matcher = _MultiTemplateMatchBuilder(self.site).search_any_predicate(
        [wanted])
    found = list(XMLDumpPageGenerator(
        filename=join_xml_data_path('article-pear-0.10.xml'),
        site=self.site,
        text_predicate=matcher))
    self.assertEqual(len(found), 1)
    self.assertPagelistTitles(found, ['Pear'], site=self.site)
def test_nested_match(self):
    """Check that 'Template:boo' is found inside another template."""
    wanted = pywikibot.Page(self.site, 'Template:boo')
    matcher = _MultiTemplateMatchBuilder(self.site).search_any_predicate(
        [wanted])
    found = list(XMLDumpPageGenerator(
        filename=join_xml_data_path('dummy-template.xml'),
        site=self.site,
        text_predicate=matcher))
    self.assertEqual(len(found), 1)
    self.assertPagelistTitles(found, ['Fake page with nested template'],
                              site=self.site)
def test_match_msg(self):
    """Check that 'Template:Foo' matches a page using {{msg:..}}."""
    wanted = pywikibot.Page(self.site, 'Template:Foo')
    matcher = _MultiTemplateMatchBuilder(self.site).search_any_predicate(
        [wanted])
    found = list(XMLDumpPageGenerator(
        filename=join_xml_data_path('dummy-template.xml'),
        site=self.site,
        text_predicate=matcher))
    self.assertEqual(len(found), 1)
    self.assertPagelistTitles(found, ['Fake page with msg'],
                              site=self.site)
def test_match_msg(self):
    """Look up 'Template:Foo' in 'dummy-template.xml' ({{msg:..}} form)."""
    foo_template = pywikibot.Page(self.site, 'Template:Foo')
    match_builder = _MultiTemplateMatchBuilder(self.site)
    text_filter = match_builder.search_any_predicate([foo_template])

    dump_file = join_xml_data_path('dummy-template.xml')
    yielded = list(XMLDumpPageGenerator(filename=dump_file,
                                        site=self.site,
                                        text_predicate=text_filter))
    self.assertEqual(len(yielded), 1)
    self.assertPageTitlesEqual(yielded, ['Fake page with msg'],
                               site=self.site)
def test_simple_bare_refs(self):
    """Check several equivalent ways of selecting namespaces 0 and 1.

    Each variant runs as its own sub-test and must yield the same two
    page titles from 'dummy-reflinks.xml'.
    """
    xml_path = join_xml_data_path('dummy-reflinks.xml')
    for variant in (None, [], [0, 1], ['0', '1']):
        with self.subTest(namespaces=variant):
            found = list(XmlDumpPageGenerator(filename=xml_path,
                                              start='Fake page',
                                              namespaces=variant,
                                              site=self.site))
            self.assertPageTitlesEqual(found,
                                       ('Fake page', 'Talk:Fake page'),
                                       site=self.site)
def test_match_unnecessary_template_prefix(self):
    """Check that 'Template:Bar' matches a page using {{template:..}}."""
    wanted = pywikibot.Page(self.site, 'Template:Bar')
    matcher = _MultiTemplateMatchBuilder(self.site).search_any_predicate(
        [wanted])
    found = list(XMLDumpPageGenerator(
        filename=join_xml_data_path('dummy-template.xml'),
        site=self.site,
        text_predicate=matcher))
    self.assertEqual(len(found), 1)
    expected_titles = ['Fake page with unnecessary template prefix']
    self.assertPagelistTitles(found, expected_titles, site=self.site)
class TestFileShaCalculator(TestCase):

    r"""Test calculator of sha of a file.

    Each test accepts two digests: the first is for the data file with
    Unix line endings, the second for the same file checked out with
    Windows line endings (\r\n).
    """

    net = False

    filename = join_xml_data_path('article-pear-0.10.xml')

    def setUp(self):
        """Setup tests."""
        super(TestFileShaCalculator, self).setUp()

    def test_md5_complete_calculation(self):
        """Test md5 of complete file."""
        res = tools.compute_file_hash(self.filename, sha='md5')
        self.assertIn(res, (
            '5d7265e290e6733e1e2020630262a6f3',
            '2c941f2fa7e6e629d165708eb02b67f7',
        ))

    def test_md5_partial_calculation(self):
        """Test md5 of partial file (1024 bytes)."""
        res = tools.compute_file_hash(self.filename, sha='md5',
                                      bytes_to_read=1024)
        self.assertIn(res, (
            'edf6e1accead082b6b831a0a600704bc',
            'be0227b6d490baa49e6d7e131c7f596b',
        ))

    def test_sha1_complete_calculation(self):
        """Test sha1 of complete file."""
        res = tools.compute_file_hash(self.filename, sha='sha1')
        self.assertIn(res, (
            '1c12696e1119493a625aa818a35c41916ce32d0c',
            '146121e6d0461916c9a0fab00dc718acdb6a6b14',
        ))

    def test_sha1_partial_calculation(self):
        """Test sha1 of partial file (1024 bytes)."""
        res = tools.compute_file_hash(self.filename, sha='sha1',
                                      bytes_to_read=1024)
        self.assertIn(res, (
            'e56fa7bd5cfdf6bb7e2d8649dd9216c03e7271e6',
            '617ce7d539848885b52355ed597a042dae1e726f',
        ))

    def test_sha224_complete_calculation(self):
        """Test sha224 of complete file."""
        res = tools.compute_file_hash(self.filename, sha='sha224')
        self.assertIn(res, (
            '3d350d9d9eca074bd299cb5ffe1b325a9f589b2bcd7ba1c033ab4d33',
            '4a2cf33b7da01f7b0530b2cc624e1180c8651b20198e9387aee0c767',
        ))

    def test_sha224_partial_calculation(self):
        """Test sha224 of partial file (1024 bytes)."""
        res = tools.compute_file_hash(self.filename, sha='sha224',
                                      bytes_to_read=1024)
        self.assertIn(res, (
            'affa8cb79656a9b6244a079f8af91c9271e382aa9d5aa412b599e169',
            '486467144e683aefd420d576250c4cc984e6d7bf10c85d36e3d249d2',
        ))
def test_xml_start_variants(self):
    """Check several spellings of the '-xmlstart' option.

    Each spelling runs as its own sub-test and must yield only
    'Talk:Fake page' when combined with '-namespace:1'.
    """
    xml_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
    start_options = (
        '-xmlstart:Fake page',  # title
        '-xmlstart:Fake_page',  # underscore
        '-xmlstart:Fake',  # prefix
    )
    for start_option in start_options:
        with self.subTest(xmlstart=start_option), \
                suppress_warnings(WARN_SITE_OBJ, category=UserWarning):
            main(xml_option, '-namespace:1', start_option)
            found = list(self.constructor_kwargs['generator'])
            self.assertPageTitlesEqual(found, ['Talk:Fake page'],
                                       site=self.site)
def test_start_variants(self):
    """Check several forms of the ``start`` argument.

    Each form runs as its own sub-test and must yield the same two page
    titles from 'dummy-reflinks.xml'.
    """
    xml_path = join_xml_data_path('dummy-reflinks.xml')
    candidates = (
        None,  # None
        'Fake',  # prefix
        'Fake_page',  # underscore
    )
    for candidate in candidates:
        with self.subTest(start=candidate):
            found = list(XmlDumpPageGenerator(filename=xml_path,
                                              start=candidate,
                                              namespaces=[0, 1],
                                              site=self.site))
            self.assertPageTitlesEqual(found,
                                       ('Fake page', 'Talk:Fake page'),
                                       site=self.site)
class TestFileShaCalculator(TestCase):

    r"""Check the file hash calculator against known digests.

    Every test accepts one of two digests. The second one applies when
    the data file has windows line endings (\r\n).
    """

    net = False

    filename = join_xml_data_path('article-pear-0.10.xml')

    def setUp(self):
        """Delegate to the parent class set-up."""
        super().setUp()

    def test_md5_complete_calculation(self):
        """Hash the whole file with md5."""
        accepted = (
            '5d7265e290e6733e1e2020630262a6f3',
            '2c941f2fa7e6e629d165708eb02b67f7',
        )
        digest = tools.compute_file_hash(self.filename, sha='md5')
        self.assertIn(digest, accepted)

    def test_md5_partial_calculation(self):
        """Hash the first 1024 bytes of the file with md5."""
        accepted = (
            'edf6e1accead082b6b831a0a600704bc',
            'be0227b6d490baa49e6d7e131c7f596b',
        )
        digest = tools.compute_file_hash(self.filename, sha='md5',
                                         bytes_to_read=1024)
        self.assertIn(digest, accepted)

    def test_sha1_complete_calculation(self):
        """Hash the whole file with sha1."""
        accepted = (
            '1c12696e1119493a625aa818a35c41916ce32d0c',
            '146121e6d0461916c9a0fab00dc718acdb6a6b14',
        )
        digest = tools.compute_file_hash(self.filename, sha='sha1')
        self.assertIn(digest, accepted)

    def test_sha1_partial_calculation(self):
        """Hash the first 1024 bytes of the file with sha1."""
        accepted = (
            'e56fa7bd5cfdf6bb7e2d8649dd9216c03e7271e6',
            '617ce7d539848885b52355ed597a042dae1e726f',
        )
        digest = tools.compute_file_hash(self.filename, sha='sha1',
                                         bytes_to_read=1024)
        self.assertIn(digest, accepted)

    def test_sha224_complete_calculation(self):
        """Hash the whole file with sha224."""
        accepted = (
            '3d350d9d9eca074bd299cb5ffe1b325a9f589b2bcd7ba1c033ab4d33',
            '4a2cf33b7da01f7b0530b2cc624e1180c8651b20198e9387aee0c767',
        )
        digest = tools.compute_file_hash(self.filename, sha='sha224')
        self.assertIn(digest, accepted)

    def test_sha224_partial_calculation(self):
        """Hash the first 1024 bytes of the file with sha224."""
        accepted = (
            'affa8cb79656a9b6244a079f8af91c9271e382aa9d5aa412b599e169',
            '486467144e683aefd420d576250c4cc984e6d7bf10c85d36e3d249d2',
        )
        digest = tools.compute_file_hash(self.filename, sha='sha224',
                                         bytes_to_read=1024)
        self.assertIn(digest, accepted)
def test_xml_simple(self):
    """Check the generator when only the '-xml' option is given."""
    xml_option = '-xml:' + join_xml_data_path('dummy-reflinks.xml')
    main(xml_option)

    expected_titles = [u'Fake page', u'Talk:Fake page']
    self.assertPageTitlesCountEqual(self.constructor_args[0],
                                    expected_titles,
                                    site=self.get_site())
def _get_entries(self, filename, **kwargs):
    """Collect all entries that XmlDump parses from a test data file.

    The file name is looked up with ``join_xml_data_path``; keyword
    arguments are passed straight through to ``xmlreader.XmlDump``.
    """
    full_path = join_xml_data_path(filename)
    parsed = xmlreader.XmlDump(full_path, **kwargs).parse()
    return list(parsed)
def test_XmlDumpRedirect(self):
    """Test XmlDump correctly parsing whether a page is a redirect.

    The dump 'article-pyrus.xml' is parsed twice: once with
    ``allrevisions=True``, where only successful completion matters,
    and once with default options, whose first entry must report
    ``isredirect``.
    """
    # Not assigned on purpose: the previous code stored this result in
    # ``pages`` and replaced it immediately, so it was never read.
    self._get_entries('article-pyrus.xml', allrevisions=True)

    # Use the shared helper rather than repeating its body inline.
    pages = self._get_entries('article-pyrus.xml')
    self.assertTrue(pages[0].isredirect)
def setUpClass(cls):
    """Store the path of 'article-pyrus.xml' and its raw bytes on the class.

    Sets ``cls.base_file`` to the data file path and
    ``cls.original_content`` to the file content read in binary mode.
    """
    super(OpenArchiveTestCase, cls).setUpClass()
    pyrus_path = join_xml_data_path('article-pyrus.xml')
    cls.base_file = pyrus_path
    with open(pyrus_path, 'rb') as source:
        cls.original_content = source.read()