Exemple #1
0
 def test_partially_qualified_NS1_code(self):
     """Check that 'wikipedia:Talk:Main Page' parses to namespace 1.

     ``config`` is set to code 'en' and family 'wikisource' before the
     link is created; the parsed site must equal ``self.get_site()``.
     """
     config.mylang, config.family = 'en', 'wikisource'
     talk_link = Link('wikipedia:Talk:Main Page')
     talk_link.parse()
     # Site first, then title, then namespace.
     self.assertEqual(talk_link.site, self.get_site())
     self.assertEqual(talk_link.title, 'Main Page')
     self.assertEqual(talk_link.namespace, 1)
 def test_via_local_non_local(self):
     """Check that parsing 'de:translatewiki:Main Page' raises an error.

     The link is created with ``self.get_site('wp')`` as its source and
     ``parse`` must raise InvalidTitleError with the expected message.
     """
     non_local_link = Link('de:translatewiki:Main Page',
                           self.get_site('wp'))
     expected_message = (
         'de:translatewiki:Main Page links to a non local '
         'site i18n:i18n '
         'via an interwiki link to wikipedia:de')
     with self.assertRaisesRegex(InvalidTitleError, expected_message):
         non_local_link.parse()
Exemple #3
0
 def test_partially_qualified_NS0_family(self):
     """Check that 'en:Main Page' parses to namespace 0.

     ``config`` is set to code 'de' and family 'wikipedia' before the
     link is created; the parsed site must equal ``self.get_site()``.
     """
     config.mylang, config.family = 'de', 'wikipedia'
     main_link = Link('en:Main Page')
     main_link.parse()
     # Site first, then title, then namespace.
     self.assertEqual(main_link.site, self.get_site())
     self.assertEqual(main_link.title, 'Main Page')
     self.assertEqual(main_link.namespace, 0)
Exemple #4
0
 def test_fully_qualified_NS0_family(self):
     """Check that 'wikipedia:en:Main Page' parses to namespace 0.

     ``config`` is set to code 'en' and family 'wikisource'; the parsed
     site must equal ``self.get_site('enwp')``.
     """
     config.mylang, config.family = 'en', 'wikisource'
     main_link = Link('wikipedia:en:Main Page')
     main_link.parse()
     # Site first, then title, then namespace.
     self.assertEqual(main_link.site, self.get_site('enwp'))
     self.assertEqual(main_link.title, 'Main Page')
     self.assertEqual(main_link.namespace, 0)
Exemple #5
0
 def test_partially_qualified_NS1_family(self):
     """Check that 'en:Talk:Main Page' parses to namespace 1.

     ``config`` is set to code 'de' and family 'wikipedia'; the parsed
     site must equal ``self.get_site()``.
     """
     config.mylang, config.family = 'de', 'wikipedia'
     talk_link = Link('en:Talk:Main Page')
     talk_link.parse()
     # Site first, then title, then namespace.
     self.assertEqual(talk_link.site, self.get_site())
     self.assertEqual(talk_link.title, 'Main Page')
     self.assertEqual(talk_link.namespace, 1)
Exemple #6
0
 def test_fully_qualified_NS1_family(self):
     """Check that ':wikidata:testwiki:Talk:Q6' parses to namespace 1.

     ``config`` is set to code 'en' and family 'wikipedia'; the parsed
     site must equal ``self.get_site('test.wp')``.
     """
     config.mylang, config.family = 'en', 'wikipedia'
     talk_link = Link(':wikidata:testwiki:Talk:Q6')
     talk_link.parse()
     # Site first, then title, then namespace.
     self.assertEqual(talk_link.site, self.get_site('test.wp'))
     self.assertEqual(talk_link.title, 'Q6')
     self.assertEqual(talk_link.namespace, 1)
Exemple #7
0
 def test_fully_qualified_NS1_code(self):
     """Check that ':species:species:Talk:Main Page' parses to namespace 1.

     ``config`` is set to code 'en' and family 'wikipedia'; the parsed
     site must equal ``self.get_site()``.
     """
     config.mylang, config.family = 'en', 'wikipedia'
     talk_link = Link(':species:species:Talk:Main Page')
     talk_link.parse()
     # Site first, then title, then namespace.
     self.assertEqual(talk_link.site, self.get_site())
     self.assertEqual(talk_link.title, 'Main Page')
     self.assertEqual(talk_link.namespace, 1)
Exemple #8
0
 def test_fully_qualified_NS0_code(self):
     """Check that ':testwiki:wikidata:Q6' parses to namespace 0.

     ``config`` is set to code 'en' and family 'wikipedia'; the parsed
     site must equal ``self.get_site('wikidata')``.
     """
     config.mylang, config.family = 'en', 'wikipedia'
     item_link = Link(':testwiki:wikidata:Q6')
     item_link.parse()
     # Site first, then title, then namespace.
     self.assertEqual(item_link.site, self.get_site('wikidata'))
     self.assertEqual(item_link.title, 'Q6')
     self.assertEqual(item_link.namespace, 0)
Exemple #9
0
 def test_fully_qualified_NS1_family(self):
     """Check that ':wikipedia:en:Talk:Main Page' parses to namespace 1.

     ``config`` is set to code 'wikidata' and family 'wikidata'; the
     parsed site must equal ``self.get_site('enwp')``.
     """
     config.mylang, config.family = 'wikidata', 'wikidata'
     talk_link = Link(':wikipedia:en:Talk:Main Page')
     talk_link.parse()
     # Site first, then title, then namespace.
     self.assertEqual(talk_link.site, self.get_site('enwp'))
     self.assertEqual(talk_link.title, 'Main Page')
     self.assertEqual(talk_link.namespace, 1)
Exemple #10
0
 def test_fully_qualified_NS0_family(self):
     """Check that 'wikidata:testwiki:Q6' parses to namespace 0.

     ``config`` is set to code 'en' and family 'wikipedia'; the parsed
     site must equal ``pywikibot.Site('test', 'wikipedia')``.
     """
     config.mylang, config.family = 'en', 'wikipedia'
     item_link = Link('wikidata:testwiki:Q6')
     item_link.parse()
     # Site first, then title, then namespace.
     self.assertEqual(item_link.site,
                      pywikibot.Site('test', 'wikipedia'))
     self.assertEqual(item_link.title, 'Q6')
     self.assertEqual(item_link.namespace, 0)
Exemple #11
0
 def test_fully_qualified_NS1_code(self):
     """Check that 'en:wikipedia:Talk:Main Page' parses to namespace 4.

     ``config`` is set to code 'en' and family 'wikipedia'; the parsed
     site must equal ``self.get_site()`` and the title keeps its
     'Talk:' part.
     """
     config.mylang, config.family = 'en', 'wikipedia'
     project_link = Link('en:wikipedia:Talk:Main Page')
     project_link.parse()
     # Site first, then title, then namespace.
     self.assertEqual(project_link.site, self.get_site())
     self.assertEqual(project_link.title, 'Talk:Main Page')
     self.assertEqual(project_link.namespace, 4)
Exemple #12
0
 def test_fully_qualified_NS1(self):
     """Check that prefixed 'Talk:Q6' links parse to namespace 1.

     Each case pairs a link prefix with the argument handed to
     ``pywikibot.Site`` to build the expected site.
     """
     cases = (
         ('testwiki:wikidata', 'wikidata:wikidata'),
         ('wikidata:testwiki', 'wikipedia:test'),
     )
     for prefix, site_name in cases:
         with self.subTest(pattern=prefix):
             talk_link = Link(prefix + ':Talk:Q6')
             talk_link.parse()
             self.assertEqual(talk_link.site, pywikibot.Site(site_name))
             self.assertEqual(talk_link.title, 'Q6')
             self.assertEqual(talk_link.namespace, 1)
Exemple #13
0
 def test_py266_bug_exception(self):
     """Check that a title with combining characters raises UnicodeError.

     ``pywikibot.page.unicodedata`` is replaced with the standard
     ``unicodedata`` module before the link is created.
     """
     pywikibot.page.unicodedata = __import__('unicodedata')
     title = 'Li̍t-sṳ́'
     # Build the literal message first, then escape it for regex matching.
     expected_message = (
         'Link(%r, %s): combining characters detected, which '
         'are not supported by Pywikibot on Python 2.6.6. '
         'See https://phabricator.wikimedia.org/T102461' %
         (title, self.site))
     with self.assertRaisesRegex(UnicodeError,
                                 re.escape(expected_message)):
         Link(title, self.site)
Exemple #14
0
 def test_non_wiki_prefix(self):
     """Check a link whose interwiki prefix does not point to a wiki.

     The first access to ``link.site`` must raise SiteDefinitionError;
     afterwards the site name and the interwiki flag are verified.
     """
     non_wiki_link = Link('bugzilla:1337', source=self.site)
     # Bugzilla redirects to phab instead of returning JSON content.
     # api.Request._json_loads cannot detect that and raises a
     # SiteDefinitionError. The site object is created regardless, but
     # the title cannot be parsed.
     with self.assertRaises(SiteDefinitionError):
         non_wiki_link.site
     self.assertEqual(non_wiki_link.site.sitename, 'wikimedia:wikimedia')
     self.assertTrue(non_wiki_link._is_interwiki)
    def test_valid(self):
        """Test that valid titles are correctly normalized.

        Plain titles must be returned unchanged.  The extended cases pair
        a link text with the title expected from ``Link.title``; they
        cover namespace prefixes, a leading colon, URL encoding, HTML
        entities and whitespace.  The anchor (``|``) and section
        (``%23``) parts are checked separately at the end.
        """
        title_tests = [
            'Sandbox', 'A "B"', "A 'B'", '.com', '~', '"', "'",
            'Foo/.../Sandbox', 'Sandbox/...', 'A~~', 'X' * 252
        ]

        extended_title_tests = [
            ('Talk:Sandbox', 'Sandbox'),
            ('Talk:Foo:Sandbox', 'Foo:Sandbox'),
            ('File:Example.svg', 'Example.svg'),
            ('File_talk:Example.svg', 'Example.svg'),
            (':A', 'A'),
            # Full text is 257 characters, but only the title part matters
            ('Category:' + 'X' * 248, 'X' * 248),
            ('A%20B', 'A B'),
            # HTML entities in named, decimal and hexadecimal form.  The
            # inputs must stay encoded, otherwise the cases are identical
            # and entity decoding is not exercised at all.
            ('A &eacute; B', 'A é B'),
            ('A &#233; B', 'A é B'),
            ('A &#x00E9; B', 'A é B'),
            # NOTE(review): non-breaking space entities restored on the
            # assumption that they were decoded by accident - confirm.
            ('A &nbsp; B', 'A B'),
            ('A &#160; B', 'A B'),
        ]

        site = self.get_site()

        for title in title_tests:
            with self.subTest(title=title):
                self.assertEqual(Link(title, site).title, title)

        for link, title in extended_title_tests:
            with self.subTest(link=link, title=title):
                self.assertEqual(Link(link, site).title, title)

        # Text after the pipe is the anchor; it keeps its leading space.
        anchor_link = Link('A | B', site)
        self.assertEqual(anchor_link.title, 'A')
        self.assertEqual(anchor_link.anchor, ' B')

        # '%23' is a URL-encoded '#', which separates title and section.
        section_link = Link('A%23B', site)
        self.assertEqual(section_link.title, 'A')
        self.assertEqual(section_link.section, 'B')
    def test_non_wiki_prefix(self):
        """Check a link whose interwiki prefix does not point to a wiki.

        A TimeoutError on the first ``link.site`` access is tolerated;
        afterwards the site name and the interwiki flag are verified.
        """
        non_wiki_link = Link('bugzilla:1337', source=self.site)
        # Bugzilla redirects to phab instead of returning JSON content.
        # api.Request._json_loads cannot detect that and keeps retrying
        # because 'the server may be down'.

        # A timeout while loading the siteinfo is ignored: the site
        # object is created regardless, but the title cannot be parsed.
        with suppress(TimeoutError):
            non_wiki_link.site
        self.assertEqual(non_wiki_link.site.sitename,
                         'wikimedia:wikimedia')
        self.assertTrue(non_wiki_link._is_interwiki)
Exemple #17
0
 def test_fully_qualified_NS1_family(self):
     """Check how 'wikidata:testwiki:Talk:Q6' parses with en/wikipedia config.

     With ``show_failures`` set, the link must resolve to the test
     Wikipedia site, title 'Q6', namespace 1.  Otherwise the whole text
     is expected as a namespace 0 title on the English Wikipedia site.
     """
     config.mylang, config.family = 'en', 'wikipedia'
     talk_link = Link('wikidata:testwiki:Talk:Q6')
     talk_link.parse()
     if not show_failures:
         self.assertEqual(talk_link.site,
                          pywikibot.Site('en', 'wikipedia'))
         self.assertEqual(talk_link.title, 'Wikidata:testwiki:Talk:Q6')
         self.assertEqual(talk_link.namespace, 0)
     else:
         self.assertEqual(talk_link.site,
                          pywikibot.Site('test', 'wikipedia'))
         self.assertEqual(talk_link.title, 'Q6')
         self.assertEqual(talk_link.namespace, 1)
Exemple #18
0
 def test_fully_qualified_NS0_family(self):
     """Check how ':wikidata:testwiki:Q6' parses with en/wikipedia config.

     With ``show_failures`` set, the link must resolve to
     ``self.get_site('test.wp')`` with title 'Q6'.  Otherwise the whole
     text is expected as the title on ``self.get_site('enwp')``.  The
     namespace is 0 in both cases.
     """
     config.mylang, config.family = 'en', 'wikipedia'
     item_link = Link(':wikidata:testwiki:Q6')
     item_link.parse()
     if not show_failures:
         self.assertEqual(item_link.site, self.get_site('enwp'))
         self.assertEqual(item_link.title, 'Wikidata:testwiki:Q6')
         self.assertEqual(item_link.namespace, 0)
     else:
         self.assertEqual(item_link.site, self.get_site('test.wp'))
         self.assertEqual(item_link.title, 'Q6')
         self.assertEqual(item_link.namespace, 0)
Exemple #19
0
 def test_fully_qualified_NS1_code(self):
     """Check how 'testwiki:wikidata:Talk:Q6' parses with en/wikipedia config.

     Without ``show_failures``, parsing is expected to raise
     ``pywikibot.Error``.  With it, the link must resolve to the
     wikidata site, title 'Q6', namespace 1.
     """
     config.mylang, config.family = 'en', 'wikipedia'
     talk_link = Link('testwiki:wikidata:Talk:Q6')
     if not show_failures:
         with self.assertRaisesRegex(pywikibot.Error,
                                     'Family testwiki does not exist'):
             talk_link.parse()  # very bad
         return
     talk_link.parse()
     self.assertEqual(talk_link.site,
                      pywikibot.Site('wikidata', 'wikidata'))
     self.assertEqual(talk_link.title, 'Q6')
     self.assertEqual(talk_link.namespace, 1)
Exemple #20
0
 def test_fully_qualified_NS0_family(self):
     """Check how 'wikipedia:en:Main Page' parses with wikidata config.

     Without ``show_failures``, parsing is expected to raise
     ``pywikibot.NoSuchSite``.  With it, the link must resolve to
     ``self.get_site('enwp')``, namespace 0, title 'Main Page'.
     """
     config.mylang, config.family = 'wikidata', 'wikidata'
     main_link = Link('wikipedia:en:Main Page')
     if not show_failures:
         with self.assertRaisesRegex(
                 pywikibot.NoSuchSite,
                 'Language wikidata does not exist in family wikipedia'):
             main_link.parse()  # very bad
         return
     main_link.parse()
     self.assertEqual(main_link.site, self.get_site('enwp'))
     self.assertEqual(main_link.namespace, 0)
     self.assertEqual(main_link.title, 'Main Page')
Exemple #21
0
 def test_fully_qualified_NS1_code(self):
     """Check how 'en:wikipedia:Talk:Main Page' parses with wikidata config.

     With ``show_failures`` set, the link must resolve to
     ``self.get_site('enwp')``, title 'Talk:Main Page', namespace 4.
     Otherwise the whole text is expected as a namespace 0 title on
     ``self.get_site('wikidata')``.
     """
     config.mylang, config.family = 'wikidata', 'wikidata'
     project_link = Link('en:wikipedia:Talk:Main Page')
     project_link.parse()
     if show_failures:
         self.assertEqual(project_link.site, self.get_site('enwp'))
         self.assertEqual(project_link.title, 'Talk:Main Page')
         self.assertEqual(project_link.namespace, 4)
     else:
         self.assertEqual(project_link.site, self.get_site('wikidata'))
         self.assertEqual(project_link.title,
                          'En:wikipedia:Talk:Main Page')
         self.assertEqual(project_link.namespace, 0)
Exemple #22
0
 def test_fully_qualified_NS1(self):
     """Check that fully qualified 'Talk:Main Page' links are namespace 1.

     Both prefix orders (family:code and code:family) are tried, each
     with and without a leading colon; the link text is built from
     ``self.PATTERN``.
     """
     family, code = 'wikipedia', 'en'
     prefix_orders = [(family, code), (code, family)]
     for colon in ('', ':'):
         for first, second in prefix_orders:
             site_label = '{}:{}'.format(first, second)
             with self.subTest(colon=colon, site=site_label):
                 text = self.PATTERN.format(colon=colon,
                                            first=first,
                                            second=second,
                                            title='Talk:Main Page')
                 talk_link = Link(text)
                 talk_link.parse()
                 self.assertEqual(talk_link.site, self.get_site('enwp'))
                 self.assertEqual(talk_link.title, 'Main Page')
                 self.assertEqual(talk_link.namespace, 1)
Exemple #23
0
 def test_fully_qualified_NS1_code(self):
     """Test 'en:wikipedia:Talk:Main Page' with en/wikisource config.

     With ``show_failures`` set, the link must parse to
     ``self.get_site('enwp')``, title 'Main Page', namespace 1.
     Otherwise parsing must raise ``pywikibot.Error``, the link site
     must equal ``self.get_site('enws')`` and reading the title must
     raise the same error again.
     """
     config.mylang = 'en'
     config.family = 'wikisource'
     link = Link('en:wikipedia:Talk:Main Page')
     error_message = (
         "Improperly formatted interwiki link 'en:wikipedia:Talk:Main Page'")
     if show_failures:
         link.parse()
         self.assertEqual(link.site, self.get_site('enwp'))
         self.assertEqual(link.title, 'Main Page')
         self.assertEqual(link.namespace, 1)
     else:
         self.assertRaisesRegex(pywikibot.Error, error_message, link.parse)
         self.assertEqual(link.site, self.get_site('enws'))
         # A bare try/except would pass silently if no error were
         # raised, so require the exception explicitly.
         with self.assertRaises(pywikibot.Error) as caught:
             link.title
         self.assertEqual(str(caught.exception), error_message)
 def test_no_text(self):
     """Check that parsing an empty link text raises InvalidTitle."""
     empty_link = Link('', self.get_site())
     with self.assertRaisesRegex(InvalidTitle,
                                 'The link does not contain a page title'):
         empty_link.parse()
 def test_interwiki_namespace_without_title(self):
     """Check that parsing 'en:Help:' raises InvalidTitle (no title part)."""
     untitled_link = Link('en:Help:', self.get_site())
     with self.assertRaisesRegex(InvalidTitle, "'en:Help:' has no title."):
         untitled_link.parse()
 def test_no_change(self):
     """Check that a title with combining characters is kept as given.

     Relates to T102461 (Python issue 10254).
     """
     title = 'Li̍t-sṳ́'
     combining_link = Link(title, self.site)
     # Compare against a separate literal, not the input variable.
     self.assertEqual(combining_link.title, 'Li̍t-sṳ́')
    def test_invalid(self):
        """Check that every invalid title makes ``Link.parse`` raise InvalidTitle.

        Each table entry pairs a list of link texts with either a fixed
        regex or a callable that builds the expected message regex from
        the link text.
        """

        def generate_contains_illegal_chars_exc_regex(text):
            # Bad characters forbidden regardless of wgLegalTitleChars;
            # the reported character is the one at index 2 of the text.
            return (
                r'^(u|)\'{}\' contains illegal char\(s\) (u|)\'{}\'$'.format(
                    re.escape(text), re.escape(text[2])))

        def generate_contains_dot_combinations_exc_regex(text):
            # Directory navigation
            return (
                r'^\(contains \. / combinations\): (u|)\'{}\'$'.format(
                    re.escape(text)))

        def generate_contains_tilde_exc_regex(text):
            # Tilde
            return r'^\(contains ~~~\): (u|)\'%s\'$' % re.escape(text)

        def generate_overlength_exc_regex(text):
            # Overlength
            return r'^\(over 255 bytes\): (u|)\'%s\'$' % re.escape(text)

        def generate_has_no_title_exc_regex(text):
            # Namespace prefix without actual title; the message uses the
            # stripped text.
            return r'^(u|)\'{}\' has no title\.$'.format(
                re.escape(text.strip()))

        title_tests = [
            # Empty title
            (['', ':', '__  __',
              '  __  '], r'^The link does not contain a page title$'),
            (['A [ B', 'A ] B', 'A { B', 'A } B', 'A < B',
              'A > B'], generate_contains_illegal_chars_exc_regex),

            # URL encoding
            # %XX is understood by wikimedia but not %XXXX
            (['A%2523B'],
             r'^(u|)\'A%23B\' contains illegal char\(s\) (u|)\'%23\'$'),

            # A link is invalid if their (non-)talk page would be in another
            # namespace than the link's "other" namespace
            (['Talk:File:Example.svg'],
             r'The \(non-\)talk page of (u|)\'Talk:File:Example.svg\''
             r' is a valid title in another namespace.'),
            ([
                '.', '..', './Sandbox', '../Sandbox', 'Foo/./Sandbox',
                'Foo/../Sandbox', 'Sandbox/.', 'Sandbox/..'
            ], generate_contains_dot_combinations_exc_regex),
            (['A ~~~ Name', 'A ~~~~ Signature',
              'A ~~~~~ Timestamp'], generate_contains_tilde_exc_regex),
            ([('x' * 256),
              ('Invalid:' + 'X' * 248)], generate_overlength_exc_regex),
            (['Talk:', 'Category: ',
              'Category: #bar'], generate_has_no_title_exc_regex),
        ]

        for samples, expected in title_tests:
            for text in samples:
                with self.subTest(title=text):
                    # Callables derive the regex from the text under test.
                    pattern = (expected(text) if callable(expected)
                               else expected)
                    with self.assertRaisesRegex(InvalidTitle, pattern):
                        Link(text, self.get_site()).parse()
Exemple #28
0
 def test_invalid(self):
     """Check that each invalid link text makes ``Link.parse`` raise InvalidTitle."""
     invalid_texts = [
         '', ':', '__  __', '  __  ',
         # Bad characters forbidden regardless of wgLegalTitleChars
         'A [ B', 'A ] B', 'A { B', 'A } B', 'A < B', 'A > B',
         # URL encoding
         # %XX is understood by wikimedia but not %XXXX
         'A%2523B',
         # A link is invalid if their (non-)talk page would be in another
         # namespace than the link's "other" namespace
         'Talk:File:Example.svg',
         # Directory navigation
         '.', '..', './Sandbox', '../Sandbox', 'Foo/./Sandbox',
         'Foo/../Sandbox', 'Sandbox/.', 'Sandbox/..',
         # Tilde
         'A ~~~ Name', 'A ~~~~ Signature', 'A ~~~~~ Timestamp',
         # Overlength
         'x' * 256, 'Invalid:' + 'X' * 248,
         # Namespace prefix without actual title
         'Talk:', 'Category: ', 'Category: #bar',
     ]
     # Checked in order; the first text that does not raise fails the test.
     for text in invalid_texts:
         self.assertRaises(InvalidTitle, Link(text, self.get_site()).parse)
Exemple #29
0
    def test_valid(self):
        """Check that valid link texts yield the expected ``Link.title``.

        The table pairs each link text with its expected title; the anchor
        and section parts are checked separately afterwards.
        """
        expectations = [
            ('Sandbox', 'Sandbox'),
            ('A "B"', 'A "B"'),
            ("A 'B'", "A 'B'"),
            ('.com', '.com'),
            ('~', '~'),
            ('"', '"'),
            ("'", "'"),
            ('Talk:Sandbox', 'Sandbox'),
            ('Talk:Foo:Sandbox', 'Foo:Sandbox'),
            ('File:Example.svg', 'Example.svg'),
            ('File_talk:Example.svg', 'Example.svg'),
            ('Foo/.../Sandbox', 'Foo/.../Sandbox'),
            ('Sandbox/...', 'Sandbox/...'),
            ('A~~', 'A~~'),
            (':A', 'A'),
            # Length is 256 total, but only title part matters
            ('Category:' + 'X' * 248, 'X' * 248),
            ('X' * 252, 'X' * 252),
            ('A%20B', 'A B'),
            ('A &eacute; B', u'A é B'),
            ('A &#233; B', u'A é B'),
            ('A &#x00E9; B', u'A é B'),
        ]
        # Checked in order; the first mismatch fails the test.
        for text, expected_title in expectations:
            self.assertEqual(Link(text, self.get_site()).title,
                             expected_title)

        # Text after the pipe is the anchor; it keeps its leading space.
        anchor_link = Link('A | B', self.get_site())
        self.assertEqual(anchor_link.title, 'A')
        self.assertEqual(anchor_link.anchor, ' B')

        # '%23' is a URL-encoded '#', which separates title and section.
        section_link = Link('A%23B', self.get_site())
        self.assertEqual(section_link.title, 'A')
        self.assertEqual(section_link.section, 'B')
Exemple #30
0
 def test_other_wiki_prefix(self):
     """Check a link whose interwiki prefix 'bulba' is not a known family.

     The link is expected to parse: the title gets a capitalised first
     letter, the site name is 'bulba:bulba' and the link is flagged as
     interwiki.
     """
     bulba_link = Link('bulba:title on auto-generated Site',
                       source=self.site)
     # Title first, then site name, then the interwiki flag.
     self.assertEqual(bulba_link.title, 'Title on auto-generated Site')
     self.assertEqual(bulba_link.site.sitename, 'bulba:bulba')
     self.assertTrue(bulba_link._is_interwiki)