def update_family(families):
    """Update the ``languages_by_size`` list of family files.

    For each family the language codes are read from the XML feed at
    ``URL``, compared with the family's current ``languages_by_size``
    and, if the lists differ, the new list is printed and written into
    ``pywikibot/families/<family>_family.py``.

    @param families: names of the families to check; if empty, every
        key of ``familiesDict`` is checked
    """
    for family in families or familiesDict.keys():
        pywikibot.output('\nChecking family %s:' % family)

        original = Family.load(family).languages_by_size
        obsolete = Family.load(family).obsolete

        # Release the HTTP response even if parsing the feed fails.
        feed = urlopen(URL % familiesDict[family])
        try:
            tree = cElementTree.parse(feed)
        finally:
            feed.close()

        # Collect the codes of all "prefix" fields, skipping obsolete
        # codes and the configured exceptions.
        new = []
        for field in tree.findall('row/field'):
            if field.get('name') != 'prefix':
                continue
            code = field.text
            if not (code in obsolete or code in exceptions):
                new.append(code)

        # put the missing languages to the right place
        missing = original != new and set(original) - set(new)
        if missing:
            pywikibot.output(u"WARNING: ['%s'] not listed at wikistats."
                             % "', '".join(missing))
            # Map the position in the original list to the missing code.
            index = {}
            for code in missing:
                index[original.index(code)] = code
            # Insert from the highest position down; ``key - i`` leaves
            # room for the ``i`` missing codes that are still to be
            # inserted in front of the current one.
            i = len(index) - 1
            for key in sorted(index.keys(), reverse=True):
                new.insert(key - i, index[key])
                i -= 1

        if original == new:
            pywikibot.output(u'The lists match!')
            continue

        pywikibot.output(u"The lists don't match, the new list is:")
        # NOTE(review): a code is still appended when
        # len(line) + len(code) == 76, so a generated line can reach
        # 80 characters.
        text = u'        self.languages_by_size = [\r\n'
        line = ' ' * 11
        for code in new:
            if len(line) + len(code) > 76:
                text += u'%s\r\n' % line
                line = ' ' * 11
            line += u" '%s'," % code
        text += u'%s\r\n' % line
        text += u'        ]'
        pywikibot.output(text)

        # Replace the first languages_by_size assignment of the family
        # file; context managers make sure both handles are closed.
        family_file_name = 'pywikibot/families/%s_family.py' % family
        with codecs.open(family_file_name, 'r', 'utf8') as family_file:
            family_text = family_file.read()
        old = re.findall(r'(?msu)^ {8}self.languages_by_size.+?\]',
                         family_text)[0]
        family_text = family_text.replace(old, text)
        with codecs.open(family_file_name, 'w', 'utf8') as family_file:
            family_file.write(family_text)
def update_family(families):
    """Update the ``languages_by_size`` list of family files.

    For each family the language codes are read from the XML feed at
    ``URL``, compared with the family's current ``languages_by_size``
    and, if the lists differ, the new list is printed and written into
    ``pywikibot/families/<family>_family.py``.

    @param families: names of the families to check; if empty, every
        key of ``familiesDict`` is checked
    """
    for family in families or familiesDict.keys():
        pywikibot.output('\nChecking family %s:' % family)

        original = Family.load(family).languages_by_size
        obsolete = Family.load(family).obsolete

        # Release the HTTP response even if parsing the feed fails.
        feed = urlopen(URL % familiesDict[family])
        try:
            tree = cElementTree.parse(feed)
        finally:
            feed.close()

        # Collect the codes of all "prefix" fields, skipping obsolete
        # codes and the configured exceptions.
        new = []
        for field in tree.findall('row/field'):
            if field.get('name') != 'prefix':
                continue
            code = field.text
            if not (code in obsolete or code in exceptions):
                new.append(code)

        # put the missing languages to the right place
        missing = original != new and set(original) - set(new)
        if missing:
            pywikibot.output(u"WARNING: ['%s'] not listed at wikistats."
                             % "', '".join(missing))
            # Map the position in the original list to the missing code.
            index = {}
            for code in missing:
                index[original.index(code)] = code
            # Insert from the highest position down; ``key - i`` leaves
            # room for the ``i`` missing codes that are still to be
            # inserted in front of the current one.
            i = len(index) - 1
            for key in sorted(index.keys(), reverse=True):
                new.insert(key - i, index[key])
                i -= 1

        if original == new:
            pywikibot.output(u'The lists match!')
            continue

        pywikibot.output(u"The lists don't match, the new list is:")
        # NOTE(review): a code is still appended when
        # len(line) + len(code) == 76, so a generated line can reach
        # 80 characters.
        text = u'        self.languages_by_size = [\r\n'
        line = ' ' * 11
        for code in new:
            if len(line) + len(code) > 76:
                text += u'%s\r\n' % line
                line = ' ' * 11
            line += u" '%s'," % code
        text += u'%s\r\n' % line
        text += u'        ]'
        pywikibot.output(text)

        # Replace the first languages_by_size assignment of the family
        # file; context managers make sure both handles are closed.
        family_file_name = 'pywikibot/families/%s_family.py' % family
        with codecs.open(family_file_name, 'r', 'utf8') as family_file:
            family_text = family_file.read()
        old = re.findall(r'(?msu)^ {8}self.languages_by_size.+?\]',
                         family_text)[0]
        family_text = family_text.replace(old, text)
        with codecs.open(family_file_name, 'w', 'utf8') as family_file:
            family_file.write(family_text)
def update_family(families):
    """Update the ``languages_by_size`` list of family files.

    For each family the codes returned by
    ``WikiStats.languages_by_size`` are compared with the family's
    current ``languages_by_size``. If the lists differ, the new list is
    printed and written into ``pywikibot/families/<family>_family.py``.

    @param families: names of the families to check; if empty, every
        entry of ``families_list`` is checked
    """
    ws = wikistats.WikiStats()
    for family in families or families_list:
        pywikibot.output('\nChecking family %s:' % family)

        # Build a filtered copy instead of calling remove() on the list
        # obtained from the Family object, so that object's own list is
        # left untouched.
        original = [code for code in Family.load(family).languages_by_size
                    if code not in exceptions]
        obsolete = Family.load(family).obsolete

        new = [code for code in ws.languages_by_size(family)
               if not (code in obsolete or code in exceptions)]

        # put the missing languages to the right place
        missing = original != new and set(original) - set(new)
        if missing:
            pywikibot.warning("['%s'] not listed at wikistats."
                              % "', '".join(missing))
            # Map the position in the original list to the missing code.
            index = {}
            for code in missing:
                index[original.index(code)] = code
            # Insert from the highest position down; ``key - i`` leaves
            # room for the ``i`` missing codes that are still to be
            # inserted in front of the current one.
            i = len(index) - 1
            for key in sorted(index.keys(), reverse=True):
                new.insert(key - i, index[key])
                i -= 1

        if original == new:
            pywikibot.output(u'The lists match!')
            continue

        pywikibot.output(u"The lists don't match, the new list is:")
        # NOTE(review): a code is still appended when
        # len(line) + len(code) == 76, so a generated line can reach
        # 80 characters.
        text = u'        self.languages_by_size = [\r\n'
        line = ' ' * 11
        for code in new:
            if len(line) + len(code) > 76:
                text += u'%s\r\n' % line
                line = ' ' * 11
            line += u" '%s'," % code
        text += u'%s\r\n' % line
        text += u'        ]'
        pywikibot.output(text)

        # Replace the first languages_by_size assignment of the family
        # file; context managers make sure both handles are closed.
        family_file_name = 'pywikibot/families/%s_family.py' % family
        with codecs.open(family_file_name, 'r', 'utf8') as family_file:
            family_text = family_file.read()
        old = re.findall(r'(?msu)^ {8}self.languages_by_size.+?\]',
                         family_text)[0]
        family_text = family_text.replace(old, text)
        with codecs.open(family_file_name, 'w', 'utf8') as family_file:
            family_file.write(family_text)
def update_family(families):
    """Update the ``languages_by_size`` list of family files.

    For each family the codes returned by
    ``WikiStats.languages_by_size`` are compared with the family's
    current ``languages_by_size``. If the lists differ, the new list is
    printed and written into ``pywikibot/families/<family>_family.py``.

    @param families: names of the families to check; if empty, every
        entry of ``families_list`` is checked
    """
    ws = wikistats.WikiStats()
    for family in families or families_list:
        pywikibot.output('\nChecking family %s:' % family)

        # Build a filtered copy instead of calling remove() on the list
        # obtained from the Family object, so that object's own list is
        # left untouched.
        original = [code for code in Family.load(family).languages_by_size
                    if code not in exceptions]
        obsolete = Family.load(family).obsolete

        new = [code for code in ws.languages_by_size(family)
               if not (code in obsolete or code in exceptions)]

        # put the missing languages to the right place
        missing = original != new and set(original) - set(new)
        if missing:
            pywikibot.warning("['%s'] not listed at wikistats."
                              % "', '".join(missing))
            # Map the position in the original list to the missing code.
            index = {}
            for code in missing:
                index[original.index(code)] = code
            # Insert from the highest position down; ``key - i`` leaves
            # room for the ``i`` missing codes that are still to be
            # inserted in front of the current one.
            i = len(index) - 1
            for key in sorted(index.keys(), reverse=True):
                new.insert(key - i, index[key])
                i -= 1

        if original == new:
            pywikibot.output(u'The lists match!')
            continue

        pywikibot.output(u"The lists don't match, the new list is:")
        # Start a new output line as soon as appending the next code
        # would make the current one longer than 79 characters.
        text = '        self.languages_by_size = [\n'
        line = ' ' * 11
        for code in new:
            if len(line) + len(code) >= 76:
                text += '%s\n' % line
                line = ' ' * 11
            line += u" '%s'," % code
        text += '%s\n' % line
        text += u'        ]'
        pywikibot.output(text)

        # Replace the first languages_by_size assignment of the family
        # file; context managers make sure both handles are closed.
        family_file_name = 'pywikibot/families/%s_family.py' % family
        with codecs.open(family_file_name, 'r', 'utf8') as family_file:
            family_text = family_file.read()
        old = re.findall(r'(?msu)^ {8}self.languages_by_size.+?\]',
                         family_text)[0]
        family_text = family_text.replace(old, text)
        with codecs.open(family_file_name, 'w', 'utf8') as family_file:
            family_file.write(family_text)
Exemple #5
0
def update_family(families):
    """Update the ``languages_by_size`` list of family files.

    For each family the codes returned by
    ``WikiStats.languages_by_size`` are compared with the family's
    current ``languages_by_size``. If the lists differ, the new list is
    printed and written into ``pywikibot/families/<family>_family.py``.

    @param families: names of the families to check; if empty, every
        entry of ``families_list`` is checked
    """
    ws = wikistats.WikiStats()
    for family in families or families_list:
        pywikibot.output('\nChecking family {}:'.format(family))

        skipped = exceptions.get(family, [])
        # Build a filtered copy instead of calling remove() on the list
        # obtained from the Family object, so that object's own list is
        # left untouched.
        original = [code for code in Family.load(family).languages_by_size
                    if code not in skipped]
        obsolete = Family.load(family).obsolete

        new = [code for code in ws.languages_by_size(family)
               if not (code in obsolete or code in skipped)]

        # put the missing languages to the right place
        missing = original != new and set(original) - set(new)
        if missing:
            pywikibot.warning("['{}'] not listed at wikistats.".format(
                "', '".join(missing)))
            # Map the position in the original list to the missing code.
            index = {}
            for code in missing:
                index[original.index(code)] = code
            # Insert from the highest position down; ``key - i`` leaves
            # room for the ``i`` missing codes that are still to be
            # inserted in front of the current one.
            i = len(index) - 1
            for key in sorted(index.keys(), reverse=True):
                new.insert(key - i, index[key])
                i -= 1

        if original == new:
            pywikibot.output('The lists match!')
            continue

        pywikibot.output("The lists don't match, the new list is:")
        # Start a new output line as soon as appending the next code
        # would make the current one longer than 79 characters.
        text = '    languages_by_size = [\n'
        line = ' ' * 7
        for code in new:
            if len(line) + len(code) >= 76:
                text += line + '\n'
                line = ' ' * 7
            line += " '{}',".format(code)
        text += line + '\n'
        text += '    ]'
        pywikibot.output(text)

        family_file_name = 'pywikibot/families/{}_family.py'.format(family)
        with codecs.open(family_file_name, 'r', 'utf8') as family_file:
            family_text = family_file.read()
        # A callable replacement inserts ``text`` literally (no backslash
        # or group-reference processing); ``count`` is passed by keyword
        # because passing it positionally is deprecated.
        family_text = re.sub(r'(?ms)^ {4}languages_by_size.+?\]',
                             lambda match: text, family_text, count=1)
        with codecs.open(family_file_name, 'w', 'utf8') as family_file:
            family_file.write(family_text)
Exemple #6
0
def _code_fam_from_url(url):
    """Return the cached ``(code, family)`` pair for a site URL.

    Site helper method. On a cache miss every family listed in
    ``config.family_files`` is asked via ``from_url`` whether it
    recognises the URL; the first match is stored in ``_url_cache``.

    @param url: The site URL to get code and family
    @type url: str
    @raises SiteDefinitionError: Unknown URL
    """
    if url in _url_cache:
        return _url_cache[url]

    # Ask every configured family whether it knows the given URL.
    candidates = []
    for fam_name in config.family_files:
        fam = Family.load(fam_name)
        site_code = fam.from_url(url)
        if site_code is None:
            continue
        candidates.append((site_code, fam))

    if not candidates:
        # TODO: As soon as AutoFamily is ready, try and use an
        #       AutoFamily
        raise SiteDefinitionError("Unknown URL '{0}'.".format(url))

    if len(candidates) > 1:
        listing = ', '.join(map(str, candidates))
        warning('Found multiple matches for URL "{0}": {1} (use first)'
                .format(url, listing))

    _url_cache[url] = candidates[0]
    return _url_cache[url]
Exemple #7
0
 def test_WikimediaFamily_obsolete_readonly(self):
     """Assigning ``obsolete`` on the wikipedia family raises TypeError."""
     expected_msg = "'frozenset' object does not support item assignment"
     replacement = {'a': 'b', 'c': None}
     wikipedia = Family.load('wikipedia')
     with self.assertRaisesRegex(TypeError, expected_msg):
         wikipedia.obsolete = replacement
Exemple #8
0
 def test_eq_family_with_string_repr_not_existing_family(self):
     """Comparing a Family with an unknown family name raises an error."""
     expected_msg = 'Family unknown does not exist'
     wikipedia = Family.load('wikipedia')
     with self.assertRaisesRegex(UnknownFamilyError, expected_msg):
         wikipedia.__eq__('unknown')
Exemple #9
0
def _code_fam_from_url(url: str, name: Optional[str] = None):
    """Determine the ``(code, family)`` pair matching a site URL.

    Site helper method. Every family listed in ``config.family_files``
    is asked via ``from_url`` whether it recognises the URL. If none
    does, an ``AutoFamily`` is created for the URL instead.

    @param url: The site URL to get code and family
    @param name: A family name used by AutoFamily
    @return: the first matching ``(code, family)`` tuple
    """
    # Ask every configured family whether it knows the given URL.
    candidates = []
    for fam in map(Family.load, config.family_files):
        site_code = fam.from_url(url)
        if site_code is not None:
            candidates.append((site_code, fam))

    if not candidates:
        if not name:  # create a name from url
            # NOTE(review): a host without any dot makes [-2] raise
            # IndexError here.
            host_labels = urlparse(url).netloc.split('.')
            name = removesuffix(host_labels[-2], 'wiki')
        auto_family = AutoFamily(name, url)
        candidates.append((auto_family.code, auto_family))

    if len(candidates) > 1:
        listing = ', '.join(map(str, candidates))
        warning('Found multiple matches for URL "{}": {} (use first)'.format(
            url, listing))
    return candidates[0]
Exemple #10
0
 def test_WikimediaFamily_obsolete_readonly(self):
     """Setting ``obsolete`` on the test family raises TypeError."""
     test_family = Family.load('test')
     with self.assertRaises(TypeError):
         test_family.__setattr__('obsolete', {'a': 'b', 'c': None})
 def test_get_obsolete_test(self):
     """Check the default obsolete data of the test family."""
     test_family = Family.load('test')
     # 'dk' must be known as obsolete code and as interwiki replacement
     for attr in ('obsolete', 'interwiki_replacements'):
         self.assertIn('dk', getattr(test_family, attr))
     self.assertEqual(test_family.obsolete,
                      test_family.interwiki_replacements)
     self.assertEqual(test_family.interwiki_removals, set())
    def test_from_url_wikipedia_extra(self):
        """Test various URLs against wikipedia regex.

        URLs which must resolve to the code 'vo' and URLs which must be
        rejected are kept in two tables; the URL is passed as assertion
        message so a failure names the offending input.
        """
        f = Family.load('wikipedia')

        prefix = 'https://vo.wikipedia.org'

        accepted = (
            prefix + '/wiki/',
            prefix + '/w/index.php',
            prefix + '/w/index.php/',
            prefix + '/w/index.php?title=$1',
            prefix + '/wiki/$1',
            '//vo.wikipedia.org/wiki/$1',
            '//vo.wikipedia.org/wiki/$1/foo',
            prefix + '/w/index.php/$1',
        )
        for url in accepted:
            self.assertEqual(f.from_url(url), 'vo', url)

        rejected = (
            # wrong protocol
            'http://vo.wikipedia.org/wiki/$1',
            'ftp://vo.wikipedia.org/wiki/$1',
            # wrong code
            'https://foobar.wikipedia.org/wiki/$1',
            # wrong family
            'https://vo.wikibooks.org/wiki/$1',
            'http://vo.wikibooks.org/wiki/$1',
            # invalid path
            'https://vo.wikipedia.org/wik/$1',
            'https://vo.wikipedia.org/index.php/$1',
        )
        for url in rejected:
            self.assertIsNone(f.from_url(url), url)
Exemple #13
0
 def test_get_obsolete_test(self):
     """Check the default obsolete data of the test family."""
     test_family = Family.load('test')
     # 'dk' must be known as obsolete code and as interwiki replacement
     for attr in ('obsolete', 'interwiki_replacements'):
         self.assertIn('dk', getattr(test_family, attr))
     self.assertEqual(test_family.obsolete,
                      test_family.interwiki_replacements)
     self.assertEqual(test_family.interwiki_removals, set())
Exemple #14
0
def _code_fam_from_url(url):
    """Return the cached ``(code, family)`` pair for a site URL.

    Site helper method. On a cache miss every family listed in
    ``config.family_files`` except 'test' is asked via ``from_url``
    whether it recognises the URL; the first match is stored in
    ``_url_cache``.

    @param url: The site URL to get code and family
    @type url: str
    @raises SiteDefinitionError: Unknown URL
    """
    if url in _url_cache:
        return _url_cache[url]

    # Ask every configured family whether it knows the given URL.
    candidates = []
    for fam_name in config.family_files:
        if fam_name != 'test':  # test_family.py is deprecated
            fam = Family.load(fam_name)
            site_code = fam.from_url(url)
            if site_code is not None:
                candidates.append((site_code, fam))

    if not candidates:
        # TODO: As soon as AutoFamily is ready, try and use an
        #       AutoFamily
        raise SiteDefinitionError("Unknown URL '{0}'.".format(url))

    if len(candidates) > 1:
        listing = ', '.join(map(str, candidates))
        warning('Found multiple matches for URL "{0}": {1} (use first)'
                .format(url, listing))

    _url_cache[url] = candidates[0]
    return _url_cache[url]
Exemple #15
0
 def test_obsolete_readonly(self):
     """The obsolete mapping of the test family rejects modification."""
     test_family = Family.load('test')
     # bulk update must fail ...
     with self.assertRaisesRegex(TypeError, self.FAMILY_TYPEERROR_RE):
         test_family.obsolete.update({})
     # ... and so must setting a single item
     with self.assertRaisesRegex(TypeError, self.FAMILY_TYPEERROR_RE):
         test_family.obsolete.__setitem__('a', 'b')
Exemple #16
0
 def test_family_load_valid(self):
     """Check basic attributes of every family loaded via Family.load."""
     for name in pywikibot.config.family_files:
         fam = Family.load(name)

         # langs: a non-empty dict
         self.assertIsInstance(fam.langs, dict)
         self.assertTrue(fam.langs)

         # codes: non-empty, iterable, made of strings
         self.assertTrue(fam.codes)
         self.assertTrue(iter(fam.codes))
         self.assertIsInstance(next(iter(fam.codes)), basestring)

         # domains: non-empty, iterable, dotted unless localhost
         self.assertTrue(fam.domains)
         self.assertTrue(iter(fam.domains))
         for domain in fam.domains:
             self.assertIsInstance(domain, basestring)
             host = domain.split(':', 1)[0]
             if host != 'localhost':
                 self.assertIn('.', domain)

         self.assertEqual(fam.name, name)

         # languages_by_size: a list covered by the keys of langs
         self.assertIsInstance(fam.languages_by_size, list)
         self.assertGreaterEqual(set(fam.langs),
                                 set(fam.languages_by_size))
         may_be_empty = (len(fam.langs) <= 2
                         or fam.name in ['wikimediachapter', 'vikidia'])
         if not may_be_empty:
             self.assertNotEqual(fam.languages_by_size, [])

         if isinstance(fam, SingleSiteFamily):
             self.assertIsNotNone(fam.code)
             self.assertIsNotNone(fam.domain)
             only_code = {fam.code}
             self.assertEqual(set(fam.langs), only_code)
             self.assertEqual(set(fam.codes), only_code)
    def test_family_load_valid(self):
        """Check basic attributes of every family loaded via Family.load."""
        deprecation_msg = ('wowwiki_family.Family.languages_by_size '
                           'is deprecated')
        for name in pywikibot.config.family_files:
            with self.subTest(family=name):
                fam = Family.load(name)

                # langs: a non-empty dict
                self.assertIsInstance(fam.langs, dict)
                self.assertTrue(fam.langs)

                # codes: non-empty, iterable, made of strings
                self.assertTrue(fam.codes)
                self.assertTrue(iter(fam.codes))
                self.assertIsInstance(next(iter(fam.codes)), str)

                # domains: non-empty, iterable, dotted unless localhost
                self.assertTrue(fam.domains)
                self.assertTrue(iter(fam.domains))
                for domain in fam.domains:
                    self.assertIsInstance(domain, str)
                    host = domain.split(':', 1)[0]
                    if host != 'localhost':
                        self.assertIn('.', domain)

                self.assertEqual(fam.name, name)

                # languages_by_size: a list covered by the keys of langs
                with suppress_warnings(deprecation_msg):
                    self.assertIsInstance(fam.languages_by_size, list)
                    self.assertGreaterEqual(set(fam.langs),
                                            set(fam.languages_by_size))

                if isinstance(fam, SingleSiteFamily):
                    self.assertIsNotNone(fam.code)
                    self.assertIsNotNone(fam.domain)
                    self.assertEqual(set(fam.langs), {fam.code})
                    self.assertEqual(set(fam.codes), {fam.code})
Exemple #18
0
 def test_family_load_valid(self):
     """Check basic attributes of every family loaded via Family.load."""
     for name in pywikibot.config.family_files:
         fam = Family.load(name)

         # langs: a non-empty dict
         self.assertIsInstance(fam.langs, dict)
         self.assertTrue(fam.langs)

         # codes: non-empty, iterable, made of strings
         self.assertTrue(fam.codes)
         self.assertTrue(iter(fam.codes))
         self.assertIsInstance(next(iter(fam.codes)), basestring)

         # domains: non-empty, iterable, dotted unless localhost
         self.assertTrue(fam.domains)
         self.assertTrue(iter(fam.domains))
         for domain in fam.domains:
             self.assertIsInstance(domain, basestring)
             if domain == 'localhost':
                 continue
             self.assertIn('.', domain)

         self.assertEqual(fam.name, name)

         # languages_by_size: a list covered by the keys of langs
         self.assertIsInstance(fam.languages_by_size, list)
         self.assertGreaterEqual(set(fam.langs),
                                 set(fam.languages_by_size))
         may_be_empty = (len(fam.langs) <= 6
                         or fam.name == 'wikimediachapter')
         if not may_be_empty:
             self.assertNotEqual(fam.languages_by_size, [])

         if isinstance(fam, SingleSiteFamily):
             self.assertIsNotNone(fam.code)
             self.assertIsNotNone(fam.domain)
             only_code = {fam.code}
             self.assertEqual(set(fam.langs), only_code)
             self.assertEqual(set(fam.codes), only_code)
    def test_from_url_wikipedia_extra(self):
        """Test various URLs against wikipedia regex.

        URLs which must resolve to the code 'vo' and URLs which must be
        rejected are kept in two tables; the URL is passed as assertion
        message so a failure names the offending input.
        """
        f = Family.load('wikipedia')

        prefix = 'https://vo.wikipedia.org'

        accepted = (
            prefix + '/wiki/',
            prefix + '/w/index.php',
            prefix + '/w/index.php/',
            prefix + '/w/index.php?title=$1',
            prefix + '/wiki/$1',
            '//vo.wikipedia.org/wiki/$1',
            '//vo.wikipedia.org/wiki/$1/foo',
            prefix + '/w/index.php/$1',
        )
        for url in accepted:
            self.assertEqual(f.from_url(url), 'vo', url)

        rejected = (
            # wrong protocol
            'http://vo.wikipedia.org/wiki/$1',
            'ftp://vo.wikipedia.org/wiki/$1',
            # wrong code
            'https://foobar.wikipedia.org/wiki/$1',
            # wrong family
            'https://vo.wikibooks.org/wiki/$1',
            'http://vo.wikibooks.org/wiki/$1',
            # invalid path
            'https://vo.wikipedia.org/wik/$1',
            'https://vo.wikipedia.org/index.php/$1',
        )
        for url in rejected:
            self.assertIsNone(f.from_url(url), url)
    def test_get_regex_wikipedia_precise(self):
        """Test the family regex is optimal.

        The expected fragments are raw strings: they contain backslash
        sequences such as ``\\/`` and ``\\.`` which are invalid escapes
        in a normal string literal.
        """
        f = Family.load('wikipedia')
        regex = f._get_regex_all()

        self.assertTrue(regex.startswith(r'(?:\/\/|https\:\/\/)('))
        self.assertIn(r'vo\.wikipedia\.org', regex)
        self.assertTrue(regex.endswith(r')(?:\/w\/index\.php\/?|\/wiki\/)'))
Exemple #21
0
 def test_WikimediaFamily_obsolete_readonly(self):
     """Setting ``obsolete`` on the test family raises TypeError."""
     test_family = Family.load('test')
     with self.assertRaisesRegex(TypeError, self.FROZENSET_TYPEERROR_RE):
         test_family.__setattr__('obsolete', {'a': 'b', 'c': None})
    def test_get_regex_wikipedia_precise(self):
        """Test the family regex is optimal.

        The expected fragments are raw strings: they contain backslash
        sequences such as ``\\/`` and ``\\.`` which are invalid escapes
        in a normal string literal.
        """
        f = Family.load('wikipedia')
        regex = f._get_regex_all()

        self.assertTrue(regex.startswith(r'(?:\/\/|https\:\/\/)('))
        self.assertIn(r'vo\.wikipedia\.org', regex)
        self.assertTrue(regex.endswith(r')(?:\/w\/index\.php\/?|\/wiki\/)'))
 def test_WikimediaFamily_obsolete_readonly(self):
     """Setting ``obsolete`` on the test family raises TypeError."""
     test_family = Family.load('test')
     with self.assertRaisesRegex(TypeError, self.FROZENSET_TYPEERROR_RE):
         test_family.__setattr__('obsolete', {'a': 'b', 'c': None})
Exemple #24
0
 def test_eq_family_with_string_repr_not_existing_family(self):
     """Comparing a Family with an unknown family name raises an error."""
     wikipedia = Family.load('wikipedia')
     with self.assertRaisesRegex(UnknownFamily, self.UNKNOWNFAMILY_RE):
         wikipedia.__eq__('unknown')
 def test_eq_family_with_string_repr_not_existing_family(self):
     """Comparing a Family with an unknown family name raises an error."""
     wikipedia = Family.load('wikipedia')
     with self.assertRaisesRegex(UnknownFamily, self.UNKNOWNFAMILY_RE):
         wikipedia.__eq__('unknown')
Exemple #26
0
 def test_get_obsolete_wp(self):
     """Check one wikipedia code of each kind of obsolete entry."""
     wikipedia = Family.load('wikipedia')
     self.assertIsInstance(wikipedia.obsolete, dict)
     expected = (
         # redirected code (see site tests test_alias_code_site)
         ('dk', 'da'),
         # closed/locked site (see site tests test_locked_site)
         ('mh', None),
         # offline site (see site tests test_removed_site)
         ('ru-sib', None),
     )
     for code, target in expected:
         self.assertEqual(wikipedia.obsolete[code], target)
 def test_get_obsolete_wp(self):
     """Check one wikipedia code of each kind of obsolete entry."""
     wikipedia = Family.load('wikipedia')
     self.assertIsInstance(wikipedia.obsolete, dict)
     expected = (
         # redirected code (see site tests test_alias_code_site)
         ('dk', 'da'),
         # closed/locked site (see site tests test_locked_site)
         ('mh', None),
         # offline site (see site tests test_removed_site)
         ('ru-sib', None),
     )
     for code, target in expected:
         self.assertEqual(wikipedia.obsolete[code], target)
Exemple #28
0
def preload_family(family, executor):
    """Preload all sites of a single family file.

    One ``pywikibot.Page`` creation per site is submitted to the
    executor, and the function waits until all of them have finished.
    Codes listed in ``exceptions`` for the family and obsolete codes
    are skipped.

    @param family: name of the family whose sites are preloaded
    @param executor: object whose ``submit`` method schedules the page
        creation and returns a future
    """
    msg = 'Preloading sites of {} family{}'
    pywikibot.output(msg.format(family, '...'))

    skipped = exceptions.get(family, [])
    # Build a filtered copy instead of calling remove() on the list
    # obtained from the Family object, so that object's own list is
    # left untouched.
    codes = [code for code in Family.load(family).languages_by_size
             if code not in skipped]
    obsolete = Family.load(family).obsolete

    futures = set()
    for code in codes:
        if code in obsolete:
            continue
        site = pywikibot.Site(code, family)
        # page title does not care
        futures.add(executor.submit(pywikibot.Page, site, 'Main page'))
    wait(futures)
    pywikibot.output(msg.format(family, ' completed.'))
 def test_family_load_valid(self):
     """Every configured family loads with a non-empty langs dict."""
     for name in pywikibot.config.family_files:
         fam = Family.load(name)
         self.assertIsInstance(fam.langs, dict)
         self.assertNotEqual(fam.langs, {})
         # There is one inconsistency: the 'wikimedia' file yields a
         # family named 'wikimediachapter', so the name check is
         # skipped for it.
         if fam.name != 'wikimediachapter' or name != 'wikimedia':
             self.assertEqual(fam.name, name)
 def test_family_load_valid(self):
     """Every configured family loads with a non-empty langs dict."""
     for name in pywikibot.config.family_files:
         fam = Family.load(name)
         self.assertIsInstance(fam.langs, dict)
         self.assertNotEqual(fam.langs, {})
         # There is one inconsistency: the 'wikimedia' file yields a
         # family named 'wikimediachapter', so the name check is
         # skipped for it.
         if fam.name != 'wikimediachapter' or name != 'wikimedia':
             self.assertEqual(fam.name, name)
    def test_obsolete_readonly(self):
        """The obsolete mapping of the wikipedia family is read-only."""
        no_update_msg = "'frozenmap' object has no attribute 'update'"
        wikipedia = Family.load('wikipedia')

        # there is no update() method at all
        with self.assertRaisesRegex(AttributeError, no_update_msg):
            wikipedia.obsolete.update({})

        # item assignment is refused
        with self.assertRaisesRegex(TypeError, self.FROZEN_TYPEERROR_RE):
            wikipedia.obsolete['a'] = 'b'
Exemple #32
0
    def test_obsolete_readonly(self):
        """The obsolete mapping of the wikipedia family is read-only."""
        no_update_msg = "'mappingproxy' object has no attribute 'update'"
        no_assign_msg = (
            "'mappingproxy' object does not support item assignment")
        wikipedia = Family.load('wikipedia')

        # there is no update() method at all
        with self.assertRaisesRegex(AttributeError, no_update_msg):
            wikipedia.obsolete.update({})

        # item assignment is refused
        with self.assertRaisesRegex(TypeError, no_assign_msg):
            wikipedia.obsolete['a'] = 'b'
 def test_each_family(self):
     """Check that from_url of each family resolves its own site URLs."""
     for fam_name in pywikibot.config.family_files:
         fam = Family.load(fam_name)
         # Test family does not respond to from_url due to overlap
         # with Wikipedia family.
         if fam.name != 'test':
             for code in fam.langs:
                 parts = (fam.protocol(code), fam.hostname(code),
                          fam.path(code))
                 url = '%s://%s%s$1' % parts
                 self.assertEqual(fam.from_url(url), code)
 def test_each_family(self):
     """Check that from_url of each family resolves its own site URLs."""
     for fam_name in pywikibot.config.family_files:
         fam = Family.load(fam_name)
         # Test family does not respond to from_url due to overlap
         # with Wikipedia family.
         if fam.name != 'test':
             for code in fam.langs:
                 parts = (fam.protocol(code), fam.hostname(code),
                          fam.path(code))
                 url = '%s://%s%s$1' % parts
                 self.assertEqual(fam.from_url(url), code)
 def test_obsolete_readonly(self):
     """The obsolete mapping of the test family rejects modification."""
     test_family = Family.load('test')
     # method of the obsolete mapping and the arguments to call it with
     mutations = (
         ('update', ({},)),
         ('__setitem__', ('a', 'b')),
     )
     for method_name, args in mutations:
         self.assertRaisesRegex(
             TypeError,
             self.FAMILY_TYPEERROR_RE,
             getattr(test_family.obsolete, method_name),
             *args)
Exemple #36
0
 def test_each_family(self):
     """Test each family builds a working regex.

     For every code of every family in ``pywikibot.config.family_files``
     a URL is built from the family's protocol, hostname and path and
     passed to ``from_url``. The result must be the code, or None when
     the family ignores that code.
     """
     for family in pywikibot.config.family_files:
         self.current_family = family
         family = Family.load(family)
         # Presumably either True (ignore every code) or a collection of
         # ignored codes -- TODO confirm against the Family class.
         ignored = family._ignore_from_url
         for code in family.codes:
             self.current_code = code
             url = ('%s://%s%s/$1' % (family.protocol(code),
                                      family.hostname(code),
                                      family.path(code)))
             # Families can switch off if they want to be detected using URL
             # this applies for test:test (there is test:wikipedia)
             # A plain ``ignored or code in ignored`` would treat every
             # code as ignored as soon as the collection is non-empty.
             if ignored is True or (ignored and code in ignored):
                 self.assertIsNone(family.from_url(url))
             else:
                 self.assertEqual(family.from_url(url), code)
Exemple #37
0
 def test_each_family(self):
     """Test each family builds a working regex.

     For every code of every family in ``pywikibot.config.family_files``
     a URL is built from the family's protocol, hostname and path and
     passed to ``from_url``. The result must be the code, or None when
     the family ignores that code.
     """
     for family in pywikibot.config.family_files:
         self.current_family = family
         family = Family.load(family)
         # Presumably either True (ignore every code) or a collection of
         # ignored codes -- TODO confirm against the Family class.
         ignored = family._ignore_from_url
         for code in family.codes:
             self.current_code = code
             url = ('%s://%s%s/$1' %
                    (family.protocol(code), family.hostname(code),
                     family.path(code)))
             # Families can switch off if they want to be detected using URL
             # this applies for test:test (there is test:wikipedia)
             # A plain ``ignored or code in ignored`` would treat every
             # code as ignored as soon as the collection is non-empty.
             if ignored is True or (ignored and code in ignored):
                 self.assertIsNone(family.from_url(url))
             else:
                 self.assertEqual(family.from_url(url), code)
 def test_each_family(self):
     """Test that every family maps its generated URLs back to the code.

     Each family is checked in its own subtest; 'wowwiki' is skipped.
     """
     for name in pywikibot.config.family_files:
         with self.subTest(family=name):
             if name == 'wowwiki':
                 reason = ('Family.from_url() does not work for {} (T215077)'
                           .format(name))
                 self.skipTest(reason)

             self.current_family = name
             fam = Family.load(name)
             for code in fam.codes:
                 self.current_code = code
                 protocol = fam.protocol(code)
                 hostname = fam.hostname(code)
                 path = fam.path(code)
                 url = '{}://{}{}/$1'.format(protocol, hostname, path)
                 # Every code is expected to round-trip through from_url().
                 self.assertEqual(fam.from_url(url), code)
    def test_from_url_wikipedia_extra(self):
        """Test the wikipedia URL regex against accepted and rejected URLs."""
        self.current_code = 'vo'
        self.current_family = 'wikipedia'

        wikipedia = Family.load('wikipedia')
        prefix = 'https://vo.wikipedia.org'

        # URLs that must resolve to 'vo', listed in the order they are checked
        accepted = (
            # bare article and script paths
            prefix + '/wiki/',
            prefix + '/w/index.php',
            prefix + '/w/index.php/',
            prefix + '/w/index.php?title=$1',
            # with $1 placeholder, absolute and protocol-relative
            prefix + '/wiki/$1',
            '//vo.wikipedia.org/wiki/$1',
            prefix + '/w/index.php/$1',
            '//vo.wikipedia.org/wiki/$1',
            # including title
            prefix + '/wiki/Main_page',
            prefix + '/w/index.php?title=Foo',
        )
        for url in accepted:
            self.assertEqual(wikipedia.from_url(url), 'vo')

        # Text after $1 is not allowed
        self.assertRaisesRegex(
            ValueError,
            r'Text after the \$1 placeholder is not supported \(T111513\)',
            wikipedia.from_url,
            '//vo.wikipedia.org/wiki/$1/foo')

        # the IWM may contain the wrong protocol, but it's only used to
        # determine a site so using HTTP or HTTPS is not an issue
        self.assertEqual(
            wikipedia.from_url('http://vo.wikipedia.org/wiki/$1'), 'vo')

        # URLs that must not be recognised at all
        rejected = (
            'ftp://vo.wikipedia.org/wiki/$1',  # wrong protocol
            'https://foobar.wikipedia.org/wiki/$1',  # wrong code
            'https://vo.wikibooks.org/wiki/$1',  # wrong family
            'http://vo.wikibooks.org/wiki/$1',
            'https://vo.wikipedia.org/wik/$1',  # invalid path
            'https://vo.wikipedia.org/index.php/$1',
        )
        for url in rejected:
            self.assertIsNone(wikipedia.from_url(url))
Exemple #40
0
    def test_from_url_wikipedia_extra(self):
        """Test assorted URL shapes against the wikipedia family regex."""
        self.current_code = 'vo'
        self.current_family = 'wikipedia'

        family = Family.load('wikipedia')
        prefix = 'https://vo.wikipedia.org'

        # Checked in this order; each one has to be detected as 'vo'.
        matching_urls = [
            prefix + '/wiki/',
            prefix + '/w/index.php',
            prefix + '/w/index.php/',
            prefix + '/w/index.php?title=$1',
            prefix + '/wiki/$1',
            '//vo.wikipedia.org/wiki/$1',
            prefix + '/w/index.php/$1',
            '//vo.wikipedia.org/wiki/$1',
            # including title
            prefix + '/wiki/Main_page',
            prefix + '/w/index.php?title=Foo',
        ]
        for url in matching_urls:
            self.assertEqual(family.from_url(url), 'vo')

        # Text after $1 is not allowed
        with self.assertRaisesRegex(
                ValueError,
                r'Text after the \$1 placeholder is not supported \(T111513\)'):
            family.from_url('//vo.wikipedia.org/wiki/$1/foo')

        # the IWM may contain the wrong protocol, but it's only used to
        # determine a site so using HTTP or HTTPS is not an issue
        self.assertEqual(
            family.from_url('http://vo.wikipedia.org/wiki/$1'), 'vo')

        # None of these may be detected as belonging to the family.
        non_matching_urls = [
            # wrong protocol
            'ftp://vo.wikipedia.org/wiki/$1',
            # wrong code
            'https://foobar.wikipedia.org/wiki/$1',
            # wrong family
            'https://vo.wikibooks.org/wiki/$1',
            'http://vo.wikibooks.org/wiki/$1',
            # invalid path
            'https://vo.wikipedia.org/wik/$1',
            'https://vo.wikipedia.org/index.php/$1',
        ]
        for url in non_matching_urls:
            self.assertIsNone(family.from_url(url))
Exemple #41
0
 def test_each_family(self):
     """Test each family builds a working regex.

     Every family is checked inside its own subtest. Skipping 'wowwiki'
     therefore only skips that one family instead of aborting the loop and
     leaving all families iterated after it unchecked.
     """
     for name in pywikibot.config.family_files:
         with self.subTest(family=name):
             if name == 'wowwiki':
                 # Raised inside subTest(): recorded as a skipped subtest,
                 # the loop goes on with the next family.
                 self.skipTest(
                     'Family.from_url() does not work for wowwiki (T215077)')
             self.current_family = name
             family = Family.load(name)
             for code in family.codes:
                 self.current_code = code
                 url = ('{}://{}{}/$1'.format(family.protocol(code),
                                              family.hostname(code),
                                              family.path(code)))
                 # from_url() is expected to return None when the family's
                 # _ignore_from_url is truthy or lists this code (the case
                 # of test:test, for which test:wikipedia exists).
                 if (family._ignore_from_url
                         or code in family._ignore_from_url):
                     self.assertIsNone(family.from_url(url))
                 else:
                     self.assertEqual(family.from_url(url), code)
Exemple #42
0
def _code_fam_from_url(url: str):
    """Find the site code and family that belong to a URL.

    Site helper method. Every family listed in config.family_files is loaded
    and asked whether it recognises the URL; the first family that does wins.
    A warning is issued if more than one family matches.

    NOTE(review): the previous summary spoke of a cache; no caching is done
    inside this function, presumably it is applied by a decorator -- confirm.

    @param url: The site URL to get code and family
    @return: tuple of the matching code and the Family object
    @raises pywikibot.exceptions.SiteDefinitionError: Unknown URL
    """
    candidates = []
    for family_name in config.family_files:
        candidate = Family.load(family_name)
        found_code = candidate.from_url(url)
        if found_code is None:
            # this family does not apply to the given URL
            continue
        candidates.append((found_code, candidate))

    if candidates:
        if len(candidates) > 1:
            listing = ', '.join(str(site) for site in candidates)
            warning('Found multiple matches for URL "{}": {} (use first)'
                    .format(url, listing))
        return candidates[0]

    # TODO: As soon as AutoFamily is ready, try and use an
    #       AutoFamily
    raise SiteDefinitionError("Unknown URL '{}'.".format(url))
 def test_new_different_families_ne(self):
     """Test that two distinct families are neither identical nor equal."""
     wikipedia = Family.load('wikipedia')
     wiktionary = Family.load('wiktionary')
     # Identity is checked first, then the equality comparison.
     self.assertIsNot(wikipedia, wiktionary)
     self.assertNotEqual(wikipedia, wiktionary)
Exemple #44
0
def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None):
    """A factory method to obtain a Site object.

    Site objects are cached and reused by this method.

    By default rely on config settings. These defaults may all be overridden
    using the method parameters.

    @param code: language code (override config.mylang)
    @type code: str
    @param fam: family name or object (override config.family)
    @type fam: str or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: str
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: str
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: Instead of code and fam, does try to get a Site based on the
        URL. Still requires that the family supporting that URL exists.
    @type url: str
    @rtype: pywikibot.site.APISite
    @raises ValueError: URL and pair of code and family given
    @raises ValueError: Invalid interface name
    @raises SiteDefinitionError: Unknown URL
    """
    _logger = 'wiki'

    if url:
        # Either code and fam or only url
        if code or fam:
            raise ValueError(
                'URL to the wiki OR a pair of code and family name '
                'should be provided')
        code, fam = _code_fam_from_url(url)
    else:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family

        if not isinstance(fam, Family):
            fam = Family.load(fam)

    interface = interface or fam.interface(code)

    # config.usernames is initialised with a defaultdict for each family name
    family_name = str(fam)

    # Family specific entries take precedence over the '*' entries; within
    # the merged mapping the code is looked up first, then '*'.
    code_to_user = config.usernames['*'].copy()
    code_to_user.update(config.usernames[family_name])
    user = user or code_to_user.get(code) or code_to_user.get('*')

    code_to_sysop = config.sysopnames['*'].copy()
    code_to_sysop.update(config.sysopnames[family_name])
    sysop = sysop or code_to_sysop.get(code) or code_to_sysop.get('*')

    if not isinstance(interface, type):
        # If it isn't a class, assume it is a string
        if PY2:  # Must not be unicode in Python 2
            interface = str(interface)
        try:
            tmp = __import__('pywikibot.site', fromlist=[interface])
            # __import__ does not complain about unknown fromlist names, so
            # a wrong interface name only shows up as a failed lookup here.
            interface = getattr(tmp, interface)
        except (ImportError, AttributeError):
            raise ValueError('Invalid interface name: {0}'.format(interface))

    if not issubclass(interface, BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    user = normalize_username(user)
    # The cache key is built from interface, family, code and user; sysop is
    # not part of it.
    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        _sites[key] = interface(code=code, fam=fam, user=user, sysop=sysop)
        debug("Instantiated %s object '%s'"
              % (interface.__name__, _sites[key]), _logger)

        if _sites[key].code != code:
            warn('Site %s instantiated using different code "%s"'
                 % (_sites[key], code), UserWarning, 2)

    return _sites[key]
 def test_WikimediaFamily_obsolete_readonly(self):
     """Test that assigning the obsolete attribute raises TypeError."""
     family = Family.load('test')
     replacement = {'a': 'b', 'c': None}
     with self.assertRaises(TypeError):
         family.__setattr__('obsolete', replacement)
 def test_eq_family_with_string_repr_same_family(self):
     """Test that a Family compares equal to the string of its own name."""
     name = 'wikipedia'
     wikipedia = Family.load('wikipedia')
     self.assertEqual(wikipedia, name)
     # Exercise the != operator explicitly as well.
     self.assertFalse(wikipedia != name)
 def test_ne_family_with_string_repr_different_family(self):
     """Test that a Family differs from the name of another family."""
     foreign_name = 'wikisource'
     wikipedia = Family.load('wikipedia')
     self.assertNotEqual(wikipedia, foreign_name)
     # Exercise the == operator explicitly as well.
     self.assertFalse(wikipedia == foreign_name)
 def test_eq_family_with_string_repr_same_family(self):
     """Test equality between a Family and a string holding its name."""
     family_name = 'wikipedia'
     loaded = Family.load('wikipedia')
     self.assertEqual(loaded, family_name)
     # The inequality operator is checked on purpose.
     self.assertFalse(loaded != family_name)  # noqa: H204
 def test_ne_family_with_string_repr_different_family(self):
     """Test inequality between a Family and another family's name."""
     other_name = 'wikisource'
     loaded = Family.load('wikipedia')
     self.assertNotEqual(loaded, other_name)
     # The equality operator is checked on purpose.
     self.assertFalse(loaded == other_name)  # noqa: H204
Exemple #50
0
def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None):
    """A factory method to obtain a Site object.

    Site objects are cached and reused by this method.

    By default rely on config settings. These defaults may all be overridden
    using the method parameters.

    @param code: language code (override config.mylang)
    @type code: string
    @param fam: family name or object (override config.family)
    @type fam: string or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: unicode
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: unicode
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: Instead of code and fam, does try to get a Site based on the
        URL. Still requires that the family supporting that URL exists.
    @type url: string
    @rtype: pywikibot.site.APISite
    @raises ValueError: URL and pair of code and family given
    @raises ValueError: Invalid interface name
    @raises SiteDefinitionError: Unknown URL
    """
    # Either code and fam or only url
    if url and (code or fam):
        raise ValueError('URL to the wiki OR a pair of code and family name '
                         'should be provided')
    _logger = "wiki"

    if url:
        if url not in _url_cache:
            matched_sites = []
            # Iterate through all families and look, which does apply to
            # the given URL
            for family_name in config.family_files:
                family = Family.load(family_name)
                matched_code = family.from_url(url)
                if matched_code is not None:
                    matched_sites.append((matched_code, family))

            if matched_sites:
                if len(matched_sites) > 1:
                    warning(
                        'Found multiple matches for URL "{0}": {1} (use first)'
                        .format(url, ', '.join(str(s) for s in matched_sites)))
                _url_cache[url] = matched_sites[0]
            else:
                # TODO: As soon as AutoFamily is ready, try and use an
                #       AutoFamily
                # Unknown URLs are remembered too, so the families are not
                # scanned again for them.
                _url_cache[url] = None

        cached = _url_cache[url]
        if not cached:
            raise SiteDefinitionError("Unknown URL '{0}'.".format(url))
        code, fam = cached
    else:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family

        if not isinstance(fam, Family):
            fam = Family.load(fam)

    interface = interface or fam.interface(code)

    # config.usernames is initialised with a defaultdict for each family name
    family_name = str(fam)

    # Family specific entries take precedence over the '*' entries; within
    # the merged mapping the code is looked up first, then '*'.
    code_to_user = config.usernames['*'].copy()
    code_to_user.update(config.usernames[family_name])
    user = user or code_to_user.get(code) or code_to_user.get('*')

    code_to_sysop = config.sysopnames['*'].copy()
    code_to_sysop.update(config.sysopnames[family_name])
    sysop = sysop or code_to_sysop.get(code) or code_to_sysop.get('*')

    if not isinstance(interface, type):
        # If it isn't a class, assume it is a string
        try:
            tmp = __import__('pywikibot.site', fromlist=[interface])
            # __import__ does not complain about unknown fromlist names, so
            # a wrong interface name only shows up as a failed lookup here.
            interface = getattr(tmp, interface)
        except (ImportError, AttributeError):
            raise ValueError('Invalid interface name: {0}'.format(interface))

    if not issubclass(interface, BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    user = normalize_username(user)
    # The cache key is built from interface, family, code and user; sysop is
    # not part of it.
    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        _sites[key] = interface(code=code, fam=fam, user=user, sysop=sysop)
        debug(u"Instantiated %s object '%s'"
              % (interface.__name__, _sites[key]), _logger)

        if _sites[key].code != code:
            warn('Site %s instantiated using different code "%s"'
                 % (_sites[key], code), UserWarning, 2)

    return _sites[key]