Example #1
0
def update_family(families):
    """Compare each family's ``languages_by_size`` with the wikistats feed.

    For every family the feed at ``URL % familiesDict[family]`` is parsed as
    XML and the text of all ``row/field`` elements whose ``name`` attribute
    is ``prefix`` is collected, skipping codes listed in the family's
    ``obsolete`` mapping or in the module-level ``exceptions``. Codes that
    are present in the family but absent from the feed are reported and
    re-inserted into the new list. If the resulting list differs from the
    current one, it is printed and written into
    ``../families/<family>_family.py`` in place of the existing
    ``self.languages_by_size`` assignment.

    :param families: iterable of family names to check; when empty or None,
        every key of ``familiesDict`` is checked.
    :raises IndexError: if the family file contains no
        ``self.languages_by_size`` assignment to replace.
    """
    if not families:
        families = familiesDict.keys()
    for family in families:
        pywikibot.output('\nChecking family %s:' % family)

        fam = pywikibot.Family(family)
        original = fam.languages_by_size
        obsolete = fam.obsolete

        feed = urllib2.urlopen(URL % familiesDict[family])
        try:
            tree = cElementTree.parse(feed)
        finally:
            # always release the HTTP connection, even if parsing fails
            feed.close()

        new = [field.text
               for field in tree.findall('row/field')
               if field.get('name') == 'prefix'
               and field.text not in obsolete
               and field.text not in exceptions]

        # put the missing languages to the right place
        missing = set(original) - set(new) if original != new else set()
        if missing:
            pywikibot.output(u"WARNING: ['%s'] not listed at wikistats." %
                             "', '".join(missing))
            # map the position each missing code has in the current list
            index = {}
            for code in missing:
                index[original.index(code)] = code
            # insert from the highest original position downwards
            i = len(index) - 1
            for key in sorted(index.keys(), reverse=True):
                new.insert(key - i, index[key])
                i -= 1

        if original == new:
            pywikibot.output(u'The lists match!')
            continue

        pywikibot.output(u"The lists don't match, the new list is:")
        text = u'        self.languages_by_size = [\r\n'
        line = ' ' * 11
        for code in new:
            # start a new source line once the current one would get too long
            if len(line) + len(code) > 76:
                text += u'%s\r\n' % line
                line = ' ' * 11
            line += u" '%s'," % code
        text += u'%s\r\n' % line
        text += u'        ]'
        pywikibot.output(text)

        family_file_name = '../families/%s_family.py' % family
        with codecs.open(family_file_name, 'r', 'utf8') as family_file:
            family_text = family_file.read()
        # plain raw string: the ``ur`` prefix is a syntax error on Python 3
        old = re.findall(r'(?msu)^ {8}self.languages_by_size.+?\]',
                         family_text)[0]
        family_text = family_text.replace(old, text)
        with codecs.open(family_file_name, 'w', 'utf8') as family_file:
            family_file.write(family_text)
Example #2
0
    def _parse_post_117(self):
        """Parse 1.17+ siteinfo data.

        Queries ``meta=siteinfo`` through the API at ``self.api`` and stores
        ``version``, ``server``, ``scriptpath``, ``articlepath`` and ``lang``
        on the instance. ``private_wiki`` is set to whether the API answered
        with a ``readapidenied`` error; in that case the user is asked for a
        username and the siteinfo is read through a temporary site object.
        Returns early, after setting ``version``, when the wiki is older
        than 1.17.
        """
        reply = fetch(self.api + '?action=query&meta=siteinfo&format=json')
        check_response(reply)
        # Strip surrounding whitespace, then a leading Byte Order Mark (BOM)
        # before decoding the JSON, see T128992
        payload = json.loads(reply.text.strip().lstrip('\uFEFF'))
        self.private_wiki = ('error' in payload
                             and payload['error']['code'] == 'readapidenied')
        if not self.private_wiki:
            general = payload['query']['general']
        else:
            # user-config.py is not loaded because PYWIKIBOT_NO_USER_CONFIG
            # is set to '2' by generate_family_file.py.
            # Prepare a temporary config for login.
            username = pywikibot.input(
                'Private wiki detected. Login is required.\n'
                'Please enter your username?')
            config.usernames['temporary_family'] = {'temporary_code': username}
            # A dummy family is enough to build a site object
            dummy_family = pywikibot.Family()
            dummy_family.name = 'temporary_family'
            # drop the trailing '/api.php' to get the script path
            dummy_family.scriptpath = lambda code: self.api[:-8]
            dummy_family.langs = {'temporary_code': self.server}
            temp_site = pywikibot.Site('temporary_code', dummy_family)
            temp_site.version = lambda: str(self.version)
            # The site object can log in now and fetch the siteinfo itself
            general = temp_site.siteinfo

        self.version = MediaWikiVersion.from_generator(general['generator'])
        if self.version < MediaWikiVersion('1.17'):
            return

        self.server = urljoin(self.fromurl, general['server'])
        self.scriptpath = general['scriptpath']
        self.articlepath = general['articlepath']
        self.lang = general['lang']
Example #3
0
def getLanguageLinks(text, insite=None, pageLink="[[]]",
                     template_subpage=False):
    """
    Return a dict of interlanguage links found in text.

    Dict uses the target site objects as keys and Page objects as values.
    Do not call this routine directly, use Page.interwiki() method
    instead.

    Parameters:
    text -- the wikitext to scan
    insite -- site the text belongs to; defaults to pywikibot.getSite()
    pageLink -- not used by this function
    template_subpage -- if true, links inside includeonly tags are
        considered too; otherwise includeonly sections are ignored

    Links whose title is rejected with pywikibot.InvalidTitle are reported
    via pywikibot.output() and left out of the result.

    """
    if insite is None:
        insite = pywikibot.getSite()
    fam = insite.family
    # when interwiki links forward to another family, retrieve pages & other
    # infos there
    if fam.interwiki_forward:
        fam = pywikibot.Family(fam.interwiki_forward)
    result = {}
    # Ignore interwiki links within nowiki tags, includeonly tags, pre tags,
    # and HTML comments
    tags = ['comments', 'nowiki', 'pre', 'source']
    if not template_subpage:
        tags += ['includeonly']
    text = removeDisabledParts(text, tags)

    # This regular expression will find every link that is possibly an
    # interwiki link.
    # NOTE: language codes are case-insensitive and only consist of basic latin
    # letters and hyphens.
    # TODO: currently, we do not have any, but BCP 47 allows digits, and
    #       underscores.
    # TODO: There is no semantic difference between hyphens and
    #       underscores -> fold them.
    interwikiR = re.compile(r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]')
    for lang, pagetitle in interwikiR.findall(text):
        lang = lang.lower()
        # Map an obsolete code to the entry stored for it in fam.obsolete
        if lang in fam.obsolete:
            lang = fam.obsolete[lang]
        # Check if it really is in fact an interwiki link to a known
        # language, or if it's e.g. a category tag or an internal link
        if lang not in fam.langs:
            continue
        # ignore text after the pipe
        pagetitle = pagetitle.partition('|')[0]
        # we want the actual page objects rather than the titles
        site = pywikibot.getSite(code=lang, fam=fam)
        try:
            result[site] = pywikibot.Page(site, pagetitle, insite=insite)
        except pywikibot.InvalidTitle:
            pywikibot.output(u'[getLanguageLinks] Text contains invalid '
                             u'interwiki link [[%s:%s]].'
                             % (lang, pagetitle))
    return result
Example #4
0
 def iwkeys(self):
     """Return the keys of the ``langs`` mapping to use for this family.

     When ``interwiki_forward`` is set, the keys come from the ``langs`` of
     the family it names; otherwise from this family's own ``langs``.
     """
     forward = self.interwiki_forward
     if not forward:
         return self.langs.keys()
     return pywikibot.Family(forward).langs.keys()