import codecs
import re
import urllib2
from xml.etree import cElementTree

import pywikibot

# URL, familiesDict and exceptions are module-level constants of this script.


def update_family(families):
    if not families:
        families = familiesDict.keys()
    for family in families:
        pywikibot.output('\nChecking family %s:' % family)

        original = pywikibot.Family(family).languages_by_size
        obsolete = pywikibot.Family(family).obsolete

        feed = urllib2.urlopen(URL % familiesDict[family])
        tree = cElementTree.parse(feed)

        new = []
        for field in tree.findall('row/field'):
            if field.get('name') == 'prefix':
                code = field.text
                if not (code in obsolete or code in exceptions):
                    new.append(code)
                continue

        # put the missing languages to the right place
        missing = original != new and set(original) - set(new)
        if missing:
            pywikibot.output(u"WARNING: ['%s'] not listed at wikistats."
                             % "', '".join(missing))
            index = {}
            for code in missing:
                index[original.index(code)] = code
            i = len(index) - 1
            for key in sorted(index.keys(), reverse=True):
                new.insert(key - i, index[key])
                i -= 1

        if original == new:
            pywikibot.output(u'The lists match!')
        else:
            pywikibot.output(u"The lists don't match, the new list is:")
            text = u'        self.languages_by_size = [\r\n'
            line = ' ' * 11
            for code in new:
                if len(line) + len(code) <= 76:
                    line += u" '%s'," % code
                else:
                    text += u'%s\r\n' % line
                    line = ' ' * 11
                    line += u" '%s'," % code
            text += u'%s\r\n' % line
            text += u'        ]'
            pywikibot.output(text)

            family_file_name = '../families/%s_family.py' % family
            family_file = codecs.open(family_file_name, 'r', 'utf8')
            family_text = family_file.read()
            old = re.findall(ur'(?msu)^ {8}self.languages_by_size.+?\]',
                             family_text)[0]
            family_text = family_text.replace(old, text)
            family_file = codecs.open(family_file_name, 'w', 'utf8')
            family_file.write(family_text)
            family_file.close()
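
# A minimal standalone sketch (not part of the script above) of the wikistats
# XML that update_family() consumes: <row> elements whose <field name="prefix">
# children carry the language codes. The sample payload and the 'wikistats'
# root tag are assumptions inferred from the tree.findall('row/field') calls;
# only the filtering logic mirrors the function above.
from io import BytesIO
from xml.etree import cElementTree

sample = BytesIO(b'<wikistats>'
                 b'<row><field name="prefix">en</field>'
                 b'<field name="good">1</field></row>'
                 b'<row><field name="prefix">de</field>'
                 b'<field name="good">2</field></row>'
                 b'</wikistats>')
tree = cElementTree.parse(sample)
codes = [field.text for field in tree.findall('row/field')
         if field.get('name') == 'prefix']
assert codes == ['en', 'de']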
def _parse_post_117(self):
    """Parse 1.17+ siteinfo data."""
    response = fetch(self.api + '?action=query&meta=siteinfo&format=json')
    check_response(response)
    # remove leading newlines and the Byte Order Mark (BOM), see T128992
    content = response.text.strip().lstrip('\uFEFF')
    info = json.loads(content)
    self.private_wiki = ('error' in info
                         and info['error']['code'] == 'readapidenied')
    if self.private_wiki:
        # user-config.py is not loaded because PYWIKIBOT_NO_USER_CONFIG
        # is set to '2' by generate_family_file.py.
        # Prepare a temporary config for login.
        username = pywikibot.input(
            'Private wiki detected. Login is required.\n'
            'Please enter your username:')
        config.usernames['temporary_family'] = {'temporary_code': username}
        # Set up a dummy family so that we can create a site object
        fam = pywikibot.Family()
        fam.name = 'temporary_family'
        fam.scriptpath = lambda code: self.api[:-8]  # without /api.php
        fam.langs = {'temporary_code': self.server}
        site = pywikibot.Site('temporary_code', fam)
        site.version = lambda: str(self.version)
        # Now the site object is able to log in
        info = site.siteinfo
    else:
        info = info['query']['general']

    self.version = MediaWikiVersion.from_generator(info['generator'])
    if self.version < MediaWikiVersion('1.17'):
        return

    self.server = urljoin(self.fromurl, info['server'])
    for item in ['scriptpath', 'articlepath', 'lang']:
        setattr(self, item, info[item])
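
# Hedged illustration of the clean-up step above: some wikis prepend blank
# lines and/or a UTF-8 BOM to the API response (see T128992), which would make
# json.loads() fail. The payload below is invented for the demo; only the
# strip()/lstrip('\uFEFF') chain mirrors the method above.
import json

raw = '\n\uFEFF{"query": {"general": {"generator": "MediaWiki 1.28.0"}}}'
content = raw.strip().lstrip('\uFEFF')
info = json.loads(content)
assert info['query']['general']['generator'] == 'MediaWiki 1.28.0'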
def getLanguageLinks(text, insite=None, pageLink="[[]]",
                     template_subpage=False):
    """Return a dict of interlanguage links found in text.

    The dict uses language codes as keys and Page objects as values.
    Do not call this routine directly; use the Page.interwiki() method
    instead.

    """
    if insite is None:
        insite = pywikibot.getSite()
    fam = insite.family
    # when interwiki links forward to another family, retrieve pages and
    # other info there
    if fam.interwiki_forward:
        fam = pywikibot.Family(fam.interwiki_forward)
    result = {}
    # Ignore interwiki links within nowiki tags, includeonly tags, pre tags,
    # and HTML comments
    tags = ['comments', 'nowiki', 'pre', 'source']
    if not template_subpage:
        tags += ['includeonly']
    text = removeDisabledParts(text, tags)

    # This regular expression will find every link that is possibly an
    # interwiki link.
    # NOTE: language codes are case-insensitive and only consist of basic
    # Latin letters and hyphens.
    # TODO: currently we do not have any, but BCP 47 also allows digits and
    # underscores.
    # TODO: there is no semantic difference between hyphens and
    # underscores -> fold them.
    interwikiR = re.compile(r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]')
    for lang, pagetitle in interwikiR.findall(text):
        lang = lang.lower()
        # Check if it really is an interwiki link to a known language, or
        # if it is e.g. a category tag or an internal link
        if lang in fam.obsolete:
            lang = fam.obsolete[lang]
        if lang in fam.langs.keys():
            if '|' in pagetitle:
                # ignore text after the pipe
                pagetitle = pagetitle[:pagetitle.index('|')]
            # we want the actual page objects rather than the titles
            site = pywikibot.getSite(code=lang, fam=fam)
            try:
                result[site] = pywikibot.Page(site, pagetitle, insite=insite)
            except pywikibot.InvalidTitle:
                pywikibot.output(u'[getLanguageLinks] Text contains invalid '
                                 u'interwiki link [[%s:%s]].'
                                 % (lang, pagetitle))
                continue
    return result
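
# Hedged usage sketch for getLanguageLinks(), assuming a configured compat-era
# setup with a default site; the wikitext below is invented. [[de:...]] and
# [[fr:...]] are picked up as interlanguage links (the pipe label is dropped),
# while [[Category:X]] is skipped because 'category' is not in fam.langs.
sample_text = (u'Some article text.\n'
               u'[[de:Beispiel]]\n'
               u'[[fr:Exemple|label]]\n'
               u'[[Category:X]]')
for site, page in getLanguageLinks(sample_text).items():
    pywikibot.output(u'%s -> %s' % (site.lang, page.title()))
# expected output (dict order not guaranteed):
#   de -> Beispiel
#   fr -> Exemple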
def iwkeys(self):
    """Return the language codes against which interwiki links resolve."""
    if self.interwiki_forward:
        # another family handles this family's interwiki links
        return pywikibot.Family(self.interwiki_forward).langs.keys()
    return self.langs.keys()
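
# Hedged usage note: in the real framework some families set
# interwiki_forward (e.g. the commons family forwards to 'wikipedia'), so
# their interwiki codes come from the forwarded family rather than from
# their own langs dict:
#
#     fam = pywikibot.Family('commons')  # fam.interwiki_forward == 'wikipedia'
#     'de' in fam.iwkeys()               # True: codes resolved via wikipedia
#
# whereas a family without interwiki_forward simply mirrors its own
# langs.keys().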