def test_known_native_names():
    """Check that full native names resolve to the expected language objects.

    Every row below is looked up with ``languages.getlang_by_native_name`` and
    the returned object's ``code``, ``name`` and ``native_name`` are compared
    against the expected values.
    """
    # Row layout:
    #   (query, expected code, expected name, expected native_name,
    #    message when nothing is found, message when the code differs)
    expectations = (
        ('English', "en", "English", "English",
         'English not found', 'Wrong code'),
        ('isiZulu', "zul", "Zulu", "isiZulu",
         'Zulu not found', 'Wrong internal repr. code'),
        # NOTE: Currently only support full-name matching so would have to
        # lookup by "name, country" to get local language version
        ('Português', "pt", "Portuguese", "Português",
         'Portuguese not found', 'Wrong internal repr. code'),
        # NOTE: Currently only support full match lookups where multiple
        # languages are specified separated by semicolons,
        # e.g. "Scottish Gaelic; Gaelic"
        ('Gàidhlig', "gd", "Scottish Gaelic; Gaelic", "Gàidhlig",
         'Scottish Gaelic; Gaelic not found', 'Wrong internal repr. code'),
    )
    for query, code, name, native_name, missing_msg, code_msg in expectations:
        found = languages.getlang_by_native_name(query)
        assert found is not None, missing_msg
        assert found.code == code, code_msg
        assert found.name == name, 'Wrong name'
        assert found.native_name == native_name, 'Wrong native_name'
def test_list_like_language_native_names():
    """Check lookups by each item of a comma-separated ``native_name``.

    Both 'Iñupiaq' and 'Iñupiatun' are expected to return the same language
    object, whose ``native_name`` is the full "Iñupiaq, Iñupiatun" string.
    """
    for query in ('Iñupiaq', 'Iñupiatun'):
        found = languages.getlang_by_native_name(query)
        assert found is not None, 'Inupiaq not found'
        assert found.code == "ik", 'Wrong internal repr. code'
        assert found.name == "Inupiaq", 'Wrong name'
        assert found.native_name == "Iñupiaq, Iñupiatun", 'Wrong native_name'
def test_list_like_language_native_names():
    """Check that either listed native name resolves to the Inupiaq entry.

    The expected ``native_name`` is the combined "Iñupiaq, Iñupiatun" string;
    each of its two parts is used as a separate query.
    """
    expected_code = "ik"
    expected_name = "Inupiaq"
    expected_native_name = "Iñupiaq, Iñupiatun"

    queries = ["Iñupiaq", "Iñupiatun"]
    for native_query in queries:
        result = languages.getlang_by_native_name(native_query)
        assert result is not None, "Inupiaq not found"
        assert result.code == expected_code, "Wrong internal repr. code"
        assert result.name == expected_name, "Wrong name"
        assert result.native_name == expected_native_name, "Wrong native_name"
def test_african_languages(african_languages_list):
    """Check that every name in ``african_languages_list`` can be looked up.

    Names for which ``languages.getlang_by_native_name`` returns ``None`` are
    collected and reported together in the assertion message.
    """
    unresolved = [
        candidate
        for candidate in african_languages_list
        if languages.getlang_by_native_name(candidate) is None
    ]
    assert unresolved == [], 'Languages with native_names missing: ' + str(unresolved)
def __get_language_code(self, language_str):
    """Return the language code matching ``language_str``.

    The string is tried with ``getlang_by_name`` first and, if that yields
    nothing, with ``getlang_by_native_name``. When neither lookup matches, the
    unknown string is printed and ``NalibaliChef.ENGLISH_LANGUAGE_CODE`` is
    returned instead.
    """
    match = getlang_by_name(language_str)
    if not match:
        match = getlang_by_native_name(language_str)

    if not match:
        print('Unknown language:', language_str)
        return NalibaliChef.ENGLISH_LANGUAGE_CODE

    return match.code
def construct_channel(self, *args, **kwargs):
    """Build the channel tree: one topic per language holding one HTML5 app.

    For every key of ``self.data`` (processed in sorted order) the page at the
    entry's ``url`` is fetched, its ``translations`` div is removed when
    present, the result is written to ``index.html`` in the page's zip
    directory, and the directory is zipped and moved under ``self.ZIP_DIR``.

    Returns the channel obtained from ``self.get_channel`` with the topics
    attached.
    """
    # Create ChannelNode from data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    for lang_name in sorted(self.data.keys()):
        entry = self.data[lang_name]
        LOGGER.info("Creating app for language: {}".format(lang_name))
        lang = languages.getlang_by_native_name(lang_name)

        page_url = entry['url']
        zip_dir = self.client.create_zip_dir_for_page(page_url)
        soup = self.client.get_page_soup(entry['url'])

        # Remove the translation list if found
        translations_div = soup.find('div', {'id': 'translations'})
        if translations_div:
            translations_div.extract()

        # Grab the localized title
        title = soup.find('span', {'id': 'share_title'}).text

        # Use the first resource whose path mentions dp3t.png as the thumbnail
        thumbnail = next(
            (os.path.join(zip_dir, res) for res in entry['resources'] if 'dp3t.png' in res),
            None,
        )

        # Save the modified index.html page
        index_path = os.path.join(zip_dir, 'index.html')
        with open(index_path, 'wb') as index_file:
            index_file.write(soup.prettify(encoding='utf-8'))

        # create_predictable_zip ensures that the ZIP file does not change each
        # time it's created. This ensures that the zip doesn't get re-uploaded
        # just because zip metadata changed.
        built_zip = zip.create_predictable_zip(zip_dir)

        # Name the archive after the language's primary code when the lookup
        # succeeded, otherwise after the raw language name.
        if lang:
            zip_name = lang.primary_code
        else:
            zip_name = lang_name
        zip_filename = os.path.join(self.ZIP_DIR, "{}.zip".format(zip_name))
        os.makedirs(os.path.dirname(zip_filename), exist_ok=True)
        os.rename(built_zip, zip_filename)

        app_node = nodes.HTML5AppNode(
            source_id="covid19-sim-{}".format(lang_name),
            title=title,
            files=[files.HTMLZipFile(zip_filename)],
            license=licenses.PublicDomainLicense("Marcel Salathé & Nicky Case"),
            language=lang,
            thumbnail=thumbnail,
        )
        topic = nodes.TopicNode(source_id=lang_name, title=lang_name)
        topic.add_child(app_node)
        channel.add_child(topic)

    return channel
def test_language_names_with_modifier_in_bracket():
    """Check lookups by the part of a native name that precedes a bracket.

    The expected ``native_name`` values carry a bracketed modifier, while the
    queries use only the text before the bracket.
    """
    japanese_native = "日本語 (にほんご/にっぽんご)"
    korean_native = "한국어 (韓國語), 조선말 (朝鮮語)"

    # Row layout: (query, code, name, native_name, message when not found)
    expectations = (
        ('日本語', "ja", "Japanese", japanese_native, 'Japanese not found'),
        ('한국어', "ko", "Korean", korean_native, 'Korean not found'),
        ('조선말', "ko", "Korean", korean_native, 'Korean not found'),
    )
    # try to match based on language name (stuff before subcode in brackets)
    for query, code, name, native_name, missing_msg in expectations:
        found = languages.getlang_by_native_name(query)
        assert found is not None, missing_msg
        assert found.code == code, 'Wrong internal repr. code'
        assert found.name == name, 'Wrong name'
        assert found.native_name == native_name, 'Wrong native_name'
def test_language_names_with_modifier_in_bracket():
    """Check bracket-free lookups for Chinese, Japanese and Korean names.

    The expected ``native_name`` values contain bracketed modifiers and, for
    Chinese and Korean, several comma-separated names; each query uses a
    single name without its bracketed part.
    """
    chinese_native = "中文 (Zhōngwén), 汉语, 漢語"
    japanese_native = "日本語 (にほんご/にっぽんご)"
    korean_native = "한국어 (韓國語), 조선말 (朝鮮語)"

    # Row layout: (query, code, name, native_name, message when not found)
    expectations = (
        ('中文', "zh", "Chinese", chinese_native, 'Chinese 1 not found'),
        ('汉语', "zh", "Chinese", chinese_native, 'Chinese 2 not found'),
        ('漢語', "zh", "Chinese", chinese_native, 'Chinese 3 not found'),
        ('日本語', "ja", "Japanese", japanese_native, 'Japanese not found'),
        ('한국어', "ko", "Korean", korean_native, 'Korean not found'),
        ('조선말', "ko", "Korean", korean_native, 'Korean not found'),
    )
    # try to match based on language name (stuff before subcode in brackets)
    for query, code, name, native_name, missing_msg in expectations:
        found = languages.getlang_by_native_name(query)
        assert found is not None, missing_msg
        assert found.code == code, 'Wrong internal repr. code'
        assert found.name == name, 'Wrong name'
        assert found.native_name == native_name, 'Wrong native_name'
def test_unknown_name():
    """Check that a native name with no expected match returns ``None``."""
    bogus_native_name = 'UnknoenNativeLanguage'
    result = languages.getlang_by_native_name(bogus_native_name)
    assert result is None, 'query for natove_name UnknoenNativeLanguage returned non-None'
def test_unknown_native_language():
    """Check that looking up an unrecognised native name returns ``None``."""
    failure_message = "query for natove_name UnknoenNativeLanguage returned non-None"
    result = languages.getlang_by_native_name("UnknoenNativeLanguage")
    assert result is None, failure_message
def build_lang_lookup_table(FEED_ROOT_URL):
    """
    Extracts all the root URLs of the languages, based on the links
    with facet group `Languages` in FEED_ROOT_URL.

    Each language link is resolved to a language object by trying, in order:
    the link title as a language name, the link title as a native name, and
    finally the code pycountry reports for the language code found in the
    link's href.

    Returns a dict keyed by the resolved language object's `code`; each value
    is a dict with the keys `alpha_3`, `lang_title`, `href`, `name` and
    `native_name`.

    Raises:
        ValueError: a language link's href does not match `_LANG_CODE_RE`.
        AssertionError: at least one language could not be resolved.
    """
    OPDS_LANG_ROOTS = {}

    # Check for languages we don't yet support in Kolibri.
    langs_not_found = []

    feed = feedparser.parse(FEED_ROOT_URL)
    lang_links = [
        link
        for link in feed.feed.links
        if 'opds:facetgroup' in link and link['opds:facetgroup'] == 'Languages'
    ]

    # Feed titles that are swapped for another spelling before the lookup
    # (presumably the spelling the name lookups recognise -- TODO confirm).
    title_overrides = {
        "isiNdebele seSewula": "isiNdebele",
        'বাঙালি': 'বাংলা',
    }

    # Build lookup table    lang_code --> dict with info about content in that language
    # where lang_code is the Learning Equality internal language codes defined in le_utils
    # Assume the chef script will be run on the command line using   lang=lang_code
    # E.g. lang_code for Zulu is `zul`, for Amharic it's `am`, and for Nepali it's `ne-NP`
    for link in lang_links:
        href = link['href']
        m = _LANG_CODE_RE.search(href)
        if not m:
            raise ValueError('Cannot find language code in href: ' + str(href))
        gdl_lang_code = m.groupdict()['gdl_lang_code']

        lang_title = link['title']
        lang_title = title_overrides.get(lang_title, lang_title)
        print('Processig lang_title', lang_title)

        # ATTEMPT 1: the title as a language name, then as a native name
        lang_obj = getlang_by_name(lang_title) or getlang_by_native_name(lang_title)

        if not lang_obj:
            # ATTEMPT 2: fall back to the alpha_3 code known to pycountry
            pyc_lang = pycountry.languages.lookup(gdl_lang_code)
            code = pyc_lang.alpha_3
            if hasattr(pyc_lang, 'alpha_2'):
                # ATTEMPT 3: prefer the alpha_2 code when pycountry has one
                code = pyc_lang.alpha_2

            # getlang_by_alpha2 is a misnomer, codes can be alpha2, alpha3, or lang+locale.
            lang_obj = getlang_by_alpha2(code)
            if not lang_obj:
                langs_not_found.append((pyc_lang, lang_title))
                print('ERROR could not find Kolibri lang info for ', pyc_lang)
                continue

        lang_code = lang_obj.code
        OPDS_LANG_ROOTS[lang_code] = dict(
            alpha_3=gdl_lang_code,
            lang_title=lang_title,
            href=href,
            name=lang_obj.name,
            native_name=lang_obj.native_name,
        )

    # For now, make missing languages a hard error so we can evaluate new language support case-by-case.
    # Raised explicitly rather than through an `assert` statement so the check
    # is not stripped when Python runs with -O; the exception type is unchanged.
    if langs_not_found:
        lang_codes = [pyc_lang.alpha_3 for pyc_lang, _ in langs_not_found]
        message = "The following languages are not yet supported in Kolibri: {}".format(
            ",".join(lang_codes))
        raise AssertionError(message)

    return OPDS_LANG_ROOTS