def __get_short_to_long_langs(short_lang):
    """
    Figure out the long name of a single short language name.

    Arguments:
        short_lang --- String. The first 3 characters are used as the
            language code passed to find_language(); anything after them
            is treated as an extra suffix (a leading "-" or "_" is
            dropped from it).

    Returns:
        The long name (string): the name of the language found by
        find_language(), or the 3-character code itself if no language
        was found, followed by " (<suffix>)" when there is a suffix.
        None if a KeyError is raised during the lookup.
    """
    try:
        code = short_lang[:3]
        suffix = short_lang[3:]
        # Drop the separator between the code and the suffix, if any
        if suffix[:1] in ("-", "_"):
            suffix = suffix[1:]

        language = find_language(code, allow_none=True)
        name = language.name if language else code

        if suffix:
            name = name + " (%s)" % (suffix)
        return name
    except KeyError:
        return None
def __get_short_to_long_langs(short_langs):
    """
    For each short language name, figures out its long name.

    Arguments:
        short_langs --- Iterable of strings. Each string is the short name
            of a language: a 3-character code, optionally followed by a
            suffix (for instance "chi_sim").

    Returns:
        List of tuples: (short name, long name). The short name is the
        input string, unmodified, so two entries sharing the same
        3-character code but having different suffixes remain distinct.
        The long name is the name of the language found by
        find_language() (or the 3-character code if none was found),
        followed by " (<suffix>)" when there is a suffix. If the lookup
        raises KeyError, the short name is used as the long name.
    """
    langs = []
    for short_lang in short_langs:
        # Only the first 3 characters are used for the lookup. The full
        # string is kept untouched: it is what gets returned and logged.
        code = short_lang[:3]
        extra = short_lang[3:]
        # Drop the separator between the code and the suffix, if any
        if extra[:1] in ("-", "_"):
            extra = extra[1:]

        try:
            lang = find_language(code, allow_none=True)
            long_lang = lang.name if lang else code
        except KeyError:
            logger.error("Warning: Long name not found for language "
                         "'%s'.", short_lang)
            logger.warning("  Will use short name as long name.")
            langs.append((short_lang, short_lang))
            continue

        if extra:
            long_lang += " (%s)" % (extra)
        langs.append((short_lang, long_lang))
    return langs
def get_default_spellcheck_lang(ocr_lang):
    """
    Guess the spellchecking language from the OCR language.

    Arguments:
        ocr_lang --- Object whose `value` attribute is the OCR language
            (or None).

    Returns:
        None if `ocr_lang.value` is None. Otherwise, the language is
        looked up with find_language() and the first available attribute
        among `iso639_1_code`, `alpha2` and `alpha_2` is returned.
    """
    lang_code = ocr_lang.value
    if lang_code is None:
        return None

    # Try to guess the lang based on the ocr lang
    language = find_language(lang_code)

    # The attribute holding the code goes by several names
    # (presumably depending on the language database version; verify).
    for attr_name in ('iso639_1_code', 'alpha2'):
        if hasattr(language, attr_name):
            return getattr(language, attr_name)
    return language.alpha_2
def get_default_ocr_lang():
    """
    Try to guess the best OCR language.

    The language returned by find_language() (called without argument;
    expected to reflect the system locale) is matched against the
    languages supported by the first available OCR tool.

    Returns:
        The 3-letter code of the guessed language if the first OCR tool
        returned by pyocr supports it. DEFAULT_OCR_LANG if no OCR tool is
        available or if none of the candidate codes is supported.
    """
    ocr_tools = pyocr.get_available_tools()
    if not ocr_tools:
        return DEFAULT_OCR_LANG
    ocr_langs = ocr_tools[0].get_available_languages()

    lang = find_language()
    # The 3-letter code goes by different attribute names depending on
    # the language object: `iso639_3_code` and `terminology` are the
    # legacy names, `alpha_3` is the counterpart of the `alpha_2`
    # attribute used by get_default_spellcheck_lang().
    for attr_name in ('iso639_3_code', 'terminology', 'alpha_3'):
        code = getattr(lang, attr_name, None)
        if code is not None and code in ocr_langs:
            return code
    return DEFAULT_OCR_LANG
def get_lang_infos(lang_name):
    """
    Build a dictionary describing a language.

    Arguments:
        lang_name --- Either a dictionary (returned as is), or a string
            made of a language name optionally followed by "_" and a
            suffix. Only the first two "_"-separated parts are used.

    Returns:
        A dictionary with the keys:
        "lower" --- language name + suffix, lowercase
        "upper" --- language name + suffix, uppercase
        "long" --- name of the language found by find_language(),
            followed by " (<suffix>)" if there is a suffix
    """
    if isinstance(lang_name, dict):
        # Already in the expected form
        return lang_name

    parts = lang_name.split("_")
    code = parts[0]
    suffix = parts[1] if len(parts) > 1 else ""

    language = find_language(code)
    if suffix:
        long_name = "{} ({})".format(language.name, suffix)
    else:
        long_name = language.name

    infos = {}
    infos["lower"] = code.lower() + suffix.lower()
    infos["upper"] = code.upper() + suffix.upper()
    infos["long"] = long_name
    return infos