def _get_lang(lang: str):
    """Look up a pycountry language by ISO 639-1 (2-letter) or ISO 639-3
    (3-letter) code; return None for codes of any other length."""
    field = {2: "alpha_2", 3: "alpha_3"}.get(len(lang))
    if field is None:
        return None
    return languages.get(**{field: lang})
def language(field):
    """Check if a language is valid ISO 639-1 (alpha 2) or ISO 639-3
    (alpha 3). Prints the value if it is invalid.
    """
    # Missing values are skipped entirely.
    if pd.isna(field):
        return

    # need to handle "Other" values here...

    # Multi-value fields use "||" as a separator; validate each part.
    for value in field.split("||"):
        width = len(value)
        # The code length decides which ISO 639 table to check against.
        if width == 2:
            if not languages.get(alpha_2=value):
                print(f"{Fore.RED}Invalid ISO 639-1 language: {Fore.RESET}{value}")
        elif width == 3:
            if not languages.get(alpha_3=value):
                print(f"{Fore.RED}Invalid ISO 639-3 language: {Fore.RESET}{value}")
        else:
            print(f"{Fore.RED}Invalid language: {Fore.RESET}{value}")

    return
def get(name=None, part1=None, part3=None):
    """Resolve a language by ISO 639-3 code (part3), ISO 639-1 code
    (part1), or English name — in that priority order — and return a
    copy of its fields. Returns None when no argument is given."""
    lookups = (("alpha_3", part3), ("alpha_2", part1), ("name", name))
    for kwarg, value in lookups:
        if value is not None:
            return _copy_fields(pyc_languages.get(**{kwarg: value}))
def get_language_from_wiktionary_code(code):
    """Resolve a Wiktionary language code to an iso-639 language record.

    Lookup order: ISO 639-1, then ISO 639-3, then ISO 639-2/T; finally the
    part before any '-' is tried as an ISO 639-5 group code. Returns None
    when every lookup fails.
    """
    lang = None
    try:
        lang = languages.get(iso639_1_code=code)
    except KeyError:
        pass
    if not lang:
        try:
            lang = languages.get(iso639_3_code=code)
        except KeyError:
            pass
    if not lang:
        try:
            lang = languages.get(iso639_2T_code=code)
        except KeyError:
            pass
    if not lang:
        # Fall back to the language family / macro part of codes like 'xx-yy'.
        code = code.split('-')[0]
        try:
            lang = iso.get(part5=code)
            # NOTE(review): mutates the shared record so downstream callers can
            # read iso639_3_code uniformly — confirm records aren't cached/shared.
            setattr(lang, 'iso639_3_code', lang.part3)
        except KeyError:
            # print("no lang for: ", code)
            pass
    return lang
def get(cls, language):
    """Resolve *language* to a Language tuple.

    With pycountry available, any name/code is resolved via languages.lookup.
    Otherwise the iso-639 package is used: 2-letter codes as alpha2, 3-letter
    codes tried as part2b/part2t/part3 in order.

    Raises LookupError for anything that cannot be resolved.
    """
    try:
        if PYCOUNTRY:
            c = languages.lookup(language)
            # some pycountry records have no bibliographic code
            return Language(c.alpha_2, c.alpha_3, c.name, getattr(c, "bibliographic", None))
        else:
            l = None
            if len(language) == 2:
                l = languages.get(alpha2=language)
            elif len(language) == 3:
                # Try each 3-letter code family until one matches.
                for code_type in ['part2b', 'part2t', 'part3']:
                    try:
                        l = languages.get(**{code_type: language})
                        break
                    except KeyError:
                        pass
                if not l:
                    raise KeyError(language)
            else:
                raise KeyError(language)
            return Language(l.alpha2, l.part3, l.name, l.part2b or l.part2t)
    except (LookupError, KeyError):
        raise LookupError("Invalid language code: {0}".format(language))
def populate_db(self, values, subjects, filename):
    """Insert author, language, subject, and book records extracted from
    *values* into MongoDB, de-duplicating via DuplicateKeyError.

    :param values: dict of metadata fields for one book
    :param subjects: iterable of subject description strings
    :param filename: source file name; its first number becomes book['number']
    """
    author = {}
    book = {}
    subject_ids = []
    print('file', filename)
    # Keys copied verbatim into the author document.
    author_related = [
        "name", "date_of_birth", "date_of_death", "wiki_link"
    ]
    for key, value in values.items():
        # print("value", value)
        if key in author_related:
            author[key] = value
        if key == "language":
            if len(value) == 2:
                # 2-letter codes are expanded via pycountry.
                data = {
                    "iso_639_1": value,
                    "iso_639_2": lan.get(alpha_2=value).alpha_3,
                    "name": lan.get(alpha_2=value).name
                }
            else:
                # Anything else is stored as-is for all three fields.
                data = {
                    "iso_639_1": value,
                    "iso_639_2": value,
                    "name": value
                }
            try:
                lang_id = self.db.language.insert_one(data).inserted_id
            except DuplicateKeyError:
                # Language already present: reuse its _id.
                lang_id = self.db.language.find_one({
                    "iso_639_1": value
                }).get("_id")
        elif key == "title":
            book[key] = value
    try:
        author_id = self.db.author.insert_one(author).inserted_id
    except DuplicateKeyError:
        author_id = self.db.author.find_one({
            'name': author.get('name', None)
        }).get("_id")
    for subject in subjects:
        try:
            subj_id = self.db.subject.insert_one({
                'description': subject
            }).inserted_id
            subject_ids.append(subj_id)
        except DuplicateKeyError:
            subject_ids.append(
                self.db.subject.find_one({
                    'description': subject
                }).get('_id'))
    try:
        book['author'] = author_id
        # NOTE(review): lang_id is unbound if values has no "language" key —
        # that raises NameError, which this except does not catch; confirm
        # callers always supply a language.
        book['language'] = lang_id
        book['subjects'] = subject_ids
        book['number'] = int(re.search(r'\d+', filename).group())
        self.db.book.insert_one(book)
    except DuplicateKeyError:
        pass
def __init__(self, title, length, relativeDays, lang, mp, pattern,
             has_gamma=True, start=0, archLink=None, archLink_fmt=None,
             archTime_fmt=None):
    """Promotion pattern in a given Wiki.

    title: name of the promotion pattern (str)
    length: length of display during promotion (int)
    relativeDays: (undocumented in original — TODO confirm semantics)
    lang: ISO 639-1 code of the wiki language; validated against iso-639
    mp: name of language main page in pv database (e.g. Main_Page for en)
    pattern: regular expression for promotion pattern (regex)
    has_gamma: True if there is a sharp decay in page view pattern
               (e.g. by Today's Featured Article)
    start: the start time of the promotion pattern (e.g. 25 for On this Day)
    archLink: link to site with archive of promotion pattern (str)
    archLink_fmt / archTime_fmt: strftime formats for the archive link

    Raises KeyError when *lang* is not a valid ISO 639-1 code.
    """
    self.title = title
    self.relativeDays = relativeDays
    self.length = length
    self.mp_title = mp
    self.has_gamma = has_gamma
    self.start = start
    try:
        # Lookup is only used for validation; the raw code is stored.
        languages.get(iso639_1_code=lang)
        self.lang = lang
    except KeyError:
        raise KeyError('Language "'+lang+'" does not exist')
    # NOTE(review): *pattern* is only used when archLink is given — confirm
    # that is intentional.
    if archLink:
        self.scrapePattern = ScrapePattern(lang, pattern, archLink, archLink_fmt, archTime_fmt)
def run(self, artDict):
    """Validate every language key in *artDict* (ISO 639-1), then process
    the data hour by hour from self.start up to (excluding) self.end.

    Raises KeyError for any unknown language code before processing starts.
    """
    for lang in artDict:
        try:
            languages.get(iso639_1_code=lang)
        except KeyError:
            raise KeyError('Language "' + lang + '" does not exist')
    # Truncate the configured start timestamp down to the whole hour.
    begin = self.start
    tick = datetime(begin.year, begin.month, begin.day, begin.hour, 0, 0)
    step = timedelta(hours=1)
    while tick < self.end:
        makeOutput(tick, processFile(tick, artDict))
        tick += step
def get_language(multilanguage, doc, text, default_lingo, supported_lingos):
    '''returns a list or string of language(s) and text(s) per input text'''
    '''if len(doc.text) >= 50 and doc.text.isdigit() == False:'''
    if multilanguage == "n":
        # Single-language mode: keep the whole text and detect one language
        # for the entire doc.
        textlist = text
        langcode = doc._.language['language']
        if langcode != "UNKNOWN":
            langlist = languages.get(alpha_2=langcode).name
            if langlist not in supported_lingos:
                langlist = default_lingo
                print(
                    "Language detection probably not successfull, using default language.")
        else:
            langlist = default_lingo
            print(
                "Language detection not successfull, using default language.")
    else:
        langlist = []  # the list of all unique detected languages
        textlist = []  # the list which contains the string of text for each unique languages
        removerow = []  # just an index of rows
        count_sents = 0
        '''doc = nlp(text)'''
        # Detect a language per sentence and bucket sentences by language.
        for sent in doc.sents:
            count_sents += 1
            langcode = sent._.language['language']
            if langcode != "UNKNOWN":
                langname = languages.get(alpha_2=langcode).name
                if langname in supported_lingos:
                    if langname not in set(langlist):
                        langlist.append(langname)
                        textlist.append(sent.text)
                    else:
                        # Append the sentence to the bucket of its language.
                        for k in range(0, len(langlist)):
                            if langname == langlist[k]:
                                textlist[k] += " "+sent.text
        # Mark tiny language buckets (<2% of all text and not >500 chars)
        # for removal.
        for i in range(len(langlist)):
            percentageoflingo = int(
                100*len(textlist[i])/len(''.join(textlist)))
            if percentageoflingo < 2 and not len(textlist[i]) > 500:
                removerow.append(i)  # or len(textlist[i]) < 50:
        # We delete the part of the text that meets the conditions above.
        # Reverse the order so that the earlier elements are not deleted
        # before the latter ones, which would cause errors.
        for j in sorted(removerow, reverse=True):
            del langlist[j]
            del textlist[j]
    return langlist, textlist
def __init__(self, key, data):
    """Load instance texts from *data* (a dict, or a path to a JSON file),
    resolve the declared language, and precompute tokenized variants.

    :param key: field name extracted from each item in data['values']
    :param data: dict with 'values' and 'language' keys, or a filename
    """
    self.key = key
    if not isinstance(data, dict):
        # *data* is a path: remember it and load the JSON payload.
        self.filename = data
        # TODO allow other data formats?
        with open(data, 'r', encoding='UTF-8') as fh:
            data = json.load(fh)
    else:
        self.filename = data.get('filename')
    logger.info(
        f'Loading {self.__class__.__name__.lower()} for {str(self.filename)}'
    )
    self.all_data = data['values']
    # Heuristic: long or capitalized values are full names, 3 letters is
    # ISO 639-3, otherwise ISO 639-1.
    if len(data['language']) > 3 or data['language'][0].isupper():
        self.language = languages.get(name=data['language'])
    elif len(data['language']) == 3:
        self.language = languages.get(alpha_3=data['language'])
    else:
        self.language = languages.get(alpha_2=data['language'])
    # allow bare lists of strings as well as lists of dicts
    if self.all_data and isinstance(self.all_data[0], str):
        self.data = [item for item in self.all_data]
    else:
        self.data = [item[key] for item in self.all_data]
    # detect if we're using multiple texts per instance
    self.multi_ref = isinstance(self.data[0], list)
    # tokenize & keep a list and a whitespace version
    tokenize_func = default_tokenize_func(self.language)
    if self.multi_ref:
        # Nested variants: one list of references per instance.
        self._tokenized = [[tokenize_func(i) for i in inst]
                           for inst in self.data]
        self._ws_tokenized = [[' '.join(i) for i in inst]
                              for inst in self._tokenized]
        self._lc_tokenized = [[[w.lower() for w in ref] for ref in inst]
                              for inst in self._tokenized]
        self._nopunct_lc_tokenized = [[[
            w for w in ref if w not in self.PUNCTUATION
        ] for ref in inst] for inst in self._lc_tokenized]
    else:
        self._tokenized = [tokenize_func(ref) for ref in self.data]
        self._ws_tokenized = [' '.join(ref) for ref in self._tokenized]
        self._lc_tokenized = [[w.lower() for w in ref]
                              for ref in self._tokenized]
        self._nopunct_lc_tokenized = [[
            w for w in ref if w not in self.PUNCTUATION
        ] for ref in self._lc_tokenized]
def language_info(lang_code):
    """Resolve *lang_code* to an iso-639 language record, or None.

    A trailing '-' is stripped (e.g. 'en-' -> 'en'). Two-letter codes are
    looked up as ISO 639-1; anything else is tried as ISO 639-3 and then
    as ISO 639-2/T. Returns None when every lookup fails.
    """
    if lang_code.endswith('-'):
        lang_code = lang_code[:-1]
    if len(lang_code) == 2:
        try:
            return languages.get(iso639_1_code=lang_code)
        except KeyError:
            # Was a bare except:, which also hid unrelated errors;
            # iso-639 lookups raise KeyError on a miss.
            return None
    try:
        return languages.get(iso639_3_code=lang_code)
    except KeyError:
        try:
            return languages.get(iso639_2T_code=lang_code)
        except KeyError:
            return None
def get_wikt_code_from_iso639_3(code):
    """Wiktionary code for an ISO 639-3 code: the ISO 639-1 code when one
    exists, otherwise the ISO 639-3 code itself; None (with a debug log)
    when the record carries neither."""
    record = languages.get(iso639_3_code=code)
    for candidate in (record.iso639_1_code, record.iso639_3_code):
        if candidate:
            return candidate
    logging.debug('Do not know the Wikt code for {}'.format(code))
def crawl_document(self, document):
    """Build metadata for one DocumentCloud record and emit its PDF URL.

    :param document: dict-like record from the DocumentCloud API
    """
    foreign_id = '%s:%s' % (self.DC_INSTANCE, document.get('id'))
    # Skip records already processed in a previous incremental crawl.
    if self.skip_incremental(foreign_id):
        return
    meta = self.make_meta({
        'source_url': document.get('canonical_url'),
        'title': document.get('title'),
        'author': document.get('account_name'),
        'foreign_id': foreign_id,
        'file_name': os.path.basename(document.get('pdf_url')),
        'extension': 'pdf',
        'mime_type': 'application/pdf'
    })
    # Best-effort enrichment: missing/unparseable dates are ignored.
    # NOTE(review): bare except also swallows unrelated errors — consider
    # narrowing to (TypeError, ValueError).
    try:
        created = parse(document.get('created_at'))
        meta.add_date(created.date().isoformat())
    except:
        pass
    # Best-effort language: ISO 639-3 from the record, stored as 639-1.
    # NOTE(review): bare except as above.
    try:
        lang = languages.get(iso639_3_code=document.get('language'))
        meta.add_language(lang.iso639_1_code)
    except:
        pass
    self.emit_url(meta, document.get('pdf_url'))
def decode_language_ios_639(lang):
    """English name for an ISO 639-1 language code, or 'Unknown'.

    Any failure (unknown code, pycountry missing) is logged and mapped to
    'Unknown' instead of raised.
    """
    import logging
    try:
        from pycountry import languages
        return languages.get(iso639_1_code=lang).name
    except Exception as ex:
        # Fix: the original referenced self.logger from a plain function,
        # which raised NameError in the error path; use module logging.
        logging.getLogger(__name__).warning(
            'Exception with language decoding according to ISO 693-1: {0}'.format(str(ex)))
        return 'Unknown'
class VideoSubtitle(VideoS3):
    """A VTT file that provides captions for a Video"""
    # Original file name of the subtitle track.
    filename = models.CharField(max_length=1024, null=False, blank=True)
    # ISO 639-1 code (2 chars); the default is resolved once at import
    # time via pycountry ("en").
    language = models.CharField(
        max_length=2,
        null=False,
        blank=True,
        default=languages.get(name="English").alpha_2,
    )
    # One subtitle per (video, language) pair.
    # NOTE(review): unique_together as a plain class attribute has no effect
    # in Django — it must live in class Meta; confirm intent.
    unique_together = (("video", "language"), )

    @property
    def language_name(self):
        """ Gets the name associated with the language code """
        return languages.get(alpha_2=self.language).name

    def __str__(self):
        return "{}: {}: {}".format(self.video.title, self.s3_object_key,
                                   self.language)

    def __repr__(self):
        return "<VideoSubtitle: {self.s3_object_key!r} {self.language!r} >".format(
            self=self)
def get(cls, language):
    """Resolve *language* (alpha-2 / alpha-3 / bibliographic code or
    English name) to a Language tuple.

    Raises LookupError when nothing matches.
    """
    try:
        record = None
        for field in ("alpha_2", "alpha_3", "bibliographic", "name"):
            record = languages.get(**{field: language})
            if record:
                break
        if not record:
            raise KeyError(language)
        # some languages don't have an alpha_2 code
        return Language(
            getattr(record, "alpha_2", ""),
            record.alpha_3,
            record.name,
            getattr(record, "bibliographic", ""))
    except (LookupError, KeyError):
        raise LookupError(f"Invalid language code: {language}")
def extract_video(self, video):
    """Build a VideoItem from one Vimeo API *video* record.

    Missing or 'none' language defaults to English.
    """
    language = video['language']
    if language in (None, 'none'):
        language = 'en'
    return items.VideoItem(
        title=self.extract_title(video),
        summary='',
        description=video['description'],
        category=self.get_event_url(video),
        quality_notes='',
        # Resolve the ISO 639-1 code to its English name.
        language=languages.get(iso639_1_code=language).name,
        copyright_text=self.LICENSE_TYPES.get(video['license'],
                                              video['license']),
        speakers=self.extract_speakers(video),
        # Largest thumbnail variant offered by the API.
        thumbnail_url=video['pictures']['sizes'][-1]['link'],
        duration=video['duration'] * 60,  # API gives duration in minutes
        source_url=self.get_source_url(video),
        # 'YYYY-MM-DD' prefix of the creation timestamp.
        recorded=video['created_time'][0:10],
        slug=utils.slugify(video['name']),
        tags=video['tags'],
        videos=[items.VideoField(
            length=video['duration'] * 60,  # API gives duration in minutes
            url=video['link'],
            type='vimeo')]
    )
def get_language(post_content):
    """Best-effort language name (e.g. 'English') for *post_content*.

    Returns 'unknown' when detection or the ISO lookup fails.
    """
    try:
        lang_detect = detect(post_content)
        post_lang = languages.get(alpha_2=lang_detect).name
    except Exception:
        # Was a bare except:, which also swallowed KeyboardInterrupt and
        # SystemExit; keep the best-effort fallback but narrow the scope.
        post_lang = 'unknown'
    return post_lang
def parse_video(self, response):
    """Parse Video and build a VideoItem"""
    payload = json.loads(response.body.decode())
    data = payload['items'][0]
    snippet = data['snippet']
    # Prefer the 'standard' thumbnail; fall back to 'maxres'.
    thumbnail = snippet['thumbnails'].get('standard',
                                          snippet['thumbnails']['maxres'])
    url = self.WEB_VIDEO_URL.format(video_id=data['id'])
    duration = utils.duration_as_seconds(data['contentDetails']['duration'])
    yield items.VideoItem(
        title=snippet['title'],
        summary='',
        description=snippet['description'],
        category=response.meta['event']['title'],
        quality_notes=data['contentDetails']['definition'],
        # Audio language code (default 'en') resolved to its English name.
        language=languages.get(iso639_1_code=snippet.get(
            'defaultAudioLanguage', 'en')).name,
        copyright_text=self.LICENSE_TYPES.get(data['status']['license'],
                                              data['status']['license']),
        thumbnail_url=thumbnail['url'],
        duration=duration,
        source_url=url,
        # 'YYYY-MM-DD' prefix of the publish timestamp.
        recorded=snippet['publishedAt'][0:10],
        slug=utils.slugify(snippet['title']),
        tags=[],
        speakers=self.extract_speakers(snippet['description']),
        videos=[{
            'length': duration,
            'url': url,
            'type': 'youtube',
        }]
    )
def _detect_message_language(message):
    """Name of the most likely language of *message*; falls back to
    English unless the detector's confidence exceeds 0.99."""
    best = detect_langs(message)[0]
    code = best.lang if best.prob > 0.99 else 'en'
    return languages.get(alpha_2=code).name
def get_language_name(code):
    """English name for an ISO 639-1 *code*.

    Returns '' both for a None code and for an unknown code, so callers
    always receive a string.
    """
    if code is None:
        return ""
    language = languages.get(alpha2=code)
    if language is not None:
        return language.name
    # Fix: the original fell through and implicitly returned None for
    # unknown codes, inconsistent with the '' returned for None above.
    return ""
def getStats(self, pretty=False):
    """Aggregate per-word parent-language stats into percentages.

    :param pretty: when True, keys are resolved from ISO 639-3 codes to
        language names ('Other Language' when the lookup fails) and the
        percentages are rounded to 2 decimals.
    :return: dict mapping language (code or name) to percentage
    """
    statsList = [word.parentLanguages.stats for word in self.wordObjects]
    stats = {}
    for item in statsList:
        if len(item) > 0:
            for lang, perc in item.items():
                if lang not in stats:
                    stats[lang] = perc
                else:
                    stats[lang] += perc
    # Normalize the accumulated weights to percentages.
    allPercs = sum(stats.values())
    for lang, perc in stats.items():
        stats[lang] = (perc / allPercs) * 100
    if not pretty:
        return stats
    prettyStats = {}
    for lang, perc in stats.items():
        try:
            prettyLang = languages.get(iso639_3_code=lang).name
        except (KeyError, AttributeError):
            # Was a bare except:, which also hid unrelated errors; the
            # iso-639 lookup raises KeyError on a miss.
            prettyLang = "Other Language"
        prettyStats[prettyLang] = round(perc, 2)  # rename the key
    return prettyStats
def getStats(self, pretty=False):
    """Aggregate per-word parent-language stats into percentages.

    :param pretty: when True, keys are resolved from ISO 639-3 (alpha_3)
        codes to language names ('Other Language' when the lookup fails)
        and the percentages are rounded to 2 decimals.
    :return: dict mapping language (code or name) to percentage
    """
    statsList = [word.parentLanguages.stats for word in self.wordObjects]
    stats = {}
    for item in statsList:
        if len(item) > 0:
            for lang, perc in item.items():
                if lang not in stats:
                    stats[lang] = perc
                else:
                    stats[lang] += perc
    # Normalize the accumulated weights to percentages.
    allPercs = sum(stats.values())
    for lang, perc in stats.items():
        stats[lang] = (perc / allPercs) * 100
    if not pretty:
        return stats
    prettyStats = {}
    for lang, perc in stats.items():
        try:
            prettyLang = languages.get(alpha_3=lang).name
        except (KeyError, AttributeError):
            # Was a bare except:, which also hid unrelated errors; the
            # lookup may raise KeyError (old API) or return None and then
            # fail on .name (pycountry >= 18).
            prettyLang = "Other Language"
        prettyStats[prettyLang] = round(perc, 2)  # rename the key
    return prettyStats
def crawl_document(self, document):
    """Create and emit a crawler document for one DocumentCloud record.

    Fix: the original rebound the *document* parameter to the object
    returned by create_document(), so every subsequent document.get(...)
    read from the freshly created (empty) document instead of the source
    API record.

    :param document: dict-like record from the DocumentCloud API
    """
    foreign_id = '%s:%s' % (self.DC_INSTANCE, document.get('id'))
    if self.skip_incremental(foreign_id):
        return
    doc = self.create_document(foreign_id=foreign_id)
    doc.source_url = document.get('canonical_url')
    doc.title = document.get('title')
    doc.author = document.get('author')
    doc.file_name = os.path.basename(document.get('pdf_url'))
    doc.mime_type = 'application/pdf'
    # Best-effort date: a missing/unparseable created_at is ignored.
    try:
        created = parse(document.get('created_at'))
        doc.add_date(created.date().isoformat())
    except Exception:
        pass
    # Best-effort language: ISO 639-3 from the record, stored as 639-1.
    try:
        lang = languages.get(iso639_3_code=document.get('language'))
        doc.add_language(lang.iso639_1_code)
    except Exception:
        pass
    self.emit_url(doc, document.get('pdf_url'))
def get_question_answer():
    """
    get answer for a given question
    :return: json object/error HTTP response
    """
    try:
        content_type = request.content_type
        if "form-" in content_type:
            # for key in request.form.keys():
            #     print("{}:{}".format(key, request.form[key]))
            message = request.form["Body"]
            num_media = 0
            if "NumMedia" in request.form.keys():
                num_media = request.form[
                    "NumMedia"]  # check if user sent any media msg (e.g. voice, picture)
            if len(message) == 0 and int(num_media) > 0:
                # Media-only message: reply that only text is supported.
                message = twilio_client.messages.create(
                    body=
                    "Sorry, I can only answer to textual messages at the moment! 😉🧐",
                    from_=request.form["To"],
                    to=request.form["From"],
                )
            user_id = request.form["From"].replace("whatsapp:", "")
            if len(
                    message
            ) > 2:  # if the length of message is more than 2 characters then check the language; TextBlob library requires a sentence/word with at least 3 characters to detect a language
                blob = TextBlob(message)
                query_language = blob.detect_language()
                if query_language is not None:
                    lang_name = languages.get(alpha_2=query_language)
                    if lang_name is None:
                        result = "I don't understand your language 🧐"
                    else:
                        if lang_name.name == "English":
                            result = rule_controller.answer_question(
                                user_id, message)
                        else:
                            result = "I can only talk in *English* 🇬🇧 at the moment, but soon I will be able to talk in _{}_ 😎".format(
                                lang_name.name)
                else:
                    result = "I don't understand your language 🧐"
            else:
                # Too short for language detection: answer directly.
                result = rule_controller.answer_question(user_id, message)
            if result is not None:
                # in case of handovering user's question to a human, we do not return anything here
                message = twilio_client.messages.create(
                    body=result,
                    from_=request.form["To"],
                    to=request.form["From"],
                )
        return "OK"  # this is to fix the "The view function did not return a valid response" error as we do not return a Response to twilio api, we use twilio library instead.
    except Exception as err:
        logger.error(str(err))
        message = twilio_client.messages.create(
            body="Oops! \nSomething wrong happened on my side!",
            from_=request.form["To"],
            to=request.form["From"],
        )
        return "Not OK!"
def iso_to_name(iso):
    """Language name for an ISO 639-1 code; 'Unknown' for empty/None or
    unrecognized codes."""
    if not iso:
        return 'Unknown'
    try:
        record = languages.get(alpha2=iso)
    except KeyError:
        return 'Unknown'
    return record.name
def get_lang_readable(coms_lng):
    """
    Maps list of two character language code to full language name
    """
    readable = []
    for code in coms_lng:
        if code == "Unknown":
            readable.append(code)
        else:
            readable.append(languages.get(alpha_2=code).name)
    return readable
def detect_language(self, column = ''):
    '''
    This function will search through the Pandas DataFrame column of
    textual data to detect the language of the corpus.
    ------------
    Argument
        column (string): If specified, it will be used when generating
            summary. If it is an empty string, the default one will be used
    ------------
    Return
        pd.DataFrame: contains one column having information about the
        language
    ------------
    Example
        >>> df = nlp.NLPFrame({'text_col' : ['I love travelling to Japan and eating Mexican food but I can only speak English!']})
        >>> df.detect_language()
            language
        0    English
    ------------
    '''
    # Bail out early when the fasttext model is not available.
    if not fasttext_dependencies(self.fasttext_model):
        print('Dependencies are not met. Please read the instructions or contact the developers for further details')
        return None
    column = column if column else self.column
    if not column:
        raise ValueError('There is no column with text in the NLPFrame')
    try:
        pd_df_col = self.__getitem__(column)
    except KeyError:
        raise ValueError(f"The column {column} doesn't exist in the NLPFrame")

    # path = 'model/lid.176.bin'
    # if not os.path.isfile(path):
    #     try:
    #         print('Downloading fasttext pre-trained model')
    #
    #         url = 'https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin'
    #         wget.download(url, path)
    #     except:
    #         print('Something went wrong when downloading!!')
    #         return False

    model = fasttext.load_model(self.fasttext_model)
    # Concatenate the whole column and classify it as one document.
    predictions = model.predict(''.join(pd_df_col))
    # fasttext labels look like '__label__en'; keep the last two chars.
    result = predictions[0][0][-2:]
    language = languages.get(alpha_2 = result)
    return pd.DataFrame({'language': [language.name]})
def standardize_language(code):
    """Match `code` to a standard RFC5646 or RFC3066 language.

    The following approaches are tried in order:

    * Match a RFC5646 language string.
    * Match a RFC3066 language string.
    * Use a ISO-6639/2 bibliographic synonym, and match a RFC3066 language
      string for the ISO-6639/2 terminological code.

    If no results are found, `None` is returned.

    http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dclanguage
    http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.12

    :param code: string with a language code ('en-GB', ...)
    :returns: `LanguageTuple` with the RFC5646 code and the list of
        description tags, or `None` if the language could not be identified.
    """
    if not code:
        return None
    # Try RFC5646 (for EPUB 3).
    if tags.check(code):
        return LanguageTuple(code=code.lower(),
                             description=tags.description(code))
    # Try RFC3066 (for EPUB 2).
    # Try to get the ISO639-1 code for the language.
    try:
        lang = languages.get(iso639_2T_code=code)
        new_code = lang.iso639_1_code
    except KeyError:
        # Try synonym.
        if code in ISO_6639_2_B.keys():
            try:
                lang = languages.get(iso639_2T_code=ISO_6639_2_B[code])
                new_code = lang.iso639_1_code
            except KeyError:
                return None
        else:
            return None
    # Try RFC5646 for the ISO639-1 code.
    if tags.check(new_code):
        return LanguageTuple(code=new_code.lower(),
                             description=tags.description(new_code))
    return None
def normalize_language(language):
    """Normalize a language code or name to its lowercase English name.

    ISO 639-1 (alpha_2) is tried before ISO 639-3 (alpha_3). The input is
    returned unchanged when neither lookup matches.
    """
    for lookup_key in ("alpha_2", "alpha_3"):
        try:
            found = languages.get(**{lookup_key: language})
        except KeyError:
            # Older pycountry raises KeyError on a miss.
            continue
        # Fix: pycountry >= 18 returns None on a miss instead of raising,
        # which made the original crash with AttributeError on .name.
        if found is not None:
            return found.name.lower()
    return language
def display_language_name_filter(ctx, locale):
    """Template filter: display name for a locale such as 'ger_CH'.

    The bibliographic language part is mapped to its terminology code via
    pycountry, then rendered with Babel. Falls back to the generated
    English name (constants.LANGUAGES) or the raw locale.
    """
    language_code, _, country_code = locale.partition('_')
    try:
        term_code = languages.get(bibliographic=language_code).terminology
    except (KeyError, AttributeError):
        # Fix: an unknown bibliographic code previously raised here,
        # before the fallback below could ever apply.
        return constants.LANGUAGES.get(language_code, locale)
    try:
        return Locale.parse(term_code).language_name
    except UnknownLocaleError:
        # Fallback value is the generated value in English or the code
        return constants.LANGUAGES.get(term_code, locale)
def normalize_language(language):
    """Normalize a language code or name to its lowercase English name.

    ISO 639-1 (alpha_2) is tried before ISO 639-3 (alpha_3). The input is
    returned unchanged when neither lookup matches.
    """
    for lookup_key in ("alpha_2", "alpha_3"):
        try:
            from pycountry import languages  # get ISO list of languages for lookup
            found = languages.get(**{lookup_key: language})
        except KeyError:
            # Older pycountry raises KeyError on a miss.
            continue
        # Fix: pycountry >= 18 returns None on a miss instead of raising,
        # which made the original crash with AttributeError on .name.
        if found is not None:
            return found.name.lower()
    return language
def normalize_language(language):
    """Normalize a language code to its lowercase English name; return the
    input unchanged when no ISO 639-1 / 639-3 match is found.

    Fix: return as soon as a lookup succeeds. The original kept looping,
    feeding the already-normalized *name* into the alpha_3 lookup, which
    always failed — same result, but a wasted lookup per call.
    """
    for lookup_key in ("alpha_2", "alpha_3"):
        try:
            lang = languages.get(**{lookup_key: language})
        except KeyError:
            continue
        if lang:
            return lang.name.lower()
    return language
def print_languages_ios_639(langauge_set):
    """Format one 'code:Name' line per ISO 639-1 code in *langauge_set*.

    Returns the formatted string, or None on any failure (logged).
    """
    import logging
    try:
        from pycountry import languages
        lines = []
        for lang in langauge_set:
            lines.append('{0}:{1}\n'.format(
                lang, languages.get(iso639_1_code=lang).name))
        return ''.join(lines)
    except Exception as ex:
        # Fix: the original referenced self.logger from a plain function,
        # which raised NameError in the error path; use module logging.
        logging.getLogger(__name__).warning(
            'Exception with language decoding according to ISO 693-1: {0}'.format(str(ex)))
        return None
def get_iso_lang_code(lang):
    """
    Convert two-digit iso code into three-digit iso code
    :param lang: Two digit iso code
    :return: Three digit iso code (or None when the code is unknown)
    """
    record = languages.get(alpha_2=lang)
    if record is None:
        return None
    # Prefer the bibliographic (ISO 639-2/B) code when the record has one;
    # otherwise use the plain alpha_3 code.
    attr = "bibliographic" if hasattr(record, "bibliographic") else "alpha_3"
    return getattr(record, attr)
def language(v):
    """Coerce *v* (a language record, or a name / alpha-3 / alpha-2 code)
    to a pycountry language record; raise ValueError when nothing matches."""
    if isinstance(v, languages.data_class_base):
        return v
    for field in ('name', 'alpha_3', 'alpha_2'):
        try:
            hit = languages.get(**{field: v})
        except KeyError:
            continue
        if hit is not None:
            return hit
    raise ValueError('invalid language: %r' % v)
def get_lang(code, iso639=None):
    """Returns a language instance from the given code."""
    # Only the part before the first '.' is the language code proper;
    # anything after it selects submodules.
    code_head = code.split('.', 1)[0]
    if len(code_head) < 2 or len(code_head) > 3:
        raise ValueError('%r is an invalid language code' % code)

    def make_lang(code, submods):
        # Import the hangulize language module and instantiate it.
        try:
            code = '.'.join([code] + list(submods))
            return import_lang_module(code).__lang__()
        except ImportError:
            raise ValueError('Hangulize does not support %s' % code)

    # split module path
    if '.' in code:
        code = code.split('.')
        submods = code[1:]
        code = code[0]
    else:
        submods = ()
    # guess if ISO 639-1 or 639-3
    if iso639 is None:
        if len(code) == 2:
            iso639 = 1
        elif len(code) == 3:
            iso639 = 3
    try:
        # fix the warning when importing pycountry
        import logging
        from pycountry import languages
        if not getattr(logging, 'NullHandler', None):
            # Backport NullHandler for very old Python versions.
            class NullHandler(logging.Handler):
                def emit(self, record):
                    pass
            logging.NullHandler = NullHandler
        logging.getLogger('pycountry.db').addHandler(logging.NullHandler())
        # Map the requested ISO 639 edition to the pycountry lookup field
        # and canonicalize the code to its terminology form.
        attr = ['alpha2', 'bibliographic', 'terminology'][iso639 - 1]
        code = languages.get(**{attr: code}).terminology
    except TypeError:
        # out of 2~3 characters
        raise ValueError('%r is an invalid language code' % code)
    except KeyError:
        # Unknown to pycountry: fall back to hangulize's own modules.
        try:
            return make_lang(code, submods)
        except ValueError:
            raise ValueError('{0!r} is an invalid ISO 639-{1} code'
                             ''.format(code, iso639))
    except ImportError:
        # pycountry missing: only ISO 639-3 codes can be resolved directly.
        if iso639 != 3:
            raise ImportError('ISO 639-%d requires pycountry' % iso639)
    return make_lang(code, submods)
def get_display_name(self, locale):
    """Human-readable display name for *locale* (e.g. 'ger_CH').

    The bibliographic language part is mapped to its terminology code via
    pycountry and rendered with Babel; falls back to the configured
    available_languages mapping or the raw locale.
    """
    language_code, _, country_code = locale.partition('_')
    available_languages = dict(literal_eval(
        (self.settings.get('available_languages', '[]'))))
    try:
        term_code = languages.get(bibliographic=language_code).terminology
    except (KeyError, AttributeError):
        # Fix: an unknown bibliographic code previously raised here,
        # before the fallback below could ever apply.
        return available_languages.get(locale, locale)
    try:
        return Locale.parse(term_code).language_name
    except UnknownLocaleError:
        # Fallback value is the generated value in English or the code
        return available_languages.get(locale, locale)
def get_display_name(self, locale):
    """Human-readable display name for *locale* (e.g. 'ger_CH').

    The bibliographic language part is mapped to its terminology code via
    pycountry and rendered with Babel; falls back to the configured
    available_languages mapping or the raw locale.
    """
    language_code, _, country_code = locale.partition('_')
    available_languages = dict(
        literal_eval((self.settings.get('available_languages', '[]'))))
    try:
        term_code = languages.get(bibliographic=language_code).terminology
    except (KeyError, AttributeError):
        # Fix: an unknown bibliographic code previously raised here,
        # before the fallback below could ever apply.
        return available_languages.get(locale, locale)
    try:
        return Locale.parse(term_code).language_name
    except UnknownLocaleError:
        # Fallback value is the generated value in English or the code
        return available_languages.get(locale, locale)
def execute(self, maybe_code):
    """Resolve *maybe_code* (a code string, or a dict with a '#text' key)
    to an ISO 639-3 code by trying every available lookup index; returns
    None when no index matches.

    NOTE(review): relies on private pycountry internals (_is_loaded,
    _load, indices) — confirm the pinned pycountry version supports them.
    """
    if isinstance(maybe_code, dict):
        maybe_code = maybe_code['#text']
    # Force indices to populate
    if not languages._is_loaded:
        languages._load()
    for kwarg in languages.indices.keys():
        try:
            return languages.get(**{kwarg: maybe_code}).iso639_3_code
        except KeyError:
            continue
    return None
def _get_subtitle_info(self):
    """Search the subtitle provider for a release matching this video.

    On a match, stores the subtitle URL on self.__sub_url and returns
    True; otherwise returns False.
    """
    # English name of the configured subtitle language code.
    lang = languages.get(alpha2=self.subtitle_language).name
    to_search = self.strip_version(self.subtitle_release)
    sub_dict_list = self.__search_subtitles()
    for sub_dict in sub_dict_list:
        # NOTE(review): release_name and lang come from external data but
        # are compiled as regex patterns — an invalid pattern would raise
        # re.error here; confirm inputs are sanitized upstream.
        sub_regex = compile(self.strip_version(sub_dict['release_name']),
                            flags=IGNORECASE)
        lang_regex = compile(sub_dict['lang'], flags=IGNORECASE)
        if sub_regex.match(to_search) and lang_regex.match(lang):
            self.__sub_url = sub_dict['url']
            return True
    return False
def translate_iana_language_code_to_iso_639_3(iana_lang_code):
    """
    Translates the 2 character iana_lang_code provided into the appropriate
    ISO-639 3 character language code
    :param iana_lang_code:
    :return: str corresponding ISO-639 3 character language code (the input
        code is returned unchanged when the lookup fails)
    """
    iso_lang_code = iana_lang_code
    try:
        language = languages.get(iso639_1_code=iana_lang_code)
        iso_lang_code = language.iso639_3_code
    except Exception as e:
        # Fix: 'except Exception, e' was Python-2-only syntax.
        log.error('Error retrieving ISO-639 language code: %s', e)
    # Fix: the original had no return statement, so callers always got None.
    return iso_lang_code
def get_lang(code, iso639=None):
    """Returns a language instance from the given code."""
    # Only the part before the first '.' is the language code proper;
    # anything after it selects submodules.
    code_head = code.split('.', 1)[0]
    if len(code_head) < 2 or len(code_head) > 3:
        raise ValueError('%r is an invalid language code' % code)

    def make_lang(code, submods):
        # Import the hangulize language module and instantiate it.
        try:
            code = '.'.join([code] + list(submods))
            return import_lang_module(code).__lang__()
        except ImportError:
            raise ValueError('Hangulize does not support %s' % code)

    # split module path
    if '.' in code:
        code = code.split('.')
        submods = code[1:]
        code = code[0]
    else:
        submods = ()
    # guess if ISO 639-1 or 639-3
    if iso639 is None:
        if len(code) == 2:
            iso639 = 1
        elif len(code) == 3:
            iso639 = 3
    try:
        # fix the warning when importing pycountry
        import logging
        from pycountry import languages
        if not getattr(logging, 'NullHandler', None):
            # Backport NullHandler for very old Python versions.
            class NullHandler(logging.Handler):
                def emit(self, record):
                    pass
            logging.NullHandler = NullHandler
        logging.getLogger('pycountry.db').addHandler(logging.NullHandler())
        # Map the requested ISO 639 edition to the pycountry lookup field
        # and canonicalize the code to its terminology form.
        attr = ['alpha2', 'bibliographic', 'terminology'][iso639 - 1]
        code = languages.get(**{attr: code}).terminology
    except TypeError:
        # out of 2~3 characters
        raise ValueError('%r is an invalid language code' % code)
    except KeyError:
        # Unknown to pycountry: fall back to hangulize's own modules.
        try:
            return make_lang(code, submods)
        except ValueError:
            raise ValueError('%r is an invalid ISO 639-%d code' % \
                             (code, iso639))
    except ImportError:
        # pycountry missing: only ISO 639-3 codes can be resolved directly.
        if iso639 != 3:
            raise ImportError('ISO 639-%d requires pycountry' % iso639)
    return make_lang(code, submods)
def get_languages_iso3(codes):
    """Convert a list of 2/3-letter language codes to a sorted, unique,
    '+'-joined ISO 639-3 string. 'eng' is always included; None entries,
    malformed codes, and unknown 2-letter codes are dropped."""
    supported = ["eng"]
    for raw in (codes or []):
        if raw is None:
            continue
        code = raw.lower().strip()
        if len(code) not in (2, 3):
            continue
        if len(code) == 2:
            # Expand ISO 639-1 codes to ISO 639-3; skip unknown codes.
            try:
                code = languages.get(iso639_1_code=code).iso639_3_code
            except KeyError:
                continue
        supported.append(code)
    return "+".join(sorted(set(supported)))
def display_language_name_filter(ctx, locale):
    """Template filter: display name for a locale such as 'ger_CH', by
    mapping the bibliographic language part to its terminology code and
    rendering it with Babel."""
    language_code = locale.partition('_')[0]
    term_code = languages.get(bibliographic=language_code).terminology
    return Locale.parse(term_code).language_name
def get_code(language):
    """ISO 639-2/B (bibliographic) code for an English language *name*.

    Returns None when the name is unknown or the language has no
    bibliographic code.
    """
    try:
        lang = languages.get(name=language)
        # Fix: pycountry >= 18 returns None on a miss, and many languages
        # have no 'bibliographic' attribute at all — both previously
        # escaped as AttributeError instead of returning None.
        return getattr(lang, "bibliographic", None) if lang else None
    except KeyError:
        # Older pycountry raises KeyError on a miss.
        return None
def language_code(language):
    """Look up a language record by English name; None when not found.

    NOTE(review): despite its name, this returns the full language record
    rather than a code — confirm with callers.
    """
    try:
        record = languages.get(name=language)
    except KeyError:
        record = None
    return record