def identify(self, text,
             constrain_to_discussion_locales=SECURE_IDENTIFICATION_LIMIT):
    """Try to identify locale of text. Boost if one of the expected locales.

    Args:
        text: The text whose language should be detected.
        constrain_to_discussion_locales: Length threshold. When truthy and
            the length reported by ``self.strlen_nourl(text)`` is below it,
            only candidates compatible with the discussion's root locales
            are kept. Otherwise every candidate is kept and those whose
            root locale is one of the discussion's get their score
            multiplied by 5.

    Returns:
        A ``(locale, scores)`` pair. ``scores`` maps each retained
        language code to its (possibly boosted) score. ``locale`` is the
        best-scoring language when its score is above 0.5, otherwise
        ``Locale.UNDEFINED``. Empty text yields ``Locale.UNDEFINED`` and
        text measuring fewer than 5 characters yields
        ``Locale.NON_LINGUISTIC``, each with a score of 1.
    """
    if not text:
        return Locale.UNDEFINED, {Locale.UNDEFINED: 1}
    # presumably the length of the text once URLs are ignored -- confirm
    # against strlen_nourl.
    len_nourl = self.strlen_nourl(text)
    if len_nourl < 5:
        # Return the same (locale, scores) shape as every other exit so
        # that callers can always unpack two values.
        return Locale.NON_LINGUISTIC, {Locale.NON_LINGUISTIC: 1}
    expected_locales = {
        Locale.extract_root_locale(loc)
        for loc in self.discussion.discussion_locales}
    language_data = detect_langs(text)
    if constrain_to_discussion_locales and (
            len_nourl < constrain_to_discussion_locales):
        # Short text: only trust candidates compatible with the
        # discussion's locales.
        data = [
            (x.prob, x.lang) for x in language_data
            if Locale.any_compatible(
                Locale.extract_root_locale(x.lang), expected_locales)]
    else:
        # boost with discussion locales.
        data = [
            (x.prob * (
                5 if Locale.extract_root_locale(x.lang) in expected_locales
                else 1),
             x.lang)
            for x in language_data]
    # Highest score first; ties are ordered by language code, descending.
    data.sort(reverse=True)
    top = data[0][1] if (data and data[0][0] > 0.5) else Locale.UNDEFINED
    return top, {lang: prob for (prob, lang) in data}
def identify(self, text,
             constrain_to_discussion_locales=SECURE_IDENTIFICATION_LIMIT):
    """Try to identify locale of text. Boost if one of the expected locales.

    Args:
        text: The text whose language should be detected.
        constrain_to_discussion_locales: Length threshold. When truthy and
            the length reported by ``self.strlen_nourl(text)`` is below it,
            candidates are filtered down to those compatible with the
            discussion's root locales. Otherwise all candidates are kept
            and the ones whose root locale belongs to the discussion have
            their score multiplied by 5.

    Returns:
        A ``(locale, scores)`` tuple, where ``scores`` maps each retained
        language code to its (possibly boosted) score and ``locale`` is
        the top-scoring language if its score is above 0.5, else
        ``Locale.UNDEFINED``. Empty text gives ``Locale.UNDEFINED`` and
        text measuring fewer than 5 characters gives
        ``Locale.NON_LINGUISTIC``, each with a score of 1.
    """
    if not text:
        return Locale.UNDEFINED, {Locale.UNDEFINED: 1}
    # presumably the text length with URLs left out -- confirm against
    # strlen_nourl.
    len_nourl = self.strlen_nourl(text)
    if len_nourl < 5:
        # Keep the (locale, scores) return shape used everywhere else so
        # two-value unpacking in callers does not fail.
        return Locale.NON_LINGUISTIC, {Locale.NON_LINGUISTIC: 1}
    expected_locales = {
        Locale.extract_root_locale(loc)
        for loc in self.discussion.discussion_locales}
    language_data = detect_langs(text)
    if constrain_to_discussion_locales and (
            len_nourl < constrain_to_discussion_locales):
        # Below the threshold: keep only candidates compatible with the
        # discussion's locales.
        data = [
            (x.prob, x.lang) for x in language_data
            if Locale.any_compatible(
                Locale.extract_root_locale(x.lang), expected_locales)]
    else:
        # boost with discussion locales.
        data = [
            (x.prob * (
                5 if Locale.extract_root_locale(x.lang) in expected_locales
                else 1),
             x.lang)
            for x in language_data]
    # Best score first; equal scores fall back to language code, descending.
    data.sort(reverse=True)
    top = data[0][1] if (data and data[0][0] > 0.5) else Locale.UNDEFINED
    return top, {lang: prob for (prob, lang) in data}
def identify(self, text, constrain_to_discussion_locales=True):
    """Try to identify locale of text. Boost if one of the expected locales.

    Args:
        text: The text whose language should be detected.
        constrain_to_discussion_locales: When truthy, only candidates
            whose root locale is one of the discussion's root locales are
            kept. When falsy, every candidate is kept and those matching
            a discussion locale get their score multiplied by 5.

    Returns:
        A ``(locale, scores)`` pair. ``scores`` maps each retained
        language code to its (possibly boosted) score. ``locale`` is the
        best-scoring language when its score is above 0.5, otherwise
        ``Locale.UNDEFINED``. Empty text yields
        ``(Locale.UNDEFINED, {Locale.UNDEFINED: 1})``.
    """
    if not text:
        return Locale.UNDEFINED, {Locale.UNDEFINED: 1}
    expected_locales = {
        Locale.extract_root_locale(loc)
        for loc in self.discussion.discussion_locales}
    language_data = detect_langs(text)
    if constrain_to_discussion_locales:
        data = [
            (x.prob, x.lang) for x in language_data
            if Locale.extract_root_locale(x.lang) in expected_locales]
    else:
        # boost with discussion locales.
        data = [
            (x.prob * (
                5 if Locale.extract_root_locale(x.lang) in expected_locales
                else 1),
             x.lang)
            for x in language_data]
    # Highest score first; ties are ordered by language code, descending.
    data.sort(reverse=True)
    top = data[0][1] if (data and data[0][0] > 0.5) else Locale.UNDEFINED
    return top, {lang: prob for (prob, lang) in data}
def confirm_locale(self, langstring_entry, expected_locales=None):
    """Identify the locale of a langstring entry and record the outcome.

    Runs ``self.identify`` on ``langstring_entry.value``, tags the
    resulting score data with this service's class name, and hands both
    to ``langstring_entry.identify_locale``. When that call reports a
    change, the entry's ``locale`` attribute and the parent langstring's
    ``entries`` attribute are expired on the entry's ``db``.

    Any exception is printed with ``print_exc`` and then recorded on the
    entry through ``self.set_error``; it is not re-raised.

    Args:
        langstring_entry: The entry to examine. Must expose ``value``,
            ``identify_locale``, ``db`` and ``langstring``.
        expected_locales: Forwarded unchanged as the second positional
            argument of ``self.identify``.
    """
    try:
        lang, data = self.identify(
            langstring_entry.value, expected_locales)
        data["service"] = self.__class__.__name__
        changed = langstring_entry.identify_locale(lang, data)
        if changed:
            langstring_entry.db.expire(langstring_entry, ["locale"])
            langstring_entry.db.expire(
                langstring_entry.langstring, ["entries"])
        if lang == Locale.UNDEFINED:
            pass  # say you can't identify
    except Exception as e:
        print_exc()
        # Nothing else is computed here: any extra work in the handler
        # could raise and prevent the error from being recorded.
        self.set_error(langstring_entry, *self.decode_exception(e, True))
def confirm_locale(
        self, langstring_entry,
        constrain_to_discussion_locales=SECURE_IDENTIFICATION_LIMIT):
    """Identify the locale of a langstring entry and record the outcome.

    Runs ``self.identify`` on ``langstring_entry.value``, tags the
    resulting score data with this service's class name, and hands both
    to ``langstring_entry.identify_locale``. When that call reports a
    change, the entry's ``locale`` attribute and the parent langstring's
    ``entries`` attribute are expired on the entry's ``db``.

    Any exception is printed with ``print_exc`` and then recorded on the
    entry through ``self.set_error``; it is not re-raised.

    Args:
        langstring_entry: The entry to examine. Must expose ``value``,
            ``identify_locale``, ``db`` and ``langstring``.
        constrain_to_discussion_locales: Forwarded unchanged as the
            second positional argument of ``self.identify``.
    """
    try:
        lang, data = self.identify(
            langstring_entry.value, constrain_to_discussion_locales)
        data["service"] = self.__class__.__name__
        changed = langstring_entry.identify_locale(lang, data)
        if changed:
            langstring_entry.db.expire(langstring_entry, ["locale"])
            langstring_entry.db.expire(
                langstring_entry.langstring, ["entries"])
        if lang == Locale.UNDEFINED:
            pass  # say you can't identify
    except Exception as e:
        print_exc()
        # Nothing else is computed here: any extra work in the handler
        # could raise and prevent the error from being recorded.
        self.set_error(langstring_entry, *self.decode_exception(e, True))