예제 #1
0
 def identify(
         self, text,
         constrain_to_discussion_locales=SECURE_IDENTIFICATION_LIMIT):
     """Try to identify the locale of text, favouring the expected locales.

     The expected locales are the root locales of
     ``self.discussion.discussion_locales``.

     :param text: the text to analyze.
     :param constrain_to_discussion_locales: length threshold. When it is
         truthy and the text length (as measured by ``self.strlen_nourl``)
         is below it, only candidates compatible with the expected locales
         are kept. Otherwise all candidates are kept and the score of
         those whose root locale is expected is multiplied by 5.
     :returns: a ``(locale, data)`` pair where ``data`` maps each retained
         candidate language to its (possibly boosted) score. ``locale`` is
         ``Locale.UNDEFINED`` when the text is empty or no candidate scores
         above 0.5, and ``Locale.NON_LINGUISTIC`` when the measured length
         is under 5.
     """
     if not text:
         return Locale.UNDEFINED, {Locale.UNDEFINED: 1}
     len_nourl = self.strlen_nourl(text)
     if len_nourl < 5:
         # Same (locale, data) shape as every other exit, so callers can
         # always unpack two values.
         return Locale.NON_LINGUISTIC, {Locale.NON_LINGUISTIC: 1}
     expected_locales = {
         Locale.extract_root_locale(loc)
         for loc in self.discussion.discussion_locales}
     language_data = detect_langs(text)
     if constrain_to_discussion_locales and (
             len_nourl < constrain_to_discussion_locales):
         # Short text: only trust candidates among the expected locales.
         data = [(x.prob, x.lang)
                 for x in language_data
                 if Locale.any_compatible(
                     Locale.extract_root_locale(x.lang),
                     expected_locales)]
     else:
         # boost with discussion locales.
         data = [
             (x.prob * (
                 5 if Locale.extract_root_locale(x.lang)
                 in expected_locales else 1
             ), x.lang) for x in language_data]
     # Best score first; the top candidate must clear 0.5 to be accepted.
     data.sort(reverse=True)
     top = data[0][1] if (data and (data[0][0] > 0.5)
                          ) else Locale.UNDEFINED
     return top, {lang: prob for (prob, lang) in data}
예제 #2
0
 def identify(self,
              text,
              constrain_to_discussion_locales=SECURE_IDENTIFICATION_LIMIT):
     """Try to identify the locale of text, favouring the expected locales.

     The expected locales are the root locales of
     ``self.discussion.discussion_locales``.

     :param text: the text to analyze.
     :param constrain_to_discussion_locales: length threshold. When it is
         truthy and the text length (as measured by ``self.strlen_nourl``)
         is below it, only candidates compatible with the expected locales
         are kept. Otherwise all candidates are kept and the score of
         those whose root locale is expected is multiplied by 5.
     :returns: a ``(locale, data)`` pair where ``data`` maps each retained
         candidate language to its (possibly boosted) score. ``locale`` is
         ``Locale.UNDEFINED`` when the text is empty or no candidate scores
         above 0.5, and ``Locale.NON_LINGUISTIC`` when the measured length
         is under 5.
     """
     if not text:
         return Locale.UNDEFINED, {Locale.UNDEFINED: 1}
     len_nourl = self.strlen_nourl(text)
     if len_nourl < 5:
         # Keep the two-value return shape used by every other exit.
         return Locale.NON_LINGUISTIC, {Locale.NON_LINGUISTIC: 1}
     expected_locales = {Locale.extract_root_locale(loc)
                         for loc in self.discussion.discussion_locales}
     language_data = detect_langs(text)
     if constrain_to_discussion_locales and (
             len_nourl < constrain_to_discussion_locales):
         # Short text: only trust candidates among the expected locales.
         data = [(x.prob, x.lang) for x in language_data
                 if Locale.any_compatible(
                     Locale.extract_root_locale(x.lang), expected_locales)]
     else:
         # boost with discussion locales.
         data = [(x.prob * (5 if Locale.extract_root_locale(x.lang)
                            in expected_locales else 1), x.lang)
                 for x in language_data]
     # Best score first; the top candidate must clear 0.5 to be accepted.
     data.sort(reverse=True)
     top = data[0][1] if (data and (data[0][0] > 0.5)) else Locale.UNDEFINED
     return top, {lang: prob for (prob, lang) in data}
예제 #3
0
 def identify(self, text, constrain_to_discussion_locales=True):
     """Try to identify the locale of text, favouring the expected locales.

     The expected locales are the root locales of
     ``self.discussion.discussion_locales``.

     :param text: the text to analyze.
     :param constrain_to_discussion_locales: when truthy, only candidates
         whose root locale is expected are kept. Otherwise all candidates
         are kept and the score of those whose root locale is expected is
         multiplied by 5.
     :returns: a ``(locale, data)`` pair where ``data`` maps each retained
         candidate language to its (possibly boosted) score. ``locale`` is
         ``Locale.UNDEFINED`` when the text is empty or no candidate scores
         above 0.5.
     """
     if not text:
         return Locale.UNDEFINED, {Locale.UNDEFINED: 1}
     expected_locales = {
         Locale.extract_root_locale(loc)
         for loc in self.discussion.discussion_locales}
     language_data = detect_langs(text)
     if constrain_to_discussion_locales:
         data = [(x.prob, x.lang)
                 for x in language_data
                 if Locale.extract_root_locale(x.lang) in expected_locales]
     else:
         # boost with discussion locales.
         data = [
             (x.prob * (
                 5 if Locale.extract_root_locale(x.lang)
                 in expected_locales else 1
             ), x.lang) for x in language_data]
     # Best score first; the top candidate must clear 0.5 to be accepted.
     data.sort(reverse=True)
     top = data[0][1] if (data and (data[0][0] > 0.5)
                          ) else Locale.UNDEFINED
     return top, {lang: prob for (prob, lang) in data}
예제 #4
0
 def confirm_locale(self, langstring_entry, expected_locales=None):
     """Identify the locale of a langstring entry and record it on the entry.

     Runs ``self.identify`` on ``langstring_entry.value``, tags the
     resulting data with this service's class name, and hands both to
     ``langstring_entry.identify_locale``. Any exception is printed and
     reported through ``self.set_error`` instead of being propagated.

     :param langstring_entry: the entry whose ``value`` is analyzed.
     :param expected_locales: forwarded unchanged as the second positional
         argument of ``self.identify``.
     :returns: None.
     """
     try:
         lang, data = self.identify(
             langstring_entry.value, expected_locales)
         data["service"] = self.__class__.__name__
         changed = langstring_entry.identify_locale(lang, data)
         if changed:
             # Expire the affected attributes through the entry's db handle
             # (presumably so they are reloaded on next access -- confirm).
             langstring_entry.db.expire(langstring_entry, ["locale"])
             langstring_entry.db.expire(
                 langstring_entry.langstring, ["entries"])
         if lang == Locale.UNDEFINED:
             pass  # say you can't identify
     except Exception as e:
         print_exc()
         # Keep the handler minimal: nothing here may raise before the
         # error is recorded on the entry.
         self.set_error(langstring_entry, *self.decode_exception(e, True))
예제 #5
0
 def confirm_locale(
         self,
         langstring_entry,
         constrain_to_discussion_locales=SECURE_IDENTIFICATION_LIMIT):
     """Identify the locale of a langstring entry and record it on the entry.

     Runs ``self.identify`` on ``langstring_entry.value``, tags the
     resulting data with this service's class name, and hands both to
     ``langstring_entry.identify_locale``. Any exception is printed and
     reported through ``self.set_error`` instead of being propagated.

     :param langstring_entry: the entry whose ``value`` is analyzed.
     :param constrain_to_discussion_locales: forwarded unchanged as the
         second positional argument of ``self.identify``.
     :returns: None.
     """
     try:
         lang, data = self.identify(langstring_entry.value,
                                    constrain_to_discussion_locales)
         data["service"] = self.__class__.__name__
         changed = langstring_entry.identify_locale(lang, data)
         if changed:
             # Expire the affected attributes through the entry's db handle
             # (presumably so they are reloaded on next access -- confirm).
             langstring_entry.db.expire(langstring_entry, ["locale"])
             langstring_entry.db.expire(langstring_entry.langstring,
                                        ["entries"])
         if lang == Locale.UNDEFINED:
             pass  # say you can't identify
     except Exception as e:
         print_exc()
         # Keep the handler minimal: nothing here may raise before the
         # error is recorded on the entry.
         self.set_error(langstring_entry, *self.decode_exception(e, True))