def detect_language(text, num_words=None, fail_silently=True):
    """
    Detects the language of the text given, based on the ranges defined in
    the active language packs.

    Every letter of the most common words of ``text`` is offered to each
    available language pack; a pack scores one point per letter it contains
    and the language code with the highest score is returned.

    :param unicode text: Input string.
    :param int num_words: Number of words to base the decision on. If
        ``None``, the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting is used.
    :param bool fail_silently: If True, ``None`` is returned when no language
        pack matched; otherwise ``LanguageDetectionError`` is raised.
    :return str: Language code, or ``None`` when nothing matched and
        ``fail_silently`` is True.
    :raises LanguageDetectionError: When nothing matched and
        ``fail_silently`` is False.
    """
    ensure_autodiscover()

    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    most_common_words = extract_most_common_words(text, num_words=num_words)
    available_language_packs = get_available_language_packs()

    counter = Counter()
    # The occurrence count of each word is not used for weighting. The loop
    # variable must not be named ``_``: that name is the translation function
    # used further down.
    for word, _occurrences in most_common_words:
        for letter in word:
            # A letter may score for several packs; all of them are counted.
            for language_pack in available_language_packs:
                if language_pack.contains(letter):
                    counter[language_pack.language_code] += 1

    try:
        return counter.most_common(1)[0][0]
    except IndexError as e:
        # An empty counter means that no pack matched a single letter.
        if get_setting('DEBUG'):
            print_(e)

    if not fail_silently:
        raise LanguageDetectionError(
            _("""Can't detect language for the text "%s" given.""") % text
        )
def detect_language(text, num_words=None, fail_silently=True):
    """
    Detects the language from the text given based on ranges defined in
    active language packs.

    :param unicode text: Input string.
    :param int num_words: Number of words to base decision on. Defaults to
        the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting when ``None``.
    :param bool fail_silently: When False, a failed detection raises
        ``LanguageDetectionError``; when True, it yields ``None``.
    :return str: Code of the language whose pack contains the largest number
        of letters of the most common words, or ``None`` if no pack matched
        and ``fail_silently`` is True.
    :raises LanguageDetectionError: If no pack matched and ``fail_silently``
        is False.
    """
    ensure_autodiscover()

    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    most_common_words = extract_most_common_words(text, num_words=num_words)
    available_language_packs = get_available_language_packs()

    counter = Counter()
    # Word frequencies are ignored here; only the letters of each word count.
    # ``_`` is reserved for the translation function, hence the longer name.
    for word, _occurrences in most_common_words:
        for letter in word:
            # One point for every pack that knows the letter.
            counter.update(
                language_pack.language_code
                for language_pack in available_language_packs
                if language_pack.contains(letter)
            )

    try:
        return counter.most_common(1)[0][0]
    except IndexError as e:
        # Nothing was counted, so there is no best candidate to return.
        if get_setting('DEBUG'):
            print_(e)

    if not fail_silently:
        raise LanguageDetectionError(
            _("""Can't detect language for the text "%s" given.""") % text
        )
def autodiscover():
    """
    Auto-discovers the language packs in contrib/apps.

    Lists the directory described by the ``LANGUAGES_DIR`` setting and, for
    every sub-directory found, imports the module
    ``transliterate.<LANGUAGES_DIR joined by dots>.<sub-directory>.<LANGUAGE_PACK_MODULE_NAME>``.

    Importing is best-effort: any exception raised while importing a pack is
    swallowed so that one broken pack does not stop the discovery of the
    others. The exception is printed only when the ``DEBUG`` setting is on.
    """
    languages_dir = get_setting('LANGUAGES_DIR')
    language_pack_module_name = get_setting('LANGUAGE_PACK_MODULE_NAME')
    debug = get_setting('DEBUG')

    for app_path in os.listdir(PROJECT_DIR(languages_dir)):
        # ``languages_dir`` is a sequence of path components; extend a copy
        # of it with the entry being inspected.
        full_app_path = list(languages_dir)
        full_app_path.append(app_path)

        if os.path.isdir(PROJECT_DIR(full_app_path)):
            try:
                import_module(
                    "transliterate.{0}.{1}.{2}".format(
                        '.'.join(languages_dir),
                        app_path,
                        language_pack_module_name
                    )
                )
            except Exception as e:
                # Covers ImportError as well; it needs no separate clause
                # since both cases are handled the same way.
                if debug:
                    print_(e)
def detect_language(text, num_words=None, fail_silently=True,
                    heavy_check=False):
    """
    Detects the language of the text given, based on the ranges defined in
    the active language packs.

    Every letter of the most common words of ``text`` is offered to each
    available language pack that is marked as ``detectable``; a pack scores
    one point per letter it contains and the language code with the highest
    score is returned.

    :param unicode text: Input string.
    :param int num_words: Number of words to base the decision on. If
        ``None``, the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting is used.
    :param bool fail_silently: If True, ``None`` is returned when no language
        pack matched; otherwise ``LanguageDetectionError`` is raised.
    :param bool heavy_check: If given, heavy checks would be applied when
        simple checks don't give any results. Heavy checks are language
        specific and do not apply to a common logic. Heavy language detection
        is defined in the ``detect`` method of each language pack.
        NOTE: this function does not read the flag; only the simple
        letter-based check below is performed.
    :return str: Language code, or ``None`` when nothing matched and
        ``fail_silently`` is True.
    :raises LanguageDetectionError: When nothing matched and
        ``fail_silently`` is False.
    """
    ensure_autodiscover()

    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    most_common_words = extract_most_common_words(text, num_words=num_words)
    available_language_packs = get_available_language_packs()

    counter = Counter()
    # The occurrence count of each word is not used for weighting. The loop
    # variable must not be named ``_``: that name is the translation function
    # used further down.
    for word, _occurrences in most_common_words:
        for letter in word:
            # A letter may score for several packs; all of them are counted.
            for language_pack in available_language_packs:
                if language_pack.detectable and language_pack.contains(letter):
                    counter[language_pack.language_code] += 1

    try:
        return counter.most_common(1)[0][0]
    except IndexError as e:
        # An empty counter means that no detectable pack matched any letter.
        if get_setting('DEBUG'):
            logger.debug(str(e))

    if not fail_silently:
        raise LanguageDetectionError(
            _("""Can't detect language for the text "%s" given.""") % text
        )
def detect_language(text, num_words=None, fail_silently=True,
                    heavy_check=False):
    """
    Detects the language from the text given based on ranges defined in
    active language packs.

    :param unicode text: Input string.
    :param int num_words: Number of words to base decision on. Defaults to
        the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting when ``None``.
    :param bool fail_silently: When False, a failed detection raises
        ``LanguageDetectionError``; when True, it yields ``None``.
    :param bool heavy_check: If given, heavy checks would be applied when
        simple checks don't give any results. Heavy checks are language
        specific and do not apply to a common logic. Heavy language detection
        is defined in the ``detect`` method of each language pack.
        NOTE: the flag is accepted but not consulted by this function.
    :return str: Code of the language whose detectable pack contains the
        largest number of letters of the most common words, or ``None`` if no
        pack matched and ``fail_silently`` is True.
    :raises LanguageDetectionError: If no pack matched and ``fail_silently``
        is False.
    """
    ensure_autodiscover()

    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    most_common_words = extract_most_common_words(text, num_words=num_words)

    # Packs that opt out of detection can never score, so they are filtered
    # out once here rather than being re-checked for every letter.
    detectable_language_packs = [
        language_pack
        for language_pack in get_available_language_packs()
        if language_pack.detectable
    ]

    counter = Counter()
    # Word frequencies are ignored here; only the letters of each word count.
    # ``_`` is reserved for the translation function, hence the longer name.
    for word, _occurrences in most_common_words:
        for letter in word:
            # One point for every detectable pack that knows the letter.
            counter.update(
                language_pack.language_code
                for language_pack in detectable_language_packs
                if language_pack.contains(letter)
            )

    try:
        return counter.most_common(1)[0][0]
    except IndexError as e:
        # Nothing was counted, so there is no best candidate to return.
        if get_setting('DEBUG'):
            logger.debug(str(e))

    if not fail_silently:
        raise LanguageDetectionError(
            _("""Can't detect language for the text "%s" given.""") % text
        )
def autodiscover():
    """
    Autodiscovers the language packs in contrib/apps.

    For each sub-directory of the directory given by the ``LANGUAGES_DIR``
    setting, an import of its language pack module (named by the
    ``LANGUAGE_PACK_MODULE_NAME`` setting) is attempted under the
    ``transliterate`` package.

    Failures are deliberately not propagated: a pack that cannot be imported
    is skipped, and the error is printed only if the ``DEBUG`` setting is on.
    """
    languages_dir = get_setting('LANGUAGES_DIR')
    language_pack_module_name = get_setting('LANGUAGE_PACK_MODULE_NAME')
    debug = get_setting('DEBUG')

    for app_path in os.listdir(PROJECT_DIR(languages_dir)):
        # Path components of the candidate: the languages dir plus the entry.
        full_app_path = list(languages_dir) + [app_path]

        # Plain files living next to the language packs are not packs.
        if not os.path.isdir(PROJECT_DIR(full_app_path)):
            continue

        try:
            module_name = "transliterate.{0}.{1}.{2}".format(
                '.'.join(languages_dir),
                app_path,
                language_pack_module_name
            )
            import_module(module_name)
        except Exception as e:
            # A single handler is enough: ImportError is an Exception and
            # was handled identically.
            if debug:
                print_(e)
def extract_most_common_words(text, num_words=None):
    """
    Extracts most common words.

    The text is passed through ``strip_numbers`` and split on whitespace;
    tokens of a single character are ignored and the rest are counted.

    :param unicode text: Text to extract the words from.
    :param int num_words: Maximum number of words to return. If ``None``, the
        ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting is used.
    :return list: List of ``(word, occurrences)`` tuples, most frequent
        first.
    """
    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    text = strip_numbers(text)

    # Split on any run of whitespace: splitting on a single space only would
    # glue together words separated by newlines or tabs and count them as
    # one token.
    counter = Counter(word for word in text.split() if len(word) > 1)

    return counter.most_common(num_words)
def override_settings():
    """
    Returns the value of the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting
    as reported by ``get_setting``.

    NOTE(review): despite its name, this function only reads the setting;
    presumably the override itself is applied outside this function - confirm
    against the caller.

    :return: Value of the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting.
    """
    max_num_keywords = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')
    return max_num_keywords
# Deprecated settings shim: exposes the values returned by
# ``transliterate.conf.get_setting`` as module-level constants, read once
# when this code runs.
__title__ = 'transliterate.settings'
__version__ = '1.0'
__build__ = 0x000010
__author__ = 'Artur Barseghyan'
__all__ = (
    'LANGUAGES_DIR',
    'CONTRIB_DIR',
    'LANGUAGE_PACK_MODULE_NAME',
    'LANGUAGE_DETECTION_MAX_NUM_KEYWORDS',
    'DEBUG',
)

import warnings

# ``stacklevel=2`` attributes the warning to the code importing this module
# rather than to this line, so users can see which import needs replacing.
warnings.warn(
    """transliterate.settings is deprecated; use transliterate.conf.get_setting function instead.""",
    DeprecationWarning,
    stacklevel=2
)

from transliterate.conf import get_setting

# Each constant mirrors the setting of the same name.
LANGUAGES_DIR = get_setting('LANGUAGES_DIR')
CONTRIB_DIR = get_setting('CONTRIB_DIR')
LANGUAGE_PACK_MODULE_NAME = get_setting('LANGUAGE_PACK_MODULE_NAME')
LANGUAGE_DETECTION_MAX_NUM_KEYWORDS = get_setting(
    'LANGUAGE_DETECTION_MAX_NUM_KEYWORDS'
)
DEBUG = get_setting('DEBUG')
# Backwards-compatibility module: the constants below are kept for code that
# still imports them from here; ``transliterate.conf.get_setting`` is the
# replacement named in the deprecation warning.
__title__ = 'transliterate.settings'
__version__ = '1.0'
__build__ = 0x000010
__author__ = 'Artur Barseghyan'
__all__ = (
    'LANGUAGES_DIR', 'CONTRIB_DIR', 'LANGUAGE_PACK_MODULE_NAME',
    'LANGUAGE_DETECTION_MAX_NUM_KEYWORDS', 'DEBUG',
)

import warnings

# Report the importer's location (stacklevel=2) instead of this file's, so
# that the warning points at the code that has to be migrated.
warnings.warn(
    """transliterate.settings is deprecated; use transliterate.conf.get_setting function instead.""",
    DeprecationWarning,
    stacklevel=2
)

from transliterate.conf import get_setting

# Values are resolved once, when this code is executed.
LANGUAGES_DIR = get_setting('LANGUAGES_DIR')
CONTRIB_DIR = get_setting('CONTRIB_DIR')
LANGUAGE_PACK_MODULE_NAME = get_setting('LANGUAGE_PACK_MODULE_NAME')
LANGUAGE_DETECTION_MAX_NUM_KEYWORDS = get_setting(
    'LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')
DEBUG = get_setting('DEBUG')