Beispiel #1
0
def detect_language(text, num_words=None, fail_silently=True):
    """
    Detects the language of the text given, based on the ranges defined in
    the active language packs.

    Every letter of the most common words found in ``text`` is checked
    against each available language pack. The language code of the pack that
    matched the largest number of letters is returned.

    :param unicode text: Input string.
    :param int num_words: Number of most common words to base the decision
        on. If ``None``, the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting
        is used.
    :param bool fail_silently: If ``True``, ``None`` is returned when no
        language pack matched any letter; otherwise
        ``LanguageDetectionError`` is raised in that case.
    :return str: Language code, or ``None`` if nothing matched and
        ``fail_silently`` is ``True``.
    :raises LanguageDetectionError: If nothing matched and ``fail_silently``
        is ``False``.
    """
    ensure_autodiscover()

    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    most_common_words = extract_most_common_words(text, num_words=num_words)

    counter = Counter()

    available_language_packs = get_available_language_packs()

    # Only the words themselves are scored; their occurrence counts are not
    # used. (``_`` is not used as the throw-away name because it is the
    # translation function called below.)
    for word, _occurrences in most_common_words:
        for letter in word:
            # A letter may belong to several language packs; each match
            # scores one point for the corresponding language.
            for language_pack in available_language_packs:
                if language_pack.contains(letter):
                    counter[language_pack.language_code] += 1

    try:
        return counter.most_common(1)[0][0]
    except IndexError as e:
        # An empty counter means that no letter matched any language pack.
        if get_setting('DEBUG'):
            print_(e)

    if not fail_silently:
        raise LanguageDetectionError(_("""Can't detect language for the text "%s" given.""") % text)

    return None
Beispiel #2
0
def detect_language(text, num_words=None, fail_silently=True):
    """
    Detects the language of the text given, based on the ranges defined in
    the active language packs.

    Each letter of the most common words of ``text`` is tested against every
    available language pack and a per-language score is kept. The language
    code with the highest score is returned.

    :param unicode text: Input string.
    :param int num_words: Number of most common words to base the decision
        on. If ``None``, the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting
        is used.
    :param bool fail_silently: If ``True``, ``None`` is returned when no
        language pack matched any letter; otherwise
        ``LanguageDetectionError`` is raised in that case.
    :return str: Language code, or ``None`` if nothing matched and
        ``fail_silently`` is ``True``.
    :raises LanguageDetectionError: If nothing matched and ``fail_silently``
        is ``False``.
    """
    ensure_autodiscover()

    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    most_common_words = extract_most_common_words(text, num_words=num_words)

    counter = Counter()

    available_language_packs = get_available_language_packs()

    # The occurrence count of a word does not take part in the scoring.
    # (``_`` cannot serve as the throw-away name: it is the translation
    # function used further down.)
    for word, _occurrences in most_common_words:
        for letter in word:
            # One point per language pack that contains the letter.
            for language_pack in available_language_packs:
                if language_pack.contains(letter):
                    counter[language_pack.language_code] += 1

    try:
        return counter.most_common(1)[0][0]
    except IndexError as e:
        # Raised when the counter is empty, i.e. nothing has been matched.
        if get_setting('DEBUG'):
            print_(e)

    if not fail_silently:
        raise LanguageDetectionError(
            _("""Can't detect language for the text "%s" given.""") % text)

    return None
Beispiel #3
0
def autodiscover():
    """
    Imports the language pack module of every language directory.

    Each sub-directory of the directory referred to by the ``LANGUAGES_DIR``
    setting is treated as a language app, and the module named by the
    ``LANGUAGE_PACK_MODULE_NAME`` setting is imported from it. Import
    failures never propagate; they are printed when the ``DEBUG`` setting is
    on.
    """
    languages_dir = get_setting('LANGUAGES_DIR')
    module_name = get_setting('LANGUAGE_PACK_MODULE_NAME')
    debug = get_setting('DEBUG')

    for entry in os.listdir(PROJECT_DIR(languages_dir)):
        entry_path = list(languages_dir) + [entry]

        # Plain files are not language apps - skip them.
        if not os.path.isdir(PROJECT_DIR(entry_path)):
            continue

        try:
            dotted_path = "transliterate.{0}.{1}.{2}".format(
                '.'.join(languages_dir),
                entry,
                module_name
            )
            import_module(dotted_path)
        except Exception as err:
            # A broken or missing language pack must not stop the discovery
            # of the remaining ones.
            if debug:
                print_(err)
Beispiel #4
0
def detect_language(text,
                    num_words=None,
                    fail_silently=True,
                    heavy_check=False):
    """
    Detects the language of the text given, based on the ranges defined in
    the active language packs.

    Every letter of the most common words found in ``text`` is checked
    against each available language pack that is marked as ``detectable``.
    The language code of the pack that matched the largest number of letters
    is returned.

    :param unicode text: Input string.
    :param int num_words: Number of most common words to base the decision
        on. If ``None``, the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting
        is used.
    :param bool fail_silently: If ``True``, ``None`` is returned when no
        language pack matched any letter; otherwise
        ``LanguageDetectionError`` is raised in that case.
    :param bool heavy_check: Meant to switch on heavy, language specific
        checks (the ``detect`` method of each language pack) when the simple
        checks don't give any results. NOTE: the argument is accepted, but
        this function does not consult it.
    :return str: Language code, or ``None`` if nothing matched and
        ``fail_silently`` is ``True``.
    :raises LanguageDetectionError: If nothing matched and ``fail_silently``
        is ``False``.
    """
    ensure_autodiscover()

    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    most_common_words = extract_most_common_words(text, num_words=num_words)

    counter = Counter()

    available_language_packs = get_available_language_packs()

    # Only the words themselves are scored; their occurrence counts are not
    # used. (``_`` is not used as the throw-away name because it is the
    # translation function called below.)
    for word, _occurrences in most_common_words:
        for letter in word:
            # A letter may belong to several language packs; each match
            # scores one point for the corresponding language.
            for language_pack in available_language_packs:
                if language_pack.detectable and language_pack.contains(letter):
                    counter[language_pack.language_code] += 1

    try:
        return counter.most_common(1)[0][0]
    except IndexError as e:
        # An empty counter means that no letter matched any language pack.
        if get_setting('DEBUG'):
            logger.debug(str(e))

    if not fail_silently:
        raise LanguageDetectionError(
            _("""Can't detect language for the text "%s" given.""") % text)

    return None
Beispiel #5
0
def detect_language(text, num_words=None, fail_silently=True, heavy_check=False):
    """
    Detects the language of the text given, based on the ranges defined in
    the active language packs.

    Each letter of the most common words of ``text`` is tested against every
    available language pack that is marked as ``detectable`` and a
    per-language score is kept. The language code with the highest score is
    returned.

    :param unicode text: Input string.
    :param int num_words: Number of most common words to base the decision
        on. If ``None``, the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting
        is used.
    :param bool fail_silently: If ``True``, ``None`` is returned when no
        language pack matched any letter; otherwise
        ``LanguageDetectionError`` is raised in that case.
    :param bool heavy_check: Meant to switch on heavy, language specific
        checks (the ``detect`` method of each language pack) when the simple
        checks don't give any results. NOTE: the argument is accepted, but
        this function does not consult it.
    :return str: Language code, or ``None`` if nothing matched and
        ``fail_silently`` is ``True``.
    :raises LanguageDetectionError: If nothing matched and ``fail_silently``
        is ``False``.
    """
    ensure_autodiscover()

    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    most_common_words = extract_most_common_words(text, num_words=num_words)

    counter = Counter()

    available_language_packs = get_available_language_packs()

    # The occurrence count of a word does not take part in the scoring.
    # (``_`` cannot serve as the throw-away name: it is the translation
    # function used further down.)
    for word, _occurrences in most_common_words:
        for letter in word:
            # One point per detectable language pack that contains the letter.
            for language_pack in available_language_packs:
                if language_pack.detectable and language_pack.contains(letter):
                    counter[language_pack.language_code] += 1

    try:
        return counter.most_common(1)[0][0]
    except IndexError as e:
        # Raised when the counter is empty, i.e. nothing has been matched.
        if get_setting('DEBUG'):
            logger.debug(str(e))

    if not fail_silently:
        raise LanguageDetectionError(
            _("""Can't detect language for the text "%s" given.""") % text
        )

    return None
Beispiel #6
0
def autodiscover():
    """
    Autodiscovers the language packs.

    Walks over the entries of the directory referred to by the
    ``LANGUAGES_DIR`` setting and, for every entry that is a directory,
    imports the module named by the ``LANGUAGE_PACK_MODULE_NAME`` setting
    from it. Errors raised while importing are swallowed and printed only
    when the ``DEBUG`` setting is on.
    """
    languages_dir = get_setting('LANGUAGES_DIR')
    pack_module_name = get_setting('LANGUAGE_PACK_MODULE_NAME')
    debug = get_setting('DEBUG')

    for candidate in os.listdir(PROJECT_DIR(languages_dir)):
        candidate_path = list(languages_dir)
        candidate_path.append(candidate)

        # Anything that is not a directory cannot hold a language pack.
        if not os.path.isdir(PROJECT_DIR(candidate_path)):
            continue

        try:
            module_path = "transliterate.{0}.{1}.{2}".format(
                '.'.join(languages_dir), candidate, pack_module_name)
            import_module(module_path)
        except Exception as err:
            # Keep going: one faulty language pack should not prevent the
            # others from being discovered.
            if debug:
                print_(err)
Beispiel #7
0
def extract_most_common_words(text, num_words=None):
    """
    Extracts the most common words of the text given.

    Numbers are stripped from the text first; the remainder is split on
    single spaces and words shorter than two characters are ignored.

    :param unicode text: Text to extract the words from.
    :param int num_words: Maximum number of words to return. If ``None``, the
        ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting is used.
    :return list: ``(word, occurrences)`` tuples, most common first.
    """
    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    cleaned_text = strip_numbers(text)
    frequencies = Counter(
        token for token in cleaned_text.split(' ') if len(token) > 1
    )
    return frequencies.most_common(num_words)
Beispiel #8
0
def extract_most_common_words(text, num_words=None):
    """
    Returns the words that occur most often in the text given.

    The text is first passed through ``strip_numbers`` and then split on
    single spaces; only words of at least two characters are counted.

    :param unicode text: Text to analyse.
    :param int num_words: Maximum number of words to return. If ``None``, the
        ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting is used.
    :return list: ``(word, occurrences)`` tuples, most common first.
    """
    if num_words is None:
        num_words = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

    tokens = strip_numbers(text).split(' ')
    # Single characters and the empty strings left by repeated spaces carry
    # no information and are dropped.
    word_counts = Counter(filter(lambda token: len(token) > 1, tokens))
    return word_counts.most_common(num_words)
Beispiel #9
0
 def override_settings():
     """
     Returns the value of the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting.
     """
     max_num_keywords = get_setting('LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')
     return max_num_keywords
Beispiel #10
0
__title__ = 'transliterate.settings'
__version__ = '1.0'
__build__ = 0x000010
__author__ = 'Artur Barseghyan'
__all__ = ('LANGUAGES_DIR', 'CONTRIB_DIR', 'LANGUAGE_PACK_MODULE_NAME',
           'LANGUAGE_DETECTION_MAX_NUM_KEYWORDS', 'DEBUG')

import warnings
# The warning is emitted when this module is imported. ``stacklevel=2``
# attributes it to the importing code instead of to this module, so that the
# place which still relies on the deprecated module can be located.
warnings.warn(
    """transliterate.settings is deprecated; use transliterate.conf.get_setting function instead.""",
    DeprecationWarning,
    stacklevel=2)

from transliterate.conf import get_setting

# The values below are read once, when this module is executed.
LANGUAGES_DIR = get_setting('LANGUAGES_DIR')
CONTRIB_DIR = get_setting('CONTRIB_DIR')
LANGUAGE_PACK_MODULE_NAME = get_setting('LANGUAGE_PACK_MODULE_NAME')
LANGUAGE_DETECTION_MAX_NUM_KEYWORDS = get_setting(
    'LANGUAGE_DETECTION_MAX_NUM_KEYWORDS')

DEBUG = get_setting('DEBUG')
 def override_settings():
     """
     Looks up the ``LANGUAGE_DETECTION_MAX_NUM_KEYWORDS`` setting and returns
     its value.
     """
     setting_name = 'LANGUAGE_DETECTION_MAX_NUM_KEYWORDS'
     return get_setting(setting_name)
Beispiel #12
0
__title__ = 'transliterate.settings'
__version__ = '1.0'
__build__ = 0x000010
__author__ = 'Artur Barseghyan'
__all__ = (
    'LANGUAGES_DIR',
    'CONTRIB_DIR',
    'LANGUAGE_PACK_MODULE_NAME',
    'LANGUAGE_DETECTION_MAX_NUM_KEYWORDS',
    'DEBUG',
)

import warnings
# Emitted on import of this module. ``stacklevel=2`` makes the warning point
# at the code performing the import rather than at this module itself, which
# is where the deprecated usage has to be fixed.
warnings.warn(
    """transliterate.settings is deprecated; use transliterate.conf.get_setting function instead.""",
    DeprecationWarning,
    stacklevel=2
)

from transliterate.conf import get_setting

# The values below are read once, when this module is executed.
LANGUAGES_DIR = get_setting('LANGUAGES_DIR')
CONTRIB_DIR = get_setting('CONTRIB_DIR')
LANGUAGE_PACK_MODULE_NAME = get_setting('LANGUAGE_PACK_MODULE_NAME')
LANGUAGE_DETECTION_MAX_NUM_KEYWORDS = get_setting(
    'LANGUAGE_DETECTION_MAX_NUM_KEYWORDS'
)

DEBUG = get_setting('DEBUG')