# Tokenizer initialization: normalize the requested language, then pick the
# sentence and word tokenizers for it (LANGUAGE_ALIASES lets one language
# reuse another language's tokenizers).
def __init__(self, language):
    language = normalize_language(language)
    self._language = language

    tokenizer_language = self.LANGUAGE_ALIASES.get(language, language)
    self._sentence_tokenizer = self._get_sentence_tokenizer(
        tokenizer_language)
    self._word_tokenizer = self._get_word_tokenizer(tokenizer_language)
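

# A sketch of how the _get_sentence_tokenizer helper could work (an
# assumption; the helper is not shown in this excerpt). It loads an NLTK
# punkt model for the resolved language and requires nltk.download("punkt").
import nltk.data

def _get_sentence_tokenizer_sketch(language):
    # Hypothetical stand-in for illustration only; nltk.data.load raises
    # LookupError when no punkt model is installed for the language.
    return nltk.data.load("tokenizers/punkt/%s.pickle" % language)

# Example: _get_sentence_tokenizer_sketch("english").tokenize("One. Two.")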


# Stemmer initialization: use a special-cased stemmer when one is registered
# for the language, otherwise look up a "<Language>Stemmer" class by name in
# nltk_stemmers_module.
def __init__(self, language):
    language = normalize_language(language)
    self._stemmer = null_stemmer
    if language.lower() in self.SPECIAL_STEMMERS:
        self._stemmer = self.SPECIAL_STEMMERS[language.lower()]
        return
    stemmer_classname = language.capitalize() + 'Stemmer'
    try:
        stemmer_class = getattr(nltk_stemmers_module, stemmer_classname)
    except AttributeError:
        raise LookupError("Stemmer is not available for language %s." %
                          language)
    self._stemmer = stemmer_class().stem
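

# A standalone sketch of the same class-lookup trick, assuming
# nltk_stemmers_module refers to NLTK's nltk.stem.snowball (an assumption;
# the import is not shown in this excerpt).
import nltk.stem.snowball as nltk_stemmers_module

def resolve_stemmer_sketch(language):
    # Hypothetical helper for illustration; mirrors the __init__ logic above.
    stemmer_classname = language.capitalize() + 'Stemmer'
    try:
        stemmer_class = getattr(nltk_stemmers_module, stemmer_classname)
    except AttributeError:
        raise LookupError("Stemmer is not available for language %s." %
                          language)
    return stemmer_class().stem

# Example: resolve_stemmer_sketch("english")("running") == "run"

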
def test_normalize_language_with_language_name():
    assert normalize_language("french") == "french"
    assert normalize_language("chinese") == "chinese"
    assert normalize_language("slovak") == "slovak"


def test_normalize_language_with_alpha_3_code():
    assert normalize_language("fra") == "french"
    assert normalize_language("zho") == "chinese"
    assert normalize_language("slk") == "slovak"