def test_lemmatize(): """Test lemmatization method.""" assert utils.lemmatize({"corriendo": "correr"}, "corriendo") == "correr"
def test_lemmatize(): """Test lemmatization method.""" assert utils.lemmatize({'corriendo': 'correr'}, 'corriendo') \ == 'correr'
def __init__(self, doc, lexico_semantic_norm_dict, lemmatizer=None):
    """Initialize lexico semantic norm object.

    Calculate averages over the number of tokens in the text.

    :param doc: Text to be processed
    :type doc: Spacy Doc
    :param lexico_semantic_norm_dict: Lexico semantic norms for words
    :type lexico_semantic_norm_dict: dict
    :param lemmatizer: Lemmatizer, defaults to None
    :type lemmatizer: dict, optional
    """
    valence = 0
    arousal = 0
    concreteness = 0
    imageability = 0
    context_availability = 0
    familiarity = 0
    count = 0.0
    for token in doc:
        word = token.text.lower()
        word_lemma = word
        if lemmatizer:
            word_lemma = lemmatize(lemmatizer, word)
        # Fall back to the lemma when the surface form has no lexicon entry.
        if word not in lexico_semantic_norm_dict:
            word = word_lemma
        if word in lexico_semantic_norm_dict:
            norms = lexico_semantic_norm_dict[word]
            valence += norms.get("valence")
            arousal += norms.get("arousal")
            concreteness += norms.get("concreteness")
            imageability += norms.get("imageability")
            context_availability += norms.get("context_availability")
            familiarity += norms.get("familiarity")
            count += 1.0
    # Store totals, then average over the number of matched tokens.
    self.__valence = valence
    self.__arousal = arousal
    self.__concreteness = concreteness
    self.__imageability = imageability
    self.__context_avilability = context_availability
    self.__familiarity = familiarity
    if count > 0:
        self.__valence /= count
        self.__arousal /= count
        self.__concreteness /= count
        self.__imageability /= count
        self.__context_avilability /= count
        self.__familiarity /= count
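# A minimal usage sketch for the constructor above, assuming it belongs to a
# class named LexicoSemanticNorm (hypothetical name here) and that a Spanish
# spaCy model is installed. The norm values below are illustrative
# placeholders, not real lexicon entries.
import spacy

nlp = spacy.load("es_core_news_sm")
doc = nlp("El niño está corriendo")
norms = {
    "correr": {
        "valence": 6.1, "arousal": 5.3, "concreteness": 4.2,
        "imageability": 5.0, "context_availability": 5.5, "familiarity": 6.3,
    },
}
lemmatizer = {"corriendo": "correr"}
lexico_semantic_norm = LexicoSemanticNorm(doc, norms, lemmatizer)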
# Type hints assume: from typing import Dict, Optional; from spacy.tokens import Doc
def __init__(self, doc: Doc, lemmatizer: Optional[Dict[str, str]] = None):
    """Initialize emotions class.

    Average over the number of tokens in the text.

    :param doc: Text to be processed
    :type doc: Spacy Doc
    :param lemmatizer: Lemmatizer to use, defaults to None
    :type lemmatizer: Python dict, optional
    """
    alegria = 0
    enojo = 0
    miedo = 0
    repulsion = 0
    sorpresa = 0
    tristeza = 0
    count = 0.0
    for token in doc:
        word = token.text.lower()
        word_lemma = word
        if lemmatizer:
            word_lemma = lemmatize(lemmatizer, word)
        # Fall back to the lemma when the surface form is not in the lexicon.
        if word not in SPANISH_EMOTION_LEXICON:
            word = word_lemma
        if word in SPANISH_EMOTION_LEXICON:
            pfa, emotion = SPANISH_EMOTION_LEXICON[word]
            if emotion == "Alegría":
                alegria += pfa
            elif emotion == "Enojo":
                enojo += pfa
            elif emotion == "Miedo":
                miedo += pfa
            elif emotion == "Repulsión":
                repulsion += pfa
            elif emotion == "Sorpresa":
                sorpresa += pfa
            else:
                tristeza += pfa
            count += 1.0
    # Store totals, then average over the number of matched tokens.
    self.__alegria = alegria
    self.__enojo = enojo
    self.__miedo = miedo
    self.__repulsion = repulsion
    self.__sorpresa = sorpresa
    self.__tristeza = tristeza
    if count > 0:
        self.__alegria /= count
        self.__enojo /= count
        self.__miedo /= count
        self.__repulsion /= count
        self.__sorpresa /= count
        self.__tristeza /= count
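# A minimal usage sketch for the constructor above, assuming it belongs to a
# class named Emotions (as its docstring suggests), that SPANISH_EMOTION_LEXICON
# maps words to (pfa, emotion) pairs as used above, and that a Spanish spaCy
# model is installed.
import spacy

nlp = spacy.load("es_core_news_sm")
doc = nlp("Estoy muy feliz hoy")
lemmatizer = {"corriendo": "correr"}
emotions = Emotions(doc, lemmatizer)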