def get_Emoji_sentiment(self, lemma):
    """Return a sentiment entry for *lemma* when it denotes an ASCII emoji.

    The lemma is upper-cased and converted to its unicode form. If that form
    is a single codepoint in unicode category 'So' ("Symbol, other") and has
    an emoji shortcode, polarity is taken from the emoji lexicon
    (``self.emoji``) when present, otherwise averaged over the SentiWordNet
    scores of the shortcode's underscore-separated tokens.

    Returns:
        dict: ``{"0": {"shortcode", "negativity", "positivity"}}`` for a
        recognised emoji, or an empty dict otherwise.
    """
    sentiment = {}

    glyph = Emoji.ascii_to_unicode(lemma.upper())
    # Must be exactly one codepoint of category "Symbol, other".
    if len(glyph) != 1 or unicodedata.category(glyph) != 'So':
        return sentiment
    shortcode = Emoji.unicode_to_shortcode(glyph)
    # Only treat it as an emoji if a distinct shortcode exists.
    if shortcode == glyph:
        return sentiment

    if glyph in self.emoji:
        # Polarity comes straight from the emoji lexicon.
        entry = self.emoji[glyph]
        neg = entry["negativity"]
        pos = entry["positivity"]
    else:
        # Fall back to SentiWordNet: score each shortcode token and average.
        neg = 0
        pos = 0
        scored = 0  # tokens that had at least one synset
        for word in shortcode.strip(' :').split('_'):
            matches = list(swn.senti_synsets(word))
            if matches:
                first = matches[0]
                neg += first.neg_score()
                pos += first.pos_score()
                scored += 1
        if scored > 1:
            # Average over the tokens that contributed a score.
            neg /= scored
            pos /= scored

    sentiment["0"] = {
        "shortcode": shortcode,
        "negativity": neg,
        "positivity": pos
    }
    # print(sentiment["0"])
    return sentiment
def ascii_to_unicode(text, **kwargs):
    # Thin convenience wrapper: delegates straight to Emoji.ascii_to_unicode,
    # forwarding any keyword arguments unchanged.
    # NOTE(review): there is no `self`/`cls` parameter — if this definition
    # sits inside a class it likely needs a @staticmethod decorator; confirm
    # against the enclosing scope (not visible in this chunk).
    # NOTE(review): if the enclosing class is itself `Emoji`, this call would
    # recurse infinitely — verify `Emoji` refers to an external helper here.
    return Emoji.ascii_to_unicode(text, **kwargs)