def emExtract(texts1, texts2):
    """Build an emoticon-to-index vocabulary from two text collections and save it.

    Every text in ``texts1`` and then in ``texts2`` is scanned with
    ``emot.emoticons``. Each emoticon not seen before is assigned the next
    free integer index (0, 1, 2, ...) in order of first appearance. The
    finished dict is passed to ``saveEmD`` together with the name
    ``"emD.dict"``.

    Args:
        texts1: Iterable of texts scanned first.
        texts2: Iterable of texts scanned second.
    """
    emDict = {}
    for texts in (texts1, texts2):
        for text in texts:
            em = emot.emoticons(text)
            try:
                values = em.get('value')
                for e in values:
                    if e not in emDict:
                        # The next free index always equals the current size.
                        emDict[e] = len(emDict)
            except (AttributeError, TypeError):
                # Best effort: a result without ``.get`` or without an
                # iterable 'value' entry contributes nothing.
                continue
    saveEmD("emD.dict", emDict)
def prune_emojis_emoticons(string):
    """Cut emojis, then emoticons, out of ``string`` using emot's locations.

    Known limitation (noted by the original author): this does not always
    work, especially when an emoticon follows an unusual one such as ``O.o``
    that is not registered.

    Args:
        string: Text to clean.

    Returns:
        The text with every reported emoji/emoticon span removed. A result
        that has no ``"location"`` entry leaves the text untouched.
    """
    # Emojis first. Spans are removed right-to-left so the offsets of the
    # spans still to be processed stay valid while the string shrinks.
    emojis = emot.emoji(string)
    if "location" in emojis:
        for loc in reversed(emojis['location']):
            # NOTE(review): the slice treats loc[1] as an inclusive end
            # index -- confirm against the installed emot version.
            string = string[:loc[0]] + string[loc[1] + 1:]

    # Emoticons are detected on the emoji-free text so that their offsets
    # refer to the current string.
    emoticons = emot.emoticons(string)
    if "location" in emoticons:
        for loc in reversed(emoticons['location']):
            string = string[:loc[0]] + string[loc[1] + 1:]
    return string
def process_emoji(tweets):
    """Count emot hits, clean and spell-correct the text, and score its sentiment.

    Args:
        tweets: The raw text to process.

    Returns:
        An ``emojiClass`` built from the spell-corrected text, the two counts
        and the TextBlob polarity and subjectivity of the cleaned text.
        The counts are ``len()`` of the emot results; presumably emot returns
        one entry per hit here -- TODO confirm for the installed version.
    """
    n_emoji = len(emot.emoji(tweets))
    n_emoticon = len(emot.emoticons(tweets))

    # Replace everything that is neither a word character nor an apostrophe
    # with a space, then squeeze whitespace runs and trim both ends.
    spaced = re.sub(r"[^\w']", " ", tweets)
    cleaned = re.sub(r"[\s]+", ' ', spaced).strip()

    # TextBlob: spell correction first, then sentiment of the cleaned
    # (uncorrected) text.
    blob = TextBlob(cleaned)
    corrected = str(blob.correct())
    polarity = blob.sentiment.polarity
    subjectivity = blob.sentiment.subjectivity
    return emojiClass(corrected, n_emoji, n_emoticon, polarity, subjectivity)
def test_emo():
    """Print detection results for a fixed sample string, plus two index probes.

    Prints, in order: the emoji result, the emoticon result, the slice
    ``[27:30]`` of the sample and its character at index 17. Returns ``None``.
    """
    # NOTE(review): this uses the name ``emo`` (not ``emot``); presumably an
    # import alias defined elsewhere in the file -- confirm.
    sample = "I love it, 👨 :-) 🏁:-) :-)🏁 :-) 🏁 <3"
    emoji_hits = emo.emoji(sample)
    print(emoji_hits)
    emoticon_hits = emo.emoticons(sample)
    print(emoticon_hits)
    print(sample[27:30])
    print(sample[17])
def classByEmoji(text):
    """Classify ``text`` by the emojis/emoticons it contains.

    The detected symbols are compared with the module-level collections
    ``positive_emojis``, ``negative_emojis`` and ``neutral_emojis``.

    Returns:
        ``1``  if only positive symbols were found,
        ``-1`` if negative but no positive or neutral symbols were found,
        ``0``  if only neutral symbols were found,
        ``''`` for every other (mixed or empty) case.
    """
    emoji_hits = emot.emoji(text)
    emoticon_hits = emot.emoticons(text)

    # Assumes both results iterate as dicts with a 'value' entry -- TODO
    # confirm for the installed emot version.
    found = {hit['value'] for hit in emoji_hits}
    found.update(hit['value'] for hit in emoticon_hits)

    has_positive = bool(found.intersection(positive_emojis))
    has_negative = bool(found.intersection(negative_emojis))
    has_neutral = bool(found.intersection(neutral_emojis))

    if has_positive:
        return '' if (has_negative or has_neutral) else 1
    if has_negative:
        return '' if has_neutral else -1
    return 0 if has_neutral else ''
def find_emoticon(text):
    """Return emot's emoticon result for ``text``, unwrapping a list result.

    When ``emot.emoticons`` hands back a plain list, its first element is
    returned; any other result is returned as is.
    """
    result = emot.emoticons(text)
    return result[0] if type(result) is list else result
def convert_emoticons(old_text):
    """Strip URLs from ``old_text`` and replace emoticons with their meanings.

    URLs are removed *before* emoticon detection, and the replacements are
    applied to the URL-free text. Previously the replacements were applied to
    the unstripped text, so the URL removal was lost whenever an emoticon was
    found. Detection also ran on the raw text; presumably URL fragments such
    as ``:/`` can be picked up as emoticons there.

    Args:
        old_text: The raw text.

    Returns:
        The text without URLs, with each detected emoticon replaced by its
        meaning surrounded by single spaces.
    """
    # Drops everything from the URL scheme to the end of that line.
    new_text = re.sub(r'https?:\/\/.*[\r\n]*', '', old_text)

    smiley = emot.emoticons(new_text)
    # Only a dict-shaped result carries the 'flag'/'value'/'mean' entries; any
    # other shape is treated as "nothing found".
    if isinstance(smiley, dict) and smiley.get('flag'):
        for value, mean in zip(smiley['value'], smiley['mean']):
            new_text = new_text.replace(value, " " + mean + " ")
    return new_text
def is_emo(word: str) -> bool:
    """Use emot to detect whether ``word`` contains an emoticon or an emoji.

    Returns:
        ``True`` if either detector sets its ``'flag'``, otherwise ``False``.
        This is a real ``bool``; the previous ``bool + bool`` produced an int
        between 0 and 2.
    """
    has_emoji = bool(emot.emoji(word)['flag'])

    found = emot.emoticons(word)
    try:
        has_emoticon = bool(found['flag'])
    except TypeError:
        # List-shaped result: the flag sits on the first entry.
        has_emoticon = bool(found[0]['flag'])

    return has_emoji or has_emoticon
def emoToVec(self, line):
    """Encode the emoticons of ``line`` as a binary vector over ``self.emList``.

    Args:
        line: Text to encode; ``http...`` tokens are removed first.

    Returns:
        A list with one entry per element of ``self.emList``. Position
        ``self.emDict[v]`` is 1 for every detected emoticon ``v`` that is
        present in ``self.emDict``; all other positions are 0.
    """
    line = re.sub(r"http\S+", "", line)
    vector = [0] * len(self.emList)

    found = emot.emoticons(line)
    # Other code in this file guards against a list-shaped result from
    # emot.emoticons. It has no 'value' entry, so it is treated as
    # "no emoticons" rather than crashing on ``.get``.
    if not isinstance(found, dict):
        return vector

    for v in found.get('value') or ():
        idx = self.emDict.get(v)
        if idx is not None:
            vector[idx] = 1
    return vector
def get_emoticon_sentiment(self, tweet):
    """Sum the TextBlob polarity of every emoticon found in ``tweet``.

    The tweet is passed through ``self.cleaned`` and reduced to its ASCII
    characters before emoticon detection. The total is printed and returned.

    Args:
        tweet: The raw tweet text.

    Returns:
        The sum of the polarities (``0`` when nothing is found).
    """
    cleaned_tweet = self.cleaned(tweet)
    # Non-ASCII characters are dropped, not transliterated.
    ascii_text = cleaned_tweet.encode('ascii', 'ignore').decode('ascii')

    total = 0
    # Assumes emot returns a sequence of dicts with a 'value' entry here --
    # TODO confirm for the installed emot version.
    for entry in emot.emoticons(ascii_text):
        total += TextBlob(entry['value']).sentiment.polarity
    print(total)
    return total
def formate_smiley(txt):
    """Use the emot package to extract emojis (currently unused).

    Args:
        txt: A string.

    Returns:
        The list of character emojis followed by the text emoticons.
    """
    found = [hit["value"] for hit in emot.emoji(txt)]
    found += [hit["value"] for hit in emot.emoticons(txt)]
    return found
def extract_emojis_emoticons(text):
    """Collect the emoticons and then the emojis that emot reports for ``text``.

    Returns:
        A list of the detected values, emoticons first.
    """
    extracted = []
    for detect in (emot.emoticons, emot.emoji):
        result = detect(text)
        # Only results with more than one entry are read; presumably this
        # separates a populated dict from a one-element fallback -- verify
        # against the installed emot version.
        if len(result) > 1:
            extracted += result['value']
    return extracted
def convert_emoticons(text):
    """Convert an emoticon into a ``:word:`` token.

    When emot flags an emoticon in ``text``, the return value is only the
    first reported meaning, lower-cased, with spaces replaced by underscores
    and wrapped in colons. Otherwise, or on any error, ``text`` is returned
    unchanged.
    """
    try:
        found = emot.emoticons(text)
        if not found['flag']:
            return text
        label = found['mean'][0].replace(' ', '_').lower()
        return ':' + label + ':'
    except Exception:
        # Deliberate best effort: any failure leaves the text as it was.
        return text
def getAllEmojis(cthresh, inpf, outf):
    """Count emojis and emoticons in a tweet file and write the frequent ones.

    Each input line is split on single spaces. The first two fields are taken
    as tweet id and date, and the rest is the tweet text. Emojis come from
    ``tn.parseEmojis`` and emoticons from ``emot.emoticons``. Three numbers
    are printed after reading: lines read, lines containing at least one
    symbol, and distinct symbols seen.

    Both files are opened in ``with`` blocks so the handles are released even
    if processing raises.

    Args:
        cthresh: Only symbols counted strictly more often than this are written.
        inpf: Path of the input file.
        outf: Path of the output file; one ``"<symbol> <count>"`` line each.
    """
    ecounts = {}
    lx = 0        # lines read
    hvemoji = 0   # lines with at least one emoji or emoticon

    with open(inpf, "r") as fin:
        for line in fin:
            parts = line.split(" ")
            twid = parts[0]
            twdate = parts[1]
            # NOTE(review): this removes *every* occurrence of the id and
            # date strings from the line, not just the leading fields.
            tweet = line.replace(twid, "").replace(twdate, "").strip()

            emojis = tn.parseEmojis(tweet)
            temp = emot.emoticons(tweet)
            emoticons = temp['value'] if 'value' in temp else []

            # Debug aid: show the emoticons of the first ten lines.
            if lx < 10:
                print(emoticons)

            if len(emojis) > 0 or len(emoticons) > 0:
                hvemoji += 1
            for emoji in emojis:
                ecounts[emoji] = ecounts.get(emoji, 0) + 1
            for emoticon in emoticons:
                ecounts[emoticon] = ecounts.get(emoticon, 0) + 1
            lx += 1

    print(lx)
    print(hvemoji)
    print(len(ecounts))

    with open(outf, "w") as fout:
        for emoji, seen in ecounts.items():
            if seen > cthresh:
                fout.write(emoji + " " + str(seen) + "\n")
def extractEmoji(self, lines):
    """Add every new emoticon found in ``lines`` to the instance vocabulary.

    For each line, ``http...`` tokens are removed and a progress message is
    printed. Every detected emoticon not yet in ``self.emDict`` is appended
    to ``self.emList`` and mapped to its list position in ``self.emDict``.

    Args:
        lines: Iterable of text lines.
    """
    for count, line in enumerate(lines):
        line = re.sub(r"http\S+", "", line)
        print("processing emoji: line===>" + str(count))

        found = emot.emoticons(line)
        # Other code in this file guards against a list-shaped result from
        # emot.emoticons. It has no 'value' entry, so the line is skipped
        # instead of crashing on ``.get``.
        if not isinstance(found, dict):
            continue

        for v in found.get('value') or ():
            if self.emDict.get(v) is None:
                self.emList.append(v)
                self.emDict[v] = len(self.emList) - 1
def detect_emoticons_emojis(self, string):
    """Index ``string`` under every emoticon and emoji it contains.

    Each detected value becomes a key of ``self.emoticons`` or
    ``self.emojis``, mapped to the set of strings in which it was seen.
    The single characters ``')'`` and ``':'`` are ignored.

    The previous test ``value != (')' or ':')`` only ever compared against
    ``')'``, because ``')' or ':'`` evaluates to ``')'``.

    Args:
        string: The text to scan and to record.
    """
    ignored = (')', ':')
    sources = (
        (emot.emoticons(string), self.emoticons),
        (emot.emoji(string), self.emojis),
    )
    # Assumes each result iterates as dicts with a 'value' entry -- TODO
    # confirm for the installed emot version.
    for hits, index in sources:
        for hit in hits:
            value = hit['value']
            if value not in ignored:
                index.setdefault(value, set()).add(string)
def isEmoticon(word):
    """Return ``True`` if emot flags an emoticon in ``word``.

    For a dict result the ``'flag'`` entry decides. For a list result the
    flag of the last element decides, and an empty list now yields ``False``
    instead of raising ``UnboundLocalError``.
    """
    x = emot.emoticons(word)
    val = False
    if isinstance(x, list):
        for v in x:
            val = v['flag']
    else:
        val = x['flag']
    return bool(val)
def text_demoticons(text, how_replace=""):
    """Remove the emoticons from ``text`` or replace them with their meaning.

    Args:
        text: The text to process.
        how_replace: ``'mean'`` substitutes each emoticon with its meaning;
            any other value deletes the emoticon.

    Returns:
        The processed text; unchanged when emot's flag is falsy.
    """
    found = emot.emoticons(text)
    if isinstance(found, list):
        # A list-shaped result carries its data in the first element.
        found = found[0]
    if not found['flag']:
        return text

    if how_replace == 'mean':
        for position, symbol in enumerate(found['value']):
            text = text.replace(symbol, found['mean'][position])
    else:
        for symbol in found['value']:
            text = text.replace(symbol, "")
    return text
def convert_emoticons(text):
    """Replace each emoticon in ``text`` with the meaning emot reports for it.

    Locations and meanings are kept paired while ordering the replacements.
    Previously both lists were sorted independently (meanings
    alphabetically), so an emoticon could receive another emoticon's meaning.

    Args:
        text: The text to convert.

    Returns:
        The converted text; unchanged unless emot returns a dict whose
        ``"flag"`` is ``True``.
    """
    found = emot.emoticons(text)
    if not (isinstance(found, dict) and found.get("flag") is True):
        return text

    # Work from the rightmost span to the leftmost so that the offsets of the
    # spans still to be replaced are not shifted by earlier replacements.
    pairs = sorted(
        zip(found["location"], found["mean"]),
        key=lambda pair: pair[0],
        reverse=True,
    )
    for location, meaning in pairs:
        # As before, the last location element is used as an exclusive end.
        start, end = location[0], location[-1]
        text = text[:start] + meaning + text[end:]
    return text
def extract_emoji(text):
    """Return the emoticons followed by the emojis that emot finds in ``text``.

    A detector whose call or ``['value']`` lookup raises ``TypeError``
    contributes an empty list.
    """
    def _values(detector):
        # Both the detector call and the lookup are covered by the handler.
        try:
            return detector(text)['value']
        except TypeError:
            return []

    emoticons_list = _values(emot.emoticons)
    emoji_list = _values(emot.emoji)
    return emoticons_list + emoji_list
def emVec(texts):
    """Turn each text into an emoticon count vector over the ``eDic`` vocabulary.

    Args:
        texts: Iterable of texts.

    Returns:
        ``np.array`` with one row per text and one column per ``eDic`` key;
        entry ``[i, eDic[s]]`` counts the occurrences of emoticon ``s`` in
        text ``i``. Emoticons that are not in ``eDic`` are ignored.
    """
    vecs = []
    for text in texts:
        vec = [0] * len(eDic)
        em = emot.emoticons(text)
        try:
            for s in em.get('value'):
                if s in eDic:
                    vec[eDic.get(s)] += 1
        except (AttributeError, TypeError, IndexError):
            # Best effort: a result without ``.get`` / an iterable 'value',
            # or an index outside the vector, leaves the counts gathered so
            # far for this text in place.
            pass
        vecs.append(vec)
    return np.array(vecs)
def _strip_emoticons(text):
    """Remove emoticons from ``text`` and return the normalized string.

    Candidates are gathered from three sources: tokens that emot reports as
    a whole-token emoticon, tokens matched by ``Emoticon_RE`` (plus the
    literal ``':*('``), and every entry of ``UNCOMMON_EMOTICONS`` that occurs
    in ``text``. Each candidate is then removed from the text if it passes a
    context check, and two final regex passes drop emoticons whose parts are
    separated by whitespace.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; confirm that the ``Emoticon_RE`` check belongs to
    the token loop and the ``UNCOMMON_EMOTICONS`` loop sits outside it.
    """
    global UNCOMMON_EMOTICONS
    tokens = tweet_tokenize(text)
    emoticons = set()
    for token in tokens:
        # Assumes emot yields dicts with a 'value' entry -- TODO confirm.
        for em in emot.emoticons(token):
            emoticon = em['value']
            # Skip lone '(' / ')' / ':' and hits that are only part of the token.
            if emoticon in ('(', ')', ':') or emoticon != token:
                continue
            emoticons.add(emoticon)
        if Emoticon_RE.match(token) or token in (':*(', ):
            emoticons.add(token)
    for em in UNCOMMON_EMOTICONS:
        if em in text:
            emoticons.add(em)
    normalized = text
    # NOTE(review): a set is iterated here, so the removal order is not fixed.
    for emoticon in emoticons:
        # Purely alphanumeric candidates are never removed.
        if re.match(r'^[a-zA-Z0-9]+$', emoticon.lower()):
            continue
        if re.match(r'^[a-zA-Z0-9].*', emoticon):
            # Starts with a letter/digit: require a word boundary before it.
            if re.match(r'.*\b{}.*'.format(misc.escRegex(emoticon)), normalized):
                normalized = normalized.replace(emoticon, '')
        elif re.match(r'.*[a-zA-Z0-9]$', emoticon):
            # Ends with a letter/digit: require a word boundary after it.
            if re.match(r'.*{}\b.*'.format(misc.escRegex(emoticon)), normalized):
                normalized = normalized.replace(emoticon, '')
        else:
            # Otherwise: whitespace before or after it, or it is the whole text.
            if re.match(r'.*\s{}.*'.format(misc.escRegex(emoticon)), normalized) or \
               re.match(r'.*{}\s.*'.format(misc.escRegex(emoticon)), normalized) or \
               re.match(r'^{}$'.format(misc.escRegex(emoticon)), normalized):
                normalized = normalized.replace(emoticon, '')
    # Drop "eyes + nose, whitespace, mouth" sequences: first one at the start
    # or after whitespace, then one followed by whitespace or the end.
    normalized = re.sub(
        r'(^|\s)([;:8=][\-^]\s+[><}{)(|/*x$#&3D0OoPpc\[\]])(.*)',
        r'\1\3', normalized)
    normalized = re.sub(
        r'(.*)([;:8=][\-^]\s+[><}{)(|/*x$#&3D0OoPpc\[\]])(\s|$)',
        r'\1\3', normalized)
    return normalized
def clean_sentence(sentence):
    """Normalize hashtags and links, then spell out emojis and emoticons.

    Matches of the module-level ``hashtag`` pattern are replaced by their
    first capture group and matches of ``links`` by ``'URL'``. Every emoji
    reported by ``demoji.findall`` is replaced by its description plus a
    space, and every emoticon flagged by emot by ``extract_emotion`` of its
    meaning.

    Args:
        sentence: The raw text.

    Returns:
        The cleaned text.
    """
    # re.sub leaves the string untouched when the pattern does not match.
    sentence = re.sub(hashtag, r'\1', sentence)
    sentence = re.sub(links, 'URL', sentence)

    emoji_names = demoji.findall(sentence)
    found = emot.emoticons(sentence)
    if isinstance(found, list):
        # A list-shaped result carries its data in the first element.
        found = found[0]

    if len(emoji_names) > 0:
        for symbol, description in emoji_names.items():
            sentence = sentence.replace(symbol, description + " ")

    if found['flag']:
        for position, symbol in enumerate(found['value']):
            emotion = extract_emotion(found['mean'][position])
            sentence = sentence.replace(symbol, emotion)
    return sentence
def desmilify(text):
    """Replace the emoticons in a piece of text with their meanings.

    Each emoticon becomes ``:meaning:``, with the words of the meaning joined
    by underscores. Text is returned unchanged when emot's result is not a
    plain dict.

    Authors:
        Keerthi

    Arguments:
        text: string - The text to be processed

    Returns:
        text with emoticons substituted
    """
    found = emot.emoticons(text)
    if type(found) is not dict:
        return text

    meanings = found.get('mean')
    symbols = found.get('value')
    for meaning, symbol in zip(meanings, symbols):
        token = ':%s:' % '_'.join(meaning.split())
        text = text.replace(symbol, token)
    return text
def remove_punc_keep_emoj(tweet):
    """Strip punctuation from each item of ``tweet`` and append found emoticons.

    Args:
        tweet: Iterable of strings (presumably the tokens of one tweet --
            verify against the caller); it is also passed to emot as a whole.

    Returns:
        A list with the punctuation-free, non-empty items in order, followed
        by the ``'value'`` of every emoticon entry emot reported.
    """
    found = emot.emoticons(tweet)

    kept = []
    for word in tweet:
        stripped = ''.join(ch for ch in word if ch not in string.punctuation)
        # Items that consisted of punctuation only are dropped.
        if stripped:
            kept.append(stripped)

    if len(found) != 0:
        kept.extend(record['value'] for record in found)
    return kept
def base_emoji(text, flag):
    '''Return the sentiment of a text based on its emojis and emoticons.

    The meanings of all detected emojis and emoticons are concatenated and
    scored with TextBlob. When fewer than two characters of meanings were
    collected, the text itself is scored instead.

    Args:
        text (str): Sentence or paragraph to score; converted with ``str()``.
        flag (bool): True  --> return ``get_solid_setiment(polarity)``
                               (per the original documentation an int from
                               0 to 4, where 2 is neutral, 4 very positive
                               and 1 very negative).
                     False --> return the TextBlob polarity rounded to four
                               decimal places.

    Returns:
        The sentiment in the format selected by ``flag``.
    '''
    text = str(text)
    emoji_result = emot.emoji(text)
    emoticon_result = emot.emoticons(text)

    # Collect the meanings: emojis first, then emoticons.
    meanings = []
    if emoji_result['flag']:
        meanings.extend(emoji_result['mean'])
    try:
        if emoticon_result['flag']:
            meanings.extend(emoticon_result['mean'])
    except (TypeError, KeyError):
        # Best effort: an emoticon result without usable 'flag'/'mean'
        # entries contributes nothing.
        pass

    described = "".join(str(meaning) + " " for meaning in meanings)
    if len(described) < 2:
        described = text

    polarity = TextBlob(described).sentiment.polarity

    # Choose the output format of the sentiment based on flag.
    if flag:
        return get_solid_setiment(polarity)
    return round(polarity, 4)
def clean_sentence(sentence):
    """
    Replace all emojis and emoticons with their text equivalent.

    Emojis reported by ``demoji.findall`` become their description followed
    by a space; emoticons flagged by emot become ``extract_emotion`` of their
    meaning.

    :param sentence: str, raw text
    :return: clean text
    """
    emoji_names = demoji.findall(sentence)

    found = emot.emoticons(sentence)
    if isinstance(found, list):
        # A list-shaped result carries its data in the first element.
        found = found[0]

    if len(emoji_names) > 0:
        for symbol, description in emoji_names.items():
            sentence = sentence.replace(symbol, description + " ")

    if not found['flag']:
        return sentence
    for position, symbol in enumerate(found['value']):
        emotion = extract_emotion(found['mean'][position])
        sentence = sentence.replace(symbol, emotion)
    return sentence
def better_tokenize(self, s):
    """Lower-case ``s``, spell out emojis/emoticons and split it into tokens.

    Args:
        s: The raw text.

    Returns:
        The list of whitespace-separated tokens after every non-word
        character has been replaced by a space.
    """
    # Lowercase every string.
    s = s.lower()

    # Convert emojis to text.
    # NOTE(review): demojize is applied twice, as in the original; the second
    # pass looks redundant -- confirm before removing.
    s = emoji.demojize(s, delimiters=("", ""))
    s = emoji.demojize(s, delimiters=("", ""))

    answ = emot.emoticons(s)
    if isinstance(answ, list):
        # A list-shaped result carries its data in the first element.
        answ = answ[0]
    if answ['flag']:
        # Each emoticon is replaced by a space plus the last word of its meaning.
        for symbol, meaning in zip(answ['value'], answ['mean']):
            s = s.replace(symbol, " " + meaning.split()[-1])

    # Remove punctuation and all other non-word characters. Raw strings are
    # used because "\W" and "\s" are invalid escape sequences in normal
    # string literals.
    s = re.sub(r"\W", " ", s)
    # Special character cleaning.
    s = re.sub(r"\s", " ", s)
    return s.split()
def convertSymbolEmojisToText(data):
    """Replace text emoticons in the first three fields of every example, in place.

    For each of ``example[0..2]`` in which emot reports at least one
    emoticon, the space-joined run of detected emoticons is replaced by their
    lower-cased meanings, and a fixed table of further emoticons is then
    replaced by ``::label::`` markers. Fields for which emot returns a list
    or no values are left untouched.

    NOTE(review): the nesting was reconstructed from a source whose
    indentation was lost; here the fixed replacements run only for fields
    that passed both ``continue`` checks -- confirm.

    Args:
        data: Iterable of mutable sequences with at least three strings.

    Returns:
        The same ``data`` object.
    """
    fixed_replacements = (
        ("-_-", " ::annoyed:: "),
        (":))", " ::smiley_face:: "),
        ("(:", " ::smiley_face:: "),
        ("=‑d", " ::laughing:: "),
        (":d", " ::laughing:: "),
        ("*_*", " ::pleased:: "),
        ("^_^", " ::pleased:: "),
        (";-)", " ::wink:: "),
        ("8)", " ::wearing_glasses:: "),
        (":c", " ::sad:: "),
        ("xd", " ::laughing:: "),
    )
    for example in data:
        for i in range(3):
            found = emot.emoticons(example[i])
            if isinstance(found, list) or len(found['value']) == 0:
                continue
            symbols = ' '.join(found['value'])
            meanings = ' ::'.join(found['mean']) + ':: '
            updated = example[i].replace(symbols, meanings.lower())
            for old, new in fixed_replacements:
                updated = updated.replace(old, new)
            example[i] = updated
    return data
def hasEmoji(self, line):
    """Return 1 if emot's emoticon result for ``line`` is non-empty, else 0.

    NOTE(review): this only checks the length of the result; presumably emot
    returns an empty container when nothing is found -- confirm for the
    installed emot version.
    """
    found = emot.emoticons(line)
    return 1 if len(found) > 0 else 0