コード例 #1
0
ファイル: new2.py プロジェクト: A12134/tfidf
def emExtract(texts1, texts2):
    """Build an emoticon-to-index dictionary from two text collections and save it.

    Every emoticon value reported by ``emot.emoticons`` receives the next free
    integer index in order of first appearance, scanning ``texts1`` first and
    ``texts2`` second. The finished mapping is passed to
    ``saveEmD("emD.dict", ...)``.

    Args:
        texts1: Iterable of texts scanned first.
        texts2: Iterable of texts scanned second.
    """
    emDict = {}
    for texts in (texts1, texts2):
        for text in texts:
            em = emot.emoticons(text)
            try:
                values = em.get('value')
            except AttributeError:
                # The result is not a mapping (no ``.get``): nothing to index.
                continue
            # A missing 'value' entry (None) is treated as "no emoticons".
            for e in values or ():
                if e not in emDict:
                    # Indices are dense, so the next index is the current size.
                    emDict[e] = len(emDict)

    saveEmD("emD.dict", emDict)
コード例 #2
0
 def prune_emojis_emoticons(string):
     """Cut detected emojis, then detected emoticons, out of ``string``.

     Emojis are removed first; emoticons are then detected on the already
     pruned text. Spans are removed from right to left so that earlier
     locations stay valid while the string shrinks.

     Args:
         string: Text to clean.

     Returns:
         The text with the reported spans removed.
     """
     # Known limitation (original author's note): this sometimes fails when an
     # emoticon follows an unregistered, unusual one such as "O.o".
     # NOTE(review): the slice drops the character at index loc[1] as well,
     # i.e. it treats the reported end index as inclusive - confirm against
     # the installed emot version.
     emojis = emot.emoji(string)
     # Plain membership works for both dict results (key lookup) and list
     # results (no match), mirroring the emoticon check below.
     if "location" in emojis:
         for loc in reversed(emojis['location']):
             string = string[0:loc[0]] + string[loc[1] + 1::]
     emoticons = emot.emoticons(string)
     if "location" in emoticons:
         for loc in reversed(emoticons['location']):
             string = string[0:loc[0]] + string[loc[1] + 1::]
     return string
コード例 #3
0
def process_emoji(tweets):
    """Bundle a tweet's emot result sizes and TextBlob sentiment into an ``emojiClass``.

    The text is reduced to word characters and apostrophes, whitespace is
    collapsed, and the cleaned text is analysed with TextBlob. The returned
    object is ``emojiClass(corrected_text, emoji_count, emoticon_count,
    polarity, subjectivity)``.
    """
    # Sizes of the raw emot results, measured on the untouched input.
    n_emoji = len(emot.emoji(tweets))
    n_emoticon = len(emot.emoticons(tweets))

    # Everything except word characters and apostrophes becomes a space,
    # then runs of whitespace are collapsed and the ends trimmed.
    spaced = re.sub(r"[^\w']", " ", tweets)
    cleaned = re.sub(r"[\s]+", ' ', spaced).strip()

    # Spell-correct the text; sentiment is read from the uncorrected blob.
    blob = TextBlob(cleaned)
    corrected = str(blob.correct())
    polarity = blob.sentiment.polarity
    subjectivity = blob.sentiment.subjectivity

    return emojiClass(corrected, n_emoji, n_emoticon, polarity, subjectivity)
コード例 #4
0
def test_emo():
    """Print the emoji and emoticon detections for a fixed sample, plus two slices of it."""
    sample = "I love it, 👨 :-) 🏁:-) :-)🏁 :-) 🏁 <3"
    emoji_result = emo.emoji(sample)
    print(emoji_result)
    emoticon_result = emo.emoticons(sample)
    print(emoticon_result)
    # Manual spot checks of character positions in the sample.
    print(sample[27:30])
    print(sample[17])
コード例 #5
0
def classByEmoji(text):
    """Classify ``text`` by which sentiment groups its emojis/emoticons fall into.

    Detected values are compared with the module-level ``positive_emojis``,
    ``negative_emojis`` and ``neutral_emojis`` collections.

    Returns:
        1 when only positive values are present, -1 when negative (but no
        positive or neutral) values are present, 0 when only neutral values
        are present, and '' for every other case (mixed groups or no match).
    """
    emoji_hits = emot.emoji(text)
    emoticon_hits = emot.emoticons(text)
    found = {hit['value'] for hits in (emoji_hits, emoticon_hits) for hit in hits}

    positive = found.intersection(positive_emojis)
    negative = found.intersection(negative_emojis)
    neutral = found.intersection(neutral_emojis)

    if positive:
        # Any negative or neutral company makes the text unclassifiable.
        return '' if (negative or neutral) else 1
    if negative:
        return '' if neutral else -1
    return 0 if neutral else ''
コード例 #6
0
def find_emoticon(text):
  """Return emot's emoticon result for ``text``; a list result is reduced to its first element."""
  result = emot.emoticons(text)
  return result[0] if type(result) is list else result
コード例 #7
0
ファイル: tfidf.py プロジェクト: drndr/sentiment_analysis
def convert_emoticons(old_text):
    """Strip URLs from ``old_text`` and replace emoticons with their meanings.

    Args:
        old_text: Raw text.

    Returns:
        The text with each URL (through the end of its line) removed and
        every detected emoticon replaced by its meaning padded with spaces.
    """
    # Remove URLs before detection so that URL fragments (e.g. the ":/" in
    # "http://") cannot be reported as emoticons, and so that the removal is
    # not lost when replacements are applied afterwards.
    new_text = re.sub(r'https?:\/\/.*[\r\n]*', '', old_text)
    smiley = emot.emoticons(new_text)
    # The length guard skips short fallback results that carry no
    # 'value'/'mean' entries.
    if len(smiley) > 1 and smiley['flag']:
        for value, meaning in zip(smiley['value'], smiley['mean']):
            new_text = new_text.replace(value, " " + meaning + " ")
    return new_text
コード例 #8
0
ファイル: reader.py プロジェクト: cmry/amica
 def is_emo(word: str) -> bool:
     """Use emot to detect whether ``word`` contains an emoticon or an emoji.

     Args:
         word: Token to inspect.

     Returns:
         True when either the emoji or the emoticon result is flagged.
     """
     for result in (emot.emoji(word), emot.emoticons(word)):
         # emot may hand back a list wrapping the result dict; unwrap it so
         # both detectors are treated the same way.
         if isinstance(result, list):
             result = result[0]
         if result['flag']:
             return True
     return False
コード例 #9
0
    def emoToVec(self, line):
        """Encode the emoticons in ``line`` as a 0/1 vector over ``self.emList``.

        Text matching ``http\\S+`` is removed first. Each detected emoticon
        that has an index in ``self.emDict`` sets that position to 1.
        """
        without_urls = re.sub(r"http\S+", "", line)
        detected = emot.emoticons(without_urls)
        vector = [0] * len(self.emList)
        for value in detected.get('value'):
            slot = self.emDict.get(value)
            if slot is not None:
                vector[slot] = 1

        return vector
コード例 #10
0
 def get_emoticon_sentiment(self, tweet):
     """Sum the TextBlob polarity of every emoticon found in ``tweet``.

     The tweet is passed through ``self.cleaned`` and stripped of non-ASCII
     characters before detection. The total is printed and returned.

     Args:
         tweet: Raw tweet text.

     Returns:
         The summed polarity (0 when no emoticon is found).
     """
     cleaned_tweet = self.cleaned(tweet)
     ascii_tweet = cleaned_tweet.encode('ascii', 'ignore').decode('ascii')
     # NOTE(review): iterates the emot result as a sequence of dicts with a
     # 'value' key - confirm this matches the installed emot version.
     total = 0
     for entry in emot.emoticons(ascii_tweet):
         total += TextBlob(entry['value']).sentiment.polarity
     print(total)
     return total
コード例 #11
0
def formate_smiley(txt):
    """
    Use the emot package to extract emojis (currently unused).
    Input: txt, a string
    Output: the list of emoji values followed by the emoticon values.
    """
    emoji_values = [item["value"] for item in emot.emoji(txt)]
    emoticon_values = [item["value"] for item in emot.emoticons(txt)]
    return emoji_values + emoticon_values
コード例 #12
0
def extract_emojis_emoticons(text):
    """Return the emoticon values, then the emoji values, detected in ``text``."""
    extracted = []
    for detect in (emot.emoticons, emot.emoji):
        result = detect(text)
        # Results of length 0 or 1 are treated as "nothing found".
        if len(result) > 1:
            extracted += result['value']
    return extracted
コード例 #13
0
def convert_emoticons(text):
    """Convert an emoticon into a word token.

    When emot flags an emoticon in ``text``, the return value is only the
    token ``:<meaning>:`` built from the first meaning (lower-cased, spaces
    turned into underscores). Otherwise, or if anything raises, ``text`` is
    returned unchanged.
    """
    try:
        detected = emot.emoticons(text)
        if not detected['flag']:
            return text
        first_meaning = detected['mean'][0]
        return ':' + first_meaning.replace(' ', '_').lower() + ':'
    except Exception:
        # Best effort: any failure leaves the input untouched.
        return text
コード例 #14
0
ファイル: TweetDataCleaner.py プロジェクト: gud111/emnlp20SP
def getAllEmojis(cthresh, inpf, outf):
    """Count emojis and emoticons across a tweet file and write the frequent ones.

    Each line of ``inpf`` is split on single spaces; the first two fields are
    treated as the tweet id and date, and the tweet text is the line with
    those two strings removed. Emojis come from ``tn.parseEmojis`` and
    emoticons from ``emot.emoticons``. Symbols counted more than ``cthresh``
    times are written to ``outf`` as ``"<symbol> <count>"`` lines.

    Also prints the emoticons of the first ten lines, then the number of
    lines read, the number of lines containing at least one symbol, and the
    number of distinct symbols.

    Args:
        cthresh: Count threshold; only strictly greater counts are written.
        inpf: Path of the input file.
        outf: Path of the output file (overwritten).

    Raises:
        IndexError: If a line has fewer than two space-separated fields.
    """
    lx = 0        # lines processed
    hvemoji = 0   # lines with at least one emoji or emoticon
    ecounts = {}  # symbol -> occurrences, in first-seen order

    # ``with`` guarantees the handle is closed even if parsing raises.
    with open(inpf, "r") as fin:
        for line in fin:
            parts = line.split(" ")
            twid = parts[0]
            twdate = parts[1]
            # NOTE(review): ``replace`` removes every occurrence of the id
            # and date strings, including any inside the tweet text itself.
            tweet = line.replace(twid, "").replace(twdate, "").strip()
            emojis = tn.parseEmojis(tweet)
            temp = emot.emoticons(tweet)
            emoticons = temp['value'] if 'value' in temp else []

            if lx < 10:
                print(emoticons)

            if len(emojis) > 0 or len(emoticons) > 0:
                hvemoji += 1

            # Emojis first, then emoticons, so first-seen order is unchanged.
            for symbols in (emojis, emoticons):
                for symbol in symbols:
                    ecounts[symbol] = ecounts.get(symbol, 0) + 1

            lx += 1

    print(lx)
    print(hvemoji)
    print(len(ecounts))

    with open(outf, "w") as fout:
        for symbol, count in ecounts.items():
            if count > cthresh:
                fout.write(f"{symbol} {count}\n")
コード例 #15
0
 def extractEmoji(self, lines):
     """Register every new emoticon found in ``lines`` in ``self.emList`` / ``self.emDict``.

     Text matching ``http\\S+`` is removed from each line before detection.
     A new emoticon is appended to ``self.emList`` and its list position is
     stored in ``self.emDict``. Progress is printed once per line.
     """
     for count, raw_line in enumerate(lines):
         line = re.sub(r"http\S+", "", raw_line)
         print("processing emoji: line===>" + str(count))
         detected = emot.emoticons(line)
         for value in detected.get('value'):
             if self.emDict.get(value) is None:
                 self.emList.append(value)
                 # The emoticon's index is its position in emList.
                 self.emDict[value] = len(self.emList) - 1
コード例 #16
0
 def detect_emoticons_emojis(self, string):
     """Record ``string`` under every emoticon and emoji detected in it.

     ``self.emoticons`` and ``self.emojis`` map a detected value to the set
     of strings it was seen in. The bare values ``')'`` and ``':'`` are
     ignored.

     Args:
         string: Text to scan.
     """
     # ``value != (')' or ':')`` only ever compared against ')'; a membership
     # test excludes both values as intended.
     ignored = (')', ':')
     emoticons = emot.emoticons(string)
     emojis = emot.emoji(string)
     for emoticon in emoticons:
         value = emoticon['value']
         if value not in ignored:
             self.emoticons.setdefault(value, set()).add(string)
     for emoji in emojis:
         value = emoji['value']
         if value not in ignored:
             self.emojis.setdefault(value, set()).add(string)
コード例 #17
0
def isEmoticon(word):
    """Return True when emot flags an emoticon in ``word``.

    Args:
        word: Token to inspect.

    Returns:
        For a dict result, the truthiness of its 'flag' entry. For a list
        result, True if any entry's 'flag' is truthy; an empty list yields
        False instead of failing on an unassigned variable.
    """
    x = emot.emoticons(word)
    if isinstance(x, list):
        return any(v['flag'] for v in x)
    return bool(x['flag'])
コード例 #18
0
ファイル: normalization.py プロジェクト: Jwuthri/Aristote
    def text_demoticons(text, how_replace=""):
        """Remove emoticons from ``text``, or swap them for their meanings.

        Args:
            text: Text to process.
            how_replace: ``'mean'`` replaces each emoticon with its meaning;
                any other value deletes the emoticon.

        Returns:
            The processed text (unchanged when nothing is flagged).
        """
        detected = emot.emoticons(text)
        # A list result is reduced to its first element.
        if isinstance(detected, list):
            detected = detected[0]
        if not detected['flag']:
            return text

        use_meaning = how_replace == 'mean'
        for index, symbol in enumerate(detected["value"]):
            substitute = detected['mean'][index] if use_meaning else ""
            text = text.replace(symbol, substitute)

        return text
コード例 #19
0
def convert_emoticons(text):
    """Replace each emoticon in ``text`` with its meaning, using emot's locations.

    Args:
        text: Text to process.

    Returns:
        The text with every reported span replaced by the matching meaning,
        or the original text when nothing is flagged.
    """
    def replace_emoticons(l, id_to_del, replacements):
        # Sort the (span, replacement) pairs together so each span keeps its
        # own meaning; sorting the two lists independently scrambled them.
        # Going right to left keeps the remaining indices valid.
        paired = sorted(zip(id_to_del, replacements),
                        key=lambda pair: pair[0], reverse=True)
        for idx, replacement in paired:
            # NOTE(review): treats idx[-1] as an exclusive end index, as the
            # original slice did - confirm against the installed emot version.
            l[idx[0]:idx[-1]] = list(replacement)
        return l

    converted_emoticons = emot.emoticons(text)
    # NOTE(review): emot may return a list instead of a dict when detection
    # fails; that is treated as "nothing to replace" - confirm.
    if isinstance(converted_emoticons, list):
        return text
    if converted_emoticons["flag"] is True:
        chars = replace_emoticons(list(text),
                                  converted_emoticons["location"],
                                  converted_emoticons["mean"])
        text = "".join(chars)
    return text
コード例 #20
0
    def extract_emoji(text):
        """Return the emoticon values followed by the emoji values found in ``text``.

        A detector whose result raises ``TypeError`` on lookup contributes
        an empty list.
        """
        collected = []
        for detect in (emot.emoticons, emot.emoji):
            try:
                collected.append(detect(text)['value'])
            except TypeError:
                collected.append([])

        emoticon_values, emoji_values = collected
        return emoticon_values + emoji_values
コード例 #21
0
ファイル: new2.py プロジェクト: A12134/tfidf
def emVec(texts):
    """Turn each text into a vector of emoticon counts indexed through ``eDic``.

    Args:
        texts: Iterable of texts.

    Returns:
        ``np.array`` with one row per text; the column ``eDic[emoticon]``
        holds how often that emoticon was detected in the text.
    """
    vecs = []
    for text in texts:
        vec = [0] * len(eDic)
        em = emot.emoticons(text)
        try:
            ed = em.get('value')
        except AttributeError:
            # The result is not a mapping (no ``.get``): leave the row at zero.
            ed = None
        # A missing 'value' entry (None) is treated as "no emoticons".
        for s in ed or ():
            if s in eDic:
                vec[eDic[s]] += 1

        vecs.append(vec)

    return np.array(vecs)
コード例 #22
0
ファイル: nlp.py プロジェクト: shark-3/THRED
def _strip_emoticons(text):
    """Remove emoticons from ``text`` and return the result.

    Candidates are gathered from three places: tokens of
    ``tweet_tokenize(text)`` for which emot reports a value equal to the
    whole token, tokens matching ``Emoticon_RE`` (or equal to ``':*('``),
    and entries of ``UNCOMMON_EMOTICONS`` that occur in ``text``. Each
    candidate is deleted from the text only if the boundary checks below
    pass. Two final regex passes remove emoticon-like sequences that
    contain whitespace in the middle.
    """
    global UNCOMMON_EMOTICONS

    tokens = tweet_tokenize(text)

    emoticons = set()
    for token in tokens:
        # NOTE(review): iterates the emot result as a sequence of dicts with
        # a 'value' key - confirm this matches the installed emot version.
        for em in emot.emoticons(token):
            emoticon = em['value']
            # Skip lone brackets/colons and values that cover only part of
            # the token.
            if emoticon in ('(', ')', ':') or emoticon != token:
                continue
            emoticons.add(emoticon)

        # Tokens accepted by the project's own pattern, plus one hard-coded
        # special case.
        if Emoticon_RE.match(token) or token in (':*(', ):
            emoticons.add(token)

    # Substring check against the raw text, not against the tokens.
    for em in UNCOMMON_EMOTICONS:
        if em in text:
            emoticons.add(em)

    normalized = text
    for emoticon in emoticons:
        # Purely alphanumeric candidates are never removed.
        if re.match(r'^[a-zA-Z0-9]+$', emoticon.lower()):
            continue

        # NOTE(review): these checks use re.match with '.*' and no DOTALL
        # flag, so they only look at the text up to the first newline.
        # Candidate starts with a letter/digit: require a word boundary
        # right before an occurrence.
        if re.match(r'^[a-zA-Z0-9].*', emoticon):
            if re.match(r'.*\b{}.*'.format(misc.escRegex(emoticon)),
                        normalized):
                normalized = normalized.replace(emoticon, '')
        # Candidate ends with a letter/digit: require a word boundary right
        # after an occurrence.
        elif re.match(r'.*[a-zA-Z0-9]$', emoticon):
            if re.match(r'.*{}\b.*'.format(misc.escRegex(emoticon)),
                        normalized):
                normalized = normalized.replace(emoticon, '')
        # Otherwise: require whitespace before or after an occurrence, or
        # the candidate being the entire text.
        else:
            if re.match(r'.*\s{}.*'.format(misc.escRegex(emoticon)), normalized) or \
                    re.match(r'.*{}\s.*'.format(misc.escRegex(emoticon)), normalized) or \
                    re.match(r'^{}$'.format(misc.escRegex(emoticon)), normalized):
                normalized = normalized.replace(emoticon, '')

    # Drop sequences of the form <one of ;:8=><- or ^><whitespace><one
    # closing character>: first when preceded by start-of-text/whitespace,
    # then when followed by whitespace/end-of-text. Only the middle group is
    # removed; the surrounding groups are kept.
    normalized = re.sub(
        r'(^|\s)([;:8=][\-^]\s+[><}{)(|/*x$#&3D0OoPpc\[\]])(.*)', r'\1\3',
        normalized)
    normalized = re.sub(
        r'(.*)([;:8=][\-^]\s+[><}{)(|/*x$#&3D0OoPpc\[\]])(\s|$)', r'\1\3',
        normalized)

    return normalized
コード例 #23
0
def clean_sentence(sentence):
    """Normalise hashtags, links, emojis and emoticons in ``sentence``.

    Matches of ``hashtag`` are replaced by their first capture group, matches
    of ``links`` by the literal ``URL``, each key found by ``demoji.findall``
    by its value plus a space, and each emoticon by
    ``extract_emotion(meaning)``.
    """
    # Only substitute when the hashtag pattern actually occurs.
    if re.search(hashtag, sentence):
        sentence = re.sub(hashtag, r'\1', sentence)
    sentence = re.sub(links, 'URL', sentence)

    emoji_map = demoji.findall(sentence)
    detected = emot.emoticons(sentence)
    # A list result is reduced to its first element.
    if isinstance(detected, list):
        detected = detected[0]

    if emoji_map:
        for symbol, description in emoji_map.items():
            sentence = sentence.replace(symbol, description + " ")

    if detected['flag']:
        for symbol, meaning in zip(detected['value'], detected['mean']):
            sentence = sentence.replace(symbol, extract_emotion(meaning))
    return sentence
コード例 #24
0
ファイル: preprocessing.py プロジェクト: byramag/Emo_Eval
def desmilify(text):
    """Swap each emoticon in the text for a ``:meaning:`` token.

    The meaning's words are joined with underscores between the colons.
    Only dict results from emot are processed; anything else leaves the
    text unchanged.

    Authors:
        Keerthi

    Arguments:
        text: string
            - The text to be processed

    Returns:
        text with emoticons substituted
    """
    detected = emot.emoticons(text)
    if type(detected) is not dict:
        return text

    meanings = detected.get('mean')
    values = detected.get('value')
    for mean, value in zip(meanings, values):
        text = text.replace(value, ':%s:' % '_'.join(mean.split()))
    return text
def remove_punc_keep_emoj(tweet):
    """Strip punctuation from each item of ``tweet`` and append detected emoticons.

    Items that become empty after removing ``string.punctuation`` characters
    are dropped. The 'value' of every emoticon record reported for ``tweet``
    is appended after the cleaned items.
    """
    # Detection runs on the untouched input, before punctuation is stripped.
    detected = emot.emoticons(tweet)

    kept = []
    for word in tweet:
        stripped = ''.join(ch for ch in word if ch not in string.punctuation)
        if stripped:
            kept.append(stripped)

    if len(detected) != 0:
        kept.extend(record['value'] for record in detected)

    return kept
コード例 #26
0
ファイル: getsenti.py プロジェクト: NeelShah18/sentencelabel
def base_emoji(text, flag):
    '''Return the sentiment of ``text`` based on its emojis and emoticons.

    The meanings of all detected emojis and emoticons are joined into one
    string and analysed with TextBlob. When fewer than two characters of
    meanings were collected, the original text is analysed instead.

    Args:
        text (str): Sentence or paragraph to score (converted with ``str``).
        flag (bool): False returns the polarity as a float; anything else
            returns the result of ``get_solid_setiment`` for that polarity.

    Returns:
        If ``flag`` is False: the TextBlob polarity rounded to 4 decimals.
        Otherwise: the value of ``get_solid_setiment(polarity)`` (described
        by the original documentation as an integer class from 0 to 4).
    '''
    def _meanings(result):
        # Meanings of a flagged emot result; anything else (unflagged, or a
        # result that is not a dict with these keys) counts as nothing found.
        try:
            if result['flag']:
                return result['mean']
        except (TypeError, KeyError):
            pass
        return []

    text = str(text)
    emoji_result = emot.emoji(text)
    emoticon_result = emot.emoticons(text)

    # Emoji meanings first, then emoticon meanings, each followed by a space.
    pieces = []
    for result in (emoji_result, emoticon_result):
        pieces.extend(str(meaning) + " " for meaning in _meanings(result))
    pre_final_text = "".join(pieces)

    # Nothing usable was detected: score the text itself.
    if len(pre_final_text) < 2:
        pre_final_text = text

    polarity = TextBlob(pre_final_text).sentiment.polarity

    # ``== False`` is kept on purpose so that 0 still selects the float form
    # while None still selects the class form.
    if flag == False:  # noqa: E712
        return round(polarity, 4)
    return get_solid_setiment(polarity)
コード例 #27
0
def clean_sentence(sentence):
    """
    Replace all emojis and emoticons with their text equivalent.

    Each key found by ``demoji.findall`` is replaced by its value plus a
    space; each emoticon is replaced by ``extract_emotion(meaning)``.
    :param sentence: str, raw text
    :return: clean text
    """
    emoji_map = demoji.findall(sentence)
    detected = emot.emoticons(sentence)
    # A list result is reduced to its first element.
    if isinstance(detected, list):
        detected = detected[0]

    if emoji_map:
        for symbol, description in emoji_map.items():
            sentence = sentence.replace(symbol, description + " ")

    if detected['flag']:
        for symbol, meaning in zip(detected['value'], detected['mean']):
            sentence = sentence.replace(symbol, extract_emotion(meaning))
    return sentence
コード例 #28
0
ファイル: naive.py プロジェクト: nineTailsKurama/si630
 def better_tokenize(self, s):
     """Tokenize ``s`` after spelling out emojis and emoticons as words.

     The text is lower-cased, emojis are replaced by their names, each
     emoticon is replaced by the last word of its meaning, and the result is
     split on runs of non-word characters.

     Args:
         s: Text to tokenize.

     Returns:
         List of tokens.
     """
     s = s.lower()
     # Convert emojis to their textual names, without surrounding delimiters.
     s = emoji.demojize(s, delimiters=("", ""))
     answ = emot.emoticons(s)
     # A list result is reduced to its first element.
     if isinstance(answ, list):
         answ = answ[0]
     if answ['flag']:
         for symbol, meaning in zip(answ['value'], answ['mean']):
             s = s.replace(symbol, " " + meaning.split()[-1])
     # Every non-word character (punctuation, symbols and whitespace alike)
     # becomes a space, so no separate whitespace pass is needed. The raw
     # string avoids the invalid "\W" escape of a plain literal.
     s = re.sub(r"\W", " ", s)
     return s.split()
コード例 #29
0
def convertSymbolEmojisToText(data):
    """Rewrite emoticons in the first three fields of every example as ``::word::`` tokens.

    For each field where emot reports at least one emoticon, the
    space-joined emoticon values are replaced by the lower-cased,
    ``' ::'``-joined meanings followed by ``':: '``; a fixed table of extra
    replacements is then applied. Fields with a list result or without
    emoticons are left untouched. ``data`` is modified in place and returned.
    """
    # Applied in this order after the emot-based replacement.
    fixed_replacements = (
        ("-_-", " ::annoyed:: "),
        (":))", " ::smiley_face:: "),
        ("(:", " ::smiley_face:: "),
        ("=‑d", " ::laughing:: "),
        (":d", " ::laughing:: "),
        ("*_*", " ::pleased:: "),
        ("^_^", " ::pleased:: "),
        (";-)", " ::wink:: "),
        ("8)", " ::wearing_glasses:: "),
        (":c", " ::sad:: "),
        ("xd", " ::laughing:: "),
    )
    for example in data:
        for i in range(3):
            emoticon = emot.emoticons(example[i])
            if isinstance(emoticon, list) or len(emoticon['value']) == 0:
                continue
            joined_values = ' '.join(emoticon['value'])
            joined_meanings = ' ::'.join(emoticon['mean']) + ':: '
            updated = example[i].replace(joined_values, joined_meanings.lower())
            for needle, token in fixed_replacements:
                updated = updated.replace(needle, token)
            example[i] = updated
    return data
コード例 #30
0
 def hasEmoji(self, line):
     """Return 1 when emot's emoticon result for ``line`` is non-empty, otherwise 0."""
     detected = emot.emoticons(line)
     return 1 if len(detected) > 0 else 0