def sentiment_words(self, filename):
        """
        Build a word -> sentiment score lookup from a tab-separated score file.

        The first 26 lines of the file are skipped; the next line must be a
        header naming at least the columns ``PosScore``, ``NegScore`` and
        ``SynsetTerms``.  Each ``SynsetTerms`` cell holds whitespace-separated
        terms of the form ``word#sense`` (e.g. ``"good#1 well#3"``).

        Parameters
        ----------
        filename : str
            File path for the sentiment scores.

        Returns
        -------
        dict
            Maps each word to ``PosScore - NegScore``.  Only terms tagged with
            sense number 1 (the most common meaning) on rows with a non-zero
            score are included.  Every character other than an ASCII letter is
            removed from the word, so ``a_priori#1`` is stored as ``apriori``.
            If a word qualifies on several rows, the last row wins.

        """

        df = pd.read_table(filename, skiprows=26)
        df['score'] = df['PosScore'] - df['NegScore']
        df = df[['SynsetTerms', 'score']]
        df.columns = ['words', 'score']

        # remove neutral words
        df = df[df['score'] != 0]
        # rows with no terms or no score cannot contribute an entry
        df = df.dropna(subset=['words', 'score'])

        # dictionary keys keep ASCII letters only
        non_letters = re.compile(r'[^a-zA-Z]')

        sent_dict = {}
        for terms, score in zip(df['words'], df['score']):
            # Each term is parsed on its own so a word is always paired with
            # its own sense number, whatever else is in the cell.
            for term in str(terms).split():
                word, sep, sense = term.rpartition('#')
                # only sentiment for the most common meaning of the word;
                # comparing the whole number keeps senses 10-19 out
                if not sep or sense != '1':
                    continue
                word = non_letters.sub('', word)
                if word:
                    sent_dict[word] = score

        return sent_dict
# Example #2
0
# In[198]:

# Notebook-exported scratch cells: derive a regex from the third-party
# `verbalexpressions` builder and try it on a few sample strings.
from verbalexpressions import VerEx

# In[199]:

# Builder instance that the pattern below is derived from.
verEx = VerEx()

# In[200]:

# Sample inputs: text wrapped in digits; the last one also contains a space.
strings = ['123Abdul233', '233Raja434', '223Ethan Hunt444']

# In[201]:

# Presumably a character class covering a-z, A-Z and the space character --
# TODO confirm how VerEx.range treats the unpaired final argument.
expression = verEx.range('a', 'z', 'A', 'Z', ' ')

# In[202]:

# Bare expression (presumably the pattern text): shown as cell output in a
# notebook, silently discarded when this file runs as a plain script.
expression.source()

# In[204]:

import re

# Presumably a compiled `re` pattern; it is handed to re.findall below.
re_exp = expression.compile()

# In[205]:

# Search only the first sample string; the result is not stored, so it is
# visible only as notebook cell output.
re.findall(re_exp, strings[0])