def sentiment_words(self, filename):
    """
    Build a word -> sentiment score lookup from a tab-separated score table.

    The table is read with ``pd.read_table`` after skipping its first 26
    lines. It must provide the columns ``PosScore``, ``NegScore`` and
    ``SynsetTerms``; ``SynsetTerms`` holds whitespace-separated entries of
    the form ``term#sense`` (this looks like the SentiWordNet layout --
    confirm against the data file actually used).

    Parameters
    ----------
    filename : str
        file path (or anything ``pd.read_table`` accepts) for sentiment
        scores. Each row carries a pos score, a neg score, or both.

    Returns
    -------
    dict
        Maps each word to ``PosScore - NegScore``. Only non-neutral rows
        and only sense number 1 of a term are used. Keys are reduced to
        their ASCII letters (``a_priori`` becomes ``apriori``). When the
        same key occurs in several rows, the last row wins.
    """
    df = pd.read_table(filename, skiprows=26)

    # Incomplete rows (e.g. a trailing marker line) have NaN terms/scores.
    # NaN != 0 is True, so they would slip through the neutral filter below
    # and then fail during string parsing -- drop them up front.
    df = df.dropna(subset=['SynsetTerms', 'PosScore', 'NegScore'])

    scores = df['PosScore'] - df['NegScore']

    # remove neutral words
    polar = scores != 0

    # Keys keep only ASCII letters, matching the historical key format.
    non_letter_rx = re.compile(r'[^a-zA-Z]')

    sent_dict = {}
    for terms, score in zip(df.loc[polar, 'SynsetTerms'], scores[polar]):
        # Parse every "term#sense" entry on its own so a sense number can
        # never be paired with the wrong term (or with a single character).
        for entry in str(terms).split():
            term, sep, sense = entry.rpartition('#')
            # only sentiment for the most common meaning of the word;
            # comparing the whole number keeps senses 10-19 out.
            if not sep or sense != '1':
                continue
            word = non_letter_rx.sub('', term)
            if word:
                sent_dict[word] = score
    return sent_dict
# In[198]:

import re

from verbalexpressions import VerEx


# In[199]:

# Scratch cells exploring the VerEx range builder on a few sample strings.
verEx = VerEx()


# In[200]:

# Sample inputs: names (one containing a space) wrapped in digits.
strings = [
    '123Abdul233',
    '233Raja434',
    '223Ethan Hunt444',
]


# In[201]:

# Range over a-z and A-Z plus a lone ' ' argument; presumably this yields a
# character class of letters and space -- confirm with the source() output.
expression = verEx.range('a', 'z', 'A', 'Z', ' ')


# In[202]:

# Bare expression: the pattern text is only displayed when this runs as a
# notebook cell; as a plain script the result is discarded.
expression.source()


# In[204]:

re_exp = expression.compile()


# In[205]:

# Apply the pattern to the first sample; again only displayed in a notebook.
re.findall(re_exp, strings[0])