def __init__(self, lexicon='Vader', lex_sep=None):
    """Load the sentiment lexicon and the emoji lexicon for this instance.

    Args:
        lexicon: Either a key of ``CUSTOM_TYPES`` (a lexicon shipped with
            the package) or, for a custom lexicon, the user-supplied
            lexicon file. Defaults to ``'Vader'``.
        lex_sep: Separator for a custom lexicon. Ignored when ``lexicon``
            names a bundled lexicon, whose separator is taken from
            ``CUSTOM_TYPES`` instead.

    Raises:
        ValueError: If ``lexicon`` is ``None``.
    """
    # Identity check rather than ``== None``: equality dispatches to the
    # argument's own ``__eq__``, which array-like objects override to
    # return something that is not a plain bool.
    if lexicon is None:
        raise ValueError(
            'lexicon must be one of '
            + str(list(CUSTOM_TYPES.keys()))
            + ' or Custom'
        )

    # --- lexicon ---
    if lexicon in CUSTOM_TYPES:
        # Bundled lexicon: the registry entry holds [file, separator].
        bundled = CUSTOM_TYPES[lexicon]
        self.lex_dict = bundled[0]
        self.lex_sep = bundled[1]
    else:
        # Custom lexicon: use the caller's file and separator as given.
        self.lex_dict = lexicon
        self.lex_sep = lex_sep

    # --- emojis ---
    self.emoji_dict = get_data('emoji_utf8_lexicon.txt')
    self.emoji_sep = '\t'

    # Build the lookup structures from the sources selected above.
    self.lexicon = self.make_lex_dict(self.lex_dict, self.lex_sep)
    self.emojis = self.make_emoji_dict(self.emoji_dict, self.emoji_sep)
import numpy as np import pandas as pd from moodscores.helper_functions import tokenizer from moodscores import _ROOT, get_data AROUSAL_DATA = get_data('Anew_arousal.txt') VALENCE_DATA = get_data('Anew_valence.txt') DOMINANCE_DATA = get_data('Anew_dominance.txt') class Anew(object): def __init__(self): self.words, self.Arousal, self.Dominance, self.Valence = self.Setup() def Setup(self): with open(AROUSAL_DATA) as f: Arousal = [x.strip().split('\t') for x in f.readlines()] Arousal = {x: float(y) for x, y in Arousal} with open(DOMINANCE_DATA) as f: Dominance = [x.strip().split('\t') for x in f.readlines()] Dominance = {x: float(y) for x, y in Dominance} with open(VALENCE_DATA) as f: Valence = [x.strip().split('\t') for x in f.readlines()] Valence = {x: float(y) for x, y in Valence} words = Arousal.keys()
import numpy as np import pandas as pd from moodscores.helper_functions import tokenizer from moodscores import _ROOT,get_data GPOMS_DATA = get_data('GPOMS.csv') class GPOMS(object): def __init__(self): self.gpoms = self.Setup() def Setup(self): gpoms = pd.read_csv(GPOMS_DATA, sep = '\t', index_col='Word') gpoms = gpoms.replace({0: None}) columns = ['composed/anxious', 'agreeable/hostile','elated/depressed', 'confident/unsure', 'clearheaded/confused', 'energetic/tired'] gpoms_dict = dict([(i,np.array([a,b,c,d,e,f])) for i, a,b,c,d,e,f in zip(gpoms.index.tolist(), gpoms['composed/anxious'], gpoms['agreeable/hostile'], gpoms['elated/depressed'], gpoms['confident/unsure'], gpoms['clearheaded/confused'], gpoms['energetic/tired'])]) return gpoms_dict def Score(self,tweet, calculation_type): if calculation_type != 'Sum' and calculation_type != 'Average':
""" import os import re import math import string import requests import json from itertools import product from inspect import getsourcefile from moodscores import _ROOT, get_data # dictionary that holds location of all data # lex dict, lex seperator CUSTOM_TYPES = { 'Vader': [get_data('vader_lexicon.txt'), '\t'], 'OF': [get_data('OpFi-Sent.txt'), ' '], 'ANEW_Valence': [get_data('Anew_valence.txt'), '\t'], 'ANEW_Dominance': [get_data('Anew_dominance.txt'), '\t'], 'ANEW_Arousal': [get_data('Anew_arousal.txt'), '\t'], 'GPOMS_composed/anxious': [get_data('GPOMS-composed_anxious.csv'), ','], 'GPOMS_agreeable/hostile': [get_data('GPOMS-agreeable_hostile.csv'), ','], 'GPOMS_elated/depressed': [get_data('GPOMS-elated_depressed.csv'), ','], 'GPOMS_confident/unsure': [get_data('GPOMS-confident_unsure.csv'), ','], 'GPOMS_clearheaded/confused': [get_data('GPOMS-clearheaded_confused.csv'), ','], 'GPOMS_energetic/tired': [get_data('GPOMS-energetic_tired.csv'), ','] } # ##Constants##
import numpy as np from moodscores.helper_functions import tokenizer from moodscores import _ROOT,get_data OF_DATA = get_data('OpFi-Sent.txt') class OpinionFinder(object): def __init__(self): self.OP = self.Setup() def Setup(self): with open(OF_DATA,'r') as f: OP = f.readlines() OP = [x.strip().split(' ') for x in OP] OP = {x:np.sign(float(y)) for x,y in OP} return OP def Score(self,tweet, calculation_type): if calculation_type != 'Sum' and calculation_type != 'Average': raise ValueError("calculation_type return be 'Sum' or 'Average'") total = 0 sent = 0 tokenized_list = tokenizer(tweet) tokens_in_wordlist = [] for word in tokenized_list: if word in self.OP: