def __init__(self, lexicon='Vader', lex_sep=None):
        """Configure the scorer with a packaged or a user-supplied lexicon.

        Args:
            lexicon: Either a key of ``CUSTOM_TYPES`` (selects a lexicon that
                ships with the package) or any other value, which is handed
                unchanged to ``make_lex_dict`` as a custom lexicon
                (presumably a file location -- confirm against
                ``make_lex_dict``).
            lex_sep: Field separator for a custom lexicon. Ignored when
                ``lexicon`` names a packaged lexicon, whose separator comes
                from ``CUSTOM_TYPES``.

        Raises:
            ValueError: If ``lexicon`` is None.
        """
        # None can never name a lexicon. Use an identity test so that an
        # argument with a custom __eq__ (e.g. an array-like) is not asked
        # to compare itself against None.
        if lexicon is None:
            raise ValueError('lexicon must be one of ' +
                             str(list(CUSTOM_TYPES.keys())) + ' or Custom')

        #### regarding lexicons
        if lexicon in CUSTOM_TYPES:
            # packaged lexicon: take both file and separator from the registry
            self.lex_dict = CUSTOM_TYPES[lexicon][0]
            self.lex_sep = CUSTOM_TYPES[lexicon][1]
        else:
            # custom lexicon: use exactly what the caller supplied
            self.lex_dict = lexicon
            self.lex_sep = lex_sep

        #### regarding emojis
        self.emoji_dict = get_data('emoji_utf8_lexicon.txt')
        self.emoji_sep = '\t'

        # Parse both sources into the lookup structures used for scoring.
        self.lexicon = self.make_lex_dict(self.lex_dict, self.lex_sep)
        self.emojis = self.make_emoji_dict(self.emoji_dict, self.emoji_sep)
Exemple #2
0
import numpy as np
import pandas as pd
from moodscores.helper_functions import tokenizer
from moodscores import _ROOT, get_data

# Locations of the three ANEW word lists; each one is passed to open() in
# Anew.Setup. get_data presumably resolves a packaged data file name to a
# path -- confirm against moodscores.get_data.
AROUSAL_DATA = get_data('Anew_arousal.txt')
VALENCE_DATA = get_data('Anew_valence.txt')
DOMINANCE_DATA = get_data('Anew_dominance.txt')


class Anew(object):
    def __init__(self):
        """Run Setup once and store its four results on the instance."""
        loaded = self.Setup()
        # Setup is expected to hand back exactly four values, in this order.
        self.words, self.Arousal, self.Dominance, self.Valence = loaded

    def Setup(self):

        with open(AROUSAL_DATA) as f:
            Arousal = [x.strip().split('\t') for x in f.readlines()]
            Arousal = {x: float(y) for x, y in Arousal}

        with open(DOMINANCE_DATA) as f:
            Dominance = [x.strip().split('\t') for x in f.readlines()]
            Dominance = {x: float(y) for x, y in Dominance}

        with open(VALENCE_DATA) as f:
            Valence = [x.strip().split('\t') for x in f.readlines()]
            Valence = {x: float(y) for x, y in Valence}

        words = Arousal.keys()
Exemple #3
0
import numpy as np
import pandas as pd
from moodscores.helper_functions import tokenizer
from moodscores import _ROOT,get_data

# Location of the GPOMS table, read with pandas.read_csv in GPOMS.Setup.
# get_data presumably resolves a packaged data file name to a path -- confirm.
GPOMS_DATA = get_data('GPOMS.csv')


class GPOMS(object):

    def __init__(self):
        """Build the word -> score-array lookup once and keep it on the instance."""
        scores_by_word = self.Setup()
        self.gpoms = scores_by_word

    def Setup(self):
        """Load the GPOMS table and index its six mood scores by word.

        Reads the tab-separated file at ``GPOMS_DATA`` using the ``Word``
        column as the index.

        Returns:
            dict: Maps each entry of the ``Word`` index to a numpy array
            holding that word's six scores, in the order of ``columns``.
        """
        gpoms = pd.read_csv(GPOMS_DATA, sep='\t', index_col='Word')
        # Zero cells are replaced with None; presumably a zero means "no
        # score for this dimension" -- confirm against how Score uses them.
        gpoms = gpoms.replace({0: None})

        # Single source of truth for which dimensions are used and in what
        # order they appear inside each per-word array.
        columns = ['composed/anxious', 'agreeable/hostile', 'elated/depressed',
                   'confident/unsure', 'clearheaded/confused',
                   'energetic/tired']

        # Transpose the per-dimension columns into one tuple of scores per row.
        score_rows = zip(*[gpoms[name] for name in columns])
        return {word: np.array(scores)
                for word, scores in zip(gpoms.index.tolist(), score_rows)}

    def Score(self,tweet, calculation_type):

        if calculation_type != 'Sum' and calculation_type != 'Average':
"""
import os
import re
import math
import string
import requests
import json
from itertools import product
from inspect import getsourcefile

from moodscores import _ROOT, get_data

# Registry of the lexicons that come with the package.
# Maps lexicon name -> [lexicon file location, field separator].
CUSTOM_TYPES = {
    name: [get_data(filename), separator]
    for name, filename, separator in (
        ('Vader', 'vader_lexicon.txt', '\t'),
        ('OF', 'OpFi-Sent.txt', ' '),
        ('ANEW_Valence', 'Anew_valence.txt', '\t'),
        ('ANEW_Dominance', 'Anew_dominance.txt', '\t'),
        ('ANEW_Arousal', 'Anew_arousal.txt', '\t'),
        ('GPOMS_composed/anxious', 'GPOMS-composed_anxious.csv', ','),
        ('GPOMS_agreeable/hostile', 'GPOMS-agreeable_hostile.csv', ','),
        ('GPOMS_elated/depressed', 'GPOMS-elated_depressed.csv', ','),
        ('GPOMS_confident/unsure', 'GPOMS-confident_unsure.csv', ','),
        ('GPOMS_clearheaded/confused', 'GPOMS-clearheaded_confused.csv', ','),
        ('GPOMS_energetic/tired', 'GPOMS-energetic_tired.csv', ','),
    )
}

# ##Constants##
Exemple #5
0
import numpy as np
from moodscores.helper_functions import tokenizer
from moodscores import _ROOT,get_data

# Location of the OpinionFinder lexicon, opened by OpinionFinder.Setup.
# get_data presumably resolves a packaged data file name to a path -- confirm.
OF_DATA = get_data('OpFi-Sent.txt')

class OpinionFinder(object):

    def __init__(self):
        """Build the word -> polarity lookup once and keep it on the instance."""
        polarity_by_word = self.Setup()
        self.OP = polarity_by_word

    def Setup(self):
        """Read the OpinionFinder lexicon into a word -> polarity mapping.

        Each line of the file at ``OF_DATA`` must hold exactly two
        space-separated fields: a word and a numeric score. Only the sign of
        the score is kept (the result of ``np.sign``).

        Returns:
            dict: Maps each word to the sign of its score.
        """
        polarity = {}
        with open(OF_DATA, 'r') as handle:
            for raw_line in handle.readlines():
                # Two-name unpacking rejects lines that do not split into
                # exactly a word and a score.
                word, score = raw_line.strip().split(' ')
                polarity[word] = np.sign(float(score))
        return polarity

    def Score(self,tweet, calculation_type):

        if calculation_type != 'Sum' and calculation_type != 'Average':
            raise ValueError("calculation_type return be 'Sum' or 'Average'")
            
        total = 0
        sent = 0
        tokenized_list = tokenizer(tweet)
        tokens_in_wordlist = []

        for word in tokenized_list:
            if word in self.OP: