Example #1
def mapTweet(tweet, sentiWordnet, emoDict, unigram, slangs):
    out = []
    line = preprocessing.processTweet(tweet, stopWords, slangs)
    p = polarity.posPolarity(line, sentiWordnet)
    out.extend([float(p[0]), float(p[1]), float(p[2])])  # aggregate positive, negative and neutral polarity
    out.extend(p[7:])  # part-of-speech frequencies
    out.append(float(features.emoticonScore(line, emoDict)))  # aggregate emoticon score; weights may need tuning
    out.append(float(len(features.hashtagWords(line))) / 40)  # number of hashtagged words, normalised
    out.append(float(len(line)) / 140)  # tweet length relative to the 140-character limit
    out.append(float(features.upperCase(line)))  # uppercase presence: 0 or 1
    out.append(float(features.exclamationTest(line)))  # exclamation mark presence: 0 or 1
    out.append(float(line.count("!")) / 140)  # exclamation mark frequency
    out.append(float(features.questionTest(line)))  # question mark presence: 0 or 1
    out.append(float(line.count('?')) / 140)  # question mark frequency
    out.append(float(features.freqCapital(line)))  # frequency of capital letters
    tokens = line.split()
    for w in unigram:  # binary unigram presence features
        out.append(1.0 if w in tokens else 0.0)
    return out
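For context, a minimal sketch of how a feature vector produced by this mapTweet could be fed to a classifier. The training tweets, labels and the scikit-learn SVM are assumptions for illustration, not part of the snippet above.

from sklearn import svm

# tweets, labels and new_tweet are hypothetical; the other arguments are the
# same lexicons and vocabulary passed to mapTweet above.
X = [mapTweet(t, sentiWordnet, emoDict, unigram, slangs) for t in tweets]
clf = svm.SVC(kernel='linear')
clf.fit(X, labels)
prediction = clf.predict([mapTweet(new_tweet, sentiWordnet, emoDict, unigram, slangs)])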
Example #2
def mapper(filename, label):
    f = open(filename, 'r')
    line = f.readline()
    while line:
        out = label + '\t'
        p = polarity.polarity(line, sentiWordnet)
        out = out + str(p[0]) + '\t' + str(p[1]) + '\t' + str(p[2]) + '\t'  # aggregate positive, negative and neutral polarity
        pos = polarity.posFreq(line, sentiWordnet)
        out = out + str(pos['v']) + '\t' + str(pos['n']) + '\t' + str(pos['a']) + '\t' + str(pos['r']) + '\t'  # part-of-speech counts inside the tweet
        out = out + str(features.emoticonScore(line, emoticonDict)) + '\t'  # aggregate emoticon score; weights may need tuning
        out = out + str(len(line)) + '\t'  # tweet length
        out = out + str(features.upperCase(line)) + '\t'  # uppercase presence: 0 or 1
        out = out + str(features.exclamationTest(line)) + '\t'  # exclamation mark presence: 0 or 1
        out = out + str(line.count("!")) + '\t'  # exclamation mark count
        out = out + str(features.questionTest(line)) + '\t'  # question mark presence: 0 or 1
        out = out + str(line.count('?')) + '\t'  # question mark count
        out = out + str(features.freqCapital(line)) + '\t'  # frequency of capital letters
        for w in total:  # binary unigram presence features
            if w in line:
                out = out + '1\t'
            else:
                out = out + '0\t'
        fo.write(out[:-1] + '\n')  # drop the trailing tab and write one line per tweet
        line = f.readline()
    f.close()
    return None
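The same per-tweet line can be assembled with a list and a single '\t'.join, which makes it harder to drop a field separator than chained string concatenation. This is only a sketch under the same module-level globals (sentiWordnet, emoticonDict, total); map_line is a hypothetical helper name.

def map_line(line, label):
    # Collect every field in order, then join once with tabs.
    p = polarity.polarity(line, sentiWordnet)
    pos = polarity.posFreq(line, sentiWordnet)
    fields = [label, p[0], p[1], p[2],
              pos['v'], pos['n'], pos['a'], pos['r'],
              features.emoticonScore(line, emoticonDict),
              len(line),
              features.upperCase(line),
              features.exclamationTest(line),
              line.count("!"),
              features.questionTest(line),
              line.count('?'),
              features.freqCapital(line)]
    fields.extend(1 if w in line else 0 for w in total)  # binary unigram features
    return '\t'.join(str(v) for v in fields)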
Example #3
def mapTweet(tweet, afinn, emoDict, positive, negative, neutral, slangs):
    out = []
    line = preprocessing.processTweet(tweet, stopWords, slangs)  # clean the tweet: remove unnecessary words and rewrite slang
    out.append(polarity.afinnPolarity(line, afinn))  # AFINN polarity score
    out.append(float(features.emoticonScore(line, emoDict)))  # emoticon score
    out.append(float(features.hashtagWordsRatio(line)))  # ratio of hashtagged words
    out.append(float(len(line)) / 140)  # tweet length relative to the 140-character limit
    out.append(float(features.upperCase(line)))  # uppercase presence: 1 = yes, 0 = no
    out.append(float(features.exclamationTest(line)))  # exclamation mark presence: 1 = yes, 0 = no
    out.append(float(line.count("!")) / 140)  # exclamation mark frequency
    out.append(float(features.questionTest(line)))  # question mark presence: 1 = yes, 0 = no
    out.append(float(line.count('?')) / 140)  # question mark frequency
    out.append(float(features.freqCapital(line)))  # frequency of capital letters
    u = features.scoreUnigram(line, positive, negative, neutral)  # score over the word vector built from the test documents
    out.extend(u)
    return out
Example #4
def mapTweet(tweet, afinn, emoDict, positive, negative, neutral, slangs):
    out = []
    line = preprocessing.processTweet(tweet, stopWords, slangs)
    out.append(polarity.afinnPolarity(line, afinn))  # AFINN polarity score
    out.append(float(features.emoticonScore(line, emoDict)))  # aggregate emoticon score; weights may need tuning
    out.append(float(len(features.hashtagWords(line))) / 40)  # number of hashtagged words, normalised
    out.append(float(len(line)) / 140)  # tweet length relative to the 140-character limit
    out.append(float(features.upperCase(line)))  # uppercase presence: 0 or 1
    out.append(float(features.exclamationTest(line)))  # exclamation mark presence: 0 or 1
    out.append(float(line.count("!")) / 140)  # exclamation mark frequency
    out.append(float(features.questionTest(line)))  # question mark presence: 0 or 1
    out.append(float(line.count('?')) / 140)  # question mark frequency
    out.append(float(features.freqCapital(line)))  # frequency of capital letters
    u = features.scoreUnigram(line, positive, negative, neutral)  # unigram scores
    out.extend(u)
    return out
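The afinn argument in this variant is presumably a word-to-score lookup. Below is a minimal sketch of building such a dictionary from an AFINN-style lexicon file; the file name and the one word-TAB-score-per-line format are assumptions, not part of the original snippet.

# Sketch: load an AFINN-style lexicon into a dict for afinnPolarity.
afinn = {}
with open('AFINN-111.txt') as lexicon:  # hypothetical file name
    for entry in lexicon:
        word, score = entry.strip().rsplit('\t', 1)
        afinn[word] = int(score)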
Example #6
def mapTweet(tweet, sentiWordnet, emoDict, unigram, slangs):
    out = []
    line = preprocessing.processTweet(tweet, stopWords, slangs)
    p = polarity.posPolarity(line, sentiWordnet)
    out.extend([p[0], p[1], p[2]])  # aggregate positive, negative and neutral polarity
    out.extend(p[7:])  # part-of-speech frequencies
    out.append(float(features.emoticonScore(line, emoDict)))  # aggregate emoticon score; weights may need tuning
    out.append(float(len(features.hashtagWords(line))) / 40)  # number of hashtagged words, normalised
    out.append(float(len(line)) / 140)  # tweet length relative to the 140-character limit
    out.append(float(features.upperCase(line)))  # uppercase presence: 0 or 1
    out.append(float(features.exclamationTest(line)))  # exclamation mark presence: 0 or 1
    out.append(float(line.count("!")) / 140)  # exclamation mark frequency
    out.append(float(features.questionTest(line)))  # question mark presence: 0 or 1
    out.append(float(line.count('?')) / 140)  # question mark frequency
    out.append(float(features.freqCapital(line)))  # frequency of capital letters
    tokens = line.split()
    for w in unigram:  # binary unigram presence features
        out.append(1.0 if w in tokens else 0.0)
    return out
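The unigram list that drives the binary presence features above has to be built beforehand. One plausible way, sketched below, is to count tokens over a preprocessed training corpus and keep the most frequent ones; training_tweets and the vocabulary size of 1000 are assumptions, not part of the original code.

from collections import Counter

# Sketch: build the unigram vocabulary from a (hypothetical) training corpus.
counts = Counter()
for t in training_tweets:
    counts.update(preprocessing.processTweet(t, stopWords, slangs).split())
unigram = [w for w, _ in counts.most_common(1000)]  # keep the 1000 most frequent tokens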