def mapTweet(tweet, sentiWordnet, emoDict, unigram, slangs):
    """Map a raw tweet to its numeric feature vector.

    The tweet is first passed through ``preprocessing.processTweet`` (with the
    module-level ``stopWords`` and the given ``slangs``); every feature below
    is computed on that processed text.

    Args:
        tweet: Raw tweet text.
        sentiWordnet: Lexicon handed to ``polarity.posPolarity``.
        emoDict: Emoticon table handed to ``features.emoticonScore``.
        unigram: Iterable of vocabulary words; each one contributes a
            presence flag, in iteration order.
        slangs: Slang table handed to ``preprocessing.processTweet``.

    Returns:
        list: ``p[0]``, ``p[1]``, ``p[2]`` of the ``posPolarity`` result as
        floats, then ``p[7:]`` unchanged, then ten scalar float features
        (emoticon score, hashtag count / 40, length / 140, upper-case test,
        exclamation test, '!' count / 140, question test, '?' count / 140,
        capital frequency), then one ``1.0``/``0.0`` flag per ``unigram`` word.
    """
    line = preprocessing.processTweet(tweet, stopWords, slangs)
    p = polarity.posPolarity(line, sentiWordnet)

    # First three entries: aggregate polarity scores (per the original
    # author's note: positive / negative / neutral).
    out = [float(p[0]), float(p[1]), float(p[2])]
    # Everything from index 7 onward: part-of-speech frequencies
    # (original author's note); indices 3-6 are intentionally not used here.
    out.extend(p[7:])

    out.append(float(features.emoticonScore(line, emoDict)))
    # The divisors are float literals so the ratios are not truncated to 0
    # by integer division when this runs under Python 2.
    out.append(len(features.hashtagWords(line)) / 40.0)
    out.append(len(line) / 140.0)
    out.append(float(features.upperCase(line)))
    out.append(float(features.exclamationTest(line)))
    out.append(line.count("!") / 140.0)
    out.append(float(features.questionTest(line)))
    out.append(line.count('?') / 140.0)
    out.append(float(features.freqCapital(line)))

    # Tokenise once instead of re-splitting the text for every vocabulary
    # word; assumes the vocabulary entries are hashable (strings).
    tokens = set(line.split())
    out.extend(1.0 if w in tokens else 0.0 for w in unigram)
    return out
def mapper(filename, label):
    """Write one tab-separated feature row per line of ``filename``.

    Each input line is turned into a row starting with ``label`` followed by
    the feature columns, and the row is written to the module-level output
    file ``fo``.  The helpers read the module-level ``sentiWordnet``,
    ``emoticonDict`` and ``total`` (the unigram vocabulary).

    Args:
        filename: Path of the text file to read, one tweet per line.
        label: Class label string placed in the first column of every row.

    Returns:
        None.
    """
    # The context manager closes the input even if a feature helper raises.
    with open(filename, 'r') as f:
        for line in f:
            p = polarity.polarity(line, sentiWordnet)
            pos = polarity.posFreq(line, sentiWordnet)
            values = [
                p[0], p[1], p[2],                            # aggregate polarity scores
                pos['v'], pos['n'], pos['a'], pos['r'],      # part-of-speech counts
                features.emoticonScore(line, emoticonDict),  # emoticon aggregate score
                len(line),                                   # length (line ending included)
                features.upperCase(line),
                features.exclamationTest(line),
                line.count("!"),
                features.questionTest(line),
                # Separate column: previously no tab followed this value, so
                # it was fused with the freqCapital column that comes next.
                line.count('?'),
                features.freqCapital(line),
            ]
            fields = [label]
            fields.extend(str(v) for v in values)
            # NOTE(review): this is a substring test on the raw line, not a
            # whole-word match -- confirm that is intended.
            fields.extend('1' if w in line else '0' for w in total)
            fo.write('\t'.join(fields) + '\n')
    return None
def mapTweet(tweet, afinn, emoDict, positive, negative, neutral, slangs):
    """Map a raw tweet to its numeric feature vector (AFINN variant).

    The tweet is first cleaned with ``preprocessing.processTweet`` (with the
    module-level ``stopWords`` and the given ``slangs``), which per the
    original comment drops unneeded words and rewrites slang; every feature
    below is computed on that processed text.

    Args:
        tweet: Raw tweet text.
        afinn: Lexicon handed to ``polarity.afinnPolarity``.
        emoDict: Emoticon table handed to ``features.emoticonScore``.
        positive: Passed through to ``features.scoreUnigram``.
        negative: Passed through to ``features.scoreUnigram``.
        neutral: Passed through to ``features.scoreUnigram``.
        slangs: Slang table handed to ``preprocessing.processTweet``.

    Returns:
        list: the AFINN polarity value (as returned, not coerced to float),
        nine scalar float features, then every element produced by
        ``features.scoreUnigram``.
    """
    line = preprocessing.processTweet(tweet, stopWords, slangs)

    out = [polarity.afinnPolarity(line, afinn)]                # AFINN polarity
    out.append(float(features.emoticonScore(line, emoDict)))   # emoticon score
    out.append(float(features.hashtagWordsRatio(line)))        # share of hashtagged words
    # The divisors are float literals so the ratios are not truncated to 0
    # by integer division when this runs under Python 2.
    out.append(len(line) / 140.0)                              # fraction of the 140 characters used
    out.append(float(features.upperCase(line)))                # upper case present: 1 = yes, 0 = no
    out.append(float(features.exclamationTest(line)))          # exclamation mark present: 1 = yes, 0 = no
    out.append(line.count("!") / 140.0)                        # exclamation marks per 140 characters
    out.append(float(features.questionTest(line)))             # question mark present
    out.append(line.count('?') / 140.0)                        # question marks per 140 characters
    out.append(float(features.freqCapital(line)))              # share of capital letters

    # Scores over the word vectors built from the training documents
    # (original author's note).
    out.extend(features.scoreUnigram(line, positive, negative, neutral))
    return out
def mapTweet(tweet, afinn, emoDict, positive, negative, neutral, slangs):
    """Map a raw tweet to its numeric feature vector (AFINN variant).

    The tweet is first passed through ``preprocessing.processTweet`` (with the
    module-level ``stopWords`` and the given ``slangs``); every feature below
    is computed on that processed text.

    Args:
        tweet: Raw tweet text.
        afinn: Lexicon handed to ``polarity.afinnPolarity``.
        emoDict: Emoticon table handed to ``features.emoticonScore``.
        positive: Passed through to ``features.scoreUnigram``.
        negative: Passed through to ``features.scoreUnigram``.
        neutral: Passed through to ``features.scoreUnigram``.
        slangs: Slang table handed to ``preprocessing.processTweet``.

    Returns:
        list: the AFINN polarity value (as returned, not coerced to float),
        nine scalar float features (emoticon score, hashtag count / 40,
        length / 140, upper-case test, exclamation test, '!' count / 140,
        question test, '?' count / 140, capital frequency), then every
        element produced by ``features.scoreUnigram``.
    """
    line = preprocessing.processTweet(tweet, stopWords, slangs)

    out = [polarity.afinnPolarity(line, afinn)]
    # Emoticon aggregate score; the original author warns to be careful
    # when modifying its weights.
    out.append(float(features.emoticonScore(line, emoDict)))
    # The divisors are float literals so the ratios are not truncated to 0
    # by integer division when this runs under Python 2.
    out.append(len(features.hashtagWords(line)) / 40.0)   # hashtagged words
    out.append(len(line) / 140.0)                         # length
    out.append(float(features.upperCase(line)))           # upper case present: 0 or 1
    out.append(float(features.exclamationTest(line)))
    out.append(line.count("!") / 140.0)
    out.append(float(features.questionTest(line)))
    out.append(line.count('?') / 140.0)
    out.append(float(features.freqCapital(line)))

    out.extend(features.scoreUnigram(line, positive, negative, neutral))
    return out
def mapTweet(tweet, sentiWordnet, emoDict, unigram, slangs):
    """Map a raw tweet to its numeric feature vector.

    The tweet is first passed through ``preprocessing.processTweet`` (with the
    module-level ``stopWords`` and the given ``slangs``); every feature below
    is computed on that processed text.

    Args:
        tweet: Raw tweet text.
        sentiWordnet: Lexicon handed to ``polarity.posPolarity``.
        emoDict: Emoticon table handed to ``features.emoticonScore``.
        unigram: Iterable of vocabulary words; each one contributes a
            presence flag, in iteration order.
        slangs: Slang table handed to ``preprocessing.processTweet``.

    Returns:
        list: ``p[0]``, ``p[1]``, ``p[2]`` of the ``posPolarity`` result (as
        returned, not coerced), then ``p[7:]``, then nine scalar float
        features (emoticon score, hashtag count / 40, length / 140,
        upper-case test, exclamation test, '!' count / 140, question test,
        '?' count / 140, capital frequency), then one ``1.0``/``0.0`` flag
        per ``unigram`` word.
    """
    line = preprocessing.processTweet(tweet, stopWords, slangs)
    p = polarity.posPolarity(line, sentiWordnet)

    # Aggregate polarity scores, kept exactly as posPolarity returned them.
    out = [p[0], p[1], p[2]]
    # Everything from index 7 onward: part-of-speech frequencies
    # (original author's note); indices 3-6 are not used here.
    out.extend(p[7:])

    # Emoticon aggregate score; the original author warns to be careful
    # when modifying its weights.
    out.append(float(features.emoticonScore(line, emoDict)))
    # The divisors are float literals so the ratios are not truncated to 0
    # by integer division when this runs under Python 2.
    out.append(len(features.hashtagWords(line)) / 40.0)   # hashtagged words
    out.append(len(line) / 140.0)                         # length
    out.append(float(features.upperCase(line)))           # upper case present: 0 or 1
    out.append(float(features.exclamationTest(line)))
    out.append(line.count("!") / 140.0)
    out.append(float(features.questionTest(line)))
    out.append(line.count('?') / 140.0)
    out.append(float(features.freqCapital(line)))

    # Tokenise once instead of re-splitting the text for every vocabulary
    # word; assumes the vocabulary entries are hashable (strings).
    tokens = set(line.split())
    out.extend(1.0 if w in tokens else 0.0 for w in unigram)
    return out