Ejemplo n.º 1
0
 def test_tokenizer(self):
     """Check that the French sentiment() relies on the French tokenizer.

     The elided form "t'aime" has to be split as "t' aime"; the test verifies
     this through the words reported in the first assessment of each result.
     """
     affirmative = fr.sentiment("je t'aime")
     negated = fr.sentiment("je ne t'aime pas")
     # The first element of the result must be positive for the plain
     # sentence and negative for the negated one.
     self.assertTrue(0 < affirmative[0])
     self.assertTrue(0 > negated[0])
     # The first assessment must list "aime" on its own, without the "t'".
     self.assertTrue(["aime"] == affirmative.assessments[0][0])
     self.assertTrue(["ne", "aime"] == negated.assessments[0][0])
Ejemplo n.º 2
0
 def test_tokenizer(self):
     """Verify that French sentiment() tokenizes "t'aime" as "t' aime"."""
     sentences = ("je t'aime", "je ne t'aime pas")
     plain, negated = [fr.sentiment(sentence) for sentence in sentences]

     # Sign of the first score element: above zero without the negation,
     # below zero with it.
     self.assertTrue(plain[0] > 0)
     self.assertTrue(negated[0] < 0)

     # Words of the first assessment: the elided pronoun must not be glued
     # to the verb.
     plain_words = plain.assessments[0][0]
     self.assertTrue(plain_words == ["aime"])
     negated_words = negated.assessments[0][0]
     self.assertTrue(negated_words == ["ne", "aime"])
Ejemplo n.º 3
0
def emotion_ts_from_text(tier, nlp):
    """Build polarity and subjectivity series from the items of ``tier``.

    Each item is passed to ``get_interval`` to obtain its label and its
    ``start``/``stop`` bounds. Labels that are empty or placeholder markers
    ("#", "", " ", "***", "*") score 0 for both values; any other label is
    scored with ``sentiment(label)``, whose first two elements are used as
    polarity and subjectivity.

    Sample positions begin at ``(start + stop) / 2.0`` and advance by
    ``(start + stop) / 10.0`` while they stay below ``stop``.

    Args:
        tier: Iterable of items accepted by ``get_interval``.
        nlp: Not used by this function.

    Returns:
        A tuple ``(x, y, z)`` of equally long lists: sample positions,
        polarity values and subjectivity values.
    """
    unscored_labels = ("#", "", " ", "***", "*")
    positions = []
    polarities = []
    subjectivities = []

    for item in tier:
        # The third element is unpacked (so it must hold two values) but is
        # otherwise unused.
        label, (start, stop), (_start_r, _stop_r) = get_interval(item)

        position = (start + stop) / 2.0
        # NOTE(review): the step uses start + stop rather than stop - start;
        # if start + stop <= 0 while position < stop, the loop below never
        # terminates. Confirm the intended sampling step.
        step = (start + stop) / 10.0

        if label in unscored_labels:
            polarity, subjectivity = 0, 0
        else:
            scores = sentiment(label)
            polarity, subjectivity = scores[0], scores[1]

        while position < stop:
            positions.append(position)
            polarities.append(polarity)
            subjectivities.append(subjectivity)
            position = position + step

    return positions, polarities, subjectivities
Ejemplo n.º 4
0
 def test_sentiment(self):
     """Check the sign of adjective scores and the review-corpus baseline."""
     # A positive adjective must score above zero, a negative one below zero.
     self.assertTrue(0 < fr.sentiment("fabuleux")[0])
     self.assertTrue(0 > fr.sentiment("terrible")[0])
     # Measure the quality of the sentiment analysis on the scored book
     # reviews (1,500 of them, per the original note).
     # The thresholds below are a baseline: a change to the algorithm
     # should raise them, never lower them.
     from pattern.db import Datasheet
     from pattern.metrics import test
     corpus = os.path.join(PATH, "corpora", "polarity-fr-amazon.csv")
     # A review counts as positive when its integer score is above zero.
     reviews = [(text, int(score) > 0) for text, score in Datasheet.load(corpus)]
     accuracy, precision, recall, f1 = test(lambda review: fr.positive(review), reviews)
     self.assertTrue(accuracy > 0.75)
     self.assertTrue(precision > 0.76)
     self.assertTrue(recall > 0.73)
     self.assertTrue(f1 > 0.75)
     print("pattern.fr.sentiment()")
Ejemplo n.º 5
0
 def test_sentiment(self):
     """Assert polarity signs for two adjectives and the corpus baseline scores."""
     # Negative adjectives score < 0, positive adjectives score > 0.
     positive_score = fr.sentiment("fabuleux")[0]
     self.assertTrue(positive_score > 0)
     negative_score = fr.sentiment("terrible")[0]
     self.assertTrue(negative_score < 0)
     # Accuracy check against the scored book reviews (1,500 per the
     # original note). The baseline should increase (not decrease) when the
     # algorithm is modified.
     from pattern.db import Datasheet
     from pattern.metrics import test
     labelled = []
     for row in Datasheet.load(os.path.join(PATH, "corpora", "polarity-fr-amazon.csv")):
         text, score = row
         # Label the review as positive when its integer score is above zero.
         labelled.append((text, int(score) > 0))
     A, P, R, F = test(lambda review: fr.positive(review), labelled)
     # Each measured value must exceed its baseline, checked in order.
     for measured, baseline in ((A, 0.75), (P, 0.76), (R, 0.73), (F, 0.75)):
         self.assertTrue(measured > baseline)
     print("pattern.fr.sentiment()")
Ejemplo n.º 6
0
    
    if i == 0:
        # The first row is kept aside as the header.
        header = row

    else:
        # Replace every punctuation and whitespace character with a plain
        # space so the text can be split on " ". string.punctuation already
        # contains the apostrophe, so no separate pass is needed for it.
        for c in string.punctuation:
            body = body.replace(c, ' ')
        for c in string.whitespace:
            body = body.replace(c, ' ')
        words = clean_body([n.lower() for n in body.split(" ")])
        comment = " ".join(words)

        polarity, subjectivity = sentiment(comment)
        vote = (polarity + subjectivity)
        # Scale polarity by (subjectivity + 1.0000001); the result is the key
        # under which the row is grouped below.
        subjectivity = subjectivity + 1.0000001
        polarity = polarity * subjectivity

        # Group rows by their scaled polarity. Only a missing key is
        # expected here, so other errors are no longer swallowed.
        try:
            mood[polarity].append(row)
        except KeyError:
            mood[polarity] = [row]

        # NOTE(review): this extends the list with itself (doubling it);
        # presumably an accumulator defined elsewhere was intended - confirm.
        words.extend(words)

    # Advance the row counter. The original "i =+ 1" assigned +1 on every
    # pass instead of incrementing.
    i += 1
Ejemplo n.º 7
0
import pandas as pd
import sys

# Load the semicolon-separated tweet dump; the file has no header row, so the
# column names are supplied explicitly.
store = 'tweets.csv'
columns = ['brand', 'id', 'username', 'date', 'tweet', 'source']
df = pd.read_csv(store, sep=';', names=columns)

# One frame per brand.
brand1 = df[df.brand == 'SNCB']
brand2 = df[df.brand == 'SNCF']

from pattern.fr import parse, sentiment, ngrams, pprint
from pattern.web import URL, plaintext
from pattern.metrics import readability


def _score_tweets(tweets):
    """Return (readability total, sentiment total) summed over ``tweets``.

    The sentiment total adds the first element of ``sentiment()`` applied to
    the ``plaintext()`` version of each tweet; the readability total adds
    ``readability()`` of the raw tweet.
    """
    read_total = 0.0
    sent_total = 0.0
    for text in tweets:
        sent_total += sentiment(plaintext(text))[0]
        read_total += readability(text)
    return read_total, sent_total


b1_read, b1_sent = _score_tweets(brand1.tweet)
b2_read, b2_sent = _score_tweets(brand2.tweet)

# Only the summed sentiment is reported; the readability totals are computed
# but not printed.
print('SNCB: %f' % b1_sent)
print('SNCF: %f' % b2_sent)