def test_sentiment(self):
    """Test pattern.en.sentiment() polarity scores and the accuracy of
    en.positive() on Pang & Lee's polarity datasets (v2.0 and v1.0)."""
    # Assert < 0 for negative adjectives and > 0 for positive adjectives.
    self.assertTrue(en.sentiment("wonderful")[0] > 0)
    self.assertTrue(en.sentiment("horrible")[0] < 0)
    # sentiment() also accepts wordnet Synset and parsed Text objects.
    self.assertTrue(en.sentiment(en.wordnet.synsets("horrible", pos="JJ")[0])[0] < 0)
    self.assertTrue(en.sentiment(en.Text(en.parse("A bad book. Really horrible.")))[0] < 0)
    # Assert that :) and :( are recognized.
    self.assertTrue(en.sentiment(":)")[0] > 0)
    self.assertTrue(en.sentiment(":(")[0] < 0)
    # Assert the accuracy of the sentiment analysis (for the positive class).
    # Given are the scores for Pang & Lee's polarity dataset v2.0:
    # http://www.cs.cornell.edu/people/pabo/movie-review-data/
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    reviews = []
    for score, review in Datasheet.load(os.path.join(PATH, "corpora", "polarity-en-pang&lee1.csv")):
        reviews.append((review, int(score) > 0))
    A, P, R, F = test(lambda review: en.positive(review), reviews)
    self.assertTrue(A > 0.755)
    self.assertTrue(P > 0.760)
    self.assertTrue(R > 0.747)
    self.assertTrue(F > 0.754)
    # Assert the accuracy of the sentiment analysis on short text (for the positive class).
    # Given are the scores for Pang & Lee's sentence polarity dataset v1.0:
    # http://www.cs.cornell.edu/people/pabo/movie-review-data/
    reviews = []
    for score, review in Datasheet.load(os.path.join(PATH, "corpora", "polarity-en-pang&lee2.csv")):
        reviews.append((review, int(score) > 0))
    A, P, R, F = test(lambda review: en.positive(review), reviews)
    self.assertTrue(A > 0.642)
    self.assertTrue(P > 0.653)
    self.assertTrue(R > 0.607)
    self.assertTrue(F > 0.629)
    # Use the print() call form: valid on both Python 2 and 3, and
    # consistent with the other tests in this file (the original used a
    # Python 2-only print statement).
    print("pattern.en.sentiment()")
def test_modality(self):
    """pattern.en.modality() returns a certainty score in -1.0 => +1.0."""
    # A wishful statement reads as uncertain (negative modality).
    self.assertTrue(
        en.modality(en.Sentence(en.parse("I wish it would stop raining."))) < 0)
    # A confident statement reads as certain (positive modality).
    self.assertTrue(
        en.modality(en.Sentence(en.parse("It will surely stop raining soon."))) > 0)
    # Assert the accuracy of the modality algorithm on the CoNLL-2010
    # Shared Task 1 Wikipedia uncertainty data:
    # http://www.inf.u-szeged.hu/rgai/conll2010st/tasks.html#task1
    # The baseline should increase (not decrease) when the algorithm is
    # modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    corpus = os.path.join(PATH, "corpora", "uncertainty-conll2010.csv")
    sentences = [
        (en.Sentence(en.parse(s, chunks=False, light=True)), int(certain) > 0)
        for certain, s in Datasheet.load(corpus)]
    # A sentence is predicted "certain" when its modality score > 0.5.
    A, P, R, F = test(lambda s: en.modality(s) > 0.5, sentences)
    #print(A, P, R, F)
    self.assertTrue(A > 0.69)
    self.assertTrue(P > 0.72)
    self.assertTrue(R > 0.64)
    self.assertTrue(F > 0.68)
    print("pattern.en.modality()")
def test_sentiment_twitter(self):
    """Evaluate en.positive() on the Sanders Twitter Sentiment Corpus
    (the test is a no-op when the corpus file is not installed)."""
    sanders = os.path.join(PATH, "corpora", "polarity-en-sanders.csv")
    if not os.path.exists(sanders):
        return
    # http://www.sananalytics.com/lab/twitter-sentiment/
    # Positive + neutral is taken as polarity >= 0.0,
    # negative is taken as polarity < 0.0.
    # There are many neutral cases and the algorithm predicts 0.0 by
    # default (i.e., the majority class), so the scores below are good;
    # separating negative / neutral / positive is a much harder task.
    from pattern.db import Datasheet
    from pattern.metrics import test
    tweets = [
        (tweet, polarity in ("positive", "neutral"))
        for _i, _id, _date, tweet, polarity, _topic in Datasheet.load(sanders)
        if polarity != "irrelevant"]
    A, P, R, F = test(lambda tweet: en.positive(tweet, threshold=0.0), tweets)
    #print(A, P, R, F)
    self.assertTrue(A > 0.824)
    self.assertTrue(P > 0.879)
    self.assertTrue(R > 0.911)
    self.assertTrue(F > 0.895)
def test_sentiment_twitter(self):
    """Evaluate en.positive() on the Sanders Twitter Sentiment Corpus,
    skipping silently when the corpus file is absent."""
    sanders = os.path.join(PATH, "corpora", "polarity-en-sanders.csv")
    if os.path.exists(sanders):
        # http://www.sananalytics.com/lab/twitter-sentiment/
        # Positive + neutral is taken as polarity >= 0.0,
        # negative is taken as polarity < 0.0.
        # Since there are a lot of neutral cases, and the algorithm
        # predicts 0.0 by default (i.e., majority class), the results are
        # good; distinguishing negative from neutral from positive is a
        # much harder task.
        from pattern.db import Datasheet
        from pattern.metrics import test
        labeled = []
        for row in Datasheet.load(sanders):
            # Row layout: id, tweet id, date, tweet, polarity, topic.
            tweet, polarity = row[3], row[4]
            if polarity == "irrelevant":
                continue
            labeled.append((tweet, polarity in ("positive", "neutral")))
        A, P, R, F = test(lambda tweet: en.positive(tweet, threshold=0.0), labeled)
        #print(A, P, R, F)
        self.assertTrue(A > 0.824)
        self.assertTrue(P > 0.879)
        self.assertTrue(R > 0.911)
        self.assertTrue(F > 0.895)
def test_modality(self):
    """Test that pattern.en.modality() returns -1.0 => +1.0 representing
    the degree of certainty, and its accuracy on CoNLL-2010 data."""
    # Wishful phrasing => uncertain (negative modality).
    v = en.modality(en.Sentence(en.parse("I wish it would stop raining.")))
    self.assertTrue(v < 0)
    # Confident phrasing => certain (positive modality).
    v = en.modality(en.Sentence(en.parse("It will surely stop raining soon.")))
    self.assertTrue(v > 0)
    # Assert the accuracy of the modality algorithm.
    # Given are the scores for the CoNLL-2010 Shared Task 1 Wikipedia uncertainty data:
    # http://www.inf.u-szeged.hu/rgai/conll2010st/tasks.html#task1
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    sentences = []
    for certain, sentence in Datasheet.load(os.path.join(PATH, "corpora", "uncertainty-conll2010.csv")):
        sentence = en.parse(sentence, chunks=False, light=True)
        sentence = en.Sentence(sentence)
        sentences.append((sentence, int(certain) > 0))
    # A sentence is predicted "certain" when its modality score > 0.5.
    A, P, R, F = test(lambda sentence: en.modality(sentence) > 0.5, sentences)
    #print(A, P, R, F)
    self.assertTrue(A > 0.69)
    self.assertTrue(P > 0.71)
    self.assertTrue(R > 0.64)
    self.assertTrue(F > 0.67)
    # print() call form: valid on Python 2 and 3 (original used a
    # Python 2-only print statement).
    print("pattern.en.modality()")
def test_sentiment(self):
    """Test pattern.en.sentiment() polarity scores and the accuracy of
    en.positive() on Pang & Lee's polarity dataset v2.0."""
    # Assert < 0 for negative adjectives and > 0 for positive adjectives.
    self.assertTrue(en.sentiment("wonderful")[0] > 0)
    self.assertTrue(en.sentiment("horrible")[0] < 0)
    # sentiment() also accepts wordnet Synset and parsed Text objects.
    self.assertTrue(
        en.sentiment(en.wordnet.synsets("horrible", pos="JJ")[0])[0] < 0)
    self.assertTrue(
        en.sentiment(en.Text(en.parse("A bad book. Really horrible.")))[0] < 0)
    # Assert the accuracy of the sentiment analysis.
    # Given are the scores for Pang & Lee's polarity dataset v2.0:
    # http://www.cs.cornell.edu/people/pabo/movie-review-data/
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    reviews = []
    for score, review in Datasheet.load(
            os.path.join("corpora", "pang&lee-polarity.txt")):
        reviews.append((review, int(score) > 0))
    A, P, R, F = test(lambda review: en.positive(review), reviews)
    self.assertTrue(A > 0.71)
    self.assertTrue(P > 0.72)
    self.assertTrue(R > 0.70)
    self.assertTrue(F > 0.71)
    # print() call form: valid on Python 2 and 3 (original used a
    # Python 2-only print statement).
    print("pattern.en.sentiment()")
def test_intertextuality(self):
    """metrics.intertextuality() should detect plagiarism in the
    Clough & Stevenson corpus with high precision and recall."""
    from pattern.db import Datasheet
    corpus = Datasheet.load(
        os.path.join(PATH, "corpora", "plagiarism-clough&stevenson.csv"))
    samples = [((txt, src), int(label) > 0) for txt, src, label in corpus]
    # Two texts count as plagiarized when their trigram overlap > 5%.
    def plagiarism(txt, src):
        return metrics.intertextuality([txt, src], n=3)[0, 1] > 0.05
    A, P, R, F = metrics.test(lambda pair: plagiarism(*pair), samples)
    self.assertTrue(P > 0.96)
    self.assertTrue(R > 0.94)
    print("pattern.metrics.intertextuality()")
def test_intertextuality(self):
    """Evaluate metrics.intertextuality() for plagiarism detection on
    the Clough & Stevenson corpus."""
    from pattern.db import Datasheet
    data = Datasheet.load(os.path.join(PATH, "corpora", "plagiarism-clough&stevenson.csv"))
    data = [((txt, src), int(plagiarism) > 0) for txt, src, plagiarism in data]

    def plagiarism(txt, src):
        # Texts sharing more than 5% of their trigrams are plagiarized.
        return metrics.intertextuality([txt, src], n=3)[0, 1] > 0.05

    A, P, R, F = metrics.test(lambda x: plagiarism(*x), data)
    self.assertTrue(P > 0.96)
    self.assertTrue(R > 0.94)
    # print() call form: valid on Python 2 and 3 (original used a
    # Python 2-only print statement).
    print("pattern.metrics.intertextuality()")
def test_sentiment(self):
    """Test pattern.nl.sentiment() polarity scores and the accuracy of
    nl.positive() on the bol.com book reviews corpus."""
    # Assert < 0 for negative adjectives and > 0 for positive adjectives.
    self.assertTrue(nl.sentiment("geweldig")[0] > 0)
    self.assertTrue(nl.sentiment("verschrikkelijk")[0] < 0)
    # Assert the accuracy of the sentiment analysis.
    # Given are the scores for 3,000 book reviews.
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    reviews = []
    for score, review in Datasheet.load(os.path.join(PATH, "corpora", "polarity-nl-bol.com.csv")):
        reviews.append((review, int(score) > 0))
    A, P, R, F = test(lambda review: nl.positive(review), reviews)
    self.assertTrue(A > 0.80)
    self.assertTrue(P > 0.77)
    self.assertTrue(R > 0.85)
    self.assertTrue(F > 0.81)
    # print() call form: valid on Python 2 and 3 (original used a
    # Python 2-only print statement).
    print("pattern.nl.sentiment()")
def test_sentiment(self):
    """Test pattern.fr.sentiment() polarity scores and the accuracy of
    fr.positive() on the Amazon book reviews corpus."""
    # Assert < 0 for negative adjectives and > 0 for positive adjectives.
    self.assertTrue(fr.sentiment("fabuleux")[0] > 0)
    self.assertTrue(fr.sentiment("terrible")[0] < 0)
    # Assert the accuracy of the sentiment analysis.
    # Given are the scores for 1,500 book reviews.
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    reviews = []
    # NOTE: this corpus stores (review, score), unlike the other corpora
    # in this suite which store (score, review).
    for review, score in Datasheet.load(os.path.join(PATH, "corpora", "polarity-fr-amazon.csv")):
        reviews.append((review, int(score) > 0))
    A, P, R, F = test(lambda review: fr.positive(review), reviews)
    self.assertTrue(A > 0.75)
    self.assertTrue(P > 0.76)
    self.assertTrue(R > 0.73)
    self.assertTrue(F > 0.75)
    # print() call form: valid on Python 2 and 3 (original used a
    # Python 2-only print statement).
    print("pattern.fr.sentiment()")
def test_sentiment(self):
    """Test pattern.en.sentiment() polarity scores and the accuracy of
    en.positive() on Pang & Lee's polarity dataset v2.0."""
    # Assert < 0 for negative adjectives and > 0 for positive adjectives.
    self.assertTrue(en.sentiment("wonderful")[0] > 0)
    self.assertTrue(en.sentiment("horrible")[0] < 0)
    # sentiment() also accepts wordnet Synset and parsed Text objects.
    self.assertTrue(en.sentiment(en.wordnet.synsets("horrible", pos="JJ")[0])[0] < 0)
    self.assertTrue(en.sentiment(en.Text(en.parse("A bad book. Really horrible.")))[0] < 0)
    # Assert the accuracy of the sentiment analysis.
    # Given are the scores for Pang & Lee's polarity dataset v2.0:
    # http://www.cs.cornell.edu/people/pabo/movie-review-data/
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    reviews = []
    for score, review in Datasheet.load(os.path.join("corpora", "pang&lee-polarity.txt")):
        reviews.append((review, int(score) > 0))
    A, P, R, F = test(lambda review: en.positive(review), reviews)
    self.assertTrue(A > 0.71)
    self.assertTrue(P > 0.72)
    self.assertTrue(R > 0.70)
    self.assertTrue(F > 0.71)
    # print() call form: valid on Python 2 and 3 (original used a
    # Python 2-only print statement).
    print("pattern.en.sentiment()")
w = w.lower() w = w.strip(",.!?") if w in sentiment: score += sentiment[w] n += 1 return score / (n or 1) > threshold # Load the testing data. data = Datasheet.load("books-fr.test.csv") data.columns[1].map(lambda v: v == "True") # I quickly annotated the top 50 adjectives and got # P 0.56 and R 0.78, which approximates the performance of the SVM. # We can probably get better scores by annotating more adjectives. print test(lambda review: positive(review), data) print # We can also calculate kappa on the manual annotation scores. # Kappa is a measurement of agreement or consensus. # We want to know the general agreement of positive (+1) vs. negative (-1). # If the agreement is low, that means the sentiment lexicon is biased, # since the annotators did not agree on all scores. scores = Datasheet.load("sentiment.csv - Sheet 1.csv", headers=True) # 1) Cut off the first three columns. scores = scores[:, 3:] # 2) Remove empty fields (= annotator did not enter a score for this adjective). scores = [[float(x) for x in row if x != ""] for row in scores] # 3) Calculate the maximum number of different annotators. n = max([len(row) for row in scores]) # 4) Keep only rows for which each annotator entered a score.