from pattern.db import Datasheet
from pattern.metrics import avg

# Why annotate a sentiment lexicon by hand instead of training a classifier?
# Good reasons:
# 1) It's nicer to have a human-readable data set (XML, CSV) than a binary pickled file.
# 2) The classifier is domain-specific.
#    If it is trained on book reviews, it will do well for book reviews.
#    If it is trained on Twitter messages, it will do well for Twitter messages.
#    A classifier trained on Twitter messages may perform very poorly on book reviews.
#    A lexicon of adjectives and their scores "should be" cross-domain.
# Bad reasons:
# 1) Intuitively, we don't trust machines and we think we can do better.

# Load the sentiment lexicon.
sentiment = {}
for row in Datasheet.load("sentiment.csv - Sheet 1.csv", headers=True):
    scores = [float(x) for x in row[3:] if x != ""]  # Exclude empty fields.
    if scores:
        sentiment[row[0]] = avg(scores)

# Inherit the score of each adjective to the inflected forms of the adjective.
# If parfait = +1.0, then parfaite = +1.0 and parfaites = +1.0.
for lemma, forms in Datasheet.load("adj-fr.csv"):
    for form in forms.split(","):
        if lemma in sentiment:
            sentiment[form] = sentiment[lemma]


def positive(review, threshold=0.0):
    """ Returns True if the given review is positive,
        based on the average sentiment score of the adjectives in the text.
    """
    score = 0.0
    n = 0
    for w in review.replace("\n", " ").split(" "):
        w = w.lower()
        w = w.strip(",.!?")
        if w in sentiment:
            score += sentiment[w]
            n += 1
    return score / (n or 1) > threshold
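# A quick sanity check (a sketch: it assumes "parfait" is in the loaded lexicon
# with a positive score; the one-line review below is made up for illustration):
if __name__ == "__main__":
    for w in ("parfait", "parfaite", "parfaites"):
        print(w, sentiment.get(w))        # inflected forms share the lemma's score
    print(positive("Un film parfait !"))  # expected: True, if "parfait" scores > 0.0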
from pattern.en import parse
from pattern.en import pprint
from pattern.en import sentiment, polarity, subjectivity, positive
from pattern.en import wordnet, ADJECTIVE
from pattern.metrics import avg

#s = "Poland says the combination of a second wave of COVID-19 with flu season could create \"a lot of confusion\" because of their overlap in symptoms and put a heavy strain on the health care system."

'''
for word in ("amazing", "horrible", "public"):
    print(word, sentiment(word))

print(sentiment(
    "The movie attempts to be surreal by incorporating time travel and various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring."))
'''

#s = "<p>In a matter of weeks, the coronavirus has spiralled from a handful of cases in China to what many experts fear will become the next global pandemic."

s = ("Poland says the combination of a second wave of COVID-19 with flu season "
     "could create \"a lot of confusion\" because of their overlap in symptoms "
     "and put a heavy strain on the health care system.")

# Overall (polarity, subjectivity) for the whole sentence.
print(sentiment(s))

# Per-chunk assessments; the loop variables are named p and subj
# so they don't shadow the imported polarity() and subjectivity().
for chunk, p, subj, label in sentiment(s).assessments:
    print(chunk, p, subj, label)

# Average polarity of the word-based assessments (label is None).
a = sentiment(s).assessments
score1 = avg([p for chunk, p, subj, label in a if label is None])
print(score1)

print(sentiment("fear"))

#print(wordnet.sentiwordnet["horrible"])
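# The polarity(), subjectivity() and positive() helpers imported above are
# shortcuts around sentiment(); a minimal sketch of how they relate
# (positive() compares the polarity against a threshold, 0.1 by default):
print(polarity(s))                  # the first value of sentiment(s)
print(subjectivity(s))              # the second value of sentiment(s)
print(positive(s))                  # True if the polarity exceeds the threshold
print(positive(s, threshold=0.3))   # a stricter cut-off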
from pattern.vector import centroid, distance
from pattern.metrics import avg


def variance(cluster):
    """ Returns the average distance of the vectors in the cluster
        to the center (centroid) of the cluster.
    """
    return avg([distance(centroid(cluster), v) for v in cluster])
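# A toy usage sketch (the three feature vectors are made-up data; it assumes
# the imports and the variance() definition above, plus pattern.vector's Vector):
from pattern.vector import Vector

cluster = [
    Vector(sweet=1.0, sour=0.2),
    Vector(sweet=0.8, sour=0.4),
    Vector(sweet=0.9, bitter=0.1),
]
print(variance(cluster))  # lower value = tighter, more coherent cluster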
from pattern.en import sentiment
from pattern.en import MOOD
from pattern.metrics import avg

# For fine-grained analysis,
# the return value of sentiment() has a special "assessments" property.
# Each assessment is a (chunk, polarity, subjectivity, label)-tuple,
# where chunk is a list of words (e.g., "not very good").
# The label offers additional meta-information.
# For example, its value is MOOD for emoticons:
s = "amazing... :/"
print(sentiment(s))
for chunk, polarity, subjectivity, label in sentiment(s).assessments:
    print(chunk, polarity, subjectivity, label)

# Observe the output.
# The average sentiment is positive because the expression contains "amazing".
# However, the smiley is slightly negative, hinting at the author's bad mood.
# He or she might be using sarcasm.
# We could work this out from the fine-grained analysis.
a = sentiment(s).assessments
# Average polarity for words.
score1 = avg([p for chunk, p, s, label in a if label is None])
# Average polarity for emoticons (their label is the MOOD constant).
score2 = avg([p for chunk, p, s, label in a if label is MOOD])
if score1 > 0 and score2 < 0:
    print("...sarcasm?")
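# The same two-score heuristic packaged as a reusable helper (a sketch:
# the name maybe_sarcastic() and the 0.0 cut-offs are our own choice, not part of Pattern):
def maybe_sarcastic(text):
    # Note: assumes the text contains both words and at least one emoticon.
    a = sentiment(text).assessments
    word_score = avg([p for chunk, p, s, label in a if label is None])
    mood_score = avg([p for chunk, p, s, label in a if label is MOOD])
    return word_score > 0 and mood_score < 0

print(maybe_sarcastic("amazing... :/"))   # expected: True (positive words, negative smiley)
print(maybe_sarcastic("horrible... :/"))  # expected: False (words and smiley agree)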
from pattern.db import Datasheet
from pattern.metrics import avg

# This is just the stuff from 5-annotation.py, without the tests.
# You can bundle it in an application for predicting sentiment in French text.

sentiment = {}
for row in Datasheet.load("sentiment.csv - Sheet 1.csv", headers=True):
    scores = [float(x) for x in row[3:] if x != ""]
    if scores:
        sentiment[row[0]] = avg(scores)

for lemma, forms in Datasheet.load("adj-fr.csv"):
    for form in forms.split(","):
        if lemma in sentiment:
            sentiment[form] = sentiment[lemma]


def positive(review, threshold=0.0):
    """ Returns True if the given review is positive,
        based on the average sentiment score of the adjectives in the text.
    """
    score = 0.0
    n = 0
    for w in review.replace("\n", " ").split(" "):
        w = w.lower()
        w = w.strip(",.!?")
        if w in sentiment:
            score += sentiment[w]
            n += 1
    return score / (n or 1) > threshold
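# Example usage (a sketch: the two one-line French reviews are made up, and assume
# that adjectives such as "parfaite", "horrible" and "mauvais" are in the lexicon):
if __name__ == "__main__":
    reviews = [
        "Une excellente surprise, une distribution parfaite !",
        "Un film horrible, des dialogues mauvais.",
    ]
    for r in reviews:
        print(positive(r), r)  # expected: True for the first review, False for the second
    print(positive(reviews[0], threshold=0.5))  # a stricter cut-off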