def test_agreement(self):
    # Assert 0.210 (example from http://en.wikipedia.org/wiki/Fleiss'_kappa).
    m = [[0, 0, 0, 0, 14],
         [0, 2, 6, 4,  2],
         [0, 0, 3, 5,  6],
         [0, 3, 9, 2,  0],
         [2, 2, 8, 1,  1],
         [7, 7, 0, 0,  0],
         [3, 2, 6, 3,  0],
         [2, 5, 3, 2,  2],
         [6, 5, 2, 1,  0],
         [0, 2, 2, 3,  7]]
    v = metrics.agreement(m)
    self.assertAlmostEqual(v, 0.210, places=3)
    print "pattern.metrics.agreement()"
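# agreement() computes Fleiss' kappa: each row of m is one rated item, each
# column is a rating category, and each cell counts the raters that picked
# that category (here: 10 items, 5 categories, 14 raters per item).
# Below is a minimal stand-alone sketch of that computation, for reference only;
# the name fleiss_kappa() is made up here and this is not Pattern's implementation.
def fleiss_kappa(m):
    N = len(m)     # number of rated items (rows)
    k = len(m[0])  # number of rating categories (columns)
    n = sum(m[0])  # number of ratings per item (assumed constant)
    # Observed agreement: per item, the proportion of rater pairs that agree.
    P = [(sum(x * x for x in row) - n) / float(n * (n - 1)) for row in m]
    # Expected agreement: based on the overall proportion of each category.
    p = [sum(row[j] for row in m) / float(N * n) for j in range(k)]
    Pe = sum(x * x for x in p)
    return (sum(P) / float(N) - Pe) / (1 - Pe)

# fleiss_kappa(m) yields ~0.210 for the Wikipedia matrix in the test above.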
from pattern.db import Datasheet
from pattern.metrics import agreement

# We can also calculate kappa on the manual annotation scores.
# Kappa is a measurement of agreement or consensus.
# We want to know the general agreement on positive (+1) vs. negative (-1).
# If the agreement is low, that means the sentiment lexicon is biased,
# since the annotators did not agree on all scores.
scores = Datasheet.load("sentiment.csv - Sheet 1.csv", headers=True)
# 1) Cut off the first three columns.
scores = scores[:, 3:]
# 2) Remove empty fields (= annotator did not enter a score for this adjective).
scores = [[float(x) for x in row if x != ""] for row in scores]
# 3) Calculate the maximum number of different annotators.
n = max([len(row) for row in scores])
# 4) Keep only rows for which each annotator entered a score.
scores = [row for row in scores if len(row) == n]
# 5) Sum all positive / negative / neutral votes per adjective.
scores = [[len([x for x in row if x > 0]),
           len([x for x in row if x < 0]),
           len([x for x in row if x == 0])] for row in scores]
try:
    print agreement(scores)
except:
    pass

# Can you think of ways to make the positive() function better?
# - Should we do something with exclamation marks? (e.g., "belle" <=> "belle!")
# - Should we do something with adverbs? (e.g., "belle" <=> "tres belle")
# - Should we process emoticons? Verbs?
# - ...
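# To see what steps 2-5 above actually produce, here is the same pipeline run
# on a few hand-made rows (the scores below are invented for illustration;
# in the exercise they come from "sentiment.csv - Sheet 1.csv").
rows = [
    ["1.0", "0.5", "1.0"],   # three annotators agree: positive
    ["-1.0", "-0.5", ""],    # a missing score, so the row is dropped in step 4
    ["0.0", "-1.0", "1.0"],  # no agreement at all
]
rows = [[float(x) for x in row if x != ""] for row in rows]  # step 2
n = max([len(row) for row in rows])                          # step 3: n = 3
rows = [row for row in rows if len(row) == n]                # step 4
votes = [[len([x for x in row if x > 0]),                    # step 5: + votes
          len([x for x in row if x < 0]),                    #         - votes
          len([x for x in row if x == 0])] for row in rows]  #         0 votes
print votes  # [[3, 0, 0], [1, 1, 1]]; same shape of matrix agreement() expects.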