class KappaRater(object):

    def __init__(self, S):
        this_dir = os.path.dirname(os.path.realpath(__file__))
        dir1 = os.path.join(this_dir, S, "G1")
        dir2 = os.path.join(this_dir, S, "G2")
        self.annotation_task = AnnotationTask(data=self.__readfile(dir1, dir2))

    def __readfile(self, *args):
        data = []
        for i in xrange(len(args)):
            lines = self.__get_lines(args[i])
            coder = "c" + str(i + 1)
            for ind, line in enumerate(lines):
                item, label = line
                d = (coder, str(ind) + "_" + item, label)
                # print d
                data.append(d)
        return data

    def __get_lines(self, dir):
        lines = []
        for root, dirs, files in os.walk(dir):
            for file in files:
                if file.endswith(".txt"):
                    # f = open(os.path.join(root, file), 'r')
                    with io.open(os.path.join(root, file), 'r', newline='',
                                 encoding="utf-8") as f:  # keep \r\n for .ann positioning
                        print f
                        lines += anntoconll.text_to_conll_lines(f)
                        f.close()
        return lines

    def kappa(self):
        return self.annotation_task.kappa()
def __main__(argv):
    if len(argv) != 2:
        print("Specify cmd arg")
        sys.exit(2)
    else:
        arg = argv[1]
        if arg == 'img':
            reliability_mat = getReliabilityMatImg(
                "../data/imageGID_job_map_expt2_corrected.csv")
        else:
            reliability_mat = getReliabilityMatTurker()

    t = AnnotationTask(data=reliability_mat)
    print("Calculating the agreement scores")
    alpha = t.alpha()
    print("Alpha = %f" % alpha)
    s = t.S()
    print("S = %f" % s)
    pi = t.pi()
    print("Pi = %f" % pi)
    kappa = t.kappa()
    print("kappa = %f" % kappa)
def calculate_kappa(filename):
    # Load the labels from the annotated tweets.
    label_list = []
    with open('data/' + filename + '_data_result.json') as json_file:
        tweets = json.load(json_file)
        for row in tweets:
            label_list.append(row['label'])

    # Generate two fake label sets to calculate kappa against.
    man_1_label = change_some_values(label_list)
    man_2_label = change_some_values(label_list)

    # Save the labels to CSV files.
    save_to_csv('data/label_1.csv', man_1_label)
    save_to_csv('data/label_2.csv', man_2_label)

    # Calculate inter-annotator agreement. Note: concatenating the zipped
    # triples like this relies on Python 2, where zip() returns a list.
    civ_1 = ['c1'] * len(man_1_label)
    civ_2 = ['c2'] * len(man_2_label)
    item_num_list = range(0, len(man_1_label))
    civ_1 = zip(civ_1, item_num_list, man_1_label)
    civ_2 = zip(civ_2, item_num_list, man_2_label)
    task_data = civ_1 + civ_2

    task = AnnotationTask(data=task_data)
    print 'kappa: ' + str(task.kappa())
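The triple construction above relies on Python 2, where zip() returns a list that can be concatenated. A minimal sketch of the same idea under Python 3, with made-up labels standing in for man_1_label / man_2_label:

from nltk.metrics.agreement import AnnotationTask

# Hypothetical labels standing in for man_1_label / man_2_label.
man_1_label = ['pos', 'neg', 'neu', 'pos']
man_2_label = ['pos', 'neg', 'pos', 'pos']

# In Python 3, zip() returns an iterator, so materialize the triples
# before concatenating them into one (coder, item, label) sequence.
civ_1 = list(zip(['c1'] * len(man_1_label), range(len(man_1_label)), man_1_label))
civ_2 = list(zip(['c2'] * len(man_2_label), range(len(man_2_label)), man_2_label))

task = AnnotationTask(data=civ_1 + civ_2)
print('kappa: ' + str(task.kappa()))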
def calc_agreements(nr_of_abstracts=150):
    # Loop over the abstracts and calculate the kappa and alpha per abstract.
    aggregate = []
    for i in range(0, nr_of_abstracts):
        # try:
        annotators = round_robin(i)
        annotations_A = flatten(get_annotations(i, annotators[0]))
        annotations_B = flatten(get_annotations(i, annotators[1]))
        annotations = __str_combine_annotations(annotations_A, annotations_B)
        a = AnnotationTask(annotations, agreement_fn)
        aggregate.append({
            "kappa": a.kappa(),
            "alpha": a.alpha(),
            "annotator_A": annotators[0],
            "annotator_B": annotators[1]
        })
        # except:
        #     print("Could not calculate kappa for abstract %i" % (i + 1))
        #     pass

    # Summary statistics
    kappa = describe([a['kappa'] for a in aggregate])
    print("number of abstracts %i" % kappa[0])
    print("[kappa] mean: " + str(kappa[2]))
    print("[kappa] variance: " + str(kappa[3]))

    alpha = describe([a['alpha'] for a in aggregate])
    print("[alpha] mean: " + str(alpha[2]))
    print("[alpha] variance: " + str(alpha[3]))
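The numeric indices into describe() above follow SciPy's DescribeResult layout (nobs, minmax, mean, variance, skewness, kurtosis). A small sketch, with purely illustrative kappa values, that uses the field names instead:

from scipy.stats import describe

# Purely illustrative per-abstract kappa values.
kappas = [0.61, 0.73, 0.55, 0.80]

stats = describe(kappas)
# DescribeResult fields: nobs, minmax, mean, variance, skewness, kurtosis,
# so stats[0] == stats.nobs, stats[2] == stats.mean, stats[3] == stats.variance.
print("number of abstracts %i" % stats.nobs)
print("[kappa] mean: " + str(stats.mean))
print("[kappa] variance: " + str(stats.variance))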
def get_kappa(input):
    head, body = fio.ReadMatrix(input, True)

    data = []
    for i, row in enumerate(body):
        for coder, label in enumerate(row):
            if label == 'a':
                label = '0'
            data.append((head[coder], i, label))

    task = AnnotationTask(data)
    print head[0], head[1], task.kappa_pairwise(head[0], head[1])
    print head[0], head[2], task.kappa_pairwise(head[0], head[2])
    print head[1], head[2], task.kappa_pairwise(head[1], head[2])
    return task.kappa()
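The three explicit kappa_pairwise calls above assume exactly three coders. A hedged sketch that generalizes the same idea to any number of coders, using toy (coder, item, label) triples and hypothetical coder names:

from itertools import combinations
from nltk.metrics.agreement import AnnotationTask

# Toy (coder, item, label) triples for three hypothetical coders.
data = [
    ('c1', 0, 'A'), ('c2', 0, 'A'), ('c3', 0, 'B'),
    ('c1', 1, 'B'), ('c2', 1, 'B'), ('c3', 1, 'B'),
    ('c1', 2, 'A'), ('c2', 2, 'B'), ('c3', 2, 'A'),
]
task = AnnotationTask(data)

# Pairwise kappa for every coder pair, then the overall (averaged) kappa.
for a, b in combinations(['c1', 'c2', 'c3'], 2):
    print(a, b, task.kappa_pairwise(a, b))
print('overall kappa:', task.kappa())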
def agree_tags(delta, column):
    """
    Computes agreement for single-token labelling tasks.
    :param delta: the merged annotation data being compared
    :param column: the column for which agreement is to be computed
    :return:
    """
    by_field = reverse_tags(delta, column)
    task = AnnotationTask(data=by_field)
    oa = task.avg_Ao()      # observed agreement
    s = task.S()            # Bennett, Albert and Goldstein S (1954): all categories equally likely
    pi = task.pi()          # Scott's pi (1955): single distribution
    kappa = task.kappa()    # Cohen's kappa (1960): individual coder distribution
    w_kappa = task.weighted_kappa()
    alpha = task.alpha()    # Krippendorff's alpha (1980)
    return oa, s, pi, kappa, w_kappa, alpha
def compute_annotator_agreement_nltkmetrics(data_array):
    '''See http://nltk.org/api/nltk.metrics.html#nltk.metrics.agreement'''
    print "####### Agreement coefficients according to NLTK metrics.agreement #######"

    t = AnnotationTask(data=data_array)
    print "Average observed agreement across all coders and items: " + str(t.avg_Ao())
    print "Cohen's Kappa (Cohen 1960): " + str(t.kappa())
    print "Weighted kappa (Cohen 1968): " + str(t.weighted_kappa())
    print "Scott's pi (Scott 1955): " + str(t.pi())
    #print "pi_avg: " + str(t.pi_avg())
    print "alpha (Krippendorff 1980): " + str(t.alpha())
    print "Observed disagreement for the alpha coefficient: " + str(t.Do_alpha())
    print "S (Bennett, Albert and Goldstein 1954): " + str(t.S())
    #print "n-notation used in Artstein and Poesio (2007): " + str(t.N(k=, ic???))
    print "Observed disagreement for the weighted kappa coefficient averaged over all labelers: " + str(t.Do_Kw())
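The coefficients above are all computed with NLTK's default binary distance. For ordinal labels, AnnotationTask also accepts a distance function; a minimal sketch with toy 1-to-5 ratings and NLTK's interval_distance (not taken from the function above):

from nltk.metrics.agreement import AnnotationTask
from nltk.metrics.distance import interval_distance

# Toy ordinal ratings on a 1-5 scale from two hypothetical coders.
data_array = [
    ('c1', 'item1', 1), ('c2', 'item1', 1),
    ('c1', 'item2', 3), ('c2', 'item2', 2),
    ('c1', 'item3', 5), ('c2', 'item3', 4),
    ('c1', 'item4', 2), ('c2', 'item4', 2),
]

# With interval_distance, alpha penalizes large rating gaps more than small ones.
t = AnnotationTask(data=data_array, distance=interval_distance)
print("alpha (interval distance): " + str(t.alpha()))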
def kappa(self):
    """Data is a list of lists. Each element is a list: [annotator, element, label]"""
    if not KAPPA:
        return 'Not installed'
    #if self.last is None: return  # must be specific to a feature
    data = []
    nb = 1
    for elem in self.ano:
        u1 = elem[1]
        u2 = elem[2]
        if u1 is None or u2 is None:
            continue
        else:
            data.append([self.ano1.get_code(), nb, u1.get(self.last)])
            data.append([self.ano2.get_code(), nb, u2.get(self.last)])
            nb += 1
    task = AnnotationTask(data)
    return task.kappa()
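AnnotationTask.kappa() raises ZeroDivisionError when the expected agreement is 1, e.g. when both coders assigned one identical label throughout. A hedged defensive wrapper (a hypothetical helper, not part of the class above):

from nltk.metrics.agreement import AnnotationTask

def safe_kappa(data):
    # Returns None instead of raising when kappa is undefined, e.g. when
    # both coders assigned one identical label to every item.
    task = AnnotationTask(data)
    try:
        return task.kappa()
    except ZeroDivisionError:
        return None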
def getagreement(tpl, datadir, task_type='all'):
    """Get agreement values for annotators in the :data:'tpl' list

    Args:
       tpl (list): combination group of annotators
       datadir (str): Cache data directory used by joblib

    Returns:
       namedtuple defined as ``Agree = collections.namedtuple('Agree', ['kappa', 'alpha', 'avg_ao'], verbose=True)``
    """
    mem = Memory(cachedir=datadir)
    readjson = mem.cache(json2taskdata.readjson, mmap_mode='r')
    create_task_data = mem.cache(json2taskdata.create_task_data)
    count_occurrances = mem.cache(json2taskdata.count_occurrances)
    count_labels = mem.cache(json2taskdata.count_labels)

    annotators = set()
    lectask = []
    #---------------------------------------------------------------------
    # for each annotator in group tpl
    #---------------------------------------------------------------------
    for stditem in tpl:
        aname = stditem.split('.')[0][3:][-2:]
        annotators.add(aname)
        lecdict = readjson(stditem)
        newlectask = create_task_data(lecdict, task_type=task_type, annotator=aname)
        label_data = json2taskdata.create_labels_list(newlectask)
        abscount = count_occurrances(str(label_data))
        yaml.dump(abscount, open(os.path.join(datadir, 'abscount-' + aname + '.yaml'), 'w'))
        setcount = count_labels(newlectask)
        yaml.dump(setcount, open(os.path.join(datadir, 'setcount-' + aname + '.yaml'), 'w'))
        lectask = lectask + newlectask

    task = AnnotationTask(data=lectask, distance=nltk.metrics.distance.masi_distance_mod)

    return {frozenset(annotators): Agree(task.kappa(), task.alpha(), task.avg_Ao())}
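The call above passes a MASI-style distance so that the agreement measures can handle set-valued labels; masi_distance_mod appears to be a project-local variant. A hedged sketch of the same idea with NLTK's stock masi_distance and toy frozenset labels:

from nltk.metrics.agreement import AnnotationTask
from nltk.metrics.distance import masi_distance

# Toy set-valued labels; frozensets keep them hashable for AnnotationTask.
lectask = [
    ('an1', 'seg1', frozenset(['stat', 'def'])),
    ('an2', 'seg1', frozenset(['stat'])),
    ('an1', 'seg2', frozenset(['example'])),
    ('an2', 'seg2', frozenset(['example'])),
]

task = AnnotationTask(data=lectask, distance=masi_distance)
print(task.kappa(), task.alpha(), task.avg_Ao())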
def status_view(request, task_id=None):
    """
    Renders the evaluation tasks status page for staff users.
    """
    LOGGER.info('Rendering evaluation task overview for user "{0}".'.format(
        request.user.username))

    # Check if user is member in WMT13 group. If so, redirect to wmt13 app.
    if request.user.groups.filter(name="WMT13").exists():
        LOGGER.info('Redirecting user "{0}" to WMT13 overview.'.format(
            request.user.username))
        return redirect('appraise.wmt13.views.overview')

    if task_id:
        task = get_object_or_404(EvaluationTask, task_id=task_id)

        headers = task.get_status_header()
        status = []
        for user in task.users.all():
            status.append((user.username, task.get_status_for_user(user)))

        scores = None
        result_data = []
        raw_result_data = Counter()
        users = list(task.users.all())

        for item in EvaluationItem.objects.filter(task=task):
            results = []
            for user in users:
                qset = EvaluationResult.objects.filter(user=user, item=item)
                if qset.exists():
                    category = str(qset[0].results)
                    results.append((user.id, item.id, category))
                    raw_result_data[qset[0].raw_result] += 1
            if len(results) == len(users):
                result_data.extend(results)

        # todo for gisting, calculate - somehow - the percentage of answers
        # against the number of different answers in that same gap, and also
        # regroup them for readability
        _raw_results = []
        _keys = raw_result_data.keys()
        _total_results = float(sum(raw_result_data.values()))
        for key in sorted(_keys):
            value = raw_result_data[key]
            _raw_results.append((key, value, 100 * value / _total_results))

        try:
            # Computing inter-annotator agreement only makes sense for more
            # than one coder -- otherwise, we only display result_data...
            if len(users) > 1:
                # Check if we can safely use NLTK's AnnotationTask class.
                try:
                    from nltk.metrics.agreement import AnnotationTask
                    chk = AnnotationTask(data=[('b', '1', 'k'), ('a', '1', 'k')])
                    assert(chk == 1.0)

                except AssertionError:
                    LOGGER.debug('Fixing outdated version of AnnotationTask.')
                    from appraise.utils import AnnotationTask

                # We have to sort annotation data to prevent StopIterator errors.
                result_data.sort()
                annotation_task = AnnotationTask(result_data)

                scores = (
                    annotation_task.alpha(),
                    annotation_task.kappa(),
                    annotation_task.S(),
                    annotation_task.pi()
                )

        except ZeroDivisionError:
            scores = None

        except ImportError:
            scores = None

        dictionary = {
            'combined': task.get_status_for_users(),
            'commit_tag': COMMIT_TAG,
            'headers': headers,
            'scores': scores,
            'raw_results': _raw_results,
            'status': status,
            'task_id': task.task_id,
            'task_name': task.task_name,
            'title': 'Evaluation Task Status',
        }

        return render(request, 'evaluation/status_task.html', dictionary)

    else:
        evaluation_tasks = {}
        for task_type_id, task_type in APPRAISE_TASK_TYPE_CHOICES:
            # We collect a list of task descriptions for this task_type.
            evaluation_tasks[task_type] = []

            # Super users see all EvaluationTask items, even non-active ones.
            if request.user.is_superuser:
                _tasks = EvaluationTask.objects.filter(task_type=task_type_id)
            else:
                _tasks = EvaluationTask.objects.filter(task_type=task_type_id,
                                                       active=True)

            # Loop over the QuerySet and compute task description data.
            for _task in _tasks:
                if not APPRAISE_TASK_CACHE.has_key(_task.task_id):
                    APPRAISE_TASK_CACHE[_task.task_id] = {}

                _cache = APPRAISE_TASK_CACHE[_task.task_id]
                if not _cache.has_key(request.user.username):
                    _update_task_cache(_task, request.user)
                _task_data = _cache[request.user.username]

                # Append new task description to current task_type list.
                evaluation_tasks[task_type].append(_task_data)

            # If there are no task descriptions for this task_type, we skip it.
            if len(evaluation_tasks[task_type]) == 0:
                evaluation_tasks.pop(task_type)

        dictionary = {
            'active_page': "STATUS",
            'commit_tag': COMMIT_TAG,
            'evaluation_tasks': evaluation_tasks,
            'title': 'Evaluation Task Status',
        }

        return render(request, 'evaluation/status.html', dictionary)
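The sort before building the AnnotationTask matters: the comment in the view attributes StopIteration errors in older NLTK releases to unsorted triples. A minimal illustration of that step on hypothetical (coder, item, label) triples:

from nltk.metrics.agreement import AnnotationTask

# Hypothetical (coder, item, label) triples arriving in query order.
result_data = [
    (2, 'item-7', 'B'),
    (1, 'item-3', 'A'),
    (2, 'item-3', 'A'),
    (1, 'item-7', 'A'),
]

# Sorting groups the triples by coder and item before they reach AnnotationTask.
result_data.sort()
annotation_task = AnnotationTask(result_data)
print(annotation_task.kappa())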
data = []
sentiment_r1_5_scale = []
sentiment_r2_5_scale = []
try:
    for r1, r2 in zip(sentences_r1, sentences_r2):
        sentiment_r1_5_scale.append(int(r1[5]))
        data.append((6, r1[0], r1[5]))
        sentiment_r2_5_scale.append(int(r2[5]))
        data.append((7, r2[0], r2[5]))
        if (r1[0] != r2[0]):
            print r1[0]
except Exception, e:
    print e

# disconnect from server
db.close()

print i
print skll.kappa(sentiment_r1_5_scale, sentiment_r2_5_scale)

annotation = AnnotationTask(data=data)
print annotation.kappa()
print annotation.alpha()
def annotation(output):
    t = AnnotationTask(data=[x.split() for x in open(output)])
    print "\nAverage observed agreement: " + str(t.avg_Ao())
    print "\nKappa: " + str(t.kappa())
def dispatch(self, request, *args, **kwargs):
    self.task = get_object_or_404(Task, pk=self.kwargs['pk'])
    self.array = []
    self.kappa = []
    self.kappa1 = []
    self.kappa_name = "/media/csvfileFinal.csv"
    self.eval_name = "/media/csvfileP.csv"
    self.kappa_nameLong = "/media/csvfileFinal.csv"
    self.lblr = []
    self.head = []
    self.coder_emails = PostResponse.objects.filter(
        task=self.task.pk).values_list(
        'responder__email', flat=True).distinct().order_by('responder__email')
    post_list = self.task.post_list.all()

    # Remove CSV files left over from a previous run.
    media_root = '/home/salae001/new/LabelingSystem-master/labelingsystem/media'
    for leftover in ('csvfileFinal.csv', 'csvfileP.csv', 'csvfile.csv'):
        path = os.path.join(media_root, leftover)
        if os.path.exists(path):
            os.remove(path)

    # csvfileP.csv: one row per post, one column per coder, plus a majority vote.
    name = "media/csvfileP.csv"
    self.eval_name = "/" + name
    filepp = open(name, "w+")
    filepp.write(',')
    for coder_email in self.coder_emails:
        filepp.write(coder_email)
        filepp.write(',')
    filepp.write('Majority Vote')
    filepp.write('\n')

    for post in post_list:
        row = [post.content]
        filepp.write(post.content)
        filepp.write(',')

        voteList = {}
        listTemp = []

        if len(self.coder_emails) > 5:
            # Only the first five coders are shown in the rendered table,
            # but the CSV still gets a column for every coder.
            self.coder_emails_temp = list(self.coder_emails[0:5])
            self.coder_emails_temp.append("(List continues...)")
            for coder_email in self.coder_emails:
                try:
                    post_response = PostResponse.objects.filter(
                        task=self.task.pk, post=post.pk,
                        responder__email=coder_email).last()
                    label = post_response.label
                    filepp.write(str(label))
                    listTemp.append(str(label))
                except Exception:
                    filepp.write('N/A')
                    listTemp.append('N/A')
                filepp.write(',')
        else:
            self.coder_emails_temp = self.coder_emails

        for coder_email in self.coder_emails_temp:
            if len(self.coder_emails) > 5 and coder_email == "(List continues...)":
                label = '...'
            else:
                label = 'N/A'
                try:
                    post_response = PostResponse.objects.filter(
                        task=self.task.pk, post=post.pk,
                        responder__email=coder_email).last()
                    label = post_response.label
                    if len(self.coder_emails) <= 5:
                        filepp.write(str(label))
                        filepp.write(',')
                    listTemp.append(str(label))
                except Exception:
                    if len(self.coder_emails) <= 5:
                        filepp.write('N/A')
                        filepp.write(',')
                    listTemp.append(str(label))
            row.append(label)

        # Majority vote over the labels collected for this post.
        maximum = ('', 0)  # (most frequent label, occurrences)
        for n in listTemp:
            voteList[n] = voteList.get(n, 0) + 1
            if voteList[n] > maximum[1]:
                maximum = (n, voteList[n])
        filepp.write(maximum[0])
        filepp.write('\n')
        row.append(maximum[0])
        self.array.append(row)
    filepp.close()

    # Pairwise inter-annotator agreement: one Cohen's kappa per coder pair
    # (via NLTK's AnnotationTask) plus Krippendorff's alpha, written to CSV.
    try:
        post_response_list = PostResponse.objects.filter(task=self.task.pk)
        post_response_t = [
            part.encode("utf8") for part in PostResponse.objects.filter(
                task=self.task.pk).values_list('responder__email',
                                               flat=True).distinct()
        ]

        lst_rp = []
        triple_list = []
        ctr = 0

        if len(post_response_t) > 5:
            for post_response in post_response_t[0:5]:
                lst_rp.append(str(post_response).replace('b\'', '').replace('\'', ''))
            lst_rp.append("(List continues...)")
        else:
            for post_response in post_response_t:
                lst_rp.append(str(post_response).replace('b\'', '').replace('\'', ''))
            self.head.append(lst_rp)

        for pair in list(combinations(post_response_t, 2)):
            annotation_triplet_list = []
            ip = []

            temp = str(pair[0]).replace('b\'', '').replace('\'', '')
            if [temp, temp, '0'] not in triple_list:
                # Diagonal entry: a coder trivially agrees with itself.
                triple_list.append([temp, temp, '0'])
                ctr = ctr + 1

            triple_list.append([])
            for s in pair:
                st = str(s).replace('b\'', '').replace('\'', '')
                ip.append(st)
                triple_list[ctr].append(st)

            # Collect (coder, item, label) triples for this pair of coders.
            for post_response in post_response_list:
                if post_response.responder.email in ip:
                    annotation_triplet_list.append((
                        post_response.responder.email,
                        post_response.post.content,
                        post_response.label.content))

            t = AnnotationTask(annotation_triplet_list)
            triple_list[ctr].append(str(t.kappa()))
            self.lblr.append(triple_list)
            ctr = ctr + 1

        if len(post_response_t) > 5:
            self.alpha1 = t.alpha()
            self.alpha = self.alpha1
            self.kappa1.append(triple_list)
            name = "media/csvfile.csv"
            self.kappa_nameLong = "/" + name
        else:
            self.alpha = t.alpha()
            self.kappa.append(triple_list)
            name = "media/csvfileFinal.csv"
            self.kappa_name = "/" + name

        # Write the pairwise kappa matrix: a header row of coder e-mails,
        # then a lower-triangular matrix of the pairwise scores.
        filep = open(name, "w+")
        i = 0
        ct = 1
        filep.write(',')
        prev_email = 's'
        for email in triple_list:
            if email[0] != prev_email:
                prev_email = email[0]
                filep.write(email[0])
                filep.write(',')
        filep.write(email[1])
        filep.write('\n')
        for row in triple_list:
            if i == 0 or i == ct - 1:
                filep.write(row[0])
                filep.write(',')
                for k in range(0, i):
                    filep.write('--,')
            if i == len(self.coder_emails) - 1:
                i = ct
                filep.write(row[2])
                filep.write('\n')
                ct = ct + 1
            else:
                i = i + 1
                filep.write(row[2])
                filep.write(',')
        filep.close()

    except Exception:
        self.alpha = 'N/A'
        name = "media/csvfileFinal.csv"
        self.kappa_name = "/" + name
        filep = open(name, "w+")

    return super(TaskEvaluationDetailView, self).dispatch(request, *args, **kwargs)
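Stripped of the CSV bookkeeping, the agreement computation in the view above amounts to one AnnotationTask per coder pair, built from (responder email, post content, label content) triples. A hedged distillation using the model names from the snippet (the helper function itself is hypothetical):

from itertools import combinations
from nltk.metrics.agreement import AnnotationTask

def pairwise_agreement(task_pk):
    # One AnnotationTask per coder pair, built from
    # (responder email, post content, label content) triples.
    responses = PostResponse.objects.filter(task=task_pk)
    coders = sorted(set(r.responder.email for r in responses))
    scores = {}
    for a, b in combinations(coders, 2):
        triples = [(r.responder.email, r.post.content, r.label.content)
                   for r in responses if r.responder.email in (a, b)]
        try:
            scores[(a, b)] = AnnotationTask(triples).kappa()
        except ZeroDivisionError:
            scores[(a, b)] = None
    return scores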
'''
    if ('0' in line):
        print((annotator, word, '0'))
        data.append((annotator, '0', word))
    if ('1' in line):
        data.append((annotator, '1', word))
    if ('2' in line):
        data.append((annotator, '2', word))

with open("annotd") as d:
    content = d.readlines()
    for line in content:
        appender('d', line)

with open("annots") as d:
    content = d.readlines()
    for line in content:
        appender('s', line)

with open("annots") as d:
    content = d.readlines()
    for line in content:
        appender('k', line)

task = AnnotationTask(data)
#print(task.avg_Ao())
print(task.kappa())
""" Compute the inter-annotator agreement """ import nltk from nltk.metrics.agreement import AnnotationTask t1 = AnnotationTask(data=[x.split() for x in open("1.txt")]) print t1.kappa() t2 = AnnotationTask(data=[x.split() for x in open("2.txt")]) print t2.kappa()
data = pd.read_csv('../input_data/labels-C.csv', sep=';', index_col=0)
allcoders = data.columns
experts = ['KEY', 'MG', 'MS', 'TM']
novices = ['KEY', 'CK', 'GK', 'RM']
cols = novices

# Total values
taskdata = []
for coder in cols:
    for i in data[coder].index:
        taskdata.append([coder, i, data[coder][i]])

ratingtask = AnnotationTask(data=taskdata)
print("kappa " + str(ratingtask.kappa()))
print("fleiss " + str(ratingtask.multi_kappa()))
print("alpha " + str(ratingtask.alpha()))
print("scotts " + str(ratingtask.pi()))

# Pairwise values
similarities = []
for coders in itertools.product(cols, repeat=2):
    if coders[0] == coders[1]:
        similarities.append(1)
    else:
        taskdata = []
        for coder in coders:
            for i in data[coder].index:
                taskdata.append([coder, i, data[coder][i]])
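The pairwise loop above is cut off mid-iteration. One plausible way to finish that kind of comparison (a hedged sketch, not necessarily the original continuation) is to give each coder pair its own AnnotationTask and collect the pairwise kappas; it assumes the data frame and cols defined above:

import itertools
from nltk.metrics.agreement import AnnotationTask

def pairwise_kappa(data, cols):
    # Score each coder pair with its own AnnotationTask and collect the kappas.
    scores = {}
    for a, b in itertools.combinations(cols, 2):
        taskdata = [[coder, i, data[coder][i]]
                    for coder in (a, b) for i in data[coder].index]
        scores[(a, b)] = AnnotationTask(data=taskdata).kappa()
    return scores

print(pairwise_kappa(data, cols))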
from nltk.metrics.agreement import AnnotationTask
from nltk.metrics import ConfusionMatrix

# Here we have four items, each labeled by two different annotators.
# In two cases the annotators agree; in two cases they don't.
toy_data = [
    # annotator, element, label
    ['1', 5723, 'ORG'],
    ['2', 5723, 'ORG'],
    ['1', 55829, 'LOC'],
    ['2', 55829, 'LOC'],
    ['1', 259742, 'PER'],
    ['2', 259742, 'LOC'],
    ['1', 269340, 'PER'],
    ['2', 269340, 'LOC']
]
task = AnnotationTask(data=toy_data)
print(task.kappa())
print(task.alpha())

# 16:52: Yes! It works!
# The annotator is replaced by splitting the data into two variables.
# The element is replaced by its position in the list.
toy1 = ['ORG', 'LOC', 'PER', 'PER']
toy2 = ['ORG', 'LOC', 'LOC', 'LOC']
cm = ConfusionMatrix(toy1, toy2)
print(cm)

# multilabel for one class (one goal)
# only 2 raters
rater1 = ['no', 'no', 'no', 'no', 'no', 'yes', 'no', 'no', 'no', 'no']
rater2 = ['yes', 'no', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes']
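The two parallel rating lists can be fed to AnnotationTask by using each position as the item id; a minimal sketch of that conversion, continuing the toy data above (the coder names 'r1'/'r2' are made up):

# Build (coder, item, label) triples from the two parallel lists,
# using each list position as the item id.
pair_data = [('r1', i, label) for i, label in enumerate(rater1)] + \
            [('r2', i, label) for i, label in enumerate(rater2)]

pair_task = AnnotationTask(data=pair_data)
print(pair_task.kappa())
print(ConfusionMatrix(rater1, rater2))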