Beispiel #1
0
class KappaRater(object):
    """Build an ``AnnotationTask`` from two annotators' text files.

    The annotations are read from the ``G1`` and ``G2`` sub-directories of
    ``<directory of this module>/S``.  Every ``.txt`` file found there is
    passed to ``anntoconll.text_to_conll_lines`` and each resulting entry is
    unpacked into an ``(item, label)`` pair.
    """

    def __init__(self, S):
        """Read both annotator directories below ``S`` and build the task."""
        this_dir = os.path.dirname(os.path.realpath(__file__))
        dir1 = os.path.join(this_dir, S, "G1")
        dir2 = os.path.join(this_dir, S, "G2")
        self.annotation_task = AnnotationTask(data=self.__readfile(dir1, dir2))

    def __readfile(self, *args):
        """Return ``(coder, item_id, label)`` triples for every directory.

        The n-th directory (1-based) is attributed to coder ``"c<n>"``.  The
        item id is the running index of the entry within that directory
        joined to the item text with an underscore, so entries of different
        coders are matched by position.
        """
        data = []
        for coder_number, directory in enumerate(args, 1):
            coder = "c" + str(coder_number)
            entries = self.__get_lines(directory)
            for ind, (item, label) in enumerate(entries):
                data.append((coder, str(ind) + "_" + item, label))
        return data

    def __get_lines(self, directory):
        """Collect the converted entries of all ``.txt`` files below *directory*.

        Directories and files are visited in sorted order: ``os.walk`` makes
        no ordering promise, and ``__readfile`` pairs the coders' entries by
        their position in the returned list.
        """
        lines = []
        for root, dirs, files in os.walk(directory):
            # In-place sort steers the order in which os.walk descends.
            dirs.sort()
            for name in sorted(files):
                if not name.endswith(".txt"):
                    continue
                path = os.path.join(root, name)
                # newline='' keeps \r\n untouched for .ann positioning.
                with io.open(path, 'r', newline='', encoding="utf-8") as f:
                    lines += anntoconll.text_to_conll_lines(f)
        return lines

    def kappa(self):
        """Return ``kappa()`` of the underlying annotation task."""
        return self.annotation_task.kappa()
Beispiel #2
0
class KappaRater(object):
    """Build an ``AnnotationTask`` from two annotators' text files.

    The annotations are read from the ``G1`` and ``G2`` sub-directories of
    ``<directory of this module>/S``.  Every ``.txt`` file found there is
    passed to ``anntoconll.text_to_conll_lines`` and each resulting entry is
    unpacked into an ``(item, label)`` pair.
    """

    def __init__(self, S):
        """Read both annotator directories below ``S`` and build the task."""
        this_dir = os.path.dirname(os.path.realpath(__file__))
        dir1 = os.path.join(this_dir, S, "G1")
        dir2 = os.path.join(this_dir, S, "G2")
        self.annotation_task = AnnotationTask(data=self.__readfile(dir1, dir2))

    def __readfile(self, *args):
        """Return ``(coder, item_id, label)`` triples for every directory.

        The n-th directory (1-based) is attributed to coder ``"c<n>"``.  The
        item id is the running index of the entry within that directory
        joined to the item text with an underscore, so entries of different
        coders are matched by position.
        """
        data = []
        for coder_number, directory in enumerate(args, 1):
            coder = "c" + str(coder_number)
            entries = self.__get_lines(directory)
            for ind, (item, label) in enumerate(entries):
                data.append((coder, str(ind) + "_" + item, label))
        return data

    def __get_lines(self, directory):
        """Collect the converted entries of all ``.txt`` files below *directory*.

        Directories and files are visited in sorted order: ``os.walk`` makes
        no ordering promise, and ``__readfile`` pairs the coders' entries by
        their position in the returned list.
        """
        lines = []
        for root, dirs, files in os.walk(directory):
            # In-place sort steers the order in which os.walk descends.
            dirs.sort()
            for name in sorted(files):
                if not name.endswith(".txt"):
                    continue
                path = os.path.join(root, name)
                # newline='' leaves line endings untranslated when reading.
                with io.open(path, 'r', newline='', encoding="utf-8") as f:
                    lines += anntoconll.text_to_conll_lines(f)
        return lines

    def kappa(self):
        """Return ``kappa()`` of the underlying annotation task."""
        return self.annotation_task.kappa()
def __main__(argv):
    """Print alpha, S, pi and kappa for the reliability matrix picked by argv.

    ``argv`` must hold exactly two entries; otherwise a hint is printed and
    the process exits with status 2.  When ``argv[1]`` is ``'img'`` the
    matrix comes from ``getReliabilityMatImg`` (fixed CSV path), for any
    other value from ``getReliabilityMatTurker``.
    """
    # Guard clause: wrong number of command line arguments.
    if len(argv) != 2:
        print("Specify cmd arg")
        sys.exit(2)

    if argv[1] == 'img':
        reliability_mat = getReliabilityMatImg("../data/imageGID_job_map_expt2_corrected.csv")
    else:
        reliability_mat = getReliabilityMatTurker()

    task = AnnotationTask(data=reliability_mat)

    print("Calculating the agreement scores")

    # Each coefficient is computed right before it is reported.
    print("Alpha = %f" % task.alpha())
    print("S = %f" % task.S())
    print("Pi = %f" % task.pi())
    print("kappa = %f" % task.kappa())
def calculate_kappa(filename):
  """Print the kappa between two label sequences derived from stored labels.

  Reads the ``label`` field of every row in
  ``data/<filename>_data_result.json``, derives two label sequences from it
  with ``change_some_values`` (described as "fake labels" by the original
  author), stores them via ``save_to_csv`` in ``data/label_1.csv`` and
  ``data/label_2.csv`` and prints the kappa of an ``AnnotationTask`` built
  from both sequences (coders ``c1`` and ``c2``, items numbered from 0).
  """
  # Load the reference labels.
  with open('data/' + filename + '_data_result.json') as json_file:
    label_list = [row['label'] for row in json.load(json_file)]

  # Generate two fake labels to calculate kappa
  man_1_label = change_some_values(label_list)
  man_2_label = change_some_values(label_list)

  # save the labels to a csv file
  save_to_csv('data/label_1.csv', man_1_label)
  save_to_csv('data/label_2.csv', man_2_label)

  # Build (coder, item, label) triples.  zip() is wrapped in list() because
  # it returns an iterator on Python 3, which cannot be concatenated with +.
  item_num_list = range(0, len(man_1_label))
  civ_1 = list(zip(['c1'] * len(man_1_label), item_num_list, man_1_label))
  civ_2 = list(zip(['c2'] * len(man_2_label), item_num_list, man_2_label))
  task = AnnotationTask(data=civ_1 + civ_2)

  # Single-argument print(...) behaves the same on Python 2 and 3.
  print('kappa: ' + str(task.kappa()))
Beispiel #5
0
def calc_agreements(nr_of_abstracts=150):
    """Print summary statistics of per-abstract kappa and alpha.

    For every abstract index below ``nr_of_abstracts`` the annotator pair
    returned by ``round_robin`` is looked up, both annotators' flattened
    annotations are combined and fed to an ``AnnotationTask`` (with
    ``agreement_fn``), and its kappa and alpha are recorded.  Afterwards the
    count, mean and variance entries of ``describe`` are printed.
    """
    aggregate = []
    for abstract_idx in range(nr_of_abstracts):
        pair = round_robin(abstract_idx)
        combined = __str_combine_annotations(
            flatten(get_annotations(abstract_idx, pair[0])),
            flatten(get_annotations(abstract_idx, pair[1])),
        )
        task = AnnotationTask(combined, agreement_fn)
        aggregate.append(dict(
            kappa=task.kappa(),
            alpha=task.alpha(),
            annotator_A=pair[0],
            annotator_B=pair[1],
        ))

    # Summary statistics: indices 0, 2 and 3 of the describe() result are
    # reported as count, mean and variance.
    kappa_stats = describe([entry['kappa'] for entry in aggregate])
    print("number of abstracts %i" % kappa_stats[0])
    print("[kappa] mean: " + str(kappa_stats[2]))
    print("[kappa] variance: " + str(kappa_stats[3]))

    alpha_stats = describe([entry['alpha'] for entry in aggregate])
    print("[alpha] mean: " + str(alpha_stats[2]))
    print("[alpha] variance: " + str(alpha_stats[3]))
def calculate_kappa(filename):
    """Print the kappa between two label sequences derived from stored labels.

    Reads the ``label`` field of every row in
    ``data/<filename>_data_result.json``, derives two label sequences from
    it with ``change_some_values`` (described as "fake labels" by the
    original author), stores them via ``save_to_csv`` in
    ``data/label_1.csv`` and ``data/label_2.csv`` and prints the kappa of an
    ``AnnotationTask`` built from both sequences (coders ``c1`` and ``c2``,
    items numbered from 0).
    """
    # Load the reference labels.
    with open('data/' + filename + '_data_result.json') as json_file:
        label_list = [row['label'] for row in json.load(json_file)]

    # Generate two fake labels to calculate kappa
    man_1_label = change_some_values(label_list)
    man_2_label = change_some_values(label_list)

    # save the labels to a csv file
    save_to_csv('data/label_1.csv', man_1_label)
    save_to_csv('data/label_2.csv', man_2_label)

    # Build (coder, item, label) triples.  zip() is wrapped in list() because
    # it returns an iterator on Python 3, which cannot be concatenated with +.
    item_num_list = range(0, len(man_1_label))
    civ_1 = list(zip(['c1'] * len(man_1_label), item_num_list, man_1_label))
    civ_2 = list(zip(['c2'] * len(man_2_label), item_num_list, man_2_label))
    task = AnnotationTask(data=civ_1 + civ_2)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('kappa: ' + str(task.kappa()))
Beispiel #7
0
def get_kappa(input):
    """Print pairwise kappas for all coder pairs and return the overall kappa.

    ``fio.ReadMatrix(input, True)`` supplies a header row (used as coder
    names) and the body rows; the row index serves as the item id.  A label
    equal to ``'a'`` is replaced by ``'0'`` before the triples are handed to
    ``AnnotationTask``.

    :param input: argument forwarded to ``fio.ReadMatrix``
    :return: ``task.kappa()`` of the task built from all coders
    """
    from itertools import combinations

    head, body = fio.ReadMatrix(input, True)

    data = []
    for i, row in enumerate(body):
        for coder, label in enumerate(row):
            if label == 'a':
                label = '0'
            data.append((head[coder], i, label))

    task = AnnotationTask(data)

    # Report every unordered coder pair.  For three coders this yields the
    # same three lines, in the same order, as the former hard-coded prints,
    # and it no longer fails when fewer than three coders are present.
    for first, second in combinations(head, 2):
        print("%s %s %s" % (first, second, task.kappa_pairwise(first, second)))
    return task.kappa()
Beispiel #8
0
def agree_tags(delta, column):
    """
    Compute agreement figures for single-token labelling tasks.

    :param delta:  the collated data
    :param column:  the column for which agreement is to be computed
    :return: tuple ``(oa, s, pi, kappa, w_kappa, alpha)``
    """
    task = AnnotationTask(data=reverse_tags(delta, column))

    # Tuple elements are evaluated left to right, i.e. in the listed order.
    return (
        task.avg_Ao(),          # observed agreement
        task.S(),               # Bennett, Albert and Goldstein S (1954), all categories equally likely
        task.pi(),              # Scott pi (1955), single distribution
        task.kappa(),           # Cohen kappa (1960), individual coder distribution
        task.weighted_kappa(),  # weighted kappa
        task.alpha(),           # Krippendorff alpha (1980)
    )
Beispiel #9
0
def compute_annotator_agreement_nltkmetrics(data_array):
    """Print the agreement coefficients of an ``AnnotationTask``.

    See http://nltk.org/api/nltk.metrics.html#nltk.metrics.agreement

    :param data_array: passed unchanged as ``data`` to ``AnnotationTask``
    """
    # Every print takes exactly one pre-built string, so the parenthesised
    # form prints the same text on Python 2 and is valid on Python 3.
    print("####### Agreement coefficients according to NLTK metrics.agreement #######")

    t = AnnotationTask(data=data_array)
    print("Average observed agreement across all coders and items: " + str(t.avg_Ao()))

    print("Cohen's Kappa (Cohen 1960): " + str(t.kappa()))
    print("Weighted kappa (Cohen 1968): " + str(t.weighted_kappa()))

    print("Scott's pi (Scott 1955): " + str(t.pi()))

    print("alpha (Krippendorff 1980): " + str(t.alpha()))

    print("Observed disagreement for the alpha coefficient: " + str(t.Do_alpha()))
    print("S (Bennett, Albert and Goldstein 1954): " + str(t.S()))
    print("Observed disagreement for the weighted kappa coefficient averaged over all labelers: " + str(t.Do_Kw()))
Beispiel #10
0
 def kappa(self):
     """Return the kappa over all elements that both sides have annotated.

     The task data is a list of ``[annotator, element, label]`` lists: for
     every entry of ``self.ano`` whose second and third members are both
     not ``None``, one row per annotator is added, numbered consecutively
     from 1.  Returns the string ``'Not installed'`` when ``KAPPA`` is falsy.
     """
     if not KAPPA:
         return 'Not installed'

     rows = []
     counter = 1
     for entry in self.ano:
         first, second = entry[1], entry[2]
         # Skip elements that only one side has annotated.
         if first is not None and second is not None:
             rows.append([self.ano1.get_code(), counter, first.get(self.last)])
             rows.append([self.ano2.get_code(), counter, second.get(self.last)])
             counter += 1

     return AnnotationTask(rows).kappa()
def getagreement(tpl,datadir,task_type='all'):
    """Get agreement values for the annotators in the ``tpl`` list.

    For every entry of ``tpl`` the JSON data is read, converted into task
    data and appended to one combined task.  As a side effect the label
    occurrence counts and the label set counts of each annotator are dumped
    to ``abscount-<annotator>.yaml`` and ``setcount-<annotator>.yaml`` in
    ``datadir``.

    Args:
       tpl (list):  combination group of annotators
       datadir (str): Cache data directory used by joblib; the YAML files
           are written there as well
       task_type (str): forwarded to ``json2taskdata.create_task_data``

    Returns:
       dict with a single entry mapping ``frozenset`` of the annotator names
       to ``Agree(kappa, alpha, avg_Ao)`` of the combined task.
    """

    # NOTE(review): newer joblib releases expect ``location=`` instead of
    # ``cachedir=`` - confirm against the joblib version in use.
    mem = Memory(cachedir=datadir)
    readjson = mem.cache(json2taskdata.readjson, mmap_mode='r')
    create_task_data = mem.cache(json2taskdata.create_task_data)
    count_occurrances = mem.cache(json2taskdata.count_occurrances)
    count_labels = mem.cache(json2taskdata.count_labels)

    annotators = set()
    lectask = []

    # for each annotator in group tpl
    for stditem in tpl:
        # Annotator name: text before the first '.', minus its first three
        # characters, reduced to the last two characters of the remainder.
        aname = stditem.split('.')[0][3:][-2:]
        annotators.add(aname)
        lecdict = readjson(stditem)
        newlectask = create_task_data(lecdict, task_type=task_type, annotator=aname)
        label_data = json2taskdata.create_labels_list(newlectask)

        abscount = count_occurrances(str(label_data))
        # ``with`` closes (and thereby flushes) the dump file handle.
        with open(os.path.join(datadir, 'abscount-' + aname + '.yaml'), 'w') as dump_file:
            yaml.dump(abscount, dump_file)

        setcount = count_labels(newlectask)
        with open(os.path.join(datadir, 'setcount-' + aname + '.yaml'), 'w') as dump_file:
            yaml.dump(setcount, dump_file)

        # Grow the list in place instead of copying it on every iteration.
        lectask.extend(newlectask)

    task = AnnotationTask(data=lectask, distance=nltk.metrics.distance.masi_distance_mod)

    return {frozenset(annotators): Agree(task.kappa(), task.alpha(), task.avg_Ao())}
Beispiel #12
0
def status_view(request, task_id=None):
    """
    Renders the evaluation tasks status page for staff users.

    Members of the "WMT13" group are redirected to the WMT13 overview.  With
    a ``task_id`` the page of that task is rendered: status per user, the
    percentage of each raw result and, when the task has more than one user,
    the agreement scores ``(alpha, kappa, S, pi)``.  Without a ``task_id``
    the overview of evaluation tasks grouped by task type is rendered.
    """
    LOGGER.info('Rendering evaluation task overview for user "{0}".'.format(
      request.user.username))

    # Check if user is member in WMT13 group.  If so, redirect to wmt13 app.
    if request.user.groups.filter(name="WMT13").exists():
        LOGGER.info('Redirecting user "{0}" to WMT13 overview.'.format(
          request.user.username))
        return redirect('appraise.wmt13.views.overview')

    if task_id:
        task = get_object_or_404(EvaluationTask, task_id=task_id)

        headers = task.get_status_header()

        # Fetch the task's users once and reuse the list below.
        users = list(task.users.all())
        status = []
        for user in users:
            status.append((user.username, task.get_status_for_user(user)))

        scores = None
        result_data = []
        raw_result_data = Counter()

        for item in EvaluationItem.objects.filter(task=task):
            results = []
            for user in users:
                # Fetch at most one row in a single query instead of calling
                # exists() followed by two separate qset[0] lookups.
                matches = list(
                  EvaluationResult.objects.filter(user=user, item=item)[:1])
                if matches:
                    result = matches[0]
                    results.append((user.id, item.id, str(result.results)))
                    raw_result_data[result.raw_result] += 1

            # Only items for which every user has a result are used for
            # the agreement computation.
            if len(results) == len(users):
                result_data.extend(results)

        # TODO: for gisting, calculate the percentage of answers against the
        # number of different answers in that same gap, and also regroup
        # them for readability.
        _raw_results = []
        _total_results = float(sum(raw_result_data.values()))
        for key in sorted(raw_result_data):
            value = raw_result_data[key]
            _raw_results.append((key, value, 100 * value / _total_results))

        try:
            # Computing inter-annotator agreement only makes sense for more
            # than one coder -- otherwise, we only display result_data...
            if len(users) > 1:
                # Check if we can safely use NLTK's AnnotationTask class.
                # NOTE(review): chk is an AnnotationTask instance; unless
                # that class defines equality with floats this comparison
                # is False and the fallback import below is always taken.
                # Confirm which coefficient was meant to be compared.
                try:
                    from nltk.metrics.agreement import AnnotationTask
                    chk = AnnotationTask(data=[('b', '1', 'k'),
                      ('a', '1', 'k')])
                    assert(chk == 1.0)

                except AssertionError:
                    LOGGER.debug('Fixing outdated version of AnnotationTask.')
                    from appraise.utils import AnnotationTask

                # We have to sort annotation data to prevent StopIterator errors.
                result_data.sort()
                annotation_task = AnnotationTask(result_data)

                scores = (
                  annotation_task.alpha(),
                  annotation_task.kappa(),
                  annotation_task.S(),
                  annotation_task.pi()
                )

        except (ZeroDivisionError, ImportError):
            # No scores are shown when they cannot be computed.
            scores = None

        dictionary = {
          'combined': task.get_status_for_users(),
          'commit_tag': COMMIT_TAG,
          'headers': headers,
          'scores': scores,
          'raw_results': _raw_results,
          'status': status,
          'task_id': task.task_id,
          'task_name': task.task_name,
          'title': 'Evaluation Task Status',
        }

        return render(request, 'evaluation/status_task.html', dictionary)

    else:
        evaluation_tasks = {}
        for task_type_id, task_type in APPRAISE_TASK_TYPE_CHOICES:
            # We collect a list of task descriptions for this task_type.
            evaluation_tasks[task_type] = []

            # Super users see all EvaluationTask items, even non-active ones.
            if request.user.is_superuser:
                _tasks = EvaluationTask.objects.filter(task_type=task_type_id)

            else:
                _tasks = EvaluationTask.objects.filter(task_type=task_type_id,
                  active=True)

            # Loop over the QuerySet and compute task description data.
            for _task in _tasks:
                # "in" replaces dict.has_key(), which Python 3 removed.
                if _task.task_id not in APPRAISE_TASK_CACHE:
                    APPRAISE_TASK_CACHE[_task.task_id] = {}

                _cache = APPRAISE_TASK_CACHE[_task.task_id]
                if request.user.username not in _cache:
                    _update_task_cache(_task, request.user)

                _task_data = _cache[request.user.username]

                # Append new task description to current task_type list.
                evaluation_tasks[task_type].append(_task_data)

            # If there are no tasks descriptions for this task_type, we skip it.
            if not evaluation_tasks[task_type]:
                evaluation_tasks.pop(task_type)

        dictionary = {
          'active_page': "STATUS",
          'commit_tag': COMMIT_TAG,
          'evaluation_tasks': evaluation_tasks,
          'title': 'Evaluation Task Status',
        }

        return render(request, 'evaluation/status.html', dictionary)
    data = []

    sentiment_r1_5_scale = []
    sentiment_r2_5_scale = []

    for r1, r2 in zip(sentences_r1, sentences_r2):

        sentiment_r1_5_scale.append(int(r1[5]))
        data.append((6, r1[0], r1[5]))

        sentiment_r2_5_scale.append(int(r2[5]))
        data.append((7, r2[0], r2[5]))

        if (r1[0] != r2[0]):
            print r1[0]

except Exception, e:
    print e

# disconnect from server
db.close()
print i

print skll.kappa(sentiment_r1_5_scale, sentiment_r2_5_scale)

annotation = AnnotationTask(data=data)

print annotation.kappa()
print annotation.alpha()
Beispiel #14
0
def status_view(request, task_id=None):
    """
    Renders the evaluation tasks status page for staff users.

    Members of the "WMT13" group are redirected to the WMT13 overview.  With
    a ``task_id`` the page of that task is rendered: status per user, the
    percentage of each raw result and, when the task has more than one user,
    the agreement scores ``(alpha, kappa, S, pi)``.  Without a ``task_id``
    the overview of evaluation tasks grouped by task type is rendered.
    """
    LOGGER.info('Rendering evaluation task overview for user "{0}".'.format(
        request.user.username))

    # Check if user is member in WMT13 group.  If so, redirect to wmt13 app.
    if request.user.groups.filter(name="WMT13").exists():
        LOGGER.info('Redirecting user "{0}" to WMT13 overview.'.format(
            request.user.username))
        return redirect('appraise.wmt13.views.overview')

    if task_id:
        task = get_object_or_404(EvaluationTask, task_id=task_id)

        headers = task.get_status_header()

        # Fetch the task's users once and reuse the list below.
        users = list(task.users.all())
        status = []
        for user in users:
            status.append((user.username, task.get_status_for_user(user)))

        scores = None
        result_data = []
        raw_result_data = Counter()

        for item in EvaluationItem.objects.filter(task=task):
            results = []
            for user in users:
                # Fetch at most one row in a single query instead of calling
                # exists() followed by two separate qset[0] lookups.
                matches = list(
                    EvaluationResult.objects.filter(user=user, item=item)[:1])
                if matches:
                    result = matches[0]
                    results.append((user.id, item.id, str(result.results)))
                    raw_result_data[result.raw_result] += 1

            # Only items for which every user has a result are used for
            # the agreement computation.
            if len(results) == len(users):
                result_data.extend(results)

        _raw_results = []
        _total_results = float(sum(raw_result_data.values()))
        for key in sorted(raw_result_data):
            value = raw_result_data[key]
            _raw_results.append((key, value, 100 * value / _total_results))

        try:
            # Computing inter-annotator agreement only makes sense for more
            # than one coder -- otherwise, we only display result_data...
            if len(users) > 1:
                # Check if we can safely use NLTK's AnnotationTask class.
                # NOTE(review): chk is an AnnotationTask instance; unless
                # that class defines equality with floats this comparison
                # is False and the fallback import below is always taken.
                # Confirm which coefficient was meant to be compared.
                try:
                    from nltk.metrics.agreement import AnnotationTask
                    chk = AnnotationTask(data=[('b', '1', 'k'), ('a', '1',
                                                                 'k')])
                    assert (chk == 1.0)

                except AssertionError:
                    LOGGER.debug('Fixing outdated version of AnnotationTask.')
                    from appraise.utils import AnnotationTask

                # We have to sort annotation data to prevent StopIterator errors.
                result_data.sort()
                annotation_task = AnnotationTask(result_data)

                scores = (annotation_task.alpha(), annotation_task.kappa(),
                          annotation_task.S(), annotation_task.pi())

        except (ZeroDivisionError, ImportError):
            # No scores are shown when they cannot be computed.
            scores = None

        dictionary = {
            'combined': task.get_status_for_users(),
            'commit_tag': COMMIT_TAG,
            'headers': headers,
            'scores': scores,
            'raw_results': _raw_results,
            'status': status,
            'task_id': task.task_id,
            'task_name': task.task_name,
            'title': 'Evaluation Task Status',
        }

        return render(request, 'evaluation/status_task.html', dictionary)

    else:
        evaluation_tasks = {}
        for task_type_id, task_type in APPRAISE_TASK_TYPE_CHOICES:
            # We collect a list of task descriptions for this task_type.
            evaluation_tasks[task_type] = []

            # Super users see all EvaluationTask items, even non-active ones.
            if request.user.is_superuser:
                _tasks = EvaluationTask.objects.filter(task_type=task_type_id)

            else:
                _tasks = EvaluationTask.objects.filter(task_type=task_type_id,
                                                       active=True)

            # Loop over the QuerySet and compute task description data.
            for _task in _tasks:
                # "in" replaces dict.has_key(), which Python 3 removed.
                if _task.task_id not in APPRAISE_TASK_CACHE:
                    APPRAISE_TASK_CACHE[_task.task_id] = {}

                _cache = APPRAISE_TASK_CACHE[_task.task_id]
                if request.user.username not in _cache:
                    _update_task_cache(_task, request.user)

                _task_data = _cache[request.user.username]

                # Append new task description to current task_type list.
                evaluation_tasks[task_type].append(_task_data)

            # If there are no tasks descriptions for this task_type, we skip it.
            if not evaluation_tasks[task_type]:
                evaluation_tasks.pop(task_type)

        dictionary = {
            'active_page': "STATUS",
            'commit_tag': COMMIT_TAG,
            'evaluation_tasks': evaluation_tasks,
            'title': 'Evaluation Task Status',
        }

        return render(request, 'evaluation/status.html', dictionary)
def annotation(output):
    """Print average observed agreement and kappa for the file *output*.

    Every non-blank line of the file is split on whitespace and the
    resulting token lists are passed as ``data`` to ``AnnotationTask``.

    :param output: path of the file to read
    """
    # ``with`` closes the file; blank lines are skipped because they would
    # otherwise turn into empty token lists.
    with open(output) as handle:
        rows = [line.split() for line in handle if line.strip()]

    t = AnnotationTask(data=rows)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("\nAverage observed agreement: " + str(t.avg_Ao()))
    print("\nKappa: " + str(t.kappa()))
    def dispatch(self, request, *args, **kwargs):
        self.task = get_object_or_404(Task, pk=self.kwargs['pk'])
        self.array = []
        self.kappa = []
        self.kappa1 = []
        self.kappa_name = "/media/csvfileFinal.csv"
        self.eval_name = "/media/csvfileP.csv"
        self.kappa_nameLong = "/media/csvfileFinal.csv"
        self.lblr = []
        self.head = []
        self.coder_emails = PostResponse.objects.filter(
            task=self.task.pk).values_list(
                'responder__email',
                flat=True).distinct().order_by('responder__email')
        post_list = self.task.post_list.all()
        if os.path.exists(
                '/home/salae001/new/LabelingSystem-master/labelingsystem/media/csvfileFinal.csv'
        ):
            print('existsssss')
            os.remove(
                '/home/salae001/new/LabelingSystem-master/labelingsystem/media/csvfileFinal.csv'
            )
        if os.path.exists(
                '/home/salae001/new/LabelingSystem-master/labelingsystem/media/csvfileP.csv'
        ):
            os.remove(
                '/home/salae001/new/LabelingSystem-master/labelingsystem/media/csvfileP.csv'
            )
        if os.path.exists(
                '/home/salae001/new/LabelingSystem-master/labelingsystem/media/csvfile.csv'
        ):
            os.remove(
                '/home/salae001/new/LabelingSystem-master/labelingsystem/media/csvfile.csv'
            )
        name = "media/csvfileP.csv"  # + str(self.task.pk)
        self.eval_name = "/" + name

        #                    print (name)
        #dateVal =datetime.datetime.now()
        filepp = open(str(name), "w+")
        filepp.write(',')
        for coder_email in self.coder_emails:
            filepp.write(coder_email)
            filepp.write(',')
        filepp.write('Majority Vote')
        filepp.write('\n')
        voteList = {}
        listTemp = []
        cpr = 0
        for post in post_list:
            row = []
            cpr = cpr + 1
            #if cpr > 6:
            #	row.append('...')
            #	break
            row.append(post.content)
            filepp.write(post.content)
            filepp.write(',')
            i = 0
            if len(self.coder_emails) > 5:
                self.coder_emails_temp = self.coder_emails[0:5]
                temp_emails = self.coder_emails
                self.coder_emails_temp.append("(List continues...)")
                #for coder_email in temp_emails:
                #	filepp.write(coder_email)
                #	filepp.write(';')
                #filepp.write('\n')
                voteList = {}
                listTemp = []
                for coder_email in temp_emails:
                    #                                	if len(self.coder_emails) > 5  and coder_email == "(List continues...)":
                    #                                        	label = '...'
                    #	                                else :
                    #					print ('/....N?A////')
                    label = 'N/A'
                    try:
                        post_response = PostResponse.objects.filter(
                            task=self.task.pk,
                            post=post.pk,
                            responder__email=coder_email).last()
                        label = post_response.label
                        #print('label...',label)
                        #filepp.write(coder_email)
                        #		filepp.write(';')
                        filepp.write(str(label))
                        #						myMap = {}
                        listTemp.append(str(label))
                        #						maximum = ( '', 0 ) # (occurring element, occurrences)
                        #						for n in :
                        #							if n in voteList:voteList[n] += 1
                        #							else: voteList[n] = 1
                        #						        # Keep track of maximum on the go
                        #						        if voteList[n] > maximum[1]: maximum = (n,voteList[n])
                        filepp.write(',')
                    except:
                        filepp.write('N/A')
                        listTemp.append('N/A')
                        filepp.write(',')
                        pass
                    #if len(self.coder_emails) > 5:
                    #        label = '...'

            #		row.append(label)
            #	filepp
                maximum = ('', 0)  # (occurring element, occurrences)
                for n in listTemp:
                    if n in voteList:
                        voteList[n] += 1
                    else:
                        voteList[n] = 1
# Keep track of maximum on the go
                    if voteList[n] > maximum[1]:
                        maximum = (n, voteList[n])
            #	filepp.write(';')
            #print('maximum', maximum)
            #	filepp.write(maximum[0])
            #	filepp.write('\n')

            else:
                self.coder_emails_temp = self.coder_emails
                voteList = {}
                listTemp = []
            i = 0
            for coder_email in self.coder_emails_temp:
                #i = i+1

                #if i>6: #self.coder_emails) > 5 and coder_email == "(List continues...)":
                #   break
                if len(self.coder_emails
                       ) > 5 and coder_email == "(List continues...)":
                    # print ('coder email-----------')
                    label = '...'
                    #continue
                    #try:
                    # post_response = PostResponse.objects.filter(task=self.task.pk, post=post.pk, responder__email=coder_email).last()
                    #print (post_response)
                    #label = post_response.label
                    #filepp.write(str(label))
                    #filepp.write(';')
                #  listTemp.append(str(label))
                #except:
                # filepp.write('N/A')#listTemp.append('N/A')
                #filepp.write(';')
                # listTemp.append(str(label))
                #pass

                else:
                    label = 'N/A'
                    try:
                        post_response = PostResponse.objects.filter(
                            task=self.task.pk,
                            post=post.pk,
                            responder__email=coder_email).last()
                        print(post_response)
                        label = post_response.label
                        if len(self.coder_emails) <= 5:
                            filepp.write(str(label))
                            filepp.write(',')
                        listTemp.append(str(label))
                    except:
                        if len(self.coder_emails) <= 5:
                            filepp.write('N/A')  #listTemp.append('N/A')
                            filepp.write(',')
                        listTemp.append(str(label))
                        pass
                row.append(label)
            maximum = ('', 0)
            for n in listTemp:
                if n in voteList:
                    voteList[n] += 1
                else:
                    voteList[n] = 1
                    # Keep track of maximum on the go
                if voteList[n] > maximum[1]:
                    maximum = (n, voteList[n])
            #filepp.write(';')
            filepp.write(maximum[0])
            filepp.write('\n')
            #                              i = i+1
            #			maximum = ( '', 0 ) # (occurring element, occurrences)
            #			for n in listTemp:
            #				if n in voteList:
            #					voteList[n] += 1
            #				else:
            #					voteList[n] = 1
            # Keep track of maximum on the go
            #				if voteList[n] > maximum[1]:
            #					maximum = (n,voteList[n])
            #filepp.write(';')
            #			print('maximum', maximum)
            #                               filepp.write(maximum[0])
            #                               filepp.write('\n')
            #				row.append(maximum[0])
            #row.append(label)
            #			self.coder_emails_temp.append("(List continues...)")
            #			row.append(maximum[0])
            #			self.array.append(row)
            #maximum = ( '', 0 ) # (occurring element, occurrences)
            #for n in listTemp:
            #       if n in voteList:
            #              voteList[n] += 1
            #     else:
            #                voteList[n] = 1
            #                               # Keep track of maximum on the go
            #      if voteList[n] > maximum[1]:
            #             maximum = (n,voteList[n])
            #    #filepp.write(';')
            #print('maximum', maximum)
            #row.append(label)
            maximum = ('', 0)  # (occurring element, occurrences)
            for n in listTemp:
                if n in voteList:
                    voteList[n] += 1
                else:
                    voteList[n] = 1
                    # Keep track of maximum on the go
                if voteList[n] > maximum[1]:
                    maximum = (n, voteList[n])
                #filepp.write(';')
            print('maximum', maximum)
            #filepp.write(maximum[0])
            #filepp.write('\n')

            row.append(maximum[0])
            self.array.append(row)

        try:
            annotation_triplet_list = []
            post_response_list = PostResponse.objects.filter(task=self.task.pk)
            #rint (post_response_list)
            post_response_t = [
                part.encode("utf8") for part in PostResponse.objects.filter(
                    task=self.task.pk).values_list('responder__email',
                                                   flat=True).distinct()
            ]
            lst_rp = []
            triple_list = []
            ctr = 0
            #			for post_response in post_response_t:
            #				post_response = str(post_response).replace('b\'', '')
            #				post_response = post_response.replace('\'', '')
            #				lst_rp.append(post_response)
            #				print (post_response)
            #triple_list.append([])
            #triple_list[ctr].append(post_response)
            #triple_list[ctr].append(post_response)
            #triple_list[ctr].append('0')
            #ctr = ctr + 1
            #print (triple_list)
            # Get all combinations of [1, 2, 3]
            # and length 2
            #	print (post_response)
            #post_response = post_response.replace('b', '\'')
            #	print(post_response)
            #print ('here')
            #print("post_reposne", post_response_t)
            if len(post_response_t) > 5:
                post_response_t_temp = post_response_t[0:5]
                for post_response in post_response_t_temp:
                    post_response = str(post_response).replace('b\'', '')
                    post_response = post_response.replace('\'', '')
                    lst_rp.append(post_response)
                lst_rp.append("(List continues...)")

                comb_temp = combinations(post_response_t, 2)
                for i in list(comb_temp):
                    #print ("in the comb")
                    annotation_triplet_list = []
                    ip = []
                    sp = ""
                    #       ct = ctr + 1
                    #print (ctr)
                    temp = str(i[0]).replace('b\'', '')
                    temp = temp.replace('\'', '')
                    if ([temp, temp, '0'] not in triple_list):
                        triple_list.append([])
                        triple_list[ctr].append(temp)
                        triple_list[ctr].append(temp)
                        triple_list[ctr].append('0')
                        ctr = ctr + 1
                    triple_list.append([])
                    for s in i:
                        st = str(s).replace('b\'', '')
                        st = st.replace('\'', '')
                        ip.append(st)
                        triple_list[ctr].append(st)
#triple_list[ctr].append(i[0])
#triple_list[ctr].append(i[0])
#triple_list[ctr].append(0)
                    print(triple_list[ctr])
                    for post_response in post_response_list:
                        #               print(post_response)
                        #               print(ip, post_response.responder.email)
                        if (post_response.responder.email in ip):

                            annotation_triplet = (
                                post_response.responder.email,
                                post_response.post.content,
                                post_response.label.content)
                            #       print (post_response.responder.email)
                            #       print(annotation_triplet)
                            annotation_triplet_list.append(annotation_triplet)

                            t = AnnotationTask(annotation_triplet_list)
#print("kappa " +str(t.kappa()))
                    triple_list[ctr].append(str(t.kappa()))
                    #str(t.kappa()))
                    self.lblr.append(triple_list)
                    ctr = ctr + 1
                self.alpha1 = t.alpha()
                #	                        print (triple_list)
                self.kappa1.append(triple_list)
                #print ('before EXPORT')
                #				exportCSV(triple_list, self.alpha1, self.coder_emails)
                #				 print ('in export CSV')
                #with open('result.csv','w') as file:
                #print(self.task)

                name = "media/csvfile.csv"  #+ str(self.task.pk)
                self.kappa_nameLong = "/" + name
                print(name)

                #dateVal =datetime.datetime.now()
                filep = open(str(name), "w+")
                #print ('here in csv')
                #print (filep)
                i = 0
                ct = 1
                filep.write(',')
                prev_email = 's'
                #if 's' is not '*****@*****.**':
                #	print (True)
                for email in triple_list:
                    #					print (email)
                    if email[0] != prev_email:
                        #print ('in email 0', email[0])
                        prev_email = email[0]
                        filep.write(email[0])
                        filep.write(',')
                filep.write(email[1])
                filep.write('\n')
                for row in triple_list:
                    #	print (row[0], row[1], row[2])
                    #filep.write(row[0])
                    #filep.write(row[0])
                    #				for i in range(0, len(self.coder_emails)):
                    #					filep.write(row[0])
                    #					filep.write(';')
                    #	print (i)
                    if i == 0 or i == ct - 1:
                        filep.write(row[0])
                        filep.write(',')
                        #		print('row 0', row[0])
                        for k in range(0, i):
                            filep.write('--,')
#						filep.write(row[0])
#						filep.write(';')

                    if i == len(self.coder_emails) - 1:
                        i = ct
                        filep.write(row[2])
                        #	filep.write(row[0])
                        #						print (ct)
                        #						print (range(0,ct))
                        #						for k in range(0,ct) :
                        #							filep.write('--;')
                        filep.write('\n')
                        ct = ct + 1
                    else:
                        i = i + 1
                        #						filep.write('--')
                        filep.write(row[2])
                        filep.write(',')
                    #filep.write('\n')
                filep.close()
                #or col in row:
                #file.write(col)
                #print (triple_list)
                #	for rowp in triple_list:
#							print (rowp)
#	if forloop.counter != forloop.parentloop.counter:
#		if col in rowp:
#	file.write(rowp)
#					file.close()
#self.kappa.append(triple_list)

            else:
                post_response_t_temp = post_response_t
                for post_response in post_response_t_temp:
                    post_response = str(post_response).replace('b\'', '')
                    post_response = post_response.replace('\'', '')
                    lst_rp.append(post_response)
                    print(post_response)

            self.head.append(lst_rp)
            comb = combinations(post_response_t_temp, 2)
            #	print (comb)
            #ctr = 0
            #triple_list = []
            ip = []
            lst_rp = []
            triple_list = []
            ctr = 0

            #triple_list.append([])
            #triple_list.append(post_response_t)
            # Print the obtained combinations
            #			for iv in list(comb):
            #				annotation_triplet_list = []
            #				triple_list.append([])
            #print (i[0])
            #				temp =  str(iv[0]).replace('b\'', '')
            #				temp = temp.replace('\'', '')
            #				if ([temp,temp,'0'] not in triple_list) :
            #					triple_list.append([])
            #					triple_list[ctr].append(temp)
            #					triple_list[ctr].append(temp)
            #					triple_list[ctr].append('0')
            #					ctr = ctr + 1
            #	print (triple_list)
            #			print (triple_list)
            #			comb = combinations(post_response_t, 2)
            for i in list(comb):
                #				print ("in the comb")
                annotation_triplet_list = []
                ip = []
                sp = ""
                #	ct = ctr + 1
                print(ctr)
                temp = str(i[0]).replace('b\'', '')
                temp = temp.replace('\'', '')
                if ([temp, temp, '0'] not in triple_list):
                    triple_list.append([])
                    triple_list[ctr].append(temp)
                    triple_list[ctr].append(temp)
                    triple_list[ctr].append('0')
                    ctr = ctr + 1
                triple_list.append([])
                for s in i:
                    #	print (s)
                    #print (sp)
                    #p.join(s)
                    #p.join(" , ")
                    #					print ("sds"+s)
                    #print (s)
                    #					sp = sp+s+" , "
                    st = str(s).replace('b\'', '')
                    st = st.replace('\'', '')
                    print(st)
                    ip.append(st)
                    triple_list[ctr].append(st)
                #triple_list[ctr].append(i[0])
                #triple_list[ctr].append(i[0])
                #triple_list[ctr].append(0)

                print(triple_list[ctr])
                #triple_list.append(sp)
                #	print(triple_list)
                #print(post_response_list)
                for post_response in post_response_list:
                    #		print(post_response)
                    #		print(ip, post_response.responder.email)
                    if (post_response.responder.email in ip):

                        annotation_triplet = (post_response.responder.email,
                                              post_response.post.content,
                                              post_response.label.content)
                        #	print (post_response.responder.email)
                        #	print(annotation_triplet)
                        annotation_triplet_list.append(annotation_triplet)

                        t = AnnotationTask(annotation_triplet_list)
                #print("kappa " +str(t.kappa()))
                triple_list[ctr].append(str(t.kappa()))
                #str(t.kappa()))
                self.lblr.append(triple_list)

                ctr = ctr + 1
            if len(post_response_t) > 5:
                self.alpha = self.alpha1
            else:
                self.alpha = t.alpha()
            print(triple_list)
            self.kappa.append(triple_list)
            name = "media/csvfileFinal.csv"  #+ str(self.task.pk)
            self.kappa_name = "/" + name
            #print (name)
            #dateVal =datetime.datetime.now()
            filep = open(str(name), "w+")
            #print ('here in csv')
            #print (filep)
            i = 0
            ct = 1
            filep.write(',')
            prev_email = 's'
            #if 's' is not '*****@*****.**':
            #       print (True)
            for email in triple_list:
                #                       print (email)
                if email[0] != prev_email:
                    prev_email = email[0]
                    filep.write(email[0])
                    filep.write(',')
            filep.write(email[1])
            filep.write('\n')
            for row in triple_list:
                #       print (row[0], row[1], row[2])
                #filep.write(row[0])
                #filep.write(row[0])
                #                               for i in range(0, len(self.coder_emails)):
                #                                       filep.write(row[0])
                #                                       filep.write(';')
                #       print (i)
                if i == 0 or i == ct - 1:
                    filep.write(row[0])
                    filep.write(',')
                    #               print('row 0', row[0])
                    for k in range(0, i):
                        filep.write('--,')
#                                               filep.write(row[0])
#                                               filep.write(';')

                if i == len(self.coder_emails) - 1:
                    i = ct
                    filep.write(row[2])
                    #       filep.write(row[0])
                    #                                               print (ct)
                    #                                               print (range(0,ct))
                    #                                               for k in range(0,ct) :
                    #                                                       filep.write('--;')
                    filep.write('\n')
                    ct = ct + 1
                else:
                    i = i + 1
                    #                                               filep.write('--')
                    filep.write(row[2])
                    filep.write(',')
                #filep.write('\n')
            filep.close()
            #self.kappa.append(triple_list)

    #	print (self.kappa)
        except:
            self.alpha = 'N/A'
            name = "media/csvfileFinal.csv"  #+ str(self.task.pk)
            self.kappa_name = "/" + name
            filep = open(str(name), "w+")

        return super(TaskEvaluationDetailView,
                     self).dispatch(request, *args, **kwargs)
Beispiel #17
0
    '''

    if ('0' in line):
        print((annotator, word, '0'))
        data.append((annotator, '0', word))
    if ('1' in line):
        data.append((annotator, '1', word))
    if ('2' in line):
        data.append((annotator, '2', word))


# Feed every annotator's file to appender(), one line at a time.
# NOTE(review): annotator 'k' reads "annots", the same file as annotator 's' --
# confirm this is intentional and not a copy-paste slip.
for annotator_id, annotation_path in (('d', "annotd"),
                                      ('s', "annots"),
                                      ('k', "annots")):
    with open(annotation_path) as annotation_file:
        for raw_line in annotation_file.readlines():
            appender(annotator_id, raw_line)

# Compute agreement over the records collected in `data`.
task = AnnotationTask(data)

print(task.kappa())
Beispiel #18
0
"""
Compute the inter-annotator agreement
"""

import nltk
from nltk.metrics.agreement import AnnotationTask

# Build one AnnotationTask per input file; every line is split on whitespace
# and passed through as a single data record.
# The files are read inside `with` blocks so the handles are closed as soon as
# the records have been collected, and print() is used in its single-argument
# form, which behaves the same on Python 2 and Python 3.
with open("1.txt") as f1:
    t1 = AnnotationTask(data=[x.split() for x in f1])
print(t1.kappa())
with open("2.txt") as f2:
    t2 = AnnotationTask(data=[x.split() for x in f2])
print(t2.kappa())
def annotation(output):
    """Print the average observed agreement and kappa for an annotation file.

    Args:
        output: Path of a text file. Each line is split on whitespace and the
            resulting list is passed to ``AnnotationTask`` as one record.

    Returns:
        None. The two scores are written to standard output.
    """
    # Read inside a context manager so the file handle is not leaked.
    with open(output) as annotation_file:
        records = [line.split() for line in annotation_file]
    t = AnnotationTask(data=records)
    # Single-argument print() produces the same output on Python 2 and 3.
    print("\nAverage observed agreement: " + str(t.avg_Ao()))
    print("\nKappa: " + str(t.kappa()))
Beispiel #20
0
# Load the label table: ';'-separated, first column used as the row index.
data = pd.read_csv('../input_data/labels-C.csv', sep=';', index_col=0)

allcoders = data.columns
experts = ['KEY', 'MG', 'MS', 'TM']
novices = ['KEY', 'CK', 'GK', 'RM']

# Coder group analysed below.
cols = novices

# Total values: one [coder, item, label] record per cell of the chosen columns.
taskdata = []
for coder in cols:
    coder_labels = data[coder]
    for i in coder_labels.index:
        taskdata.append([coder, i, coder_labels[i]])

ratingtask = AnnotationTask(data=taskdata)
# Report each agreement coefficient under the label used in the output.
for metric_name, metric in (
        ("kappa", ratingtask.kappa),
        ("fleiss", ratingtask.multi_kappa),
        ("alpha", ratingtask.alpha),
        ("scotts", ratingtask.pi),
):
    print(metric_name + " " + str(metric()))

# Pairwise values: walk every ordered pair of coders (including self-pairs).
similarities = []
for coders in itertools.product(cols, repeat=2):
    first_coder, second_coder = coders
    if first_coder != second_coder:
        # Distinct coders: gather only this pair's [coder, item, label] records.
        # NOTE(review): the records are built but not consumed in the visible
        # code -- the agreement computation for the pair appears to be missing.
        taskdata = []
        for coder in coders:
            coder_labels = data[coder]
            for i in coder_labels.index:
                taskdata.append([coder, i, coder_labels[i]])
    else:
        # A coder paired with itself is recorded as similarity 1.
        similarities.append(1)
Beispiel #21
0
from nltk.metrics.agreement import AnnotationTask
from nltk.metrics import ConfusionMatrix
# Toy example: four items, each labelled by two different annotators.
# The annotators agree on the first two items and disagree on the last two.
toy_data = [
    # Each record is [annotator, item id, label].
    ['1', 5723, 'ORG'],
    ['2', 5723, 'ORG'],
    ['1', 55829, 'LOC'],
    ['2', 55829, 'LOC'],
    ['1', 259742, 'PER'],
    ['2', 259742, 'LOC'],
    ['1', 269340, 'PER'],
    ['2', 269340, 'LOC']
]
task = AnnotationTask(data=toy_data)
print(task.kappa())
print(task.alpha())
# 16:52 -- Yes! It works!

# Same labels as toy_data, laid out for ConfusionMatrix:
# the annotator is replaced by a split into two variables (one list each),
# and the item is replaced by its position in the list.
toy1 = ['ORG', 'LOC', 'PER', 'PER']
toy2 = ['ORG', 'LOC', 'LOC', 'LOC']
cm = ConfusionMatrix(toy1, toy2)
print(cm)

# Multi-label case for a single class (one goal),
# with only 2 users.

rater1 = ['no', 'no', 'no', 'no', 'no', 'yes', 'no', 'no', 'no', 'no']
rater2 = ['yes', 'no', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes']