Example #1
def print_krippendorff_per_leaning(job, raters_normal):
    all_agreement_scores = []  # one {leaning: alpha} entry per leaning

    for leaning in [-1, 0, 1]:
        leaning_taskdata = []
        article_id_current_leaning = [
            article.article_id for article in articles
            if int(article.leaning) == leaning
        ]
        for i, rater in enumerate(raters_normal):
            name = 'rater' + str(i)
            my_triplets = []

            for j, article_rating in enumerate(rater):
                item = 'item_' + str(j) + '_'

                if article_rating['id'] in article_id_current_leaning:
                    my_article_triplets = [
                        (name, item + str(k), rating)
                        for k, rating in enumerate(article_rating[job])
                    ]
                    my_triplets += my_article_triplets

            leaning_taskdata += my_triplets

        ratingtask = agreement.AnnotationTask(data=leaning_taskdata,
                                              distance=interval_distance)
        ratingtask.K = categories[job]
        all_agreement_scores.append({leaning: ratingtask.alpha()})

    print('Krippendorff ' + job + ':')
    print(all_agreement_scores)

    return all_agreement_scores
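
For reference, AnnotationTask consumes exactly the (coder, item, label) triplets built above. A minimal, self-contained sketch with made-up raters, items, and ratings (all names and values below are illustrative only):

from nltk.metrics import agreement
from nltk.metrics.distance import interval_distance

# Hypothetical (coder, item, label) triplets on an interval rating scale.
toy_triplets = [
    ('rater0', 'item_0_0', 4), ('rater1', 'item_0_0', 5),
    ('rater0', 'item_1_0', 2), ('rater1', 'item_1_0', 2),
    ('rater0', 'item_2_0', 1), ('rater1', 'item_2_0', 3),
]

toy_task = agreement.AnnotationTask(data=toy_triplets,
                                    distance=interval_distance)
print(toy_task.alpha())  # Krippendorff's alpha with interval distance
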
Example #2
def print_krippendorff_per_job(raters, category):
    all_agreement_scores = []  # one {job: alpha} entry per job

    for job in jobs:
        job_taskdata = []

        for i, rater in enumerate(raters):
            name = 'rater' + str(i)
            my_triplets = []

            for j, article_rating in enumerate(rater):
                item = 'item_' + str(j) + '_'
                my_article_triplets = [
                    (name, item + str(k), rating)
                    for k, rating in enumerate(article_rating[job])
                ]
                my_triplets += my_article_triplets

            job_taskdata += my_triplets

        ratingtask = agreement.AnnotationTask(data=job_taskdata,
                                              distance=interval_distance)
        ratingtask.K = category[job]
        all_agreement_scores.append({job: ratingtask.alpha()})

    print('Krippendorff:')
    print(all_agreement_scores)

    return all_agreement_scores
Example #3
def overall_agreement(combined_df, coders, frame_types,
                      distance_metric=masi_distance):
    results = []
    for frame_type in frame_types:
        all_labels = format_labels(combined_df, coders, frame_type)
        task = agreement.AnnotationTask(data=all_labels,
                                        distance=distance_metric)
        results.append((frame_type, task.alpha()))
    return pd.DataFrame(results, columns=['Frame Type', 'Alpha'])
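
masi_distance compares sets of labels, so format_labels here presumably yields triplets whose label field is a frozenset of frames. A hedged sketch of that input shape (coder names and frame labels below are invented):

from nltk.metrics import agreement
from nltk.metrics.distance import masi_distance

# Each triplet is (coder, item, frozenset of assigned frame labels).
toy_labels = [
    ('c1', 'doc0', frozenset({'Economic'})),
    ('c2', 'doc0', frozenset({'Economic', 'Political'})),
    ('c1', 'doc1', frozenset({'Political'})),
    ('c2', 'doc1', frozenset({'Political'})),
]

toy_task = agreement.AnnotationTask(data=toy_labels, distance=masi_distance)
print(toy_task.alpha())  # Krippendorff's alpha with MASI set distance
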
Example #4
def overall_agreement(df_list,
                      frame_types,
                      distance_metric=masi_distance,
                      convert_mixed=True):
    results = []
    for frame in frame_types:
        if frame == 'Narrative' and convert_mixed:
            new_df_list = convert_narrative_mixed_to_both(df_list)
            all_labels = format_labels(new_df_list, frame)
        else:
            all_labels = format_labels(df_list, frame)
        task = agreement.AnnotationTask(data=all_labels,
                                        distance=distance_metric)
        results.append((frame, task.alpha()))
    return pd.DataFrame(results, columns=['Frame Type', 'Alpha'])
Example #5
def get_article_agreement_helper(my_data, my_cat, article_id):
    ratingtask = agreement.AnnotationTask(data=my_data,
                                          distance=interval_distance)
    ratingtask.K = my_cat

    article_agreement = {}
    article_agreement['id'] = article_id

    try:
        article_agreement['krippendorff'] = ratingtask.alpha()

    except ZeroDivisionError:
        article_agreement['krippendorff'] = 1.0

    return article_agreement
Example #6
def toy_cohens_kappa():
    # rater1 = [1, 1, 1, 0]
    # rater2 = [1, 1, 0, 0]
    # rater3 = [0, 1, 1]
    rater1 = ['s', 's', 's', 'g', 'u']
    rater2 = ['s', 's', 'g', 'g', 's']

    taskdata = ([[0, str(i), str(rater1[i])] for i in range(len(rater1))]
                + [[1, str(i), str(rater2[i])] for i in range(len(rater2))])
    # + [[2, str(i), str(rater3[i])] for i in range(len(rater3))]
    print(taskdata)
    ratingtask = agreement.AnnotationTask(data=taskdata)
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))

    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater2)))
Example #7
from typing import List

import numpy as np


def vectors_to_annotation_task(*args, drop: List[str] = []):
    """transform vectors of labels into a nltk AnnotationTask object.

    :param args: vector of labels for each annotator; add one argument per annotator.
    :type args: 1d np.array() of labels
    :param drop: list of labels that should be ignored
    :type drop: List[str]
    :return: the AnnotationTask object
    :rtype: nltk.metrics.agreement.AnnotationTask
    """
    from nltk.metrics import agreement

    v = np.vstack(args)
    it = np.nditer(v, flags=["multi_index"])

    if len(drop):
        data = [(it.multi_index[0], it.multi_index[1], str(x)) for x in it
                if str(x) not in drop]
    else:
        data = [(it.multi_index[0], it.multi_index[1], str(x)) for x in it]

    return agreement.AnnotationTask(data=data)
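
A hedged usage sketch for the helper above with two invented label vectors; the drop list filters a placeholder label ('?') out of both vectors before the task is built:

import numpy as np

# Two annotators' labels for the same five items; '?' marks a skipped item.
a = np.array(['pos', 'neg', 'pos', '?', 'neg'])
b = np.array(['pos', 'neg', 'neg', '?', 'neg'])

task = vectors_to_annotation_task(a, b, drop=['?'])
print(task.kappa())   # Cohen's kappa for the two coders
print(task.alpha())   # Krippendorff's alpha
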
Example #8
def cohens_kappa():

    data_folder = '/Users/fpena/UCC/Thesis/datasets/context/manuallyLabeledReviews/'

    business_type = Constants.ITEM_TYPE
    file_name = data_folder + '%s_%s_reviews.json'

    labelers = [
        # 'francisco',
        'diego',
        'mesut',
        'rohit',
    ]

    all_records = [
        load_data(file_name % (labeler, business_type)) for labeler in labelers
    ]

    rater1 = [record['review_type'] for record in all_records[0]]
    rater2 = [record['review_type'] for record in all_records[1]]
    rater3 = [record['review_type'] for record in all_records[2]]

    taskdata = [[0, str(i), str(rater1[i])] for i in range(0, len(rater1))] + [
        [1, str(i), str(rater2[i])] for i in range(0, len(rater2))
    ] + [[2, str(i), str(rater3[i])] for i in range(0, len(rater3))]
    print(taskdata)
    ratingtask = agreement.AnnotationTask(data=taskdata)
    print("Observed agreement " + str(ratingtask.avg_Ao()))
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))

    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater2)))
    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater3)))
    print("sklearn kappa " + str(cohen_kappa_score(rater2, rater3)))
Example #9
    #data5.append(("a4", idx, row["frank5"]))
    #data5.append(("a5", idx, row["pum5"]))


#label 6
data6 = []
for idx, row in df_final.iterrows():
    data6.append(("a1", idx, row["palang6"]))
    #data6.append(("a2", idx, row["pum6"]))
    data6.append(("a3", idx, row["vicky6"]))
    #data6.append(("a4", idx, row["frank6"]))
    #data6.append(("a5", idx, row["gill6"]))


#calculate agreement
label_1 = agreement.AnnotationTask(data=data1)
label_2 = agreement.AnnotationTask(data=data2)
label_3 = agreement.AnnotationTask(data=data3)
label_4 = agreement.AnnotationTask(data=data4)
label_5 = agreement.AnnotationTask(data=data5)
label_6 = agreement.AnnotationTask(data=data6)

#print
print("Cohen's Kappa Label 1:", label_1.kappa())
print("Cohen's Kappa Label 2:", label_2.kappa())
print("Cohen's Kappa Label 3:", label_3.kappa())
print("Cohen's Kappa Label 4:", label_4.kappa())
print("Cohen's Kappa Label 5:", label_5.kappa())
print("Cohen's Kappa Label 6:", label_6.kappa())
print("\n")
print("Fleiss's Kappa Label 1:", label_1.multi_kappa())
Example #10
print(
    "Cohen's kappa for Ukrainian comments between annotators labeler1 and labeler2: {:.3f}"
    .format(cohens_ua))
print(
    "Cohen's kappa for Russian comments between annotators labeler3 and labeler4: {:.3f}"
    .format(cohens_ru))

matthews_corrcoef_score_ua = matthews_corrcoef(labeler1, labeler2)
matthews_corrcoef_score_ru = matthews_corrcoef(labeler3, labeler4)
print(
    "Matthews corrcoef score for Ukrainian comments between annotators labeler1 and labeler2: {:.3f}"
    .format(matthews_corrcoef_score_ua))
print(
    "Matthews corrcoef score for Russian comments between annotators labeler3 and labeler4: {:.3f}"
    .format(matthews_corrcoef_score_ru))

from nltk.metrics import agreement

# Reformat the data into the form AnnotationTask expects.
labels_ru = labels_ru.dropna(subset=['Type of HS'])
data = []
for idx, row in labels_ru.iterrows():
    data.append(("a1", idx, row["Type of HS"]))
    data.append(("a2", idx, row["Type of 1 char"]))

atask = agreement.AnnotationTask(data=data)

print("Cohen's Kappa:", atask.kappa())
print("Fleiss's Kappa:", atask.multi_kappa())