def getInterraterReliabilityWithinGroup(group_list,df_annotations,shared_comments):
    """Compute Krippendorff's alpha (nominal) inside each rater group.

    The annotations are pivoted into a ``rev_id`` x ``worker_id`` table of
    summed ``attack`` values and restricted to the comments listed in
    ``shared_comments['rev_id']``.

    :param group_list: indexable collection of rater-id collections; entry 0
        is not read, the baseline (every rater with at least one rating on
        the shared comments) takes its place in the result
    :param df_annotations: frame with ``rev_id``, ``worker_id`` and
        ``attack`` columns
    :param shared_comments: object whose ``'rev_id'`` entry lists the
        comments to keep
    :return: list of alphas; index 0 is the baseline, index i (i >= 1)
        belongs to ``group_list[i]``; an empty group yields the fixed
        value 0.6
    """
    pivot = pd.pivot_table(df_annotations, values='attack', index=['rev_id'], columns=['worker_id'], aggfunc=np.sum)
    shared = pivot[pivot.index.isin(shared_comments['rev_id'])]

    # Baseline: keep only raters that rated at least one shared comment.
    baseline_rows = []
    for worker in shared.columns:
        ratings = shared[int(worker)].tolist()
        if not np.isnan(ratings).all():
            baseline_rows.append(ratings)
    print(len(shared))
    print(len(baseline_rows))
    alphas = [krippendorff.alpha(reliability_data=baseline_rows, level_of_measurement='nominal')]

    # Every remaining group uses all of its raters, without filtering.
    for idx in range(1, len(group_list)):
        group_rows = [shared[int(worker)].tolist() for worker in group_list[idx]]
        if group_rows:
            alphas.append(krippendorff.alpha(reliability_data=group_rows, level_of_measurement='nominal'))
        else:
            alphas.append(0.6)
    return alphas
def main(argv):
    """Print Krippendorff's alpha before and after dropping failing annotators.

    Reads the reliability matrix from ``FLAGS.input_file_path``, prints the
    alpha of the full matrix, then removes the row of every annotator whose
    failure count is at least ``FLAGS.num_failing`` and prints the alpha of
    the reduced matrix together with the removed annotators.

    :param argv: command-line arguments; more than one entry is rejected
    :raises app.UsageError: if extra command-line arguments are given
    :raises ValueError: if the input file does not exist
    """
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    if not os.path.exists(FLAGS.input_file_path):
        raise ValueError("No data found at %s" % FLAGS.input_file_path)

    reliability_matrix, annotators_to_idx, failed_count = get_reliability_matrix(
        FLAGS.input_file_path)

    alpha = krippendorff.alpha(reliability_matrix)
    print("Alpha without removing annotators: %.8f" % alpha)

    # Remove annotators whose failure count reaches the threshold.
    failed_annotators = [
        annotator for annotator, count in failed_count.items()
        if count >= FLAGS.num_failing
    ]
    # The indices in annotators_to_idx refer to the ORIGINAL matrix, so all
    # rows must be deleted in one call; deleting them one at a time shifts
    # the remaining rows and makes later indices point at the wrong rater.
    rows_to_remove = [
        annotators_to_idx[annotator] for annotator in failed_annotators
    ]
    if rows_to_remove:
        reliability_matrix = np.delete(reliability_matrix,
                                       rows_to_remove,
                                       axis=0)

    alpha = krippendorff.alpha(reliability_matrix)
    print("Alpha with removing annotators: %.8f" % alpha)
    print("Removed annotators: ", failed_annotators)
def compute_krippendorff(sce_path,
                         output_path='',
                         wo_attention_check=False,
                         bad_annotators_path='',
                         dataset=''):
    """
    Compute Krippendorff's alpha with krippendorff library
    (https://github.com/pln-fing-udelar/fast-krippendorff/blob/master/sample.py)

    The report is printed; when ``output_path`` is given, standard output is
    redirected to that file only for the duration of this call, and the file
    is closed (and ``sys.stdout`` restored) on return or on error.

    :param sce_path: csv file with columns UID, ANSWER, ANNOTATOR
    :param output_path: path of the output file where the results will be printed (if empty string the results are
    printed in the standard output)
    :param wo_attention_check: if True remove the attention check when computing alpha
    :param bad_annotators_path: path of the pkl file containing for each threshold the list of 'bad' annotators.
    For each threshold remove the annotations of the annotators listed when computing alpha. If empty string no
    annotator's annotation is removed.
    :param dataset: alphanumeric characters identifying the corpus to compute the alpha (if empty string the alpha is
    computed with annotation from all corpora and from attention check)
    """
    import contextlib

    with contextlib.ExitStack() as stack:
        if output_path:
            # Scoped redirection: the stack restores sys.stdout first and
            # then closes the file, even if something below raises.
            out_file = stack.enter_context(open(output_path, "w"))
            stack.enter_context(contextlib.redirect_stdout(out_file))

        rows = read_csv(sce_path, dataset=dataset)

        bad_annotators_per_th = get_bad_annotators(bad_annotators_path)
        for th, bad_annotators in bad_annotators_per_th.items():
            print(f'--- Threshold {th}---')
            annotations = get_annotations_per_annotators(
                rows,
                wo_attention_check=wo_attention_check,
                wo_annotator=bad_annotators)

            print('- After filtering: -')
            print_annotation_statistics(annotations)

            ratings_per_annotator = get_annotator_tab(annotations)

            # Falsy ratings (e.g. empty strings) are treated as missing.
            data = [[np.nan if not r else int(r) for r in ratings]
                    for ratings in ratings_per_annotator]

            print(
                "Krippendorff's alpha for nominal metric: ",
                krippendorff.alpha(reliability_data=data,
                                   level_of_measurement='nominal'))
            print("Krippendorff's alpha for interval metric: ",
                  krippendorff.alpha(reliability_data=data))
            print(
                "Krippendorff's alpha for ordinal metric: ",
                krippendorff.alpha(reliability_data=data,
                                   level_of_measurement='ordinal'))

            # Same measure computed with the nltk library.
            task_data = annotations2task_data(annotations)
            rating_task = AnnotationTask(data=task_data, distance=ordinal)
            print("Krippendorff's alpha for ordinal metric (nltk): ",
                  rating_task.alpha())
Esempio n. 4
0
def compute_reliability(date_scores):
    """Print agreement on date rankings for every annotated timeline.

    For each ``(topic, timeline name)`` key found in ``date_scores`` the
    gold timeline is loaded from ``./gold-timelines/<topic>/<name>.txt``.
    Each annotator's scores are converted to dense ranks (highest score
    gets rank 1, equal scores share a rank, unscored dates count as 0).
    The ordinal alpha over all annotators is printed first, followed by the
    interval alpha of every annotator pair.

    :param date_scores: mapping annotator -> {(topic, tl_name) -> {date -> score}}
    """
    tl_base_path = Path("./gold-timelines")
    all_annotators = sorted(date_scores)

    all_topics = {(topic, tl_name)
                  for scores in date_scores.values()
                  for topic, tl_name in scores}

    for topic, tl_name in all_topics:
        tl_path = tl_base_path / topic / (tl_name + ".txt")
        with open(tl_path, errors="ignore") as f:
            tl = Timeline.from_file(f)

            score_matrix = np.zeros((len(date_scores), len(tl.get_dates())))
            all_dates = sorted(tl.get_dates())

            for row, annotator in enumerate(all_annotators):
                raw_scores = date_scores[annotator][(topic, tl_name)]

                # Walk the dates from best to worst score and bump the rank
                # every time the score changes (dense ranking).
                ranks = {}
                rank = 0
                last_score = None
                by_score_desc = sorted(
                    all_dates,
                    key=lambda date: raw_scores.get(date, 0),
                    reverse=True)
                for date in by_score_desc:
                    current = raw_scores.get(date, 0)
                    if last_score is None or last_score != current:
                        rank += 1
                        last_score = current
                    ranks[date] = rank

                for col, date in enumerate(all_dates):
                    score_matrix[row, col] = ranks.get(date, 0)

        print(topic, tl_name,
              k.alpha(score_matrix, level_of_measurement="ordinal"))

        n_annotators = len(all_annotators)
        for first_idx, second_idx in it.combinations(range(n_annotators), 2):
            first_name = ANNOTATORS[all_annotators[first_idx]]
            second_name = ANNOTATORS[all_annotators[second_idx]]

            pair_rows = score_matrix[[first_idx, second_idx]]

            print(first_name, second_name,
                  k.alpha(pair_rows, level_of_measurement="interval"))
Esempio n. 5
0
def compare(et_coders, et_judgments, ht_coders, ht_judgments):
    """Return how much higher the agreement of job <et> is than that of <ht>.

    Both jobs are turned into reliability data with
    ``build_reliability_data`` and scored with ``krippendorff.alpha`` using
    its default level of measurement.

    :param et_coders: coders of the easier task
    :param et_judgments: labels assigned by the coders of the easier task
    :param ht_coders: coders of the harder task
    :param ht_judgments: labels assigned by the coders of the harder task
    :return: alpha of <et> minus alpha of <ht>
    """
    easy_alpha = krippendorff.alpha(
        build_reliability_data(et_coders, et_judgments))
    hard_alpha = krippendorff.alpha(
        build_reliability_data(ht_coders, ht_judgments))
    return easy_alpha - hard_alpha
def getInterraterReliabilityBetweenGroups(group_list,df_annotations,shared_comments):
    """Compute pairwise Krippendorff's alpha (nominal) between rater groups.

    The annotations are pivoted into a ``rev_id`` x ``worker_id`` table of
    summed ``attack`` values and restricted to the comments listed in
    ``shared_comments['rev_id']``. For every pair ``i < j`` the ratings of
    both groups are stacked and scored together. Group 0 is treated as the
    baseline: instead of ``group_list[0]`` it uses every rater that has at
    least one rating on the shared comments.

    :param group_list: indexable collection of rater-id collections
    :param df_annotations: frame with ``rev_id``, ``worker_id`` and
        ``attack`` columns
    :param shared_comments: object whose ``'rev_id'`` entry lists the
        comments to keep
    :return: symmetric ``(n_groups, n_groups)`` array of alphas; cells that
        are never computed (the diagonal) are NaN
    """
    n_groups = len(group_list)
    # np.empty would leave arbitrary memory on the diagonal, which is never
    # written below; NaN marks those cells as "not computed".
    alphas = np.full((n_groups, n_groups), np.nan)

    # pivot data frame ('sum' is what pandas has always applied for np.sum)
    df_annotations_pivot = pd.pivot_table(df_annotations, values='attack', index=['rev_id'], columns=['worker_id'], aggfunc='sum')

    # restrict to the shared comments
    shared = df_annotations_pivot[df_annotations_pivot.index.isin(shared_comments['rev_id'])]

    def ratings_of(raters):
        # one row of ratings per rater id
        return [shared[int(rater)].tolist() for rater in raters]

    baseline_rows = None  # built lazily, once, instead of once per j
    for i in range(n_groups):
        for j in range(i + 1, n_groups):
            if i == 0:
                if baseline_rows is None:
                    baseline_rows = [
                        ratings for ratings in ratings_of(shared.columns)
                        # keep only raters with at least one annotation
                        if np.count_nonzero(~np.isnan(ratings)) != 0
                    ]
                first_rows = baseline_rows
            else:
                first_rows = ratings_of(group_list[i])

            annotations_of_group = first_rows + ratings_of(group_list[j])

            alpha = krippendorff.alpha(reliability_data=annotations_of_group, level_of_measurement='nominal')
            alphas[i][j] = alpha
            alphas[j][i] = alpha

    return alphas
Esempio n. 7
0
def compute_alpha(item_annotations):
    """Compute the interval Krippendorff's alpha over batched annotations.

    Every annotator of every batch becomes one row. A row is left-padded
    with NaN up to the length of the last row produced by the preceding
    batch, and all rows are then right-padded with NaN to a common width.

    :param item_annotations: iterable of batches; each batch is an iterable
        of per-annotator rating lists
    :return: the value returned by ``krippendorff.alpha``
    """
    rows = []
    for batch in item_annotations:
        # Offset is fixed per batch, before any of its rows are added.
        offset = len(rows[-1]) if rows else 0
        for annotator_data in batch:
            rows.append([np.nan] * offset + annotator_data)

    width = max(len(row) for row in rows)
    for row in rows:
        row.extend([np.nan] * (width - len(row)))

    return krippendorff.alpha(reliability_data=np.array(rows),
                              level_of_measurement='interval')
Esempio n. 8
0
    def alpha(self,
              ids=None,
              staff="upper",
              common_id=None,
              lib='nltk',
              label='bigram',
              distance=None):
        """Return Krippendorff's alpha for the annotations of one staff.

        :param ids: passed to ``_staff_annotation_data`` (``[]`` when None)
        :param staff: ``'upper'`` or ``'lower'``
        :param common_id: passed to ``_staff_annotation_data``
        :param lib: ``'nltk'`` scores with ``AnnotationTask``; any other
            value scores with the module-level ``alpha`` function
        :param label: passed to ``_staff_annotation_data``; with
            ``'bigram'`` and no explicit distance,
            ``DScore.bigram_label_distance`` is used
        :param distance: distance (nltk) or level of measurement (other
            library); defaults to ``binary_distance`` / ``'nominal'``
        :raises Exception: if ``staff`` is neither ``'upper'`` nor ``'lower'``
        """
        if staff not in ('upper', 'lower'):
            raise Exception(
                "Alpha measure only applicable one staff at a time.")

        data = self._staff_annotation_data(ids=[] if ids is None else ids,
                                           staff=staff,
                                           lib=lib,
                                           label=label,
                                           common_id=common_id)
        if distance is None and label == "bigram":
            distance = DScore.bigram_label_distance

        if lib == 'nltk':
            chosen = binary_distance if distance is None else distance
            return AnnotationTask(data=data, distance=chosen).alpha()

        level = 'nominal' if distance is None else distance
        return alpha(reliability_data=data, level_of_measurement=level)
Esempio n. 9
0
    def krippendorff_alpha(self, window=1.):
        """Return the nominal Krippendorff's alpha between ``t1p`` and ``t2p``.

        Both objects are sampled with ``attime`` from ``self.start`` up to
        (not including) ``self.end`` in steps of ``window``. A sample equal
        to ``MISSINGDATA`` is recorded as NaN, any other sample by its
        ``hash``.

        :param window: sampling step
        """
        import krippendorff
        import numpy as np

        def encode(label):
            # hash() turns an arbitrary label into a numeric nominal code
            return np.nan if label == MISSINGDATA else hash(label)

        first_codes, second_codes = [], []
        t = self.start
        while t < self.end:
            first_codes.append(encode(self.t1p.attime(t)))
            second_codes.append(encode(self.t2p.attime(t)))
            t += window

        return krippendorff.alpha([first_codes, second_codes],
                                  level_of_measurement='nominal')
Esempio n. 10
0
    def pypi_alpha_old(self,
                       ids=None,
                       staff="both",
                       common_id=None,
                       label='bigram',
                       distance=binary_distance):
        """Return the nominal alpha for one staff over a fixed value domain.

        :param ids: passed to the reliability-data builder (``[]`` when None)
        :param staff: must be ``'upper'`` or ``'lower'``; note that the
            default ``"both"`` is rejected, so callers have to pass it
        :param common_id: passed to the reliability-data builder
        :param label: ``'bigram'`` selects ``_bigram_reliability_data``,
            anything else ``_reliability_data``
        :param distance: accepted but not used by this method
        :raises Exception: if ``staff`` is neither ``'upper'`` nor ``'lower'``
        """
        if staff not in ('upper', 'lower'):
            raise Exception(
                "PyPI krippendorff alpha only applicable one staff at a time.")
        if ids is None:
            ids = []

        build = (self._bigram_reliability_data
                 if label == 'bigram' else self._reliability_data)
        data = build(ids=ids, staff=staff, common_id=common_id)

        value_domain = [
            '>1', '>2', '>3', '>4', '>5', '<1', '<2', '<3', '<4', '<5'
        ]
        return alpha(reliability_data=data,
                     level_of_measurement='nominal',
                     value_domain=value_domain)
 def alpha_for_question(self, raters_to_exclude=None):
     """Return Krippendorff's alpha for this question.

     The reliability data comes from ``self.to_reliability``; the value
     domain is the sorted values of ``self.values_map`` and the level of
     measurement is ``self.alpha_distance``.

     :param raters_to_exclude: raters passed on to ``to_reliability``;
         ``None`` (the default) means an empty set
     """
     # A fresh set per call: a ``set()`` default would be one shared object
     # that any callee could mutate for all later calls.
     if raters_to_exclude is None:
         raters_to_exclude = set()
     reliability_data = self.to_reliability(
         raters_to_exclude=raters_to_exclude)
     value_domain = sorted(self.values_map.values())
     k_alpha = alpha(reliability_data=reliability_data,
                     value_domain=value_domain,
                     level_of_measurement=self.alpha_distance)
     return k_alpha
def scoreAlpha(answerMatrix, distanceFunc):
    """Return Krippendorff's alpha for a value-counts matrix.

    ``answerMatrix`` is handed to ``krippendorff.alpha`` as ``value_counts``;
    ``distanceFunc`` is the level of measurement and should be 'nominal',
    'ordinal', 'interval', 'ratio' or a callable.
    """
    return krippendorff.alpha(value_counts=answerMatrix,
                              level_of_measurement=distanceFunc)
Esempio n. 13
0
def score(path):
    """Print the ratio-level alpha and the mean score of a headerless CSV.

    The transposed table is what gets passed to ``krippendorff.alpha``, so
    each CSV column becomes one row of the reliability data. The mean is
    the mean of the per-row means.

    :param path: path of a comma-separated file without header row
    :return: the loaded DataFrame
    """
    df = pd.read_csv(path, sep=',', header=None)
    row_means = df.mean(axis=1)
    total_mean = row_means.mean()
    krip = krippendorff.alpha(df.T.values, level_of_measurement='ratio')
    print("Krippendorff's alpha coefficient:", round(krip, 3))
    print("Mean score:", round(total_mean, 3))
    return df
Esempio n. 14
0
 def krippendorfs_alpha(upper_rh_advice, exercise_upper_gold):
     """Return the interval alpha of the gold fingerings plus one advice.

     The first element of ``upper_rh_advice`` is dropped and the remaining
     elements are converted with ``int`` to form one extra row that is
     scored together with the rows of ``exercise_upper_gold``.

     :param upper_rh_advice: iterable whose elements after the first are
         int-convertible
     :param exercise_upper_gold: existing reliability rows; not modified
     :raises IndexError: if ``upper_rh_advice`` is empty
     """
     fingerings = list(upper_rh_advice)
     fingerings.pop(0)
     finger_ints = list(map(int, fingerings))
     # Score a new list instead of appending to / popping from the caller's
     # list, which stayed modified whenever alpha() raised.
     reliability_data = list(exercise_upper_gold) + [finger_ints]
     return alpha(reliability_data=reliability_data,
                  level_of_measurement='interval')
Esempio n. 15
0
def validate(output, test_labels) -> Tuple[float, float, float]:
    """Return accuracy, Pearson correlation and interval alpha.

    Both ``output`` and ``test_labels`` are passed through ``predict`` (the
    true labels too, to convert them the same way as the model output).

    :param output: model output accepted by ``predict``
    :param test_labels: true labels accepted by ``predict``
    :return: ``(accuracy, correlation, krippendorff_alpha)``
    """
    p = list(predict(output))
    # Materialise the labels as well: they are read three times below, and
    # a single-pass iterable would already be exhausted after zip().
    actual = list(predict(test_labels))
    acc = sum(x == y for x, y in zip(p, actual)) / len(p)
    cor = np.corrcoef(p, actual)[1][0]
    alpha = krippendorff.alpha(np.vstack([p, actual]),
                               level_of_measurement='interval')
    return acc, cor, alpha
Esempio n. 16
0
def main():
    """Print alphas for the Wikipedia example, from ratings and from counts.

    The first half scores three coders' ratings ('*' marks a missing
    rating); the second half scores a value-counts table. Each half prints
    the nominal alpha and the alpha for the library's default level.
    """
    print("Example from http://en.wikipedia.org/wiki/Krippendorff's_Alpha")
    print()
    coder_rows = (
        "*    *    *    *    *    3    4    1    2    1    1    3    3    *    3",  # coder A
        "1    *    2    1    3    3    4    3    *    *    *    *    *    *    *",  # coder B
        "*    *    2    1    3    4    4    *    2    1    1    3    3    *    4",  # coder C
    )
    print('\n'.join(coder_rows))
    print()

    reliability_data = []
    for coder_row in coder_rows:
        reliability_data.append(
            [np.nan if cell == '*' else int(cell) for cell in coder_row.split()])

    nominal = krippendorff.alpha(reliability_data=reliability_data,
                                 level_of_measurement='nominal')
    print("Krippendorff's alpha for nominal metric: ", nominal)
    interval = krippendorff.alpha(reliability_data=reliability_data)
    print("Krippendorff's alpha for interval metric: ", interval)

    print()
    print()
    print("From value counts:")
    print()
    count_rows = [
        [1, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 2, 0, 0],
        [2, 0, 0, 0],
        [0, 0, 2, 0],
        [0, 0, 2, 1],
        [0, 0, 0, 3],
        [1, 0, 1, 0],
        [0, 2, 0, 0],
        [2, 0, 0, 0],
        [2, 0, 0, 0],
        [0, 0, 2, 0],
        [0, 0, 2, 0],
        [0, 0, 0, 0],
        [0, 0, 1, 1],
    ]
    value_counts = np.array(count_rows)
    print(value_counts)
    nominal = krippendorff.alpha(value_counts=value_counts,
                                 level_of_measurement='nominal')
    print("Krippendorff's alpha for nominal metric: ", nominal)
    interval = krippendorff.alpha(value_counts=value_counts)
    print("Krippendorff's alpha for interval metric: ", interval)
Esempio n. 17
0
def calculate_agreement(main_annotator, second_annotator, attribute_type):
    """Print the nominal alpha between an attribute and the 'annotator' entry.

    The main annotator's data is first reduced with
    ``get_perspectives_based_on_sample`` (for ``'perspective'``) or
    ``get_stance_based_on_sample`` (for anything else). The two rows scored
    are the reduced data's ``attribute_type`` and ``'annotator'`` entries.

    :param main_annotator: second argument of the sampling helper
    :param second_annotator: first argument of the sampling helper
    :param attribute_type: attribute to score; also selects the helper
    """
    if attribute_type == 'perspective':
        sample = get_perspectives_based_on_sample
    else:
        sample = get_stance_based_on_sample
    main_annotator_sub = sample(second_annotator, main_annotator)

    rows = [main_annotator_sub[attribute_type], main_annotator_sub['annotator']]
    krippendorff_alpha = krippendorff.alpha(reliability_data=rows,
                                            level_of_measurement='nominal')
    print(attribute_type, ': ', krippendorff_alpha)
Esempio n. 18
0
 def krippendorffAlpha(self, listOfXLSXFiles=None, mode="mweAgreement"):
     """Return Krippendorff's alpha per XLSX file and for all files together.

     :param listOfXLSXFiles: XLSX file names to read; ``None`` (the
         default) means no files
     :param mode: selects the reader: ``"mweAgreement"``,
         ``"semAgreement"`` or ``"freeAdjAgreement"``
     :return: list of ``(file, alpha)`` tuples followed by ``("All", alpha)``
     :raises ValueError: if a file has to be read with an unknown ``mode``
     """
     s = KrippendorffMeasure
     readers = {
         "mweAgreement": s.readXLSXFile_mweAgreement,
         "semAgreement": s.readXLSXFile_semAgreement,
         "freeAdjAgreement": s.readXLSXFile_freeAdjAgreement,
     }
     xlsx_files = [] if listOfXLSXFiles is None else listOfXLSXFiles

     annotator_values_inWholeDataset = []
     k_list = []
     for xlsx_file in xlsx_files:
         reader = readers.get(mode)
         if reader is None:
             # Previously this only printed a message and then continued
             # with an undefined (or stale) annotator_values.
             raise ValueError("Error: unknown mode: {!r}".format(mode))
         annotator_values = reader(self, xlsx_filename=xlsx_file)
         reliability_matrix = s.krippendorffReliabilityMatrix(
             self, annotator_values)
         a = krippendorff.alpha(
             reliability_data=reliability_matrix,
             value_counts=None,
             level_of_measurement='nominal')
         k_list.append((xlsx_file, a))
         annotator_values_inWholeDataset.extend(annotator_values)

     reliability_matrix_inWholeDataset = s.krippendorffReliabilityMatrix(
         self, annotator_values_inWholeDataset)
     # NOTE(review): the per-file alphas use 'nominal' but the whole-dataset
     # alpha uses 'interval' - confirm this difference is intended.
     a = krippendorff.alpha(
         reliability_data=reliability_matrix_inWholeDataset,
         value_counts=None,
         level_of_measurement='interval')
     k_list.append(("All", a))
     return k_list
Esempio n. 19
0
 def __init__(self, separator=': ', **kwargs):
     """Store the data and precompute every intermediate of the alpha score.

     :param separator: stored as ``self.separator``
     :param kwargs: must contain ``'data'`` (converted to an array) and
         ``'args'`` (positional arguments for ``krippendorff.Difference``);
         the whole mapping is kept as ``self.target``
     """
     self.target = kwargs
     self.data = np.array(kwargs['data'])
     self.difference = krippendorff.Difference(*kwargs['args'])
     self.separator = separator
     self.labels = ('Data', 'Data type', 'Difference method',
                    'Observed agreement', 'Expected agreement',
                    'Alpha score')
     # ``value == value`` is false only for NaN-like entries, which are
     # therefore left out of the value set.
     self.values = {value for value in self.data.flatten() if value == value}
     self.codebook = {value: code for code, value in enumerate(self.values)}
     self.inverse_codebook = {
         code: value for code, value in enumerate(self.values)
     }
     self.cm = krippendorff.get_coincidence_matrix(self.data, self.codebook)
     self.d = krippendorff.delta(self.cm, self.inverse_codebook,
                                 self.difference)
     self.observed = krippendorff.observation(self.cm, self.d)
     self.expected = krippendorff.expectation(self.cm, self.d)
     self.alpha = krippendorff.alpha(self.data, self.difference)
 def print_alpha_for_question(self, raters_to_exclude=None):
     """Print Krippendorff's alpha and summary figures for this question.

     The reliability data comes from ``self.to_reliability``; its first
     dimension is reported as the maximum number of raters and its second
     as the number of units.

     :param raters_to_exclude: raters passed on to ``to_reliability``;
         ``None`` (the default) means an empty set
     """
     # A fresh set per call: a ``set()`` default would be one shared object
     # that any callee could mutate for all later calls.
     if raters_to_exclude is None:
         raters_to_exclude = set()
     reliability_data = self.to_reliability(
         raters_to_exclude=raters_to_exclude)
     value_domain = sorted(self.values_map.values())
     pairable_values = calc_pairable_values(reliability_data, value_domain)
     k_alpha = alpha(reliability_data=reliability_data,
                     value_domain=value_domain,
                     level_of_measurement=self.alpha_distance)
     maximum_raters = reliability_data.shape[0]
     total_units = reliability_data.shape[1]
     print("----{}".format(self.label))
     print("{}".format(self.question_text))
     print("Units: {} Max raters: {} Pairable values: {}".format(
         total_units, maximum_raters, pairable_values))
     print(
         "Krippendorff alpha for '{}' is {:.3f} Alpha distance: {} Value domain: {}"
         .format(self.label, k_alpha, self.alpha_distance, value_domain))
Esempio n. 21
0
def print_alpha_for_topic(topic_name, rows, maximum_raters, cumulative_length,
                          virtual_corpus_positions):
    """Print the nominal Krippendorff's alpha of topic assignments.

    A ``(maximum_raters, cumulative_length)`` matrix is filled with NaN.
    For every row yielded by ``output_generator`` the positions
    ``start_pos:end_pos`` of the rater ``user_sequence_id`` are set to the
    row's ``topic_number``.

    :param topic_name: label used in the printed line
    :param rows: first argument of ``output_generator``
    :param maximum_raters: number of matrix rows
    :param cumulative_length: number of matrix columns
    :param virtual_corpus_positions: second argument of ``output_generator``
    """
    reliability_data = np.full((maximum_raters, cumulative_length),
                               np.nan,
                               dtype=float)
    for _, output_row in output_generator(rows, virtual_corpus_positions):
        span = slice(output_row['start_pos'], output_row['end_pos'])
        rater = output_row['user_sequence_id']
        reliability_data[rater][span] = float(output_row['topic_number'])

    k_alpha = alpha(reliability_data=reliability_data,
                    level_of_measurement='nominal')
    print("Krippendorff alpha is {:.3f} for '{}'".format(k_alpha, topic_name))
Esempio n. 22
0
def krippen_alpha(df, metric, lom):
    """Compute Krippendorff's alpha for one column of a dataframe.

    A value-counts table is built with one row per unique ``questionId``
    and one column per unique value of ``metric``; each cell counts how
    often that value was assigned to that question.

    :param df: dataframe with a ``questionId`` column and the ``metric`` column
    :param metric: name of the column to score
    :param lom: level of measurement passed to ``krippendorff.alpha``
    """
    metric_vals = np.unique(df[metric])
    uniq_ids = np.unique(df['questionId'])

    def positions_in_domain(vals):
        # index of each value inside the sorted unique metric values
        return np.array(
            [np.nonzero(metric_vals == val)[0][0] for val in vals])

    value_counts = np.zeros((uniq_ids.shape[0], metric_vals.shape[0]),
                            dtype=float)
    for row, q_id in enumerate(uniq_ids):
        # the query string refers to the local variable q_id
        assignments = df.query('questionId==@q_id')[metric].to_numpy()
        vals, counts = np.unique(assignments, return_counts=True)
        value_counts[row][positions_in_domain(vals)] += counts

    return krippendorff.alpha(value_counts=value_counts, value_domain=metric_vals.tolist(), level_of_measurement=lom)
def get_krippendorffs_alpha(evals_matrix):
    """Compute Krippendorff's alpha over all the pathologists' annotations.

    This statistic can work with missing data, so missing values need no
    special treatment. Because the classification is of the form
    (0, 1, 2, 3) and those values are ordered, an ordinal metric is used.

    Args:
        - evals_matrix: np.array(), matrix with the evaluations made by the
        pathologists. It has shape (N, M), where N is the number of samples
        and M is the number of annotators. The matrix must be of float
        type, and missing data, if any, must be encoded as np.nan.

    Returns:
        dict[str: float] with a single key, krippendorff_alpha, holding the
        computed Krippendorff's alpha.
    """
    # The library is given the transposed matrix.
    transposed = evals_matrix.transpose()
    return {
        "krippendorff_alpha":
        krippendorff.alpha(transposed, level_of_measurement="ordinal")
    }
Esempio n. 24
0
def _strip_csv_suffix(filename):
    """Return *filename* without a trailing '.csv' (unchanged otherwise)."""
    suffix = '.csv'
    return filename[:-len(suffix)] if filename.endswith(suffix) else filename


def cohkap(pdfilename, grtfilename, resultfilename, y=0):
    """Append Cohen's kappa and weighted F1 of predictions vs. each rater.

    The prediction file (columns ``image`` and ``label``) is inner-merged
    with the ground-truth file on ``image``. For every ground-truth column
    after the first, the quadratic-weighted Cohen's kappa and the weighted
    F1 score against the predicted ``label`` are computed and appended as
    one row to ``<resultfilename without .csv>_coh.csv`` and ``..._f1.csv``.
    The header is written only when the result file does not exist yet.

    :param pdfilename: path of the prediction CSV
    :param grtfilename: path of the ground-truth CSV
    :param resultfilename: base name of the two result files
    :param y: if truthy, also write Krippendorff's alpha of the ground-truth
        columns into the entry widget ``E4`` (``E4``, ``Entry`` and ``END``
        are expected to be defined elsewhere in the file)
    """
    # Reading the respective csv files
    prediction_df = pd.read_csv(pdfilename)[['image', 'label']]
    ground_df = pd.read_csv(grtfilename)
    # str.strip('.csv') removes any of the characters '.', 'c', 's', 'v'
    # from BOTH ends (e.g. 'scores.csv' -> 'ore'); remove the suffix only.
    result_base = _strip_csv_suffix(resultfilename)
    result_coh = result_base + '_coh.csv'
    result_f1 = result_base + '_f1.csv'
    prediction_name = _strip_csv_suffix(pdfilename.split('/')[-1])

    # performing inner merge
    mergedf_in = pd.merge(prediction_df, ground_df, on='image', how='inner')
    merged_list = mergedf_in.values[:, 1:].transpose().tolist()
    kap = np.zeros(len(ground_df.columns[1:]))
    f1 = np.zeros(len(ground_df.columns[1:]))

    # Kappa and F1 of the predictions against each rater column
    for i in range(len(ground_df.columns)-1):
        kap[i] = cohen_kappa_score(mergedf_in.iloc[:,1:2], mergedf_in.iloc[:,i+2:i+3], weights='quadratic')
        f1[i] = f1_score(merged_list[0], merged_list[i+1], average='weighted')
    columns = mergedf_in.columns.tolist()
    columns.remove('image')
    columns.remove('label')
    kap = pd.DataFrame(kap, index=[columns], columns=[prediction_name]).transpose()
    f1 = pd.DataFrame(f1, index=[columns], columns=[prediction_name]).transpose()
    if y:
        M = ground_df.values[:,1:].transpose()
        E4.delete(0, END)
        Entry.insert(E4, 0, krippendorff.alpha(M.tolist()))

    # Writing Coh to csv (header only for a new file)
    kap.to_csv(result_coh, mode='a', header=not os.path.exists(result_coh))
    # Writing F1 to csv (header only for a new file)
    f1.to_csv(result_f1, mode='a', header=not os.path.exists(result_f1))
Esempio n. 25
0
    def get_agreement(self, on="HATE", users=None):
        """
        Get agreement and its support.

        Arguments:
        ---------

           on: "string"

           Must be one of
            - hate
            - MUJER
            - LGBTI
            - RACISMO
            - POBREZA
            - DISCAPACIDAD
            - POLITICA
            - ASPECTO
            - CRIMINAL
            - OTROS

           users: optional row selector applied with ``.loc`` to the
           labelled comments

        Returns a tuple ``(alpha, support)``; alpha is NaN when the support
        is zero.
        """
        labelled = self.get_labelled_comments(on)
        if users:
            labelled = labelled.loc[users]

        # Support: number of columns holding at least one positive label.
        positives_per_column = (labelled > 0).sum()
        support = (positives_per_column > 0).sum()
        if support == 0:
            return np.nan, support

        scores = labelled.values.astype('float')
        return krippendorff.alpha(scores), support
Esempio n. 26
0
def getWeightKrippendorffMatrix(df,min_overlap=1):
    """Build a symmetric weight matrix from pairwise nominal alphas.

    For every pair ``(i, j)`` the two rating vectors are reduced to the
    positions both have rated (non-NaN). If at least ``min_overlap``
    positions remain, the nominal Krippendorff's alpha ``a`` of the pair is
    turned into the weight ``0.5 + (a + 1) / 2``; otherwise (or when no
    valid alpha is available) the weight is 0.

    :param df: dataframe of ratings with NaN for missing values
    :param min_overlap: minimum number of commonly rated positions
    :return: ``(weight_matrix, overlaps)`` where ``overlaps`` lists the
        overlap size of every pair in iteration order
    """
    df_matrix = df.to_numpy()
    # NOTE(review): the pair count comes from the number of COLUMNS, but the
    # vectors below are taken from ROWS (df_matrix[i]). This is only
    # consistent for a square matrix - confirm the intended orientation.
    number_workers = np.size(df_matrix,1)
    distance_matrix = np.zeros((number_workers,number_workers))
    list_over = []
    for i in tqdm(range(0,number_workers)):
        for j in range(i+1,number_workers):
            annotator_1 = df_matrix[i]
            annotator_2 = df_matrix[j]

            # keep only the positions rated by both
            both_rated = ~np.isnan(annotator_1) & ~np.isnan(annotator_2)
            annotator_1_cleaned = annotator_1[both_rated]
            annotator_2_cleaned = annotator_2[both_rated]

            len_overlap = len(annotator_2_cleaned)
            list_over.append(len_overlap)

            weight = 0
            if len_overlap >= min_overlap:
                # Reset per pair: after a caught warning the old code read a
                # kd_value that was undefined or left over from another pair.
                kd_value = None
                try:
                    kd_value = krippendorff.alpha(reliability_data=[annotator_1_cleaned,annotator_2_cleaned],
                                                  level_of_measurement='nominal')
                except RuntimeWarning:
                    print(annotator_1_cleaned)
                    print(annotator_2_cleaned)
                # the comparison is also False for a NaN alpha
                if kd_value is not None and kd_value < 2:
                    weight = 0.5+ (kd_value +1)/(2)

            distance_matrix[i,j] = weight
            distance_matrix[j,i] = weight

    return (distance_matrix,list_over)
def _ratings_from_column(column, weights=None):
    """Return one rater's column as a list, with NaN for missing cells.

    A cell is missing when pandas reports it as NA or when its text is
    blank. With ``weights``, every other cell is replaced by its weight.
    """
    ratings = []
    for value in column:
        if pd.isna(value) or str(value).strip() == '':
            ratings.append(np.nan)
        elif weights is None:
            ratings.append(value)
        else:
            ratings.append(weights[value])
    return ratings


def main():
    """Command-line entry point: print Krippendorff's alpha for a data file.

    Positional arguments are the level of measurement (``nominal`` or
    ``ordinal``) and the data CSV; ``-w/--weights`` names the weights file,
    which is required for ordinal data. Every data column except the first
    is treated as one rater. For ordinal data each cell is replaced by the
    weight listed for it in the weights file (lines of the form
    ``category, weight``).
    """
    parser = argparse.ArgumentParser(description='This program calculates '
        'Krippendorff\'s Alpha given nominal data or ordinal data.')
    parser.add_argument('level_of_measurement', choices=['nominal', 'ordinal'],
    nargs=1, help='set level of measurement')
    parser.add_argument('data', nargs=1, help='set the data file')
    parser.add_argument('-w', '--weights', nargs=1, help='set the weights file,'
    ' required for ordinal data only')

    args = parser.parse_args()
    metric = args.level_of_measurement[0]
    data = args.data[0]
    if metric == 'ordinal' and args.weights is None:
        parser.error('Must provide a separate file for weights.')
    check_input(data)

    df = pd.read_csv(data)
    weights = None
    if metric == 'ordinal':
        check_input(args.weights[0])
        # raw string: "\s" is a regex class, not a Python escape sequence
        df1 = pd.read_csv(args.weights[0], sep=r",\s", header=None,
            names=["c", "w"], engine="python")
        weights = dict(zip(df1.c, df1.w))

    # Column 0 is skipped; columns are selected by position with iloc.
    reliability_data = [
        _ratings_from_column(df.iloc[:, i], weights)
        for i in range(1, len(df.columns))
    ]

    print("Krippendorff's alpha for {} metric:".format(metric),
    krippendorff.alpha(reliability_data=reliability_data,
                       level_of_measurement=metric))
Esempio n. 28
0
def krippendorff_metric_study(case_, additional_measures):
    """Compute Krippendorff's alpha of delegate presence across rounds.

    ``data/<case_>.csv`` is turned into a 0/1 table with one row per unique
    ``timeunit`` and one column per unique ``name``; a cell is 1 when that
    name occurs in that round.

    :param case_: file name (without ``.csv``) inside ``data/``
    :param additional_measures: if truthy, also print kappa, Fleiss' kappa
        and Scott's pi computed with nltk's ``AnnotationTask``
    :return: the alpha returned by ``ka.alpha`` for the 0/1 table
    """
    data_raw = pd.read_csv("data/" + case_ + ".csv")

    rounds_ = data_raw['timeunit'].unique()
    delegates = data_raw['name'].unique()

    print("Loaded {rounds} sample rounds".format(rounds=len(rounds_)))
    print("Rounds contain {delegates} delegates".format(
        delegates=len(delegates)))

    data_new = pd.DataFrame(0, index=rounds_, columns=delegates)
    for round_ in rounds_:
        # NOTE(review): rounds are taken from 'timeunit' but rows are
        # selected on 'year' - confirm both columns hold the same values.
        names_in_round = list(data_raw[data_raw['year'] == round_]['name'])
        for name_ in names_in_round:
            # Single .loc write: chained data_new[name_][round_] = 1 does
            # not update the frame under pandas Copy-on-Write.
            data_new.loc[round_, name_] = 1

    alpha = ka.alpha(data_new)
    print("k'aplpha " + str(alpha))

    # TODO: test this
    if additional_measures:
        data_new_nltk = []
        for round_ in rounds_:
            for name_ in delegates:
                data_new_nltk.append(
                    [round_, name_, data_new.loc[round_, name_]])

        ratingtask = agreement.AnnotationTask(data=data_new_nltk)
        print("kappa " + str(ratingtask.kappa()))
        print("fleiss " + str(ratingtask.multi_kappa()))
        print("scotts " + str(ratingtask.pi()))

    return alpha
Esempio n. 29
0
def buildKrippendorffMatrix(surveys):
    """Build the rater/term category matrix, compute alpha, write the CSV.

    Relies on names defined elsewhere in the file: ``categoryList``,
    ``column``, ``persIDdict``, ``resultName`` and ``kp``.

    Side effects:
        * Every value of ``persIDdict`` is extended in place with
          ``"," + <category ID>`` for each parsed term and each survey that
          contains that participant ID.
        * Writes ``lsg__krippendorff__<resultName>.csv`` to the current
          working directory.

    Args:
        surveys: Mapping of survey file name to survey table. The columns of
            the first survey drive the iteration; each table is indexed by
            column name and must provide an ``"id. Response ID"`` column.
            Presumably these are pandas DataFrames -- TODO confirm.

    Raises:
        IndexError: If the category ID for an answer cannot be appended
            (typically the category is missing from the category mapping).
        KeyError: If anything else fails while reading a term from a survey.
    """
    # Category name -> 1-based ID, in categoryList order; "Other" and "None"
    # receive the two IDs following the configured categories.
    categories = {}
    categoryIndex = 1
    for category in categoryList:
        categories[category[0].strip()] = categoryIndex
        categoryIndex = categoryIndex + 1
    categories["Other"] = categoryIndex
    categories["None"] = categoryIndex + 1

    # The first input survey supplies the list of questions/terms.
    surveyName = list(surveys)[0]
    survey = surveys[surveyName]

    # Running number used in the first header line.
    termNumber = 1
    # Pieces of the first and second header line of the Krippendorff CSV;
    # collected in lists and joined once at the end.
    firstTopParts = ["Rater"]
    secondTopParts = []
    # Legend lines at the bottom of the CSV: category names, then their IDs.
    firstBottom = "".join(category.strip() + "," for category in categories)
    secondBottom = "".join(
        str(categories[category.strip()]) + "," for category in categories)

    # Index of the column currently being looked at.
    currentColumn = 0

    for term in survey:

        # A column starting with 'researchfield' marks the end of the
        # actual answer data.
        if term.lower().startswith("researchfield"):
            break

        # Only columns at or after the configured start column hold data.
        if currentColumn >= column:
            title = term.split(".")[0].split("[")[0]
            # Titles ending in "C" or "CQ" are 'other'/comment questions and
            # are ignored. Skipping the increment below is harmless here:
            # currentColumn has already reached the start column.
            if (title[-1] == "C" or title[-2:] == "CQ"):
                continue

            noun = term.split("].")[1].split("[")[1][:-1].replace(",", ";")
            firstTopParts.append(",<title>-Term" + str(termNumber))
            termNumber = termNumber + 1
            secondTopParts.append("," + title + "-" + noun)

            for persID in persIDdict:
                try:
                    for surveyKey in surveys:
                        persIDs = surveys[surveyKey]["id. Response ID"]
                        # Surveys that do not contain this participant
                        # contribute nothing.
                        if persID not in persIDs.tolist():
                            continue
                        surveyIDindex = persIDs[persIDs == persID].index[0]

                        answer = str(surveys[surveyKey][term][surveyIDindex])
                        if answer.startswith("<div>other"):
                            category = "Other"
                        elif answer == "nan":
                            # No answer given (NaN after str()).
                            category = "None"
                        else:
                            # Text of the last tag opened before the first
                            # closing </span>.
                            category = answer.split("</span>")[0].split(
                                ">")[-1]

                        try:
                            # Append this answer's category ID to the
                            # participant's line.
                            persIDdict[persID] = persIDdict[
                                persID] + "," + str(categories[category])
                        except Exception as err:
                            # 'except Exception' instead of a bare except so
                            # KeyboardInterrupt/SystemExit are not converted.
                            raise IndexError(
                                "The program couldn't evaluate the category '"
                                + category +
                                "'. Please make sure that this category is contained in the 'categories' file."
                            ) from err
                except IndexError:
                    # Category errors pass through unchanged, keeping their
                    # original traceback.
                    raise
                except Exception as err:
                    raise KeyError(
                        "The program couldn't find the question and term '"
                        + term + "' in the survey file '" + surveyKey +
                        "'. Please make sure that all files contain identical questions and terms."
                    ) from err

        currentColumn = currentColumn + 1

    firstTop = "".join(firstTopParts)
    secondTop = "".join(secondTopParts)

    # One line per participant, each terminated by a newline.
    summary = "".join(persIDdict[persID] + "\n" for persID in persIDdict)

    # Drop the leading rater field of every non-empty line and parse the
    # remaining category IDs as integers.
    krippendorff_matrix = [[int(i) for i in j.split(",")[1:]]
                           for j in summary.split("\n") if j]
    # Reported as the "interval" value below; this relies on kp.alpha's
    # default level of measurement -- presumably interval, TODO confirm.
    alphaInterval = kp.alpha(reliability_data=krippendorff_matrix)
    alphaNominal = kp.alpha(reliability_data=krippendorff_matrix,
                            level_of_measurement='nominal')

    # Write the formatted results to the Krippendorff CSV file.
    with open("lsg__krippendorff__" + resultName + ".csv", "w") as lsg:
        lsg.write(firstTop + "\n" + secondTop + "\n" + summary + "\n\n\n" +
                  firstBottom + "\n" + secondBottom +
                  "\n\nKrippendorff's alpha for interval metric," +
                  str(alphaInterval) +
                  "\nKrippendorff's alpha for nominal metric," +
                  str(alphaNominal))
Esempio n. 30
0
                        valueCounts['offensive'][line['tweet_id']],
                        desambigEntries)
                else:
                    labelOffensive = "N/A"

                if labelHate == 1:
                    labelHateType = None
                    if line['tweet_id'] in valueCounts['hateTypes']:
                        labelHateType = determineHateTypeLabel(
                            line['tweet_id'], line['text'],
                            valueCounts['hateTypes'][line['tweet_id']],
                            desambigEntries)
                    else:
                        labelHateType = "N/A"
                else:
                    labelHateType = "N/A"

                writer.writerow({
                    'id': line['tweet_id'],
                    'text': line['text'],
                    'HS': str(labelHate),
                    'OF': str(labelOffensive),
                    'HT': labelHateType
                })

# Final report: the ambiguity counter, then nominal Krippendorff's alpha
# computed from the per-item value counts stored under valueCounts['hate'].
print("\nAmbiguous:", countAmbiguous)
print(
    "\nKrippendorff:",
    krippendorff.alpha(
        value_counts=np.array([*valueCounts['hate'].values()]),
        level_of_measurement='nominal',
    ),
)