Example #1
def predicate_df_to_verb_entry(
        predicate_df: pd.DataFrame) -> Tuple[str, JSON_OBJ]:
    """ Returns (predicate_index, predicate_entry), where predicate_entry is a dict
        with the keys: 'verbIndex', 'verbInflectedForms', 'questionLabels'. """
    obj = {}
    row: pd.Series = df_first_row(predicate_df)
    pred_idx_lbl = get_predicate_idx_label(predicate_df)
    obj['verbIndex'] = int(row[pred_idx_lbl])
    # additional information, only relevant for QANom and not QA-SRL:
    #   'isVerbal' - whether the candidate predicate is indeed a verbal predicate
    #       (otherwise, no QAs are annotated).
    #   'verbForm' - what is the verbal form (infinitive) of the nominalization
    obj['isVerbal'] = row.is_verbal
    obj['verbForm'] = row.verb_form
    if row.is_verbal:
        obj['verbInflectedForms'] = {}  # TODO: derive the inflected forms from obj['verbForm']
        question_entries = [
            question_row_to_question_label(r)
            for _, r in predicate_df.iterrows()
        ]
        obj['questionLabels'] = {
            question_entry["questionString"]: question_entry
            for question_entry in question_entries
        }
    else:
        # negative instances (non verbal) would have empty questionLabels dict
        obj['verbInflectedForms'] = {}
        obj['questionLabels'] = {}
    return str(row[pred_idx_lbl]), obj
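A minimal usage sketch (not from the source): it assumes annot_df holds one annotation row per question, and that grouping by qasrl_id together with the predicate-index column isolates the rows of each predicate.
pred_idx_lbl = get_predicate_idx_label(annot_df)
sentence_entries = {}
for (sent_id, _), predicate_df in annot_df.groupby(["qasrl_id", pred_idx_lbl]):
    pred_idx, verb_entry = predicate_df_to_verb_entry(predicate_df)
    # one dict of verb entries per sentence, keyed by the (string) predicate index
    sentence_entries.setdefault(sent_id, {})[pred_idx] = verb_entry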
Example #2
def compute_num_self_cycles(annot_df: pd.DataFrame) -> int:
    """ How many arguments in annot_df include the predicate within the answer span. """
    counter = 0
    idx_lbl = get_predicate_idx_label(annot_df)
    for i, row in annot_df.iterrows():
        answers = row.answer_range
        pred_idx = row[idx_lbl]
        for a in answers:
            if a[0] <= pred_idx < a[1]:  # answer span is [start, end), end exclusive
                counter += 1
    return counter
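A toy sanity check (illustrative, not from the source): it assumes answer_range holds (start, end) token-index pairs per answer, and that get_predicate_idx_label resolves to the hypothetical verb_idx column used here.
import pandas as pd

toy_df = pd.DataFrame({
    "qasrl_id": ["sent1", "sent1"],
    "verb_idx": [3, 3],                    # assumed predicate-index column name
    "answer_range": [[(0, 2)], [(2, 5)]],  # (start, end) spans, end exclusive
})
# only the span (2, 5) covers token 3, so exactly one self-cycle is expected
assert compute_num_self_cycles(toy_df) == 1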
def evaluate_inter_generator_agreement(annot_df: pd.DataFrame,
                                       verbose: bool = False) -> float:
    """ Compute pairwise agreement between all annotators (workers) in annot_df,
        and return the accumulated unlabeled-argument F1 over all worker pairs. """
    cols = ['qasrl_id', get_predicate_idx_label(annot_df)]
    n_gen = annot_df.groupby(cols).worker_id.transform(pd.Series.nunique)
    workers = annot_df.worker_id.unique().tolist()
    n_workers = len(workers)
    annot_df = annot_df.copy()
    n_predicates = annot_df[cols].drop_duplicates().shape[0]
    if verbose:
        print("n_workers: ", n_workers)
        print("n_predicates: ", n_predicates)
        print(f"metric\tworker_1\tworker_2\tprec\trecall\tf1")

    total_arg_metric = Metrics.empty()
    total_larg_metric = Metrics.empty()
    total_role_metric = Metrics.empty()
    total_nomIdent_metric: BinaryClassificationMetrics = BinaryClassificationMetrics.empty()
    for w1, w2 in combinations(workers, r=2):
        w1_df = annot_df[annot_df.worker_id == w1].copy()
        w2_df = annot_df[annot_df.worker_id == w2].copy()
        # compute agreement measures
        arg_metrics, labeled_arg_metrics, role_metrics, nom_ident_metrics, _ = \
            eval_datasets(w1_df, w2_df)
        if verbose:
            print(f"\nComparing  {w1}   to   {w2}:   [p,r,f1]")
            merged_df = pd.merge(w1_df, w2_df, on='key')
            print(
                f"Number of shared predicates: {get_n_predicates(merged_df)}")
            print(f"ARG:\t{arg_metrics}")
            print(f"Labeled ARG:\t{labeled_arg_metrics}")
            print(f"ROLE:\t{role_metrics}")
            print(
                f"NOM_IDENT:\t{w1}\t{w2}\t{nom_ident_metrics.prec():.3f}\t{nom_ident_metrics.recall():.3f}\t{nom_ident_metrics.f1():.3f}"
            )
            print(
                f"NOM_IDENT accuracy: {nom_ident_metrics.accuracy():.3f}, {int(nom_ident_metrics.errors())} mismathces out of {nom_ident_metrics.instances()} predicates."
            )
        total_arg_metric += arg_metrics
        total_larg_metric += labeled_arg_metrics
        total_role_metric += role_metrics
        total_nomIdent_metric += nom_ident_metrics

    print(f"\nOverall pairwise agreement:")
    print(f"arg-f1 \t {total_arg_metric.f1():.4f}")
    print(f"labeled-arg-f1 \t {total_larg_metric.f1():.4f}")
    print(f"role-f1 \t {total_role_metric.f1():.4f}")
    print(
        f"is-verbal-accuracy \t {total_nomIdent_metric.accuracy():.4f}    for {total_nomIdent_metric.instances()} pairwise comparisons."
    )
    return total_arg_metric.f1()
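A hedged usage sketch: annot_df is assumed to hold the raw annotations of several generators over the same predicates, with the worker_id column used above; verbose=True prints the per-pair precision/recall/F1 breakdown.
pairwise_arg_f1 = evaluate_inter_generator_agreement(annot_df, verbose=True)
print(f"overall pairwise argument F1: {pairwise_arg_f1:.4f}")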
Example #4
def reannotate_corrected_verb_forms(annot_df: pd.DataFrame,
                                    output_json_fn: str) -> None:
    """
    Generate input for the qasrl_crowdsourcing project (equivalent to output of candidate_extraction.py)
    :param annot_df: returned from find_invalid_prompts()
    :param output_json_fn: file name where to dump the JSON of the prompts for re-annotation
    """
    pred_idx_lbl = common.get_predicate_idx_label(annot_df)
    annot_df = annot_df.drop_duplicates(subset=["qasrl_id", pred_idx_lbl])
    # keep only prompts that were invalidated
    invalidated_df = annot_df[annot_df["invalid_prompt"]]
    # drop predicates that were left with no corrected verb form
    re_annot_df = invalidated_df[invalidated_df["corrected_verb_form"] != ""]
    candidates = [{
        "sentenceId": row["qasrl_id"],
        "tokSent": row["sentence"].split(" "),
        "targetIdx": row[pred_idx_lbl],
        "verbForms": [row["corrected_verb_form"]]
    } for _, row in re_annot_df.iterrows()]
    with open(output_json_fn, "w") as f_out:
        json.dump(candidates, f_out)
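A hypothetical call (the file name is illustrative): the input DataFrame is assumed to come from find_invalid_prompts(), so it already carries the invalid_prompt and corrected_verb_form columns the function filters on.
reannotate_corrected_verb_forms(annot_df, "reannotation_candidates.json")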
Example #5
def filter_ids(df, row):
    """ Return a boolean mask over df selecting the rows that share row's
        sentence id (qasrl_id) and predicate index. """
    idx_lbl = get_predicate_idx_label(df)
    return (df.qasrl_id == row.qasrl_id) & (df[idx_lbl] == row[idx_lbl])
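The returned mask can be used to pull every annotation row of a single predicate; a minimal sketch, assuming df and row follow the same schema as the functions above.
same_predicate_rows = df[filter_ids(df, row)]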