예제 #1
0
def generate_task_examples(
    data: List[Dict[str, Any]], argument: str, roles: List[str], split: str
) -> List[ProbingTaskExample]:
    """Build probing examples labelled with the argument's dependency role.

    For every example, the span stored under ``argument`` is handed to
    ``find_common_head``. Examples for which it reports a negative index are
    skipped. The remaining ones are labelled with the 1-based position of the
    returned dependency relation in ``roles``, or with ``"0"`` when that
    relation is not listed in ``roles``.

    Args:
        data: Example dicts. Each must provide the ``argument`` key as well
            as "tokens", "head", "tail", "ner", "pos", "dep", "dep_head"
            and "id".
        argument: Key under which each example stores the ``(start, end)``
            span to analyse.
        roles: Dependency relations that receive their own label; the label
            is the relation's position in this list plus one.
        split: Split name copied onto every generated example.

    Returns:
        The generated examples, in the order of ``data``.
    """
    probing_examples = []

    for example in data:
        arg_start, arg_end = example[argument]
        # NOTE: per the original author, the heads reported by
        # find_common_head are 1-based; only the index and the relation
        # are needed here.
        idx, _head, dep_rel = find_common_head(arg_start, arg_end, example)

        # A negative index is treated as "nothing usable found" and the
        # example is dropped.
        if idx < 0:
            continue

        # Membership is tested against `roles` -- the very list that is
        # indexed -- so a relation missing from it yields "0" instead of
        # raising ValueError from list.index.
        label = str(roles.index(dep_rel) + 1) if dep_rel in roles else "0"

        probing_examples.append(
            ProbingTaskExample(
                tokens=example["tokens"],
                label=label,
                split=split,
                head=example["head"],
                tail=example["tail"],
                ner=example["ner"],
                pos=example["pos"],
                dep=example["dep"],
                dep_head=example["dep_head"],
                id=example["id"],
            )
        )
    return probing_examples
예제 #2
0
def generate_task_examples(data: List[Dict[str, Any]],
                           buckets: List[Tuple[int, int]],
                           split: str) -> List[ProbingTaskExample]:
    """Build probing examples labelled with a tree-depth bucket.

    For every example a tree is built with ``dep_heads_to_tree`` and its
    ``depth()`` is compared against ``buckets``. The label is the index of
    the first bucket whose inclusive ``(min, max)`` range contains the
    depth. Examples whose depth lies in no bucket are dropped.

    Args:
        data: Example dicts. Each must provide "tokens", "head", "tail",
            "ner", "pos", "dep", "dep_head" and "id".
        buckets: Inclusive ``(min, max)`` depth ranges; they are checked in
            order and the first match wins.
        split: Split name copied onto every generated example.

    Returns:
        The generated examples, in the order of ``data``.
    """
    probing_examples = []

    for example in data:
        tree = dep_heads_to_tree(
            example["dep_head"],
            len(example["tokens"]),
            example["head"],
            example["tail"],
            prune=-1,
            dep_labels=example["dep"],
        )
        # The depth does not depend on the bucket, so compute it once
        # instead of once per bucket comparison.
        depth = tree.depth()
        bucket_index = next(
            (idx for idx, (bucket_min, bucket_max) in enumerate(buckets)
             if bucket_min <= depth <= bucket_max),
            None,
        )
        # Discard examples whose depth falls outside every bucket.
        if bucket_index is None:
            continue

        probing_examples.append(
            ProbingTaskExample(
                tokens=example["tokens"],
                label=str(bucket_index),
                split=split,
                head=example["head"],
                tail=example["tail"],
                ner=example["ner"],
                pos=example["pos"],
                dep=example["dep"],
                dep_head=example["dep_head"],
                id=example["id"],
            ))
    return probing_examples