def generate_task_examples(
    data: List[Dict[str, Any]], argument: str, roles: List[str], split: str
) -> List[ProbingTaskExample]:
    """Build probing examples labelled with the dependency role of an argument.

    For every example, the span stored under ``example[argument]`` is passed
    to ``find_common_head``. Examples for which it reports a negative index
    are skipped. The label of a kept example is the 1-based position of the
    returned dependency relation in ``roles`` (as a string), or ``"0"`` when
    the relation is not one of ``roles``.

    Args:
        data: Examples as dictionaries. The keys read here are ``argument``,
            ``tokens``, ``head``, ``tail``, ``ner``, ``pos``, ``dep``,
            ``dep_head`` and ``id``.
        argument: Key of the example entry holding the ``(start, end)`` pair
            of the argument to analyse.
        roles: Dependency relations that receive their own label.
        split: Name of the data split, copied onto every generated example.

    Returns:
        One ``ProbingTaskExample`` per example that was not skipped, in input
        order.
    """
    # Build the role -> label table once instead of scanning `roles` for
    # every example. `setdefault` keeps the first occurrence of a duplicated
    # role, which is the position `roles.index()` would report.
    role_labels: Dict[str, str] = {}
    for position, role in enumerate(roles, start=1):
        role_labels.setdefault(role, str(position))

    probing_examples = []
    for example in data:
        arg_start, arg_end = example[argument]
        # NOTE (kept from the original author): heads are 1-based!
        idx, _head, dep_rel = find_common_head(arg_start, arg_end, example)
        if idx < 0:
            continue

        probing_examples.append(
            ProbingTaskExample(
                tokens=example["tokens"],
                # Membership and index are both resolved against `roles`, so
                # a custom role list can neither raise nor be ignored.
                label=role_labels.get(dep_rel, "0"),
                split=split,
                head=example["head"],
                tail=example["tail"],
                ner=example["ner"],
                pos=example["pos"],
                dep=example["dep"],
                dep_head=example["dep_head"],
                id=example["id"],
            )
        )
    return probing_examples
def generate_task_examples(
    data: List[Dict[str, Any]], buckets: List[Tuple[int, int]], split: str
) -> List[ProbingTaskExample]:
    """Build probing examples labelled with the depth bucket of their tree.

    For every example a tree is built with ``dep_heads_to_tree`` and its
    ``depth()`` is compared against ``buckets``. The label is the index (as a
    string) of the first bucket whose inclusive ``(min, max)`` range contains
    the depth. Examples whose depth falls into no bucket are discarded; this
    covers depths above, below and between the given ranges.

    Args:
        data: Examples as dictionaries. The keys read here are ``tokens``,
            ``head``, ``tail``, ``ner``, ``pos``, ``dep``, ``dep_head`` and
            ``id``.
        buckets: Inclusive ``(min_depth, max_depth)`` ranges, checked in
            order.
        split: Name of the data split, copied onto every generated example.

    Returns:
        One ``ProbingTaskExample`` per example whose depth matched a bucket,
        in input order.
    """
    probing_examples = []
    for example in data:
        tree = dep_heads_to_tree(
            example["dep_head"],
            len(example["tokens"]),
            example["head"],
            example["tail"],
            prune=-1,
            dep_labels=example["dep"],
        )

        # The depth does not change between bucket comparisons, so compute it
        # once per example rather than once per bucket.
        depth = tree.depth()
        bucket_index = next(
            (
                index
                for index, (bucket_min, bucket_max) in enumerate(buckets)
                if bucket_min <= depth <= bucket_max
            ),
            None,
        )

        # Discard examples whose depth is not covered by any bucket.
        if bucket_index is None:
            continue

        probing_examples.append(
            ProbingTaskExample(
                tokens=example["tokens"],
                label=str(bucket_index),
                split=split,
                head=example["head"],
                tail=example["tail"],
                ner=example["ner"],
                pos=example["pos"],
                dep=example["dep"],
                dep_head=example["dep_head"],
                id=example["id"],
            )
        )
    return probing_examples