def create_frame(intent_label, slot_names_str, utterance):
    """
    Build a single-level frame: one intent node with one child per slot.

    The intent node spans the whole utterance (0 to ``len(utterance)``) and
    carries ``intent_label``. Every slot yielded by
    ``parse_slot_string(slot_names_str)`` becomes a child node labelled and
    spanned by that slot's ``label``, ``start`` and ``end``.
    """
    utterance_span = Span(0, len(utterance))

    # Children are kept in a set, so slots that produce equal nodes collapse.
    slot_nodes = set()
    for parsed_slot in parse_slot_string(slot_names_str):
        slot_span = Span(parsed_slot.start, parsed_slot.end)
        slot_nodes.add(Node(label=parsed_slot.label, span=slot_span))

    return Node(label=intent_label, span=utterance_span, children=slot_nodes)
def create_frame(text, intent_label, slot_names_str, byte_len):
    """
    Build a single-level frame that also records the utterance text.

    The intent node is labelled ``intent_label``, spans ``0`` to ``byte_len``
    and stores ``text``. Every slot yielded by
    ``parse_slot_string(slot_names_str)`` becomes a child node labelled and
    spanned by that slot's ``label``, ``start`` and ``end``.
    """
    full_span = Span(0, byte_len)

    # Children are kept in a set, so slots that produce equal nodes collapse.
    slot_nodes = set()
    for parsed_slot in parse_slot_string(slot_names_str):
        slot_span = Span(parsed_slot.start, parsed_slot.end)
        slot_nodes.add(Node(label=parsed_slot.label, span=slot_span))

    return Node(
        label=intent_label,
        span=full_span,
        children=slot_nodes,
        text=text,
    )
def node_to_metrics_node(node: Union[Intent, Slot], start: int = 0) -> Node:
    """
    Recursively convert an annotation tree node into a metrics ``Node``.

    ``start`` is the absolute start position of ``node`` in the utterance.

    Direct ``Token`` children contribute their labels (joined with single
    spaces) to the resulting node's ``text``; ``Intent`` and ``Slot`` children
    are converted recursively and collected as the resulting node's children.
    Any other child type raises ``ValueError``.
    """
    converted_children: Set[Node] = set()
    own_tokens: List[str] = []
    cursor = start

    for child in node.children or ():
        child_kind = type(child)
        if child_kind == Token:
            own_tokens.append(child.label)
            # Advance past the token plus one separator position.
            cursor += len(child.label) + 1
        elif child_kind in (Intent, Slot):
            converted = CompositionalMetricReporter.node_to_metrics_node(
                child, cursor
            )
            converted_children.add(converted)
            # Continue one separator position after the nested node's end.
            cursor = converted.span.end + 1
        else:
            raise ValueError("Child must be Token, Intent or Slot!")

    # The cursor sits one separator past the last child, hence the "- 1".
    return Node(
        label=node.label,
        span=Span(start, cursor - 1),
        children=converted_children,
        text=" ".join(own_tokens),
    )
def test_tree_to_metric_node(self):
    """
    Parse bracketed annotation strings and check that
    CompositionalMetricReporter.tree_to_metric_node turns each parsed tree
    into the expected metric frame (labels, spans and nested children).
    """
    # One intent with three sibling slots, two of which share a label.
    alarm_case = (
        "[IN:alarm/set_alarm repeat the [SL:datetime 3 : 00 pm ] "
        "[SL:alarm/name alarm ] [SL:datetime for Sunday august 12th ] ] ",
        Node(
            label="IN:alarm/set_alarm",
            span=Span(start=0, end=49),
            children={
                Node(label="SL:datetime", span=Span(start=11, end=20)),
                Node(label="SL:alarm/name", span=Span(start=21, end=26)),
                Node(label="SL:datetime", span=Span(start=27, end=49)),
            },
        ),
    )

    # One intent with a single slot surrounded by plain tokens.
    call_case = (
        "[IN:calling/call_friend call [SL:person moms ] cellphone ]",
        Node(
            label="IN:calling/call_friend",
            span=Span(start=0, end=19),
            children={Node(label="SL:person", span=Span(start=5, end=9))},
        ),
    )

    # A slot that wraps a nested intent covering the same span.
    nested_event = Node(label="IN:GET_EVENT", span=Span(start=21, end=38))
    directions_case = (
        "[IN:GET_DIRECTIONS I need [SL:ANCHOR directions] to [SL:DESTINATION "
        "[IN:GET_EVENT the jazz festival]]]",
        Node(
            label="IN:GET_DIRECTIONS",
            span=Span(start=0, end=38),
            children={
                Node(label="SL:ANCHOR", span=Span(start=7, end=17)),
                Node(
                    label="SL:DESTINATION",
                    span=Span(start=21, end=38),
                    children={nested_event},
                ),
            },
        ),
    )

    for bracketed, wanted in (alarm_case, call_case, directions_case):
        parsed_tree = Annotation(bracketed).tree
        produced = CompositionalMetricReporter.tree_to_metric_node(parsed_tree)
        self.assertEqual(produced, wanted)
def convert_bio_to_spans(bio_sequence: List[str]) -> List[Span]:
    """
    Convert a sequence of BIO tags into labelled spans for evaluation.

    Each result is built as ``Span(label, start, end)`` where ``start`` is the
    index of the tag that opens the span and ``end`` is one past the index of
    its last tag (exclusive).

    Tag handling, decided by the first character of each tag:
      * ``B``: closes any open span and opens a new one. The label is the tag
        with its leading ``B`` / ``B-`` removed and whitespace stripped.
      * ``I``: extends the open span when the tag minus its leading
        ``I`` / ``I-`` equals the open span's label. An ``I`` tag with no open
        span, or with a different label, is handled as the corresponding
        ``B`` tag.
      * ``O`` or any other character: closes any open span. Unknown tags are
        therefore treated as ``O`` everywhere.

    The sequence is processed in a single left-to-right pass and the input
    list is never modified.

    Raises:
        AssertionError: if a tag is empty.
    """
    spans = []  # Span(label, start index, exclusive end index)
    open_start = None
    open_label = None

    for position, tag in enumerate(bio_sequence):
        # Kept as an assert so callers see the same exception type as before.
        assert tag, "BIO tags must be non-empty"
        prefix = tag[0]

        if prefix == "I":
            # The continuation label is deliberately compared unstripped.
            continues_open_span = (
                open_start is not None
                and re.sub("^I-?", "", tag) == open_label
            )
            if continues_open_span:
                continue
            # Orphan or mismatched continuation: repair it into a span start.
            prefix = "B"

        # From here the tag is B, O, or an unknown tag treated as O; all of
        # them terminate whatever span is currently open.
        if open_start is not None:
            spans.append(Span(open_label, open_start, position))
            open_start = None
            open_label = None

        if prefix == "B":
            open_start = position
            open_label = re.sub("^[BI]-?", "", tag).strip()

    # A span still open at the end runs to the end of the sequence.
    if open_start is not None:
        spans.append(Span(open_label, open_start, len(bio_sequence)))

    return spans
def get_slots(word_names):
    """
    Return a ``Counter`` over the distinct slot nodes in ``word_names``.

    Each slot yielded by ``parse_slot_string(word_names)`` becomes a ``Node``
    labelled and spanned by that slot's ``label``, ``start`` and ``end``.
    Nodes are gathered in a set before counting, so every distinct node has
    a count of exactly one.
    """
    distinct_nodes = set()
    for parsed_slot in parse_slot_string(word_names):
        slot_span = Span(parsed_slot.start, parsed_slot.end)
        distinct_nodes.add(Node(label=parsed_slot.label, span=slot_span))
    return Counter(distinct_nodes)
def test_immutable_node(self) -> None:
    """Assigning to ``label`` on a constructed Node must raise AttributeError."""
    frozen_node = Node(label="", span=Span(start=0, end=5))
    # setattr performs the same attribute assignment as ``obj.label = ...``.
    self.assertRaises(AttributeError, setattr, frozen_node, "label", "intent")
compute_frame_accuracies_by_depth, compute_frame_accuracy, compute_intent_slot_metrics, compute_top_intent_accuracy, compute_percent_invalid_trees, compute_percent_trees_wrong_label, ) from pytext.metrics.tests.metrics_test_base import MetricsTestBase TEST_EXAMPLES: List[Dict[str, Any]] = [ # Non-nested examples { # Two identical frames "predicted": Node( label="intent1", span=Span(start=0, end=20), children={Node(label="slot1", span=Span(start=1, end=2))}, ), "expected": Node( label="intent1", span=Span(start=0, end=20), children={Node(label="slot1", span=Span(start=1, end=2))}, ), "frames_match": True, "bracket_confusions": { "intent_confusion": {"TP": 1, "FP": 0, "FN": 0}, "slot_confusion": {"TP": 1, "FP": 0, "FN": 0}, }, "tree_confusions": { "intent_confusion": {"TP": 1, "FP": 0, "FN": 0}, "slot_confusion": {"TP": 1, "FP": 0, "FN": 0},
def test_tree_to_metric_node(self):
    """
    Check that ``get_frame`` turns bracketed annotation strings into the
    expected metric frames, including the ``text`` stored on every node.
    """
    # One intent with three sibling slots; the intent keeps only its own
    # tokens as text.
    alarm_case = (
        "[IN:alarm/set_alarm repeat the [SL:datetime 3 : 00 pm ] "
        "[SL:alarm/name alarm ] [SL:datetime for Sunday august 12th ] ] ",
        Node(
            label="IN:alarm/set_alarm",
            span=Span(start=0, end=49),
            children={
                Node(
                    label="SL:datetime",
                    span=Span(start=11, end=20),
                    text="3 : 00 pm",
                ),
                Node(
                    label="SL:alarm/name",
                    span=Span(start=21, end=26),
                    text="alarm",
                ),
                Node(
                    label="SL:datetime",
                    span=Span(start=27, end=49),
                    text="for Sunday august 12th",
                ),
            },
            text="repeat the",
        ),
    )

    # One intent whose own tokens sit on both sides of its single slot.
    call_case = (
        "[IN:calling/call_friend call [SL:person moms ] cellphone ]",
        Node(
            label="IN:calling/call_friend",
            span=Span(start=0, end=19),
            children={
                Node(label="SL:person", span=Span(start=5, end=9), text="moms")
            },
            text="call cellphone",
        ),
    )

    # A slot with no tokens of its own (empty text) wrapping a nested intent.
    nested_event = Node(
        label="IN:GET_EVENT",
        span=Span(start=21, end=38),
        text="the jazz festival",
    )
    directions_case = (
        "[IN:GET_DIRECTIONS I need [SL:ANCHOR directions] to [SL:DESTINATION "
        "[IN:GET_EVENT the jazz festival]]]",
        Node(
            label="IN:GET_DIRECTIONS",
            span=Span(start=0, end=38),
            text="I need to",
            children={
                Node(
                    label="SL:ANCHOR",
                    span=Span(start=7, end=17),
                    text="directions",
                ),
                Node(
                    label="SL:DESTINATION",
                    span=Span(start=21, end=38),
                    text="",
                    children={nested_event},
                ),
            },
        ),
    )

    for bracketed, wanted in (alarm_case, call_case, directions_case):
        produced = get_frame(bracketed)
        self.assertEqual(produced, wanted)