Example no. 1
0
    def create_annotation(utterance: str, intent_label: str, slots_label: str) -> str:
        """Render *utterance* as a bracketed annotation string.

        The output opens with the escaped intent label, then interleaves the
        escaped utterance text with one bracketed group per slot (label plus
        the slot's substring), processing slots in order of their start
        offset, and finally closes the intent bracket.
        """
        pieces = [OPEN + escape_brackets(intent_label) + " "]
        prev_end = 0
        for slot in sorted(parse_slot_string(slots_label), key=lambda s: s.start):
            # Text between the previous slot (or the start) and this slot.
            pieces.append(
                escape_brackets(
                    get_substring_from_offsets(utterance, prev_end, slot.start)
                )
            )
            slot_text = escape_brackets(
                get_substring_from_offsets(utterance, slot.start, slot.end)
            )
            pieces.append(
                OPEN + escape_brackets(slot.label) + " " + slot_text + " " + CLOSE
            )
            prev_end = slot.end
        # Trailing text after the last slot, then close the intent bracket.
        pieces.append(
            escape_brackets(get_substring_from_offsets(utterance, prev_end, None))
            + " "
            + CLOSE
        )

        return "".join(pieces)
def create_frame(intent_label, slot_names_str, utterance):
    """Build an intent frame Node spanning the whole utterance.

    Each slot parsed from *slot_names_str* becomes a child Node carrying
    its label and character span; the root Node carries the intent label
    and covers [0, len(utterance)).
    """
    slot_nodes = {
        Node(label=s.label, span=Span(s.start, s.end))
        for s in parse_slot_string(slot_names_str)
    }
    return Node(
        label=intent_label,
        span=Span(0, len(utterance)),
        children=slot_nodes,
    )
Example no. 3
0
def create_frame(text, intent_label, slot_names_str, byte_len):
    """Build an intent frame Node over *text* with byte-based extents.

    The root Node uses the intent label, spans [0, byte_len), and keeps a
    reference to the raw text; each slot parsed from *slot_names_str*
    becomes a child Node with its own label and span.
    """
    children = set()
    for slot in parse_slot_string(slot_names_str):
        children.add(Node(label=slot.label, span=Span(slot.start, slot.end)))
    return Node(
        label=intent_label,
        span=Span(0, byte_len),
        children=children,
        text=text,
    )
    def _unnumberize(self, preds, tokens, doc_str):
        """
        We re-tokenize and re-numberize the raw context (doc_str) here to get doc_tokens to get
        access to start_idx and end_idx mappings.  At this point, ans_token_start is the start index
        of the answer within tokens and ans_token_end is the end index. We calculate the offset of doc_tokens
        within tokens.
        Then we find the start_idx and end_idx
        as well as the corresponding span in the raw text using the answer token indices.
        """
        # start_idx and end_idx are lists of char start and end positions in doc_str.
        doc_tokens, start_idxs, end_idxs = self.tensorizer._lookup_tokens(
            doc_str)

        # find the offsets of doc_tokens in tokens
        # offset_end: index just before the first pad token in `tokens`;
        # if there is no pad token, the sequence is full and the last
        # index is used instead.
        try:
            offset_end = tokens.index(
                self.tensorizer.vocab.get_pad_index()) - 1
        except ValueError:
            offset_end = len(tokens) - 1
        # offset_start: the first position at which the tail of `tokens`
        # matches the head of `doc_tokens`, i.e. where the document tokens
        # begin inside `tokens`. NOTE(review): .index(True) raises
        # ValueError if no alignment exists — presumably the caller
        # guarantees doc_tokens is a suffix-aligned sub-sequence.
        offset_start = list(
            map(
                lambda x: tokens[x:offset_end] == doc_tokens[:offset_end - x],
                range(offset_end),
            )).index(True)

        # find each answer's char idxs and strings as well
        # Keep only the predictions covering the document region and map
        # them to labels; pair each doc token with its char (start, end).
        pred_labels = self._process_pred(preds[offset_start:offset_end])
        token_range = list(zip(start_idxs, end_idxs))

        # Merge per-token labels into contiguous slots over char offsets.
        pred_slots = parse_slot_string(
            merge_token_labels_to_slot(
                token_range,
                pred_labels,
                self.tensorizer.use_bio_labels,
            ))
        ans_strs = []
        ans_start_char_idxs = []
        ans_end_char_idxs = []
        for slot in pred_slots:
            # if its not an answer span, skip
            # (pad-token and NO_LABEL slots carry no answer text)
            if slot.label in map(
                    str,
                [
                    self.tensorizer.labels_vocab.pad_token,
                    Slot.NO_LABEL_SLOT,
                ],
            ):
                continue
            ans_strs.append(doc_str[slot.start:slot.end])
            ans_start_char_idxs.append(slot.start)
            ans_end_char_idxs.append(slot.end)

        # Parallel lists: answer strings and their char start/end in doc_str.
        return ans_strs, ans_start_char_idxs, ans_end_char_idxs
    def create_annotation(utterance: str, intent_label: str,
                          slots_label: str) -> str:
        """Render *utterance* as a bracketed annotation string.

        Opens with the escaped intent label, emits each slot (sorted by
        start offset) as a bracketed group of its escaped label and the
        corresponding utterance slice, interleaved with the escaped text
        between slots, and closes the intent bracket at the end.
        """
        parts = [OPEN + escape_brackets(intent_label) + " "]
        prev = 0
        for slot in sorted(parse_slot_string(slots_label),
                           key=lambda s: s.start):
            # Text between the previous slot (or the start) and this slot.
            parts.append(escape_brackets(utterance[prev:slot.start]))
            parts.append(
                OPEN + escape_brackets(slot.label) + " " +
                escape_brackets(utterance[slot.start:slot.end]) + " " + CLOSE)
            prev = slot.end
        # Trailing text after the last slot, then close the intent bracket.
        parts.append(escape_brackets(utterance[prev:]) + " " + CLOSE)

        return "".join(parts)
Example no. 6
0
def load_slots(s):
    """Parse the serialized slot string *s* into slot objects."""
    slots = parse_slot_string(s)
    return slots
Example no. 7
0
def get_slots(word_names):
    """Return a Counter over the unique slot Nodes parsed from *word_names*.

    Nodes are first collected into a set, so every distinct (label, span)
    pair appears in the Counter with count 1.
    """
    unique_nodes = set()
    for slot in parse_slot_string(word_names):
        unique_nodes.add(Node(label=slot.label, span=Span(slot.start, slot.end)))
    return Counter(unique_nodes)