Exemple #1
0
    def prepare_sep_label_feature(self, slot_label2id, intent_label2id):
        """Prepare separate label features for the slot and intent label sets.

        Every label name except '[PAD]' becomes its own feature item: the
        name is converted with ``convert_label_name``, paired with placeholder
        'None' tags, wrapped in a ``DataItem`` and passed to
        ``data_item2feature_item`` (with 0 as its second argument). The items
        of each label set are then batched with ``get_support_model_input``.

        Args:
            slot_label2id: slot label collection; only its keys (label names)
                and its size are used here.
            intent_label2id: intent label collection, used the same way.

        Returns:
            ``((slot_label_input, slot_label_item),
            (intent_label_input, intent_label_item))`` where each ``*_item``
            is the list of feature items of that label set and each
            ``*_input`` is what ``get_support_model_input`` returned for it.
        """
        def build_label_features(label2id):
            # One feature item per label name; '[PAD]' is not a real label.
            label_items = []
            for label_name in label2id:
                if label_name == '[PAD]':
                    continue
                seq_in = self.convert_label_name(label_name)
                seq_out = ['None'] * len(seq_in)
                label = ['None']
                label_items.append(
                    self.data_item2feature_item(
                        DataItem(seq_in, seq_out, label), 0))
            # The size excludes one entry for '[PAD]'.
            # NOTE(review): assumes '[PAD]' is present exactly once - confirm.
            label_input = self.get_support_model_input(
                label_items, len(label2id) - 1)
            return label_input, label_items

        # Each label set gets its own item list and its own size, so the
        # intent features are built only from intent labels.
        slot_label_input, slot_label_item = build_label_features(
            slot_label2id)
        intent_label_input, intent_label_item = build_label_features(
            intent_label2id)

        return (slot_label_input, slot_label_item), (intent_label_input,
                                                     intent_label_item)
Exemple #2
0
    def prepare_label_feature(self, slot_label2id: Dict[str, int],
                              intent_label2id: Dict[str, int]):
        """Build concatenate-style label features for slot and intent labels.

        For each label set, the label names (all but '[PAD]') are visited in
        ascending id order and their converted token sequences are joined
        into a single ``DataItem`` tagged with 'O' placeholders.

        Args:
            slot_label2id: slot label name -> label id.
            intent_label2id: intent label name -> label id.

        Returns:
            ``((slot_label_input, slot_label_item),
            (intent_label_input, intent_label_item))`` where each ``*_item``
            comes from ``data_item2feature_item`` and each ``*_input`` from
            ``get_test_model_input`` applied to that item.
        """
        def id_of(entry):
            return entry[1]

        # Both orderings are computed up front, before any feature is built.
        slot_ordered = sorted(slot_label2id.items(), key=id_of)
        intent_ordered = sorted(intent_label2id.items(), key=id_of)

        def concat_labels(ordered_labels):
            tokens, tags, sentence_labels = [], [], []
            for name, _ in ordered_labels:
                if name == '[PAD]':
                    continue
                name_tokens = self.convert_label_name(name)
                tokens.extend(name_tokens)
                # one 'O' tag per token, one 'O' label per label name
                tags.extend(['O'] * len(name_tokens))
                sentence_labels.append('O')
            feature_item = self.data_item2feature_item(
                DataItem(seq_in=tokens, seq_out=tags, label=sentence_labels),
                0)
            return self.get_test_model_input(feature_item), feature_item

        slot_result = concat_labels(slot_ordered)
        intent_result = concat_labels(intent_ordered)
        return slot_result, intent_result
Exemple #3
0
 def prepare_sep_label_feature(self, label2id):
     """Build one feature item per label and batch them as support input.

     Each label name except '[PAD]' is converted with
     ``convert_label_name``, given placeholder 'None' tags, wrapped in a
     ``DataItem`` and passed to ``data_item2feature_item``.

     Returns:
         ``(label_input, label_items)``: the result of
         ``get_support_model_input`` and the list of feature items.
     """
     def to_feature_item(name):
         tokens = self.convert_label_name(name)
         placeholder_tags = ['None'] * len(tokens)
         return self.data_item2feature_item(
             DataItem(tokens, placeholder_tags, ['None']), 0)

     label_items = [to_feature_item(name)
                    for name in label2id if name != '[PAD]']
     # '[PAD]' contributes no item, hence the size is one less than the map
     support_size = len(label2id) - 1
     label_input = self.get_support_model_input(label_items, support_size)
     return label_input, label_items
Exemple #4
0
 def prepare_label_feature(self, label2id: dict):
     """Build a single concatenate-style feature item covering all labels.

     Label names (all but '[PAD]') are visited in ascending id order. For
     each one, the words from ``convert_label_name`` and the word pieces
     from ``self.tokenizer.tokenize`` are appended to running sequences,
     together with 'O' placeholder tags and word-piece marks.

     Returns:
         ``(label_input, label_item)``: the result of
         ``get_test_model_input`` and the feature item it was built from.
     """
     words, pieces, word_tags, piece_tags, piece_marks = [], [], [], [], []
     for name, _ in sorted(label2id.items(), key=lambda entry: entry[1]):
         if name != '[PAD]':
             name_words = self.convert_label_name(name)
             name_pieces = self.tokenizer.tokenize(' '.join(name_words))
             words.extend(name_words)
             pieces.extend(name_pieces)
             word_tags.extend(['O'] * len(name_words))
             piece_tags.extend(['O'] * len(name_pieces))
             # 0 marks the first word piece of this label name, 1 the rest.
             # NOTE(review): a 0 is added even when tokenization yields no
             # pieces, so the marks can then be longer than the pieces.
             piece_marks.append(0)
             piece_marks.extend([1] * (len(name_pieces) - 1))
     label_item = self.data_item2feature_item(
         DataItem(words, word_tags, pieces, piece_tags, piece_marks), 0)
     return self.get_test_model_input(label_item), label_item
Exemple #5
0
 def prepare_sep_label_feature(self, label2id_map):
     """Build per-task lists of label feature items and their batched input.

     For every task, each label name except '[PAD]' is converted with
     ``convert_label_name``, given placeholder 'None' tags, wrapped in a
     ``DataItem`` and passed to ``data_item2feature_item`` along with the
     task name.

     Returns:
         ``(label_input_map, label_item_map)``, both keyed by task.
     """
     # Phase 1: feature items for every task.
     label_item_map = {}
     for task, label2id in label2id_map.items():
         task_items = []
         for name in (n for n in label2id if n != '[PAD]'):
             tokens = self.convert_label_name(name)
             placeholder_tags = ['None'] * len(tokens)
             data_item = DataItem(tokens, placeholder_tags, ['None'])
             task_items.append(
                 self.data_item2feature_item(data_item, 0, task))
         label_item_map[task] = task_items

     # Phase 2: batch each task's items; '[PAD]' has no item, so size - 1.
     label_input_map = {}
     for task, task_items in label_item_map.items():
         support_size = len(label2id_map[task]) - 1
         label_input_map[task] = self.get_support_model_input(
             task_items, support_size)
     return label_input_map, label_item_map
Exemple #6
0
 def prepare_label_feature(self, label2id_map: Dict[str, Dict[str, int]]):
     """Prepare per-task label features in concatenate style.

     For each task, the label names (all but '[PAD]') are visited in
     ascending id order. The words from ``convert_label_name`` and the word
     pieces from ``self.tokenizer.tokenize`` are concatenated, tagged with
     'O' placeholders, and wrapped in one ``DataItem`` per task.

     Args:
         label2id_map: task name -> (label name -> label id).

     Returns:
         ``(label_input_map, label_item_map)``, both keyed by task:
         ``label_item_map[task]`` comes from ``data_item2feature_item`` and
         ``label_input_map[task]`` from ``get_test_model_input`` on it.
     """
     label_item_map, label_input_map = {}, {}
     for task, label2id in label2id_map.items():
         # Start from empty buffers for every task. Reusing one set of lists
         # across tasks would put earlier tasks' labels into later items and
         # hand the very same list objects to every DataItem.
         text, wp_text, label, wp_label, wp_mark = [], [], [], [], []
         for label_name, _ in sorted(label2id.items(), key=lambda x: x[1]):
             if label_name == '[PAD]':
                 continue
             tmp_text = self.convert_label_name(label_name)
             tmp_wp_text = self.tokenizer.tokenize(' '.join(tmp_text))
             text.extend(tmp_text)
             wp_text.extend(tmp_wp_text)
             label.extend(['O'] * len(tmp_text))
             wp_label.extend(['O'] * len(tmp_wp_text))
             # 0 marks the first word piece of this label name, 1 the rest
             wp_mark.extend([0] + [1] * (len(tmp_wp_text) - 1))
         label_item_map[task] = self.data_item2feature_item(
             DataItem(text, label, wp_text, wp_label, wp_mark), 0, task)
         label_input_map[task] = self.get_test_model_input(
             label_item_map[task])
     return label_input_map, label_item_map