def prepare_sep_label_feature(self, slot_label2id, intent_label2id):
    """ Prepare digital input for label features separately, for slots and intents.

    For every key of a label mapping except '[PAD]', the label name is
    converted with `self.convert_label_name`, wrapped in a `DataItem` whose
    `seq_out` and `label` are 'None' placeholders, and turned into a feature
    item with `self.data_item2feature_item(..., 0)`. The items of one mapping
    are then passed to `self.get_support_model_input`.

    Slot and intent labels are processed independently: intent items go into
    their own list and the intent input is built from that list and the size
    of `intent_label2id`.

    :param slot_label2id: mapping keyed by slot label name (only keys and length are used)
    :param intent_label2id: mapping keyed by intent label name (only keys and length are used)
    :return: ((slot_label_input, slot_label_item), (intent_label_input, intent_label_item))
    """

    def build_label_features(label2id):
        # One feature item per label name, in the mapping's iteration order.
        label_items = []
        for label_name in label2id:
            if label_name == '[PAD]':
                continue
            seq_in = self.convert_label_name(label_name)
            seq_out = ['None'] * len(seq_in)
            label = ['None']
            label_items.append(
                self.data_item2feature_item(DataItem(seq_in, seq_out, label), 0))
        # "- 1" accounts for the skipped '[PAD]' entry
        # (assumes '[PAD]' is a key of the mapping -- TODO confirm).
        label_input = self.get_support_model_input(label_items, len(label2id) - 1)
        return label_input, label_items

    slot_label_input, slot_label_item = build_label_features(slot_label2id)
    intent_label_input, intent_label_item = build_label_features(intent_label2id)
    return (slot_label_input, slot_label_item), (intent_label_input, intent_label_item)
def prepare_label_feature(self, slot_label2id: Dict[str, int], intent_label2id: Dict[str, int]):
    """ Prepare digital input for label features in concatenate style.

    For each mapping, the label names (except '[PAD]') are visited in
    ascending id order, converted with `self.convert_label_name` and
    concatenated into a single `DataItem`: `seq_in` holds all converted
    tokens, `seq_out` holds one 'O' per token and `label` holds one 'O' per
    label name. The item is converted with `self.data_item2feature_item(..., 0)`
    and then with `self.get_test_model_input`.

    :param slot_label2id: slot label name -> id
    :param intent_label2id: intent label name -> id
    :return: ((slot_label_input, slot_label_item), (intent_label_input, intent_label_item))
    """

    def names_by_id(label2id):
        # Label names ordered by id, with the padding entry dropped.
        ordered_pairs = sorted(label2id.items(), key=lambda pair: pair[1])
        return [name for name, _ in ordered_pairs if name != '[PAD]']

    def concat_feature(names):
        # Concatenate every converted label name into one sequence.
        tokens, token_tags, name_tags = [], [], []
        for name in names:
            words = self.convert_label_name(name)
            tokens += words
            token_tags += ['O'] * len(words)
            name_tags.append('O')
        item = self.data_item2feature_item(
            DataItem(seq_in=tokens, seq_out=token_tags, label=name_tags), 0)
        return self.get_test_model_input(item), item

    # Both mappings are sorted up front, before any feature is built.
    slot_names = names_by_id(slot_label2id)
    intent_names = names_by_id(intent_label2id)

    slot_label_input, slot_label_item = concat_feature(slot_names)
    intent_label_input, intent_label_item = concat_feature(intent_names)
    return (slot_label_input, slot_label_item), (intent_label_input, intent_label_item)
def prepare_sep_label_feature(self, label2id):
    """ Prepare digital input for label features separately (one item per label).

    Each key of `label2id` except '[PAD]' is converted with
    `self.convert_label_name` and wrapped in a `DataItem` whose `seq_out` and
    `label` are 'None' placeholders; the resulting feature items are passed
    to `self.get_support_model_input`.

    :param label2id: mapping keyed by label name (only keys and length are used)
    :return: (label_input, label_items)
    """

    def to_feature_item(name):
        tokens = self.convert_label_name(name)
        placeholder_tags = ['None'] * len(tokens)
        return self.data_item2feature_item(
            DataItem(tokens, placeholder_tags, ['None']), 0)

    label_items = [
        to_feature_item(name) for name in label2id if name != '[PAD]'
    ]
    # no pad, so - 1
    item_count = len(label2id) - 1
    label_input = self.get_support_model_input(label_items, item_count)
    return label_input, label_items
def prepare_label_feature(self, label2id: dict):
    """ Prepare digital input for label features in concatenate style.

    Label names (except '[PAD]') are visited in ascending id order. Each one
    is converted with `self.convert_label_name`, and the space-joined result
    is tokenized with `self.tokenizer.tokenize`. Words and word pieces of all
    labels are concatenated into one `DataItem`, tagged 'O' throughout; the
    word-piece mark is 0 for the first piece of a label's tokenization and 1
    for its remaining pieces.

    :param label2id: label name -> id
    :return: (label_input, label_item)
    """
    words, pieces, piece_marks = [], [], []
    ordered_pairs = sorted(label2id.items(), key=lambda pair: pair[1])
    for name, _ in ordered_pairs:
        if name == '[PAD]':
            continue
        name_words = self.convert_label_name(name)
        name_pieces = self.tokenizer.tokenize(' '.join(name_words))
        words += name_words
        pieces += name_pieces
        # Leading 0, then one 1 for every further piece of this label.
        piece_marks.append(0)
        piece_marks += [1] * (len(name_pieces) - 1)
    # Every word and every word piece carries the 'O' tag.
    word_tags = ['O'] * len(words)
    piece_tags = ['O'] * len(pieces)
    label_item = self.data_item2feature_item(
        DataItem(words, word_tags, pieces, piece_tags, piece_marks), 0)
    return self.get_test_model_input(label_item), label_item
def prepare_sep_label_feature(self, label2id_map):
    """ Prepare digital input for label features separately, task by task.

    For every task in `label2id_map`, each label name except '[PAD]' is
    converted with `self.convert_label_name`, wrapped in a `DataItem` with
    'None' placeholders for `seq_out` and `label`, and turned into a feature
    item via `self.data_item2feature_item(..., 0, task)`. After all items of
    all tasks exist, each task's items are passed to
    `self.get_support_model_input`.

    :param label2id_map: task name -> mapping keyed by label name
    :return: (label_input_map, label_item_map), both keyed by task name
    """

    def to_feature_item(name, task):
        tokens = self.convert_label_name(name)
        placeholder_tags = ['None'] * len(tokens)
        return self.data_item2feature_item(
            DataItem(tokens, placeholder_tags, ['None']), 0, task)

    # First pass: feature items for every task.
    label_item_map = {}
    for task, label2id in label2id_map.items():
        label_item_map[task] = [
            to_feature_item(name, task) for name in label2id if name != '[PAD]'
        ]

    # Second pass: model inputs; no pad, so - 1.
    label_input_map = {}
    for task, task_items in label_item_map.items():
        label_input_map[task] = self.get_support_model_input(
            task_items, len(label2id_map[task]) - 1)
    return label_input_map, label_item_map
def prepare_label_feature(self, label2id_map: Dict[str, Dict[str, int]]):
    """ Prepare digital input for label features in concatenate style, task by task.

    For each task, label names (except '[PAD]') are visited in ascending id
    order. Each one is converted with `self.convert_label_name`, and the
    space-joined result is tokenized with `self.tokenizer.tokenize`. Words
    and word pieces of that task's labels are concatenated into one
    `DataItem`, tagged 'O' throughout; the word-piece mark is 0 for the first
    piece of a label's tokenization and 1 for its remaining pieces.

    The accumulator lists are created anew for every task, so a task's item
    contains only that task's labels and no two `DataItem`s share list
    objects.

    :param label2id_map: task name -> (label name -> id)
    :return: (label_input_map, label_item_map), both keyed by task name
    """
    sorted_label_map = {
        task: sorted(label2id.items(), key=lambda x: x[1])
        for task, label2id in label2id_map.items()
    }
    label_item_map, label_input_map = {}, {}
    for task, sorted_labels in sorted_label_map.items():
        # Fresh buffers per task: sharing them across tasks would leak the
        # labels of earlier tasks into later ones.
        text, wp_text, label, wp_label, wp_mark = [], [], [], [], []
        for label_name, _ in sorted_labels:
            if label_name == '[PAD]':
                continue
            tmp_text = self.convert_label_name(label_name)
            tmp_wp_text = self.tokenizer.tokenize(' '.join(tmp_text))
            text.extend(tmp_text)
            wp_text.extend(tmp_wp_text)
            label.extend(['O'] * len(tmp_text))
            wp_label.extend(['O'] * len(tmp_wp_text))
            # 0 marks the first word piece of this label, 1 the following ones.
            wp_mark.extend([0] + [1] * (len(tmp_wp_text) - 1))
        label_item_map[task] = self.data_item2feature_item(
            DataItem(text, label, wp_text, wp_label, wp_mark), 0, task)
        label_input_map[task] = self.get_test_model_input(label_item_map[task])
    return label_input_map, label_item_map