# Standard-library and numpy imports used by the functions below; project-specific
# helpers (logging, list2str, plot_confusion_matrix, get_classification_report,
# get_f1_scores, compute_metrics, classification_report, get_vocab,
# write_vocab_in_order, if_exist, DATABASE_EXISTS_TMP, InputFeatures) are assumed
# to be imported from the surrounding package.
import os
import random

import numpy as np


def _eval_epochs_done_callback(task_name, global_vars, label_ids, graph_fold=None, normalize_cm=True):
    labels = np.asarray(global_vars[task_name + '_all_labels'])
    preds = np.asarray(global_vars[task_name + '_all_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy for task {task_name}: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    classification_report = get_classification_report(labels, preds, label_ids)
    logging.info(classification_report)

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels, preds, graph_fold, label_ids, normalize=normalize_cm, prefix=task_name)

    return accuracy
def eval_epochs_done_callback(global_vars, label_ids, graph_fold=None, none_label_id=0, normalize_cm=True):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    f1_scores = get_f1_scores(labels, preds, average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')

    classification_report = get_classification_report(labels, preds, label_ids)
    logging.info(classification_report)

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels, preds, graph_fold, label_ids, normalize=normalize_cm)

    return {'Accuracy': accuracy}
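# Minimal sketch (not part of the original module) of the per-iteration hook these
# epoch-end callbacks assume: something must append batch-level results to global_vars
# under 'all_labels', 'all_preds' and 'all_subtokens_mask' before the epoch-end callback
# runs. The function name, argument names and shapes here are illustrative assumptions.
def _example_eval_iter_callback(global_vars, batch_labels, batch_preds, batch_subtokens_mask):
    for key in ('all_labels', 'all_preds', 'all_subtokens_mask'):
        global_vars.setdefault(key, [])
    # flatten per-batch arrays so the epoch-end callback can np.asarray them directly
    global_vars['all_labels'].extend(np.asarray(batch_labels).ravel().tolist())
    global_vars['all_preds'].extend(np.asarray(batch_preds).ravel().tolist())
    global_vars['all_subtokens_mask'].extend(np.asarray(batch_subtokens_mask).ravel().tolist())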
def eval_epochs_done_callback(global_vars, graph_fold):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    plot_confusion_matrix(labels, preds, graph_fold)
    logging.info(classification_report(labels, preds))

    return {'accuracy': accuracy}
def eval_epochs_done_callback(global_vars, graph_fold):
    intent_labels = np.asarray(global_vars['all_intent_labels'])
    intent_preds = np.asarray(global_vars['all_intent_preds'])
    slot_labels = np.asarray(global_vars['all_slot_labels'])
    slot_preds = np.asarray(global_vars['all_slot_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    slot_labels = slot_labels[subtokens_mask]
    slot_preds = slot_preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if intent_preds.shape[0] > sample_size + 1:
        i = random.randint(0, intent_preds.shape[0] - sample_size - 1)
    logging.info("Sampled i_preds: [%s]" % list2str(intent_preds[i : i + sample_size]))
    logging.info("Sampled intents: [%s]" % list2str(intent_labels[i : i + sample_size]))
    logging.info("Sampled s_preds: [%s]" % list2str(slot_preds[i : i + sample_size]))
    logging.info("Sampled slots: [%s]" % list2str(slot_labels[i : i + sample_size]))

    plot_confusion_matrix(intent_labels, intent_preds, graph_fold)

    logging.info('Intent prediction results')
    correct_preds = sum(intent_labels == intent_preds)
    intent_accuracy = correct_preds / intent_labels.shape[0]
    logging.info(f'Intent accuracy: {intent_accuracy}')
    logging.info(f'Classification report:\n{classification_report(intent_labels, intent_preds)}')

    logging.info('Slot prediction results')
    slot_accuracy = sum(slot_labels == slot_preds) / slot_labels.shape[0]
    logging.info(f'Slot accuracy: {slot_accuracy}')
    logging.info(f'Classification report:\n{classification_report(slot_labels[:-2], slot_preds[:-2])}')

    return {'intent_accuracy': intent_accuracy, 'slot_accuracy': slot_accuracy}
def convert_sequences_to_features(self, all_sent_subtokens, sent_labels, tokenizer, max_seq_length):
    """Loads a data file into a list of `InputBatch`s."""
    self.features = []
    for sent_id in range(len(all_sent_subtokens)):
        sent_subtokens = all_sent_subtokens[sent_id]
        sent_label = sent_labels[sent_id]

        input_ids = [tokenizer._convert_token_to_id(t) for t in sent_subtokens]

        # The mask has 1 for real tokens and 0 for padding tokens.
        # Only real tokens are attended to.
        input_mask = [1] * len(input_ids)

        # Zero-pad up to the sequence length.
        while len(input_ids) < max_seq_length:
            input_ids.append(0)
            input_mask.append(0)
        segment_ids = [0] * max_seq_length

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length

        if sent_id == 0:
            logging.info("*** Example ***")
            logging.info("example_index: %s" % sent_id)
            logging.info("subtokens: %s" % " ".join(sent_subtokens))
            logging.info("sent_label: %s" % sent_label)
            logging.info("input_ids: %s" % list2str(input_ids))
            logging.info("input_mask: %s" % list2str(input_mask))

        self.features.append(
            InputFeatures(
                sent_id=sent_id,
                sent_label=sent_label,
                input_ids=input_ids,
                input_mask=input_mask,
                segment_ids=segment_ids,
            )
        )
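# Worked example (illustrative values only, not part of the original code) of the
# zero-padding behaviour above: a 4-subtoken sentence padded to max_seq_length=6.
def _example_padding():
    input_ids = [101, 7592, 2088, 102]  # hypothetical subtoken ids
    input_mask = [1] * len(input_ids)
    max_seq_length = 6
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
    # real subtokens carry mask 1, zero-padding carries mask 0
    assert input_ids == [101, 7592, 2088, 102, 0, 0]
    assert input_mask == [1, 1, 1, 1, 0, 0]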
def eval_epochs_done_callback(global_vars, label_ids, graph_fold=None, none_label_id=0, normalize_cm=True):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    # remove labels from label_ids that don't appear in the dev set
    used_labels = set(labels) | set(preds)
    label_ids = {k: label_ids[k] for k, v in label_ids.items() if v in used_labels}

    logging.info(classification_report(labels, preds, target_names=label_ids))

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels, preds, graph_fold, label_ids, normalize=normalize_cm)

    return {'Accuracy': accuracy}
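# Worked example (hypothetical label map, not from the original code) of the filtering
# above: only labels that actually occur in the dev set are kept, so the target_names
# passed to classification_report match the labels present in labels/preds.
def _example_label_filtering():
    label_ids = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-LOC': 3}
    used_labels = {0, 1, 2}
    filtered = {k: label_ids[k] for k, v in label_ids.items() if v in used_labels}
    assert filtered == {'O': 0, 'B-PER': 1, 'I-PER': 2}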
def eval_epochs_done_callback(global_vars, output_dir, task_name):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Task name: %s" % task_name.upper())
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    results = compute_metrics(task_name, preds, labels)

    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, task_name + '.txt'), 'w') as f:
        f.write('labels\t' + list2str(labels) + '\n')
        f.write('preds\t' + list2str(preds) + '\n')

    logging.info(results)

    return results
def merge(data_dir, subdirs, dataset_name, modes=['train', 'test']):
    outfold = f'{data_dir}/{dataset_name}'
    if if_exist(outfold, [f'{mode}.tsv' for mode in modes]):
        logging.info(DATABASE_EXISTS_TMP.format('SNIPS-ATIS', outfold))
        slots = get_vocab(f'{outfold}/dict.slots.csv')
        none_slot = 0
        for key in slots:
            if slots[key] == 'O':
                none_slot = key
                break
        return outfold, int(none_slot)

    os.makedirs(outfold, exist_ok=True)

    data_files, slot_files = {}, {}
    for mode in modes:
        data_files[mode] = open(f'{outfold}/{mode}.tsv', 'w')
        data_files[mode].write('sentence\tlabel\n')
        slot_files[mode] = open(f'{outfold}/{mode}_slots.tsv', 'w')

    intents, slots = {}, {}
    intent_shift, slot_shift = 0, 0
    none_intent, none_slot = -1, -1

    for subdir in subdirs:
        curr_intents = get_vocab(f'{data_dir}/{subdir}/dict.intents.csv')
        curr_slots = get_vocab(f'{data_dir}/{subdir}/dict.slots.csv')

        for key in curr_intents:
            if intent_shift > 0 and curr_intents[key] == 'O':
                continue
            if curr_intents[key] == 'O' and intent_shift == 0:
                none_intent = int(key)
            intents[int(key) + intent_shift] = curr_intents[key]

        for key in curr_slots:
            if slot_shift > 0 and curr_slots[key] == 'O':
                continue
            if slot_shift == 0 and curr_slots[key] == 'O':
                none_slot = int(key)
            slots[int(key) + slot_shift] = curr_slots[key]

        for mode in modes:
            with open(f'{data_dir}/{subdir}/{mode}.tsv', 'r') as f:
                for line in f.readlines()[1:]:
                    text, label = line.strip().split('\t')
                    label = int(label)
                    if curr_intents[label] == 'O':
                        label = none_intent
                    else:
                        label = label + intent_shift
                    data_files[mode].write(f'{text}\t{label}\n')

            with open(f'{data_dir}/{subdir}/{mode}_slots.tsv', 'r') as f:
                for line in f.readlines():
                    labels = [int(label) for label in line.strip().split()]
                    shifted_labels = []
                    for label in labels:
                        if curr_slots[label] == 'O':
                            shifted_labels.append(none_slot)
                        else:
                            shifted_labels.append(label + slot_shift)
                    slot_files[mode].write(list2str(shifted_labels) + '\n')

        intent_shift += len(curr_intents)
        slot_shift += len(curr_slots)

    write_vocab_in_order(intents, f'{outfold}/dict.intents.csv')
    write_vocab_in_order(slots, f'{outfold}/dict.slots.csv')

    # flush and close the merged output files
    for mode in modes:
        data_files[mode].close()
        slot_files[mode].close()

    return outfold, none_slot
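# Hypothetical usage sketch for merge(); the directory layout and subdir names below
# are assumptions for illustration, not taken from the original code. Each subdir is
# expected to already contain {mode}.tsv, {mode}_slots.tsv, dict.intents.csv and
# dict.slots.csv in the format merge() reads above.
def _example_merge_usage():
    outfold, none_slot_id = merge(
        data_dir='data/nlu', subdirs=['atis', 'snips'], dataset_name='atis_snips', modes=['train', 'test']
    )
    logging.info(f'Merged dataset written to {outfold}, O-slot id: {none_slot_id}')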
def eval_epochs_done_callback(global_vars, intents_label_ids, slots_label_ids, graph_fold=None, normalize_cm=True):
    intent_labels = np.asarray(global_vars['all_intent_labels'])
    intent_preds = np.asarray(global_vars['all_intent_preds'])
    slot_labels = np.asarray(global_vars['all_slot_labels'])
    slot_preds = np.asarray(global_vars['all_slot_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    slot_labels = slot_labels[subtokens_mask]
    slot_preds = slot_preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if intent_preds.shape[0] > sample_size + 1:
        i = random.randint(0, intent_preds.shape[0] - sample_size - 1)
    logging.info("Sampled i_preds: [%s]" % list2str(intent_preds[i : i + sample_size]))
    logging.info("Sampled intents: [%s]" % list2str(intent_labels[i : i + sample_size]))
    logging.info("Sampled s_preds: [%s]" % list2str(slot_preds[i : i + sample_size]))
    logging.info("Sampled slots: [%s]" % list2str(slot_labels[i : i + sample_size]))

    if graph_fold:
        # calculate, plot and save the confusion_matrix
        plot_confusion_matrix(
            intent_labels, intent_preds, graph_fold, intents_label_ids, normalize=normalize_cm, prefix='Intent'
        )
        plot_confusion_matrix(
            slot_labels, slot_preds, graph_fold, slots_label_ids, normalize=normalize_cm, prefix='Slot'
        )

    logging.info('Slot Prediction Results:')
    slot_accuracy = np.mean(slot_labels == slot_preds)
    logging.info(f'Slot Accuracy: {slot_accuracy}')
    f1_scores = get_f1_scores(slot_labels, slot_preds, average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')
    logging.info(f'\n {get_classification_report(slot_labels, slot_preds, label_ids=slots_label_ids)}')

    logging.info('Intent Prediction Results:')
    intent_accuracy = np.mean(intent_labels == intent_preds)
    logging.info(f'Intent Accuracy: {intent_accuracy}')
    f1_scores = get_f1_scores(intent_labels, intent_preds, average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')
    logging.info(f'\n {get_classification_report(intent_labels, intent_preds, label_ids=intents_label_ids)}')

    return {'intent_accuracy': intent_accuracy, 'slot_accuracy': slot_accuracy}