def main(in_dataset_folder, in_model_folder, in_no_ood_evaluation):
    """Evaluate a trained HierarchicalLSTM on the bAbI Task 6 (DSTC2) test set.

    Restores the model, vocabulary and action templates from
    ``in_model_folder``, builds the test dataset from ``in_dataset_folder``,
    and prints overall / after-first-OOD / directly-post-OOD accuracy plus the
    average loss.  When ``in_no_ood_evaluation`` is truthy, a second
    evaluation pass is reported with OOD turns ignored.
    """

    def _ratio(numerator, denominator):
        # Several turn categories (after-OOD, post-OOD) can be empty;
        # report 0 instead of raising ZeroDivisionError.
        return numerator / denominator if denominator != 0 else 0

    def _print_accuracy(stats):
        # Shared reporting for the three accuracy figures of one eval pass.
        accuracy = _ratio(stats['correct_turns'], stats['total_turns'])
        accuracy_after_ood = _ratio(stats['correct_turns_after_ood'],
                                    stats['total_turns_after_ood'])
        accuracy_post_ood = _ratio(stats['correct_post_ood_turns'],
                                   stats['total_post_ood_turns'])
        print(
            'overall: {:.3f}; after first OOD: {:.3f}, directly post-OOD: {:.3f}'
            .format(accuracy, accuracy_after_ood, accuracy_post_ood))
        print('Loss : {:.3f}'.format(stats['avg_loss']))

    rev_vocab, kb, action_templates, config = load_model(in_model_folder)
    test_dialogs, test_indices = read_dialogs(
        os.path.join(in_dataset_folder, 'dialog-babi-task6-dstc2-tst.txt'),
        with_indices=True)
    et = EntityTracker(kb)
    at = ActionTracker(None, et)
    at.set_action_templates(action_templates)
    vocab = {word: idx for idx, word in enumerate(rev_vocab)}
    X, context_features, action_masks, y = make_dataset_for_hierarchical_lstm(
        test_dialogs, test_indices, vocab, et, at, **config)
    net = HierarchicalLSTM(config, context_features.shape[-1],
                           action_masks.shape[-1])
    net.restore(in_model_folder)
    eval_stats_full_dataset = evaluate_advanced(
        net, (X, context_features, action_masks, y), test_dialogs,
        at.action_templates)
    print('Full dataset: {} turns overall, {} turns after the first OOD'.format(
        eval_stats_full_dataset['total_turns'],
        eval_stats_full_dataset['total_turns_after_ood']))
    print('Accuracy:')
    _print_accuracy(eval_stats_full_dataset)
    if in_no_ood_evaluation:
        # NOTE(review): this call passes ``test_indices`` where the call
        # above passes ``test_dialogs`` as the third argument — confirm
        # which one evaluate_advanced actually expects here.
        eval_stats_no_ood = evaluate_advanced(
            net, (X, context_features, action_masks, y), test_indices,
            at.action_templates, ignore_ood_accuracy=True)
        print('Accuracy (OOD turns ignored):')
        _print_accuracy(eval_stats_no_ood)
def main(in_clean_dataset_folder, in_noisy_dataset_folder, in_model_folder,
         in_mode, in_runs_number):
    """Evaluate a trained VariationalHierarchicalLSTMv3 on bAbI Task 6 (DSTC2).

    ``in_mode`` selects the report: 'clean' (clean test set), 'noisy'
    (OOD-injected test set) or 'noisy_ignore_ood' (noisy set with OOD turns
    excluded from accuracy).  ``in_runs_number`` is forwarded to
    ``evaluate_advanced`` (number of evaluation runs).
    """

    def _ratio(numerator, denominator):
        # Turn categories (post-OOD, OOD) can be empty; report 0 instead of
        # raising ZeroDivisionError.
        return numerator / denominator if denominator != 0 else 0

    rev_vocab, kb, action_templates, config = load_model(in_model_folder)
    clean_dialogs, clean_indices = read_dialogs(
        os.path.join(in_clean_dataset_folder, 'dialog-babi-task6-dstc2-tst.txt'),
        with_indices=True)
    noisy_dialogs, noisy_indices = read_dialogs(
        os.path.join(in_noisy_dataset_folder, 'dialog-babi-task6-dstc2-tst.txt'),
        with_indices=True)
    # Noisy dialogs contain extra OOD turns, so they bound the input length.
    config['max_input_length'] = max(
        item['end'] - item['start'] + 1 for item in noisy_indices)
    post_ood_turns_clean, post_ood_turns_noisy = mark_post_ood_turns(
        noisy_dialogs, BABI_CONFIG['backoff_utterance'].lower())
    et = EntityTracker(kb)
    at = ActionTracker(None, et)
    at.set_action_templates(action_templates)
    vocab = {word: idx for idx, word in enumerate(rev_vocab)}
    data_clean = make_dataset_for_vhcn_v2(clean_dialogs, clean_indices, vocab,
                                          et, at, **config)
    data_noisy = make_dataset_for_vhcn_v2(noisy_dialogs, noisy_indices, vocab,
                                          et, at, **config)
    context_features_clean, action_masks_clean = data_clean[2:4]
    net = VariationalHierarchicalLSTMv3(rev_vocab, config,
                                        context_features_clean.shape[-1],
                                        action_masks_clean.shape[-1])
    net.restore(in_model_folder)
    if in_mode == 'clean':
        eval_stats_clean = evaluate_advanced(
            net, data_clean, at.action_templates,
            BABI_CONFIG['backoff_utterance'].lower(),
            post_ood_turns=post_ood_turns_clean, runs_number=in_runs_number)
        print('Clean dataset: {} turns overall'.format(
            eval_stats_clean['total_turns']))
        print('Accuracy:')
        accuracy = _ratio(eval_stats_clean['correct_turns'],
                          eval_stats_clean['total_turns'])
        accuracy_continuous = _ratio(
            eval_stats_clean['correct_continuous_turns'],
            eval_stats_clean['total_turns'])
        accuracy_post_ood = _ratio(eval_stats_clean['correct_post_ood_turns'],
                                   eval_stats_clean['total_post_ood_turns'])
        print('overall: {:.3f}; continuous: {:.3f}; directly post-OOD: {:.3f}'
              .format(accuracy, accuracy_continuous, accuracy_post_ood))
        print('Loss : {:.3f}'.format(eval_stats_clean['avg_loss']))
    elif in_mode == 'noisy':
        eval_stats_noisy = evaluate_advanced(
            net, data_noisy, at.action_templates,
            BABI_CONFIG['backoff_utterance'].lower(),
            post_ood_turns=post_ood_turns_noisy, runs_number=in_runs_number)
        print('\n\n')
        print('Noisy dataset: {} turns overall'.format(
            eval_stats_noisy['total_turns']))
        print('Accuracy:')
        accuracy = _ratio(eval_stats_noisy['correct_turns'],
                          eval_stats_noisy['total_turns'])
        accuracy_continuous = _ratio(
            eval_stats_noisy['correct_continuous_turns'],
            eval_stats_noisy['total_turns'])
        accuracy_post_ood = _ratio(eval_stats_noisy['correct_post_ood_turns'],
                                   eval_stats_noisy['total_post_ood_turns'])
        accuracy_ood = _ratio(eval_stats_noisy['correct_ood_turns'],
                              eval_stats_noisy['total_ood_turns'])
        print('overall: {:.3f}; continuous: {:.3f}; directly post-OOD: {:.3f}; OOD: {:.3f}'
              .format(accuracy, accuracy_continuous, accuracy_post_ood,
                      accuracy_ood))
        print('Loss : {:.3f}'.format(eval_stats_noisy['avg_loss']))
    elif in_mode == 'noisy_ignore_ood':
        eval_stats_no_ood = evaluate_advanced(
            net, data_noisy, at.action_templates,
            BABI_CONFIG['backoff_utterance'].lower(),
            post_ood_turns=post_ood_turns_noisy, ignore_ood_accuracy=True,
            runs_number=in_runs_number)
        print('Accuracy (OOD turns ignored):')
        accuracy = _ratio(eval_stats_no_ood['correct_turns'],
                          eval_stats_no_ood['total_turns'])
        accuracy_after_ood = _ratio(
            eval_stats_no_ood['correct_turns_after_ood'],
            eval_stats_no_ood['total_turns_after_ood'])
        accuracy_post_ood = _ratio(
            eval_stats_no_ood['correct_post_ood_turns'],
            eval_stats_no_ood['total_post_ood_turns'])
        print('overall: {:.3f}; after first OOD: {:.3f}, directly post-OOD: {:.3f}'
              .format(accuracy, accuracy_after_ood, accuracy_post_ood))
def main(in_clean_dataset_folder, in_noisy_dataset_folder, in_model_folder,
         in_no_ood_evaluation):
    """Compare a trained HierarchicalLSTM on clean vs. OOD-injected test sets.

    Each post-OOD user turn in the noisy dialogs is replaced with its clean
    counterpart before evaluation, so accuracy differences at those turns
    reflect only the preceding OOD context.  When ``in_no_ood_evaluation`` is
    truthy, an extra pass over the noisy data is reported with OOD turns
    ignored.
    """

    def _ratio(numerator, denominator):
        # Turn categories can be empty; report 0 instead of raising
        # ZeroDivisionError.
        return numerator / denominator if denominator != 0 else 0

    rev_vocab, kb, action_templates, config = load_model(in_model_folder)
    clean_dialogs, clean_indices = read_dialogs(
        os.path.join(in_clean_dataset_folder, 'dialog-babi-task6-dstc2-tst.txt'),
        with_indices=True)
    noisy_dialogs, noisy_indices = read_dialogs(
        os.path.join(in_noisy_dataset_folder, 'dialog-babi-task6-dstc2-tst.txt'),
        with_indices=True)
    post_ood_turns_clean, post_ood_turns_noisy = mark_post_ood_turns(
        noisy_dialogs)
    assert len(post_ood_turns_clean) == len(post_ood_turns_noisy)
    # Substitute each post-OOD noisy user utterance with the corresponding
    # clean one, so only the dialog history differs between the two datasets.
    for post_ood_turn_clean, post_ood_turn_noisy in zip(
            sorted(post_ood_turns_clean), sorted(post_ood_turns_noisy)):
        noisy_dialogs[post_ood_turn_noisy][0] = \
            clean_dialogs[post_ood_turn_clean][0]
    et = EntityTracker(kb)
    at = ActionTracker(None, et)
    at.set_action_templates(action_templates)
    vocab = {word: idx for idx, word in enumerate(rev_vocab)}
    X_clean, context_features_clean, action_masks_clean, y_clean = \
        make_dataset_for_hierarchical_lstm(clean_dialogs, clean_indices,
                                           vocab, et, at, **config)
    X_noisy, context_features_noisy, action_masks_noisy, y_noisy = \
        make_dataset_for_hierarchical_lstm(noisy_dialogs, noisy_indices,
                                           vocab, et, at, **config)
    net = HierarchicalLSTM(config, context_features_clean.shape[-1],
                           action_masks_clean.shape[-1])
    net.restore(in_model_folder)
    eval_stats_clean = evaluate_advanced(
        net, (X_clean, context_features_clean, action_masks_clean, y_clean),
        at.action_templates, post_ood_turns=post_ood_turns_clean)
    print('Clean dataset: {} turns overall'.format(
        eval_stats_clean['total_turns']))
    print('Accuracy:')
    accuracy = _ratio(eval_stats_clean['correct_turns'],
                      eval_stats_clean['total_turns'])
    accuracy_post_ood = _ratio(eval_stats_clean['correct_post_ood_turns'],
                               eval_stats_clean['total_post_ood_turns'])
    print('overall: {:.3f}; directly post-OOD: {:.3f}'.format(
        accuracy, accuracy_post_ood))
    print('Loss : {:.3f}'.format(eval_stats_clean['avg_loss']))
    eval_stats_noisy = evaluate_advanced(
        net, (X_noisy, context_features_noisy, action_masks_noisy, y_noisy),
        at.action_templates, post_ood_turns=post_ood_turns_noisy)
    print('\n\n')
    print('Noisy dataset: {} turns overall, {} turns after the first OOD'.format(
        eval_stats_noisy['total_turns'],
        eval_stats_noisy['total_turns_after_ood']))
    print('Accuracy:')
    accuracy = _ratio(eval_stats_noisy['correct_turns'],
                      eval_stats_noisy['total_turns'])
    accuracy_after_ood = _ratio(eval_stats_noisy['correct_turns_after_ood'],
                                eval_stats_noisy['total_turns_after_ood'])
    accuracy_post_ood = _ratio(eval_stats_noisy['correct_post_ood_turns'],
                               eval_stats_noisy['total_post_ood_turns'])
    print('overall: {:.3f}; after first OOD: {:.3f}, directly post-OOD: {:.3f}'
          .format(accuracy, accuracy_after_ood, accuracy_post_ood))
    print('Loss : {:.3f}'.format(eval_stats_noisy['avg_loss']))
    if in_no_ood_evaluation:
        eval_stats_no_ood = evaluate_advanced(
            net,
            (X_noisy, context_features_noisy, action_masks_noisy, y_noisy),
            at.action_templates, post_ood_turns=post_ood_turns_noisy,
            ignore_ood_accuracy=True)
        print('Accuracy (OOD turns ignored):')
        accuracy = _ratio(eval_stats_no_ood['correct_turns'],
                          eval_stats_no_ood['total_turns'])
        accuracy_after_ood = _ratio(
            eval_stats_no_ood['correct_turns_after_ood'],
            eval_stats_no_ood['total_turns_after_ood'])
        accuracy_post_ood = _ratio(
            eval_stats_no_ood['correct_post_ood_turns'],
            eval_stats_no_ood['total_post_ood_turns'])
        print('overall: {:.3f}; after first OOD: {:.3f}, directly post-OOD: {:.3f}'
              .format(accuracy, accuracy_after_ood, accuracy_post_ood))
def main(in_clean_dataset_folder, in_noisy_dataset_folder, in_model_folder,
         in_mode, in_runs_number):
    """Evaluate a restored HCN-style model on clean and OOD-injected test JSON.

    The model class and data-preparation function are looked up by name from
    the saved ``config``.  ``in_mode`` selects the report: 'clean', 'noisy',
    or 'noisy_ignore_ood' (accuracy with OOD turns excluded).
    ``in_runs_number`` is forwarded to ``evaluate_advanced``.
    """

    def _ratio(numerator, denominator):
        # Post-OOD / OOD turn sets can be empty; report 0 rather than raise.
        return numerator / denominator if denominator != 0 else 0

    rev_vocab, kb, action_templates, config = load_model(in_model_folder)
    train_json = load_hcn_json(
        os.path.join(in_clean_dataset_folder, 'train.json'))
    test_json = load_hcn_json(
        os.path.join(in_clean_dataset_folder, 'test.json'))
    test_ood_json = load_hcn_json(
        os.path.join(in_noisy_dataset_folder, 'test_ood.json'))
    # OOD-injected dialogs are the longest, so they bound the input length.
    config['max_input_length'] = max(
        len(dialog['turns']) for dialog in test_ood_json['dialogs'])
    post_ood_turns_clean, post_ood_turns_noisy = mark_post_ood_turns(
        test_ood_json)
    et = EntityTracker(kb)
    # Weight every action as 1.0 except action id 0 (presumably the
    # padding/backoff action — TODO confirm), which is excluded.
    action_weights = defaultdict(lambda: 1.0)
    action_weights[0] = 0.0
    action_weighting = np.vectorize(action_weights.__getitem__)
    vocab = {word: idx for idx, word in enumerate(rev_vocab)}
    # Context-feature vocabulary is built from the *training* split so it
    # matches what the model saw during training.
    ctx_features = [
        utterance['context_features']
        for dialog in train_json['dialogs']
        for utterance in dialog['turns']
        if 'context_features' in utterance
    ]
    ctx_features_vocab, ctx_features_rev_vocab = make_vocabulary(
        ctx_features, config['max_vocabulary_size'], special_tokens=[])
    data_preparation_function = getattr(utils.preprocessing,
                                        config['data_preparation_function'])
    data_clean = data_preparation_function(test_json, vocab,
                                           ctx_features_vocab, et, **config)
    data_noisy = data_preparation_function(test_ood_json, vocab,
                                           ctx_features_vocab, et, **config)
    # Append per-turn action weights; the last dataset element holds the
    # action labels.
    data_clean_with_weights = *data_clean, action_weighting(data_clean[-1])
    data_noisy_with_weights = *data_noisy, action_weighting(data_noisy[-1])
    net = getattr(modules, config['model_name'])(vocab, config,
                                                 len(ctx_features_vocab),
                                                 len(action_templates))
    net.restore(in_model_folder)
    if in_mode == 'clean':
        eval_stats_clean = evaluate_advanced(
            net, data_clean_with_weights, action_templates,
            BABI_CONFIG['backoff_utterance'],
            post_ood_turns=post_ood_turns_clean, runs_number=in_runs_number)
        print('Clean dataset: {} turns overall'.format(
            eval_stats_clean['total_turns']))
        print('Accuracy:')
        accuracy = _ratio(eval_stats_clean['correct_turns'],
                          eval_stats_clean['total_turns'])
        accuracy_continuous = _ratio(
            eval_stats_clean['correct_continuous_turns'],
            eval_stats_clean['total_turns'])
        accuracy_post_ood = _ratio(eval_stats_clean['correct_post_ood_turns'],
                                   eval_stats_clean['total_post_ood_turns'])
        ood_f1 = eval_stats_clean['ood_f1']
        print('overall acc: {:.3f}; continuous acc: {:.3f}; '
              'directly post-OOD acc: {:.3f}; OOD F1: {:.3f}'.format(
                  accuracy, accuracy_continuous, accuracy_post_ood, ood_f1))
        print('Loss : {:.3f}'.format(eval_stats_clean['avg_loss']))
    elif in_mode == 'noisy':
        eval_stats_noisy = evaluate_advanced(
            net, data_noisy_with_weights, action_templates,
            BABI_CONFIG['backoff_utterance'],
            post_ood_turns=post_ood_turns_noisy, runs_number=in_runs_number)
        print('\n\n')
        print('Noisy dataset: {} turns overall'.format(
            eval_stats_noisy['total_turns']))
        print('Accuracy:')
        accuracy = _ratio(eval_stats_noisy['correct_turns'],
                          eval_stats_noisy['total_turns'])
        accuracy_continuous = _ratio(
            eval_stats_noisy['correct_continuous_turns'],
            eval_stats_noisy['total_turns'])
        accuracy_post_ood = _ratio(eval_stats_noisy['correct_post_ood_turns'],
                                   eval_stats_noisy['total_post_ood_turns'])
        accuracy_ood = _ratio(eval_stats_noisy['correct_ood_turns'],
                              eval_stats_noisy['total_ood_turns'])
        ood_f1 = eval_stats_noisy['ood_f1']
        print('overall acc: {:.3f}; continuous acc: {:.3f}; '
              'directly post-OOD acc: {:.3f}; OOD acc: {:.3f}; OOD F1: {:.3f}'
              .format(accuracy, accuracy_continuous, accuracy_post_ood,
                      accuracy_ood, ood_f1))
        print('Loss : {:.3f}'.format(eval_stats_noisy['avg_loss']))
    elif in_mode == 'noisy_ignore_ood':
        eval_stats_no_ood = evaluate_advanced(
            net, data_noisy_with_weights, action_templates,
            BABI_CONFIG['backoff_utterance'],
            post_ood_turns=post_ood_turns_noisy, ignore_ood_accuracy=True,
            runs_number=in_runs_number)
        print('Accuracy (OOD turns ignored):')
        accuracy = _ratio(eval_stats_no_ood['correct_turns'],
                          eval_stats_no_ood['total_turns'])
        accuracy_after_ood = _ratio(
            eval_stats_no_ood['correct_turns_after_ood'],
            eval_stats_no_ood['total_turns_after_ood'])
        accuracy_post_ood = _ratio(
            eval_stats_no_ood['correct_post_ood_turns'],
            eval_stats_no_ood['total_post_ood_turns'])
        print('overall: {:.3f}; '
              'after first OOD: {:.3f}; '
              'directly post-OOD: {:.3f}'.format(accuracy, accuracy_after_ood,
                                                 accuracy_post_ood))