def process_batch(args, batcher, model, loss_func, token_vocab, metadata_vocab, sf_lf_map, sf_tokenized_lf_map,
                  token_metadata_counts):
    """
    :param args: argparse instance
    :param batcher: AcronymBatcherLoader instance
    :param model: PyTorch acronym expander model from ./modules/
    :param loss_func: PyTorch nn.CrossEntropyLoss function
    :param token_vocab: unigram token vocabulary for MIMIC-III
    :param metadata_vocab: metadata-specific vocabulary for MIMIC-III
    :param sf_lf_map: dictionary mapping SFs to original string LFs
    :param sf_tokenized_lf_map: dictionary mapping SFs to tokenized LFs
    :param token_metadata_counts: dictionary mapping LFs to metadata counts.  Used for computing p(metadata|LF)
    :return: average loss in mini-batch along with other performance metrics.
    rel_weights only applies to the LMC model, which returns the result of the metadata-token gating function.
    """
    batch_input, batch_p, batch_counts = batcher.next(token_vocab, sf_lf_map, sf_tokenized_lf_map,
                                                      token_metadata_counts, metadata_vocab=metadata_vocab)
    batch_input = list(map(lambda x: torch.LongTensor(x).clamp_min_(0).to(args.device), batch_input))
    batch_p = list(map(lambda x: torch.FloatTensor(x).to(args.device), batch_p))
    # The BSG model takes no metadata priors; the LMC model additionally consumes p(metadata|LF)
    full_input = batch_input + batch_counts if args.lm_type == 'bsg' else batch_input + batch_p + batch_counts
    scores, target, rel_weights = model(*full_input)
    num_correct = len(np.where(tensor_to_np(torch.argmax(scores, 1)) == tensor_to_np(target))[0])
    num_examples = len(batch_counts[0])
    batch_loss = loss_func.forward(scores, target)
    return batch_loss, num_examples, num_correct, scores, rel_weights
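
# A minimal sketch of how process_batch might be driven from a training loop.
# This helper is hypothetical (not part of the repo): `optimizer` and the
# vocab/map arguments are assumed to be constructed elsewhere; only the call
# pattern and backward pass are illustrated here.
def _train_step_sketch(args, batcher, model, optimizer, loss_func, token_vocab, metadata_vocab, sf_lf_map,
                       sf_tokenized_lf_map, token_metadata_counts):
    model.train()
    optimizer.zero_grad()
    batch_loss, num_examples, num_correct, _, _ = process_batch(
        args, batcher, model, loss_func, token_vocab, metadata_vocab, sf_lf_map, sf_tokenized_lf_map,
        token_metadata_counts)
    batch_loss.backward()
    optimizer.step()
    return batch_loss.item(), num_correct / float(num_examples)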
def elmo_analyze(test_batcher, model, sf_lf_map, vocab, sf_tokenized_lf_map, indexer, results_dir=None):
    """
    :param test_batcher: AcronymBatcherLoader instance
    :param model: AcronymExpander instance
    :param sf_lf_map: Short form to LF mappings
    :param vocab: AllenNLP Vocabulary instance storing tokens and corresponding token ids
    :param sf_tokenized_lf_map: dictionary of SF --> tokenized LFs
    :param indexer: AllenNLP token to id indexer
    :param results_dir: where to write the results files
    :return: metrics dictionary; also writes a confusion matrix analysis file into results_dir
    """
    device_str = 'cuda' if torch.cuda.is_available() else 'cpu'
    test_batcher.reset(shuffle=False)
    model.eval()
    sf_confusion = defaultdict(lambda: ([], []))
    id_map = {'correct': [], 'error': []}
    errors_str, correct_str = defaultdict(str), defaultdict(str)
    loss_func = nn.CrossEntropyLoss()
    total_ll, num_correct, num_examples = 0.0, 0.0, 0.0
    correct_top3 = [0, 0, 0]
    for _ in tqdm(range(test_batcher.num_batches())):
        batch_input, num_outputs = test_batcher.elmo_next(vocab, indexer, sf_tokenized_lf_map)
        batch_input = list(map(lambda x: torch.LongTensor(x).clamp_min_(0).to(device_str), batch_input))
        with torch.no_grad():
            scores, target = model(*batch_input + [num_outputs])
        batch_correct = len(np.where(tensor_to_np(torch.argmax(scores, 1)) == tensor_to_np(target))[0])
        batch_examples = len(num_outputs)
        batch_loss = loss_func.forward(scores, target)
        batch_data = test_batcher.get_prev_batch()
        num_correct += batch_correct
        num_examples += batch_examples
        total_ll += batch_loss.item()
        pred_lf_idxs = tensor_to_np(torch.argmax(scores, 1))
        target_lf_idxs = np.array(batch_data['target_lf_idx'].tolist())
        top_num = min(scores.size()[-1], 3)
        top_3_pred_lf_idxs = tensor_to_np(torch.topk(scores, top_num)[1])
        # correct_top3[i] accumulates the number of targets found anywhere in the top i + 1 predictions
        tc = 0
        for i in range(top_num):
            tc += len(np.where(top_3_pred_lf_idxs[:, i] == target_lf_idxs)[0])
            correct_top3[i] += tc
        # With fewer than 3 candidate LFs, every example is trivially covered by the remaining ranks
        for i in range(top_num, 3):
            correct_top3[i] += len(target_lf_idxs)
        _analyze_batch(batch_data, sf_lf_map, pred_lf_idxs, correct_str, errors_str, sf_confusion, id_map, None)
    avg_test_ll = total_ll / float(test_batcher.num_batches())
    avg_test_acc = num_correct / float(num_examples)
    avg_top3_acc = list(map(lambda nc: str(nc / float(num_examples)), correct_top3))
    avg_top3_acc_str = '/'.join(avg_top3_acc)
    print('Test Loss={}. Accuracy={}'.format(avg_test_ll, avg_test_acc))
    print('Top 3 Accuracy={}'.format(avg_top3_acc_str))
    metrics = _analyze_stats(results_dir, sf_lf_map, correct_str, errors_str, sf_confusion, id_map,
                             experiment='elmo')
    metrics['accuracy'] = avg_test_acc
    metrics['log_loss'] = avg_test_ll
    return metrics
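
# Sanity-check sketch (toy numbers, not tied to any real batch) for the
# cumulative top-3 bookkeeping above: after the loop, correct_top3[i] holds the
# number of examples whose target appears anywhere in the top i + 1 predictions,
# so the three counts are monotonically non-decreasing.
def _top3_counting_demo():
    target_lf_idxs = np.array([0, 1, 2, 1])
    top_3_pred_lf_idxs = np.array([
        [0, 1, 2],  # target found at rank 1
        [2, 1, 0],  # target found at rank 2
        [1, 0, 2],  # target found at rank 3
        [0, 2, 1],  # target found at rank 3
    ])
    correct_top3 = [0, 0, 0]
    tc = 0
    for i in range(3):
        tc += len(np.where(top_3_pred_lf_idxs[:, i] == target_lf_idxs)[0])
        correct_top3[i] += tc
    assert correct_top3 == [1, 2, 4]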
def run_test_epoch(model, test_batcher, indexer, vocab, sf_tokenized_lf_map, loss_func):
    device_str = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.eval()  # puts the module in inference mode (e.g. disables dropout); it does not touch gradients
    test_batcher.reset(shuffle=False)
    test_epoch_loss, test_examples, test_correct = 0.0, 0, 0
    for _ in tqdm(range(test_batcher.num_batches())):
        batch_input, num_outputs = test_batcher.elmo_next(vocab, indexer, sf_tokenized_lf_map)
        batch_input = list(map(lambda x: torch.LongTensor(x).clamp_min_(0).to(device_str), batch_input))
        with torch.no_grad():
            scores, target = model(*batch_input + [num_outputs])
        num_correct = len(np.where(tensor_to_np(torch.argmax(scores, 1)) == tensor_to_np(target))[0])
        num_examples = len(num_outputs)
        batch_loss = loss_func.forward(scores, target)
        test_correct += num_correct
        test_examples += num_examples
        test_epoch_loss += batch_loss.item()
    sleep(0.1)  # let the tqdm progress bar flush before printing
    test_loss = test_epoch_loss / float(test_batcher.num_batches())
    test_acc = test_correct / float(test_examples)
    print('Test Loss={}. Accuracy={}'.format(test_loss, test_acc))
    sleep(0.1)
    return test_loss
def process_batch(batcher, model, loss_func, vocab, sf_tokenized_lf_map):
    """Run a single forward pass over the next batch; returns loss, example/correct counts and LF scores."""
    batch_input, num_outputs = batcher.next(vocab, sf_tokenized_lf_map)
    batch_input = list(map(lambda x: torch.LongTensor(x).clamp_min_(0), batch_input))
    proba, target = model(*(batch_input + [num_outputs]))
    num_correct = len(np.where(tensor_to_np(torch.argmax(proba, 1)) == tensor_to_np(target))[0])
    num_examples = len(num_outputs)
    batch_loss = loss_func.forward(proba, target)
    return batch_loss, num_examples, num_correct, proba
def analyze(args, test_batcher, model, sf_lf_map, loss_func, token_vocab, metadata_vocab, sf_tokenized_lf_map,
            token_metadata_counts, results_dir=None):
    """
    :param args: ArgParse instance
    :param test_batcher: AcronymBatcherLoader instance
    :param model: AcronymExpander instance
    :param sf_lf_map: Short form to LF mappings
    :param loss_func: PyTorch CrossEntropyLoss instance
    :param token_vocab: unigram token vocabulary for MIMIC-III
    :param metadata_vocab: metadata-specific vocabulary for MIMIC-III
    :param sf_tokenized_lf_map: dictionary of SF --> tokenized LFs
    :param token_metadata_counts: dictionary mapping LFs to metadata counts
    :param results_dir: where to write the results files
    :return: metrics dictionary from _analyze_stats; also writes analysis files into results_dir
    """
    test_batcher.reset(shuffle=False)
    model.eval()
    sf_confusion = defaultdict(lambda: ([], []))
    id_map = {'correct': [], 'error': []}
    errors_str, correct_str = defaultdict(str), defaultdict(str)
    total_ll, num_correct, num_examples = 0.0, 0.0, 0.0
    correct_top3 = [0, 0, 0]
    for _ in range(test_batcher.num_batches()):
        with torch.no_grad():
            batch_loss, batch_examples, batch_correct, batch_scores, rel_weights = process_batch(
                args, test_batcher, model, loss_func, token_vocab, metadata_vocab, sf_lf_map, sf_tokenized_lf_map,
                token_metadata_counts)
        num_correct += batch_correct
        num_examples += batch_examples
        total_ll += batch_loss.item()  # accumulate a plain float, as in the other analysis loops
        batch_data = test_batcher.get_prev_batch()
        target_lf_idxs = np.array(batch_data['target_lf_idx'].tolist())
        pred_lf_idxs = tensor_to_np(torch.argmax(batch_scores, 1))
        top_num = min(batch_scores.size()[-1], 3)
        top_3_pred_lf_idxs = tensor_to_np(torch.topk(batch_scores, top_num)[1])
        tc = 0
        for i in range(top_num):
            tc += len(np.where(top_3_pred_lf_idxs[:, i] == target_lf_idxs)[0])
            correct_top3[i] += tc
        for i in range(top_num, 3):
            correct_top3[i] += len(target_lf_idxs)
        if rel_weights is not None:
            rel_weights = tensor_to_np(rel_weights)
        _analyze_batch(batch_data, sf_lf_map, pred_lf_idxs, correct_str, errors_str, sf_confusion, id_map,
                       rel_weights)
    avg_test_ll = total_ll / float(test_batcher.num_batches())
    avg_acc = num_correct / float(num_examples)
    avg_top3_acc = list(map(lambda nc: str(nc / float(num_examples)), correct_top3))
    avg_top3_acc_str = '/'.join(avg_top3_acc)
    print('Test Loss={}. Accuracy={}'.format(avg_test_ll, avg_acc))
    print('Top 3 Accuracy={}'.format(avg_top3_acc_str))
    return _analyze_stats(results_dir, sf_lf_map, correct_str, errors_str, sf_confusion, id_map,
                          experiment=args.experiment)
def bert_analyze(test_batcher, model, sf_lf_map, tokenizer, sf_tokenized_lf_map, results_dir=None):
    device_str = 'cuda' if torch.cuda.is_available() else 'cpu'
    test_batcher.reset(shuffle=False)
    model.eval()
    sf_confusion = defaultdict(lambda: ([], []))
    id_map = {'correct': [], 'error': []}
    errors_str, correct_str = defaultdict(str), defaultdict(str)
    loss_func = nn.CrossEntropyLoss()
    total_ll, num_correct, num_examples = 0.0, 0.0, 0.0
    correct_top3 = [0, 0, 0]
    for _ in tqdm(range(test_batcher.num_batches())):
        batch_input, num_outputs = test_batcher.bert_next(tokenizer, sf_tokenized_lf_map)
        batch_input = list(map(lambda x: torch.LongTensor(x).clamp_min_(0).to(device_str), batch_input))
        with torch.no_grad():
            scores, target = model(*batch_input + [num_outputs])
        batch_correct = len(np.where(tensor_to_np(torch.argmax(scores, 1)) == tensor_to_np(target))[0])
        batch_examples = len(num_outputs)
        batch_loss = loss_func.forward(scores, target)
        batch_data = test_batcher.get_prev_batch()
        num_correct += batch_correct
        num_examples += batch_examples
        total_ll += batch_loss.item()
        pred_lf_idxs = tensor_to_np(torch.argmax(scores, 1))
        target_lf_idxs = np.array(batch_data['target_lf_idx'].tolist())
        top_num = min(scores.size()[-1], 3)
        top_3_pred_lf_idxs = tensor_to_np(torch.topk(scores, top_num)[1])
        tc = 0
        for i in range(top_num):
            tc += len(np.where(top_3_pred_lf_idxs[:, i] == target_lf_idxs)[0])
            correct_top3[i] += tc
        for i in range(top_num, 3):
            correct_top3[i] += len(target_lf_idxs)
        _analyze_batch(batch_data, sf_lf_map, pred_lf_idxs, correct_str, errors_str, sf_confusion, id_map, None)
    avg_test_ll = total_ll / float(test_batcher.num_batches())
    avg_test_acc = num_correct / float(num_examples)
    avg_top3_acc = list(map(lambda nc: str(nc / float(num_examples)), correct_top3))
    avg_top3_acc_str = '/'.join(avg_top3_acc)
    print('Test Loss={}. Accuracy={}'.format(avg_test_ll, avg_test_acc))
    print('Top 3 Accuracy={}'.format(avg_top3_acc_str))
    metrics = _analyze_stats(results_dir, sf_lf_map, correct_str, errors_str, sf_confusion, id_map,
                             experiment='bert')
    metrics['accuracy'] = avg_test_acc
    metrics['log_loss'] = avg_test_ll
    return metrics
def point_similarity(model, vocab, tokens_a, tokens_b):
    ids_a = get_known_ids(vocab, tokens_a)
    ids_b = get_known_ids(vocab, tokens_b)
    if len(ids_a) == 0 or len(ids_b) == 0:
        return 0.0
    embeddings = tensor_to_np(model.embeddings_mu.weight)
    rep_a = embeddings[ids_a, :].mean(0)
    rep_b = embeddings[ids_b, :].mean(0)
    # scipy's cosine() returns a distance, so subtract from 1 to recover similarity
    sim = 1.0 - cosine(rep_a, rep_b)
    return sim
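
# Hypothetical usage of point_similarity with a loaded checkpoint's `model` and
# `vocab`; the token lists below are illustrative only. Unknown tokens are
# dropped by get_known_ids, and a pair with no in-vocabulary tokens falls back
# to a similarity of 0.0.
def _similarity_demo(model, vocab):
    pairs = [
        (['heart', 'failure'], ['cardiac', 'arrest']),
        (['aspirin'], ['ibuprofen']),
    ]
    for tokens_a, tokens_b in pairs:
        sim = point_similarity(model, vocab, tokens_a, tokens_b)
        print('{} vs. {} --> {:.3f}'.format(' '.join(tokens_a), ' '.join(tokens_b), sim))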
print(headers[i])
# Sweep the section-word gate from fully word (p=0) to fully section (p=1)
for p in np.arange(0, 1.25, 0.25):
    rw = [p, 1.0 - p]
    rel_weights = torch.FloatTensor([rw]).to(device_str)
    with torch.no_grad():
        mu_q, sigma_q, weights = model.encoder(center_word_tens, header_tens, context_tens, mask,
                                               center_mask_p=None, context_mask_p=None, metadata_mask_p=None,
                                               rel_weights=rel_weights)
    scores = tensor_to_np(nn.Softmax(-1)(-compute_kl(mu_q, sigma_q, mu_compare, sigma_compare).squeeze(1)))
    order = np.argsort(-scores)
    weight_str = 'Relative Weights --> Word={}. Section={}'.format(rw[1], rw[0])
    print('\t{}'.format(weight_str))
    for j in order[:min(10, len(order))]:
        print('\t\t{} --> {}'.format(compare_words[j], scores[j]))

section_df = pd.read_csv(os.path.join(home_dir, 'preprocess/data/mimic/section_freq.csv')).dropna()
section_names = list(sorted(set(list(
if __name__ == '__main__':
    is_lga = False
    fp_str = 'lga.png' if is_lga else 'sec2vec.png'
    title = 'Latent Meaning Cells' if is_lga else 'BSG With Headers As Pseudo-Contexts'
    checkpoint_fp = '../bsg/weights/lga/checkpoint_1.pth' if is_lga else '../bsg/weights/12-20-sec2vec/checkpoint_1.pth'
    if torch.cuda.is_available():
        checkpoint_state = torch.load(checkpoint_fp)
    else:
        # Remap CUDA tensors onto the CPU when no GPU is present
        checkpoint_state = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)
    if is_lga:
        section_vocab = checkpoint_state['section_vocab']
        embeddings = tensor_to_np(checkpoint_state['model_state_dict']['encoder.section_embeddings.weight'])
        sections = section_vocab.i2w[1:]
    else:
        vocab = checkpoint_state['vocab']
        offset = vocab.separator_start_vocab_id
        embeddings = tensor_to_np(checkpoint_state['model_state_dict']['encoder.embeddings.weight'])[offset:, :]
        sections = vocab.i2w[offset:]
    tsne = TSNE(n_components=2, verbose=1, perplexity=50)
    tsne_results = tsne.fit_transform(embeddings)
    sections = [s.split('=')[1] for s in sections]
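    # A minimal plotting sketch for the t-SNE projection computed above,
    # assuming matplotlib is available; the repo's actual figure code may
    # differ. Writes a scatter of section embeddings, labeled by name, to fp_str.
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.scatter(tsne_results[:, 0], tsne_results[:, 1], s=5)
    for (x, y), name in zip(tsne_results, sections):
        ax.annotate(name, (x, y), fontsize=6)
    ax.set_title(title)
    fig.savefig(fp_str, bbox_inches='tight')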
def error_analysis(test_batcher, model, used_sf_lf_map, loss_func, vocab, results_dir=None):
    """
    :param test_batcher: AcronymBatcherLoader instance
    :param model: AcronymExpander instance
    :param used_sf_lf_map: Short form to LF mappings
    :param loss_func: PyTorch CrossEntropyLoss instance
    :param vocab: Vocab instance storing tokens and corresponding token ids
    :param results_dir: where to write the results files
    :return: None but writes a confusion matrix analysis file into results_dir
    """
    test_batcher.reset(shuffle=False)
    model.eval()
    sf_confusion = defaultdict(lambda: ([], []))
    results_str = defaultdict(str)
    errors_str = defaultdict(str)
    k = 5  # number of top predictions to render per example
    for _ in tqdm(range(test_batcher.num_batches())):
        with torch.no_grad():
            batch_loss, num_examples, num_correct, proba = process_batch(
                test_batcher, model, loss_func, vocab, used_sf_lf_map)
        batch_data = test_batcher.get_prev_batch()
        proba = tensor_to_np(proba)
        top_pred_idxs = np.argsort(-proba, axis=1)[:, :k]
        pred_lf_idxs = top_pred_idxs[:, 0]
        for batch_idx, (row_idx, row) in enumerate(batch_data.iterrows()):
            row = row.to_dict()
            sf = row['sf']
            lf_map = used_sf_lf_map[sf]
            target_lf = row['target_lf']
            target_lf_idx = row['used_target_lf_idx']
            pred_lf_idx = pred_lf_idxs[batch_idx]
            pred_lf = lf_map[pred_lf_idx]
            top_pred_lfs = ', '.join(
                list(map(lambda lf: lf_map[lf], top_pred_idxs[batch_idx][:min(k, len(lf_map))])))
            example_str = _render_example(sf, target_lf, lf_map[target_lf_idx], pred_lf, top_pred_lfs,
                                          row['trimmed_tokens'], row['tokenized_context'])
            results_str[sf] += example_str
            if not target_lf_idx == pred_lf_idx:
                errors_str[sf] += example_str
            sf_confusion[sf][0].append(target_lf_idx)
            sf_confusion[sf][1].append(pred_lf_idx)
    results_fp = os.path.join(results_dir, 'results.txt')
    reports_fp = os.path.join(results_dir, 'reports.txt')
    errors_fp = os.path.join(results_dir, 'errors.txt')
    summary_fp = os.path.join(results_dir, 'summary.csv')
    df = defaultdict(list)
    cols = [
        'sf',
        'support',
        'micro_precision', 'micro_recall', 'micro_f1',
        'macro_precision', 'macro_recall', 'macro_f1',
        'weighted_precision', 'weighted_recall', 'weighted_f1',
    ]
    reports = []
    with open(results_fp, 'w') as fd:
        for sf_key in sorted(results_str.keys()):
            fd.write(results_str[sf_key])
    with open(errors_fp, 'w') as fd:
        for sf_key in sorted(errors_str.keys()):
            fd.write(errors_str[sf_key])
    for sf in sf_confusion:
        labels = used_sf_lf_map[sf]
        labels_trunc = list(map(lambda x: x.split(';')[0], labels))
        y_true = sf_confusion[sf][0]
        y_pred = sf_confusion[sf][1]
        sf_results = classification_report(y_true, y_pred, labels=list(range(len(labels_trunc))),
                                           target_names=labels_trunc, output_dict=True)
        report = classification_report(y_true, y_pred, labels=list(range(len(labels_trunc))),
                                       target_names=labels_trunc)
        reports.append(report)
        reports.append('\n\n')
        metrics = ['micro avg', 'macro avg', 'weighted avg']
        for metric in metrics:
            if metric in sf_results:
                for metric_name, v in sf_results[metric].items():
                    if not metric_name == 'support':
                        metric_key = '{}_{}'.format(metric.split(' ')[0], metric_name.split('-')[0])
                        df[metric_key].append(v)
            else:
                suffixes = ['precision', 'recall', 'f1']
                for suffix in suffixes:
                    df['{}_{}'.format(metric.split(' ')[0], suffix)].append(None)
        df['sf'].append(sf)
        df['support'].append(sf_results['weighted avg']['support'])
        try:
            cm = ConfusionMatrix(actual_vector=y_true, predict_vector=y_pred)
            label_idx_to_str = dict()
            for idx in cm.classes:
                label_idx_to_str[idx] = labels_trunc[int(idx)]
            cm.relabel(mapping=label_idx_to_str)
            cm_outpath = os.path.join(results_dir, 'confusion', sf)
            cm.save_html(cm_outpath)
        except Exception:
            print('Only 1 target class for test set SF={}'.format(sf))
    pd.DataFrame(df, columns=cols).to_csv(summary_fp, index=False)
    with open(reports_fp, 'w') as fd:
        # map(fd.write, reports) is lazy in Python 3 and would never run; writelines actually writes
        fd.writelines(reports)
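
# Hypothetical follow-up: the summary.csv written by error_analysis can be
# ranked by macro-F1 to surface the hardest short forms. Column names come from
# the `cols` list above; `summary_fp` is whatever path was passed there.
def _rank_hardest_sfs(summary_fp, n=10):
    summary = pd.read_csv(summary_fp)
    return summary.sort_values('macro_f1').head(n)[['sf', 'support', 'macro_f1']]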