def test(model, Y, epoch, data_path, fold, gpu, version, code_inds, dicts,
         samples, model_dir, testing):
    """
    Testing loop. Evaluates `model` on one fold and returns its metrics.

    The evaluation file is `data_path` with 'train' replaced by `fold`.
    Predictions are written with `persistence.write_preds` and the metrics
    are printed with `evaluation.print_metrics`.

    Args:
        model: network to evaluate. It is called as
            `model(data, target, desc_data=..., get_attention=...)` and must
            return `(output, loss, alpha)`. `model.lmbda` is read, and
            `model.conv.weight` is read when `samples` is truthy.
        Y: not used by this function; kept so existing callers keep working.
        epoch: epoch number; used in the sample file names and forwarded to
            `interpret.save_samples`.
        data_path: path of the training file.
        fold: name of the fold to evaluate (compared against 'test' below).
        gpu: when truthy, inputs and targets are moved to CUDA.
        version: forwarded to `datasets.data_generator`.
        code_inds: when non-empty and `model.lmbda > 0`, passed to
            `unseen_code_vecs` before evaluation.
        dicts: lookup tables; only `dicts['ind2c']` is read directly here.
        samples: when truthy, attention samples are saved to
            `<model_dir>/tp_<fold>_examples_<epoch>.txt` and the matching
            `fp_` file.
        model_dir: directory for the sample files and the predictions.
        testing: together with `fold == 'test'`, forces an attention sample
            for every batch instead of a random 2%.

    Returns:
        The metrics object returned by `evaluation.all_metrics`, with an
        extra `'loss_<fold>'` entry holding the mean per-batch loss.
    """
    filename = data_path.replace('train', fold)
    print('file for evaluation: %s' % filename)
    ind2c = dicts['ind2c']
    num_labels = len(ind2c)

    y, yhat, yhat_raw, hids, losses = [], [], [], [], []

    # Attention-sample outputs; they stay None unless `samples` is set.
    tp_file = fp_file = window_size = None
    try:
        if samples:
            tp_file = open(
                '%s/tp_%s_examples_%d.txt' % (model_dir, fold, epoch), 'w')
            fp_file = open(
                '%s/fp_%s_examples_%d.txt' % (model_dir, fold, epoch), 'w')
            window_size = model.conv.weight.data.size()[2]

        desc_embed = model.lmbda > 0
        if desc_embed and len(code_inds) > 0:
            unseen_code_vecs(model, code_inds, dicts, gpu)

        model.eval()
        # The third positional argument is 1 - presumably the batch size;
        # confirm against datasets.data_generator.
        gen = datasets.data_generator(filename, dicts, 1, num_labels,
                                      version=version, desc_embed=desc_embed)
        for batch_idx, tup in tqdm(enumerate(gen)):
            data, target, hadm_ids, _, descs = tup
            data = torch.LongTensor(data)
            target = torch.FloatTensor(target)
            if gpu:
                data = data.cuda()
                target = target.cuda()
            model.zero_grad()

            desc_data = descs if desc_embed else None

            # get an attention sample for 2% of batches (every batch when
            # running the final test fold)
            get_attn = samples and (np.random.rand() < 0.02 or
                                    (fold == 'test' and testing))

            # `volatile=True` no longer disables autograd; no_grad() is the
            # supported way to avoid building a graph during evaluation.
            with torch.no_grad():
                output, loss, alpha = model(data, target,
                                            desc_data=desc_data,
                                            get_attention=get_attn)

            output = torch.sigmoid(output).detach().cpu().numpy()
            losses.append(loss.item())
            target_data = target.detach().cpu().numpy()

            if get_attn:
                interpret.save_samples(data, output, target_data, alpha,
                                       window_size, epoch, tp_file, fp_file,
                                       dicts=dicts)

            # save predictions, target, hadm ids
            yhat_raw.append(output)
            yhat.append(np.round(output))
            y.append(target_data)
            hids.extend(hadm_ids)
    finally:
        # Close the sample files even when evaluation fails part-way.
        for handle in (tp_file, fp_file):
            if handle is not None:
                handle.close()

    y = np.concatenate(y, axis=0)
    yhat = np.concatenate(yhat, axis=0)
    yhat_raw = np.concatenate(yhat_raw, axis=0)

    # write the predictions
    persistence.write_preds(yhat, model_dir, hids, fold, ind2c, yhat_raw)

    # get metrics
    k = 5 if num_labels == 50 else [8, 15]
    metrics = evaluation.all_metrics(yhat, y, k=k, yhat_raw=yhat_raw)
    evaluation.print_metrics(metrics)
    metrics['loss_%s' % fold] = np.mean(losses)
    return metrics
def test(model, Y, epoch, dataset, batch_size, embed_desc, fold, gpu, dicts,
         model_dir):
    """
    Testing loop. Evaluates `model` on `dataset` at fine and coarse level.

    When `model.hier` is truthy the model is expected to return coarse
    outputs itself. Otherwise coarse predictions are derived from the
    rounded fine predictions by keeping the part of each code string before
    the first '.'.

    Args:
        model: network to evaluate; must expose a `hier` attribute and
            return `(output, loss, alpha)` when called.
        Y: not used by this function; kept for interface compatibility.
        epoch: not used by this function; kept for interface compatibility.
        dataset: dataset wrapped in a `DataLoader` (not shuffled).
        batch_size: batch size handed to the `DataLoader`.
        embed_desc: when truthy together with `gpu`, `dicts['desc']` is
            moved to CUDA before being passed to the model.
        fold: fold name; predictions are written only when it is 'test'.
        gpu: when truthy, batch tensors are moved to CUDA.
        dicts: lookup tables; the keys 'ind2c', 'c2ind', 'desc',
            'ind2c_coarse', 'c2ind_coarse' and 'desc_plain' are read here.
        model_dir: directory handed to `persistence.write_preds`.

    Returns:
        Tuple `(metrics, metrics_codes, metrics_inst, hids)`. The last three
        fine-level values come from `evaluation.all_metrics`; `metrics` is
        additionally given a 'loss' entry (mean batch loss) and updated with
        the coarse-level metrics. `hids` is the list of collected hadm ids.
    """
    # Local import: the rest of this function does not otherwise need the
    # torch namespace, only torch.no_grad().
    import torch

    print('file for evaluation: %s' % fold)

    docs, attention, y, yhat, yhat_raw, hids, losses = [], [], [], [], [], [], []
    y_coarse, yhat_coarse, yhat_coarse_raw = [], [], []

    ind2c, c2ind, desc = dicts['ind2c'], dicts['c2ind'], dicts['desc']

    model.eval()

    # NOTE(review): `num_workers` and `collate` are not parameters; they are
    # expected to be defined at module level.
    gen = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                     num_workers=num_workers, collate_fn=collate)

    desc_data = desc
    if embed_desc and gpu:
        desc_data = desc_data.cuda()

    # Columns of `alpha` to keep; the same for every batch, so built once.
    attn_code_idx = [dicts['c2ind'][c] for c in persistence.get_codes()]

    hier = model.hier

    t = tqdm(gen, total=len(gen), ncols=0, file=sys.stdout)
    for batch_idx, tup in enumerate(t):
        data, target, target_coarse, hadm_ids, data_text = tup

        if gpu:
            data, target, target_coarse = (data.cuda(), target.cuda(),
                                           target_coarse.cuda())

        model.zero_grad()

        # No graph is needed for evaluation; without this every stored
        # attention slice below would keep its whole autograd graph alive.
        with torch.no_grad():
            if hier:
                output, loss, alpha = model(data, target, target_coarse,
                                            desc_data=desc_data)
            else:
                output, loss, alpha = model(data, target,
                                            desc_data=desc_data)

        if hier:
            output, output_coarse = output
            output_coarse = output_coarse.data.cpu().numpy()
            alpha, _ = alpha
            output = output.data.cpu().numpy()
        else:
            output = output.data.cpu().numpy()
            output_coarse = np.zeros(
                [len(output), len(dicts['ind2c_coarse'])])
            for i, row in enumerate(output):
                # np.nonzero returns a tuple (one array per dimension), so
                # the emptiness test has to look at the index array itself.
                predicted = np.nonzero(np.round(row))[0]
                if len(predicted) == 0:
                    continue

                codes = [str(dicts['ind2c'][ind]) for ind in predicted]
                codes_coarse = set(code.split('.')[0] for code in codes)
                codes_coarse_idx = [
                    dicts['c2ind_coarse'][code_coarse]
                    for code_coarse in codes_coarse
                ]
                output_coarse[i, codes_coarse_idx] = 1

        y_coarse.append(target_coarse.data.cpu().numpy())
        yhat_coarse_raw.append(output_coarse)
        yhat_coarse.append(np.round(output_coarse))

        losses.append(loss.item())
        target_data = target.data.cpu().numpy()

        # Drop the batch tensors before the next batch is loaded.
        del data, target, loss

        # save predictions, target, hadm ids
        yhat_raw.append(output)
        yhat.append(np.round(output))
        y.append(target_data)

        hids.extend(hadm_ids)
        docs.extend(data_text)
        attention.extend(alpha[:, attn_code_idx].cpu())

        t.set_postfix(loss=np.mean(losses))

    k = 5 if len(ind2c) == 50 else [8, 15]

    y_coarse = np.concatenate(y_coarse, axis=0)
    yhat_coarse = np.concatenate(yhat_coarse, axis=0)
    yhat_coarse_raw = np.concatenate(yhat_coarse_raw, axis=0)

    metrics_coarse, _, _ = evaluation.all_metrics(
        yhat_coarse, y_coarse, k=k, yhat_raw=yhat_coarse_raw, level='coarse')
    evaluation.print_metrics(metrics_coarse, level='coarse')

    y = np.concatenate(y, axis=0)
    yhat = np.concatenate(yhat, axis=0)
    yhat_raw = np.concatenate(yhat_raw, axis=0)

    # get metrics
    metrics, metrics_codes, metrics_inst = evaluation.all_metrics(
        yhat, y, k=k, yhat_raw=yhat_raw, level='fine')
    evaluation.print_metrics(metrics, level='fine')
    metrics['loss'] = np.mean(losses)
    metrics.update(metrics_coarse)

    # write the predictions
    if fold == 'test':
        persistence.write_preds(hids, docs, attention, y, yhat, yhat_raw,
                                metrics_inst, model_dir, fold, ind2c, c2ind,
                                dicts['desc_plain'])

    return metrics, metrics_codes, metrics_inst, hids
def test(model, epoch, batch_size, data_path, fold, gpu, dicts, samples,
         model_dir, testing, debug):
    """
    Testing loop. Evaluates `model` on one fold and returns its metrics.

    The evaluation file is `data_path` with 'train' replaced by `fold`.
    Predictions are written with `persistence.write_preds` and the metrics
    are printed with `evaluation.print_metrics`.

    Args:
        model: network to evaluate; called as `model(data, target)` and
            expected to return `(output, loss, alpha)`.
        epoch: epoch number; used only in the sample file names.
        batch_size: forwarded to `datasets.data_generator`.
        data_path: path of the training file.
        fold: name of the fold to evaluate.
        gpu: when truthy, inputs and targets are moved to CUDA.
        dicts: lookup tables forwarded to `datasets.data_generator`.
        samples: when truthy, the tp/fp example files are created in
            `model_dir`. Nothing in this function writes to them, so they
            are left empty.
        model_dir: directory for the example files and the predictions.
        testing: not used by this function; kept for interface
            compatibility.
        debug: when truthy, evaluation stops once `batch_idx` exceeds 50.

    Returns:
        The metrics object returned by `evaluation.all_metrics`, with an
        extra `'loss_<fold>'` entry holding the mean per-batch loss.
    """
    filename = data_path.replace('train', fold)
    print('file for evaluation: %s' % filename)

    y, yhat, yhat_raw, hids, losses = [], [], [], [], []

    tp_file = fp_file = None
    try:
        # The example files are still created so that existing output
        # layouts do not change.
        if samples:
            tp_file = open(
                '%s/tp_%s_examples_%d.txt' % (model_dir, fold, epoch), 'w')
            fp_file = open(
                '%s/fp_%s_examples_%d.txt' % (model_dir, fold, epoch), 'w')

        model.eval()
        gen = datasets.data_generator(filename, dicts, batch_size)
        for batch_idx, tup in tqdm(enumerate(gen)):
            if debug and batch_idx > 50:
                break

            data, target, hadm_ids = tup
            data = torch.LongTensor(data)
            target = torch.FloatTensor(target)
            if gpu:
                data = data.cuda()
                target = target.cuda()
            model.zero_grad()

            # `volatile=True` no longer disables autograd; no_grad() is the
            # supported way to avoid building a graph during evaluation.
            with torch.no_grad():
                output, loss, alpha = model(data, target)

            output = output.detach().cpu().numpy()
            # .item() replaces the old `loss.data[0]`, which fails on
            # zero-dimensional loss tensors.
            losses.append(loss.item())
            target_data = target.detach().cpu().numpy()

            # save predictions, target, hadm ids
            yhat_raw.append(output)
            yhat.append(np.round(output))
            y.append(target_data)
            hids.extend(hadm_ids)
    finally:
        # Close the example files even when evaluation fails part-way.
        for handle in (tp_file, fp_file):
            if handle is not None:
                handle.close()

    y = np.concatenate(y, axis=0)
    yhat = np.concatenate(yhat, axis=0)
    yhat_raw = np.concatenate(yhat_raw, axis=0)

    print("y shape: " + str(y.shape))
    print("yhat shape: " + str(yhat.shape))

    # write the predictions
    persistence.write_preds(yhat, model_dir, hids, fold, yhat_raw)

    # get metrics
    metrics = evaluation.all_metrics(yhat, y, yhat_raw=yhat_raw)
    evaluation.print_metrics(metrics)
    metrics['loss_%s' % fold] = np.mean(losses)
    return metrics
def _load_bert_tokenizer(args):
    """Return the BertTokenizer for `args.model`, or None for other models."""
    # model name -> (default vocabulary file, do_lower_case)
    defaults = {
        'bert': ('./pretrained_weights/bert-base-uncased-vocab.txt', True),
        'biobert': (
            './pretrained_weights/biobert_pretrain_output_all_notes_150000/vocab.txt',
            False),
        'bert-tiny': ('./pretrained_weights/bert-tiny-uncased-vocab.txt',
                      True),
    }
    if args.model not in defaults:
        return None
    vocab_path, lower_case = defaults[args.model]
    if args.redefined_tokenizer:
        # A user-supplied tokenizer replaces the default vocabulary but
        # keeps the casing convention of the chosen model.
        vocab_path = args.tokenizer_path
    return BertTokenizer.from_pretrained(vocab_path,
                                         do_lower_case=lower_case)


def test(args, model, Y, epoch, data_path, fold, gpu, version, code_inds,
         dicts, samples, model_dir, testing):
    """
    Testing loop. Evaluates `model` on one fold and returns its metrics.

    The evaluation file is `data_path` with 'train' replaced by `fold`.
    Models whose name is in `BERT_MODEL_LIST` are called with BERT-style
    keyword inputs and return `(output, loss)`. All other models are called
    as `model(data, target, desc_data=..., get_attention=...)` and return
    `(output, loss, alpha)`.

    Args:
        args: run configuration; `model`, `redefined_tokenizer`,
            `tokenizer_path` and `max_sequence_length` are read.
        model: network to evaluate. `model.lmbda` is read, and
            `model.conv.weight` is read when `samples` is truthy.
        Y: not used by this function; kept for interface compatibility.
        epoch: epoch number; used in the sample file names and forwarded to
            `interpret.save_samples`.
        data_path: path of the training file.
        fold: name of the fold to evaluate (compared against 'test' below).
        gpu: when truthy, tensors are moved to CUDA.
        version: forwarded to `datasets.data_generator`.
        code_inds: when non-empty and `model.lmbda > 0`, passed to
            `unseen_code_vecs` before evaluation.
        dicts: lookup tables; only `dicts['ind2c']` is read directly here.
        samples: when truthy, attention samples are saved to
            `<model_dir>/tp_<fold>_examples_<epoch>.txt` and the matching
            `fp_` file (non-BERT models only).
        model_dir: directory for the sample files and the predictions.
        testing: together with `fold == 'test'`, forces an attention sample
            for every batch instead of a random 2%.

    Returns:
        The metrics object returned by `evaluation.all_metrics`, with an
        extra `'loss_<fold>'` entry holding the mean per-batch loss.
    """
    filename = data_path.replace('train', fold)
    print('file for evaluation: %s' % filename)
    ind2c = dicts['ind2c']
    num_labels = len(ind2c)

    y, yhat, yhat_raw, hids, losses = [], [], [], [], []

    # Attention-sample outputs; they stay None unless `samples` is set.
    tp_file = fp_file = window_size = None
    try:
        if samples:
            tp_file = open(
                '%s/tp_%s_examples_%d.txt' % (model_dir, fold, epoch), 'w')
            fp_file = open(
                '%s/fp_%s_examples_%d.txt' % (model_dir, fold, epoch), 'w')
            window_size = model.conv.weight.data.size()[2]

        desc_embed = model.lmbda > 0
        if desc_embed and len(code_inds) > 0:
            unseen_code_vecs(model, code_inds, dicts, gpu)

        bert_tokenizer = _load_bert_tokenizer(args)

        # Neither check depends on the batch, so evaluate them once.
        needs_bert_inputs = args.model in ['bert', 'biobert', 'bert-tiny']
        is_bert_model = args.model in BERT_MODEL_LIST

        model.eval()
        gen = datasets.data_generator(
            filename, dicts, 1, num_labels, version=version,
            desc_embed=desc_embed, bert_tokenizer=bert_tokenizer, test=True,
            max_seq_length=args.max_sequence_length)
        for batch_idx, tup in tqdm(enumerate(gen)):
            data, target, hadm_ids, _, descs = tup
            data, target = torch.LongTensor(data), torch.FloatTensor(target)
            if gpu:
                data = data.cuda()
                target = target.cuda()

            desc_data = descs if desc_embed else None

            # Positions with id 0 are masked out.
            attention_mask = (data > 0).long()
            if needs_bert_inputs:
                token_type_ids = attention_mask * 0
                position_ids = torch.arange(data.size(1)).expand(
                    data.size(0), data.size(1))
                if gpu:
                    position_ids = position_ids.cuda()
                position_ids = position_ids * attention_mask
            else:
                token_type_ids = None
                position_ids = None

            # Computed before the forward pass so that save_samples below
            # receives the targets of the current batch.
            target_data = target.data.cpu().numpy()

            if is_bert_model:
                with torch.no_grad():
                    output, loss = model(input_ids=data,
                                         token_type_ids=token_type_ids,
                                         attention_mask=attention_mask,
                                         position_ids=position_ids,
                                         labels=target,
                                         desc_data=desc_data,
                                         pos_labels=None)
                output = torch.sigmoid(output)
                output = output.data.cpu().numpy()
            else:
                # get an attention sample for 2% of batches (every batch
                # when running the final test fold); this must be decided
                # before the model is called with it
                get_attn = samples and (np.random.rand() < 0.02 or
                                        (fold == 'test' and testing))
                with torch.no_grad():
                    output, loss, alpha = model(data, target,
                                                desc_data=desc_data,
                                                get_attention=get_attn)
                output = torch.sigmoid(output)
                output = output.data.cpu().numpy()

                if get_attn:
                    interpret.save_samples(data, output, target_data, alpha,
                                           window_size, epoch, tp_file,
                                           fp_file, dicts=dicts)

            losses.append(loss.item())

            # save predictions, target, hadm ids
            yhat_raw.append(output)
            yhat.append(np.round(output))
            y.append(target_data)
            hids.extend(hadm_ids)
    finally:
        # Close the sample files even when evaluation fails part-way.
        for handle in (tp_file, fp_file):
            if handle is not None:
                handle.close()

    y = np.concatenate(y, axis=0)
    yhat = np.concatenate(yhat, axis=0)
    yhat_raw = np.concatenate(yhat_raw, axis=0)

    # write the predictions
    persistence.write_preds(yhat, model_dir, hids, fold, ind2c, yhat_raw)

    # get metrics
    k = 5 if num_labels == 50 else [8, 15]
    metrics = evaluation.all_metrics(yhat, y, k=k, yhat_raw=yhat_raw)
    evaluation.print_metrics(metrics)
    metrics['loss_%s' % fold] = np.mean(losses)
    return metrics