def initialize(self, model_path):
    print("initializing tokenizer...")
    self.tokenizer = DataIterator().tokenizer
    self.PAD_IND = self.tokenizer.vocab.stoi['<pad>']
    self.token_reference = TokenReferenceBase(
        reference_token_idx=self.PAD_IND)
    print("initializing inference model...")
    self.model = torch.load(model_path, map_location="cpu").eval()
    print("initializing attribution method...")
    self.lig = LayerIntegratedGradients(self.model, self.model.embedding)
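# For context: a minimal, self-contained sketch of what the TokenReferenceBase
# set up above actually produces. The baseline that LayerIntegratedGradients
# interpolates against is just a PAD-filled sequence; the pad index used here
# (1) is a hypothetical value for illustration.
import torch
from captum.attr import TokenReferenceBase

PAD_IND = 1  # hypothetical '<pad>' index
token_reference = TokenReferenceBase(reference_token_idx=PAD_IND)

# generate_reference returns a 1-D tensor of length `sequence_length`
# filled with the reference token index.
ref = token_reference.generate_reference(sequence_length=5, device='cpu')
print(ref)  # tensor([1, 1, 1, 1, 1])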
def interpret_sentence(model, text, text_lengths, args, label=0):
    # Interpretable method
    if 'BERT' in args.model:
        PAD_IND = args.bert_tokenizer.pad_token_id
        lig = LayerIntegratedGradients(model, model.model.embeddings)
    else:
        PAD_IND = args.TEXT.vocab.stoi['<pad>']
        lig = LayerIntegratedGradients(model, model.embedding)
    token_reference = TokenReferenceBase(reference_token_idx=PAD_IND)
    model.zero_grad()

    # predict
    start = time.time()
    pred = model(text, text_lengths).squeeze(0)
    print("time:", time.time() - start)
    pred_ind = torch.argmax(pred).item()

    # generate reference indices for each sample
    reference_indices = token_reference.generate_reference(
        text.shape[1], device=args.device).unsqueeze(0)

    # compute attributions using layer integrated gradients, once per class
    attributions_ig_1 = lig.attribute((text, text_lengths),
                                      (reference_indices, text_lengths),
                                      target=0, n_steps=100,
                                      return_convergence_delta=False)
    attributions_ig_2 = lig.attribute((text, text_lengths),
                                      (reference_indices, text_lengths),
                                      target=1, n_steps=100,
                                      return_convergence_delta=False)

    if 'BERT' in args.model:
        sentence = [
            args.bert_tokenizer.ids_to_tokens[int(word)]
            for word in text.squeeze(0).cpu().numpy()
            if int(word) != args.bert_tokenizer.pad_token_id
        ]
    else:
        sentence = [
            args.TEXT.vocab.itos[int(word)]
            for word in text.squeeze(0).cpu().numpy()
        ]
    # print(sentence)

    add_attributions_to_visualizer(attributions_ig_1, sentence, pred,
                                   pred_ind, label, args)
    add_attributions_to_visualizer(attributions_ig_2, sentence, pred,
                                   pred_ind, label, args)
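# The snippet above calls add_attributions_to_visualizer without defining it.
# A hedged sketch following the standard Captum visualization pattern; the
# exact signature and the module-level record list are assumptions, not taken
# from the original project.
vis_data_records = []

def add_attributions_to_visualizer(attributions, sentence, pred, pred_ind,
                                   label, args):
    # collapse the embedding dimension into one score per token and normalize
    attributions = attributions.sum(dim=2).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    attributions = attributions.cpu().detach().numpy()
    vis_data_records.append(
        visualization.VisualizationDataRecord(
            attributions,              # per-token attributions
            torch.softmax(pred, dim=0).max().item(),  # predicted probability
            pred_ind,                  # predicted class
            label,                     # ground-truth label
            str(pred_ind),             # class the attributions explain
            attributions.sum(),        # total attribution score
            sentence,                  # token list
            None))                     # convergence delta (not computed here)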
class CNNPredictionModel:
    def __init__(self, model_path="static/models/CNN-29", seq_length=256):
        self.seq_length = seq_length
        self.initialize(model_path)

    def initialize(self, model_path):
        print("initializing tokenizer...")
        self.tokenizer = DataIterator().tokenizer
        self.PAD_IND = self.tokenizer.vocab.stoi['<pad>']
        self.token_reference = TokenReferenceBase(
            reference_token_idx=self.PAD_IND)
        print("initializing inference model...")
        self.model = torch.load(model_path, map_location="cpu").eval()
        print("initializing attribution method...")
        self.lig = LayerIntegratedGradients(self.model, self.model.embedding)

    def predict(self, text):
        words = self.tokenizer.preprocess(clean_text(text))
        # pad or truncate to the fixed sequence length
        if len(words) < self.seq_length:
            words += ['<pad>'] * (self.seq_length - len(words))
        elif len(words) > self.seq_length:
            words = words[:self.seq_length]
        tokens = [self.tokenizer.vocab.stoi[word] for word in words]
        tokens = torch.LongTensor(tokens).unsqueeze(0)
        reference_tokens = self.token_reference.generate_reference(
            self.seq_length, device='cpu').unsqueeze(0)

        pred = self.model(tokens)
        plabel = int(torch.argmax(pred, 1))
        pred = pred.tolist()[0]

        unpad_index = [
            idx for idx, word in enumerate(words) if word != '<pad>'
        ]
        unpad_words = [word for word in words if word != '<pad>']
        attributions = []
        for label in range(len(pred)):
            attributions.append(
                list(
                    self.attribute(tokens, reference_tokens, label,
                                   unpad_index)))
        return unpad_words, pred, plabel, attributions

    def attribute(self, tokens, reference_tokens, target, unpad_index):
        attributions, delta = self.lig.attribute(
            tokens, reference_tokens, target=target,
            return_convergence_delta=True)
        # sum over the embedding dimension, then normalize
        attributions = attributions.sum(dim=2).squeeze(0)
        attributions = attributions / torch.norm(attributions)
        attributions = attributions.cpu().detach().numpy()
        # keep only non-pad positions and rescale to [-1, 1]
        unpad_attributions = attributions[unpad_index]
        range_limit = np.max(np.abs(unpad_attributions))
        unpad_attributions /= range_limit
        return unpad_attributions

    def __repr__(self):
        return "prediction model for CNN"

    def __str__(self):
        return "prediction model for CNN"
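# A hedged usage sketch for CNNPredictionModel, assuming the checkpoint path
# and the project's DataIterator/clean_text helpers are importable; the input
# string is hypothetical.
predictor = CNNPredictionModel(model_path="static/models/CNN-29")
words, pred, plabel, attributions = predictor.predict("example input text")
print("predicted class:", plabel)
# per-token attribution scores for the predicted class, aligned with `words`
print(list(zip(words, attributions[plabel])))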
def get_insights(self, text_preprocess, _, target=0):
    """Calculates the Captum insights.

    Args:
        text_preprocess (tensor): tensor of the text input
        _ (str): the raw text data specified in the input request
        target (int): defaults to 0; the user needs to specify the
            target for the Captum explanation.

    Returns:
        (dict): a dictionary of the word-token importances
    """
    text_tensor, all_tokens = text_preprocess
    token_reference = TokenReferenceBase()
    logger.info("input_text shape %s", len(text_tensor.shape))
    logger.info("get_insights target %s", target)
    offsets = torch.tensor([0]).to(self.device)

    all_tokens = self.get_word_token(all_tokens)
    logger.info("text_tensor tokenized shape %s", text_tensor.shape)
    reference_indices = token_reference.generate_reference(
        text_tensor.shape[0], device=self.device
    ).squeeze(0)
    logger.info("reference indices shape %s", reference_indices.shape)

    attributions = self.lig.attribute(
        text_tensor,
        reference_indices,
        additional_forward_args=(offsets,),  # pass as a tuple of extra args
        return_convergence_delta=False,
        target=target,
    )
    logger.info("attributions shape %s", attributions.shape)
    attributions_sum = self.summarize_attributions(attributions)
    response = {}
    response["importances"] = attributions_sum.tolist()
    response["words"] = all_tokens
    return [response]
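# get_insights relies on self.summarize_attributions, which is not shown here.
# A hedged sketch of the common Captum implementation of this helper, written
# as a method on the same handler class; treat it as an assumption rather
# than the handler's actual code.
@staticmethod
def summarize_attributions(attributions):
    # sum per-embedding-dimension attributions into one score per token,
    # then normalize by the L2 norm
    attributions = attributions.sum(dim=-1).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    return attributions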
def interpret_sentence(docid):
    vis_data_records_ig = []
    model.zero_grad()
    tokens, token_ids, label = get_tokens_by_docid(docid)
    pred = forward_with_sigmoid(data.x, data.edge_index, data.edge_attr,
                                docid)[label]
    pred_ind = round(pred.detach().cpu().item())

    # compute attributions and approximation delta using layer integrated gradients
    token_reference = TokenReferenceBase(reference_token_idx=0)
    reference_indices = token_reference.generate_reference(
        data.x.shape[0], device='cuda:3').unsqueeze(0)
    attributions_ig, delta = lig.attribute(
        data.x.unsqueeze(0),
        reference_indices,
        additional_forward_args=(data.edge_index.unsqueeze(0),
                                 data.edge_attr.unsqueeze(0), docid),
        n_steps=50,
        return_convergence_delta=True,
        internal_batch_size=1)

    print(f'pred: {pred}, delta: {abs(delta)}')
    print(attributions_ig)
    add_attributions_to_visualizer(attributions_ig, tokens, token_ids, pred,
                                   pred_ind, label, delta,
                                   vis_data_records_ig)
    visualization.visualize_text(vis_data_records_ig)
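# The snippet assumes module-level forward_with_sigmoid and lig objects that
# are not shown. A hedged sketch of what they plausibly look like; the model
# call signature and the embedding layer name (model.embedding) are
# assumptions, not taken from the original source.
def forward_with_sigmoid(x, edge_index, edge_attr, docid):
    # attribute with respect to probabilities rather than raw logits
    return torch.sigmoid(model(x, edge_index, edge_attr, docid))

# attribute through the token embedding layer of the graph model
lig = LayerIntegratedGradients(forward_with_sigmoid, model.embedding)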
def interpret_sentence(
        model: nn.Module,
        input_abstract: str,
        input_title: str,
        input_keywords: str,
        vectors: Vectors,
        interpretable_embedding_abstracts: configure_interpretable_embedding_layer,
        interpretable_embedding_titles: configure_interpretable_embedding_layer,
        interpretable_embedding_keywords: configure_interpretable_embedding_layer,
        ig: IntegratedGradients,
        vis_data_records_ig: list,
        output_vectors: Dictionary,
        device: torch.device,
        min_len: int = 200):
    model.eval()
    model.zero_grad()

    abstract_token_reference = TokenReferenceBase(
        reference_token_idx=len(vectors))
    title_token_reference = TokenReferenceBase(
        reference_token_idx=len(vectors))
    keywords_token_reference = TokenReferenceBase(
        reference_token_idx=len(vectors))

    abstract_text, abstract_indices = get_input_indices(
        model, input_abstract, vectors, min_len, device, input_type='abstract')
    title_text, title_indices = get_input_indices(
        model, input_title, vectors, min_len, device, input_type='title')
    keywords_text, keywords_indices = get_input_indices(
        model, input_keywords, vectors, min_len, device, input_type='keywords')

    # input_indices dim: [sequence_length]
    seq_length = min_len
    abstract_indices = abstract_indices.to(device)
    title_indices = title_indices.to(device)
    keywords_indices = keywords_indices.to(device)

    # pre-compute word embeddings
    input_embedding_abstracts = interpretable_embedding_abstracts.indices_to_embeddings(
        abstract_indices)
    input_embedding_titles = interpretable_embedding_titles.indices_to_embeddings(
        title_indices)
    input_embedding_keywords = interpretable_embedding_keywords.indices_to_embeddings(
        keywords_indices)

    # predict
    pred = model.forward_internal(input_embedding_abstracts,
                                  input_embedding_titles,
                                  input_embedding_keywords)
    in_top_five, top_list, top_values = check_top(pred, model.field_name, False)

    # attribute each top prediction with respect to each of the three inputs
    for i in range(len(top_list)):
        model.interp = "titles"
        interpret_subsystem(top_list[i], top_values[i], input_embedding_titles,
                            input_embedding_abstracts, input_embedding_keywords,
                            interpretable_embedding_titles, title_text,
                            title_token_reference, seq_length, ig,
                            vis_data_records_ig, output_vectors, device)
        model.interp = "abstracts"
        interpret_subsystem(top_list[i], top_values[i],
                            input_embedding_abstracts, input_embedding_titles,
                            input_embedding_keywords,
                            interpretable_embedding_abstracts, abstract_text,
                            abstract_token_reference, seq_length, ig,
                            vis_data_records_ig, output_vectors, device)
        model.interp = "keywords"
        interpret_subsystem(top_list[i], top_values[i],
                            input_embedding_keywords, input_embedding_abstracts,
                            input_embedding_titles,
                            interpretable_embedding_keywords, keywords_text,
                            keywords_token_reference, seq_length, ig,
                            vis_data_records_ig, output_vectors, device)
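# For this embedding-level variant, the interpretable_embedding_* arguments
# come from Captum's configure_interpretable_embedding_layer, which swaps an
# embedding layer for an identity wrapper so plain IntegratedGradients can
# operate on precomputed embeddings. A hedged setup sketch; the layer name
# 'embedding_abstracts' is an assumption.
from captum.attr import (IntegratedGradients,
                         configure_interpretable_embedding_layer,
                         remove_interpretable_embedding_layer)

interpretable_embedding_abstracts = configure_interpretable_embedding_layer(
    model, 'embedding_abstracts')
ig = IntegratedGradients(model.forward_internal)
# ... call interpret_sentence(...) as above ...
# restore the original embedding layer when done
remove_interpretable_embedding_layer(model, interpretable_embedding_abstracts)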
        break
    print('Epoch: {}, train loss: {}, val loss: {}, train acc: {}, val acc: {}, '
          'train f1: {}, val f1: {}'.format(epoch, train_loss, val_loss,
                                            train_acc, val_acc, train_f1, val_f1))

model.load_state_dict(best_model)
metrics = test_evaluating(model, test_iter, criterion)
metrics["test_f1"]

!pip install -q captum

from captum.attr import LayerIntegratedGradients, TokenReferenceBase, visualization

PAD_IND = TEXT.vocab.stoi['pad']
token_reference = TokenReferenceBase(reference_token_idx=PAD_IND)
lig = LayerIntegratedGradients(model, model.embedding)

def forward_with_softmax(inp):
    logits = model(inp)
    return torch.softmax(logits, 0)[0][1]

def forward_with_sigmoid(input):
    return torch.sigmoid(model(input))

# accumulate a couple of samples in this array for visualization purposes
vis_data_records_ig = []

def interpret_sentence(model, sentence, min_len=7, label=0):
    model.eval()
def compute_and_output_attributions(outcome='top_level'):
    import pickle

    print('Loading data ...')
    if outcome == 'top_level':
        prepared_data_file = PREPARED_DATA_FILE_top_level
    elif outcome == 'mn_avg_eb':
        prepared_data_file = PREPARED_DATA_FILE_mn_avg_eb
    elif outcome == 'mn_avg_eb_adv':
        prepared_data_file = PREPARED_DATA_FILE_mn_avg_eb_adv
    elif outcome == 'perwht':
        prepared_data_file = PREPARED_DATA_FILE_perwht
    elif outcome == 'perfrl':
        prepared_data_file = PREPARED_DATA_FILE_perfrl
    else:
        prepared_data_file = PREPARED_DATA_FILE_mn_grd_eb

    df = pd.read_csv(RAW_DATA_FILE)
    with open(prepared_data_file, 'rb') as f:
        (all_input_ids, labels_target, attention_masks, sentences_per_school,
         url, perwht, perfrl, share_singleparent, totenrl, share_collegeplus,
         mail_returnrate) = pickle.load(f, encoding='latin1')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print('Loading model ...')
    model, BEST_MODEL_DIR = get_best_model(outcome)
    model.to(device)
    model.zero_grad()

    # load tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # Define a wrapper function for integrated gradients
    def bert_forward_wrapper(input_ids, num_sentences, attention_mask=None, position=0):
        return model(input_ids, num_sentences, attention_mask=attention_mask)

    from captum.attr import TokenReferenceBase
    from captum.attr import IntegratedGradients, LayerIntegratedGradients
    from captum.attr import visualization as viz

    # We only want to compute IG over schools in our validation set
    data_splits = ['validation']
    all_summarized_attr = []
    input_ids_for_attr = []
    count = 0
    internal_batch_size = 12
    n_steps = 48

    OUTPUT_DIR = '{}interp/attributions/{}/'
    OUTPUT_FILE = OUTPUT_DIR + '{}_{}_loss_{}.json'
    if not os.path.exists(OUTPUT_DIR.format(BASE_DIR, BEST_MODEL_DIR)):
        os.makedirs(OUTPUT_DIR.format(BASE_DIR, BEST_MODEL_DIR))
    start_ind = len([
        int(f.split('_')[0])
        for f in os.listdir(OUTPUT_DIR.format(BASE_DIR, BEST_MODEL_DIR))
    ])

    for d in data_splits:
        # Standardize our outcome measure, like we did for training and validation
        outcome_key = outcome.split('_adv')[0]
        labels_target[d] = torch.FloatTensor(
            (labels_target[d] - np.mean(df[outcome_key])) / np.std(df[outcome_key]))
        n_schools = torch.LongTensor(all_input_ids[d]).size(0)
        print("num schools {} for {} split".format(n_schools, d))

        for i in range(start_ind, n_schools):
            print(d, i)
            count += 1

            # Prepare data
            input_ids = torch.LongTensor([all_input_ids[d][i]]).squeeze(0).to(device)
            num_sentences = int(sentences_per_school[d][i])
            label_t = labels_target[d][i].unsqueeze(0).to(device)
            input_mask = torch.tensor([attention_masks[d][i]]).squeeze(0).to(device)
            label_perfrl = torch.tensor([perfrl[d][i]]).to(device)
            label_perwht = torch.tensor([perwht[d][i]]).to(device)
            label_share_singleparent = torch.tensor([share_singleparent[d][i]]).to(device)
            label_totenrl = torch.tensor([totenrl[d][i]]).to(device)
            label_share_collegeplus = torch.tensor([share_collegeplus[d][i]]).to(device)
            label_mail_returnrate = torch.tensor([mail_returnrate[d][i]]).to(device)

            # Get the prediction for this example
            pred = model(input_ids, num_sentences, attention_mask=input_mask)
            mse = F.mse_loss(pred[0].unsqueeze_(0), label_t)

            # Generate a reference sequence for integrated gradients
            ref_token_id = tokenizer.pad_token_id  # token used for the reference
            token_reference = TokenReferenceBase(reference_token_idx=ref_token_id)
            ref_input_ids = token_reference.generate_reference(
                input_ids.size(0),
                device=device).unsqueeze(1).repeat(1, input_ids.size(1)).long()

            # Compute integrated gradients
            lig = LayerIntegratedGradients(bert_forward_wrapper, model.bert.embeddings)
            attributions, conv_delta = lig.attribute(
                inputs=input_ids,
                baselines=ref_input_ids,
                additional_forward_args=(num_sentences, input_mask, 0),
                internal_batch_size=internal_batch_size,
                n_steps=n_steps,
                return_convergence_delta=True)

            # Sum attributions over the hidden dimension describing each token
            summarized_attr = attributions.sum(dim=-1).squeeze(0)
            n_sent = summarized_attr.size(0)
            attr_for_school_sents = defaultdict(dict)

            # Iterate over sentences and store the attributions per token in each sentence
            for j in range(0, n_sent):
                indices = input_ids[j].detach().squeeze(0).tolist()
                all_tokens = tokenizer.convert_ids_to_tokens(indices)
                attr_for_school_sents[j]['tokens'] = all_tokens
                attr_for_school_sents[j]['attributions'] = summarized_attr[j].tolist()
                assert (len(attr_for_school_sents[j]['tokens']) ==
                        len(attr_for_school_sents[j]['attributions']))

            with open(OUTPUT_FILE.format(BASE_DIR, BEST_MODEL_DIR, i, d, mse), 'w') as f:
                f.write(json.dumps(attr_for_school_sents, indent=4))
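# Hedged follow-up: conv_delta returned above measures how far IG's
# completeness axiom is from holding for this example, so it can serve as a
# per-school sanity check inside the loop. The tolerance is illustrative,
# not from the source.
if conv_delta.abs().max().item() > 0.05:  # illustrative threshold
    print("large IG convergence delta; consider increasing n_steps")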
if __name__ == "__main__":
    device = torch.device("cuda:0")
    save_dir = "./checkpoints"
    config = "CNN-debias-"
    cfg = ConfigBinaryClassification()
    Data = DataIterator(config=cfg)

    print("loading model")
    model = torch.load("checkpoints/CNN-distill-26").to(device)
    print("loading tokenizer")
    tokenizer = Data.tokenizer
    PAD_IND = tokenizer.vocab.stoi['<pad>']
    seq_length = 256
    token_reference = TokenReferenceBase(reference_token_idx=PAD_IND)
    lig = LayerIntegratedGradients(model, model.embedding)
    reference_tokens = token_reference.generate_reference(
        seq_length, device=device).unsqueeze(0).to(device)

    # cue words per class: 0 = Crohn's disease terms, 1 = intestinal
    # tuberculosis terms (kept in Chinese because the model input is Chinese)
    # black_list = {0: ["克罗恩病"], 1: ["肠结核"]}
    black_list = {0: ["克罗恩病", "循腔", "进镜", "乙状结肠", "回肠", "肛门", "降结肠"],
                  1: ["肠结核", "盲肠", "盲袋", "余所见",
loader = DataLoader(DATA_DIR, params)
# data = loader.load_data(['train', 'val'], DATA_DIR)
# train_data = data['train']
# train_data_iterator = loader.data_iterator(train_data, params, shuffle=False)
# train_batch, label_batch = next(train_data_iterator)

# sentences_file = DATA_DIR + 'train/sentences.txt'
# labels_file = DATA_DIR + 'train/labels.txt'
# sentences = []
# with open(sentences_file) as f:
#     for sent in f.read().splitlines():
#         sentences.append(sent)
# labels = []
# with open(labels_file) as f:
#     for lab in f.read().splitlines():
#         labels.append(lab)

token_reference = TokenReferenceBase(reference_token_idx=loader.pad_ind)
# layer_ig = LayerIntegratedGradients(model, model.embedding)
# vis_data_records = []

def interpret_sequence(model, sentences, data, attribution, records):
    model.zero_grad()
    for i, sentence in enumerate(sentences):
        seq_len = len(data[i])
        inp = data[i].unsqueeze(0)
        reference_indices = token_reference.generate_reference(
            seq_len, device=dev('cpu')).unsqueeze(0)
        pred = torch.sigmoid(model(inp))
        prob = pred.max().item()
        pred_ind = round(pred.argmax().item())