def bertviz_headview(model, tokenizer, sentence_a, sentence_b=None, layer=None, heads=None):
    """
    Call function as follows:

        model = BertModel.from_pretrained('bert-base-uncased', output_attentions=True)
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
        sentence_a, sentence_b = "the rabbit quickly hopped", "The turtle slowly crawled"
        bertviz_headview(model, tokenizer, sentence_a, sentence_b)
    """
    inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)
    input_ids = inputs['input_ids']
    if sentence_b:
        token_type_ids = inputs['token_type_ids']
        attention = model(input_ids, token_type_ids=token_type_ids)[-1]
        sentence_b_start = token_type_ids[0].tolist().index(1)
    else:
        attention = model(input_ids)[-1]
        sentence_b_start = None
    input_id_list = input_ids[0].tolist()  # Batch index 0
    tokens = tokenizer.convert_ids_to_tokens(input_id_list)
    head_view(attention, tokens, sentence_b_start, layer=layer, heads=heads)

def show_head_view(model, tokenizer, sentence_a, sentence_b=None):
    inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)
    token_type_ids = inputs['token_type_ids']
    input_ids = inputs['input_ids']
    attention = model(input_ids, token_type_ids=token_type_ids)[-1]
    input_id_list = input_ids[0].tolist()  # Batch index 0
    tokens = tokenizer.convert_ids_to_tokens(input_id_list)
    if sentence_b:
        sentence_b_start = token_type_ids[0].tolist().index(1)
    else:
        sentence_b_start = None
    # Sanity check: each layer's attention tensor has shape (batch, heads, seq_len, seq_len).
    print(attention[0].shape, attention[1].shape)
    head_view(attention, tokens, sentence_b_start)

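# A minimal usage sketch for show_head_view above; output_attentions=True is
# required so the forward pass returns attention weights as the last output.
# The example sentences are taken from the docstring of bertviz_headview.
from transformers import BertModel, BertTokenizer
from bertviz import head_view

model = BertModel.from_pretrained('bert-base-uncased', output_attentions=True)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
show_head_view(model, tokenizer, "the rabbit quickly hopped", "The turtle slowly crawled")
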
def examine_model(experiment, labeltype, inputs=None):
    saved_dir = "models/saved_{}".format(experiment)
    model = torch.load('{}/hate_speech_model_trained.pt'.format(saved_dir))
    settings = Settings(experiment, True)
    settings.write_debug('Starting visualization of trained model')
    if inputs is None:
        inputs = ["The cat sat on the dog"]
    tokenizer = load_bert_tokenizer(settings, True)
    model_type = settings.get_model_type()
    # Examine neurons during a given input:
    # call_html()
    # show(model, 'bert', tokenizer, input_data)
    for input_data in inputs:
        # Use a separate name for the encoded batch so the `inputs` list is not shadowed.
        encoded = tokenizer.encode_plus(input_data, return_tensors='pt', add_special_tokens=True)
        device = get_gpu(settings)
        token_type_ids = encoded['token_type_ids'].to(device)
        input_ids = encoded['input_ids'].to(device)
        attention = model(input_ids, token_type_ids=token_type_ids)[-1]
        input_id_list = input_ids[0].tolist()  # Batch index 0
        tokens = tokenizer.convert_ids_to_tokens(input_id_list)
        main_html = call_html()
        # Examine the whole model during a given input.
        model_html1, model_js2, model_js3 = model_view(attention, tokens)
        # Examine just the heads during a given input.
        head_html1, head_js2, head_js3 = head_view(attention, tokens)
        config_name = settings.get_config_name()
        os.makedirs("viz/{}/{}/{}".format(config_name, labeltype, input_data), exist_ok=True)
        with open("viz/{}/{}/{}/model_visualization.html".format(
                config_name, labeltype, input_data), 'w+') as f:
            f.write("{} \n <script>{} \n {}</script> \n {} \n".format(
                main_html.data, model_js2.data, model_js3.data, model_html1.data))
        with open("viz/{}/{}/{}/head_visualization.html".format(
                config_name, labeltype, input_data), 'w+') as f:
            f.write("{} \n <script>{} \n {}</script> \n {} \n".format(
                main_html.data, head_js2.data, head_js3.data, head_html1.data))
        # Which BERT features were weighted the most? The least?
        settings.write_debug(
            'Finished visualization of trained model given input {}'.format(input_data))
        settings.write_debug('HTML file saved to viz/{}/{}/{}'.format(
            config_name, labeltype, input_data))

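# Hypothetical invocation of examine_model; the experiment and label names are
# placeholders, and the input sentence is the function's own default example.
examine_model('bert_baseline', 'hatespeech', inputs=["The cat sat on the dog"])
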
def visualize_attention_head(model, tokenizer, sentence_a, sentence_b=None):
    inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)
    input_ids = inputs['input_ids']
    if sentence_b:
        token_type_ids = inputs['token_type_ids']
        attention = model(input_ids, token_type_ids=token_type_ids)[-1]
        sentence_b_start = token_type_ids[0].tolist().index(1)
    else:
        attention = model(input_ids)[-1]
        sentence_b_start = None
    input_id_list = input_ids[0].tolist()  # Batch index 0
    tokens = tokenizer.convert_ids_to_tokens(input_id_list)
    call_html()
    head_view(attention, tokens, sentence_b_start)

def show_head_view(model, tokenizer, sentence_1, sentence_2, layer=None, heads=None):
    """Visualize the attention heads of BERTology models."""
    inputs = tokenizer.encode_plus(sentence_1, sentence_2, return_tensors='pt', add_special_tokens=True)
    input_ids = inputs['input_ids']
    token_type_ids = inputs['token_type_ids']
    attention = model(input_ids=input_ids, token_type_ids=token_type_ids).attentions
    sentence_b_start = token_type_ids[0].tolist().index(1)
    input_id_list = input_ids[0].tolist()
    tokens = tokenizer.convert_ids_to_tokens(input_id_list)
    head_view(attention, tokens, sentence_b_start, layer=layer, heads=heads)

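# Usage sketch for the newer-API show_head_view above; the checkpoint must be
# loaded with output_attentions=True for `.attentions` to be populated. The
# layer/head selection below is an arbitrary illustration.
from transformers import BertModel, BertTokenizer
from bertviz import head_view

model = BertModel.from_pretrained('bert-base-uncased', output_attentions=True)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
show_head_view(model, tokenizer, "the rabbit quickly hopped", "The turtle slowly crawled",
               layer=4, heads=[3])
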
def get_attention(model_file, sentence):
    model_type = 'bert'
    model_version = 'bert-base-uncased'
    do_lower_case = True
    model = BertForSequenceClassification.from_pretrained(model_version, output_attentions=True)
    model = torch.nn.DataParallel(model)
    chpt = torch.load(model_file, map_location=torch.device('cpu'))
    model.load_state_dict(chpt['state_dict'])
    tokenizer = BertTokenizer.from_pretrained(model_version, do_lower_case=do_lower_case)
    tokens = [tokenizer.cls_token] + tokenizer.tokenize(sentence) + [tokenizer.sep_token]
    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_ids = torch.tensor(input_ids).unsqueeze(0)  # add batch dimension

    # Head view
    model.eval()
    outputs = model(input_ids)
    attentions = outputs[-1]
    head_view(attentions, tokens, None)

    # Neuron view
    show(model, model_type, tokenizer, sentence, None)

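# Hypothetical call; 'checkpoints/best_model.pt' stands in for a fine-tuned
# checkpoint saved as {'state_dict': ...} under torch.nn.DataParallel, which is
# the layout get_attention expects.
get_attention('checkpoints/best_model.pt', 'the cat sat on the dog')
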
# To reproduce BertAdam-specific behavior, set correct_bias=False.
optimizer = AdamW(model.parameters(), lr=LR, correct_bias=False)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=NUM_WARMUP_STEPS,
    num_training_steps=NUM_TRAIN_STEPS)  # PyTorch scheduler

model.train()
batch = next(iter(train_loader))
input_ids, input_mask, segment_ids, labels = batch
outputs = model(input_ids, input_mask, segment_ids, labels=labels)
loss, logits, attentions = outputs

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokens = tokenizer.convert_ids_to_tokens(input_ids.numpy()[0])
tokens = [str(item) for item in tokens]
head_view(attentions, tokens, None)

# Neuron view
from bertviz.transformers_neuron_view import BertModel, BertTokenizer
from bertviz.neuron_view import show

model_type = 'bert'
model_version = 'bert-base-uncased'
do_lower_case = True
model = BertModel.from_pretrained(model_version)
tokenizer = BertTokenizer.from_pretrained(model_version, do_lower_case=do_lower_case)
sentence_a = ("date of birth: sex: fservice: medicineallergies:patient recorded as having "
              "no known allergies to drugsattending:chief complaint:hematemesismajor "
              "surgical or invasive procedure:banding x 4 of esophageal variceshistory "
              "of present illness:pt is a 74yo woman with pmh of ms, autoimmune hepatitis")
show(model, model_type, tokenizer, sentence_a, None)

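# The snippet above relies on names defined elsewhere in its script.
# Representative stand-ins (assumptions, not the original settings):
LR = 2e-5                 # common BERT fine-tuning learning rate
NUM_WARMUP_STEPS = 100    # steps of linear warmup before decay
NUM_TRAIN_STEPS = 1000    # total optimizer steps for the schedule
# `model` would be a BertForSequenceClassification loaded with
# output_attentions=True, and `train_loader` a DataLoader yielding
# (input_ids, input_mask, segment_ids, labels) batches.
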
def bertv(attention, tokens):
    call_html()
    head_view(attention, tokens)

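# End-to-end sketch feeding bertv above (assumes a standard pretrained BERT;
# the input sentence is a placeholder). `call_html` inside bertv is the Colab
# helper used elsewhere in these snippets.
from transformers import BertModel, BertTokenizer
from bertviz import head_view

model = BertModel.from_pretrained('bert-base-uncased', output_attentions=True)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
inputs = tokenizer.encode_plus("the cat sat on the dog", return_tensors='pt')
attention = model(**inputs)[-1]  # attentions are the last output
tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0].tolist())
bertv(attention, tokens)
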
# Module-level variant: assumes `model`, `tokenizer`, `sentence_a`, and
# `sentence_b` are already defined in the surrounding scope.
inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)
input_ids = inputs['input_ids']
if sentence_b:
    token_type_ids = inputs['token_type_ids']
    attention = model(input_ids, token_type_ids=token_type_ids)[-1]
    sentence_b_start = token_type_ids[0].tolist().index(1)
else:
    attention = model(input_ids)[-1]
    sentence_b_start = None
input_id_list = input_ids[0].tolist()  # Batch index 0
tokens = tokenizer.convert_ids_to_tokens(input_id_list)
head_view(attention, tokens, sentence_b_start)


def show_head_view(model, tokenizer, sentence_a, sentence_b=None):
    inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)
    input_ids = inputs['input_ids']
    if sentence_b:
        token_type_ids = inputs['token_type_ids']
        attention = model(input_ids, token_type_ids=token_type_ids)[-1]
        sentence_b_start = token_type_ids[0].tolist().index(1)
    else:
        attention = model(input_ids)[-1]
        sentence_b_start = None
    input_id_list = input_ids[0].tolist()  # Batch index 0
    tokens = tokenizer.convert_ids_to_tokens(input_id_list)
    head_view(attention, tokens, sentence_b_start)