def main(learning_rate=5e-5, batch_size=4, num_epochs=3):
    train_url = "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json"
    train_encodings, _ = data_processing.data_processing(train_url)
    train_dataset = SquadDataset(train_encodings)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logger.info(device)

    model = BERT_plus_BiDAF(if_extra_modeling=True)
    model.to(device)
    logger.info("Model Structure:" + "\n" + "-" * 10)
    logger.info(model)

    parameters = model.parameters()
    logger.info("Parameters to learn:" + "\n" + "-" * 10)
    for name, param in model.named_parameters():
        if param.requires_grad:
            logger.info("\t" + str(name))

    logger.info("Hyperparameters:" + "\n" + "-" * 10)
    logger.info("Learning Rate: " + str(learning_rate))
    logger.info("Batch Size: " + str(batch_size))
    logger.info("Number of Epochs: " + str(num_epochs))
    logger.info("-" * 10)

    optimizer = optim.Adam(parameters, lr=learning_rate)
    dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    trained_model = train(device, model, optimizer, dataloader, num_epochs=num_epochs)
    torch.save(trained_model, 'trained_model.pt')
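# NOTE: train() called by main() above is defined elsewhere in this repo. The
# function below is only a minimal sketch of the fine-tuning loop it is assumed
# to run (start/end cross-entropy for extractive QA). The batch keys
# ('start_positions', 'end_positions') and the model call signature are
# assumptions for illustration, not the project's actual implementation.
import torch.nn.functional as F


def train_sketch(device, model, optimizer, dataloader, num_epochs=3):
    model.train()
    for epoch in range(num_epochs):
        for batch in dataloader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            start_positions = batch['start_positions'].to(device)
            end_positions = batch['end_positions'].to(device)
            # assumed: the model returns start/end logits of shape [batch, seq_len]
            logits_start, logits_end = model(input_ids, attention_mask=attention_mask)
            loss = (F.cross_entropy(logits_start, start_positions)
                    + F.cross_entropy(logits_end, end_positions))
            loss.backward()
            optimizer.step()
    return model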
            exact_match += 1
        f1_sum += compute_f1(golden_answer, pred_answer, nlp)
    acc = 100 * exact_match / n
    f1_score = 100 * f1_sum / n
    return acc, f1_score


if __name__ == "__main__":
    val_encodings = torch.load('val_encodings.pt')
    val_answer = torch.load('val_answer.pt')
    val_dataset = SquadDataset(val_encodings)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    model = BERT_plus_BiDAF(if_bidirectional=True, if_extra_modeling=True)
    model.load_state_dict(torch.load('bertfixed_BiDAF_BiLSTM.pt'))
    model = model.to(device)
    print("Model imported successfully")

    nlp = spacy.blank("en")
    tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

    # predictions = torch.load('pred_logits.pt')
    predictions = evaluate(model, val_dataset)

    threshold = [0]
    accs, f1s = [], []
    for i in range(len(threshold)):
        print("Compare with threshold = ", str(threshold[i]))
        acc, f1 = compare(predictions, val_dataset, tokenizer, nlp, threshold[i])
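# NOTE: compute_f1() used in compare() above is defined elsewhere in this file.
# The sketch below shows the standard SQuAD token-overlap F1 it is assumed to
# implement, tokenizing with the spaCy pipeline that compare() passes in; the
# helper name and tokenization details are assumptions for illustration only.
from collections import Counter


def compute_f1_sketch(golden_answer, pred_answer, nlp):
    gold_tokens = [t.text.lower() for t in nlp(golden_answer)]
    pred_tokens = [t.text.lower() for t in nlp(pred_answer)]
    if not gold_tokens or not pred_tokens:
        # if either answer is empty, F1 is 1 only when both are empty
        return float(gold_tokens == pred_tokens)
    common = Counter(gold_tokens) & Counter(pred_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)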
        }

    def __len__(self):
        return len(self.encodings.input_ids)


# In[18]:

val_dataset = SquadDataset(val_encodings)

# This part should be model construction.

# In[19]:

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# In[20]:

model = BERT_plus_BiDAF(if_extra_modeling=True)
model.load_state_dict(torch.load('bert_BiDAF.pt'))
model.to(device)
print("Model imported successfully")

# In[21]:

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')


def predict(logits_start, logits_end, threshold=0.1):
    """
    Input: logits_start, logits_end: torch.tensor() of shape [batch_size, sequence_length]
    Return the indices i, j such that i <= j and logits_start[i] + logits_end[j] is maximized.
    """
    # compute probabilities
    p_start = F.softmax(logits_start, dim=-1)
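# NOTE: the body of predict() continues past this excerpt. The helper below is
# only a sketch of one way to realize the span-selection rule described in the
# docstring (maximize p_start[i] * p_end[j] subject to i <= j) via an outer
# product and an upper-triangular mask; it is an assumption, not necessarily
# the implementation used in this repo.
def select_span_sketch(p_start, p_end):
    # p_start, p_end: [batch_size, seq_len] probability distributions
    joint = p_start.unsqueeze(2) * p_end.unsqueeze(1)   # [batch, seq, seq]
    joint = torch.triu(joint)                           # keep only pairs with j >= i
    seq_len = joint.size(-1)
    flat_idx = joint.view(joint.size(0), -1).argmax(dim=-1)
    start_idx = torch.div(flat_idx, seq_len, rounding_mode='floor')
    end_idx = flat_idx % seq_len
    return start_idx, end_idx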
            kw.update(color=textcolors[int(im.norm(data[i, j]) > threshold)])
            text = im.axes.text(j, i, valfmt(data[i, j], None), **kw)
            texts.append(text)

    return texts


if __name__ == "__main__":
    val_encodings = torch.load(r'D:\OneDrive\Courses\ECS289 NLP\val_encodings.pt')
    val_answer = torch.load(r'D:\OneDrive\Courses\ECS289 NLP\val_answer.pt')
    val_dataset = SquadDataset(val_encodings)

    model = BERT_plus_BiDAF(if_bidirectional=True,
                            if_extra_modeling=True,
                            if_attention_map=True)
    model.load_state_dict(
        torch.load(r'D:\OneDrive\Courses\ECS289 NLP\bert_bidaf_bidirectionalLSTM.pt'))

    idx = 0
    weight_c2q_np, weight_q2c_np, question_tokens, context_tokens = attention_map(
        idx, val_dataset, model)

    # c2q plot
    fig, ax = plt.subplots(figsize=(60, 60))
    im, cbar = heatmap(weight_c2q_np.T, question_tokens, context_tokens, ax=ax,
        self.encodings = encodings

    def __getitem__(self, idx):
        return {
            key: torch.tensor(val[idx])
            for key, val in self.encodings.items()
        }

    def __len__(self):
        return len(self.encodings.input_ids)


train_dataset = SquadDataset(train_encodings)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BERT_plus_BiDAF(if_extra_modeling=True)
model.to(device)

parameters = model.parameters()
optimizer = optim.Adam(parameters, lr=5e-5)


# below is the definition of the training process
def predict(logits_start, logits_end, threshold=0.1):
    """
    Input: logits_start, logits_end: torch.tensor() of shape [batch_size, sequence_length]
    Return the indices i, j such that i <= j and logits_start[i] + logits_end[j] is maximized.
    """
    # compute probabilities
    p_start = F.softmax(logits_start, dim=-1)
    p_end = F.softmax(logits_end, dim=-1)
        return len(self.encodings.input_ids)


# %%
train_dataset = SquadDataset(train_encodings)
val_dataset = SquadDataset(val_encodings)

# %% [markdown]
# This part should be model construction.

# %%
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# %%
model = BERT_plus_BiDAF(if_extra_modeling=True)

# %%
model.to(device)

# %% [markdown]
# This part should be the declaration of the optimizer and the loss function.

# %%
parameters = model.parameters()
print("Parameters to learn:")
for name, param in model.named_parameters():
    if param.requires_grad:
        print("\t", name)
optimizer = optim.Adam(parameters, lr=5e-5)
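# %%
# NOTE: the loss function itself is not declared in this excerpt. For extractive
# QA the usual choice (an assumption here, not necessarily what this repo uses)
# is cross-entropy over the gold start and end token positions, e.g.:
import torch.nn as nn

criterion = nn.CrossEntropyLoss()


def qa_loss_sketch(logits_start, logits_end, start_positions, end_positions):
    # logits_*: [batch_size, seq_len]; *_positions: [batch_size] gold token indices
    return criterion(logits_start, start_positions) + criterion(logits_end, end_positions)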