Example #1
def main(learning_rate=5e-5, batch_size=4, num_epochs=3):
    train_url = "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json"
    train_encodings, _ = data_processing.data_processing(train_url)
    train_dataset = SquadDataset(train_encodings)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logger.info(device)

    model = BERT_plus_BiDAF(if_extra_modeling=True)
    model.to(device)
    logger.info("Model Structure:" + "\n" + "-" * 10)
    logger.info(model)

    parameters = model.parameters()
    logger.info("Parameters to learn:" + "\n" + "-" * 10)
    for name, param in model.named_parameters():
        if param.requires_grad:
            logger.info("\t" + str(name))

    logger.info("Hyperparameters:" + "\n" + "-" * 10)
    logger.info("Learning Rate: " + str(learing_rate))
    logger.info("Batch Size: " + str(batch_size))
    logger.info("-" * 10)
    logger.info("Number of Epochs: " + str(num_epochs))

    optimizer = optim.Adam(parameters, lr=learning_rate)
    dataloader = DataLoader(train_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=4)
    trained_model = train(device,
                          model,
                          optimizer,
                          dataloader,
                          num_epochs=num_epochs)
    # Save the state_dict so it can be restored with load_state_dict, as in the evaluation scripts.
    torch.save(trained_model.state_dict(), 'trained_model.pt')
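
The `train` helper called above is not shown in this snippet. A minimal sketch of a compatible loop, assuming each batch carries `start_positions`/`end_positions` and the model returns a pair of start/end logits (the calling convention and key names are assumptions, not the original implementation):

import torch.nn.functional as F

def train(device, model, optimizer, dataloader, num_epochs=3):
    # Sketch only: standard extractive-QA objective, i.e. cross-entropy on the
    # start logits plus cross-entropy on the end logits, averaged.
    model.train()
    for epoch in range(num_epochs):
        for batch in dataloader:
            optimizer.zero_grad()
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            start_positions = batch["start_positions"].to(device)
            end_positions = batch["end_positions"].to(device)

            # Assumes the model returns [batch_size, seq_len] logits for the
            # answer start and end positions.
            logits_start, logits_end = model(input_ids, attention_mask)
            loss = 0.5 * (F.cross_entropy(logits_start, start_positions) +
                          F.cross_entropy(logits_end, end_positions))
            loss.backward()
            optimizer.step()
    return model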
Example #2
            exact_match += 1
        f1_sum += compute_f1(golden_answer, pred_answer, nlp)

    acc = 100 * exact_match / n
    f1_score = 100 * f1_sum / n
    return acc, f1_score
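
compute_f1 is defined elsewhere; a minimal sketch of the SQuAD-style token-overlap F1 it presumably computes, using the spaCy nlp object only for tokenization (that use is an assumption about the real helper):

from collections import Counter

def compute_f1(golden_answer, pred_answer, nlp):
    # Token-overlap F1 in the spirit of the official SQuAD evaluation script.
    gold_tokens = [t.text.lower() for t in nlp(golden_answer)]
    pred_tokens = [t.text.lower() for t in nlp(pred_answer)]
    if not gold_tokens or not pred_tokens:
        # Both empty counts as a match (no-answer case); otherwise no overlap.
        return float(gold_tokens == pred_tokens)
    common = Counter(gold_tokens) & Counter(pred_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)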

if __name__ == "__main__":
    val_encodings = torch.load('val_encodings.pt')
    val_answer = torch.load('val_answer.pt')
    val_dataset = SquadDataset(val_encodings)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    model = BERT_plus_BiDAF(if_bidirectional=True, if_extra_modeling=True)
    model.load_state_dict(torch.load('bertfixed_BiDAF_BiLSTM.pt'))
    model = model.to(device)
    print("Model imported successfully")
    
    nlp = spacy.blank("en")
    tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
    
    # predictions = torch.load('pred_logits.pt')
    predictions = evaluate(model, val_dataset)

    threshold = [0]
    accs, f1s = [], []
    for i in range(len(threshold)):
        print("Compare with threshold = ", str(threshold[i]))
        acc, f1 = compare(predictions, val_dataset, tokenizer, nlp, threshold[i])
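
For reference, the `evaluate` call above presumably runs the model over the validation set and collects start/end logits; a rough sketch under that assumption (the batch size and the model's calling convention are guesses, not the original code):

from torch.utils.data import DataLoader

def evaluate(model, dataset, batch_size=8):
    # Sketch: collect start/end logits for every validation example.
    device = next(model.parameters()).device
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    all_start, all_end = [], []
    model.eval()
    with torch.no_grad():
        for batch in loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            logits_start, logits_end = model(input_ids, attention_mask)
            all_start.append(logits_start.cpu())
            all_end.append(logits_end.cpu())
    return torch.cat(all_start), torch.cat(all_end)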
Example #3
        }

    def __len__(self):
        return len(self.encodings.input_ids)


# In[18]:
val_dataset = SquadDataset(val_encodings)
# This part should be model construction.

# In[19]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# In[20]:
model = BERT_plus_BiDAF(if_extra_modeling=True)
model.load_state_dict(torch.load('bert_BiDAF.pt'))
model.to(device)
print("Model imported successfully")
# In[21]:
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')


def predict(logits_start, logits_end, threshold=0.1):
    """
    Input:
    logits_start, logits_end: torch.tensor() of shape [batch_size, sequence length]
    return the index i,j such that i<=j and logits_start[i]+logits[j] is maximized
    """
    # compute probability
    p_start = F.softmax(logits_start, dim=-1)
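
The rest of predict is cut off in this snippet. A minimal sketch of one way to carry out the search described in the docstring, choosing i <= j so that the joint probability over valid spans is maximized (vectorized with an upper-triangular mask; this is an illustration, not the original implementation):

def predict_span(p_start, p_end):
    # p_start, p_end: [batch_size, seq_len] probabilities (e.g. softmaxed logits).
    batch_size, seq_len = p_start.shape
    # Joint score of every (start, end) pair: [batch_size, seq_len, seq_len].
    joint = p_start.unsqueeze(2) * p_end.unsqueeze(1)
    # Keep only spans with end >= start by zeroing the lower triangle.
    joint = torch.triu(joint)
    best = joint.view(batch_size, -1).argmax(dim=-1)
    starts = best // seq_len
    ends = best % seq_len
    return starts, ends

The threshold argument in the original presumably gates a no-answer prediction (e.g. comparing against the score at position 0), which this sketch omits.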
            kw.update(color=textcolors[int(im.norm(data[i, j]) > threshold)])
            text = im.axes.text(j, i, valfmt(data[i, j], None), **kw)
            texts.append(text)

    return texts


if __name__ == "__main__":

    val_encodings = torch.load(
        r'D:\OneDrive\Courses\ECS289 NLP\val_encodings.pt')
    val_answer = torch.load(r'D:\OneDrive\Courses\ECS289 NLP\val_answer.pt')
    val_dataset = SquadDataset(val_encodings)

    model = BERT_plus_BiDAF(if_bidirectional=True,
                            if_extra_modeling=True,
                            if_attention_map=True)
    model.load_state_dict(
        torch.load(
            r'D:\OneDrive\Courses\ECS289 NLP\bert_bidaf_bidirectionalLSTM.pt'))

    idx = 0
    weight_c2q_np, weight_q2c_np, question_tokens, context_tokens = attention_map(
        idx, val_dataset, model)

    # c2q plot
    fig, ax = plt.subplots(figsize=(60, 60))
    im, cbar = heatmap(weight_c2q_np.T,
                       question_tokens,
                       context_tokens,
                       ax=ax,
Example #5
        self.encodings = encodings

    def __getitem__(self, idx):
        return {
            key: torch.tensor(val[idx])
            for key, val in self.encodings.items()
        }

    def __len__(self):
        return len(self.encodings.input_ids)


train_dataset = SquadDataset(train_encodings)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BERT_plus_BiDAF(if_extra_modeling=True)
model.to(device)
parameters = model.parameters()
optimizer = optim.Adam(parameters, lr=5e-5)


# Below is the definition of the training process.
def predict(logits_start, logits_end, threshold=0.1):
    """
    Input:
    logits_start, logits_end: torch.tensor() of shape [batch_size, sequence length]
    return the index i,j such that i<=j and logits_start[i]+logits[j] is maximized
    """
    # compute probability
    p_start = F.softmax(logits_start, dim=-1)
    p_end = F.softmax(logits_end, dim=-1)
        return len(self.encodings.input_ids)


# %%
train_dataset = SquadDataset(train_encodings)
val_dataset = SquadDataset(val_encodings)

# %% [markdown]
# This part should be model construction.

# %%
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# %%
model = BERT_plus_BiDAF(if_extra_modeling=True)

# %%
model.to(device)

# %% [markdown]
# This part should be declaration of the optimizer and the loss function.

# %%
parameters = model.parameters()
print("Parameters to learn:")
for name, param in model.named_parameters():
    if param.requires_grad:
        print("\t", name)
optimizer = optim.Adam(parameters, lr=5e-5)
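
Checkpoint names elsewhere in these examples (e.g. bertfixed_BiDAF_BiLSTM.pt) and the requires_grad filter above suggest a variant in which the BERT encoder is frozen and only the BiDAF/LSTM layers are trained. A sketch of how that could be set up before building the optimizer, assuming the encoder is exposed as model.bert (the attribute name is a guess):

# Hypothetical: freeze the BERT encoder so only the task-specific layers train.
for param in model.bert.parameters():  # `bert` attribute name is assumed
    param.requires_grad = False

# Hand only the still-trainable parameters to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=5e-5)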