# Imports assumed by the snippets below ("dl" is the project's local
# data-loading module providing SemEvalDataset; its import path is not
# shown in the original).
import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import trange
from transformers import (AdamW, AlbertForSequenceClassification,
                          AlbertTokenizer, BertForSequenceClassification)

device = torch.device('cuda')  # batches are moved to the GPU below

# PATH (model checkpoint) and DATA (preprocessed test set) are defined
# earlier in the original script and are elided here.
CORRECT = '../data/eval_data/albert_bee_uq_test_correct'

pretrained_weights = 'albert-base-v1'
tokenizer = AlbertTokenizer.from_pretrained(pretrained_weights,
                                            do_lower_case=True,
                                            do_basic_tokenize=True)
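
# Illustration (not in the original): the preprocessed tensors consumed below
# were plausibly produced with the encode_plus API of this transformers era,
# e.g. (max_length is an assumption):
#   enc = tokenizer.encode_plus('student answer', 'reference answer',
#                               max_length=128, pad_to_max_length=True)
#   enc['input_ids'], enc['token_type_ids'], enc['attention_mask']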

# Initialize Model and Optimizer
model = AlbertForSequenceClassification.from_pretrained(pretrained_weights,
                                                        num_labels=3)
model.load_state_dict(torch.load(PATH))
model.cuda()
model.eval()

# Data to evaluate
beetle_data = dl.SemEvalDataset(DATA)
beetle_loader = DataLoader(beetle_data)  # default batch_size=1, hence squeeze() below
print("Nr. of data instances: ", len(beetle_data))
correct_guesses = []
label = []
steps = []
with torch.no_grad():
    for step, batch in enumerate(beetle_loader):
        batch = tuple(t.to(device) for t in batch)
        token_ids, segment, attention, lab = batch
        outputs = model(token_ids,
                        token_type_ids=segment,
                        attention_mask=attention,
                        labels=lab)
        # with labels supplied, the model returns (loss, logits)
        logits = outputs[1].detach().cpu().numpy().squeeze()
        labels = lab.to('cpu').numpy()
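        # Plausible continuation (the original snippet is truncated here):
        # with batch_size=1, logits is a (3,) vector; fill the lists above.
        pred = int(np.argmax(logits))
        if pred == int(labels[0]):
            correct_guesses.append(step)
        label.append(int(labels[0]))
        steps.append(step)
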
Example #2
EPOCHS = 16  # BATCH_SIZE and LEARNING_RATE are defined earlier (elided here)
pretrained_weights = 'bert-base-uncased'

# Set these paths to train BERT
MODEL_PATH = '../models/bert_mnli/bert_model_mnli.pt'
TRAIN_LOSS_PATH = '../models/bert_mnli/train_loss_per_batch.npy'
VAL_LOSS_PATH = '../models/bert_mnli/val_loss_per_epoch.npy'

# Initialize Model and Optimizer
model = BertForSequenceClassification.from_pretrained(pretrained_weights,
                                                      num_labels=3)
model.cuda()
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, correct_bias=False)

# Load data
train_data = dl.SemEvalDataset("../data/preprocessed/bert_mnli_train.npy")
val_data = dl.SemEvalDataset("../data/preprocessed/bert_mnli_val.npy")

train_loader = DataLoader(train_data,
                          batch_size=BATCH_SIZE,
                          num_workers=0,
                          shuffle=True)
val_loader = DataLoader(val_data,
                        batch_size=BATCH_SIZE,
                        num_workers=0,
                        shuffle=True)
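
# Optional sketch (not in the original): transformers' AdamW is commonly
# paired with a linear warmup schedule, e.g.
#   from transformers import get_linear_schedule_with_warmup
#   scheduler = get_linear_schedule_with_warmup(
#       optimizer, num_warmup_steps=0,
#       num_training_steps=len(train_loader) * EPOCHS)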

# Track loss per batch
train_loss = []
val_loss_detailed = []
val_loss_per_epoch = []
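
# Hypothetical post-training step (not shown in the original): persist the
# tracked losses and the fine-tuned weights to the paths declared above.
#   np.save(TRAIN_LOSS_PATH, np.array(train_loss))
#   np.save(VAL_LOSS_PATH, np.array(val_loss_per_epoch))
#   torch.save(model.state_dict(), MODEL_PATH)
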
Example #3
# PATH: fine-tuned model checkpoint
# DATA: preprocessed test set
# LOSS: location of the loss file

PATH = '../models/bert_mnli/model_mnli.pt'
DATA = '../data/preprocessed/bert_mnli_mismatched_test.npy'
LOSS = '../models/bert_mnli/test_matched_loss'

# Initialize Model and Optimizer
model = BertForSequenceClassification.from_pretrained('bert-base-uncased',
                                                      num_labels=3)
model.load_state_dict(torch.load(PATH))
model.cuda()
model.eval()

# Data to evaluate
test_data = dl.SemEvalDataset(DATA)
test_loader = DataLoader(test_data)

data = []
# np.empty leaves its first row/entry uninitialised; it serves only as a
# base to concatenate onto and is presumably dropped before computing metrics
logit_list = np.empty([1, 3], dtype=float)
label_list = np.empty(1, dtype=int)

with torch.no_grad():
    macro, weighted, acc = 0, 0, 0

    for batch in test_loader:
        batch = tuple(t.to(device) for t in batch)
        token_ids, segment, attention, lab = batch
        outputs = model(token_ids,
                        token_type_ids=segment,
                        attention_mask=attention,
                        labels=lab)
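        # Plausible continuation (the original is truncated here): stack the
        # batch logits and labels into the buffers declared above.
        logits = outputs[1].detach().cpu().numpy()
        logit_list = np.concatenate((logit_list, logits), axis=0)
        label_list = np.concatenate((label_list, lab.cpu().numpy()), axis=0)

# Hypothetical metric computation with sklearn (mirrors the macro/weighted/acc
# names initialised above; drop the uninitialised first rows from np.empty):
#   from sklearn.metrics import f1_score, accuracy_score
#   preds = np.argmax(logit_list[1:], axis=1)
#   macro = f1_score(label_list[1:], preds, average='macro')
#   weighted = f1_score(label_list[1:], preds, average='weighted')
#   acc = accuracy_score(label_list[1:], preds)
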
Example #4
BATCH_SIZE = 16
LEARNING_RATE = 1e-5
EPOCHS = 8

MODEL_PATH = '../models/albert_beetle/model_.pt'
TRAIN_LOSS_PATH = '../models/albert_beetle/train_loss_per_batch.npy'
VAL_LOSS_PATH = '../models/albert_beetle/val_loss_per_epoch.npy'

# Initialize Model and Optimizer
pretrained_weights = 'albert-base-v1'
model = AlbertForSequenceClassification.from_pretrained(pretrained_weights, num_labels=3)
model.cuda()
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, correct_bias=False)

# Load data
train_data = dl.SemEvalDataset("../data/preprocessed/albert_beetle_train.npy")
val_data = dl.SemEvalDataset("../data/preprocessed/albert_beetle_val.npy")

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, num_workers=0, shuffle=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, num_workers=0, shuffle=True)

# Track loss per batch
train_loss = []
val_loss_detailed = []
val_loss_per_epoch = []
tracker = 0
for i in trange(EPOCHS, desc="Epoch "):

    # Training
    model.train()
    training_loss = 0
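    # Plausible continuation (the original is truncated here), mirroring the
    # evaluation loops above:
    for step, batch in enumerate(train_loader):
        batch = tuple(t.to(device) for t in batch)
        token_ids, segment, attention, lab = batch
        optimizer.zero_grad()
        outputs = model(token_ids,
                        token_type_ids=segment,
                        attention_mask=attention,
                        labels=lab)
        loss = outputs[0]
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
        training_loss += loss.item()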