# Evaluate the fine-tuned ALBERT model on the Beetle test set.
import numpy as np
import torch
from torch.utils.data import DataLoader
from transformers import AlbertTokenizer, AlbertForSequenceClassification

import dataloader as dl  # assumption: the project-local dataset module, imported as `dl`

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# PATH (model checkpoint) and DATA (preprocessed test set) are set elsewhere in the script
CORRECT = '../data/eval_data/albert_bee_uq_test_correct'
pretrained_weights = 'albert-base-v1'
# do_basic_tokenize is a BertTokenizer option; the SentencePiece-based
# AlbertTokenizer silently ignores it, so it is dropped here
tokenizer = AlbertTokenizer.from_pretrained(pretrained_weights, do_lower_case=True)

# Initialize Model and Optimizer
model = AlbertForSequenceClassification.from_pretrained(pretrained_weights, num_labels=3)
model.load_state_dict(torch.load(PATH))
model.cuda()
model.eval()

# Data to evaluate
beetle_data = dl.SemEvalDataset(DATA)
beetle_loader = DataLoader(beetle_data)  # default batch_size=1
print("Nr. of data instances: ", len(beetle_data))

correct_guesses = []
label = []
steps = []

with torch.no_grad():
    for step, batch in enumerate(beetle_loader):
        batch = tuple(t.to(device) for t in batch)
        token_ids, segment, attention, lab = batch
        # With labels passed in, outputs[0] is the loss and outputs[1] the logits
        outputs = model(token_ids, token_type_ids=segment, attention_mask=attention, labels=lab)
        logits = outputs[1].detach().cpu().numpy().squeeze()
        labels = lab.to('cpu').numpy()
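        # --- Sketch, not in the original excerpt: one plausible way to fill the
        # --- `correct_guesses`, `label`, and `steps` lists declared above. The
        # --- argmax over the three logits is taken as the predicted class.
        pred = int(np.argmax(logits))
        correct_guesses.append(pred == labels.item())
        label.append(labels.item())
        steps.append(step)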
# Fine-tune BERT on MNLI.
import numpy as np
import torch
from torch.utils.data import DataLoader
from transformers import BertForSequenceClassification, AdamW

import dataloader as dl  # assumption: the project-local dataset module, imported as `dl`

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# BATCH_SIZE and LEARNING_RATE are set elsewhere in the script
EPOCHS = 16
pretrained_weights = 'bert-base-uncased'

# Set these paths to train BERT
MODEL_PATH = '../models/bert_mnli/bert_model_mnli.pt'
TRAIN_LOSS_PATH = '../models/bert_mnli/train_loss_per_batch.npy'
VAL_LOSS_PATH = '../models/bert_mnli/val_loss_per_epoch.npy'

# Initialize Model and Optimizer
model = BertForSequenceClassification.from_pretrained(pretrained_weights, num_labels=3)
model.cuda()
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, correct_bias=False)

# Load data
train_data = dl.SemEvalDataset("../data/preprocessed/bert_mnli_train.npy")
val_data = dl.SemEvalDataset("../data/preprocessed/bert_mnli_val.npy")
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, num_workers=0, shuffle=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, num_workers=0, shuffle=True)

# Track loss per batch
train_loss = []
val_loss_detailed = []
val_loss_per_epoch = []
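# --- Sketch, not in the original excerpt: the epoch loop itself follows the same
# --- pattern as the ALBERT training script below; afterwards the checkpoint and
# --- the tracked loss curves are persisted to the paths defined above.
torch.save(model.state_dict(), MODEL_PATH)
np.save(TRAIN_LOSS_PATH, np.array(train_loss))
np.save(VAL_LOSS_PATH, np.array(val_loss_per_epoch))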
# Evaluate the fine-tuned BERT-MNLI model on the mismatched test set.
import numpy as np
import torch
from torch.utils.data import DataLoader
from transformers import BertForSequenceClassification
from sklearn.metrics import f1_score, accuracy_score  # assumption: metrics via scikit-learn

import dataloader as dl  # assumption: the project-local dataset module, imported as `dl`

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# DATA: preprocessed test set
# LOSS: location of the loss file
PATH = '../models/bert_mnli/model_mnli.pt'
DATA = '../data/preprocessed/bert_mnli_mismatched_test.npy'
LOSS = '../models/bert_mnli/test_matched_loss'

# Initialize Model and Optimizer
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
model.load_state_dict(torch.load(PATH))
model.cuda()
model.eval()

# Data to evaluate
test_data = dl.SemEvalDataset(DATA)
test_loader = DataLoader(test_data)  # default batch_size=1

data = []
# First rows are placeholders; real logits/labels are appended below
logit_list = np.empty([1, 3], dtype=float)
label_list = np.empty(1, dtype=int)

with torch.no_grad():  # the redundant inner no_grad block was removed
    macro, weighted, acc = 0, 0, 0
    for batch in test_loader:
        batch = tuple(t.to(device) for t in batch)
        token_ids, segment, attention, lab = batch
        # Truncated call completed to match the evaluation loop in the ALBERT script above
        outputs = model(token_ids, token_type_ids=segment,
                        attention_mask=attention, labels=lab)
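        # --- Sketch, not in the original excerpt: accumulate logits and labels so
        # --- the placeholder arrays above are filled; metrics follow the loop.
        logit_list = np.concatenate((logit_list, outputs[1].detach().cpu().numpy()))
        label_list = np.concatenate((label_list, lab.cpu().numpy()))

# --- Sketch continued: drop the placeholder first rows, then compute macro and
# --- weighted F1 plus accuracy once over the whole test set.
preds = np.argmax(logit_list[1:], axis=1)
macro = f1_score(label_list[1:], preds, average='macro')
weighted = f1_score(label_list[1:], preds, average='weighted')
acc = accuracy_score(label_list[1:], preds)
print(f"Macro F1: {macro:.4f}, Weighted F1: {weighted:.4f}, Accuracy: {acc:.4f}")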
# Fine-tune ALBERT on the Beetle data.
import numpy as np
import torch
from torch.utils.data import DataLoader
from transformers import AlbertForSequenceClassification, AdamW
from tqdm import trange

import dataloader as dl  # assumption: the project-local dataset module, imported as `dl`

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

BATCH_SIZE = 16
LEARNING_RATE = 1e-5
EPOCHS = 8

MODEL_PATH = '../models/albert_beetle/model_.pt'
TRAIN_LOSS_PATH = '../models/albert_beetle/train_loss_per_batch.npy'
VAL_LOSS_PATH = '../models/albert_beetle/val_loss_per_epoch.npy'

# Initialize Model and Optimizer
pretrained_weights = 'albert-base-v1'
model = AlbertForSequenceClassification.from_pretrained(pretrained_weights, num_labels=3)
model.cuda()
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, correct_bias=False)

# Load data
train_data = dl.SemEvalDataset("../data/preprocessed/albert_beetle_train.npy")
val_data = dl.SemEvalDataset("../data/preprocessed/albert_beetle_val.npy")
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, num_workers=0, shuffle=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, num_workers=0, shuffle=True)

# Track loss per batch
train_loss = []
val_loss_detailed = []
val_loss_per_epoch = []
tracker = 0

for i in trange(EPOCHS, desc="Epoch "):
    # Training
    model.train()
    training_loss = 0
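    # --- Sketch, not in the original excerpt: a minimal remainder of the epoch,
    # --- consistent with the trackers declared above. Exact logging details
    # --- (e.g. how `tracker` is used) are assumptions.
    for batch in train_loader:
        batch = tuple(t.to(device) for t in batch)
        token_ids, segment, attention, lab = batch
        optimizer.zero_grad()
        loss = model(token_ids, token_type_ids=segment,
                     attention_mask=attention, labels=lab)[0]
        training_loss += loss.item()
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    epoch_val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            batch = tuple(t.to(device) for t in batch)
            token_ids, segment, attention, lab = batch
            loss = model(token_ids, token_type_ids=segment,
                         attention_mask=attention, labels=lab)[0]
            val_loss_detailed.append(loss.item())
            epoch_val_loss += loss.item()
    val_loss_per_epoch.append(epoch_val_loss / len(val_loader))

# Persist the checkpoint and loss curves to the paths defined above
torch.save(model.state_dict(), MODEL_PATH)
np.save(TRAIN_LOSS_PATH, np.array(train_loss))
np.save(VAL_LOSS_PATH, np.array(val_loss_per_epoch))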