def __get_internal_bert_config(model_path, gradient_checkpointing):
    """Build a BertConfig for binary classification, CoLA-style fine-tuning.

    Args:
        model_path: path to the pretrained model directory.
            NOTE(review): currently unused inside this function — confirm
            whether the caller relies on it being accepted.
        gradient_checkpointing: forwarded to BertConfig to trade compute
            for memory during fine-tuning.

    Returns:
        A configured ``BertConfig`` with two labels and hidden/attention
        outputs disabled.
    """
    config = BertConfig(
        num_labels=2,                       # binary classification head
        hidden_dropout_prob=EVar.Dropout,   # project-wide dropout setting
        finetuning_task='cola',
        output_hidden_states=False,
        output_attentions=False,
        gradient_checkpointing=gradient_checkpointing,
    )
    # Pin the layer-norm epsilon explicitly rather than relying on the default.
    config.layer_norm_eps = 1e-12
    return config
val_features = [ torch.tensor(np.array([i for i in nsp_df['input_ids'].values])[val_idx].astype("int32"), dtype=torch.long), torch.tensor(np.array([i for i in nsp_df['input_mask'].values])[val_idx].astype("int32"), dtype=torch.long) ] y_train_torch = torch.tensor(y_train, dtype=torch.float32) y_val_torch = torch.tensor(y_val, dtype=torch.float32) train_dataset = TensorDataset(*features, y_train_torch) val_dataset = TensorDataset(*val_features, y_val_torch) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True, drop_last=True) val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True, drop_last=True) BERT_MODEL_PATH = '/content/drive/My Drive/PyTorch版/' bert_config = BertConfig(BERT_MODEL_PATH+'bert_config.json') bert_config.layer_norm_eps=1e-12 bert_config.num_hidden_layers = 6 model = BertForNextSentencePrediction(bert_config) model.to(device) lr = 1e-5 criterion = torch.nn.BCEWithLogitsLoss().cuda() optimizer = torch.optim.Adam(model.parameters(), lr=lr) param_lrs = [{'params': param, 'lr': lr} for param in model.parameters()] scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: 0.6 ** epoch) from tqdm import tqdm from sklearn.metrics import accuracy_score def train_one_epoch(model, train_loader, criterion, optimizer, device, steps_upd_logging=500, accumulation_steps=1,