def evaluate_split(model, processor, args, split='dev'):
    """Score ``model`` on one split and print timing plus a metrics row.

    A ``BertEvaluator`` is built for ``split``; the first element returned by
    ``get_scores(silent=True)`` is unpacked into five metrics. The elapsed
    wall-clock time is printed, followed by ``LOG_HEADER`` and one
    ``LOG_TEMPLATE`` row labelled with the upper-cased split name.

    Args:
        model: Forwarded to ``BertEvaluator``.
        processor: Forwarded to ``BertEvaluator``.
        args: Forwarded to ``BertEvaluator``.
        split: Name of the split to evaluate.
    """
    scorer = BertEvaluator(model, processor, args, split)

    began = time.time()
    first_result = scorer.get_scores(silent=True)[0]
    accuracy, precision, recall, f1, avg_loss = first_result
    print("Inference time", time.time() - began)

    print('\n' + LOG_HEADER)
    summary_row = LOG_TEMPLATE.format(
        split.upper(), accuracy, precision, recall, f1, avg_loss)
    print(summary_row)
def train(self):
    """Train for up to ``args.epochs`` epochs with dev-based early stopping.

    The training examples are converted to features, packed into a
    ``TensorDataset`` and iterated with a random (single process) or
    distributed sampler depending on ``args.local_rank``. After each epoch
    the dev split is scored with a fresh ``BertEvaluator``; the model is
    saved to ``self.snapshot_path`` whenever dev F1 improves, and training
    stops once ``args.patience`` consecutive epochs bring no improvement.
    """
    train_features = convert_examples_to_features(
        self.train_examples, self.args.max_seq_length, self.tokenizer)

    print("Number of examples: ", len(self.train_examples))
    print("Batch size:", self.args.batch_size)
    print("Num of steps:", self.num_train_optimization_steps)

    all_input_ids = torch.tensor(
        [f.input_ids for f in train_features], dtype=torch.long)
    all_input_mask = torch.tensor(
        [f.input_mask for f in train_features], dtype=torch.long)
    all_segment_ids = torch.tensor(
        [f.segment_ids for f in train_features], dtype=torch.long)
    all_label_ids = torch.tensor(
        [f.label_id for f in train_features], dtype=torch.long)
    train_data = TensorDataset(
        all_input_ids, all_input_mask, all_segment_ids, all_label_ids)

    is_distributed = self.args.local_rank != -1
    if is_distributed:
        train_sampler = DistributedSampler(train_data)
    else:
        train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(
        train_data, sampler=train_sampler, batch_size=self.args.batch_size)

    for epoch in trange(int(self.args.epochs), desc="Epoch"):
        if is_distributed:
            # DistributedSampler derives its shuffle from the epoch number;
            # without this call every epoch sees the same ordering.
            train_sampler.set_epoch(epoch)

        self.train_epoch(train_dataloader)
        dev_evaluator = BertEvaluator(
            self.model, self.processor, self.args, split='dev')
        dev_acc, dev_precision, dev_recall, dev_f1, dev_loss = \
            dev_evaluator.get_scores()[0]

        # Print validation results
        tqdm.write(self.log_header)
        tqdm.write(
            self.log_template.format(
                epoch + 1, self.iterations, epoch + 1, self.args.epochs,
                dev_acc, dev_precision, dev_recall, dev_f1, dev_loss))

        # Update validation results
        if dev_f1 > self.best_dev_f1:
            self.unimproved_iters = 0
            self.best_dev_f1 = dev_f1
            torch.save(self.model, self.snapshot_path)
        else:
            self.unimproved_iters += 1
            if self.unimproved_iters >= self.args.patience:
                self.early_stop = True
                tqdm.write(
                    "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                        epoch, self.best_dev_f1))
                break
def evaluate_split(model, processor, tokenizer, args, split='dev'):
    """Score ``model`` on one split and print a single metrics row.

    The first element returned by ``BertEvaluator.get_scores(silent=True)``
    is unpacked into five metrics, which are printed under ``LOG_HEADER``
    using ``LOG_TEMPLATE``; the row label is the upper-cased split name.

    Args:
        model: Forwarded to ``BertEvaluator``.
        processor: Forwarded to ``BertEvaluator``.
        tokenizer: Forwarded to ``BertEvaluator``.
        args: Forwarded to ``BertEvaluator``.
        split: Name of the split to evaluate.
    """
    scorer = BertEvaluator(model, processor, tokenizer, args, split)
    first_result = scorer.get_scores(silent=True)[0]
    accuracy, precision, recall, f1, avg_loss = first_result

    print('\n' + LOG_HEADER)
    summary_row = LOG_TEMPLATE.format(
        split.upper(), accuracy, precision, recall, f1, avg_loss)
    print(summary_row)
def evaluate_split(model, processor, args, split='dev'):
    """Evaluate one split and hand the predictions to the personality analysis.

    The first element returned by ``BertEvaluator.get_scores(silent=True)``
    is unpacked into ten values; only the predicted and target labels are
    used here. They are passed, together with ``split`` and the output
    directory ``out/bert``, to ``do_personality_analysis``. Metric printing
    is disabled in this variant.

    Args:
        model: Forwarded to ``BertEvaluator``.
        processor: Forwarded to ``BertEvaluator``.
        args: Forwarded to ``BertEvaluator``.
        split: Name of the split to evaluate.
    """
    root = Path('out/bert')
    # parents=True: the intermediate 'out' directory may not exist yet, and
    # mkdir would otherwise raise FileNotFoundError.
    root.mkdir(parents=True, exist_ok=True)

    evaluator = BertEvaluator(model, processor, args, split)
    # The scalar metrics are unpacked only to keep the tuple layout explicit.
    (accuracy, precision, recall, f1, avg_loss, f1_mac, hamming_loss, jacc,
     predicted_labels, target_labels) = evaluator.get_scores(silent=True)[0]

    do_personality_analysis([predicted_labels, target_labels], split, root)
def ensemble_acc(model, processor, tokenizer, args, final_pred, label, split='dev'):
    """Return accuracy, precision, recall and F1 for ensemble predictions.

    Delegates to ``BertEvaluator.get_accuracy(final_pred, label,
    silent=True)`` on an evaluator built for ``split``.

    Args:
        model: Forwarded to ``BertEvaluator``.
        processor: Forwarded to ``BertEvaluator``.
        tokenizer: Forwarded to ``BertEvaluator``.
        args: Forwarded to ``BertEvaluator``.
        final_pred: Predictions passed to ``get_accuracy``.
        label: Reference labels passed to ``get_accuracy``.
        split: Name of the split the evaluator is built for.

    Returns:
        The tuple ``(accuracy, precision, recall, f1)``.
    """
    scorer = BertEvaluator(model, processor, tokenizer, args, split)
    outcome = scorer.get_accuracy(final_pred, label, silent=True)
    # Unpack explicitly so a result of the wrong length fails here.
    accuracy, precision, recall, f1 = outcome
    return accuracy, precision, recall, f1
def evaluate(model, processor, args, last_bert_layers=-1, ngram_range=(1, 1)):
    """Train and score a downstream classifier on n-gram features.

    For the train, dev and test splits (named by ``args.train_name``,
    ``args.dev_name`` and ``args.test_name``) this collects BERT layer
    outputs and labels via ``BertEvaluator.get_bert_layers`` and n-gram
    feature matrices via ``get_feature_vector``. The n-gram features alone
    are then plotted with ``scatter_plot`` and fed to ``classification``;
    DEV and TEST results are printed with ``LOG_TEMPLATE``.

    Args:
        model: Forwarded to ``BertEvaluator``.
        processor: Forwarded to ``BertEvaluator``.
        args: Run configuration; also supplies ``max_seq_length``.
        last_bert_layers: Forwarded to ``get_bert_layers``.
        ngram_range: Forwarded to ``get_feature_vector``.
    """
    train_eval = BertEvaluator(model, processor, args, args.train_name)
    dev_eval = BertEvaluator(model, processor, args, args.dev_name)
    test_eval = BertEvaluator(model, processor, args, args.test_name)

    began = time.time()

    layer_kwargs = dict(silent=True, last_bert_layers=last_bert_layers)
    train_layers, train_labels = train_eval.get_bert_layers(**layer_kwargs)
    dev_layers, dev_labels = dev_eval.get_bert_layers(**layer_kwargs)
    test_layers, test_labels = test_eval.get_bert_layers(**layer_kwargs)

    train_ngrams, dev_ngrams, test_ngrams = get_feature_vector(
        (train_eval, dev_eval, test_eval),
        ngram_range=ngram_range,
        max_seq_len=args.max_seq_length)

    def _densify(ngram_matrix, layer_tensor):
        """Return dense n-gram features and the [layers | n-grams] matrix."""
        dense_ngrams = ngram_matrix.toarray()
        dense_layers = layer_tensor.cpu().data.numpy()
        combined = np.concatenate((dense_layers, dense_ngrams), axis=1)
        return dense_ngrams, combined

    # NOTE(review): the combined matrices are computed but not used below;
    # only the n-gram features are. Switching feature sets would mean pairing
    # the combined (or layer-only) arrays with the labels instead.
    train_ngrams, _train_combined = _densify(train_ngrams, train_layers)
    dev_ngrams, _dev_combined = _densify(dev_ngrams, dev_layers)
    test_ngrams, _test_combined = _densify(test_ngrams, test_layers)

    train = (train_ngrams, train_labels)
    tst = (test_ngrams, test_labels)
    dev = (dev_ngrams, dev_labels)

    scatter_plot(train, dev, tst)

    print('train, test shape : ', train[0].shape, tst[0].shape)
    print("Inference time", time.time() - began)

    r_test = classification(train, tst)
    r_dev = classification(train, dev)

    print('\n' + LOG_HEADER)
    for tag, outcome in (("DEV", r_dev), ("TEST", r_test)):
        print(
            LOG_TEMPLATE.format(tag, outcome['acc'], outcome['pr'],
                                outcome['rc'], outcome['f1']))
def evaluate_split(model, processor, args, split='dev'):
    """Evaluate one split, print its metrics and save predictions to disk.

    The first element returned by ``BertEvaluator.get_scores(silent=True)``
    is unpacked into eight scalar metrics plus the predicted and target
    values. The metrics are printed with ``LOG_TEMPLATE``; the two arrays are
    written with ``np.save`` as ``predicted_<model>_<split>.npy`` and
    ``target_<model>_<split>.npy`` under a directory derived from
    ``args.save_path`` (``model_checkpoints`` replaced by ``out``) and
    ``args.dataset``.

    Args:
        model: Forwarded to ``BertEvaluator``.
        processor: Forwarded to ``BertEvaluator``.
        args: Run configuration; ``save_path`` and ``dataset`` are read here.
        split: Name of the split to evaluate.
    """
    evaluator = BertEvaluator(model, processor, args, split)

    start_time = time.time()
    (accuracy, precision, recall, f1, avg_loss, f1_mac, hamming_loss, jacc,
     predicted_values, target_values) = evaluator.get_scores(silent=True)[0]
    print("Inference time", time.time() - start_time)

    print('\n' + LOG_HEADER)
    print(
        LOG_TEMPLATE.format(split.upper(), accuracy, precision, recall, f1,
                            avg_loss, f1_mac, hamming_loss, jacc))

    model_name = args.save_path.replace('model_checkpoints/', '')
    path = Path(args.save_path.replace('model_checkpoints', 'out'))
    path = path / args.dataset
    # parents=True: the output directory is nested, and its ancestors may not
    # exist yet; mkdir would otherwise raise FileNotFoundError.
    path.mkdir(parents=True, exist_ok=True)

    print('Saving prediction files in ', path)
    np.save(path / f'predicted_{model_name}_{split}.npy', predicted_values)
    np.save(path / f'target_{model_name}_{split}.npy', target_values)
def evaluate_split(model, processor, tokenizer, args, save_file, split='dev'):
    """Evaluate one split, print its metrics and dump all scores as JSON.

    ``BertEvaluator.get_scores(silent=True)`` returns the scores and their
    names. Depending on ``args.is_regression`` the first six (regression) or
    first five (classification) scores are printed with the matching header
    and template. All scores, keyed by name, are then written to
    ``save_file`` as a JSON object.

    Args:
        model: Forwarded to ``BertEvaluator``.
        processor: Forwarded to ``BertEvaluator``.
        tokenizer: Forwarded to ``BertEvaluator``.
        args: Run configuration; ``is_regression`` selects the report format.
        save_file: Path of the JSON file to write (overwritten).
        split: Name of the split to evaluate.
    """
    scorer = BertEvaluator(model, processor, tokenizer, args, split)
    scores, score_names = scorer.get_scores(silent=True)

    if not args.is_regression:
        # Classification scores arrive as precision, recall, f1, accuracy,
        # loss; the row is printed accuracy-first.
        precision, recall, f1, accuracy, avg_loss = scores[:5]
        print('\n' + LOG_HEADER_CLASS)
        print(
            LOG_TEMPLATE_CLASS.format(split.upper(), accuracy, precision,
                                      recall, f1, avg_loss))
    else:
        (rmse, kendall, pearson, spearman,
         pearson_spearman, avg_loss) = scores[:6]
        print('\n' + LOG_HEADER_REG)
        print(
            LOG_TEMPLATE_REG.format(split.upper(), rmse, kendall, pearson,
                                    spearman, pearson_spearman, avg_loss))

    named_scores = dict(zip(score_names, scores))
    with open(save_file, 'w') as handle:
        handle.write(json.dumps(named_scores))
def train_layer_qroup(self, dataloader, to_freeze_layer, model_path):
    """Run one training pass for a layer group, evaluate, and checkpoint.

    Calls ``train_epoch`` with ``freez_layer=to_freeze_layer``, scores the
    dev split, logs the metrics as epoch 1, saves the model to
    ``model_path / '<to_freeze_layer>.pt'`` and finally sets every optimizer
    parameter group's learning rate to ``2e-5``.

    Args:
        dataloader: Training data loader passed to ``train_epoch``.
        to_freeze_layer: Layer-group name forwarded to ``train_epoch`` and
            used as the checkpoint file stem.
        model_path: Directory-like object supporting ``/`` for the checkpoint.
    """
    self.train_epoch(dataloader, freez_layer=to_freeze_layer)

    scorer = BertEvaluator(self.model, self.processor, self.args, split='dev')
    (dev_acc, dev_precision, dev_recall, dev_f1, dev_loss, dev_f1_macro,
     dev_hamming_loss, dev_jaccard_score, dev_predicted_labels,
     dev_target_labels) = scorer.get_scores()[0]

    # Print validation results
    report_row = self.log_template.format(
        1, self.iterations, 1, self.args.epochs, dev_acc, dev_precision,
        dev_recall, dev_f1, dev_loss, dev_f1_macro, dev_hamming_loss,
        dev_jaccard_score)
    tqdm.write(self.log_header)
    tqdm.write(report_row)

    checkpoint_file = model_path / f'{to_freeze_layer}.pt'
    torch.save(self.model, checkpoint_file)

    # update learning rate
    for group in self.optimizer.param_groups:
        # NOTE(review): the previous rate is looked up but never used; the
        # rate is then overwritten with a fixed value.
        lr = group['lr'] if 'lr' in group else self.args.lr
        group['lr'] = 2e-5
def ensemble_cal(model, processor, tokenizer, args, split='dev'):
    """Return the labels and predictions for one split.

    Builds a ``BertEvaluator`` for ``split`` and returns the pair produced by
    its ``get_pred(silent=True)``.

    Args:
        model: Forwarded to ``BertEvaluator``.
        processor: Forwarded to ``BertEvaluator``.
        tokenizer: Forwarded to ``BertEvaluator``.
        args: Forwarded to ``BertEvaluator``.
        split: Name of the split to evaluate.

    Returns:
        The tuple ``(label, prediction)``.
    """
    scorer = BertEvaluator(model, processor, tokenizer, args, split)
    pair = scorer.get_pred(silent=True)
    gold, predicted = pair
    return gold, predicted
class BertTrainer(object):
    """Trains a BERT model with per-epoch dev evaluation and early stopping."""

    def __init__(self, model, optimizer, processor, args):
        """Store collaborators, load training examples and set up logging.

        Args:
            model: The model to train.
            optimizer: Optimizer stepping the model's parameters.
            processor: Dataset processor; supplies the training examples and
                a ``NAME`` used in the snapshot path.
            args: Run configuration (paths, batch size, epochs, flags).
        """
        self.args = args
        self.model = model
        self.optimizer = optimizer
        self.processor = processor
        self.train_examples = self.processor.get_train_examples(args.data_dir)
        self.tokenizer = BertTokenizer.from_pretrained(
            args.model, is_lowercase=args.is_lowercase)

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        self.snapshot_path = os.path.join(
            self.args.save_path, self.processor.NAME, '%s.pt' % timestamp)

        self.num_train_optimization_steps = int(
            len(self.train_examples) / args.batch_size /
            args.gradient_accumulation_steps) * args.epochs
        if args.local_rank != -1:
            # Divide the step budget computed above across the workers. The
            # count lives on ``self``; the original read it from ``args``.
            self.num_train_optimization_steps = (
                self.num_train_optimization_steps //
                torch.distributed.get_world_size())

        self.log_header = 'Epoch Iteration Progress Dev/Acc. Dev/Hamm. Dev/Jacc. Dev/Prec Dev/Rec Dev/micro-F1 Dev/F1 Dev/Loss'
        self.log_template = ' '.join(
            '{:>5.0f},{:>9.0f},{:>6.0f}/{:<5.0f} {:>6.4f},{:>8.4f},{:8.4f},{:8.4f},{:>8.4f},{:8.4f},{:8.4f},{:10.4f}'
            .split(','))

        self.iterations, self.nb_tr_steps, self.tr_loss = 0, 0, 0
        self.best_dev_f1, self.unimproved_iters = 0, 0
        self.early_stop = False

    def train_epoch(self, train_dataloader):
        """Run one pass over ``train_dataloader``, updating the model.

        The loss is binary cross-entropy with logits when
        ``args.is_multilabel`` is set, otherwise cross-entropy against the
        argmax of the label vector. Gradients are accumulated over
        ``args.gradient_accumulation_steps`` batches before each optimizer
        step.
        """
        for step, batch in enumerate(tqdm(train_dataloader, desc="Training")):
            self.model.train()
            batch = tuple(t.to(self.args.device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch

            logits = self.model(input_ids, segment_ids, input_mask)

            if self.args.is_multilabel:
                if self.args.fp16:
                    loss = F.binary_cross_entropy_with_logits(
                        logits, label_ids.half())
                else:
                    loss = F.binary_cross_entropy_with_logits(
                        logits, label_ids.float())
            else:
                loss = F.cross_entropy(logits, torch.argmax(label_ids, dim=1))

            if self.args.n_gpu > 1:
                loss = loss.mean()
            if self.args.gradient_accumulation_steps > 1:
                # Scale so the accumulated gradient matches one large batch.
                loss = loss / self.args.gradient_accumulation_steps

            if self.args.fp16:
                self.optimizer.backward(loss)
            else:
                loss.backward()

            self.tr_loss += loss.item()
            self.nb_tr_steps += 1
            if (step + 1) % self.args.gradient_accumulation_steps == 0:
                if self.args.fp16:
                    # The learning rate is warmed up by hand only on the
                    # fp16 path.
                    lr_this_step = self.args.lr * warmup_linear(
                        self.iterations / self.num_train_optimization_steps,
                        self.args.warmup_proportion)
                    for param_group in self.optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                self.optimizer.step()
                self.optimizer.zero_grad()
                self.iterations += 1

    def train(self):
        """Train for up to ``args.epochs`` epochs with dev-based early stopping.

        Features are built (hierarchical or flat, per
        ``args.is_hierarchical``), packed into a ``TensorDataset`` and
        iterated with a random or distributed sampler. After each epoch the
        dev split is scored; the model is saved to ``self.snapshot_path``
        when dev micro-F1 improves, and training stops after
        ``args.patience`` epochs without improvement.
        """
        if self.args.is_hierarchical:
            train_features = convert_examples_to_hierarchical_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)
        else:
            train_features = convert_examples_to_features(
                self.train_examples, self.args.max_seq_length, self.tokenizer)

        unpadded_input_ids = [f.input_ids for f in train_features]
        unpadded_input_mask = [f.input_mask for f in train_features]
        unpadded_segment_ids = [f.segment_ids for f in train_features]

        if self.args.is_hierarchical:
            # The return value is ignored, so the lists are presumably
            # padded in place.
            pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
            pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
            pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

        print("Number of examples: ", len(self.train_examples))
        print("Batch size:", self.args.batch_size)
        print("Num of steps:", self.num_train_optimization_steps)

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(
            unpadded_segment_ids, dtype=torch.long)
        label_ids = torch.tensor(
            [f.label_id for f in train_features], dtype=torch.long)

        train_data = TensorDataset(
            padded_input_ids, padded_input_mask, padded_segment_ids, label_ids)

        is_distributed = self.args.local_rank != -1
        if is_distributed:
            train_sampler = DistributedSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)
        train_dataloader = DataLoader(
            train_data, sampler=train_sampler,
            batch_size=self.args.batch_size)

        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            if is_distributed:
                # DistributedSampler derives its shuffle from the epoch
                # number; without this every epoch sees the same ordering.
                train_sampler.set_epoch(epoch)

            self.train_epoch(train_dataloader)
            self.dev_evaluator = BertEvaluator(
                self.model, self.processor, self.args, split='dev')
            (dev_acc, dev_hamming, dev_jaccard, dev_precision, dev_recall,
             dev_f1_micro, dev_f1_macro, dev_loss) = \
                self.dev_evaluator.get_scores()[0]

            # Print validation results
            tqdm.write(self.log_header)
            tqdm.write(
                self.log_template.format(
                    epoch + 1, self.iterations, epoch + 1, self.args.epochs,
                    dev_acc, dev_hamming, dev_jaccard, dev_precision,
                    dev_recall, dev_f1_micro, dev_f1_macro, dev_loss))

            # Update validation results
            if dev_f1_micro > self.best_dev_f1:
                self.unimproved_iters = 0
                self.best_dev_f1 = dev_f1_micro
                torch.save(self.model, self.snapshot_path)
            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    tqdm.write(
                        "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                            epoch, self.best_dev_f1))
                    break
def evaluate_split(model, processor, tokenizer, args, split='dev'):
    """Evaluate one split and print the raw metric names and scores.

    ``BertEvaluator.get_scores(silent=True)`` is unpacked into scores and
    metric names; a fixed heading, the names and the scores are printed on
    three separate lines.

    Args:
        model: Forwarded to ``BertEvaluator``.
        processor: Forwarded to ``BertEvaluator``.
        tokenizer: Forwarded to ``BertEvaluator``.
        args: Forwarded to ``BertEvaluator``.
        split: Name of the split to evaluate.
    """
    scorer = BertEvaluator(model, processor, tokenizer, args, split)
    outcome = scorer.get_scores(silent=True)
    scores, metric_names = outcome

    for line in ('Evaluation metrics for', metric_names, scores):
        print(line)
def train_gradually(self):
    """Train in stages: one classifier-group pass, then ordinary epochs.

    After building the training data loader, a single pass is run through
    ``train_layer_qroup`` with ``to_freeze_layer='classifier'`` (checkpointed
    next to the main snapshot), ``unfreez_all`` is called, and the usual
    epoch loop with dev evaluation and F1-based early stopping follows.
    """
    if self.args.is_hierarchical:
        train_features = convert_examples_to_hierarchical_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)
    else:
        train_features = convert_examples_to_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in train_features]
    unpadded_input_mask = [f.input_mask for f in train_features]
    unpadded_segment_ids = [f.segment_ids for f in train_features]

    if self.args.is_hierarchical:
        # The return value is ignored, so the lists are presumably padded in
        # place.
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    print("Number of examples: ", len(self.train_examples))
    print("Batch size:", self.args.batch_size)
    print("Num of steps:", self.num_train_optimization_steps)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor(
        [f.label_id for f in train_features], dtype=torch.long)

    train_data = TensorDataset(
        padded_input_ids, padded_input_mask, padded_segment_ids, label_ids)

    is_distributed = self.args.local_rank != -1
    if is_distributed:
        train_sampler = DistributedSampler(train_data)
    else:
        train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(
        train_data, sampler=train_sampler, batch_size=self.args.batch_size)

    # train gradually
    # Stage checkpoints go in the snapshot's directory. Path.parent is used
    # rather than splitting on '/', which misses other path separators.
    model_path = Path(self.snapshot_path).parent

    # freeze all layers except classifier
    self.train_layer_qroup(
        train_dataloader, to_freeze_layer='classifier', model_path=model_path)

    # Disabled stages, kept for reference: pooler and then each encoder
    # layer from 11 down to 0.
    # self.train_layer_qroup(train_dataloader, to_freeze_layer='bert.pooler',
    #                        model_path=model_path)
    # for i in range(11, -1, -1):
    #     self.train_layer_qroup(
    #         train_dataloader,
    #         to_freeze_layer='bert.encoder.layer.' + str(i),
    #         model_path=model_path)

    self.unfreez_all()

    for epoch in trange(int(self.args.epochs), desc="Epoch"):
        if is_distributed:
            # DistributedSampler derives its shuffle from the epoch number.
            # The staged pass above ran with the sampler's initial epoch (0),
            # so offset by one to avoid repeating that ordering.
            train_sampler.set_epoch(epoch + 1)

        self.train_epoch(train_dataloader)
        dev_evaluator = BertEvaluator(
            self.model, self.processor, self.args, split='dev')
        (dev_acc, dev_precision, dev_recall, dev_f1, dev_loss, dev_f1_macro,
         dev_hamming_loss, dev_jaccard_score, dev_predicted_labels,
         dev_target_labels) = dev_evaluator.get_scores()[0]

        # Print validation results
        tqdm.write(self.log_header)
        tqdm.write(
            self.log_template.format(
                epoch + 1, self.iterations, epoch + 1, self.args.epochs,
                dev_acc, dev_precision, dev_recall, dev_f1, dev_loss,
                dev_f1_macro, dev_hamming_loss, dev_jaccard_score))

        # Update validation results
        if dev_f1 > self.best_dev_f1:
            self.unimproved_iters = 0
            self.best_dev_f1 = dev_f1
            torch.save(self.model, self.snapshot_path)
        else:
            self.unimproved_iters += 1
            if self.unimproved_iters >= self.args.patience:
                self.early_stop = True
                tqdm.write(
                    "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                        epoch, self.best_dev_f1))
                break
def train(self):
    """Train with early stopping on dev F1 or dev accuracy.

    After each epoch the dev split is scored and logged together with the
    value returned by ``train_epoch``. The early-stopping measure is dev F1
    when ``args.early_on_f1`` is set (forced to 0 when dev recall equals 1),
    otherwise dev accuracy. The model is saved to ``self.snapshot_path`` on
    improvement; training stops after ``args.patience`` epochs without one.
    """
    if self.args.is_hierarchical:
        train_features = convert_examples_to_hierarchical_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)
    else:
        train_features = convert_examples_to_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in train_features]
    unpadded_input_mask = [f.input_mask for f in train_features]
    unpadded_segment_ids = [f.segment_ids for f in train_features]

    if self.args.is_hierarchical:
        # The return value is ignored, so the lists are presumably padded in
        # place.
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    print("Number of examples: ", len(self.train_examples))
    print("Batch size:", self.args.batch_size)
    print("Num of steps:", self.num_train_optimization_steps)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor(
        [f.label_id for f in train_features], dtype=torch.long)

    train_data = TensorDataset(
        padded_input_ids, padded_input_mask, padded_segment_ids, label_ids)

    is_distributed = self.args.local_rank != -1
    if is_distributed:
        train_sampler = DistributedSampler(train_data)
    else:
        train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(
        train_data, sampler=train_sampler, batch_size=self.args.batch_size)

    for epoch in trange(int(self.args.epochs), desc="Epoch"):
        if is_distributed:
            # DistributedSampler derives its shuffle from the epoch number;
            # without this call every epoch sees the same ordering.
            train_sampler.set_epoch(epoch)

        loss_epoch = self.train_epoch(train_dataloader)
        dev_evaluator = BertEvaluator(
            self.model, self.processor, self.args, split='dev')
        (dev_acc, dev_precision, dev_recall, dev_f1, dev_loss, dev_f1_macro,
         dev_hamming_loss, dev_jaccard_score, dev_predicted_labels,
         dev_target_labels) = dev_evaluator.get_scores()[0]

        # Print validation results
        tqdm.write(self.log_header)
        tqdm.write(
            self.log_template.format(
                epoch + 1, self.iterations, epoch + 1, self.args.epochs,
                dev_acc, dev_precision, dev_recall, dev_f1, dev_loss,
                dev_f1_macro, dev_hamming_loss, dev_jaccard_score,
                loss_epoch))

        if self.args.early_on_f1:
            # A recall of exactly 1 is scored as 0 so it cannot count as an
            # improvement.
            if dev_recall != 1:
                dev_measure = dev_f1
            else:
                dev_measure = 0
            measure_name = 'F1'
        else:
            dev_measure = dev_acc
            measure_name = 'Balanced Acc'

        # Update validation results
        if dev_measure > self.best_dev_measure:
            self.unimproved_iters = 0
            self.best_dev_measure = dev_measure
            torch.save(self.model, self.snapshot_path)
        else:
            self.unimproved_iters += 1
            if self.unimproved_iters >= self.args.patience:
                self.early_stop = True
                print("Early Stopping. Epoch: {}, Best {}: {}".format(
                    epoch, measure_name, self.best_dev_measure))
                break
def train(self):
    """Train with dev-F1 early stopping and save learning-curve plots.

    After each epoch the dev split is scored, logged and recorded. The model
    is saved to ``self.snapshot_path`` when dev F1 improves; training stops
    after ``args.patience`` epochs without improvement. Once training ends,
    the recorded metrics are plotted and written to ``accuracy_curves.png``
    and ``loss_curves.png`` in the current working directory.
    """
    if self.args.is_hierarchical:
        train_features = convert_examples_to_hierarchical_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)
    else:
        train_features = convert_examples_to_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in train_features]
    unpadded_input_mask = [f.input_mask for f in train_features]
    unpadded_segment_ids = [f.segment_ids for f in train_features]

    if self.args.is_hierarchical:
        # The return value is ignored, so the lists are presumably padded in
        # place.
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    print("Number of examples: ", len(self.train_examples))
    print("Batch size:", self.args.batch_size)
    print("Num of steps:", self.num_train_optimization_steps)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor(
        [f.label_id for f in train_features], dtype=torch.long)

    train_data = TensorDataset(
        padded_input_ids, padded_input_mask, padded_segment_ids, label_ids)

    is_distributed = self.args.local_rank != -1
    if is_distributed:
        train_sampler = DistributedSampler(train_data)
    else:
        train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(
        train_data, sampler=train_sampler, batch_size=self.args.batch_size)

    # results for graphing learning curves
    results = []

    iterator = trange(int(self.args.epochs), desc="Epoch")
    for epoch in iterator:
        if is_distributed:
            # DistributedSampler derives its shuffle from the epoch number;
            # without this call every epoch sees the same ordering.
            train_sampler.set_epoch(epoch)

        self.train_epoch(train_dataloader)
        dev_evaluator = BertEvaluator(
            self.model, self.processor, self.args, split='dev')
        dev_acc, dev_precision, dev_recall, dev_f1, dev_loss = \
            dev_evaluator.get_scores()[0]

        # Print validation results
        tqdm.write(self.log_header)
        tqdm.write(
            self.log_template.format(
                epoch + 1, self.iterations, epoch + 1, self.args.epochs,
                dev_acc, dev_precision, dev_recall, dev_f1, dev_loss))

        results.append([
            epoch + 1, dev_acc, dev_precision, dev_recall, dev_f1, dev_loss
        ])

        # Update validation results
        if dev_f1 > self.best_dev_f1:
            self.unimproved_iters = 0
            self.best_dev_f1 = dev_f1
            torch.save(self.model, self.snapshot_path)
        else:
            self.unimproved_iters += 1
            if self.unimproved_iters >= self.args.patience:
                self.early_stop = True
                tqdm.write(
                    "Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                        epoch, self.best_dev_f1))
                # Close the progress bar explicitly since the loop ends early.
                iterator.close()
                break

    if not results:
        # No epoch ran (e.g. epochs == 0): there is nothing to plot, and
        # building the six-column frame from an empty array would raise.
        return

    # create learning curves
    results_frame = pd.DataFrame(
        data=np.array(results),
        columns=['Epoch', 'Accuracy', 'Precision', 'Recall', 'F1', 'Loss']
    ).set_index('Epoch')

    ax_acc = results_frame[['Accuracy', 'Precision', 'Recall', 'F1']].plot()
    ax_loss = results_frame[['Loss']].plot()

    ax_acc.get_figure().savefig('accuracy_curves.png')
    ax_loss.get_figure().savefig('loss_curves.png')
def train(self):
    """Train for classification or regression, with optional dev/test modes.

    Each epoch runs ``train_epoch`` and prints the accumulated training loss.
    With ``args.evaluate_dev`` the dev split is scored, logged in the
    regression or classification format, and the metric named by
    ``args.eval_metric`` drives snapshotting and patience-based early
    stopping. With ``args.evaluate_test`` the relative loss decrease is
    checked once, at epoch ``self.patience_training``, to detect runs that
    fail to converge, and the model is saved when training ends. Start time,
    end time and elapsed seconds are printed.
    """
    cfg = self.args
    stamp_format = "%Y-%m-%d_%H-%M-%S"

    if cfg.is_hierarchical:
        train_features = convert_examples_to_hierarchical_features(
            self.train_examples, cfg.max_seq_length, self.tokenizer)
    else:
        train_features = convert_examples_to_features(
            self.train_examples, cfg.max_seq_length, self.tokenizer,
            use_guid=True, is_regression=cfg.is_regression)

    id_rows = [feat.input_ids for feat in train_features]
    mask_rows = [feat.input_mask for feat in train_features]
    segment_rows = [feat.segment_ids for feat in train_features]

    if cfg.is_hierarchical:
        # The return value is ignored, so the lists are presumably padded in
        # place.
        for rows in (id_rows, mask_rows, segment_rows):
            pad_input_matrix(rows, cfg.max_doc_length)

    print("Number of examples: ", len(self.train_examples))
    print("Batch size:", cfg.batch_size)
    print("Num of steps:", self.num_train_optimization_steps)

    padded_input_ids = torch.tensor(id_rows, dtype=torch.long)
    padded_input_mask = torch.tensor(mask_rows, dtype=torch.long)
    padded_segment_ids = torch.tensor(segment_rows, dtype=torch.long)

    # Regression targets are floats; class labels are integer ids.
    label_dtype = torch.float if cfg.is_regression else torch.long
    label_ids = torch.tensor(
        [feat.label_id for feat in train_features], dtype=label_dtype)

    train_data = TensorDataset(
        padded_input_ids, padded_input_mask, padded_segment_ids, label_ids)
    train_dataloader = DataLoader(
        train_data, sampler=RandomSampler(train_data),
        batch_size=cfg.batch_size)

    print('Begin training: ', datetime.datetime.now().strftime(stamp_format))
    start_time = time.monotonic()

    for epoch in trange(int(cfg.epochs), desc="Epoch"):
        self.train_epoch(train_dataloader)
        print('Train loss: ', self.tr_loss)
        if epoch == 0:
            # Baseline for the convergence check further down.
            self.initial_tr_loss = self.tr_loss

        if cfg.evaluate_dev:
            scorer = BertEvaluator(
                self.model, self.processor, self.tokenizer, cfg, split='dev')
            dev_scores, dev_score_names = scorer.get_scores()
            dev_metric = dev_scores[dev_score_names.index(cfg.eval_metric)]

            # Print validation results
            if cfg.is_regression:
                (dev_rmse, dev_kendall, dev_pearson, dev_spearman,
                 dev_pearson_spearman, dev_loss) = dev_scores[:6]
                tqdm.write(self.log_header_regression)
                tqdm.write(
                    self.log_template_regression.format(
                        epoch + 1, self.iterations, epoch + 1, cfg.epochs,
                        dev_rmse, dev_kendall, dev_pearson, dev_spearman,
                        dev_pearson_spearman, dev_loss))
            else:
                (dev_precision, dev_recall, dev_f1,
                 dev_acc, dev_loss) = dev_scores[:5]
                tqdm.write(self.log_header_classification)
                tqdm.write(
                    self.log_template_classification.format(
                        epoch + 1, self.iterations, epoch + 1, cfg.epochs,
                        dev_acc, dev_precision, dev_recall, dev_f1, dev_loss))

            # Update validation results
            if self.check_dev_improved(dev_metric):
                self.unimproved_iters = 0
                self.best_dev_metric = dev_metric
                torch.save(self.model, self.snapshot_path)
            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= cfg.patience:
                    self.early_stop = True
                    tqdm.write(
                        "Early Stopping. Epoch: {}, Best Dev {}: {}".
                        format(epoch, cfg.eval_metric, self.best_dev_metric))
                    break

        if cfg.evaluate_test and epoch == self.patience_training:
            loss_drop = self.initial_tr_loss - self.tr_loss
            loss_percent = loss_drop / self.initial_tr_loss
            if loss_percent <= self.minimum_loss_percent_decrease:
                self.training_converged = False
                tqdm.write(
                    "Training failed to converge. Epoch: {}, Loss percent: {}"
                    .format(epoch, loss_percent))
                break

    end_time = time.monotonic()

    # save model at end of training
    # when evaluating on test
    if cfg.evaluate_test:
        torch.save(self.model, self.snapshot_path)

    print('End training: ', datetime.datetime.now().strftime(stamp_format))
    print('Time elapsed: ', end_time - start_time)