def save_checkpoint(self, model, optimizer, save_dir='/home/workspace/ImageClassifier',
                    checkpoint_file='checkpoint.pth'):
    """
    Saves the neural network to a checkpoint file so it can be reloaded
    again without the need to re-train the network.

    INPUTS:
        1. Network model:            <model object>
        2. Gradient descent def:     <optimizer object>
        3. Path to checkpoint dir:   <str>
        4. Checkpoint file name:     <str>

    RETURNS:
        None
    """
    # define the checkpoint dict for saving, loading and inference later
    checkpoint = {'arch': self.arch,
                  'input_size': self.input_size,
                  'hidden_size': self.hidden_size,
                  'output_size': self.output_size,
                  'classifier': model.classifier,
                  'learning_rate': self.learning_rate,
                  'epochs': self.epochs,
                  'loss': self.training_loss,
                  'class_to_idx': self.class_to_idx,
                  'model_state_dict': model.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict()}

    # save the model to the specified folder and file name
    checkpoint_path = os.path.join(save_dir, checkpoint_file)
    try:
        torch.save(checkpoint, checkpoint_path)
    except Exception as error:
        print("The following error: {} occurred while saving the checkpoint "
              "file to: {}".format(error, checkpoint_path))
    else:
        print("Trained model saved to: {}".format(checkpoint_path))
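# A minimal load-side counterpart to save_checkpoint above -- a sketch, not the
# original author's code. It assumes the checkpoint dict layout shown above and
# a torchvision backbone selected by the stored 'arch' string; adjust to match
# how the model was actually constructed.
import torch
from torchvision import models

def load_checkpoint(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    # rebuild the backbone named in the checkpoint (assumes a torchvision arch)
    model = getattr(models, checkpoint['arch'])(pretrained=True)
    # reattach the saved classifier head and trained weights
    model.classifier = checkpoint['classifier']
    model.load_state_dict(checkpoint['model_state_dict'])
    model.class_to_idx = checkpoint['class_to_idx']
    return model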
def saveModel(state, epoch, loss_epoch, diff_epoch, is_best, epoch_len):
    # NOTE: unlike the variants below, this version never uses is_best,
    # so no best-model copy is made.
    torch.save({
        "epoch": epoch,
        "epoch_len": epoch_len,
        "state_dict": state,
        "epoch_avg_loss": float(loss_epoch) / epoch_len,
        "epoch_avg_diff": float(diff_epoch) / epoch_len
    }, MODEL_PATH)
def saveModel(state, epoch, epoch_loss, epoch_diff, is_best):
    torch.save({
        "epoch": epoch,
        "state_dict": state,
        "epoch_avg_loss": epoch_loss,
        "epoch_avg_diff": epoch_diff
    }, MODEL_PATH)
    if is_best:
        shutil.copyfile(MODEL_PATH, MODEL_PATH_BEST)
def saveModel(state, epoch, loss_epoch, diff_epoch, is_best, episode_idx):
    torch.save({
        "epoch": epoch,
        "episodes": episode_idx + 1,
        "state_dict": state,
        "epoch_avg_loss": float(loss_epoch) / (episode_idx + 1),
        "epoch_avg_diff": float(diff_epoch) / (episode_idx + 1)
    }, MODEL_PATH)
    if is_best:
        shutil.copyfile(MODEL_PATH, MODEL_PATH_BEST)
def saveModel(state, epoch, loss_epoch, valid_epoch, is_best, episode_idx):
    torch.save({
        "epoch": epoch,
        "episodes": episode_idx + 1,
        "state_dict": state,
        "epoch_avg_loss": round(loss_epoch, 10),
        "epoch_avg_valid": round(valid_epoch, 10)
    }, MODEL_PATH)
    if is_best:
        shutil.copyfile(MODEL_PATH, MODEL_PATH_BEST)
def saveModel(state, epoch, loss_epoch, diff_epoch, is_best, epoch_len):
    print("saving...")
    torch.save({
        "epoch": epoch,
        "epoch_len": epoch_len,
        "state_dict": state,
        "epoch_avg_loss": float(loss_epoch) / epoch_len,
        "epoch_avg_diff": float(diff_epoch) / epoch_len
    }, MODEL_PATH)
    if is_best:
        shutil.copyfile(MODEL_PATH, MODEL_PATH_BEST)
    print("saved.")
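# A load/resume sketch matching the saveModel variants above (an assumption,
# not code from the original projects). MODEL_PATH stands for the same
# constant the save functions use; pass MODEL_PATH_BEST to resume from the
# best checkpoint instead.
import torch

def loadModel(model, path=MODEL_PATH):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint["state_dict"])
    # resume bookkeeping from the stored metadata
    start_epoch = checkpoint["epoch"] + 1
    last_avg_loss = checkpoint.get("epoch_avg_loss")
    return model, start_epoch, last_avg_loss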
def train():
    train_dataset, test_dataset = load_normalized_datasets()
    net = SimpleConvNet()  # debug=True
    train_dataset_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_dataset_loader = DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE)
    for epoch in range(EPOCHS):  # range, not the Python-2-only xrange
        train_step(net, train_dataset_loader, epoch)
        test_training_accuracy(net, test_dataset_loader, epoch)
    # torch.save accepts a path directly; no need to open a file handle
    torch.save(net.state_dict(), MODEL_FILE)
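# Hedged counterpart for loading the state dict saved by train() above;
# SimpleConvNet and MODEL_FILE are the same names the snippet assumes.
def load_trained():
    net = SimpleConvNet()
    net.load_state_dict(torch.load(MODEL_FILE))
    net.eval()  # switch off dropout/batch-norm updates for inference
    return net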
def train_epoch(self, save_model=False):
    self.model.train()
    loss_list = []
    with tqdm(enumerate(self.train_loader), total=len(self.train_loader),
              desc='train epochs') as progress_bar:
        for i_batch, batch in progress_bar:
            data = batch['data'].to(self.device)
            labels = batch['label'].to(self.device)
            output = self.model(data)
            acc = self.compute_acc(labels=labels, output=output)
            # zero out the parameter gradients
            self.optimizer.zero_grad()
            loss = self.loss_fcn(output, labels)
            loss.backward()
            self.optimizer.step()
            # store the scalar value, not the graph-holding tensor
            loss_list.append(loss.item())
            progress_bar.set_postfix(loss=loss.item(), acc=acc.item())
    if save_model:
        torch.save(self.model.state_dict(), self.model.path)
    return loss_list
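# A minimal usage sketch for train_epoch (the trainer object and its
# attributes self.model, self.optimizer, self.loss_fcn, self.train_loader,
# self.device are assumed from the method above; NUM_EPOCHS is hypothetical):
# train for several epochs and persist the weights on the last one.
NUM_EPOCHS = 10
for epoch in range(NUM_EPOCHS):
    losses = trainer.train_epoch(save_model=(epoch == NUM_EPOCHS - 1))
    print('epoch %d mean loss: %.4f' % (epoch, sum(losses) / len(losses)))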
def save_to(model: nn.Module, path: str, ep: int):
    '''Save the model to the given directory.

    Args:
        model (nn.Module): the model
        path (str): directory for checkpoint files
        ep (int): the current epoch
    '''
    if not os.path.exists(path):
        os.mkdir(path)
    ckpt_path = os.path.join(path, 'ep-%d.pth' % ep)
    torch.save({
        'epoch': ep,
        'model_state_dict': model.state_dict()
    }, ckpt_path)
    print('Model trained after %d epochs has been saved to: %s.' % (ep, ckpt_path))
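# Load-side sketch for save_to above. It assumes the 'ep-%d.pth' naming and
# the two-key checkpoint dict it writes; build_model is a hypothetical factory
# that reconstructs the same architecture.
def load_from(path: str, ep: int) -> nn.Module:
    ckpt = torch.load(os.path.join(path, 'ep-%d.pth' % ep))
    model = build_model()  # hypothetical: recreate the saved architecture
    model.load_state_dict(ckpt['model_state_dict'])
    return model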
def main():
    model = MODEL_DISPATCHER[BASE_MODEL](pretrained=True)
    model.to(DEVICE)

    train_dataset = BengaliDatasetTrain(folds=TRAINING_FOLDS,
                                        img_height=IMG_HEIGHT,
                                        img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN,
                                        std=MODEL_STD)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=TRAIN_BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=4)

    valid_dataset = BengaliDatasetTrain(folds=VALIDATION_FOLDS,
                                        img_height=IMG_HEIGHT,
                                        img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN,
                                        std=MODEL_STD)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                               batch_size=TEST_BATCH_SIZE,
                                               shuffle=False,
                                               num_workers=4)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max",
                                                           patience=5, factor=0.3,
                                                           verbose=True)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    for epoch in range(EPOCHS):
        train(train_dataset, train_loader, model, optimizer)
        with torch.no_grad():
            val_score = evaluate(valid_dataset, valid_loader, model)
        scheduler.step(val_score)
        # note: if wrapped in nn.DataParallel, the saved keys carry a 'module.' prefix
        torch.save(model.state_dict(),
                   f"{BASE_MODEL}_fold{VALIDATION_FOLDS[0]}.bin")
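# Hedged inference-time loader for the .bin file written above (a sketch, not
# the original repo's code). Because the state dict may have been saved from
# an nn.DataParallel wrapper, keys can carry a 'module.' prefix that must be
# stripped before loading into a bare model.
def load_for_inference(path):
    model = MODEL_DISPATCHER[BASE_MODEL](pretrained=False)
    state = torch.load(path, map_location="cpu")
    state = {(k[7:] if k.startswith("module.") else k): v for k, v in state.items()}
    model.load_state_dict(state)
    model.eval()
    return model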
def merge_checkpoints(checkpoint_paths, output_path):
    if not checkpoint_paths:
        raise ValueError('Need to specify at least one checkpoint, %d provided.'
                         % (0 if checkpoint_paths is None else len(checkpoint_paths)))
    if len(checkpoint_paths) < 2:
        # nothing to average: just copy the single checkpoint and stop
        shutil.copyfile(checkpoint_paths[0], output_path)
        return

    def __sum(source, destination):
        for key, value in source.items():
            if isinstance(value, dict):
                node = destination.setdefault(key, {})
                __sum(value, node)
            elif isinstance(value, torch.FloatTensor):
                # torch.add(x, 1.0, y) is the deprecated 3-arg form; use x + y
                destination[key] = destination[key] + value
        return destination

    def __divide(source, denominator):
        for key, value in source.items():
            if isinstance(value, dict):
                __divide(value, denominator)
            elif isinstance(value, torch.FloatTensor):
                source[key] = torch.div(value, denominator)
        return source

    output_checkpoint = torch.load(checkpoint_paths[0])
    for checkpoint_path in checkpoint_paths[1:]:
        checkpoint = torch.load(checkpoint_path)
        output_checkpoint = __sum(checkpoint, output_checkpoint)
    output_checkpoint = __divide(output_checkpoint, len(checkpoint_paths))
    torch.save(output_checkpoint, output_path)
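# Typical use of merge_checkpoints: average the last few epoch checkpoints
# into a single model (checkpoint averaging). The paths are illustrative.
merge_checkpoints(
    ['model_e8.pt', 'model_e9.pt', 'model_e10.pt'],
    'model_avg_last3.pt')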
def build_model(train_dataset, dev_dataset, test_dataset, collate_fn, tag_idx,
                is_oov, embedding_matrix, model_save_path, plot_save_path):
    # init model
    model = BiLSTM_CRF(embedding_matrix, tag_idx)

    # turn on cuda
    model = model.cuda()

    # verify model
    print(model)

    # filter out parameters that have requires_grad = False
    optimizer = optim.Adadelta(filter(lambda p: p.requires_grad, model.parameters()),
                               lr=cfg.LEARNING_RATE)
    # optimizer = optim.SGD(model.parameters(), lr=cfg.LEARNING_RATE, momentum=0.9)
    optimizer.zero_grad()
    model.zero_grad()

    best_res_val_0 = 0.0
    best_epoch = 0
    dev_eval_history = []
    test_eval_history = []
    for epoch in range(cfg.MAX_EPOCH):
        print('-' * 40)
        print("EPOCH = {0}".format(epoch))
        print('-' * 40)
        random.seed(epoch)
        train_loader = DataLoader(train_dataset, batch_size=cfg.BATCH_SIZE,
                                  shuffle=cfg.RANDOM_TRAIN, num_workers=28,
                                  collate_fn=collate_fn)
        train_eval, model = train_a_epoch(name="train", data=train_loader,
                                          tag_idx=tag_idx, model=model,
                                          optimizer=optimizer)

        dev_loader = DataLoader(dev_dataset, batch_size=cfg.BATCH_SIZE,
                                num_workers=28, collate_fn=collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=cfg.BATCH_SIZE,
                                 num_workers=28, collate_fn=collate_fn)
        dev_eval, _, _ = test("dev", dev_loader, tag_idx, model)
        test_eval, _, _ = test("test", test_loader, tag_idx, model)

        dev_eval.verify_results()
        test_eval.verify_results()

        dev_eval_history.append(dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]])
        test_eval_history.append(test_eval.results['test_conll_f'])
        plot_curve(epoch, dev_eval_history, test_eval_history, "epochs", "fscore",
                   "epoch learning curve", plot_save_path)
        with open("plot_data.p", "wb") as f:
            pickle.dump((dev_eval_history, test_eval_history), f)

        # pick the best epoch
        if epoch < cfg.MIN_EPOCH_IMP or (
                dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]] > best_res_val_0):
            best_epoch = epoch
            best_res_val_0 = dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]
            torch.save(model, model_save_path)

        print("current dev micro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]))
        print("current dev macro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[1]]))
        print("best dev micro_score: {0}".format(best_res_val_0))
        print("best_epoch: {0}".format(best_epoch))

        # early stopping: no dev improvement for MAX_EPOCH_IMP epochs
        if 0 < cfg.MAX_EPOCH_IMP <= (epoch - best_epoch):
            break

    print("Loading Best Model ...")
    model = torch.load(model_save_path)
    return model
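# Note on the save style above: torch.save(model, path) pickles the whole
# module, so torch.load(path) only works where the BiLSTM_CRF class definition
# is importable. A state_dict-based alternative (a sketch under that
# assumption, not the original project's code):
def save_best(model, path):
    torch.save(model.state_dict(), path)

def load_best(embedding_matrix, tag_idx, path):
    model = BiLSTM_CRF(embedding_matrix, tag_idx)
    model.load_state_dict(torch.load(path))
    return model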
def train_model(self, train_data, valid_data=None, save_path=None, save_epochs=5):
    multi_gpu = self._gpu_ids is not None and len(self._gpu_ids) > 1

    # set the mask to None; required when the same model is trained after a translation
    if multi_gpu:
        decoder = self._model.module.decoder
    else:
        decoder = self._model.decoder
    decoder.attn.applyMask(None)
    self._model.train()

    # define criterion of each GPU
    criterion = self._new_nmt_criterion(self._trg_dict.size())

    perplexity_history = []
    checkpoint_files = []
    valid_acc, valid_ppl = None, None

    try:
        for epoch in range(self.start_epoch, self.max_epochs + 1):
            self._logger.log(self._log_level, 'Training epoch %g... START' % epoch)
            start_time_epoch = time.time()

            # (1) train for one epoch on the training set
            train_loss, train_acc = self._train_epoch(epoch, train_data, self._model,
                                                      criterion, self._optim)
            train_ppl = math.exp(min(train_loss, 100))
            self._logger.log(self._log_level,
                             'trainEpoch Epoch %g Train loss: %g perplexity: %g accuracy: %g' % (
                                 epoch, train_loss, train_ppl, (float(train_acc) * 100)))

            force_termination = False
            if self.min_perplexity_decrement > 0.:
                perplexity_history.append(train_ppl)
                force_termination = self._should_terminate(perplexity_history)

            if valid_data:
                # (2) evaluate on the validation set
                valid_loss, valid_acc = self._evaluate(criterion, valid_data)
                valid_ppl = math.exp(min(valid_loss, 100))
                self._logger.log(self._log_level,
                                 'trainModel Epoch %g Validation loss: %g perplexity: %g accuracy: %g' % (
                                     epoch, valid_loss, valid_ppl, (float(valid_acc) * 100)))

                # (3) update the learning rate
                self._optim.updateLearningRate(valid_loss, epoch)
                self._logger.log(self._log_level,
                                 "trainModel Epoch %g Decaying learning rate to %g" % (
                                     epoch, self._optim.lr))

            if save_path is not None and save_epochs > 0:
                # keep at most save_epochs checkpoints on disk
                if len(checkpoint_files) > 0 and len(checkpoint_files) > save_epochs - 1:
                    os.remove(checkpoint_files.pop(0))

                model_state_dict = self._model.module.state_dict() if multi_gpu \
                    else self._model.state_dict()
                model_state_dict = {k: v for k, v in model_state_dict.items()
                                    if 'generator' not in k}
                generator_state_dict = self._model.generator.module.state_dict() if multi_gpu \
                    else self._model.generator.state_dict()

                # (4) drop a checkpoint
                checkpoint = {
                    'model': model_state_dict,
                    'generator': generator_state_dict,
                    'dicts': {'src': self._src_dict, 'tgt': self._trg_dict},
                    'opt': copy.deepcopy(self._model_params.__dict__),
                    'epoch': epoch,
                    'optim': self._optim
                }

                if valid_acc is not None:
                    checkpoint_file = '%s_acc_%.2f_ppl_%.2f_e%d.pt' % (
                        save_path, 100 * valid_acc, valid_ppl, epoch)
                else:
                    checkpoint_file = '%s_acc_NA_ppl_NA_e%d.pt' % (save_path, epoch)

                torch.save(checkpoint, checkpoint_file)
                checkpoint_files.append(checkpoint_file)
                self._logger.log(self._log_level,
                                 "Checkpoint for epoch %d saved to file %s" % (epoch, checkpoint_file))

            if force_termination:
                break

            self._logger.log(self._log_level,
                             'Training epoch %g... END %.2fs' % (epoch, time.time() - start_time_epoch))
    except KeyboardInterrupt:
        raise TrainingInterrupt(checkpoint=checkpoint_files[-1]
                                if len(checkpoint_files) > 0 else None)

    return checkpoint_files[-1] if len(checkpoint_files) > 0 else None
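# Hedged resume sketch matching the checkpoint layout written above (keys:
# 'model', 'generator', 'dicts', 'opt', 'epoch', 'optim'). The model and
# generator objects are assumed to be already constructed; strict=False is
# needed because the generator keys were stripped from the model state dict.
checkpoint = torch.load(checkpoint_file, map_location='cpu')
model.load_state_dict(checkpoint['model'], strict=False)
model.generator.load_state_dict(checkpoint['generator'])
start_epoch = checkpoint['epoch'] + 1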
def train_model():
    num_epochs = 200
    learning_rate = 0.000001
    batch_size = 10
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)
    model = ConvNet().to(device)

    # regenerate the pickled features if needed:
    # dataxy = get_data()
    # with open("psd_score.txt", "wb") as f:
    #     pickle.dump(dataxy, f)
    with open('psd_score.txt', 'rb') as file:
        dataxy = pickle.load(file)
    x = np.array(dataxy[0])
    y = np.array(dataxy[1])
    train_x_origin, test_x_origin, train_y_origin, test_y_origin = train_test_split(x, y)
    traindataset = ds(train_x_origin, train_y_origin, len(train_x_origin))
    testdataset = ds(test_x_origin, test_y_origin, len(test_x_origin))

    loss = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    train_loader = DataLoader(dataset=traindataset, batch_size=batch_size,
                              shuffle=True, num_workers=0)
    test_loader = DataLoader(dataset=testdataset, batch_size=batch_size,
                             shuffle=True, num_workers=0)

    train_loss = []
    test_loss = []
    for epoch in tqdm(range(num_epochs)):
        cur_train_loss = []
        cur_test_loss = []
        model.train()
        for i, (data, labels) in enumerate(train_loader):
            # reshape once and keep the tensor on the target device
            data = data.reshape(-1, 1, 5, 31).float().to(device)
            labels = labels.float().to(device)
            y_pred = model(data)
            l = loss(y_pred, labels)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            cur_train_loss.append(l.item())
        train_loss.append(np.mean(cur_train_loss))
        print("train loss ", np.mean(cur_train_loss))

        model.eval()
        with torch.no_grad():  # no gradients needed for evaluation
            for i, (test_x, test_y) in enumerate(test_loader):
                test_x = test_x.reshape(-1, 1, 5, 31).float().to(device)
                test_y = test_y.float().to(device)
                out = model(test_x)
                l = loss(out, test_y)
                cur_test_loss.append(l.item())
        test_loss.append(np.mean(cur_test_loss))
        print("test loss ", np.mean(cur_test_loss))

    plt.plot(train_loss, label="train_loss")
    plt.plot(test_loss, label="test_loss")
    plt.legend()
    plt.show()

    torch.save(model.state_dict(), "model1.pt")

    predict = []
    model.eval()
    with torch.no_grad():
        for i in range(len(test_x_origin)):
            predict.append(model(
                torch.from_numpy(test_x_origin[i]).reshape(-1, 1, 5, 31)
                .float().to(device)).item())
    return test_y_origin, predict
    audio_rnn, audio_accuracies, valid_losses, train_losses \
        = audio_rnn_pretraining(data_loaders, audio_hyperparameters,
                                EPOCHS_a, lr_a, clip)

    # Printing Learning Curves
    learn_curves(valid_losses, train_losses, "AudioRNN_Loss")

    # save model metadata
    audio_rnn_metadata = {"accuracy": audio_accuracies,
                          "valid_loss": valid_losses,
                          "train_loss": train_losses}

    # save metadata dictionaries
    pickle_save("audio_rnn.p", audio_rnn_metadata)

'''
####################################################################
# Save/Load models
####################################################################
'''
# SAVING MODE
# save model dictionary to PATH
rnn_path = os.path.abspath("rnn_metadata")
# use the .pt extension: these files hold state dicts, not Python modules
TEXT_RNN_PATH = os.path.join(rnn_path, "text_rnn_model.pt")
AUDIO_RNN_PATH = os.path.join(rnn_path, "audio_rnn_model.pt")

# always transfer to cpu for inter-user compatibility
model = text_rnn.to("cpu")
torch.save(model.state_dict(), TEXT_RNN_PATH)
model = audio_rnn.to("cpu")
torch.save(model.state_dict(), AUDIO_RNN_PATH)
"accuracy": text_accuracies, "valid_loss": valid_losses, "train_loss": train_losses } # save metadata dict pickle_save(file_prefix + "text_rnn.p", text_rnn_metadata) # SAVING MODE # save model dictionary to PATH rnn_path = os.path.abspath("pretrained_models") TEXT_RNN_PATH = os.path.join(rnn_path, file_prefix + "text_rnn_model.pt") # always tranfer to cpu for interuser compatibility model = text_rnn.to("cpu") torch.save(model.state_dict(), TEXT_RNN_PATH) elif len(sys.argv) > 2 and sys.argv[2] == '-pre_audio': print(' ----- Pretrain Audio classifier ----- ') #################################################################### # Training Audio RNN Model #################################################################### EPOCHS_a = 150 lr_a = 0.0001 clip = 5.0 data_loaders = (train_loader, valid_loader, test_loader) audio_rnn, audio_accuracies, valid_losses, train_losses\ = audio_rnn_pretraining(data_loaders, audio_hyperparameters, EPOCHS_a, lr_a, clip) # Printing Learning Curves
def train_model():
    num_epochs = 100
    learning_rate = 0.00001
    batch_size = 5
    channel_size = 3
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)
    model = ConvNet().to(device)
    # model = AlexNet().to(device)

    # regenerate the pickled features if needed:
    # dataxy = get_data()
    # with open("psd_score.txt", "wb") as f:
    #     pickle.dump(dataxy, f)
    with open("D:/code/code/eegemotion/git/model/corr_classify/corr_score_classify.txt",
              "rb") as f:
        dataxy = pickle.load(f)
    x = np.array(dataxy[0])
    y = np.array(dataxy[1])
    train_x_origin, test_x_origin, train_y_origin, test_y_origin = train_test_split(x, y)
    traindataset = ds(train_x_origin, train_y_origin, len(train_x_origin))
    testdataset = ds(test_x_origin, test_y_origin, len(test_x_origin))

    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    train_loader = DataLoader(dataset=traindataset, batch_size=batch_size,
                              shuffle=True, num_workers=0)
    test_loader = DataLoader(dataset=testdataset, batch_size=batch_size,
                             shuffle=True, num_workers=0)

    train_loss, test_loss = [], []
    train_accuracy, test_accuracy = [], []
    for epoch in tqdm(range(num_epochs)):
        cur_train_loss = []
        cur_test_loss = []
        right, total = 0, 0
        model.train()
        for i, (data, labels) in enumerate(train_loader):
            data = data.reshape(-1, channel_size, 31, 31).float().to(device)
            labels = labels.to(device).long()
            y_pred = model(data)
            l = loss(y_pred, labels)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            cur_train_loss.append(l.item())
            right += (torch.argmax(y_pred, dim=1) == labels).sum().item()
            total += labels.size(0)  # count the actual batch size; the last batch may be smaller
        train_loss.append(np.mean(cur_train_loss))
        train_accuracy.append(right / total)
        print("train loss ", np.mean(cur_train_loss), " accuracy: ", right / total)

        right, total = 0, 0
        model.eval()
        with torch.no_grad():
            for i, (test_x, test_y) in enumerate(test_loader):
                test_x = test_x.reshape(-1, channel_size, 31, 31).float().to(device)
                test_y = test_y.to(device).long()
                out = model(test_x)
                l = loss(out, test_y)
                cur_test_loss.append(l.item())
                right += (torch.argmax(out, dim=1) == test_y).sum().item()
                total += test_y.size(0)
        test_loss.append(np.mean(cur_test_loss))
        test_accuracy.append(right / total)
        print("test loss ", np.mean(cur_test_loss), " accuracy: ", right / total)

    plt.plot(train_loss, label="train_loss")
    plt.plot(test_loss, label="test_loss")
    plt.legend()
    plt.show()
    plt.plot(train_accuracy, label="train_accuracy")
    plt.plot(test_accuracy, label="test_accuracy")
    plt.legend()
    plt.show()

    torch.save(model.state_dict(),
               "D:/code/code/eegemotion/git/model/corr_classify/model.pt")
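# Hedged inference sketch for the state dict saved above. ConvNet and the
# Windows path mirror the training code; the (channel_size, 31, 31) input
# shape is taken from the reshape calls in the training loop.
model = ConvNet()
model.load_state_dict(torch.load(
    "D:/code/code/eegemotion/git/model/corr_classify/model.pt",
    map_location="cpu"))
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 31, 31))  # dummy input of the expected shape
    pred_class = torch.argmax(logits, dim=1)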
def build_model(train_dataset, dev_dataset, test_dataset, collate_fn, tag_idx,
                is_oov, embedding_matrix, model_save_path, plot_save_path):
    # init model
    model = MultiBatchSeqNet(embedding_matrix, batch_size=cfg.BATCH_SIZE,
                             isCrossEnt=False, char_level=cfg.CHAR_LEVEL,
                             pos_feat=cfg.POS_FEATURE,
                             dep_rel_feat=cfg.DEP_LABEL_FEATURE,
                             dep_word_feat=cfg.DEP_WORD_FEATURE)
    # turn on cuda
    model = model.cuda()

    # verify model
    print(model)

    # filter out parameters that have requires_grad = False
    optimizer = optim.Adadelta(filter(lambda p: p.requires_grad, model.parameters()),
                               lr=cfg.LEARNING_RATE)
    # optimizer = optim.SGD(model.parameters(), lr=cfg.LEARNING_RATE, momentum=0.9)
    optimizer.zero_grad()
    model.zero_grad()

    # init loss criteria (size_average=False is deprecated; reduction='sum' is equivalent)
    seq_criterion = nn.NLLLoss(reduction='sum')
    lm_f_criterion = nn.NLLLoss(reduction='sum')
    lm_b_criterion = nn.NLLLoss(reduction='sum')
    att_loss = nn.CosineEmbeddingLoss(margin=1)

    best_res_val_0 = 0.0
    best_res_val_1 = 0.0
    best_epoch = 0
    dev_eval_history = []
    test_eval_history = []
    for epoch in range(cfg.MAX_EPOCH):
        print('-' * 40)
        print("EPOCH = {0}".format(epoch))
        print('-' * 40)
        random.seed(epoch)
        train_loader = DataLoader(train_dataset, batch_size=cfg.BATCH_SIZE,
                                  shuffle=cfg.RANDOM_TRAIN, num_workers=28,
                                  collate_fn=collate_fn)
        train_eval, model = train_a_epoch(name="train", data=train_loader,
                                          tag_idx=tag_idx, is_oov=is_oov,
                                          model=model, optimizer=optimizer,
                                          seq_criterion=seq_criterion,
                                          lm_f_criterion=lm_f_criterion,
                                          lm_b_criterion=lm_b_criterion,
                                          att_loss=att_loss, gamma=cfg.LM_GAMMA)

        dev_loader = DataLoader(dev_dataset, batch_size=cfg.BATCH_SIZE,
                                num_workers=28, collate_fn=collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=cfg.BATCH_SIZE,
                                 num_workers=28, collate_fn=collate_fn)
        dev_eval, _, _, _ = test("dev", dev_loader, tag_idx, model)
        test_eval, _, _, _ = test("test", test_loader, tag_idx, model)

        dev_eval.verify_results()
        test_eval.verify_results()

        dev_eval_history.append(dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]])
        test_eval_history.append(test_eval.results['test_conll_f'])
        plot_curve(epoch, dev_eval_history, test_eval_history, "epochs", "fscore",
                   "epoch learning curve", plot_save_path)
        with open("plot_data.p", "wb") as f:
            pickle.dump((dev_eval_history, test_eval_history), f)

        # pick the best epoch
        if epoch < cfg.MIN_EPOCH_IMP or (
                dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]] > best_res_val_0):
            best_epoch = epoch
            best_res_val_0 = dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]
            torch.save(model, model_save_path)

        print("current dev micro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]))
        print("current dev macro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[1]]))
        print("best dev micro_score: {0}".format(best_res_val_0))
        print("best_epoch: {0}".format(best_epoch))

        # early stopping: no dev improvement for MAX_EPOCH_IMP epochs
        if 0 < cfg.MAX_EPOCH_IMP <= (epoch - best_epoch):
            break

    print("Loading Best Model ...")
    model = torch.load(model_save_path)
    return model