def train_and_save_model(data_train, data_test, vocab, max_sentence_length, save_dir):
    """Train a DPCNN text classifier and persist the weights.

    Trains on ``data_train``, validates on ``data_test``, and saves the
    resulting model as 'new_model.pkl' under ``save_dir``.
    """
    # Print the installed torch version so fastNLP compatibility can be checked.
    print(torch.__version__)

    # Build the network (embedding size / class count come from module globals).
    net = DPCNN(max_features=len(vocab),
                word_embedding_dimension=word_embedding_dimension,
                max_sentence_length=max_sentence_length,
                num_classes=num_classes)

    # Map model output fields to dataset target fields for loss and metric.
    ce_loss = CrossEntropyLoss(pred="output", target="label_seq")
    acc_metric = AccuracyMetric(pred="predict", target="label_seq")

    # embedding=300 gaussian init, weight_decay=0.0001, lr=0.001, epoch=5
    trainer = Trainer(model=net,
                      train_data=data_train,
                      dev_data=data_test,
                      loss=ce_loss,
                      metrics=acc_metric,
                      save_path='CD',
                      batch_size=64,
                      n_epochs=5,
                      optimizer=Adam(lr=0.001, weight_decay=0.0001))
    trainer.train()

    # Persist the trained model.
    _save_model(net, model_name='new_model.pkl', save_dir=save_dir)
def test_collect_fn2(self):
    """Check that a custom collect_fn can produce both batch_x and batch_y."""
    dataset = prepare_fake_dataset2('x1', 'x2')
    dataset.set_input('x1', 'x2')
    dataset.set_target('y', 'x1')
    import torch

    def fn(ins_list):
        # Each entry is (index, instance); sum the two input fields per instance.
        summed = [ins['x1'] + ins['x2'] for ind, ins in ins_list]
        batch = torch.FloatTensor(summed)
        # batch_x carries the summed features; batch_y derives a target from them.
        return {'x': batch}, {'target': batch[:, :4].argmax(dim=-1)}

    dataset.add_collect_fn(fn)

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(5, 4)

        def forward(self, x1, x2, x):
            h1 = self.fc(x1)
            h2 = self.fc(x2)
            h3 = self.fc(x)
            sum_x = h1 + h2 + h3
            time.sleep(0.1)  # deliberately slow each step down a little
            # loss = F.cross_entropy(x, y)
            return {'pred': sum_x}

    model = Model()
    trainer = Trainer(train_data=dataset, model=model, loss=CrossEntropyLoss(),
                      print_every=2, dev_data=dataset, metrics=AccuracyMetric(),
                      use_tqdm=False)
    trainer.train()
def test_save_path(self):
    """Training with a save_path set should run to completion."""
    data_set = prepare_fake_dataset()
    data_set.set_input("x", flag=True)
    data_set.set_target("y", flag=True)
    train_set, dev_set = data_set.split(0.3)

    model = NaiveClassifier(2, 1)
    save_path = 'test_save_models'

    trainer = Trainer(
        train_set,
        model,
        optimizer=SGD(lr=0.1),
        loss=BCELoss(pred="predict", target="y"),
        batch_size=32,
        n_epochs=10,
        print_every=50,
        dev_data=dev_set,
        metrics=AccuracyMetric(pred="predict", target="y"),
        validate_every=-1,
        save_path=save_path,
        use_tqdm=True,
        check_code_level=2,
    )
    trainer.train()
def test_early_stop(self):
    """Training with an EarlyStopCallback(patience=5) should run cleanly."""
    data_set, model = prepare_env()
    early_stop = EarlyStopCallback(5)
    trainer = Trainer(
        data_set,
        model,
        optimizer=SGD(lr=0.01),
        loss=BCELoss(pred="predict", target="y"),
        batch_size=32,
        n_epochs=20,
        print_every=50,
        dev_data=data_set,
        metrics=AccuracyMetric(pred="predict", target="y"),
        use_tqdm=False,
        callbacks=[early_stop],
        check_code_level=2,
    )
    trainer.train()
def bilstm_text():
    """Train a BiLSTM text classifier and evaluate it on the test split.

    Loads pre-trained embedding weights from 'weight.bin', trains
    MyBLSTMText on the prepared train/dev splits, then runs a Tester on
    the test split.

    Fix: the pickle file is now opened with a context manager so the file
    handle is closed deterministically instead of leaking.
    """
    # NOTE: pickle.load can execute arbitrary code on untrusted input;
    # 'weight.bin' is assumed to be a trusted, locally produced artifact.
    with open("weight.bin", "rb") as f:
        w = pickle.load(f)

    (vocab, train_data, dev_data, test_data) = read_data()

    model_lstm = MyBLSTMText(class_num=4, vocab_size=len(vocab),
                             dropout=0.5, embed_weights=w)
    loss = CrossEntropyLoss()
    metrics = AccuracyMetric()

    trainer = Trainer(model=model_lstm, train_data=train_data,
                      dev_data=dev_data, optimizer=Adam(lr=0.0015),
                      print_every=10, use_tqdm=False, device='cuda:0',
                      save_path="./lstm_model", loss=loss, metrics=metrics)
    # callbacks=[EarlyStopCallback(10)])
    trainer.train()

    # Final evaluation on the held-out test split.
    tester = Tester(test_data, model_lstm, metrics=AccuracyMetric())
    tester.test()
def CNNmethod():
    # Train a CNNText model, repeatedly, pausing for user input between runs.
    # NOTE: relies on module-level globals not defined here: D_input, D_output,
    # loss, metrics, callback, and LSTM — verify they exist in the full file.
    train_data, dev_data, test_data, D_dict, vocab = data_preprocess()
    CNN = CNNText(D_input, D_output, D_dict=D_dict)
    print('start train')
    optim = RMSprop(CNN.parameters(), lr=0.01)
    # NOTE(review): dev_data here is set to train_data, so validation runs on
    # the training split — confirm this is intentional.
    trainer = Trainer(model=CNN, train_data=train_data, dev_data=train_data, loss=loss, metrics=metrics, batch_size=32, optimizer=optim, save_path='.record/', validate_every=30, callbacks=[callback])
    # Infinite loop: train, then block on input() until the user presses Enter
    # (or interrupts the process) before training again.
    while 1:
        trainer.train()
        input('You stop the program')
    # NOTE(review): everything below is unreachable (the loop above never exits
    # normally), and it tests a model named LSTM rather than the CNN trained
    # above — this looks like a copy/paste leftover; verify against intent.
    tester = Tester(test_data, LSTM, metrics)
    tester.test()
    tester = Tester(train_data, LSTM, metrics)
    tester.test()
    tester = Tester(dev_data, LSTM, metrics)
    tester.test()
def test_control_C():
    """Manual test for the ControlC callback.

    Interrupt each of the two training runs with Ctrl+C; the test passes
    as long as "Test failed!" is never printed at the end.
    """
    from fastNLP import ControlC, Callback
    import time

    line1 = "\n\n\n\n\n*************************"
    line2 = "*************************\n\n\n\n\n"

    class Wait(Callback):
        def on_epoch_end(self):
            # Give the tester a window in which to press Ctrl+C.
            time.sleep(5)

    data_set, model = prepare_env()

    def build_trainer(quit_all):
        # Two runs differ only in the ControlC quit_all flag.
        return Trainer(data_set, model, optimizer=SGD(lr=0.1),
                       loss=BCELoss(pred="predict", target="y"),
                       batch_size=32, n_epochs=20, dev_data=data_set,
                       metrics=AccuracyMetric(pred="predict", target="y"),
                       use_tqdm=True,
                       callbacks=[Wait(), ControlC(quit_all)],
                       check_code_level=2)

    print(line1 + "Test starts!" + line2)
    build_trainer(False).train()
    print(line1 + "Program goes on ..." + line2)
    build_trainer(True).train()
    print(line1 + "Test failed!" + line2)
def test_trainer_suggestion3(self):
    """Verify the error hint when forward() returns no 'loss' key.

    All of forward()'s inputs are provided via set_input, but forward
    returns the loss under a wrong key, which should raise NameError.
    """
    dataset = prepare_fake_dataset2('x1', 'x2')
    dataset.set_input('x1', 'x2', 'y', flag=True)

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(5, 4)

        def forward(self, x1, x2, y):
            combined = self.fc(x1) + self.fc(x2)
            loss = F.cross_entropy(combined, y)
            # Deliberately misnamed key — the Trainer expects 'loss'.
            return {'wrong_loss_key': loss}

    model = Model()
    with self.assertRaises(NameError):
        trainer = Trainer(train_data=dataset, model=model,
                          print_every=2, use_tqdm=False)
        trainer.train()
def main():
    """Parse CLI arguments, train the selected model, and save it.

    Command-line flags select the architecture (rnn/lstm/cnn), training
    hyper-parameters, and the number of categories.

    Raises:
        ValueError: if --category is outside [1, 20].

    Fix: the category-range check now raises ValueError instead of a bare
    Exception (narrower, still caught by any existing `except Exception`).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--methods", "-m", default="lstm", choices=["rnn", "lstm", "cnn"])
    parser.add_argument("--n_epochs", "-n", default=5, type=int)
    parser.add_argument("--embedding", "-e", default=100, type=int)
    parser.add_argument("--category", "-c", default=4, type=int)
    parser.add_argument("--batch", "-b", default=4, type=int)
    parser.add_argument("--learning_rate", "-l", default=0.005, type=float)
    args = parser.parse_args()

    if args.category > 20 or args.category < 1:
        # A bad argument value is a ValueError, not a generic Exception.
        raise ValueError("the number of category must be between 1 and 20")

    train_data, test_data, dic_size = handle_data(args.category)

    if args.methods == "rnn":
        model = rnn(dic_size, args.category)
        output = "rnn_model.pth"
    elif args.methods == "lstm":
        model = myLSTM(dic_size, args.category)
        output = "lstm_model.pth"
    else:
        # model = cnn(dic_size, args.category)
        # The cnn path resumes from a previously saved checkpoint.
        model = torch.load("cnn_model.pth")
        output = "cnn_model.pth"

    trainer = Trainer(train_data, model,
                      loss=CrossEntropyLoss(pred="pred", target='target'),
                      optimizer=SGD(model.parameters(), lr=args.learning_rate),
                      n_epochs=args.n_epochs, dev_data=test_data,
                      metrics=AccuracyMetric(pred="pred", target='target'),
                      batch_size=args.batch)
    trainer.train()
    torch.save(model, output)
def test_trainer_suggestion2(self):
    # Error-hint check: all of forward()'s inputs are provided via set_input,
    # and forward returns the 'loss' key, so training should simply run.
    dataset = prepare_fake_dataset2('x1', 'x2')
    dataset.set_input('x1', 'x2', 'y', flag=True)

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(5, 4)

        def forward(self, x1, x2, y):
            x1 = self.fc(x1)
            x2 = self.fc(x2)
            x = x1 + x2
            loss = F.cross_entropy(x, y)
            return {'loss': loss}

    model = Model()
    trainer = Trainer(train_data=dataset, model=model, print_every=2, use_tqdm=False)
    trainer.train()
    # NOTE(review): the triple-quote below opens a string literal that is not
    # terminated within this chunk — it presumably comments out code that
    # continues past the visible lines; confirm against the full file.
    """
def test_trainer_data_parallel(self):
    """Smoke-test data-parallel training; effectively skipped on <2 GPUs."""
    if torch.cuda.device_count() > 1:
        from fastNLP import AccuracyMetric
        dataset = prepare_fake_dataset2('x1', 'x2')
        dataset.set_input('x1', 'x2', 'y', flag=True)

        class Model(nn.Module):
            def __init__(self):
                super().__init__()
                self.fc = nn.Linear(5, 4)

            def forward(self, x1, x2, y=None):
                out = self.fc(x1) + self.fc(x2)
                # Training returns a loss; evaluation returns pred/target.
                if self.training:
                    return {'loss': F.cross_entropy(out, y)}
                return {'pred': out, 'target': y}

        model = Model()
        trainer = Trainer(train_data=dataset, model=model, print_every=2,
                          use_tqdm=False, dev_data=dataset,
                          metrics=AccuracyMetric(), device=[0, 1])
        trainer.train(load_best_model=False)
def test_gradient_clip(self):
    """Training with a GradientClipCallback should run cleanly."""
    data_set, model = prepare_env()
    clipper = GradientClipCallback(model.parameters(), clip_value=2)
    trainer = Trainer(
        data_set,
        model,
        optimizer=SGD(lr=0.1),
        loss=BCELoss(pred="predict", target="y"),
        batch_size=32,
        n_epochs=20,
        print_every=50,
        dev_data=data_set,
        metrics=AccuracyMetric(pred="predict", target="y"),
        use_tqdm=False,
        callbacks=[clipper],
        check_code_level=2,
    )
    trainer.train()
def test_readonly_property(self):
    """Callbacks should be able to read trainer properties each epoch."""
    from fastNLP.core.callback import Callback

    passed_epochs = []
    total_epochs = 5

    class MyCallback(Callback):
        def __init__(self):
            super(MyCallback, self).__init__()

        def on_epoch_begin(self):
            # Record the epoch and exercise the read-only properties.
            passed_epochs.append(self.epoch)
            print(self.n_epochs, self.n_steps, self.batch_size)
            print(self.model)
            print(self.optimizer)

    data_set, model = prepare_env()
    trainer = Trainer(data_set, model,
                      loss=BCELoss(pred="predict", target="y"),
                      n_epochs=total_epochs, batch_size=32, print_every=50,
                      optimizer=SGD(lr=0.1), check_code_level=2,
                      use_tqdm=False, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"),
                      callbacks=[MyCallback()])
    trainer.train()
    # Every epoch from 1..total_epochs must have been observed, in order.
    assert passed_epochs == list(range(1, total_epochs + 1))
def test_warmup_callback(self):
    """Training with a WarmupCallback should run cleanly."""
    data_set, model = prepare_env()
    warmup_callback = WarmupCallback()
    trainer = Trainer(
        data_set,
        model,
        optimizer=SGD(lr=0.1),
        loss=BCELoss(pred="predict", target="y"),
        batch_size=32,
        n_epochs=5,
        print_every=50,
        dev_data=data_set,
        metrics=AccuracyMetric(pred="predict", target="y"),
        use_tqdm=True,
        callbacks=warmup_callback,  # a single callback is accepted un-listed
        check_code_level=2,
    )
    trainer.train()
def test_lr_scheduler(self):
    """Training with an LRScheduler (StepLR) callback should run cleanly."""
    data_set, model = prepare_env()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    step_lr = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    trainer = Trainer(
        data_set,
        model,
        optimizer=optimizer,
        loss=BCELoss(pred="predict", target="y"),
        batch_size=32,
        n_epochs=5,
        print_every=50,
        dev_data=data_set,
        metrics=AccuracyMetric(pred="predict", target="y"),
        use_tqdm=False,
        callbacks=[LRScheduler(step_lr)],
        check_code_level=2,
    )
    trainer.train()
def test_early_stop_callback(self):
    """Manual test: watch the output to confirm training actually EarlyStops."""
    data_set, model = prepare_env()
    trainer = Trainer(
        data_set,
        model,
        optimizer=SGD(lr=0.1),
        loss=BCELoss(pred="predict", target="y"),
        batch_size=2,
        n_epochs=10,
        print_every=5,
        dev_data=data_set,
        metrics=AccuracyMetric(pred="predict", target="y"),
        use_tqdm=True,
        callbacks=EarlyStopCallback(1),  # patience of 1 epoch
        check_code_level=2,
    )
    trainer.train()
def test_fastnlp_1min_tutorial(self):
    """Replays tutorials/fastnlp_1min_tutorial.ipynb end-to-end."""
    data_path = "test/data_for_tests/tutorial_sample_dataset.csv"
    ds = DataSet.read_csv(data_path, headers=('raw_sentence', 'label'), sep='\t')
    print(ds[1])

    # Lowercase every sentence in place.
    ds.apply(lambda x: x['raw_sentence'].lower(), new_field_name='raw_sentence')
    # Convert the label to an int target.
    ds.apply(lambda x: int(x['label']), new_field_name='target', is_target=True)

    # Whitespace tokenization into a 'words' input field.
    ds.apply(lambda ins: ins['raw_sentence'].split(),
             new_field_name='words', is_input=True)

    # Split into train / dev sets.
    train_data, dev_data = ds.split(0.3)
    print("Train size: ", len(train_data))
    print("Test size: ", len(dev_data))

    from fastNLP import Vocabulary
    vocab = Vocabulary(min_freq=2)
    # Populate the vocabulary from the training words only.
    train_data.apply(lambda x: [vocab.add(word) for word in x['words']])

    # Replace words with vocabulary indices on both splits.
    def to_indices(x):
        return [vocab.to_index(word) for word in x['words']]

    train_data.apply(to_indices, new_field_name='words', is_input=True)
    dev_data.apply(to_indices, new_field_name='words', is_input=True)

    from fastNLP.models import CNNText
    model = CNNText((len(vocab), 50), num_classes=5, padding=2, dropout=0.1)

    from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric, Adam
    trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data,
                      loss=CrossEntropyLoss(), optimizer=Adam(),
                      metrics=AccuracyMetric(target='target'))
    trainer.train()
    print('Train finished!')
def test_evaluate_callback(self):
    """Training with an EvaluateCallback wrapping a Tester should run cleanly."""
    data_set, model = prepare_env()
    from fastNLP import Tester
    tester = Tester(data=data_set, model=model,
                    metrics=AccuracyMetric(pred="predict", target="y"))
    evaluate_callback = EvaluateCallback(data_set, tester)
    trainer = Trainer(
        data_set,
        model,
        optimizer=SGD(lr=0.1),
        loss=BCELoss(pred="predict", target="y"),
        batch_size=32,
        n_epochs=5,
        print_every=50,
        dev_data=data_set,
        metrics=AccuracyMetric(pred="predict", target="y"),
        use_tqdm=False,
        callbacks=evaluate_callback,
        check_code_level=2,
    )
    trainer.train()
def test_TensorboardCallback(self):
    """Train with a TensorboardCallback, then clean up its log directory."""
    data_set, model = prepare_env()
    trainer = Trainer(
        data_set,
        model,
        optimizer=SGD(lr=0.1),
        loss=BCELoss(pred="predict", target="y"),
        batch_size=32,
        n_epochs=5,
        print_every=50,
        dev_data=data_set,
        metrics=AccuracyMetric(pred="predict", target="y"),
        use_tqdm=False,
        callbacks=[TensorboardCallback("loss", "metric")],
        check_code_level=2,
    )
    trainer.train()

    # Remove the tensorboard log directory created for this run.
    import os
    import shutil
    path = os.path.join("./", 'tensorboard_logs_{}'.format(trainer.start_time))
    if os.path.exists(path):
        shutil.rmtree(path)
def workflow():
    """POS-tagging workflow: prepare data, build/load a BiLSTM-CRF, then
    train (when args.mode == "train") and evaluate on the test split.
    Driven by the module-level ``args`` configuration."""
    train_data, valid_data, test_data, vocab, speech_vocab = prepare_data()

    # Tag input/target fields on every split; the Trainer relies on these tags.
    for split in (train_data, test_data, valid_data):
        split.set_input("token_index_list", "origin_len", "speech_index_list")
        split.set_target("speech_index_list")

    # Model hyper-parameters.
    config = {
        "vocab_size": len(vocab),
        "word_emb_dim": args.word_emb,
        "rnn_hidden_units": args.rnn_hidden,
        "num_classes": len(speech_vocab),
        "bi_direction": args.bilstm
    }

    # Resume from a saved checkpoint when --cont is given, else start fresh.
    model = torch.load(args.cont) if args.cont else BiLSTMCRF(config)

    if args.mode == "train":
        # Choose the optimizer.
        optimizer = Adam(lr=args.lr) if args.op else SGD(lr=args.lr)
        trainer = Trainer(model=model, train_data=train_data,
                          dev_data=valid_data, use_cuda=args.cuda,
                          metrics=PosMetric(pred='pred',
                                            target='speech_index_list'),
                          optimizer=optimizer, n_epochs=args.epoch,
                          batch_size=args.batch_size, save_path="./save")
        trainer.train()

    # Evaluate on the held-out test split.
    tester = Tester(
        data=test_data,
        model=model,
        metrics=PosMetric(pred='pred', target='speech_index_list'),
        use_cuda=args.cuda,
    )
    tester.test()
def run_model(self, model, data, loss, metrics):
    """Run a model end-to-end to verify it works with fastNLP.

    Evaluates the model before and after a short training run and asserts
    that every metric reported before training is still reported after.

    Fix: the final loop iterated ``.items()`` but ignored the values —
    it now iterates keys only (the unused ``v1`` local is gone).
    """
    print('testing model:', model.__class__.__name__)
    tester = Tester(data=data, model=model, metrics=metrics,
                    batch_size=BATCH_SIZE, verbose=0)
    before_train = tester.test()

    trainer = Trainer(train_data=data, model=model, loss=loss,
                      batch_size=BATCH_SIZE, n_epochs=N_EPOCHS,
                      dev_data=None, save_path=None, use_tqdm=False)
    trainer.train(load_best_model=False)

    after_train = tester.test()
    # The metric set must be stable across training.
    for metric_name in before_train:
        assert metric_name in after_train
def test_save_model_callback(self):
    """SaveModelCallback(top=3) must leave exactly 3 checkpoints on disk."""
    data_set, model = prepare_env()
    top = 3
    save_model_callback = SaveModelCallback(self.tempdir, top=top)
    trainer = Trainer(
        data_set,
        model,
        optimizer=SGD(lr=0.1),
        loss=BCELoss(pred="predict", target="y"),
        batch_size=32,
        n_epochs=5,
        print_every=50,
        dev_data=data_set,
        metrics=AccuracyMetric(pred="predict", target="y"),
        use_tqdm=True,
        callbacks=save_model_callback,
        check_code_level=2,
    )
    trainer.train()

    # The callback writes into a single timestamped subdirectory.
    timestamp = os.listdir(self.tempdir)[0]
    saved = os.listdir(os.path.join(self.tempdir, timestamp))
    self.assertEqual(len(saved), top)
def train(datainfo, model, optimizer, loss, metrics, opt):
    """Train ``model`` on the 'train' split, validating on the 'test' split.

    Epoch count and checkpoint path come from the ``opt`` configuration.
    """
    trainer = Trainer(
        datainfo.datasets['train'],
        model,
        optimizer=optimizer,
        loss=loss,
        metrics=metrics,
        dev_data=datainfo.datasets['test'],
        device=0,
        check_code_level=-1,
        n_epochs=opt.train_epoch,
        save_path=opt.save_model_path,
    )
    trainer.train()
def train(config):
    """Train the text-classification model selected by ``config.task_name``
    and evaluate it on the test split.

    Supported task names: lstm, lstm_maxpool, rnn, cnn, cnn_w2v, rcnn.

    Fixes: the five pickle files are now opened with context managers so the
    file handles are closed deterministically (previously leaked), and an
    unknown task_name now raises ValueError up front instead of a later
    NameError when text_model is used.
    """
    def _load_pickle(name):
        # NOTE: pickle.load can execute arbitrary code on untrusted input;
        # these are assumed to be trusted, locally produced artifacts.
        with open(os.path.join(config.data_path, name), "rb") as f:
            return pickle.load(f)

    train_data = _load_pickle(config.train_name)
    dev_data = _load_pickle(config.dev_name)
    test_data = _load_pickle(config.test_name)
    vocabulary = _load_pickle(config.vocabulary_name)
    # Pre-trained w2v weights (used only by the cnn_w2v task).
    weight = _load_pickle(config.weight_name)

    if config.task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary),
                                  embed_dim=config.embed_dim,
                                  output_dim=config.class_num,
                                  hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers,
                                  dropout=config.dropout)
    elif config.task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         class_num=config.class_num, kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                         static=config.static, in_channels=config.in_channels)
    elif config.task_name == "cnn_w2v":
        text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                             class_num=config.class_num,
                             kernel_num=config.kernel_num,
                             kernel_sizes=config.kernel_sizes,
                             dropout=config.dropout, static=config.static,
                             in_channels=config.in_channels, weight=weight)
    elif config.task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)
    else:
        # Fail fast — previously this fell through to a NameError later on.
        raise ValueError("unknown task_name: {}".format(config.task_name))

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    accuracy = AccuracyMetric(pred='output', target='target')

    trainer = Trainer(train_data=train_data, model=text_model,
                      loss=CrossEntropyLoss(), batch_size=config.batch_size,
                      check_code_level=0, metrics=accuracy,
                      n_epochs=config.epoch, dev_data=dev_data,
                      save_path=config.save_path,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      optimizer=optimizer, use_tqdm=False,
                      device=config.device, callbacks=[timing, early_stop])
    trainer.train()

    # Final evaluation on the held-out test split.
    tester = Tester(test_data, text_model, metrics=accuracy)
    tester.test()
def test_KeyBoardInterrupt(self):
    """Training with ControlC(False) should run cleanly."""
    data_set, model = prepare_env()
    trainer = Trainer(
        data_set,
        model,
        loss=BCELoss(pred="predict", target="y"),
        n_epochs=5,
        batch_size=32,
        print_every=50,
        optimizer=SGD(lr=0.1),
        check_code_level=2,
        use_tqdm=False,
        callbacks=[ControlC(False)],
    )
    trainer.train()
def test_fitlog_callback(self):
    """Training with a FitlogCallback (logging into tempdir) should run cleanly."""
    import fitlog
    fitlog.set_log_dir(self.tempdir, new_log=True)

    data_set, model = prepare_env()
    from fastNLP import Tester
    tester = Tester(data=data_set, model=model,
                    metrics=AccuracyMetric(pred="predict", target="y"))
    fitlog_callback = FitlogCallback(data_set, tester)

    trainer = Trainer(
        data_set,
        model,
        optimizer=SGD(lr=0.1),
        loss=BCELoss(pred="predict", target="y"),
        batch_size=32,
        n_epochs=5,
        print_every=50,
        dev_data=data_set,
        metrics=AccuracyMetric(pred="predict", target="y"),
        use_tqdm=True,
        callbacks=fitlog_callback,
        check_code_level=2,
    )
    trainer.train()
def test_LRFinder(self):
    """Training with an LRFinder callback should run cleanly."""
    data_set, model = prepare_env()
    # One LR-finder step per training batch (batch_size is 32 below).
    lr_finder = LRFinder(len(data_set) // 32)
    trainer = Trainer(
        data_set,
        model,
        loss=BCELoss(pred="predict", target="y"),
        n_epochs=5,
        batch_size=32,
        print_every=50,
        optimizer=SGD(lr=0.1),
        check_code_level=2,
        use_tqdm=False,
        callbacks=[lr_finder],
    )
    trainer.train()
def trainer(data_folder, write2model, write2vocab):
    """Train a BERT-embedding BiLSTM-CRF NER model on the People's Daily corpus.

    Dumps the target/word vocabularies (plus word counts) as JSON to
    ``write2vocab``, trains for one epoch, evaluates on the test split,
    and saves the full model to ``write2model``.
    """
    # Load the corpus from data_folder into a DataBundle and preprocess it.
    data_bundle = PeopleDailyNERLoader().load(data_folder)
    data_bundle = PeopleDailyPipe().process(data_bundle)
    data_bundle.rename_field('chars', 'words')

    # Serialize both vocabularies and their word counts.
    vocab_dump = {
        "targetVocab": dict(data_bundle.vocabs["target"]),
        "wordsVocab": dict(data_bundle.vocabs["words"]),
        "targetWc": dict(data_bundle.vocabs['target'].word_count),
        "wordsWc": dict(data_bundle.vocabs['words'].word_count)
    }
    with open(write2vocab, "w", encoding="utf-8") as VocabOut:
        VocabOut.write(json.dumps(vocab_dump, ensure_ascii=False))

    embed = BertEmbedding(vocab=data_bundle.get_vocab('words'),
                          model_dir_or_name='cn', requires_grad=False,
                          auto_truncate=True)
    model = BiLSTMCRF(embed=embed,
                      num_classes=len(data_bundle.get_vocab('target')),
                      num_layers=1, hidden_size=100, dropout=0.5,
                      target_vocab=data_bundle.get_vocab('target'))

    metric = SpanFPreRecMetric(tag_vocab=data_bundle.get_vocab('target'))
    optimizer = Adam(model.parameters(), lr=2e-5)
    loss = LossInForward()
    device = 0 if torch.cuda.is_available() else 'cpu'
    # device = "cpu"

    ner_trainer = Trainer(data_bundle.get_dataset('train'), model, loss=loss,
                          optimizer=optimizer, batch_size=8,
                          dev_data=data_bundle.get_dataset('dev'),
                          metrics=metric, device=device, n_epochs=1)
    ner_trainer.train()

    tester = Tester(data_bundle.get_dataset('test'), model, metrics=metric)
    tester.test()

    saver = ModelSaver(write2model)
    saver.save_pytorch(model, param_only=False)
def train_TextRNN():
    """Train a TextRNN classifier on the module-level datasets and test it."""
    model = TextRNN(TextRNNConfig)
    ce_loss = CrossEntropyLoss(pred="pred", target="target")
    acc = AccuracyMetric(pred="pred", target="target")

    trainer = Trainer(model=model,
                      train_data=dataset_train,
                      dev_data=dataset_dev,
                      loss=ce_loss,
                      metrics=acc,
                      batch_size=16,
                      n_epochs=20)
    trainer.train()

    # Final evaluation on the test split.
    tester = Tester(dataset_test, model, acc)
    tester.test()
def train():
    """Train the language model selected by the module-level ``opt`` config.

    Loads pickled train/validate datasets and the vocabulary, builds the
    model class named by opt.model_name, and trains with early stopping,
    saving checkpoints under opt.save_model_path.

    Fixes: pickle files are now opened with context managers (the file
    handles previously leaked), and an unrecognised opt.optimizer now
    raises ValueError immediately instead of a later NameError.
    """
    def _load_pickle(path):
        # NOTE: pickle.load can execute arbitrary code on untrusted input;
        # these are assumed to be trusted, locally produced artifacts.
        with open(path, 'rb') as f:
            return pickle.load(f)

    train_data = _load_pickle(opt.train_data_path)
    validate_data = _load_pickle(opt.validate_data_path)
    vocab = _load_pickle(opt.vocab)

    word2idx = vocab.word2idx
    idx2word = vocab.idx2word  # NOTE(review): unused here — kept for parity
    vocab_size = len(word2idx)
    print("vocab_size" + str(vocab_size))

    embedding_dim = opt.embedding_dim
    hidden_dim = opt.hidden_dim
    model = utils.find_class_by_name(opt.model_name, [models])(
        vocab_size, embedding_dim, hidden_dim)

    if not os.path.exists(opt.save_model_path):
        os.mkdir(opt.save_model_path)

    # Mark the fields the Trainer feeds into forward() and the loss.
    train_data.set_input('input_data', flag=True)
    train_data.set_target('target', flag=True)
    validate_data.set_input('input_data', flag=True)
    validate_data.set_target('target', flag=True)

    if opt.optimizer == 'Adagrad':
        _optimizer = Adagrad(lr=opt.learning_rate, weight_decay=0)
    elif opt.optimizer == 'SGD':
        _optimizer = SGD(lr=opt.learning_rate, momentum=0)
    elif opt.optimizer == 'SGD_momentum':
        _optimizer = SGD(lr=opt.learning_rate, momentum=0.9)
    elif opt.optimizer == 'Adam':
        _optimizer = Adam(lr=opt.learning_rate, weight_decay=0)
    else:
        # Fail fast — previously this fell through to a NameError below.
        raise ValueError("unsupported optimizer: {}".format(opt.optimizer))

    overfit_trainer = Trainer(model=model,
                              train_data=train_data,
                              loss=MyCrossEntropyLoss(pred="output", target="target"),
                              n_epochs=opt.epoch,
                              batch_size=opt.batch_size,
                              device='cuda:0',
                              dev_data=validate_data,
                              metrics=MyPPMetric(pred="output", target="target"),
                              metric_key="-pp",
                              validate_every=opt.validate_every,
                              optimizer=_optimizer,
                              callbacks=[EarlyStopCallback(opt.patience)],
                              save_path=opt.save_model_path)
    overfit_trainer.train()