def init_model():
    train_data, dev_data, test_data, vocab = readdata()
    model = CNN((len(vocab), 128), num_classes=target_len, padding=2, dropout=0.1)
    # model = torch.load("rnnmodel/best_RNN_accuracy_2019-05-22-17-18-46")
    trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data, device=0,
                      save_path='cnnmodel', loss=loss, metrics=metrics,
                      callbacks=[FitlogCallback(test_data)])
    tester = Tester(test_data, model, metrics=AccuracyMetric())

    model2 = RNN(embed_num=len(vocab), input_size=256, hidden_size=256, target_size=target_len)
    # model2 = torch.load("rnnmodel/best_RNN_accuracy_2019-05-22-17-18-46")
    trainer2 = Trainer(model=model2, train_data=train_data, dev_data=dev_data,
                       loss=loss, metrics=metrics, save_path='rnnmodel',
                       batch_size=32, n_epochs=20, device=0)
    # test the RNN model, not the CNN
    tester2 = Tester(test_data, model2, metrics=AccuracyMetric())
    return trainer, trainer2, tester, tester2
def test_early_stop(self):
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.01),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=20, print_every=50, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=False,
                      callbacks=[EarlyStopCallback(5)], check_code_level=2)
    trainer.train()
def train(args):
    data = get_data(args)
    train_data = data['train']
    dev_data = data['dev']
    model = get_model(args)
    optimizer = get_optim(args)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    callbacks = []
    trainer = Trainer(
        train_data=train_data,
        model=model,
        optimizer=optimizer,
        loss=None,
        batch_size=args.batch_size,
        n_epochs=args.epochs,
        num_workers=4,
        metrics=SpanFPreRecMetric(tag_vocab=data['tag_vocab'],
                                  encoding_type=data['encoding_type'],
                                  ignore_labels=data['ignore_labels']),
        metric_key='f1',
        dev_data=dev_data,
        save_path=args.save_path,
        device=device,
        callbacks=callbacks,
        check_code_level=-1,
    )
    print(trainer.train())
def test_save_path(self):
    data_set = prepare_fake_dataset()
    data_set.set_input("x", flag=True)
    data_set.set_target("y", flag=True)
    train_set, dev_set = data_set.split(0.3)
    model = NaiveClassifier(2, 1)
    save_path = 'test_save_models'
    trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
                      metrics=AccuracyMetric(pred="predict", target="y"),
                      validate_every=-1, save_path=save_path, use_tqdm=True,
                      check_code_level=2)
    trainer.train()
def test_gradient_clip(self):
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=20, print_every=50, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=False,
                      callbacks=[GradientClipCallback(model.parameters(), clip_value=2)],
                      check_code_level=2)
    trainer.train()
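# For reference, a minimal sketch (not from this repo) of the plain-PyTorch operation that
# value-based gradient clipping performs after each backward pass; GradientClipCallback is
# assumed to wrap something along these lines (it also supports norm-based clipping via
# torch.nn.utils.clip_grad_norm_):
import torch

def clip_gradients_by_value(parameters, clip_value=2):
    # Clamp every gradient element into [-clip_value, clip_value] before optimizer.step().
    torch.nn.utils.clip_grad_value_(parameters, clip_value)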
def bilstm_text():
    w = pickle.load(open("weight.bin", "rb"))
    (vocab, train_data, dev_data, test_data) = read_data()
    model_lstm = MyBLSTMText(class_num=4, vocab_size=len(vocab), dropout=0.5, embed_weights=w)
    loss = CrossEntropyLoss()
    metrics = AccuracyMetric()
    trainer = Trainer(model=model_lstm, train_data=train_data, dev_data=dev_data,
                      optimizer=Adam(lr=0.0015), print_every=10, use_tqdm=False,
                      device='cuda:0', save_path="./lstm_model", loss=loss, metrics=metrics)
    # callbacks=[EarlyStopCallback(10)])
    trainer.train()
    tester = Tester(test_data, model_lstm, metrics=AccuracyMetric())
    tester.test()
def test_readonly_property(self):
    from fastNLP.core.callback import Callback
    passed_epochs = []
    total_epochs = 5

    class MyCallback(Callback):
        def __init__(self):
            super(MyCallback, self).__init__()

        def on_epoch_begin(self):
            passed_epochs.append(self.epoch)
            print(self.n_epochs, self.n_steps, self.batch_size)
            print(self.model)
            print(self.optimizer)

    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, loss=BCELoss(pred="predict", target="y"),
                      n_epochs=total_epochs, batch_size=32, print_every=50,
                      optimizer=SGD(lr=0.1), check_code_level=2, use_tqdm=False,
                      dev_data=data_set, metrics=AccuracyMetric(pred="predict", target="y"),
                      callbacks=[MyCallback()])
    trainer.train()
    assert passed_epochs == list(range(1, total_epochs + 1))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--methods", "-m", default="lstm", choices=["rnn", "lstm", "cnn"])
    parser.add_argument("--n_epochs", "-n", default=5, type=int)
    parser.add_argument("--embedding", "-e", default=100, type=int)
    parser.add_argument("--category", "-c", default=4, type=int)
    parser.add_argument("--batch", "-b", default=4, type=int)
    parser.add_argument("--learning_rate", "-l", default=0.005, type=float)
    args = parser.parse_args()
    if args.category > 20 or args.category < 1:
        raise ValueError("the number of categories must be between 1 and 20")
    train_data, test_data, dic_size = handle_data(args.category)
    if args.methods == "rnn":
        model = rnn(dic_size, args.category)
        output = "rnn_model.pth"
    elif args.methods == "lstm":
        model = myLSTM(dic_size, args.category)
        output = "lstm_model.pth"
    else:
        # model = cnn(dic_size, args.category)
        model = torch.load("cnn_model.pth")
        output = "cnn_model.pth"
    trainer = Trainer(train_data, model,
                      loss=CrossEntropyLoss(pred="pred", target='target'),
                      optimizer=SGD(model.parameters(), lr=args.learning_rate),
                      n_epochs=args.n_epochs, dev_data=test_data,
                      metrics=AccuracyMetric(pred="pred", target='target'),
                      batch_size=args.batch)
    trainer.train()
    torch.save(model, output)
def test_trainer_suggestion3(self):
    # Check that the error message correctly alerts the user.
    # forward receives the data it needs here, but does not return the 'loss' key.
    dataset = prepare_fake_dataset2('x1', 'x2')
    dataset.set_input('x1', 'x2', 'y', flag=True)

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(5, 4)

        def forward(self, x1, x2, y):
            x1 = self.fc(x1)
            x2 = self.fc(x2)
            x = x1 + x2
            loss = F.cross_entropy(x, y)
            return {'wrong_loss_key': loss}

    model = Model()
    with self.assertRaises(NameError):
        trainer = Trainer(train_data=dataset, model=model, print_every=2, use_tqdm=False)
        trainer.train()
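# For contrast with the failing test above, a minimal sketch (our own, hypothetical) of a
# forward that returns the 'loss' key; when no loss object is passed to Trainer, it is
# assumed to look for exactly this key in forward's output dict:
import torch.nn as nn
import torch.nn.functional as F

class LossKeyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(5, 4)

    def forward(self, x1, x2, y):
        x = self.fc(x1) + self.fc(x2)
        # 'loss' is the key Trainer expects when the loss is computed inside forward.
        return {'loss': F.cross_entropy(x, y)}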
def CNNmethod():
    train_data, dev_data, test_data, D_dict, vocab = data_preprocess()
    CNN = CNNText(D_input, D_output, D_dict=D_dict)
    print('start train')
    optim = RMSprop(CNN.parameters(), lr=0.01)
    trainer = Trainer(model=CNN, train_data=train_data, dev_data=train_data, loss=loss,
                      metrics=metrics, batch_size=32, optimizer=optim, save_path='.record/',
                      validate_every=30, callbacks=[callback])
    trainer.train()
    # Evaluate the trained CNN on all three splits.
    tester = Tester(test_data, CNN, metrics)
    tester.test()
    tester = Tester(train_data, CNN, metrics)
    tester.test()
    tester = Tester(dev_data, CNN, metrics)
    tester.test()
def test_trainer_data_parallel(self):
    if torch.cuda.device_count() > 1:
        from fastNLP import AccuracyMetric
        dataset = prepare_fake_dataset2('x1', 'x2')
        dataset.set_input('x1', 'x2', 'y', flag=True)

        class Model(nn.Module):
            def __init__(self):
                super().__init__()
                self.fc = nn.Linear(5, 4)

            def forward(self, x1, x2, y=None):
                x1 = self.fc(x1)
                x2 = self.fc(x2)
                x = x1 + x2
                if self.training:
                    loss = F.cross_entropy(x, y)
                    return {'loss': loss}
                else:
                    return {'pred': x, 'target': y}

        model = Model()
        trainer = Trainer(train_data=dataset, model=model, print_every=2, use_tqdm=False,
                          dev_data=dataset, metrics=AccuracyMetric(), device=[0, 1])
        trainer.train(load_best_model=False)
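# A rough plain-PyTorch equivalent (a sketch, not fastNLP internals verbatim) of what
# passing device=[0, 1] amounts to: the model is wrapped for multi-GPU data parallelism.
import torch
import torch.nn as nn

net = nn.Linear(5, 4)  # placeholder model
if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net, device_ids=[0, 1])
    net = net.cuda()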
def test_collect_fn3(self):
    """
    Test that collect_fn output overrides fields of the same name.
    :return:
    """
    dataset = prepare_fake_dataset2('x1', 'x2')
    dataset.set_input('x1', 'x2')
    dataset.set_target('y')
    import torch

    def fn(ins_list):
        x = []
        for ind, ins in ins_list:
            x.append(ins['x1'] + ins['x2'])
        x = torch.FloatTensor(x)
        return {'x1': torch.zeros_like(x)}, \
               {'target': torch.zeros(x.size(0)).long(), 'y': x}

    dataset.add_collect_fn(fn)

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(5, 1, bias=False)

        def forward(self, x1):
            x1 = self.fc(x1)
            assert x1.sum() == 0, "x1 should have been replaced with zeros"
            # loss = F.cross_entropy(x, y)
            return {'pred': x1}

    model = Model()
    trainer = Trainer(train_data=dataset, model=model, loss=CrossEntropyLoss(),
                      print_every=2, dev_data=dataset, metrics=AccuracyMetric(),
                      use_tqdm=False, n_epochs=1)
    best_metric = trainer.train()['best_eval']['AccuracyMetric']['acc']
    self.assertTrue(best_metric == 1)
def test_collect_fn2(self):
    """Test whether collect_fn can produce both batch_x and batch_y."""
    dataset = prepare_fake_dataset2('x1', 'x2')
    dataset.set_input('x1', 'x2')
    dataset.set_target('y', 'x1')
    import torch

    def fn(ins_list):
        x = []
        for ind, ins in ins_list:
            x.append(ins['x1'] + ins['x2'])
        x = torch.FloatTensor(x)
        return {'x': x}, {'target': x[:, :4].argmax(dim=-1)}

    dataset.add_collect_fn(fn)

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(5, 4)

        def forward(self, x1, x2, x):
            x1 = self.fc(x1)
            x2 = self.fc(x2)
            x = self.fc(x)
            sum_x = x1 + x2 + x
            time.sleep(0.1)
            # loss = F.cross_entropy(x, y)
            return {'pred': sum_x}

    model = Model()
    trainer = Trainer(train_data=dataset, model=model, loss=CrossEntropyLoss(),
                      print_every=2, dev_data=dataset, metrics=AccuracyMetric(),
                      use_tqdm=False)
    trainer.train()
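# A minimal standalone sketch of the collect_fn contract exercised by the two tests above:
# the function receives a list of (index, instance) pairs and returns a (batch_x, batch_y)
# pair of dicts, where batch_x feeds model.forward and batch_y feeds the loss and metrics.
# The field names here ('x1', 'target') are placeholders, not names from this repo.
import torch

def example_collect_fn(ins_list):
    xs = [ins['x1'] for _, ins in ins_list]
    batch_x = {'x1': torch.FloatTensor(xs)}            # arguments for model.forward
    batch_y = {'target': torch.zeros(len(xs)).long()}  # targets for loss/metrics
    return batch_x, batch_y

# Registered the same way as in the tests: dataset.add_collect_fn(example_collect_fn)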
def train_and_save_model(data_train, data_test, vocab, max_sentence_length, save_dir):
    # Check that this torch version is compatible with fastNLP
    print(torch.__version__)
    # Build the network
    model = DPCNN(max_features=len(vocab), word_embedding_dimension=word_embedding_dimension,
                  max_sentence_length=max_sentence_length, num_classes=num_classes)
    # Define loss and metric
    loss = CrossEntropyLoss(pred="output", target="label_seq")
    metric = AccuracyMetric(pred="predict", target="label_seq")
    # Train the model with train_data and validate it with test_data.
    # embedding=300, Gaussian init, weight_decay=0.0001, lr=0.001, epoch=5
    trainer = Trainer(model=model, train_data=data_train, dev_data=data_test,
                      loss=loss, metrics=metric, save_path='CD',
                      batch_size=64, n_epochs=5,
                      optimizer=Adam(lr=0.001, weight_decay=0.0001))
    trainer.train()
    # Save the model
    _save_model(model, model_name='new_model.pkl', save_dir=save_dir)
def test_trainer_suggestion2(self):
    # Check that the error message correctly alerts the user.
    # forward receives the data it needs here; training should run.
    dataset = prepare_fake_dataset2('x1', 'x2')
    dataset.set_input('x1', 'x2', 'y', flag=True)

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(5, 4)

        def forward(self, x1, x2, y):
            x1 = self.fc(x1)
            x2 = self.fc(x2)
            x = x1 + x2
            loss = F.cross_entropy(x, y)
            return {'loss': loss}

    model = Model()
    trainer = Trainer(train_data=dataset, model=model, print_every=2, use_tqdm=False)
    trainer.train()
def test_warmup_callback(self):
    data_set, model = prepare_env()
    warmup_callback = WarmupCallback()
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                      callbacks=warmup_callback, check_code_level=2)
    trainer.train()
def test_lr_scheduler(self):
    data_set, model = prepare_env()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    trainer = Trainer(data_set, model, optimizer=optimizer,
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=False,
                      callbacks=[LRScheduler(
                          torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1))],
                      check_code_level=2)
    trainer.train()
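# For reference, a self-contained sketch of the bare torch scheduler used in the test above;
# the LRScheduler callback is assumed to step it once per epoch (the model here is a
# placeholder):
import torch

net = torch.nn.Linear(5, 1)
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
for epoch in range(5):
    # ... run one training epoch, calling optimizer.step() per batch ...
    scheduler.step()  # multiplies the lr by gamma every step_size epochs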
def test_early_stop_callback(self):
    """
    Watch the output to confirm that training really early-stops.
    """
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=2, n_epochs=10, print_every=5, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                      callbacks=EarlyStopCallback(1), check_code_level=2)
    trainer.train()
def test_fastnlp_1min_tutorial(self):
    # tutorials/fastnlp_1min_tutorial.ipynb
    data_path = "test/data_for_tests/tutorial_sample_dataset.csv"
    ds = DataSet.read_csv(data_path, headers=('raw_sentence', 'label'), sep='\t')
    print(ds[1])

    # lowercase the raw sentences
    ds.apply(lambda x: x['raw_sentence'].lower(), new_field_name='raw_sentence')

    # convert labels to int
    ds.apply(lambda x: int(x['label']), new_field_name='target', is_target=True)

    def split_sent(ins):
        return ins['raw_sentence'].split()

    ds.apply(split_sent, new_field_name='words', is_input=True)

    # split into train/dev sets
    train_data, dev_data = ds.split(0.3)
    print("Train size: ", len(train_data))
    print("Dev size: ", len(dev_data))

    from fastNLP import Vocabulary
    vocab = Vocabulary(min_freq=2)
    train_data.apply(lambda x: [vocab.add(word) for word in x['words']])

    # index the sentences with Vocabulary.to_index(word)
    train_data.apply(lambda x: [vocab.to_index(word) for word in x['words']],
                     new_field_name='words', is_input=True)
    dev_data.apply(lambda x: [vocab.to_index(word) for word in x['words']],
                   new_field_name='words', is_input=True)

    from fastNLP.models import CNNText
    model = CNNText((len(vocab), 50), num_classes=5, padding=2, dropout=0.1)

    from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric, Adam
    trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data,
                      loss=CrossEntropyLoss(), optimizer=Adam(),
                      metrics=AccuracyMetric(target='target'))
    trainer.train()
    print('Train finished!')
def test_evaluate_callback(self):
    data_set, model = prepare_env()
    from fastNLP import Tester
    tester = Tester(data=data_set, model=model,
                    metrics=AccuracyMetric(pred="predict", target="y"))
    evaluate_callback = EvaluateCallback(data_set, tester)
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=False,
                      callbacks=evaluate_callback, check_code_level=2)
    trainer.train()
def test_TensorboardCallback(self):
    import os
    import shutil
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=False,
                      callbacks=[TensorboardCallback("loss", "metric")], check_code_level=2)
    trainer.train()

    path = os.path.join("./", 'tensorboard_logs_{}'.format(trainer.start_time))
    if os.path.exists(path):
        shutil.rmtree(path)
def test_save_model_callback(self):
    data_set, model = prepare_env()
    top = 3
    save_model_callback = SaveModelCallback(self.tempdir, top=top)
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                      callbacks=save_model_callback, check_code_level=2)
    trainer.train()

    timestamp = os.listdir(self.tempdir)[0]
    self.assertEqual(len(os.listdir(os.path.join(self.tempdir, timestamp))), top)
def workflow():
    train_data, valid_data, test_data, vocab, speech_vocab = prepare_data()

    ## Set the corresponding tags for each dataset, which will be used in the Trainer
    train_data.set_input("token_index_list", "origin_len", "speech_index_list")
    test_data.set_input("token_index_list", "origin_len", "speech_index_list")
    valid_data.set_input("token_index_list", "origin_len", "speech_index_list")
    train_data.set_target("speech_index_list")
    test_data.set_target("speech_index_list")
    valid_data.set_target("speech_index_list")

    ## Build the model
    config = {
        "vocab_size": len(vocab),
        "word_emb_dim": args.word_emb,
        "rnn_hidden_units": args.rnn_hidden,
        "num_classes": len(speech_vocab),
        "bi_direction": args.bilstm
    }

    ## Load the model from scratch or from a saved checkpoint
    if args.cont:
        model = torch.load(args.cont)
    else:
        model = BiLSTMCRF(config)

    if args.mode == "train":
        ## Choose the optimizer
        optimizer = Adam(lr=args.lr) if args.op else SGD(lr=args.lr)
        ## Train the model
        trainer = Trainer(model=model, train_data=train_data, dev_data=valid_data,
                          use_cuda=args.cuda,
                          metrics=PosMetric(pred='pred', target='speech_index_list'),
                          optimizer=optimizer, n_epochs=args.epoch,
                          batch_size=args.batch_size, save_path="./save")
        trainer.train()

    ## Test the model
    tester = Tester(
        data=test_data,
        model=model,
        metrics=PosMetric(pred='pred', target='speech_index_list'),
        use_cuda=args.cuda,
    )
    tester.test()
def train(datainfo, model, optimizer, loss, metrics, opt):
    trainer = Trainer(datainfo.datasets['train'], model, optimizer=optimizer, loss=loss,
                      metrics=metrics, dev_data=datainfo.datasets['test'], device=0,
                      check_code_level=-1, n_epochs=opt.train_epoch,
                      save_path=opt.save_model_path)
    trainer.train()
def run_model(self, model, data, loss, metrics):
    """run a model, test if it can run with fastNLP"""
    print('testing model:', model.__class__.__name__)
    tester = Tester(data=data, model=model, metrics=metrics,
                    batch_size=BATCH_SIZE, verbose=0)
    before_train = tester.test()
    trainer = Trainer(train_data=data, model=model, loss=loss,
                      batch_size=BATCH_SIZE, n_epochs=N_EPOCHS,
                      dev_data=None, save_path=None, use_tqdm=False)
    trainer.train(load_best_model=False)
    after_train = tester.test()
    for metric_name, v1 in before_train.items():
        assert metric_name in after_train
def train(config):
    train_data = pickle.load(open(os.path.join(config.data_path, config.train_name), "rb"))
    dev_data = pickle.load(open(os.path.join(config.data_path, config.dev_name), "rb"))
    test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(open(os.path.join(config.data_path, config.vocabulary_name), "rb"))
    # load w2v data
    weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if config.task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                                  output_dim=config.class_num, hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         class_num=config.class_num, kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                         static=config.static, in_channels=config.in_channels)
    elif config.task_name == "cnn_w2v":
        text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                             class_num=config.class_num, kernel_num=config.kernel_num,
                             kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                             static=config.static, in_channels=config.in_channels,
                             weight=weight)
    elif config.task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    accuracy = AccuracyMetric(pred='output', target='target')

    trainer = Trainer(train_data=train_data, model=text_model, loss=CrossEntropyLoss(),
                      batch_size=config.batch_size, check_code_level=0,
                      metrics=accuracy, n_epochs=config.epoch,
                      dev_data=dev_data, save_path=config.save_path,
                      print_every=config.print_every, validate_every=config.validate_every,
                      optimizer=optimizer, use_tqdm=False,
                      device=config.device, callbacks=[timing, early_stop])
    trainer.train()

    # test result
    tester = Tester(test_data, text_model, metrics=accuracy)
    tester.test()
def test_KeyBoardInterrupt(self):
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, loss=BCELoss(pred="predict", target="y"),
                      n_epochs=5, batch_size=32, print_every=50, optimizer=SGD(lr=0.1),
                      check_code_level=2, use_tqdm=False, callbacks=[ControlC(False)])
    trainer.train()
def test_fitlog_callback(self):
    import fitlog
    fitlog.set_log_dir(self.tempdir, new_log=True)
    data_set, model = prepare_env()
    from fastNLP import Tester
    tester = Tester(data=data_set, model=model,
                    metrics=AccuracyMetric(pred="predict", target="y"))
    fitlog_callback = FitlogCallback(data_set, tester)
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                      callbacks=fitlog_callback, check_code_level=2)
    trainer.train()
def test_LRFinder(self):
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, loss=BCELoss(pred="predict", target="y"),
                      n_epochs=5, batch_size=32, print_every=50, optimizer=SGD(lr=0.1),
                      check_code_level=2, use_tqdm=False,
                      callbacks=[LRFinder(len(data_set) // 32)])
    trainer.train()
def run_train():
    datainfo, vocabs = set_up_data()
    train_sampler = RandomSampler()
    criterion = SummLoss(config=config, padding_idx=vocabs.to_index(PAD_TOKEN))

    model = CGSum(config, vocab=vocabs)
    model.to(device)

    initial_lr = config.lr
    logger.info(f"learning rate = {initial_lr}")
    optimizer = Adagrad(filter(lambda p: p.requires_grad, model.parameters()),
                        lr=initial_lr,
                        initial_accumulator_value=config.adagrad_init_acc)

    train_loader = datainfo.datasets["train"]
    valid_loader = datainfo.datasets["dev"]

    callbacks = [
        TrainCallback(config, patience=10),
        FitlogCallback(),
        LRDecayCallback(optimizer.param_groups, steps=args.weight_decay_step)
    ]
    trainer = Trainer(model=model, train_data=train_loader, optimizer=optimizer,
                      loss=criterion, batch_size=config.batch_size, check_code_level=-1,
                      sampler=train_sampler, n_epochs=config.n_epochs, print_every=100,
                      dev_data=valid_loader, update_every=args.update_every,
                      metrics=FastRougeMetric(pred='prediction',
                                              art_oovs='article_oovs',
                                              abstract_sentences='abstract_sentences',
                                              config=config,
                                              vocab=datainfo.vocabs["vocab"]),
                      metric_key="rouge-l-f",
                      validate_every=args.validate_every * args.update_every,
                      save_path=None, callbacks=callbacks, use_tqdm=True)

    logger.info("-" * 5 + "start training" + "-" * 5)
    traininfo = trainer.train(load_best_model=True)
    logger.info(' | end of Train | time: {:5.2f}s | '.format(traininfo["seconds"]))
    logger.info('[INFO] best eval model in epoch %d and iter %d',
                traininfo["best_epoch"], traininfo["best_step"])