def test_fastnlp_1min_tutorial(self):
    # tutorials/fastnlp_1min_tutorial.ipynb
    data_path = "test/data_for_tests/tutorial_sample_dataset.csv"
    ds = DataSet.read_csv(data_path, headers=('raw_sentence', 'label'), sep='\t')
    print(ds[1])

    # lowercase all text in raw_sentence
    ds.apply(lambda x: x['raw_sentence'].lower(), new_field_name='raw_sentence')

    # convert the label to int
    ds.apply(lambda x: int(x['label']), new_field_name='target', is_target=True)

    def split_sent(ins):
        return ins['raw_sentence'].split()

    ds.apply(split_sent, new_field_name='words', is_input=True)

    # split into training / validation sets
    train_data, dev_data = ds.split(0.3)
    print("Train size: ", len(train_data))
    print("Test size: ", len(dev_data))

    from fastNLP import Vocabulary
    vocab = Vocabulary(min_freq=2)
    train_data.apply(lambda x: [vocab.add(word) for word in x['words']])

    # index the sentences with Vocabulary.to_index(word)
    train_data.apply(lambda x: [vocab.to_index(word) for word in x['words']],
                     new_field_name='words', is_input=True)
    dev_data.apply(lambda x: [vocab.to_index(word) for word in x['words']],
                   new_field_name='words', is_input=True)

    from fastNLP.models import CNNText
    model = CNNText((len(vocab), 50), num_classes=5, padding=2, dropout=0.1)

    from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric, Adam
    trainer = Trainer(model=model,
                      train_data=train_data,
                      dev_data=dev_data,
                      loss=CrossEntropyLoss(),
                      optimizer=Adam(),
                      metrics=AccuracyMetric(target='target'))
    trainer.train()
    print('Train finished!')
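The 1-minute tutorial stops right after trainer.train(). A minimal follow-up sketch, assuming you want to re-score the trained CNNText model on the dev split with fastNLP's Tester (the batch size here is arbitrary):

    from fastNLP import Tester

    # Re-evaluate the trained model on the dev split using the same metric.
    tester = Tester(data=dev_data, model=model,
                    metrics=AccuracyMetric(target='target'),
                    batch_size=16)
    tester.test()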
def train(config):
    train_data = pickle.load(open(os.path.join(config.data_path, config.train_name), "rb"))
    dev_data = pickle.load(open(os.path.join(config.data_path, config.dev_name), "rb"))
    test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(open(os.path.join(config.data_path, config.vocabulary_name), "rb"))

    # load w2v data
    weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if config.task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                                  output_dim=config.class_num, hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         class_num=config.class_num, kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                         static=config.static, in_channels=config.in_channels)
    elif config.task_name == "cnn_w2v":
        text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                             class_num=config.class_num, kernel_num=config.kernel_num,
                             kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                             static=config.static, in_channels=config.in_channels,
                             weight=weight)
    elif config.task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    accuracy = AccuracyMetric(pred='output', target='target')

    trainer = Trainer(train_data=train_data, model=text_model, loss=CrossEntropyLoss(),
                      batch_size=config.batch_size, check_code_level=0,
                      metrics=accuracy, n_epochs=config.epoch,
                      dev_data=dev_data, save_path=config.save_path,
                      print_every=config.print_every, validate_every=config.validate_every,
                      optimizer=optimizer, use_tqdm=False,
                      device=config.device, callbacks=[timing, early_stop])
    trainer.train()

    # test result
    tester = Tester(test_data, text_model, metrics=accuracy)
    tester.test()
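TimingCallback is a project-specific callback that is never defined in these snippets. A minimal sketch of what it might look like, assuming it subclasses fastNLP's Callback and only reports wall-clock training time (the implementation is a guess; only the on_train_begin/on_train_end hook names come from fastNLP's Callback API):

    import time
    from fastNLP import Callback

    class TimingCallback(Callback):
        # Hypothetical implementation: record wall-clock time around training.
        def on_train_begin(self):
            self._start = time.time()

        def on_train_end(self):
            print('training took %.2fs' % (time.time() - self._start))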
def train_variational_ae():
    n_epochs = 500
    trainer = Trainer(train_data=fast_data.train_data, dev_data=fast_data.test_data,
                      model=vae,
                      loss=LossFunc(VariationalAE.loss),
                      metrics=AccuracyMetric(target='dec_target'),
                      n_epochs=n_epochs, batch_size=batch_size,
                      optimizer=Adam(lr=0.001, weight_decay=0, model_params=vae.parameters()))
    trainer.train()
    print('VAE Train finished!')
def train_classifier():
    n_epochs = 50
    trainer = Trainer(train_data=fast_data.train_data, dev_data=fast_data.test_data,
                      model=disc,
                      loss=CrossEntropyLoss(target='label_seq'),
                      metrics=AccuracyMetric(target='label_seq'),
                      n_epochs=n_epochs, batch_size=batch_size,
                      optimizer=Adam(lr=0.001, weight_decay=0, model_params=disc.parameters()))
    trainer.train()
    print('Disc Train finished!')
def train():
    config = Config()
    train_data, dev_data, vocabulary = get_dataset(config.data_path)

    poetry_model = PoetryModel(vocabulary_size=len(vocabulary),
                               embedding_size=config.embedding_size,
                               hidden_size=config.hidden_size)
    loss = Loss(pred='output', target='target')
    perplexity = Perplexity(pred='output', target='target')

    print("optimizer:", config.optimizer)
    print("momentum:", config.momentum)
    if config.optimizer == 'adam':
        optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = SGD(lr=config.lr, momentum=config.momentum)
    elif config.optimizer == 'adagrad':
        optimizer = Adagrad(lr=config.lr, weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = Adadelta(lr=config.lr, rho=config.rho, eps=config.eps,
                             weight_decay=config.weight_decay)

    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)

    trainer = Trainer(train_data=train_data, model=poetry_model, loss=loss,
                      metrics=perplexity, n_epochs=config.epoch,
                      batch_size=config.batch_size,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      dev_data=dev_data, save_path=config.save_path,
                      optimizer=optimizer, check_code_level=config.check_code_level,
                      metric_key="-PPL", sampler=RandomSampler(), prefetch=False,
                      use_tqdm=True, device=config.device,
                      callbacks=[timing, early_stop])
    trainer.train()
def train(epochs=10, lr=0.001):
    global model
    for i in range(epochs):
        print('----------------- ', str(i + 1), ' ------------------')
        trainer = Trainer(model=model, train_data=train_set, dev_data=test_set,
                          loss=CrossEntropyLoss(pred='output', target='target'),
                          metrics=AccuracyMetric(pred='pred', target='target'),
                          optimizer=Adam(lr=lr), save_path=None,
                          batch_size=1, n_epochs=1)
        trainer.train()
        model.load_state_dict(copy.deepcopy(trainer.model.state_dict()))
        # save('../model/cnn-' + str(kernel_sizes) + '-' + str(keep_proba) + '-' + str(i+1))
        save('../model/lstm-' + str(input_dim) + '-' + str(hidden_dim) + '-' + str(i + 1))
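The save helper called at the end of each epoch is defined elsewhere in that project. A minimal sketch under the assumption that it just persists the current global model's weights with torch.save (path handling and file extension are guesses):

    import torch

    def save(path):
        # Hypothetical helper: dump the global model's state dict to disk.
        # The real project may save the whole module or extra metadata instead.
        torch.save(model.state_dict(), path + '.pkl')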
# training
device = 0 if torch.cuda.is_available() else 'cpu'
'''
EMBED_DIM = 100
model = CNNText((len(vocab), EMBED_DIM), num_classes=len(vocab_target), dropout=0.1)
metrics = AccuracyMetric()
loss = CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.01, alpha=0.99, eps=1e-08,
                          weight_decay=0, momentum=0, centered=False)
N_EPOCHS = 10
BATCH_SIZE = 16
trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data, loss=loss,
                  metrics=metrics, optimizer=optimizer, n_epochs=N_EPOCHS,
                  batch_size=BATCH_SIZE, device=device)
trainer.train()
'''
embed = BertEmbedding(vocab, model_dir_or_name='en', include_cls_sep=True)
model = BertForSequenceClassification(embed, len(vocab_target))

trainer = Trainer(train_data, model,
                  optimizer=Adam(model_params=model.parameters(), lr=2e-5),
                  loss=CrossEntropyLoss(), device=device,
                  batch_size=8, dev_data=dev_data,
                  metrics=AccuracyMetric(), n_epochs=2, print_every=1)
trainer.train()

saver = ModelSaver("save_model/bert2021.1.19.pkl")
saver.save_pytorch(model)
def test_Adam(self):
    optim = Adam(model_params=torch.nn.Linear(10, 3).parameters())
    self.assertTrue("lr" in optim.__dict__["settings"])
    self.assertTrue("weight_decay" in optim.__dict__["settings"])
    res = optim.construct_from_pytorch(torch.nn.Linear(10, 3).parameters())
    self.assertTrue(isinstance(res, torch.optim.Adam))

    optim = Adam(lr=0.001)
    self.assertEqual(optim.__dict__["settings"]["lr"], 0.001)
    res = optim.construct_from_pytorch(torch.nn.Linear(10, 3).parameters())
    self.assertTrue(isinstance(res, torch.optim.Adam))

    optim = Adam(lr=0.002, weight_decay=0.989)
    self.assertEqual(optim.__dict__["settings"]["lr"], 0.002)
    self.assertEqual(optim.__dict__["settings"]["weight_decay"], 0.989)

    optim = Adam(0.001)
    self.assertEqual(optim.__dict__["settings"]["lr"], 0.001)
    res = optim.construct_from_pytorch(torch.nn.Linear(10, 3).parameters())
    self.assertTrue(isinstance(res, torch.optim.Adam))
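The test above exercises fastNLP's Adam wrapper: it stores settings and builds the real torch optimizer from a parameter iterator. A minimal usage sketch outside of a Trainer, assuming the wrapper is used exactly as in the test (the Linear model is just a stand-in):

    import torch
    from fastNLP import Adam

    model = torch.nn.Linear(10, 3)
    # Settings live on the wrapper; the torch.optim.Adam instance is only
    # created once a parameter iterator is supplied.
    wrapper = Adam(lr=1e-3, weight_decay=1e-4)
    torch_optim = wrapper.construct_from_pytorch(model.parameters())
    assert isinstance(torch_optim, torch.optim.Adam)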
                  embeddings=embeddings, dropout=0.5, num_classes=3, device=device).to(device)

trainer = Trainer(train_data=train_data, model=model,
                  loss=CrossEntropyLoss(pred='pred', target='label'),
                  metrics=AccuracyMetric(),
                  n_epochs=10, batch_size=32,
                  print_every=-1, validate_every=-1,
                  dev_data=dev_data, use_cuda=True,
                  optimizer=Adam(lr=0.0004, weight_decay=0),
                  check_code_level=-1, metric_key='acc', use_tqdm=False)
trainer.train()

# after training, model holds the best weights on the dev set; save them
torch.save(model.state_dict(), '../data/checkpoints/best_model.pkl')

tester = Tester(
    data=test_data,
    model=model,
    metrics=AccuracyMetric(),
    batch_size=32,
)
tester.test()
        self.p = []

    def evaluate(self, target, pred):
        self.l.extend(target.tolist())
        pred = pred.argmax(dim=-1).tolist()
        self.p.extend(pred)

    def get_metric(self, reset=True):
        f1_score = m.f1_score(self.l, self.p, average="macro")
        if reset:
            self.l = []
            self.p = []
        return {"f1:": f1_score}


optimizer = Adam(lr=args.lr, weight_decay=0)
acc = AccuracyMetric()
f1 = f1metric()
loss = CrossEntropyLoss()

trainer = Trainer(
    trainset,
    cla,
    optimizer=optimizer,
    loss=loss,
    batch_size=args.batch_size,
    n_epochs=args.num_epoch,
    dev_data=testset,
    metrics=[acc, f1],
    save_path=args.save_dir,
    callbacks=[FitlogCallback(log_loss_every=5)],
)
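The class header and __init__ of f1metric are cut off above. A minimal sketch of how such a custom metric is typically completed, assuming it subclasses fastNLP's MetricBase and that m is sklearn.metrics (both assumptions; only evaluate/get_metric above are from the original):

    import sklearn.metrics as m
    from fastNLP import MetricBase

    class f1metric(MetricBase):
        def __init__(self, pred=None, target=None):
            super().__init__()
            # map model output / dataset fields onto evaluate()'s arguments
            self._init_param_map(pred=pred, target=target)
            self.l = []   # accumulated gold labels
            self.p = []   # accumulated predictions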
model = SLSTM(nemb=300, nhid=300, num_layer=1, Tar_emb=embedding)

trainer = Trainer(
    train_data=train_dataset,
    model=model,
    loss=CrossEntropyLoss(pred='predict', target='label'),
    metrics=AccuracyMetric(),
    n_epochs=20,
    batch_size=arg.batch_size,
    print_every=1,
    validate_every=-1,
    dev_data=dev_dataset,
    use_cuda=True,
    save_path=save_dir,
    optimizer=Adam(1e-3, weight_decay=0),
    check_code_level=-1,
    metric_key='acc',
    # sampler=default,
    use_tqdm=True,
)
results = trainer.train(load_best_model=True)
print(results)

torch.save(model, os.path.join(save_dir, "best_model.pkl"))

tester = Tester(
    data=test_dataset,
    model=model,
def test_fastnlp_advanced_tutorial(self):
    import os
    os.chdir("tutorials/fastnlp_advanced_tutorial")

    from fastNLP import DataSet
    from fastNLP import Instance
    from fastNLP import Vocabulary
    from fastNLP import Trainer
    from fastNLP import Tester

    # ### Instance
    # An Instance represents one sample and consists of one or more fields
    # (attributes / features), each with its own name and value.
    # The fields an Instance contains are defined at construction time using
    # the "field_name=field_value" syntax.

    # In[2]:
    # Build an Instance made up of three fields: premise, hypothesis and label
    instance = Instance(premise='an premise example .',
                        hypothesis='an hypothesis example.', label=1)
    instance

    # In[3]:
    data_set = DataSet([instance] * 5)
    data_set.append(instance)
    data_set[-2:]

    # In[4]:
    # An instance whose field type differs from the dataset's field type can still be added
    instance2 = Instance(premise='the second premise example .',
                         hypothesis='the second hypothesis example.', label='1')
    try:
        data_set.append(instance2)
    except:
        pass
    data_set[-2:]

    # In[5]:
    # If a field name does not match, the instance cannot be appended to the dataset
    instance3 = Instance(premises='the third premise example .',
                         hypothesis='the third hypothesis example.', label=1)
    try:
        data_set.append(instance3)
    except:
        print('cannot append instance')
        pass
    data_set[-2:]

    # In[6]:
    # Besides text, a tensor can also be the value of a field
    import torch
    tensor_ins = Instance(image=torch.randn(5, 5), label=0)
    ds = DataSet()
    ds.append(tensor_ins)
    ds

    from fastNLP import DataSet
    from fastNLP import Instance

    # Read data from a csv file into a DataSet.
    # Any csv-like file (one example per line) can be read this way.
    dataset = DataSet.read_csv('tutorial_sample_dataset.csv',
                               headers=('raw_sentence', 'label'), sep='\t')

    # Check the size of the DataSet
    len(dataset)

    # In[8]:
    # Use an integer index [k] to get the k-th sample
    dataset[0]

    # In[9]:
    # The retrieved sample is an Instance
    type(dataset[0])

    # In[10]:
    # Use a slice [a:b] to get samples a through b
    dataset[0:3]

    # In[11]:
    # Indices can also be negative
    dataset[-1]

    data_path = ['premise', 'hypothesis', 'label']

    # Read the files
    with open(data_path[0]) as f:
        premise = f.readlines()
    with open(data_path[1]) as f:
        hypothesis = f.readlines()
    with open(data_path[2]) as f:
        label = f.readlines()

    assert len(premise) == len(hypothesis) and len(hypothesis) == len(label)

    # Build the DataSet
    data_set = DataSet()
    for p, h, l in zip(premise, hypothesis, label):
        p = p.strip()  # strip trailing whitespace
        h = h.strip()  # strip trailing whitespace
        data_set.append(Instance(premise=p, hypothesis=h, truth=l))

    data_set[0]

    # ### Other DataSet operations
    # After a DataSet has been built, its contents can still be modified;
    # the interface for this is DataSet.apply()

    # In[13]:
    # Lowercase all text in the premise field
    data_set.apply(lambda x: x['premise'].lower(), new_field_name='premise')
    data_set[-2:]

    # In[14]:
    # Convert the label to int
    data_set.apply(lambda x: int(x['truth']), new_field_name='truth')
    data_set[-2:]

    # In[15]:
    # Split sentences on whitespace
    def split_sent(ins):
        return ins['premise'].split()
    data_set.apply(split_sent, new_field_name='premise')
    data_set.apply(lambda x: x['hypothesis'].split(), new_field_name='hypothesis')
    data_set[-2:]

    # In[16]:
    # Filter the data
    origin_data_set_len = len(data_set)
    data_set.drop(lambda x: len(x['premise']) <= 6)
    origin_data_set_len, len(data_set)

    # In[17]:
    # Add length information
    data_set.apply(lambda x: [1] * len(x['premise']), new_field_name='premise_len')
    data_set.apply(lambda x: [1] * len(x['hypothesis']), new_field_name='hypothesis_len')
    data_set[-1]

    # In[18]:
    # Mark the input (feature) fields and the target (label) field
    data_set.set_input("premise", "premise_len", "hypothesis", "hypothesis_len")
    data_set.set_target("truth")

    # In[19]:
    # Rename a field
    data_set.rename_field('truth', 'label')
    data_set[-1]

    # In[20]:
    # Split into training, validation and test sets
    train_data, vad_data = data_set.split(0.5)
    dev_data, test_data = vad_data.split(0.4)
    len(train_data), len(dev_data), len(test_data)

    # In[21]:
    # Deep-copy a dataset
    import copy
    train_data_2, dev_data_2 = copy.deepcopy(train_data), copy.deepcopy(dev_data)
    del copy
    # Initialise the vocabulary: max_size is 10000, a word must occur at least twice
    # to be kept, '<unk>' is the unknown token and '<pad>' is the padding token.
    # Vocabulary's default parameters are max_size=None, min_freq=None,
    # unknown='<unk>', padding='<pad>'
    vocab = Vocabulary(max_size=10000, min_freq=2, unknown='<unk>', padding='<pad>')

    # Build the vocabulary
    train_data.apply(lambda x: [vocab.add(word) for word in x['premise']])
    train_data.apply(lambda x: [vocab.add(word) for word in x['hypothesis']])
    vocab.build_vocab()

    # In[23]:
    # Index the sentences with the vocabulary
    train_data.apply(lambda x: [vocab.to_index(word) for word in x['premise']],
                     new_field_name='premise')
    train_data.apply(lambda x: [vocab.to_index(word) for word in x['hypothesis']],
                     new_field_name='hypothesis')
    dev_data.apply(lambda x: [vocab.to_index(word) for word in x['premise']],
                   new_field_name='premise')
    dev_data.apply(lambda x: [vocab.to_index(word) for word in x['hypothesis']],
                   new_field_name='hypothesis')
    test_data.apply(lambda x: [vocab.to_index(word) for word in x['premise']],
                    new_field_name='premise')
    test_data.apply(lambda x: [vocab.to_index(word) for word in x['hypothesis']],
                    new_field_name='hypothesis')
    train_data[-1], dev_data[-1], test_data[-1]

    # Read the vocab file
    with open('vocab.txt') as f:
        lines = f.readlines()
    vocabs = []
    for line in lines:
        vocabs.append(line.strip())

    # Instantiate a Vocabulary
    vocab_bert = Vocabulary(unknown=None, padding=None)
    # Add the vocabs list to the Vocabulary
    vocab_bert.add_word_lst(vocabs)
    # Build the vocabulary
    vocab_bert.build_vocab()
    # Update the unknown and padding token strings
    vocab_bert.unknown = '[UNK]'
    vocab_bert.padding = '[PAD]'

    # In[25]:
    # Index the sentences with the BERT vocabulary
    train_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['premise']],
                       new_field_name='premise')
    train_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['hypothesis']],
                       new_field_name='hypothesis')
    dev_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['premise']],
                     new_field_name='premise')
    dev_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['hypothesis']],
                     new_field_name='hypothesis')
    train_data_2[-1], dev_data_2[-1]

    # step 1: load model hyper-parameters (optional)
    from fastNLP.io.config_io import ConfigSection, ConfigLoader
    args = ConfigSection()
    ConfigLoader().load_config("./data/config", {"esim_model": args})
    args["vocab_size"] = len(vocab)
    args.data

    # In[27]:
    # step 2: load the ESIM model
    from fastNLP.models import ESIM
    model = ESIM(**args.data)
    model

    # In[28]:
    # Another example: load the CNN text classification model
    from fastNLP.models import CNNText
    cnn_text_model = CNNText(embed_num=len(vocab), embed_dim=50, num_classes=5,
                             padding=2, dropout=0.1)
    cnn_text_model

    from fastNLP import CrossEntropyLoss
    from fastNLP import Adam
    from fastNLP import AccuracyMetric

    trainer = Trainer(
        train_data=train_data,
        model=model,
        loss=CrossEntropyLoss(pred='pred', target='label'),
        metrics=AccuracyMetric(),
        n_epochs=3,
        batch_size=16,
        print_every=-1,
        validate_every=-1,
        dev_data=dev_data,
        use_cuda=False,
        optimizer=Adam(lr=1e-3, weight_decay=0),
        check_code_level=-1,
        metric_key='acc',
        use_tqdm=False,
    )
    trainer.train()

    tester = Tester(
        data=test_data,
        model=model,
        metrics=AccuracyMetric(),
        batch_size=args["batch_size"],
    )
    tester.test()

    os.chdir("../..")
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug
    if config.debug:
        train_data = train_data[0:100]
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    print(len(train_data), len(dev_data))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))

    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    word_vocab = pickle.load(
        open(os.path.join(config.data_path, config.word_vocab_name), "rb"))
    char_vocab = pickle.load(
        open(os.path.join(config.data_path, config.char_vocab_name), "rb"))
    pos_vocab = pickle.load(
        open(os.path.join(config.data_path, config.pos_vocab_name), "rb"))
    # spo_vocab = pickle.load(open(os.path.join(config.data_path, config.spo_vocab_name), "rb"))
    tag_vocab = pickle.load(
        open(os.path.join(config.data_path, config.tag_vocab_name), "rb"))
    print('word vocab', len(word_vocab))
    print('char vocab', len(char_vocab))
    print('pos vocab', len(pos_vocab))
    # print('spo vocab', len(spo_vocab))
    print('tag vocab', len(tag_vocab))

    schema = get_schemas(config.source_path)

    if task_name == 'bilstm_crf':
        model = AdvSeqLabel(
            char_init_embed=(len(char_vocab), config.char_embed_dim),
            word_init_embed=(len(word_vocab), config.word_embed_dim),
            pos_init_embed=(len(pos_vocab), config.pos_embed_dim),
            spo_embed_dim=len(schema),
            sentence_length=config.sentence_length,
            hidden_size=config.hidden_dim,
            num_classes=len(tag_vocab),
            dropout=config.dropout,
            id2words=tag_vocab.idx2word,
            encoding_type=config.encoding_type)
    elif task_name == 'trans_crf':
        model = TransformerSeqLabel(
            char_init_embed=(len(char_vocab), config.char_embed_dim),
            word_init_embed=(len(word_vocab), config.word_embed_dim),
            pos_init_embed=(len(pos_vocab), config.pos_embed_dim),
            spo_embed_dim=len(schema),
            num_classes=len(tag_vocab),
            id2words=tag_vocab.idx2word,
            encoding_type=config.encoding_type,
            num_layers=config.num_layers,
            inner_size=config.inner_size,
            key_size=config.key_size,
            value_size=config.value_size,
            num_head=config.num_head,
            dropout=config.dropout)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    # loss = NLLLoss()
    logs = FitlogCallback(dev_data)
    metrics = SpanFPreRecMetric(tag_vocab, pred='pred', seq_len='seq_len', target='tag')

    train_data.set_input('tag')
    dev_data.set_input('tag')
    dev_data.set_target('seq_len')
    # print(train_data.get_field_names())

    trainer = Trainer(
        train_data=train_data,
        model=model,
        # loss=loss,
        metrics=metrics,
        metric_key='f',
        batch_size=config.batch_size,
        n_epochs=config.epoch,
        dev_data=dev_data,
        save_path=config.save_path,
        check_code_level=-1,
        print_every=config.print_every,
        validate_every=config.validate_every,
        optimizer=optimizer,
        use_tqdm=False,
        device=config.device,
        callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(dev_data, model, metrics=metrics, device=config.device,
                    batch_size=config.batch_size)
    tester.test()
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug
    if config.debug:
        train_data = train_data[0:30]
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(
        open(os.path.join(config.data_path, config.vocabulary_name), "rb"))

    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)
    elif task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                                  output_dim=config.class_num, hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers, dropout=config.dropout)
    elif task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         class_num=config.class_num, kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                         static=config.static, in_channels=config.in_channels)
    elif task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    # elif task_name == "cnn_w2v":
    #     text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
    #                          class_num=config.class_num, kernel_num=config.kernel_num,
    #                          kernel_sizes=config.kernel_sizes, dropout=config.dropout,
    #                          static=config.static, in_channels=config.in_channels,
    #                          weight=weight)
    elif task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)
    # elif task_name == "bert":
    #     text_model = BertModel.from_pretrained(config.bert_path)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    logs = FitlogCallback(dev_data)
    f1 = F1_score(pred='output', target='target')

    trainer = Trainer(train_data=train_data, model=text_model, loss=BCEWithLogitsLoss(),
                      batch_size=config.batch_size, check_code_level=-1,
                      metrics=f1, metric_key='f1',
                      n_epochs=config.epoch, dev_data=dev_data,
                      save_path=config.save_path, print_every=config.print_every,
                      validate_every=config.validate_every,
                      optimizer=optimizer, use_tqdm=False,
                      device=config.device, callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(
        dev_data,
        text_model,
        metrics=f1,
        device=config.device,
        batch_size=config.batch_size,
    )
    tester.test()
print("Total Number of Words:", m) rnn_text_model = RNN.RNN_Text(vocab_size=m, input_size=50, hidden_layer_size=128, target_size=k, dropout=0.1) cnn_text_model = CNN.CNN_Text(vocab_size=m, input_size=50, target_size=k, dropout=0.05) model = rnn_text_model # ModelLoader.load_pytorch(model, "model_ckpt_large_CNN.pkl") trainer = Trainer( train_data=train_set, model=model, loss=CrossEntropyLoss(pred='pred', target='label'), n_epochs=50, batch_size=16, metrics=AccuracyMetric(pred='pred', target='label'), dev_data=dev_set, optimizer=Adam(lr=1e-3), callbacks=[FitlogCallback(data=test_set)] ) trainer.train() # saver = ModelSaver("model_ckpt_large_RNN.pkl") # saver.save_pytorch(model) tester = Tester( data=train_set, model=model, metrics=AccuracyMetric(pred='pred', target='label'), batch_size=16, ) tester.test()
                    embed_dim=128, hidden_dim=hidden_units, output_dim=8)
# mymodel = load_model(mymodel, './model/best_TextModel_acc_2019-06-28-09-07-50')

trainer = Trainer(
    train_data=train_data,
    model=mymodel,
    loss=CrossEntropyLoss(pred='pred', target='target'),
    # loss=SkipBudgetLoss(pred='pred', target='target', updated_states='updated_states'),
    metrics=[AccuracyMetric(), UsedStepsMetric()],
    n_epochs=30,
    batch_size=batch_size,
    print_every=-1,
    validate_every=-1,
    dev_data=test_data,
    save_path='./model',
    optimizer=Adam(lr=learning_rate, weight_decay=0),
    check_code_level=0,
    device="cuda",
    metric_key='acc',
    use_tqdm=False,
    callbacks=[FitlogCallback(test_data)])

start = time.perf_counter()  # time.clock() was removed in Python 3.8
trainer.train()
end = time.perf_counter()
training_time = end - start
print('total training time:%fs' % (end - start))
fitlog.add_hyper({'time': training_time})
fitlog.finish()