def test():
    """Score the single hard-coded pair ``"5::five"`` with the saved matcher.

    The pair is pushed through the same preprocessing helpers used elsewhere
    in this file (``addWords``, ``addWordPiece``, ``processItem``,
    ``processNum``, ``addSeqlen``), the pickled model is loaded from
    ``../models/bert_model_max_triple.pkl`` and the softmax over the model's
    ``'pred'`` output is returned.

    :return: tensor of class probabilities (softmax over dim 1), one row per
        instance in the test set.
    """
    from fastNLP import DataSetIter, DataSet

    # Label convention noted by the original author: 0 for not match, 1 for match.
    testset = DataSet({"raw_words": ["5::five"]})
    testset.apply(addWords, new_field_name="p_words")
    testset.apply(addWordPiece, new_field_name="t_words")
    testset.apply(processItem, new_field_name="word_pieces")
    testset.apply(processNum, new_field_name="word_nums")
    testset.apply(addSeqlen, new_field_name="seq_len")
    testset.field_arrays["word_pieces"].is_input = True
    testset.field_arrays["seq_len"].is_input = True
    testset.field_arrays["word_nums"].is_input = True

    from fastNLP.io import ModelLoader

    loader = ModelLoader()
    if torch.cuda.is_available():
        model = loader.load_pytorch_model(
            "../models/bert_model_max_triple.pkl")
    else:
        # Remap storages so a checkpoint saved on GPU loads on a CPU-only host.
        model = torch.load("../models/bert_model_max_triple.pkl",
                           map_location="cpu")
    model.eval()

    test_batch = DataSetIter(batch_size=1, dataset=testset, sampler=None)
    # The model does not move while we iterate, so resolve its device once.
    device = _get_model_device(model)
    outputs = []
    # Pure inference: do not build an autograd graph for the forward passes.
    with torch.no_grad():
        for batch_x, batch_y in test_batch:
            _move_dict_value_to_device(batch_x, batch_y, device=device)
            outputs.append(
                model.forward(batch_x["word_pieces"], batch_x["word_nums"],
                              batch_x["seq_len"])['pred'])
    outputs = torch.cat(outputs)
    outputs = torch.nn.functional.softmax(outputs, dim=1)
    return outputs
def test_case3(self):
    """Moving a model with ``device=None`` must leave its device untouched."""
    net = Model()
    expected = _get_model_device(net)
    net = _move_model_to_device(net, None)
    assert expected == _get_model_device(net), "The device should not change."

    # Everything below needs a GPU; nothing more to check without one.
    if not torch.cuda.is_available():
        return

    net.cuda()
    expected = _get_model_device(net)
    net = _move_model_to_device(net, None)
    assert expected == _get_model_device(net), "The device should not change."

    # A DataParallel wrapper accepts device=None but must reject 'cpu'.
    net = nn.DataParallel(net, device_ids=[0])
    _move_model_to_device(net, None)
    with self.assertRaises(Exception):
        _move_model_to_device(net, 'cpu')
def predict(self, data: DataSet, seq_len_field_name=None):
    r"""Run inference with the already-trained network.

    The network is switched to eval mode for the duration of the call and its
    previous training flag is restored afterwards, even if prediction raises.

    :param fastNLP.DataSet data: the dataset to predict on
    :param str seq_len_field_name: name of the field holding sequence lengths;
        when given, per-token outputs are truncated to each sample's length
    :return: dict mapping each output key of the prediction function to the
        collected results
    :raises ValueError: if ``data`` is not a ``DataSet`` or the length field
        is not present in it
    """
    if not isinstance(data, DataSet):
        raise ValueError("Only Dataset class is allowed, not {}.".format(
            type(data)))
    if seq_len_field_name is not None and seq_len_field_name not in data.field_arrays:
        raise ValueError("Field name {} not found in DataSet {}.".format(
            seq_len_field_name, data))

    prev_training = self.network.training
    self.network.eval()
    try:
        network_device = _get_model_device(self.network)
        batch_output = defaultdict(list)
        data_iterator = DataSetIter(data,
                                    batch_size=self.batch_size,
                                    sampler=SequentialSampler(),
                                    as_numpy=False)

        # Prefer a dedicated ``predict`` method; fall back to ``forward``.
        if hasattr(self.network, "predict"):
            predict_func = self.network.predict
        else:
            predict_func = self.network.forward

        with torch.no_grad():
            for batch_x, batch_y in data_iterator:
                _move_dict_value_to_device(batch_x, batch_y,
                                           device=network_device)
                # Keep only the inputs the prediction function accepts.
                refined_batch_x = _build_args(predict_func, **batch_x)
                prediction = predict_func(**refined_batch_x)

                if seq_len_field_name is not None:
                    seq_lens = batch_x[seq_len_field_name].tolist()

                for key, value in prediction.items():
                    value = value.cpu().numpy()
                    if len(value.shape) == 1 or (len(value.shape) == 2
                                                 and value.shape[1] == 1):
                        # One value per sample: flatten into the result list.
                        batch_output[key].extend(value.tolist())
                    elif seq_len_field_name is not None:
                        # Per-position output: cut each row at its true length.
                        batch_output[key].extend(
                            value[idx, :seq_len]
                            for idx, seq_len in enumerate(seq_lens))
                    else:
                        batch_output[key].append(value)
    finally:
        # Always hand the network back in the mode the caller left it in.
        self.network.train(prev_training)
    return batch_output
def produceCandidateTripleSlow(raw_phrase, Candidate_phrases, model,
                               Candidate_hpos_sub, threshold):
    """Pick the candidate phrase the BERT matcher scores highest for one phrase.

    Every candidate is paired with ``raw_phrase`` as ``"raw::candidate"`` and
    scored by ``model``. The softmax column 2 is checked first, then column 1;
    the first column whose maximum reaches ``threshold`` decides the result.
    Intended for processing a single phrase at a time.

    :param raw_phrase: the phrase to match
    :param Candidate_phrases: candidate phrases to compare against
    :param model: matcher exposing ``forward(word_pieces, word_nums, seq_len)``
        returning a dict with a ``'pred'`` entry
    :param Candidate_hpos_sub: entries aligned index-for-index with
        ``Candidate_phrases``; the winning one is returned
    :param threshold: minimum probability required to accept a match
    :return: ``(entry, score, "2")`` or ``(entry, score, "1")`` for a match,
        ``("None", None, "0")`` when nothing reaches the threshold or there
        are no candidates
    """
    from fastNLP.core.utils import _move_dict_value_to_device
    from fastNLP.core.utils import _get_model_device
    from fastNLP import DataSet
    from fastNLP import DataSetIter
    from my_bert_match import addWordPiece, addSeqlen, addWords, processItem, processNum

    p_Candidate_phrases = [
        raw_phrase + "::" + item for item in Candidate_phrases
    ]
    # Nothing to score: report "no match" instead of failing on an empty batch.
    if not p_Candidate_phrases:
        return "None", None, "0"

    Candidate_dataset = DataSet({"raw_words": p_Candidate_phrases})
    Candidate_dataset.apply(addWords, new_field_name="p_words")
    Candidate_dataset.apply(addWordPiece, new_field_name="t_words")
    Candidate_dataset.apply(processItem, new_field_name="word_pieces")
    Candidate_dataset.apply(processNum, new_field_name="word_nums")
    Candidate_dataset.apply(addSeqlen, new_field_name="seq_len")
    Candidate_dataset.field_arrays["word_pieces"].is_input = True
    Candidate_dataset.field_arrays["seq_len"].is_input = True
    Candidate_dataset.field_arrays["word_nums"].is_input = True

    test_batch = DataSetIter(batch_size=10,
                             dataset=Candidate_dataset,
                             sampler=None)
    device = _get_model_device(model)
    outputs = []
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for batch_x, batch_y in test_batch:
            _move_dict_value_to_device(batch_x, batch_y, device=device)
            outputs.append(
                model.forward(batch_x["word_pieces"], batch_x["word_nums"],
                              batch_x["seq_len"])['pred'])
    outputs = torch.cat(outputs)
    outputs = torch.nn.functional.softmax(outputs,
                                          dim=1).cpu().detach().numpy()

    results_2 = outputs[:, 2]
    results_1 = outputs[:, 1]

    # If column 2 already yields a confident match, return it right away.
    best_2 = int(np.argmax(results_2))
    if results_2[best_2] >= threshold:
        return Candidate_hpos_sub[best_2], results_2[best_2], "2"

    best_1 = int(np.argmax(results_1))
    if results_1[best_1] >= threshold:
        return Candidate_hpos_sub[best_1], results_1[best_1], "1"

    return "None", None, "0"
def predict(self, data: DataSet, seq_len_field_name=None):
    r"""Run inference over ``data`` with a progress bar and collect outputs.

    The network is put into eval mode. Its ``predict`` method is used; when
    the network object itself has no such attribute (the original comments
    mention a multi-GPU wrapper), ``self.network.module.predict`` is used
    instead.

    :param fastNLP.DataSet data: the dataset to predict on
    :param str seq_len_field_name: name of the field holding sequence lengths;
        when given, per-token outputs are truncated to each sample's length
    :return: dict mapping each output key of the prediction function to the
        collected results
    :raises ValueError: if ``data`` is not a ``DataSet`` or the length field
        is not present in it
    """
    if not isinstance(data, DataSet):
        raise ValueError(
            "Only Dataset class is allowed, not {}.".format(type(data)))
    if seq_len_field_name is not None and seq_len_field_name not in data.field_arrays:
        raise ValueError("Field name {} not found in DataSet {}.".format(
            seq_len_field_name, data))

    self.network.eval()
    network_device = _get_model_device(self.network)
    batch_output = defaultdict(list)
    data_iterator = DataSetIter(data,
                                batch_size=self.batch_size,
                                sampler=SequentialSampler(),
                                as_numpy=False)

    # ``ModuleAttributeError`` only exists in some PyTorch releases and is a
    # subclass of AttributeError there, so catching AttributeError covers
    # every version without depending on that name.
    try:
        predict_func = self.network.predict
    except AttributeError:
        predict_func = self.network.module.predict

    with torch.no_grad():
        for batch_x, batch_y in tqdm(data_iterator, total=len(data_iterator)):
            _move_dict_value_to_device(batch_x, batch_y,
                                       device=network_device)
            # Keep only the inputs the prediction function accepts.
            refined_batch_x = _build_args(predict_func, **batch_x)
            prediction = predict_func(**refined_batch_x)

            if seq_len_field_name is not None:
                seq_lens = batch_x[seq_len_field_name].tolist()

            for key, value in prediction.items():
                value = value.cpu().numpy()
                if len(value.shape) == 1 or (len(value.shape) == 2
                                             and value.shape[1] == 1):
                    # One value per sample: flatten into the result list.
                    batch_output[key].extend(value.tolist())
                elif seq_len_field_name is not None:
                    # Per-position output: cut each row at its true length.
                    batch_output[key].extend(
                        value[idx, :seq_len]
                        for idx, seq_len in enumerate(seq_lens))
                else:
                    batch_output[key].append(value)

    return batch_output
def _save_model(model, model_name, save_dir, only_param=False): """ 存储不含有显卡信息的 state_dict 或 model :param model: :param model_name: :param save_dir: 保存的 directory :param only_param: :return: """ model_path = os.path.join(save_dir, model_name) if not os.path.isdir(save_dir): os.makedirs(save_dir, exist_ok=True) if isinstance(model, nn.DataParallel): model = model.module if only_param: state_dict = model.state_dict() for key in state_dict: state_dict[key] = state_dict[key].cpu() torch.save(state_dict, model_path) else: _model_device = _get_model_device(model) model.cpu() torch.save(model, model_path) model.to(_model_device)
def produceCandidateTriple(Candidate_hpos_sub_total, model, hpo_tree,
                           threshold):
    """Pick, for each phrase, the candidate the BERT matcher scores highest.

    All ``"raw::candidate"`` pairs of all phrases are scored in one batched
    pass; the scores are then split back per phrase. For each phrase the
    softmax column 2 is checked first, then column 1; the first column whose
    maximum reaches ``threshold`` decides the result.

    :param Candidate_hpos_sub_total: iterable of
        ``(raw_phrase, candidate_phrases, candidate_hpos)`` triples, where
        ``candidate_hpos`` is aligned index-for-index with
        ``candidate_phrases``
    :param model: matcher exposing ``forward(word_pieces, word_nums, seq_len)``
        returning a dict with a ``'pred'`` entry
    :param hpo_tree: not used by this function; kept for interface
        compatibility
    :param threshold: minimum probability required to accept a match
    :return: list with one ``[entry, score, tag]`` item per input triple;
        ``["None", None, "0"]`` when nothing reaches the threshold or the
        phrase has no candidates
    """
    from fastNLP.core.utils import _move_dict_value_to_device
    from fastNLP.core.utils import _get_model_device
    from fastNLP import DataSet
    from fastNLP import DataSetIter
    from my_bert_match import addWordPiece, addSeqlen, addWords, processItem, processNum

    p_Candidate_phrases = []
    phrase_nums_per_hpo = []
    Candidate_hpos = []
    for raw_phrase, Candidate_phrase, Candidate_hpos_sub in Candidate_hpos_sub_total:
        p_Candidate_phrases.extend(
            [raw_phrase + "::" + item for item in Candidate_phrase])
        phrase_nums_per_hpo.append(len(Candidate_phrase))
        Candidate_hpos.append(Candidate_hpos_sub)

    # No pairs at all: every phrase is a "no match"; skip the model entirely
    # rather than failing on an empty batch.
    if not p_Candidate_phrases:
        return [["None", None, "0"] for _ in phrase_nums_per_hpo]

    Candidate_dataset = DataSet({"raw_words": p_Candidate_phrases})
    Candidate_dataset.apply(addWords, new_field_name="p_words")
    Candidate_dataset.apply(addWordPiece, new_field_name="t_words")
    Candidate_dataset.apply(processItem, new_field_name="word_pieces")
    Candidate_dataset.apply(processNum, new_field_name="word_nums")
    Candidate_dataset.apply(addSeqlen, new_field_name="seq_len")
    Candidate_dataset.field_arrays["word_pieces"].is_input = True
    Candidate_dataset.field_arrays["seq_len"].is_input = True
    Candidate_dataset.field_arrays["word_nums"].is_input = True

    test_batch = DataSetIter(batch_size=128,
                             dataset=Candidate_dataset,
                             sampler=None)
    device = _get_model_device(model)
    outputs = []
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for batch_x, batch_y in test_batch:
            _move_dict_value_to_device(batch_x, batch_y, device=device)
            outputs.append(
                model.forward(batch_x["word_pieces"], batch_x["word_nums"],
                              batch_x["seq_len"])['pred'])
    outputs = torch.cat(outputs)
    outputs = torch.nn.functional.softmax(outputs,
                                          dim=1).cpu().detach().numpy()

    results_2 = outputs[:, 2]
    results_1 = outputs[:, 1]

    # Split the flat score arrays back into one group per input phrase.
    ans = []
    index = 0
    for group_num, Candidate_hpos_sub in zip(phrase_nums_per_hpo,
                                             Candidate_hpos):
        g_results_2 = results_2[index:index + group_num]
        g_results_1 = results_1[index:index + group_num]
        index += group_num

        # A phrase without candidates cannot match anything.
        if group_num == 0:
            ans.append(["None", None, "0"])
            continue

        # If column 2 already yields a confident match, take it.
        best_2 = int(np.argmax(g_results_2))
        if g_results_2[best_2] >= threshold:
            ans.append([Candidate_hpos_sub[best_2], g_results_2[best_2], "2"])
            continue

        best_1 = int(np.argmax(g_results_1))
        if g_results_1[best_1] >= threshold:
            ans.append([Candidate_hpos_sub[best_1], g_results_1[best_1], "1"])
            continue

        ans.append(["None", None, "0"])
    return ans
def _beam_search_generate(decoder: Decoder, tokens=None, past=None, max_length=20, num_beams=4, temperature=1.0, top_k=50, top_p=1.0, bos_token_id=None, eos_token_id=None, do_sample=True, repetition_penalty=1.0, length_penalty=None, pad_token_id=0) -> torch.LongTensor:
    """Generate sequences with beam search (optionally sampled).

    :param decoder: object providing ``decode(tokens, past)`` returning
        ``(scores, past)`` and ``reorder_past(indices, past)``
    :param tokens: prefix token ids; when None a column of ``bos_token_id``
        is created with the sample count taken from ``past.num_samples()``
    :param past: decoder state passed through ``decode`` / ``reorder_past``
    :param max_length: generation stops once the token matrix reaches this
        many columns
    :param num_beams: number of beams kept per sample
    :param temperature: when sampling, scores are divided by it if it is
        positive and not 1
    :param top_k: forwarded to ``top_k_top_p_filtering`` when sampling
    :param top_p: forwarded to ``top_k_top_p_filtering`` when sampling
    :param bos_token_id: start token used when ``tokens`` is None
    :param eos_token_id: end-of-sequence token id
    :param do_sample: sample candidates with ``torch.multinomial`` instead of
        taking the top-k of the log-softmax
    :param repetition_penalty: scores of already generated tokens are
        multiplied by it when negative and divided by it otherwise
    :param length_penalty: forwarded to ``BeamHypotheses``
    :param pad_token_id: fill value of the returned matrix
    :return: ``(batch_size, longest_hypothesis + 1)`` tensor of token ids
    :raises RuntimeError: if ``tokens`` is None and ``bos_token_id`` or
        ``past`` is missing, or the sample count cannot be read from ``past``
    """
    # NOTE(review): ``next_tokens.ne(eos_token_id)`` / ``.eq(eos_token_id)``
    # below run unconditionally, while the tail of the function explicitly
    # handles ``eos_token_id is None`` — confirm None is really supported.
    device = _get_model_device(decoder)
    if tokens is None:
        if bos_token_id is None:
            raise RuntimeError(
                "You have to specify either `tokens` or `bos_token_id`.")
        if past is None:
            raise RuntimeError(
                "You have to specify either `past` or `tokens`.")
        batch_size = past.num_samples()
        if batch_size is None:
            raise RuntimeError(
                "Cannot infer the number of samples from `past`.")
        tokens = torch.full([batch_size, 1], fill_value=bos_token_id, dtype=torch.long).to(device)
    batch_size = tokens.size(0)
    if past is not None:
        assert past.num_samples(
        ) == batch_size, "The number of samples in `tokens` and `past` should match."

    # The whole prefix is passed to the decoder here.
    scores, past = decoder.decode(tokens, past)
    vocab_size = scores.size(1)
    assert vocab_size >= num_beams, "num_beams should be smaller than the number of vocabulary size."

    # Choose the first ``num_beams`` continuations for every sample.
    if do_sample:
        probs = F.softmax(scores, dim=-1) + 1e-12
        next_tokens = torch.multinomial(
            probs, num_samples=num_beams)  # (batch_size, num_beams)
        logits = probs.log()
        next_scores = logits.gather(
            dim=1, index=next_tokens)  # (batch_size, num_beams)
    else:
        scores = F.log_softmax(scores, dim=-1)  # (batch_size, vocab_size)
        # yields (batch_size, num_beams), (batch_size, num_beams)
        next_scores, next_tokens = torch.topk(scores, num_beams, dim=1, largest=True, sorted=True)

    # Repeat every sample ``num_beams`` times in the decoder state and prefix.
    indices = torch.arange(batch_size, dtype=torch.long).to(device)
    indices = indices.repeat_interleave(num_beams)
    decoder.reorder_past(indices, past)

    tokens = tokens.index_select(
        dim=0, index=indices)  # batch_size * num_beams x length
    # Tokens generated so far: (batch_size * num_beams, cur_len)
    token_ids = torch.cat([tokens, next_tokens.view(-1, 1)], dim=-1)
    dones = [False] * batch_size
    tokens = next_tokens.view(-1, 1)

    beam_scores = next_scores.view(-1)  # batch_size * num_beams

    # Length of the sequences generated so far.
    cur_len = token_ids.size(1)

    hypos = [
        BeamHypotheses(num_beams, max_length, length_penalty, early_stopping=False) for _ in range(batch_size)
    ]
    # Row offset of each sample's first beam: 0, num_beams, 2*num_beams, ...
    batch_inds_with_numbeams_interval = (torch.arange(batch_size) * num_beams).view(-1, 1).to(token_ids)

    while cur_len < max_length:
        scores, past = decoder.decode(tokens, past)
        if repetition_penalty != 1.0:
            # Penalize tokens that already appear in each beam.
            token_scores = scores.gather(dim=1, index=token_ids)
            lt_zero_mask = token_scores.lt(0).float()
            ge_zero_mask = lt_zero_mask.eq(0).float()
            token_scores = lt_zero_mask * repetition_penalty * token_scores + ge_zero_mask / repetition_penalty * token_scores
            scores.scatter_(dim=1, index=token_ids, src=token_scores)

        if do_sample:
            if temperature > 0 and temperature != 1:
                scores = scores / temperature

            # Keep one extra candidate in case one of them is eos.
            scores = top_k_top_p_filtering(scores, top_k, top_p, min_tokens_to_keep=num_beams + 1)
            # 1e-12 is added to avoid the problem addressed in
            # https://github.com/pytorch/pytorch/pull/27523
            probs = F.softmax(scores, dim=-1) + 1e-12

            # Draw one more than num_beams so at least one is not eos.
            _tokens = torch.multinomial(probs, num_samples=num_beams + 1)  # batch_size' x (num_beams+1)

            logits = probs.log()
            # Avoid selecting everything from one beam, and account for eos
            # being among the draws.
            _scores = logits.gather(
                dim=1, index=_tokens)  # batch_size' x (num_beams+1)
            _scores = _scores + beam_scores[:, None]  # batch_size' x (num_beams+1)
            # From these, pick the top 2*num_beams.
            _scores = _scores.view(batch_size, num_beams * (num_beams + 1))
            next_scores, ids = _scores.topk(2 * num_beams, dim=1, largest=True, sorted=True)
            _tokens = _tokens.view(batch_size, num_beams * (num_beams + 1))
            next_tokens = _tokens.gather(
                dim=1, index=ids)  # (batch_size, 2*num_beams)
            from_which_beam = ids // (num_beams + 1)  # (batch_size, 2*num_beams)
        else:
            scores = F.log_softmax(
                scores, dim=-1)  # (batch_size * num_beams, vocab_size)
            _scores = scores + beam_scores[:, None]  # (batch_size * num_beams, vocab_size)
            _scores = _scores.view(batch_size, -1)  # (batch_size, num_beams*vocab_size)
            next_scores, ids = torch.topk(_scores, 2 * num_beams, dim=1, largest=True, sorted=True)
            from_which_beam = ids // vocab_size  # (batch_size, 2*num_beams)
            next_tokens = ids % vocab_size  # (batch_size, 2*num_beams)

        # Assemble the next step's batch: decide which candidates survive.
        next_scores, sorted_inds = next_scores.sort(dim=-1, descending=True)
        next_tokens = next_tokens.gather(dim=1, index=sorted_inds)
        from_which_beam = from_which_beam.gather(dim=1, index=sorted_inds)

        not_eos_mask = next_tokens.ne(eos_token_id)  # True where the token is not eos
        keep_mask = not_eos_mask.cumsum(dim=1).le(num_beams)  # True where the candidate is kept
        keep_mask = not_eos_mask.__and__(keep_mask)  # True where the search continues next step

        _next_tokens = next_tokens.masked_select(keep_mask).view(-1, 1)
        _from_which_beam = from_which_beam.masked_select(keep_mask).view(
            batch_size, num_beams)  # beam each kept token came from
        _next_scores = next_scores.masked_select(keep_mask).view(
            batch_size, num_beams)
        beam_scores = _next_scores.view(-1)

        # Update the decoder state to follow the surviving beams.
        reorder_inds = (batch_inds_with_numbeams_interval + _from_which_beam).view(-1)  # flattened to 1-D
        decoder.reorder_past(reorder_inds, past)

        flag = True
        if cur_len + 1 == max_length:
            # Last step: every one of the top num_beams candidates is closed.
            eos_batch_idx = torch.arange(batch_size).to(
                next_tokens).repeat_interleave(repeats=num_beams, dim=0)
            eos_beam_ind = torch.arange(num_beams).to(token_ids).repeat(
                batch_size)  # position within the candidate list
            eos_beam_idx = from_which_beam[:, :num_beams].reshape(
                -1)  # beam the candidate came from
        else:
            # Close the sequences among each sample's top num_beams candidates
            # that produced eos; True marks a sequence that has to finish.
            effective_eos_mask = next_tokens[:, :num_beams].eq(
                eos_token_id)  # batch_size x num_beams
            if effective_eos_mask.sum().gt(0):
                eos_batch_idx, eos_beam_ind = effective_eos_mask.nonzero(
                    as_tuple=True)
                # from_which_beam is (batch_size, 2*num_beams), hence the
                # stride of 2*num_beams when indexing its flattened view.
                eos_beam_idx = eos_batch_idx * num_beams * 2 + eos_beam_ind
                eos_beam_idx = from_which_beam.view(-1)[
                    eos_beam_idx]  # the beam the eos actually came from
            else:
                flag = False
        if flag:
            for batch_idx, beam_ind, beam_idx in zip(eos_batch_idx.tolist(), eos_beam_ind.tolist(), eos_beam_idx.tolist()):
                if not dones[batch_idx]:
                    score = next_scores[batch_idx, beam_ind].item()
                    hypos[batch_idx].add(
                        token_ids[batch_idx * num_beams + beam_idx, :cur_len].clone(), score)

        # Rebuild token_ids for the surviving beams and append the new tokens.
        tokens = _next_tokens
        token_ids = torch.cat(
            [token_ids.index_select(index=reorder_inds, dim=0), tokens], dim=-1)

        for batch_idx in range(batch_size):
            dones[batch_idx] = dones[batch_idx] or hypos[batch_idx].is_done(
                next_scores[batch_idx, 0].item())

        cur_len += 1

        if all(dones):
            break

    # select the best hypotheses
    tgt_len = token_ids.new(batch_size)
    best = []

    for i, hypotheses in enumerate(hypos):
        best_hyp = max(hypotheses.hyp, key=lambda x: x[0])[1]
        tgt_len[i] = len(best_hyp) + 1  # +1 for the <EOS> symbol
        best.append(best_hyp)

    # generate target batch
    decoded = token_ids.new(batch_size, tgt_len.max().item()).fill_(pad_token_id)
    for i, hypo in enumerate(best):
        decoded[i, :tgt_len[i] - 1] = hypo
        if eos_token_id is not None:
            decoded[i, tgt_len[i] - 1] = eos_token_id
    return decoded
def _no_beam_search_generate(decoder: Decoder, tokens=None, past=None, max_length=20, temperature=1.0, top_k=50,
                             top_p=1.0, bos_token_id=None, eos_token_id=None, do_sample=True,
                             repetition_penalty=1.0, length_penalty=1.0, pad_token_id=0):
    """Generate sequences greedily or by sampling, without beam search.

    :param decoder: object providing ``decode(tokens, past)`` returning
        ``(scores, past)``
    :param tokens: prefix token ids; when None a column of ``bos_token_id``
        is created with the sample count taken from ``past.num_samples()``
    :param past: decoder state passed through ``decode``
    :param max_length: generation stops once the token matrix reaches this
        many columns
    :param temperature: when sampling, scores are divided by it if it is
        positive and not 1
    :param top_k: forwarded to ``top_k_top_p_filtering`` when sampling
    :param top_p: forwarded to ``top_k_top_p_filtering`` when sampling
    :param bos_token_id: start token used when ``tokens`` is None
    :param eos_token_id: end-of-sequence token id; None disables EOS handling
    :param do_sample: sample the next token instead of taking the argmax
    :param repetition_penalty: scores of already generated tokens are
        multiplied by it when negative and divided by it otherwise
    :param length_penalty: when not 1, every score except the EOS score is
        divided by ``cur_len ** length_penalty``
    :param pad_token_id: token written for samples that already finished
    :return: tensor of token ids, prefix included, one row per sample
    :raises RuntimeError: if ``tokens`` is None and ``bos_token_id`` or
        ``past`` is missing, or the sample count cannot be read from ``past``
    """
    device = _get_model_device(decoder)
    if tokens is None:
        if bos_token_id is None:
            raise RuntimeError(
                "You have to specify either `tokens` or `bos_token_id`.")
        if past is None:
            raise RuntimeError(
                "You have to specify either `past` or `tokens`.")
        batch_size = past.num_samples()
        if batch_size is None:
            raise RuntimeError(
                "Cannot infer the number of samples from `past`.")
        tokens = torch.full([batch_size, 1], fill_value=bos_token_id, dtype=torch.long).to(device)
    batch_size = tokens.size(0)
    if past is not None:
        assert past.num_samples(
        ) == batch_size, "The number of samples in `tokens` and `past` should match."

    # NaN never compares equal, so with no EOS id no sample is ever marked done.
    if eos_token_id is None:
        _eos_token_id = float('nan')
    else:
        _eos_token_id = eos_token_id

    # NOTE(review): the scores of this call are overwritten by the first
    # decode inside the loop, which receives the same ``tokens`` again —
    # presumably this call only advances ``past``; confirm against Decoder.
    scores, past = decoder.decode(tokens, past)
    token_ids = tokens.clone()
    cur_len = token_ids.size(1)
    dones = token_ids.new_zeros(batch_size).eq(1)

    while cur_len < max_length:
        scores, past = decoder.decode(tokens, past)  # batch_size x vocab_size, Past

        if repetition_penalty != 1.0:
            # Penalize tokens that already appear in each sequence.
            token_scores = scores.gather(dim=1, index=token_ids)
            lt_zero_mask = token_scores.lt(0).float()
            ge_zero_mask = lt_zero_mask.eq(0).float()
            token_scores = lt_zero_mask * repetition_penalty * token_scores + ge_zero_mask / repetition_penalty * token_scores
            scores.scatter_(dim=1, index=token_ids, src=token_scores)

        if eos_token_id is not None and length_penalty != 1.0:
            token_scores = scores / cur_len ** length_penalty  # batch_size x vocab_size
            eos_mask = scores.new_ones(scores.size(1))
            eos_mask[eos_token_id] = 0
            eos_mask = eos_mask.unsqueeze(0).eq(1)
            # Rescale every score except EOS. ``torch.where`` selects
            # position by position; ``masked_scatter`` would consume the
            # source sequentially and shift the values that follow the EOS
            # column (and every later row).
            scores = torch.where(eos_mask, token_scores, scores)

        if do_sample:
            if temperature > 0 and temperature != 1:
                scores = scores / temperature

            scores = top_k_top_p_filtering(scores, top_k, top_p, min_tokens_to_keep=2)
            # 1e-12 is added to avoid the problem addressed in
            # https://github.com/pytorch/pytorch/pull/27523
            probs = F.softmax(scores, dim=-1) + 1e-12

            next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)  # batch_size
        else:
            next_tokens = torch.argmax(scores, dim=-1)  # batch_size

        # Samples that already finished only receive padding from now on.
        next_tokens = next_tokens.masked_fill(dones, pad_token_id)
        tokens = next_tokens.unsqueeze(1)

        token_ids = torch.cat([token_ids, tokens], dim=-1)  # batch_size x max_len

        end_mask = next_tokens.eq(_eos_token_id)
        dones = dones.__or__(end_mask)
        cur_len += 1

        if dones.min() == 1:
            break

    if eos_token_id is not None:
        if cur_len == max_length:
            # Sequences that hit the length limit without EOS get their last
            # token replaced by EOS.
            token_ids[:, -1].masked_fill_(~dones, eos_token_id)

    return token_ids
def predict(instance):
    """Return the predicted target word for one instance.

    ``instance['words']`` is wrapped into a batch of one, moved to the
    model's device and passed to ``model.predict``; the ``'pred'`` entry is
    mapped back to a word through ``vocab_target``.
    """
    batch = torch.LongTensor([instance['words']]).to(
        device=_get_model_device(model))
    label_index = int(model.predict(batch)['pred'])
    return vocab_target.to_word(label_index)
def train():
    """Train the sentence-matching BERT model and save it to disk.

    Loads ``../models/all4bert_new_triple.txt``, splits it into train,
    validation and test sets, runs the preprocessing helpers on each, trains
    ``BertForSentenceMatching`` (3 classes) with Adam and cross-entropy for
    ``n_epochs`` epochs, reports accuracy on the validation and test sets and
    saves the model to ``../models/bert_model_max_triple.pkl``.
    """
    # NOTE(review): the nesting of the validation/checkpoint section and of
    # ``pbar.close()`` was reconstructed from a flattened source; the layout
    # below assumes validation and saving run once per epoch — confirm.
    n_epochs = 10
    train_set = data_set_loader._load('../models/all4bert_new_triple.txt')
    # Two successive splits produce the train / validation / test sets.
    train_set, tmp_set = train_set.split(0.2)
    val_set, test_set = tmp_set.split(0.5)
    data_bundle = [train_set, val_set, test_set]

    # Derive the model input fields and the target for every split.
    for dataset in data_bundle:
        dataset.apply(addWords, new_field_name="p_words")
        dataset.apply(addWordPiece, new_field_name="t_words")
        dataset.apply(processItem, new_field_name="word_pieces")
        dataset.apply(processNum, new_field_name="word_nums")
        dataset.apply(addSeqlen, new_field_name="seq_len")
        dataset.apply(processTarget, new_field_name="target")

    # Mark which fields the batch iterator yields as inputs and as targets.
    for dataset in data_bundle:
        dataset.field_arrays["word_pieces"].is_input = True
        dataset.field_arrays["seq_len"].is_input = True
        dataset.field_arrays["word_nums"].is_input = True
        dataset.field_arrays["target"].is_target = True

    print("In total " + str(len(data_bundle)) + " datasets:")
    print("Trainset has " + str(len(train_set)) + " instances.")
    print("Validateset has " + str(len(val_set)) + " instances.")
    print("Testset has " + str(len(test_set)) + " instances.")
    train_set.print_field_meta()

    from fastNLP.models.Mybert import BertForSentenceMatching
    from fastNLP import AccuracyMetric, DataSetIter
    from fastNLP.core.utils import _pseudo_tqdm as tqdm

    # Note: the second argument is the number of classes.
    model = BertForSentenceMatching(embed, 3)
    if torch.cuda.is_available():
        model = _move_model_to_device(model, device=0)

    train_batch = DataSetIter(batch_size=16, dataset=train_set, sampler=None)
    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
    Lossfunc = torch.nn.CrossEntropyLoss()

    with tqdm(total=n_epochs, postfix='loss:{0:<6.5f}', leave=False, dynamic_ncols=True) as pbar:
        print_every = 10
        for epoch in range(1, n_epochs + 1):
            pbar.set_description_str(
                desc="Epoch {}/{}".format(epoch, n_epochs))

            avg_loss = 0
            step = 0
            for batch_x, batch_y in train_batch:
                step += 1
                _move_dict_value_to_device(batch_x, batch_y, device=_get_model_device(model))
                optimizer.zero_grad()
                output = model.forward(batch_x["word_pieces"], batch_x["word_nums"], batch_x["seq_len"])
                loss = Lossfunc(output['pred'], batch_y['target'])
                loss.backward()
                optimizer.step()
                avg_loss += loss.item()
                # Every ``print_every`` steps report the mean loss of that
                # window and start a new window.
                if step % print_every == 0:
                    avg_loss = float(avg_loss) / print_every
                    print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.6}".format(
                        epoch, step, avg_loss)
                    # NOTE(review): the bar total is ``n_epochs`` but it is
                    # advanced by step counts here — confirm this is intended.
                    pbar.update(print_every)
                    pbar.set_postfix_str(print_output)
                    avg_loss = 0

            # Accuracy on the validation set.
            metric = AccuracyMetric()
            val_batch = DataSetIter(batch_size=8, dataset=val_set, sampler=None)
            for batch_x, batch_y in val_batch:
                _move_dict_value_to_device(batch_x, batch_y, device=_get_model_device(model))
                output = model.predict(batch_x["word_pieces"], batch_x["word_nums"], batch_x["seq_len"])
                metric(output, batch_y)
            eval_result = metric.get_metric()
            print("ACC on Validate Set:", eval_result)

            # Save the whole model (not only its parameters), overwriting the
            # same file each time.
            from fastNLP.io import ModelSaver
            saver = ModelSaver("../models/bert_model_max_triple.pkl")
            saver.save_pytorch(model, param_only=False)
        pbar.close()

    # Final accuracy on the held-out test set.
    metric = AccuracyMetric()
    test_batch = DataSetIter(batch_size=8, dataset=test_set, sampler=None)
    for batch_x, batch_y in test_batch:
        _move_dict_value_to_device(batch_x, batch_y, device=_get_model_device(model))
        output = model.predict(batch_x["word_pieces"], batch_x["word_nums"], batch_x["seq_len"])
        metric(output, batch_y)
    eval_result = metric.get_metric()
    print("ACC on Test Set:", eval_result)

    from fastNLP.io import ModelSaver
    saver = ModelSaver("../models/bert_model_max_triple.pkl")
    saver.save_pytorch(model, param_only=False)