def train(): source_data_path = "D:\\nlp语料\\各类中文对话语料\\qingyun-11w\\sources.txt" target_data_path = "D:\\nlp语料\\各类中文对话语料\\qingyun-11w\\targets.txt" src_encoding = "utf-8" tgt_encoding = "utf-8" source_split_char = " " target_split_char = " " model_path = "../modelFile/nmt/attention/model.ckpt" batch_size = 80 epochs = 40 smwf = 2 tmwf = 2 dataInfoObj, gen = load_data(source_data_path,target_data_path,source_split_char,target_split_char,source_minimum_word_frequency=smwf,target_minimum_word_frequency=tmwf,batch_size=batch_size,epochs=epochs,source_encoding=src_encoding,target_encoding=tgt_encoding) f = open("../modelFile/nmt/attention/model.dataInfoObj","wb") pickle.dump(dataInfoObj,f) f.close() src_embedding_size = 200 tgt_embedding_size = 200 is_encoder_bidirectional = True rnn_layer_size = 4 rnn_num_units = 256 cell_type = "LSTM" lr = 0.001 decoding_method = "beamSearch" attention_mechanism = "normed_bahdanau" model = AttentionSeq2SeqModel(src_vocab_size=dataInfoObj.source_vocab_size,tgt_time_step=dataInfoObj.target_max_len,tgt_vocab_size=dataInfoObj.target_vocab_size,start_token_id=dataInfoObj.target_token_2_id['<s>'],end_toekn_id=dataInfoObj.target_token_2_id['</s>'],attention_mechanism=attention_mechanism,batch_size=batch_size) model.train(model_path, gen, src_embedding_size, tgt_embedding_size, is_encoder_bidirectional,rnn_layer_size, rnn_num_units, cell_type, lr,decoding_method=decoding_method,beam_width=10)
def train(): source_data_path = "../data/letters_source2.txt" target_data_path = "../data/letters_target2.txt" #model_path = "../modelFile/testAttentionSeq2Seq/model_beam_search.ckpt" model_path = "../modelFile/testAttentionSeq2Seq/model_greedy.ckpt" batch_size = 128 epochs = 20 dataInfoObj, gen = load_data(source_data_path,target_data_path,None,None,source_minimum_word_frequency=1,target_minimum_word_frequency=1,batch_size=batch_size,epochs=epochs) #保存数据集的一些信息 f = open("../modelFile/testAttentionSeq2Seq/model.dataInfoObj","wb") pickle.dump(dataInfoObj,f) f.close() #超参数开始 src_embedding_size = 15 tgt_embedding_size = 15 ''' encoder是否双向 注意:使用bidirectional,encoder rnn的num_units变为decoder的一半,这是为了能够保证encoder_states和decoder的输入shape能对应上 ''' is_encoder_bidirectional = True rnn_layer_size = 2 rnn_num_units = 128 cell_type = "LSTM" lr = 0.001 decoding_method = "greedy" attention_mechanism = "scaled_luong" #训练 model = AttentionSeq2SeqModel(src_vocab_size=dataInfoObj.source_vocab_size,tgt_time_step=dataInfoObj.target_max_len,tgt_vocab_size=dataInfoObj.target_vocab_size,start_token_id=dataInfoObj.target_token_2_id['<s>'],end_toekn_id=dataInfoObj.target_token_2_id['</s>'],attention_mechanism=attention_mechanism,batch_size=batch_size) model.train(model_path, gen, src_embedding_size, tgt_embedding_size, is_encoder_bidirectional,rnn_layer_size, rnn_num_units, cell_type, lr,decoding_method=decoding_method,beam_width=10)
def train(): source_data_path = "D:\\nlp语料\\各类中文对话语料\\qingyun-11w\\sources.txt" target_data_path = "D:\\nlp语料\\各类中文对话语料\\qingyun-11w\\targets.txt" model_path = "../modelFile/chatbot/model_basic/model.ckpt" src_encoding = "utf-8" tgt_encoding = "utf-8" source_split_char = " " target_split_char = " " smwf = 2 #source 最小词频 tmwf = 2 #target最小词频 batch_size = 50 epochs = 40 dataInfoObj, gen = load_data(source_data_path, target_data_path, source_split_char, target_split_char, source_minimum_word_frequency=smwf, target_minimum_word_frequency=tmwf, batch_size=batch_size, epochs=epochs, source_encoding=src_encoding, target_encoding=tgt_encoding) #保存数据集的一些信息 f = open("../modelFile/chatbot/model_basic/model.dataInfoObj", "wb") pickle.dump(dataInfoObj, f) f.close() #超参数开始 src_embedding_size = 200 tgt_embedding_size = 200 ''' encoder是否双向 注意:使用bidirectional,encoder rnn的num_units变为decoder的一半,这是为了能够保证encoder_states和decoder的输入shape能对应上 ''' is_encoder_bidirectional = True rnn_layer_size = 4 rnn_num_units = 256 cell_type = "LSTM" lr = 0.001 decoding_method = "beamSearch" #训练 model = BasicSeq2SeqModel( src_vocab_size=dataInfoObj.source_vocab_size, tgt_time_step=dataInfoObj.target_max_len, tgt_vocab_size=dataInfoObj.target_vocab_size, start_token_id=dataInfoObj.target_token_2_id['<s>'], end_toekn_id=dataInfoObj.target_token_2_id['</s>']) model.train(model_path, gen, src_embedding_size, tgt_embedding_size, is_encoder_bidirectional, rnn_layer_size, rnn_num_units, cell_type, lr, decoding_method=decoding_method, beam_width=10)
import pickle

import numpy as np
import tensorflow as tf
from tensorflow.python.keras.layers.core import Dense

from utils.dataPreprocessing import load_data

'''
The data is loaded directly via dataPreprocessing.
'''
source_data_path = "../data/letters_source2.txt"
target_data_path = "../data/letters_target2.txt"
batch_size = 128
epochs = 60
dataInfoObj, gen = load_data(source_data_path, target_data_path, None, None,
                             source_minimum_word_frequency=1,
                             target_minimum_word_frequency=1,
                             batch_size=batch_size, epochs=epochs)

# Save dataset metadata
f = open("./model.dataInfoObj", "wb")
pickle.dump(dataInfoObj, f)
f.close()

# num_batch = dataInfoObj.num_samples // batch_size
source_max_len = dataInfoObj.source_max_len
source_token_list = dataInfoObj.source_token_list
source_token_2_id = dataInfoObj.source_token_2_id
target_max_len = dataInfoObj.target_max_len
target_token_list = dataInfoObj.target_token_list
target_token_2_id = dataInfoObj.target_token_2_id
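# Illustrative helper (not part of the original script): the script above only
# builds token -> id maps, so turning decoder output ids back into text needs the
# reverse lookup. This assumes target_token_2_id is a plain {token: id} dict, as
# its usage above suggests; the '<unk>' fallback token is an assumption.
target_id_2_token = {i: t for t, i in target_token_2_id.items()}

def ids_to_text(ids, id_2_token=None, end_token='</s>'):
    # Map each id back to its token, stopping at the end-of-sequence token.
    id_2_token = id_2_token or target_id_2_token
    tokens = []
    for i in ids:
        token = id_2_token.get(i, '<unk>')
        if token == end_token:
            break
        tokens.append(token)
    return " ".join(tokens)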