def __init__(self, train_input_file, train_target_file, test_input_file,
             test_target_file, vocab_file, num_units, layers, dropout,
             batch_size, learning_rate, output_dir, save_step=100,
             eval_step=1000, param_histogram=False, restore_model=False,
             init_train=True, init_infer=False, decode_method='greedy',
             beam_width=20):
    """Store hyper-parameters, wire up data readers, and build the
    requested graphs (train/eval and/or infer).

    NOTE(review): ``reader``, ``tf`` and ``path`` are assumed to be
    module-level imports — they are not visible from this chunk.
    """
    # Plain hyper-parameter bookkeeping.
    self.num_units = num_units
    self.layers = layers
    self.dropout = dropout
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.save_step = save_step
    self.eval_step = eval_step
    self.param_histogram = param_histogram
    self.restore_model = restore_model
    self.init_train = init_train
    self.init_infer = init_infer
    self.decode_method = decode_method
    self.beam_width = beam_width

    if init_train:
        # Streaming batch readers over the train and test corpora.
        self.train_reader = reader.SeqReader(
            train_input_file, train_target_file, vocab_file, batch_size)
        self.train_reader.start()
        self.train_data = self.train_reader.read()

        self.eval_reader = reader.SeqReader(
            test_input_file, test_target_file, vocab_file, batch_size)
        self.eval_reader.start()
        self.eval_data = self.eval_reader.read()

    # Checkpoint path and TensorBoard writer both live under output_dir.
    self.model_file = path.join(output_dir, 'model.ckpl')
    self.log_writter = tf.summary.FileWriter(output_dir)

    if init_train:
        self._init_train()
        self._init_eval()
    if init_infer:
        # Vocabulary list plus char -> index lookup used when decoding.
        self.infer_vocabs = reader.read_vocab(vocab_file)
        self.infer_vocab_indices = {
            c: i for i, c in enumerate(self.infer_vocabs)}
        self._init_infer()
        self.reload_infer_model()
def __init__(self, train_input_file, train_target_file, test_input_file,
             test_target_file, vocab_file, num_units, layers, dropout,
             batch_size, learning_rate, output_dir, save_step=100,
             eval_step=1000, restore_model=False, init_train=True,
             init_infer=False):
    """Record hyper-parameters and initialize training and/or inference.

    NOTE(review): ``reader``, ``tf`` and ``path`` are assumed to be
    module-level imports — not visible from this chunk.
    """
    self.num_units = num_units
    self.layers = layers
    self.dropout = dropout
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.save_step = save_step
    self.eval_step = eval_step
    self.restore_model = restore_model
    self.init_train = init_train
    self.init_infer = init_infer

    if init_train:
        # Reader over the training-set input/target files.
        self.train_reader = reader.SeqReader(
            train_input_file, train_target_file, vocab_file, batch_size)
        self.train_reader.start()
        self.train_data = self.train_reader.read()

        # Reader over the held-out evaluation files.
        self.eval_reader = reader.SeqReader(
            test_input_file, test_target_file, vocab_file, batch_size)
        self.eval_reader.start()
        self.eval_data = self.eval_reader.read()

    # Model checkpoint location and TensorBoard summary writer.
    self.model_file = path.join(output_dir, 'model.ckpl')
    self.log_writter = tf.summary.FileWriter(output_dir)

    if init_train:
        self._init_train()  # build the training graph
        self._init_eval()   # build the evaluation graph
    if init_infer:
        # Load the vocabulary and a char -> index mapping for decoding.
        self.infer_vocabs = reader.read_vocab(vocab_file)
        self.infer_vocab_indices = {
            c: i for i, c in enumerate(self.infer_vocabs)}
        self._init_infer()
        self.reload_infer_model()
def __init__(self, train_input_file, train_target_file, test_input_file,
             test_target_file, vocab_file, num_units, layers, dropout,
             batch_size, learning_rate, output_dir, save_step=10,
             eval_step=10, max_len=128, param_histogram=False,
             restore_model=False, init_train=True, init_infer=False):
    """Store hyper-parameters, set up logging/readers, and build the
    requested graphs. This variant caps sequences at ``max_len`` and
    instantiates a BERT similarity model.

    NOTE(review): ``reader``, ``tf``, ``path`` and ``BertSim`` are
    assumed to be module-level imports — not visible from this chunk.
    """
    self.num_units = num_units
    self.layers = layers
    self.dropout = dropout
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.save_step = save_step
    self.eval_step = eval_step
    self.param_histogram = param_histogram
    self.restore_model = restore_model
    self.init_train = init_train
    self.init_infer = init_infer
    self.max_len = max_len
    self.bert_model = BertSim()

    # Debug-level log file, truncated on every run.
    logging.basicConfig(filename='log/log_info.log', filemode="w",
                        level=logging.DEBUG)

    if init_train:
        # Length-capped readers over the train and test corpora.
        self.train_reader = reader.SeqReader(
            train_input_file, train_target_file, vocab_file, batch_size,
            max_len=max_len)
        self.train_reader.start()
        self.train_data = self.train_reader.read()

        self.eval_reader = reader.SeqReader(
            test_input_file, test_target_file, vocab_file, batch_size,
            max_len=max_len)
        self.eval_reader.start()
        self.eval_data = self.eval_reader.read()

    # Checkpoint path and TensorBoard writer under output_dir.
    self.model_file = path.join(output_dir, 'model.ckpl')
    self.log_writter = tf.summary.FileWriter(output_dir)

    if init_train:
        self._init_train()
        self._init_eval()
    if init_infer:
        # Vocabulary plus char -> index lookup used at decode time.
        self.infer_vocabs = reader.read_vocab(vocab_file)
        self.infer_vocab_indices = {
            c: i for i, c in enumerate(self.infer_vocabs)}
        self._init_infer()
        self.reload_infer_model()
def __init__(self, train_input_file, train_target_file, test_input_file,
             test_target_file, vocab_file, num_units, layers, dropout,
             batch_size, learning_rate, output_dir, save_step=100,
             eval_step=1000, param_histogram=False, restore_model=False,
             init_train=True, init_infer=False):
    """Store hyper-parameters, wire up data readers, and build the
    requested graphs (train/eval and/or infer).

    NOTE(review): ``reader`` and ``tf`` are assumed to be module-level
    imports — not visible from this chunk.
    """
    # Local import: the file's import block is outside this view, and
    # this variant did not previously use os.path (see fix below).
    import os

    self.num_units = num_units
    self.layers = layers
    self.dropout = dropout
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.save_step = save_step
    self.eval_step = eval_step
    self.param_histogram = param_histogram
    self.restore_model = restore_model
    self.init_train = init_train
    self.init_infer = init_infer

    if init_train:
        # Streaming batch readers over the train and test corpora.
        # read() is a generator yielding dicts of
        # in_seq / in_seq_len / target_seq / target_seq_len per batch.
        self.train_reader = reader.SeqReader(
            train_input_file, train_target_file, vocab_file, batch_size)
        self.train_reader.start()
        self.train_data = self.train_reader.read()

        self.eval_reader = reader.SeqReader(
            test_input_file, test_target_file, vocab_file, batch_size)
        self.eval_reader.start()
        self.eval_data = self.eval_reader.read()

    # FIX: use os.path.join instead of string concatenation with '/',
    # for portability and consistency with the other Model variants.
    self.model_file = os.path.join(output_dir, 'model.ckpl')
    self.log_writter = tf.summary.FileWriter(output_dir)

    if init_train:
        # Vocabulary is needed by the training graph in this variant.
        self.train_vocabs = reader.read_vocab(vocab_file)
        self._init_train()
        self._init_eval()
    if init_infer:
        # Vocabulary plus char -> index lookup used at decode time.
        self.infer_vocabs = reader.read_vocab(vocab_file)
        self.infer_vocab_indices = dict(
            (c, i) for i, c in enumerate(self.infer_vocabs))
        self._init_infer()
        self.reload_infer_model()
import reader

train_input_file = './data/dl-data/couplet/train/in.txt'
train_target_file = './data/dl-data/couplet/train/out.txt'
vocab_file = './data/dl-data/couplet/vocabs'
batch_size = 32

# Smoke test for SeqReader: each record yielded by read() has the shape
#   {'in_seq': [71, 459, ...], 'in_seq_len': 6,
#    'target_seq': [0, 47, ...], 'target_seq_len': 6}
train_reader = reader.SeqReader(train_input_file, train_target_file,
                                vocab_file, batch_size)
train_data = train_reader.read()
data = next(train_data)

print(train_reader.vocab_indices)
for field in ('in_seq', 'in_seq_len', 'target_seq', 'target_seq_len'):
    print(data[field])

# To map indices back to text:
#   reader.decode_text(data, reader.read_vocab(vocab_file))

# Kept for the training-graph experiment that follows.
in_seq = data['in_seq']
def __init__(self, train_input_file, train_target_file, test_input_file,
             test_target_file, vocab_file, num_units, layers, dropout,
             batch_size, learning_rate, output_dir, save_step=100,
             eval_step=1000, param_histogram=False, restore_model=False,
             init_train=True, init_infer=False):
    """Store hyper-parameters, wire up data readers, dump the (so far
    empty) graph for TensorBoard, and build the requested graphs.

    NOTE(review): ``reader``, ``tf`` and ``path`` are assumed to be
    module-level imports — not visible from this chunk.
    """
    self.num_units = num_units
    self.layers = layers
    self.dropout = dropout
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.save_step = save_step
    self.eval_step = eval_step
    self.param_histogram = param_histogram
    self.restore_model = restore_model
    self.init_train = init_train
    self.init_infer = init_infer

    if init_train:
        # Streaming batch readers over the train and test corpora.
        self.train_reader = reader.SeqReader(
            train_input_file, train_target_file, vocab_file, batch_size)
        self.train_reader.start()
        self.train_data = self.train_reader.read()

        self.eval_reader = reader.SeqReader(
            test_input_file, test_target_file, vocab_file, batch_size)
        self.eval_reader.start()
        self.eval_data = self.eval_reader.read()

    self.model_file = path.join(output_dir, 'model.ckpl')
    self.log_writter = tf.summary.FileWriter(output_dir)

    # FIX: the original opened a tf.Session here and never closed it
    # (resource leak). The session is only used to run the initializer
    # and to dump the graph, so scope it with a context manager.
    # NOTE(review): at this point the graph holds no variables yet
    # (_init_train runs later), so the initializer is a no-op — confirm
    # whether this debug block is still needed at all.
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        self.log_writter_temp = tf.summary.FileWriter(
            '../models/tf-lib/output_couplet/logs', sess.graph)

    if init_train:
        self._init_train()
        self._init_eval()
    if init_infer:
        # Vocabulary plus char -> index lookup used at decode time.
        self.infer_vocabs = reader.read_vocab(vocab_file)
        self.infer_vocab_indices = dict(
            (c, i) for i, c in enumerate(self.infer_vocabs))
        self._init_infer()
        self.reload_infer_model()
def __init__(self, train_input_file, train_target_file, test_input_file,
             test_target_file, vocab_file, num_units, layers, dropout,
             batch_size, learning_rate, output_dir, save_step=500,
             eval_step=1000, param_histogram=False, restore_model=False,
             init_train=True, init_infer=False):
    """Store hyper-parameters and initialize training and/or inference,
    emitting progress markers to stdout along the way.

    NOTE(review): ``reader``, ``tf`` and ``path`` are assumed to be
    module-level imports — not visible from this chunk.
    """
    print('=================Model模块初始化============================')
    self.num_units = num_units
    self.layers = layers
    self.dropout = dropout
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.save_step = save_step
    self.eval_step = eval_step
    self.param_histogram = param_histogram
    self.restore_model = restore_model
    self.init_train = init_train
    self.init_infer = init_infer

    if init_train:
        print('开始训练初始化,运行init_train=================================')
        # Training data is fully prepared once read() has been obtained.
        self.train_reader = reader.SeqReader(
            train_input_file, train_target_file, vocab_file, batch_size)
        self.train_reader.start()
        self.train_data = self.train_reader.read()

        # Same preparation for the evaluation data.
        self.eval_reader = reader.SeqReader(
            test_input_file, test_target_file, vocab_file, batch_size)
        self.eval_reader.start()
        self.eval_data = self.eval_reader.read()
        print('结束运行 init_train=================================')

    # Checkpoint path and TensorBoard writer under output_dir.
    self.model_file = path.join(output_dir, 'model.ckpl')
    self.log_writter = tf.summary.FileWriter(output_dir)

    if init_train:
        self._init_train()
        self._init_eval()
    if init_infer:
        # Vocabulary plus char -> index lookup used at decode time.
        self.infer_vocabs = reader.read_vocab(vocab_file)
        self.infer_vocab_indices = {
            c: i for i, c in enumerate(self.infer_vocabs)}
        self._init_infer()
        self.reload_infer_model()
    print('=================Model模块初始化结束============================')