Esempio n. 1
0
    def __init__(self,
                 train_input_file,
                 train_target_file,
                 test_input_file,
                 test_target_file,
                 vocab_file,
                 num_units,
                 layers,
                 dropout,
                 batch_size,
                 learning_rate,
                 output_dir,
                 save_step=100,
                 eval_step=1000,
                 param_histogram=False,
                 restore_model=False,
                 init_train=True,
                 init_infer=False,
                 decode_method='greedy',
                 beam_width=20):
        """Record hyper-parameters and optionally wire up the training
        and/or inference pipelines.

        When ``init_train`` is true, a ``SeqReader`` is created and started
        for both the train and eval splits and the graphs are built via
        ``_init_train()`` / ``_init_eval()``.  When ``init_infer`` is true,
        the vocabulary is loaded, the inference graph is built and the
        latest model is reloaded.
        """
        # Plain hyper-parameter bookkeeping.
        self.num_units = num_units
        self.layers = layers
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.save_step = save_step
        self.eval_step = eval_step
        self.param_histogram = param_histogram
        self.restore_model = restore_model
        self.init_train = init_train
        self.init_infer = init_infer
        self.decode_method = decode_method
        self.beam_width = beam_width

        if init_train:
            # One reader per split; each is started, then queried once.
            self.train_reader = reader.SeqReader(
                train_input_file, train_target_file, vocab_file, batch_size)
            self.train_reader.start()
            self.train_data = self.train_reader.read()
            self.eval_reader = reader.SeqReader(
                test_input_file, test_target_file, vocab_file, batch_size)
            self.eval_reader.start()
            self.eval_data = self.eval_reader.read()

        # Checkpoint path and summary writer live under output_dir.
        self.model_file = path.join(output_dir, 'model.ckpl')
        self.log_writter = tf.summary.FileWriter(output_dir)

        if init_train:
            self._init_train()
            self._init_eval()

        if init_infer:
            self.infer_vocabs = reader.read_vocab(vocab_file)
            # Character -> index lookup used when decoding inference output.
            self.infer_vocab_indices = {
                ch: idx for idx, ch in enumerate(self.infer_vocabs)}
            self._init_infer()
            self.reload_infer_model()
    def __init__(self,
                 train_input_file,
                 train_target_file,
                 test_input_file,
                 test_target_file,
                 vocab_file,
                 num_units,
                 layers,
                 dropout,
                 batch_size,
                 learning_rate,
                 output_dir,
                 save_step=100,
                 eval_step=1000,
                 restore_model=False,
                 init_train=True,
                 init_infer=False):
        """Store hyper-parameters and optionally set up training and/or
        inference.

        Args:
            train_input_file: path to training-set input sequences.
            train_target_file: path to training-set target sequences.
            test_input_file: path to eval-set input sequences.
            test_target_file: path to eval-set target sequences.
            vocab_file: path to the vocabulary file.
            num_units / layers / dropout: network hyper-parameters,
                stored for the graph-building helpers.
            batch_size: examples per batch handed to SeqReader.
            learning_rate: stored for the training graph.
            output_dir: directory for the checkpoint and TF summaries.
            save_step / eval_step: step intervals (used by training loop).
            restore_model: whether a saved model should be restored.
            init_train: build readers plus train/eval graphs when True.
            init_infer: build the inference graph and reload it when True.
        """
        self.num_units = num_units
        self.layers = layers
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.save_step = save_step
        self.eval_step = eval_step
        self.restore_model = restore_model
        self.init_train = init_train
        self.init_infer = init_infer

        if init_train:
            # Set up the reader over the training input/target files.
            self.train_reader = reader.SeqReader(train_input_file,
                                                 train_target_file, vocab_file,
                                                 batch_size)
            self.train_reader.start()
            self.train_data = self.train_reader.read()
            # Set up the reader over the eval (test) input/target files.
            self.eval_reader = reader.SeqReader(test_input_file,
                                                test_target_file, vocab_file,
                                                batch_size)
            self.eval_reader.start()
            self.eval_data = self.eval_reader.read()

        # Checkpoint path inside the output directory.
        self.model_file = path.join(output_dir, 'model.ckpl')
        self.log_writter = tf.summary.FileWriter(output_dir)

        if init_train:
            self._init_train()  # build the training graph
            self._init_eval()  # build the evaluation graph

        if init_infer:  # inference-side setup
            self.infer_vocabs = reader.read_vocab(
                vocab_file)  # vocabulary list used for inference
            self.infer_vocab_indices = dict((c, i) for i, c in  # char -> index map
                                            enumerate(self.infer_vocabs))
            self._init_infer()  # build the inference graph
            self.reload_infer_model()
Esempio n. 3
0
    def __init__(self, train_input_file, train_target_file,
            test_input_file, test_target_file, vocab_file,
            num_units, layers, dropout,
            batch_size, learning_rate, output_dir,
            save_step = 10, eval_step = 10,max_len=128,
            param_histogram=False, restore_model=False,
            init_train=True, init_infer=False):
        """Keep hyper-parameters, configure logging, and optionally build
        the training and/or inference side of the model.

        Sequences are truncated/limited to ``max_len`` by the readers; a
        BertSim instance is kept on the model for similarity scoring.
        """
        # Hyper-parameter bookkeeping.
        self.num_units = num_units
        self.layers = layers
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.save_step = save_step
        self.eval_step = eval_step
        self.param_histogram = param_histogram
        self.restore_model = restore_model
        self.init_train = init_train
        self.init_infer = init_infer
        self.max_len = max_len
        self.bert_model = BertSim()
        # NOTE(review): configures the root logger with a hard-coded,
        # overwriting log file ('log/' must already exist) — confirm intent.
        logging.basicConfig(filename='log/log_info.log', filemode="w", level=logging.DEBUG)

        if init_train:
            # Readers for the two splits; both are started, then read once.
            self.train_reader = reader.SeqReader(
                train_input_file, train_target_file, vocab_file,
                batch_size, max_len=max_len)
            self.train_reader.start()
            self.train_data = self.train_reader.read()
            self.eval_reader = reader.SeqReader(
                test_input_file, test_target_file, vocab_file,
                batch_size, max_len=max_len)
            self.eval_reader.start()
            self.eval_data = self.eval_reader.read()

        # Checkpoint path and summary writer under output_dir.
        self.model_file = path.join(output_dir, 'model.ckpl')
        self.log_writter = tf.summary.FileWriter(output_dir)

        if init_train:
            self._init_train()
            self._init_eval()

        if init_infer:
            self.infer_vocabs = reader.read_vocab(vocab_file)
            # Character -> index map used to decode inference output.
            self.infer_vocab_indices = {
                ch: idx for idx, ch in enumerate(self.infer_vocabs)}
            self._init_infer()
            self.reload_infer_model()
Esempio n. 4
0
    def __init__(self,
                 train_input_file,
                 train_target_file,
                 test_input_file,
                 test_target_file,
                 vocab_file,
                 num_units,
                 layers,
                 dropout,
                 batch_size,
                 learning_rate,
                 output_dir,
                 save_step=100,
                 eval_step=1000,
                 param_histogram=False,
                 restore_model=False,
                 init_train=True,
                 init_infer=False):
        """Store hyper-parameters and optionally build the train/eval and
        inference pipelines.

        When ``init_train`` is True, a ``SeqReader`` per split is created,
        started, and read once; the vocabulary is loaded and the train and
        eval graphs are built.  When ``init_infer`` is True, the inference
        graph is built and the saved model reloaded.
        """
        self.num_units = num_units
        self.layers = layers
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.save_step = save_step
        self.eval_step = eval_step
        self.param_histogram = param_histogram
        self.restore_model = restore_model
        self.init_train = init_train
        self.init_infer = init_infer

        if init_train:
            # Readers produce batched input/target sequences with lengths.
            self.train_reader = reader.SeqReader(train_input_file,
                                                 train_target_file, vocab_file,
                                                 batch_size)
            self.train_reader.start()
            self.train_data = self.train_reader.read()
            self.eval_reader = reader.SeqReader(test_input_file,
                                                test_target_file, vocab_file,
                                                batch_size)
            self.eval_reader.start()
            self.eval_data = self.eval_reader.read()

        # Use path.join for portability and for consistency with the other
        # constructors in this file (was: output_dir + '/model.ckpl').
        self.model_file = path.join(output_dir, 'model.ckpl')
        self.log_writter = tf.summary.FileWriter(output_dir)

        if init_train:
            self.train_vocabs = reader.read_vocab(vocab_file)
            self._init_train()
            self._init_eval()

        if init_infer:
            self.infer_vocabs = reader.read_vocab(vocab_file)
            # Character -> index map used when decoding inference output.
            self.infer_vocab_indices = dict(
                (c, i) for i, c in enumerate(self.infer_vocabs))
            self._init_infer()
            self.reload_infer_model()
Esempio n. 5
0
    def __init__(self,
                 train_input_file,
                 train_target_file,
                 test_input_file,
                 test_target_file,
                 vocab_file,
                 num_units,
                 layers,
                 dropout,
                 batch_size,
                 learning_rate,
                 output_dir,
                 save_step=100,
                 eval_step=1000,
                 param_histogram=False,
                 restore_model=False,
                 init_train=True,
                 init_infer=False):
        """Store hyper-parameters, optionally build train/eval readers and
        graphs, dump the current graph for TensorBoard, and optionally set
        up inference.

        Fix: the temporary ``tf.Session`` used only to emit the graph
        summary was never closed (resource leak); it is now managed with a
        ``with`` block so it is released deterministically.
        """
        self.num_units = num_units
        self.layers = layers
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.save_step = save_step
        self.eval_step = eval_step
        self.param_histogram = param_histogram
        self.restore_model = restore_model
        self.init_train = init_train
        self.init_infer = init_infer

        if init_train:
            self.train_reader = reader.SeqReader(train_input_file,
                                                 train_target_file, vocab_file,
                                                 batch_size)
            self.train_reader.start()
            self.train_data = self.train_reader.read()
            self.eval_reader = reader.SeqReader(test_input_file,
                                                test_target_file, vocab_file,
                                                batch_size)
            self.eval_reader.start()
            self.eval_data = self.eval_reader.read()

        self.model_file = path.join(output_dir, 'model.ckpl')
        self.log_writter = tf.summary.FileWriter(output_dir)
        # Dump the graph for TensorBoard.  NOTE(review): this runs before
        # _init_train() builds the model, so the initializer only covers
        # variables that exist at this point — confirm that is intended.
        init = tf.global_variables_initializer()
        with tf.Session() as sess:  # close the session instead of leaking it
            sess.run(init)
            self.log_writter_temp = tf.summary.FileWriter(
                '../models/tf-lib/output_couplet/logs', sess.graph)

        if init_train:
            self._init_train()
            self._init_eval()

        if init_infer:
            self.infer_vocabs = reader.read_vocab(vocab_file)
            self.infer_vocab_indices = dict(
                (c, i) for i, c in enumerate(self.infer_vocabs))
            self._init_infer()
            self.reload_infer_model()
Esempio n. 6
0
    def __init__(self,
                 train_input_file,
                 train_target_file,
                 test_input_file,
                 test_target_file,
                 vocab_file,
                 num_units,
                 layers,
                 dropout,
                 batch_size,
                 learning_rate,
                 output_dir,
                 save_step=500,
                 eval_step=1000,
                 param_histogram=False,
                 restore_model=False,
                 init_train=True,
                 init_infer=False):
        """Store hyper-parameters and optionally build the training/eval
        data readers and graphs and the inference pipeline.

        Args:
            train_input_file / train_target_file: training split paths.
            test_input_file / test_target_file: eval split paths.
            vocab_file: vocabulary file path.
            num_units / layers / dropout: network hyper-parameters kept
                for the graph-building helpers.
            batch_size: examples per batch handed to SeqReader.
            learning_rate: stored for the training graph.
            output_dir: directory for the checkpoint and TF summaries.
            save_step / eval_step: step intervals (used by training loop).
            param_histogram: whether to record parameter histograms.
            restore_model: whether a saved model should be restored.
            init_train: build readers plus train/eval graphs when True.
            init_infer: build the inference graph and reload it when True.
        """
        print('=================Model模块初始化============================')
        self.num_units = num_units
        self.layers = layers
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.save_step = save_step
        self.eval_step = eval_step
        self.param_histogram = param_histogram
        self.restore_model = restore_model
        self.init_train = init_train
        self.init_infer = init_infer

        if init_train:
            print('开始训练初始化,运行init_train=================================')
            self.train_reader = reader.SeqReader(train_input_file,
                                                 train_target_file, vocab_file,
                                                 batch_size)
            self.train_reader.start()
            self.train_data = self.train_reader.read()
            # At this point the training input pipeline (train_data) is ready.
            self.eval_reader = reader.SeqReader(test_input_file,
                                                test_target_file, vocab_file,
                                                batch_size)
            self.eval_reader.start()
            self.eval_data = self.eval_reader.read()
            # At this point the eval (test) data pipeline is ready.
            print('结束运行 init_train=================================')

        # Checkpoint path and summary writer under output_dir.
        self.model_file = path.join(output_dir, 'model.ckpl')
        self.log_writter = tf.summary.FileWriter(output_dir)

        if init_train:
            self._init_train()
            self._init_eval()

        if init_infer:
            self.infer_vocabs = reader.read_vocab(vocab_file)
            # Character -> index map used when decoding inference output.
            self.infer_vocab_indices = dict(
                (c, i) for i, c in enumerate(self.infer_vocabs))
            self._init_infer()
            self.reload_infer_model()
        print('=================Model模块初始化结束============================')