Esempio n. 1
0
    def run_one_epoch(self, sess, train_data, test_data, tgt_vocab, epoch,
                      saver):
        """Train the model on `train_data` for one epoch and checkpoint
        after the final batch.

        :param sess: active TensorFlow session
        :param train_data: list of training examples
        :param test_data: held-out data (not used in the visible code)
        :param tgt_vocab: target vocabulary (not used in the visible code)
        :param epoch: zero-based epoch index
        :param saver: tf.train.Saver used for checkpointing
        """
        # Ceiling division so a final partial batch is still counted.
        num_batches = (len(train_data) + self.batch_size -
                       1) // self.batch_size

        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        batches = batch_yield(train_data, self.batch_size)
        for step, (seqs, labels) in enumerate(batches):
            # '\r' keeps the progress line overwriting itself in place.
            sys.stdout.write('processing: {} batch / {} batches.'.format(
                step + 1, num_batches) + '\r')
            # Global (cross-epoch) step counter, 1-based.
            step_num = epoch * num_batches + step + 1
            feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                              self.dropout_keep_prob)
            _, loss_train, summary, step_num_ = sess.run(
                [self.train_op, self.loss, self.merged, self.global_step],
                feed_dict=feed_dict)
            # Log on the first batch, every 300th batch, and the last batch.
            if step + 1 == 1 or (step +
                                 1) % 300 == 0 or step + 1 == num_batches:
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.
                    format(start_time, epoch + 1, step + 1, loss_train,
                           step_num))

            self.file_writer.add_summary(summary, step_num)

            # Checkpoint once per epoch, after the final batch.
            if step + 1 == num_batches:
                saver.save(sess, self.model_path, global_step=step_num)
        # NOTE(review): the snippet is truncated here — the opening ''' begins
        # a commented-out validation block whose closing quotes are not visible.
        '''self.logger.info('===========validation / test===========')
Esempio n. 2
0
    def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
        """Train for one epoch, then evaluate on the dev set.

        :param sess: active TensorFlow session
        :param train: training examples
        :param dev: development examples, evaluated after the epoch
        :param tag2label: tag-to-label mapping (self.tag2label is used below)
        :param epoch: zero-based epoch index
        :param saver: tf.train.Saver used to checkpoint after the last batch
        """
        # Ceiling division so a final partial batch is still counted.
        num_batches = (len(train) + self.batch_size - 1) // self.batch_size
        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # batch_yield produces (seqs, labels) batches, shuffling if configured.
        batches = batch_yield(train, self.batch_size, self.vocab, self.tag2label, shuffle=self.shuffle)
        # step is the batch index; (seqs, labels) is the batch data.
        for step, (seqs, labels) in enumerate(batches):

            # '\r' keeps the progress line overwriting itself in place.
            sys.stdout.write(' processing: {} batch / {} batches.'.format(step + 1, num_batches) + '\r')
            # Global (cross-epoch) step counter, 1-based.
            step_num = epoch * num_batches + step + 1
            feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr, self.dropout_keep_prob)
            _, loss_train, summary, step_num_ = sess.run([self.train_op, self.loss, self.merged, self.global_step],
                                                         feed_dict=feed_dict)
            # Log on the first batch, every 300th batch, and the last batch.
            if step + 1 == 1 or (step + 1) % 300 == 0 or step + 1 == num_batches:
                # Write progress information to the log.
                self.logger.info('{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(start_time, epoch + 1,
                                                                                             step + 1,
                                                                                             loss_train, step_num))
            # Write the merged summary (loss etc.) for TensorBoard.
            self.file_writer.add_summary(summary, step_num)

            # Checkpoint once per epoch, after the final batch.
            if step + 1 == num_batches:
                saver.save(sess, self.model_path, global_step=step_num)

        self.logger.info('===========validation / test===========')
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
        self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
Esempio n. 3
0
 def test_one_epoch(self, sess, test_data):
     """Run prediction over the whole test set.

     :param sess: active TensorFlow session
     :param test_data: test examples consumable by batch_yield
     :return: (label_list, seq_len_list) aggregated over all batches
     """
     all_labels, all_lens = [], []
     for seqs, _ in batch_yield(test_data, self.batch_size):
         preds, lens = self.predict_one_batch(sess, seqs)
         all_labels.extend(preds)
         all_lens.extend(lens)
     return all_labels, all_lens
Esempio n. 4
0
    def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
        """Train for one epoch, then run validation on the dev set.

        :param sess: active TensorFlow session
        :param train: training examples
        :param dev: development examples
        :param tag2label: tag-to-label mapping (self.tag2label is used below)
        :param epoch: zero-based epoch index
        :param saver: tf.train.Saver used to checkpoint after the last batch
        :return: None
        """
        # Ceiling division so a final partial batch is still counted.
        num_batches = (len(train) + self.batch_size - 1) // self.batch_size
        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        batches = batch_yield(train, self.batch_size, self.vocab, self.tag2label, shuffle=self.shuffle)
        for step, (seqs, labels) in enumerate(batches):
            # sys.stdout.write behaves like print; input() is the read-side
            # counterpart of sys.stdin.readline().
            sys.stdout.write(' processing: {} batch / {} batches. \n'.format(step + 1, num_batches) + '\r')
            # NOTE(review): stored on self, unlike sibling implementations
            # that use a local; other methods may read self.step_num — confirm.
            self.step_num = epoch * num_batches + step + 1
            feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr, self.dropout_keep_prob)
            _, loss_train, summary, step_num_ = sess.run([self.train_op, self.loss, self.merged, self.global_step],
                                                         feed_dict=feed_dict)
            # Log on the first batch, every 300th batch, and the last batch.
            if step + 1 == 1 or (step + 1) % 300 == 0 or step + 1 == num_batches:
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {} '.format(start_time, epoch + 1, step + 1,
                                                                                 loss_train, self.step_num))

            self.file_writer.add_summary(summary, self.step_num)

            # Checkpoint once per epoch, after the final batch.
            if step + 1 == num_batches:
                saver.save(sess, self.model_path, global_step=self.step_num)


        self.logger.info('===========validation / test===========')
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
        self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
Esempio n. 5
0
    def run_one_epoch(self, sess, train_data_source, train_data_len, tag2label,
                      epoch):
        """Train for one epoch over a data source whose length is supplied
        separately.

        :param sess: active TensorFlow session
        :param train_data_source: iterable of training examples
        :param train_data_len: total number of training examples
        :param tag2label: tag-to-label mapping (self.tag2label is used below)
        :param epoch: zero-based epoch index
        """
        # Ceiling division so a final partial batch is still counted.
        num_batches = (train_data_len + self.batch_size - 1) // self.batch_size

        starttime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        # batches = batch_yield(train, self.batch_size, self.word2id, self.tag2label, shuffle=self.shuffle)
        batches = batch_yield(train_data_source, self.batch_size, self.word2id,
                              self.tag2label)

        for step, (seqs, labels) in enumerate(batches):
            # '\r' keeps the progress line overwriting itself in place.
            sys.stdout.write('processing: {} batch / {} batches.'.format(
                step + 1, num_batches) + '\r')

            # Global (cross-epoch) step counter, 1-based.
            step_num = epoch * num_batches + step + 1
            feed_dict, _ = self.get_feed_dict(seqs, labels,
                                              self.dropout_keep_prob)
            _, loss_train, _ = sess.run(
                [self.train_op, self.loss, self.global_step],
                feed_dict=feed_dict)

            # Log on the first batch, every 300th batch, and the last batch.
            if step + 1 == 1 or (step +
                                 1) % 300 == 0 or step + 1 == num_batches:
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.
                    format(starttime, epoch + 1, step + 1, loss_train,
                           step_num))

            # Checkpoint once per epoch, after the final batch.
            if step + 1 == num_batches:
                self.saver.save(sess,
                                self.model_file_prefix,
                                global_step=step_num)
Esempio n. 6
0
    def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
        """Train for one epoch; afterwards run prediction on the dev set.

        :param sess: active TensorFlow session
        :param train: training examples
        :param dev: development examples
        :param tag2label: tag-to-label mapping (self.tag2label is used below)
        :param epoch: zero-based epoch index
        :param saver: tf.train.Saver used to checkpoint after the last batch
        """
        num_batches = (len(train) + self.batch_size -
                       1) // self.batch_size  # ceiling division
        start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        batches = batch_yield(train,
                              self.batch_size,
                              self.vocab,
                              self.tag2label,
                              shuffle=self.shuffle)

        for step, (seqs, labels) in enumerate(batches):
            sys.stdout.write(
                f'precessing {step + 1} batch / {num_batches} batches\r')
            # Total number of steps taken so far across all epochs.
            step_num = epoch * num_batches + step + 1
            feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                              self.dropout_keep_prob)

            _, loss_train, summery, step_nu = sess.run(
                [self.train_op, self.loss, self.merged, self.glob_step],
                feed_dict=feed_dict)
            # Log on the first batch, every 300th batch, and the last batch.
            if (step + 1) == 1 or (step + 1) % 300 == 0 or (step +
                                                            1) == num_batches:
                self.logger.info(
                    f'{start_time}: epoch {epoch+1}, '
                    f'step {step+1}, loss:{loss_train:.4}, global step:{step_num}'
                )
            self.file_writer.add_summary(summary=summery,
                                         global_step=step_num)  # global step

            if step + 1 == num_batches:  # on the last batch of the epoch
                saver.save(sess, self.model_path, global_step=step_num)

        self.logger.info('===========validation / test===========')
        # NOTE(review): dev predictions are computed but never evaluated or
        # returned here; sibling implementations call self.evaluate — confirm
        # whether that call was dropped intentionally.
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
    def run_one_epoch(self, sess, train, dev, epoch, saver):
        """Train for one epoch, then validate on the dev set.

        :param sess: active TensorFlow session
        :param train: training examples
        :param dev: development examples
        :param epoch: zero-based epoch index
        :param saver: tf.train.Saver; a checkpoint is written every 5th epoch
        """
        # Ceiling division so a final partial batch is still counted.
        num_batches = (len(train) + self.batch_size - 1) // self.batch_size

        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        batches = batch_yield(train, self.batch_size, shuffle=True)
        for step, (seqs, labels, lengths) in enumerate(batches):
            # Progress message every 10 batches.
            if (step + 1) % 10 == 0:
                print(' processing: {} batch / {} batches.'.format(
                    step + 1, num_batches) + '\r')
            # Global (cross-epoch) step counter, 1-based.
            step_num = epoch * num_batches + step + 1
            feed_dict = self.get_feed_dict(lengths, seqs, labels, self.lr,
                                           self.dropout_keep_prob)
            _, loss_train, step_num_ = sess.run(
                [self.train_op, self.loss, self.global_step],
                feed_dict=feed_dict)
            # Log on the first batch, every 10th batch, and the last batch.
            if step + 1 == 1 or (step +
                                 1) % 10 == 0 or step + 1 == num_batches:
                print('{} epoch {}, step {}, loss: {:.4}, global_step: {}'.
                      format(start_time, epoch + 1, step + 1, loss_train,
                             step_num))

            # Checkpoint only at the end of every 5th epoch.
            if step + 1 == num_batches and (epoch + 1) % 5 == 0:
                saver.save(sess, self.model_path, global_step=step_num)

        print('===========validation epoch{}==========='.format(epoch + 1))
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
        self.evaluate(label_list_dev,
                      seq_len_list_dev,
                      dev,
                      file=self.valid_result)
Esempio n. 8
0
    def demo_one(self, sess, sent):
        """Predict the tag sequence for a single demo sentence.

        :param sess: active TensorFlow session
        :param sent: sentence data consumable by batch_yield
        :return: list of tags for the first (only) predicted sequence
        """
        predicted = []
        # Batch the sentence (no shuffling) and predict each batch.
        batches = batch_yield(sent,
                              self.batch_size,
                              self.vocab,
                              self.tag2label,
                              shuffle=False)
        for seqs, _ in batches:
            batch_labels, _ = self.predict_one_batch(sess, seqs)
            predicted.extend(batch_labels)
        # Invert tag2label; label 0 deliberately maps to itself.
        label2tag = {
            label: (tag if label != 0 else label)
            for tag, label in self.tag2label.items()
        }
        tag = [label2tag[label] for label in predicted[0]]
        print('===mode.demo_one:', 'label_list=', predicted, ',label2tag=',
              label2tag, ',tag=', tag)
        return tag
Esempio n. 9
0
 def demo_one(self, sess, sent):
     """Return the predicted tag sequence for one demo sentence.

     :param sess: active TensorFlow session
     :param sent: sentence data consumable by batch_yield
     :return: list of tags for the first predicted sequence
     """
     results = []
     for seqs, _ in batch_yield(sent, self.batch_size, self.vocab,
                                self.tag2label, shuffle=False):
         batch_pred, _ = self.predict_one_batch(sess, seqs)
         results.extend(batch_pred)
     # Invert the mapping; label 0 intentionally maps to itself.
     label2tag = {lbl: (t if lbl != 0 else lbl)
                  for t, lbl in self.tag2label.items()}
     return [label2tag[lbl] for lbl in results[0]]
Esempio n. 10
0
 def predict_sentence(self, sess, demo_data):
     """Predict tag strings for one sentence of demo data.

     :param sess: active TensorFlow session
     :param demo_data: sentence data consumable by batch_yield
     :return: list of tags for the first predicted sequence
     """
     predictions = []
     for seqs, labels in batch_yield(demo_data, self.batch_size,
                                     is_train=False):
         batch_labels, _ = self.predict_one_batch(sess, seqs, labels)
         predictions.extend(batch_labels)
     # Map numeric labels back to their tag strings.
     label2tag = {lbl: t for t, lbl in self.tag2label.items()}
     return [label2tag[lbl] for lbl in predictions[0]]
Esempio n. 11
0
    def dev_one_epoch(self, sess, dev):
        """Run inference over the whole dev set, one batch at a time.

        :param sess: active TensorFlow session
        :param dev: evaluation dataset
        :return: (label_list, seq_len_list) aggregated over all batches
        """
        all_labels, all_lens = [], []
        batches = batch_yield(dev,
                              self.config.batch_size,
                              self.vocab,
                              shuffle=False)
        for seqs, _ in batches:
            batch_labels, batch_lens = self.predict_one_batch(sess, seqs)
            all_labels.extend(batch_labels)
            all_lens.extend(batch_lens)
        return all_labels, all_lens
Esempio n. 12
0
    def run_epoches(self, sess, train, dev, tag2label, saver, args):
        """Train for self.epoch_num epochs; after each epoch evaluate on the
        dev set and checkpoint whenever the dev F1 score improves.

        :param sess: active TensorFlow session
        :param train: training examples
        :param dev: development examples
        :param tag2label: tag-to-label mapping (self.tag2label is used below)
        :param saver: tf.train.Saver used to checkpoint the best model
        :param args: extra arguments forwarded to dev_one_epoch
        :return: None
        """
        best_f1 = 0  # best dev F1 observed so far
        # Fix: bind step_num before the loops so saver.save below cannot
        # raise NameError when the training set yields no batches.
        step_num = 0
        for epoch in range(self.epoch_num):
            # Ceiling division so a final partial batch is still counted.
            num_batches = (len(train) + self.batch_size - 1) // self.batch_size

            start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

            batches = batch_yield(train,
                                  self.batch_size,
                                  self.vocab,
                                  self.tag2label,
                                  shuffle=self.shuffle)

            for step, (seqs, labels) in enumerate(batches):
                sys.stdout.write(' processing: {} batch / {} batches.'.format(
                    step + 1, num_batches) + '\r')

                # Global (cross-epoch) step counter, 1-based.
                step_num = epoch * num_batches + step + 1
                feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                                  self.dropout_keep_prob)
                _, loss_train, summary, step_num_ = sess.run(
                    [self.train_op, self.loss, self.merged, self.global_step],
                    feed_dict=feed_dict)
                # Log on the first batch, every 300th batch, and the last.
                if step + 1 == 1 or (step +
                                     1) % 300 == 0 or step + 1 == num_batches:
                    self.logger.info(
                        '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.
                        format(start_time, epoch + 1, step + 1, loss_train,
                               step_num))

                self.file_writer.add_summary(summary, step_num)

                # Per-epoch saving is intentionally disabled; only the best
                # model (by dev F1) is checkpointed below.

            self.logger.info('-----------验证集测试结果------------')
            label_list_dev, seq_len_list_dev = self.dev_one_epoch(
                sess, dev, args)
            _, _, f1 = self.evaluate(label_list_dev, seq_len_list_dev, dev,
                                     epoch)

            # Keep only the checkpoint with the best dev F1 so far.
            if f1 > best_f1:
                best_f1 = f1
                saver.save(sess, self.model_path, global_step=step_num)
            print("BEST_F1: {}".format(best_f1))
Esempio n. 13
0
 def dev_one_epoch(self, sess, dev):
     """Predict labels for the whole dev/test set.

     :param sess: Session
     :param dev: testing data
     :return: (label_list, seq_len_list) aggregated over all batches
     """
     all_labels, all_lens = [], []
     batches = batch_yield(dev, self.batch_size, self.vocab,
                           self.tag2label, shuffle=False)
     for seqs, _ in batches:
         preds, lens = self.predict_one_batch(sess, seqs)
         all_labels.extend(preds)
         all_lens.extend(lens)
     return all_labels, all_lens
Esempio n. 14
0
    def demo_one(self, sess, sent):
        """Predict label sequences for one demo sentence.

        :param sess: active TensorFlow session
        :param sent: sentence data consumable by batch_yield
        :return: list of predicted label sequences
        """
        predictions = []
        batches = batch_yield(sent, self.batch_size, shuffle=False)
        for seqs, _ in batches:
            batch_pred, _ = self.predict_one_batch(sess, seqs)
            predictions.extend(batch_pred)
        return predictions
    def dev_one_epoch(self, sess, dev):
        """Run prediction over the dev set batch by batch.

        :param sess: active TensorFlow session
        :param dev: dev dataset
        :return: (label_list, seq_len_list)
        """
        all_labels, all_lens = [], []
        for seqs, _, lengths in batch_yield(dev, self.batch_size):
            preds = self.predict_one_batch(sess, seqs, lengths)
            all_labels.extend(preds)
            all_lens.extend(lengths)
        return all_labels, all_lens
Esempio n. 16
0
    def dev_one_epoch(self, sess, dev):
        """Predict labels for every batch of the dev set.

        :param sess: active TensorFlow session
        :param dev: dev dataset
        :return: (label_list, seq_len_list)
        """
        all_labels, all_lens = [], []
        # Yield word-id sequences and labels one batch at a time.
        batches = data.batch_yield(dev, self.batch_size, self.vocab,
                                   self.tag2label, shuffle=False)
        for seqs, _ in batches:
            preds, lens = self.predict_one_batch(sess, seqs)
            all_labels.extend(preds)
            all_lens.extend(lens)
        return all_labels, all_lens
Esempio n. 17
0
    def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
        """Train for one epoch, then evaluate on the validation set.

        :param sess: active TensorFlow session
        :param train: training set
        :param dev: validation set
        :param tag2label: tag-to-label conversion dict (self.tag2label is
            what is actually passed to batch_yield below)
        :param epoch: zero-based index of the current training epoch
        :param saver: tf.train.Saver used to persist the model
        :return: None
        """
        # Number of training batches (ceiling division).
        num_batches = (len(train) + self.batch_size - 1) // self.batch_size

        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # Randomly assign the data to batches.
        batches = batch_yield(train,
                              self.batch_size,
                              self.vocab,
                              self.tag2label,
                              shuffle=self.shuffle)
        # Train on each batch in turn.
        for step, (seqs, labels) in enumerate(batches):

            sys.stdout.write(' processing: {} batch / {} batches.'.format(
                step + 1, num_batches) + '\r')
            # Global (cross-epoch) step counter, 1-based.
            step_num = epoch * num_batches + step + 1
            feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                              self.dropout_keep_prob)
            _, loss_train, summary, step_num_ = sess.run(
                [self.train_op, self.loss, self.merged, self.global_step],
                feed_dict=feed_dict)
            # Log on the first batch, every 300th batch, and the last batch.
            if step + 1 == 1 or (step +
                                 1) % 300 == 0 or step + 1 == num_batches:
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.
                    format(start_time, epoch + 1, step + 1, loss_train,
                           step_num))

            self.file_writer.add_summary(summary, step_num)
            # Save the model.
            if step + 1 == num_batches:
                # saver.save: first arg is the session; second sets the save
                # path and name; third appends the step count to the filename.
                saver.save(sess, self.model_path, global_step=step_num)

        self.logger.info('===========validation / test===========')
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
        # Evaluate the model on the dev predictions.
        self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
Esempio n. 18
0
    def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
        """Train for one epoch, then evaluate on the dev set.

        :param sess: active TensorFlow session
        :param train: training examples
        :param dev: development examples
        :param tag2label: tag-to-label mapping (self.tag2label is used below)
        :param epoch: zero-based epoch index
        :param saver: tf.train.Saver used to checkpoint after the last batch
        :return: None
        """
        # Number of batches (ceiling division).
        num_batches = (len(train) + self.batch_size - 1) // self.batch_size
        # Human-readable timestamp used as a prefix in log lines.
        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # Split the data into batches.
        batches = batch_yield(train,
                              self.batch_size,
                              self.vocab,
                              self.tag2label,
                              shuffle=self.shuffle)
        # Iterate over every batch.
        for step, (seqs, labels) in enumerate(batches):

            sys.stdout.write(' processing: {} batch / {} batches.'.format(
                step + 1, num_batches) + '\r')
            step_num = epoch * num_batches + step + 1
            # Build the feed dict for this batch.
            feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                              self.dropout_keep_prob)
            # One optimization step.
            _, loss_train, step_num_ = sess.run(
                [self.train_op, self.loss, self.global_step],
                feed_dict=feed_dict)

            # Log on the first batch, every 100th batch, and the last batch.
            if step + 1 == 1 or (step +
                                 1) % 100 == 0 or step + 1 == num_batches:
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.
                    format(start_time, epoch + 1, step + 1, loss_train,
                           step_num))
            # Save the model after the final batch of the epoch.
            if step + 1 == num_batches:
                saver.save(sess, self.model_path, global_step=step_num)

        self.logger.info('===========validation / test===========')
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
        self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
        print("\n")
Esempio n. 19
0
    def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
        """Train for one epoch, then evaluate on the dev set.

        :param sess: active TensorFlow session
        :param train: training examples
        :param dev: development examples
        :param tag2label: tag-to-label mapping (self.tag2label is used below)
        :param epoch: zero-based epoch index
        :param saver: tf.train.Saver used to checkpoint after the last batch
        :return: None
        """
        num_batches = (
            len(train) + self.batch_size - 1
        ) // self.batch_size  # ceiling division: number of feeds per epoch
        # batch_size is 64 per the original comment.
        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())  # timestamp
        # batches is a generator.
        batches = batch_yield(train,
                              self.batch_size,
                              self.vocab,
                              self.tag2label,
                              shuffle=self.shuffle)

        for step, (seqs,
                   labels) in enumerate(batches):  # enumerate starts step at 0
            sys.stdout.write(' processing: {} batch / {} batches.'.format(
                step + 1, num_batches) + '\n')  # progress: current/total batches
            step_num = epoch * num_batches + step + 1  # steps taken across epochs
            feed_dict, seq_len_list = self.get_feed_dict(
                seqs, labels, self.lr, self.dropout_keep_prob)
            op, loss_train, summary, step_num_ = sess.run(
                [self.train_op, self.loss, self.merged, self.global_step],
                feed_dict=feed_dict)
            if step + 1 == 1 or (
                    step + 1
            ) % 300 == 0 or step + 1 == num_batches:  # log on 1st, every 300th, last
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.
                    format(start_time, epoch + 1, step + 1, loss_train,
                           step_num))

            self.file_writer.add_summary(summary, step_num)

            # Checkpoint after the final batch of the epoch.
            if step + 1 == num_batches:
                saver.save(sess, self.model_path, global_step=step_num)

        self.logger.info('===========validation / test===========')
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
        self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
Esempio n. 20
0
    def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
        """Train for one epoch, then evaluate on the dev set.

        :param sess: active TensorFlow session
        :param train: train_data
        :param dev: dev_data
        :param tag2label: tag-to-label lookup table
        :param epoch: zero-based epoch index (40 epochs by default, per the
            original comment)
        :param saver: tf.train.Saver holding the model parameters
        :return: None
        """
        num_batches = (len(train) + self.batch_size -
                       1) // self.batch_size  # batch_size defaults to 64

        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        batches = batch_yield(
            train,
            self.batch_size,
            self.vocab,
            self.tag2label,
            shuffle=self.shuffle
        )  # vocab is word2id; yields batch_size-sized (seqs, labels) pairs
        # enumerate pairs each yielded batch with its 0-based index.
        for step, (seqs, labels) in enumerate(batches):

            sys.stdout.write(' processing: {} batch / {} batches.'.format(
                step + 1, num_batches) + '\r')
            step_num = epoch * num_batches + step + 1
            feed_dict, _ = self.get_feed_dict(
                seqs, labels, self.lr,
                self.dropout_keep_prob)  # lr 0.001, dropout 0.5 per original
            _, loss_train, summary, step_num_ = sess.run(
                [self.train_op, self.loss, self.merged, self.global_step],
                feed_dict=feed_dict)
            # Log on the first batch, every 300th batch, and the last batch.
            if step + 1 == 1 or (step +
                                 1) % 300 == 0 or step + 1 == num_batches:
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.
                    format(start_time, epoch + 1, step + 1, loss_train,
                           step_num))

            self.file_writer.add_summary(summary, step_num)

            # Checkpoint after the final batch of the epoch.
            if step + 1 == num_batches:
                saver.save(sess, self.model_path, global_step=step_num)

        self.logger.info('===========validation / test===========')
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
        self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
Esempio n. 21
0
    def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
        """Train for one epoch; after the final batch write a checkpoint and
        export a SavedModel, then evaluate on the dev set.

        :param sess: active TensorFlow session
        :param train: training examples
        :param dev: development examples
        :param tag2label: tag-to-label mapping (self.tag2label is used below)
        :param epoch: zero-based epoch index
        :param saver: tf.train.Saver used for checkpointing
        """
        # Ceiling division so a final partial batch is still counted.
        num_batches = (len(train) + self.batch_size - 1) // self.batch_size

        start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        batches = batch_yield(train, self.batch_size, self.vocab, self.tag2label, shuffle=self.shuffle)
        for step, (seqs, labels) in enumerate(batches):
            sys.stdout.write(' preprocessing: {} batch / {} batches.'.format(step+1, num_batches) + '\r')
            # Global (cross-epoch) step counter, 1-based.
            step_num = epoch * num_batches + step + 1
            feed_dict, _ = self.get_feed_dict(seqs, labels)
            _ , loss_train, summary, step_num_ = sess.run([self.train_op, self.loss, self.merged, self.global_step],
                                                          feed_dict=feed_dict)
            # Log on the first batch, every 300th batch, and the last batch.
            if step + 1 == 1 or (step + 1) % 300 == 0 or step + 1 == num_batches:
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(start_time,
                                                                                epoch + 1,
                                                                                step + 1,
                                                                                loss_train,
                                                                                step_num
                                                                                )
                )

            self.file_writer.add_summary(summary, step_num)

            if step + 1 == num_batches:
                saver.save(sess, self.model_path, global_step=step_num)

                # # Alternative: freeze the model into a single .pb file.
                # graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, output_node_names=['proj/logits'])
                # tf.train.write_graph(graph, '.', 'graph.pb', as_text=False)

                # Export the model via the saved_model API.
                # NOTE(review): the export path is hard-coded to a Windows
                # drive and a Unix timestamp — consider making it configurable.
                builder = tf.saved_model.builder.SavedModelBuilder('E:\\NER_LSTM\\model\\%s' % str(int(time.time())))
                inputs = {'input_x': tf.saved_model.utils.build_tensor_info(self.word_ids),
                          'sequence_length': tf.saved_model.utils.build_tensor_info(self.sequence_lengths)}

                outputs = {'output': tf.saved_model.utils.build_tensor_info(self.logits),
                           'transition_param': tf.saved_model.utils.build_tensor_info(self.transition_params)}

                signature = tf.saved_model.signature_def_utils.build_signature_def(inputs, outputs, 'test_sig_name')
                builder.add_meta_graph_and_variables(sess,['test_saved_model'], {'test_signature': signature})
                builder.save()


        self.logger.info('======== validation ========')
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
        self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
Esempio n. 22
0
    def dev_one_epoch(self, sess, dev):
        """Predict over the full dev set for later evaluation.

        :param sess: active TensorFlow session
        :param dev: dev dataset
        :return: (label_list, seq_len_list) — predictions for every sequence
        """
        all_labels, all_lens = [], []
        batches = batch_yield(dev,
                              self.batch_size,
                              self.vocab,
                              self.tag2label,
                              shuffle=False)
        for seqs, _ in batches:
            # Predict a single batch.
            preds, lens = self.predict_one_batch(sess, seqs)
            all_labels.extend(preds)
            all_lens.extend(lens)
        # All predictions, consumed by the downstream evaluation step.
        return all_labels, all_lens
Esempio n. 23
0
    def demo_one(self, sess, sent):
        """Predict the tag sequence for a single demo sentence.

        :param sess: active TensorFlow session
        :param sent: sentence data consumable by data.batch_yield
        :return: list of tags for the first predicted sequence
        """
        # batch_yield maps each character to its id and each tag to its
        # numeric label.
        predictions = []
        for seqs, _ in data.batch_yield(sent, self.batch_size, self.vocab,
                                        self.tag2label, shuffle=False):
            preds, _ = self.predict_one_batch(sess, seqs)
            predictions.extend(preds)
        # Invert the mapping; label 0 intentionally stays numeric.
        label2tag = {lbl: (t if lbl != 0 else lbl)
                     for t, lbl in self.tag2label.items()}
        return [label2tag[lbl] for lbl in predictions[0]]
Esempio n. 24
0
    def run_one_epoch(self, sess, train, epoch, saver, writer):
        """Train on `train` for a single epoch, printing progress.

        :param sess: active TensorFlow session
        :param train: training examples
        :param epoch: zero-based epoch index
        :param saver: checkpoint saver (unused in this body)
        :param writer: summary writer (summary writing is disabled here)
        """
        # Ceiling division so a final partial batch is still counted.
        num_batches = (len(train) + self.batch_size - 1) // self.batch_size
        batches = batch_yield(train,
                              self.batch_size,
                              self.vocab,
                              self.tag2label,
                              shuffle=self.shuffle)
        for idx, (seqs, labels) in enumerate(batches):
            batch_no = idx + 1
            global_step_num = epoch * num_batches + batch_no
            feed_dict, _ = self.get_feed_dict(seqs, labels)
            _, batch_loss, _ = sess.run(
                [self.train_op, self.loss, self.global_step],
                feed_dict=feed_dict)
            # Log on the first batch, every 10th batch, and the last batch.
            if batch_no == 1 or batch_no % 10 == 0 or batch_no == num_batches:
                print('epoch {}, step {}, loss: {:.4}, global_step: {}'.format(
                    epoch + 1, batch_no, batch_loss, global_step_num))
Esempio n. 25
0
    def dev_one_epoch(self, sess, dev, args):
        """Predict labels for every batch of the dev set.

        :param sess: active TensorFlow session
        :param dev: dev dataset
        :param args: unused here; kept for interface compatibility with callers
        :return: (label_list, seq_len_list) aggregated over all batches
        """
        # Fix: removed the unused local batch counter (count_batch) — it was
        # incremented but never read.
        label_list, seq_len_list = [], []
        for seqs, labels in batch_yield(dev,
                                        self.batch_size,
                                        self.vocab,
                                        self.tag2label,
                                        shuffle=False):
            label_list_, seq_len_list_ = self.predict_one_batch(sess, seqs)
            label_list.extend(label_list_)
            seq_len_list.extend(seq_len_list_)

        return label_list, seq_len_list
Esempio n. 26
0
 def demo_one(self, server, sent, verbose=None):
     """Predict the tag sequence for one demo sentence.

     :param server: prediction backend forwarded to predict_one_batch
     :param sent: sentence data consumable by batch_yield
     :param verbose: when truthy, print batch diagnostics
     :return: list of tags for the first predicted sequence
     """
     predictions = []
     batches = batch_yield(sent,
                           self.batch_size,
                           self.vocab,
                           self.tag2label,
                           shuffle=False)
     for seqs, _ in batches:
         if verbose:
             print('seqs', type(seqs), len(seqs), len(seqs[0]), seqs)
             print('batch_size', self.batch_size)
         preds, _ = self.predict_one_batch(server,
                                           seqs,
                                           verbose=verbose)
         predictions.extend(preds)
     # Invert tag2label; label 0 intentionally maps to itself.
     label2tag = {lbl: (t if lbl != 0 else lbl)
                  for t, lbl in self.tag2label.items()}
     return [label2tag[lbl] for lbl in predictions[0]]
Esempio n. 27
0
    def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
        """Train for one epoch over *train*, then validate on *dev*.

        :param sess: active TensorFlow session
        :param train: training examples consumed by ``batch_yield``
        :param dev: dev-set examples used for end-of-epoch validation
        :param tag2label: unused here (``self.tag2label`` is used instead)
        :param epoch: zero-based epoch index, used for global-step math
        :param saver: tf.train.Saver used to checkpoint after the last batch
        """
        # Ceiling division: number of batches covering the whole train set.
        num_batches = (len(train) + self.batch_size - 1) // self.batch_size
        self.logger.info("train lenght={} number_batches={}".format(
            len(train), num_batches))

        batches = batch_yield(train,
                              self.batch_size,
                              self.vocab,
                              self.tag2label,
                              shuffle=self.shuffle)
        epoch_start = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        self.logger.info("=========={} epoch begin train, time is {}".format(
            epoch + 1, epoch_start))
        for batch_idx, (seqs, labels) in enumerate(batches):
            current = batch_idx + 1
            self.logger.info("======seq length======{}".format(len(seqs)))
            sys.stdout.write(' processing: {} batch / {} batches.'.format(
                current, num_batches) + '\r')
            # Global step counts batches across all epochs so far.
            step_num = epoch * num_batches + current
            feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                              self.dropout_keep_prob)

            _, batch_loss, summary, _ = sess.run(
                [self.train_op, self.loss, self.merged, self.global_step],
                feed_dict=feed_dict)
            now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            # Log the first batch, every 2nd batch, and the final batch.
            if current == 1 or current % 2 == 0 or current == num_batches:
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.
                    format(now, epoch + 1, current, batch_loss, step_num))

            self.file_writer.add_summary(summary, step_num)

            if current == num_batches:
                self.logger.info("========save session========{}".format(
                    self.model_path))
                saver.save(sess, self.model_path, global_step=step_num)
        self.logger.info("=============validation==========")
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
        self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
Esempio n. 28
0
    def demo_one(self, sess, sent):
        """Tag a single sentence and return its tag sequence.

        :param sess: active TensorFlow session
        :param sent: input sentence data consumed by ``batch_yield``
        :return: list of tag strings for the first predicted sequence
        """
        predicted = []
        for seqs, labels in batch_yield(sent,
                                        self.batch_size,
                                        self.vocab,
                                        self.tag2label,
                                        shuffle=False):
            batch_labels, _ = self.predict_one_batch(sess, seqs)
            predicted.extend(batch_labels)
        # Invert tag2label so numeric labels map back to tag strings;
        # unlike other variants, label 0 is converted like any other.
        label2tag = {lbl: tg for tg, lbl in self.tag2label.items()}
        return [label2tag[lbl] for lbl in predicted[0]]
Esempio n. 29
0
    def demo_many(self, sess, sent):
        """Predict label sequences for many sentences.

        :param sess: active TensorFlow session
        :param sent: iterable of input sentences consumed by ``batch_yield``
        :return: list of predicted label sequences, one per input sequence,
                 left as numeric labels (no tag-string conversion here)
        """
        label_list = []
        count = 0
        for seqs, labels in batch_yield(sent, self.batch_size, self.vocab,
                                        self.tag2label, shuffle=False):
            count += self.batch_size
            label_list_, _ = self.predict_one_batch(sess, seqs)
            label_list.extend(label_list_)
            # Crude progress indicator; may exceed 1.0 on the final
            # partial batch because count advances by a full batch_size.
            print(count / len(sent))
        # Shallow-copy each sequence so callers can mutate them safely.
        return [list(lbls) for lbls in label_list]
    def run_one_epoch(self, sess, train, dev, tag2label, epoch, saver):
        """Train one epoch, checkpoint after the last batch, then validate.

        :param sess: active TensorFlow session
        :param train: training examples consumed by ``batch_yield``
        :param dev: dev-set examples for post-epoch validation
        :param tag2label: unused here (``self.tag2label`` is used instead)
        :param epoch: zero-based epoch index
        :param saver: tf.train.Saver for the end-of-epoch checkpoint
        :return: None
        """
        # Ceiling division, e.g. (50658 + 64 - 1) // 64 = 792 batches.
        num_batches = (len(train) + self.batch_size - 1) // self.batch_size
        # Timestamp taken once at epoch start; every log line reuses it.
        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        batches = batch_yield(train, self.batch_size, self.vocab,
                              self.tag2label, shuffle=self.shuffle)
        for batch_idx, (seqs, labels) in enumerate(batches):
            current = batch_idx + 1
            # Rewrite the same console line with a progress message.
            sys.stdout.write(' processing: {} batch / {} batches.'.format(current, num_batches) + '\r')
            # Global step across epochs: epoch * num_batches + current.
            step_num = epoch * num_batches + current
            feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr, self.dropout_keep_prob)
            _, loss_train, summary, _ = sess.run([self.train_op, self.loss, self.merged, self.global_step],
                                                 feed_dict=feed_dict)
            # Log the first batch, every 300th batch, and the final batch.
            if current == 1 or current % 300 == 0 or current == num_batches:
                self.logger.info(
                    '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.format(start_time, epoch + 1, current,
                                                                                loss_train, step_num))

            self.file_writer.add_summary(summary, step_num)

            if current == num_batches:
                # Save the model once the epoch's final batch is done.
                saver.save(sess, self.model_path, global_step=step_num)

        self.logger.info('===========validation / test===========')
        label_list_dev, seq_len_list_dev = self.dev_one_epoch(sess, dev)
        self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)