# --- Example 1 (scraped separator; original label "Ejemplo n.º 1", score 0) ---
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator (feature extraction, PREDICT only).

        Builds a BERT model over the input features, restores weights from
        `init_checkpoint`, and returns a TPUEstimatorSpec that predicts the
        example's `unique_id`.

        NOTE(review): relies on closure variables from the enclosing builder
        (`bert_config`, `init_checkpoint`, `use_tpu`, `use_one_hot_embeddings`)
        that are not visible in this chunk.
        """

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_type_ids = features["input_type_ids"]

        # Inference only: the model is always built in non-training mode.
        model = modeling.BertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=input_type_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()
        scaffold_fn = None
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:
            # On TPU the checkpoint restore must happen inside the scaffold
            # function so it runs on the TPU host at session creation.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Log every trainable variable, flagging those restored from ckpt.
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        # Fixed: removed the dead local
        # `all_layers = model.get_all_encoder_layers()` — its value was never
        # added to `predictions` and the accessor was otherwise unused.
        predictions = {
            "unique_id": unique_ids,
        }

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                      predictions=predictions,
                                                      scaffold_fn=scaffold_fn)
        return output_spec
# --- Example 2 (scraped separator; original label "Ejemplo n.º 2", score 0) ---
 def bert_config(self):
     """Build the BERT graph: placeholders, output-projection variables,
     the pretrained BertModel, and checkpoint initialization.

     Stores every created tensor/variable on `self` for use by later
     graph-building steps.
     """
     # Locate the pretrained-model artifacts under bert_root.
     config_path = os.path.join(self.bert_root, 'bert_config.json')
     self.bert_config = modeling.BertConfig.from_json_file(config_path)
     self.init_checkpoint = os.path.join(self.bert_root, 'bert_model.ckpt')
     self.bert_vocab_file = os.path.join(self.bert_root, 'vocab.txt')

     # Input placeholders; batch and sequence dimensions stay dynamic.
     self.input_ids = tf.placeholder(tf.int32, shape=[None, None], name='input_ids')
     self.input_mask = tf.placeholder(tf.int32, shape=[None, None], name='input_masks')
     self.segment_ids = tf.placeholder(tf.int32, shape=[None, None], name='segment_ids')
     self.input_y = tf.placeholder(tf.float32, shape=[None, self.max_sent, 1], name="input_y")
     self.global_step = tf.Variable(0, trainable=False)

     # Projection from the 768-dim pooled output to `max_sent` logits.
     self.w_out = tf.get_variable(
         "output_weights", [768, self.max_sent],
         initializer=tf.random_normal_initializer(stddev=0.1))
     self.b_out = tf.get_variable(
         "output_bias", [self.max_sent],
         initializer=tf.random_normal_initializer(stddev=0.01))

     # Instantiate the pretrained BERT encoder in inference mode.
     bert = modeling.BertModel(config=self.bert_config,
                               is_training=False,
                               input_ids=self.input_ids,
                               input_mask=self.input_mask,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

     # Map BERT variables onto the checkpoint and schedule their restore.
     trainable = tf.trainable_variables()
     (assignment, _
      ) = modeling.get_assignment_map_from_checkpoint(
          trainable, self.init_checkpoint)
     tf.train.init_from_checkpoint(self.init_checkpoint, assignment)

     # Pooled sentence representation (768-dim) with dropout against
     # overfitting.
     self.output_layer_pooled = tf.nn.dropout(bert.get_pooled_output(),
                                              keep_prob=self.keep_prob)
# --- Example 3 (scraped separator; original label "Ejemplo n.º 3", score 0) ---
 def bert_config(self):
     """Build the BERT graph for the fixed 95-label setup.

     Creates input placeholders, the 768->95 output projection, the
     pretrained BertModel, and schedules checkpoint restoration.

     Fixed: the statements following the `BertModel(...)` constructor were
     indented at argument level (a syntax error); they now sit at method-body
     level. Also dropped the commented-out second projection layer.
     """
     # Locate pretrained-model artifacts.
     bert_config_file = os.path.join(self.bert_root, 'bert_config.json')
     self.bert_config = modeling.BertConfig.from_json_file(bert_config_file)
     self.init_checkpoint = os.path.join(self.bert_root, 'bert_model.ckpt')
     self.bert_vocab_file = os.path.join(self.bert_root, 'vocab.txt')
     # Input placeholders; batch and sequence dims stay dynamic.
     self.input_ids = tf.placeholder(tf.int32, shape=[None, None], name='input_ids')
     self.input_mask = tf.placeholder(tf.int32, shape=[None, None], name='input_masks')
     self.segment_ids = tf.placeholder(tf.int32, shape=[None, None], name='segment_ids')
     self.input_y = tf.placeholder(tf.float32, shape=[None, 95, 1], name="input_y")
     self.global_step = tf.Variable(0, trainable=False)
     # Projection from the 768-dim pooled output to 95 classes.
     output_weights = tf.get_variable(
         "output_weights", [768, 95],
         initializer=tf.contrib.layers.xavier_initializer())
     output_bias = tf.get_variable(
         "output_bias", [95],
         initializer=tf.contrib.layers.xavier_initializer())
     self.w_out = output_weights
     self.b_out = output_bias
     # Instantiate the pretrained BERT encoder in inference mode.
     model = modeling.BertModel(config=self.bert_config,
                                is_training=False,
                                input_ids=self.input_ids,
                                input_mask=self.input_mask,
                                token_type_ids=self.segment_ids,
                                use_one_hot_embeddings=False)
     # Map BERT variables onto the checkpoint and schedule their restore.
     tvars = tf.trainable_variables()
     (assignment, _
      ) = modeling.get_assignment_map_from_checkpoint(
          tvars, self.init_checkpoint)
     tf.train.init_from_checkpoint(self.init_checkpoint, assignment)
     # Pooled sentence output (768-dim). NOTE(review): the original kept
     # this in a local and never used it or assigned it to self — preserved
     # as-is; it was probably meant to feed a downstream layer. Confirm.
     output_layer_pooled = model.get_pooled_output()
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Builds a BERT classifier and returns a TPUEstimatorSpec for
        TRAIN/EVAL/PREDICT. Relies on closure variables from the enclosing
        builder (`bert_config`, `num_labels`, `init_checkpoint`,
        `learning_rate`, `num_train_steps`, `num_warmup_steps`, `use_tpu`,
        `use_one_hot_embeddings`) that are not visible in this chunk.
        """

        # Log the incoming feature tensors for debugging.
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        # Padding examples (added to fill the last TPU batch) get weight 0
        # in the metrics; real examples get weight 1.
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # `create_model` is defined elsewhere in this file; per this call it
        # returns (total loss, per-example loss, logits, probabilities).
        (total_loss, per_example_loss, logits, probabilities) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:

                # On TPU the checkpoint restore must run inside the scaffold
                # function so it executes at session creation on the host.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Log every trainable variable, flagging those restored from ckpt.
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            # Metric inputs are passed explicitly through `eval_metrics` so
            # TPUEstimator can shard and aggregate them across cores.
            def metric_fn(per_example_loss, label_ids, logits, is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(
                    labels=label_ids, predictions=predictions, weights=is_real_example)
                loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn,
                            [per_example_loss, label_ids, logits, is_real_example])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            # PREDICT: emit class probabilities only.
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"probabilities": probabilities},
                scaffold_fn=scaffold_fn)
        return output_spec
# --- Example 5 (scraped separator; original label "Ejemplo n.º 5", score 0; a separator for example 4 is missing before L143) ---
    def model_fn(features, labels, mode, params):
        """The `model_fn` for TPUEstimator (sequence-tagging model).

        Builds the BERT tagger and returns a TPUEstimatorSpec for
        TRAIN/EVAL/PREDICT. Relies on closure variables from the enclosing
        builder (`bert_config`, `num_labels`, `init_checkpoint`,
        `learning_rate`, `num_train_steps`, `num_warmup_steps`, `use_tpu`,
        `use_one_hot_embeddings`) not visible in this chunk.
        """

        # Log the incoming feature tensors for debugging.
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        tag_ids = features["tag_ids"]
        # Padding examples (TPU batch filler) get metric weight 0.
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(tag_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        total_loss, logits, predictions = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, tag_ids,
            num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:
                # On TPU the restore must run inside the scaffold function.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Log every trainable variable, flagging those restored from ckpt.
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # Explicit loss hook so loss is also printed on CPU/GPU runs.
            logging_hook = tf.train.LoggingTensorHook({"loss": total_loss}, every_n_iter=10)
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[logging_hook],
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # Fixed: the original metric_fn declared an unused
            # `per_example_loss` parameter that was actually fed the scalar
            # `total_loss`, and it captured `predictions` from the enclosing
            # scope instead of receiving it as an argument. TPUEstimator
            # requires every tensor a metric_fn reads to be passed through
            # the `eval_metrics` argument list, so `predictions` is now an
            # explicit argument and the dead parameter is gone.
            def metric_fn(tag_ids, predictions, is_real_example):
                # Accuracy-based (lenient, per-position) matching.
                accuracy = tf.metrics.accuracy(
                    labels=tag_ids, predictions=predictions, weights=is_real_example)
                return {
                    "eval_accuracy": accuracy,
                }

            eval_metrics = (metric_fn,
                            [tag_ids, predictions, is_real_example])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"predictions": predictions},
                scaffold_fn=scaffold_fn)
        return output_spec
def main():
    """Training entry point.

    Builds the multi-task classifier graph, restores pretrained BERT
    weights via a partial Saver, trains for `num_train_epochs`, evaluates
    after every epoch, and finally runs prediction on the dev set.
    """
    tf.logging.info('start to train')

    # Processor / label list / tokenizer setup.
    process = AllProcessor()
    label_list = process.get_labels()
    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    # Serialize train/dev examples to feature files; the return values are
    # presumably example counts consumed by do_train/do_eval — TODO confirm.
    train_examples = process.get_train_examples(FLAGS.data_dir)
    train_cnt = file_based_convert_examples_to_features(
        train_examples,
        label_list,
        FLAGS.max_seq_length,
        tokenizer,
        FLAGS.data_dir,
        'train'
    )
    dev_examples = process.get_dev_examples(FLAGS.data_dir)
    dev_cnt = file_based_convert_examples_to_features(
        dev_examples,
        label_list,
        FLAGS.max_seq_length,
        tokenizer,
        FLAGS.data_dir,
        'dev'
    )

    # Graph input placeholders.
    input_ids = tf.placeholder(tf.int64, shape=[None, FLAGS.max_seq_length],
                               name='input_ids')
    input_mask = tf.placeholder(tf.int64, shape=[None, FLAGS.max_seq_length],
                                name='input_mask')
    segment_ids = tf.placeholder(tf.int64, shape=[None, FLAGS.max_seq_length],
                                 name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[None], name='labels')
    # Task selector for the multi-task head; scalar (no shape given).
    task = tf.placeholder(tf.int64, name='task')

    # BERT configuration.
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # `create_model` is defined elsewhere in this file; per this call it
    # returns (loss, logits, accuracy op, predicted-id op).
    loss, logits, acc, pre_id = create_model(
        bert_config,
        True,
        input_ids,
        input_mask,
        segment_ids,
        labels,
        False,
        task
    )
    # NOTE(review): `num_train_steps` here is steps-per-epoch, while the
    # optimizer receives num_train_steps * num_train_epochs; the warmup
    # formula multiplies batch size back in — verify this matches intent.
    num_train_steps = int(len(train_examples) / FLAGS.train_batch_size)
    num_warmup_steps = math.ceil(
        num_train_steps * FLAGS.train_batch_size * FLAGS.warmup_proportion)
    train_op = optimization.create_optimizer(
        loss,
        FLAGS.learning_rate,
        num_train_steps * FLAGS.num_train_epochs,
        num_warmup_steps,
        False
    )

    # Initializer plus a Saver that skips Adam slot variables so saved
    # checkpoints stay small.
    init_global = tf.global_variables_initializer()
    saver = tf.train.Saver(
        [v for v in tf.global_variables()
         if 'adam_v' not in v.name and 'adam_m' not in v.name])

    with tf.Session() as sess:
        sess.run(init_global)
        print('start to load bert params')
        if FLAGS.init_checkpoint:
            # tvars = tf.global_variables()
            tvars = tf.trainable_variables()
            print("global_variables", len(tvars))
            assignment_map, initialized_variable_names = \
                modeling.get_assignment_map_from_checkpoint(tvars,
                                                            FLAGS.init_checkpoint)
            print("initialized_variable_names:", len(initialized_variable_names))
            # Restore only the variables present in the BERT checkpoint;
            # everything else is re-initialized below.
            saver_ = tf.train.Saver([v for v in tvars if v.name in initialized_variable_names])
            saver_.restore(sess, FLAGS.init_checkpoint)
            tvars = tf.global_variables()
            # initialized_vars = [v for v in tvars if v.name in initialized_variable_names]
            not_initialized_vars = [v for v in tvars if v.name not in initialized_variable_names]
            print('all size %s; not initialized size %s' % (len(tvars), len(not_initialized_vars)))
            if len(not_initialized_vars):
                sess.run(tf.variables_initializer(not_initialized_vars))
            # for v in initialized_vars:
            #     print('initialized: %s, shape = %s' % (v.name, v.shape))
            # for v in not_initialized_vars:
            #     print('not initialized: %s, shape = %s' % (v.name, v.shape))
        else:
            print('the bert init checkpoint is None!!!')
            sess.run(tf.global_variables_initializer())

        # One optimizer step; returns logits and loss for logging.
        def train_step(ids, mask, seg, true_y, task_id):
            feed = {input_ids: ids,
                    input_mask: mask,
                    segment_ids: seg,
                    labels: true_y,
                    task: task_id}
            _, logits_out, loss_out = sess.run([train_op, logits, loss], feed_dict=feed)
            return logits_out, loss_out

        # One evaluation step: predictions and accuracy, no weight update.
        def dev_step(ids, mask, seg, true_y, task_id):
            feed = {input_ids: ids,
                    input_mask: mask,
                    segment_ids: seg,
                    labels: true_y,
                    task: task_id}
            pre_out, acc_out = sess.run([pre_id, acc], feed_dict=feed)
            return pre_out, acc_out

        # Training loop: train, checkpoint, then validate every epoch.
        for epoch in range(FLAGS.num_train_epochs):
            tf.logging.info(f'start to train and the epoch:{epoch}')
            epoch_loss = do_train(sess, train_cnt, train_step, epoch)
            tf.logging.info(f'the epoch{epoch} loss is {epoch_loss}')
            # NOTE(review): string-concatenated path assumes output_dir ends
            # with a separator; os.path.join would be safer — confirm.
            saver.save(sess, FLAGS.output_dir + 'bert.ckpt', global_step=epoch)
            # Validate the model at the end of every epoch.
            do_eval(sess, dev_cnt, dev_step)

        # Run prediction and persist the results.
        do_predict(label_list, process, tokenizer, dev_step)

        tf.logging.info('the training is over!!!!')