Example #1
    def train(self):
        with tf.Session() as sess:
            tvars = tf.trainable_variables()
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
                tvars, self.__bert_checkpoint_path)
            print("init bert model params")
            tf.train.init_from_checkpoint(self.__bert_checkpoint_path, assignment_map)
            print("init bert model params done")
            sess.run(tf.variables_initializer(tf.global_variables()))

            current_step = 0

            for epoch in range(self.config["epochs"]):
                print("----- Epoch {}/{} -----".format(epoch + 1, self.config["epochs"]))

                for batch in self.data_obj.next_batch(self.t_in_ids_a, self.t_in_masks_a, self.t_seg_ids_a,
                                                      self.t_in_ids_b, self.t_in_masks_b, self.t_seg_ids_b,
                                                      self.t_lab_ids):
                    loss, predictions = self.model.train(sess, batch)

                    acc, recall, prec, f_beta = get_multi_metrics(pred_y=predictions, true_y=batch["label_ids"],
                                                                  labels=self.label_list)
                    print("train: step: {}, loss: {}, acc: {}, recall: {}, precision: {}, f_beta: {}".format(
                        current_step, loss, acc, recall, prec, f_beta))

                    current_step += 1
                    if self.data_obj and current_step % self.config["checkpoint_every"] == 0:

                        eval_losses = []
                        eval_accs = []
                        eval_recalls = []
                        eval_precs = []
                        eval_f_betas = []
                        for eval_batch in self.data_obj.next_batch(self.e_in_ids_a, self.e_in_masks_a, self.e_seg_ids_a,
                                                                   self.e_in_ids_b, self.e_in_masks_b, self.e_seg_ids_b,
                                                                   self.e_lab_ids):
                            eval_loss, eval_predictions = self.model.eval(sess, eval_batch)

                            eval_losses.append(eval_loss)

                            acc, recall, prec, f_beta = get_multi_metrics(pred_y=eval_predictions,
                                                                          true_y=eval_batch["label_ids"],
                                                                          labels=self.label_list)
                            eval_accs.append(acc)
                            eval_recalls.append(recall)
                            eval_precs.append(prec)
                            eval_f_betas.append(f_beta)
                        print("\n")
                        print("eval:  loss: {}, acc: {}, auc: {}, recall: {}, precision: {}, f_beta: {}".format(
                            mean(eval_losses), mean(eval_accs), mean(eval_aucs), mean(eval_recalls),
                            mean(eval_precs), mean(eval_f_betas)))
                        print("\n")

                        if self.config["ckpt_model_path"]:
                            save_path = self.config["ckpt_model_path"]
                            if not os.path.exists(save_path):
                                os.makedirs(save_path)
                            model_save_path = os.path.join(save_path, self.config["model_name"])
                            self.model.saver.save(sess, model_save_path, global_step=current_step)
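
A note on the pattern above: every example on this page follows the same TF 1.x warm-start idiom, which matches the graph's trainable variables against a checkpoint and then rewrites their initializers to read from it before the initializer op runs. A minimal sketch of that idiom, assuming the BERT/ALBERT modeling module is importable (the helper name warm_start_from_checkpoint is ours):

import tensorflow as tf

def warm_start_from_checkpoint(init_checkpoint):
    """Point variable initializers at init_checkpoint; call before init ops run."""
    tvars = tf.trainable_variables()
    # Match graph variables to checkpoint variables by name.
    (assignment_map,
     initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
         tvars, init_checkpoint)
    # Swaps the initializers of the mapped variables for checkpoint reads.
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    return initialized_variable_names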
Example #2
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_type_ids = features["input_type_ids"]
        extract_indices = features["extract_indices"]

        model = modeling.AlbertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=input_type_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()
        scaffold_fn = None
        (assignment_map,
         initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        all_layers = model.get_all_encoder_layers()

        predictions = {
            "unique_ids": unique_ids,
            "extract_indices": extract_indices
        }

        for (i, layer_index) in enumerate(layer_indexes):
            predictions["layer_output_%d" % i] = all_layers[layer_index]

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
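
A hypothetical driver for the model_fn above (run_config, predict_input_fn, batch_size, and layer_indexes are assumed to be defined elsewhere, as in BERT's extract_features script):

estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=use_tpu,
    model_fn=model_fn,
    config=run_config,
    predict_batch_size=batch_size)

for result in estimator.predict(input_fn=predict_input_fn):
    unique_id = int(result["unique_ids"])
    # One [seq_length, hidden_size] array per entry in layer_indexes.
    first_layer = result["layer_output_0"]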
Example #3
    def albert_bilstm_crf(self):
        # parameter
        used = tf.sign(tf.abs(self.input_ids))
        length = tf.reduce_sum(used, axis=1)
        self.lengths = tf.cast(length, tf.int32)
        self.batch_size = tf.shape(self.input_ids)[0]
        self.num_steps = tf.shape(self.input_ids)[-1]

        # albert embedding
        embedding = self.bert_embedding()
        # dropout
        lstm_inputs = tf.nn.dropout(embedding, self.dropout)
        # bi-directional lstm layer
        lstm_outputs = self.biLSTM_layer(lstm_inputs, self.config.lstm_dim,
                                         self.lengths)
        # logits for tags
        self.logits = self.project_layer(lstm_outputs)
        # loss of the model
        self.loss = self.loss_layer(self.logits, self.lengths)

        # Where the BERT model parameters are initialized
        init_checkpoint = self.config.init_checkpoint
        # Collect all trainable parameters in the model.
        tvars = tf.trainable_variables()
        # Load the BERT model weights
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        print("**** Trainable Variables ****")
        # Print the parameters loaded from the checkpoint
        train_vars = []
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            else:
                train_vars.append(var)
            print("  name = %s, shape = %s%s", var.name, var.shape,
                  init_string)

        optimizer = self.config.optimizer
        if optimizer == "adam":
            self.opt = tf.train.AdamOptimizer(self.config.lr)
        else:
            raise KeyError("unsupported optimizer: {}".format(optimizer))
        grads = tf.gradients(self.loss, train_vars)
        (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

        self.train_op = self.opt.apply_gradients(zip(grads, train_vars),
                                                 global_step=self.global_step)
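
Note the freezing trick above: train_vars collects only the variables that were not matched in the checkpoint, and both tf.gradients and apply_gradients operate on train_vars alone, so fine-tuning updates the new BiLSTM/CRF layers while the pre-trained BERT weights stay fixed.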
Example #4
        def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
            """The `model_fn` for TPUEstimator."""

            guids = features["guids"]
            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            segment_ids = features["segment_ids"]

            jit_scope = tf.contrib.compiler.jit.experimental_jit_scope

            with jit_scope():
                model = modeling.BertModel(config=bert_config,
                                           is_training=False,
                                           input_ids=input_ids,
                                           input_mask=input_mask,
                                           token_type_ids=segment_ids)

                if mode != tf.estimator.ModeKeys.PREDICT:
                    raise ValueError("Only PREDICT modes are supported: %s" %
                                     (mode))

                tvars = tf.trainable_variables()

                (assignment_map, initialized_variable_names
                 ) = modeling.get_assignment_map_from_checkpoint(
                     tvars, init_checkpoint)

                tf.logging.info("**** Trainable Variables ****")
                for var in tvars:
                    init_string = ""
                    if var.name in initialized_variable_names:
                        init_string = ", *INIT_FROM_CKPT*"
                    tf.logging.info("  name = %s, shape = %s%s", var.name,
                                    var.shape, init_string)

                all_layers = model.get_all_encoder_layers()

                predictions = {
                    "guid": guids,
                }

                for (i, layer_index) in enumerate(layer_indexes):
                    predictions["layer_output_%d" %
                                i] = all_layers[layer_index]

                from tensorflow.estimator import EstimatorSpec

                output_spec = EstimatorSpec(mode=mode, predictions=predictions)
                return output_spec
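
The experimental_jit_scope context asks TensorFlow to compile the ops created inside it with XLA. It affects performance, not semantics, so the surrounding with jit_scope(): block can be dropped (along with one level of indentation) on builds where tf.contrib.compiler is unavailable.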
Example #5
def build_model(sess):
    """Module function."""
    input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
    input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
    segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
    mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

    albert_config_path = os.path.join(FLAGS.albert_directory,
                                      "albert_config.json")
    albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
    model = modeling.AlbertModel(
        config=albert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
    )

    get_mlm_logits(
        model.get_sequence_output(),
        albert_config,
        mlm_positions,
        model.get_embedding_table(),
    )
    get_sentence_order_logits(model.get_pooled_output(), albert_config)

    checkpoint_path = os.path.join(FLAGS.albert_directory,
                                   FLAGS.checkpoint_name)
    tvars = tf.trainable_variables()
    (
        assignment_map,
        initialized_variable_names,
    ) = modeling.get_assignment_map_from_checkpoint(tvars, checkpoint_path)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)
    tf.train.init_from_checkpoint(checkpoint_path, assignment_map)
    init = tf.global_variables_initializer()
    sess.run(init)
    return sess
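
A hypothetical way to drive the graph that build_model creates, feeding the placeholders by the names given above (batch and sequence sizes here are illustrative only):

import numpy as np

with tf.Session() as sess:
    build_model(sess)
    g = sess.graph
    feed_dict = {
        g.get_tensor_by_name("input_ids:0"): np.zeros((1, 8), np.int32),
        g.get_tensor_by_name("input_mask:0"): np.ones((1, 8), np.int32),
        g.get_tensor_by_name("segment_ids:0"): np.zeros((1, 8), np.int32),
        g.get_tensor_by_name("mlm_positions:0"): np.zeros((1, 2), np.int32),
    }
    # Fetch whichever tensors get_mlm_logits / get_sentence_order_logits
    # returned, e.g. sess.run(mlm_logits, feed_dict=feed_dict).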
Example #6
    def __init__(self, albert_config, num_labels, seq_length, init_checkpoint):
        self.albert_config = albert_config
        self.num_labels = num_labels
        self.seq_length = seq_length

        self.input_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                        name='input_ids')
        self.input_mask = tf.placeholder(tf.int32, [None, self.seq_length],
                                         name='input_mask')
        self.segment_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                          name='segment_ids')
        self.labels = tf.placeholder(tf.int32, [None], name='labels')
        self.is_training = tf.placeholder(tf.bool,
                                          shape=[],
                                          name='is_training')
        #self.learning_rate = tf.placeholder(tf.float32, shape=[], name='learn_rate')

        self.model = modeling.AlbertModel(config=self.albert_config,
                                          is_training=self.is_training,
                                          input_ids=self.input_ids,
                                          input_mask=self.input_mask,
                                          token_type_ids=self.segment_ids)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                         init_string)

        self.inference()
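
Because initialized_variable_names defaults to an empty dict, the logging loop above also works when init_checkpoint is None: nothing is tagged *INIT_FROM_CKPT* and the model simply trains from random initialization.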
Example #7
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        # Note: We keep this feature name `next_sentence_labels` to be compatible
        # with the original data created by lanzhzh@. However, in the ALBERT case
        # it does represent sentence_order_labels.
        sentence_order_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.AlbertModel(
            config=albert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             albert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        (sentence_order_loss, sentence_order_example_loss,
         sentence_order_log_probs) = get_sentence_order_output(
             albert_config, model.get_pooled_output(), sentence_order_labels)

        total_loss = masked_lm_loss + sentence_order_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            tf.logging.info("number of hidden group %d to initialize",
                            albert_config.num_hidden_groups)
            num_of_initialize_group = 1
            if FLAGS.init_from_group0:
                num_of_initialize_group = albert_config.num_hidden_groups
                if albert_config.net_structure_type > 0:
                    num_of_initialize_group = albert_config.num_hidden_layers
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint, num_of_initialize_group)
            if use_tpu:

                def tpu_scaffold():
                    for gid in range(num_of_initialize_group):
                        tf.logging.info("initialize the %dth layer", gid)
                        tf.logging.info(assignment_map[gid])
                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map[gid])
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                for gid in range(num_of_initialize_group):
                    tf.logging.info("initialize the %dth layer", gid)
                    tf.logging.info(assignment_map[gid])
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map[gid])

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu,
                                                     optimizer, poly_power,
                                                     start_warmup_step)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(*args):
                """Computes the loss and accuracy of the model."""
                (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                 masked_lm_weights, sentence_order_example_loss,
                 sentence_order_log_probs, sentence_order_labels) = args[:7]

                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                metrics = {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                }

                sentence_order_log_probs = tf.reshape(
                    sentence_order_log_probs,
                    [-1, sentence_order_log_probs.shape[-1]])
                sentence_order_predictions = tf.argmax(
                    sentence_order_log_probs, axis=-1, output_type=tf.int32)
                sentence_order_labels = tf.reshape(sentence_order_labels, [-1])
                sentence_order_accuracy = tf.metrics.accuracy(
                    labels=sentence_order_labels,
                    predictions=sentence_order_predictions)
                sentence_order_mean_loss = tf.metrics.mean(
                    values=sentence_order_example_loss)
                metrics.update({
                    "sentence_order_accuracy": sentence_order_accuracy,
                    "sentence_order_loss": sentence_order_mean_loss
                })
                return metrics

            metric_values = [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, sentence_order_example_loss,
                sentence_order_log_probs, sentence_order_labels
            ]

            eval_metrics = (metric_fn, metric_values)

            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
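
For reference, the eval_metrics value that TPUEstimatorSpec expects is a (metric_fn, tensors) pair; TPUEstimator runs metric_fn(*tensors) on the host CPU. A toy sketch of the contract (all names here are ours):

def toy_metric_fn(labels, predictions):
    # Every dict value must be a (metric_value, update_op) pair, which is
    # exactly what the tf.metrics.* functions return.
    return {"accuracy": tf.metrics.accuracy(labels=labels,
                                            predictions=predictions)}

spec = contrib_tpu.TPUEstimatorSpec(
    mode=tf.estimator.ModeKeys.EVAL,
    loss=total_loss,
    eval_metrics=(toy_metric_fn, [some_labels, some_predictions]))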
Example #8
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        input_cdc_ids = features["input_cdc_ids"]
        age = features["age"]
        sex_ids = features["sex_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, probabilities, predictions) = \
            create_model(albert_config, is_training, input_ids, input_mask,
                         segment_ids, input_cdc_ids, age, sex_ids, label_ids, num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        pprint(tvars)
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint or FLAGS.cdc_init_checkpoint:
            if init_checkpoint:
                print('Loading ALBERT model')
                (assignment_map, initialized_variable_names
                 ) = modeling.get_assignment_map_from_checkpoint(
                     tvars, init_checkpoint)
                pprint(assignment_map)

            if FLAGS.cdc_init_checkpoint:
                print('Loading CDC model')
                cdc_map = {}
                for var in tvars:
                    if var.name.startswith('cdc/'):
                        name = var.name.replace(':0', '')
                        cdc_map[name] = name
                        initialized_variable_names[var.name] = 1
                pprint(cdc_map)

            if use_tpu:

                def tpu_scaffold():
                    if init_checkpoint:
                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map)
                    if FLAGS.cdc_init_checkpoint:
                        tf.train.init_from_checkpoint(
                            FLAGS.cdc_init_checkpoint, cdc_map)

                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                if init_checkpoint:
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                if FLAGS.cdc_init_checkpoint:
                    tf.train.init_from_checkpoint(FLAGS.cdc_init_checkpoint,
                                                  cdc_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, predictions,
                          is_real_example):
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions,
                                               weights=is_real_example)
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                mean_per_class_accuracy = tf.metrics.mean_per_class_accuracy(
                    labels=label_ids,
                    predictions=predictions,
                    num_classes=num_labels,
                    weights=is_real_example)

                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                    "eval_mean_per_class_accuracy": mean_per_class_accuracy,
                }

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, predictions, is_real_example
            ])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       predictions={
                                                           "probabilities":
                                                           probabilities,
                                                           "predictions":
                                                           predictions
                                                       },
                                                       scaffold_fn=scaffold_fn)
        return output_spec
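
The cdc_map built above is an identity mapping (each variable name mapped to itself, with the :0 output suffix stripped), which is the standard way to warm-start a second subgraph, here everything under the cdc/ scope, from its own checkpoint alongside the ALBERT weights.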
Example #9
    def train(self):
        with tf.Session() as sess:
            tvars = tf.trainable_variables()
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, self.__bert_checkpoint_path)
            print("init bert model params")
            tf.train.init_from_checkpoint(self.__bert_checkpoint_path,
                                          assignment_map)
            print("init bert model params done")
            sess.run(tf.variables_initializer(tf.global_variables()))

            current_step = 0
            start = time.time()
            for epoch in range(self.config["epochs"]):
                print("----- Epoch {}/{} -----".format(epoch + 1,
                                                       self.config["epochs"]))

                for batch in self.data_obj.next_batch(self.t_features):
                    loss, start_logits, end_logits = self.model.train(
                        sess, batch)
                    # print("start: ", start_logits)
                    # print("end: ", end_logits)
                    print("train: step: {}, loss: {}".format(
                        current_step, loss))

                    current_step += 1
                    if self.data_obj and current_step % self.config[
                            "checkpoint_every"] == 0:

                        all_results = []
                        for eval_batch in self.data_obj.next_batch(
                                self.e_features, is_training=False):
                            start_logits, end_logits = self.model.eval(
                                sess, eval_batch)

                            for unique_id, start_logit, end_logit in zip(
                                    eval_batch["unique_id"], start_logits,
                                    end_logits):
                                all_results.append(
                                    dict(unique_id=unique_id,
                                         start_logits=start_logit.tolist(),
                                         end_logits=end_logit.tolist()))

                        with open("output/cmrc2018/results.json",
                                  "w",
                                  encoding="utf8") as fw:
                            json.dump(all_results,
                                      fw,
                                      indent=4,
                                      ensure_ascii=False)

                        write_predictions(
                            all_examples=self.e_examples,
                            all_features=self.e_features,
                            all_results=all_results,
                            n_best_size=self.config["n_best_size"],
                            max_answer_length=self.config["max_answer_length"],
                            output_prediction_file=self.
                            config["output_predictions_path"],
                            output_nbest_file=self.config["output_nbest_path"])

                        result = get_eval(
                            original_file=self.config["eval_data"],
                            prediction_file=self.
                            config["output_predictions_path"])

                        print("\n")
                        print("eval:  step: {}, f1: {}, em: {}".format(
                            current_step, result["f1"], result["em"]))
                        print("\n")

                        if self.config["ckpt_model_path"]:
                            save_path = self.config["ckpt_model_path"]
                            if not os.path.exists(save_path):
                                os.makedirs(save_path)
                            model_save_path = os.path.join(
                                save_path, self.config["model_name"])
                            self.model.saver.save(sess,
                                                  model_save_path,
                                                  global_step=current_step)

            end = time.time()
            print("total train time: ", end - start)
Example #10
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        token_label_ids = features["token_label_ids"]
        predicate_matrix_ids = features["predicate_matrix_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(token_label_ids),
                                      dtype=tf.float32)  # TO DO

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, predicate_head_select_loss, predicate_head_probabilities,
         predicate_head_predictions, token_label_loss,
         token_label_per_example_loss,
         token_label_logits, token_label_predictions) = create_model(
             bert_config, is_training, input_ids, input_mask, segment_ids,
             token_label_ids, predicate_matrix_ids, num_token_labels,
             num_predicate_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(predicate_head_select_loss,
                          token_label_per_example_loss, token_label_ids,
                          token_label_logits, is_real_example):
                token_label_predictions = tf.argmax(token_label_logits,
                                                    axis=-1,
                                                    output_type=tf.int32)
                token_label_pos_indices_list = list(
                    range(num_token_labels)
                )[4:]  # ["[Padding]","[##WordPiece]", "[CLS]", "[SEP]"] + seq_out_set
                pos_indices_list = token_label_pos_indices_list[:
                                                                -1]  # do not care "O"
                token_label_precision_macro = tf_metrics.precision(
                    token_label_ids,
                    token_label_predictions,
                    num_token_labels,
                    pos_indices_list,
                    average="macro")
                token_label_recall_macro = tf_metrics.recall(
                    token_label_ids,
                    token_label_predictions,
                    num_token_labels,
                    pos_indices_list,
                    average="macro")
                token_label_f_macro = tf_metrics.f1(token_label_ids,
                                                    token_label_predictions,
                                                    num_token_labels,
                                                    pos_indices_list,
                                                    average="macro")
                token_label_precision_micro = tf_metrics.precision(
                    token_label_ids,
                    token_label_predictions,
                    num_token_labels,
                    pos_indices_list,
                    average="micro")
                token_label_recall_micro = tf_metrics.recall(
                    token_label_ids,
                    token_label_predictions,
                    num_token_labels,
                    pos_indices_list,
                    average="micro")
                token_label_f_micro = tf_metrics.f1(token_label_ids,
                                                    token_label_predictions,
                                                    num_token_labels,
                                                    pos_indices_list,
                                                    average="micro")
                token_label_loss = tf.metrics.mean(
                    values=token_label_per_example_loss,
                    weights=is_real_example)
                predicate_head_select_loss = tf.metrics.mean(
                    values=predicate_head_select_loss)
                return {
                    "predicate_head_select_loss": predicate_head_select_loss,
                    "eval_token_label_precision(macro)":
                    token_label_precision_macro,
                    "eval_token_label_recall(macro)": token_label_recall_macro,
                    "eval_token_label_f(macro)": token_label_f_macro,
                    "eval_token_label_precision(micro)":
                    token_label_precision_micro,
                    "eval_token_label_recall(micro)": token_label_recall_micro,
                    "eval_token_label_f(micro)": token_label_f_micro,
                    "eval_token_label_loss": token_label_loss,
                }

            eval_metrics = (metric_fn, [
                predicate_head_select_loss, token_label_per_example_loss,
                token_label_ids, token_label_logits, is_real_example
            ])

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={
                    "predicate_head_probabilities":
                    predicate_head_probabilities,
                    "predicate_head_predictions": predicate_head_predictions,
                    "token_label_predictions": token_label_predictions
                },
                scaffold_fn=scaffold_fn)
        return output_spec
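
On the metrics above: the macro averages weight every label equally (a mean of per-class precision/recall/F1 over pos_indices_list), while the micro averages pool token-level counts across classes first, so frequent labels dominate. Reporting both, as this metric_fn does, makes class imbalance visible.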
Example #11
    def train(self):
        with tf.Session() as sess:
            tvars = tf.trainable_variables()
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, self.__bert_checkpoint_path)
            print("init bert model params")
            tf.train.init_from_checkpoint(self.__bert_checkpoint_path,
                                          assignment_map)
            print("init bert model params done")
            sess.run(tf.variables_initializer(tf.global_variables()))

            current_step = 0

            for epoch in range(self.config["epochs"]):
                print("----- Epoch {}/{} -----".format(epoch + 1,
                                                       self.config["epochs"]))
                t_in_ids_a, t_in_masks_a, t_seg_ids_a, t_in_ids_b, t_in_masks_b, t_seg_ids_b = \
                    self.data_obj.gen_task_samples(self.queries, self.config["train_n_tasks"])

                for batch in self.data_obj.next_batch(t_in_ids_a, t_in_masks_a,
                                                      t_seg_ids_a, t_in_ids_b,
                                                      t_in_masks_b,
                                                      t_seg_ids_b):
                    loss, predictions = self.model.train(sess, batch)
                    acc = accuracy(predictions)
                    print("train: step: {}, loss: {}, acc: {}".format(
                        current_step, loss, acc))

                    current_step += 1
                    if self.data_obj and current_step % self.config[
                            "checkpoint_every"] == 0:
                        e_in_ids_a, e_in_masks_a, e_seg_ids_a, e_in_ids_b, e_in_masks_b, e_seg_ids_b = \
                            self.data_obj.gen_task_samples(self.queries, self.config["eval_n_tasks"])
                        eval_losses = []
                        eval_accs = []

                        for eval_batch in self.data_obj.next_batch(
                                e_in_ids_a, e_in_masks_a, e_seg_ids_a,
                                e_in_ids_b, e_in_masks_b, e_seg_ids_b):
                            eval_loss, eval_predictions = self.model.eval(
                                sess, eval_batch)

                            eval_losses.append(eval_loss)

                            acc = accuracy(eval_predictions)
                            eval_accs.append(acc)

                        print("\n")
                        print("eval:  loss: {}, acc: {}".format(
                            mean(eval_losses), mean(eval_accs)))
                        print("\n")

                        if self.config["ckpt_model_path"]:
                            save_path = self.config["ckpt_model_path"]
                            if not os.path.exists(save_path):
                                os.makedirs(save_path)
                            model_save_path = os.path.join(
                                save_path, self.config["model_name"])
                            self.model.saver.save(sess,
                                                  model_save_path,
                                                  global_step=current_step)
Example #12
    def __init__(self):
        self.config = Config()  # configuration parameters
        # Placeholder
        self.input_ids = tf.placeholder(tf.int32,
                                        shape=[None, self.config.seq_length],
                                        name='input_ids')
        self.input_masks = tf.placeholder(tf.int32,
                                          shape=[None, self.config.seq_length],
                                          name='input_masks')
        self.segment_ids = tf.placeholder(tf.int32,
                                          shape=[None, self.config.seq_length],
                                          name='segment_ids')
        self.label_ids = tf.placeholder(tf.int32,
                                        shape=[None, self.config.seq_length],
                                        name='label_ids')

        self.input_length = tf.placeholder(shape=[None],
                                           dtype=tf.int32,
                                           name='input-length')  # length of the input text
        self.input_keep_prob = tf.placeholder(
            dtype=tf.float32, name='input-keep-prob')  # keep probability

        # Load the ALBERT configuration
        bert_config = modeling.AlbertConfig.from_json_file(
            self.config.bert_config_file)
        # Build the ALBERT network
        self.model = modeling.AlbertModel(config=bert_config,
                                          is_training=self.config.is_training,
                                          input_ids=self.input_ids,
                                          input_mask=self.input_masks,
                                          token_type_ids=self.segment_ids,
                                          use_one_hot_embeddings=False)

        # Assign the pre-trained checkpoint weights to the network built above
        tvars = tf.trainable_variables()
        assignment_map, initialized_variable_names = modeling.get_assignment_map_from_checkpoint(
            tvars, self.config.initial_checkpoint)
        tf.train.init_from_checkpoint(self.config.initial_checkpoint,
                                      assignment_map=assignment_map)
        # Take the sequence output (token embeddings)  dim: (batch_size, seq_length, 384)
        self.sequence_output = self.model.get_sequence_output()

        if self.config.is_bilstm:  # whether to add a Bi-LSTM layer
            # Bi-LSTM/Bi-GRU
            cell_fw = self.get_rnn(self.config.rnn_type)  # forward cell
            cell_bw = self.get_rnn(self.config.rnn_type)  # backward cell
            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=self.sequence_output,
                dtype=tf.float32)
            outputs = tf.concat(
                values=outputs, axis=2
            )  # concatenate the forward and backward outputs   dim: (batch_size, max_length, 2*hidden_dim)
            # tf.layers.dropout takes a drop rate, not a keep probability
            outputs = tf.layers.dropout(inputs=outputs,
                                        rate=1.0 - self.input_keep_prob)
        else:
            outputs = self.sequence_output

        # Output layer   dim: (batch_size, max_length, num_classes)
        self.logits = tf.layers.dense(inputs=outputs,
                                      units=self.config.num_classes,
                                      name='logits')

        # Whether to use a CRF layer
        if self.config.crf:
            log_likelihood, self.transition_params = crf.crf_log_likelihood(
                inputs=self.logits,
                tag_indices=self.label_ids,
                sequence_lengths=self.input_length)
            self.loss = -tf.reduce_mean(log_likelihood)
            # Decoded predictions
            self.predict, self.viterbi_score = crf.crf_decode(
                potentials=self.logits,
                transition_params=self.transition_params,
                sequence_length=self.input_length)
        else:
            # Loss: per-token cross entropy (label_ids holds int ids, so the
            # sparse variant is the correct choice here)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.label_ids, logits=self.logits)
            mask = tf.sequence_mask(lengths=self.input_length)
            losses = tf.boolean_mask(cross_entropy, mask=mask)
            self.loss = tf.reduce_mean(losses)
            # Predicted labels: argmax over the class dimension (the last axis)
            self.predict = tf.argmax(tf.nn.softmax(self.logits),
                                     axis=-1,
                                     name='predict')

        # Optimizer
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.config.learning_rate).minimize(loss=self.loss)
Example #13
    def __init__(self, albert_config, num_labels, seq_length, init_checkpoint):
        self.albert_config = albert_config
        self.num_labels = num_labels
        self.seq_length = seq_length
        self.tower_grads = []
        self.losses = []

        self.input_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                        name='input_ids')
        self.input_mask = tf.placeholder(tf.int32, [None, self.seq_length],
                                         name='input_mask')
        self.segment_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                          name='segment_ids')
        self.labels = tf.placeholder(tf.int32, [None], name='labels')
        self.batch_size = tf.placeholder(tf.int32, shape=[], name='batch_size')
        self.is_training = tf.placeholder(tf.bool,
                                          shape=[],
                                          name='is_training')
        print(self.batch_size)
        self.gpu_step = self.batch_size // gpu_nums

        global_step = tf.train.get_or_create_global_step()

        learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)

        # Implements linear decay of the learning rate.
        learning_rate = tf.train.polynomial_decay(learning_rate,
                                                  global_step,
                                                  num_train_steps,
                                                  end_learning_rate=0.0,
                                                  power=1.0,
                                                  cycle=False)

        if num_warmup_steps:
            global_steps_int = tf.cast(global_step, tf.int32)
            warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)

            global_steps_float = tf.cast(global_steps_int, tf.float32)
            warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)

            warmup_percent_done = global_steps_float / warmup_steps_float
            warmup_learning_rate = init_lr * warmup_percent_done

            is_warmup = tf.cast(global_steps_int < warmup_steps_int,
                                tf.float32)
            learning_rate = ((1.0 - is_warmup) * learning_rate +
                             is_warmup * warmup_learning_rate)

        optimizer = optimization.AdamWeightDecayOptimizer(
            learning_rate=learning_rate,
            weight_decay_rate=0.01,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-6,
            exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

        with tf.variable_scope(tf.get_variable_scope()) as outer_scope:
            pred = []
            label = []
            for d in range(gpu_nums):
                with tf.device("/gpu:%s" % d), tf.name_scope("%s_%s" %
                                                             ("tower", d)):
                    self.model = modeling.AlbertModel(
                        config=self.albert_config,
                        is_training=self.is_training,
                        input_ids=self.input_ids[d * self.gpu_step:(d + 1) *
                                                 self.gpu_step],
                        input_mask=self.input_mask[d * self.gpu_step:(d + 1) *
                                                   self.gpu_step],
                        token_type_ids=self.segment_ids[d *
                                                        self.gpu_step:(d + 1) *
                                                        self.gpu_step])
                    print("GPU:", d)

                    tvars = tf.trainable_variables()
                    initialized_variable_names = {}
                    if init_checkpoint:
                        (assignment_map, initialized_variable_names
                         ) = modeling.get_assignment_map_from_checkpoint(
                             tvars, init_checkpoint)

                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map)

                    logging.info("**** Trainable Variables ****")
                    for var in tvars:
                        init_string = ""
                        if var.name in initialized_variable_names:
                            init_string = ", *INIT_FROM_CKPT*"
                        logging.info("  name = %s, shape = %s%s", var.name,
                                     var.shape, init_string)

                    output_layer = self.model.get_pooled_output()
                    logging.info(output_layer)

                    # self.is_training is a tf.bool tensor, so it cannot drive
                    # a Python `if`; use the dropout `training` flag instead.
                    output_layer = tf.layers.dropout(output_layer,
                                                     rate=0.1,
                                                     training=self.is_training)

                    match_1 = tf.strided_slice(output_layer, [0],
                                               [self.gpu_step], [2])
                    match_2 = tf.strided_slice(output_layer, [1],
                                               [self.gpu_step], [2])

                    match = tf.concat([match_1, match_2], 1)

                    self.logits = tf.layers.dense(match,
                                                  self.num_labels,
                                                  name='fc',
                                                  reuse=tf.AUTO_REUSE)

                    logging.info(self.logits)
                    self.r_labels = tf.strided_slice(
                        self.labels[d * self.gpu_step:(d + 1) * self.gpu_step],
                        [0], [self.gpu_step], [2])
                    logging.info(self.r_labels)

                    self.r_labels = tf.expand_dims(self.r_labels, -1)
                    logging.info(self.r_labels)
                    self.loss = tf.losses.mean_squared_error(
                        self.logits, self.r_labels)

                    tvars = tf.trainable_variables()
                    grads = tf.gradients(self.loss, tvars)

                    (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

                    self.tower_grads.append(list(zip(grads, tvars)))
                    self.losses.append(self.loss)
                    label.append(self.r_labels)
                    pred.append(self.logits)
                outer_scope.reuse_variables()

        with tf.name_scope("apply_gradients"), tf.device("/cpu:0"):
            gradients = self.average_gradients(self.tower_grads)
            train_op = optimizer.apply_gradients(gradients,
                                                 global_step=global_step)
            new_global_step = global_step + 1
            self.train_op = tf.group(train_op,
                                     [global_step.assign(new_global_step)])
            self.losses = tf.reduce_mean(self.losses)
            self.pred = tf.concat(pred, 0)
            self.label = tf.concat(label, 0)
            logging.info(self.pred)
            logging.info(self.label)
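
The self.average_gradients helper is referenced but not shown; a minimal sketch of the standard tower-averaging implementation it presumably follows (tower_grads holds one [(grad, var), ...] list per GPU):

    def average_gradients(self, tower_grads):
        averaged = []
        # zip(*tower_grads) yields, per variable, its (grad, var) pair from
        # every tower; the variables are shared, only the grads differ.
        for grad_and_vars in zip(*tower_grads):
            grads = tf.stack([g for g, _ in grad_and_vars], axis=0)
            mean_grad = tf.reduce_mean(grads, axis=0)
            averaged.append((mean_grad, grad_and_vars[0][1]))
        return averaged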
Example #14
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, probabilities, logits,
         predictions) = create_model(albert_config, is_training, input_ids,
                                     input_mask, segment_ids, label_ids,
                                     num_labels, use_one_hot_embeddings,
                                     task_name, hub_module)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu,
                                                     optimizer)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                #predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # print("###metric_fn.logits:",logits.shape) # (?,80)
                # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # print("###metric_fn.label_ids:",label_ids.shape,";predictions:",predictions.shape) # label_ids: (?,80);predictions:(?,)
                num_aspects = 10  # magic number
                logits_split = tf.split(
                    logits, num_aspects,
                    axis=-1)  # a list. length is num_aspects
                label_ids_split = tf.split(
                    label_ids, num_aspects,
                    axis=-1)  # a list. length is num_aspects
                accuracy = tf.constant(0.0, dtype=tf.float64)

                for j, aspect_logits in enumerate(logits_split):
                    #  accuracy = tf.metrics.accuracy(label_ids, predictions)

                    predictions = tf.argmax(
                        aspect_logits, axis=-1,
                        output_type=tf.int32)  # should be [batch_size,]
                    label_id_ = tf.cast(tf.argmax(label_ids_split[j], axis=-1),
                                        dtype=tf.int32)
                    tf.logging.debug(
                        "label_ids_split[j] = %s; predictions = %s; label_id_ = %s"
                        % (label_ids_split[j], predictions, label_id_))
                    current_accuracy, update_op_accuracy = tf.metrics.accuracy(
                        labels=label_id_,
                        predictions=predictions,
                        weights=is_real_example)
                    accuracy += tf.cast(current_accuracy, dtype=tf.float64)
                accuracy = accuracy / tf.constant(num_aspects,
                                                  dtype=tf.float64)
                loss = tf.metrics.mean(per_example_loss)
                return {
                    "eval_accuracy": (accuracy, update_op_accuracy),
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, logits, is_real_example
            ])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)

        else:
            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       predictions={
                                                           "probabilities":
                                                           probabilities,
                                                           "predictions":
                                                           predictions
                                                       },
                                                       scaffold_fn=scaffold_fn)
        return output_spec
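For context, a model_fn like the ones in these examples is not called directly; it is handed to TPUEstimator, which invokes it once per mode. Below is a minimal wiring sketch, assuming TF 1.x: it runs on CPU/GPU with use_tpu=False, and the model_dir, batch sizes, step count, and the train_input_fn/eval_input_fn names are hypothetical placeholders rather than part of the snippet above.

# Hedged sketch: wiring a model_fn into TPUEstimator (TF 1.x).
# All paths, batch sizes, and step counts here are hypothetical.
from tensorflow.contrib import tpu as contrib_tpu

run_config = contrib_tpu.RunConfig(
    model_dir="/tmp/albert_output",  # hypothetical output directory
    tpu_config=contrib_tpu.TPUConfig(iterations_per_loop=1000))

estimator = contrib_tpu.TPUEstimator(
    use_tpu=False,        # falls back to CPU/GPU when False
    model_fn=model_fn,    # a model_fn such as the one above
    config=run_config,
    train_batch_size=32,
    eval_batch_size=8,
    predict_batch_size=8)

# train_input_fn / eval_input_fn are assumed to exist and to yield the
# features dict the model_fn reads (input_ids, input_mask, ...).
estimator.train(input_fn=train_input_fn, max_steps=10000)
metrics = estimator.evaluate(input_fn=eval_input_fn)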
Example #15
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, probabilities, logits, predictions) = \
            create_model(albert_config, is_training, input_ids, input_mask,
                         segment_ids, label_ids, num_labels, use_one_hot_embeddings,
                         task_name, hub_module)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu,
                                                     optimizer)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            if task_name not in ["sts-b", "cola"]:

                def metric_fn(per_example_loss, label_ids, logits,
                              is_real_example):
                    predictions = tf.argmax(logits,
                                            axis=-1,
                                            output_type=tf.int32)
                    accuracy = tf.metrics.accuracy(labels=label_ids,
                                                   predictions=predictions,
                                                   weights=is_real_example)
                    loss = tf.metrics.mean(values=per_example_loss,
                                           weights=is_real_example)
                    return {
                        "eval_accuracy": accuracy,
                        "eval_loss": loss,
                    }
            elif task_name == "sts-b":

                def metric_fn(per_example_loss, label_ids, logits,
                              is_real_example):
                    """Compute Pearson correlations for STS-B."""
                    # Display labels and predictions
                    concat1 = contrib_metrics.streaming_concat(logits)
                    concat2 = contrib_metrics.streaming_concat(label_ids)

                    # Compute Pearson correlation
                    pearson = contrib_metrics.streaming_pearson_correlation(
                        logits, label_ids, weights=is_real_example)

                    # Compute MSE
                    # mse = tf.metrics.mean(per_example_loss)
                    mse = tf.metrics.mean_squared_error(
                        label_ids, logits, weights=is_real_example)

                    loss = tf.metrics.mean(values=per_example_loss,
                                           weights=is_real_example)

                    return {
                        "pred": concat1,
                        "label_ids": concat2,
                        "pearson": pearson,
                        "MSE": mse,
                        "eval_loss": loss,
                    }
            elif task_name == "cola":

                def metric_fn(per_example_loss, label_ids, logits,
                              is_real_example):
                    """Compute Matthew's correlations for STS-B."""
                    predictions = tf.argmax(logits,
                                            axis=-1,
                                            output_type=tf.int32)
                    # https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
                    tp, tp_op = tf.metrics.true_positives(
                        predictions, label_ids, weights=is_real_example)
                    tn, tn_op = tf.metrics.true_negatives(
                        predictions, label_ids, weights=is_real_example)
                    fp, fp_op = tf.metrics.false_positives(
                        predictions, label_ids, weights=is_real_example)
                    fn, fn_op = tf.metrics.false_negatives(
                        predictions, label_ids, weights=is_real_example)

                    # Compute Matthew's correlation
                    mcc = tf.div_no_nan(
                        tp * tn - fp * fn,
                        tf.pow((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn),
                               0.5))

                    # Compute accuracy
                    accuracy = tf.metrics.accuracy(labels=label_ids,
                                                   predictions=predictions,
                                                   weights=is_real_example)

                    loss = tf.metrics.mean(values=per_example_loss,
                                           weights=is_real_example)

                    return {
                        "matthew_corr":
                        (mcc, tf.group(tp_op, tn_op, fp_op, fn_op)),
                        "eval_accuracy": accuracy,
                        "eval_loss": loss,
                    }

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, logits, is_real_example
            ])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       predictions={
                                                           "probabilities":
                                                           probabilities,
                                                           "predictions":
                                                           predictions
                                                       },
                                                       scaffold_fn=scaffold_fn)
        return output_spec
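The CoLA branch above accumulates tp/tn/fp/fn with streaming counters and combines them via tf.div_no_nan. As a quick, self-contained sanity check of that closed-form formula (the counts below are invented for illustration, not from any dataset):

# Sanity check of the MCC formula used in the CoLA metric_fn above.
import math

def matthews_corr(tp, tn, fp, fn):
    """MCC = (tp*tn - fp*fn) / sqrt((tp+fp)(tp+fn)(tn+fp)(tn+fn))."""
    denom = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    # Mirror tf.div_no_nan: return 0 when the denominator is 0.
    return 0.0 if denom == 0 else (tp * tn - fp * fn) / denom

print(matthews_corr(tp=90, tn=80, fp=10, fn=20))  # ~0.704 on these toy counts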
Example #16
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_real_example = None
    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, probabilities, logits, predictions) = \
        create_model(albert_config, is_training, input_ids, input_mask,
                     segment_ids, label_ids, num_labels,
                     use_one_hot_embeddings, max_seq_length, dropout_prob,
                     hub_module)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
      def metric_fn(per_example_loss, label_ids, logits, is_real_example):
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        accuracy = tf.metrics.accuracy(
            labels=label_ids, predictions=predictions,
            weights=is_real_example)
        loss = tf.metrics.mean(
            values=per_example_loss, weights=is_real_example)
        return {
            "eval_accuracy": accuracy,
            "eval_loss": loss,
        }

      eval_metrics = (metric_fn,
                      [per_example_loss, label_ids, logits, is_real_example])
      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities,
                       "predictions": predictions},
          scaffold_fn=scaffold_fn)
    return output_spec
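These model_fns all read four tensors out of the features dict. For completeness, here is a hedged sketch of an input_fn shape-compatible with them; real ALBERT pipelines parse TFRecords, and every name, shape, and value below is a dummy stand-in.

# Hypothetical input_fn producing the features the model_fns above expect.
import tensorflow as tf

def make_dummy_input_fn(num_examples=64, seq_length=128):
    def input_fn(params):
        batch_size = params["batch_size"]  # injected by TPUEstimator
        features = {
            "input_ids": tf.zeros([num_examples, seq_length], tf.int32),
            "input_mask": tf.ones([num_examples, seq_length], tf.int32),
            "segment_ids": tf.zeros([num_examples, seq_length], tf.int32),
            "label_ids": tf.zeros([num_examples], tf.int32),
        }
        d = tf.data.Dataset.from_tensor_slices(features)
        return d.repeat().batch(batch_size, drop_remainder=True)
    return input_fn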
Example #17
    def train(self):
        with tf.Session() as sess:
            tvars = tf.trainable_variables()
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, self.__bert_checkpoint_path)
            print("init bert model params")
            tf.train.init_from_checkpoint(self.__bert_checkpoint_path,
                                          assignment_map)
            print("init bert model params done")
            sess.run(tf.variables_initializer(tf.global_variables()))

            current_step = 0
            start = time.time()
            for epoch in range(self.config["epochs"]):
                print("----- Epoch {}/{} -----".format(epoch + 1,
                                                       self.config["epochs"]))

                for batch in self.data_obj.next_batch(self.t_in_ids,
                                                      self.t_in_masks,
                                                      self.t_seg_ids,
                                                      self.t_lab_ids,
                                                      self.t_seq_len):

                    loss, true_y, predictions = self.model.train(
                        sess, batch, self.config["keep_prob"])

                    f1, precision, recall = gen_metrics(
                        pred_y=predictions,
                        true_y=true_y,
                        label_to_index=self.lab_to_idx)
                    print(
                        "train: step: {}, loss: {}, recall: {}, precision: {}, f1: {}"
                        .format(current_step, loss, recall, precision, f1))

                    current_step += 1
                    if self.data_obj and current_step % self.config[
                            "checkpoint_every"] == 0:

                        eval_losses = []
                        eval_recalls = []
                        eval_precisions = []
                        eval_f1s = []
                        for eval_batch in self.data_obj.next_batch(
                                self.e_in_ids, self.e_in_masks, self.e_seg_ids,
                                self.e_lab_ids, self.e_seq_len):
                            eval_loss, eval_true_y, eval_predictions = self.model.eval(
                                sess, eval_batch)

                            eval_losses.append(eval_loss)

                            f1, precision, recall = gen_metrics(
                                pred_y=eval_predictions,
                                true_y=eval_true_y,
                                label_to_index=self.lab_to_idx)
                            eval_recalls.append(recall)
                            eval_precisions.append(precision)
                            eval_f1s.append(f1)
                        print("\n")
                        print(
                            "eval:  loss: {}, recall: {}, precision: {}, f1: {}"
                            .format(mean(eval_losses), mean(eval_recalls),
                                    mean(eval_precisions), mean(eval_f1s)))
                        print("\n")

                        if self.config["ckpt_model_path"]:
                            save_path = self.config["ckpt_model_path"]
                            if not os.path.exists(save_path):
                                os.makedirs(save_path)
                            model_save_path = os.path.join(
                                save_path, self.config["model_name"])
                            self.model.saver.save(sess,
                                                  model_save_path,
                                                  global_step=current_step)

            end = time.time()
            print("total train time: ", end - start)