Ejemplo n.º 1
0
def align_checkpoint_for_lm(
    tvars,
    checkpoint_type,
    init_checkpoint,
    second_init_checkpoint=None,
):
    """Build checkpoint assignment maps for `tvars` and a deferred initializer.

    Args:
        tvars: Variables handed to the assignment-map helpers.
        checkpoint_type: Selects how the checkpoints are mapped:
            "" or "bert" uses `get_bert_assignment_map`; "v2" uses
            `assignment_map_v2_to_v2`; "nli_and_bert" and "v2_and_bert" use
            the same respective helper for `init_checkpoint` and additionally
            map `second_init_checkpoint` with `get_cls_assignment`.
        init_checkpoint: Primary checkpoint. When falsy, nothing is mapped
            and the returned `init_fn` is a no-op.
        second_init_checkpoint: Secondary checkpoint; only read for the two
            "*_and_bert" checkpoint types.

    Returns:
        A tuple `(initialized_variable_names, initialized_variable_names2,
        init_fn)`. The first two are the second return values of the
        assignment helpers for the primary and secondary checkpoint (empty
        dicts when the corresponding checkpoint is not used). Calling
        `init_fn()` runs `tf.compat.v1.train.init_from_checkpoint` once per
        mapped checkpoint.

    Raises:
        Exception: If `init_checkpoint` is set and `checkpoint_type` is not
            one of the supported values.
    """
    tf_logging.debug("align_checkpoint_for_lm")

    initialized_variable_names = {}
    initialized_variable_names2 = {}

    if not init_checkpoint:

        def init_fn():
            pass

        return initialized_variable_names, initialized_variable_names2, init_fn

    # A single if/elif chain: the previous code tested "nli_and_bert" and
    # "v2_and_bert" with two independent `if` statements, so the `else` of the
    # second one raised for every "nli_and_bert" request.
    if checkpoint_type == "" or checkpoint_type == "bert":
        assignment_fn = get_bert_assignment_map
        uses_second_checkpoint = False
    elif checkpoint_type == "v2":
        assignment_fn = assignment_map_v2_to_v2
        uses_second_checkpoint = False
    elif checkpoint_type == "nli_and_bert":
        assignment_fn = get_bert_assignment_map
        uses_second_checkpoint = True
    elif checkpoint_type == "v2_and_bert":
        assignment_fn = assignment_map_v2_to_v2
        uses_second_checkpoint = True
    else:
        raise Exception("Undefined checkpoint exists")

    assignment_map, initialized_variable_names = assignment_fn(
        tvars, init_checkpoint)

    assignment_map2 = None
    if uses_second_checkpoint:
        assignment_map2, initialized_variable_names2 = get_cls_assignment(
            tvars, second_init_checkpoint)

    def init_fn():
        tf.compat.v1.train.init_from_checkpoint(
            init_checkpoint, assignment_map)

        if uses_second_checkpoint:
            tf.compat.v1.train.init_from_checkpoint(
                second_init_checkpoint, assignment_map2)

    return initialized_variable_names, initialized_variable_names2, init_fn
Ejemplo n.º 2
0
def init_dict_model_with_nli_and_bert(sess, nli_checkpoint, bert_checkpoint):
    """Restore trainable variables into `sess` from two checkpoints in turn.

    The variables returned by `tf.compat.v1.trainable_variables()` are first
    mapped against `nli_checkpoint` with `get_bert_assignment_map` and
    restored, then mapped against `bert_checkpoint` with
    `get_bert_assignment_map_for_dict` (second of its three return values)
    and restored again.

    Args:
        sess: Session passed to `Saver.restore`.
        nli_checkpoint: Checkpoint restored first.
        bert_checkpoint: Checkpoint restored second.
    """
    trainable = tf.compat.v1.trainable_variables()

    # Pass 1: variables covered by the NLI checkpoint.
    nli_map, _ = get_bert_assignment_map(trainable, nli_checkpoint)
    tf.compat.v1.train.Saver(nli_map).restore(sess, nli_checkpoint)

    # Pass 2: variables covered by the dictionary mapping of the BERT
    # checkpoint.
    _, dict_map, _ = get_bert_assignment_map_for_dict(trainable,
                                                      bert_checkpoint)
    tf.compat.v1.train.Saver(dict_map).restore(sess, bert_checkpoint)
Ejemplo n.º 3
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Return the `TPUEstimatorSpec` for the "token_regression" head.

        Runs `model_class` on `input_ids` / `input_mask` / `segment_ids`,
        applies a Dense layer named "token_regression" to the sequence output
        and trains it with `tf.keras.losses.MAE` against `label_ids`
        (expanded on axis 2). Per-example losses are weighted by
        `label_masks`, summed over axis 1 and divided by the mask sum plus
        1e-5; the total loss is the mean over examples.

        Args:
            features: Mapping providing "input_ids", "input_mask",
                "segment_ids", "label_ids" and "label_masks".
            labels: Unused.
            mode: A `tf.estimator.ModeKeys` value.
            params: Unused.

        Returns:
            A `TPUEstimatorSpec` carrying a train op (TRAIN), a "mean_acc"
            eval metric (EVAL) or a predictions dict (any other mode).
        """
        tf_logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            feature_shape = features[feature_name].shape
            tf_logging.info("name = %s, shape = %s" %
                            (feature_name, feature_shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        label_masks = features["label_masks"]

        model = model_class(
            config=bert_config,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN),
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        regression_head = tf.keras.layers.Dense(train_config.num_classes,
                                                name="token_regression")
        logits = regression_head(model.get_sequence_output())

        # Masked mean absolute error, normalised per example by the number of
        # unmasked positions (1e-5 guards against an all-zero mask).
        abs_errors = tf.keras.losses.MAE(tf.expand_dims(label_ids, 2), logits)
        masked_errors = abs_errors * tf.cast(label_masks, tf.float32)
        error_sums = tf.reduce_sum(masked_errors, axis=1)
        mask_sums = tf.reduce_sum(tf.cast(label_masks, tf.float32),
                                  axis=1) + 1e-5
        total_loss = tf.reduce_mean(error_sums / mask_sums)

        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            assignment_map, initialized_variable_names = get_bert_assignment_map(
                tvars, train_config.init_checkpoint)
            if not train_config.use_tpu:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)
            else:
                # On TPU the checkpoint initialisation is deferred to the
                # scaffold function handed to the estimator spec.
                def tpu_scaffold():
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, assignment_map)
                    return tf.compat.v1.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf_logging.info("**** Trainable Variables ****")
        for var in tvars:
            from_ckpt = var.name in initialized_variable_names
            init_string = ", *INIT_FROM_CKPT*" if from_ckpt else ""
            tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(logits, label_ids, label_masks):
                # A position counts as correct when label and prediction are
                # both strictly negative or both strictly positive.
                scores = tf.squeeze(logits, 2)
                both_negative = tf.logical_and(tf.less(label_ids, 0.),
                                               tf.less(scores, 0.))
                both_positive = tf.logical_and(tf.less(0., label_ids),
                                               tf.less(0., scores))
                sign_match = tf.logical_or(both_negative, both_positive)

                weights = tf.cast(label_masks, tf.float32)
                hits = tf.reduce_sum(tf.cast(sign_match, tf.float32) * weights,
                                     axis=1)
                totals = tf.reduce_sum(weights, axis=1) + 1e-5

                return {
                    'mean_acc':
                    tf.compat.v1.metrics.mean(values=hits / totals)
                }

            return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, [logits, label_ids, label_masks]),
                scaffold_fn=scaffold_fn)

        return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions={
                "logits": logits,
                "input_ids": input_ids,
                "labels": label_ids,
                "label_masks": label_masks,
            },
            scaffold_fn=scaffold_fn)
Ejemplo n.º 4
0
def init_model_with_bert(sess, init_checkpoint):
    """Restore trainable variables into `sess` from `init_checkpoint`.

    The variables returned by `tf.compat.v1.trainable_variables()` are mapped
    against the checkpoint with `get_bert_assignment_map`, and the resulting
    map is restored through a `tf.compat.v1.train.Saver`.

    Args:
        sess: Session passed to `Saver.restore`.
        init_checkpoint: Checkpoint to restore from.
    """
    trainable = tf.compat.v1.trainable_variables()
    assignment_map, _ = get_bert_assignment_map(trainable, init_checkpoint)
    tf.compat.v1.train.Saver(assignment_map).restore(sess, init_checkpoint)
Ejemplo n.º 5
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Return the `TPUEstimatorSpec` for the pairwise model.

        Inputs come from `combine_paired_input_features(features)`; the
        pooled output of `model_class` is passed to `pairwise_model` together
        with the "strict_good" and "strict_bad" features, and the total loss
        is the mean of the losses it returns.

        Args:
            features: Mapping read by `combine_paired_input_features`, which
                must also provide "strict_good" and "strict_bad".
            labels: Unused.
            mode: A `tf.estimator.ModeKeys` value.
            params: Unused.

        Returns:
            A `TPUEstimatorSpec` carrying a train op (TRAIN), the
            "mean_acc" / "strict_good_acc" / "strict_bad_acc" eval metrics
            (EVAL) or a predictions dict (any other mode).
        """
        tf_logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            feature_shape = features[feature_name].shape
            tf_logging.info("name = %s, shape = %s" %
                            (feature_name, feature_shape))
        input_ids, input_mask, segment_ids = combine_paired_input_features(
            features)

        strict_good = features["strict_good"]
        strict_bad = features["strict_bad"]

        model = model_class(
            config=bert_config,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN),
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        losses, logits, pair_logits = pairwise_model(
            model.get_pooled_output(), strict_good, strict_bad)
        total_loss = tf.reduce_mean(losses)
        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            assignment_map, initialized_variable_names = get_bert_assignment_map(
                tvars, train_config.init_checkpoint)
            if not train_config.use_tpu:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)
            else:
                # On TPU the checkpoint initialisation is deferred to the
                # scaffold function handed to the estimator spec.
                def tpu_scaffold():
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, assignment_map)
                    return tf.compat.v1.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf_logging.info("**** Trainable Variables ****")
        for var in tvars:
            from_ckpt = var.name in initialized_variable_names
            init_string = ", *INIT_FROM_CKPT*" if from_ckpt else ""
            tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(pair_logits, strict_good, strict_bad):
                # Pairwise: column 0 must score strictly above column 1.
                margin = pair_logits[:, 0] - pair_logits[:, 1]
                pair_ok = tf.less(0.0, margin)

                # Strict: column 0 above 1.0 / column 1 below -1.0, combined
                # with the corresponding strict_* feature.
                good_above = tf.reshape(tf.less(1.0, pair_logits[:, 0]),
                                        [-1, 1])
                good_ok = cast_float_multiply(good_above, strict_good)
                bad_below = tf.reshape(tf.less(pair_logits[:, 1], -1.0),
                                       [-1, 1])
                bad_ok = cast_float_multiply(bad_below, strict_bad)

                mean_acc = tf.compat.v1.metrics.mean(
                    values=tf.cast(pair_ok, tf.float32))

                def _weighted_accuracy(correctness, gold):
                    # Accuracy against an all-ones target, weighted by `gold`.
                    return tf.compat.v1.metrics.accuracy(
                        labels=tf.ones_like(gold, tf.int32),
                        predictions=tf.cast(correctness, tf.int32),
                        weights=tf.cast(gold, tf.float32))

                metrics = {'mean_acc': mean_acc}
                metrics['strict_good_acc'] = _weighted_accuracy(
                    good_ok, strict_good)
                metrics['strict_bad_acc'] = _weighted_accuracy(
                    bad_ok, strict_bad)
                return metrics

            return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn,
                              [pair_logits, strict_good, strict_bad]),
                scaffold_fn=scaffold_fn)

        return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions={
                "logits": logits,
                "input_ids": input_ids,
                "strict_good": strict_good,
                "strict_bad": strict_bad,
            },
            scaffold_fn=scaffold_fn)
Ejemplo n.º 6
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the `TPUEstimatorSpec` for the loss-difference model.

        Reads "input_ids", "input_mask", "segment_ids", "loss_base",
        "loss_target", "masked_lm_positions", "masked_lm_ids" and
        "masked_lm_weights" from `features`, passes the input ids through
        `recover_mask`, runs `model_class` and attaches the loss selected by
        `model_config.loss_model`:

        * "independent": `IndependentLossModel`. Supports TRAIN, EVAL and
          PREDICT, and reports three scalar summaries through `host_call`
          during training.
        * "diff_regression": `get_diff_loss`. TRAIN only, because the EVAL
          metrics and PREDICT outputs are built from `prob1` / `prob2` and
          the per-example losses, which only the "independent" loss model
          produces.

        Args:
            features: Mapping providing the feature keys listed above.
            labels: Unused.
            mode: A `tf.estimator.ModeKeys` value.
            params: Unused.

        Returns:
            A `TPUEstimatorSpec` carrying a train op (TRAIN), the "loss1" /
            "loss2" eval metrics (EVAL) or a predictions dict (any other
            mode).

        Raises:
            ValueError: If `model_config.loss_model` is not "independent" or
                "diff_regression".
            NotImplementedError: If "diff_regression" is requested in a mode
                other than TRAIN.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("name = %s, shape = %s", name,
                            features[name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        loss_base = features["loss_base"]
        loss_target = features["loss_target"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]

        input_ids = recover_mask(input_ids, masked_lm_positions, masked_lm_ids)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )

        # Outputs that only the "independent" loss model produces. They stay
        # None for "diff_regression", which is restricted to TRAIN below.
        prob1 = prob2 = None
        per_example_loss1 = per_example_loss2 = None
        losses1 = losses2 = None
        host_call = None

        loss_model_name = model_config.loss_model
        is_independent = loss_model_name == "independent"
        if is_independent:
            loss_model = IndependentLossModel(bert_config)
            loss_model.train_modeling(model.get_sequence_output(),
                                      masked_lm_positions, masked_lm_weights,
                                      loss_base, loss_target)

            total_loss = loss_model.total_loss
            loss1 = loss_model.loss1
            loss2 = loss_model.loss2
            per_example_loss1 = loss_model.per_example_loss1
            per_example_loss2 = loss_model.per_example_loss2
            losses1 = tf.reduce_sum(per_example_loss1, axis=1)
            losses2 = tf.reduce_sum(per_example_loss2, axis=1)
            prob1 = loss_model.prob1
            prob2 = loss_model.prob2

            def host_call_fn(total_loss, loss1, loss2):
                tf.summary.scalar("total_loss", total_loss[0])
                tf.summary.scalar("loss_base", loss1[0])
                tf.summary.scalar("loss_target", loss2[0])
                return tf.compat.v1.summary.all_v2_summary_ops()

            # The scalars are reshaped to [1] before being handed to the
            # host call; host_call_fn reads element 0 of each.
            host_call = (host_call_fn, [
                tf.reshape(total_loss, [1]),
                tf.reshape(loss1, [1]),
                tf.reshape(loss2, [1])
            ])

        elif loss_model_name == "diff_regression":
            if not is_training:
                # Previously this path crashed with a NameError on prob1;
                # fail with an explicit message instead.
                raise NotImplementedError(
                    "loss_model 'diff_regression' only supports TRAIN mode; "
                    "EVAL and PREDICT need outputs that only the "
                    "'independent' loss model produces")
            total_loss, _losses, _logits = get_diff_loss(
                bert_config, model.get_sequence_output(), masked_lm_positions,
                masked_lm_weights, loss_base, loss_target)

        else:
            raise ValueError("Unsupported loss_model: %r" %
                             (loss_model_name,))

        pred_diff = prob1 - prob2 if is_independent else None
        gold_diff = get_gold_diff(loss_base, loss_target)
        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            assignment_map, initialized_variable_names = get_bert_assignment_map(
                tvars, train_config.init_checkpoint)
            if train_config.use_tpu:

                def tpu_scaffold():
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, assignment_map)
                    return tf.compat.v1.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                # Was `tf.compat.v1.tain`, which raised AttributeError
                # whenever a checkpoint was used without TPU.
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)

        tf_logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss1, per_example_loss2):
                return {
                    "loss1":
                    tf.compat.v1.metrics.mean(values=per_example_loss1),
                    "loss2":
                    tf.compat.v1.metrics.mean(values=per_example_loss2),
                }

            eval_metrics = (metric_fn, [losses1, losses2])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "loss_base": loss_base,
                "loss_target": loss_target,
                "prob1": prob1,
                "prob2": prob2,
                "per_example_loss1": per_example_loss1,
                "per_example_loss2": per_example_loss2,
                "input_ids": input_ids,
                "masked_lm_positions": masked_lm_positions,
                "pred_diff": pred_diff,
                "gold_diff": gold_diff,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec