# Example #1 (score: 0)
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Minimal `model_fn` for TPUEstimator.

        Builds a trivial scalar loss from the integer input features so the
        estimator / input pipeline can be exercised without a real model.

        Args:
            features: dict of input tensors; reads "input_ids", "input_mask",
                "segment_ids" and "next_sentence_labels".
            labels: unused.
            mode: estimator mode key, forwarded to the spec.
            params: unused.

        Returns:
            A `TPUEstimatorSpec` carrying only `loss`.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("    name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        next_sentence_labels = features["next_sentence_labels"]

        # Fold the integer features into a dummy scalar loss; only the first
        # token position of each example contributes.
        t = input_ids + input_mask + segment_ids
        t = t * next_sentence_labels
        total_loss = tf.reduce_sum(tf.cast(t[:, 0], tf.float32))

        # Removed an unused `eval_metrics` local that was built but never
        # passed to the spec.
        scaffold_fn = None
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode, loss=total_loss, scaffold_fn=scaffold_fn)

        return output_spec
def main(_):
    """Entry point: train an MLM with the alternative-embedding-2 model."""
    tf_logging.info("Train MLM  with alternative embedding2")

    # Build configs from flags.
    config = JsonConfig.from_json_file(FLAGS.model_config_file)
    train_config = TrainConfigEx.from_flags(FLAGS)

    # Input pipeline for the alt-emb2 classification data.
    input_fn = input_fn_builder_alt_emb2_classification(
        get_input_files_from_flags(FLAGS), FLAGS, FLAGS.do_train)

    def model_constructor(config, is_training, input_ids, input_mask,
                          token_type_ids, use_one_hot_embeddings, features):
        # Thin adapter so the generic model_fn can instantiate the
        # embedding-replacement model.
        model = EmbeddingReplacer2(config, is_training, input_ids, input_mask,
                                   token_type_ids, use_one_hot_embeddings,
                                   features)
        return model

    # Enable feature feeding and trainable-variable reporting in the
    # generic classification model_fn.
    special_flags = FLAGS.special_flags.split(",") + [
        "feed_features",
        "ask_tvar",
    ]

    model_fn = model_fn_classification(config, train_config, model_constructor,
                                       special_flags)
    run_estimator(model_fn, input_fn)
# Example #3 (score: 0)
def train_LMS(bert_hp, train_config, lms_config: LMSConfigI, save_dir,
              nli_data, modeling_option, init_fn):
    """Train an LMS model on NLI data with periodic eval and checkpointing.

    Args:
        bert_hp: BERT hyperparameters; `lr` is read for the optimizer.
        train_config: provides `max_steps` and `num_gpu`.
        lms_config: LMS model configuration.
        save_dir: checkpoint directory; its basename names the summary run.
        nli_data: pair of (train_batches, dev_batches).
        modeling_option: forwarded to `LMSModel`.
        init_fn: callable taking the session, run after variable init
            (presumably restores a checkpoint -- verify against callers).

    Returns:
        Whatever the final `save_fn()` call returns.
    """
    tf_logging.info("train_pairing ENTRY")
    train_batches, dev_batches = nli_data

    max_steps = train_config.max_steps
    num_gpu = train_config.num_gpu

    # Build the model, its train op, and the global step counter.
    lms_model = LMSModel(modeling_option, bert_hp, lms_config, num_gpu)
    train_cls = lms_model.get_train_op(bert_hp.lr, max_steps)
    global_step = tf.train.get_or_create_global_step()

    # Summary writers are keyed on the run name (basename of save_dir).
    run_name = os.path.basename(save_dir)
    train_writer, test_writer = setup_summary_writer(run_name)

    sess = init_session()
    sess.run(tf.global_variables_initializer())
    init_fn(sess)

    # NOTE: the explain train_op is expected not to advance the global step.

    def fetch_global_step():
        # Read the current global step value from the session.
        step, = sess.run([global_step])
        return step

    # Bind session/tensors into the generic train/eval helpers.
    train_classification = partial(train_fn_factory, sess,
                                   lms_model.loss_tensor,
                                   lms_model.per_layer_loss, train_cls,
                                   lms_model.batch2feed_dict)
    # Evaluate on the first 20 dev batches only.
    eval_acc = partial(eval_fn_factory, sess, dev_batches[:20],
                       lms_model.loss_tensor, lms_model.per_layer_loss,
                       lms_model.ex_score_tensor,
                       lms_model.per_layer_logit_tensor, global_step,
                       lms_model.batch2feed_dict, test_writer)

    save_fn = partial(save_fn_factory, sess, save_dir, global_step)
    init_step, = sess.run([global_step])

    def train_fn(batch, step_i):
        # One training step plus a manual loss summary keyed on the
        # *session* global step (not step_i).
        loss_val, acc = train_classification(batch, step_i)
        summary = tf.Summary()
        summary.value.add(tag='loss', simple_value=loss_val)
        train_writer.add_summary(summary, fetch_global_step())
        train_writer.flush()
        return loss_val, acc

    def valid_fn():
        eval_acc()

    tf_logging.info("Initialize step to {}".format(init_step))
    tf_logging.info("{} train batches".format(len(train_batches)))
    valid_freq = 100      # validate every 100 steps
    save_interval = 300   # checkpoint every 300 steps
    loss, _ = step_runner(train_batches, train_fn, init_step, valid_fn,
                          valid_freq, save_fn, save_interval, max_steps)
    # Final checkpoint; its return value is the function's result.
    return save_fn()
def run_estimator(model_fn, input_fn, host_call=None):
    """Configure logging and run an Estimator for training (driven by FLAGS).

    Args:
        model_fn: estimator model function.
        input_fn: estimator input function.
        host_call: unused in this implementation.
    """
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    #FLAGS.init_checkpoint = auto_resolve_init_checkpoint(FLAGS.init_checkpoint)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    if FLAGS.do_predict:
        # Throttle repetitive log lines during prediction.
        tf_logging.addFilter(CounterFilter())

    # TPU-style RunConfig, but with no cluster resolver (CPU/GPU path).
    tpu_cluster_resolver = None
    config = tf.compat.v1.ConfigProto(allow_soft_placement=False, )
    is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.compat.v1.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=config,
        tf_random_seed=FLAGS.random_seed,
    )

    if FLAGS.random_seed is not None:
        tf_logging.info("Using random seed : {}".format(FLAGS.random_seed))
        tf.random.set_seed(FLAGS.random_seed)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    # NOTE(review): batch_size is hard-coded to 16 here while the log below
    # reports FLAGS.train_batch_size -- confirm which one the input_fn uses.
    estimator = tf.compat.v1.estimator.Estimator(model_fn=model_fn,
                                                 config=run_config,
                                                 params={'batch_size': 16})

    if FLAGS.do_train:
        tf_logging.info("***** Running training *****")
        tf_logging.info("  Batch size = %d", FLAGS.train_batch_size)

        estimator.train(input_fn=input_fn, max_steps=FLAGS.num_train_steps)
# Example #5 (score: 0)
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Synthetic benchmark graph: repeatedly applies dense layers to a
        stack of tensors derived from an embedding lookup, wired in one of
        two ways selected by the closure variable `modeling` ("A" shares one
        dense layer across the stack; anything else uses one dense layer per
        branch). Presumably used to compare memory/step-time of the two
        wirings -- TODO confirm against the enclosing builder.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("    name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        next_sentence_labels = features["next_sentence_labels"]

        initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.02)

        # Hard-coded embedding table (not read from any config).
        vocab_size = 40000
        embedding_size = 512
        embedding_table = tf.compat.v1.get_variable(
            name="embedding",
            shape=[vocab_size, embedding_size],
            initializer=initializer)

        # Embedding lookup via one-hot matmul.
        input_shape = get_shape_list(input_ids)
        flat_input_ids = tf.reshape(input_ids, [-1])
        one_hot_input_ids = tf.one_hot(flat_input_ids, depth=vocab_size)
        output = tf.matmul(one_hot_input_ids, embedding_table)
        output = tf.reshape(output, input_shape + [embedding_size])

        # Ten branches, each the embedding output shifted by a constant.
        t_list = []
        n_of_t = 10
        for j in range(n_of_t):
            t_list.append(output + j)

        # Option "A": one dense layer shared across all branches.
        dense = tf.keras.layers.Dense(embedding_size,
                                      kernel_initializer=initializer,
                                      name="MDense")
        if modeling == "B":
            # Option "B": a separate dense layer per branch.
            dense_list = []
            for j in range(n_of_t):
                dense_list.append(
                    tf.keras.layers.Dense(embedding_size,
                                          kernel_initializer=initializer,
                                          name="MDense_{}".format(j)))

        # 20 rounds of dense + dropout; variables are reused after the
        # first iteration via `reuse=i > 0`.
        for i in range(20):
            if modeling == "A":
                # Stack branches into one tensor so the shared layer runs once.
                t = tf.stack(t_list, 0)
                with tf.compat.v1.variable_scope("scope_A", reuse=i > 0):
                    t = dense(t)
                t = tf.nn.dropout(t, rate=0.5)

                # Branch 0 becomes the sum of branches 1..n-1; note t[0]
                # itself is not included in the sum.
                t_0 = 0
                for j in range(1, n_of_t):
                    t_0 += t[j]

                new_t_list = [t_0]
                for j in range(1, n_of_t):
                    new_t_list.append(t[j])

                t_list = new_t_list
            else:
                with tf.compat.v1.variable_scope("scope_B", reuse=i > 0):
                    # Apply each branch's own dense layer, then dropout.
                    temp_t = []
                    for j in range(n_of_t):
                        t = dense_list[j](t_list[j])
                        t = tf.nn.dropout(t, rate=0.5)
                        temp_t.append(t)

                    # Same recombination as option "A".
                    t_0 = 0
                    for j in range(1, n_of_t):
                        t_0 += temp_t[j]

                    new_t_list = [t_0]
                    for j in range(1, n_of_t):
                        new_t_list.append(temp_t[j])

                    t_list = new_t_list

        # Loss is just the mean of branch 0.
        t = t_list[0]
        total_loss = tf.reduce_mean(t)
        for t in tf.compat.v1.trainable_variables():
            print(t)
        train_op = create_optimizer(total_loss, 1e-4, 1000, 1000, True)

        scaffold_fn = None
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            train_op=train_op,
            loss=total_loss,
            training_hooks=[OomReportingHook()],
            scaffold_fn=scaffold_fn)

        return output_spec
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        BERT classifier with an auxiliary domain head on a reversed-gradient
        branch (`grad_reverse` -- presumably a DANN-style gradient-reversal
        layer, judging by the name; confirm against its definition).
        Total loss is pred_loss + alpha * domain_loss.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if mode == tf.estimator.ModeKeys.PREDICT:
            # No labels at predict time; use dummy ones so the graph builds.
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
        else:
            label_ids = features["label_ids"]
            label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        domain_ids = features["domain_ids"]
        domain_ids = tf.reshape(domain_ids, [-1])

        # Per-example mask: only valid labels contribute to pred_loss.
        is_valid_label = features["is_valid_label"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model_1 = BertModel(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model_1.get_pooled_output()
        if is_training:
            pooled = dropout(pooled, 0.1)

        # Main classification head.
        logits = tf.keras.layers.Dense(train_config.num_classes,
                                       name="cls_dense")(pooled)
        pred_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)
        # Domain head fed through the gradient-reversed pooled output.
        num_domain = 2
        pooled_for_domain = grad_reverse(pooled)
        domain_logits = tf.keras.layers.Dense(
            num_domain, name="domain_dense")(pooled_for_domain)
        domain_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=domain_logits, labels=domain_ids)

        pred_loss = tf.reduce_mean(pred_losses *
                                   tf.cast(is_valid_label, tf.float32))
        domain_loss = tf.reduce_mean(domain_losses)

        tf.compat.v1.summary.scalar('domain_loss', domain_loss)
        tf.compat.v1.summary.scalar('pred_loss', pred_loss)
        # alpha trades off the adversarial domain loss.
        alpha = model_config.alpha
        loss = pred_loss + alpha * domain_loss
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Pass None so the optimizer picks its own variable set.
            tvars = None
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [logits, label_ids, is_real_example])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "logits": logits,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
# Example #7 (score: 0)
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Masked-LM training over either the APR model or plain BERT
        (selected by the closure variable `model_name`). Masks are taken
        directly from the features (fixed masks); the dynamic-masking path
        is commented out below.
        """
        log_features(features)

        # Flattened batch features are reshaped back to per-batch layouts.
        def reform_a_input(raw_input):
            return tf.reshape(raw_input,
                              [dict_run_config.inner_batch_size, -1])

        def reform_b_input(raw_input):
            return tf.reshape(raw_input, [dict_run_config.def_per_batch, -1])

        input_ids = reform_a_input(features["input_ids"])
        input_mask = reform_a_input(features["input_mask"])
        segment_ids = reform_a_input(features["segment_ids"])
        tf_logging.info("input_ids, input_mask")

        # input_ids = features["input_ids"]
        # input_mask = features["input_mask"]
        # segment_ids = features["segment_ids"]

        if mode == tf.estimator.ModeKeys.PREDICT:
            # Deterministic graph at predict time.
            tf.random.set_seed(0)
            seed = 0
        else:
            seed = None

        # tf_logging.info("Doing dynamic masking (random)")
        # masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        #     = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, seed)
        # if dict_run_config.prediction_op == "loss_fixed_mask" or train_config.fixed_mask:
        masked_input_ids = input_ids
        masked_lm_positions = reform_a_input(features["masked_lm_positions"])
        masked_lm_ids = reform_a_input(features["masked_lm_ids"])
        masked_lm_weights = reform_a_input(features["masked_lm_weights"])

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        if model_name == "APR":
            model = APR(
                masked_input_ids,
                input_mask,
                segment_ids,
                is_training,
                train_config.use_one_hot_embeddings,
                bert_config,
                ssdr_config,
                dict_run_config.def_per_batch,
                dict_run_config.inner_batch_size,
                dict_run_config.max_def_length,
            )
        elif model_name == "BERT":
            model = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=masked_input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
        else:
            assert False

        masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
            = get_masked_lm_output(bert_config, model.get_sequence_output(), model.get_embedding_table(),
                 masked_lm_positions, masked_lm_ids, masked_lm_weights)

        loss = masked_lm_loss

        # Checkpoint restore: align twice via the dict-aware assignment map.
        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = dict_model_fn.get_bert_assignment_map_for_dict
        initialized_variable_names, init_fn = align_checkpoint_twice(
            tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Optionally use separate learning rates for the two model parts.
            if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
                tf_logging.info("Using two lr for each parts")
                train_op = create_optimizer_with_separate_lr(
                    loss, train_config)
            else:
                tf_logging.info("Using single lr ")
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           training_hooks=[OomReportingHook()],
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn_lm, [
                masked_lm_example_loss,
                masked_lm_log_probs,
                masked_lm_ids,
                masked_lm_weights,
            ])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "masked_lm_ids": masked_lm_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "masked_lm_positions": masked_lm_positions,
            }
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           predictions=predictions,
                                           scaffold_fn=scaffold_fn)

        return output_spec
# Example #8 (score: 0)
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator (APR masked-LM).

        Applies dynamic random masking to the inputs, runs the APR model,
        and trains/evaluates on the masked-LM objective.

        Args:
            features: dict with "input_ids", "input_mask", "segment_ids".
            labels: unused.
            mode: estimator mode key.
            params: unused.

        Returns:
            A `TPUEstimatorSpec` for the given mode.
        """
        # Fix: the docstring above was previously placed after the first
        # statement, where Python does not treat it as a docstring.
        tf_logging.info("model_fn_apr_lm")
        log_features(features)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        if mode == tf.estimator.ModeKeys.PREDICT:
            # Deterministic masking/graph at predict time.
            tf.random.set_seed(0)
            seed = 0
        else:
            seed = None

        tf_logging.info("Doing dynamic masking (random)")
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, seed)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        tf_logging.info("Using masked_input_ids")
        model = APR(
            masked_input_ids,
            input_mask,
            segment_ids,
            is_training,
            train_config.use_one_hot_embeddings,
            bert_config,
            ssdr_config,
            dict_run_config.def_per_batch,
            dict_run_config.inner_batch_size,
            dict_run_config.max_def_length,
            #  MainTransformer,
            #  SecondTransformerEmbeddingLess,
        )

        masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
            = get_masked_lm_output(bert_config, model.get_sequence_output(), model.get_embedding_table(),
                 masked_lm_positions, masked_lm_ids, masked_lm_weights)

        loss = masked_lm_loss

        # Checkpoint restore via the dict-aware assignment map.
        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = dict_model_fn.get_bert_assignment_map_for_dict
        initialized_variable_names, init_fn = align_checkpoint_twice(
            tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Optionally use separate learning rates for the two model parts.
            if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
                tf_logging.info("Using two lr for each parts")
                train_op = create_optimizer_with_separate_lr(
                    loss, train_config)
            else:
                tf_logging.info("Using single lr ")
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           training_hooks=[OomReportingHook()],
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn_lm, [
                masked_lm_example_loss,
                masked_lm_log_probs,
                masked_lm_ids,
                masked_lm_weights,
            ])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "masked_lm_ids": masked_lm_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "masked_lm_positions": masked_lm_positions,
            }
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           predictions=predictions,
                                           scaffold_fn=scaffold_fn)

        return output_spec
# Example #9 (score: 0)
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Two-tower (query / document) encoder pair. Scores are dot products
        between pooled query and document vectors, trained with a hinge loss
        on {-1, +1} targets derived from `label_ids`.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        q_input_ids_1 = features["q_input_ids_1"]
        q_input_mask_1 = features["q_input_mask_1"]
        d_input_ids_1 = features["d_input_ids_1"]
        d_input_mask_1 = features["d_input_mask_1"]

        q_input_ids_2 = features["q_input_ids_2"]
        q_input_mask_2 = features["q_input_mask_2"]
        d_input_ids_2 = features["d_input_ids_2"]
        d_input_mask_2 = features["d_input_mask_2"]

        # Stack the two pair members along a new leading axis so each tower
        # encodes both in one pass; segment ids are all zeros.
        q_input_ids = tf.stack([q_input_ids_1, q_input_ids_2], axis=0)
        q_input_mask = tf.stack([q_input_mask_1, q_input_mask_2], axis=0)
        q_segment_ids = tf.zeros_like(q_input_ids, tf.int32)

        d_input_ids = tf.stack([d_input_ids_1, d_input_ids_2], axis=0)
        d_input_mask = tf.stack([d_input_mask_1, d_input_mask_2], axis=0)
        d_segment_ids = tf.zeros_like(d_input_ids, tf.int32)

        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        # Separate variable scopes keep the two towers' weights distinct.
        with tf.compat.v1.variable_scope("query"):
            model_q = model_class(
                config=model_config,
                is_training=is_training,
                input_ids=q_input_ids,
                input_mask=q_input_mask,
                token_type_ids=q_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope("document"):
            model_d = model_class(
                config=model_config,
                is_training=is_training,
                input_ids=d_input_ids,
                input_mask=d_input_mask,
                token_type_ids=d_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
        pooled_q = model_q.get_pooled_output()
        pooled_d = model_d.get_pooled_output()

        # Hinge loss: map labels {0,1} to y in {-1,+1}, then max(1 - s*y, 0).
        logits = tf.matmul(pooled_q, pooled_d, transpose_b=True)
        y = tf.cast(label_ids, tf.float32) * 2 - 1
        losses = tf.maximum(1.0 - logits * y, 0)
        loss = tf.reduce_mean(losses)

        # Binary prediction from the score sign.
        pred = tf.cast(logits > 0, tf.int32)

        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}

        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if "simple_optimizer" in special_flags:
                tf_logging.info("using simple optimizer")
                train_op = create_simple_optimizer(loss,
                                                   train_config.learning_rate,
                                                   train_config.use_tpu)
            else:
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [pred, label_ids, is_real_example])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "q_input_ids": q_input_ids,
                "d_input_ids": d_input_ids,
                "score": logits
            }

            # Pass through auxiliary id features when present.
            useful_inputs = ["data_id", "input_ids2", "data_ids"]
            for input_name in useful_inputs:
                if input_name in features:
                    predictions[input_name] = features[input_name]
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)

        return output_spec
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Dual-BERT setup: the first model classifies (input_ids), the second
        produces a confidence in [0, 1] from a softmax head (input_ids2).
        Per-example loss is cls_loss * confidence + (1 - confidence) * k,
        then class-weighted by `apply_weighted_loss` with alpha.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if mode == tf.estimator.ModeKeys.PREDICT:
            # No labels at predict time; dummy ones keep the graph valid.
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
        else:
            label_ids = features["label_ids"]
            label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        input_ids2 = features["input_ids2"]
        input_mask2 = features["input_mask2"]
        segment_ids2 = features["segment_ids2"]
        # Classifier tower.
        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=model_config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            pooled = model_1.get_pooled_output()
            if is_training:
                pooled = dropout(pooled, 0.1)
            logits = tf.keras.layers.Dense(train_config.num_classes,
                                           name="cls_dense")(pooled)
        # Confidence tower: softmax head, class-1 probability is the
        # confidence score.
        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model_2 = BertModel(
                config=model_config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            pooled = model_2.get_pooled_output()
            if is_training:
                pooled = dropout(pooled, 0.1)
            conf_probs = tf.keras.layers.Dense(
                train_config.num_classes,
                name="cls_dense",
                activation=tf.keras.activations.softmax)(pooled)

            confidence = conf_probs[:, 1]
        # Penalty for low confidence.
        confidence_loss = 1 - confidence

        cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)

        # k scales the low-confidence penalty; alpha is the class weight.
        k = model_config.k
        alpha = model_config.alpha
        loss_arr = cls_loss * confidence + confidence_loss * k

        loss_arr = apply_weighted_loss(loss_arr, label_ids, alpha)

        loss = tf.reduce_mean(input_tensor=loss_arr)
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        def metric_fn(log_probs, label, is_real_example, confidence):
            # Standard classification metrics plus mean confidence.
            r = classification_metric_fn(log_probs, label, is_real_example)
            r['confidence'] = tf.compat.v1.metrics.mean(confidence)
            return r

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Pass None so the optimizer picks its own variable set.
            tvars = None
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn,
                            [logits, label_ids, is_real_example, confidence])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "logits": logits,
                "confidence": confidence,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
# Example #11
# 0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Per-token regression head: a Dense layer over the BERT sequence
        output is trained with an MAE loss that is masked by `label_masks`,
        so only positions that carry a target contribute to the objective.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]      # per-token regression targets
        label_masks = features["label_masks"]  # nonzero where a target exists

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        # Linear projection applied at every sequence position.
        logits = tf.keras.layers.Dense(train_config.num_classes,
                                       name="token_regression")(
                                           model.get_sequence_output())

        # label_ids is expanded to rank 3 so MAE lines up against logits.
        per_ex_losses = tf.keras.losses.MAE(tf.expand_dims(label_ids, 2),
                                            logits)
        masked_losses = per_ex_losses * tf.cast(label_masks, tf.float32)
        losses_sum = tf.reduce_sum(masked_losses, axis=1)
        # + 1e-5 guards against division by zero for all-masked sequences.
        denom = tf.reduce_sum(tf.cast(label_masks, tf.float32), axis=1) + 1e-5
        losses = losses_sum / denom
        total_loss = tf.reduce_mean(losses)
        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            # Map checkpoint variables onto this graph's BERT variables.
            assignment_map, initialized_variable_names = get_bert_assignment_map(
                tvars, train_config.init_checkpoint)
            if train_config.use_tpu:

                def tpu_scaffold():
                    # On TPU, checkpoint init must run inside the scaffold fn.
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, assignment_map)
                    return tf.compat.v1.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)

        tf_logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(logits, label_ids, label_masks):
                """Sign-agreement accuracy of predictions vs. targets."""
                logits_reduced = tf.squeeze(logits, 2)
                # Correct when prediction and target share the same sign.
                is_neg_correct = tf.logical_and(tf.less(label_ids, 0.),
                                                tf.less(logits_reduced, 0.))
                is_pos_correct = tf.logical_and(tf.less(0., label_ids),
                                                tf.less(0., logits_reduced))
                is_correct = tf.logical_or(is_neg_correct, is_pos_correct)

                float_masks = tf.cast(label_masks, tf.float32)
                num_correct = tf.reduce_sum(tf.cast(is_correct, tf.float32) *
                                            float_masks,
                                            axis=1)
                # + 1e-5 avoids 0/0 for sequences without labeled tokens.
                num_problems = tf.reduce_sum(float_masks, axis=1) + 1e-5
                acc_list = num_correct / num_problems

                mean_acc = tf.compat.v1.metrics.mean(values=acc_list)

                return {'mean_acc': mean_acc}

            eval_metrics = (metric_fn, [logits, label_ids, label_masks])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            # PREDICT: emit raw logits plus inputs for offline analysis.
            predictions = {
                "logits": logits,
                "input_ids": input_ids,
                "labels": label_ids,
                "label_masks": label_masks,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
# Example #12
# 0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        The model emits a scalar probability; it is converted to two-class
        log-probabilities so the standard sparse softmax cross-entropy loss
        can be applied against integer labels.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
            # Without the flag, treat every example as real.
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
            config=model_config,
            is_training=is_training,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            features=features,
        )
        probs = model.get_prob()
        # NOTE(review): graph-construction debug prints; consider removing.
        print(probs)
        prob0 = 1-probs
        print(prob0)
        # Stack to [batch, 2]: column 0 = P(class 0), column 1 = P(class 1).
        prob2d = tf.stack([prob0, probs], 1)
        print(prob2d)
        # Clip before log to avoid -inf / NaN gradients at exactly 0 or 1.
        epsilon = 1e-6
        probs_ad = tf.clip_by_value(prob2d, epsilon, 1.0 - epsilon)
        logits = tf.math.log(probs_ad)
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=label_ids)

        loss = tf.reduce_mean(input_tensor=loss_arr)
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            # tvars=None lets the optimizer pick up all trainable variables.
            tvars = None
            train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn, [
                prob2d, label_ids, is_real_example
            ])
            output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
        else:
            # PREDICT: expose labels and log-probabilities.
            predictions = {
                    "label_ids": label_ids,
                    "logits": logits,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
        return output_spec
# Example #13
# 0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        The model object computes its own logits and loss; this wrapper
        handles checkpoint initialization and the three estimator modes.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            # Without the flag, treat every example as real.
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
            config=model_config,
            is_training=is_training,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            features=features,
        )
        logits = model.get_logits()
        loss = model.get_loss(label_ids)
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            # tvars=None lets the optimizer pick up all trainable variables.
            tvars = None
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [logits, label_ids, is_real_example])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            # PREDICT: also expose the model's window bookkeeping tensors.
            predictions = {
                "label_ids": label_ids,
                "logits": logits,
                "is_first_window": model.is_first_window,
                "num_content_tokens": model.num_content_tokens,
                "has_enough_evidence": model.has_enough_evidence,
                "is_valid_window": model.is_valid_window,
                "is_valid_window_mask": model.is_valid_window_mask
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
# Example #14
# 0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Pairwise ranking: paired inputs are combined, encoded by BERT, and
        scored by pairwise_model. strict_good / strict_bad flag instances
        that additionally carry an absolute quality label.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids, input_mask, segment_ids = combine_paired_input_features(
            features)

        strict_good = features["strict_good"]
        strict_bad = features["strict_bad"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model.get_pooled_output()
        losses, logits, pair_logits = pairwise_model(pooled, strict_good,
                                                     strict_bad)
        total_loss = tf.reduce_mean(losses)
        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            # Map checkpoint variables onto this graph's BERT variables.
            assignment_map, initialized_variable_names = get_bert_assignment_map(
                tvars, train_config.init_checkpoint)
            if train_config.use_tpu:

                def tpu_scaffold():
                    # On TPU, checkpoint init must run inside the scaffold fn.
                    tf.compat.v1.train.init_from_checkpoint(
                        train_config.init_checkpoint, assignment_map)
                    return tf.compat.v1.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)

        tf_logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(pair_logits, strict_good, strict_bad):
                """Pairwise and strict accuracies from the pair scores."""
                # The pair is correct when the left item outscores the right.
                diff = pair_logits[:, 0] - pair_logits[:, 1]
                pairwise_correct = tf.less(0.0, diff)

                # Strict-good: left score must exceed +1.
                strict_good_correct_raw = tf.reshape(
                    tf.less(1.0, pair_logits[:, 0]), [-1, 1])
                strict_good_correct = cast_float_multiply(
                    strict_good_correct_raw, strict_good)
                # Strict-bad: right score must be below -1.
                strict_bad_correct_raw = tf.reshape(
                    tf.less(pair_logits[:, 1], -1.0), [-1, 1])
                strict_bad_correct = cast_float_multiply(
                    strict_bad_correct_raw, strict_bad)

                pairwise_acc_raw = tf.cast(pairwise_correct, tf.float32)
                mean_acc = tf.compat.v1.metrics.mean(values=pairwise_acc_raw)

                def strict_accuracy(correctness, gold):
                    # Weighted by gold so only flagged instances count.
                    return tf.compat.v1.metrics.accuracy(
                        labels=tf.ones_like(gold, tf.int32),
                        predictions=tf.cast(correctness, tf.int32),
                        weights=tf.cast(gold, tf.float32))

                return {
                    'mean_acc':
                    mean_acc,
                    'strict_good_acc':
                    strict_accuracy(strict_good_correct, strict_good),
                    'strict_bad_acc':
                    strict_accuracy(strict_bad_correct, strict_bad)
                }

            eval_metrics = (metric_fn, [pair_logits, strict_good, strict_bad])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            # PREDICT: emit per-item logits plus inputs and strict flags.
            predictions = {
                "logits": logits,
                "input_ids": input_ids,
                "strict_good": strict_good,
                "strict_bad": strict_bad,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
# Example #15
# 0
    def __init__(self, modeling_option, bert_hp, lms_config, num_gpu):
        """Build the LMS training graph on one or several GPUs.

        Args:
            modeling_option: 'ce' (CrossEntropyModeling) or 'co'
                (CorrelationModeling); selects the explanation-model class.
            bert_hp: BERT hyperparameters, passed through to build_model.
            lms_config: LMS configuration; num_tags determines how many
                per-tag tensor lists are gathered in the multi-GPU path.
            num_gpu: number of GPUs; 1 builds a single replica, otherwise
                one replica per GPU is built and their tensors merged.
        """
        ex_modeling_class = {
            'ce': CrossEntropyModeling,
            'co': CorrelationModeling
        }[modeling_option]

        def build_model_fn():
            # One replica: (task model, explanation model, 3-way match predictor).
            return build_model(ex_modeling_class, bert_hp, lms_config)

        self.num_gpu = num_gpu
        self.match_predictor = None
        self.match_predictor_list = None
        self.bert_hp = bert_hp

        if num_gpu == 1:
            tf_logging.info("Using single GPU")
            task_model_, ex_model_, match_predictor_3way = build_model_fn()
            loss_tensor = match_predictor_3way.loss
            # List[Tensor[num_layer]]
            per_layer_loss_list = match_predictor_3way.all_losses
            batch2feed_dict = task_model_.batch2feed_dict
            logits = task_model_.logits
            ex_score_tensor = ex_model_.get_scores()

            # List[List[Tensor[batch, 2]]]
            per_layer_logit_list = match_predictor_3way.per_layer_logits_list

            self.match_predictor = match_predictor_3way

        else:
            main_models, ex_models, match_predictor_list = zip(
                *get_multiple_models(build_model_fn, num_gpu))
            loss_tensor = get_avg_loss(match_predictor_list)
            # Bind i as a lambda default so each selector captures the tag
            # index of its own iteration; a plain closure over i would be
            # late-bound and see only the final value if the lambda were
            # invoked after the comprehension finishes.
            per_layer_loss_list = [
                get_avg_tensors_from_models(
                    match_predictor_list,
                    lambda match_predictor, i=i:
                    match_predictor.per_layer_loss_list[i])
                for i in range(lms_config.num_tags)
            ]
            batch2feed_dict = get_batch2feed_dict_for_multi_gpu(main_models)
            logits = get_concat_tensors_from_models(main_models,
                                                    lambda model: model.logits)

            ex_score_tensor = get_concat_tensors_from_models(
                ex_models, lambda model: model.get_scores())
            per_layer_logit_list = [
                get_concat_tensors_list_from_models(
                    match_predictor_list,
                    lambda model, i=i: model.per_layer_logit_list[i])
                for i in range(lms_config.num_tags)
            ]

            self.match_predictor_list = match_predictor_list

        self.logits = logits
        self.batch2feed_dict = batch2feed_dict
        self.ex_score_tensor = ex_score_tensor
        self.loss_tensor = loss_tensor

        # List[List[Tensor[batch_size, 2]]]
        self.per_layer_logit_list = per_layer_logit_list
        # List[List[Tensor[1]]]
        self.per_layer_loss_list = per_layer_loss_list
# Example #16
# 0
 def init_from_nli(sess):
     """Initialize weights in-session: NLI checkpoint for the main model, plain BERT for the dict reader."""
     tf_logging.info("Initializing model with nli(main) and bert(dict reader)")
     bert_path = get_bert_full_path()
     init_dict_model_with_nli_and_bert(sess, nli_checkpoint_path, bert_path)
# Example #17
# 0
 def load_last_saved_model(self, model_path):
     """Restore the most recent checkpoint found under model_path into self.sess."""
     latest = get_latest_model_path_from_dir_path(model_path)
     load_model(self.sess, latest)
     tf_logging.info("Loading previous model from {}".format(latest))
# Example #18
# 0
def eval_fn_factory(sess, dev_batches, loss_tensor, per_layer_loss,
                    ex_scores_tensor, per_layer_logits_tensor,
                    global_step_tensor, batch2feed_dict, test_writer):
    """Run one evaluation pass over dev_batches and write TF summaries.

    Computes the average loss, then per-layer accuracy/precision/recall/F1
    of thresholded per-token logits against thresholded explanation scores,
    broken down by token region (all / non-padding / seg1 / seg2 / neutral),
    and emits everything to test_writer.

    Returns:
        The average loss over all dev batches.
    """
    loss_list = []
    all_ex_scores: List[np.array] = []
    all_per_layer_logits: List[List[np.array]] = []
    input_mask_list = []
    per_layer_loss_list = []
    segment_ids_list = []
    label_list = []
    # NOTE(review): assumes dev_batches is non-empty; g_step_val used
    # below is only bound inside this loop.
    for batch in dev_batches:
        input_ids, input_mask, segment_ids, label = batch
        input_mask_list.append(input_mask)
        segment_ids_list.append(segment_ids)
        label_list.append(label)
        loss_val, per_layer_loss_val, ex_scores, per_layer_logits, g_step_val \
            = sess.run([loss_tensor, per_layer_loss, ex_scores_tensor, per_layer_logits_tensor, global_step_tensor],
                       feed_dict=batch2feed_dict(batch)
                       )
        loss_list.append(loss_val)
        all_ex_scores.append(ex_scores)
        per_layer_loss_list.append(per_layer_loss_val)
        all_per_layer_logits.append(per_layer_logits)

    all_segment_ids = np.concatenate(segment_ids_list, axis=0)
    all_input_mask = np.concatenate(input_mask_list, axis=0)
    all_ex_scores_np = np.concatenate(all_ex_scores, axis=0)
    all_per_layer_loss = np.stack(per_layer_loss_list)
    all_label = np.concatenate(label_list, axis=0)
    num_layer = len(per_layer_logits_tensor)
    logits_grouped_by_layer = []
    for layer_no in range(num_layer):
        # Regroup: per-batch lists of per-layer arrays -> per-layer arrays.
        t = np.concatenate([batch[layer_no] for batch in all_per_layer_logits],
                           axis=0)
        logits_grouped_by_layer.append(t)

    avg_loss = np.average(loss_list)
    summary = tf.Summary()

    tf_logging.info("Step dev step={0} loss={1:.04f}".format(
        g_step_val, avg_loss))
    summary.value.add(tag='loss', simple_value=avg_loss)
    # Gold labels: explanation score above 0.5 counts as positive.
    gold_ex_binary = all_ex_scores_np > 0.5
    for layer_no, logits in enumerate(logits_grouped_by_layer):
        # Predicted positive where logit[..., 1] > logit[..., 0].
        pred_binary = np.less(logits[:, :, 0], logits[:, :, 1])

        # score_per_data_point = []
        score_list_d = defaultdict(list)
        for data_point_idx in range(len(gold_ex_binary)):
            # per_data_point = get_acc_prec_recall(pred_binary[data_point_idx], gold_ex_binary[data_point_idx])
            # score_per_data_point.append(per_data_point)
            padding_start = find_padding(all_input_mask[data_point_idx])
            seg2_start = find_seg2(all_segment_ids[data_point_idx])
            assert 1 < seg2_start < padding_start
            # Score the same prediction under several token-range conditions.
            for st, ed, name in [(0, len(pred_binary), "all"),
                                 (0, padding_start, "non-padding"),
                                 (1, seg2_start, "seg1"),
                                 (seg2_start, padding_start, "seg2")]:
                conditioned_score = get_acc_prec_recall(
                    pred_binary[data_point_idx][st:ed],
                    gold_ex_binary[data_point_idx][st:ed])
                score_list_d[name].append(conditioned_score)

            # Label 1 instances additionally score the seg2 region alone.
            if all_label[data_point_idx] == 1:
                st = seg2_start
                ed = padding_start
                conditioned_score = get_acc_prec_recall(
                    pred_binary[data_point_idx][st:ed],
                    gold_ex_binary[data_point_idx][st:ed])
                score_list_d["neutral"].append(conditioned_score)

        for condition_name in score_list_d:
            score_list = score_list_d[condition_name]
            scores = {}
            for metric in ["accuracy", "precision", "recall", "f1"]:
                # with tf.name_scope(metric):
                scores[metric] = average([d[metric] for d in score_list])
                tag_name = '{}-{}/Layer{}'.format(condition_name, metric,
                                                  layer_no)
                summary.value.add(tag=tag_name, simple_value=scores[metric])

            if condition_name == "all":
                tf_logging.info(
                    "Layer {0} acc={1:.02f}, prec={2:.02f} recall={3:.02f} f1={4:.02f}"
                    .format(layer_no, scores['accuracy'], scores['precision'],
                            scores['recall'], scores['f1']))

        layer_loss = np.mean(all_per_layer_loss[:, layer_no])
        summary.value.add(tag="loss/Layer{}".format(layer_no),
                          simple_value=layer_loss)

    test_writer.add_summary(summary, g_step_val)
    test_writer.flush()

    return avg_loss
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Dynamic random-masking MLM with an extra "label token" LM head:
        label positions found by get_label_indices get their own masked-LM
        loss, weighted by model_config.ratio in the total loss.

        NOTE(review): this def appears after the enclosing function's
        return statement — likely a scraping/concatenation artifact.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("    name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if "next_sentence_labels" in features:
            next_sentence_labels = features["next_sentence_labels"]
        else:
            next_sentence_labels = get_dummy_next_sentence_labels(input_ids)

        if mode == tf.estimator.ModeKeys.PREDICT:
            # Fix all randomness so predictions are reproducible.
            tf.random.set_seed(0)
            seed = 0
            print("Seed as zero")
        else:
            seed = None

        tf_logging.info("Doing dynamic masking (random)")
        # Label tokens are excluded from random mask selection.
        special_tokens = [LABEL_UNK, LABEL_0, LABEL_1, LABEL_2]
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids,
                             input_mask,
                             train_config.max_predictions_per_seq,
                             MASK_ID,
                             seed,
                             special_tokens)

        masked_input_ids, masked_lm_positions_label, masked_label_ids_label, is_test_inst \
            = get_label_indices(masked_input_ids)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output_fn(
             model_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        # A separate variable scope gives the label head its own weights.
        with tf.compat.v1.variable_scope("label_token"):
            (masked_lm_loss_label, masked_lm_example_loss_label,
             masked_lm_log_probs_label) = get_masked_lm_output_fn(
                 model_config, model.get_sequence_output(),
                 model.get_embedding_table(), masked_lm_positions_label,
                 masked_label_ids_label, is_test_inst)
        # NOTE(review): next_sentence_loss is computed but not included in
        # total_loss below — confirm this is intentional.
        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             model_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss + masked_lm_loss_label * model_config.ratio

        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names, initialized_variable_names2, init_fn\
            = align_checkpoint_for_lm(tvars,
                                      train_config.checkpoint_type,
                                      train_config.init_checkpoint,
                                      train_config.second_init_checkpoint,
                                      )

        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)

        log_var_assignments(tvars, initialized_variable_names,
                            initialized_variable_names2)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[OomReportingHook()],
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, masked_lm_example_loss_label,
                masked_lm_log_probs_label, masked_label_ids_label, is_test_inst
            ])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            # PREDICT: emit per-position losses and label-head outputs.
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "masked_lm_ids": masked_lm_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "masked_lm_positions": masked_lm_positions,
                "masked_lm_example_loss_label": masked_lm_example_loss_label,
                "masked_lm_log_probs_label": masked_lm_log_probs_label,
                "masked_label_ids_label": masked_label_ids_label,
                "is_test_inst": is_test_inst,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
# Example #20
# 0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("    name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if "next_sentence_labels" in features:
            next_sentence_labels = features["next_sentence_labels"]
        else:
            next_sentence_labels = get_dummy_next_sentence_labels(input_ids)
        tlm_prefix = "target_task"

        with tf.compat.v1.variable_scope(tlm_prefix):
            priority_score = tf.stop_gradient(priority_model(features))

        priority_score = priority_score * target_model_config.amp
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights\
            = biased_masking(input_ids,
                             input_mask,
                             priority_score,
                             target_model_config.alpha,
                             train_config.max_predictions_per_seq,
                             MASK_ID)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
            config=bert_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             bert_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss + next_sentence_loss

        all_vars = tf.compat.v1.all_variables()

        tf_logging.info("We assume priority model is from v2")

        if train_config.checkpoint_type == "v2":
            assignment_map, initialized_variable_names = assignment_map_v2_to_v2(
                all_vars, train_config.init_checkpoint)
            assignment_map2, initialized_variable_names2 = get_assignment_map_remap_from_v2(
                all_vars, tlm_prefix, train_config.second_init_checkpoint)
        else:
            assignment_map, assignment_map2, initialized_variable_names \
                                            = get_tlm_assignment_map_v2(all_vars,
                                              tlm_prefix,
                                              train_config.init_checkpoint,
                                              train_config.second_init_checkpoint)
            initialized_variable_names2 = None

        def init_fn():
            if train_config.init_checkpoint:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)
            if train_config.second_init_checkpoint:
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.second_init_checkpoint, assignment_map2)

        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)

        tvars = [v for v in all_vars if not v.name.startswith(tlm_prefix)]
        log_var_assignments(tvars, initialized_variable_names,
                            initialized_variable_names2)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                total_loss, train_config, tvars)
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, next_sentence_example_loss,
                next_sentence_log_probs, next_sentence_labels
            ])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "priority_score": priority_score,
                "lm_loss1": features["loss1"],
                "lm_loss2": features["loss2"],
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
Exemple #21
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator: dual-encoder query/doc ranking.

        Encodes the query and each of `num_docs` candidate documents with
        two separately-scoped towers, scores query-document pairs by the
        dot product of the pooled outputs, and trains with a hinge/sigmoid
        loss variant selected via `special_flags`.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        q_input_ids = features["q_input_ids"]
        q_input_mask = features["q_input_mask"]
        d_input_ids = features["d_input_ids"]
        d_input_mask = features["d_input_mask"]

        input_shape = get_shape_list(q_input_ids, expected_rank=2)
        batch_size = input_shape[0]

        doc_length = model_config.max_doc_length
        num_docs = model_config.num_docs

        # Documents arrive packed per example; flatten so each document is
        # encoded independently as [batch * num_docs, doc_length].
        d_input_ids_unpacked = tf.reshape(d_input_ids,
                                          [-1, num_docs, doc_length])
        d_input_mask_unpacked = tf.reshape(d_input_mask,
                                           [-1, num_docs, doc_length])

        d_input_ids_flat = tf.reshape(d_input_ids_unpacked, [-1, doc_length])
        d_input_mask_flat = tf.reshape(d_input_mask_unpacked, [-1, doc_length])

        q_segment_ids = tf.zeros_like(q_input_ids, tf.int32)
        d_segment_ids = tf.zeros_like(d_input_ids_flat, tf.int32)

        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        with tf.compat.v1.variable_scope(dual_model_prefix1):
            q_model_config = copy.deepcopy(model_config)
            q_model_config.max_seq_length = model_config.max_sent_length
            # BUG FIX: the query tower previously received `model_config`,
            # leaving the deep-copied q_model_config (with the query
            # sequence length) created but never used.
            model_q = model_class(
                config=q_model_config,
                is_training=is_training,
                input_ids=q_input_ids,
                input_mask=q_input_mask,
                token_type_ids=q_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope(dual_model_prefix2):
            d_model_config = copy.deepcopy(model_config)
            d_model_config.max_seq_length = model_config.max_doc_length
            # BUG FIX: same as above for the document tower.
            model_d = model_class(
                config=d_model_config,
                is_training=is_training,
                input_ids=d_input_ids_flat,
                input_mask=d_input_mask_flat,
                token_type_ids=d_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
        pooled_q = model_q.get_pooled_output()  # [batch, vector_size]
        pooled_d_flat = model_d.get_pooled_output(
        )  # [batch * num_docs, vector_size]

        pooled_d = tf.reshape(pooled_d_flat, [batch_size, num_docs, -1])
        pooled_q_t = tf.expand_dims(pooled_q, 1)
        pooled_d_t = tf.transpose(pooled_d, [0, 2, 1])
        all_logits = tf.matmul(pooled_q_t,
                               pooled_d_t)  # [batch, 1, num_docs]
        # Select the loss variant (typo `apply_loss_modeing` renamed).
        if "hinge_all" in special_flags:
            apply_loss_fn = hinge_all
        elif "sigmoid_all" in special_flags:
            apply_loss_fn = sigmoid_all
        else:
            apply_loss_fn = hinge_max
        logits, loss = apply_loss_fn(all_logits, label_ids)
        pred = tf.cast(logits > 0, tf.int32)

        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}

        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if "simple_optimizer" in special_flags:
                tf_logging.info("using simple optimizer")
                train_op = create_simple_optimizer(loss,
                                                   train_config.learning_rate,
                                                   train_config.use_tpu)
            else:
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [pred, label_ids, is_real_example])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "q_input_ids": q_input_ids,
                "d_input_ids": d_input_ids,
                "logits": logits
            }

            # Pass through auxiliary id features when present so that
            # predictions can be joined back to the input data.
            useful_inputs = ["data_id", "input_ids2", "data_ids"]
            for input_name in useful_inputs:
                if input_name in features:
                    predictions[input_name] = features[input_name]
            output_spec = TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)

        return output_spec
Exemple #22
0
def run_estimator(model_fn, input_fn, host_call=None):
    """Build a TPUEstimator from FLAGS and run train/eval/predict phases.

    Which phases execute is controlled by FLAGS.do_train / do_eval /
    do_predict.  Without a TPU this falls back to a normal Estimator on
    CPU/GPU.

    Args:
        model_fn: model function handed to TPUEstimator.
        input_fn: input function shared by all phases.
        host_call: unused in this function; kept for interface
            compatibility with callers that pass it.

    Returns:
        The eval-result dict when FLAGS.do_eval is set, otherwise None.
    """
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    if FLAGS.do_predict:
        # Suppress repetitive per-batch log lines during prediction.
        tf_logging.addFilter(CounterFilter())

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    print("FLAGS.save_checkpoints_steps", FLAGS.save_checkpoints_steps)
    config = tf.compat.v1.ConfigProto(allow_soft_placement=False, )
    is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.compat.v1.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=config,
        tf_random_seed=FLAGS.random_seed,
        tpu_config=tf.compat.v1.estimator.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    if FLAGS.random_seed is not None:
        tf_logging.info("Using random seed : {}".format(FLAGS.random_seed))
        tf.random.set_seed(FLAGS.random_seed)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.compat.v1.estimator.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.eval_batch_size,
    )

    if FLAGS.do_train:
        tf_logging.info("***** Running training *****")
        tf_logging.info("  Batch size = %d", FLAGS.train_batch_size)

        estimator.train(input_fn=input_fn, max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf_logging.info("***** Running evaluation *****")
        tf_logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        if FLAGS.initialize_to_predict:
            checkpoint = FLAGS.init_checkpoint
        else:
            # None -> evaluate the latest checkpoint in model_dir.
            checkpoint = None
        result = estimator.evaluate(input_fn=input_fn,
                                    steps=FLAGS.max_eval_steps,
                                    checkpoint_path=checkpoint)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.io.gfile.GFile(output_eval_file, "w") as writer:
            tf_logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf_logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
        return result
    if FLAGS.do_predict:
        tf_logging.info("***** Running prediction *****")
        tf_logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        if not FLAGS.initialize_to_predict:
            verify_checkpoint(estimator.model_dir)
            checkpoint = None
            time.sleep(1)
        else:
            checkpoint = FLAGS.init_checkpoint

        result = estimator.predict(input_fn=input_fn,
                                   checkpoint_path=checkpoint,
                                   yield_single_examples=False)
        # BUG FIX: the output file handle was previously opened inline and
        # never closed; use a context manager so the pickle is flushed and
        # the handle is released.
        with open(FLAGS.out_file, "wb") as out_f:
            pickle.dump(list(result), out_f)
        tf_logging.info("Prediction saved at {}".format(FLAGS.out_file))
Exemple #23
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator (prediction only).

        Applies random dynamic masking to the inputs, computes the
        per-example masked-LM loss, and emits it alongside `instance_id`.
        Only PREDICT mode builds an output spec; for TRAIN/EVAL this
        returns None.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("    name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        instance_id = features["instance_id"]
        # Removed dead code: dummy next-sentence labels were computed here
        # (get_dummy_next_sentence_labels) but never consumed.

        tf_logging.info("Doing dynamic masking (random)")
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             model_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        total_loss = masked_lm_loss

        tvars = tf.compat.v1.trainable_variables()

        use_multiple_checkpoint = is_multiple_checkpoint(
            train_config.checkpoint_type)
        initialized_variable_names, initialized_variable_names2, init_fn\
            = align_checkpoint_for_lm(tvars,
                                      train_config.checkpoint_type,
                                      train_config.init_checkpoint,
                                      train_config.second_init_checkpoint,
                                      use_multiple_checkpoint)

        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names,
                            initialized_variable_names2)

        output_spec = None
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "input_ids": input_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "instance_id": instance_id
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
Exemple #24
0
def log_features(features):
    """Log a features header, then each feature's name and shape sorted by name."""
    tf_logging.info("*** Features ***")
    for key in sorted(features):
        tf_logging.info(f"    name = {key}, shape = {features[key].shape}")
Exemple #25
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator: APR encoder + 3-way classification.

        Encodes the inputs with the APR model and attaches a 3-class
        classification head on its pooled output.  Supports TRAIN, EVAL,
        and PREDICT (per-example loss) modes.
        """
        log_features(features)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        # (Removed large blocks of commented-out code for the dummy-APR
        # inputs and the alternative model_class construction.)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = APR(input_ids, input_mask, segment_ids, is_training,
                    train_config.use_one_hot_embeddings, bert_config,
                    ssdr_config, dict_run_config.def_per_batch,
                    dict_run_config.inner_batch_size,
                    dict_run_config.max_def_length)

        # 3-class classification head on the pooled representation.
        task = Classification(3, features, model.get_pooled_output(),
                              is_training)
        loss = task.loss

        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = tlm.training.assignment_map.get_assignment_map_as_is
        initialized_variable_names, init_fn = get_init_fn(
            tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        output_spec = None
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
                tf_logging.info("Using two lr for each parts")
                train_op = create_optimizer_with_separate_lr(
                    loss, train_config)
            else:
                tf_logging.info("Using single lr ")
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # BUG FIX: `mode=model` passed the model object instead of the
            # estimator mode; EVAL and PREDICT specs were built with an
            # invalid mode value.
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=task.eval_metrics(),
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           predictions={"loss": task.loss_arr},
                                           scaffold_fn=scaffold_fn)
        return output_spec
Exemple #26
0
def train_nli_w_dict(run_name, model: DictReaderInterface, model_path,
                     model_config, data_feeder_loader, model_init_fn):
    """Train an NLI classifier jointly with a dictionary-lookup component.

    Runs a manual session loop (not Estimator): classification steps on
    random batches, lookup-augmented classification once the lookup loss
    moving average drops below `model_config.lookup_threshold`, periodic
    lookup training, periodic validation, and time-based checkpointing.

    Args:
        run_name: name used for summary writers and logging.
        model: DictReaderInterface exposing losses, logits, and feed dicts.
        model_path: directory for loading/saving checkpoints.
        model_config: config with lookup_min_step, lookup_threshold,
            lookup_train_frequency, and optional use_two_lr.
        data_feeder_loader: provides the train and dev data feeders.
        model_init_fn: optional callable(sess) used to initialize weights
            when no previous checkpoint exists in model_path.

    Returns:
        The result of the final save_fn() call (saved model path).
    """
    print("Train nli :", run_name)  # BUG FIX: message previously said "nil".
    batch_size = FLAGS.train_batch_size
    f_train_lookup = "lookup" in FLAGS.train_op
    tf_logging.debug("Building graph")

    with tf.compat.v1.variable_scope("optimizer"):
        lr = FLAGS.learning_rate
        lr2 = lr * 0.1  # the lookup model trains with a 10x smaller lr
        if model_config.compare_attrib_value_safe("use_two_lr", True):
            tf_logging.info("Using two lr for each parts")
            train_cls, global_step = get_train_op_sep_lr(
                model.get_cls_loss(), lr, 5, "dict")
        else:
            train_cls, global_step = train_module.get_train_op(
                model.get_cls_loss(), lr)
        train_lookup_op, global_step = train_module.get_train_op(
            model.get_lookup_loss(), lr2, global_step)

    sess = train_module.init_session()
    sess.run(tf.compat.v1.global_variables_initializer())

    train_writer, test_writer = setup_summary_writer(run_name)

    # Resume from the latest checkpoint if one exists; otherwise run the
    # caller-provided initializer (if any).
    last_saved = get_latest_model_path_from_dir_path(model_path)
    if last_saved:
        tf_logging.info("Loading previous model from {}".format(last_saved))
        load_model(sess, last_saved)
    elif model_init_fn is not None:
        model_init_fn(sess)

    log = log_module.train_logger()
    train_data_feeder = data_feeder_loader.get_train_feeder()
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    lookup_train_feeder = train_data_feeder
    valid_runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info, sess)

    # Pre-build fixed dev batches (with and without dictionary info).
    dev_batches = []
    n_dev_batch = 100
    dev_batches_w_dict = dev_data_feeder.get_all_batches(batch_size,
                                                         True)[:n_dev_batch]
    for _ in range(n_dev_batch):
        dev_batches.append(dev_data_feeder.get_random_batch(batch_size))
        dev_batches_w_dict.append(dev_data_feeder.get_lookup_batch(batch_size))

    def get_summary_obj(loss, acc):
        # Scalar summary for classification loss/accuracy.
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='loss', simple_value=loss)
        summary.value.add(tag='accuracy', simple_value=acc)
        return summary

    def get_summary_obj_lookup(loss, p_at_1):
        # Scalar summary for lookup loss and precision@1.
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='lookup_loss', simple_value=loss)
        summary.value.add(tag='P@1', simple_value=p_at_1)
        return summary

    def train_lookup(step_i):
        # One lookup-model training step, supervised by the classification
        # loss on candidate batches.  Raises NoLookupException when the
        # feeder produces no lookup candidates.
        batches, info = lookup_train_feeder.get_lookup_train_batches(
            batch_size)
        if not batches:
            raise NoLookupException()

        def get_cls_loss(batch):
            return sess.run([model.get_cls_loss_arr()],
                            feed_dict=model.batch2feed_dict(batch))

        loss_array = get_loss_from_batches(batches, get_cls_loss)

        supervision_for_lookup = train_data_feeder.get_lookup_training_batch(
            loss_array, batch_size, info)

        def lookup_train(batch):
            return sess.run(
                [model.get_lookup_loss(),
                 model.get_p_at_1(), train_lookup_op],
                feed_dict=model.batch2feed_dict(batch))

        avg_loss, p_at_1, _ = lookup_train(supervision_for_lookup)
        train_writer.add_summary(get_summary_obj_lookup(avg_loss, p_at_1),
                                 step_i)
        log.info("Step {0} lookup loss={1:.04f}".format(step_i, avg_loss))
        return avg_loss

    def train_classification(step_i):
        # One plain classification step on a random training batch.
        batch = train_data_feeder.get_random_batch(batch_size)
        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(),
             model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)

        return loss_val, acc

    lookup_loss_window = MovingWindow(20)

    def train_classification_w_lookup(step_i):
        # Classification step whose batch is augmented with dictionary
        # entries ranked by the current lookup model's logits.
        data_indices, batch = train_data_feeder.get_lookup_batch(batch_size)
        logits, = sess.run([model.get_lookup_logits()],
                           feed_dict=model.batch2feed_dict(batch))
        term_ranks = np.flip(np.argsort(logits[:, :, 1], axis=1))
        batch = train_data_feeder.augment_dict_info(data_indices, term_ranks)

        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(),
             model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("ClsW]Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)

        return loss_val, acc

    def lookup_enabled(lookup_loss_window, step_i):
        # Lookup augmentation starts only after warm-up and once the lookup
        # loss moving average is below the configured threshold.
        return step_i > model_config.lookup_min_step\
               and lookup_loss_window.get_average() < model_config.lookup_threshold

    def train_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            loss, acc = train_classification_w_lookup((step_i))
        else:
            loss, acc = train_classification(step_i)
        if f_train_lookup and step_i % model_config.lookup_train_frequency == 0:
            try:
                lookup_loss = train_lookup(step_i)
                lookup_loss_window.append(lookup_loss, 1)
            except NoLookupException:
                log.warning("No possible lookup found")

        return loss, acc

    def debug_fn(batch):
        # Debug helper; not invoked by the main loop, kept for manual use.
        y_lookup, = sess.run([
            model.y_lookup,
        ],
                             feed_dict=model.batch2feed_dict(batch))
        print(y_lookup)
        return 0, 0

    def valid_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            valid_fn_w_lookup(step_i)
        else:
            valid_fn_wo_lookup(step_i)

    def valid_fn_wo_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_wo_lookup(dev_batches)
        log.info("Step {0} Dev loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def valid_fn_w_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_w_lookup(dev_batches_w_dict)
        log.info("Step {0} DevW loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def save_fn():
        # Persist the current loop step into global_step before saving.
        # NOTE(review): relies on the loop variable `step_i`; if the
        # training loop never ran (init_step >= train_steps), the final
        # save raises NameError — confirm this can't happen in practice.
        op = tf.compat.v1.assign(global_step, step_i)
        sess.run([op])
        return save_model_to_dir_path(sess, model_path, global_step)

    n_data = train_data_feeder.get_data_len()
    step_per_epoch = int((n_data + batch_size - 1) / batch_size)
    tf_logging.debug("{} data point -> {} batches / epoch".format(
        n_data, step_per_epoch))
    train_steps = step_per_epoch * FLAGS.num_train_epochs
    tf_logging.debug("Max train step : {}".format(train_steps))
    valid_freq = 100
    save_interval = 60 * 20  # checkpoint at most every 20 minutes
    last_save = time.time()

    init_step, = sess.run([global_step])
    print("Initial step : ", init_step)
    for step_i in range(init_step, train_steps):
        # BUG FIX: the original guarded on `dev_fn`, a name not defined in
        # this function (NameError once reached); run validation directly.
        if (step_i + 1) % valid_freq == 0:
            valid_fn(step_i)

        # Time-based checkpointing (the original `save_fn is not None`
        # guard was vestigial: save_fn is always defined here).
        if time.time() - last_save > save_interval:
            save_fn()
            last_save = time.time()

        loss, acc = train_fn(step_i)

    return save_fn()
Exemple #27
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator: scaled, magnitude-weighted MAE regression.

        Encodes inputs with BERT, projects the pooled output to
        `train_config.num_classes` logits, and trains with mean absolute
        error where each example is weighted by |scaled label|.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        if mode == tf.estimator.ModeKeys.PREDICT:
            # No labels at predict time; substitute dummy ones.
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.float32)
        else:
            label_ids = tf.reshape(features["label_ids"], [-1])

        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        model = BertModel(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        rep = model.get_pooled_output()
        if is_training:
            rep = dropout(rep, 0.1)
        logits = tf.keras.layers.Dense(train_config.num_classes, name="cls_dense")(rep)

        # Scale the labels, then weight each example's MAE by the scaled
        # label magnitude.
        label_ids = model_config.scale * label_ids
        example_weight = tf.abs(label_ids)
        per_example_loss = tf.keras.losses.MAE(y_true=label_ids, y_pred=logits)
        per_example_loss = per_example_loss * example_weight
        loss = tf.reduce_mean(input_tensor=per_example_loss)

        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        def regression_metric_fn(logits, label, is_real_example):
            # Mean absolute error over real (non-padding) examples.
            mae = tf.compat.v1.metrics.mean_absolute_error(
                labels=label, predictions=logits, weights=is_real_example)
            return {
                "mae": mae
            }

        spec_cls = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            # tvars is cleared before building the optimizer (as in the
            # original); presumably the helper then defaults to all
            # trainable variables — confirm against its definition.
            tvars = None
            train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
            output_spec = spec_cls(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (regression_metric_fn, [logits, label_ids, is_real_example])
            output_spec = spec_cls(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "logits": logits,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = spec_cls(mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
Exemple #28
0
def init_from_bert(sess):
    """Initialize the model variables in *sess* from the default BERT checkpoint."""
    tf_logging.info("Initializing model with bert ")
    checkpoint_path = get_bert_full_path()
    init_dict_model_with_bert(sess, checkpoint_path)
Exemple #29
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Splits a (query, doc) pair into overlapping fixed-length segments via
        `iterate_over`, encodes each segment with `model_class`, and attaches a
        classification head.  Returns a TPUEstimatorSpec for TRAIN / EVAL /
        PREDICT.

        Bug fix: the EVAL branch previously passed ``mode=model`` (the model
        object) to TPUEstimatorSpec instead of ``mode=mode``.
        """
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        query = features["query"]
        doc = features["doc"]
        doc_mask = features["doc_mask"]
        data_ids = features["data_id"]

        # Room left for query tokens plus [CLS]/[SEP]/[SEP] special tokens.
        segment_len = max_seq_length - query_len - 3
        step_size = model_config.step_size
        input_ids, input_mask, segment_ids, n_segments = \
            iterate_over(query, doc, doc_mask, total_doc_len, segment_len, step_size)
        if mode == tf.estimator.ModeKeys.PREDICT:
            # No labels at prediction time; use dummy ones so downstream
            # loss code still builds.
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
        else:
            label_ids = features["label_ids"]
            label_ids = tf.reshape(label_ids, [-1])

        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Some model classes take the raw feature dict as an extra argument.
        if "feed_features" in special_flags:
            model = model_class(
                config=model_config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
                features=features,
            )
        else:
            model = model_class(
                config=model_config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
        if "new_pooling" in special_flags:
            pooled = mimic_pooling(model.get_sequence_output(),
                                   model_config.hidden_size,
                                   model_config.initializer_range)
        else:
            pooled = model.get_pooled_output()

        # Two logistic-regression variants kept for checkpoint compatibility:
        # the Keras Dense head ("old") vs. explicit weight/bias variables.
        if train_config.checkpoint_type != "bert_nli" and train_config.use_old_logits:
            tf_logging.info("Use old version of logistic regression")
            if is_training:
                pooled = dropout(pooled, 0.1)
            logits = tf.keras.layers.Dense(train_config.num_classes,
                                           name="cls_dense")(pooled)
        else:
            tf_logging.info("Use fixed version of logistic regression")
            output_weights = tf.compat.v1.get_variable(
                "output_weights",
                [train_config.num_classes, model_config.hidden_size],
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.02))

            output_bias = tf.compat.v1.get_variable(
                "output_bias", [train_config.num_classes],
                initializer=tf.compat.v1.zeros_initializer())

            if is_training:
                pooled = dropout(pooled, 0.1)

            logits = tf.matmul(pooled, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)

        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)

        if "bias_loss" in special_flags:
            tf_logging.info("Using special_flags : bias_loss")
            loss_arr = reweight_zero(label_ids, loss_arr)

        loss = tf.reduce_mean(input_tensor=loss_arr)
        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names = {}

        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if "simple_optimizer" in special_flags:
                tf_logging.info("using simple optimizer")
                train_op = create_simple_optimizer(loss,
                                                   train_config.learning_rate,
                                                   train_config.use_tpu)
            else:
                # "ask_tvar": let the model restrict which variables train.
                if "ask_tvar" in special_flags:
                    tvars = model.get_trainable_vars()
                else:
                    tvars = None
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [logits, label_ids, is_real_example])
            # Fixed: was `mode=model`, passing the model object as the mode.
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "logits": logits,
                "doc": doc,
                "data_ids": data_ids,
            }

            # Pass through any auxiliary inputs the caller may want back.
            useful_inputs = ["data_id", "input_ids2", "data_ids"]
            for input_name in useful_inputs:
                if input_name in features:
                    predictions[input_name] = features[input_name]

            if override_prediction_fn is not None:
                predictions = override_prediction_fn(predictions, model)

            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)

        return output_spec
Exemple #30
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        # Joint masked-LM + 3-way NLI classification model_fn that, for
        # PREDICT, also emits "h_overlap": an element-wise overlap between
        # gradients of the LM loss and gradients of the "argument-like"
        # probability, taken w.r.t. every layer output.
        tf_logging.info("model_fn_nli_lm")
        """The `model_fn` for TPUEstimator."""
        log_features(features)

        input_ids = features["input_ids"]  # [batch_size, seq_length]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        batch_size, seq_max = get_shape_list2(input_ids)
        # If no separate NLI inputs are provided, reuse the LM inputs and
        # fabricate all-ones labels so the classification head still builds.
        if "nli_input_ids" in features:
            nli_input_ids = features[
                "nli_input_ids"]  # [batch_size, seq_length]
            nli_input_mask = features["nli_input_mask"]
            nli_segment_ids = features["nli_segment_ids"]
        else:
            nli_input_ids = input_ids
            nli_input_mask = input_mask
            nli_segment_ids = segment_ids
            features["label_ids"] = tf.ones([batch_size], tf.int32)

        # Fix seeds at prediction time so the random masking is reproducible.
        if mode == tf.estimator.ModeKeys.PREDICT:
            tf.random.set_seed(0)
            seed = 0
        else:
            seed = None

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        tf_logging.info("Doing dynamic masking (random)")

        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids, input_mask,
                             train_config.max_predictions_per_seq, MASK_ID, seed)

        # Shared encoder over both the masked LM inputs and the NLI inputs.
        sharing_model = sharing_model_factory(
            config, train_config.use_one_hot_embeddings, is_training,
            masked_input_ids, input_mask, segment_ids, nli_input_ids,
            nli_input_mask, nli_segment_ids)

        sequence_output_lm = sharing_model.lm_sequence_output()
        nli_feature = sharing_model.get_tt_feature()

        masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
            = get_masked_lm_output(config, sequence_output_lm, sharing_model.get_embedding_table(),
                                     masked_lm_positions, masked_lm_ids, masked_lm_weights)

        masked_lm_log_probs = tf.reshape(masked_lm_log_probs, [batch_size, -1])

        masked_lm_per_inst_loss = tf.reshape(masked_lm_example_loss,
                                             [batch_size, -1])

        # 3-class NLI head on the shared feature.
        task = Classification(3, features, nli_feature, is_training)
        nli_loss = task.loss

        # "Argument-like" probability = P(class 1) + P(class 2).
        task_prob = tf.nn.softmax(task.logits, axis=-1)
        arg_like = task_prob[:, 1] + task_prob[:, 2]

        # Gradient-overlap signal: compare how the LM loss and arg_like
        # respond to each transformer layer output.
        vars = sharing_model.model.all_layer_outputs
        grads_1 = tf.gradients(ys=masked_lm_loss, xs=vars)  # List[ batch_szie,
        grads_2 = tf.gradients(ys=arg_like, xs=vars)
        l = []
        for g1, g2 in zip(grads_1, grads_2):
            if g1 is not None and g2 is not None:
                # NOTE(review): the reshape to [batch_size * 2, ...] assumes
                # the shared model stacks LM and NLI examples along the batch
                # axis (LM first half, NLI second half) — confirm against
                # sharing_model_factory.
                a = tf.reshape(g1, [batch_size * 2, seq_max, -1])[:batch_size]
                # Normalize LM-loss gradients by per-instance LM loss.
                a = a / masked_lm_per_inst_loss
                b = tf.reshape(g2, [batch_size * 2, seq_max, -1])[batch_size:]
                l.append(tf.abs(a * b))
        # [batch, n_layers, seq, hidden] summed over the sequence axis.
        h_overlap = tf.stack(l, axis=1)
        h_overlap = tf.reduce_sum(h_overlap, axis=2)

        loss = combine_loss_fn(masked_lm_loss, nli_loss)
        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = get_bert_assignment_map
        # NOTE(review): argument order here (tvars, init_checkpoint,
        # assignment_fn) differs from other model_fns in this file that call
        # get_init_fn(train_config, tvars) — verify the intended overload.
        initialized_variable_names, init_fn = get_init_fn(
            tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # Eval reports masked-LM metrics only (not NLI accuracy).
            eval_metrics = (metric_fn_lm, [
                masked_lm_example_loss,
                masked_lm_log_probs,
                masked_lm_ids,
                masked_lm_weights,
            ])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "masked_lm_ids": masked_lm_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "masked_lm_positions": masked_lm_positions,
                "masked_lm_log_probs": masked_lm_log_probs,
                "h_overlap": h_overlap,
            }
            output_spec = TPUEstimatorSpec(mode=mode,
                                           predictions=predictions,
                                           scaffold_fn=scaffold_fn)

        return output_spec