Example #1
    def __init__(self, hp, voca_size, is_training=True):
        config = BertConfig(vocab_size=voca_size,
                            hidden_size=hp.hidden_units,
                            num_hidden_layers=hp.num_blocks,
                            num_attention_heads=hp.num_heads,
                            intermediate_size=hp.intermediate_size,
                            type_vocab_size=hp.type_vocab_size,
                            )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled_output = self.model.get_pooled_output()

        task = ClassificationB(is_training, hp.hidden_units, 3)
        task.call(pooled_output, label_ids)
        self.loss = task.loss
        self.logits = task.logits
        self.acc = task.acc
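
A minimal sketch of how placeholder-style inputs like x_list and y above are fed in TF1 graph mode. It assumes placeholder wraps tf.compat.v1.placeholder, and a one-line reduction stands in for the BERT classifier:

import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

seq_length = 16
input_ids = tf.compat.v1.placeholder(tf.int64, [None, seq_length])
label_ids = tf.compat.v1.placeholder(tf.int64, [None])
logits = tf.cast(tf.reduce_sum(input_ids, axis=1), tf.float32)  # toy stand-in for the model

with tf.compat.v1.Session() as sess:
    out = sess.run(logits, feed_dict={
        input_ids: np.zeros([2, seq_length], np.int64),
        label_ids: np.zeros([2], np.int64),
    })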
Example #2
class SimpleSharingModel:
    def __init__(
        self,
        config,
        use_one_hot_embeddings,
        is_training,
        masked_input_ids,
        input_mask,
        segment_ids,
        nli_input_ids,
        nli_input_mask,
        nli_segment_ids,
    ):

        all_input_ids = tf.concat([masked_input_ids, nli_input_ids], axis=0)
        all_input_mask = tf.concat([input_mask, nli_input_mask], axis=0)
        all_segment_ids = tf.concat([segment_ids, nli_segment_ids], axis=0)
        self.batch_size, _ = get_shape_list2(masked_input_ids)
        self.model = BertModel(config, is_training, all_input_ids,
                               all_input_mask, all_segment_ids,
                               use_one_hot_embeddings)

    def lm_sequence_output(self):
        return self.model.get_sequence_output()[:self.batch_size]

    def get_embedding_table(self):
        return self.model.get_embedding_table()

    def get_tt_feature(self):
        return self.model.get_pooled_output()[self.batch_size:]
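
The sharing trick above runs one encoder over two batches concatenated along the batch axis, then slices the outputs apart at the first batch's size. A self-contained sketch of that slicing, with a random tensor standing in for the BertModel output:

import tensorflow as tf

lm_ids = tf.zeros([8, 128], tf.int32)           # masked-LM batch
nli_ids = tf.zeros([4, 128], tf.int32)          # NLI batch
all_ids = tf.concat([lm_ids, nli_ids], axis=0)  # one forward pass serves both tasks

batch_size = tf.shape(lm_ids)[0]
seq_output = tf.random.normal([12, 128, 768])   # stands in for get_sequence_output()
lm_output = seq_output[:batch_size]             # rows 0..7 feed the LM head
nli_output = seq_output[batch_size:]            # rows 8..11 feed the NLI head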
Example #3
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("model_fn_ranking")
        log_features(features)

        input_ids, input_mask, segment_ids = combine_paired_input_features(features)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        # Updated

        model = BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled_output = model.get_pooled_output()
        if is_training:
            pooled_output = dropout(pooled_output, 0.1)

        loss, losses, y_pred = apply_loss_modeling(modeling_opt, pooled_output, features)

        assignment_fn = assignment_map.get_bert_assignment_map
        scaffold_fn = checkpoint_init(assignment_fn, train_config)

        optimizer_factory = lambda x: create_optimizer_from_config(x, train_config)
        input_ids1 = tf.identity(features["input_ids1"])
        input_ids2 = tf.identity(features["input_ids2"])
        prediction = {
            "input_ids1": input_ids1,
            "input_ids2": input_ids2
        }
        return ranking_estimator_spec(mode, loss, losses, y_pred, scaffold_fn, optimizer_factory, prediction)
Example #4
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):
        super(DualBertTwoInputWithDoubleInputLength, self).__init__()

        input_ids1 = features["input_ids1"]
        input_mask1 = features["input_mask1"]
        segment_ids1 = features["segment_ids1"]
        input_ids2 = features["input_ids2"]
        input_mask2 = features["input_mask2"]
        segment_ids2 = features["segment_ids2"]

        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model_2 = DoubleLengthInputModel(
                config,
                is_training,
                input_ids1,
                input_mask1,
                segment_ids1,
                input_ids2,
                input_mask2,
                segment_ids2,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        model_1_first_token = model_1.get_sequence_output()[:, 0, :]
        model_2_first_token = model_2.get_sequence_output()[:, 0, :]

        rep = tf.concat([model_1_first_token, model_2_first_token], axis=1)

        self.sequence_output = model_1.get_sequence_output()
        dense_layer = tf.keras.layers.Dense(
            config.hidden_size,
            activation=tf.keras.activations.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
        pooled_output = dense_layer(rep)
        self.pooled_output = pooled_output
Example #5
def tlm2_raw_prob(bert_config, use_one_hot_embeddings, input_ids, input_mask, segment_ids):
    encode_model = BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
    )
    loss_model = IndependentLossModel(bert_config)
    loss_model.build_predictions(encode_model.get_sequence_output())
    output = -(loss_model.prob1 - loss_model.prob2)
    return output, loss_model.prob1, loss_model.prob2
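
The sign convention above is easy to misread: the returned score -(prob1 - prob2) is positive exactly when prob2 exceeds prob1. A two-element check:

import tensorflow as tf

prob1 = tf.constant([0.9, 0.2])
prob2 = tf.constant([0.5, 0.6])
score = -(prob1 - prob2)  # [-0.4, 0.4]: the second element, where prob2 wins, scores higher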
Example #6
    def __init__(self, config, is_training, input_ids, input_ids2, input_mask,
                 input_mask2, token_type_ids, segment_ids2,
                 use_one_hot_embeddings):
        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model_2 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
        model_1_first_token = model_1.get_sequence_output()[:, 0, :]
        model_2_first_token = model_2.get_sequence_output()[:, 0, :]
        rep = tf.concat([model_1_first_token, model_2_first_token], axis=1)
        self.sequence_output = tf.concat(
            [model_1.get_sequence_output(),
             model_2.get_sequence_output()],
            axis=2)
        dense_layer = tf.keras.layers.Dense(
            config.hidden_size,
            activation=tf.keras.activations.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
        pooled_output = dense_layer(rep)
        self.pooled_output = pooled_output
Example #7
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("model_fn_sero_classification")
        log_features(features)
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        # Updated
        model = BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled_output = model.get_pooled_output()
        if is_training:
            pooled_output = dropout(pooled_output, 0.1)

        logits = get_prediction_structure(modeling_opt, pooled_output)
        loss = 0

        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = assignment_map.get_bert_assignment_map
        initialized_variable_names, init_fn = get_init_fn(tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        predictions = None
        if modeling_opt == "multi_label_hinge":
            predictions = {
                "input_ids":input_ids,
                "logits":logits,
            }
        else:
            predictions = {
                "input_ids": input_ids,
                "logits": logits,
            }
            useful_inputs = ["data_id", "input_ids2", "data_ids"]
            for input_name in useful_inputs:
                if input_name in features:
                    predictions[input_name] = features[input_name]
        output_spec = rank_predict_estimator_spec(logits, mode, scaffold_fn, predictions)
        return output_spec
Example #8
class ProjectedMaxPooling(BertModelInterface):
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 scope=None):
        super(ProjectedMaxPooling, self).__init__()
        config = copy.deepcopy(config)
        self.config = config
        self.vector_size = config.vector_size

        self.bert_model = BertModel(config, is_training, input_ids, input_mask,
                                    token_type_ids, use_one_hot_embeddings,
                                    scope)

    def get_pooled_output(self):
        seq_output = self.bert_model.get_sequence_output()
        # projected = tf.keras.layers.Dense(self.vector_size,
        #                                   activation=tf.keras.activations.tanh,
        #                                   kernel_initializer=
        #                                   create_initializer(self.config.initializer_range))(seq_output)
        projected = seq_output
        pooled_output = tf.reduce_mean(projected, axis=1)
        return pooled_output
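
Note that with the projection commented out, get_pooled_output above is a plain mean over the sequence axis, despite the class name. A sketch contrasting the two poolings on a [batch, seq, hidden] tensor:

import tensorflow as tf

seq_output = tf.random.normal([2, 128, 768])
mean_pooled = tf.reduce_mean(seq_output, axis=1)  # what the class currently returns
max_pooled = tf.reduce_max(seq_output, axis=1)    # what the class name suggests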
Example #9
class AddLayerSharingModel:
    def __init__(
        self,
        config,
        use_one_hot_embeddings,
        is_training,
        masked_input_ids,
        input_mask,
        segment_ids,
        tt_input_ids,
        tt_input_mask,
        tt_segment_ids,
    ):

        all_input_ids = tf.concat([masked_input_ids, tt_input_ids], axis=0)
        all_input_mask = tf.concat([input_mask, tt_input_mask], axis=0)
        all_segment_ids = tf.concat([segment_ids, tt_segment_ids], axis=0)
        self.config = config
        self.lm_batch_size, _ = get_shape_list2(masked_input_ids)
        self.model = BertModel(config, is_training, all_input_ids,
                               all_input_mask, all_segment_ids,
                               use_one_hot_embeddings)
        initializer = base.create_initializer(config.initializer_range)
        self.tt_layer = ForwardLayer(config, initializer)

        self.tt_input_mask = tt_input_mask
        seq_output = self.model.get_sequence_output()[self.lm_batch_size:]
        tt_batch_size, seq_length = get_shape_list2(tt_input_ids)
        tt_attention_mask = create_attention_mask_from_input_mask2(
            seq_output, self.tt_input_mask)

        print('tt_attention_mask', tt_attention_mask.shape)
        print("seq_output", seq_output.shape)
        seq_output = self.tt_layer.apply_3d(seq_output, tt_batch_size,
                                            seq_length, tt_attention_mask)
        self.tt_feature = mimic_pooling(seq_output, self.config.hidden_size,
                                        self.config.initializer_range)

    def lm_sequence_output(self):
        return self.model.get_sequence_output()[:self.lm_batch_size]

    def get_embedding_table(self):
        return self.model.get_embedding_table()

    def get_tt_feature(self):
        return self.tt_feature
Example #10
    def __init__(self,
                 config,  # Note: this differs from the BERT config
                 is_training,
                 input_ids,
                 input_mask,
                 token_type_ids,
                 use_one_hot_embeddings,
                 features,
                 ):
        super(MultiContextEncoder, self).__init__()
        self.config = config
        if not is_training:
            config.set_attrib("hidden_dropout_prob", 0.0)
            config.set_attrib("attention_probs_dropout_prob", 0.0)

        def reform_context(context):
            return tf.reshape(context, [-1, config.max_context, config.max_context_length])

        batch_size, _ = get_shape_list(input_ids)
        def combine(input_ids, context_input_ids):
            a = tf.tile(tf.expand_dims(input_ids, 1), [1, config.max_context, 1])
            b = reform_context(context_input_ids)
            rep_3d = tf.concat([a, b], 2)
            return tf.reshape(rep_3d, [batch_size * config.max_context, -1])

        context_input_ids = features["context_input_ids"]
        context_input_mask = features["context_input_mask"]
        context_segment_ids = features["context_segment_ids"]
        context_segment_ids = tf.ones_like(context_segment_ids, tf.int32) * 2  # force all context tokens to segment id 2
        self.module = BertModel(config=config,
                                is_training=is_training,
                                input_ids=combine(input_ids, context_input_ids),
                                input_mask=combine(input_mask, context_input_mask),
                                token_type_ids=combine(token_type_ids, context_segment_ids),
                                use_one_hot_embeddings=use_one_hot_embeddings,
                                )
        dense_layer_setup = tf.keras.layers.Dense(config.hidden_size,
                                                  activation=tf.keras.activations.tanh,
                                                  kernel_initializer=create_initializer(config.initializer_range))
        h1 = self.module.get_pooled_output()
        h2 = dense_layer_setup(h1)
        h2 = tf.reshape(h2, [batch_size, config.max_context, -1])
        h2 = h2[:, :config.num_context]
        h3 = tf.reduce_mean(h2, axis=1)
        h4 = dense_layer_setup(h3)
        self.pooled_output = h4
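
A sketch of combine above: the query tokens are tiled once per context, each context window is reshaped out of its flat layout, and the pair is flattened so the encoder sees one row per (example, context) pair. Sizes are illustrative:

import tensorflow as tf

batch, max_context, q_len, c_len = 2, 3, 4, 5
input_ids = tf.reshape(tf.range(batch * q_len), [batch, q_len])
context_input_ids = tf.zeros([batch, max_context * c_len], tf.int32)

a = tf.tile(tf.expand_dims(input_ids, 1), [1, max_context, 1])  # [batch, max_context, q_len]
b = tf.reshape(context_input_ids, [batch, max_context, c_len])  # [batch, max_context, c_len]
rep_3d = tf.concat([a, b], 2)                                   # [batch, max_context, q_len + c_len]
combined = tf.reshape(rep_3d, [batch * max_context, -1])        # one row per (example, context)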
Example #11
def tlm_prefer_hard(bert_config, use_one_hot_embeddings, features):
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    encode_model = BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
    )
    loss_model = IndependentLossModel(bert_config)
    loss_model.build_predictions(encode_model.get_sequence_output())
    # if score is higher, it is more often sampled
    output = -loss_model.prob1
    return output
Example #12
    def __init__(self,
                 sero_config,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 scope=None):
        super(DualSeroBertModel, self).__init__()

        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
        with tf.compat.v1.variable_scope(dual_model_prefix2):
            with tf.compat.v1.variable_scope("sero"):
                model = SeroEpsilon(sero_config, is_training,
                                    use_one_hot_embeddings)

                batch_size, _ = get_shape_list(input_mask)
                use_context = tf.ones([batch_size, 1], tf.int32)
                input_ids = tf.expand_dims(input_ids, 1)
                input_mask = tf.expand_dims(input_mask, 1)
                segment_ids = tf.expand_dims(token_type_ids, 1)
                sequence_output2 = model.network_stacked(
                    input_ids, input_mask, segment_ids, use_context)

        model_1_first_token = model_1.get_sequence_output()[:, 0, :]
        model_2_first_token = sequence_output2[:, 0, :]

        rep = tf.concat([model_1_first_token, model_2_first_token], axis=1)
        dense_layer = tf.keras.layers.Dense(
            config.hidden_size,
            activation=tf.keras.activations.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
        pooled_output = dense_layer(rep)
        self.pooled_output = pooled_output
Example #13
def tlm2(bert_config, use_one_hot_embeddings, features):
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    hp = hyperparams.HPBert()
    voca_size = 30522
    sequence_shape = bert_common.get_shape_list2(input_ids)

    encode_model = BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
    )
    loss_model = IndependentLossModel(bert_config)
    loss_model.build_predictions(encode_model.get_sequence_output())
    output = -(loss_model.prob1 - loss_model.prob2)
    return output
Example #14
    def __init__(self, hp, voca_size, method, is_training=True):
        config = BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = BertModel(config=config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
Example #15
    def __init__(self,
                 config,
                 is_training,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):

        super(MES_pad, self).__init__()
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        trained_l = config.trained_seq_length
        data_l = config.data_seq_length

        batch_size, _ = get_shape_list2(input_ids)

        add_len = trained_l - data_l
        zero_pad = tf.zeros([batch_size, add_len], tf.int32)
        input_ids = tf.concat([input_ids, zero_pad], axis=1)
        input_mask = tf.concat([input_mask, zero_pad], axis=1)
        segment_ids = tf.concat([segment_ids, zero_pad], axis=1)

        # [Batch, unit_seq_length]
        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
        pooled = model.get_pooled_output()
        logits_2d = tf.keras.layers.Dense(2, name="cls_dense")(pooled)

        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
        logits = tf.keras.layers.Dense(2, name="cls_dense")(
            model.get_pooled_output())
        self.logits = logits
        label_ids = tf.reshape(label_ids, [-1])
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)

        layer2_loss = tf.reduce_mean(loss_arr)
        self.loss = layer2_loss
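
A sketch of the padding step above: inputs stored at data_seq_length are zero-padded up to the trained_seq_length the checkpoint expects, with matching pads for the mask and segment ids. Sizes are illustrative:

import tensorflow as tf

data_l, trained_l, batch_size = 128, 512, 2

input_ids = tf.zeros([batch_size, data_l], tf.int32)
zero_pad = tf.zeros([batch_size, trained_l - data_l], tf.int32)
input_ids = tf.concat([input_ids, zero_pad], axis=1)  # [2, 512]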
Example #16
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
        else:
            label_ids = features["label_ids"]
            label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        domain_ids = features["domain_ids"]
        domain_ids = tf.reshape(domain_ids, [-1])

        is_valid_label = features["is_valid_label"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model_1 = BertModel(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model_1.get_pooled_output()
        if is_training:
            pooled = dropout(pooled, 0.1)

        logits = tf.keras.layers.Dense(train_config.num_classes,
                                       name="cls_dense")(pooled)
        pred_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)
        num_domain = 2
        pooled_for_domain = grad_reverse(pooled)
        domain_logits = tf.keras.layers.Dense(
            num_domain, name="domain_dense")(pooled_for_domain)
        domain_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=domain_logits, labels=domain_ids)

        pred_loss = tf.reduce_mean(pred_losses *
                                   tf.cast(is_valid_label, tf.float32))
        domain_loss = tf.reduce_mean(domain_losses)

        tf.compat.v1.summary.scalar('domain_loss', domain_loss)
        tf.compat.v1.summary.scalar('pred_loss', pred_loss)
        alpha = model_config.alpha
        loss = pred_loss + alpha * domain_loss
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            tvars = None
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (classification_metric_fn,
                            [logits, label_ids, is_real_example])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "logits": logits,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
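
grad_reverse above is a helper from this codebase; a common way to write such a gradient-reversal layer (the core of domain-adversarial training) is with tf.custom_gradient, as in this sketch:

import tensorflow as tf

@tf.custom_gradient
def grad_reverse(x):
    def grad(dy):
        return -dy  # identity on the forward pass, negated gradient on the backward pass
    return tf.identity(x), grad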
Example #17
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.float32)
        else:
            label_ids = features["label_ids"]
            label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = BertModel(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model.get_pooled_output()
        if is_training:
            pooled = dropout(pooled, 0.1)
        logits = tf.keras.layers.Dense(train_config.num_classes, name="cls_dense")(pooled)
        scale = model_config.scale

        label_ids = scale * label_ids

        weight = tf.abs(label_ids)
        loss_arr = tf.keras.losses.MAE(y_true=label_ids, y_pred=logits)
        loss_arr = loss_arr * weight

        loss = tf.reduce_mean(input_tensor=loss_arr)
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None

        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        def metric_fn(logits, label, is_real_example):
            mae = tf.compat.v1.metrics.mean_absolute_error(
                labels=label, predictions=logits, weights=is_real_example)

            return {
                "mae": mae
            }

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            tvars = None
            train_op = optimization.create_optimizer_from_config(loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn, [
                logits, label_ids, is_real_example
            ])
            output_spec = TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
        else:
            predictions = {
                    "input_ids": input_ids,
                    "logits": logits,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
        return output_spec
Example #18
    def __init__(self,
                 config,
                 is_training,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):

        super(MES_pred_with_layer1, self).__init__()
        alpha = config.alpha
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        unit_length = config.max_seq_length
        d_seq_length = config.max_d_seq_length
        num_window = int(d_seq_length / unit_length)
        batch_size, _ = get_shape_list2(input_ids)

        # [Batch, num_window, unit_seq_length]
        stacked_input_ids, stacked_input_mask, stacked_segment_ids = split_input(
            input_ids, input_mask, segment_ids, d_seq_length, unit_length)

        is_first_window = tf.concat([
            tf.ones([batch_size, 1], tf.bool),
            tf.zeros([batch_size, num_window - 1], tf.bool)
        ], axis=1)
        num_content_tokens = tf.reduce_sum(stacked_segment_ids, 2)
        has_enough_evidence = tf.less(10, num_content_tokens)
        is_valid_window = tf.logical_or(is_first_window, has_enough_evidence)
        is_valid_window_mask = tf.cast(is_valid_window, tf.float32)
        self.is_first_window = is_first_window
        self.num_content_tokens = num_content_tokens
        self.has_enough_evidence = has_enough_evidence
        self.is_valid_window = is_valid_window
        self.is_valid_window_mask = is_valid_window_mask

        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model = BertModel(
                config=config,
                is_training=is_training,
                input_ids=r3to2(stacked_input_ids),
                input_mask=r3to2(stacked_input_mask),
                token_type_ids=r3to2(stacked_segment_ids),
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        def r2to3(arr):
            return tf.reshape(arr, [batch_size, num_window, -1])

        # [Batch, num_window, window_length, hidden_size]
        pooled = model.get_pooled_output()
        logits_2d = tf.keras.layers.Dense(2, name="cls_dense")(pooled)
        logits_3d = r2to3(logits_2d)
        label_ids_repeat = tf.tile(label_ids, [1, num_window])
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_3d, labels=label_ids_repeat)
        loss_arr = loss_arr * is_valid_window_mask
        layer1_loss = tf.reduce_mean(loss_arr)

        probs = tf.nn.softmax(logits_3d)[:, :, 1]  # [batch_size, num_window]
        self.logits = logits_3d

        # Probabilistic selection
        def select_seg(stacked_input_ids, indices):
            # indices: [batch_size]
            return tf.gather(stacked_input_ids, indices, axis=1, batch_dims=1)

        valid_probs = probs * is_valid_window_mask
        max_seg = tf.argmax(valid_probs, axis=1)
        input_ids = select_seg(stacked_input_ids, max_seg)
        input_mask = select_seg(stacked_input_mask, max_seg)
        segment_ids = select_seg(stacked_segment_ids, max_seg)

        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
        logits = tf.keras.layers.Dense(2, name="cls_dense")(
            model.get_pooled_output())
        label_ids = tf.reshape(label_ids, [-1])
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)

        layer2_loss = tf.reduce_mean(loss_arr)
        loss = alpha * layer1_loss + layer2_loss
        self.loss = loss
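
A sketch of the window-selection step above: argmax over the per-window probabilities, then a batched gather that pulls out each example's best window:

import tensorflow as tf

# [batch=2, num_window=3, unit_len=4]
stacked_input_ids = tf.reshape(tf.range(24), [2, 3, 4])
valid_probs = tf.constant([[0.1, 0.7, 0.2],
                           [0.5, 0.3, 0.2]])

max_seg = tf.argmax(valid_probs, axis=1)                              # [2]: windows 1 and 0
picked = tf.gather(stacked_input_ids, max_seg, axis=1, batch_dims=1)  # [2, 4]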
Example #19
    def __init__(self,
                 config,
                 is_training,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):

        super(MES_single, self).__init__()
        alpha = config.alpha
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        unit_length = config.max_seq_length
        d_seq_length = config.max_d_seq_length
        num_window = int(d_seq_length / unit_length)
        batch_size, _ = get_shape_list2(input_ids)

        # [Batch, num_window, unit_seq_length]
        stacked_input_ids, stacked_input_mask, stacked_segment_ids = split_input(
            input_ids, input_mask, segment_ids, d_seq_length, unit_length)
        # Ignore a window if it is not the first window and either
        # all of its input_mask is 0, or its content is too short
        # (10 or fewer document tokens, i.e., tokens other than the query).

        # [Batch, num_window]
        is_first_window = tf.concat([
            tf.ones([batch_size, 1], tf.bool),
            tf.zeros([batch_size, num_window - 1], tf.bool)
        ], axis=1)
        num_content_tokens = tf.reduce_sum(stacked_segment_ids, 2)
        has_enough_evidence = tf.less(10, num_content_tokens)
        is_valid_window = tf.logical_or(is_first_window, has_enough_evidence)
        is_valid_window_mask = tf.cast(is_valid_window, tf.float32)
        # [batch, num_window]
        self.is_first_window = is_first_window
        self.num_content_tokens = num_content_tokens
        self.has_enough_evidence = has_enough_evidence
        self.is_valid_window = is_valid_window
        self.is_valid_window_mask = is_valid_window_mask

        model = BertModel(
            config=config,
            is_training=is_training,
            input_ids=r3to2(stacked_input_ids),
            input_mask=r3to2(stacked_input_mask),
            token_type_ids=r3to2(stacked_segment_ids),
            use_one_hot_embeddings=use_one_hot_embeddings,
        )

        def r2to3(arr):
            return tf.reshape(arr, [batch_size, num_window, -1])

        # [Batch, num_window, window_length, hidden_size]
        pooled = model.get_pooled_output()
        logits_2d = tf.keras.layers.Dense(2, name="cls_dense")(pooled)
        logits_3d = r2to3(logits_2d)
        label_ids_repeat = tf.tile(label_ids, [1, num_window])
        # [batch, num_window]
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_3d, labels=label_ids_repeat)
        loss_arr = loss_arr * is_valid_window_mask
        probs = tf.nn.softmax(logits_3d)[:, :, 1]  # [batch_size, num_window]
        max_prob_window = tf.argmax(probs, axis=1)
        beta = 10
        loss_weight = tf.nn.softmax(probs * is_valid_window_mask * beta)
        loss_weight = loss_weight * is_valid_window_mask
        # concentrate the loss weight on the highest-probability valid window
        loss = tf.reduce_mean(loss_arr * loss_weight)
        logits = tf.gather(logits_3d, max_prob_window, axis=1, batch_dims=1)
        self.logits = logits

        self.loss = loss
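
A sketch of the soft window weighting above: with beta = 10, the softmax over masked probabilities puts nearly all of the loss weight on the best valid window, acting as a smooth stand-in for a hard argmax:

import tensorflow as tf

beta = 10.0
probs = tf.constant([[0.2, 0.7, 0.1]])
is_valid_window_mask = tf.constant([[1.0, 1.0, 0.0]])  # third window invalid

loss_weight = tf.nn.softmax(probs * is_valid_window_mask * beta)
loss_weight = loss_weight * is_valid_window_mask
# ~[[0.007, 0.992, 0.0]]: the loss concentrates on window 1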
Example #20
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        log_features(features)

        def reform_a_input(raw_input):
            return tf.reshape(raw_input,
                              [dict_run_config.inner_batch_size, -1])

        def reform_b_input(raw_input):
            return tf.reshape(raw_input, [dict_run_config.def_per_batch, -1])

        input_ids = reform_a_input(features["input_ids"])
        input_mask = reform_a_input(features["input_mask"])
        segment_ids = reform_a_input(features["segment_ids"])
        tf_logging.info("input_ids, input_mask")

        # input_ids = features["input_ids"]
        # input_mask = features["input_mask"]
        # segment_ids = features["segment_ids"]

        if mode == tf.estimator.ModeKeys.PREDICT:
            tf.random.set_seed(0)
            seed = 0
        else:
            seed = None

        # tf_logging.info("Doing dynamic masking (random)")
        # masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        #     = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, seed)
        # if dict_run_config.prediction_op == "loss_fixed_mask" or train_config.fixed_mask:
        masked_input_ids = input_ids
        masked_lm_positions = reform_a_input(features["masked_lm_positions"])
        masked_lm_ids = reform_a_input(features["masked_lm_ids"])
        masked_lm_weights = reform_a_input(features["masked_lm_weights"])

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        if model_name == "APR":
            model = APR(
                masked_input_ids,
                input_mask,
                segment_ids,
                is_training,
                train_config.use_one_hot_embeddings,
                bert_config,
                ssdr_config,
                dict_run_config.def_per_batch,
                dict_run_config.inner_batch_size,
                dict_run_config.max_def_length,
            )
        elif model_name == "BERT":
            model = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=masked_input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
        else:
            assert False

        masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
            = get_masked_lm_output(bert_config, model.get_sequence_output(), model.get_embedding_table(),
                 masked_lm_positions, masked_lm_ids, masked_lm_weights)

        loss = masked_lm_loss

        tvars = tf.compat.v1.trainable_variables()
        assignment_fn = dict_model_fn.get_bert_assignment_map_for_dict
        initialized_variable_names, init_fn = align_checkpoint_twice(
            tvars, train_config.init_checkpoint, assignment_fn)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)

        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
                tf_logging.info("Using two lr for each parts")
                train_op = create_optimizer_with_separate_lr(
                    loss, train_config)
            else:
                tf_logging.info("Using single lr ")
                train_op = optimization.create_optimizer_from_config(
                    loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           training_hooks=[OomReportingHook()],
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn_lm, [
                masked_lm_example_loss,
                masked_lm_log_probs,
                masked_lm_ids,
                masked_lm_weights,
            ])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "masked_input_ids": masked_input_ids,
                "masked_lm_ids": masked_lm_ids,
                "masked_lm_example_loss": masked_lm_example_loss,
                "masked_lm_positions": masked_lm_positions,
            }
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           predictions=predictions,
                                           scaffold_fn=scaffold_fn)

        return output_spec
Example #21
    def model_fn(features, labels, mode, params):    # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("    name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        next_sentence_labels = features["next_sentence_labels"]

        seed = 0
        threshold = 1e-2
        logging.info("Doing All Masking")
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, seed)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        prefix1 = "MaybeBERT"
        prefix2 = "MaybeNLI"

        with tf.compat.v1.variable_scope(prefix1):
            model = BertModel(
                    config=bert_config,
                    is_training=is_training,
                    input_ids=input_ids,
                    input_mask=input_mask,
                    token_type_ids=segment_ids,
                    use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            (masked_lm_loss,
             masked_lm_example_loss1, masked_lm_log_probs2) = get_masked_lm_output(
                     bert_config, model.get_sequence_output(), model.get_embedding_table(),
                     masked_lm_positions, masked_lm_ids, masked_lm_weights)
            all_layers1 = model.get_all_encoder_layers()

        with tf.compat.v1.variable_scope(prefix2):
            model = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            all_layers2 = model.get_all_encoder_layers()

        preserved_infos = []
        for a_layer, b_layer in zip(all_layers1, all_layers2):
            layer_diff = a_layer - b_layer
            is_preserved = tf.less(tf.abs(layer_diff), threshold)
            preserved_infos.append(is_preserved)

        t = tf.cast(preserved_infos[1], dtype=tf.int32)  # [batch_size, seq_len, dims]
        layer_1_count = tf.reduce_sum(t, axis=2)

        tvars = tf.compat.v1.trainable_variables()

        initialized_variable_names, init_fn = get_init_fn_for_two_checkpoints(train_config,
                                                                              tvars,
                                                                              train_config.init_checkpoint,
                                                                              prefix1,
                                                                              train_config.second_init_checkpoint,
                                                                              prefix2)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)

        log_var_assignments(tvars, initialized_variable_names)

        output_spec = None
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "input_ids": input_ids,
                "layer_count": layer_1_count
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=None,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)

        return output_spec
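
A sketch of the agreement count above: dimensions where the two encoders' layer activations differ by less than the threshold are marked as preserved, then counted per token:

import tensorflow as tf

threshold = 1e-2
a_layer = tf.random.normal([2, 8, 16])                   # activations from model A
b_layer = a_layer + 1e-3 * tf.random.normal([2, 8, 16])  # model B, mostly agreeing

is_preserved = tf.less(tf.abs(a_layer - b_layer), threshold)          # [batch, seq, dims]
layer_count = tf.reduce_sum(tf.cast(is_preserved, tf.int32), axis=2)  # [batch, seq]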
Example #22
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("    name = %s, shape = %s" %
                         (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        batch_size, seq_len = get_shape_list2(input_ids)
        n_trial = 5

        logging.info("Doing All Masking")
        new_input_ids, new_segment_ids, new_input_mask, indice, length_arr = \
            candidate_gen(input_ids, input_mask, segment_ids, n_trial)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        prefix_cls = "classification"
        prefix_explain = "explain"
        all_input_ids = tf.concat([input_ids, new_input_ids], axis=0)
        all_segment_ids = tf.concat([segment_ids, new_segment_ids], axis=0)
        all_input_mask = tf.concat([input_mask, new_input_mask], axis=0)
        with tf.compat.v1.variable_scope(prefix_cls):
            model = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=all_input_ids,
                input_mask=all_input_mask,
                token_type_ids=all_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            output_weights = tf.compat.v1.get_variable(
                "output_weights",
                [train_config.num_classes, bert_config.hidden_size],
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.02))

            output_bias = tf.compat.v1.get_variable(
                "output_bias", [train_config.num_classes],
                initializer=tf.compat.v1.zeros_initializer())
            pooled = model.get_pooled_output()
            raw_logits = tf.matmul(pooled, output_weights, transpose_b=True)
            logits = tf.stop_gradient(raw_logits)
            cls_logits = tf.nn.bias_add(logits, output_bias)
            cls_probs = tf.nn.softmax(cls_logits)

            orig_probs = cls_probs[:batch_size]
            new_probs = tf.reshape(cls_probs[batch_size:],
                                   [batch_size, n_trial, -1])

            best_run, informative = get_informative(new_probs, orig_probs)
            # informative.shape = [batch_size, num_classes]
            best_del_idx, best_del_len = select_best(best_run, indice,
                                                     length_arr)

            signal_label = get_mask(best_del_idx, best_del_len, seq_len)

        with tf.compat.v1.variable_scope(prefix_explain):
            model = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            seq = model.get_sequence_output()

            output_weights = tf.compat.v1.get_variable(
                "output_weights",
                [train_config.num_classes, bert_config.hidden_size],
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.02))

            output_bias = tf.compat.v1.get_variable(
                "output_bias", [train_config.num_classes],
                initializer=tf.compat.v1.zeros_initializer())
            logits = tf.matmul(seq, output_weights, transpose_b=True)
            ex_logits = tf.nn.bias_add(
                logits, output_bias)  # [batch, seq_len, num_class]

        ex_logits_flat = tf.reshape(tf.transpose(ex_logits, [0, 2, 1]),
                                    [-1, seq_len])
        signal_label_flat = tf.cast(tf.reshape(signal_label, [-1, seq_len]),
                                    tf.float32)
        losses_per_clas_flat = correlation_coefficient_loss(
            signal_label_flat, ex_logits_flat)  # [batch_size, num_class]
        losses_per_clas = tf.reshape(losses_per_clas_flat, [batch_size, -1])
        losses_per_clas = losses_per_clas * tf.cast(informative, tf.float32)
        losses = tf.reduce_mean(losses_per_clas, axis=1)
        loss = tf.reduce_mean(losses)

        tvars = tf.compat.v1.trainable_variables()

        scaffold_fn = None
        initialized_variable_names, init_fn = get_init_fn_for_two_checkpoints(
            train_config, tvars, train_config.init_checkpoint, prefix_explain,
            train_config.second_init_checkpoint, prefix_cls)
        if train_config.use_tpu:

            def tpu_scaffold():
                init_fn()
                return tf.compat.v1.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            init_fn()

        log_var_assignments(tvars, initialized_variable_names)

        output_spec = None
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer_from_config(
                loss, train_config)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "input_ids": input_ids,
                "ex_logits": ex_logits,
                "logits": logits,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=None,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
Example #23
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("    name = %s, shape = %s" %
                         (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        next_sentence_labels = features["next_sentence_labels"]

        n_trial = 25

        logging.info("Doing All Masking")
        masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
            = planned_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, n_trial)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        repeat_input_mask = tf.tile(input_mask, [n_trial, 1])
        repeat_segment_ids = tf.tile(segment_ids, [n_trial, 1])
        prefix1 = "MaybeBERT"
        prefix2 = "MaybeBFN"

        with tf.compat.v1.variable_scope(prefix1):
            model = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=masked_input_ids,
                input_mask=repeat_input_mask,
                token_type_ids=repeat_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            (masked_lm_loss, masked_lm_example_loss1,
             masked_lm_log_probs2) = get_masked_lm_output(
                 bert_config, model.get_sequence_output(),
                 model.get_embedding_table(), masked_lm_positions,
                 masked_lm_ids, masked_lm_weights)

        with tf.compat.v1.variable_scope(prefix2):
            model = BertModel(
                config=bert_config,
                is_training=is_training,
                input_ids=masked_input_ids,
                input_mask=repeat_input_mask,
                token_type_ids=repeat_segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )

            (masked_lm_loss, masked_lm_example_loss2,
             masked_lm_log_probs2) = get_masked_lm_output(
                 bert_config, model.get_sequence_output(),
                 model.get_embedding_table(), masked_lm_positions,
                 masked_lm_ids, masked_lm_weights)

        n_mask = train_config.max_predictions_per_seq

        def reform(t):
            t = tf.reshape(t, [n_trial, -1, n_mask])
            t = tf.transpose(t, [1, 0, 2])
            return t

        grouped_positions = reform(masked_lm_positions)
        grouped_loss1 = reform(masked_lm_example_loss1)
        grouped_loss2 = reform(masked_lm_example_loss2)
        tvars = tf.compat.v1.trainable_variables()

        scaffold_fn = None
        initialized_variable_names, init_fn = get_init_fn_for_two_checkpoints(
            train_config, tvars, train_config.init_checkpoint, prefix1,
            train_config.second_init_checkpoint, prefix2)
        if train_config.use_tpu:

            def tpu_scaffold():
                init_fn()
                return tf.compat.v1.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            init_fn()

        log_var_assignments(tvars, initialized_variable_names)

        output_spec = None
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "input_ids": input_ids,
                "input_mask": input_mask,
                "segment_ids": segment_ids,
                "grouped_positions": grouped_positions,
                "grouped_loss1": grouped_loss1,
                "grouped_loss2": grouped_loss2,
            }
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=None,
                predictions=predictions,
                scaffold_fn=scaffold_fn)

        return output_spec
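A minimal, self-contained sketch of what reform() does to the per-trial losses, using toy sizes rather than the real planned_masking output (the n_trial, batch, and n_mask values here are assumptions):

import tensorflow as tf

n_trial, batch, n_mask = 3, 2, 4
# Losses arrive trial-major: [n_trial * batch, n_mask].
flat = tf.reshape(tf.range(n_trial * batch * n_mask, dtype=tf.float32),
                  [n_trial * batch, n_mask])
# Regroup so each example's n_trial maskings sit together.
grouped = tf.transpose(tf.reshape(flat, [n_trial, batch, n_mask]), [1, 0, 2])
print(grouped.shape)  # (2, 3, 4) = [batch, n_trial, n_mask]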
Example #26
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):
        super(DualBertTwoInputModelEx, self).__init__()

        input_ids2 = features["input_ids2"]
        input_mask2 = features["input_mask2"]
        segment_ids2 = features["segment_ids2"]

        modeling_option = config.model_option

        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model_2 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        model_1_first_token = model_1.get_sequence_output()[:, 0, :]
        model_2_first_token = model_2.get_sequence_output()[:, 0, :]
        # How much of the second tower's [CLS] vector to keep:
        # "0" drops it, "1" keeps it, "random" draws a scalar in [0, 1).
        mask_scalar = {
            "0": 0.,
            "1": 1.,
            "random": tf.random.uniform(shape=[], minval=0., maxval=1.)
        }[modeling_option]
        model_2_first_token = mask_scalar * model_2_first_token

        rep = tf.concat([model_1_first_token, model_2_first_token], axis=1)

        self.sequence_output = tf.concat(
            [model_1.get_sequence_output(),
             model_2.get_sequence_output()],
            axis=2)
        dense_layer = tf.keras.layers.Dense(
            config.hidden_size,
            activation=tf.keras.activations.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
        pooled_output = dense_layer(rep)
        self.pooled_output = pooled_output
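A minimal sketch of the gating above, with toy tensors in place of the two BERT towers' [CLS] vectors (gate_and_pool, the shapes, and the hidden size are illustrative assumptions, not part of the original class):

import tensorflow as tf

def gate_and_pool(cls1, cls2, modeling_option, hidden_size=4):
    # Same three options as DualBertTwoInputModelEx: drop, keep, or
    # randomly down-weight the second tower's [CLS] vector.
    mask_scalar = {
        "0": tf.constant(0.),
        "1": tf.constant(1.),
        "random": tf.random.uniform(shape=[], minval=0., maxval=1.),
    }[modeling_option]
    rep = tf.concat([cls1, mask_scalar * cls2], axis=1)
    return tf.keras.layers.Dense(hidden_size, activation="tanh")(rep)

pooled = gate_and_pool(tf.random.normal([2, 4]),
                       tf.random.normal([2, 4]), "random")
print(pooled.shape)  # (2, 4)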
Example #27
    def __init__(self,
                 config,
                 is_training,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):

        super(MES_sel, self).__init__()
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        unit_length = config.max_seq_length
        d_seq_length = config.max_d_seq_length
        num_window = int(d_seq_length / unit_length)
        batch_size, _ = get_shape_list2(input_ids)

        # [Batch, num_window, unit_seq_length]
        stacked_input_ids, stacked_input_mask, stacked_segment_ids = split_input(
            input_ids, input_mask, segment_ids, d_seq_length, unit_length)
        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model = BertModel(
                config=config,
                is_training=is_training,
                input_ids=r3to2(stacked_input_ids),
                input_mask=r3to2(stacked_input_mask),
                token_type_ids=r3to2(stacked_segment_ids),
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        def r2to3(arr):
            return tf.reshape(arr, [batch_size, num_window, -1])

        # pooled : [batch_size * num_window, hidden_size]
        pooled = model.get_pooled_output()
        logits_2d = tf.keras.layers.Dense(2, name="cls_dense")(pooled)
        logits_3d = r2to3(logits_2d)
        label_ids_repeat = tf.tile(label_ids, [1, num_window])
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_3d, labels=label_ids_repeat)
        layer1_loss = tf.reduce_mean(loss_arr)

        probs = tf.nn.softmax(logits_3d)[:, :, 1]  # [batch_size, num_window]

        # Segment selection: keep the window the first stage scores highest.
        def select_seg(stacked_input_ids, indices):
            # indices : [batch_size]
            return tf.gather(stacked_input_ids, indices, axis=1, batch_dims=1)

        max_seg = tf.argmax(probs, axis=1)
        input_ids = select_seg(stacked_input_ids, max_seg)
        input_mask = select_seg(stacked_input_mask, max_seg)
        segment_ids = select_seg(stacked_segment_ids, max_seg)

        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
        logits = tf.keras.layers.Dense(2, name="cls_dense")(
            model.get_pooled_output())
        self.logits = logits
        label_ids = tf.reshape(label_ids, [-1])
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)

        layer2_loss = tf.reduce_mean(loss_arr)
        alpha = 0.1
        loss = alpha * layer1_loss + layer2_loss
        self.loss = loss
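A minimal sketch of the window selection above, with toy sizes and probabilities (all values here are assumptions): tf.gather with batch_dims=1 pairs each example with its own argmax window index.

import tensorflow as tf

batch, num_window, unit_length = 2, 3, 5
stacked_input_ids = tf.reshape(
    tf.range(batch * num_window * unit_length),
    [batch, num_window, unit_length])
probs = tf.constant([[0.1, 0.7, 0.2],    # example 0: window 1 wins
                     [0.5, 0.3, 0.2]])   # example 1: window 0 wins
max_seg = tf.argmax(probs, axis=1)                     # [batch]
selected = tf.gather(stacked_input_ids, max_seg,
                     axis=1, batch_dims=1)             # [batch, unit_length]
print(selected.numpy())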
Example #28
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):
        super(TripleBertWeighted, self).__init__()

        input_ids2 = features["input_ids2"]
        input_mask2 = features["input_mask2"]
        segment_ids2 = features["segment_ids2"]

        input_ids3 = features["input_ids3"]
        input_mask3 = features["input_mask3"]
        segment_ids3 = features["segment_ids3"]

        def apply_binary_dense(vector):
            output = tf.keras.layers.Dense(
                2,
                activation=tf.keras.activations.softmax,
                name="cls_dense",
                kernel_initializer=create_initializer(
                    config.initializer_range))(vector)
            return output

        with tf.compat.v1.variable_scope(triple_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
            model_1_pred = tf.keras.layers.Dense(
                3,
                activation=tf.keras.activations.softmax,
                name="cls_dense",
                kernel_initializer=create_initializer(
                    config.initializer_range))(model_1.get_pooled_output())
            model_1_pred = model_1_pred[:, :2]  # keep two of the three class probabilities

        with tf.compat.v1.variable_scope(triple_model_prefix2):
            model_2 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
            model_2_pred = apply_binary_dense(model_2.get_pooled_output())

        with tf.compat.v1.variable_scope(triple_model_prefix3):
            model_3 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids3,
                input_mask=input_mask3,
                token_type_ids=segment_ids3,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )
            model_3_pred = apply_binary_dense(model_3.get_pooled_output())

        # Option: initialize dense

        # Mix the two towers' predictions with model_3's binary gate.
        combined_pred = model_1_pred * model_3_pred[:, 0:1] \
                        + model_2_pred * model_3_pred[:, 1:2]

        self.rel_score = model_3_pred[:, 1:2]
        self.pooled_output = combined_pred
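A minimal numeric sketch of the combination above (the probabilities are toy values): model_3's two-way softmax decides how much of each tower's prediction survives, which is why its second column doubles as the relevance score.

import tensorflow as tf

model_1_pred = tf.constant([[0.9, 0.1]])
model_2_pred = tf.constant([[0.2, 0.8]])
model_3_pred = tf.constant([[0.75, 0.25]])  # gate: 75% tower 1, 25% tower 2
combined = model_1_pred * model_3_pred[:, 0:1] \
           + model_2_pred * model_3_pred[:, 1:2]
print(combined.numpy())  # [[0.725 0.275]]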
Example #29
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 use_one_hot_embeddings=True,
                 features=None,
                 scope=None):
        super(TripleBertMasking, self).__init__()

        input_ids2 = features["input_ids2"]
        input_mask2 = features["input_mask2"]
        segment_ids2 = features["segment_ids2"]

        input_ids3 = features["input_ids3"]
        input_mask3 = features["input_mask3"]
        segment_ids3 = features["segment_ids3"]

        with tf.compat.v1.variable_scope(triple_model_prefix1):
            model_1 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope(triple_model_prefix2):
            model_2 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        with tf.compat.v1.variable_scope(triple_model_prefix3):
            model_3 = BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids3,
                input_mask=input_mask3,
                token_type_ids=segment_ids3,
                use_one_hot_embeddings=use_one_hot_embeddings,
            )

        model_1_first_token = model_1.get_sequence_output()[:, 0, :]
        model_2_first_token = model_2.get_sequence_output()[:, 0, :]

        pooled3 = model_3.get_pooled_output()
        probs3 = tf.keras.layers.Dense(2,
                                       activation=tf.keras.activations.softmax,
                                       kernel_initializer=create_initializer(
                                           config.initializer_range))(pooled3)
        mask_scalar = probs3[:, 1:2]  # learned gate in (0, 1), shape [batch, 1]
        self.rel_score = mask_scalar

        model_2_first_token = mask_scalar * model_2_first_token

        rep = tf.concat([model_1_first_token, model_2_first_token], axis=1)

        self.sequence_output = tf.concat(
            [model_1.get_sequence_output(),
             model_2.get_sequence_output()],
            axis=2)
        dense_layer = tf.keras.layers.Dense(
            config.hidden_size,
            activation=tf.keras.activations.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
        pooled_output = dense_layer(rep)
        self.pooled_output = pooled_output
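A minimal sketch of the learned gate above, with toy tensors standing in for the towers' outputs (the shapes are assumptions): unlike Example #26's fixed or random scalar, here a binary softmax head on the third tower produces a per-example gate.

import tensorflow as tf

pooled3 = tf.random.normal([2, 8])   # stand-in for model_3's pooled output
cls2 = tf.random.normal([2, 8])      # stand-in for model_2's [CLS] vector
probs3 = tf.keras.layers.Dense(2, activation="softmax")(pooled3)
mask_scalar = probs3[:, 1:2]         # [batch, 1], broadcasts over hidden dim
print((mask_scalar * cls2).shape)    # (2, 8)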
Example #30
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf_logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf_logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        if mode == tf.estimator.ModeKeys.PREDICT:
            # Dummy labels so the loss graph still builds at predict time.
            label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
        else:
            label_ids = features["label_ids"]
            label_ids = tf.reshape(label_ids, [-1])
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        input_ids2 = features["input_ids2"]
        input_mask2 = features["input_mask2"]
        segment_ids2 = features["segment_ids2"]
        with tf.compat.v1.variable_scope(dual_model_prefix1):
            model_1 = BertModel(
                config=model_config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            pooled = model_1.get_pooled_output()
            if is_training:
                pooled = dropout(pooled, 0.1)
            logits = tf.keras.layers.Dense(train_config.num_classes,
                                           name="cls_dense")(pooled)
        with tf.compat.v1.variable_scope(dual_model_prefix2):
            model_2 = BertModel(
                config=model_config,
                is_training=is_training,
                input_ids=input_ids2,
                input_mask=input_mask2,
                token_type_ids=segment_ids2,
                use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            )
            pooled = model_2.get_pooled_output()
            if is_training:
                pooled = dropout(pooled, 0.1)
            conf_probs = tf.keras.layers.Dense(
                train_config.num_classes,
                name="cls_dense",
                activation=tf.keras.activations.softmax)(pooled)

            confidence = conf_probs[:, 1]
        confidence_loss = 1 - confidence

        cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ids)

        k = model_config.k
        alpha = model_config.alpha
        loss_arr = cls_loss * confidence + confidence_loss * k

        loss_arr = apply_weighted_loss(loss_arr, label_ids, alpha)

        loss = tf.reduce_mean(input_tensor=loss_arr)
        tvars = tf.compat.v1.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if train_config.init_checkpoint:
            initialized_variable_names, init_fn = get_init_fn(
                train_config, tvars)
            scaffold_fn = get_tpu_scaffold_or_init(init_fn,
                                                   train_config.use_tpu)
        log_var_assignments(tvars, initialized_variable_names)
        TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        def metric_fn(logits, label, is_real_example, confidence):
            r = classification_metric_fn(logits, label, is_real_example)
            r['confidence'] = tf.compat.v1.metrics.mean(confidence)
            return r

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            tvars = None  # None: optimize all trainable variables
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn,
                            [logits, label_ids, is_real_example, confidence])
            output_spec = TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           eval_metrics=eval_metrics,
                                           scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "input_ids": input_ids,
                "logits": logits,
                "confidence": confidence,
            }
            if "data_id" in features:
                predictions['data_id'] = features['data_id']
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
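A minimal numeric sketch of the confidence-weighted loss above (k and the toy values are assumptions): confident examples pay the full classification loss, while low confidence is itself penalized at a rate controlled by k.

import tensorflow as tf

cls_loss = tf.constant([0.2, 1.5])
confidence = tf.constant([0.9, 0.3])
k = 0.5
loss_arr = cls_loss * confidence + (1.0 - confidence) * k
print(loss_arr.numpy())  # [0.23 0.8 ]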