Example #1
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        elif method in [2]:
            self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        with tf.variable_scope("part1"):
            self.model1 = bert.BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=use_one_hot_embeddings)

        with tf.variable_scope("part2"):
            self.model2 = bert.BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=use_one_hot_embeddings)

        enc = tf.concat([
            self.model1.get_sequence_output(),
            self.model2.get_sequence_output()
        ], axis=2)

        pred, loss = task.predict(enc, label_ids, True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
Example #2
    def network(self, features, mode):
        config = bert.BertConfig(
            vocab_size=self.voca_size,
            hidden_size=self.hp.hidden_units,
            num_hidden_layers=self.hp.num_blocks,
            num_attention_heads=self.hp.num_heads,
            intermediate_size=self.hp.intermediate_size,
            type_vocab_size=self.hp.type_vocab_size,
        )
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        self.label_ids = features["label_ids"]

        is_training = (tf.estimator.ModeKeys.TRAIN == mode)
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=self.use_one_hot_embeddings)

        enc = self.model.get_sequence_output()
        return self.task.predict_ex(enc, self.label_ids, mode)
Example #3
    def __init__(self, hp, voca_size, is_training=True):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

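        # NOTE: the bare `placeholder` here (and in Examples #8 and #11) is
        # presumably a project-level alias for tf.placeholder imported from a
        # helper module; the snippet does not show the import.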
        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled_output = self.model.get_pooled_output()

        task = ClassificationB(is_training, hp.hidden_units, 3)
        task.call(pooled_output, label_ids)
        self.loss = task.loss
        self.logits = task.logits
        self.acc = task.acc
Example #4
    def __init__(self, hp, voca_size, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        s_portion = tf.placeholder(tf.float32, [None])
        d_portion = tf.placeholder(tf.float32, [None])
        s_sum = tf.placeholder(tf.int64, [None])
        d_sum = tf.placeholder(tf.int64, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = [s_portion, d_portion]
        self.y_sum = [s_sum, d_sum]

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        enc = self.model.get_sequence_output()
        pool = tf.layers.dense(enc[:, 0, :], hp.hidden_units, name="pool")

        s_logits = tf.layers.dense(pool, 2, name="cls_dense_support")
        d_logits = tf.layers.dense(pool, 2, name="cls_dense_dispute")

        loss = 0
        self.acc = []
        for logits, y, mask_sum in [(s_logits, self.y[0], s_sum),
                                    (d_logits, self.y[1], d_sum)]:
            labels = tf.cast(tf.greater(y, 0.5), tf.int32)
            labels = tf.one_hot(labels, 2)
            acc = tf_module.accuracy(logits, y)

            self.acc.append(acc)
            loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits, labels=labels)

            loss_arr = loss_arr * tf.cast(mask_sum, tf.float32)
            loss += tf.reduce_sum(loss_arr)

        self.loss = loss

        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('s_acc', self.acc[0])
        tf.summary.scalar('d_acc', self.acc[1])
Example #5
    def __init__(self, hp, voca_size, is_training=True):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.float32, [None, 3])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        feature = self.model.get_pooled_output()


        def dense_softmax(feature, name):
            logits = tf.layers.dense(feature, 2, name=name)
            sout = tf.nn.softmax(logits)
            return sout

        alpha = dense_softmax(feature, "dense_alpha")  # Probability of being Argument P(Arg)
        beta = dense_softmax(feature, "dense_beta")    # P(Arg+|Arg)
        gamma = dense_softmax(feature, "dense_gamma")  # P(not Noise)
        self.alpha = alpha[:, 0]
        self.beta = beta[:, 0]
        self.gamma = gamma[:, 0]

        p1_prior = 0.2
        p2_prior = 0.2
        p0_prior = 1 - p1_prior - p2_prior

        p1 = alpha[:, 0] * beta[:, 0] * gamma[:, 0] + gamma[:, 1] * p1_prior
        p2 = alpha[:, 0] * beta[:, 1] * gamma[:, 0] + gamma[:, 1] * p2_prior
        p0 = alpha[:, 1] * gamma[:, 0] + gamma[:, 1] * p0_prior


        pred = tf.stack([p0, p1, p2], axis=1)
        log_likelihood = tf.log(pred) * label_ids
        loss = - tf.reduce_mean(log_likelihood)
        self.pred = pred
        self.loss = loss
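
Note: alpha, beta, and gamma above are each two-way softmax outputs, so every row sums to 1, which makes pred a valid three-class distribution: p0 + p1 + p2 = gamma[:, 0] * (alpha[:, 0] * (beta[:, 0] + beta[:, 1]) + alpha[:, 1]) + gamma[:, 1] * (p0_prior + p1_prior + p2_prior) = 1. A minimal NumPy check of that identity, using made-up softmax values that are not part of the original code:

    import numpy as np

    alpha = np.array([[0.7, 0.3]])  # hypothetical softmax row: [P(Arg), P(not Arg)]
    beta = np.array([[0.6, 0.4]])   # [P(Arg+ | Arg), P(Arg- | Arg)]
    gamma = np.array([[0.9, 0.1]])  # [P(not Noise), P(Noise)]
    p1_prior, p2_prior = 0.2, 0.2
    p0_prior = 1 - p1_prior - p2_prior

    # same mixture as in the model above
    p1 = alpha[:, 0] * beta[:, 0] * gamma[:, 0] + gamma[:, 1] * p1_prior
    p2 = alpha[:, 0] * beta[:, 1] * gamma[:, 0] + gamma[:, 1] * p2_prior
    p0 = alpha[:, 1] * gamma[:, 0] + gamma[:, 1] * p0_prior
    assert np.allclose(p0 + p1 + p2, 1.0)  # a proper distribution over 3 classes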
Example #6
    def __init__(self, hp, voca_size, mode=1):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        scores = tf.placeholder(tf.float32, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = scores

        use_one_hot_embeddings = use_tpu
        is_training = True
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        enc = self.model.get_sequence_output()
        enc = tf.layers.dense(enc, hp.hidden_units,
                              name="dense1")  # [ None, seq_length, hidden]
        matching = tf.expand_dims(enc, 3)  # [ None, seq_length, hidden, 1]
        pooled_rep = tf.nn.max_pool(matching,
                                    ksize=[1, seq_length, 1, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='VALID',
                                    data_format='NHWC')
        # [None, 1, hidden, 1]
        self.doc_v = tf.placeholder_with_default(
            tf.reshape(pooled_rep, [-1, hp.hidden_units]),
            (None, hp.hidden_units),
            name='pooled_rep')

        logits = tf.layers.dense(self.doc_v, 1, name="dense_reg")
        self.logits = logits
        paired = tf.reshape(logits, [-1, 2])
        losses = tf.maximum(hp.alpha - (paired[:, 1] - paired[:, 0]), 0)

        self.loss = tf.reduce_mean(losses)
        tf.summary.scalar('loss', self.loss)
Example #7
def fetch_bert_parameter(model_path):
    hp = hyperparams.HPSENLI()
    vocab_size = 30522
    vocab_filename = "bert_voca.txt"

    config = bert.BertConfig(
        vocab_size=vocab_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    hp.compare_deletion_num = 20
    seq_length = hp.seq_max

    is_training = False
    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    label_ids = tf.placeholder(tf.int64, [None])
    use_one_hot_embeddings = False
    model = bert.BertModel(config=config,
                           is_training=is_training,
                           input_ids=input_ids,
                           input_mask=input_mask,
                           token_type_ids=segment_ids,
                           use_one_hot_embeddings=use_one_hot_embeddings)

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess_config.gpu_options.allow_growth = True

    sess = tf.Session(config=sess_config)
    sess.run(tf.global_variables_initializer())
    variables = tf.contrib.slim.get_variables_to_restore()
    for v in variables:
        print(v)

    names = list([v.name for v in variables])
    loader = tf.train.Saver()
    loader.restore(sess, model_path)
    r, = sess.run([variables])

    output = dict(zip(names, r))

    for k in output:
        print(k)

    return output
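
Note: a hedged usage sketch; the function builds the graph, restores the checkpoint, and returns a {variable_name: ndarray} dict, so the restored weights can be inspected or re-serialized. The checkpoint path below is a placeholder, not from the original code:

    params = fetch_bert_parameter("/path/to/model.ckpt")  # hypothetical path
    for name, value in params.items():
        print(name, value.shape)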
Example #8
    def __init__(self, hp, num_classes, voca_size, is_training=True):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled_output = self.model.get_pooled_output()
        output_weights = tf.get_variable(
            "output_weights", [num_classes, hp.hidden_units],
            initializer=tf.truncated_normal_initializer(stddev=0.02)
        )

        output_bias = tf.get_variable(
            "output_bias", [num_classes],
            initializer=tf.zeros_initializer()
        )

        logits = tf.matmul(pooled_output, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=label_ids)
        loss = tf.reduce_mean(input_tensor=loss_arr)

        self.loss = loss
        self.logits = logits
        self.sout = tf.nn.softmax(self.logits)
Example #9
    def __init__(self, hp, voca_size, mode=1):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        scores = tf.placeholder(tf.float32, [None])
        #        self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = scores

        use_one_hot_embeddings = use_tpu
        is_training = True
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode == 1:
            enc = self.model.get_pooled_output()
        else:
            enc = self.model.get_all_encoder_layers()
        self.enc = enc
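        # NOTE: get_all_encoder_layers() returns a list of per-layer tensors,
        # so the dense layer below only receives a single tensor on the
        # mode == 1 (pooled output) path.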
        logits = tf.layers.dense(enc, 1, name="reg_dense")  # [ None, 1]
        self.logits = logits

        paired = tf.reshape(logits, [-1, 2])
        losses = tf.maximum(hp.alpha - (paired[:, 1] - paired[:, 0]), 0)

        self.loss = tf.reduce_mean(losses)
        tf.summary.scalar('loss', self.loss)
Example #10
    def __init__(self, hp, voca_size):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        scores = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = scores

        use_one_hot_embeddings = use_tpu
        is_training = True
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        enc = self.model.get_sequence_output()
        enc = tf.layers.dense(enc, hp.hidden_units, name="dense1") # [ None, seq_length, hidden]
        logits = tf.layers.dense(enc, 1, name="dense2")
        self.logits = tf.reshape(logits, [-1, seq_length])

        self.sout = tf.sigmoid(self.logits)
        #self.sout = tf.nn.softmax(self.logits, axis=1)
        #losses = tf.cast(self.y, tf.float32) * -tf.log(self.sout) # [ None, seq_length ]

        # tf.losses.sigmoid_cross_entropy already reduces to a scalar loss
        self.loss = tf.losses.sigmoid_cross_entropy(self.y, logits=self.logits)
        tf.summary.scalar('loss', self.loss)

        p = self.sout
        pred = tf.greater(p, 0.5)
        self.prec = tf_module.precision_b(pred, self.y)
        self.recall = tf_module.recall_b(pred, self.y)
        tf.summary.scalar('prec', self.prec)
Example #11
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = placeholder(tf.float32, [None, seq_length])
        elif method in [METHOD_CROSSENT, METHOD_HINGE]:
            self.rf_mask = placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled = self.model.get_pooled_output()
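        # NOTE: in TF1, the second positional argument of tf.nn.dropout is
        # keep_prob, so hp.dropout_rate is presumably a keep probability here.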
        pooled = tf.nn.dropout(pooled, hp.dropout_rate)
        logits = tf.layers.dense(pooled,
                                 data_generator.NLI.nli_info.num_classes,
                                 name="cls_dense")
        labels = tf.one_hot(label_ids, data_generator.NLI.nli_info.num_classes)
        self.acc = tf_module.accuracy(logits, label_ids)
        self.logits = logits
        tf.summary.scalar("acc", self.acc)
        self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=labels)
        self.loss = tf.reduce_mean(self.loss_arr)
        tf.summary.scalar("loss", self.loss)
Example #12
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        elif method in [METHOD_CROSSENT, METHOD_HINGE]:
            self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss

        self.embedding_output = self.model.get_embedding_output()
        self.all_layers = self.model.get_all_encoder_layers()
Example #13
    def __init__(self, hp, voca_size):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        score = tf.placeholder(tf.float32, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = score

        use_one_hot_embeddings = use_tpu
        is_training = True
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        all_layers = self.model.get_all_encoder_layers()
        enc = tf.concat(all_layers, axis=2) # [None, seq_len, Hidden_dim * num_blocks]
        per_token_score = tf.layers.dense(enc, 1, name="reg_dense") # [None, seq_len, 1]
        self.logits = tf.reduce_sum(per_token_score, axis=1) # [None, 1]

        paired = tf.reshape(self.logits, [-1, 2])
        losses = tf.maximum(hp.alpha - (paired[:, 1] - paired[:, 0]), 0)
        self.loss = tf.reduce_mean(losses)

        gain = tf.maximum(paired[:, 1] - paired[:, 0], 0)
        self.acc = tf.cast(tf.count_nonzero(gain), tf.float32) / tf.reduce_sum(tf.ones_like(gain))
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.acc)
Example #14
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = bert.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)


    loss = tf.reduce_mean(per_example_loss)
    #loss = f1_loss(logits, one_hot_labels)
    return (loss, per_example_loss, logits, probabilities)
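
Note: a minimal sketch of wiring create_model into a graph, assuming a bert.BertConfig named bert_config like the ones above and TF1-style placeholders; the sequence length and label count are illustrative:

    seq_length = 128  # illustrative
    num_labels = 2    # illustrative
    input_ids = tf.placeholder(tf.int32, [None, seq_length])
    input_mask = tf.placeholder(tf.int32, [None, seq_length])
    segment_ids = tf.placeholder(tf.int32, [None, seq_length])
    labels = tf.placeholder(tf.int32, [None])

    loss, per_example_loss, logits, probabilities = create_model(
        bert_config, is_training=True, input_ids=input_ids,
        input_mask=input_mask, segment_ids=segment_ids, labels=labels,
        num_labels=num_labels, use_one_hot_embeddings=False)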
Example #15
    def __init__(self, hp, voca_size, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(2)

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.acc)
Example #16
    def __init__(self, hp, voca_size):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        self.begin = tf.placeholder(tf.int32, [None, seq_length])
        self.end = tf.placeholder(tf.int32, [None, seq_length])

        self.y = tf.stack([self.begin, self.end], axis=2)

        self.x_list = [input_ids, input_mask, segment_ids]

        use_one_hot_embeddings = use_tpu
        is_training = True
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        enc = self.model.get_sequence_output()
        enc = tf.layers.dense(enc, hp.hidden_units, name="dense1") # [ None, seq_length, hidden]
        self.logits = tf.layers.dense(enc, 2, name="dense2")

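        # softmax over axis=1 normalizes across token positions, yielding one
        # distribution over begin positions and one over end positions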
        self.sout = tf.nn.softmax(self.logits, axis=1)
        losses = tf.cast(self.y, tf.float32) * -tf.log(self.sout) # [None, seq_length, 2]

        self.loss = tf.reduce_sum(losses)
        tf.summary.scalar('loss', self.loss)
Example #17
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        elif method in [METHOD_CROSSENT, METHOD_HINGE]:
            self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.acc)
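        # Auxiliary "conflict" head: each branch below scores tokens from the
        # sequence output and pairs them with rf_mask under a different
        # auxiliary loss (masked sum, hinge, correlation, or cross-entropy).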
        if method == 0:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            cl = tf.nn.sigmoid(cl)
            # cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            # self.pkc = self.conf_logits * self.rf_mask
            # rl_loss_list = tf.reduce_sum(self.pkc, axis=1)
            rl_loss_list = tf.reduce_sum(self.conf_logits *
                                         tf.cast(self.rf_mask, tf.float32),
                                         axis=1)
            self.rl_loss = tf.reduce_mean(rl_loss_list)
        elif method == 1:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            #rl_loss_list = tf_module.cossim(cl, self.rf_mask)
            #self.pkc = self.conf_logits * self.rf_mask
            rl_loss_list = tf.reduce_sum(self.conf_logits * self.rf_mask,
                                         axis=1)
            self.rl_loss = tf.reduce_mean(rl_loss_list)
        elif method == METHOD_CROSSENT:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 2,
                                 name="aux_conflict")
            probs = tf.nn.softmax(cl)
            losses = tf.losses.softmax_cross_entropy(
                onehot_labels=tf.one_hot(self.rf_mask, 2), logits=cl)
            self.conf_logits = probs[:, :, 1] - 0.5
            self.rl_loss = tf.reduce_mean(losses)
        elif method == 3:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            self.bias = tf.Variable(0.0)
            self.conf_logits = (cl + self.bias)
            rl_loss_list = tf.nn.relu(1 - self.conf_logits * self.rf_mask)
            rl_loss_list = tf.reduce_mean(rl_loss_list, axis=1)
            self.rl_loss = tf.reduce_mean(rl_loss_list)
            labels = tf.greater(self.rf_mask, 0)
            hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
            self.hinge_loss = tf.reduce_sum(hinge_losses)
        elif method == 4:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            labels = tf.greater(self.rf_mask, 0)
            hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
            self.rl_loss = hinge_losses
        elif method == 5:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            #cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            self.labels = tf.cast(tf.greater(self.rf_mask, 0), tf.float32)
            self.rl_loss = tf.reduce_mean(
                tf_module.correlation_coefficient_loss(cl, -self.rf_mask))
        elif method == 6:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            #cl = tf.layers.dense(cl1, 1, name="aux_conflict2")
            cl = tf.reshape(cl, [-1, seq_length])
            #cl = tf.nn.sigmoid(cl)
            #cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            #rl_loss_list = tf.reduce_sum(self.conf_logits * self.rf_mask , axis=1)
            self.rl_loss = tf.reduce_mean(
                tf_module.correlation_coefficient_loss(cl, -self.rf_mask))
        elif method == METHOD_HINGE:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            self.conf_logits = cl
            labels = tf.greater(self.rf_mask, 0)
            hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
            self.rl_loss = tf.reduce_sum(hinge_losses)

        self.conf_softmax = tf.nn.softmax(self.conf_logits, axis=-1)
Example #18
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        dummy_data = features["dummy"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings,
        )

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             bert_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss + next_sentence_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights,
                          next_sentence_example_loss, next_sentence_log_probs,
                          next_sentence_labels):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs,
                    [-1, next_sentence_log_probs.shape[-1]])
                next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                      axis=-1,
                                                      output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
                next_sentence_accuracy = tf.metrics.accuracy(
                    labels=next_sentence_labels,
                    predictions=next_sentence_predictions)
                next_sentence_mean_loss = tf.metrics.mean(
                    values=next_sentence_example_loss)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                    "next_sentence_accuracy": next_sentence_accuracy,
                    "next_sentence_loss": next_sentence_mean_loss,
                }

            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, next_sentence_example_loss,
                next_sentence_log_probs, next_sentence_labels
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
Example #19
    def __init__(self, hp, voca_size, num_class_list, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )
        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length],
                                   name="input_ids")
        input_mask = tf.placeholder(tf.int64, [None, seq_length],
                                    name="input_mask")
        segment_ids = tf.placeholder(tf.int64, [None, seq_length],
                                     name="segment_ids")

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y1 = tf.placeholder(tf.int64, [None], name="y1")
        self.y2 = tf.placeholder(tf.int64, [None], name="y2")
        self.y = [self.y1, self.y2]
        summary1 = {}
        summary2 = {}
        self.summary_list = [summary1, summary2]

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        task = Classification(num_class_list[0])
        pred, loss = task.predict(self.model.get_sequence_output(), self.y1,
                                  True)
        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
        summary1['loss1'] = tf.summary.scalar('loss', self.loss)
        summary1['acc1'] = tf.summary.scalar('acc', self.acc)

        with tf.variable_scope("cls2"):
            task2 = Classification(num_class_list[1])
            pred, loss = task2.predict(self.model.get_sequence_output(),
                                       self.y2, True)
            self.logits2 = task2.logits
            self.sout2 = tf.nn.softmax(self.logits2)
            self.pred2 = pred
            self.loss2 = loss
            self.acc2 = task2.acc
            summary2['loss2'] = tf.summary.scalar('loss2', self.loss2)
            summary2['acc2'] = tf.summary.scalar('acc2', self.acc2)

        self.logit_list = [self.logits, self.logits2]
        self.loss_list = [self.loss, self.loss2]
        self.pred_list = [self.pred, self.pred2]
Example #20
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)
        task2_num_classes = 3

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        elif method in [2]:
            self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids
        self.y1 = tf.placeholder(tf.int64, [None], name="y1")
        self.y2 = tf.placeholder(tf.int64, [None], name="y2")
        self.f_loc1 = tf.placeholder(tf.int64, [None], name="f_loc1")
        self.f_loc2 = tf.placeholder(tf.int64, [None], name="f_loc2")

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids, True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
        #tf.summary.scalar('loss', self.loss)
        #tf.summary.scalar('acc', self.acc)

        enc = self.model.get_sequence_output() # [Batch, Seq_len, hidden_dim]

        logits_raw = tf.layers.dense(enc, 3) # [Batch, seq_len, 3]
        def select(logits, f_loc):
            mask = tf.reshape(tf.one_hot(f_loc, seq_length), [-1, seq_length, 1]) # [Batch, seq_len, 1]
            t = tf.reduce_sum(logits * mask, axis=1)
            return t

        logits1 = select(logits_raw, self.f_loc1) # [Batch, 3]
        logits2 = select(logits_raw, self.f_loc2)  # [Batch, 3]
        self.logits1 = logits1
        self.logits2 = logits2
        label1 = tf.one_hot(self.y1, task2_num_classes) # [Batch, num_class]
        label2 = tf.one_hot(self.y2, task2_num_classes)
        losses1_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits1,
            labels=label1)

        losses2_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits2,
            labels=label2)

        self.loss_paired = tf.reduce_mean(losses1_arr) #+ tf.reduce_mean(losses2_arr)