Example #1
    def __init__(self, hp, voca_size, is_training=True):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled_output = self.model.get_pooled_output()

        task = ClassificationB(is_training, hp.hidden_units, 3)
        task.call(pooled_output, label_ids)
        self.loss = task.loss
        self.logits = task.logits
        self.acc = task.acc
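A minimal usage sketch for this constructor (not from the source: the class name `ClassificationModel`, the `Hp` container, and the zero-filled batch are all illustrative, and the `bert` and `ClassificationB` helpers must be importable):

import numpy as np
import tensorflow as tf

class Hp:  # hypothetical hyperparameters with the fields the constructor reads
    hidden_units = 768
    num_blocks = 12
    num_heads = 12
    intermediate_size = 3072
    type_vocab_size = 2
    seq_max = 128

model = ClassificationModel(Hp(), voca_size=30522)  # assumed class name
dummy = np.zeros([8, Hp.seq_max], dtype=np.int64)   # batch of 8 blank sequences
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = dict(zip(model.x_list, [dummy, dummy, dummy]))
    feed[model.y] = np.zeros([8], dtype=np.int64)
    loss_val, acc_val = sess.run([model.loss, model.acc], feed_dict=feed)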
Example #2
    def network(self, features, mode):
        config = bert.BertConfig(
            vocab_size=self.voca_size,
            hidden_size=self.hp.hidden_units,
            num_hidden_layers=self.hp.num_blocks,
            num_attention_heads=self.hp.num_heads,
            intermediate_size=self.hp.intermediate_size,
            type_vocab_size=self.hp.type_vocab_size,
        )
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        self.label_ids = features["label_ids"]

        is_training = (tf.estimator.ModeKeys.TRAIN == mode)
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=self.use_one_hot_embeddings)

        enc = self.model.get_sequence_output()
        return self.task.predict_ex(enc, self.label_ids, mode)
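Example #2 targets the tf.estimator API: `features` comes from an input_fn and `mode` is one of `tf.estimator.ModeKeys`. A hedged sketch of how such a `network` method could be wrapped in a `model_fn` (the `runner` object and the (preds, loss) return shape are assumptions based on the other examples):

def model_fn(features, labels, mode, params):
    runner = params["runner"]  # hypothetical instance of the class above
    preds, loss = runner.network(features, mode)  # assumed TRAIN/EVAL return
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer(1e-5).minimize(
            loss, global_step=tf.train.get_or_create_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, predictions=preds)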
Example #3
    def __init__(self, hp, voca_size, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        s_portion = tf.placeholder(tf.float32, [None])
        d_portion = tf.placeholder(tf.float32, [None])
        s_sum = tf.placeholder(tf.int64, [None])
        d_sum = tf.placeholder(tf.int64, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = [s_portion, d_portion]
        self.y_sum = [s_sum, d_sum]

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        enc = self.model.get_sequence_output()
        pool = tf.layers.dense(enc[:, 0, :], hp.hidden_units, name="pool")

        s_logits = tf.layers.dense(pool, 2, name="cls_dense_support")
        d_logits = tf.layers.dense(pool, 2, name="cls_dense_dispute")

        loss = 0
        self.acc = []
        for logits, y, mask_sum in [(s_logits, self.y[0], s_sum),
                                    (d_logits, self.y[1], d_sum)]:
            labels = tf.cast(tf.greater(y, 0.5), tf.int32)
            labels = tf.one_hot(labels, 2)
            preds = tf.to_int32(tf.argmax(logits, axis=-1))
            acc = tf_module.accuracy(logits, y)

            self.acc.append(acc)
            loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits, labels=labels)

            loss_arr = loss_arr * tf.cast(mask_sum, tf.float32)
            loss += tf.reduce_sum(loss_arr)

        self.loss = loss

        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('s_acc', self.acc[0])
        tf.summary.scalar('d_acc', self.acc[1])
Example #4
    def __init__(self, hp, voca_size, is_training=True):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.float32, [None, 3])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        feature = self.model.get_pooled_output()


        def dense_softmax(feature, name):
            logits = tf.layers.dense(feature, 2, name=name)
            sout = tf.nn.softmax(logits)
            return sout

        alpha = dense_softmax(feature, "dense_alpha")  # Probability of being Argument P(Arg)
        beta = dense_softmax(feature, "dense_beta")    # P(Arg+|Arg)
        gamma = dense_softmax(feature, "dense_gamma")  # P(not Noise)
        self.alpha = alpha[:, 0]
        self.beta = beta[:, 0]
        self.gamma = gamma[:, 0]

        p1_prior = 0.2
        p2_prior = 0.2
        p0_prior = 1 - p1_prior - p2_prior

        p1 = alpha[:, 0] * beta[:, 0] * gamma[:, 0] + gamma[:, 1] * p1_prior
        p2 = alpha[:, 0] * beta[:, 1] * gamma[:, 0] + gamma[:, 1] * p2_prior
        p0 = alpha[:, 1] * gamma[:, 0] + gamma[:, 1] * p0_prior


        pred = tf.stack([p0,p1,p2], axis=1)
        log_likelihood = tf.log(pred) * label_ids
        loss = - tf.reduce_mean(log_likelihood)
        self.pred = pred
        self.loss = loss
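The three heads combine into a proper distribution: since alpha, beta, and gamma are each 2-way softmaxes and the priors sum to 1, p0 + p1 + p2 = gamma[0]*(alpha[0]*beta[0] + alpha[0]*beta[1] + alpha[1]) + gamma[1]*(p1_prior + p2_prior + p0_prior) = gamma[0] + gamma[1] = 1. A quick NumPy check (illustrative only, not from the source):

import numpy as np

def softmax2(x):
    e = np.exp(x - x.max())
    return e / e.sum()

rng = np.random.default_rng(0)
alpha, beta, gamma = (softmax2(rng.normal(size=2)) for _ in range(3))
p1_prior, p2_prior = 0.2, 0.2
p0_prior = 1 - p1_prior - p2_prior
p1 = alpha[0] * beta[0] * gamma[0] + gamma[1] * p1_prior
p2 = alpha[0] * beta[1] * gamma[0] + gamma[1] * p2_prior
p0 = alpha[1] * gamma[0] + gamma[1] * p0_prior
assert abs(p0 + p1 + p2 - 1.0) < 1e-9  # the mixture is normalized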
Example #5
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        elif method in [2]:
            self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        with tf.variable_scope("part1"):
            self.model1 = bert.BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=use_one_hot_embeddings)

        with tf.variable_scope("part2"):
            self.model2 = bert.BertModel(
                config=config,
                is_training=is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=segment_ids,
                use_one_hot_embeddings=use_one_hot_embeddings)

        enc = tf.concat([
            self.model1.get_sequence_output(),
            self.model2.get_sequence_output(),
        ], axis=2)

        pred, loss = task.predict(enc, label_ids, True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
Example #6
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        elif method in [METHOD_CROSSENT, METHOD_HINGE]:
            self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert_get_hidden.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss

        all_layer_grads = []
        all_layers = self.model.all_layer_outputs
        for i in range(len(all_layers)):
            grad = tf.gradients(self.logits, all_layers[i])
            all_layer_grads.append(grad)

        grad_emb = tf.gradients(self.logits, self.model.embedding_output)
        self.all_layer_grads = all_layer_grads
        self.grad_emb = grad_emb
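Example #6 additionally wires up the gradients of the logits with respect to every encoder layer and the embedding output, the usual ingredients for gradient-based saliency. A hedged usage sketch (assuming `model` and a `feed` dict built as in the other examples):

grads_per_layer, grad_emb = sess.run(
    [model.all_layer_grads, model.grad_emb], feed_dict=feed)
# grad_emb[0] has shape [batch, seq_length, hidden]; a per-token saliency
# score is commonly its L2 norm over the hidden dimension.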
Example #7
    def __init__(self, hp, voca_size, mode=1):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        scores = tf.placeholder(tf.float32, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = scores

        use_one_hot_embeddings = use_tpu
        is_training = True
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        enc = self.model.get_sequence_output()
        enc = tf.layers.dense(enc, hp.hidden_units,
                              name="dense1")  # [ None, seq_length, hidden]
        matching = tf.expand_dims(enc, 3)  # [ None, seq_length, hidden, 1]
        pooled_rep = tf.nn.max_pool(matching,
                                    ksize=[1, seq_length, 1, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='VALID',
                                    data_format='NHWC')
        # [None, 1, hidden, 1]
        self.doc_v = tf.placeholder_with_default(
            tf.reshape(pooled_rep, [-1, hp.hidden_units]),
            (None, hp.hidden_units),
            name='pooled_rep')

        logits = tf.layers.dense(self.doc_v, 1, name="dense_reg")
        self.logits = logits
        paired = tf.reshape(logits, [-1, 2])
        losses = tf.maximum(hp.alpha - (paired[:, 1] - paired[:, 0]), 0)

        self.loss = tf.reduce_mean(losses)
        tf.summary.scalar('loss', self.loss)
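The `tf.reshape(logits, [-1, 2])` implies the batch is laid out as consecutive two-element pairs, and the loss is a pairwise hinge: the second item of each pair should score at least `hp.alpha` higher than the first. A small numeric illustration (the scores and `alpha` are made up):

import numpy as np

logits = np.array([[0.2], [0.9],    # pair 1: margin 0.7
                   [0.5], [0.4]])   # pair 2: margin -0.1
alpha = 1.0                         # stand-in for hp.alpha
paired = logits.reshape(-1, 2)
losses = np.maximum(alpha - (paired[:, 1] - paired[:, 0]), 0)
print(losses)         # [0.3 1.1]; only margin violations contribute
print(losses.mean())  # 0.7, what tf.reduce_mean(losses) would report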
Example #8
def fetch_bert_parameter(model_path):
    hp = hyperparams.HPSENLI()
    vocab_size = 30522
    vocab_filename = "bert_voca.txt"

    config = bert.BertConfig(
        vocab_size=vocab_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    hp.compare_deletion_num = 20
    seq_length = hp.seq_max

    is_training = False
    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    label_ids = tf.placeholder(tf.int64, [None])
    use_one_hot_embeddings = False
    model = bert.BertModel(config=config,
                           is_training=is_training,
                           input_ids=input_ids,
                           input_mask=input_mask,
                           token_type_ids=segment_ids,
                           use_one_hot_embeddings=use_one_hot_embeddings)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True

    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    variables = tf.contrib.slim.get_variables_to_restore()
    for v in variables:
        print(v)

    names = list([v.name for v in variables])
    loader = tf.train.Saver()
    loader.restore(sess, model_path)
    r, = sess.run([variables])

    output = dict(zip(names, r))

    for k in output:
        print(k)

    return output
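A hedged usage sketch for `fetch_bert_parameter`: it returns a `{variable_name: ndarray}` dict keyed by the `scope/name:0` strings printed above, so inspecting one parameter might look like this (the checkpoint path is illustrative, and the key assumes the standard BERT scope layout):

params = fetch_bert_parameter("/path/to/model.ckpt-10000")  # illustrative path
emb = params["bert/embeddings/word_embeddings:0"]           # assumed key
print(emb.shape)  # (vocab_size, hidden_size)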
Example #9
    def __init__(self, hp, num_classes, voca_size, is_training=True):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled_output = self.model.get_pooled_output()
        output_weights = tf.get_variable(
            "output_weights", [num_classes, hp.hidden_units],
            initializer=tf.truncated_normal_initializer(stddev=0.02)
        )

        output_bias = tf.get_variable(
            "output_bias", [num_classes],
            initializer=tf.zeros_initializer()
        )

        logits = tf.matmul(pooled_output, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=label_ids)
        loss = tf.reduce_mean(input_tensor=loss_arr)

        self.loss = loss
        self.logits = logits
        self.sout = tf.nn.softmax(self.logits)
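The explicit `matmul` + `bias_add` head mirrors Google's reference BERT classifier; functionally it is a single un-activated dense layer. A minimal equivalent restatement (a sketch using names from the snippet, not the source code):

logits = tf.layers.dense(
    pooled_output, num_classes,
    kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
    name="output")  # kernel stored as [hidden, num_classes] rather than transposed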
Example #10
    def __init__(self, hp, voca_size, mode=1):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        scores = tf.placeholder(tf.float32, [None])
        #        self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = scores

        use_one_hot_embeddings = use_tpu
        is_training = True
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode == 1:
            enc = self.model.get_pooled_output()
        else:
            enc = self.model.get_all_encoder_layers()
        self.enc = enc
        logits = tf.layers.dense(enc, 1, name="reg_dense")  # [ None, 1]
        self.logits = logits

        paired = tf.reshape(logits, [-1, 2])
        y_paired = tf.reshape(self.y, [-1, 2])
        raw_l = (paired[:, 1] - paired[:, 0])
        losses = tf.maximum(hp.alpha - (paired[:, 1] - paired[:, 0]), 0)

        self.loss = tf.reduce_mean(losses)
        tf.summary.scalar('loss', self.loss)
Example #11
    def __init__(self, hp, voca_size):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        scores = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = scores

        use_one_hot_embeddings = use_tpu
        is_training = True
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        enc = self.model.get_sequence_output()
        enc = tf.layers.dense(enc, hp.hidden_units, name="dense1") # [ None, seq_length, hidden]
        logits = tf.layers.dense(enc, 1, name="dense2")
        self.logits = tf.reshape(logits, [-1, seq_length])

        self.sout = tf.sigmoid(self.logits)
        #self.sout = tf.nn.softmax(self.logits, axis=1)
        #losses = tf.cast(self.y, tf.float32) * -tf.log(self.sout) # [ None, seq_length ]

        self.loss = tf.reduce_sum(tf.losses.sigmoid_cross_entropy(self.y, logits=self.logits))
        tf.summary.scalar('loss', self.loss)

        p = self.sout
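        # Equivalent to tf.greater(p, 0.5): positive wherever the sigmoid
        # probability exceeds one half.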
        pred = tf.less(tf.zeros_like(p), p - 0.5)
        self.prec = tf_module.precision_b(pred, self.y)
        self.recall = tf_module.recall_b(pred, self.y)
        tf.summary.scalar('prec', self.prec)
Example #12
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = placeholder(tf.float32, [None, seq_length])
        elif method in [METHOD_CROSSENT, METHOD_HINGE]:
            self.rf_mask = placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled = self.model.get_pooled_output()
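        # Note: in TF1, tf.nn.dropout's second positional argument is
        # keep_prob, so hp.dropout_rate here is the probability of KEEPING
        # a unit, not of dropping it.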
        pooled = tf.nn.dropout(pooled, hp.dropout_rate)
        logits = tf.layers.dense(pooled,
                                 data_generator.NLI.nli_info.num_classes,
                                 name="cls_dense")
        labels = tf.one_hot(label_ids, data_generator.NLI.nli_info.num_classes)
        self.acc = tf_module.accuracy(logits, label_ids)
        self.logits = logits
        tf.summary.scalar("acc", self.acc)
        self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=labels)
        self.loss = tf.reduce_mean(self.loss_arr)
        tf.summary.scalar("loss", self.loss)
Example #13
    def __init__(self, hp, voca_size):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        score = tf.placeholder(tf.float32, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = score

        use_one_hot_embeddings = use_tpu
        is_training = True
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        all_layers = self.model.get_all_encoder_layers()
        enc = tf.concat(all_layers, axis=2)  # [None, seq_len, hidden_dim * num_blocks]
        per_token_score = tf.layers.dense(enc, 1, name="reg_dense")  # [None, seq_len, 1]
        self.logits = tf.reduce_sum(per_token_score, axis=1)  # [None, 1]

        paired = tf.reshape(self.logits, [-1, 2])
        y_paired = tf.reshape(self.y, [-1,2])
        raw_l = (paired[:, 1] - paired[:, 0])
        losses = tf.maximum(hp.alpha - (paired[:, 1] - paired[:, 0]) , 0)
        self.loss = tf.reduce_mean(losses)

        gain = tf.maximum(paired[:, 1] - paired[:, 0], 0)
        self.acc = tf.cast(tf.count_nonzero(gain), tf.float32) / tf.reduce_sum(tf.ones_like(gain))
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.acc)
Example #14
    def __init__(self, hp, voca_size, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(2)

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.acc)
Example #15
    def __init__(self, hp, voca_size):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        self.begin = tf.placeholder(tf.int32, [None, seq_length])
        self.end = tf.placeholder(tf.int32, [None, seq_length])

        self.y = tf.stack([self.begin, self.end], axis=2)

        self.x_list = [input_ids, input_mask, segment_ids]

        use_one_hot_embeddings = use_tpu
        is_training = True
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        enc = self.model.get_sequence_output()
        enc = tf.layers.dense(enc, hp.hidden_units, name="dense1") # [ None, seq_length, hidden]
        self.logits = tf.layers.dense(enc, 2, name="dense2")

        self.sout = tf.nn.softmax(self.logits, axis=1)
        losses = tf.cast(self.y, tf.float32) * -tf.log(self.sout)  # [None, seq_length, 2]

        self.loss = tf.reduce_sum(losses)
        tf.summary.scalar('loss', self.loss)
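The `axis=1` softmax normalizes over sequence positions separately for each of the two output channels, so `sout[:, :, 0]` and `sout[:, :, 1]` are distributions over begin and end positions respectively. Decoding the most likely span could look like this greedy-argmax sketch (`model` is an assumed instance of the class above; the source only defines the loss):

begin_pred = tf.argmax(model.sout[:, :, 0], axis=1)  # [None] begin positions
end_pred = tf.argmax(model.sout[:, :, 1], axis=1)    # [None] end positions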
Example #16
    def __init__(self, hp, voca_size, is_training):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])
        #        self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids
        self.encoded_embedding_in = tf.placeholder(
            tf.float32, [None, seq_length, hp.hidden_units])
        self.attention_mask_in = tf.placeholder(tf.float32,
                                                [None, seq_length, seq_length])
        use_one_hot_embeddings = use_tpu
        self.model = bert.BertEmbeddingInOut(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings,
            embeddding_as_input=(self.encoded_embedding_in,
                                 self.attention_mask_in),
        )

        self.encoded_embedding_out = self.model.embedding_output
        self.attention_mask_out = self.model.attention_mask

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.acc)

        cl = tf.layers.dense(self.model.get_sequence_output(),
                             1,
                             name="aux_conflict")
        cl = tf.reshape(cl, [-1, seq_length])
        #cl = tf.nn.sigmoid(cl)
        #cl = tf.contrib.layers.layer_norm(cl)
        self.conf_logits = cl
        #self.pkc = self.conf_logits * self.rf_mask
        #rl_loss_list = tf.reduce_sum(self.pkc, axis=1)
        rl_loss_list = tf.reduce_sum(self.conf_logits *
                                     tf.cast(self.rf_mask, tf.float32),
                                     axis=1)

        num_tagged = tf.nn.relu(self.conf_logits + 1)
        self.verbose_loss = tf.reduce_mean(tf.reduce_sum(num_tagged, axis=1))
        self.rl_loss = tf.reduce_mean(rl_loss_list)
Example #17
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        elif method in [METHOD_CROSSENT, METHOD_HINGE]:
            self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.acc)
        if method == 0:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            cl = tf.nn.sigmoid(cl)
            # cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            # self.pkc = self.conf_logits * self.rf_mask
            # rl_loss_list = tf.reduce_sum(self.pkc, axis=1)
            rl_loss_list = tf.reduce_sum(self.conf_logits *
                                         tf.cast(self.rf_mask, tf.float32),
                                         axis=1)
            self.rl_loss = tf.reduce_mean(rl_loss_list)
        elif method == 1:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            #rl_loss_list = tf_module.cossim(cl, self.rf_mask)
            #self.pkc = self.conf_logits * self.rf_mask
            rl_loss_list = tf.reduce_sum(self.conf_logits * self.rf_mask,
                                         axis=1)
            self.rl_loss = tf.reduce_mean(rl_loss_list)
        elif method == METHOD_CROSSENT:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 2,
                                 name="aux_conflict")
            probs = tf.nn.softmax(cl)
            losses = tf.losses.softmax_cross_entropy(onehot_labels=tf.one_hot(
                self.rf_mask, 2),
                                                     logits=cl)
            self.conf_logits = probs[:, :, 1] - 0.5
            self.rl_loss = tf.reduce_mean(losses)
        elif method == 3:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            self.bias = tf.Variable(0.0)
            self.conf_logits = (cl + self.bias)
            rl_loss_list = tf.nn.relu(1 - self.conf_logits * self.rf_mask)
            rl_loss_list = tf.reduce_mean(rl_loss_list, axis=1)
            self.rl_loss = tf.reduce_mean(rl_loss_list)
            labels = tf.greater(self.rf_mask, 0)
            hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
            self.hinge_loss = tf.reduce_sum(hinge_losses)
        elif method == 4:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            labels = tf.greater(self.rf_mask, 0)
            hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
            self.rl_loss = hinge_losses
        elif method == 5:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            #cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            self.labels = tf.cast(tf.greater(self.rf_mask, 0), tf.float32)
            self.rl_loss = tf.reduce_mean(
                tf_module.correlation_coefficient_loss(cl, -self.rf_mask))
        elif method == 6:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            #cl = tf.layers.dense(cl1, 1, name="aux_conflict2")
            cl = tf.reshape(cl, [-1, seq_length])
            #cl = tf.nn.sigmoid(cl)
            #cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            #rl_loss_list = tf.reduce_sum(self.conf_logits * self.rf_mask , axis=1)
            self.rl_loss = tf.reduce_mean(
                tf_module.correlation_coefficient_loss(cl, -self.rf_mask))
        elif method == METHOD_HINGE:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            self.conf_logits = cl
            labels = tf.greater(self.rf_mask, 0)
            hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
            self.rl_loss = tf.reduce_sum(hinge_losses)

        self.conf_softmax = tf.nn.softmax(self.conf_logits, axis=-1)
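For reference, a compact summary of what each branch above attaches as the auxiliary loss (a reading aid inferred from the code; the integer values behind METHOD_CROSSENT and METHOD_HINGE are not shown in the snippet):

AUX_LOSS_BY_METHOD = {
    0: "sigmoid(cl) weighted by rf_mask, summed per example, then averaged",
    1: "layer-normed cl weighted by rf_mask, summed per example, then averaged",
    "METHOD_CROSSENT": "2-way softmax cross-entropy of cl against rf_mask",
    3: "mean of relu(1 - cl * rf_mask), plus a separate term in self.hinge_loss",
    4: "tf.losses.hinge_loss on layer-normed cl vs. (rf_mask > 0)",
    5: "correlation-coefficient loss between cl and -rf_mask (also sets self.labels)",
    6: "same correlation loss as 5, on raw (un-normalized) cl",
    "METHOD_HINGE": "summed tf.losses.hinge_loss on raw cl vs. (rf_mask > 0)",
}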
Example #18
    def __init__(self, hp, voca_size, num_class_list, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )
        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length],
                                   name="input_ids")
        input_mask = tf.placeholder(tf.int64, [None, seq_length],
                                    name="input_mask")
        segment_ids = tf.placeholder(tf.int64, [None, seq_length],
                                     name="segment_ids")

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y1 = tf.placeholder(tf.int64, [None], name="y1")
        self.y2 = tf.placeholder(tf.int64, [None], name="y2")
        self.y = [self.y1, self.y2]
        summary1 = {}
        summary2 = {}
        self.summary_list = [summary1, summary2]

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        task = Classification(num_class_list[0])
        pred, loss = task.predict(self.model.get_sequence_output(), self.y1,
                                  True)
        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
        summary1['loss1'] = tf.summary.scalar('loss', self.loss)
        summary1['acc1'] = tf.summary.scalar('acc', self.acc)

        with tf.variable_scope("cls2"):
            task2 = Classification(num_class_list[1])
            pred, loss = task2.predict(self.model.get_sequence_output(),
                                       self.y2, True)
            self.logits2 = task2.logits
            self.sout2 = tf.nn.softmax(self.logits2)
            self.pred2 = pred
            self.loss2 = loss
            self.acc2 = task2.acc
            summary2['loss2'] = tf.summary.scalar('loss2', self.loss2)
            summary2['acc2'] = tf.summary.scalar('acc2', self.acc2)

        self.logit_list = [self.logits, self.logits2]
        self.loss_list = [self.loss, self.loss2]
        self.pred_list = [self.pred, self.pred2]
Example #19
    def __init__(self, hp, voca_size, is_training):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])
        #        self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        self.fixed_embedding = tf.get_variable(
            "fixed_v", [hp.num_v, hp.fixed_v, hp.hidden_units],
            dtype=tf.float32,
            initializer=bert.create_initializer(config.initializer_range))
        self.encoded_embedding_in = tf.placeholder(
            tf.float32, [None, seq_length, hp.hidden_units])

        batch_dyn = tf.shape(self.encoded_embedding_in)[0]
        tile_fixed_emb = tf.reshape(
            tf.tile(self.fixed_embedding, [batch_dyn, 1, 1]),
            [-1, hp.num_v, hp.fixed_v, hp.hidden_units])

        valid_input_embedding = self.encoded_embedding_in[:, hp.fixed_v:, :]
        tile_enc_emb = tf.reshape(
            tf.tile(valid_input_embedding, [hp.num_v, 1, 1]),
            [hp.num_v, -1, hp.seq_max - hp.fixed_v, hp.hidden_units])
        tile_enc_emb = tf.transpose(tile_enc_emb, [1, 0, 2, 3])
        concat_embedding = tf.concat([tile_enc_emb, tile_fixed_emb],
                                     2)[:, :, :seq_length, :]
        concat_emb_flat = tf.reshape(concat_embedding,
                                     [-1, hp.seq_max, hp.hidden_units])
        self.attention_mask_in = tf.placeholder(tf.float32,
                                                [None, seq_length, seq_length])

        def repeat_num_v(t):
            # Repeat each example hp.num_v times along the batch dimension,
            # leaving all other dimensions untouched.
            tile_param = tf.concat(
                [[hp.num_v], tf.ones_like(tf.shape(t)[1:])], axis=0)
            return tf.tile(t, tile_param)

        # If we fed input_ids directly, the tokens would receive the position
        # embeddings of the start of the sequence, while they need to act as
        # the second segment. We bypass this by first retrieving the word
        # embeddings only.
        attention_mask_repeat = tf.reshape(
            tf.tile(self.attention_mask_in, [hp.num_v, 1, 1]),
            [-1, seq_length, seq_length])

        def repeat_dummy(in_tensor):
            return tf.concat(
                [in_tensor[:, :hp.fixed_v], in_tensor[:, :-hp.fixed_v]], 1)

        input_ids_pad = repeat_dummy(input_ids)
        input_mask_pad = repeat_dummy(input_mask)
        segment_ids_pad = repeat_dummy(segment_ids)

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertEmbeddingInOut(
            config=config,
            is_training=is_training,
            input_ids=input_ids_pad,
            input_mask=input_mask_pad,
            token_type_ids=segment_ids_pad,
            use_one_hot_embeddings=use_one_hot_embeddings,
            embeddding_as_input=(concat_emb_flat, attention_mask_repeat),
        )

        self.encoded_embedding_out = self.model.embedding_output
        self.attention_mask_out = self.model.attention_mask

        def predict(enc, Y, is_train):
            if is_train:
                mode = tf.estimator.ModeKeys.TRAIN
            else:
                mode = tf.estimator.ModeKeys.EVAL
            return predict_ex(enc, Y, mode)

        def predict_ex(enc, Y, mode):
            feature_loc = 0
            logits_raw = tf.layers.dense(
                enc[:, feature_loc, :],
                data_generator.NLI.nli_info.num_classes,
                name="cls_dense")
            if hp.use_reorder:
                logits_reorder = [
                    logits_raw[:, 1], logits_raw[:, 0], logits_raw[:, 2]
                ]
                logits_candidate = tf.stack(logits_reorder, axis=1)  # [-1, 3]
            else:
                logits_candidate = logits_raw
            logits_candidate = tf.reshape(
                logits_candidate,
                [-1, hp.num_v, data_generator.NLI.nli_info.num_classes])
            soft_candidate = tf.nn.softmax(logits_candidate)
            active_arg = tf.cast(tf.argmin(soft_candidate[:, :, 0], axis=1),
                                 dtype=tf.int32)  # [batch]
            indice = tf.stack([tf.range(batch_dyn), active_arg], axis=1)
            print(indice.shape)
            logits = tf.gather_nd(logits_candidate, indice)
            print(logits_candidate.shape)
            print(logits.shape)

            labels = tf.one_hot(Y, data_generator.NLI.nli_info.num_classes)
            preds = tf.to_int32(tf.argmax(logits, axis=-1))
            self.acc = tf_module.accuracy(logits, Y)
            self.logits = logits
            tf.summary.scalar("acc", self.acc)
            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=logits, labels=labels)

                neg = tf.cast(tf.equal(preds, 0),
                              tf.float32) * tf.constant(0.1)
                pos = tf.cast(tf.not_equal(preds, 0), tf.float32)

                weight_losses = self.loss_arr * (pos + neg)
                # TP : 1
                # FN : 0.1
                # FP : 1
                # TN : 0.1
                self.loss = tf.reduce_mean(weight_losses)
                tf.summary.scalar("loss", self.loss)
                return preds, self.loss
            else:
                return preds

        pred, loss = predict(self.model.get_sequence_output(), label_ids, True)
        #pred, loss = task.predict(self.model.get_sequence_output(), label_ids, True)

        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.acc)

        cl = tf.layers.dense(self.model.get_sequence_output(),
                             1,
                             name="aux_conflict")
        cl = tf.reshape(cl, [-1, seq_length])
        #cl = tf.nn.sigmoid(cl)
        #cl = tf.contrib.layers.layer_norm(cl)
        self.conf_logits = cl
        #self.pkc = self.conf_logits * self.rf_mask
        #rl_loss_list = tf.reduce_sum(self.pkc, axis=1)
        rl_loss_list = tf.reduce_sum(self.conf_logits *
                                     tf.cast(self.rf_mask, tf.float32),
                                     axis=1)

        num_tagged = tf.nn.relu(self.conf_logits + 1)
        self.verbose_loss = tf.reduce_mean(tf.reduce_sum(num_tagged, axis=1))
        self.rl_loss = tf.reduce_mean(rl_loss_list)
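A shape walkthrough of the candidate selection inside `predict_ex` above (B = batch size, V = hp.num_v, C = num_classes; a reading aid, not source code):

# logits_candidate : [B, V, C]   per-candidate class logits
# soft_candidate   : [B, V, C]   softmax over the class axis
# active_arg       : [B]         argmin over V of the class-0 probability
# indice           : [B, 2]      rows (i, active_arg[i])
# logits           : [B, C]      tf.gather_nd(logits_candidate, indice)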
Example #20
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)
        task2_num_classes = 3

        input_ids = tf.placeholder(tf.int64, [None, seq_length])
        input_mask = tf.placeholder(tf.int64, [None, seq_length])
        segment_ids = tf.placeholder(tf.int64, [None, seq_length])
        label_ids = tf.placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
        elif method in [2]:
            self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids
        self.y1 = tf.placeholder(tf.int64, [None], name="y1")
        self.y2 = tf.placeholder(tf.int64, [None], name="y2")
        self.f_loc1 = tf.placeholder(tf.int64, [None], name="f_loc1")
        self.f_loc2 = tf.placeholder(tf.int64, [None], name="f_loc2")

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids, True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
        #tf.summary.scalar('loss', self.loss)
        #tf.summary.scalar('acc', self.acc)

        enc = self.model.get_sequence_output() # [Batch, Seq_len, hidden_dim]

        logits_raw = tf.layers.dense(enc, 3) # [Batch, seq_len, 3]
        def select(logits, f_loc):
            mask = tf.reshape(tf.one_hot(f_loc, seq_length), [-1,seq_length, 1]) # [Batch, seq_len, 1]
            t = tf.reduce_sum(logits * mask, axis=1)
            return t

        logits1 = select(logits_raw, self.f_loc1) # [Batch, 3]
        logits2 = select(logits_raw, self.f_loc2)  # [Batch, 3]
        self.logits1 = logits1
        self.logits2 = logits2
        label1 = tf.one_hot(self.y1, task2_num_classes) # [Batch, num_class]
        label2 = tf.one_hot(self.y2, task2_num_classes)
        losses1_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits1,
            labels=label1)

        losses2_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits2,
            labels=label2)

        self.loss_paired = tf.reduce_mean(losses1_arr) #+ tf.reduce_mean(losses2_arr)
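The `select` helper above extracts the logits at one token position per example by multiplying with a one-hot mask and summing over the sequence axis. An equivalent formulation with `tf.gather_nd` (a sketch, not from the source):

def select_gather(logits, f_loc):
    # Pick row f_loc[i] from logits[i] for every example i: [Batch, 3].
    batch = tf.shape(logits)[0]
    indices = tf.stack([tf.range(batch), tf.cast(f_loc, tf.int32)], axis=1)
    return tf.gather_nd(logits, indices)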