Example #1
    def __init__(self, hp, voca_size, is_training=True):
        config = BertConfig(vocab_size=voca_size,
                            hidden_size=hp.hidden_units,
                            num_hidden_layers=hp.num_blocks,
                            num_attention_heads=hp.num_heads,
                            intermediate_size=hp.intermediate_size,
                            type_vocab_size=hp.type_vocab_size)

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled_output = self.model.get_pooled_output()

        task = ClassificationB(is_training, hp.hidden_units, 3)
        task.call(pooled_output, label_ids)
        self.loss = task.loss
        self.logits = task.logits
        self.acc = task.acc
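
A minimal usage sketch for a wrapper like this one, assuming the TF1-style graph/session workflow the snippets already use. The class name, the `hp` field values, and the batch data below are illustrative assumptions, not values from the source.

    import numpy as np
    import tensorflow as tf
    from types import SimpleNamespace

    # Hypothetical hyperparameters; field names mirror what __init__ reads.
    hp = SimpleNamespace(hidden_units=768, num_blocks=12, num_heads=12,
                         intermediate_size=3072, type_vocab_size=2, seq_max=128)
    model = BertClassifier(hp, voca_size=30522)  # stand-in name for the class above

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Feed the three int64 inputs (ids, mask, segment ids) plus the labels.
    batch = [np.zeros([8, hp.seq_max], dtype=np.int64) for _ in range(3)]
    feed = dict(zip(model.x_list, batch))
    feed[model.y] = np.zeros([8], dtype=np.int64)
    loss_val, acc_val = sess.run([model.loss, model.acc], feed_dict=feed)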
Example #2
    def __init__(self, hp, num_classes, voca_size, is_training=True):
        config = bert.BertConfig(vocab_size=voca_size,
                                 hidden_size=hp.hidden_units,
                                 num_hidden_layers=hp.num_blocks,
                                 num_attention_heads=hp.num_heads,
                                 intermediate_size=hp.intermediate_size,
                                 type_vocab_size=hp.type_vocab_size,
                                 )

        seq_length = hp.seq_max
        use_tpu = False

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled_output = self.model.get_pooled_output()
        output_weights = tf.compat.v1.get_variable(
            "output_weights", [num_classes, hp.hidden_units],
            initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02)
        )

        output_bias = tf.compat.v1.get_variable(
            "output_bias", [num_classes],
            initializer=tf.compat.v1.zeros_initializer()
        )

        logits = tf.matmul(pooled_output, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=label_ids)
        loss = tf.reduce_mean(input_tensor=loss_arr)

        self.loss = loss
        self.logits = logits
        self.sout = tf.nn.softmax(self.logits)
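
Inference with this variant can fetch `self.sout` directly. Continuing the hypothetical session from the sketch under Example #1:

    # probs has shape [batch, num_classes]; sout is the softmax over the logits.
    probs = sess.run(model.sout, feed_dict=dict(zip(model.x_list, batch)))
    predictions = probs.argmax(axis=-1)  # predicted class index per example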
Example #3
    def model_tag(self, sequence_output, seq_max, var_name):
        ex_label = placeholder(tf.int32, [None, seq_max])
        valid_mask = placeholder(tf.float32, [None, 1])
        with tf.variable_scope(var_name):
            ex_logits = tf.layers.dense(sequence_output, 2, name=var_name)
            ex_prob = tf.nn.softmax(ex_logits)[:, :, 1]
            # reduction=NONE keeps the per-token losses so the mask can be applied.
            losses = tf.losses.softmax_cross_entropy(
                onehot_labels=tf.one_hot(ex_label, 2), logits=ex_logits,
                reduction=tf.losses.Reduction.NONE)
            losses = valid_mask * losses
            loss = tf.reduce_mean(losses)

        return {
            'labels': ex_label,
            'mask': valid_mask,
            'ex_logits': ex_logits,
            'score': ex_prob,
            'losses': losses,
            'loss': loss
        }
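
With `reduction=tf.losses.Reduction.NONE` the cross-entropy keeps its per-token shape [batch, seq_max], so the [batch, 1] `valid_mask` broadcasts across the sequence dimension and zeroes out masked examples before the final mean.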
Example #4
    def model_tag(self, sequence_output, seq_max, var_name):
        ex_labels = placeholder(tf.float32, [None, seq_max])
        valid_mask = placeholder(tf.float32, [None, 1])
        with tf.variable_scope(var_name):
            ex_logits = tf.layers.dense(sequence_output, 1, name=var_name)
            ex_logits = tf.reshape(ex_logits, [-1, seq_max])
            labels_ = tf.cast(tf.greater(ex_labels, 0), tf.float32)
            losses = tf_module.correlation_coefficient_loss(ex_logits, -labels_)
            losses = valid_mask * losses
        loss = tf.reduce_mean(losses)
        score = ex_logits

        return {
            'labels': ex_labels,
            'mask': valid_mask,
            'ex_logits': ex_logits,
            'score': score,
            'losses': losses,
            'loss': loss
        }
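
`tf_module.correlation_coefficient_loss` is repo-internal and not shown in this excerpt. A plausible Pearson-style reading, offered strictly as an assumption, returns a per-row correlation coefficient with a trailing axis kept so the [batch, 1] `valid_mask` above lines up:

    def correlation_coefficient_loss(x, y, axis=-1):
        # Assumed sketch: per-row Pearson correlation r(x, y) in [-1, 1].
        # The call sites pass negated labels, so minimizing r(x, -labels)
        # pushes x toward positive correlation with the labels.
        mx = tf.reduce_mean(x, axis=axis, keepdims=True)
        my = tf.reduce_mean(y, axis=axis, keepdims=True)
        xm, ym = x - mx, y - my
        cov = tf.reduce_sum(xm * ym, axis=axis, keepdims=True)
        denom = tf.sqrt(tf.reduce_sum(tf.square(xm), axis=axis, keepdims=True) *
                        tf.reduce_sum(tf.square(ym), axis=axis, keepdims=True))
        return cov / (denom + 1e-8)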
Example #5
    def __init__(self, hp, voca_size, method, is_training=True):
        config = BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = BertModel(config=config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
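
Unlike Examples #1 and #2, this variant hands `get_sequence_output()` (the full [batch, seq, hidden] token representations) to the task head rather than the pooled [CLS] vector, so the `Classification` task presumably does its own pooling internally.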
Example #6
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = placeholder(tf.float32, [None, seq_length])
        elif method in [METHOD_CROSSENT, METHOD_HINGE]:
            self.rf_mask = placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pooled = self.model.get_pooled_output()
        # NB: in TF1 the second positional argument of tf.nn.dropout is
        # keep_prob, so hp.dropout_rate here acts as a keep probability.
        pooled = tf.nn.dropout(pooled, hp.dropout_rate)
        logits = tf.layers.dense(pooled,
                                 data_generator.NLI.nli_info.num_classes,
                                 name="cls_dense")
        labels = tf.one_hot(label_ids, data_generator.NLI.nli_info.num_classes)
        self.acc = tf_module.accuracy(logits, label_ids)
        self.logits = logits
        tf.summary.scalar("acc", self.acc)
        self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=labels)
        self.loss = tf.reduce_mean(self.loss_arr)
        tf.summary.scalar("loss", self.loss)
Example #7
    def __init__(self, hp, voca_size, method, is_training=True):
        config = bert.BertConfig(
            vocab_size=voca_size,
            hidden_size=hp.hidden_units,
            num_hidden_layers=hp.num_blocks,
            num_attention_heads=hp.num_heads,
            intermediate_size=hp.intermediate_size,
            type_vocab_size=hp.type_vocab_size,
        )

        seq_length = hp.seq_max
        use_tpu = False
        task = Classification(data_generator.NLI.nli_info.num_classes)

        input_ids = placeholder(tf.int64, [None, seq_length])
        input_mask = placeholder(tf.int64, [None, seq_length])
        segment_ids = placeholder(tf.int64, [None, seq_length])
        label_ids = placeholder(tf.int64, [None])
        if method in [0, 1, 3, 4, 5, 6]:
            self.rf_mask = placeholder(tf.float32, [None, seq_length])
        elif method in [METHOD_CROSSENT, METHOD_HINGE]:
            self.rf_mask = placeholder(tf.int32, [None, seq_length])

        self.x_list = [input_ids, input_mask, segment_ids]
        self.y = label_ids

        use_one_hot_embeddings = use_tpu
        self.model = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        pred, loss = task.predict(self.model.get_sequence_output(), label_ids,
                                  True)

        self.logits = task.logits
        self.sout = tf.nn.softmax(self.logits)
        self.pred = pred
        self.loss = loss
        self.acc = task.acc
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.acc)
        if method == 0:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            cl = tf.nn.sigmoid(cl)
            self.conf_logits = cl
            rl_loss_list = tf.reduce_sum(self.conf_logits *
                                         tf.cast(self.rf_mask, tf.float32),
                                         axis=1)
            self.rl_loss = tf.reduce_mean(rl_loss_list)
        elif method == 1:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            rl_loss_list = tf.reduce_sum(self.conf_logits * self.rf_mask,
                                         axis=1)
            self.rl_loss = tf.reduce_mean(rl_loss_list)
        elif method == METHOD_CROSSENT:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 2,
                                 name="aux_conflict")
            probs = tf.nn.softmax(cl)
            losses = tf.losses.softmax_cross_entropy(
                onehot_labels=tf.one_hot(self.rf_mask, 2), logits=cl)
            self.conf_logits = probs[:, :, 1] - 0.5
            self.rl_loss = tf.reduce_mean(losses)
        elif method == 3:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            self.bias = tf.Variable(0.0)
            self.conf_logits = (cl + self.bias)
            rl_loss_list = tf.nn.relu(1 - self.conf_logits * self.rf_mask)
            rl_loss_list = tf.reduce_mean(rl_loss_list, axis=1)
            self.rl_loss = tf.reduce_mean(rl_loss_list)
            labels = tf.greater(self.rf_mask, 0)
            hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
            self.hinge_loss = tf.reduce_sum(hinge_losses)
        elif method == 4:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            cl = tf.contrib.layers.layer_norm(cl)
            self.conf_logits = cl
            labels = tf.greater(self.rf_mask, 0)
            hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
            self.rl_loss = hinge_losses
        elif method == 5:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            self.conf_logits = cl
            self.labels = tf.cast(tf.greater(self.rf_mask, 0), tf.float32)
            self.rl_loss = tf.reduce_mean(
                tf_module.correlation_coefficient_loss(cl, -self.rf_mask))
        elif method == 6:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            #cl = tf.layers.dense(cl1, 1, name="aux_conflict2")
            cl = tf.reshape(cl, [-1, seq_length])
            self.conf_logits = cl
            self.rl_loss = tf.reduce_mean(
                tf_module.correlation_coefficient_loss(cl, -self.rf_mask))
        elif method == METHOD_HINGE:
            cl = tf.layers.dense(self.model.get_sequence_output(),
                                 1,
                                 name="aux_conflict")
            cl = tf.reshape(cl, [-1, seq_length])
            self.conf_logits = cl
            labels = tf.greater(self.rf_mask, 0)
            hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
            self.rl_loss = tf.reduce_sum(hinge_losses)

        self.conf_softmax = tf.nn.softmax(self.conf_logits, axis=-1)
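
`METHOD_CROSSENT` and `METHOD_HINGE` are module-level constants that this excerpt does not define. Note also that `self.conf_logits` is assigned only inside the method branches, so the final `conf_softmax` line raises an AttributeError for any `method` value none of the branches handle.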
Example #8
def run():
    all_loss = 0
    tower_grads = []

    input_x_list = []
    input_y_list = []
    models = []
    for gpu_idx in range(2):
        with tf.device("/gpu:{}".format(gpu_idx)):
            with tf.variable_scope("vars", reuse=gpu_idx > 0):
                input_x = placeholder(tf.float32, [None, 10])
                input_y = placeholder(tf.int32, [None])
                input_x_list.append(input_x)
                input_y_list.append(input_y)
                model = FF(input_x, input_y)
                models.append(model)
                tf.get_variable_scope().reuse_variables()
                all_loss += model.task.loss

    tvars = tf.trainable_variables()
    for t in tvars:
        print(t.name)

    for gpu_idx in range(2):
        # Gradients of each tower's own loss w.r.t. the shared variables.
        grads = tf.gradients(models[gpu_idx].task.loss, tvars)
        print(grads)
        # Keep track of the gradients across all towers.
        tower_grads.append(grads)

    avg_grads = []
    for t_idx, _ in enumerate(tvars):
        # Average this variable's gradient across the two towers.
        g1 = tower_grads[0][t_idx]
        g2 = tower_grads[1][t_idx]

        g_avg = (g1 + g2) / 2 if g1 is not None else None
        avg_grads.append(g_avg)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = AdamWeightDecayOptimizer(
        learning_rate=0.001,
        weight_decay_rate=0.02,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
    # Apply the tower-averaged gradients, not the last tower's raw gradients.
    train_cls = optimizer.apply_gradients(zip(avg_grads, tvars),
                                          global_step=global_step)

    #train_cls = get_train_op2(all_loss, 0.001, "adam", 10000)
    sess = init_session()
    sess.run(tf.global_variables_initializer())

    def train_classification(i):
        # Alternate the labels fed to the second tower between all-ones
        # and all-zeros batches.
        if i % 2 == 0:
            random_input = np.ones([batch_size])
        else:
            random_input = np.zeros([batch_size])

        loss_val, _ = sess.run(
            [model.task.loss, train_cls],
            feed_dict={
                input_x_list[0]: np.ones([batch_size, 10]),
                input_x_list[1]: np.ones([batch_size, 10]),
                input_y_list[0]: np.zeros([batch_size]),
                input_y_list[1]: random_input,
            })
        print(loss_val)

    for i in range(10):
        print("Train")
        train_classification(i)
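
The per-variable averaging above is often factored into a helper that generalizes to any number of towers; a minimal sketch, not from the source:

    def average_tower_grads(tower_grads):
        # tower_grads: one gradient list per GPU, each aligned with tvars.
        # None gradients (variables unused by a loss) are propagated as None.
        avg_grads = []
        for grads in zip(*tower_grads):
            if grads[0] is None:
                avg_grads.append(None)
            else:
                avg_grads.append(tf.add_n(list(grads)) / float(len(grads)))
        return avg_grads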