Example #1
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_type_ids = features["input_type_ids"]
        extract_indices = features["extract_indices"]

        model = modeling.BertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=input_type_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()
        scaffold_fn = None
        (assignment_map,
         initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        all_layers = model.get_all_encoder_layers()

        predictions = {
            "unique_ids": unique_ids,
            "extract_indices": extract_indices
        }

        for (i, layer_index) in enumerate(layer_indexes):
            predictions["layer_output_%d" % i] = all_layers[layer_index]

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
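
Example #1 defines a PREDICT-only model_fn and returns one prediction tensor per requested encoder layer under the keys "layer_output_%d"; in the surrounding script these are typically consumed from Estimator.predict(). Below is a minimal, self-contained sketch (not from the example) of how one such result might be unpacked; the sizes and the layer_indexes value are assumptions used only for illustration.

    import numpy as np

    layer_indexes = [-1, -2, -3, -4]        # assumption: the last four encoder layers
    seq_len, hidden = 8, 16                 # illustrative sizes only

    # Fake one prediction result so the snippet runs on its own; in the real
    # script each `result` would come from estimator.predict().
    result = {"unique_ids": 0}
    for i, _ in enumerate(layer_indexes):
        result["layer_output_%d" % i] = np.zeros([seq_len, hidden])

    for i, layer_index in enumerate(layer_indexes):
        layer_output = result["layer_output_%d" % i]  # [seq_len, hidden] activations
        print(i, layer_index, layer_output.shape)
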
Example #2
def create_model(bert_config,
                 is_training,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 num_labels,
                 use_one_hot_embeddings,
                 reuse_flag=False):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value
    with tf.variable_scope("weights", reuse=reuse_flag):
        output_weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable("output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        #if is_training:
        #    print("###create_model.is_training:",is_training)
        #    output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        def apply_dropout_last_layer(output_layer):
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
            return output_layer

        def not_apply_dropout(output_layer):
            return output_layer

        output_layer = tf.cond(is_training,
                               lambda: apply_dropout_last_layer(output_layer),
                               lambda: not_apply_dropout(output_layer))
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        print("output_layer:", output_layer.shape, ";output_weights:",
              output_weights.shape, ";logits:",
              logits.shape)  # shape=(?, 1999)

        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.sigmoid(logits)  #tf.nn.softmax(logits, axis=-1)
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels, logits=logits)  # shape=(?, 1999)
        loss_batch = tf.reduce_sum(per_example_loss, axis=1)  #  (?,)
        loss = tf.reduce_mean(loss_batch)  # scalar batch loss

        return loss, per_example_loss, logits, probabilities, model
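
This variant treats the task as multi-label classification: each of the num_labels outputs gets an independent sigmoid, the per-label losses are summed per example, and the result is averaged over the batch. A small, self-contained sketch of that loss on made-up values (not from the example):

    import tensorflow as tf

    logits = tf.constant([[2.0, -1.0, 0.5]])   # one example, three labels
    labels = tf.constant([[1.0, 0.0, 1.0]])    # multi-hot targets
    per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                               logits=logits)
    loss = tf.reduce_mean(tf.reduce_sum(per_example_loss, axis=1))  # scalar

    with tf.Session() as sess:
        print(sess.run(loss))  # one number for the whole batch
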
Example #3
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, input_span_mask,
                 use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    final_hidden = model.get_sequence_output()

    final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
    batch_size = final_hidden_shape[0]
    seq_length = final_hidden_shape[1]
    hidden_size = final_hidden_shape[2]

    output_weights = tf.get_variable(
        "cls/squad/output_weights", [2, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "cls/squad/output_bias", [2], initializer=tf.zeros_initializer())

    final_hidden_matrix = tf.reshape(final_hidden,
                                     [batch_size * seq_length, hidden_size])
    logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    logits = tf.reshape(logits, [batch_size, seq_length, 2])
    logits = tf.transpose(logits, [2, 0, 1])

    unstacked_logits = tf.unstack(logits, axis=0)

    (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

    # apply output mask
    adder = (1.0 - tf.cast(input_span_mask, tf.float32)) * -10000.0
    start_logits += adder
    end_logits += adder

    return (start_logits, end_logits)
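
The adder above is the usual masking trick: positions outside input_span_mask get -10000 added to their logits, so a downstream softmax assigns them essentially zero probability. A tiny numeric sketch (values are illustrative, not from the example):

    import tensorflow as tf

    logits = tf.constant([[2.0, 1.0, 3.0, 0.5]])
    span_mask = tf.constant([[1, 1, 0, 0]])  # last two positions masked out
    adder = (1.0 - tf.cast(span_mask, tf.float32)) * -10000.0
    masked_probs = tf.nn.softmax(logits + adder, axis=-1)

    with tf.Session() as sess:
        print(sess.run(masked_probs))  # ~[[0.73, 0.27, 0.0, 0.0]]
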
Example #4
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits, probabilities)
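
The hand-written loss here (one-hot labels times log-softmax, summed and negated) is equivalent to TensorFlow's built-in sparse softmax cross-entropy; a quick check on toy values (not from the example):

    import tensorflow as tf

    logits = tf.constant([[2.0, 0.5, -1.0]])
    labels = tf.constant([0])
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    one_hot_labels = tf.one_hot(labels, depth=3, dtype=tf.float32)
    manual_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    builtin_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                                  logits=logits)

    with tf.Session() as sess:
        print(sess.run([manual_loss, builtin_loss]))  # both ~[0.24]
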
Example #5
    def forward(self):
        model = bert_modeling.BertModel(
            config=self.bert_config,
            is_training=self.is_train,
            input_ids=self.input_ids,
            input_mask=self.input_mask,
            token_type_ids=self.segment_ids,
            use_one_hot_embeddings=self.use_one_hot_embeddings)
        self.tvars = tf.trainable_variables()
        print(self.init_checkpoint)
        (self.assignment_map,
         _) = bert_modeling.get_assignment_map_from_checkpoint(
             self.tvars, self.init_checkpoint)
        tf.train.init_from_checkpoint(self.init_checkpoint,
                                      self.assignment_map)
        self.sequence_output_layer = model.get_pooled_output()  # pooled [CLS] vector, despite the name

        with tf.variable_scope("output_layer"):
            self.predict_layer_logits = tf.layers.dense(
                self.sequence_output_layer, units=14, name="prediction_layer")
            self.y_pred = tf.nn.softmax(self.predict_layer_logits,
                                        name="scores")
            self.predictions = tf.argmax(self.y_pred,
                                         axis=1,
                                         name="predictions")
            print("self.predictions:", self.predictions)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.predict_layer_logits, labels=self.y_true)
            self.loss = tf.reduce_mean(cross_entropy, name="loss")

        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(tf.argmax(self.y_pred, 1),
                                           tf.argmax(self.y_true, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_predictions, "float"),
                                      name="acc")

        with tf.name_scope("optimize"):
            self.optim = bert_optimization.create_optimizer(
                self.loss, self.learning_rate, self.num_train_steps,
                self.num_warmup_steps, False)
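
Since the loss uses tf.nn.softmax_cross_entropy_with_logits and the accuracy takes tf.argmax(self.y_true, 1), y_true is assumed to be one-hot. A toy sketch of that accuracy computation (values are illustrative, not from the example):

    import tensorflow as tf

    y_pred = tf.constant([[0.1, 0.9], [0.8, 0.2]])  # softmax outputs
    y_true = tf.constant([[0.0, 1.0], [0.0, 1.0]])  # one-hot labels
    correct = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
    acc = tf.reduce_mean(tf.cast(correct, "float"))

    with tf.Session() as sess:
        print(sess.run(acc))  # 0.5 (one of the two examples is correct)
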
Example #6
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [-1, ner_params.max_seq_length, ner_params.labels_len])

        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_sum(per_example_loss)
        probabilities = tf.nn.softmax(logits, axis=-1)
        predict = tf.argmax(probabilities, axis=-1)
        return (loss, per_example_loss, logits, predict)
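
Unlike the sentence-level examples, this one classifies every token: the sequence output is flattened to 2-D for the matmul and reshaped back to [batch, seq_length, num_labels] before the per-token argmax. A shape-only sketch with made-up sizes (not the example's real config):

    import tensorflow as tf

    batch, seq_len, hidden, num_labels = 2, 8, 16, 5   # illustrative sizes
    sequence_output = tf.zeros([batch, seq_len, hidden])
    output_weight = tf.zeros([num_labels, hidden])

    flat = tf.reshape(sequence_output, [-1, hidden])        # [batch*seq_len, hidden]
    logits = tf.matmul(flat, output_weight, transpose_b=True)
    logits = tf.reshape(logits, [batch, seq_len, num_labels])
    predict = tf.argmax(tf.nn.softmax(logits, axis=-1), axis=-1)
    print(logits.shape, predict.shape)  # (2, 8, 5) (2, 8)
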
Example #7
    def RACL_BERT(self, bert_input_ids, bert_input_mask, bert_segment_ids,
                  position_att):
        bert_model = bert_modeling.BertModel(
            config=self.bert_config,
            is_training=False,
            input_ids=self.bert_input_ids,
            input_mask=self.bert_input_mask,
            token_type_ids=self.bert_segment_ids,
            use_one_hot_embeddings=False)
        bert_out = bert_model.get_sequence_output()

        # Since BERT acts as a trainable module shared across the different tasks, we don't need the shared fully-connected layer.
        inputs = tf.concat(
            [bert_out[:, 1:, :],
             tf.expand_dims(bert_out[:, 0, :], 1)], 1)
        batch_size = tf.shape(inputs)[0]

        mask256 = tf.tile(tf.expand_dims(self.word_mask, -1),
                          [1, 1, self.opt.filter_num])
        mask70 = tf.tile(tf.expand_dims(self.word_mask, 1),
                         [1, self.opt.max_sentence_len, 1])

        # Private Feature
        aspect_input, opinion_input, context_input = list(), list(), list()
        aspect_prob_list, opinion_prob_list, senti_prob_list = list(), list(), list()
        aspect_input.append(inputs)
        opinion_input.append(inputs)
        context_input.append(inputs)

        # We found that the SC task is more difficult than the AE and OE tasks.
        # Hence, we augment it with a memory-like mechanism by updating the aspect query with the retrieved contexts.
        # Refer to https://www.aclweb.org/anthology/D16-1021/ for more details about the memory network.
        query = list()
        query.append(inputs)

        for hop in range(self.opt.hop_num):
            with tf.variable_scope('layers_{}'.format(hop)):
                # AE & OE Convolution
                aspect_conv = tf.layers.conv1d(aspect_input[-1],
                                               self.opt.filter_num,
                                               self.opt.kernel_size,
                                               padding='SAME',
                                               activation=tf.nn.relu,
                                               name='aspect_conv')
                opinion_conv = tf.layers.conv1d(opinion_input[-1],
                                                self.opt.filter_num,
                                                self.opt.kernel_size,
                                                padding='SAME',
                                                activation=tf.nn.relu,
                                                name='opinion_conv')

                # Relation R1
                aspect_see_opinion = tf.matmul(
                    tf.nn.l2_normalize(aspect_conv, -1),
                    tf.nn.l2_normalize(opinion_conv, -1),
                    adjoint_b=True)
                aspect_att_opinion = softmask_2d(aspect_see_opinion,
                                                 self.word_mask)
                aspect_inter = tf.concat(
                    [aspect_conv,
                     tf.matmul(aspect_att_opinion, opinion_conv)], -1)

                opinion_see_aspect = tf.matmul(
                    tf.nn.l2_normalize(opinion_conv, -1),
                    tf.nn.l2_normalize(aspect_conv, -1),
                    adjoint_b=True)
                opinion_att_aspect = softmask_2d(opinion_see_aspect,
                                                 self.word_mask)
                opinion_inter = tf.concat(
                    [opinion_conv,
                     tf.matmul(opinion_att_aspect, aspect_conv)], -1)

                # AE & OE Prediction
                aspect_p = layers.fully_connected(
                    aspect_inter,
                    self.opt.class_num,
                    activation_fn=None,
                    weights_initializer=self.Winit,
                    biases_initializer=self.Winit,
                    scope='aspect_p')
                opinion_p = layers.fully_connected(
                    opinion_inter,
                    self.opt.class_num,
                    activation_fn=None,
                    weights_initializer=self.Winit,
                    biases_initializer=self.Winit,
                    scope='opinion_p')

                # OE Confidence
                # A slight difference from the original paper.
                # For propagating R3, we calculate the confidence of each candidate opinion word.
                # Only when a word satisfies the condition Prob[B,I] > Prob[O] in OE can it be propagated to SC.
                confidence = tf.maximum(
                    0., 1 - 2. * tf.nn.softmax(opinion_p, -1)[:, :, 0])
                opinion_propagate = tf.tile(
                    tf.expand_dims(confidence, 1),
                    [1, self.opt.max_sentence_len, 1]) * mask70 * position_att

                # SC Convolution
                context_conv = tf.layers.conv1d(context_input[-1],
                                                self.opt.emb_dim,
                                                self.opt.kernel_size,
                                                padding='SAME',
                                                activation=tf.nn.relu,
                                                name='context_conv')

                # SC Aspect-Context Attention
                word_see_context = tf.matmul(
                    (query[-1]),
                    tf.nn.l2_normalize(context_conv, -1),
                    adjoint_b=True) * position_att
                word_att_context = softmask_2d(word_see_context,
                                               self.word_mask,
                                               scale=True)

                # Relation R2 & R3
                word_att_context += aspect_att_opinion + opinion_propagate
                context_inter = (query[-1] +
                                 tf.matmul(word_att_context, context_conv)
                                 )  # query + value
                query.append(context_inter)  # update query

                # SC Prediction
                senti_p = layers.fully_connected(
                    context_inter,
                    self.opt.class_num,
                    activation_fn=None,
                    weights_initializer=self.Winit,
                    biases_initializer=self.Winit,
                    scope='senti_p')

                # Stacking
                aspect_prob_list.append(tf.expand_dims(aspect_p, -1))
                opinion_prob_list.append(tf.expand_dims(opinion_p, -1))
                senti_prob_list.append(tf.expand_dims(senti_p, -1))

                # We use DropBlock to enhance the learning of the private features for AE & OE & SC.
                # Refer to http://papers.nips.cc/paper/8271-dropblock-a-regularization-method-for-convolutional-networks for more details.
                aspect_inter = tf.squeeze(
                    self.drop_block1(inputs=tf.expand_dims(aspect_inter, -1),
                                     training=self.is_training), -1)
                opinion_inter = tf.squeeze(
                    self.drop_block2(inputs=tf.expand_dims(opinion_inter, -1),
                                     training=self.is_training), -1)
                context_conv = tf.squeeze(
                    self.drop_block3(inputs=tf.expand_dims(context_conv, -1),
                                     training=self.is_training), -1)

                aspect_input.append(aspect_inter)
                opinion_input.append(opinion_inter)
                context_input.append(context_conv)

        # Multi-layer Short-cut
        aspect_prob = tf.reduce_mean(tf.concat(aspect_prob_list, -1), -1)
        opinion_prob = tf.reduce_mean(tf.concat(opinion_prob_list, -1), -1)
        sentiment_prob = tf.reduce_mean(tf.concat(senti_prob_list, -1), -1)

        return aspect_prob, opinion_prob, sentiment_prob
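
The opinion-confidence gate used above, max(0, 1 - 2 * Prob[O]), is positive exactly when Prob[O] < 0.5, i.e. when Prob[B] + Prob[I] > Prob[O] as the comment states. A small numeric sketch (logits are made up, not from the example):

    import tensorflow as tf

    # [batch=1, seq_len=2, class_num=3]; index 0 is the "O" class
    opinion_p = tf.constant([[[2.0, 0.1, 0.1],    # Prob[O] high -> gated to 0
                              [0.1, 2.0, 0.1]]])  # Prob[O] low  -> positive confidence
    confidence = tf.maximum(0., 1 - 2. * tf.nn.softmax(opinion_p, -1)[:, :, 0])

    with tf.Session() as sess:
        print(sess.run(confidence))  # ~[[0.0, 0.77]]
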