    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_type_ids = features["input_type_ids"]
        extract_indices = features["extract_indices"]

        model = modeling.AlbertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=input_type_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()
        scaffold_fn = None
        (assignment_map,
         initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        all_layers = model.get_all_encoder_layers()

        predictions = {
            "unique_ids": unique_ids,
            "extract_indices": extract_indices
        }

        for (i, layer_index) in enumerate(layer_indexes):
            predictions["layer_output_%d" % i] = all_layers[layer_index]

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
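
# Usage sketch (an assumption, not part of the original snippet): a prediction-only
# `model_fn` like the one above is normally returned by a builder and handed to a
# TPUEstimator. `run_config`, `input_fn` and the batch size below are illustrative.
def extract_features(model_fn, run_config, input_fn, use_tpu, batch_size):
    """Runs the prediction-only model_fn and yields per-example outputs."""
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=use_tpu,
        model_fn=model_fn,
        config=run_config,
        predict_batch_size=batch_size)
    for result in estimator.predict(input_fn, yield_single_examples=True):
        # e.g. result["unique_ids"], result["extract_indices"],
        # result["layer_output_0"] with shape [seq_length, hidden_size]
        yield result
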
def create_model(albert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings,
                 task_name):
    """Creates a classification model."""
    model = modeling.AlbertModel(config=albert_config,
                                 is_training=is_training,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=use_one_hot_embeddings)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, rate=0.1)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        if task_name != "sts-b":
            probabilities = tf.nn.softmax(logits, axis=-1)
            predictions = tf.argmax(probabilities,
                                    axis=-1,
                                    output_type=tf.int32)
            log_probs = tf.nn.log_softmax(logits, axis=-1)
            one_hot_labels = tf.one_hot(labels,
                                        depth=num_labels,
                                        dtype=tf.float32)

            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                              axis=-1)
        else:
            probabilities = logits
            logits = tf.squeeze(logits, [-1])
            predictions = logits
            per_example_loss = tf.square(logits - labels)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, probabilities, logits, predictions)
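
# Usage sketch (assumption): wiring `create_model` into a training op with the
# repo's accompanying `optimization` module. Tensors and hyperparameters below
# are placeholders for illustration, not values from the source.
def build_classifier_train_op(albert_config, features, num_labels):
    (loss, _, _, _, _) = create_model(
        albert_config=albert_config,
        is_training=True,
        input_ids=features["input_ids"],
        input_mask=features["input_mask"],
        segment_ids=features["segment_ids"],
        labels=features["label_ids"],
        num_labels=num_labels,
        use_one_hot_embeddings=False,
        task_name="mnli")
    # Assumed to come from the accompanying optimization.py (AdamW + warmup).
    train_op = optimization.create_optimizer(
        loss, 2e-5, num_train_steps=10000, num_warmup_steps=1000, use_tpu=False)
    return loss, train_op
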
def _create_model_from_scratch(albert_config, is_training, input_ids,
                               input_mask, segment_ids,
                               use_one_hot_embeddings):
    """Creates an ALBERT model from scratch (as opposed to hub)."""
    model = modeling.AlbertModel(config=albert_config,
                                 is_training=is_training,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=use_one_hot_embeddings)
    output_layer = model.get_pooled_output()
    return output_layer
def _create_model_from_scratch(albert_config, is_training, input_ids,
                               input_mask, segment_ids, use_one_hot_embeddings,
                               use_einsum):
    """Creates an ALBERT model from scratch/config."""
    model = modeling.AlbertModel(config=albert_config,
                                 is_training=is_training,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=use_one_hot_embeddings,
                                 use_einsum=use_einsum)
    return (model.get_pooled_output(), model.get_sequence_output())
    def bert_embedding(self):
        # load the pre-trained ALBERT embedding
        albert_config = modeling.AlbertConfig.from_json_file(
            self.config.bert_config_path)  # path to the ALBERT config file
        model = modeling.AlbertModel(config=albert_config,
                                     is_training=True,
                                     input_ids=self.input_ids,
                                     input_mask=self.input_mask,
                                     token_type_ids=self.segment_ids,
                                     use_one_hot_embeddings=False)
        embedding = model.get_sequence_output()
        return embedding
def create_model(
        albert_config,
        is_training,
        input_ids,
        input_mask,
        segment_ids,
        labels,
        num_labels,
        num_choices,
        use_one_hot_embeddings):
  """Creates a classification model."""

  output_layer = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings
  ).get_pooled_output()

  hidden_size = output_layer.shape[-1].value
  print('HIDDEN: ', hidden_size)

  softmax_weights = tf.get_variable(
      "softmax_weights", [hidden_size, 1],
      initializer=tf.truncated_normal_initializer(stddev=0.02))
  print('WEIGHT: ', softmax_weights.shape)
  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, softmax_weights)
    print('LOGITS: ', logits.shape)
    logits = tf.reshape(logits, (-1, num_choices))
    print(logits.shape)

    probabilities = tf.nn.softmax(logits, axis=-1)
    predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    print('PROB: ', log_probs.shape)
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    print('ONE: ', one_hot_labels.shape)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)

    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, probabilities, logits, predictions)
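
# Sketch (assumption): for a multiple-choice task the inputs usually arrive as
# [batch_size, num_choices, seq_length] and are flattened before calling
# `create_model`, so the per-choice logits can be reshaped back to
# (-1, num_choices) as done above. Argument names are illustrative.
def flatten_choice_inputs(input_ids, input_mask, segment_ids, seq_length):
    flat_input_ids = tf.reshape(input_ids, [-1, seq_length])
    flat_input_mask = tf.reshape(input_mask, [-1, seq_length])
    flat_segment_ids = tf.reshape(segment_ids, [-1, seq_length])
    return flat_input_ids, flat_input_mask, flat_segment_ids
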
def module_fn(is_training):
    """Module function."""
    input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
    input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
    segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
    mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

    albert_config = modeling.AlbertConfig.from_json_file(
        os.path.join(FLAGS.albert_directory, "albert_config.json"))
    model = modeling.AlbertModel(config=albert_config,
                                 is_training=is_training,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=False)

    mlm_logits = get_mlm_logits(model, albert_config, mlm_positions)

    assert tf.gfile.Exists(FLAGS.vocab_path)
    vocab_file = tf.constant(value=FLAGS.vocab_path,
                             dtype=tf.string,
                             name="vocab_file")

    # By adding `vocab_file` to the ASSET_FILEPATHS collection, TF-Hub will
    # rewrite this tensor so that this asset is portable.
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file)

    hub.add_signature(name="tokens",
                      inputs=dict(input_ids=input_ids,
                                  input_mask=input_mask,
                                  segment_ids=segment_ids),
                      outputs=dict(sequence_output=model.get_sequence_output(),
                                   pooled_output=model.get_pooled_output()))

    hub.add_signature(name="mlm",
                      inputs=dict(input_ids=input_ids,
                                  input_mask=input_mask,
                                  segment_ids=segment_ids,
                                  mlm_positions=mlm_positions),
                      outputs=dict(sequence_output=model.get_sequence_output(),
                                   pooled_output=model.get_pooled_output(),
                                   mlm_logits=mlm_logits))

    hub.add_signature(name="tokenization_info",
                      inputs={},
                      outputs=dict(vocab_file=vocab_file,
                                   do_lower_case=tf.constant(
                                       FLAGS.do_lower_case)))
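
# Export sketch (assumption, mirroring the usual TF-Hub 1.x flow): `module_fn`
# above is turned into a module spec with separate training/serving graphs and
# exported next to the ALBERT checkpoint. `FLAGS.export_path` is illustrative.
def export_albert_module():
    tags_and_args = [
        (set(), {"is_training": False}),     # serving graph
        ({"train"}, {"is_training": True}),  # training graph
    ]
    spec = hub.create_module_spec(module_fn, tags_and_args=tags_and_args)
    checkpoint_path = os.path.join(FLAGS.albert_directory, FLAGS.checkpoint_name)
    spec.export(FLAGS.export_path, checkpoint_path=checkpoint_path)
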
def build_model(sess):
    """Module function."""
    input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
    input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
    segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
    mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

    albert_config_path = os.path.join(FLAGS.albert_directory,
                                      "albert_config.json")
    albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
    model = modeling.AlbertModel(
        config=albert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
    )

    get_mlm_logits(
        model.get_sequence_output(),
        albert_config,
        mlm_positions,
        model.get_embedding_table(),
    )
    get_sentence_order_logits(model.get_pooled_output(), albert_config)

    checkpoint_path = os.path.join(FLAGS.albert_directory,
                                   FLAGS.checkpoint_name)
    tvars = tf.trainable_variables()
    (
        assignment_map,
        initialized_variable_names,
    ) = modeling.get_assignment_map_from_checkpoint(tvars, checkpoint_path)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)
    tf.train.init_from_checkpoint(checkpoint_path, assignment_map)
    init = tf.global_variables_initializer()
    sess.run(init)
    return sess
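
# Usage sketch (assumption): after `build_model` restores the checkpoint, the
# placeholders can be fetched by name and fed for inference.
def restore_and_get_inputs():
    sess = tf.Session()
    sess = build_model(sess)
    graph = sess.graph
    input_ids = graph.get_tensor_by_name("input_ids:0")
    input_mask = graph.get_tensor_by_name("input_mask:0")
    segment_ids = graph.get_tensor_by_name("segment_ids:0")
    return sess, input_ids, input_mask, segment_ids
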
        def create_model(self):
            input_ids = AlbertModelTest.ids_tensor(
                [self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = AlbertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = AlbertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], self.type_vocab_size)

            config = modeling.AlbertConfig(
                vocab_size=self.vocab_size,
                embedding_size=self.embedding_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
            )

            model = modeling.AlbertModel(
                config=config,
                is_training=self.is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                scope=self.scope,
            )

            outputs = {
                "embedding_output": model.get_embedding_output(),
                "sequence_output": model.get_sequence_output(),
                "pooled_output": model.get_pooled_output(),
                "all_encoder_layers": model.get_all_encoder_layers(),
            }
            return outputs
    def __init__(self, albert_config, num_labels, seq_length, init_checkpoint):
        self.albert_config = albert_config
        self.num_labels = num_labels
        self.seq_length = seq_length

        self.input_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                        name='input_ids')
        self.input_mask = tf.placeholder(tf.int32, [None, self.seq_length],
                                         name='input_mask')
        self.segment_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                          name='segment_ids')
        self.labels = tf.placeholder(tf.int32, [None], name='labels')
        self.is_training = tf.placeholder(tf.bool,
                                          shape=[],
                                          name='is_training')
        #self.learning_rate = tf.placeholder(tf.float32, shape=[], name='learn_rate')

        self.model = modeling.AlbertModel(config=self.albert_config,
                                          is_training=self.is_training,
                                          input_ids=self.input_ids,
                                          input_mask=self.input_mask,
                                          token_type_ids=self.segment_ids)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                         init_string)

        self.inference()
def create_model(albert_config, is_training, input_ids, input_mask,
                 segment_ids, input_cdc_ids, age, sex_ids, labels, num_labels,
                 use_one_hot_embeddings):
    """Creates a classification model."""
    if not FLAGS.cdc_only:
        model = modeling.AlbertModel(
            config=albert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        # In the demo, we are doing a simple classification task on the entire
        # segment.
        #
        # If you want to use the token-level output, use model.get_sequence_output()
        # instead.
        if FLAGS.use_pooled_output:
            tf.logging.info("using pooled output")
            output_albert_layer = model.get_pooled_output()
        else:
            tf.logging.info("using meaned output")
            output_albert_layer = tf.reduce_mean(model.get_sequence_output(),
                                                 axis=1)

    with tf.variable_scope('cdc'):
        with tf.variable_scope("embedding"):
            embedding_table = tf.get_variable(
                name="embedding_table",
                shape=[FLAGS.cdc_vocab_size, FLAGS.cdc_embedding_size],
                initializer=modeling.create_initializer())
            embedded = tf.nn.embedding_lookup(embedding_table, input_cdc_ids)
            mask = tf.not_equal(input_cdc_ids, 0)
            embed_average = tf.keras.layers.GlobalAveragePooling1D()(embedded,
                                                                     mask)
            embed_max = tf.keras.layers.GlobalMaxPooling1D()(embedded)
            concat_max_average = tf.concat([embed_average, embed_max], axis=-1)


#            concat_sex_age = tf.concat([average, age, sex_ids], axis=-1)
#
#        with tf.variable_scope("dense_1"):
#            input_size = concat_sex_age.shape[-1].value
#            output_size = 2 * FLAGS.cdc_embedding_size
#
#            W = tf.get_variable(name="kernel",
#                                shape=[input_size, output_size],
#                                initializer=modeling.create_initializer())
#            b = tf.get_variable(name="bias",
#                                shape=[output_size],
#                                initializer=tf.zeros_initializer)
#            dense_1 = tf.matmul(concat_sex_age, W)
#            dense_1 = tf.nn.bias_add(dense_1, b)
#            dense_1 = tf.nn.relu(dense_1)
#
#        with tf.variable_scope("dense_2"):
#            input_size = dense_1.shape[-1].value
#            output_size = FLAGS.cdc_embedding_size
#            W = tf.get_variable(name="kernel",
#                                shape=[input_size, output_size],
#                                initializer=modeling.create_initializer())
#            b = tf.get_variable(name="bias",
#                                shape=[output_size],
#                                initializer=tf.zeros_initializer)
#            dense_2 = tf.matmul(dense_1, W)
#            dense_2 = tf.nn.bias_add(dense_2, b)
#            dense_2 = tf.nn.relu(dense_2)

        output_cdc_layer = tf.concat([age, sex_ids, concat_max_average],
                                     axis=-1)

    # Concatenate the output_layer with other features
    if FLAGS.cdc_only:
        output_layer = output_cdc_layer
    else:
        output_layer = tf.concat([output_albert_layer, output_cdc_layer],
                                 axis=-1)

    hidden_size = output_layer.shape[-1].value

    with tf.variable_scope("output"):
        output_weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable("output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, probabilities, predictions)
    def __init__(self, bert_config, tokenizer):
        _graph = tf.Graph()
        with _graph.as_default():
            self.X = tf.placeholder(tf.int32, [None, None])
            self.top_p = tf.placeholder(tf.float32, None)
            self.top_k = tf.placeholder(tf.int32, None)
            self.k = tf.placeholder(tf.int32, None)
            self.temperature = tf.placeholder(tf.float32, None)
            self.indices = tf.placeholder(tf.int32, [None, None])
            self.MASK = tf.placeholder(tf.int32, [None, None])
            self._tokenizer = tokenizer

            self.model = modeling.AlbertModel(
                config=bert_config,
                is_training=False,
                input_ids=self.X,
                input_mask=self.MASK,
                use_one_hot_embeddings=False,
            )
            self.logits = self.model.get_pooled_output()
            input_tensor = self.model.get_sequence_output()
            output_weights = self.model.get_embedding_table()

            with tf.variable_scope('cls/predictions'):
                with tf.variable_scope('transform'):
                    input_tensor = tf.layers.dense(
                        input_tensor,
                        units=bert_config.embedding_size,
                        activation=modeling.get_activation(
                            bert_config.hidden_act),
                        kernel_initializer=modeling.create_initializer(
                            bert_config.initializer_range),
                    )
                    input_tensor = modeling.layer_norm(input_tensor)

                output_bias = tf.get_variable(
                    'output_bias',
                    shape=[bert_config.vocab_size],
                    initializer=tf.zeros_initializer(),
                )
                logits = tf.matmul(input_tensor,
                                   output_weights,
                                   transpose_b=True)
                self._logits = tf.nn.bias_add(logits, output_bias)
                self._log_softmax = tf.nn.log_softmax(self._logits, axis=-1)

            logits = tf.gather_nd(self._logits, self.indices)
            logits = logits / self.temperature

            def nucleus():
                return top_p_logits(logits, self.top_p)

            def select_k():
                return top_k_logits(logits, self.top_k)

            logits = tf.cond(self.top_p > 0, nucleus, select_k)
            self.samples = tf.multinomial(logits,
                                          num_samples=self.k,
                                          output_dtype=tf.int32)

            self._sess = tf.InteractiveSession()
            self._sess.run(tf.global_variables_initializer())
            var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope='bert')
            cls = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope='cls')
            self._saver = tf.train.Saver(var_list=var_lists + cls)
            attns = _extract_attention_weights(bert_config.num_hidden_layers,
                                               tf.get_default_graph())
            self.attns = attns
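
# Hypothetical sampling call for the masked-LM wrapper above; the function name,
# batch contents and hyperparameter values are assumptions for illustration.
def sample_replacements(model, token_ids, attention_mask, masked_index):
    return model._sess.run(
        model.samples,
        feed_dict={
            model.X: [token_ids],           # [1, seq_len] token ids
            model.MASK: [attention_mask],   # [1, seq_len] attention mask
            model.indices: [masked_index],  # e.g. [0, position] into the logits
            model.temperature: 1.0,
            model.top_p: 0.9,               # > 0 selects nucleus sampling
            model.top_k: 0,
            model.k: 5,                     # number of candidate tokens to draw
        })
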
    def __init__(self):
        self.config = Config()  # configuration parameters
        # Placeholder
        self.input_ids = tf.placeholder(tf.int32,
                                        shape=[None, self.config.seq_length],
                                        name='input_ids')
        self.input_masks = tf.placeholder(tf.int32,
                                          shape=[None, self.config.seq_length],
                                          name='input_masks')
        self.segment_ids = tf.placeholder(tf.int32,
                                          shape=[None, self.config.seq_length],
                                          name='segment_ids')
        self.label_ids = tf.placeholder(tf.int32,
                                        shape=[None, self.config.seq_length],
                                        name='label_ids')

        self.input_length = tf.placeholder(shape=[None],
                                           dtype=tf.int32,
                                           name='input-length')  # lengths of the input texts
        self.input_keep_prob = tf.placeholder(
            dtype=tf.float32, name='input-keep-prob')  # dropout keep probability

        # Load the ALBERT configuration
        bert_config = modeling.AlbertConfig.from_json_file(
            self.config.bert_config_file)
        # Build the ALBERT network
        self.model = modeling.AlbertModel(config=bert_config,
                                          is_training=self.config.is_training,
                                          input_ids=self.input_ids,
                                          input_mask=self.input_masks,
                                          token_type_ids=self.segment_ids,
                                          use_one_hot_embeddings=False)

        # Initialize the network above with the pre-trained weights
        tvars = tf.trainable_variables()
        assignment_map, initialized_variable_names = modeling.get_assignment_map_from_checkpoint(
            tvars, self.config.initial_checkpoint)
        tf.train.init_from_checkpoint(self.config.initial_checkpoint,
                                      assignment_map=assignment_map)
        # Take the sequence output (token embeddings), dim: (batch_size, seq_length, 384)
        self.sequence_output = self.model.get_sequence_output()

        if self.config.is_bilstm:  # whether to add a Bi-LSTM layer
            # Bi-LSTM/Bi-GRU
            cell_fw = self.get_rnn(self.config.rnn_type)  # forward cell
            cell_bw = self.get_rnn(self.config.rnn_type)  # backward cell
            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=self.sequence_output,
                dtype=tf.float32)
            outputs = tf.concat(
                values=outputs, axis=2
            )  # concat the forward and backward outputs, dim: (batch_size, max_length, 2*hidden_dim)
            # `rate` is the drop probability, so convert from the keep probability.
            outputs = tf.layers.dropout(inputs=outputs,
                                        rate=1.0 - self.input_keep_prob)
        else:
            outputs = self.sequence_output

        # Output layer, dim: (batch_size, max_length, num_classes)
        self.logits = tf.layers.dense(inputs=outputs,
                                      units=self.config.num_classes,
                                      name='logits')

        # Whether to use a CRF layer
        if self.config.crf:
            log_likelihood, self.transition_params = crf.crf_log_likelihood(
                inputs=self.logits,
                tag_indices=self.label_ids,
                sequence_lengths=self.input_length)
            self.loss = -tf.reduce_mean(log_likelihood)
            # Decoded predictions
            self.predict, self.viterbi_score = crf.crf_decode(
                potentials=self.logits,
                transition_params=self.transition_params,
                sequence_length=self.input_length)
        else:
            # Loss: cross entropy. `label_ids` holds class indices per token,
            # so the sparse variant is the correct one here.
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.label_ids, logits=self.logits)
            mask = tf.sequence_mask(lengths=self.input_length,
                                    maxlen=self.config.seq_length)
            losses = tf.boolean_mask(cross_entropy, mask=mask)
            self.loss = tf.reduce_mean(losses)
            # Per-token predictions (argmax over the class dimension)
            self.predict = tf.argmax(tf.nn.softmax(self.logits),
                                     axis=-1,
                                     name='predict')

        # Optimizer
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.config.learning_rate).minimize(loss=self.loss)
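
# Hypothetical single training step for the sequence-labeling model above; the
# wrapper instance `model` and the batch arrays are assumptions for illustration.
def run_train_step(sess, model, batch):
    feed = {
        model.input_ids: batch["input_ids"],      # [batch, seq_length]
        model.input_masks: batch["input_masks"],  # [batch, seq_length]
        model.segment_ids: batch["segment_ids"],  # [batch, seq_length]
        model.label_ids: batch["label_ids"],      # [batch, seq_length]
        model.input_length: batch["lengths"],     # [batch]
        model.input_keep_prob: 0.9,
    }
    _, loss = sess.run([model.optimizer, model.loss], feed_dict=feed)
    return loss
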
    def __init__(self, albert_config, num_labels, seq_length, init_checkpoint):
        self.albert_config = albert_config
        self.num_labels = num_labels
        self.seq_length = seq_length
        self.tower_grads = []
        self.losses = []

        self.input_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                        name='input_ids')
        self.input_mask = tf.placeholder(tf.int32, [None, self.seq_length],
                                         name='input_mask')
        self.segment_ids = tf.placeholder(tf.int32, [None, self.seq_length],
                                          name='segment_ids')
        self.labels = tf.placeholder(tf.int32, [None], name='labels')
        self.batch_size = tf.placeholder(tf.int32, shape=[], name='batch_size')
        self.is_training = tf.placeholder(tf.bool,
                                          shape=[],
                                          name='is_training')
        print(self.batch_size)
        self.gpu_step = self.batch_size // gpu_nums

        global_step = tf.train.get_or_create_global_step()

        learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)

        # Implements linear decay of the learning rate.
        learning_rate = tf.train.polynomial_decay(learning_rate,
                                                  global_step,
                                                  num_train_steps,
                                                  end_learning_rate=0.0,
                                                  power=1.0,
                                                  cycle=False)

        if num_warmup_steps:
            global_steps_int = tf.cast(global_step, tf.int32)
            warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)

            global_steps_float = tf.cast(global_steps_int, tf.float32)
            warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)

            warmup_percent_done = global_steps_float / warmup_steps_float
            warmup_learning_rate = init_lr * warmup_percent_done

            is_warmup = tf.cast(global_steps_int < warmup_steps_int,
                                tf.float32)
            learning_rate = ((1.0 - is_warmup) * learning_rate +
                             is_warmup * warmup_learning_rate)

        optimizer = optimization.AdamWeightDecayOptimizer(
            learning_rate=learning_rate,
            weight_decay_rate=0.01,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-6,
            exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

        with tf.variable_scope(tf.get_variable_scope()) as outer_scope:
            pred = []
            label = []
            for d in range(gpu_nums):
                with tf.device("/gpu:%s" % d), tf.name_scope("%s_%s" %
                                                             ("tower", d)):
                    self.model = modeling.AlbertModel(
                        config=self.albert_config,
                        is_training=self.is_training,
                        input_ids=self.input_ids[d * self.gpu_step:(d + 1) *
                                                 self.gpu_step],
                        input_mask=self.input_mask[d * self.gpu_step:(d + 1) *
                                                   self.gpu_step],
                        token_type_ids=self.segment_ids[d *
                                                        self.gpu_step:(d + 1) *
                                                        self.gpu_step])
                    print("GPU:", d)

                    tvars = tf.trainable_variables()
                    initialized_variable_names = {}
                    if init_checkpoint:
                        (assignment_map, initialized_variable_names
                         ) = modeling.get_assignment_map_from_checkpoint(
                             tvars, init_checkpoint)

                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map)

                    logging.info("**** Trainable Variables ****")
                    for var in tvars:
                        init_string = ""
                        if var.name in initialized_variable_names:
                            init_string = ", *INIT_FROM_CKPT*"
                        logging.info("  name = %s, shape = %s%s", var.name,
                                     var.shape, init_string)

                    output_layer = self.model.get_pooled_output()
                    logging.info(output_layer)

                    # `is_training` is a placeholder tensor, so gate dropout with
                    # `training=` instead of a Python `if` on the tensor.
                    output_layer = tf.layers.dropout(output_layer,
                                                     rate=0.1,
                                                     training=self.is_training)

                    match_1 = tf.strided_slice(output_layer, [0],
                                               [self.gpu_step], [2])
                    match_2 = tf.strided_slice(output_layer, [1],
                                               [self.gpu_step], [2])

                    match = tf.concat([match_1, match_2], 1)

                    self.logits = tf.layers.dense(match,
                                                  self.num_labels,
                                                  name='fc',
                                                  reuse=tf.AUTO_REUSE)

                    logging.info(self.logits)
                    self.r_labels = tf.strided_slice(
                        self.labels[d * self.gpu_step:(d + 1) * self.gpu_step],
                        [0], [self.gpu_step], [2])
                    logging.info(self.r_labels)

                    self.r_labels = tf.expand_dims(self.r_labels, -1)
                    logging.info(self.r_labels)
                    self.loss = tf.losses.mean_squared_error(
                        self.logits, self.r_labels)

                    tvars = tf.trainable_variables()
                    grads = tf.gradients(self.loss, tvars)

                    (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

                    self.tower_grads.append(list(zip(grads, tvars)))
                    self.losses.append(self.loss)
                    label.append(self.r_labels)
                    pred.append(self.logits)
                outer_scope.reuse_variables()

        with tf.name_scope("apply_gradients"), tf.device("/cpu:0"):
            gradients = self.average_gradients(self.tower_grads)
            train_op = optimizer.apply_gradients(gradients,
                                                 global_step=global_step)
            new_global_step = global_step + 1
            self.train_op = tf.group(train_op,
                                     [global_step.assign(new_global_step)])
            self.losses = tf.reduce_mean(self.losses)
            self.pred = tf.concat(pred, 0)
            self.label = tf.concat(label, 0)
            logging.info(self.pred)
            logging.info(self.label)
def module_fn(is_training):
    """Module function."""
    input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
    input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
    segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
    mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

    albert_config_path = os.path.join(FLAGS.albert_directory,
                                      "albert_config.json")
    albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
    model = modeling.AlbertModel(config=albert_config,
                                 is_training=is_training,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=False,
                                 use_einsum=FLAGS.use_einsum)

    mlm_logits = get_mlm_logits(model, albert_config, mlm_positions)
    # sop_log_probs = get_sop_log_probs(model, albert_config)

    vocab_model_path = os.path.join(FLAGS.albert_directory, "30k-clean.model")
    vocab_file_path = os.path.join(FLAGS.albert_directory, "30k-clean.vocab")

    config_file = tf.constant(value=albert_config_path,
                              dtype=tf.string,
                              name="config_file")
    vocab_model = tf.constant(value=vocab_model_path,
                              dtype=tf.string,
                              name="vocab_model")
    # This is only for visualization purpose.
    vocab_file = tf.constant(value=vocab_file_path,
                             dtype=tf.string,
                             name="vocab_file")

    # By adding `config_file, vocab_model and vocab_file`
    # to the ASSET_FILEPATHS collection, TF-Hub will
    # rewrite this tensor so that this asset is portable.
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, config_file)
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_model)
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file)

    hub.add_signature(name="tokens",
                      inputs=dict(input_ids=input_ids,
                                  input_mask=input_mask,
                                  segment_ids=segment_ids),
                      outputs=dict(sequence_output=model.get_sequence_output(),
                                   pooled_output=model.get_pooled_output()))

    # change
    # hub.add_signature(
    #     name="sop",
    #     inputs=dict(
    #         input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids),
    #     outputs=dict(
    #         sequence_output=model.get_sequence_output(),
    #         pooled_output=model.get_pooled_output(),
    #         sop_log_probs=sop_log_probs))

    hub.add_signature(name="mlm",
                      inputs=dict(input_ids=input_ids,
                                  input_mask=input_mask,
                                  segment_ids=segment_ids,
                                  mlm_positions=mlm_positions),
                      outputs=dict(sequence_output=model.get_sequence_output(),
                                   pooled_output=model.get_pooled_output(),
                                   mlm_logits=mlm_logits))

    hub.add_signature(name="tokenization_info",
                      inputs={},
                      outputs=dict(vocab_file=vocab_model,
                                   do_lower_case=tf.constant(
                                       FLAGS.do_lower_case)))
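
# Consumption sketch (assumption): loading the exported module with TF-Hub 1.x
# and requesting the "tokens" signature; `module_path` is illustrative.
def load_albert_from_hub(module_path, input_ids, input_mask, segment_ids):
    albert_module = hub.Module(module_path, trainable=True)
    albert_outputs = albert_module(
        inputs=dict(input_ids=input_ids,
                    input_mask=input_mask,
                    segment_ids=segment_ids),
        signature="tokens",
        as_dict=True)
    # pooled_output: [batch, hidden]; sequence_output: [batch, seq, hidden]
    return albert_outputs["pooled_output"], albert_outputs["sequence_output"]
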
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        # Note: We keep this feature name `next_sentence_labels` to be compatible
        # with the original data created by lanzhzh@. However, in the ALBERT case
        # it does represent sentence_order_labels.
        sentence_order_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.AlbertModel(
            config=albert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             albert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        (sentence_order_loss, sentence_order_example_loss,
         sentence_order_log_probs) = get_sentence_order_output(
             albert_config, model.get_pooled_output(), sentence_order_labels)

        total_loss = masked_lm_loss + sentence_order_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            tf.logging.info("number of hidden group %d to initialize",
                            albert_config.num_hidden_groups)
            num_of_initialize_group = 1
            if FLAGS.init_from_group0:
                num_of_initialize_group = albert_config.num_hidden_groups
                if albert_config.net_structure_type > 0:
                    num_of_initialize_group = albert_config.num_hidden_layers
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint, num_of_initialize_group)
            if use_tpu:

                def tpu_scaffold():
                    for gid in range(num_of_initialize_group):
                        tf.logging.info("initialize the %dth layer", gid)
                        tf.logging.info(assignment_map[gid])
                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map[gid])
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                for gid in range(num_of_initialize_group):
                    tf.logging.info("initialize the %dth layer", gid)
                    tf.logging.info(assignment_map[gid])
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map[gid])

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu,
                                                     optimizer, poly_power,
                                                     start_warmup_step)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(*args):
                """Computes the loss and accuracy of the model."""
                (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                 masked_lm_weights, sentence_order_example_loss,
                 sentence_order_log_probs, sentence_order_labels) = args[:7]

                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                metrics = {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                }

                sentence_order_log_probs = tf.reshape(
                    sentence_order_log_probs,
                    [-1, sentence_order_log_probs.shape[-1]])
                sentence_order_predictions = tf.argmax(
                    sentence_order_log_probs, axis=-1, output_type=tf.int32)
                sentence_order_labels = tf.reshape(sentence_order_labels, [-1])
                sentence_order_accuracy = tf.metrics.accuracy(
                    labels=sentence_order_labels,
                    predictions=sentence_order_predictions)
                sentence_order_mean_loss = tf.metrics.mean(
                    values=sentence_order_example_loss)
                metrics.update({
                    "sentence_order_accuracy": sentence_order_accuracy,
                    "sentence_order_loss": sentence_order_mean_loss
                })
                return metrics

            metric_values = [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, sentence_order_example_loss,
                sentence_order_log_probs, sentence_order_labels
            ]

            eval_metrics = (metric_fn, metric_values)

            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
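
# Driver sketch (assumption): the pretraining `model_fn` above is typically
# wrapped in a TPUEstimator and driven with train/evaluate; the flag names and
# `run_config` are illustrative only.
def run_pretraining(model_fn, run_config, train_input_fn, eval_input_fn):
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)
    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.num_train_steps)
    return estimator.evaluate(input_fn=eval_input_fn, steps=FLAGS.max_eval_steps)
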
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 token_label_ids, predicate_matrix_ids, num_token_labels, num_predicate_labels,
                 use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.AlbertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # We "pool" the model by simply taking the hidden state corresponding
    # to the first token. float Tensor of shape [batch_size, hidden_size]
    # model_pooled_output = model.get_pooled_output()

    #     """Gets final hidden layer of encoder.
    #
    #     Returns:
    #       float Tensor of shape [batch_size, seq_length, hidden_size] corresponding
    #       to the final hidden of the transformer encoder.
    #     """
    sequence_bert_encode_output = model.get_sequence_output()
    if is_training:
        sequence_bert_encode_output = tf.nn.dropout(sequence_bert_encode_output, keep_prob=0.9)

    with tf.variable_scope("predicate_head_select_loss"):
        bert_sequenc_length = sequence_bert_encode_output.shape[-2].value
        # shape [batch_size, sequence_length, sequence_length, predicate_label_numbers]
        predicate_score_matrix = getHeadSelectionScores(encode_input=sequence_bert_encode_output, hidden_size_n1=100,
                                                        label_number=num_predicate_labels)
        predicate_head_probabilities = tf.nn.sigmoid(predicate_score_matrix)
        #predicate_head_prediction = tf.argmax(predicate_head_probabilities, axis=3)
        predicate_head_predictions_round = tf.round(predicate_head_probabilities)
        predicate_head_predictions = tf.cast(predicate_head_predictions_round, tf.int32)
        # shape [batch_size, sequence_length, sequence_length]
        predicate_matrix = tf.reshape(predicate_matrix_ids, [-1, bert_sequenc_length, bert_sequenc_length])
        # shape [batch_size, sequence_length, sequence_length, predicate_label_numbers]
        gold_predicate_matrix_one_hot = tf.one_hot(predicate_matrix, depth=num_predicate_labels, dtype=tf.float32)
        # shape [batch_size, sequence_length, sequence_length, predicate_label_numbers]
        predicate_sigmoid_cross_entropy_with_logits = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=predicate_score_matrix,
            labels=gold_predicate_matrix_one_hot)
        # shape []
        predicate_head_select_loss = tf.reduce_sum(predicate_sigmoid_cross_entropy_with_logits)
        # return predicate_head_probabilities, predicate_head_predictions, predicate_head_select_loss

    with tf.variable_scope("token_label_loss"):
        bert_encode_hidden_size = sequence_bert_encode_output.shape[-1].value
        token_label_output_weight = tf.get_variable(
            "token_label_output_weights", [num_token_labels, bert_encode_hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02)
        )
        token_label_output_bias = tf.get_variable(
            "token_label_output_bias", [num_token_labels], initializer=tf.zeros_initializer()
        )
        sequence_bert_encode_output = tf.reshape(sequence_bert_encode_output, [-1, bert_encode_hidden_size])
        token_label_logits = tf.matmul(sequence_bert_encode_output, token_label_output_weight, transpose_b=True)
        token_label_logits = tf.nn.bias_add(token_label_logits, token_label_output_bias)

        token_label_logits = tf.reshape(token_label_logits, [-1, FLAGS.max_seq_length, num_token_labels])
        token_label_log_probs = tf.nn.log_softmax(token_label_logits, axis=-1)
        token_label_one_hot_labels = tf.one_hot(token_label_ids, depth=num_token_labels, dtype=tf.float32)
        token_label_per_example_loss = -tf.reduce_sum(token_label_one_hot_labels * token_label_log_probs, axis=-1)
        token_label_loss = tf.reduce_sum(token_label_per_example_loss)
        token_label_probabilities = tf.nn.softmax(token_label_logits, axis=-1)
        token_label_predictions = tf.argmax(token_label_probabilities, axis=-1)
        # return (token_label_loss, token_label_per_example_loss, token_label_logits, token_label_predict)

    loss = predicate_head_select_loss + token_label_loss
    return (loss,
            predicate_head_select_loss, predicate_head_probabilities, predicate_head_predictions,
            token_label_loss, token_label_per_example_loss, token_label_logits, token_label_predictions)