Example #1
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, token_start_mask,
                 hidden_size, num_layers, hidden_dropout_prob):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    embedding = model.get_sequence_output()
    batch_size, max_seq_length, embedding_size = modeling.get_shape_list(
        embedding, expected_rank=3)
    lengths = tf.reduce_sum(
        input_mask, axis=1
    )  # [batch_size] vector, sequence lengths of current batch
    mask = tf.to_float(token_start_mask)

    # mlp = MLP_and_softmax(embedded_chars=embedding, hidden_size=hidden_size, num_layers=num_layers,
    #                         hidden_dropout_prob=hidden_dropout_prob, initializers=initializers, num_labels=num_labels,
    #                         seq_length=max_seq_length, labels=labels, length_mask=mask, is_training=is_training)
    # rst = mlp.compute()
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=hidden_size,
                          cell_type='lstm',
                          num_layers=num_layers,
                          dropout_rate=1.0 - hidden_dropout_prob,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer_not_really_working(crf_only=False)
    return rst
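
Note that Example #1 is the only variant here that derives sequence lengths from input_mask; the later examples count non-zero token ids instead. Under the usual BERT convention (pad id 0, mask 1 for real tokens) the two are equivalent, as this small NumPy sketch (an illustration, not part of any example) shows:

import numpy as np

# Toy batch: two sequences padded to length 5 with token id 0.
input_ids = np.array([[101, 2023, 102, 0, 0],
                      [101, 102, 0, 0, 0]])
input_mask = np.array([[1, 1, 1, 0, 0],
                       [1, 1, 0, 0, 0]])

lengths_from_mask = input_mask.sum(axis=1)                 # Example #1's approach -> [3, 2]
lengths_from_ids = np.sign(np.abs(input_ids)).sum(axis=1)  # later examples' approach -> [3, 2]
assert np.array_equal(lengths_from_mask, lengths_from_ids)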
Example #2
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data to obtain the per-character embeddings
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )
    # Get the corresponding embedding, shaped [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, axis=1)  # [batch_size] vector containing the sequence lengths of the current batch

    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=FLAGS.lstm_size, cell_type=FLAGS.cell, num_layers=FLAGS.num_layers,
                          dropout_rate=FLAGS.droupout_rate, initializers=initializers, num_labels=num_labels,
                          seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return rst
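
As Example #4 below makes explicit, add_blstm_crf_layer presumably returns a (loss, logits, trans, pred_ids) tuple, so a caller would unpack rst inside an Estimator model_fn. A minimal sketch under that assumption (the make_model_fn wrapper, feature keys and optimizer are illustrative, not from the original):

import tensorflow as tf

def make_model_fn(bert_config, num_labels):
    """Wraps create_model in an Estimator model_fn (sketch only)."""
    def model_fn(features, labels, mode, params):
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        total_loss, logits, trans, pred_ids = create_model(
            bert_config, is_training,
            features["input_ids"], features["input_mask"],
            features["segment_ids"], features["label_ids"],
            num_labels, use_one_hot_embeddings=False)
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(
                mode=mode, predictions={"pred_ids": pred_ids})
        train_op = tf.train.AdamOptimizer(1e-5).minimize(
            total_loss, global_step=tf.train.get_or_create_global_step())
        # Eval would add metric ops instead, much as Example #13 does.
        return tf.estimator.EstimatorSpec(mode=mode, loss=total_loss,
                                          train_op=train_op)
    return model_fn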
Example #3
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings,
                 dropout_rate=1.0, lstm_size=1, cell='lstm', num_layers=1):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    bert_layer = model.get_sequence_output()
    max_seq_length = bert_layer.shape[1].value
    # Compute the true sequence lengths
    used = tf.sign(tf.abs(input_ids))
    # [batch_size] vector containing the sequence lengths of the current batch
    lengths = tf.reduce_sum(used, axis=1)
    # A pos embedding layer could be added here
    # Add the CRF output layer
    blstm_crf = BLSTM_CRF(embedded_chars=bert_layer, hidden_unit=lstm_size, cell_type=cell, num_layers=num_layers,
                          dropout_rate=dropout_rate, initializers=initializers, num_labels=num_labels,
                          seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=True)

    return rst
Example #4
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # Embedding output of the BERT model: [batch_size, max_seq_len, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, axis=1)  # [batch_size] vector containing the sequence lengths of the current batch

    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=512,
                          cell_type="lstm",
                          num_layers=2,
                          dropout_rate=0.1,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    loss, logits, trans, pred_ids = blstm_crf.add_blstm_crf_layer(
        crf_only=False)

    return loss, logits, trans, pred_ids
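
Because the transition matrix trans is returned alongside the emission scores, decoding can also happen outside the graph with tf.contrib.crf.viterbi_decode, which operates on plain NumPy arrays. A hedged sketch (the shapes documented below are assumptions based on the comments above):

import tensorflow as tf

def decode_batch(logits_np, trans_np, lengths_np):
    """Viterbi-decode a batch of emission scores outside the graph.

    logits_np:  [batch_size, max_seq_len, num_labels] emission scores
    trans_np:   [num_labels, num_labels] CRF transition matrix
    lengths_np: [batch_size] true sequence lengths
    """
    results = []
    for scores, length in zip(logits_np, lengths_np):
        # viterbi_decode takes one [seq_len, num_tags] score matrix at a time.
        viterbi_seq, _ = tf.contrib.crf.viterbi_decode(scores[:length], trans_np)
        results.append(viterbi_seq)
    return results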
Example #5
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    # Load BertModel with the input data to obtain the per-character embeddings
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # For sequence labeling we need the token-level output, so use
    # model.get_sequence_output() rather than the pooled classifier output.
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, axis=1)
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=FLAGS.lstm_size,
                          cell_type=FLAGS.cell,
                          num_layers=FLAGS.num_layers,
                          dropout_rate=FLAGS.droupout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return rst
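
Examples #2, #5 and #7 depend on module-level FLAGS that are never shown. A plausible set of definitions with TF1's flags API (the names mirror the snippets, including the droupout_rate misspelling; the default values are guesses):

import tensorflow as tf

flags = tf.flags
FLAGS = flags.FLAGS

flags.DEFINE_integer("lstm_size", 128, "Hidden units of the BiLSTM layer.")
flags.DEFINE_string("cell", "lstm", "RNN cell type, e.g. 'lstm' or 'gru'.")
flags.DEFINE_integer("num_layers", 1, "Number of stacked RNN layers.")
flags.DEFINE_float("droupout_rate", 0.5, "Dropout rate for the BLSTM_CRF layer "
                   "(spelling kept as in the snippets).")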
Example #6
def slot_filling(model, lengths, slot_id, num_slot, is_training):
    '''
        slot filling
    '''
    # Get the corresponding embedding, shaped [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()

    max_seq_length = embedding.shape[1].value

    # Add the CRF output layer
    blstm_crf_config = {
        "embedded_chars": embedding,
        "hidden_unit": config['lstm_size'],
        "cell_type": config['cell'],
        "num_layers": config['num_layers'],
        "dropout_rate": config['dropout_rate'],
        "initializers": initializers,
        "num_labels": num_slot,
        "seq_length": max_seq_length,
        "labels": slot_id,
        "lengths": lengths,
        "is_training": is_training
    }

    blstm_crf = BLSTM_CRF(**blstm_crf_config)  # unpack the config dict as keyword arguments
    loss, logits, trans, pred_ids = blstm_crf.add_blstm_crf_layer(
        crf_only=False)

    return loss, logits, trans, pred_ids
Example #7
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data to obtain the per-character embeddings
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )
    # Get the corresponding embedding, shaped [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, axis=1)  # [batch_size] vector containing the sequence lengths of the current batch

    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=FLAGS.lstm_size, cell_type=FLAGS.cell, num_layers=FLAGS.num_layers,
                          droupout_rate=FLAGS.droupout_rate, initializers=initializers, num_labels=num_labels,
                          seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer()
    return rst
Example #8
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings,
                 droupout_rate=1.0, lstm_size=1, cell='lstm', num_layers=1):

    # Load BertModel with the input data to obtain the per-character embeddings
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Get BERT's output embedding: N x T x H
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, axis=1)  # [batch_size] vector containing the sequence lengths of the current batch

    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=lstm_size,
                          cell_type=cell,
                          num_layers=num_layers,
                          droupout_rate=droupout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer()
    return rst
Example #9
def create_model(bert_config,
                 is_training,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 num_labels,
                 use_one_hot_embeddings,
                 dropout_rate=1.0,
                 lstm_size=1,
                 cell='lstm',
                 num_layers=1):
    """
        创建X模型
        :param bert_config: bert 配置
        :param is_training:
        :param input_ids: 数据的idx 表示
        :param input_mask:
        :param segment_ids:
        :param labels: 标签的idx 表示
        :param num_labels: 类别数量
        :param use_one_hot_embeddings:
        :return:
        """
    # Load BertModel with the input data to obtain the per-character embeddings
    import tensorflow as tf
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Get the corresponding embedding input data [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # Calculate the true length of the sequence
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, axis=1)  # [batch_size] vector containing the sequence lengths of the current batch
    # Add CRF output layer
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=lstm_size,
                          cell_type=cell,
                          num_layers=num_layers,
                          dropout_rate=dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=False)
    return rst
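
The dropout conventions disagree across examples: Example #1 passes 1.0 - hidden_dropout_prob (a keep probability), while this example defaults dropout_rate to 1.0, which is only sensible if BLSTM_CRF forwards the value to tf.nn.dropout as keep_prob. A toy check of that interpretation (an assumption about the layer's internals, not confirmed by the snippets):

import numpy as np
import tensorflow as tf

x = tf.ones([2, 4])
# With keep_prob=1.0, tf.nn.dropout keeps every unit, i.e. dropout is disabled,
# so a default dropout_rate of 1.0 would mean "no dropout".
y = tf.nn.dropout(x, keep_prob=1.0)
with tf.Session() as sess:
    assert np.array_equal(sess.run(y), np.ones([2, 4]))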
Example #10
def create_model(bert_config,
                 is_training,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 num_labels,
                 use_one_hot_embeddings,
                 dropout_rate=1.0,
                 lstm_size=1,
                 cell='lstm',
                 num_layers=1):
    """Creates a classification model."""
    import tensorflow as tf
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Get the corresponding embedding, shaped [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # Compute the true sequence lengths
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, axis=1)  # [batch_size] vector containing the sequence lengths of the current batch
    # Add the CRF output layer
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=lstm_size,
                          cell_type=cell,
                          num_layers=num_layers,
                          dropout_rate=dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=False)
    return rst
Example #11
def create_bert_crf_model(bert_config, is_training, input_ids, input_mask,
                          segment_ids, labels, num_labels, crf_only,  # use_one_hot_embeddings,
                          dropout_rate, lstm_size, cell, num_layers):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data to obtain the per-character embeddings
    import sys
    import tensorflow as tf
    # from bert_base.bert import modeling
    sys.path.append('../../../bert_base/bert')
    import modeling
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=True,  # use_one_hot_embeddings
    )
    # Get the corresponding embedding, shaped [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # Compute the true sequence lengths
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, axis=1)  # [batch_size] vector containing the sequence lengths of the current batch
    # Add the CRF output layer
    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=lstm_size, cell_type=cell, num_layers=num_layers,
                          dropout_rate=dropout_rate, initializers=initializers, num_labels=num_labels,
                          seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=crf_only)
    return rst
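
Unlike the earlier variants, Example #11 threads crf_only through to add_blstm_crf_layer, so one function covers both heads. A usage sketch (the wrapper and all hyperparameter values below are placeholders, not from the original):

def build_both_heads(bert_config, is_training, input_ids, input_mask,
                     segment_ids, labels, num_labels):
    # Plain CRF head on top of BERT, as Examples #2, #3 and #5 build:
    crf_head = create_bert_crf_model(bert_config, is_training, input_ids,
                                     input_mask, segment_ids, labels,
                                     num_labels, crf_only=True,
                                     dropout_rate=0.5, lstm_size=128,
                                     cell='lstm', num_layers=1)
    # Full BiLSTM + CRF stack, as Examples #4, #9 and #10 build:
    bilstm_crf_head = create_bert_crf_model(bert_config, is_training, input_ids,
                                            input_mask, segment_ids, labels,
                                            num_labels, crf_only=False,
                                            dropout_rate=0.5, lstm_size=128,
                                            cell='lstm', num_layers=1)
    return crf_head, bilstm_crf_head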
Example #12
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    dropout_rate = 1.0
    lstm_size = 1
    cell = 'lstm'
    num_layers = 1

    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # Get the corresponding embedding, shaped [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # Compute the true sequence lengths
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, axis=1)  # [batch_size] vector containing the sequence lengths of the current batch
    # Add the CRF output layer
    from tensorflow.contrib.layers.python.layers import initializers
    from lstm_crf_layer import BLSTM_CRF
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=lstm_size,
                          cell_type=cell,
                          num_layers=num_layers,
                          dropout_rate=dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    loss, logits, trans, pred_ids = blstm_crf.add_blstm_crf_layer(
        crf_only=True)

    return loss, logits, trans, pred_ids
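
The initializers module imported here is TF1's contrib initializer collection; BLSTM_CRF presumably uses it for its projection weights. A minimal sketch of the likely pattern (the variable names and shapes are assumptions about lstm_crf_layer's internals):

import tensorflow as tf
from tensorflow.contrib.layers.python.layers import initializers

# Xavier/Glorot-initialized projection from the LSTM output to the label space.
w = tf.get_variable("proj_w", shape=[512, 9],
                    initializer=initializers.xavier_initializer())
b = tf.get_variable("proj_b", shape=[9], initializer=tf.zeros_initializer())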
Example #13
    def model_fn(features, labels, mode, params):
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Get loss from inputs
        #********************************************************************************************#
        bsz_per_core = tf.shape(features["input_ids"])[0]
        inp = tf.transpose(features["input_ids"], [1, 0])
        seg_id = tf.transpose(features["segment_ids"], [1, 0])
        inp_mask = tf.transpose(features["input_mask"], [1, 0])
        label_ids = features["label_ids"]

        xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
        run_config = xlnet.create_run_config(is_training, True, FLAGS)
        xlnet_model = xlnet.XLNetModel(xlnet_config=xlnet_config,
                                       run_config=run_config,
                                       input_ids=inp,
                                       seg_ids=seg_id,
                                       input_mask=inp_mask)
        #summary = xlnet_model.get_pooled_out(FLAGS.summary_type, FLAGS.use_summ_proj)
        # Get the corresponding embedding, shaped [batch_size, seq_length, embedding_size]
        xlnet_model_out = xlnet_model.get_sequence_output()
        embedding = tf.transpose(xlnet_model_out, [1, 0, 2])
        max_seq_length = embedding.shape[1].value
        # Compute the true sequence lengths
        used = tf.sign(tf.abs(features["input_ids"]))
        lengths = tf.reduce_sum(
            used, axis=1)  # [batch_size] vector containing the sequence lengths of the current batch
        # Add the CRF output layer
        blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                              hidden_unit=10,
                              cell_type="lstm",
                              num_layers=1,
                              dropout_rate=0.5,
                              initializers=initializers,
                              num_labels=n_class,
                              seq_length=max_seq_length,
                              labels=label_ids,
                              lengths=lengths,
                              is_training=is_training)
        total_loss, logits, trans, pred_ids = blstm_crf.add_blstm_crf_layer(
            crf_only=True)
        #********************************************************************************************#

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        #### load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        #### Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(label_ids, pred_ids):
                return {
                    "eval_loss":
                    tf.metrics.mean_squared_error(labels=label_ids,
                                                  predictions=pred_ids),
                }

            eval_metrics = metric_fn(features["label_ids"], pred_ids)
            eval_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, eval_metric_ops=eval_metrics)
            return eval_spec
        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "logits": logits,
                "labels": label_ids,
                "pred_ids": pred_ids,
                "input_mask": features["input_mask"]
            }
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions=predictions)
            return output_spec

        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        #### Constructing the training EstimatorSpec with new cache.
        train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                loss=total_loss,
                                                train_op=train_op)
        return train_spec
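
XLNet consumes time-major tensors, which explains the tf.transpose calls in this example: inputs go from [batch, seq_len] to [seq_len, batch], and the sequence output is transposed back before the batch-major BLSTM_CRF layer. A NumPy sketch of the shape bookkeeping (illustration only; the dimensions are arbitrary):

import numpy as np

batch_size, seq_len, hidden = 8, 128, 768
input_ids = np.zeros([batch_size, seq_len], dtype=np.int32)

inp = input_ids.T                                    # [seq_len, batch_size], time-major
xlnet_out = np.zeros([seq_len, batch_size, hidden])  # XLNet's sequence output layout
embedding = np.transpose(xlnet_out, [1, 0, 2])       # back to [batch, seq_len, hidden]
assert embedding.shape == (batch_size, seq_len, hidden)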