def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings,
                 dropout_rate=1.0, lstm_size=1, cell='lstm', num_layers=1):
    """
    Create the X model.
    :param bert_config: BERT configuration
    :param is_training: whether the model is in training mode
    :param input_ids: idx representation of the input data
    :param input_mask:
    :param segment_ids:
    :param labels: idx representation of the labels
    :param num_labels: number of label classes
    :param use_one_hot_embeddings:
    :param dropout_rate:
    :param lstm_size:
    :param cell:
    :param num_layers:
    :return:
    """
    import tensorflow as tf
    from bert_base.bert import modeling
    # BLSTM_CRF and initializers are assumed to be imported at module level
    # (in the original repo, from the lstm_crf_layer module and
    # tf.contrib.layers, respectively).

    # Load BertModel on the input data to get the per-token embeddings.
    model = modeling.BertModel(config=bert_config, is_training=is_training,
                               input_ids=input_ids, input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Sequence output of shape [batch_size, seq_length, embedding_size], e.g.
    # Tensor("bert/encoder/Reshape_13:0", shape=(56, 202, 768), dtype=float32)
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value  # e.g. 202

    # Compute the real length of every sequence in the batch:
    # input_ids  Tensor("IteratorGetNext:0", shape=(56, 202), dtype=int32)
    # used       Tensor("Sign:0", shape=(56, 202), dtype=int32)
    # lengths    Tensor("Sum:0", shape=(56,), dtype=int32)
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # [batch_size] vector of real sequence lengths

    # Add the CRF output layer.
    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=lstm_size,
                          cell_type=cell, num_layers=num_layers,
                          dropout_rate=dropout_rate, initializers=initializers,
                          num_labels=num_labels, seq_length=max_seq_length,
                          labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=False)
    return rst
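
# The used/lengths trick above relies on padding positions having id 0. Below
# is a minimal, self-contained sketch of the same computation on a toy batch
# (the token ids are made up; axis=1 is the current spelling of the
# deprecated reduction_indices=1 used above).
import tensorflow as tf  # TF 1.x, as in the functions in this file

toy_input_ids = tf.constant([[101, 2345, 102, 0, 0],
                             [101, 987, 3456, 102, 0]], dtype=tf.int32)
toy_used = tf.sign(tf.abs(toy_input_ids))      # 1 at real tokens, 0 at padding
toy_lengths = tf.reduce_sum(toy_used, axis=1)  # real length of each sequence

with tf.Session() as sess:
    print(sess.run(toy_lengths))  # -> [3 4]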

def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings,
                 dropout_rate=1.0, lstm_size=1, cell='lstm', num_layers=1):
    """
    Create the model.
    :param bert_config: BERT configuration
    :param is_training: whether the model is in training mode
    :param input_ids: idx representation of the input data
    :param input_mask:
    :param segment_ids:
    :param labels: idx representation of the labels
    :param num_labels: number of label classes
    :param use_one_hot_embeddings:
    :param dropout_rate:
    :param lstm_size:
    :param cell:
    :param num_layers:
    :return:
    """
    # Load BertModel on the input data to get the per-token embeddings.
    # (tf, modeling, BLSTM_CRF and initializers are assumed to be imported at
    # module level in this variant.)
    model = modeling.BertModel(config=bert_config, is_training=is_training,
                               input_ids=input_ids, input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Sequence output of shape [batch_size, seq_length, embedding_size].
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    # Real length of every sequence in the batch.
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # [batch_size] vector of real sequence lengths

    # Add the CRF output layer.
    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=lstm_size,
                          cell_type=cell, num_layers=num_layers,
                          dropout_rate=dropout_rate, initializers=initializers,
                          num_labels=num_labels, seq_length=max_seq_length,
                          labels=labels, lengths=lengths, is_training=is_training)
    res = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return res

def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings,
                 dropout_rate=1.0, lstm_size=1, cell='lstm', num_layers=1):
    """
    Create the X model.
    :param bert_config: BERT configuration
    :param is_training: whether the model is in training mode
    :param input_ids: idx representation of the input data
    :param input_mask:
    :param segment_ids:
    :param labels: idx representation of the labels
    :param num_labels: number of label classes
    :param use_one_hot_embeddings:
    :return:
    """
    import tensorflow as tf
    from bert_base.bert import modeling

    # Load BertModel on the input data to get the per-token embeddings.
    model = modeling.BertModel(config=bert_config, is_training=is_training,
                               input_ids=input_ids, input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # Build a mask of the label transitions that are legal under the IOB2
    # span scheme (e.g. an I tag may not follow O).
    label_vocab = {"O": 0, "B": 1, "I": 2, "X": 3, "[CLS]": 4, "[SEP]": 5}
    SPAN_TYPE = "IOB2"
    mask = transition_mask(label_vocab, SPAN_TYPE,
                           label_vocab["[CLS]"], label_vocab["[SEP]"])

    # Sequence output of shape [batch_size, seq_length, embedding_size].
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    # Real length of every sequence in the batch.
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # [batch_size] vector of real sequence lengths

    # Add the CRF output layer.
    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=lstm_size,
                          cell_type=cell, num_layers=num_layers,
                          dropout_rate=dropout_rate, initializers=initializers,
                          num_labels=num_labels, seq_length=max_seq_length,
                          labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return rst
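
# transition_mask is not defined in any of these variants, and the mask it
# returns is not passed on to BLSTM_CRF above, so it is presumably applied to
# the CRF transition parameters elsewhere. A hypothetical numpy
# reconstruction, assuming it returns a [num_labels, num_labels] 0/1 matrix
# with mask[i, j] == 1 iff the transition from label i to label j is legal:
import numpy as np

def transition_mask_sketch(vocab, span_type, start_idx, end_idx):
    assert span_type == "IOB2", "only the IOB2 case is sketched here"
    n = len(vocab)
    inv = {idx: tag for tag, idx in vocab.items()}
    mask = np.ones((n, n), dtype=np.float32)
    for i in range(n):
        for j in range(n):
            # IOB2: an I tag must directly follow a B or I tag. (The vocab
            # above is untyped; with typed tags the entity types of the two
            # labels would also have to match.)
            if inv[j] == "I" and inv[i] not in ("B", "I"):
                mask[i, j] = 0.0
            # No transition out of the sentence-final [SEP] label, and no
            # transition into the sentence-initial [CLS] label.
            if i == end_idx or j == start_idx:
                mask[i, j] = 0.0
    return mask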

def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings,
                 dropout_rate=1.0, lstm_size=1, cell='lstm', num_layers=1,
                 crf_only=False, lstm_only=False):
    """
    Create the X model.
    :param bert_config: BERT configuration
    :param is_training: whether the model is in training mode
    :param input_ids: idx representation of the input data
    :param input_mask:
    :param segment_ids:
    :param labels: idx representation of the labels
    :param num_labels: number of label classes
    :param use_one_hot_embeddings:
    :return:
    """
    import tensorflow as tf
    from bert_base.bert import modeling

    # Load BertModel on the input data to get the per-token embeddings.
    model = modeling.BertModel(config=bert_config, is_training=is_training,
                               input_ids=input_ids, input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Sequence output of shape [batch_size, seq_length, embedding_size].
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    # Real length of every sequence in the batch.
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # [batch_size] vector of real sequence lengths

    # Add the CRF output layer under a dedicated variable scope.
    with tf.variable_scope('finetune'):
        blstm_crf = BLSTM_CRF(embedded_chars=embedding, input_mask=input_mask,
                              hidden_unit=lstm_size, cell_type=cell,
                              num_layers=num_layers, dropout_rate=dropout_rate,
                              initializers=initializers, num_labels=num_labels,
                              seq_length=max_seq_length, labels=labels,
                              lengths=lengths, is_training=is_training)
        rst = blstm_crf.add_blstm_crf_layer(crf_only=crf_only, lstm_only=lstm_only)
    return rst
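
# This is the only variant that exposes the decoder choice as arguments. A
# hypothetical call site (all tensors and hyperparameters are placeholders;
# the four-tuple return value follows the loss/logits/trans/pred_ids comment
# in the variant below):
#
#     # Full BiLSTM-CRF head on top of BERT:
#     loss, logits, trans, pred_ids = create_model(
#         bert_config, is_training, input_ids, input_mask, segment_ids,
#         labels, num_labels, use_one_hot_embeddings=False,
#         dropout_rate=0.5, lstm_size=128, cell='lstm', num_layers=1)
#
#     # CRF decoding directly on the BERT sequence output (skips the BiLSTM):
#     loss, logits, trans, pred_ids = create_model(
#         bert_config, is_training, input_ids, input_mask, segment_ids,
#         labels, num_labels, use_one_hot_embeddings=False, crf_only=True)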

def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """
    Create the X model.
    :param bert_config: BERT configuration
    :param is_training: whether the model is in training mode
    :param input_ids: idx representation of the input data
    :param input_mask:
    :param segment_ids:
    :param labels: idx representation of the labels
    :param num_labels: number of label classes
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel on the input data to get the per-token embeddings.
    # (tf, modeling, FLAGS, BLSTM_CRF and initializers are assumed to be
    # module-level names in this variant.)
    model = modeling.BertModel(config=bert_config, is_training=is_training,
                               input_ids=input_ids, input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    print("Got the BERT output")
    # Sequence output of shape [batch_size, seq_length, embedding_size].
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # [batch_size] vector of real sequence lengths

    # Feed the embeddings into the LSTM stack: build the BLSTM-CRF model
    # object, with hyperparameters taken from the tf FLAGS object.
    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=FLAGS.lstm_size,
                          cell_type=FLAGS.cell, num_layers=FLAGS.num_layers,
                          dropout_rate=FLAGS.dropout_rate, initializers=initializers,
                          num_labels=num_labels, seq_length=max_seq_length,
                          labels=labels, lengths=lengths, is_training=is_training)
    # Get the outputs of our own network on top of BERT: loss, logits, trans
    # and pred_ids. Since BERT already encodes bidirectionally, the LSTM is
    # not strictly necessary; the last BERT layer can be fed straight to the
    # CRF for decoding.
    try:
        rst = blstm_crf.add_blstm_crf_layer()
    except Exception as e:
        print(str(e))
        raise  # re-raise: rst is undefined if the layer could not be built
    return rst

def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, pos_ids,
                 dropout_rate=1.0, lstm_size=1, cell='lstm', num_layers=1,
                 add_pos_embedding=True):
    """
    Create the X model.
    :param bert_config: BERT configuration
    :param is_training: whether the model is in training mode
    :param input_ids: idx representation of the input data
    :param input_mask:
    :param segment_ids:
    :param pos_ids: idx representation of the part-of-speech tags
    :param labels: idx representation of the labels
    :param num_labels: number of label classes
    :param use_one_hot_embeddings:
    :return:
    """
    import tensorflow as tf
    from bert_base.bert import modeling

    # Load BertModel on the input data to get the per-token embeddings.
    model = modeling.BertModel(config=bert_config, is_training=is_training,
                               input_ids=input_ids, input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Sequence output of shape [batch_size, seq_length, embedding_size].
    embedding = model.get_sequence_output()

    # jzhang add: optionally enrich the BERT output with POS-tag embeddings.
    if add_pos_embedding:
        embedding = embedding_addpos(embedding, pos_ids)
    max_seq_length = embedding.shape[1].value

    # Real length of every sequence in the batch.
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # [batch_size] vector of real sequence lengths

    # Add the CRF output layer.
    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=lstm_size,
                          cell_type=cell, num_layers=num_layers,
                          dropout_rate=dropout_rate, initializers=initializers,
                          num_labels=num_labels, seq_length=max_seq_length,
                          labels=labels, lengths=lengths, is_training=is_training)
    # jzhang: crf_only=True makes the model decode with the CRF alone;
    # crf_only=False uses BiLSTM-CRF as the decoder.
    rst = blstm_crf.add_blstm_crf_layer(crf_only=False)
    return rst
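
# embedding_addpos is not defined in any of these variants. A hypothetical
# sketch of such a helper, assuming it concatenates a learned POS-tag
# embedding onto the BERT output (the table size and embedding width are
# illustrative defaults, not taken from the original code):
import tensorflow as tf  # TF 1.x

def embedding_addpos_sketch(embedding, pos_ids, pos_vocab_size=64, pos_dim=32):
    pos_table = tf.get_variable(
        "pos_embedding_table", shape=[pos_vocab_size, pos_dim],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    pos_embedding = tf.nn.embedding_lookup(pos_table, pos_ids)  # [B, T, pos_dim]
    # Result shape: [batch_size, seq_length, hidden_size + pos_dim]; the
    # downstream BLSTM_CRF then sees a wider per-token feature vector.
    return tf.concat([embedding, pos_embedding], axis=-1)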