def __init__(self, input, labels, num_labels, lengths, is_training, dropout_rate=0.7):
    """Build the projection + CRF graph on top of ``input``.

    :param input: tensor handed to the projection layer
    :param labels: gold label ids passed to the CRF log-likelihood
    :param num_labels: number of distinct labels; the CRF models transitions
        between states, so every label is one state
    :param lengths: actual length of every sentence in the batch
    :param is_training: when true, dropout is applied to the input and to the logits
    :param dropout_rate: second positional argument given to ``tf.nn.dropout``
        (NOTE(review): in TF1 that position is ``keep_prob`` -- confirm intent)
    """
    self.labels = labels
    self.num_labels = num_labels

    # project
    features = tf.nn.dropout(input, dropout_rate) if is_training else input
    self.logits = self._project_layer(features, num_labels)
    if is_training:
        self.logits = tf.nn.dropout(self.logits, dropout_rate)

    # crf
    crf_outputs = self._crf_log_likelihood(
        self.labels, self.logits, lengths, num_labels)
    self.log_likelihood, self.trans = crf_outputs

    # CRF decode: pred_ids is the single highest-scoring tag path
    decoded = crf.crf_decode(potentials=self.logits,
                             transition_params=self.trans,
                             sequence_length=lengths)
    self.pred_ids = decoded[0]
def make_test(self, input_x=None, input_y=None, dropout=None, input_mask=None,
              input_segment=None, use_tfrecord=False):
    """Build the evaluation graph.

    When ``use_tfrecord`` is false, all five input tensors are replaced by the
    values returned from ``self.create_placeholder()``. Otherwise the caller's
    tensors are used and only ``dropout`` is replaced by a placeholder that
    defaults to 1.0.

    :return: ``(loss, pred_ids, real_sentence_length)``
    """
    if use_tfrecord:
        dropout = tf.placeholder_with_default(1.0, shape=[], name='dropout')
    else:
        (input_x, input_y, dropout,
         input_mask, input_segment) = self.create_placeholder()

    model_outputs = self.create_model(input_x,
                                      dropout,
                                      is_training=False,
                                      input_mask=input_mask,
                                      input_segment=input_segment)
    logits, real_sentence_length, trans = model_outputs

    with tf.variable_scope('loss'):
        token_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=input_y)
        loss = tf.reduce_mean(token_losses)

    decoded = crf.crf_decode(potentials=logits,
                             transition_params=trans,
                             sequence_length=real_sentence_length)
    # Give the decoded ids a stable node name.
    pred_ids = tf.identity(decoded[0], name=constant.OUTPUT_NODE_NAME)
    return loss, pred_ids, real_sentence_length
def add_bilstm_crf_layer(self):
    """Assemble the encoder and the output layer selected by ``self.bilstm`` / ``self.crf``.

    :return: ``(loss, pred_ids)``
    """
    if self.is_training:
        self.embedded_chars = tf.nn.dropout(self.embedded_chars, self.dropout_rate)

    if self.bilstm:
        # GCN layer -> BiLSTM layer -> projection layer
        gcn_output = self.GCN_layer(A_fw=self.forward, A_bw=self.backward)
        lstm_output = self.bilstm_layer(gcn_output)
        logits = self.project_bilstm_layer(lstm_output)
    else:
        # projection layer only
        logits = self.project_crf_layer(self.embedded_chars)

    if self.crf:
        # CRF layer
        loss, trans = self.crf_layer(logits)
        # CRF decode: pred_ids is the single highest-scoring tag path
        pred_ids, _score = crf.crf_decode(potentials=logits,
                                          transition_params=trans,
                                          sequence_length=self.lengths)
    else:
        # softmax layer
        pred_ids, loss = self.softmax_layer(logits)

    return (loss, pred_ids)
def add_blstm_crf_layer(self, crf_only):
    """Build the BLSTM-CRF network.

    :param crf_only: when true, skip the BLSTM and call ``project_crf_layer`` directly
    :return: ``(loss, logits, trans, pred_ids)``
    """
    if self.is_training:
        # Author's note: an LSTM input dropout setting of 0.9 gave the best score.
        self.embedded_chars = tf.nn.dropout(self.embedded_chars, self.dropout_rate)

    if not crf_only:
        # BLSTM followed by the projection layer
        lstm_output = self.blstm_layer(self.embedded_chars)
        logits = self.project_bilstm_layer(lstm_output)
    else:
        # logits = [batch_size, seq_length, num_labels]
        logits = self.project_crf_layer()

    # crf
    loss, trans = self.crf_layer(logits)

    # CRF decode: pred_ids is the single highest-scoring tag path
    decoded = crf.crf_decode(potentials=logits,
                             transition_params=trans,
                             sequence_length=self.lengths)
    pred_ids = decoded[0]
    return (loss, logits, trans, pred_ids)
def get_model(self):
    """Build a two-layer bidirectional GRU + CRF tagger in a fresh graph.

    :return: ``(graph, ph_sequence_lengths, ph_x, ph_y, loss, train_opt,
        accuracy, viterbi_sequence)``
    """
    graph = tf.Graph()
    with graph.as_default():
        # shape (batch_size, sentence_len, wordvec_size)
        ph_x = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.sentence_len, self.wordvec_size])
        # shape (batch_size, sentence_len)
        ph_y = tf.placeholder(dtype=tf.int32, shape=[None, self.sentence_len])
        ph_sequence_lengths = tf.placeholder(dtype=tf.int32, shape=[None, ])

        first_gru = keras.layers.GRU(256, return_sequences=True, dropout=0.5)
        bigru = keras.layers.Bidirectional(first_gru)(ph_x)
        second_gru = keras.layers.GRU(512, return_sequences=True, dropout=0.5)
        bigru2 = keras.layers.Bidirectional(second_gru)(bigru)

        # Project every time step to per-class scores with one matmul:
        # flatten to 2-D, multiply, then restore the sentence axis.
        w = tf.Variable(tf.random_normal(shape=[1024, self.classes]))
        flat_states = tf.reshape(bigru2, shape=[-1, 1024])
        flat_scores = tf.matmul(flat_states, w)
        unary_scores = tf.reshape(
            flat_scores, shape=[-1, self.sentence_len, self.classes])

        crf_outputs = crf.crf_log_likelihood(
            unary_scores, ph_y, ph_sequence_lengths)
        log_likelihood, transition_params = crf_outputs
        loss = tf.reduce_mean(-log_likelihood)

        viterbi_sequence, viterbi_score = crf.crf_decode(
            unary_scores, transition_params, ph_sequence_lengths)
        train_opt = tf.train.AdamOptimizer(self.study_rate).minimize(loss)

        # NOTE(review): the mean below runs over every position of the
        # (batch, sentence_len) grid, including positions beyond
        # ph_sequence_lengths -- confirm that is the intended metric.
        hits = tf.equal(viterbi_sequence, ph_y)
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))  # accuracy

    return (graph, ph_sequence_lengths, ph_x, ph_y, loss, train_opt,
            accuracy, viterbi_sequence)
def add_blstm_crf_layer(self):
    """Build the BLSTM-CRF network.

    :return: ``(loss, logits, trans, pred_ids)``
    """
    if self.is_training:
        # Author's note: an LSTM input dropout rate of 0.5 gave the best score.
        self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                            self.droupout_rate)

    # BLSTM, then projection to per-label scores
    logits = self.project_bilstm_layer(self.blstm_layer(self.embedded_chars))

    # crf
    crf_outputs = self.crf_layer(logits)
    loss, trans = crf_outputs

    # CRF decode: pred_ids is the single highest-scoring tag path
    pred_ids, _score = crf.crf_decode(potentials=logits,
                                      transition_params=trans,
                                      sequence_length=self.lengths)
    return (loss, logits, trans, pred_ids)
def add_blstm_crf_layer(self, crf_only, name=None):
    """Build the BLSTM-CRF network.

    :param crf_only: when true, skip the BLSTM and feed the projection layer
        from the BERT sequence output only
    :param name: forwarded to ``project_crf_layer`` (CRF-only path) and to ``crf_layer``
    :return: ``(loss, logits, trans, pred_ids)``
    """
    if self.is_training:
        # Author's note: an LSTM input dropout setting of 0.9 gave the best score.
        self.embedded_chars = tf.nn.dropout(self.embedded_chars, self.dropout_rate)

    if not crf_only:
        # BLSTM followed by the projection layer
        lstm_output = self.blstm_layer(self.embedded_chars)
        logits = self.project_blstm_layer(lstm_output)
    else:
        # No BLSTM: CRF only, on the sequence obtained from the BERT model.
        logits = self.project_crf_layer(name=name)

    # crf
    loss, trans = self.crf_layer(logits, name=name)

    # CRF decode: pred_ids is the single highest-scoring tag path
    decoded = crf.crf_decode(potentials=logits,
                             transition_params=trans,
                             sequence_length=self.lengths)
    pred_ids = decoded[0]
    return loss, logits, trans, pred_ids
def __init__(self, input_tensor, num_labels, labels=None, lengths=None, scope_name='CRF'):
    """Create a CRF layer on top of ``input_tensor``.

    Sets three attributes:

    * ``self.loss``  -- mean negative log-likelihood when ``labels`` is given,
      otherwise ``None``
    * ``self.trans`` -- the transition parameters used for decoding
    * ``self.pred_ids`` -- tag ids decoded by ``crf.crf_decode``

    :param input_tensor: unary potentials passed to the CRF ops
    :param num_labels: number of tags; the transition matrix is
        ``[num_labels, num_labels]``
    :param labels: gold tag ids, or ``None`` for inference-only graphs
    :param lengths: sequence lengths passed to the CRF ops
    :param scope_name: variable scope that owns the transition matrix
    """
    # Always define the attribute so inference-only callers can check
    # ``crf.loss is None`` instead of hitting an AttributeError.
    self.loss = None

    with tf.variable_scope(scope_name):
        trans = tf.get_variable("transitions",
                                shape=[num_labels, num_labels],
                                initializer=create_initializer(0.02))
        if labels is not None:
            log_likelihood, trans = crf.crf_log_likelihood(
                inputs=input_tensor,
                tag_indices=labels,
                transition_params=trans,
                sequence_lengths=lengths)
            self.loss = tf.reduce_mean(-log_likelihood)

        self.trans = trans
        self.pred_ids, _ = crf.crf_decode(potentials=input_tensor,
                                          transition_params=self.trans,
                                          sequence_length=lengths)
def add_bilstm_crf_layer(self, pos_ids):
    """Assemble the encoder and the output layer selected by ``self.bilstm`` / ``self.crf``.

    :param pos_ids: passed to ``bilstm_layer`` and ``project_bilstm_layer``
        when the BiLSTM path is enabled
    :return: ``(loss, pred_ids)``
    """
    if self.is_training:
        self.embedded_chars = tf.nn.dropout(self.embedded_chars, self.dropout_rate)

    if self.bilstm:
        # BiLSTM layer -> CNN layer -> projection layer
        lstm_output = self.bilstm_layer(pos_ids)
        cnn_output = self.cnn_layer(lstm_output)
        logits = self.project_bilstm_layer(cnn_output, pos_ids)
    else:
        # projection layer only
        logits = self.project_crf_layer(self.embedded_chars)

    if self.crf:
        # CRF layer
        loss, trans = self.crf_layer(logits)
        # CRF decode: pred_ids is the single highest-scoring tag path
        pred_ids, _score = crf.crf_decode(potentials=logits,
                                          transition_params=trans,
                                          sequence_length=self.lengths)
    else:
        # softmax layer
        pred_ids, loss = self.softmax_layer(logits)

    return (loss, pred_ids)
def get_prediction_module(self, bert_model, features, is_training, percent_done):
    """Build the CRF tagging head on top of the BERT sequence output.

    ``is_training`` and ``percent_done`` are accepted but not used here.

    :return: ``(losses, outputs)`` where ``losses`` is the negated CRF
        log-likelihood and ``outputs`` is a dict with the keys ``loss``,
        ``logits``, ``predictions``, ``labels``, ``labels_mask``,
        ``labeled_positions`` and ``eid``
    """

    def feature(suffix):
        # Feature keys are namespaced with this task's name.
        return features[self.name + suffix]

    n_classes = len(self._get_label_mapping())

    # Keep only the representations at the labeled positions.
    reprs = pretrain_helpers.gather_positions(
        bert_model.get_sequence_output(), feature("_labeled_positions"))

    # Sequence length = sum of the label mask along axis 1.
    mask_total = tf.reduce_sum(feature("_labels_mask"), axis=1)
    seq_lengths = tf.cast(mask_total, tf.int32)

    logits = tf.layers.dense(reprs, n_classes)

    with tf.variable_scope("crf", reuse=tf.AUTO_REUSE):
        trans_val = tf.get_variable("transition",
                                    shape=[n_classes, n_classes],
                                    dtype=tf.float32)
        predict_ids, _score = crf.crf_decode(logits, trans_val, seq_lengths)
        actual_ids = feature("_labels")
        log_likelihood, _trans = crf.crf_log_likelihood(
            inputs=logits,
            tag_indices=actual_ids,
            sequence_lengths=seq_lengths,
            transition_params=trans_val)
        losses = -log_likelihood

    outputs = dict(
        loss=losses,
        logits=logits,
        predictions=predict_ids,
        labels=feature("_labels"),
        labels_mask=feature("_labels_mask"),
        labeled_positions=feature("_labeled_positions"),
        eid=feature("_eid"),
    )
    return losses, outputs
def add_blstm_crf_layer(self, crf_only):
    """Build the BLSTM-CRF network.

    :param crf_only: when true, project ``embedded_chars`` directly without the BLSTM
    :return: ``(loss, logits, trans, None)`` while training, otherwise
        ``(None, logits, None, pred_ids)``
    """
    if self.is_training:
        self.embedded_chars = tf.nn.dropout(self.embedded_chars, self.dropout_rate)

    if not crf_only:
        # blstm + project
        lstm_output = self.blstm_layer(self.embedded_chars)
        logits = self.project_bilstm_layer(lstm_output)
    else:
        logits = self.project_crf_layer(self.embedded_chars)

    # crf
    loss, trans = self.crf_layer(logits)
    print(self.labels)

    if not self.is_training:
        # CRF decode: pred_ids is the single highest-scoring tag path
        decoded = crf.crf_decode(potentials=logits,
                                 transition_params=trans,
                                 sequence_length=self.lengths)
        return (None, logits, None, decoded[0])

    return (loss, logits, trans, None)
def add_blstm_crf_layer(self, crf_only):
    """Build the Bi-LSTM-CRF network.

    :param crf_only: when true, use only the CRF layer on the projected embeddings
    :return: ``(loss, per_example_loss, logits, probabilities)``; note that
        ``probabilities`` holds the first output of ``crf.crf_decode``
        (the original code labels it ``pred_ids``)
    """
    if self.is_training:
        # Author's note: an LSTM input dropout setting of 0.9 gave the best score.
        self.embedding_inputs = tf.nn.dropout(self.embedding_inputs,
                                              self.dropout_rate)

    if not crf_only:
        # Bi-LSTM followed by the projection layer
        lstm_output = self.blstm_layer(self.embedding_inputs)
        logits = self.project_bilstm_layer(lstm_output)
    else:
        # CRF layer only
        logits = self.project_crf_layer(self.embedding_inputs)

    # crf
    crf_outputs = self.crf_layer(logits)
    loss, per_example_loss, trans = crf_outputs

    # CRF decode: the single highest-scoring tag path
    decoded = crf.crf_decode(potentials=logits,
                             transition_params=trans,
                             sequence_length=self.sequence_lengths)
    probabilities = decoded[0]  # pred_ids
    return (loss, per_example_loss, logits, probabilities)
def add_blstm_crf_layer(self, crf_only=False, lstm_only=False):
    """Build the tagging head: BLSTM + softmax, CRF only, or BLSTM + CRF.

    :param crf_only: CRF path only -- project ``embedded_chars`` without the BLSTM
    :param lstm_only: use a BLSTM with a dense projection and the softmax
        layer; takes precedence over ``crf_only``
    :return: ``(loss, logits, pred_ids)``
    """
    if self.is_training:
        # Author's note: an LSTM input dropout setting of 0.9 gave the best score.
        self.embedded_chars = tf.nn.dropout(self.embedded_chars, self.dropout_rate)

    if lstm_only:
        # blstm
        lstm_output = self.blstm_layer(self.embedded_chars)
        # project
        logits = tf.layers.dense(
            lstm_output,
            self.num_labels,
            name='project',
            kernel_regularizer=tf.keras.regularizers.l2(1e-5))
        loss, pred_ids = self._softmax_layer(
            logits, self.labels, self.num_labels, self.input_mask)
        return (loss, logits, pred_ids)

    if crf_only:
        logits = self.project_crf_layer(self.embedded_chars)
    else:
        # blstm + project
        lstm_output = self.blstm_layer(self.embedded_chars)
        logits = self.project_bilstm_layer(lstm_output)

    # crf
    loss, trans = self.crf_layer(logits)
    # CRF decode: pred_ids is the single highest-scoring tag path
    pred_ids, _viterbi_score = crf.crf_decode(potentials=logits,
                                              transition_params=trans,
                                              sequence_length=self.lengths)
    return (loss, logits, pred_ids)
def build_export_output(self, model):  # pylint: disable=no-self-use
    """Attach the export outputs for joint intent / slot prediction to ``model``.

    Sets ``model.preds``, ``model.score`` and ``model.output_dict``.
    `score` and `input_y` are for loss calculation.
    `preds` and `y_ground_truth` are for metric calculation.
    """
    transitions = model.transitions
    intent_logits, slots_logits = model.logits

    # Intent head: softmax scores and argmax predictions.
    intent_score = tf.nn.softmax(intent_logits, name="intent_score")
    intent_preds = tf.argmax(intent_logits, axis=-1, name="intent_preds")

    # Slot head: CRF decoding, then identity ops to give the outputs fixed names.
    decoded_slots, decoded_score = crf_decode(
        slots_logits, transitions, model.input_x_len)
    slots_preds = tf.identity(decoded_slots, name="slots_preds")
    slots_score = tf.identity(decoded_score, name="slots_score")

    model.preds = (intent_preds, slots_preds)
    model.score = (intent_score, slots_score)
    model.output_dict = dict(
        slots_score=slots_score,
        slots_preds=slots_preds,
        intent_score=intent_score,
        intent_preds=intent_preds,
    )
    logging.info("Model built.")
def add_blstm_crf_layer(self, enable_lstm=False, decode='softmax'):
    """Build the tagging head with an optional BLSTM and a softmax or CRF decoder.

    :param enable_lstm: when true, run the BLSTM before projecting; otherwise
        project ``embedded_chars`` directly
    :param decode: ``'softmax'`` selects the softmax layer; any other value
        selects the CRF layer plus ``crf.crf_decode``
    :return: ``(loss, logits, trans, pred_ids)``; ``trans`` is ``None`` for softmax
    """
    if self.is_training:
        # Author's note: an LSTM input dropout setting of 0.9 gave the best score.
        self.embedded_chars = tf.nn.dropout(self.embedded_chars, self.dropout_rate)

    if enable_lstm:
        # blstm + project
        lstm_output = self.blstm_layer(self.embedded_chars)
        logits = self.project_bilstm_layer(lstm_output)
    else:
        logits = self.project_embedding_layer(self.embedded_chars)

    # decode
    if decode != 'softmax':
        loss, trans = self.crf_layer(logits)
        # CRF decode: pred_ids is the single highest-scoring tag path
        decoded = crf.crf_decode(potentials=logits,
                                 transition_params=trans,
                                 sequence_length=self.lengths)
        return (loss, logits, trans, decoded[0])

    loss, pred_ids = self.softmax_layer(logits)
    return (loss, logits, None, pred_ids)
def __init__(self, bert_config, is_training, input_ids, input_mask, segment_ids,
             labels, num_labels, max_seq_length, use_one_hot_embeddings):
    """Build a BERT + dense + CRF tagger and an MNLP score.

    Attributes set: ``logits``, ``probs``, ``best_probs``, ``mnlp_score``,
    ``loss`` (``None`` when ``labels`` is ``None``), ``predicts`` and ``score``.

    :param bert_config: configuration passed to ``modeling.BertModel``
    :param is_training: enables BERT training mode and dropout on its output
    :param input_ids: token ids; non-zero entries are counted to get the lengths
    :param input_mask: attention mask passed to BERT
    :param segment_ids: token type ids passed to BERT
    :param labels: gold tag ids, or ``None`` to skip the loss
    :param num_labels: number of tags
    :param max_seq_length: sequence length used to reshape the logits
    :param use_one_hot_embeddings: passed to ``modeling.BertModel``
    """
    # load bert
    bert = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # BERT sequence output
    output_layer = bert.get_sequence_output()
    if is_training:
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
    hidden_size = output_layer.shape[-1].value
    # Flatten to 2-D so one matmul projects every token.
    output_layer = tf.reshape(output_layer, [-1, hidden_size])
    tf.logging.info(" The dimension of bert output:%s" % output_layer.shape)

    # fully connected layer
    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())
    logits = tf.matmul(output_layer, output_weight, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    self.logits = tf.reshape(logits, [-1, max_seq_length, num_labels])

    # MNLP score computed from the fully connected output: mean over the
    # sequence axis of the log of the highest softmax probability.
    self.probs = tf.nn.softmax(self.logits, axis=-1)
    self.best_probs = tf.reduce_max(self.probs, axis=-1)
    self.mnlp_score = tf.reduce_mean(tf.log(self.best_probs), axis=-1)

    # Length of every input sample = number of non-zero token ids.
    # `axis` replaces the deprecated `reduction_indices` alias.
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, axis=1)

    # crf layer
    with tf.variable_scope("crf"):
        trans = tf.get_variable(
            "transitions",
            shape=[num_labels, num_labels],
            initializer=initializers.xavier_initializer())
        if labels is None:
            self.loss = None
        else:
            log_likelihood, trans = tf.contrib.crf.crf_log_likelihood(
                inputs=self.logits,
                tag_indices=labels,
                transition_params=trans,
                sequence_lengths=lengths)
            self.loss = tf.reduce_mean(-log_likelihood)

    self.predicts, self.score = crf.crf_decode(potentials=self.logits,
                                               transition_params=trans,
                                               sequence_length=lengths)
def pred_op(self):
    """Create the prediction op and store it in ``self.plabels_``.

    Uses ``crf_decode`` when ``self.CRF`` is truthy and an argmax over the
    last axis of the logits otherwise. The result is passed through an op
    named ``predlabels`` inside the ``final`` name scope.
    """
    with tf.name_scope("pred"):
        if not self.CRF:
            self.plabels_ = tf.argmax(self.logits, axis=-1)
        else:
            decoded = crf_decode(potentials=self.logits,
                                 transition_params=self.transition_params,
                                 sequence_length=self.sequence_lengths)
            self.plabels_ = decoded[0]

    with tf.name_scope("final"):
        # Multiplying by 1 leaves the values unchanged; it exists to attach
        # the "predlabels" name to the output.
        self.plabels_ = tf.multiply(self.plabels_, 1, name="predlabels")
def get_metrics(self, logits):
    """Print ``logits`` and set ``self.predictions`` according to ``self.type``.

    * ``"classification"``: squeezed argmax over the last axis of ``self.logits``
    * ``"ner"``: tag ids decoded by ``crf.crf_decode`` from ``logits``

    Any other ``self.type`` leaves ``self.predictions`` untouched.
    """
    separator = "=============================="
    print(separator)
    print(logits)
    print(separator)

    task_type = self.type
    if task_type == "classification":
        # NOTE(review): this branch reads self.logits, not the `logits`
        # argument -- confirm the two are the same tensor.
        best_class = tf.argmax(self.logits, axis=-1)
        self.predictions = tf.squeeze(best_class, name="predictions")
    elif task_type == "ner":
        decoded = crf.crf_decode(potentials=logits,
                                 transition_params=self.trans,
                                 sequence_length=self.lengths)
        self.predictions = decoded[0]
def build_export_output(self, model):  # pylint: disable=no-self-use
    """Attach the CRF-decoded export outputs to ``model``.

    Sets ``model.preds``, ``model.score`` (an identity op named ``score``)
    and ``model.output_dict``.
    `score` and `input_y` are for loss calculation.
    `preds` and `y_ground_truth` are for metric calculation.
    """
    decoded_tags, path_score = crf_decode(
        model.logits, model.transitions, model.input_x_len)
    model.preds = decoded_tags
    model.score = tf.identity(path_score, name="score")
    model.output_dict = dict(score=model.score, preds=model.preds)
def __init__(self, num_classes, max_docs, input_size, rnn_units=300,
             dropout_keep=0.9, lr=0.0001, bidirectional=True):
    """Build a (bi)GRU + CRF sequence tagger over documents and start a session.

    :param num_classes: number of CRF tags
    :param max_docs: size of the sequence axis of the input and label placeholders
    :param input_size: feature size of every input step
    :param rnn_units: total GRU units; split evenly between the two
        directions when ``bidirectional`` is true
    :param dropout_keep: stored on the instance as ``self.dropout_keep``
    :param lr: Adam learning rate
    :param bidirectional: use a bidirectional GRU instead of a single one
    """
    self.max_docs = max_docs
    self.dropout_keep = dropout_keep
    self.dropout = tf.placeholder(tf.float32)
    self.rnn_units = rnn_units

    self.doc_input = tf.placeholder(tf.float32, shape=[None, max_docs, input_size])
    self.num_docs = tf.placeholder(tf.int32, shape=[None])
    # Trim inputs and labels to the longest sequence in the batch.
    max_len = tf.reduce_max(self.num_docs)
    doc_input_reduced = self.doc_input[:, :max_len, :]
    doc_input_reduced = tf.nn.dropout(doc_input_reduced, self.dropout)

    self.labels = tf.placeholder(tf.int32, shape=[None, max_docs])
    labels_reduced = self.labels[:, :max_len]

    with tf.variable_scope('rnn', initializer=tf.contrib.layers.xavier_initializer()):
        if bidirectional:
            # Floor division: under Python 3 ``rnn_units / 2`` is a float,
            # which is not a valid unit count for GRUCell.
            units_per_direction = self.rnn_units // 2
            [outputs_fw, outputs_bw], _ = tf.nn.bidirectional_dynamic_rnn(
                GRUCell(units_per_direction), GRUCell(units_per_direction),
                doc_input_reduced, sequence_length=self.num_docs, dtype=tf.float32)
            outputs = tf.concat((outputs_fw, outputs_bw), 2)
        else:
            outputs, _ = tf.nn.dynamic_rnn(
                GRUCell(self.rnn_units),
                doc_input_reduced, sequence_length=self.num_docs, dtype=tf.float32)
    outputs = tf.nn.dropout(outputs, self.dropout)

    # conditional random field
    weights = tf.get_variable(
        "weights", [outputs.shape[2], num_classes],
        initializer=tf.contrib.layers.xavier_initializer())
    # Flatten to 2-D, project to tag scores, then restore the sequence axis.
    matricized_docs = tf.reshape(outputs, [-1, outputs.shape[2]])
    matricized_unary = tf.matmul(matricized_docs, weights)
    unary_scores = tf.reshape(matricized_unary, [-1, max_len, num_classes])
    log_likelihood, transition_params = crf_log_likelihood(
        unary_scores, labels_reduced, self.num_docs)
    preds, viterbi_score = crf_decode(unary_scores, transition_params, self.num_docs)

    # Index pairs used with gather_nd to pick individual predictions
    # out of the decoded tag matrix.
    self.doc_idx = tf.placeholder(tf.int32, shape=[None, 2])
    self.prediction = tf.gather_nd(preds, self.doc_idx)

    # loss and training op
    self.loss = tf.reduce_mean(-log_likelihood)
    self.optimizer = tf.train.AdamOptimizer(lr, 0.9, 0.99).minimize(self.loss)

    # init op
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.saver = tf.train.Saver()
    self.sess = tf.Session(config=config)
    self.sess.run(tf.global_variables_initializer())
def viterbi_decode(self, potentials, sequence_length):
    """Decode the highest scoring sequence of tags in TensorFlow.

    Operates on tensors and uses ``self.transition_params`` as the
    transition matrix.

    Args:
        potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of
            unary potentials.
        sequence_length: A [batch_size] tensor, containing sequence lengths.

    Returns:
        decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32.
            Contains the highest scoring tag indices.
    """
    decoded = crf_decode(potentials, self.transition_params, sequence_length)
    # crf_decode yields (tags, best_score); only the tags are returned.
    decode_tags, _best_score = decoded
    return decode_tags
def make_test(self, input_x, input_y):
    """Build the evaluation graph for the given inputs.

    Dropout is fed through a placeholder that defaults to 1.0.

    :return: ``(loss, pred_ids, real_sentence_length)``
    """
    dropout = tf.placeholder_with_default(1.0, (), name='dropout')
    model_outputs = self.create_model(input_x, dropout)
    logits, real_sentence_length, trans = model_outputs

    with tf.variable_scope('loss'):
        token_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=input_y)
        loss = tf.reduce_mean(token_losses)

    decoded = crf.crf_decode(potentials=logits,
                             transition_params=trans,
                             sequence_length=real_sentence_length)
    # Give the decoded ids a stable node name.
    pred_ids = tf.identity(decoded[0], name=constant.OUTPUT_NODE_NAME)
    return loss, pred_ids, real_sentence_length
def create_model(self, bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    """Build a BERT + tanh projection + CRF tagger.

    :param bert_config: configuration passed to ``modeling.BertModel``
    :param is_training: enables BERT training mode and dropout on its output
    :param input_ids: token ids; non-zero entries are counted to get the lengths
    :param input_mask: attention mask passed to BERT
    :param segment_ids: token type ids passed to BERT
    :param labels: gold tag ids, or ``None`` for inference without a loss
    :param num_labels: number of tags
    :param use_one_hot_embeddings: passed to ``modeling.BertModel``
    :return: ``(loss, logits, trans, pred_ids)``. ``loss`` is ``None`` when
        ``labels`` is ``None``; the tuple always has four elements so callers
        can unpack it the same way in every mode.
    """
    # Load the base BERT model.
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # Per-token output of the whole sentence and its hidden size.
    output_layer = model.get_sequence_output()
    hidden_size = output_layer.shape[-1].value

    # New projection weights and bias, fine-tuned on top of BERT's output.
    output_weights = tf.get_variable(
        "output_weights", [hidden_size, num_labels],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    # Sequence length = number of non-zero token ids.
    # `axis` replaces the deprecated `reduction_indices` alias.
    lengths = tf.reduce_sum(tf.sign(tf.abs(input_ids)), axis=1)

    with tf.variable_scope("logits"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        # Flatten to 2-D so one matmul projects every token.
        output_layer = tf.reshape(output_layer, shape=[-1, hidden_size])
        pred = tf.tanh(
            tf.nn.xw_plus_b(output_layer, output_weights, output_bias))
        logits = tf.reshape(pred, [-1, args.max_seq_len, num_labels])

    with tf.variable_scope("crf_loss"):
        trans = tf.get_variable(
            "transitions", [num_labels, num_labels],
            initializer=tf.truncated_normal_initializer())
        if labels is None:
            # No gold labels: skip the loss but still decode below, so the
            # return shape matches the labelled case.
            loss = None
        else:
            log_likelihood, trans = tf.contrib.crf.crf_log_likelihood(
                inputs=logits,
                tag_indices=labels,
                transition_params=trans,
                sequence_lengths=lengths)
            loss = tf.reduce_mean(-log_likelihood)

    pred_ids, _ = crf.crf_decode(potentials=logits,
                                 transition_params=trans,
                                 sequence_length=lengths)
    return (loss, logits, trans, pred_ids)
def get_model(self):
    """Build a BiGRU + Conv1D + CRF tagger and return it with an initialised session.

    :return: ``(session, ph_sequence_lengths, ph_x, ph_y, loss, train_opt,
        viterbi_sequence)``
    """
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    with session.graph.as_default():
        # shape (batch_size, sentence_len, wordvec_size)
        ph_x = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self._sentence_len, self._wordvec_size])
        # shape (batch_size, sentence_len)
        ph_y = tf.placeholder(dtype=tf.int32,
                              shape=[None, self._sentence_len])
        ph_sequence_lengths = tf.placeholder(dtype=tf.int32, shape=[None, ])

        # NOTE(review): the dense width comes from config.WORDVEC_SIZE while
        # the placeholder uses self._wordvec_size -- confirm they agree.
        embeddings = keras.layers.Dense(config.WORDVEC_SIZE)(ph_x)

        # Recurrent branch.
        recurrent = keras.layers.GRU(200, return_sequences=True)
        bigru = keras.layers.Bidirectional(recurrent)(embeddings)
        bigru = keras.layers.Dropout(0.5)(bigru)

        # Convolutional branch: zero-pad by half the window on each side and
        # convolve with a window of 2 * half_window_size + 1.
        half_window_size = 2
        window = 2 * half_window_size + 1
        padding_layer = keras.layers.ZeroPadding1D(
            padding=half_window_size)(embeddings)
        conv = keras.layers.Conv1D(100, window)(padding_layer)
        conv_d = keras.layers.Dropout(0.5)(conv)
        dense_conv = keras.layers.TimeDistributed(
            keras.layers.Dense(100))(conv_d)

        # Merge both branches on the feature axis and project to tag scores.
        rnn_cnn = tf.concat([bigru, dense_conv], axis=2)
        dense = keras.layers.Dense(self._classes)(rnn_cnn)
        unary_scores = keras.layers.Dropout(0.5)(dense)

        crf_outputs = crf.crf_log_likelihood(
            unary_scores, ph_y, ph_sequence_lengths)
        log_likelihood, transition_params = crf_outputs
        loss = tf.reduce_mean(-log_likelihood)

        viterbi_sequence, viterbi_score = crf.crf_decode(
            unary_scores, transition_params, ph_sequence_lengths)
        train_opt = tf.train.AdamOptimizer(self._study_rate).minimize(loss)

        init = tf.global_variables_initializer()
        session.run(init)

    return (session, ph_sequence_lengths, ph_x, ph_y, loss, train_opt,
            viterbi_sequence)
def viterbi_decode(self, potentials, sequence_length):
    """Decode the best tag sequence inside TensorFlow.

    Wraps ``crf_decode(potentials, transition_params, sequence_length)``,
    using ``self.transition_params`` as the transition matrix.

    Args:
        potentials: a tensor of shape [batch_size, max_seq_len, num_tags].
        sequence_length: a tensor of shape [batch_size] holding the length of
            every sequence in the batch.

    Returns:
        decode_tags: a tensor of shape [batch_size, max_seq_len] with dtype
            tf.int32 holding the best tag sequence. The per-sequence score of
            shape [batch_size] that ``crf_decode`` also produces is discarded.
    """
    decoded = crf_decode(potentials, self.transition_params, sequence_length)
    decode_tags, _best_score = decoded
    return decode_tags
def build_output(self, model):  # pylint: disable=no-self-use
    """Attach the CRF-decoded outputs to ``model``.

    Sets ``model.preds``, ``model.score`` (an identity op named ``score``)
    and ``model.y_ground_truth``. If ``model.use_pretrained_model`` is set,
    the pretrained model variables are initialised as well.
    `score` and `input_y` are for loss calculation.
    `preds` and `y_ground_truth` are for metric calculation.
    """
    decoded_tags, path_score = crf_decode(
        model.logits, model.transitions, model.input_x_len)
    model.preds = decoded_tags
    model.score = tf.identity(path_score, name="score")
    model.y_ground_truth = model.input_y

    if not model.use_pretrained_model:
        return

    logging.info("initialize_pretrained_model_variables")
    self.initialize_pretrained_model_variables(
        model.pretrained_model_path, model.pretrained_model_mode)
def add_crf_layer(self):
    """Build the projection + CRF head directly on ``self.embedded_chars``.

    :return: ``(loss, logits, trans, pred_ids)``
    """
    if self.is_training:
        # Author's note: an input dropout rate of 0.5 gave the best score.
        self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                            self.droupout_rate)

    # project
    logits = self.project_layer(self.embedded_chars)

    # crf
    crf_outputs = self.crf_layer(logits)
    loss, trans = crf_outputs

    # CRF decode: pred_ids is the single highest-scoring tag path
    decoded = crf.crf_decode(potentials=logits,
                             transition_params=trans,
                             sequence_length=self.lengths)
    pred_ids = decoded[0]
    return (loss, logits, trans, pred_ids)
def make_pb_file(self, model_dir):
    """Freeze the latest checkpoint in ``model_dir`` into ``ner.pb``.

    Rebuilds the inference graph, restores the most recent checkpoint,
    converts the variables to constants and writes the serialized graph
    next to the checkpoint.

    :param model_dir: directory holding the checkpoint; also the output directory
    :return: path of the written ``ner.pb`` file
    :raises FileNotFoundError: when ``model_dir`` contains no checkpoint
    """
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)
        session_conf.gpu_options.allow_growth = True
        session_conf.gpu_options.per_process_gpu_memory_fraction = 0.9

        # Using the session as a context manager makes it the default
        # session and closes it on exit, including when the checkpoint is
        # missing; `sess.as_default()` alone would leave it open.
        with tf.Session(config=session_conf) as sess:
            input_ids = tf.placeholder(
                dtype=tf.int32,
                shape=(None, self.params.max_sentence_length),
                name=constant.INPUT_NODE_NAME)
            input_mask = tf.placeholder(
                dtype=tf.int32,
                shape=(None, self.params.max_sentence_length),
                name=constant.INPUT_MASK_NAME)
            dropout = tf.placeholder_with_default(1.0, shape=(), name='dropout')

            logits, real_sentence_length, trans = self.create_model(
                input_ids,
                input_mask,
                segment_ids=None,
                is_training=False,
                dropout=dropout)
            pred_ids, _ = crf.crf_decode(
                potentials=logits,
                transition_params=trans,
                sequence_length=real_sentence_length)
            # The identity op gives the output the node name that is frozen below.
            pred_ids = tf.identity(pred_ids, name=constant.OUTPUT_NODE_NAME)

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
            checkpoint = tf.train.latest_checkpoint(model_dir)
            if not checkpoint:
                raise FileNotFoundError("模型文件未找到")
            saver.restore(sess, checkpoint)

            output_graph_with_weight = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, [constant.OUTPUT_NODE_NAME])

            pb_path = os.path.join(model_dir, 'ner.pb')
            with tf.gfile.GFile(pb_path, 'wb') as gf:
                gf.write(output_graph_with_weight.SerializeToString())

    return pb_path
def add_bilstm_crf_layer(self, crf_only):
    """Build the BiLSTM-CRF network.

    :param crf_only: when true, project ``embedded_chars`` directly;
        otherwise run the BiLSTM layer first
    :return: ``(loss, logits, trans, pred_ids)``
    """
    if self.is_training:
        self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                            keep_prob=self.dropout_rate)

    if not crf_only:
        # BiLSTM, then projection to the logits
        lstm_output = self.bilstm_layer(self.embedded_chars)
        logits = self.project_bilstm_layer(lstm_output)
    else:
        # Direct projection of the embeddings to the logits
        logits = self.project_crf_layer(self.embedded_chars)

    crf_outputs = self.crf_layer(logits)
    loss, trans = crf_outputs

    # Decode tag ids with crf_decode.
    decoded = crf.crf_decode(potentials=logits,
                             transition_params=trans,
                             sequence_length=self.lengths)
    pred_ids = decoded[0]
    return loss, logits, trans, pred_ids
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Build a BERT + linear projection + CRF tagger.

    Sequence lengths are the number of non-zero entries in each row of
    ``input_ids``.

    :return: ``(loss, logits, predict)`` where ``loss`` is the negated mean
        CRF log-likelihood and ``predict`` the tag ids from ``crf_decode``
    """
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    nonzero_tokens = tf.sign(tf.abs(input_ids))
    sequence_lengths = tf.reduce_sum(nonzero_tokens, axis=1)

    output_layer = model.get_sequence_output()
    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        # Flatten to 2-D, project every token, then restore the sequence axis.
        flat_output = tf.reshape(output_layer, [-1, hidden_size])
        flat_logits = tf.matmul(flat_output, output_weight, transpose_b=True)
        flat_logits = tf.nn.bias_add(flat_logits, output_bias)
        logits = tf.reshape(flat_logits,
                            [-1, FLAGS.max_seq_length, num_labels])

        crf_outputs = crf_log_likelihood(inputs=logits,
                                         tag_indices=labels,
                                         sequence_lengths=sequence_lengths)
        log_likelihood, transition_params = crf_outputs
        loss = -tf.reduce_mean(log_likelihood)

        predict, best_score = crf_decode(potentials=logits,
                                         transition_params=transition_params,
                                         sequence_length=sequence_lengths)
        return (loss, logits, predict)
def add_blstm_crf_layer(self):
    """Build the BLSTM-CRF network.

    :return: ``(loss, logits, trans, pred_ids)``
    """
    if self.is_training:
        # Author's note: an LSTM input dropout rate of 0.5 gave the best score.
        self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                            self.droupout_rate)

    # blstm
    encoded = self.blstm_layer(self.embedded_chars)
    # project
    logits = self.project_bilstm_layer(encoded)
    # crf
    crf_outputs = self.crf_layer(logits)
    loss, trans = crf_outputs

    # CRF decode: pred_ids is the single highest-scoring tag path
    decoded = crf.crf_decode(potentials=logits,
                             transition_params=trans,
                             sequence_length=self.lengths)
    pred_ids = decoded[0]
    return (loss, logits, trans, pred_ids)