Example #1
    def __init__(self,
                 input,
                 labels,
                 num_labels,
                 lengths,
                 is_training,
                 dropout_rate=0.7):
        """

        :param input:
        :param labels:
        :param num_labels: label的种类数,因为CRF是状态转移,因此label为一个状态
        :param lengths: batch中每个句子的实际长度
        :param is_training:
        :param dropout_rate:
        """
        self.labels = labels
        self.num_labels = num_labels

        if is_training:
            input = tf.nn.dropout(input, dropout_rate)
        # project
        self.logits = self._project_layer(input, num_labels)
        if is_training:
            self.logits = tf.nn.dropout(self.logits, dropout_rate)
        # crf
        self.log_likelihood, self.trans = self._crf_log_likelihood(
            self.labels, self.logits, lengths, num_labels)
        # CRF decode: pred_ids is the highest-probability tag path
        self.pred_ids, _ = crf.crf_decode(potentials=self.logits,
                                          transition_params=self.trans,
                                          sequence_length=lengths)
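
The constructor above keeps the raw log_likelihood; the other examples in this collection reduce it to a scalar training loss. A minimal sketch of that step, assuming a self.loss attribute is wanted (the attribute name is hypothetical):

        # Hypothetical extra line for the same __init__: the usual CRF training
        # loss is the mean negative log-likelihood over the batch.
        self.loss = tf.reduce_mean(-self.log_likelihood)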
Example #2
    def make_test(self,
                  input_x=None,
                  input_y=None,
                  dropout=None,
                  input_mask=None,
                  input_segment=None,
                  use_tfrecord=False):
        if not use_tfrecord:
            input_x, input_y, dropout, input_mask, input_segment = self.create_placeholder(
            )
        else:
            dropout = tf.placeholder_with_default(1.0,
                                                  shape=[],
                                                  name='dropout')

        logits, real_sentence_length, trans = self.create_model(
            input_x,
            dropout,
            is_training=False,
            input_mask=input_mask,
            input_segment=input_segment)
        with tf.variable_scope('loss'):
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                               labels=input_y))
        pred_ids, _ = crf.crf_decode(potentials=logits,
                                     transition_params=trans,
                                     sequence_length=real_sentence_length)
        pred_ids = tf.identity(pred_ids, name=constant.OUTPUT_NODE_NAME)
        return loss, pred_ids, real_sentence_length
Example #3
    def add_bilstm_crf_layer(self):
        if self.is_training:
            self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                                self.dropout_rate)

        if not self.bilstm:
            # project layer
            logits = self.project_crf_layer(self.embedded_chars)
        else:
            # gcn_layer
            gcn_output = self.GCN_layer(A_fw=self.forward, A_bw=self.backward)
            # bilstm_layer
            lstm_output = self.bilstm_layer(gcn_output)
            # project layer
            logits = self.project_bilstm_layer(lstm_output)

        if not self.crf:
            # softmax layer
            pred_ids, loss = self.softmax_layer(logits)
        else:
            # crf_layer
            loss, trans = self.crf_layer(logits)
            # CRF decode: pred_ids is the highest-probability tag path
            pred_ids, _ = crf.crf_decode(potentials=logits,
                                         transition_params=trans,
                                         sequence_length=self.lengths)

        return (loss, pred_ids)
Example #4
    def add_blstm_crf_layer(self, crf_only):
        """
        BLSTM-CRF network
        :return:
        """
        if self.is_training:
            # LSTM input dropout; a rate of 0.9 gave the best score
            self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                                self.dropout_rate)

        if crf_only:
            # logits = [batch_size, seq_length, num_labels]
            logits = self.project_crf_layer()
        else:
            # blstm
            lstm_output = self.blstm_layer(self.embedded_chars)
            # project
            logits = self.project_bilstm_layer(lstm_output)
        # crf
        loss, trans = self.crf_layer(logits)
        # CRF decode: pred_ids is the highest-probability tag path
        pred_ids, _ = crf.crf_decode(potentials=logits,
                                     transition_params=trans,
                                     sequence_length=self.lengths)
        return (loss, logits, trans, pred_ids)
Example #5
    def get_model(self):
        graph = tf.Graph()
        with graph.as_default():
            ph_x = tf.placeholder(dtype=tf.float32, shape=[None, self.sentence_len,
                                                           self.wordvec_size])  # shape (batch_size, sentence_len, wordvec_size)

            ph_y = tf.placeholder(dtype=tf.int32, shape=[None, self.sentence_len])  # shape (batch_size, sentence_len)

            ph_sequence_lengths = tf.placeholder(dtype=tf.int32, shape=[None, ])

            bigru = keras.layers.Bidirectional(
                keras.layers.GRU(256, return_sequences=True, dropout=0.5))(ph_x)
            bigru2 = keras.layers.Bidirectional(
                keras.layers.GRU(512, return_sequences=True, dropout=0.5))(bigru)

            w = tf.Variable(tf.random_normal(shape=[1024, self.classes]))

            bigru2 = tf.reshape(bigru2, shape=[-1, 1024])
            unary_scores = tf.matmul(bigru2, w)
            unary_scores = tf.reshape(unary_scores, shape=[-1, self.sentence_len, self.classes])

            log_likelihood, transition_params = crf.crf_log_likelihood(unary_scores, ph_y, ph_sequence_lengths)
            loss = tf.reduce_mean(-log_likelihood)

            viterbi_sequence, viterbi_score = crf.crf_decode(unary_scores, transition_params, ph_sequence_lengths)

            train_opt = tf.train.AdamOptimizer(self.study_rate).minimize(loss)

            correct_pred = tf.equal(viterbi_sequence, ph_y)

            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))  # accuracy

            return graph, ph_sequence_lengths, ph_x, ph_y, loss, train_opt, accuracy, viterbi_sequence
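
A hedged driver sketch for the handles returned above; model is an instance of this class, and batch_x, batch_y, batch_lens are stand-in numpy arrays shaped like the placeholders:

    graph, ph_lens, ph_x, ph_y, loss, train_opt, accuracy, viterbi_seq = model.get_model()
    with graph.as_default():
        init = tf.global_variables_initializer()
    with tf.Session(graph=graph) as sess:
        sess.run(init)
        # one training step plus metrics, fed through the placeholders
        _, step_loss, step_acc = sess.run(
            [train_opt, loss, accuracy],
            feed_dict={ph_x: batch_x, ph_y: batch_y, ph_lens: batch_lens})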
Example #6
 def add_blstm_crf_layer(self):
     """
     BLSTM-CRF network
     :return:
     """
     if self.is_training:
         # LSTM input dropout; a rate of 0.5 gave the best score
         self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                             self.droupout_rate)
     # blstm
     # with tf.variable_scope("densely_connected_bi_rnn"):
     #
     #     dense_bi_rnn = DenselyConnectedBiRNN(4, [50,50,50,150],
     #                                          cell_type='lstm')
     #     context = dense_bi_rnn(self.embedded_chars, seq_len=self.lengths)
     # logits = tf.layers.dense(context, units=self.num_labels)
     lstm_output = self.blstm_layer(self.embedded_chars)
     # project
     logits = self.project_bilstm_layer(lstm_output)
     # crf
     loss, trans = self.crf_layer(logits)
     # CRF decode: pred_ids is the highest-probability tag path
     pred_ids, _ = crf.crf_decode(potentials=logits,
                                  transition_params=trans,
                                  sequence_length=self.lengths)
     return (loss, logits, trans, pred_ids)
Example #7
    def add_blstm_crf_layer(self, crf_only, name=None):
        """
        BLSTM-CRF network
        :return:
        """
        if self.is_training:
            # LSTM input dropout; a rate of 0.9 gave the best score
            self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                                self.dropout_rate)

        if crf_only:
            logits = self.project_crf_layer(
                name=name)  # no BLSTM, CRF only: the input is the sequence produced by the BERT model
        else:
            # blstm
            lstm_output = self.blstm_layer(self.embedded_chars)
            # project
            logits = self.project_blstm_layer(lstm_output)
        # crf
        loss, trans = self.crf_layer(logits, name=name)
        # CRF decode: pred_ids is the highest-probability tag path
        pred_ids, _ = crf.crf_decode(potentials=logits,
                                     transition_params=trans,
                                     sequence_length=self.lengths)
        return loss, logits, trans, pred_ids
Example #8
    def __init__(self,
                 input_tensor,
                 num_labels,
                 labels=None,
                 lengths=None,
                 scope_name='CRF'):
        with tf.variable_scope(scope_name):
            trans = tf.get_variable("transitions",
                                    shape=[num_labels, num_labels],
                                    initializer=create_initializer(0.02))

            if labels is not None:
                log_likelihood, trans = crf.crf_log_likelihood(
                    inputs=input_tensor,
                    tag_indices=labels,
                    transition_params=trans,
                    sequence_lengths=lengths)

                self.loss = tf.reduce_mean(-log_likelihood)

            self.trans = trans

            self.pred_ids, _ = crf.crf_decode(potentials=input_tensor,
                                              transition_params=self.trans,
                                              sequence_length=lengths)
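
This head only defines self.loss when labels is passed, so one class serves both training and inference. A hypothetical usage sketch; the class name CRFHead and the tensors seq_logits, tag_ids, seq_lens are stand-ins:

    # Training: labels given, so .loss exists and can be minimized.
    head = CRFHead(input_tensor=seq_logits, num_labels=num_labels,
                   labels=tag_ids, lengths=seq_lens)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(head.loss)

    # Inference: labels omitted; only .pred_ids and .trans are available.
    infer_head = CRFHead(input_tensor=seq_logits, num_labels=num_labels,
                         lengths=seq_lens, scope_name='CRF_infer')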
Example #9
    def add_bilstm_crf_layer(self, pos_ids):
        if self.is_training:
            self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                                self.dropout_rate)

        if not self.bilstm:
            # project layer
            logits = self.project_crf_layer(self.embedded_chars)
        else:
            # bilstm_layer
            lstm_output = self.bilstm_layer(pos_ids)
            # cnn_layer
            cnn_output = self.cnn_layer(lstm_output)
            #link layer

            # project layer
            logits = self.project_bilstm_layer(cnn_output, pos_ids)

        if not self.crf:
            # softmax layer
            pred_ids, loss = self.softmax_layer(logits)
        else:
            # crf_layer
            loss, trans = self.crf_layer(logits)
            # CRF decode: pred_ids is the highest-probability tag path
            pred_ids, _ = crf.crf_decode(potentials=logits,
                                         transition_params=trans,
                                         sequence_length=self.lengths)

        return (loss, pred_ids)
Example #10
    def get_prediction_module(self, bert_model, features, is_training,
                              percent_done):
        n_classes = len(self._get_label_mapping())
        reprs = bert_model.get_sequence_output()
        reprs = pretrain_helpers.gather_positions(
            reprs, features[self.name + "_labeled_positions"])
        seq_lengths = tf.cast(
            tf.reduce_sum(features[self.name + "_labels_mask"], axis=1),
            tf.int32)
        logits = tf.layers.dense(reprs, n_classes)

        with tf.variable_scope("crf", reuse=tf.AUTO_REUSE):
            trans_val = tf.get_variable("transition",
                                        shape=[n_classes, n_classes],
                                        dtype=tf.float32)
        predict_ids, _ = crf.crf_decode(logits, trans_val, seq_lengths)
        actual_ids = features[self.name + "_labels"]
        log_likelihood, _ = crf.crf_log_likelihood(
            inputs=logits,
            tag_indices=actual_ids,
            sequence_lengths=seq_lengths,
            transition_params=trans_val)
        losses = -log_likelihood

        return losses, dict(
            loss=losses,
            logits=logits,
            predictions=predict_ids,
            labels=features[self.name + "_labels"],
            labels_mask=features[self.name + "_labels_mask"],
            labeled_positions=features[self.name + "_labeled_positions"],
            eid=features[self.name + "_eid"],
        )
Example #11
    def add_blstm_crf_layer(self, crf_only):
        """
        blstm-crf
        """
        if self.is_training:
            self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                                self.dropout_rate)

        if crf_only:
            logits = self.project_crf_layer(self.embedded_chars)
        else:
            #blstm
            lstm_output = self.blstm_layer(self.embedded_chars)
            #project
            logits = self.project_bilstm_layer(lstm_output)

        #crf
        loss, trans = self.crf_layer(logits)
        print(self.labels)
        #
        # CRF decode: pred_ids is the highest-probability tag path
        if self.is_training:
            return (loss, logits, trans, None)

        pred_ids, _ = crf.crf_decode(potentials=logits,
                                     transition_params=trans,
                                     sequence_length=self.lengths)

        return (None, logits, None, pred_ids)
Example #12
    def add_blstm_crf_layer(self, crf_only):
        """
        BiLSTM-CRF network.

        Return:

        """
        if self.is_training:
            # LSTM input dropout; a rate of 0.9 gave the best score
            self.embedding_inputs = tf.nn.dropout(self.embedding_inputs,
                                                  self.dropout_rate)

        if crf_only:
            # CRF layer only
            logits = self.project_crf_layer(self.embedding_inputs)
        else:
            # bi-lstm
            lstm_output = self.blstm_layer(self.embedding_inputs)
            # project
            logits = self.project_bilstm_layer(lstm_output)
        # crf
        loss, per_example_loss, trans = self.crf_layer(logits)
        # CRF decode: pred_ids is the highest-probability tag path
        probabilities, _ = crf.crf_decode(
            potentials=logits,
            transition_params=trans,
            sequence_length=self.sequence_lengths)
        # pred_ids
        return (loss, per_example_loss, logits, probabilities)
Example #13
 def add_blstm_crf_layer(self, crf_only=False, lstm_only=False):
     """
     BLSTM-CRF network
     :return:
     """
     if self.is_training:
         # LSTM input dropout; a rate of 0.9 gave the best score
         self.embedded_chars = tf.nn.dropout(self.embedded_chars, self.dropout_rate)
     if lstm_only:
         # blstm
         lstm_output = self.blstm_layer(self.embedded_chars)
         # project
         logits = tf.layers.dense(lstm_output, self.num_labels, name='project',
                                  kernel_regularizer=tf.keras.regularizers.l2(1e-5))
         loss, pred_ids = self._softmax_layer(logits, self.labels, self.num_labels, self.input_mask)
     else:
         if crf_only:
             logits = self.project_crf_layer(self.embedded_chars)
         else:
             # blstm
             lstm_output = self.blstm_layer(self.embedded_chars)
             # project
             logits = self.project_bilstm_layer(lstm_output)
         # crf
         loss, trans = self.crf_layer(logits)
         # CRF decode: pred_ids is the highest-probability tag path
         pred_ids, viterbi_score = crf.crf_decode(potentials=logits, transition_params=trans, sequence_length=self.lengths)
     
     return (loss, logits, pred_ids)
Example #14
  def build_export_output(self, model):  # pylint: disable=no-self-use
    """
    Build the output of the model for export.
    `score` and `input_y` are for loss calculation.
    `preds` and `y_ground_truth` are for metric calculation.
    """
    transitions = model.transitions
    intent_logits, slots_logits = model.logits

    intent_score = tf.nn.softmax(intent_logits, name="intent_score")
    intent_preds = tf.argmax(intent_logits, axis=-1, name="intent_preds")

    slots_preds, slots_score = crf_decode(slots_logits, transitions,
                                          model.input_x_len)

    slots_preds = tf.identity(slots_preds, name="slots_preds")
    slots_score = tf.identity(slots_score, name="slots_score")

    model.preds = intent_preds, slots_preds
    model.score = intent_score, slots_score
    model.output_dict = {
        "slots_score": slots_score,
        "slots_preds": slots_preds,
        "intent_score": intent_score,
        "intent_preds": intent_preds
    }
    logging.info("Model built.")
Example #15
    def add_blstm_crf_layer(self, enable_lstm=False, decode='softmax'):
        """
        BLSTM-CRF network
        :return:
        """
        if self.is_training:
            # LSTM input dropout; a rate of 0.9 gave the best score
            self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                                self.dropout_rate)

        if not enable_lstm:
            logits = self.project_embedding_layer(self.embedded_chars)
        else:
            # blstm
            lstm_output = self.blstm_layer(self.embedded_chars)
            # project
            logits = self.project_bilstm_layer(lstm_output)
        # decode
        if decode == 'softmax':
            loss, pred_ids = self.softmax_layer(logits)
            return (loss, logits, None, pred_ids)
        else:
            loss, trans = self.crf_layer(logits)
            # CRF decode: pred_ids is the highest-probability tag path
            pred_ids, _ = crf.crf_decode(potentials=logits,
                                         transition_params=trans,
                                         sequence_length=self.lengths)
            return (loss, logits, trans, pred_ids)
Example #16
    def __init__(self, bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, max_seq_length,
                 use_one_hot_embeddings):
        # load bert
        bert = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)
        # get BERT's sequence output
        output_layer = bert.get_sequence_output()
        # self.all_encoder_layers = bert.get_all_encoder_layers()
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        hidden_size = output_layer.shape[-1].value
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        tf.logging.info(" The dimension of bert output:%s" %
                        output_layer.shape)

        # dense (fully connected) layer
        output_weight = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable("output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        self.logits = tf.reshape(logits, [-1, max_seq_length, num_labels])

        # compute the MNLP score from the dense-layer output
        self.probs = tf.nn.softmax(self.logits, axis=-1)
        self.best_probs = tf.reduce_max(self.probs, axis=-1)
        self.mnlp_score = tf.reduce_mean(tf.log(self.best_probs), axis=-1)

        # compute the actual length of each input sample
        used = tf.sign(tf.abs(input_ids))
        lengths = tf.reduce_sum(used, reduction_indices=1)

        # CRF layer
        with tf.variable_scope("crf"):
            trans = tf.get_variable(
                "transitions",
                shape=[num_labels, num_labels],
                initializer=initializers.xavier_initializer())
            if labels is None:
                self.loss = None
            else:
                log_likelihood, trans = tf.contrib.crf.crf_log_likelihood(
                    inputs=self.logits,
                    tag_indices=labels,
                    transition_params=trans,
                    sequence_lengths=lengths)
                self.loss = tf.reduce_mean(-log_likelihood)

            self.predicts, self.score = crf.crf_decode(potentials=self.logits,
                                                       transition_params=trans,
                                                       sequence_length=lengths)
Example #17
 def pred_op(self):
     with tf.name_scope("pred"):
         if self.CRF:
             self.plabels_, _ = crf_decode(potentials=self.logits,
                                           transition_params=self.transition_params,
                                           sequence_length=self.sequence_lengths)
         else:
             self.plabels_ = tf.argmax(self.logits, axis=-1)
     with tf.name_scope("final"):
         self.plabels_ = tf.multiply(self.plabels_, 1, name="predlabels")
Example #18
 def get_metrics(self, logits):
     print("==============================")
     print(logits)
     print("==============================")
     if self.type == "classification":
         self.predictions = tf.squeeze(tf.argmax(self.logits, axis=-1),
                                       name="predictions")
     elif self.type == "ner":
         self.predictions, _ = crf.crf_decode(potentials=logits,
                                              transition_params=self.trans,
                                              sequence_length=self.lengths)
Example #19
    def build_export_output(self, model):  # pylint: disable=no-self-use
        """
    Build the output of the model.
    `score` and `input_y` are for loss calculation.
    `preds` and `y_ground_truth` are for metric calculation.
    """
        model.preds, score = crf_decode(model.logits, model.transitions,
                                        model.input_x_len)

        model.score = tf.identity(score, name="score")
        model.output_dict = {"score": model.score, "preds": model.preds}
Example #20
    def __init__(self,num_classes,max_docs,input_size,rnn_units=300,
                 dropout_keep=0.9,lr=0.0001,bidirectional=True):
        
        self.max_docs = max_docs
        self.dropout_keep = dropout_keep
        self.dropout = tf.placeholder(tf.float32)
        self.rnn_units = rnn_units

        self.doc_input = tf.placeholder(tf.float32, shape=[None,max_docs,input_size])
        self.num_docs = tf.placeholder(tf.int32, shape=[None])
        max_len = tf.reduce_max(self.num_docs)
        doc_input_reduced = self.doc_input[:,:max_len,:]
        doc_input_reduced = tf.nn.dropout(doc_input_reduced,self.dropout)

        self.labels = tf.placeholder(tf.int32,shape=[None,max_docs])
        labels_reduced = self.labels[:,:max_len]

        with tf.variable_scope('rnn',initializer=tf.contrib.layers.xavier_initializer()):

            if bidirectional:
                [outputs_fw,outputs_bw],_ = tf.nn.bidirectional_dynamic_rnn(
                            GRUCell(self.rnn_units // 2), GRUCell(self.rnn_units // 2),  # integer division keeps num_units an int under Python 3
                            doc_input_reduced,sequence_length=self.num_docs,dtype=tf.float32)
                outputs = tf.concat((outputs_fw,outputs_bw),2)

            else:
                outputs,_ = tf.nn.dynamic_rnn(GRUCell(self.rnn_units),
                            doc_input_reduced,sequence_length=self.num_docs,dtype=tf.float32)

        outputs = tf.nn.dropout(outputs,self.dropout)
        
        #conditional random field
        weights = tf.get_variable("weights",[outputs.shape[2],num_classes],initializer=tf.contrib.layers.xavier_initializer())
        matricized_docs = tf.reshape(outputs,[-1,outputs.shape[2]])
        matricized_unary = tf.matmul(matricized_docs,weights)
        unary_scores = tf.reshape(matricized_unary,[-1,max_len,num_classes])
        
        log_likelihood, transition_params = crf_log_likelihood(unary_scores,labels_reduced,self.num_docs)
        preds,viterbi_score = crf_decode(unary_scores,transition_params,self.num_docs)

        self.doc_idx = tf.placeholder(tf.int32, shape=[None,2])
        self.prediction = tf.gather_nd(preds,self.doc_idx)

        #loss and training functions
        self.loss = tf.reduce_mean(-log_likelihood)
        self.optimizer = tf.train.AdamOptimizer(lr,0.9,0.99).minimize(self.loss)

        #init op
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.saver = tf.train.Saver()
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())
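
A hypothetical single training step for this class; model is an instance, and batch_x, batch_lens, batch_y are stand-in numpy arrays shaped like the placeholders above:

    feed = {model.doc_input: batch_x,           # [batch, max_docs, input_size]
            model.num_docs: batch_lens,         # [batch]
            model.labels: batch_y,              # [batch, max_docs]
            model.dropout: model.dropout_keep}  # keep_prob during training
    _, step_loss = model.sess.run([model.optimizer, model.loss], feed_dict=feed)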
Example #21
    def viterbi_decode(self, potentials, sequence_length):
        """Decode the highest scoring sequence of tags in TensorFlow.
        This is a function for tensor.
        Args:
            potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of unary potentials.
            sequence_length: A [batch_size] tensor, containing sequence lengths.
        Returns:
            decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32.
                         Contains the highest scoring tag indices.
        """
        decode_tags, best_score = crf_decode(potentials, self.transition_params, sequence_length)

        return decode_tags
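
A self-contained usage sketch of crf_decode with the shapes from the docstring above (TF 1.x, tf.contrib.crf); every tensor here is a made-up dummy:

    import tensorflow as tf
    from tensorflow.contrib import crf

    batch_size, max_seq_len, num_tags = 2, 5, 4
    potentials = tf.random_normal([batch_size, max_seq_len, num_tags])
    transition_params = tf.random_normal([num_tags, num_tags])
    sequence_length = tf.constant([5, 3], dtype=tf.int32)

    # decode_tags: [batch_size, max_seq_len] int32; best_score: [batch_size] float32
    decode_tags, best_score = crf.crf_decode(potentials, transition_params, sequence_length)

    with tf.Session() as sess:
        tags, scores = sess.run([decode_tags, best_score])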
Example #22
    def make_test(self, input_x, input_y):
        dropout = tf.placeholder_with_default(1.0, (), name='dropout')

        logits, real_sentence_length, trans = self.create_model(
            input_x, dropout)
        with tf.variable_scope('loss'):
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                               labels=input_y))
        pred_ids, _ = crf.crf_decode(potentials=logits,
                                     transition_params=trans,
                                     sequence_length=real_sentence_length)
        pred_ids = tf.identity(pred_ids, name=constant.OUTPUT_NODE_NAME)
        return loss, pred_ids, real_sentence_length
Example #23
    def create_model(self, bert_config, is_training, input_ids, input_mask,
                     segment_ids, labels, num_labels, use_one_hot_embeddings):
        # load the base BERT model
        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)
        # get the output for the whole sequence
        output_layer = model.get_sequence_output()
        # take the sequence-level output and read off the last layer's hidden size
        hidden_size = output_layer.shape[-1].value
        # define a new weight matrix W and bias b; fine-tune by combining them with the last layer's output
        output_weights = tf.get_variable(
            "output_weights", [hidden_size, num_labels],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable("output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())
        lengths = tf.reduce_sum(tf.sign(tf.abs(input_ids)),
                                reduction_indices=1)
        # redefine the loss
        with tf.variable_scope("logits"):
            if is_training:
                output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
            output_layer = tf.reshape(output_layer, shape=[-1, hidden_size])
            # output of the fine-tuned model
            pred = tf.tanh(
                tf.nn.xw_plus_b(output_layer, output_weights, output_bias))

            logits = tf.reshape(pred, [-1, args.max_seq_len, num_labels])

        with tf.variable_scope("crf_loss"):
            trans = tf.get_variable(
                "transitions", [num_labels, num_labels],
                initializer=tf.truncated_normal_initializer())
            if labels is None:
                return None, trans
            else:
                log_likelihood, trans = tf.contrib.crf.crf_log_likelihood(
                    inputs=logits,
                    tag_indices=labels,
                    transition_params=trans,
                    sequence_lengths=lengths)
                loss = tf.reduce_mean(-log_likelihood)
        pred_ids, _ = crf.crf_decode(potentials=logits,
                                     transition_params=trans,
                                     sequence_length=lengths)
        return (loss, logits, trans, pred_ids)
Example #24
    def get_model(self):
        graph = tf.Graph()
        session = tf.Session(graph=graph)
        with session.graph.as_default():
            ph_x = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self._sentence_len, self._wordvec_size
                       ])  # shape (batch_size, sentence_len, wordvec_size)
            ph_y = tf.placeholder(dtype=tf.int32,
                                  shape=[None, self._sentence_len
                                         ])  # shape (batch_size, sentence_len)
            ph_sequence_lengths = tf.placeholder(dtype=tf.int32,
                                                 shape=[
                                                     None,
                                                 ])

            embeddings = keras.layers.Dense(config.WORDVEC_SIZE)(ph_x)

            # mask = keras.layers.Masking(mask_value=0.)(ph_x)
            bigru = keras.layers.Bidirectional(
                keras.layers.GRU(200, return_sequences=True))(embeddings)
            bigru = keras.layers.Dropout(0.5)(bigru)

            half_window_size = 2
            padding_layer = keras.layers.ZeroPadding1D(
                padding=half_window_size)(embeddings)
            conv = keras.layers.Conv1D(100,
                                       2 * half_window_size + 1)(padding_layer)
            conv_d = keras.layers.Dropout(0.5)(conv)
            dense_conv = keras.layers.TimeDistributed(
                keras.layers.Dense(100))(conv_d)

            rnn_cnn = tf.concat([bigru, dense_conv], axis=2)

            dense = keras.layers.Dense(self._classes)(rnn_cnn)
            unary_scores = keras.layers.Dropout(0.5)(dense)

            log_likelihood, transition_params = crf.crf_log_likelihood(
                unary_scores, ph_y, ph_sequence_lengths)
            loss = tf.reduce_mean(-log_likelihood)

            viterbi_sequence, viterbi_score = crf.crf_decode(
                unary_scores, transition_params, ph_sequence_lengths)

            train_opt = tf.train.AdamOptimizer(self._study_rate).minimize(loss)

            init = tf.global_variables_initializer()
            session.run(init)

            return session, ph_sequence_lengths, ph_x, ph_y, loss, train_opt, viterbi_sequence
Example #25
 def viterbi_decode(self, potentials, sequence_length):
     """
     crf_decode(potentials,transition_params,sequence_length)  在tensorflow内解码
     参数:
         potentials: 一个形状为[batch_size, max_seq_len, num_tags] 的tensor, 
         transition_params: 一个形状为[num_tags, num_tags] 的转移矩阵 
         sequence_length: 一个形状为[batch_size] 的 ,表示batch中每个序列的长度
     返回:
         decode_tags:一个形状为[batch_size, max_seq_len] 的tensor,类型是tf.int32.表示最好的序列标记. 
         best_score: 有个形状为[batch_size] 的tensor, 包含每个序列解码标签的分数.
     """
     decode_tags, best_score = crf_decode(potentials,
                                          self.transition_params,
                                          sequence_length)
     return decode_tags
Example #26
    def build_output(self, model):  # pylint: disable=no-self-use
        """
    Build the output of the model.
    `score` and `input_y` are for loss calculation.
    `preds` and `y_ground_truth` are for metric calculation.
    """
        model.preds, score = crf_decode(model.logits, model.transitions,
                                        model.input_x_len)

        model.score = tf.identity(score, name="score")
        model.y_ground_truth = model.input_y
        if model.use_pretrained_model:
            logging.info("initialize_pretrained_model_variables")
            self.initialize_pretrained_model_variables(
                model.pretrained_model_path, model.pretrained_model_mode)
Example #27
    def add_crf_layer(self):

        if self.is_training:
            # LSTM input dropout; a rate of 0.5 gave the best score
            self.embedded_chars = tf.nn.dropout(self.embedded_chars,
                                                self.droupout_rate)
        # project
        logits = self.project_layer(self.embedded_chars)
        # crf
        loss, trans = self.crf_layer(logits)
        # CRF decode: pred_ids is the highest-probability tag path
        pred_ids, _ = crf.crf_decode(potentials=logits,
                                     transition_params=trans,
                                     sequence_length=self.lengths)
        return (loss, logits, trans, pred_ids)
Example #28
    def make_pb_file(self, model_dir):
        graph = tf.Graph()
        with graph.as_default():
            session_conf = tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False)
            session_conf.gpu_options.allow_growth = True
            session_conf.gpu_options.per_process_gpu_memory_fraction = 0.9

            sess = tf.Session(config=session_conf)
            with sess.as_default():
                input_ids = tf.placeholder(
                    dtype=tf.int32,
                    shape=(None, self.params.max_sentence_length),
                    name=constant.INPUT_NODE_NAME)
                input_mask = tf.placeholder(
                    dtype=tf.int32,
                    shape=(None, self.params.max_sentence_length),
                    name=constant.INPUT_MASK_NAME)
                dropout = tf.placeholder_with_default(1.0,
                                                      shape=(),
                                                      name='dropout')
                logits, real_sentence_length, trans = self.create_model(
                    input_ids,
                    input_mask,
                    segment_ids=None,
                    is_training=False,
                    dropout=dropout)
                pred_ids, _ = crf.crf_decode(
                    potentials=logits,
                    transition_params=trans,
                    sequence_length=real_sentence_length)
                pred_ids = tf.identity(pred_ids,
                                       name=constant.OUTPUT_NODE_NAME)

                saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
                checkpoint = tf.train.latest_checkpoint(model_dir)
                if checkpoint:
                    saver.restore(sess, checkpoint)
                else:
                    raise FileNotFoundError("model checkpoint not found")

                output_graph_with_weight = tf.graph_util.convert_variables_to_constants(
                    sess, sess.graph_def, [constant.OUTPUT_NODE_NAME])

                with tf.gfile.GFile(os.path.join(model_dir, 'ner.pb'),
                                    'wb') as gf:
                    gf.write(output_graph_with_weight.SerializeToString())
        return os.path.join(model_dir, 'ner.pb')
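
A hedged sketch of loading the exported ner.pb back for inference; the tensor names come from the constant.* node names used above, plus the ':0' suffix that frozen-graph tensors carry, and pb_path, ids, mask are stand-ins:

    with tf.gfile.GFile(pb_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as g:
        tf.import_graph_def(graph_def, name='')
        input_ids = g.get_tensor_by_name(constant.INPUT_NODE_NAME + ':0')
        input_mask = g.get_tensor_by_name(constant.INPUT_MASK_NAME + ':0')
        pred_ids = g.get_tensor_by_name(constant.OUTPUT_NODE_NAME + ':0')
        with tf.Session(graph=g) as sess:
            # dropout has a placeholder_with_default, so only ids/mask are fed
            preds = sess.run(pred_ids, feed_dict={input_ids: ids, input_mask: mask})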
Example #29
    def add_bilstm_crf_layer(self, crf_only):
        if self.is_training:
            self.embedded_chars = tf.nn.dropout(self.embedded_chars,keep_prob=self.dropout_rate)

        # a dense layer with tanh activation directly produces the logits
        if crf_only:
            logits = self.project_crf_layer(self.embedded_chars)
        # a multi-layer bidirectional LSTM produces the logits
        else:
            lstm_output = self.bilstm_layer(self.embedded_chars)
            logits = self.project_bilstm_layer(lstm_output)

        loss, trans = self.crf_layer(logits)
        # CRF decode (Viterbi) inside the graph
        pred_ids, _ = crf.crf_decode(potentials=logits, transition_params=trans, sequence_length=self.lengths)
        return loss, logits, trans, pred_ids
Example #30
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    sequence_lengths = tf.reduce_sum(tf.sign(tf.abs(input_ids)), axis=1)
    #sequence_lengths = tf.subtract(sequence_lengths,len(sequence_lengths)*[2])
    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [-1, FLAGS.max_seq_length, num_labels])

        log_likelihood, transition_params = crf_log_likelihood(
            inputs=logits,
            tag_indices=labels,
            sequence_lengths=sequence_lengths)
        loss = -tf.reduce_mean(log_likelihood)
        predict, best_score = crf_decode(potentials=logits,
                                         transition_params=transition_params,
                                         sequence_length=sequence_lengths)

        # mask = tf.cast(input_mask,tf.float32)
        # loss = tf.contrib.seq2seq.sequence_loss(logits,labels,mask)
        # return (loss, logits, predict)
        ##########################################################################
        #log_probs = tf.nn.log_softmax(logits, axis=-1)
        #one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        #per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        #loss = tf.reduce_sum(per_example_loss)
        #probabilities = tf.nn.softmax(logits, axis=-1)
        #predict = tf.argmax(probabilities,axis=-1)
        return (loss, logits, predict)
Example #31
 def add_blstm_crf_layer(self):
     """
     BLSTM-CRF network
     :return: 
     """
     if self.is_training:
         # LSTM input dropout; a rate of 0.5 gave the best score
         self.embedded_chars = tf.nn.dropout(self.embedded_chars, self.droupout_rate)
     #blstm
     lstm_output = self.blstm_layer(self.embedded_chars)
     #project
     logits = self.project_bilstm_layer(lstm_output)
     #crf
     loss, trans = self.crf_layer(logits)
     # CRF decode: pred_ids is the highest-probability tag path
     pred_ids, _ = crf.crf_decode(potentials=logits, transition_params=trans, sequence_length=self.lengths)
     return (loss, logits, trans, pred_ids)