Example no. 1
    def predict(self, sess, input, length):
        feed_dict = self.create_feed_dict(input,
                                          None,
                                          length,
                                          is_training=False)
        fetch = [self.scores, self.trans_form]
        scores, trans_form = sess.run(fetch, feed_dict)
        for score_, length_ in zip(scores, length):
            score = score_[:length_]
            path, _ = crf.viterbi_decode(score, trans_form)

        # note: only the decoded path of the last sequence in the batch is returned
        return path
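The examples on this page all reduce to the same call: take the per-step unary scores of one sequence and the learned transition matrix, and decode the best tag path with viterbi_decode. Below is a minimal, self-contained sketch of that call; it assumes the TF 1.x tf.contrib.crf API and uses made-up shapes and random inputs rather than code from any specific example above.

# Minimal sketch: offline Viterbi decoding of a single sequence (assumed shapes).
import numpy as np
from tensorflow.contrib.crf import viterbi_decode  # TF 1.x API

num_tags, seq_len = 5, 7
unary_scores = np.random.randn(seq_len, num_tags)        # [seq_len, num_tags] emission scores
transition_params = np.random.randn(num_tags, num_tags)  # [num_tags, num_tags] learned transitions

best_path, best_score = viterbi_decode(unary_scores, transition_params)
print(best_path)  # list of seq_len tag indices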
Example no. 2
def decode(logits, trans, sequence_lengths, tag_num):
    viterbi_sequences = []
    small = -1000.0
    start = np.asarray([[small] * tag_num + [0]])
    for logit, length in zip(logits, sequence_lengths):
        score = logit[:length]
        pad = small * np.ones([length, 1])
        augmented = np.concatenate([score, pad], axis=1)   # add a column for the artificial start tag
        augmented = np.concatenate([start, augmented], axis=0)
        viterbi_seq, viterbi_score = viterbi_decode(augmented, trans)
        viterbi_sequences.append(viterbi_seq[1:])           # drop the artificial start position
    return viterbi_sequences
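This example, and several of the variants below, rely on a start-tag padding trick: the tagger was trained with one extra artificial start tag, so before decoding each sequence gets an extra heavily penalized column for that tag plus a prepended start row that forces step 0 onto it, and the artificial first position is then dropped from the decoded path. A standalone sketch of the trick, assuming random inputs and a (tag_num + 1) x (tag_num + 1) transition matrix:

# Sketch of the artificial start-tag trick (assumed shapes, random inputs).
import numpy as np
from tensorflow.contrib.crf import viterbi_decode  # TF 1.x API

small = -1000.0
tag_num, length = 4, 6
logit = np.random.randn(length, tag_num)           # real per-step scores
trans = np.random.randn(tag_num + 1, tag_num + 1)  # transitions include the extra start tag

pad = small * np.ones([length, 1])                 # start tag is effectively forbidden after step 0
start = np.asarray([[small] * tag_num + [0]])      # step 0 can only take the start tag
augmented = np.concatenate([start, np.concatenate([logit, pad], axis=1)], axis=0)

path, _ = viterbi_decode(augmented, trans)
print(path[1:])                                    # drop the artificial start position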
Example no. 3
def decode(logits, trans, sequence_lengths, tag_num):
    viterbi_sequences = []
    small = -1000.0
    start = np.asarray([[small] * tag_num + [0]])
    for logit, length in zip(logits, sequence_lengths):
        score = logit[:length]
        pad = small * np.ones([length, 1])
        augmented = np.concatenate([score, pad], axis=1)
        augmented = np.concatenate([start, augmented], axis=0)
        viterbi_seq, viterbi_score = viterbi_decode(augmented, trans)
        # note: unlike Example no. 2, the artificial start position is not stripped here
        viterbi_sequences.append(viterbi_seq)
    return viterbi_sequences
Example no. 4
 def __decode(self, logits, trans, sequence_lengths, tag_num):
     viterbi_sequences = []
     small = -1000.0
     start = np.asarray([[small] * tag_num + [0]])
     for logit, length in zip(logits, sequence_lengths):
         score = logit[:length]
         pad = small * np.ones([length, 1])
         score = np.concatenate([score, pad], axis=1)
         score = np.concatenate([start, score], axis=0)
         viterbi_seq, viterbi_score = viterbi_decode(score, trans)
         viterbi_sequences.append(viterbi_seq[1:])
     return viterbi_sequences
Example no. 5
 def predict_step(self, sess, x_batch):
     feed_dict = {
         self.x: x_batch,
         self.dropout_keep_prob: 1.0
     }
     lengths, unary_scores, transition_param = sess.run(
         [self.seq_length, self.output, self.transition_params], feed_dict)
     predict = []
     for unary_score, length in zip(unary_scores, lengths):
         viterbi_sequence, _ = crf.viterbi_decode(unary_score[:length], transition_param)
         predict.append(viterbi_sequence)
     return predict
Example no. 6
    def _predict_one_batch(self, sess, bat_sens, bat_seqs_len):
        feed_dict = {self.sentences: bat_sens,
                     self.sequences_len: bat_seqs_len,
                     self.dropout_keep_prob: 1.0}
        hidden_scores, transition_params = sess.run([self.hidden_scores, self.transition_params],
                                                    feed_dict=feed_dict)

        bat_labels = []
        for score, seq_len in zip(hidden_scores, bat_seqs_len):
            labs, _ = viterbi_decode(score[:seq_len], transition_params)
            bat_labels.append(list(labs))

        return bat_labels
Example no. 7
    def test_accuraty(self, lengths, scores, trans_matrix, labels):
        total_labels = []
        predict_labels = []
        for score_, length_, label_ in zip(scores, lengths, labels):
            if length_ == 0:
                continue
            score = score_[:length_]
            path, _ = crf.viterbi_decode(score, trans_matrix)
            label_path = label_[:length_]
            predict_labels.extend(path)
            total_labels.extend(label_path)

        return total_labels, predict_labels
Example no. 8
 def decode(self, logits, lengths, matrix):
     """
     :param logits: [batch_size, num_steps, num_tags] float32, logits
     :param lengths: [batch_size] int32, real length of each sequence (excluding padding)
     :param matrix: transition matrix for inference
     :return:
     """
     paths = []
     for score, length in zip(logits, lengths):
         score = score[:length]
         path, _ = viterbi_decode(score, matrix)
         paths.append(path)
     return paths
Example no. 9
 def decode(self, logits, lengths, matrix):
     paths = []
     small = -1000.0
     # the two extra columns added below correspond to the artificial start and end tags
     start = np.asarray([[small] * self._num_targets + [small] + [0]])
     end = np.asarray([[small] * self._num_targets + [0] + [small]])
     for logit, length in zip(logits, lengths):
         logit = logit[:length]
         pad = small * np.ones([length, 2])
         logit = np.concatenate([logit, pad], axis=1)
         logit = np.concatenate([start, logit, end], axis=0)
         path, _ = viterbi_decode(logit, matrix)
         paths.append(path[1:-1])
     return paths
Example no. 10
    def decode(self, logits, lengths, matrix):
        paths = []
        small = -1000.0
        start = np.asarray([[small] * self.num_tags + [0]])
        for score, length in zip(logits, lengths):
            score = score[:length]
            pad = small * np.ones([length, 1])
            augmented = np.concatenate([score, pad], axis=1)
            augmented = np.concatenate([start, augmented], axis=0)
            path, _ = viterbi_decode(augmented, matrix)

            paths.append(path[1:])
        return paths
Example no. 11
 def prediction_one_batch(self, sess, seqs):
     feed_dict, seq_len_list = self.get_feed_dict(seqs, dropout=0.1)
     if self.crf_:
         logits, transition_params = sess.run(
             [self.logits_, self.transition_params_], feed_dict=feed_dict)
         label_list = []
         for logit, seq_len in zip(logits, seq_len_list):
             viterbi_seq, _ = crf.viterbi_decode(logit[:seq_len],
                                                 transition_params)
             label_list.append(viterbi_seq)
         return label_list, seq_len_list
     else:
         label_list = sess.run(self.labels_softmax_, feed_dict=feed_dict)
         return label_list, seq_len_list
Example no. 12
    def out(self, sentences, out_file=None):
        '''
        :param sentences: two input formats are supported, a txt file or a list
        :return:
        '''
        sentences_list = [[char for char in sen] for sen in sentences]
        sentences_list, sequences_len = pad_sequences(sentences_list,
                                                      self.maxLen)
        sentences_idx = sequences2idx(sentences_list, self.char2idx)
        sequences_len = [
            seq if seq <= self.maxLen else self.maxLen for seq in sequences_len
        ]

        if type(out_file) == str:
            fw = open(out_file, 'wt', encoding='utf-8')

        with tf.Session() as sess:
            self.saver.restore(sess, self.model_path + self.model_name)

            labels = [0] * len(sentences_idx)  # dummy labels, not actually used
            pred_labels = []
            for (bat_sens, _, bat_seqs_len) in batch_yield(sentences_idx,
                                                           labels,
                                                           sequences_len,
                                                           bs=500):
                feed_dict = {
                    self.sentences: bat_sens,
                    self.sequences_len: bat_seqs_len,
                    self.dropout_keep_prob: 1.0
                }
                hidden_scores, transition_params = sess.run(
                    [self.hidden_scores, self.transition_params],
                    feed_dict=feed_dict)
                bat_labels = []
                for score, seq_len in zip(hidden_scores, bat_seqs_len):
                    labs, _ = viterbi_decode(score[:seq_len], transition_params)
                    bat_labels.append(list(labs))

                pred_labels += [[self.idx2tag[idx] for idx in labs]
                                for labs in bat_labels]

            result = []
            for one_lab, one_sen_str in zip(pred_labels, sentences):
                result.append(self.get_prediction(one_lab, one_sen_str))

            if type(out_file) == str:
                self._out_file(result, out_file)

            return result
Example no. 13
 def decode(self, logits, lengths, matrix):
     """
     :param logits: [batch_size, num_steps, num_tags] float32, logits
     :param lengths: [batch_size] int32, real length of each sequence
     :param matrix: transition matrix for inference
     :return:
     """
     # infer final labels using the Viterbi algorithm
     paths = []
     for score, length in zip(logits, lengths):
         score = score[:length]
         path, _ = viterbi_decode(score, matrix)
         paths.append(path)
     return paths
Example no. 14
 def predict_one_batch(self, sess, seqs):
     feed_dict, seq_len_list = self.update(seqs, dropout=1.0)
     if self.decode_method == 1:
         label_list = sess.run(self.labels_softmax_, feed_dict=feed_dict)
         return label_list, seq_len_list
     if self.decode_method == 0:
         logits, transition_params = sess.run(
             [self.logits, self.transition_params], feed_dict=feed_dict)
         label_list = []
         for logit, seq_len in zip(logits, seq_len_list):
             viterbi_seq, _ = viterbi_decode(logit[:seq_len],
                                             transition_params)
             label_list.append(viterbi_seq)
         return label_list, seq_len_list
Example no. 15
def predict_one_batch(model, sess, seqs):
    feed_dict, seq_len_list = get_feed_dict(model, seqs, dropout=1.0)

    if model.CRF:
        logits, transition_params = sess.run(
            [model.logits, model.transition_params], feed_dict=feed_dict)
        label_list = []
        for logit, seq_len in zip(logits, seq_len_list):
            viterbi_seq, _ = viterbi_decode(logit[:seq_len], transition_params)
            label_list.append(viterbi_seq)
        return label_list, seq_len_list
    else:
        label_list = sess.run(model.labels_softmax_, feed_dict=feed_dict)
        return label_list, seq_len_list
Example no. 16
def make_mask_test(logits_,
                   sentence_legth,
                   is_CRF=False,
                   transition_params_=None):
    pred_list = []
    # print(logits_)
    # print(sentence_legth)
    for log, seq_len in zip(logits_, sentence_legth):
        if is_CRF:
            viterbi_seq, _ = viterbi_decode(log[:seq_len], transition_params_)
            # print(viterbi_seq)
        else:
            viterbi_seq = log[:seq_len]
        pred_list.extend(viterbi_seq)
    return pred_list
Example no. 17
def make_mask(logits_,
              labels_,
              sentence_legth,
              is_CRF=False,
              transition_params_=None):
    pred_list = []
    label_list = []
    for log, lab, seq_len in zip(logits_, labels_, sentence_legth):
        if is_CRF:
            viterbi_seq, _ = viterbi_decode(log[:seq_len], transition_params_)
        else:
            viterbi_seq = log[:seq_len]
        pred_list.extend(viterbi_seq)
        label_list.extend(lab[:seq_len])
    return label_list, pred_list
Example no. 18
 def inference_tgt(self, scores, sequence_lengths=None):
     if not self.use_crf:
         return np.argmax(scores, 2)
     else:
         with tf.variable_scope(self.scope_tgt_crf, reuse=True):
             transitions = tf.get_variable('transitions').eval(
                 session=self.sess)
         paths = np.zeros(scores.shape[:2], dtype=np.int32)
         for i in range(scores.shape[0]):
             tag_score, length = scores[i], sequence_lengths[i]
             if length == 0:
                 continue
             path, _ = crf.viterbi_decode(tag_score[:length], transitions)
             paths[i, :length] = path
         return paths
Example no. 19
 def predict_labels(self, sess, inputs, max_sentence_num,
                    max_sentence_length, sentslenlist, placelist,
                    docslenlist):
     feed_dict = self.get_feed_dict(inputs, sentslenlist, placelist,
                                    docslenlist)
     feed_dict[self.doc2vecmodel.max_sentence_num] = max_sentence_num
     feed_dict[self.doc2vecmodel.max_sentence_length] = max_sentence_length
     logits = sess.run(self.classificalmodel.logits, feed_dict=feed_dict)
     label_list = []
     # print("logits:" + str(logits))
     for logit in logits:
         viterbi_seq, _ = viterbi_decode(logit, self.transition_params)
         # print("logit:" + str(logit))
         label_list.append(viterbi_seq)
     return label_list
Example no. 20
    def decode(self, logits, lengths, matrix):
        # decode predictions: per-step emission scores, real lengths, transition matrix
        # Viterbi decoding
        paths = []
        small = -1000.0
        start = np.asarray([[small] * self.num_tags + [0]])  # shape [1, num_tags + 1]
        for score, length in zip(logits, lengths):
            score = score[:length]  # keep only the outputs of valid (non-padding) characters
            pad = small * np.ones([length, 1])
            augmented = np.concatenate([score, pad], axis=-1)
            augmented = np.concatenate([start, augmented], axis=0)

            path, _ = viterbi_decode(augmented, matrix)  # Viterbi decoding
            paths.append(path[1:])
        return paths  # decoded tag ids
Example no. 21
    def predict_one_batch(self, sess, seqs):
        feed_dict, seq_len_list = self.get_feed_dict(seqs, dropout=1.0)

        if self.CRF:
            logits, transition_params = sess.run(
                [self.logits, self.transition_params], feed_dict=feed_dict)
            label_list = []
            for logit, seq_len in zip(logits, seq_len_list):
                viterbi_seq, _ = viterbi_decode(logit[:seq_len],
                                                transition_params)
                label_list.append(viterbi_seq)
            return label_list, seq_len_list
        else:
            label_list = sess.run(self.label_pred, feed_dict=feed_dict)
            return label_list, seq_len_list
Example no. 22
 def predict(self, inputs, sequence_lengths):
     sess = self.session
     feed = {
         self.inputs: [inputs],
         self.sequence_lengths: sequence_lengths,
         self.keep_prob: 1
     }
     logits, transition_params = sess.run(
         [self.logits, self.transition_params], feed_dict=feed)
     labels = []
     for logit, sequence_length in zip(logits, sequence_lengths):
         viterbi_seq, _ = viterbi_decode(
             score=logit[:sequence_length],
             transition_params=transition_params)
         labels.append(viterbi_seq)
     return labels
Example no. 23
    def predict_one_batch(self, sess, seqs):  # predict labels with the Viterbi algorithm
        """

        :param sess:
        :param seqs:
        :return: label_list
                 seq_len_list
        """
        feed_dict, seq_len_list = self.get_feed_dict(seqs, dropout=1.0)
        logits, transition_params = sess.run(
            [self.logits, self.transition_params], feed_dict=feed_dict)
        label_list = []
        for logit, seq_len in zip(logits, seq_len_list):
            viterbi_seq, _ = viterbi_decode(logit[:seq_len], transition_params)
            label_list.append(viterbi_seq)
        return label_list, seq_len_list
Example no. 24
 def predict(self, sess, seqs):
     seq_pad, seq_length = process_seq(seqs)
     logits, transition_params = sess.run(
         [self.logits, self.transition_params],
         feed_dict={
             self.input_x: seq_pad,
             self.seq_length: seq_length,
             self.keep_pro: 1.0
         })
     label_ = []
     for logit, length in zip(logits, seq_length):
         # logit: outputs for one clause; length: the clause's real length; logit[:length]: the valid outputs
         # run the Viterbi algorithm to get the best tag sequence
         viterbi_seq, _ = viterbi_decode(logit[:length], transition_params)
         label_.append(viterbi_seq)
     return label_
Example no. 25
 def evaluate_step(self, sess, data):
     strings, chars, segs, tags = data
     feed_dict = {
         self.char_inputs: chars,
         self.seg_inputs: segs,
         self.dropout_keep: 1.0
     }
     logits, lengths, transition_params = sess.run(
         [self.logits, self.lengths, self.transition_params], feed_dict)
     new_strings, predicts, new_tags = [], [], []
     for length, logit, string, tag in zip(lengths, logits, strings, tags):
         predict, _ = crf.viterbi_decode(logit[:length], transition_params)
         predicts.append(predict)
         new_strings.append(string[:length])
         new_tags.append(tag[:length])
     return new_strings, predicts, new_tags
Example no. 26
 def dev_step(self, sess, x_batch, x_dict, y_batch):
     feed_dict = {
         self.x: x_batch,
         self.dict: x_dict,
         self.y: y_batch,
         self.dropout_keep_prob: 1.0
     }
     loss, lengths, unary_scores, transition_param = sess.run(
         [self.loss, self.seq_length, self.output, self.transition_params], feed_dict)
     predict = []
     for unary_score, length in zip(unary_scores, lengths):
         if length == 0:
             continue
         viterbi_sequence, _ = crf.viterbi_decode(unary_score[:length], transition_param)
         predict.append(viterbi_sequence)
     return loss, predict
Example no. 27
 def decode(self, logits, lengths, matrix):
     """
     :param logits: [batch_size, num_steps, num_tags] float32, logits
     :param lengths: [batch_size] int32, real length of each sequence
     :param matrix: transition matrix for inference
     :return:
     """
     # infer final labels using the Viterbi algorithm
     paths = []
     for score, length in zip(logits, lengths):
         score = score[:length]  # decode only the real (non-padding) time steps
         if self.is_crf:
             path, _ = viterbi_decode(score, matrix)
         else:
             path = tf.argmax(score, axis=-1).eval()
         paths.append(path)
     return paths
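Example no. 28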
    def pred_labels(self, x, y, seqs):
        scores, transition_matrix = self.sess.run(
            [self.logit, self.transition],
            feed_dict={
                self.bilstm.input_x: x,
                self.bilstm.input_y: y,
                self.bilstm.seq_lengths: seqs,
                self.bilstm.dropout: 1.0
            })
        labels = []
        for i in range(scores.shape[0]):
            label, _ = crf.viterbi_decode(scores[i],
                                          transition_params=transition_matrix)

            labels.append(label)
        return labels
Example no. 29
    def test_accuraty(self, sess, inputs, labels):
        crf_trans_matrix = self.trans.eval()
        lengths, scores = self.run_step(sess, inputs, None, False)
        correct_num = 0
        total_labels = 0
        for score_, length_, label_ in zip(scores, lengths, labels):
            if length_ == 0:
                continue
            score = score_[:length_]
            path, _ = crf.viterbi_decode(score, crf_trans_matrix)
            label_path = label_[:length_]
            correct_num += np.sum(np.equal(path, label_path))
            total_labels += length_

        accuracy = 100.0 * correct_num / float(total_labels)
        return accuracy
Example no. 30
    def predict(self, sess, inputs, inputs_y=[]):
        crf_trans_matrix = self.trans.eval()
        lengths, scores = self.run_step(sess, inputs, None, False)
        paths = []
        for score, length in zip(scores, lengths):
            score = score[:length]
            path, _ = crf.viterbi_decode(score, crf_trans_matrix)
            paths.append(path[:length])

        if len(inputs_y) != 0:
            paths_y = []
            for y, length in zip(inputs_y, lengths):
                paths_y.append(y[:length])

            return paths, paths_y

        return paths
Example no. 31
    def test(self, test_set):
        real_seq = list()
        for seq in test_set[0]:
            real_seq.append(list(map(lambda x: self.idx2words[x], seq)))
        real_label = list()
        for seq in test_set[1]:
            real_label.append(list(map(lambda x: self.idx2labels[x], seq)))
        real_test_set = (real_seq, real_label)

        slot_predict = None
        intent_predict = None
        saver = tf.train.Saver()
        with tf.Session(config=self.config) as sess:
            saver.restore(sess, self.model_path)
            feed_dict, seq_len_list = self.get_feed_dict(test_set[0])
            if self.slot_filling:
                if self.CRF:
                    logits, transition_params = sess.run(
                        [self.logits_slot, self.transition_params],
                        feed_dict=feed_dict)
                    slot_predicts = list()
                    for logit, seq_len in zip(logits, seq_len_list):
                        viterbi_seq, _ = viterbi_decode(
                            logit[:seq_len], transition_params)
                        slot_predicts.append(viterbi_seq)
                else:
                    slot_predicts = sess.run(self.labels_softmax,
                                             feed_dict=feed_dict)
                slot_predict = list()
                for i in range(len(test_set[0])):
                    seq_len = len(test_set[0][i])
                    predicted_seq = list(
                        map(lambda x: self.idx2labels[x],
                            slot_predicts[i][:seq_len]))
                    slot_predict.append(predicted_seq)
            if self.intent_detection:
                intent_predicts = sess.run(self.intents_softmax,
                                           feed_dict=feed_dict)
                for i in range(len(test_set[0])):
                    if test_set[2][i] == intent_predicts[i]:
                        # TODO intent eval
                        pass

        evaluate(real_test_set, slot_predict, intent_predict,
                 self.error_example_output, self.true_example_output,
                 self.slot_distinct)
Example no. 32
    def decode(self, logits, lengths, matrix):
        """
        :param logits: [batch_size, num_steps, num_tags] float32, logits
        :param lengths: [batch_size] int32, real length of each sequence
        :param matrix: transition matrix for inference
        :return:
        """
        # infer final labels using the Viterbi algorithm
        paths = []
        small = -1000.0
        start = np.asarray([[small] * self.num_tags + [0]])
        for score, length in zip(logits, lengths):
            score = score[:length]
            pad = small * np.ones([length, 1])
            augmented = np.concatenate([score, pad], axis=1)
            augmented = np.concatenate([start, augmented], axis=0)
            path, _ = viterbi_decode(augmented, matrix)

            paths.append(path[1:])
        return paths