# Intent-classification inference demo: clean a hard-coded question, encode it
# with the saved vocabulary, and run it through the trained model.
from get_vocab import get_rid
import tensorflow.contrib.keras as kr
import config
from LSTM_run import get_length

# Saved model checkpoint prefix and the vocabulary (one token per line).
checkpoint_path = 'model/intent_classifier_model'
vocab_path = 'data/vocabulary.txt'

# Sample question to classify ("What is your name?").
test_question = '你叫什么'

if __name__ == '__main__':
    # Load the vocabulary and build a token -> integer-id lookup table
    # (line number in the vocabulary file becomes the id).
    with open(vocab_path, 'r', encoding='utf8') as file:
        vocabulary_list = [k.strip() for k in file.readlines()]
    word2id_dict = dict([(b, a) for a, b in enumerate(vocabulary_list)])
    content_list = []
    # Clean the raw question (get_rid presumably strips punctuation/noise —
    # TODO confirm against get_vocab.py).
    test_question = get_rid(test_question)
    print(test_question)
    # Encode character by character; NOTE(review): raises KeyError for any
    # character not present in the vocabulary — no OOV fallback here.
    for i in range(len(test_question)):
        content_list.append(word2id_dict[test_question[i]])
    # Pad/truncate at the tail to the fixed model input length.
    test_x = kr.preprocessing.sequence.pad_sequences(
        [content_list],
        maxlen=config.sequence_length,
        padding='post',
        truncating='post')
    # NOTE(review): `IntentModel` is not imported in the visible part of this
    # file — presumably imported/defined elsewhere; verify before running.
    model = IntentModel()
    model.load(checkpoint_path)
    # Actual (unpadded) sequence lengths, required by the LSTM at test time.
    real_train_length = get_length(test_x)
    predictions, max_score = model.test_step([test_x], real_train_length)
    print(predictions)
    print(max_score)
    # NOTE(review): truncated here — the body of this `if` was lost at the
    # paste boundary below; the file is not runnable as-is.
    if predictions == [0]:
# ===== 예제 #2 (Example #2) — paste-site separator; original lines read "예제 #2" / "0".
# NOTE(review): non-Python residue from concatenating several examples into one file;
# the body of the preceding `if predictions == [0]:` and a `def` header were lost here.
    # NOTE(review): orphaned function body — its `def` header was lost at the
    # paste boundary above. Given `from LSTM_run import get_length` and the call
    # `get_length(test_x)` earlier in this file, this is presumably
    # `def get_length(x_batch):` — TODO confirm against LSTM_run.py.
    real_len = []
    for line in x_batch:
        # np.sign maps padding zeros to 0 and positive token ids to 1, so the
        # sum counts the non-zero entries, i.e. the unpadded sequence length.
        real_len.append(np.sum(np.sign(line)))
    return real_len


if __name__ == '__main__':
    # Token -> integer-id lookup built from the vocabulary file (one token per
    # line; the line index becomes the id).
    with open(vocab_path, 'r', encoding='utf8') as vocab_file:
        vocabulary_list = [token.strip() for token in vocab_file.readlines()]
    word2id_dict = {token: idx for idx, token in enumerate(vocabulary_list)}

    content_list = []

    # Positive samples: every CSV row contributes a cleaned question, label 1.
    with open(positive_path, 'r', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            content_list.append([get_rid(row['question']), 1])

    # Negative samples: walk the conversation dump with a small state machine.
    # An 'E' line opens a dialogue; the first 'M' line is the question (kept
    # with label 0), continuation lines are appended, and the second 'M' line
    # (the answer) flushes the buffered question.
    with open(negative_path, 'r', encoding='utf-8') as f:
        state = 0
        question = ''
        for line in f.readlines():
            head = line[0]
            if head == 'E':
                state = 1
            elif state == 1 and head == 'M':
                state = 2
                question = get_rid(line[2:])
            elif state == 2 and head not in ('E', 'M'):
                question += get_rid(line)
            elif state == 2 and head == 'M':
                state = 0
                content_list.append([question, 0])
# ===== 예제 #3 (Example #3) — paste-site separator; original lines read "예제 #3" / "0".
# Question-length survey: measure cleaned question lengths in the positive
# dataset against a fixed cutoff.
import csv
import numpy as np
from get_vocab import get_rid

# Positive questions (CSV) and the xiaohuangji conversation dump (negatives).
positive_path = 'data/question.csv'
negative_path = 'data/xiaohuangji50w_nofenci.conv'
# Length cutoff used to count over-long questions.
length_edge = 50

# Lengths of all cleaned questions, collected for the summary statistics below.
length_list = []

if __name__ == '__main__':
    # Count questions whose cleaned length exceeds `length_edge`, recording
    # every length along the way.
    over = 0
    all_len = 0
    with open(positive_path, 'r', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            all_len += 1
            q_len = len(get_rid(row['question']))
            length_list.append(q_len)
            if q_len > length_edge:
                over += 1

    # Report median ("中位数") and mean ("平均值") length, the fraction of
    # over-long questions ("超出"), and the total row count.
    median_s = str(np.median(length_list))
    mean_s = str(np.mean(length_list))
    print('中位数: ' + median_s + ' 平均值: ' + mean_s)
    print('超出: ' + str(float(over / all_len)))
    print(all_len)