from get_vocab import get_rid
import tensorflow.contrib.keras as kr
import config
from LSTM_run import get_length

# Trained intent-classifier checkpoint and the character vocabulary file.
checkpoint_path = 'model/intent_classifier_model'
vocab_path = 'data/vocabulary.txt'
test_question = '你叫什么'

if __name__ == '__main__':
    # Build the char -> id mapping from the vocabulary file (one token per line).
    with open(vocab_path, 'r', encoding='utf8') as file:
        vocabulary_list = [k.strip() for k in file.readlines()]
    word2id_dict = {word: idx for idx, word in enumerate(vocabulary_list)}

    content_list = []
    # get_rid() presumably strips punctuation/noise from the question — TODO confirm.
    test_question = get_rid(test_question)
    print(test_question)
    for ch in test_question:
        # NOTE(review): raises KeyError for out-of-vocabulary characters;
        # consider word2id_dict.get(ch, unk_id) if the vocabulary has an <UNK> id.
        content_list.append(word2id_dict[ch])

    # Pad/truncate to the fixed sequence length the model was trained with.
    test_x = kr.preprocessing.sequence.pad_sequences(
        [content_list], maxlen=config.sequence_length,
        padding='post', truncating='post')

    # NOTE(review): IntentModel is not imported in this chunk — presumably
    # `from LSTM_run import IntentModel` belongs with the imports above; confirm.
    model = IntentModel()
    model.load(checkpoint_path)
    real_train_length = get_length(test_x)
    predictions, max_score = model.test_step([test_x], real_train_length)
    print(predictions)
    print(max_score)
    if predictions == [0]:
        # NOTE(review): this branch body was truncated in the source chunk;
        # restore the original handling for a label-0 prediction.
        pass
def get_length(x_batch):
    # NOTE(review): the `def` header was lost at the chunk boundary of the
    # mangled source; signature reconstructed from the companion inference
    # script's call `get_length(test_x)` — confirm against the original file.
    """Return the real (unpadded) length of each row in a padded id batch.

    Padding ids are 0, so np.sign() maps real token ids to 1 (or -1) and
    padding to 0; summing the signs of a row recovers its pre-padding length.
    """
    return [np.sum(np.sign(row)) for row in x_batch]


if __name__ == '__main__':
    # NOTE(review): vocab_path / positive_path / negative_path / csv / get_rid
    # are defined in a part of this file that is outside this chunk.
    # Build the char -> id mapping from the vocabulary file.
    with open(vocab_path, 'r', encoding='utf8') as file:
        vocabulary_list = [k.strip() for k in file.readlines()]
    word2id_dict = dict([(b, a) for a, b in enumerate(vocabulary_list)])

    content_list = []
    # Positive examples: every cleaned question from the CSV, labelled 1.
    with open(positive_path, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            content_list.append([get_rid(row['question']), 1])

    # Negative examples: parse the conversation dump, labelled 0.
    # Format (as this state machine reads it): an 'E' line starts a dialogue,
    # the first 'M' line is the question, non-E/M lines continue it, and the
    # second 'M' line (the answer) closes and commits the question.
    with open(negative_path, 'r', encoding='utf-8') as f:
        state = 0
        question = ''
        for line in f.readlines():
            if line[0] == 'E':
                state = 1
            elif state == 1 and line[0] == 'M':
                state = 2
                question = get_rid(line[2:])
            elif state == 2 and line[0] not in ['E', 'M']:
                question += get_rid(line)
            elif state == 2 and line[0] == 'M':
                state = 0
                content_list.append([question, 0])
import csv

import numpy as np

from get_vocab import get_rid

# Corpus paths and the sequence-length cutoff under evaluation.
positive_path = 'data/question.csv'
negative_path = 'data/xiaohuangji50w_nofenci.conv'
length_edge = 50
length_list = []

if __name__ == '__main__':
    # Measure question lengths after cleaning with get_rid(), to help pick a
    # padding length: reports median, mean, and the fraction longer than
    # `length_edge`.
    over = 0
    with open(positive_path, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        all_len = 0
        for row in reader:
            all_len += 1
            question = get_rid(row['question'])
            length_list.append(len(question))
            if len(question) > length_edge:
                over += 1
    if all_len == 0:
        # Guard: an empty CSV previously raised ZeroDivisionError in the ratio
        # below (and np.median([]) is NaN with a RuntimeWarning).
        print('no rows found in ' + positive_path)
    else:
        print('中位数: ' + str(np.median(length_list))
              + ' 平均值: ' + str(np.mean(length_list)))
        # `over / all_len` is already a float under true division; the original
        # redundant float() wrapper is dropped (output is byte-identical).
        print('超出: ' + str(over / all_len))
        print(all_len)