def main(_):
    """Evaluate the saved retrieval model on the held-out 20% of the QA pairs.

    Rebuilds the text converter from the saved vocabulary, restores the
    latest checkpoint, encodes the test responses once (caching the encoded
    matrix to disk next to the checkpoint), then retrieves the best-matching
    response for every test query and reports top-1 accuracy.  Results are
    written to an .xls file next to the checkpoint.
    """
    model_path = os.path.join('models', Config.file_name)
    input_file = 'data/去除2和null.xlsx'
    vocab_file = os.path.join(model_path, 'vocab_label.pkl')

    # Data preparation: rebuild the converter from the saved vocabulary.
    converter = TextConverter(None, vocab_file,
                              max_vocab=Config.vocab_max_size,
                              seq_length=Config.seq_length)
    print('vocab size:', converter.vocab_size)

    # Restore the most recently saved model checkpoint, if one exists.
    # NOTE(review): if no checkpoint is found, checkpoint_path is None and
    # the string concatenations below will raise TypeError — confirm a
    # checkpoint is always present before running this script.
    model = Model(Config, converter.vocab_size)
    checkpoint_path = tf.train.latest_checkpoint(model_path)
    if checkpoint_path:
        model.load(checkpoint_path)

    # Test data: the last 20% of the QA pairs.  Their responses double as
    # the retrieval library for this evaluation.
    # test_libs = get_excel_libs('data/tianlong_libs.xlsx')  # full 30k+ library
    QAs = get_excel_QAs(input_file)
    thres = int(0.8 * len(QAs))
    test_QAs = QAs[thres:]
    test_libs = [r for q, r, y in test_QAs]

    test_libs_arrs = converter.libs_to_arrs(test_libs)

    # Build the encoded response matrix once and cache it to disk; later
    # runs against the same checkpoint reload it instead of re-encoding.
    save_file = checkpoint_path + '_matul_state_QAs.pkl'
    if not os.path.exists(save_file):
        response_matul_state = model.test_to_matul(test_libs_arrs)
        with open(save_file, 'wb') as f:
            pickle.dump(response_matul_state, f)
    else:
        with open(save_file, 'rb') as f:
            response_matul_state = pickle.load(f)

    # Evaluation loop: top-1 retrieval accuracy over the test queries.
    print('start to testing...')
    QAY = []
    k, n = 0, 0
    for query, y_response, label in test_QAs:
        input_arr, input_len = converter.text_to_arr(query)
        indexs = model.test(input_arr, input_len, response_matul_state)
        responses = converter.index_to_response(indexs, test_libs)
        QAY.append((query, y_response, responses))
        # Count the sample before printing so the progress ratio reads
        # "hits / samples seen" (the original printed "1 / 0" on the
        # first hit).
        n += 1
        if responses[0] == y_response:
            k += 1
            print(k, '/', n)
    # Guard against an empty test split instead of dividing by zero.
    if n:
        print('accuracy:', k / float(n))
    else:
        print('accuracy: n/a (no test samples)')

    result_xls = checkpoint_path + '_Q_for_QAs.xls'
    converter.save_to_excel(QAY, result_xls)
def main(_):
    """Evaluate the dual-LSTM retrieval model against the full response library.

    Restores the model from FLAGS.checkpoint_path (resolving a directory to
    its latest checkpoint), encodes every library response once, then for
    each QA pair retrieves the best-matching responses for the query and
    reports top-1 accuracy.  Results are saved to Excel via the converter.
    """
    converter = TextConverter(filename=FLAGS.converter_path)
    # A directory means "use the newest checkpoint inside it".
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = \
            tf.train.latest_checkpoint(FLAGS.checkpoint_path)

    model = DualLSTM(converter.vocab_size,
                     test=True,
                     batch_size=FLAGS.num_seqs,
                     num_steps=FLAGS.num_steps,
                     lstm_size=FLAGS.lstm_size,
                     num_layers=FLAGS.num_layers,
                     use_embedding=FLAGS.use_embedding,
                     embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)

    # Encode the whole retrieval library once up front; every query is then
    # matched against this precomputed matrix instead of re-encoding the
    # library per query.
    libs = get_excel_libs('data/tianlong_libs.xlsx')
    libs_arrs = converter.libs_to_arrs(libs, FLAGS.num_steps)
    response_matul_state = model.test4_matul(libs_arrs)

    # Expected Excel layout: first sheet, column 1 = id, column 2 = query,
    # column 3 = response.
    QAs = get_excel_QAs(FLAGS.input_file)

    # Evaluation loop: top-1 retrieval accuracy over all QA pairs.
    QAY = []
    k, n = 0, 0
    for query, y_response in QAs:
        input_arr, input_len = converter.Q_to_arr(query, FLAGS.num_steps)
        indexs = model.test4(input_arr, input_len, response_matul_state)
        responses = converter.index_to_response(indexs, libs)
        QAY.append((query, y_response, responses))
        if responses[0] == y_response:
            k += 1
        n += 1
    # Guard against an empty input file instead of dividing by zero.
    if n:
        print('accuracy:', k / float(n))
    else:
        print('accuracy: n/a (no QA pairs)')

    converter.save_to_excel(QAY, FLAGS.checkpoint_path)