Example 1
import os
import pickle

import tensorflow as tf

# TextConverter, Model, Config and get_excel_QAs are assumed to be
# project-local helpers.


def main(_):
    model_path = os.path.join('models', Config.file_name)

    input_file = 'data/去除2和null.xlsx'
    vocab_file = os.path.join(model_path, 'vocab_label.pkl')

    # Data preprocessing
    converter = TextConverter(None,
                              vocab_file,
                              max_vocab=Config.vocab_max_size,
                              seq_length=Config.seq_length)
    print('vocab size:', converter.vocab_size)

    # Load the most recently saved checkpoint, if any
    model = Model(Config, converter.vocab_size)
    checkpoint_path = tf.train.latest_checkpoint(model_path)
    if checkpoint_path:
        model.load(checkpoint_path)

    # Build the test set: hold out the last 20% of the QA pairs
    # test_libs = get_excel_libs('data/tianlong_libs.xlsx')  # or use the full library (30k+ entries)
    QAs = get_excel_QAs(input_file)
    thres = int(0.8 * len(QAs))
    test_QAs = QAs[thres:]
    test_libs = [r for q, r, y in test_QAs]  # candidate responses come from the held-out QAs

    test_libs_arrs = converter.libs_to_arrs(test_libs)

    # Build the response-library matching vectors, cached beside the checkpoint
    save_file = checkpoint_path + '_matul_state_QAs.pkl'
    if not os.path.exists(save_file):
        response_matul_state = model.test_to_matul(test_libs_arrs)
        with open(save_file, 'wb') as f:
            pickle.dump(response_matul_state, f)
    else:
        with open(save_file, 'rb') as f:
            response_matul_state = pickle.load(f)

    # Evaluation
    print('start testing...')
    QAY = []
    k, n = 0, 0
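    # k = correct top-1 retrievals, n = total queries evaluated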
    for query, y_response, label in test_QAs:
        input_arr, input_len = converter.text_to_arr(query)
        indexs = model.test(input_arr, input_len, response_matul_state)
        responses = converter.index_to_response(indexs, test_libs)

        QAY.append((query, y_response, responses))
        n += 1
        if responses[0] == y_response:
            k += 1
            print(k, '/', n)  # running count: correct so far / seen so far
    print('accuracy:', k / float(n))
    result_xls = checkpoint_path + '_Q_for_QAs.xls'
    converter.save_to_excel(QAY, result_xls)
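
Example 1 caches the encoded response library on disk because recomputing it is the expensive step: the matrix is pickled next to the checkpoint and reloaded on later runs. A minimal, generic sketch of that checkpoint-keyed pickle cache follows; `cached` and `compute_fn` are illustrative names, not part of the project.

import os
import pickle


def cached(path, compute_fn):
    """Load pickled data from `path`, computing and caching it on a miss."""
    if os.path.exists(path):
        with open(path, 'rb') as f:
            return pickle.load(f)
    data = compute_fn()
    with open(path, 'wb') as f:
        pickle.dump(data, f)
    return data


# Usage mirroring Example 1:
# response_matul_state = cached(checkpoint_path + '_matul_state_QAs.pkl',
#                               lambda: model.test_to_matul(test_libs_arrs))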
Example 2
import os

import tensorflow as tf

# FLAGS and the helpers TextConverter, DualLSTM, get_excel_QAs and
# get_excel_libs are assumed to come from this project's own modules.


def main(_):
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(
            FLAGS.checkpoint_path)

    model = DualLSTM(converter.vocab_size,
                     test=True,
                     batch_size=FLAGS.num_seqs,
                     num_steps=FLAGS.num_steps,
                     lstm_size=FLAGS.lstm_size,
                     num_layers=FLAGS.num_layers,
                     use_embedding=FLAGS.use_embedding,
                     embedding_size=FLAGS.embedding_size)

    model.load(FLAGS.checkpoint_path)

    # ----------test3-------------
    # QAs = get_excel_QAs(FLAGS.input_file)  # expected Excel format: first sheet, columns: id, query, response
    # QA_arrs = converter.QAs_to_arrs(QAs, FLAGS.num_steps)
    # all_samples = converter.samples_for_test3(QA_arrs)
    # indexs = model.test3(all_samples)
    # converter.index_to_QA_and_save(indexs, QAs, FLAGS.checkpoint_path)

    # ----------test4-------------
    libs = get_excel_libs('data/tianlong_libs.xlsx')
    libs_arrs = converter.libs_to_arrs(libs, FLAGS.num_steps)
    response_matul_state = model.test4_matul(libs_arrs)
    QAs = get_excel_QAs(FLAGS.input_file)  # expected Excel format: first sheet, columns: id, query, response
    QAY = []
    k, n = 0, 0
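    # k = correct top-1 retrievals, n = total queries evaluated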
    for query, y_response in QAs:
        input_arr, input_len = converter.Q_to_arr(query, FLAGS.num_steps)
        indexs = model.test4(input_arr, input_len, response_matul_state)
        responses = converter.index_to_response(indexs, libs)
        QAY.append((query, y_response, responses))
        if responses[0] == y_response:
            k += 1
        n += 1
    print('accuracy:', k / float(n))
    converter.save_to_excel(QAY, FLAGS.checkpoint_path)
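
In both examples the model's test methods (`test`, `test4`) score a query against every precomputed response encoding at once; the "matul" in the method and file names suggests a single matrix multiplication. A hedged numpy sketch of that retrieval step, with assumed shapes and names:

import numpy as np


def rank_responses(query_vec, library_matrix, top_k=5):
    """Return best-first indices of the top_k candidate responses.

    query_vec:      (d,) encoding of one query
    library_matrix: (n, d) precomputed encodings of n candidate responses
    """
    scores = library_matrix @ query_vec  # (n,) dot-product scores
    return np.argsort(-scores)[:top_k]   # highest scores first

The returned indices play the role of `indexs` above, which the converter then maps back to response strings before the top-1 accuracy is tallied.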