# Beispiel #1 (example 1, score: 0) — evaluate the saved tagging model on the test set
def main(_):
    """Evaluate the most recently saved tagging model on the test set.

    Restores the vocabulary written during training, converts the test
    sentences into model-ready arrays, loads the latest checkpoint (if one
    exists) and prints a ``word / gold-tag / predicted-tag`` line for every
    token of every test sentence.

    Args:
        _: unused (conventional TensorFlow app-runner argument).
    """
    model_path = os.path.join('models', Config.file_name)

    vocab_file = os.path.join(model_path, 'vocab_tuples.pkl')

    # Load the test sentences with their gold tags.
    sens_tags_test = get_sens_tags('data/test.txt')

    # Rebuild the converter from the saved vocabulary only (no training text).
    converter = TextConverter(None,
                              vocab_file,
                              max_vocab=Config.vocab_max_size)
    print('vocab size:', converter.vocab_size)

    # Produce test samples padded/truncated to the configured sequence length.
    test_QA_arrs = converter.QAs_to_arr(sens_tags_test, Config.seq_length)

    # Restore the most recently saved model, if a checkpoint exists.
    model = Model(Config, converter.vocab_size)
    checkpoint_path = tf.train.latest_checkpoint(model_path)
    if checkpoint_path:
        model.load(checkpoint_path)

    # Run inference sentence by sentence and print aligned columns.
    print('start to testing...')
    for sample, sen in zip(test_QA_arrs, sens_tags_test):
        # NOTE(review): sample[2] appears to hold the true (unpadded)
        # sentence length — confirm against QAs_to_arr.
        sen_len = sample[2]
        y_pre, y_cos = model.test(sample)
        # Map predicted tag ids back to tag strings for the real tokens only.
        tags = [converter.int_to_tag(tag_id) for tag_id in y_pre[:sen_len]]
        print('\nword / tag / pre')
        for j in range(sen_len):
            print("{} / {} / {}".format(sen[0][j], sen[1][j], tags[j]))
# Beispiel #2 (example 2, score: 0) — train the model from command-line train/dev files
def main(_):
    """Train the tagging model from command-line train/dev files.

    Usage: ``script <train_file> <dev_file>``

    Builds (or extends) the vocabulary from the training file, converts the
    training and validation sentences into batch generators, restores the
    latest checkpoint when one exists, and runs training.

    Args:
        _: unused (conventional TensorFlow app-runner argument).
    """
    model_path = os.path.join('models', Config.file_name)
    # exist_ok avoids the exists()/makedirs() race of the original check.
    os.makedirs(model_path, exist_ok=True)

    # Fail with a clear message instead of an IndexError on missing args.
    if len(sys.argv) < 3:
        sys.exit('usage: {} <train_file> <dev_file>'.format(sys.argv[0]))
    train_file = sys.argv[1]
    dev_file = sys.argv[2]

    save_file = os.path.join(model_path, 'vocab_tuples.pkl')

    # Load the raw training and validation sentence/tag pairs.
    sens_tags_train = get_sens_tags(train_file)
    sens_tags_val = get_sens_tags(dev_file)

    # Build the vocabulary from the training text and persist it.
    converter = TextConverter(train_file,
                              save_file,
                              max_vocab=Config.vocab_max_size)
    print('vocab size:', converter.vocab_size)
    # +1 presumably reserves an id for padding/unknown — TODO confirm.
    Config.num_classes = converter.tag_size + 1

    # Produce training samples and their batch generator.
    train_QA_arrs = converter.QAs_to_arr(sens_tags_train, Config.seq_length)
    train_g = converter.batch_generator(train_QA_arrs, Config.batch_size)

    # Produce validation samples and their generator.
    val_QA_arrs = converter.QAs_to_arr(sens_tags_val, Config.seq_length)
    val_g = converter.val_samples_generator(val_QA_arrs, Config.batch_size)

    # Restore the most recently saved model, if a checkpoint exists.
    model = Model(Config, converter.vocab_size)
    checkpoint_path = tf.train.latest_checkpoint(model_path)
    if checkpoint_path:
        model.load(checkpoint_path)

    print('start to training...')
    model.train(train_g, model_path, val_g)
# Beispiel #3 (example 3, score: 0) — train a QA model from an Excel spreadsheet
def main(_):
    """Train a QA model from an Excel spreadsheet of id/query/response rows.

    Reads the QA pairs from a fixed Excel file, splits them 80/20 into
    training and validation sets, builds the vocabulary from the training
    text, restores the latest checkpoint when one exists, and runs training.

    Args:
        _: unused (conventional TensorFlow app-runner argument).
    """
    model_path = os.path.join('models', Config.file_name)
    # exist_ok avoids the exists()/makedirs() race of the original check.
    os.makedirs(model_path, exist_ok=True)

    input_file = 'data/去除2和null.xlsx'
    vocab_file = os.path.join(model_path, 'vocab_label.pkl')

    # Load the raw Excel data. Expected layout: first sheet with columns
    # id / query / response.
    QAs = get_excel_QAs(
        input_file)  # first sheet; col 1 = id, col 2 = query, col 3 = response

    # Split into training (first 80%) and validation (last 20%) sets.
    thres = int(0.8 * len(QAs))
    train_QAs = QAs[:thres]
    val_QAs = QAs[thres:]

    # Build the vocabulary from the training text and persist it.
    text = get_QAs_text(train_QAs)
    converter = TextConverter(text,
                              vocab_file,
                              max_vocab=Config.vocab_max_size,
                              seq_length=Config.seq_length)
    print('vocab size:', converter.vocab_size)

    # Produce training samples and their batch generator.
    train_QA_arrs = converter.QAs_to_arr(train_QAs)
    train_g = converter.batch_generator(train_QA_arrs, Config.batch_size)

    # Produce validation samples and their generator.
    val_QA_arrs = converter.QAs_to_arr(val_QAs)
    val_g = converter.val_samples_generator(val_QA_arrs, Config.batch_size)

    # Restore the most recently saved model, if a checkpoint exists.
    model = Model(Config, converter.vocab_size)
    checkpoint_path = tf.train.latest_checkpoint(model_path)
    if checkpoint_path:
        model.load(checkpoint_path)

    print('start to training...')
    model.train(train_g, model_path, val_g)