Example #1
def predict():
    with open(map_path, "rb") as f:
        word_to_id, cat_to_id, seq_length, num_classes = pickle.load(f)
    id_to_cat = {v: k for k, v in cat_to_id.items()}
    config = TCNNConfig()
    config.num_classes = num_classes
    config.vocab_size = len(word_to_id)
    model = TextCNN(config)
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)  # restore the saved model
    while True:
        line = input("Please enter a test sentence: ")  # input() already returns str in Python 3
        data_id = [[
            word_to_id[x] for x in list(native_content(line))
            if x in word_to_id
        ]]
        x_pad = kr.preprocessing.sequence.pad_sequences(data_id, seq_length)
        y_pred_cls = session.run(model.y_pred_cls,
                                 feed_dict={
                                     model.input_x: x_pad,
                                     model.keep_prob: 1.0
                                 })
        print('sentence: {}, predicted intent: {}'.format(
            line, id_to_cat[y_pred_cls[0]]))
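# The function above assumes module-level `map_path` (the pickled mappings
# written at training time, as in Example #4 below) and `save_path` (the
# TensorFlow checkpoint prefix). A minimal sketch of that glue code, with
# hypothetical paths:
import os

map_path = os.path.join('checkpoints', 'maps.pkl')          # hypothetical path
save_path = os.path.join('checkpoints', 'best_validation')  # hypothetical path

if __name__ == '__main__':
    predict()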
Example #2
        base_dir = 'data/' + data_dir + '/' + t_name
        classes = sys.argv[5].split('-')

        train_dir = os.path.join(base_dir, 'train.csv')
        test_dir = os.path.join(base_dir, 'test.csv')
        val_dir = os.path.join(base_dir, 'dev.csv')
        vocab_dir = os.path.join('data/data_orginal/' + t_name, 'vocab.csv')

        if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
            print('vocab_dir does not exist:', vocab_dir)
            build_vocab('data/data_orginal/' + t_name + '/whole.csv',
                        vocab_dir, config.vocab_size)
        categories, cat_to_id = read_category(classes)
        words, word_to_id = read_vocab(vocab_dir)
        config.vocab_size = len(words)
        config.num_classes = len(classes)

        model_name = 'textcnn'
        save_dir = 'checkpoints/' + t_name + '/' + model_name + '_' + t_name + "_" + data_dir + '_' + t_th + 'th'
        save_path = os.path.join(save_dir, 'best_validation')  # path of the best validation result

        model = TextCNN(config)
        if sys.argv[1] == 'train':
            train()
        else:
            test()
    else:
        print(
            'usage: at least 4 parameters are required: '
            'python file, run number of the task, task name'
        )
    # confusion matrix
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)


Example #3
if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_cnn.py [train / test]""")

    print('Configuring CNN model...')
    config = TCNNConfig()
    print('CNN config:', config)
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    categories, cat_to_id = read_category(problem)
    print('categories:', categories)
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    config.num_classes = len(categories)
    model = TextCNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test()
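# Example #3 rebuilds the vocabulary with `build_vocab` when the file is
# missing. A character-level sketch of that helper, assuming tab-separated
# "label<TAB>content" training lines (an assumption about the data format, not
# a verified signature):
from collections import Counter

def build_vocab(train_dir, vocab_dir, vocab_size):
    counter = Counter()
    with open(train_dir, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip('\n').split('\t', 1)
            if len(parts) == 2:
                counter.update(parts[1])  # count the characters of the content
    words = ['<PAD>'] + [w for w, _ in counter.most_common(vocab_size - 1)]
    with open(vocab_dir, 'w', encoding='utf-8') as f:
        f.write('\n'.join(words) + '\n')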
Example #4
if __name__ == '__main__':
    #    predict(); exit()

    #    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
    #        raise ValueError("""usage: python run_cnn.py [train / test / predict]""")

    d = 'data/cnews/'
    train_dir = d + 'sen_class.train'
    test_dir = d + 'sen_class.test'
    val_dir = d + 'sen_class.val'
    vocab_dir = d + 'sen_class.vocab'
    print('Configuring CNN model...')
    config = TCNNConfig()
    labels = build_vocab(train_dir, test_dir, val_dir, vocab_dir,
                         config.vocab_size)
    config.num_classes = len(labels)
    categories, cat_to_id = read_category(labels)
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    model = TextCNN(config)
    print('labels: {}, vocabulary size: {}'.format(config.num_classes,
                                                   len(words)))
    with open(map_path, "wb") as f:
        pickle.dump(
            [word_to_id, cat_to_id, config.seq_length, config.num_classes], f)

    train()
    exit()

    if sys.argv[1] == 'train':  # unreachable: exit() above ends the script first
        train()
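# Every example reloads the saved vocabulary with `read_vocab`. A minimal
# sketch, assuming one token per line as written by the build_vocab sketch
# above:
def read_vocab(vocab_dir):
    with open(vocab_dir, encoding='utf-8') as f:
        words = [line.strip() for line in f if line.strip()]
    word_to_id = {w: i for i, w in enumerate(words)}
    return words, word_to_id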
Example #5
if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_cnn.py [train / test]""")

    print('Configuring CNN model...')
    config = TCNNConfig()

    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    # w2v = get_word_embedding(w2v_path, vocab_dir, config.embedding_dim)
    # config.w2v = w2v
    # print(w2v)
    # print(config.w2v)
    categories, cat_to_id = read_category(categories_dir)
    id_to_cat = {v: k for k, v in cat_to_id.items()}
    words, word_to_id = read_vocab(vocab_dir)
    # print('loading word embedding...')
    # embeddings = get_embeddings('./datasets/w2v.txt', vocab_dir, word_to_id)
    # embeddings = pickle.load(open('./datasets/embeddings.pkl', 'rb'))
    # config.embedding_dim = len(embeddings[0])
    config.num_classes = len(cat_to_id)
    config.vocab_size = len(words)
    config.is_w2v = False
    # config.w2v = embeddings
    model = TextCNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test()
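# All five examples build a TCNNConfig and then overwrite vocab_size and
# num_classes from the data. A sketch of such a config class; the field names
# follow the examples above, the values are illustrative defaults, not the
# repo's verified settings:
class TCNNConfig:
    embedding_dim = 64        # character embedding dimension
    seq_length = 600          # pad/truncate inputs to this length (Example #4)
    num_classes = 10          # overwritten once the labels are read
    vocab_size = 5000         # overwritten after read_vocab
    hidden_dim = 128          # fully-connected layer size
    dropout_keep_prob = 0.5   # keep_prob is fed as 1.0 at prediction time
    learning_rate = 1e-3
    batch_size = 64
    num_epochs = 10
    is_w2v = False            # Example #5 sets this flag explicitly
    w2v = None                # optional pretrained-embedding hook (commented code above)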