save_path = os.path.join(save_dir, 'best_validation')  # 最佳验证结果保存路径
    # tf.reset_default_graph()
    print('Configuring CNN model...')
    config = TCNNConfig()

    if not os.path.exists(vocab_dir):  # 如果不存在词汇表,重建
        print("no vocabulary file, need to generate it ")
        generate_w2v()
    # categories, cat_to_id = read_category()
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    # trans vector file to numpy file
    if not os.path.exists(word_vector_dir):
        print("no pretrained w2v exists, generate the w2v")
        generate_w2v()
    else:
        print("load w2v embeddings")
        config.pre_training = pd.read_csv(word_vector_dir,
                                          header=None,
                                          index_col=None).values
    model = TextCNN(config)

    if sys.argv[1] == 'train' and len(sys.argv) == 3:
        train(filename=sys.argv[2])
    elif sys.argv[1] == 'train' and len(sys.argv) == 2:
        train()
    elif sys.argv[1] == 'test':
        test()
    else:
        raise ValueError(
            """usage: python run_cnn.py [train / test] [train/test file]""")
Esempio n. 2
0
        metrics.classification_report(y_test_cls,
                                      y_pred_cls,
                                      target_names=categories))

    # 混淆矩阵
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)


if __name__ == '__main__':
    # Script entry point: validate the command line, prepare the model
    # configuration and vocabulary, then run the requested mode.

    # Exactly one argument is accepted and it must be a known mode.
    if not (len(sys.argv) == 2 and sys.argv[1] in ('train', 'test')):
        raise ValueError("""usage: python run_rnn.py [train / test]""")

    print('Configuring CNN model...')
    config = TCNNConfig()

    # Create the vocabulary file from the training data when it is missing.
    if os.path.exists(vocab_dir) is False:
        build_vocab(train_dir, vocab_dir, config.vocab_size)

    categories, cat_to_id = read_category()
    words, word_to_id = read_vocab(vocab_dir)

    # The vocabulary size is taken from the loaded vocabulary, overriding
    # whatever value the config started with.
    config.vocab_size = len(words)
    # Array loaded from the path held in the module-level `pre_training`.
    config.pre_training = np.load(pre_training)
    model = TextCNN(config)

    # The mode was validated above, so anything other than 'train' is 'test'.
    (train if sys.argv[1] == 'train' else test)()