# Script entry: bootstrap vocabulary / pre-trained embeddings, build the CNN,
# then dispatch to train() or test() based on the command line.
# Usage: python run_cnn.py [train / test] [optional train/test file]

# Validate argv up front: the original dereferenced sys.argv[1] before any
# length check, so running with no arguments raised IndexError instead of
# the intended usage ValueError.
if len(sys.argv) < 2 or sys.argv[1] not in ['train', 'test']:
    raise ValueError(
        """usage: python run_cnn.py [train / test] [train/test file]""")

save_path = os.path.join(save_dir, 'best_validation')  # best-validation checkpoint path
# tf.reset_default_graph()
print('Configuring CNN model...')
config = TCNNConfig()

if not os.path.exists(vocab_dir):  # no vocabulary file yet — (re)generate it
    print("no vocabulary file, need to generate it ")
    generate_w2v()
# categories, cat_to_id = read_category()
words, word_to_id = read_vocab(vocab_dir)
config.vocab_size = len(words)

# Make sure the pre-trained word-vector file exists, then load it.
if not os.path.exists(word_vector_dir):
    print("no pretrained w2v exists, generate the w2v")
    generate_w2v()
else:
    print("load w2v embeddings")
# NOTE(review): the load is placed after the if/else so that freshly
# generated embeddings are also picked up; loading only in the else branch
# would leave config.pre_training unset on a first run — confirm
# generate_w2v() writes word_vector_dir.
config.pre_training = pd.read_csv(word_vector_dir, header=None,
                                  index_col=None).values

model = TextCNN(config)

# Dispatch: 'train' accepts an optional data-file argument; 'test' takes none.
if sys.argv[1] == 'train' and len(sys.argv) == 3:
    train(filename=sys.argv[2])
elif sys.argv[1] == 'train' and len(sys.argv) == 2:
    train()
elif sys.argv[1] == 'test':
    test()
else:
    # Reached e.g. for 'train' with too many extra arguments.
    raise ValueError(
        """usage: python run_cnn.py [train / test] [train/test file]""")
        # NOTE(review): this chunk begins mid-call — the opening `print(` is on
        # the preceding (unseen) line; the next line is its continuation
        # argument (hence the trailing double close-paren).
        metrics.classification_report(y_test_cls, y_pred_cls,
                                      target_names=categories))

    # Confusion matrix of predicted vs. true class ids.
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    # Report total elapsed time for the evaluation run.
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)


if __name__ == '__main__':
    # CLI entry: exactly one argument, either 'train' or 'test'.
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_rnn.py [train / test]""")

    print('Configuring CNN model...')
    config = TCNNConfig()
    if not os.path.exists(vocab_dir):
        # Vocabulary file missing — build it from the training data.
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    categories, cat_to_id = read_category()
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    # Pre-trained embedding matrix; presumably a .npy file path — confirm
    # against where `pre_training` is defined.
    config.pre_training = np.load(pre_training)
    model = TextCNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test()