def predict():
    with open(map_path, "rb") as f:
        word_to_id, cat_to_id, seq_length, num_classes = pickle.load(f)
    id_to_cat = {v: k for k, v in cat_to_id.items()}

    config = TCNNConfig()
    config.num_classes = num_classes
    config.vocab_size = len(word_to_id)
    model = TextCNN(config)

    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)  # restore the saved model

    while True:
        line = str(input("Enter a test sentence: "))
        data_id = [[word_to_id[x] for x in list(native_content(line)) if x in word_to_id]]
        x_pad = kr.preprocessing.sequence.pad_sequences(data_id, seq_length)
        y_pred_cls = session.run(model.y_pred_cls,
                                 feed_dict={model.input_x: x_pad, model.keep_prob: 1.0})
        print('sentence: {}, predicted intent: {}'.format(line, id_to_cat[y_pred_cls[0]]))
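# Hedged sketch: `native_content` is not defined in this section; the sketch
# below assumes the common cnews_loader-style helper, which is a no-op on
# Python 3 and a utf-8 decode on Python 2. This is an assumption, not the
# confirmed original implementation.
import sys

def native_content_sketch(content):
    """Return `content` as native text: decode bytes to utf-8 on Python 2."""
    if sys.version_info[0] > 2:
        return content
    return content.decode('utf-8')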
if len(sys.argv) >= 6:  # guard assumed: the original outer condition was lost when the block was flattened
    base_dir = 'data/' + data_dir + '/' + t_name
    classes = sys.argv[5].split('-')
    train_dir = os.path.join(base_dir, 'train.csv')
    test_dir = os.path.join(base_dir, 'test.csv')
    val_dir = os.path.join(base_dir, 'dev.csv')
    vocab_dir = os.path.join('data/data_orginal/' + t_name, 'vocab.csv')

    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        print('vocab_dir does not exist:', vocab_dir)
        build_vocab('data/data_orginal/' + t_name + '/whole.csv', vocab_dir, config.vocab_size)

    categories, cat_to_id = read_category(classes)
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    config.num_classes = len(classes)

    model_name = 'textcnn'
    save_dir = 'checkpoints/' + t_name + '/' + model_name + '_' + t_name + '_' + data_dir + '_' + t_th + 'th'
    save_path = os.path.join(save_dir, 'best_validation')  # path of the best validation result

    model = TextCNN(config)
    if sys.argv[1] == 'train':
        train()
    else:
        test()
else:
    print('usage: fewer arguments than expected: python file, task run number, task name')
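# Hedged usage sketch: the exact argv layout is not shown in this block;
# assuming data_dir, t_name, and t_th are parsed from sys.argv[2:5] before
# it runs, an invocation would look like (task and class names hypothetical):
#
#   python run_cnn.py train data_v1 intent 1 greeting-weather-music
#
# where sys.argv[5] ('greeting-weather-music') is split on '-' into the
# class list passed to read_category.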
    # Confusion matrix
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)


if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_cnn.py [train / test]""")

    print('Configuring CNN model...')
    config = TCNNConfig()
    print('CNN config:', config)
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    categories, cat_to_id = read_category(problem)
    print('categories:', categories)
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    config.num_classes = len(categories)
    model = TextCNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test()
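# Hedged illustration of the confusion-matrix step in the block above:
# sklearn's metrics.confusion_matrix counts (true, predicted) label pairs,
# with rows as true classes and columns as predicted classes. Toy labels
# only, not project data.
from sklearn import metrics

y_true = [0, 0, 1, 1, 2]
y_pred = [0, 1, 1, 1, 2]
print(metrics.confusion_matrix(y_true, y_pred))
# [[1 1 0]
#  [0 2 0]
#  [0 0 1]]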
if __name__ == '__main__':
    # predict(); exit()
    # if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
    #     raise ValueError("""usage: python run_cnn.py [train / test / predict]""")

    d = 'data/cnews/'
    train_dir = d + 'sen_class.train'
    test_dir = d + 'sen_class.test'
    val_dir = d + 'sen_class.val'
    vocab_dir = d + 'sen_class.vocab'

    print('Configuring CNN model...')
    config = TCNNConfig()
    labels = build_vocab(train_dir, test_dir, val_dir, vocab_dir, config.vocab_size)
    config.num_classes = len(labels)
    categories, cat_to_id = read_category(labels)
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    model = TextCNN(config)
    print('labels: {}, vocabulary size: {}'.format(config.num_classes, len(words)))

    # persist the mappings that predict() reloads at inference time
    with open(map_path, "wb") as f:
        pickle.dump([word_to_id, cat_to_id, config.seq_length, config.num_classes], f)

    train()
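# Hedged sketch of read_category as used above: assuming the common
# cnews_loader-style implementation, it simply pairs each category name
# with a contiguous integer id. Sketch only; the original may differ.
def read_category_sketch(categories):
    """Map category names to contiguous integer ids."""
    cat_to_id = dict(zip(categories, range(len(categories))))
    return categories, cat_to_id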
if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_cnn.py [train / test]""")

    print('Configuring CNN model...')
    config = TCNNConfig()
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    # w2v = get_word_embedding(w2v_path, vocab_dir, config.embedding_dim)
    # config.w2v = w2v
    # print(w2v)
    # print(config.w2v)

    categories, cat_to_id = read_category(categories_dir)
    id_to_cat = {v: k for k, v in cat_to_id.items()}
    words, word_to_id = read_vocab(vocab_dir)
    # print('loading word embedding...')
    # embeddings = get_embeddings('./datasets/w2v.txt', vocab_dir, word_to_id)
    # embeddings = pickle.load(open('./datasets/embeddings.pkl', 'rb'))
    # config.embedding_dim = len(embeddings[0])
    config.num_classes = len(cat_to_id)
    config.vocab_size = len(words)
    config.is_w2v = False
    # config.w2v = embeddings
    model = TextCNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test()
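# Hedged sketch of the vocabulary rebuild triggered above, assuming the
# common character-level build_vocab: count characters in the training file
# and keep the most frequent ones, reserving index 0 for '<PAD>'. The
# tab-separated "label\tcontent" row format is an assumption.
from collections import Counter

def build_vocab_sketch(train_path, vocab_path, vocab_size=5000):
    """Write the vocab_size most frequent characters to vocab_path."""
    counter = Counter()
    with open(train_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip('\n').split('\t')
            counter.update(parts[-1])  # count characters of the content field
    words = ['<PAD>'] + [w for w, _ in counter.most_common(vocab_size - 1)]
    with open(vocab_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(words) + '\n')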