def __init__(self):
    self.categories, self.cat_to_id = read_category()
    self.words, self.word_to_id = read_vocab('cnews_vocab.txt')
    # Option 1: load the whole pickled model directly:
    # self.model = torch.load('model.pkl')
    # Option 2: instantiate the model, then load only its saved parameters:
    self.model = TextRnn()
    self.model.load_state_dict(torch.load('model_params.pkl'))
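For inference, the restored TextRnn should also be put into eval mode with gradients disabled. A minimal sketch of a predict method under the conventions used elsewhere in this section (character-level ids, a fixed sequence length of 600, padding id 0 — all assumptions, not from the original):

def predict(self, text):
    # Map characters to vocabulary ids; pad/truncate to the fixed length (assumed 600).
    ids = [self.word_to_id.get(ch, 0) for ch in text][:600]
    ids += [0] * (600 - len(ids))
    x = torch.LongTensor([ids])
    self.model.eval()                  # disable dropout for inference
    with torch.no_grad():              # no gradient bookkeeping needed
        probs = torch.softmax(self.model(x), dim=1)
    return self.categories[int(probs.argmax(dim=1))]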
def __init__(self):
    self.config = TCNNConfig()
    self.categories, self.cat_to_id = read_category()
    self.word_to_id = read_vocab(vocab_dir)
    self.config.vocab_size = len(self.word_to_id)
    self.model = TextCNN(self.config)
    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess=self.session, save_path=save_path)  # restore the saved model
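A predict method for this class pads the input to config.seq_length and runs the graph with dropout disabled; a minimal sketch, assuming the model exposes input_x, keep_prob, and y_pred_cls as in the pb-loading snippet below:

def predict(self, message):
    # Look up character ids, pad to fixed length, run the graph with dropout off.
    data = [self.word_to_id[x] for x in message if x in self.word_to_id]
    feed_dict = {
        self.model.input_x: tf.keras.preprocessing.sequence.pad_sequences(
            [data], maxlen=self.config.seq_length),
        self.model.keep_prob: 1.0,   # disable dropout at inference time
    }
    y_pred_cls = self.session.run(self.model.y_pred_cls, feed_dict=feed_dict)
    return self.categories[y_pred_cls[0]]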
def load_variable_pb():
    session = tf.Session(graph=tf.Graph())
    model_file_path = "pb/model"
    meta_graph = tf.saved_model.loader.load(
        session, [tf.saved_model.tag_constants.SERVING], model_file_path)
    model_graph_signature = list(meta_graph.signature_def.items())[0][1]
    output_feed = []
    output_op_names = ['y_pred_cls', 'y_pred_prob']
    output_tensor_dict = {}
    for output_item in model_graph_signature.outputs.items():
        output_op_name = output_item[0]
        output_tensor_name = output_item[1].name
        output_tensor_dict[output_op_name] = output_tensor_name
    for name in output_op_names:
        output_feed.append(output_tensor_dict[name])
        print(output_tensor_dict[name])
    print("Model loaded!")
    config = TCNNConfig()
    categories, cat_to_id = read_category()
    word_to_id = read_vocab(vocab_dir)
    while True:
        string = input("Enter a test sentence: ").strip()
        input_x = [[word_to_id.get(x, word_to_id['<PAD>']) for x in string]]
        input_x = tf.keras.preprocessing.sequence.pad_sequences(
            sequences=input_x, maxlen=config.seq_length)
        inputs = {'input_x': input_x, 'keep_prob': 1.0}
        feed_dict = {}
        for input_item in model_graph_signature.inputs.items():
            input_op_name = input_item[0]
            input_tensor_name = input_item[1].name
            feed_dict[input_tensor_name] = inputs[input_op_name]
        outputs = session.run(output_feed, feed_dict=feed_dict)
        print(categories[outputs[0][0]])   # predicted category
        print(outputs[1][0])               # class probabilities
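For completeness, a pb/model directory like the one consumed above is produced with the TF1 SavedModel builder. A minimal export sketch, assuming the TextCNN graph exposes input_x, keep_prob, y_pred_cls, and y_pred_prob (the signature key 'prediction' is an arbitrary choice):

def export_pb(session, model, export_dir="pb/model"):
    builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
    signature = tf.saved_model.signature_def_utils.build_signature_def(
        inputs={
            'input_x': tf.saved_model.utils.build_tensor_info(model.input_x),
            'keep_prob': tf.saved_model.utils.build_tensor_info(model.keep_prob),
        },
        outputs={
            'y_pred_cls': tf.saved_model.utils.build_tensor_info(model.y_pred_cls),
            'y_pred_prob': tf.saved_model.utils.build_tensor_info(model.y_pred_prob),
        },
        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)
    builder.add_meta_graph_and_variables(
        session, [tf.saved_model.tag_constants.SERVING],
        signature_def_map={'prediction': signature})
    builder.save()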
def __init__(self, stopwords_path, vocab_dir, categories_dir, save_path):
    self.thu = thulac.thulac(seg_only=True)
    self.stopwords = [
        line.strip() for line in open(stopwords_path).readlines()
    ]
    categories, cat_to_id = read_category(categories_dir)
    self.id_to_cat = {v: k for k, v in cat_to_id.items()}
    words, self.word_to_id = read_vocab(vocab_dir)
    g = tf.Graph()
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True   # grow GPU memory on demand
    self.sess = tf.Session(graph=g, config=tf_config)
    with self.sess.as_default():
        with g.as_default():
            self.config = TCNNConfig()
            self.config.num_classes = len(cat_to_id)
            self.config.vocab_size = len(words)
            self.model = TextCNN(self.config)
            saver = tf.train.Saver()
            self.sess.run(tf.global_variables_initializer())
            saver.restore(self.sess, save_path=save_path)
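This variant is word-level: text must be segmented with thulac and stopword-filtered before vocabulary lookup. A sketch of that preprocessing step (a hypothetical helper, not from the original):

def preprocess(self, text):
    # thulac in seg_only mode returns [word, tag] pairs with empty tags.
    words = [w[0] for w in self.thu.cut(text)]
    words = [w for w in words if w not in self.stopwords]   # drop stopwords
    return [self.word_to_id[w] for w in words if w in self.word_to_id]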
import tensorflow as tf
import tensorflow.keras as krs
from data_loader import read_category, read_vocab, process_file, data_load
from model import TextRnn
from time import time
from tqdm import tqdm
import torch
from torch import nn
import torch.nn.functional as f
from torch.autograd import Variable

# Check whether a GPU is available
print(torch.cuda.is_available())

categories, cat_to_id = read_category()
print(categories)
words, word_to_id = read_vocab('cnews_vocab.txt')
print(words)

# Load the training set
x_train, y_train = process_file('cnews_small_sample.txt', word_to_id, cat_to_id, 600)
print('x_train=', x_train)
# Load the validation set
x_val, y_val = process_file('cnews_val.txt', word_to_id, cat_to_id, 600)


# Evaluate accuracy on the validation set
def evaluate(model, Loss, optimizer, x_val, y_val):
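    # The original body is cut off here; a minimal sketch of a batched
    # validation pass (assumed: batch size 64, one-hot labels as produced by
    # process_file, Loss is nn.CrossEntropyLoss):
    batch_size = 64
    model.eval()                               # disable dropout for evaluation
    total_loss, total_correct = 0.0, 0
    with torch.no_grad():                      # no gradients needed at eval time
        for i in range(0, len(x_val), batch_size):
            x = torch.LongTensor(x_val[i:i + batch_size])
            y = torch.LongTensor(y_val[i:i + batch_size]).argmax(dim=1)  # one-hot -> indices
            output = model(x)
            total_loss += Loss(output, y).item() * len(x)
            total_correct += (output.argmax(dim=1) == y).sum().item()
    model.train()
    return total_loss / len(x_val), total_correct / len(x_val)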
save_path_bak = os.path.join(save_dir_bak, 'best_validation')  # where the best validation model is saved
save_dir = 'checkpoints/textcnn'
save_path = os.path.join(save_dir, 'best_validation')

print('Configuring CNN model...')
normal_num = [690] * 3
max_acc = 0
greatest_normal_num = 0
for i in normal_num:
    get_train_data(i)
    config = TCNNConfig()
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    categories, cat_to_id = read_category(train_dir)
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    config.num_classes = len(categories)
    model = TextCNN(config)
    # Train the model and save it to the bak directory
    train()
    print("Loading test data...")
    start_time = time.time()
    x_test, y_test = process_file(test_dir, word_to_id, cat_to_id, config.seq_length)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)  # cap GPU memory at 10%
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    session.run(tf.global_variables_initializer())
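    # max_acc / greatest_normal_num are initialized above but their update falls
    # outside this excerpt; the intended pattern is presumably per-run model
    # selection, something like the following (evaluate() here is a hypothetical
    # test-set helper, not from the original):
    loss_test, acc_test = evaluate(session, x_test, y_test)
    if acc_test > max_acc:
        max_acc, greatest_normal_num = acc_test, i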
print "配置CNN模型..." config = TCNNConfig() model = TextCNN(config) if sys.argv[1] == 'train': print "开始训练..." print "载入训练样本..." # data_dir = '/home/abc/ssd/pzw/nlp/data/0523/word_sep/' data_dir = '/home/zhwpeng/abc/nlp/data/0324/word_sep/' txt_dirs = list() for fold in glob(data_dir + '*'): # txt_dirs = txt_dirs + glob(fold+'/*.txt') txt_dirs = txt_dirs + glob(fold + '/*.txt')[:1] # 本地小批量数据 print "训练样本总数是{}".format(len(txt_dirs)) np.random.shuffle(txt_dirs) train() else: print "开始测试..." test_txt_dirs = list() test_data_dir = '/home/abc/ssd/pzw/nlp/data/0523/word_sep_test/' # test_data_dir = '/home/zhwpeng/abc/nlp/data/0324/word_sep_test/' for fold in glob(test_data_dir + '*'): test_txt_dirs = test_txt_dirs + glob(fold + '/*.txt') print "测试集样本总数是{}".format(len(test_txt_dirs)) np.random.shuffle(test_txt_dirs) categories, cat_to_id = read_category(types) model_tes_t(test_txt_dirs, train_flag=False)
        # (inside the per-batch prediction loop)
        end_id = min((i + 1) * batch_size, data_len)
        feed_dict = {
            model.input_x: x_test[start_id:end_id],
            model.keep_prob: 1.0
        }
        y_pred_cls[start_id:end_id] = session.run(model.y_pred_cls,
                                                  feed_dict=feed_dict)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)
    return y_pred_cls


if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_rnn.py [train / test]""")

    print('Configuring RNN model...')
    config = TRNNConfig()
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    categories, cat_to_id = read_category(train_dir)
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    model = TextRNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test(test_dir)
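The y_pred_cls array returned above is typically compared against the true labels with sklearn; a usage sketch, assuming y_test holds one-hot labels as produced by process_file:

import numpy as np
from sklearn import metrics

y_test_cls = np.argmax(y_test, 1)   # one-hot labels -> class indices
print(metrics.classification_report(y_test_cls, y_pred_cls, target_names=categories))
print(metrics.confusion_matrix(y_test_cls, y_pred_cls))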
if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_cnn.py [train / test]""")

    print('Configuring CNN model...')
    config = TCNNConfig()
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    # w2v = get_word_embedding(w2v_path, vocab_dir, config.embedding_dim)
    # config.w2v = w2v
    # print(w2v)
    # print(config.w2v)
    categories, cat_to_id = read_category(categories_dir)
    id_to_cat = {v: k for k, v in cat_to_id.items()}
    words, word_to_id = read_vocab(vocab_dir)
    # print('loading word embedding...')
    # embeddings = get_embeddings('./datasets/w2v.txt', vocab_dir, word_to_id)
    # embeddings = pickle.load(open('./datasets/embeddings.pkl', 'rb'))
    # config.embedding_dim = len(embeddings[0])
    config.num_classes = len(cat_to_id)
    config.vocab_size = len(words)
    config.is_w2v = False
    # config.w2v = embeddings
    model = TextCNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test(test_dir)