def initModel(self):
    print('Loading model:', self.model, 'pred_mode:', self.pred_mode)
    checkpoint_file = self.__getCkptfile(self.model, self.pred_mode)
    if not checkpoint_file:
        return
    # Restore the model; import_meta_graph needs the .meta suffix.
    with self.graph.as_default():
        saver = tf.train.import_meta_graph(checkpoint_file + '.meta')
        saver.restore(self.sess, checkpoint_file)
        # self.graph = tf.get_default_graph()

        # Fetch the input/output tensors from the restored graph.
        self.input_x = self.graph.get_operation_by_name("input_x").outputs[0]
        self.input_y = self.graph.get_operation_by_name("input_y").outputs[0]
        self.dropout_keep_prob = self.graph.get_operation_by_name(
            "dropout_keep_prob").outputs[0]
        self.prediction = self.graph.get_operation_by_name(
            "output/prediction").outputs[0]
        self.training = self.graph.get_operation_by_name("training").outputs[0]

        # Load the vocabulary that matches the prediction mode.
        if self.pred_mode == 'CHAR-RANDOM':
            self.vocab = preprocess.read_vocab(
                os.path.join('data', preprocess.CHAR_VOCAB_PATH))
        elif self.pred_mode in ('WORD-NON-STATIC', 'MULTI'):
            self.vocab = preprocess.read_vocab(
                os.path.join('data', preprocess.WORD_VOCAB_PATH))

        # Load the label-to-id mapping.
        self.label = preprocess.read_label(
            os.path.join('data', preprocess.LABEL_ID_PATH))
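# Usage sketch (not part of the original class): how the tensors restored by
# initModel() could drive one inference pass. `model` is assumed to be an
# instance whose sess/input_x/dropout_keep_prob/training/prediction attributes
# were set above, and `batch_x` an id matrix of shape [batch, seq_len]; both
# names are hypothetical.
def predict_batch(model, batch_x):
    feed_dict = {
        model.input_x: batch_x,
        model.dropout_keep_prob: 1.0,  # disable dropout at inference time
        model.training: False,         # run dropout/batch-norm in eval mode
    }
    # Returns the label ids produced by the output/prediction op.
    return model.sess.run(model.prediction, feed_dict=feed_dict)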
def prepare_data(self):
    # Data preparation.
    # =======================================================
    if self.train_mode == 'CHAR-RANDOM':
        # 1. Character level: read the character vocabulary.
        self.vocab = preprocess.read_vocab(
            os.path.join('data', preprocess.CHAR_VOCAB_PATH))
    elif self.train_mode in ('WORD-NON-STATIC', 'MULTI'):
        # Read the pretrained word vectors into the embedding matrix.
        self.vocab = preprocess.read_vocab(
            os.path.join('data', preprocess.WORD_VOCAB_PATH))
        self.vecs_dict = preprocess.load_vecs(
            os.path.join('data', preprocess.SGNS_WORD_PATH))
        self.embedding_W = np.ndarray(
            shape=[self.vocab_size, self.embedding_dim], dtype=np.float32)
        for word in self.vocab:
            # Row n of embedding_W holds the vector of the word whose id is n.
            if word not in self.vecs_dict:
                preprocess.add_word(word, self.vecs_dict)
            self.embedding_W[self.vocab[word]] = self.vecs_dict[word]

    self.dataset = TextLineDataset(
        os.path.join('data', preprocess.TRAIN_WITH_ID_PATH))

    print('Shuffling dataset...')
    self.dataset = self.dataset.shuffle(preprocess.TOTAL_TRAIN_SIZE)

    # Split the dataset:
    # the first VALID_SIZE samples form the validation set,
    valid_dataset = self.dataset.take(preprocess.VALID_SIZE).batch(
        self.valid_batch_size)
    # the rest form the training set.
    train_dataset = self.dataset.skip(preprocess.VALID_SIZE).batch(
        self.train_batch_size)

    # Create an initializable iterator for each split.
    train_iterator = train_dataset.make_initializable_iterator()
    valid_iterator = valid_dataset.make_initializable_iterator()
    train_init_op = train_iterator.initializer
    valid_init_op = valid_iterator.initializer

    # To fetch elements, first sess.run(train_init_op) to initialize the
    # iterator, then sess.run(next_train_element).
    next_train_element = train_iterator.get_next()
    next_valid_element = valid_iterator.get_next()
    return train_init_op, valid_init_op, next_train_element, next_valid_element
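# Training-loop sketch (assumed, not from the original file): how the
# initializable iterators returned by prepare_data() are typically consumed
# in TF1. `model` and `num_epochs` are hypothetical names.
import tensorflow as tf

def run_epochs(model, num_epochs):
    train_init_op, valid_init_op, next_train, next_valid = model.prepare_data()
    for epoch in range(num_epochs):
        model.sess.run(train_init_op)  # rewind the training iterator
        while True:
            try:
                batch = model.sess.run(next_train)  # a batch of raw text lines
                # ... map tokens to ids via model.vocab and run the train op
            except tf.errors.OutOfRangeError:
                break  # training data exhausted for this epoch
        model.sess.run(valid_init_op)  # then evaluate on the validation split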
def prepare_test_data(self):
    # Read the vocabulary.
    if self.train_mode == 'CHAR-RANDOM':
        # 1. Character level.
        self.vocab = preprocess.read_vocab(
            os.path.join('data', preprocess.CHAR_VOCAB_PATH))
    elif self.train_mode == 'WORD-NON-STATIC':
        self.vocab = preprocess.read_vocab(
            os.path.join('data', preprocess.WORD_VOCAB_PATH))

    # The test set has a header row, so skip the first line when reading.
    dataset = TextLineDataset(os.path.join('data', preprocess.TEST_PATH))
    dataset = dataset.skip(1)
    dataset = dataset.shuffle(preprocess.TOTAL_TEST_SIZE).batch(
        self.test_batch_size)
    iterator = dataset.make_one_shot_iterator()
    next_element = iterator.get_next()
    return dataset, next_element
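# Consumption sketch (assumed): a one-shot iterator needs no initializer, so
# test batches can be fetched directly until the dataset is exhausted.
# `model` is a hypothetical name.
import tensorflow as tf

def iterate_test_batches(model):
    _, next_element = model.prepare_test_data()
    while True:
        try:
            lines = model.sess.run(next_element)  # a batch of raw test lines
            # ... map tokens to ids via model.vocab and feed the network
        except tf.errors.OutOfRangeError:
            break  # all test batches consumed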