def __init__(self, args):
    """Load a trained char-RNN poem model and its supporting resources.

    Reads the training summary (``result.json``) from ``args.model_dir``,
    loads the pre-trained character embeddings and the rhyme dictionary
    from ``args.data_dir``, then restores the best checkpoint into a new
    TensorFlow session for inference.

    Args:
        args: parsed CLI namespace; must provide ``model_dir``,
            ``data_dir`` and ``seed`` (negative seed disables seeding).
    """
    self.args = args
    logging.basicConfig(stream=sys.stdout,
                        format='%(asctime)s %(levelname)s:%(message)s',
                        level=logging.INFO, datefmt='%I:%M:%S')

    # Paths are derived from args via os.path.join instead of hard-coded
    # absolute Windows paths, so the loader works from any checkout.
    with open(os.path.join(self.args.model_dir, 'result.json'), 'r') as f:
        result = json.load(f)
    params = result['params']
    best_model = result['best_model']
    # Older result files may lack the 'encoding' key; default to UTF-8.
    self.args.encoding = result.get('encoding', 'utf-8')

    base_path = args.data_dir
    w2v_file = os.path.join(base_path, 'vectors_poem.bin')
    # Pre-trained character embeddings used by the language model.
    self.w2v = Word2Vec(w2v_file)
    # Rhyme dictionary used to constrain line-ending characters.
    RhymeWords.read_rhyme_words(os.path.join(base_path, 'rhyme_words.txt'))

    if args.seed >= 0:
        np.random.seed(args.seed)

    logging.info('best_model: %s\n', best_model)

    self.sess = tf.Session()
    w2v_vocab_size = len(self.w2v.model.vocab)
    with tf.name_scope('evaluation'):
        # infer=True builds the sampling (batch-1) graph variant.
        self.model = CharRNNLM(is_training=False, w2v_model=self.w2v.model,
                               vocab_size=w2v_vocab_size, infer=True,
                               **params)
        saver = tf.train.Saver(name='model_saver')
        # Restore the checkpoint recorded as best during training.
        saver.restore(self.sess, best_model)
def __init__(self, args):
    """Restore a trained char-RNN poem model for inference.

    Loads hyper-parameters and the best-checkpoint path from
    ``<model_dir>/result.json``, the character embeddings and rhyme
    dictionary from ``args.data_dir``, and opens a GPU-memory-capped
    TensorFlow session with the restored weights.

    Args:
        args: parsed CLI namespace; must provide ``model_dir``,
            ``data_dir`` and ``seed`` (negative seed disables seeding).
    """
    self.args = args
    logging.basicConfig(stream=sys.stdout,
                        format='%(asctime)s %(levelname)s:%(message)s',
                        level=logging.INFO, datefmt='%I:%M:%S')

    with open(os.path.join(self.args.model_dir, 'result.json'), 'r') as f:
        result = json.load(f)
    params = result['params']
    best_model = result['best_model']
    # Older result files may lack the 'encoding' key; default to UTF-8.
    self.args.encoding = result.get('encoding', 'utf-8')

    base_path = args.data_dir
    w2v_file = os.path.join(base_path, "vectors_poem.bin")
    # Pre-trained character embeddings used by the language model.
    self.w2v = Word2Vec(w2v_file)
    # Rhyme dictionary used to constrain line-ending characters.
    RhymeWords.read_rhyme_words(os.path.join(base_path, 'rhyme_words.txt'))

    if args.seed >= 0:
        np.random.seed(args.seed)
    logging.info('best_model: %s\n', best_model)

    # Cap per-process GPU memory so several inference processes can
    # share one card.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.2
    self.sess = tf.Session(config=config)

    w2v_vocab_size = len(self.w2v.model.vocab)
    with tf.name_scope('evaluation'):
        # infer=True builds the sampling (batch-1) graph variant.
        self.model = CharRNNLM(is_training=False, w2v_model=self.w2v.model,
                               vocab_size=w2v_vocab_size, infer=True,
                               **params)
        saver = tf.train.Saver(name='model_saver')
        saver.restore(self.sess, best_model)
def select_rhyme(self, rhyme_ref, probs):
    """Pick the next character index, preferring one that rhymes.

    When ``rhyme_ref`` names a known rhyme group, the 50 most probable
    characters are scanned in descending probability order and the first
    one belonging to that group is returned.  If no constraint applies
    or no rhyming candidate is found among the top 50, the plain argmax
    of ``probs`` is returned.
    """
    if not rhyme_ref:
        return np.argmax(probs)
    rhyme_set = RhymeWords.get_rhyme_words(rhyme_ref)
    if rhyme_set:
        # Rank (prob, index) pairs; only the probability drives the order.
        ranked = heapq.nlargest(50,
                                zip(probs, range(self.vocab_size)),
                                key=lambda pair: pair[0])
        for _score, idx in ranked:
            if self.w2v_model.vocab[idx] in rhyme_set:
                return idx
    # Fall back to the unconstrained most-likely character.
    return np.argmax(probs)
def select_rhyme(self, rhyme_ref, probs):
    """Choose a character index from ``probs``, biased toward rhyming.

    Returns the highest-probability index among the top 50 whose
    character rhymes with ``rhyme_ref``; otherwise the global argmax.
    """
    # Unconstrained choice, used whenever the rhyme filter yields nothing.
    fallback = np.argmax(probs)
    if not rhyme_ref:
        return fallback
    rhyme_words = RhymeWords.get_rhyme_words(rhyme_ref)
    if not rhyme_words:
        return fallback
    scored = zip(probs, range(self.vocab_size))
    top50 = heapq.nlargest(50, scored, key=lambda item: item[0])
    # First (i.e. most probable) top-50 index whose character rhymes.
    hit = next((i for _, i in top50
                if self.w2v_model.vocab[i] in rhyme_words), None)
    return fallback if hit is None else hit
def select_rhyme(self, rhyme_ref, probs):
    """Select the next character id, honoring the rhyme of ``rhyme_ref``.

    Scans the 50 highest-probability characters and returns the first
    one found in ``rhyme_ref``'s rhyme group; falls through to the
    argmax of ``probs`` when no constraint applies or nothing matches.
    """
    if rhyme_ref:
        allowed = RhymeWords.get_rhyme_words(rhyme_ref)
        if allowed:
            pairs = zip(probs, range(0, self.vocab_size))
            top = heapq.nlargest(50, pairs, key=lambda t: t[0])
            for _p, idx in top:
                word = self.w2v_model.vocab[idx]
                if word in allowed:
                    return idx
    # No rhyme constraint (or no rhyming candidate): take the argmax.
    return np.argmax(probs)