def userTest():
    """Interactive console loop: load the model once, then generate poems on demand."""
    print("正在初始化......")
    archive = np.load("data/tang.npz", allow_pickle=True)
    data = archive['data']
    ix2word = archive['ix2word'].item()
    word2ix = archive['word2ix'].item()
    model = PoetryModel(len(ix2word), Config.embedding_dim, Config.hidden_dim)
    # Second positional argument of t.load is map_location.
    model.load_state_dict(t.load(Config.model_path, 'cpu'))
    if Config.use_gpu:
        model.to(t.device('cuda'))
    print("初始化完成!\n")
    while True:
        print("欢迎使用李港唐诗生成器,\n"
              "输入1 进入首句生成模式\n"
              "输入2 进入藏头诗生成模式\n")
        mode = int(input())
        if mode == 1:
            print("请输入您想要的诗歌首句,可以是五言或七言")
            start_words = str(input())
            gen_poetry = ''.join(generate(model, start_words, ix2word, word2ix))
            print("生成的诗句如下:%s\n" % (gen_poetry))
        elif mode == 2:
            print("请输入您想要的诗歌藏头部分,不超过16个字,最好是偶数")
            start_words = str(input())
            gen_poetry = ''.join(gen_acrostic(model, start_words, ix2word, word2ix))
            print("生成的诗句如下:%s\n" % (gen_poetry))
def gen():
    """Generate text from the configured start words and print it."""
    data, word2ix, ix2word = get_data()
    model = PoetryModel(len(word2ix), config.embedding_dim,
                        config.hidden_dim, config.num_layers)
    # Map all storages to CPU regardless of where the checkpoint was saved.
    checkpoint = torch.load(config.model_path, map_location=lambda s, l: s)
    model.load_state_dict(checkpoint)
    model.to(config.device)

    # Python 2 / Python 3 string compatibility for CLI-supplied text.
    if sys.version_info.major == 3:
        if config.start_words.isprintable():
            start_words = config.start_words
            prefix_words = config.prefix_words if config.prefix_words else None
        else:
            # Undo a shell encoding mishap (raw bytes smuggled as surrogates).
            start_words = config.start_words.encode('ascii', 'surrogateescape').decode('utf8')
            prefix_words = (config.prefix_words.encode('ascii', 'surrogateescape').decode('utf8')
                            if config.prefix_words else None)
    else:
        start_words = config.start_words.decode('utf8')
        prefix_words = config.prefix_words.decode('utf8') if config.prefix_words else None

    # Normalize ASCII punctuation to full-width Chinese punctuation.
    start_words = start_words.replace(',', u',').replace('.', u'。').replace('?', u'?')
    result = generate(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
def gen(**kwargs):
    """Command-line entry point: generate a poem (or acrostic) from opt settings."""
    for key, value in kwargs.items():
        setattr(opt, key, value)
    data, word2ix, ix2word = get_data(opt)

    model = PoetryModel(len(word2ix), 128, 256)
    # Deserialize onto CPU storages so a GPU-trained checkpoint loads anywhere.
    state_dict = t.load(opt.model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    if opt.use_gpu:
        model.cuda()

    start_words = opt.start_words
    prefix_words = opt.prefix_words
    # Convert half-width punctuation to full-width.
    start_words = (start_words.replace(',', u',')
                              .replace('.', u'。')
                              .replace('?', u'?'))

    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
def gen(**kwargs):
    """
    Command-line entry point: generate a poem (acrostic or normal) from opt.

    Bug fix: `opt.start_words.isprintable` was missing the call parentheses,
    so the condition tested the bound-method object (always truthy) and the
    surrogate-escape fallback branch could never execute.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), opt.embedding_dim, opt.hidden_dim)
    # Map storages to CPU so a GPU-trained checkpoint loads on any machine.
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    if opt.use_gpu:
        model = model.to(device)

    # Assumes Python 3 (original comment said "default Python 3.X").
    if opt.start_words.isprintable():  # FIX: actually call the method
        start_words = opt.start_words
        prefix_words = opt.prefix_words
    else:
        # Recover UTF-8 text that arrived as ASCII with surrogate escapes.
        start_words = opt.start_words.encode('ascii', 'surrogateescape').decode('utf8')
        prefix_words = opt.prefix_words.encode('ascii', 'surrogateescape').decode('utf8') if opt.prefix_words else None

    # Half-width -> full-width punctuation.
    start_words = start_words.replace(',', u',').replace('.', u'。').replace('?', u'?')

    # Pick the generation routine.
    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
def gen(**kwargs):
    """CLI interface for generating a poem from the global opt configuration."""
    for name, value in kwargs.items():
        setattr(opt, name, value)
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256)
    state_dict = t.load(opt.model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    if opt.use_gpu:
        model.cuda()

    # Python 2 / 3 string compatibility for CLI-supplied text.
    if sys.version_info.major == 3:
        if opt.start_words.isprintable():
            start_words = opt.start_words
            prefix_words = opt.prefix_words if opt.prefix_words else None
        else:
            start_words = opt.start_words.encode('ascii', 'surrogateescape').decode('utf8')
            if opt.prefix_words:
                prefix_words = opt.prefix_words.encode('ascii', 'surrogateescape').decode('utf8')
            else:
                prefix_words = None
    else:
        start_words = opt.start_words.decode('utf8')
        prefix_words = opt.prefix_words.decode('utf8') if opt.prefix_words else None

    # Replace half-width punctuation with full-width equivalents.
    start_words = start_words.replace(',', u',').replace('.', u'。').replace('?', u'?')

    poem_fn = gen_acrostic if opt.acrostic else generate
    result = poem_fn(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
def gen(**kwargs):
    """
    Generate a poem (acrostic or normal) using weights from opt.model_path.

    Bug fix: np.load defaults to allow_pickle=False (NumPy >= 1.16.3), which
    raises ValueError when reading the object arrays ('word2ix'/'ix2word')
    stored in the .npz archive — pass allow_pickle=True explicitly.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    data_all = np.load(opt.pickle_path, allow_pickle=True)  # FIX: object arrays need pickling
    data = data_all['data']
    word2ix = data_all['word2ix'].item()
    ix2word = data_all['ix2word'].item()
    model = PoetryModel(len(word2ix), 128, 256)
    model.load_state_dict(t.load(opt.model_path, map_location=t.device('cpu')))
    opt.device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    model.to(opt.device)

    if opt.start_words.isprintable():
        start_words = opt.start_words
        prefix_words = opt.prefix_words if opt.prefix_words else None
    else:
        # Recover UTF-8 text that arrived as ASCII with surrogate escapes.
        start_words = opt.start_words.encode('ascii', 'surrogateescape').decode('utf8')
        prefix_words = opt.prefix_words.encode(
            'ascii', 'surrogateescape').decode('utf8') if opt.prefix_words else None

    # Half-width -> full-width punctuation.
    start_words = start_words.replace(',', ',').replace('.', '。').replace('?', '?')
    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
def userTest(acrostic=True):
    """Load the trained model and print one generated poem (acrostic by default)."""
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256)
    # Deserialize onto CPU storages.
    state_dict = t.load(opt.model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)

    if opt.start_words.isprintable():
        start_words = opt.start_words
        prefix_words = opt.prefix_words if opt.prefix_words else None
    else:
        # Recover UTF-8 text that arrived as ASCII with surrogate escapes.
        start_words = opt.start_words.encode('ascii', 'surrogateescape').decode('utf8')
        if opt.prefix_words:
            prefix_words = opt.prefix_words.encode('ascii', 'surrogateescape').decode('utf8')
        else:
            prefix_words = None

    # Half-width -> full-width punctuation.
    start_words = (start_words.replace(',', u',')
                              .replace('.', u'。')
                              .replace('?', u'?'))

    if acrostic:
        result = gen_acrostic(model, start_words, ix2word, word2ix, prefix_words)
    else:
        result = generate(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
def load_model(self):
    """Build the PoetryModel, restore weights from config.model_path, move it to the device."""
    model = PoetryModel(len(self.word_to_ix),
                        self.config.embedding_dim,
                        self.config.hidden_dim,
                        self.device,
                        self.config.layer_num)
    # Deserialize onto CPU storages, then move to the target device.
    state_dict = torch.load(self.config.model_path,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    model.to(self.device)
    self.model = model
def test():
    """
    Interactive loop: read a first line from stdin, print a generated poem.

    Bug fix: np.load needs allow_pickle=True to read the object arrays
    ('ix2word'/'word2ix') in tang.npz; the default (False since NumPy
    1.16.3) raises ValueError.
    """
    datas = np.load("tang.npz", allow_pickle=True)  # FIX: object arrays need pickling
    ix2word = datas['ix2word'].item()
    word2ix = datas['word2ix'].item()
    model = PoetryModel(len(ix2word), config.embedding_dim, config.hidden_dim)
    # Second positional argument of torch.load is map_location.
    model.load_state_dict(torch.load(config.model_path, config.device))
    while True:
        start_words = str(input())
        gen_poetry = ''.join(
            generate(model, start_words, ix2word, word2ix, config))
        print(gen_poetry)
def generate():
    """Generate one poem per configured start word and print each result."""
    config = Config()
    train_data, dev_data, vocabulary = get_dataset(config.data_path)
    poetry_model = PoetryModel(vocabulary_size=len(vocabulary),
                               embedding_size=config.embedding_size,
                               hidden_size=config.hidden_size)
    # The checkpoint stores a whole model object; extract its weights.
    checkpoint = torch.load(os.path.join(config.save_path, config.model_name))
    poetry_model.load_state_dict(checkpoint.state_dict())
    for start_word in config.start_words:
        poem = generate_poem(model=poetry_model,
                             start_word=start_word,
                             vocabulary=vocabulary,
                             max_gen_len=config.max_gen_len,
                             temperature=0.6)
        print(poem)
def gen(**kwargs):
    """
    CLI entry point: generate a poem (acrostic or normal) and save it to result.txt.

    Bug fixes:
    - prefix_words may be None (opt.prefix_words unset), but the original
      called prefix_words.replace(...) unconditionally -> AttributeError.
    - result.txt is now opened with an explicit utf-8 encoding so writing
      the Chinese output does not depend on the platform's locale encoding.
    """
    for k, v in kwargs.items():
        print(k, v)
        setattr(opt, k, v)
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256)
    # Map storages to CPU so a GPU-trained checkpoint loads anywhere.
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    if opt.use_gpu:
        model.cuda()

    # Python 2 / 3 string compatibility.
    if sys.version_info.major == 3:
        if opt.start_words.isprintable():
            start_words = opt.start_words
            prefix_words = opt.prefix_words if opt.prefix_words else None
        else:
            start_words = opt.start_words.encode(
                'ascii', 'surrogateescape').decode('utf8')
            prefix_words = opt.prefix_words.encode(
                'ascii', 'surrogateescape').decode('utf8') if opt.prefix_words else None
    else:
        start_words = opt.start_words.decode('utf8')
        prefix_words = opt.prefix_words.decode(
            'utf8') if opt.prefix_words else None

    # Half-width -> full-width punctuation; guard against a missing prefix.
    if prefix_words:  # FIX: avoid AttributeError when prefix_words is None
        prefix_words = prefix_words.replace(',', u',') \
            .replace('.', u'。') \
            .replace('?', u'?')
    start_words = start_words.replace(',', u',') \
        .replace('.', u'。') \
        .replace('?', u'?')

    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    with open('result.txt', 'w', encoding='utf-8') as f:  # FIX: explicit encoding
        f.writelines(result)
    print(''.join(result))
def train(**kwargs: dict) -> None:
    """
    Train the poetry model.

    Bug fixes: the inner loop read from the *whole dataset* tensor `data`
    instead of the mini-batch `data_` in two places, so every "batch" was
    the entire dataset and the input/target pair was inconsistent:
    - `data_ = data.long()...`  ->  `data_.long()...`
    - `V(data[1:, :])`          ->  `V(data_[1:, :])`
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    #vis = Visdom(env=opt.env)

    # Data: an index matrix, one padded poem per row.
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(
        data,
        batch_size=opt.batch_size,
        shuffle=True,
    )

    model = PoetryModel(len(word2ix), 2, 2)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.user_gpu:  # NOTE(review): attribute is spelled `user_gpu` on opt — confirm
        model.cuda()
        criterion.cuda()

    for epoch in range(opt.epoch):
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            # (batch, seq_len) -> (seq_len, batch); contiguous for later views.
            data_ = data_.long().transpose(1, 0).contiguous()  # FIX: was `data`
            if opt.user_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()
            # Teacher forcing: predict token i+1 from tokens <= i.
            input_, target = V(data_[:-1, :]), V(data_[1:, :])  # FIX: was `data[1:, :]`
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
def gen(**kwargs):
    """
    Generate one acrostic/normal poem per name listed in opt.start_words_file
    and dump {name: poem} to data.json.

    Bug fix: `result_data` was never initialized inside the function, so the
    first assignment raised NameError (unless a same-named global happened to
    exist) — it is now initialized as an empty dict before the loop.
    """
    print(kwargs)
    for k, v in kwargs.items():
        setattr(opt, k, v)
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256)
    # Map storages to CPU so a GPU-trained checkpoint loads anywhere.
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    if opt.use_gpu:
        model.cuda()

    # Read names and candidate prefix sentences from a JSON config file.
    with open(opt.start_words_file) as f:
        json_data = json.load(f)
        names = json_data["names"]
        prefix_words_list = json_data["prefix_words_list"]

    gen_poetry = gen_acrostic if opt.acrostic else generate
    result_data = {}  # FIX: was referenced without ever being initialized
    for staff in names:
        # NOTE(review): randint(1, len-1) never picks index 0 — confirm intended.
        result = gen_poetry(
            model, staff['name'], ix2word, word2ix,
            prefix_words_list[random.randint(1, len(prefix_words_list) - 1)])
        print(''.join(result))
        result_data[staff['name']] = ''.join(result)

    # One poem per name; keep the Chinese text readable in the file.
    with open('data.json', 'w', encoding='utf-8') as outfile:
        json.dump(result_data, outfile, ensure_ascii=False)
def train(**kwargs):
    """
    Train the poetry model with visdom visualization; checkpoint every epoch.

    Bug fix: `loss.data[0]` was removed after PyTorch 0.4 — indexing a 0-dim
    tensor raises IndexError on modern PyTorch. Replaced with `loss.item()`.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    vis = Visualizer(env=opt.env)

    # Data: index matrix (num_poems, maxlen), one padded poem per row.
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Model: (vocab_size, embedding_dim, hidden_dim).
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()  # cross-entropy over the vocabulary
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            # (batch, maxlen) -> (maxlen, batch); contiguous so later views work.
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()
            # Shifted pair: with "床前明月光", input "床前明月" predicts "前明月光".
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())  # FIX: loss.data[0] breaks on PyTorch >= 0.5

            # Periodic visualization.
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    # Drop into the debugger when the debug file exists.
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # Show the first 16 source poems of this batch.
                poetrys = [[ix2word[_word] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')
                # Generate 8 poems, one per seed character.
                gen_poetries = []
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')
        # Checkpoint after every epoch.
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
# NOTE(review): this chunk begins mid-expression (".replace('?', ...)") — the
# enclosing `gen` definition starts outside this view, so the fragment is kept
# byte-identical rather than reconstructed. The __main__ guard below trains the
# model, reloads checkpoint epoch 19, prints an acrostic seeded with '深度学习',
# then generates a normal poem via gen(). Assumes opt.device is set elsewhere —
# the line that would set it is commented out here; TODO confirm.
.replace('?', u'?') gen_poetry = gen_acrostic if opt.acrostic else generate result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words) print(''.join(result)) if __name__ == '__main__': # step1: 训练模型 ix2word, word2ix = train() # step2: 加载训练好的模型 # data, word2ix, ix2word = get_data(opt) # data, word2ix, ix2word = data['data'], data['word2ix'].item(), data['ix2word'].item() model = PoetryModel(len(word2ix), 128, 256) #opt.device = t.device('cuda:0') if opt.use_gpu else t.device('cpu') model.to(opt.device) model.load_state_dict(t.load('%s_%s.pth' % (opt.model_prefix, 19))) #利用训练好的模型生成藏头诗 results = gen_acrostic(model, start_words='深度学习', ix2word=ix2word, word2ix=word2ix, prefix_words=None) print(' '.join(results)) # 生成普通的诗 gen()
def train(**kwargs):
    """
    Train the poetry model with visdom-based monitoring; checkpoint every epoch.

    Bug fix: `loss.data[0]` raises IndexError on PyTorch >= 0.5 (loss is a
    0-dim tensor); replaced with `loss.item()`.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    vis = Visualizer(env=opt.env)

    # Get data.
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Model definition.
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            # Train on one batch: (batch, maxlen) -> (maxlen, batch), contiguous.
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()
            # Input and target are the same sequence shifted by one token.
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())  # FIX: was loss.data[0]

            # Visualization.
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # Original poems (first 16 of the batch).
                poetrys = [[ix2word[_word] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')
                gen_poetries = []
                # Seed 8 poems, one per character.
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train(**kwargs):
    """
    Train the poetry model; checkpoint every epoch.

    Bug fixes:
    - The batch loop bound its index to `li` while later code used `ii`
      (NameError at the first plot); the index is now consistently `ii`.
    - The poem-reconstruction comprehension indexed with `-iii`, an
      undefined name (the loop variable is `_iii`); fixed to `_iii`.
    - `loss.data[0]` raises IndexError on PyTorch >= 0.5; use `loss.item()`.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    vis = Visualizer(env=opt.env)

    # Get data.
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Model definition.
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):  # FIX: was `li`
            # Train.
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()
            # Input and target are the same sequence shifted by one token.
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())  # FIX: was loss.data[0]

            # Visualization.
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # Original poems (first 16 of the batch).
                poetrys = [[ix2word[_word] for _word in data_[:, _iii]]  # FIX: was -iii
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')
                gen_poetries = []
                # Seed 8 poems, one per character.
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(
                        generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join(
                    [''.join(poetry) for poetry in gen_poetries]),
                    win=u'gen_poem')
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train(**kwargs):
    """Train the poetry model; returns (ix2word, word2ix) for later generation.

    Keyword args override fields on the global `opt` config object.
    Saves a checkpoint per epoch as '<model_prefix>_<epoch>.pth'.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    opt.device = t.device('cuda:0') if opt.use_gpu else t.device('cpu')
    device = opt.device
    #vis = Visualizer(env=opt.env)
    # Load data.
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)  #[57580,125]
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)
    # Model definition.
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    model.to(device)
    # AverageValueMeter manages a running average of the loss.
    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()  # reset once at the start of every epoch
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            # contiguous: view() only works on a contiguous tensor. After
            # transpose/permute the storage may no longer be one block, so
            # contiguous() returns a contiguous copy first — i.e. we call
            # contiguous() so the later view()/view(-1) calls are valid.
            data_ = data_.long().transpose(
                1, 0).contiguous()  #data_ shape:[seq_len,batch_size]
            data_ = data_.to(device)
            optimizer.zero_grad()
            #input_ shape:[124,128] target shape:[124,128]
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(
                input_
            )  # output shape:[seq_len * batch_size,vocab_size], seq_len is 124 here
            loss = criterion(
                output, target.view(-1))  # target flattened to [seq_len * batch_size]
            loss.backward()
            optimizer.step()
            # Update the running loss average.
            loss_meter.add(loss.item())
            '''
            # 可视化
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # 诗歌原文
                poetrys = [[ix2word[_word] for _word in data_[:, _iii].tolist()]
                           for _iii in range(data_.shape[1])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')
                gen_poetries = []
                # 分别以这几个字作为诗歌的第一个字,生成8首诗
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')
            '''
        # Print the average loss once per epoch.
        print('epoch:%d, loss:%.3f' % (epoch, loss_meter.value()[0]))
        # Possible improvement: evaluate on a validation set and keep only the best model.
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
    return ix2word, word2ix
def train(**kwargs):
    """
    Train the poetry model (batch-first variant) with visdom monitoring.

    Bug fix: np.load requires allow_pickle=True to read the object arrays
    ('word2ix'/'ix2word') in the .npz archive; the default (False since
    NumPy 1.16.3) raises ValueError.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    opt.device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)

    # Load data.
    data_all = np.load(opt.pickle_path, allow_pickle=True)  # FIX: object arrays need pickling
    data = data_all['data']
    word2ix = data_all['word2ix'].item()
    ix2word = data_all['ix2word'].item()
    data = t.from_numpy(data)
    dataloader = DataLoader(data,
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=1)

    # Model definition.
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    loss_func = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(
            t.load(opt.model_path, map_location=t.device('cpu')))
    model.to(device)

    loss_avg = 0
    for epoch in range(opt.epoch):
        for ii, data_ in tqdm(enumerate(dataloader)):
            data_ = data_.long()
            data_ = data_.to(device)
            optimizer.zero_grad()
            # Batch-first shifted input/target pair.
            input_, target = data_[:, :-1], data_[:, 1:]
            output, _ = model(input_)
            loss = loss_func(output, target.reshape(-1))
            loss.backward()
            optimizer.step()
            loss_avg += loss.item()

            # Visualization.
            if (ii + 1) % opt.plot_every == 0:
                vis.plot('loss', loss_avg / opt.plot_every)
                loss_avg = 0
                # Original poems (first 16 of the batch).
                poetrys = [[ix2word[_word] for _word in data_[i].tolist()]
                           for i in range(data_.shape[0])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win='origin_poem')
                gen_poetries = []
                # Seed 8 poems, one per character.
                for word in list('春江花月夜凉如水'):
                    gen_poetry = ''.join(
                        generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join(
                    [''.join(poetry) for poetry in gen_poetries]),
                    win='gen_poem')
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train_torch_lstm(conf, args=None):
    """
    Train the LSTM poetry model with early stopping on dev perplexity.

    Bug fixes:
    - The dev-set perplexity was computed from a loss meter that still
      contained all of the epoch's *training* losses; the meter is now
      reset before evaluation so ppl reflects the dev set only.
    - The evaluation loop now runs under torch.no_grad() so it does not
      build autograd graphs.
    """
    pdata = PoemData()
    pdata.read_data(conf)
    pdata.get_vocab()
    device = torch.device('cuda') if conf.use_gpu else torch.device('cpu')
    model = PoetryModel(pdata.vocab_size, conf, device)
    train_data = pdata.train_data
    test_data = pdata.test_data
    train_data = torch.from_numpy(np.array(train_data['pad_words']))
    dev_data = torch.from_numpy(np.array(test_data['pad_words']))
    dataloader = DataLoader(train_data, batch_size=conf.batch_size,
                            shuffle=True, num_workers=conf.num_workers)
    devloader = DataLoader(dev_data, batch_size=conf.batch_size,
                           shuffle=True, num_workers=conf.num_workers)
    optimizer = Adam(model.parameters(), lr=conf.learning_rate)
    criterion = nn.CrossEntropyLoss()
    loss_meter = meter.AverageValueMeter()
    if conf.load_best_model:
        # NOTE(review): attribute is spelled `beat_model_path` — confirm in config.
        model.load_state_dict(torch.load(conf.beat_model_path))
    if conf.use_gpu:
        model.cuda()
        criterion.cuda()

    step = 0
    bestppl = 1e9
    early_stop_controller = 0
    for epoch in range(conf.n_epochs):
        losses = []
        loss_meter.reset()
        model.train()
        for i, data in enumerate(dataloader):
            # (batch, seq) -> (seq, batch); contiguous for later view calls.
            data = data.long().transpose(1, 0).contiguous()
            if conf.use_gpu:
                data = data.cuda()
            input, target = data[:-1, :], data[1:, :]
            optimizer.zero_grad()
            output, _ = model(input)
            loss = criterion(output, target.contiguous().view(-1))
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
            loss_meter.add(loss.item())
            step += 1
            if step % 100 == 0:
                print("epoch_%d_step_%d_loss:%0.4f" %
                      (epoch + 1, step, loss.item()))
        train_loss = float(loss_meter.value()[0])

        # Evaluation on the dev set — fresh meter so ppl covers dev only.
        model.eval()
        loss_meter.reset()  # FIX: meter previously still held the training losses
        with torch.no_grad():  # FIX: no autograd graph needed during evaluation
            for i, data in enumerate(devloader):
                data = data.long().transpose(1, 0).contiguous()
                if conf.use_gpu:
                    data = data.cuda()
                input, target = data[:-1, :], data[1:, :]
                output, _ = model(input)
                loss = criterion(output, target.view(-1))
                loss_meter.add(loss.item())
        ppl = math.exp(loss_meter.value()[0])
        print("epoch_%d_loss:%0.4f , ppl:%0.4f" % (epoch + 1, train_loss, ppl))

        # Periodic checkpoint plus a sample-poem dump for inspection.
        if epoch % conf.save_every == 0:
            torch.save(model.state_dict(),
                       "{0}_{1}".format(conf.model_prefix, epoch))
            fout = open("{0}out_{1}".format(conf.out_path, epoch),
                        'w', encoding='utf-8')
            for word in list('日红山夜湖海月'):
                gen_poetry = generate_poet(model, word, pdata.vocab, conf)
                fout.write("".join(gen_poetry) + '\n\n')
            fout.close()

        # Early stopping on best dev perplexity.
        if ppl < bestppl:
            bestppl = ppl
            early_stop_controller = 0
            torch.save(model.state_dict(),
                       "{0}_{1}".format(conf.best_model_path, "best_model"))
        else:
            early_stop_controller += 1
        if early_stop_controller > conf.patience:
            print("early stop.")
            break
# NOTE(review): top-level driver: load dataset, then either train + save + plot
# the loss curve, or load a saved checkpoint; finally generate one acrostic and
# one normal poem and print both. The '\\model.pt' path separator is
# Windows-specific — consider os.path.join for portability. Kept byte-identical.
data, ix2word, word2ix = load_dataset(opt.dataset_file) #init model model = PoetryModel(len(word2ix), 128, 256, num_layers=opt.num_layers) #training if opt.train_or_load: model, loss_list=train(model=model, data=data, ix2word=ix2word, word2ix=word2ix, \ lr=opt.lr, batch_size=opt.batch_size, num_epochs=opt.num_epochs, \ device=opt.device) #save model torch.save(model.state_dict(), opt.save_path + '\\model.pt') #draw loss-curve draw_fig(loss_list, opt.save_path) else: #load model model.load_state_dict(torch.load(opt.save_path + '\\model.pt')) #generate acrostic acrostic=gen_acrostic(model=model, start_words=opt.start_words_1, \ ix2word=ix2word, word2ix=word2ix, \ max_gen_len=opt.max_gen_len, device=opt.device) #generate poetry Poetry=generate(model=model, start_words=opt.start_words_2, \ ix2word=ix2word, word2ix=word2ix, \ max_gen_len=opt.max_gen_len, device=opt.device) print(Poetry) print() print(acrostic)
def train(**kwargs):
    """
    Train the poetry model with visdom monitoring; checkpoint every epoch.

    Bug fix: `loss.data[0]` raises IndexError on PyTorch >= 0.5 (loss is a
    0-dim tensor); replaced with `loss.item()`, consistent with the
    `_word.item()` call already used in the visualization section.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    vis = Visualizer(env=opt.env)

    # Data: (num_poems, 125) int32 index matrix, one padded poem per row.
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=2)

    # Model / optimizer / loss.
    model = PoetryModel(len(word2ix), opt.embedding_dim, opt.hidden_dim)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    # Resume from a checkpoint when one exists, to allow continued training.
    if opt.model_path and os.path.exists(opt.model_path):
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model = model.to(device)
        criterion = criterion.to(device)
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for i, data_ in tqdm.tqdm(enumerate(dataloader)):
            # int32 -> long, then (batch, seq) -> (seq, batch) for the model;
            # contiguous() so downstream view/reshape operations are valid.
            data_ = data_.long().transpose(0, 1).contiguous()
            if opt.use_gpu:
                data_ = data_.to(device)
            optimizer.zero_grad()
            # Teacher forcing: rows 0..n-2 are the input, rows 1..n-1 the target.
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            # output: (seq*batch, vocab); flatten target to match.
            target = target.view(-1)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())  # FIX: was loss.data[0]

            # Visualization.
            if (1 + i) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # Original poems of this batch (first 16).
                poetrys = [[ix2word[_word.item()] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')
                gen_poetries = []
                # Generate 8 poems, one per seed character.
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')
        # Save a checkpoint after every epoch.
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))