def gen(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)
    # allow_pickle=True is required: word2ix/ix2word are pickled dicts
    data_all = np.load(opt.pickle_path, allow_pickle=True)
    data = data_all['data']
    word2ix = data_all['word2ix'].item()
    ix2word = data_all['ix2word'].item()
    model = PoetryModel(len(word2ix), 128, 256)
    model.load_state_dict(t.load(opt.model_path, map_location=t.device('cpu')))
    opt.device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    model.to(opt.device)
    if opt.start_words.isprintable():
        start_words = opt.start_words
        prefix_words = opt.prefix_words if opt.prefix_words else None
    else:
        # Recover UTF-8 text that arrived as surrogate-escaped bytes
        start_words = opt.start_words.encode('ascii', 'surrogateescape').decode('utf8')
        prefix_words = opt.prefix_words.encode(
            'ascii', 'surrogateescape').decode('utf8') if opt.prefix_words else None
    # Map half-width punctuation to full-width
    start_words = start_words.replace(',', ',').replace('.', '。').replace('?', '?')
    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
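# Every snippet in this section constructs a PoetryModel but its definition is
# never shown. Below is a minimal sketch of what such a model typically looks
# like (embedding -> LSTM -> linear over the vocabulary), inferred from how it
# is called in the training loops; it is an assumption, not the repositories'
# actual class. Variants that pass device/layer_num, or that train with
# NLLLoss, would differ (the NLLLoss variant would add a log_softmax).
import torch
import torch.nn as nn


class PoetryModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers=2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers)
        self.linear = nn.Linear(hidden_dim, vocab_size)

    def forward(self, input_, hidden=None):
        seq_len, batch_size = input_.size()
        embeds = self.embeddings(input_)            # [seq_len, batch, embedding_dim]
        output, hidden = self.lstm(embeds, hidden)  # [seq_len, batch, hidden_dim]
        # Flatten so the logits line up with target.view(-1) in the loss calls
        output = self.linear(output.reshape(seq_len * batch_size, -1))
        return output, hidden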
def run(self):
    # 1. Load the data
    data, char_to_ix, ix_to_chars = get_data(self.config)
    vocab_size = len(char_to_ix)
    print('Number of samples: %d' % len(data))
    print('Vocabulary size: %d' % vocab_size)
    # 2. Set up the DataLoader
    data = torch.from_numpy(data)
    data_loader = Data.DataLoader(data,
                                  batch_size=self.config.batch_size,
                                  shuffle=True,
                                  num_workers=1)
    # 3. Build the model
    model = PoetryModel(vocab_size=vocab_size,
                        embedding_dim=self.config.embedding_dim,
                        hidden_dim=self.config.hidden_dim,
                        device=self.device,
                        layer_num=self.config.layer_num)
    model.to(self.device)
    # 4. Create the optimizer
    optimizer = optim.Adam(model.parameters(), lr=self.config.lr,
                           weight_decay=self.config.weight_decay)
    # 5. Loss function: CrossEntropyLoss combines log-softmax and NLL,
    #    so the model can output raw logits
    criterion = nn.CrossEntropyLoss()
    # 6. Train
    self.train(data_loader, model, optimizer, criterion, char_to_ix, ix_to_chars)
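# get_data is called throughout but never defined in this section, and the
# snippets use slightly different signatures for it. A plausible minimal
# version of the three-value variant is sketched below, assuming the usual
# tang.npz layout (an index matrix plus two pickled dicts) and a pickle_path
# attribute like the opt.pickle_path used above; treat it as an illustration,
# not any project's actual loader.
import numpy as np


def get_data(config):
    datas = np.load(config.pickle_path, allow_pickle=True)
    data = datas['data']               # [num_poems, max_len] matrix of indices
    word2ix = datas['word2ix'].item()  # char -> index
    ix2word = datas['ix2word'].item()  # index -> char
    return data, word2ix, ix2word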
def userTest():
    print("Initializing......")
    datas = np.load("data/tang.npz", allow_pickle=True)
    data = datas['data']
    ix2word = datas['ix2word'].item()
    word2ix = datas['word2ix'].item()
    model = PoetryModel(len(ix2word), Config.embedding_dim, Config.hidden_dim)
    model.load_state_dict(t.load(Config.model_path, map_location='cpu'))
    if Config.use_gpu:
        model.to(t.device('cuda'))
    print("Initialization done!\n")
    while True:
        print("Welcome to Li Gang's Tang poetry generator.\n"
              "Enter 1 for first-line continuation mode\n"
              "Enter 2 for acrostic mode\n")
        mode = int(input())
        if mode == 1:
            print("Enter the opening line you want, five or seven characters per line")
            start_words = str(input())
            gen_poetry = ''.join(generate(model, start_words, ix2word, word2ix))
            print("Generated poem: %s\n" % gen_poetry)
        elif mode == 2:
            print("Enter the acrostic head characters, at most 16, preferably an even number")
            start_words = str(input())
            gen_poetry = ''.join(gen_acrostic(model, start_words, ix2word, word2ix))
            print("Generated poem: %s\n" % gen_poetry)
def gen():
    """Generate the corresponding sweet-talk lines."""
    data, word2ix, ix2word = get_data()
    model = PoetryModel(len(word2ix), config.embedding_dim,
                        config.hidden_dim, config.num_layers)
    # The lambda keeps every tensor on CPU when loading the checkpoint
    state_dict = torch.load(config.model_path, map_location=lambda s, l: s)
    model.load_state_dict(state_dict)
    model.to(config.device)
    # String compatibility between Python 2 and Python 3
    if sys.version_info.major == 3:
        if config.start_words.isprintable():
            start_words = config.start_words
            prefix_words = config.prefix_words if config.prefix_words else None
        else:
            start_words = config.start_words.encode(
                'ascii', 'surrogateescape').decode('utf8')
            prefix_words = config.prefix_words.encode(
                'ascii', 'surrogateescape').decode(
                'utf8') if config.prefix_words else None
    else:
        start_words = config.start_words.decode('utf8')
        prefix_words = config.prefix_words.decode(
            'utf8') if config.prefix_words else None
    # Map half-width punctuation to full-width
    start_words = start_words.replace(',', u',').replace('.', u'。').replace('?', u'?')
    result = generate(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
def load_model(self):
    model = PoetryModel(len(self.word_to_ix),
                        self.config.embedding_dim,
                        self.config.hidden_dim,
                        self.device,
                        self.config.layer_num)
    # Keep all tensors on CPU while loading; move to the target device after
    map_location = lambda s, l: s
    state_dict = torch.load(self.config.model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    model.to(self.device)
    self.model = model
def gen(**kwargs):
    """gen: command-line interface for generation."""
    for k, v in kwargs.items():
        setattr(opt, k, v)
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), opt.embedding_dim, opt.hidden_dim)
    # map_location=lambda s, l: s keeps every tensor on CPU when loading,
    # regardless of the device the checkpoint was saved from
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    if opt.use_gpu:
        model = model.to(device)
    # Assumes Python 3.x by default
    if opt.start_words.isprintable():
        start_words = opt.start_words
        prefix_words = opt.prefix_words
    else:
        # Recover UTF-8 text that arrived as surrogate-escaped bytes, which
        # happens when non-UTF-8 locales pass Chinese text on the command line
        start_words = opt.start_words.encode('ascii', 'surrogateescape').decode('utf8')
        prefix_words = opt.prefix_words.encode('ascii', 'surrogateescape').decode(
            'utf8') if opt.prefix_words else None
    # Map half-width punctuation to full-width
    start_words = start_words.replace(',', u',').replace('.', u'。').replace('?', u'?')
    # Choose the appropriate generation function
    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
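# generate(model, start_words, ix2word, word2ix, prefix_words) is used in
# several snippets above but never defined here. Below is a minimal sketch of
# the usual char-RNN decoding loop: teacher-force the given start words, then
# decode greedily until an end marker. The <START>/<EOP> control tokens and
# the argmax choice are assumptions, not necessarily what these repositories do.
import torch as t


def generate(model, start_words, ix2word, word2ix, prefix_words=None,
             max_gen_len=200):
    results = list(start_words)
    start_word_len = len(start_words)
    # Begin decoding from the <START> marker
    input_ = t.tensor([[word2ix['<START>']]]).long()
    hidden = None
    # Optionally feed prefix_words first to set the style; their outputs are
    # discarded and only the hidden state is kept
    if prefix_words:
        for word in prefix_words:
            output, hidden = model(input_, hidden)
            input_ = input_.data.new([[word2ix[word]]])
    for i in range(max_gen_len):
        output, hidden = model(input_, hidden)
        if i < start_word_len:
            # Teacher-force the given start words
            w = results[i]
            input_ = input_.data.new([[word2ix[w]]])
        else:
            # Greedy decoding: pick the most probable next character
            top_index = output.data[0].topk(1)[1][0].item()
            w = ix2word[top_index]
            results.append(w)
            input_ = input_.data.new([[top_index]])
        if w == '<EOP>':
            del results[-1]
            break
    return results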
def train():
    datas = np.load("tang.npz", allow_pickle=True)
    data = datas['data']
    ix2word = datas['ix2word'].item()
    word2ix = datas['word2ix'].item()
    data = torch.from_numpy(data)
    # Train on the first 5000 poems only
    dataloader = DataLoader(data[:5000],
                            batch_size=config.batch_size,
                            shuffle=True,
                            num_workers=2)
    model = PoetryModel(len(word2ix),
                        embedding_dim=config.embedding_dim,
                        hidden_dim=config.hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    criterion = nn.CrossEntropyLoss()
    model.to(config.device)
    f = open('result.txt', 'w')
    loss_history = []
    for epoch in range(config.epoch):
        start_time = time.time()
        temp_loss = 0
        for step, batch_data in enumerate(dataloader):
            # [batch, seq_len] -> [seq_len, batch]
            batch_data = batch_data.long().transpose(1, 0).contiguous()
            optimizer.zero_grad()
            # Shift by one step: predict the next character at every position
            trn, target = batch_data[:-1, :], batch_data[1:, :]
            output, _ = model(trn)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            temp_loss += loss.item()
            if step % config.print_freq == 0 or step == len(dataloader) - 1:
                print("Train: [{:2d}/{}] Step: {:03d}/{:03d} Loss: {} ".format(
                    epoch + 1, config.epoch, step, len(dataloader) - 1, loss.item()))
        loss_history.append(temp_loss / len(dataloader))
        elapsed_time = time.time() - start_time
        print("Epoch: %d" % epoch + " " + "Loss: %.3f" % loss_history[-1] +
              " Epoch time: " + time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
    torch.save(model.state_dict(), config.model_path)
    f.close()
def train():
    # Load the data
    data, word2ix, ix2word = get_data()
    data = torch.from_numpy(data)
    dataloader = DataLoader(data,
                            batch_size=config.batch_size,
                            shuffle=True,
                            num_workers=1)
    # Define the model
    model = PoetryModel(len(word2ix), config.embedding_dim,
                        config.hidden_dim, config.num_layers)
    optimizer = optim.Adam(model.parameters(), lr=config.lr,
                           weight_decay=config.weight_decay)
    scheduler = StepLR(optimizer, step_size=config.lr_step, gamma=config.lr_gamma)
    criterion = nn.CrossEntropyLoss()
    model.to(config.device)
    for epoch in range(config.epoch):
        total_loss = 0
        for data_ in tqdm(dataloader):
            # Training step
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(config.device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        scheduler.step()
        print("epoch: ", epoch, "loss: ", total_loss / len(dataloader))
        torch.save(model.state_dict(), '%s_%s.pth' % (config.model_prefix, epoch))
# save the word dict for the sample method
# (p is assumed to be `import pickle as p` elsewhere in this script)
with open('wordDict', 'wb') as f:
    p.dump(word_to_ix, f)

# save all available words
# wordList = open('wordList', 'w')
# for w in word_to_ix:
#     wordList.write(w.encode('utf-8'))
# wordList.close()

# create model
print("create model...")
model = PoetryModel(vocab_size=len(word_to_ix), embedding_dim=256, hidden_dim=256)
model.to(device)
optimizer = optim.RMSprop(model.parameters(), lr=0.01, weight_decay=0.0001)

# use negative log likelihood loss
criterion = nn.NLLLoss()

# one-hot vec representation of word
one_hot_var_target = {}
for w in word_to_ix:
    one_hot_var_target.setdefault(w, make_one_hot_vec_target(w, word_to_ix))

# hyper params
epochs = 10
data_size = len(data)
batch_size = 1000
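# make_one_hot_vec_target is referenced above without a definition. Despite
# the name, nn.NLLLoss expects a class index rather than a one-hot vector, so
# a working version can simply wrap the index in a LongTensor. This is a guess
# at the helper's intent, not its actual source.
import torch


def make_one_hot_vec_target(word, word_to_ix):
    # LongTensor of shape [1] holding the class index, as NLLLoss requires
    return torch.LongTensor([word_to_ix[word]])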
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(config, k, v)
    device = torch.device('cuda') if use_cuda else torch.device('cpu')
    # Load the data and hold out one fifth as a dev set
    data, vocab = get_data(config.filepath)
    np.random.shuffle(data)
    l = len(data)
    dev_data = data[:l // 5 - 1]
    data = data[l // 5:]
    data = torch.from_numpy(data)
    dev_data = torch.from_numpy(dev_data)
    dataloader = D.DataLoader(data, batch_size=config.batch_size,
                              shuffle=True, num_workers=4)
    dev_dataloader = D.DataLoader(dev_data, batch_size=config.batch_size,
                                  shuffle=True, num_workers=4)
    # Define the model
    model = PoetryModel(len(vocab.word2idx), 128, 256)
    # if config.model_path:
    #     model.load_state_dict(torch.load(config.model_path))
    model.to(device)
    # SGD, SGD with momentum, Nesterov, Adagrad, Adadelta, Adam
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr)
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.9)
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, nesterov=True)
    # optimizer = torch.optim.Adagrad(model.parameters(), lr=config.lr)
    # optimizer = torch.optim.Adadelta(model.parameters())
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
    criterion = nn.CrossEntropyLoss()
    pre_pp = 0
    cnt = -1
    loss_his = []
    pp_his = []
    for epoch in range(config.epoch):
        for ii, data_ in enumerate(dataloader):
            # Training step
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            print("epoch", epoch, "step", ii, "loss", loss.item())
            loss_his.append(loss.item())
            # Sample a poem periodically
            if (1 + ii) % config.gen_every == 0:
                # e.g. '春江花月夜凉如水'
                word = "春"
                gen_poetry = ''.join(generate(model, word, vocab))
                print(gen_poetry)
            # Early stopping on dev-set perplexity
            if (1 + ii) % config.pp_every == 0:
                pp = check_perplexity(model, dev_dataloader)
                if pre_pp < pp:
                    cnt += 1
                pre_pp = pp
                print(pp.cpu().numpy())
                pp_his.append(pp.cpu().numpy())
                if cnt >= config.tolerance:
                    torch.save(model.state_dict(), '%s_final.pth' % str(int(time.time())))
                    print("epoch", epoch, "step", ii, "final loss", loss.item())
                    for word in ["日", "红", "山", "夜", "湖", "海", "月"]:
                        gen_poetry = ''.join(generate(model, word, vocab))
                        print(gen_poetry)
                    return loss_his, pp_his
        if (epoch + 1) % config.save_every == 0 or epoch + 1 == config.epoch:
            torch.save(model.state_dict(), '%s_%s.pth' % (str(int(time.time())), str(epoch)))
    return loss_his, pp_his
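# check_perplexity is called above but not defined in this section. A standard
# way to compute it is exp(mean cross-entropy) over the held-out set. The
# sketch below assumes the same [batch, seq_len] -> [seq_len, batch] layout as
# the training loop and infers the device from the model; it is an
# illustration, not the project's implementation.
import torch
import torch.nn as nn


def check_perplexity(model, dev_dataloader):
    device = next(model.parameters()).device
    criterion = nn.CrossEntropyLoss()
    total_loss, n_batches = 0.0, 0
    model.eval()
    with torch.no_grad():
        for data_ in dev_dataloader:
            data_ = data_.long().transpose(1, 0).contiguous().to(device)
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            total_loss += criterion(output, target.view(-1))
            n_batches += 1
    model.train()
    # Perplexity = exp(average cross-entropy); returned as a tensor, matching
    # the pp.cpu().numpy() call in the training loop above
    return torch.exp(total_loss / n_batches)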
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)
    opt.device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)
    # Load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)
    # Define the model
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    model.to(device)
    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            # Training step
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())
            # Visualization
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # Original poems from this batch
                poetrys = [[ix2word[_word] for _word in data_[:, _iii].tolist()]
                           for _iii in range(data_.shape[1])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')
                gen_poetries = []
                # Generate 8 poems, each starting with one of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)
    opt.device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)
    # Load the data
    data_all = np.load(opt.pickle_path, allow_pickle=True)
    data = data_all['data']
    word2ix = data_all['word2ix'].item()
    ix2word = data_all['ix2word'].item()
    data = t.from_numpy(data)
    dataloader = DataLoader(data,
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=1)
    # Define the model
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    loss_func = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path, map_location=t.device('cpu')))
    model.to(device)
    loss_avg = 0
    for epoch in range(opt.epoch):
        for ii, data_ in tqdm(enumerate(dataloader)):
            data_ = data_.long()
            data_ = data_.to(device)
            optimizer.zero_grad()
            # Batch-first layout: shift by one position along the sequence axis
            input_, target = data_[:, :-1], data_[:, 1:]
            output, _ = model(input_)
            loss = loss_func(output, target.reshape(-1))
            loss.backward()
            optimizer.step()
            loss_avg += loss.item()
            # Visualization
            if (ii + 1) % opt.plot_every == 0:
                vis.plot('loss', loss_avg / opt.plot_every)
                loss_avg = 0
                poetrys = [[ix2word[_word] for _word in data_[i].tolist()]
                           for i in range(data_.shape[0])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win='origin_poem')
                gen_poetries = []
                for word in list('春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win='gen_poem')
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # Load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=2)

    # Define the model
    model = PoetryModel(len(word2ix), opt.embedding_dim, opt.hidden_dim)
    # Optimizer
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Load a pre-trained model so training can resume from a checkpoint
    if opt.model_path and os.path.exists(opt.model_path):
        model.load_state_dict(t.load(opt.model_path))

    # GPU related
    if opt.use_gpu:
        model = model.to(device)
        criterion = criterion.to(device)

    # Loss meter
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        # Iterate over the batched dataset
        for i, data_ in tqdm.tqdm(enumerate(dataloader)):
            # data_
            #   size: [128, 125], 128 rows per batch, one poem per row, 125 chars each
            #   type: Tensor
            #   dtype: torch.int32, which must be converted to long
            # The next line does three things:
            #   1. cast int32 to long
            #   2. swap rows and columns, needed for parallel computation
            #   3. lay the data out contiguously in memory so later ops don't fail
            data_ = data_.long().transpose(0, 1).contiguous()
            # GPU related
            if opt.use_gpu:
                data_ = data_.to(device)
            # By now data_.dtype has become torch.int64
            # print(data_.dtype)

            # Clear the gradients
            optimizer.zero_grad()

            # Shifted training, easy to picture: the first n-1 rows are the
            # input and the last n-1 rows are the target, again so everything
            # can be computed in parallel.
            # input_ has a trailing underscore to avoid shadowing the
            # built-in function input
            input_, target = data_[:-1, :], data_[1:, :]

            # The model returns output and hidden; hidden is not needed here
            output, _ = model(input_)

            # Compute the loss
            target = target.view(-1)
            # target.size() is now [15872] (124 * 128 = 15872)
            # output.size() is [15872, 8293], where 8293 is the vocabulary size
            loss = criterion(output, target)

            # Backpropagate
            loss.backward()
            # Gradient step to update the parameters
            optimizer.step()

            loss_meter.add(loss.item())

            # Visualization
            if (1 + i) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # Original poems from this batch
                poetrys = [[ix2word[_word.item()] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')
                gen_poetries = []
                # Generate 8 poems, each starting with one of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')

        # Save the model after every epoch
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
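# A tiny standalone illustration of the shifted input/target trick used in the
# loops above: for the sequence "ABCD", the input is "ABC" and the target is
# "BCD", so every position is trained to predict its successor.
import torch

seq = torch.tensor([[0, 1, 2, 3]]).transpose(0, 1)  # [seq_len=4, batch=1]
input_, target = seq[:-1, :], seq[1:, :]
print(input_.squeeze().tolist())   # [0, 1, 2]
print(target.squeeze().tolist())   # [1, 2, 3]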
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)
    opt.device = t.device('cuda:0') if opt.use_gpu else t.device('cpu')
    device = opt.device
    # vis = Visualizer(env=opt.env)

    # Load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)  # [57580, 125]
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Define the model
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    model.to(device)

    # AverageValueMeter tracks the running mean of the loss
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()  # reset at the start of every epoch
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            # contiguous: view() only works on contiguous tensors. If
            # transpose, permute, etc. were applied before view(), call
            # contiguous() to get a contiguous copy. One way to see it: some
            # tensors do not occupy one solid block of memory but are stitched
            # from several chunks, while view() relies on the memory being a
            # single block, so contiguous() rewrites the tensor into a
            # contiguous layout. In short, contiguous() is called here so that
            # view() can be used later.
            data_ = data_.long().transpose(1, 0).contiguous()  # [seq_len, batch_size]
            data_ = data_.to(device)
            optimizer.zero_grad()
            # input_ shape: [124, 128]; target shape: [124, 128]
            input_, target = data_[:-1, :], data_[1:, :]
            # output shape: [seq_len * batch_size, vocab_size]; seq_len is 124 here
            output, _ = model(input_)
            # target must be flattened to [seq_len * batch_size]
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            # Update loss_meter
            loss_meter.add(loss.item())
            '''
            # Visualization
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # Original poems from this batch
                poetrys = [[ix2word[_word] for _word in data_[:, _iii].tolist()]
                           for _iii in range(data_.shape[1])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')
                gen_poetries = []
                # Generate 8 poems, each starting with one of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')
            '''
        # Print the loss after every epoch
        print('epoch:%d, loss:%.3f' % (epoch, loss_meter.value()[0]))
        # TODO: evaluate on a validation set and keep only the best checkpoint
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
    return ix2word, word2ix
    # (tail of gen(): map half-width punctuation to full-width, then generate)
    start_words = start_words.replace(',', u',').replace('.', u'。').replace('?', u'?')
    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))


if __name__ == '__main__':
    # step 1: train the model
    ix2word, word2ix = train()

    # step 2: load the trained model
    # data, word2ix, ix2word = get_data(opt)
    # data, word2ix, ix2word = data['data'], data['word2ix'].item(), data['ix2word'].item()
    model = PoetryModel(len(word2ix), 128, 256)
    # opt.device = t.device('cuda:0') if opt.use_gpu else t.device('cpu')
    model.load_state_dict(t.load('%s_%s.pth' % (opt.model_prefix, 19),
                                 map_location=t.device('cpu')))
    model.to(opt.device)

    # Generate an acrostic poem with the trained model
    results = gen_acrostic(model, start_words='深度学习', ix2word=ix2word,
                           word2ix=word2ix, prefix_words=None)
    print(' '.join(results))

    # Generate an ordinary poem
    gen()
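# gen_acrostic is used in several snippets above but never shown. Below is a
# minimal sketch of the usual acrostic loop: each given head character starts
# a new line, and the model free-runs until it emits line-ending punctuation.
# The <START> control token and the greedy sampling choice are assumptions,
# not the repositories' actual code.
import torch as t


def gen_acrostic(model, start_words, ix2word, word2ix, prefix_words=None,
                 max_gen_len=125):
    results = []
    input_ = t.tensor([[word2ix['<START>']]]).long()
    hidden = None
    index = 0            # how many head characters have been consumed
    pre_word = '<START>'
    # Optionally warm up the hidden state on prefix_words to set the style
    if prefix_words:
        for word in prefix_words:
            output, hidden = model(input_, hidden)
            input_ = input_.data.new([[word2ix[word]]])
    for _ in range(max_gen_len):
        output, hidden = model(input_, hidden)
        top_index = output.data[0].topk(1)[1][0].item()
        w = ix2word[top_index]
        # At the start of the poem, or right after end-of-sentence
        # punctuation, force the next head character
        if pre_word in {u'。', u'!', '<START>'}:
            if index == len(start_words):
                break
            w = start_words[index]
            index += 1
            input_ = input_.data.new([[word2ix[w]]])
        else:
            input_ = input_.data.new([[top_index]])
        results.append(w)
        pre_word = w
    return results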