Example #1
def gen(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)
    data_all = np.load(opt.pickle_path)
    data = data_all['data']
    word2ix = data_all['word2ix'].item()
    ix2word = data_all['ix2word'].item()
    model = PoetryModel(len(word2ix), 128, 256)
    model.load_state_dict(t.load(opt.model_path, map_location=t.device('cpu')))
    opt.device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    model.to(opt.device)
    if opt.start_words.isprintable():
        start_words = opt.start_words
        prefix_words = opt.prefix_words if opt.prefix_words else None
    else:
        start_words = opt.start_words.encode('ascii',
                                             'surrogateescape').decode('utf8')
        prefix_words = opt.prefix_words.encode(
            'ascii',
            'surrogateescape').decode('utf8') if opt.prefix_words else None

    # normalize half-width punctuation to full-width
    start_words = start_words.replace(',', ',').replace('.', '。').replace('?', '?')

    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
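
A note before the remaining examples: none of them includes the PoetryModel class itself. Below is a minimal sketch of what it presumably looks like, reconstructed from the call signatures on this page (an embedding layer feeding an LSTM and a linear head, with the output flattened to [seq_len * batch_size, vocab_size]); the layer count and defaults are assumptions, and some examples pass extra arguments such as device or layer_num.

import torch as t
import torch.nn as nn

class PoetryModel(nn.Module):
    """Minimal sketch (assumed architecture): embedding -> LSTM -> linear."""

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers=2):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers)
        self.linear = nn.Linear(hidden_dim, vocab_size)

    def forward(self, input_, hidden=None):
        seq_len, batch_size = input_.size()
        embeds = self.embeddings(input_)            # [seq_len, batch_size, embedding_dim]
        output, hidden = self.lstm(embeds, hidden)  # [seq_len, batch_size, hidden_dim]
        output = self.linear(output.view(seq_len * batch_size, -1))
        return output, hidden                       # output: [seq_len * batch_size, vocab_size]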
Example #2
    def run(self):
        # 1. load the data
        data, char_to_ix, ix_to_chars = get_data(self.config)
        vocab_size = len(char_to_ix)
        print('样本数:%d' % len(data))
        print('词典大小: %d' % vocab_size)

        # 2. set up the DataLoader
        data = torch.from_numpy(data)
        data_loader = Data.DataLoader(data,
                                      batch_size=self.config.batch_size,
                                      shuffle=True,
                                      num_workers=1)

        # 3. build the model
        model = PoetryModel(vocab_size=vocab_size,
                            embedding_dim=self.config.embedding_dim,
                            hidden_dim=self.config.hidden_dim,
                            device=self.device,
                            layer_num=self.config.layer_num)
        model.to(self.device)

        # 4. create the optimizer
        optimizer = optim.Adam(model.parameters(),
                               lr=self.config.lr,
                               weight_decay=self.config.weight_decay)

        # 5. create the loss function (CrossEntropyLoss applies LogSoftmax internally)
        criterion = nn.CrossEntropyLoss()

        # 6. train
        self.train(data_loader, model, optimizer, criterion, char_to_ix,
                   ix_to_chars)
Example #3
def userTest():
    print("正在初始化......")
    datas = np.load("data/tang.npz", allow_pickle=True)
    data = datas['data']
    ix2word = datas['ix2word'].item()
    word2ix = datas['word2ix'].item()
    model = PoetryModel(len(ix2word), Config.embedding_dim, Config.hidden_dim)
    model.load_state_dict(t.load(Config.model_path, map_location='cpu'))
    if Config.use_gpu:
        model.to(t.device('cuda'))
    print("初始化完成!\n")
    while True:
        print("欢迎使用李港唐诗生成器,\n"
              "输入1 进入首句生成模式\n"
              "输入2 进入藏头诗生成模式\n")
        mode = int(input())
        if mode == 1:
            print("请输入您想要的诗歌首句,可以是五言或七言")
            start_words = str(input())
            gen_poetry = ''.join(generate(model, start_words, ix2word, word2ix))
            print("生成的诗句如下:%s\n" % (gen_poetry))
        elif mode == 2:
            print("请输入您想要的诗歌藏头部分,不超过16个字,最好是偶数")
            start_words = str(input())
            gen_poetry = ''.join(gen_acrostic(model, start_words, ix2word, word2ix))
            print("生成的诗句如下:%s\n" % (gen_poetry))
Example #4
def gen():
    """生成相应的情话"""
    data, word2ix, ix2word = get_data()
    model = PoetryModel(len(word2ix), config.embedding_dim, config.hidden_dim,
                        config.num_layers)
    state_dict = torch.load(config.model_path, map_location=lambda s, l: s)
    model.load_state_dict(state_dict)
    model.to(config.device)

    # string compatibility across Python 2 and Python 3
    if sys.version_info.major == 3:
        if config.start_words.isprintable():
            start_words = config.start_words
            prefix_words = config.prefix_words if config.prefix_words else None
        else:
            start_words = config.start_words.encode(
                'ascii', 'surrogateescape').decode('utf8')
            prefix_words = config.prefix_words.encode(
                'ascii', 'surrogateescape').decode(
                    'utf8') if config.prefix_words else None
    else:
        start_words = config.start_words.decode('utf8')
        prefix_words = config.prefix_words.decode(
            'utf8') if config.prefix_words else None

    # normalize half-width punctuation to full-width
    start_words = start_words.replace(',', u',').replace('.', u'。').replace('?', u'?')

    result = generate(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
Example #5
    def load_model(self):
        model = PoetryModel(len(self.word_to_ix), self.config.embedding_dim,
                            self.config.hidden_dim, self.device,
                            self.config.layer_num)
        # keep every tensor on the CPU while loading the checkpoint
        map_location = lambda s, l: s
        state_dict = torch.load(self.config.model_path,
                                map_location=map_location)
        model.load_state_dict(state_dict)
        model.to(self.device)
        self.model = model
Example #6
def gen(**kwargs):
    """
    gen provides the command-line interface
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)
    
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), opt.embedding_dim, opt.hidden_dim)
    # map_location keeps every tensor on the CPU while loading
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    
    if opt.use_gpu:
        model = model.to(device)
    
    # assumes Python 3.x
    if opt.start_words.isprintable():
        start_words = opt.start_words
        prefix_words = opt.prefix_words
    else:
        # argv arrived mis-decoded; surrogateescape round-trips the raw bytes back to UTF-8
        start_words = opt.start_words.encode('ascii', 'surrogateescape').decode('utf8')
        prefix_words = opt.prefix_words.encode('ascii', 'surrogateescape').decode('utf8') if opt.prefix_words else None
    
    # replace half-width punctuation with full-width
    start_words = start_words.replace(',', u',').replace('.', u'。').replace('?', u'?')
    
    # choose the appropriate generation function
    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
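
About the surrogateescape round-trip above: when sys.argv is decoded with the wrong codec, the raw UTF-8 bytes survive as unprintable surrogate escapes, and encoding back with 'surrogateescape' recovers them. A small self-contained demonstration (the sample string is arbitrary):

raw = '春江花月夜'.encode('utf8')                   # the bytes the shell actually passed
garbled = raw.decode('ascii', 'surrogateescape')  # what a mis-decoded argv looks like
print(garbled.isprintable())                      # False, hence the isprintable() check
fixed = garbled.encode('ascii', 'surrogateescape').decode('utf8')
print(fixed)                                      # 春江花月夜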
Example #7
def train():
    datas = np.load("tang.npz")
    data = datas['data']
    ix2word = datas['ix2word'].item()
    word2ix = datas['word2ix'].item()
    data = torch.from_numpy(data)
    dataloader = DataLoader(data[:5000],
                            batch_size=config.batch_size,
                            shuffle=True,
                            num_workers=2)

    model = PoetryModel(len(word2ix),
                        embedding_dim=config.embedding_dim,
                        hidden_dim=config.hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    criterion = nn.CrossEntropyLoss()

    model.to(config.device)

    f = open('result.txt', 'w')
    loss_history = []
    for epoch in range(config.epoch):
        start_time = time.time()
        temp_loss = 0

        for step, batch_data in enumerate(dataloader):
            batch_data = batch_data.long().transpose(1, 0).contiguous()
            batch_data = batch_data.to(config.device)  # move the batch to the model's device
            optimizer.zero_grad()
            trn, target = batch_data[:-1, :], batch_data[1:, :]
            output, _ = model(trn)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            temp_loss += loss.item()
            if step % config.print_freq == 0 or step == len(dataloader) - 1:
                print("Train: [{:2d}/{}] Step: {:03d}/{:03d} Loss: {} ".format(
                    epoch + 1, config.epoch, step,
                    len(dataloader) - 1, loss.item()))

        loss_history.append(temp_loss / len(dataloader))
        elapsed_time = time.time() - start_time
        print("Epoch: %d" % epoch + " " + "Loss: %d" % loss_history[-1] +
              " Epoch time: " +
              time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
        torch.save(model.state_dict(), config.model_path)
Example #8
def train():
    # load the data
    data, word2ix, ix2word = get_data()
    data = torch.from_numpy(data)
    dataloader = DataLoader(data,
                            batch_size=config.batch_size,
                            shuffle=True,
                            num_workers=1)

    # model definition
    model = PoetryModel(len(word2ix), config.embedding_dim, config.hidden_dim,
                        config.num_layers)
    optimizer = optim.Adam(model.parameters(),
                           lr=config.lr,
                           weight_decay=config.weight_decay)
    scheduler = StepLR(optimizer,
                       step_size=config.lr_step,
                       gamma=config.lr_gamma)
    criterion = nn.CrossEntropyLoss()
    model.to(config.device)

    for epoch in range(config.epoch):
        total_loss = 0
        for data_ in tqdm(dataloader):
            # training step
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(config.device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        scheduler.step()
        print("epoch: ", epoch, "loss: ", total_loss / len(dataloader))
        torch.save(model.state_dict(),
                   '%s_%s.pth' % (config.model_prefix, epoch))
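
This example is the only one that decays the learning rate. A tiny standalone check of how StepLR behaves (the step_size, gamma and lr here are illustrative, not the config values):

import torch
from torch.optim.lr_scheduler import StepLR

optimizer = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.1)
scheduler = StepLR(optimizer, step_size=2, gamma=0.5)  # halve the lr every 2 epochs
for epoch in range(6):
    optimizer.step()       # scheduler.step() comes after optimizer.step()
    scheduler.step()
    print(epoch, optimizer.param_groups[0]['lr'])
    # prints 0.1, 0.05, 0.05, 0.025, 0.025, 0.0125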
Example #9
# save the word dic for sample method
with open('wordDict', 'wb') as f:
    p.dump(word_to_ix, f)

# save all available word
# wordList = open('wordList','w')
# for w in word_to_ix:
#     wordList.write(w.encode('utf-8'))
# wordList.close()

# create model
print("create model...")
model = PoetryModel(vocab_size=len(word_to_ix),
                    embedding_dim=256,
                    hidden_dim=256)
model.to(device)
optimizer = optim.RMSprop(model.parameters(), lr=0.01, weight_decay=0.0001)

# use negative log likelihood loss.
criterion = nn.NLLLoss()

# one-hot vec representation of word
one_hot_var_target = {}
for w in word_to_ix:
    one_hot_var_target.setdefault(w, make_one_hot_vec_target(w, word_to_ix))

# hyper params
epochs = 10
data_size = len(data)
batch_size = 1000
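
This example pairs NLLLoss with a model that presumably ends in LogSoftmax, whereas the other examples feed raw logits to CrossEntropyLoss. The two setups are equivalent, as a quick check shows:

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 10)
target = torch.tensor([1, 0, 3, 9])
ce = nn.CrossEntropyLoss()(logits, target)
nll = nn.NLLLoss()(F.log_softmax(logits, dim=1), target)
print(torch.allclose(ce, nll))  # True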
Example #10
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(config, k, v)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # load the data
    data, vocab = get_data(config.filepath)
    np.random.shuffle(data)
    n = len(data)
    dev_data = data[:n // 5 - 1]  # hold out roughly a fifth of the data as a dev set
    data = data[n // 5:]
    data = torch.from_numpy(data)
    dev_data = torch.from_numpy(dev_data)
    dataloader = D.DataLoader(data,
                              batch_size=config.batch_size,
                              shuffle=True,
                              num_workers=4)
    dev_dataloader = D.DataLoader(dev_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=4)

    # model definition
    model = PoetryModel(len(vocab.word2idx), 128, 256)

    # if config.model_path:
    #     model.load_state_dict(torch.load(config.model_path))
    model.to(device)

    # SGD, SGD with momentum, Nesterov, Adagrad, Adadelta, Adam
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr)
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.9)
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, nesterov=True)
    # optimizer = torch.optim.Adagrad(model.parameters(), lr=config.lr)
    # optimizer = torch.optim.Adadelta(model.parameters())
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

    criterion = nn.CrossEntropyLoss()

    pre_pp = 0
    cnt = -1
    loss_his = []
    pp_his = []
    for epoch in range(config.epoch):
        for ii, data_ in enumerate(dataloader):
            # training step
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            print("epoch", epoch, "step", ii, "loss", loss.item())
            loss_his.append(loss.item())

            # testing: sample a poem periodically
            if (1 + ii) % config.gen_every == 0:
                # "'春江花月夜凉如水'"
                word = "春"
                gen_poetry = ''.join(generate(model, word, vocab))
                print(gen_poetry)

            if (1 + ii) % config.pp_every == 0:
                pp = check_perplexity(model, dev_dataloader)
                if pre_pp < pp:
                    cnt += 1
                pre_pp = pp
                print(pp.cpu().numpy())
                pp_his.append(pp.cpu().numpy())

                if cnt >= config.tolerance:
                    torch.save(model.state_dict(),
                               '%s_final.pth' % str(int(time.time())))
                    print("epoch", epoch, "step", ii, "final loss",
                          loss.item())
                    for word in ["日", "红", "山", "夜", "湖", "海", "月"]:
                        gen_poetry = ''.join(generate(model, word, vocab))
                        print(gen_poetry)
                    return loss_his, pp_his
        if (epoch + 1) % config.save_every == 0 or epoch + 1 == config.epoch:
            torch.save(model.state_dict(),
                       '%s_%s.pth' % (str(int(time.time())), str(epoch)))
    return loss_his, pp_his
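
check_perplexity is not shown in this example. A hedged sketch, assuming it returns exp of the mean token-level cross-entropy over the dev set (a tensor, consistent with the pp.cpu().numpy() calls above):

import torch
import torch.nn as nn

@torch.no_grad()
def check_perplexity(model, dev_dataloader, device=torch.device('cpu')):
    criterion = nn.CrossEntropyLoss(reduction='sum')
    total_loss, total_tokens = 0.0, 0
    for data_ in dev_dataloader:
        data_ = data_.long().transpose(1, 0).contiguous().to(device)
        input_, target = data_[:-1, :], data_[1:, :]
        output, _ = model(input_)
        total_loss += criterion(output, target.view(-1)).item()
        total_tokens += target.numel()
    return torch.exp(torch.tensor(total_loss / total_tokens))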
Example #11
File: main.py  Project: lingxr/poet
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    opt.device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)

    # load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # model definition
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    model.to(device)

    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):

            # training step
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            # visualization
            if (1 + ii) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # original poems
                poetrys = [[
                    ix2word[_word] for _word in data_[:, _iii].tolist()
                ] for _iii in range(data_.shape[1])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')

                gen_poetries = []
                # generate 8 poems, one starting with each of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(
                        generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join(
                    [''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
Example #12
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    opt.device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)

    # load the data
    data_all = np.load(opt.pickle_path)
    data = data_all['data']
    word2ix = data_all['word2ix'].item()
    ix2word = data_all['ix2word'].item()
    data = t.from_numpy(data)
    dataloader = DataLoader(data,
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=1)

    # model definition
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    loss_func = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(
            t.load(opt.model_path, map_location=t.device('cpu')))
    model.to(device)

    loss_avg = 0
    for epoch in range(opt.epoch):
        for ii, data_ in tqdm(enumerate(dataloader)):
            data_ = data_.long()
            data_ = data_.to(device)
            optimizer.zero_grad()
            input_, target = data_[:, :-1], data_[:, 1:]
            output, _ = model(input_)
            loss = loss_func(output, target.reshape(-1))
            loss.backward()
            optimizer.step()

            loss_avg += loss.item()

            # visualization
            if (ii + 1) % opt.plot_every == 0:
                vis.plot('loss', loss_avg / opt.plot_every)
                loss_avg = 0
                poetrys = [[ix2word[_word] for _word in data_[i].tolist()]
                           for i in range(data_.shape[0])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win='origin_poem')

                gen_poetries = []
                for word in list('春江花月夜凉如水'):
                    gen_poetry = ''.join(
                        generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join(
                    [''.join(poetry) for poetry in gen_poetries]),
                         win='gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
Example #13
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    opt.device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)

    # load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # model definition
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    model.to(device)

    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):

            # training step
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            # visualization
            if (1 + ii) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # original poems
                poetrys = [[ix2word[_word] for _word in data_[:, _iii].tolist()]
                           for _iii in range(data_.shape[1])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win=u'origin_poem')

                gen_poetries = []
                # generate 8 poems, one starting with each of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]), win=u'gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
Example #14
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)
    
    vis = Visualizer(env=opt.env)
    
    # load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data, batch_size=opt.batch_size, shuffle=True, num_workers=2)
    
    # define the model
    model = PoetryModel(len(word2ix), opt.embedding_dim, opt.hidden_dim)
    # optimizer
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    # Loss Function
    criterion = nn.CrossEntropyLoss()
    
    # load a pretrained model so that training can resume
    if opt.model_path and os.path.exists(opt.model_path):
        model.load_state_dict(t.load(opt.model_path))
    
    # GPU related

    if opt.use_gpu:
        model = model.to(device)
        criterion = criterion.to(device)
    
    # loss meter
    loss_meter = meter.AverageValueMeter()
    
    # training loop
    for epoch in range(opt.epoch):
        loss_meter.reset()
        
        # iterate over the batches
        for i, data_ in tqdm.tqdm(enumerate(dataloader)):
            
            # training step
            # data_
            # size: [128, 125]  each batch is 128 rows, one poem per row, 125 tokens long
            # type: Tensor
            # dtype: torch.int32, which must be cast to long

            # this one line does a lot:
            # step 1: cast int32 to long
            # step 2: swap rows and columns, as parallel computation requires
            # step 3: lay the data out in contiguous memory to avoid errors in later ops
            data_ = data_.long().transpose(0, 1).contiguous()
            
            # GPU related
            if opt.use_gpu:
                data_ = data_.to(device)
            
            # by this point data_.dtype has become torch.int64
            # print(data_.dtype)
            
            # zero the gradients
            optimizer.zero_grad()
            
            # shifted training, easy to see why:
            # the first n-1 rows are the input and the last n-1 rows are the target
            # this layout, again, is what parallel computation needs
            # input_ has a trailing underscore to avoid shadowing the built-in input
            input_, target = data_[:-1, :], data_[1:, :]
            
            # the model returns output and hidden
            # hidden is of no use here
            output, _ = model(input_)
            
            # compute the loss
            target = target.view(-1)
            
            # new target.size(): [15872]  (124 * 128 = 15872)
            # output.size(): [15872, 8293], where 8293 is the vocabulary size
            
            loss = criterion(output, target)
            
            # backpropagation
            loss.backward()
            
            # the optimizer updates the parameters by gradient descent
            optimizer.step()
            
            loss_meter.add(loss.item())  # loss.data[0] is deprecated; item() works on 0-dim tensors

            # visualization
            if (1 + i) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # original poems
                poetrys = [[ix2word[_word.item()] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win=u'origin_poem')

                gen_poetries = []
                # generate 8 poems, one starting with each of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]), win=u'gen_poem')
        # save the model once per epoch
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
Example #15
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    opt.device = t.device('cuda:0') if opt.use_gpu else t.device('cpu')
    device = opt.device
    #vis = Visualizer(env=opt.env)

    # load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)  # shape: [57580, 125]
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # model definition
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    model.to(device)

    # the AverageValueMeter class tracks the running average of a variable
    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()  # reset once at the start of every epoch

        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):

            # training step
            # contiguous: view() only works on contiguous tensors. If transpose,
            # permute, etc. were applied first, call contiguous() to get a
            # contiguous copy. One possible explanation: some tensors are not
            # stored in one solid block of memory but assembled from separate
            # chunks, while view() relies on the memory being one block;
            # contiguous() rewrites the tensor so it is contiguous in memory.
            # In short, contiguous() is called so that view() can be used.
            data_ = data_.long().transpose(1, 0).contiguous()  # data_ shape: [seq_len, batch_size]
            data_ = data_.to(device)
            optimizer.zero_grad()
            # input_ shape: [124, 128], target shape: [124, 128]
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)  # output shape: [seq_len * batch_size, vocab_size], seq_len = 124 here
            loss = criterion(output, target.view(-1))  # target must be reshaped to [seq_len * batch_size]

            loss.backward()
            optimizer.step()

            # update loss_meter
            loss_meter.add(loss.item())
            '''
            # visualization
            if (1 + ii) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # original poems
                poetrys = [[ix2word[_word] for _word in data_[:, _iii].tolist()]
                           for _iii in range(data_.shape[1])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win=u'origin_poem')

                gen_poetries = []
                # generate 8 poems, one starting with each of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]), win=u'gen_poem')
            '''
        # print the loss value after every epoch
        print('epoch:%d, loss:%.3f' % (epoch, loss_meter.value()[0]))

        # possible improvement: evaluate on a validation set and keep only the best model
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))

    return ix2word, word2ix
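
The contiguous() comment in this example can be verified directly: transpose returns a non-contiguous view that view() rejects, while contiguous() makes a compact copy first.

import torch

x = torch.arange(6).reshape(2, 3)
y = x.transpose(0, 1)           # shares storage with x, no longer contiguous
print(y.is_contiguous())        # False
# y.view(-1) would raise a RuntimeError here
print(y.contiguous().view(-1))  # tensor([0, 3, 1, 4, 2, 5])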
Example #16
        .replace('?', u'?')

    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))


if __name__ == '__main__':

    # step 1: train the model
    ix2word, word2ix = train()

    # step 2: load the trained model
    # data, word2ix, ix2word = get_data(opt)
    # data, word2ix, ix2word = data['data'], data['word2ix'].item(), data['ix2word'].item()

    model = PoetryModel(len(word2ix), 128, 256)
    #opt.device = t.device('cuda:0') if opt.use_gpu else t.device('cpu')
    model.to(opt.device)
    model.load_state_dict(t.load('%s_%s.pth' % (opt.model_prefix, 19)))
    # use the trained model to generate an acrostic poem

    results = gen_acrostic(model,
                           start_words='深度学习',
                           ix2word=ix2word,
                           word2ix=word2ix,
                           prefix_words=None)
    print(' '.join(results))

    # generate an ordinary poem
    gen()