Example 1
 def forward(self, rnn_outputs, encoder_outputs):
     ''' At time step t, compute the alignment between the single decoder output and the s encoder outputs, i.e. the attention weights
     Args:
         rnn_outputs: output of the decoder GRU, [1, b, h]
         encoder_outputs: the encoder outputs (last layer), [s, b, h]
     Returns:
         attn_weights: attention weights of Yt over all Xs, [b, s]
     '''
     seq_len = encoder_outputs.size()[0]
     this_batch_size = encoder_outputs.size()[1]
     # (b, h)
     rnn_outputs = rnn_outputs.squeeze(0)
     # attn_energies (b, s)
     attn_energies = get_variable(torch.zeros(this_batch_size, seq_len))
     for b in range(this_batch_size):
         # (h,) the decoder GRU output for batch element b
         decoder_rnn_output = rnn_outputs[b]
         for i in range(seq_len): 
             # (1, h) < (s, 1, h) 
             encoder_output = encoder_outputs[i, b, :].squeeze(0)
             attn_energies[b, i] = self.score(decoder_rnn_output, encoder_output)
     
     attn_weights = get_variable(torch.zeros(this_batch_size, seq_len))
     for b in range(this_batch_size):
         attn_weights[b] = F.softmax(attn_energies[b])
     return attn_weights
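A note on the loops above: once attn_energies is filled, the per-row softmax can usually be done in a single batched call. A minimal sketch, assuming attn_energies is an already-computed (b, s) tensor (the concrete numbers are illustrative):

import torch
import torch.nn.functional as F

attn_energies = torch.randn(4, 7)                # (b, s) dummy alignment scores
attn_weights = F.softmax(attn_energies, dim=1)   # normalize over the source dimension s
# each row now sums to 1, matching the per-batch softmax in the loop above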
Example 2
def evaluate(input_sentence, input_lang, target_lang, encoder, decoder, target_maxlen=25):
    ''' Evaluate a single sentence
    Args:
        input_sentence: the input sentence as raw text, without EOS
        target_maxlen: maximum length of the translated sentence, excluding the EOS_token
    Returns:
        decoded_words: the translated words
        decoder_attentions: attention matrix [target sentence length, source sentence length]
    '''
    batch_size = 1
    # [1, s] word ids, EOS included
    input_batches = [dh.indexes_from_sentence(input_lang, input_sentence)]
    # [s, 1] after the transpose, as the encoder expects
    input_batches = get_variable(torch.LongTensor(input_batches)).transpose(0, 1)
    input_lengths = [len(input_batches)]    
    
    # eval mode: disable dropout
    encoder.train(False)
    decoder.train(False)
    
    # run the encoder: outputs [s, b, h], hidden [nl, b, h]; prepare the decoder inputs
    encoder_outputs, encoder_hidden = encoder(input_batches, input_lengths, None)
    #print (encoder_outputs.data[0][0][:10])
    # (ts,b)
    decoder_input = decoder.create_input_seqs(1, batch_size)
    decoder_hidden = encoder_hidden[:decoder.n_layers]
    # final results
    decoded_words = []
    decoder_attentions = torch.zeros(target_maxlen + 1, input_lengths[0])
    
    # run the decoder
    for di in range(target_maxlen):
        # here ts = b = 1, i.e. [1,1,o], [nl,1,h], [1,1,is]; in general [ts,b,o], [nl,b,h], [b,ts,is]
        # print ("input:", decoder_input.data.tolist())
        decoder_output, decoder_hidden, attn_weights = \
            decoder(decoder_input, decoder_hidden, encoder_outputs)
        # print ("attn:", attn_weights.data.tolist())
        # word info
        word_id = parse_output(decoder_output).squeeze().cpu().data.numpy().tolist()[0]
        #show_decoder_outputs(decoder_output, target_lang)
        #maxv, maxi = decoder_output.squeeze().max(-1)
        #print ("evaluate:", word_id, maxv.data[0], maxi.data[0])
        word = target_lang.index2word[word_id]
        decoded_words.append(word)
        # attention
        decoder_attentions[di] += attn_weights.cpu().data.squeeze()
        
        if word_id == dh.EOS_token:
            break
        # feed the current word as the next input, (ts, b) = (1, 1)
        decoder_input = get_variable(torch.LongTensor([word_id])).view(1, -1)
    
    # restore training mode
    encoder.train(True)
    decoder.train(True)
    res = decoder_attentions[:di+1,:]
    #print ('input_length:{}, di={}, size={}'.format(input_lengths[0], di, res.size()))
    return decoded_words, res
Example 3
def test_model(pairs, input_lang, target_lang):
    batch_size = 2
    input_batches, input_lengths, target_batches, target_lengths \
        = helper.random_batch(batch_size, pairs, input_lang, target_lang)

    print('input:', input_batches.size(), input_lengths)
    print('target:', target_batches.size(), target_lengths)

    hidden_size = 8
    n_layers = 2
    encoder = EncoderRNN(input_lang.n_words,
                         hidden_size,
                         n_layers=n_layers,
                         bidir=False)
    decoder = AttnDecoderRNN(hidden_size,
                             target_lang.n_words,
                             n_layers=n_layers)

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    print(decoder)
    print(encoder)
    encoder_outputs, encoder_hidden = encoder(input_batches, input_lengths)
    print('outputs:', encoder_outputs.size(), 'hidden:', encoder_hidden.size())
    max_target_len = max(target_lengths)
    decoder_input = decoder.create_input_seq(batch_size)
    decoder_hidden = encoder_hidden[:decoder.n_layers]
    # (s, b, o)
    all_decoder_outputs = get_variable(
        torch.zeros(max_target_len, batch_size, decoder.output_size))

    use_teacher_forcing = random.random() < 1
    for t in range(max_target_len):
        #(b,o)
        output, decoder_hidden, attn_weights = decoder(decoder_input,
                                                       decoder_hidden,
                                                       encoder_outputs)
        all_decoder_outputs[t] = output
        # teacher forcing: feed the ground-truth label; otherwise feed the decoder's own prediction
        if use_teacher_forcing:
            decoder_input = target_batches[t]
        else:
            # pick the top-scoring word for each batch element from the output
            words = []
            for b in range(batch_size):
                topv, topi = output[b].data.topk(1)
                words.append(topi[0])
            decoder_input = get_variable(torch.LongTensor(words))

    loss = masked_cross_entropy(
        all_decoder_outputs.transpose(0, 1).contiguous(),
        target_batches.transpose(0, 1).contiguous(), target_lengths)
    print(loss)
Example 4
def masked_cross_entropy(logits, target, length):
    """
    Args:
        logits: A Variable containing a FloatTensor of size
            (batch, max_len, num_classes) which contains the
            unnormalized probability for each class.
        target: A Variable containing a LongTensor of size
            (batch, max_len) which contains the index of the true
            class for each corresponding step.
        length: A Variable containing a LongTensor of size (batch,)
            which contains the length of each data in a batch.
    Returns:
        loss: An average loss value masked by the length.
    """
    length = get_variable(torch.LongTensor(length))
    # (b,s,o)
    # logits_flat: (batch * max_len, num_classes)
    logits_flat = logits.view(-1, logits.size(-1))
    # log_probs_flat: (batch * max_len, num_classes)
    log_probs_flat = functional.log_softmax(logits_flat)
    # target_flat: (batch * max_len, 1)
    target_flat = target.view(-1, 1)
    # losses_flat: (batch * max_len, 1)
    losses_flat = -torch.gather(log_probs_flat, dim=1, index=target_flat)
    # losses: (batch, max_len)
    losses = losses_flat.view(*target.size())
    # mask: (batch, max_len)
    mask = sequence_mask(sequence_length=length, max_len=target.size(1))
    losses = losses * mask.float()
    loss = losses.sum() / length.float().sum()
    return loss
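A quick sanity check of how this loss might be called; a minimal sketch assuming get_variable, sequence_mask, and masked_cross_entropy are importable as above (the concrete numbers are illustrative):

import torch

# 2 sentences padded to length 5, vocabulary of 10 words
logits = get_variable(torch.randn(2, 5, 10))            # (batch, max_len, num_classes)
target = get_variable((torch.rand(2, 5) * 10).long())   # (batch, max_len) gold word ids
lengths = [5, 3]                                         # real (unpadded) lengths per sentence
loss = masked_cross_entropy(logits, target, lengths)
# positions beyond each sentence's real length are masked out and do not affect the loss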
Example 5
def train(input_batches, input_lengths, target_batches, target_lengths,
          encoder, decoder, encoder_optimizer, decoder_optimizer, train_conf):
    '''Train on one batch of data.'''
    batch_size = len(input_lengths)
    # 1. zero grad
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    # 2. run the encoder
    encoder_outputs, encoder_hidden = encoder(input_batches, input_lengths)
    # 3. default decoder input
    decoder_input = decoder.create_input_seq(batch_size)
    decoder_hidden = encoder_hidden[:decoder.n_layers]
    # 4. run the decoder
    max_target_len = max(target_lengths)
    all_decoder_outputs = get_variable(
        torch.zeros(max_target_len, batch_size, decoder.output_size))

    use_teacher_forcing = random.random() < train_conf['teacher_forcing_ratio']

    for t in range(max_target_len):
        output, decoder_hidden, attn_weights = decoder(decoder_input,
                                                       decoder_hidden,
                                                       encoder_outputs)
        all_decoder_outputs[t] = output
        # teacher forcing: feed the ground-truth label; otherwise feed the decoder's own prediction
        if use_teacher_forcing:
            decoder_input = target_batches[t]
        else:
            # pick the top-scoring words from the output
            words = parse_output(output)
            decoder_input = get_variable(torch.LongTensor(words))

    loss = masked_cross_entropy(
        all_decoder_outputs.transpose(0, 1).contiguous(),
        target_batches.transpose(0, 1).contiguous(), target_lengths)
    loss.backward()

    ec = torch.nn.utils.clip_grad_norm(encoder.parameters(),
                                       train_conf['clip'])
    dc = torch.nn.utils.clip_grad_norm(decoder.parameters(),
                                       train_conf['clip'])
    # Update parameters with optimizers
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.data[0], ec, dc
Example 6
def sequence_mask(sequence_length, max_len=None):
    if max_len is None:
        max_len = sequence_length.data.max()
    batch_size = sequence_length.size(0)
    seq_range = torch.arange(0, max_len).long()
    seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)
    seq_range_expand = get_variable(seq_range_expand)
    seq_length_expand = (
        sequence_length.unsqueeze(1).expand_as(seq_range_expand))
    return seq_range_expand < seq_length_expand
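For intuition, this is the kind of mask the helper returns; a minimal sketch assuming get_variable wraps a tensor as elsewhere in this code:

lengths = get_variable(torch.LongTensor([3, 1]))
mask = sequence_mask(lengths, max_len=4)
# mask is (2, 4):
#   1 1 1 0
#   1 0 0 0
# i.e. positions past each sequence's real length are zeroed out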
Example 7
 def forward(self, info):
     ''' Given info, the combined document/question representation, predict the answer span.
     Args:
         info -- [b, m, 2h]. The coattention encoder's encoding matrix U of the document D
     Returns:
         start -- [b] start positions of the answer in D
         end -- [b] end positions of the answer in D
         all_scores -- [[start_scores, end_scores]] for every iteration, the scores of all words.
                       Each scores tensor is [b, m]. The loss is a cumulative cross entropy, so every iteration's scores enter the computation.
     '''
     bsize = info.size(0)
     doclen = info.size(1)
     hidden = self.init_hidden(bsize)
     # initialize the default answer span to [0, 1]
     start = get_variable(torch.LongTensor([0] * bsize))
     end = get_variable(torch.LongTensor([1] * bsize))
     # start_scores and end_scores from every iteration
     all_scores = []
     for i in range(self.max_iter):
         # 1. [b, 2h] gather the start and end vectors from U at the current start/end indices
         ustart, uend = self.get_ustart_uend(info, start, end)
         # 2. [b, m] score every word in U as start / end under the current state
         start_scores = self.hmn_start(info, hidden, ustart, uend)
         end_scores = self.hmn_end(info, hidden, ustart, uend)
         all_scores.append([start_scores, end_scores])
         # 3. [b] take the argmax as the new start and end
         _, new_start = start_scores.max(-1)
         _, new_end = end_scores.max(-1)
         # 4. stop if the result is identical to the previous iteration
         eq_start = torch.eq(start, new_start)
         eq_end = torch.eq(end, new_end)
         eq_start = torch.sum(eq_start).data.tolist()[0]
         eq_end = torch.sum(eq_end).data.tolist()[0]
         if (eq_start == bsize and eq_end == bsize):
             #log("new_start--start, new_end--end, equal, break")
             break
         # 5. update hidden, start, and end
         ustart, uend = self.get_ustart_uend(info, new_start, new_end)
         hidden = self.grucell(torch.cat([ustart, uend], 1), hidden)
         start, end = new_start, new_end
     return start, end, all_scores
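The cumulative cross entropy mentioned in the docstring can be built from all_scores roughly along these lines; a minimal sketch assuming true_start and true_end are [b] LongTensors of gold positions, F is torch.nn.functional, and the HMN scores are unnormalized logits:

loss = 0
for start_scores, end_scores in all_scores:
    # every iteration's [b, m] scores contribute a start term and an end term
    loss = loss + F.cross_entropy(start_scores, true_start)
    loss = loss + F.cross_entropy(end_scores, true_end)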
Example 8
    def forward(self, rnn_outputs, encoder_outputs):
        '''For ts time steps, compute the alignment between the ts decoder outputs and the is encoder outputs, i.e. the attention weights
        Args:
            rnn_outputs: output of the decoder GRU, [ts, b, h]
            encoder_outputs: the encoder outputs (last layer), [is, b, h]
        Returns:
            attn_weights: attention weights of each Yt over all Xs, [b, ts, is]
        '''
        target_seqlen = rnn_outputs.size()[0]
        input_seqlen = encoder_outputs.size()[0]
        batch_size = encoder_outputs.size()[1]

        # (b, ts, h) (b, is, h)
        rnn_outputs = rnn_outputs.transpose(0, 1)
        encoder_outputs = encoder_outputs.transpose(0, 1)

        if self.score_type == 'general':
            # (b, h, is)
            encoder_outputs = self.attn(encoder_outputs).transpose(1, 2)
            # [b,ts,is] <[b,ts,h]*[b,h,is]
            attn_energies = rnn_outputs.bmm(encoder_outputs)
            res = my_log_softmax(attn_energies)
            return res

        # attn_energies (b, ts, is)
        attn_energies = get_variable(
            torch.zeros(batch_size, target_seqlen, input_seqlen))
        for b in range(batch_size):
            for t in range(target_seqlen):
                # (h,) output of the decoder GRU at target step t
                decoder_rnn_output = rnn_outputs[b, t]
                for i in range(input_seqlen):
                    # (h,) encoder output at source position i
                    encoder_output = encoder_outputs[b, i]
                    attn_energies[b, t, i] = self.score(decoder_rnn_output,
                                                        encoder_output)

        # (b, ts, is) softmax over the source dimension
        attn_weights = get_variable(
            torch.zeros(batch_size, target_seqlen, input_seqlen))
        for b in range(batch_size):
            for t in range(target_seqlen):
                attn_weights[b, t] = F.softmax(attn_energies[b, t])
        return attn_weights
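For reference, the [b, ts, is] weights returned here are typically combined with the encoder outputs into context vectors; a minimal sketch, assuming encoder_outputs has been put back into batch-first [b, is, h] form, and noting that the 'general' branch returns log-probabilities, so those would need an exp() first:

# attn_weights: (b, ts, is), encoder_outputs: (b, is, h)
context = attn_weights.bmm(encoder_outputs)   # (b, ts, h), a weighted sum over source positions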
Example 9
 def init_hidden(self, bsize, bidir=None):
     '''Initialize a GRU hidden state. bidir and nlayer were already configured earlier;
     all GRUs share the same number of layers, direction, and hidden_size.
     Args:
         bsize -- batch_size
         bidir -- whether the GRU is bidirectional
     Returns:
         hidden -- zero-initialized hidden state
     '''
     bidir = self.bidir if bidir is None else bidir
     ndir = 1 if bidir is False else 2
     hidden = torch.zeros(ndir * self.nlayer, bsize, self.hidden_size)
     hidden = get_variable(hidden)
     return hidden
Example 10
def evaluate(input_seq,
             input_lang,
             target_lang,
             encoder,
             decoder,
             target_maxlen=25):
    ''' Evaluate a single sentence
    Args:
        input_seq: the input sentence, without EOS_token
        target_maxlen: maximum length of the translated sentence, excluding the EOS_token
    Returns:
        decoded_words: the translated words
        decoder_attentions: attention matrix [target sentence length, source sentence length]
    '''
    batch_size = 1
    seq_wordids = dh.indexes_from_sentence(input_lang, input_seq)
    # the length already includes the automatically appended EOS_token
    input_length = len(seq_wordids)
    # batch = 1, so the shape is [1, s]
    input_lengths = [input_length]
    input_batches = [seq_wordids]
    input_batches = get_variable(torch.LongTensor(input_batches))
    # the encoder expects [s, b]
    input_batches = input_batches.transpose(0, 1)

    # eval mode: disable dropout
    encoder.train(False)
    decoder.train(False)

    # run the encoder and prepare the decoder inputs
    #print ('input_batches:', input_batches.size())
    #print ('input_lengths:', input_lengths)
    encoder_outputs, encoder_hidden = encoder(input_batches, input_lengths,
                                              None)
    decoder_input = decoder.create_input_seq(batch_size)
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # final results
    decoded_words = []
    decoder_attentions = torch.zeros(target_maxlen + 1, input_length)

    # run the decoder
    for di in range(target_maxlen):
        # (b, s) = (1, s), where s is the input sentence length
        decoder_output, decoder_hidden, attn_weights = \
            decoder(decoder_input, decoder_hidden, encoder_outputs)
        # word info
        word_id = parse_output(decoder_output)[0]
        word = target_lang.index2word[word_id]
        decoded_words.append(word)
        # attention
        decoder_attentions[di] += attn_weights.data.squeeze(0)

        if word_id == dh.EOS_token:
            break
        # feed the current word as the next input
        decoder_input = get_variable(torch.LongTensor([word_id]))

    # restore training mode
    encoder.train(True)
    decoder.train(True)
    res = decoder_attentions[:di + 1, :]
    print('input_length:{}, di={}, size={}'.format(input_length, di,
                                                   res.size()))
    return decoded_words, res
Example 11
 def init_hidden(self, batch_size):
     hidden = torch.zeros(batch_size, self.hidden_size)
     return get_variable(hidden)
Example 12
 def create_input_seqs(self, seq_len, batch_size):
     sos = [helper.SOS_token] * batch_size
     sos = [sos] * seq_len
     return get_variable(torch.LongTensor(sos))
Example 13
    def forward(self, allfacts, allfacts_mask, questions, questions_mask, alen, n_episode=3):
        '''
        Args:
            allfacts -- [b, n_fact, flen], the input fact sentences
            allfacts_mask -- [b, n_fact, flen], mask = 1 marks padded positions
            questions -- [b, qlen], the questions
            questions_mask -- [b, qlen], mask = 1: padding
            alen -- answer length
            seqbegin_id -- word id of the sequence-start token
            n_episode -- number of memory episodes
        Returns:
            preds -- [b * alen, vocab_size], the predicted answers; b and alen are flattened together to make the cross-entropy computation easier
        '''
        # 0. common sizes: batch_size; each example has nfact facts of length flen; each question has length qlen
        bsize = allfacts.size(0)
        nfact = allfacts.size(1)
        flen = allfacts.size(2)
        qlen = questions.size(1)
        
        # 1. Input module: encode the input sentences with an RNN
        # TODO: double loop, could be optimized
        encoded_facts = []
        # for each example, encode its facts
        for facts, facts_mask in zip(allfacts, allfacts_mask):
            facts_embeds = self.embed(facts)
            facts_embeds = self.dropout(facts_embeds)
            hidden = self.init_hidden(nfact)
            # 1.1 feed the input (multiple sentences) to the GRU
            # b=nf, [nf, flen, h], [1, nf, h]
            outputs, hidden = self.input_gru(facts_embeds, hidden)
            # 1.2 the output at each sentence's real end (real_len) is that sentence's hidden state. For a GRU, output = hidden
            real_hiddens = []

            for i, o in enumerate(outputs):
                real_len = facts_mask[i].data.tolist().count(0)
                real_hiddens.append(o[real_len - 1])
            # 1.3 concatenate the per-fact hiddens; unsqueeze(0) so all batches can be cat'ed later
            hiddens = torch.cat(real_hiddens).view(nfact, -1).unsqueeze(0)
            encoded_facts.append(hiddens)
        # [b, nfact, h]
        encoded_facts = torch.cat(encoded_facts)

        # 2. Question module: encode the questions with an RNN
        questions_embeds = self.embed(questions)
        questions_embeds = self.dropout(questions_embeds)
        hidden = self.init_hidden(bsize)
        # [b, qlen, h], [1, b, h]
        outputs, hidden = self.question_gru(questions_embeds, hidden)
        real_questions = []
        for i, o in enumerate(outputs):
            real_len = questions_mask[i].data.tolist().count(0)
            real_questions.append(o[real_len - 1])
        encoded_questions = torch.cat(real_questions).view(bsize, -1)
        
        # 3. Memory module
        memory = encoded_questions
        for i in range(n_episode):
            # e
            e = self.init_hidden(bsize).squeeze(0)
            # [nfact, b, h]
            encoded_facts_t = encoded_facts.transpose(0, 1)
            # from the memory and this episode, compute e at every step; the final e and the old memory produce the new memory
            for t in range(nfact):
                # [b, h]
                bfact = encoded_facts_t[t]
                # TODO: compute 4 features here; the paper uses 9
                f1 = bfact * encoded_questions
                f2 = bfact * memory
                f3 = torch.abs(bfact - encoded_questions)
                f4 = torch.abs(bfact - memory)
                z = torch.cat([f1, f2, f3, f4], dim=1)
                # [b, 1] attention gate for this fact
                gt = self.gate(z)
                e = gt * self.attention_grucell(bfact, e) + (1 - gt) * e
            # this episode's e and the old memory produce the new memory
            memory = self.memory_grucell(e, memory)
        
        # 4. Answer module
        # [b, h]
        answer_hidden = memory
        begin_tokens = get_variable(torch.LongTensor([self.seqbegin_id]*bsize))
        # [b, h]
        last_word = self.embed(begin_tokens)
        preds = []
        for i in range(alen):
            inputs = torch.cat([last_word, encoded_questions], dim=1)
            answer_hidden = self.answer_grucell(inputs, answer_hidden)
            # to vocab_size
            probs = self.answer_fc(answer_hidden)
            # [b, v]
            probs = F.log_softmax(probs.float())
            _, indics = torch.max(probs, 1)
            last_word = self.embed(indics)
            # for cross entropy
            preds.append(probs.view(bsize, 1, -1))
            #preds.append(indics.view(bsize, -1))
        #print (preds[0].data.shape)
        preds = torch.cat(preds, dim=1)
        #print (preds.data.shape)
        return preds.view(bsize * alen, -1)
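Since preds comes back as [b * alen, vocab_size] log-probabilities, the training loss can be taken directly against the flattened gold answers; a minimal sketch, assuming model is this module and answers is a [b, alen] LongTensor of gold word ids (both names are illustrative):

import torch.nn as nn

criterion = nn.NLLLoss()   # preds already went through log_softmax inside forward
preds = model(allfacts, allfacts_mask, questions, questions_mask, alen)
loss = criterion(preds, answers.view(-1))   # flatten the answers to [b * alen]
loss.backward()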
Example 14
 def init_hidden(self, batch_size):
     '''Initial GRU hidden state; single layer, unidirectional.'''
     hidden = torch.zeros(1, batch_size, self.hidden_size)
     hidden = get_variable(hidden)
     return hidden
Example 15
def train(input_batches, input_lengths, target_batches, target_lengths, encoder, decoder,
         encoder_optimizer, decoder_optimizer, loss_func, train_conf, input_lang, target_lang):
    '''Train on one batch of data
    Args:
        input_batches, input_lengths: [is, b], [b]; lengths include EOS but not SOS
        target_batches, target_lengths: [ts, b], [b]
        encoder, decoder, optimizers: the models and their optimizers
        train_conf: training configuration
    '''
    batch_size = len(input_lengths)
    ts = target_batches.size(0)
    # 1. zero grad
    zerograd_start = time.time()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    zerograd_end = time.time()
   
    # 2. run the encoder
    encoder_start = time.time()
    encoder_outputs, encoder_hidden = encoder(input_batches, input_lengths)
    encoder_end = time.time()
   
    # 3. default decoder input
    decoder_start = time.time()
    decoder_hidden = encoder_hidden[:decoder.n_layers]
    
    # 3.1 feed SOS first
    sos = [dh.SOS_token]* batch_size
    sos = [sos for i in range(ts)]
    sos = get_variable(torch.LongTensor(sos))
    decoder_outputs, decoder_hidden, attn_weights = decoder(sos, decoder_hidden, encoder_outputs)
    
    # 4. run the decoder
    max_target_len = max(target_lengths)
    # (ts,b,o)
    decoder_outputs, decoder_hidden, attn_weights = decoder(target_batches, decoder_hidden, encoder_outputs)
    decoder_end = time.time()
    
    #show_decoder_outputs(decoder_outputs, target_lang)
    #maxv, maxi = decoder_outputs.max(-1)
    # (b,ts,o) (b,ts)
    decoder_outputs = decoder_outputs.transpose(0, 1)
    target_batches = target_batches.transpose(0, 1)
    
    loss = 0
    for i in range(batch_size):
        tlen = target_lengths[i]
        # print (tlen, decoder_outputs[i].size())
        input = decoder_outputs[i][:tlen]
        target = target_batches[i][:tlen]
        #print (input.size(), target.size())
        loss += loss_func(input, target)
    
    contig_start = time.time()
    # logits = maxi.transpose(0, 1).contiguous()
    # target = target_batches.transpose(0, 1).contiguous()
    contig_end = time.time()
    # print (type(logits.data), type(target.data))
    # print (logits.size(), target.size())
    loss_start = time.time()
    # loss = masked_cross_entropy(logits, target, target_lengths)
    
    #loss = loss_func(logits, target)
    loss.backward()
    loss_end = time.time()
   

    optim_start = time.time()
    ec = torch.nn.utils.clip_grad_norm(encoder.parameters(), train_conf['clip'])
    dc = torch.nn.utils.clip_grad_norm(decoder.parameters(), train_conf['clip'])
    # Update parameters with optimizers
    encoder_optimizer.step()
    decoder_optimizer.step()
    optim_end = time.time()
    
    zerograd_use = zerograd_end - zerograd_start
    encoder_use = encoder_end - encoder_start
    decoder_use = decoder_end - decoder_start
    contig_use = contig_end - contig_start
    loss_use = loss_end - loss_start
    optim_use = optim_end - optim_start
    
    #info = "%.3f, %.3f, %.3f, %.3f, %.3f, %.3f " % (zerograd_use, encoder_use, decoder_use,
    #                                          contig_use, loss_use, optim_use)
    #print (info)
    
    input_wordids = input_batches.transpose(0, 1)[0].cpu().data.tolist()[:input_lengths[0]-1]
    input_sentence = get_sentence(input_wordids, input_lang)
    target_wordids = target_batches[0].cpu().data.tolist()[:target_lengths[0]-1]
    target_sentence = get_sentence(target_wordids, target_lang)
    # print (sentence)
    #evaluate_sentence(input_sentence, input_lang, target_lang, encoder, decoder, print_res=True,
    #                  target_sentence=target_sentence, show_attention=False, show_in_visdom=False)
    #evaluate(sentence, input_lang, target_lang, encoder, decoder, target_maxlen=target_lengths[0] + 2)
    
    return loss.data[0], ec, dc
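A minimal sketch of how this train step might be driven from an outer loop, assuming helper.random_batch and the optimizers are set up as in the test_model example above; the 'n_iters' config key and the print interval are illustrative:

for it in range(train_conf['n_iters']):
    input_batches, input_lengths, target_batches, target_lengths = \
        helper.random_batch(batch_size, pairs, input_lang, target_lang)
    loss, ec, dc = train(input_batches, input_lengths, target_batches, target_lengths,
                         encoder, decoder, encoder_optimizer, decoder_optimizer,
                         loss_func, train_conf, input_lang, target_lang)
    if it % 100 == 0:
        print('iter %d, loss %.4f' % (it, loss))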
Example 16
 def create_input_seq(self, batch_size):
     return get_variable(torch.LongTensor([helper.SOS_token] * batch_size))