Example 1
def embed_input_sentence(input_pair, encoder, max_length=MAX_LENGTH):
    """Embeds the input sentence using a trained encoder model"""
    with torch.no_grad():
        if encoder.trainable_model:
            input_tensor, target_tensor = utils.tensorsFromPair(input_pair)
            
            input_length = input_tensor.size()[0]
            encoder_hidden = encoder.initHidden()
            encoder_outputs = torch.zeros(max_length+1, encoder.hidden_size, device=DEVICE)
    
            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                         encoder_hidden)
                encoder_outputs[ei] += encoder_output[0, 0]
    
            decoder_hidden = encoder_hidden
            
            return decoder_hidden, target_tensor, encoder_outputs
            
        else:
            target_tensor = utils.tensorFromSentence(vocab_index, input_pair[1])
            decoder_hidden = encoder.sentence_embedding(input_pair[0])
            decoder_hidden = layer_normalize(decoder_hidden)
        
            return decoder_hidden, target_tensor, None
Example 2
    def train(self,
              pairs,
              n_iters,
              max_length=1000,
              teacher_forcing_ratio=0.5,
              print_every=1000,
              plot_every=100,
              learning_rate=0.01):
        start = time.time()
        plot_losses = []
        print_loss_total = 0  # Reset every print_every
        plot_loss_total = 0  # Reset every plot_every

        encoder_optimizer = optim.SGD(self.encoder.parameters(),
                                      lr=learning_rate)
        decoder_optimizer = optim.SGD(self.decoder.parameters(),
                                      lr=learning_rate)
        training_pairs = [
            tensorsFromPair(self.input_lang, self.output_lang,
                            random.choice(pairs), self.device)
            for i in range(n_iters)
        ]
        criterion = nn.NLLLoss()

        for iter in range(1, n_iters + 1):
            training_pair = training_pairs[iter - 1]
            input_tensor = training_pair[0]
            target_tensor = training_pair[1]

            loss = self.step(input_tensor, target_tensor, encoder_optimizer,
                             decoder_optimizer, criterion, max_length,
                             teacher_forcing_ratio)
            print_loss_total += loss
            plot_loss_total += loss

            if iter % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' %
                      (timeSince(start, iter / n_iters), iter,
                       iter / n_iters * 100, print_loss_avg))

            if iter % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

        showPlot(plot_losses)
Example 3
def trainIters(encoder,
               decoder,
               n_iters,
               print_every=1000,
               plot_every=100,
               learning_rate=0.01,
               lang_pack=None):

    assert lang_pack is not None, "None shall pass"
    input_lang, output_lang, pairs = lang_pack

    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [
        tensorsFromPair(random.choice(pairs), langs=[input_lang, output_lang])
        for i in range(n_iters)
    ]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (timeSince(start, iter / n_iters), iter,
                   iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)
Example 4
def mask_batch(input_batch_pairs):
    """Convert batch of sentence pairs to tensors and masks for ESIM model"""
    input_tensor = torch.zeros((MAX_LENGTH, len(input_batch_pairs)),
                               dtype=torch.long,
                               device=DEVICE)
    target_tensor = torch.zeros((MAX_LENGTH, len(input_batch_pairs)),
                                dtype=torch.long,
                                device=DEVICE)

    for idx, pair in enumerate(input_batch_pairs):
        encoded_input, encoded_target = tensorsFromPair(pair)
        input_tensor[:len(encoded_input), idx], target_tensor[:len(encoded_target), idx] = \
            encoded_input.view(-1), encoded_target.view(-1)

    input_tensor_mask, target_tensor_mask = input_tensor != 0, target_tensor != 0
    input_tensor_mask, target_tensor_mask = input_tensor_mask.float(
    ), target_tensor_mask.float()

    return input_tensor, input_tensor_mask, target_tensor, target_tensor_mask
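
The float masks returned here are typically used to zero out padding positions, for instance when averaging a per-token loss. A minimal sketch of that pattern, using made-up shapes and a stand-in loss tensor rather than the real model outputs:

import torch

# Stand-in tensors shaped like mask_batch's output: (MAX_LENGTH, batch), 0 = padding.
target_tensor = torch.randint(0, 5, (10, 4))   # hypothetical padded targets
per_token_loss = torch.rand(10, 4)             # hypothetical per-token NLL values
target_mask = (target_tensor != 0).float()     # same mask construction as above

# Zero out the loss at padded positions and average over real tokens only.
masked_loss = (per_token_loss * target_mask).sum() / target_mask.sum()
print(masked_loss.item())
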
Example 5
def trainItersBert(encoder, decoder, n_iters, training_pairs, eval_pairs, input_lang, output_lang, print_every=1000, plot_every=100, learning_rate=0.01, mom=0,  model_name="QALD-dev"):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    
    plot_loss_avg = 1.0 #!!!

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate, momentum=mom)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate, momentum=mom)
    
    #encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate, amsgrad=True)
    #encoder_scheduler = optim.lr_scheduler.CosineAnnealingLR(encoder_optimizer, n_iters)
    #decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate, amsgrad=True)
    #decoder_scheduler = optim.lr_scheduler.CosineAnnealingLR(decoder_optimizer, n_iters)                                                       

    teacher_forcing_ratio = 1.0

    criterion = nn.NLLLoss()

    '''src_sents, tgt_sents = [], []
    for pair in training_pairs:
        src_sents.append(pair[0])
        tgt_sents.append(pair[1])
    src_size, tgt_size = utils.max_size(src_sents, tgt_sents )'''
    
    #!!!
    input_tensors, target_tensors, train_pairs = [], [], []
    for pair in training_pairs:
        tensors = utils.tensorsFromPair(pair, input_lang, output_lang, device)
        train_pairs.append(tensors)
        '''print("tensor shape--> ", tensors[0].size())
        print(tensors[0])'''
        input_tensors.append(tensors[0].view(-1,1).long()) #float() #!!! 
        target_tensors.append(tensors[1].view(-1,1).long()) #!!! 
    print("\n Dataset preparing... ")

    """input_tensors, target_tensors = torch.Tensor(len(training_pairs), 1, 1, 256), torch.Tensor(len(training_pairs), 1, 1, 256)
    torch.cat(en_tensors, out=input_tensors)
    torch.cat(sparql_tensors, out=target_tensors)"""

    '''print(" assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors) ")
    print(input_tensors[0].size(0), )'''

    input_tensors  = rnn_utils.pad_sequence(input_tensors, batch_first=True, padding_value=0)
    target_tensors  = rnn_utils.pad_sequence(target_tensors, batch_first=True, padding_value=0)
    #input_tensors, target_tensors = utils.padding(input_tensors, target_tensors )
    torch_dataset = utils.TxtDataset(input_tensors, target_tensors  )
    
    # put the dataset into DataLoader
    loader = Data.DataLoader(
        dataset=torch_dataset,
        batch_size=6,  # MINIBATCH_SIZE
        shuffle=True,
        #num_workers=1          # number of worker processes used to load data
        #collate_fn= utils.collate_fn
    )
    print("Dataset loader ready, beginning training.\n")

    for epoch in range(1, n_iters + 1):
    # one epoch passes over the whole dataset
        for step, (batch_input, batch_target) in enumerate(loader):
            # here to train your model
            print('\n\n  - epoch: ', epoch, ' | step: ', step, '\n | batch_input: \n', batch_input.size(), '\n | batch_target: \n', batch_target.size() ) 
            
            #input_tensor, target_tensor = batch_input, batch_target  #!!! 

            batch_input = batch_input.reshape( [6,  -1, 1] ) #!!!  [6, 1, -1] 
            batch_target = batch_target.reshape( [6,  -1, 1] )
            print("\n input_batch : ", batch_input.size())
            print("\n target_batch : ", batch_target.size())

            # enable rl once the running loss drops below 1.0 (after the first epoch)
            rl = True if (epoch > 1) and (np.mean(plot_losses) < 1.0) else False

            loss = 0
            for batch_input_, batch_target_ in zip(batch_input, batch_target):
                loss += trainBert(batch_input_, batch_target_, encoder, decoder, eval_pairs,
                                  input_lang, output_lang, encoder_optimizer, decoder_optimizer,
                                  criterion, teacher_forcing_ratio=teacher_forcing_ratio, rl=rl)

            plot_losses.append(loss / 6)

            print("\t -  %s step xentropy loss: "%str(epoch), loss, " \n" )

            teacher_forcing_ratio = utils.teacher_force(float(loss) ) ;
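
The utils.teacher_force helper is not shown in this example; all the call above tells us is that it maps the current loss to a new teacher-forcing ratio. Purely as an illustration of that idea (a hypothetical stand-in, not the real implementation), a loss-dependent schedule could look like this:

def teacher_force(loss, low=0.2, high=1.0, scale=5.0):
    """Hypothetical stand-in for utils.teacher_force: higher loss -> more teacher forcing.
    The bounds and scale here are illustrative assumptions, not values from the original code."""
    ratio = loss / scale              # larger loss -> larger ratio
    return max(low, min(high, ratio)) # clamp into [low, high]
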
Example 6
def trainItersBert(model, n_iters, training_pairs, eval_pairs, input_lang, output_lang, batch_size, learning_rate=0.01, mom=0,  model_name="qald-test"):
    #start = time.time()
    plot_losses = []
    losses_trend = []
     
    #encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate, momentum=mom)
    #decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate, momentum=mom)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=mom)
    
    #encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate, amsgrad=True)
    #encoder_scheduler = optim.lr_scheduler.CosineAnnealingLR(encoder_optimizer, n_iters)
    #decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate, amsgrad=True)
    #decoder_scheduler = optim.lr_scheduler.CosineAnnealingLR(decoder_optimizer, n_iters)                                                       

    teacher_forcing_ratio = 1.0

    criterion = nn.NLLLoss()
    '''
    input_tensors, target_tensors, train_pairs = [], [], []
    for pair in training_pairs:
        tensors = utils.tensorsFromPair(pair, input_lang, output_lang, device)
        train_pairs.append(tensors)
        #print("tensor shape--> ", tensors[0].size())
        #print(tensors[0])
        input_tensors.append(tensors[0].view(-1,1).long()) #float() #!!! 
        target_tensors.append(tensors[1].view(-1,1).long()) #!!!

    print("\n Dataset preparing... ")
    input_tensors  = rnn_utils.pad_sequence(input_tensors, batch_first=True, padding_value=0)
    target_tensors  = rnn_utils.pad_sequence(target_tensors, batch_first=True, padding_value=0)
    
    torch.save(input_tensors, "./model/input_tensors.pt")
    torch.save(target_tensors, "./model/target_tensors.pt")'''

    eval_tensors = [utils.tensorsFromPair(pair, input_lang, output_lang, device) for pair in eval_pairs ] 
    eval_inputs = [ tensors[0] for tensors in eval_tensors ]
    eval_targets = [ tensors[1] for tensors in eval_tensors ]
    
    eval_inputs  = rnn_utils.pad_sequence(eval_inputs, batch_first=True, padding_value=0)
    eval_targets = rnn_utils.pad_sequence(eval_targets, batch_first=True, padding_value=0)

    #input_tensors, target_tensors = utils.padding(input_tensors, target_tensors )
    '''torch_dataset = utils.TxtDataset(input_tensors, target_tensors  )'''
    torch_dataset = utils.TxtDataset(eval_inputs, eval_targets  )
    
    # put the dataset into DataLoader
    loader = Data.DataLoader(
        dataset=torch_dataset,
        batch_size=batch_size,  # MINIBATCH_SIZE = 6
        shuffle=True,
        drop_last= False,
        num_workers= 2 if utils.getOSystPlateform() else 0  # worker processes for loading, chosen by OS platform
        #collate_fn= utils.collate_fn  #!!! 
    ) 
    print(" Dataset loader ready, begin training. \n") 

    datset_len = len(loader)
    
    print("\n Dataset loader length is ", datset_len, ", save model every batch. " )

    for epoch in range(1, n_iters + 1):
    # one epoch passes over the whole dataset
        for batch, (batch_input, batch_target) in enumerate(loader):
            # here to train your model
            print('\n\n  - Epoch ', epoch, ' | batch ', batch, '\n | input size:   ', batch_input.size(), '\n | target size:   ', batch_target.size(), "\n")
            
            #input_tensor, target_tensor = batch_input, batch_target  #!!! 
            #print("  * T-forcing ratio: ", teacher_forcing_ratio  )
            '''try:
                input_seq_len, target_seq_len = batch_input.size(1), batch_target.size(1)
                batch_input = batch_input.reshape( [input_seq_len, batch_size] ) #!!!  [6, 1, -1] 
                batch_target = batch_target.reshape( [target_seq_len, batch_size] )
                print("\n input_seq_len, target_seq_len : ", input_seq_len, target_seq_len )
            except:
                pass ; '''

            """input_lens = [utils.getNzeroSize(tensor) for tensor in batch_input ]
            target_lens = [utils.getNzeroSize(tensor) for tensor in batch_target ]"""

            rl = False  # could be enabled once np.mean(losses_trend) < 1.0 and len(losses_trend) > 1

            loss = trainBert(batch_input, batch_target, model, eval_pairs,
                             input_lang, output_lang, optimizer, criterion,
                             teacher_forcing_ratio=teacher_forcing_ratio, rl=rl)
            plot_losses.append(loss)

            print("\t- batch %s cross-entropy loss: " % (str(epoch) + "." + str(batch)), loss)

            '''if 0 == batch%savepoint and batch > 1:
                print("\n Batch %d savepoint, save the trained model...\n"%batch )
                save_model(encoder, decoder, plot_losses, model_name ) ;'''
        
        losses_trend.append(np.mean(plot_losses))
        plot_losses.clear()

        if epoch > 1:  # optionally restrict to every 5th epoch with `epoch % 5 == 0`
            save_model(model, losses_trend, model_name)
            '''if epoch > 5 and epoch % 5 == 0:
                utils.showPlot(losses_trend, model_name, "epoch" + str(epoch))'''
            print("\nFinished epoch %d -- model saved.\n" % epoch)
Example 7
def trainIters(input_lang,
               output_lang,
               pairs,
               encoder,
               decoder,
               n_iters,
               max_length,
               print_every=1000,
               plot_every=100,
               learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    # Create a parameter optimizer for each model
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    # Pick random sentence pairs and convert them to tensors of word indices
    training_pairs = [
        tensorsFromPair(input_lang, output_lang, random.choice(pairs))
        for i in range(n_iters)
    ]

    # Use negative log likelihood as loss
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]

        # Enforce max sentence length
        input_tensor = training_pair[0]
        if len(input_tensor) > max_length:
            continue
        target_tensor = training_pair[1]

        # Train model using one sentence pair, returns the negative log likelihood
        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion,
                     max_length)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (timeSince(start, iter / n_iters), iter,
                   iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
            pickle.dump(
                plot_losses,
                open("losses/{}.p".format(encoder.__class__.__name__), "wb"))

        if iter % 10000 == 0:
            torch.save(
                encoder.state_dict(), 'trained_models/{}/encoder_it{}'.format(
                    encoder.__class__.__name__, iter))
            torch.save(
                decoder.state_dict(), 'trained_models/{}/decoder_it{}'.format(
                    encoder.__class__.__name__, iter))

    showPlot(plot_losses)
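
Since this trainer pickles the loss curve and saves state_dicts every 10000 iterations, a matching reload step might look like the sketch below. "EncoderRNN" stands in for the actual encoder class name and it10000 for a checkpoint iteration; both are assumptions, and only the file-path patterns follow the code above.

import pickle
import torch

# Hypothetical reload of the artifacts written above; `encoder` and `decoder` are assumed
# to be freshly constructed instances of the same classes that were trained.
encoder.load_state_dict(torch.load('trained_models/EncoderRNN/encoder_it10000'))
decoder.load_state_dict(torch.load('trained_models/EncoderRNN/decoder_it10000'))
encoder.eval()
decoder.eval()

# The pickled loss curve can be read back the same way it was written.
with open("losses/EncoderRNN.p", "rb") as f:
    plot_losses = pickle.load(f)
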
Example 8
def trainIters(encoder,
               decoder,
               dictionary,
               pairs,
               epochs,
               print_every=1000,
               print_sentences=5,
               learning_rate=0.01,
               batch_size=16):  #?
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    training_pairs = [
        tensorsFromPair(pairs[i], dictionary) for i in range(len(pairs))
    ]
    criterion = nn.NLLLoss()

    for e in range(epochs):
        num_batch = ceil(len(pairs) / batch_size)
        print_loss_total = 0

        for b in range(num_batch):
            if b == num_batch - 1:
                num_data = len(pairs) - batch_size * b
            else:
                num_data = batch_size

            input_tensors = [
                training_pairs[m][0]
                for m in range(batch_size * b, batch_size * b + num_data)
            ]
            target_tensors = [
                training_pairs[m][1]
                for m in range(batch_size * b, batch_size * b + num_data)
            ]

            loss = train(input_tensors, target_tensors, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion)

            print_loss_total += loss

            print('%s epochs, %s step, %.4f' %
                  (e, batch_size * b + num_data, loss))

        print_loss_avg = print_loss_total / num_batch
        print('%s epochs, %.4f' % (e, print_loss_avg))

        # for iter in range(1, len(pairs) + 1):
        #     training_pair = training_pairs[iter - 1]
        #     input_tensor = training_pair[0]
        #     target_tensor = training_pair[1]

        #     loss = train(input_tensor, target_tensor, encoder,
        #                  decoder, encoder_optimizer, decoder_optimizer, criterion)
        #     print_loss_total += loss
        #     plot_loss_total += loss

        #     if iter % print_every == 0:
        #         print_loss_avg = print_loss_total / print_every
        #         print_loss_total = 0
        #         print('%s epochs, %s (%d %d%%) %.4f' % (e, timeSince(start, iter / len(pairs)),
        #                                                 iter, iter / len(pairs) * 100, print_loss_avg))

        #            if iter % plot_every == 0:

        # Print sample outputs every epoch

        evaluateRandomly(encoder,
                         decoder,
                         pairs,
                         dictionary,
                         n=print_sentences)
Example 9
def trainItersBert(encoder, decoder, n_iters, training_pairs, eval_pairs, input_lang, output_lang, print_every=1000, plot_every=100, learning_rate=0.01, mom=0,  model_name="qald-dev"):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    
    plot_loss_avg = 1.0 #!!!

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate, momentum=mom)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate, momentum=mom)
    
    #encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate, amsgrad=True)
    #encoder_scheduler = optim.lr_scheduler.CosineAnnealingLR(encoder_optimizer, n_iters)
    #decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate, amsgrad=True)
    #decoder_scheduler = optim.lr_scheduler.CosineAnnealingLR(decoder_optimizer, n_iters)                                                       

    teacher_forcing_ratio = 1.0

    criterion = nn.NLLLoss()
    '''
    input_tensors, target_tensors, train_pairs = [], [], []
    for pair in training_pairs:
        tensors = utils.tensorsFromPair(pair, input_lang, output_lang, device)
        train_pairs.append(tensors)
        #print("tensor shape--> ", tensors[0].size())
        #print(tensors[0])
        input_tensors.append(tensors[0].view(-1,1).long()) #float() #!!! 
        target_tensors.append(tensors[1].view(-1,1).long()) #!!!

    print("\n Dataset preparing... ")
    input_tensors  = rnn_utils.pad_sequence(input_tensors, batch_first=True, padding_value=0)
    target_tensors  = rnn_utils.pad_sequence(target_tensors, batch_first=True, padding_value=0)
    
    torch.save(input_tensors, "./model/input_tensors.pt")
    torch.save(target_tensors, "./model/target_tensors.pt")'''

    eval_tensors = [utils.tensorsFromPair(pair, input_lang, output_lang, device) for pair in eval_pairs ] 
    eval_inputs = [ tensors[0] for tensors in eval_tensors ]
    eval_targets = [ tensors[1] for tensors in eval_tensors ]
    
    eval_inputs  = rnn_utils.pad_sequence(eval_inputs, batch_first=True, padding_value=0)
    eval_targets = rnn_utils.pad_sequence(eval_targets, batch_first=True, padding_value=0)

    #input_tensors, target_tensors = utils.padding(input_tensors, target_tensors )
    '''torch_dataset = utils.TxtDataset(input_tensors, target_tensors  )'''
    torch_dataset = utils.TxtDataset(eval_inputs, eval_targets  )
    
    # put the dataset into DataLoader
    loader = Data.DataLoader(
        dataset=torch_dataset,
        batch_size=6,  # MINIBATCH_SIZE
        shuffle=True,
        #drop_last= True,
        num_workers= 2 if utils.getOSystPlateform() else 0  # worker processes for loading, chosen by OS platform
        #collate_fn= utils.collate_fn  #!!! 
    ) 
    print(" Dataset loader ready, begin training. \n") 

    datset_len = len(loader)
    savepoint = datset_len//4  #12
    print("\n Dataset loader length is ", datset_len, ", save model every %d batches. "%savepoint )

    for epoch in range(1, n_iters + 1):
    # one epoch passes over the whole dataset
        for batch, (batch_input, batch_target) in enumerate(loader):
            # here to train your model
            print('\n\n  - Epoch ', epoch, ' | batch ', batch, '\n | batch_input: \n ', batch_input.size(), '\n | batch_target: \n ', batch_target.size() ," \n")  
            
            #input_tensor, target_tensor = batch_input, batch_target  #!!! 
            try:
                batch_input = batch_input.reshape( [6, -1, 1] ) #!!!  [6, 1, -1] 
                batch_target = batch_target.reshape( [6, -1, 1] )
            except Exception:
                pass  # keep the original shape if the batch cannot be reshaped to [6, -1, 1] (e.g. a smaller final batch)

            #input_lens = [utils.getNzeroSize(tensor) for tensor in batch_input ]
            #target_lens = [utils.getNzeroSize(tensor) for tensor in batch_target ]

            rl = True if (epoch > 1) and (np.mean(plot_losses) < 1.0) else False  # enable rl once the running loss drops below 1.0 (after the first epoch)

            loss = 0
            for batch_input_item, batch_target_item in zip(batch_input, batch_target):
                #print("\n\t batch_input_item, batch_target_item : ", batch_input_item.size(), batch_target_item.size() )
                loss += trainBert(batch_input_item, batch_target_item, encoder, decoder, eval_pairs, \
                                    input_lang, output_lang, encoder_optimizer, decoder_optimizer, criterion, \
                                      teacher_forcing_ratio = teacher_forcing_ratio, rl=rl )
            loss = loss/6
            plot_losses.append( loss )

            print("\t - the %s batch xentropy loss: "%str(str(epoch)+"."+str(batch)), loss, " " )

            teacher_forcing_ratio = utils.teacher_force(float(loss))

            if batch % savepoint == 0 and batch > 1:
                print("\nBatch %d savepoint, saving the trained model...\n" % batch)
                save_model(encoder, decoder, plot_losses, model_name)

        if epoch > 1:
            save_model(encoder, decoder, plot_losses, model_name)
            print("\nFinished epoch %d -- model saved." % epoch)
Example 10
def train(input_pair, encoder, decoder, encoder_optimizer, decoder_optimizer,
          criterion, teacher_forcing_ratio, max_length=MAX_LENGTH):
    """Model training logic, initializes graph, creates encoder outputs matrix for attention model,
    applies teacher forcing (randomly), calculates the loss and trains the models"""
    if encoder.trainable_model:
        # Encode sentences using encoder model
        input_tensor, target_tensor = utils.tensorsFromPair(input_pair)
        decoder_hidden, encoder_outputs, encoder_optimizer = train_encoder(
                    input_tensor, encoder, encoder_optimizer, max_length)
    else:
        # Encode sentences using pretrained encoder model
        target_tensor = utils.tensorFromSentence(vocab_index, input_pair[1])
        decoder_hidden = encoder.sentence_embedding(input_pair[0])
        decoder_hidden = layer_normalize(decoder_hidden)
    
    # Clear the gradients from the decoder optimizer
    decoder_optimizer.zero_grad()
    target_length = target_tensor.size(0)
    
    decoder_input = torch.tensor([[SOS_token]], device=DEVICE)
    loss = 0
    
    # Randomly apply teacher forcing subject to teacher forcing ratio
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            if decoder.uses_attention:
                decoder_output, decoder_hidden, _ = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
            else:
                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden)
            
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing: set next input to correct target

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            if decoder.uses_attention:
                decoder_output, decoder_hidden, _ = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
            else:
                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden)
            
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break
            
    # Calculate the error and backpropagate through the network
    loss.backward()
    
    if encoder.trainable_model:
        encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length
Example 11
def beam_decode(input_pair, encoder, decoder, beam_width=5, n_output_sentences=1, encoder_outputs=None):
    """Implements beam search decoding using specified encoder, decoder, and beam length"""
    """ Notebook source: https://github.com/budzianowski/PyTorch-Beam-Search-Decoding/blob/master/decode_beam.py """
    '''
    :param target_tensor: target indexes tensor of shape [B, T] where B is the batch size and
    T is the maximum length of the output sentence
    :param decoder_hidden: input tensor of shape [1, B, H] for start of the decoding
    :param encoder_outputs: if you are using attention mechanism you can pass encoder outputs, 
    [T, B, H] where T is the maximum length of input sentence
    :return: decoded_batch
    '''
    assert beam_width > 1, 'Beam width must be greater than 1'
    
    if encoder.trainable_model:
        input_tensor, _ = utils.tensorsFromPair(input_pair)
        
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()
        encoder_outputs = torch.zeros(MAX_LENGTH+1, encoder.hidden_size, device=DEVICE)
    
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]
    
        decoder_hidden = encoder_hidden
    
    else:
        decoder_hidden = encoder.sentence_embedding(input_pair[0])
        decoder_hidden = layer_normalize(decoder_hidden)
    
    topk = n_output_sentences  # how many sentences to generate
    
    # Start with the start of the sentence token
    decoder_input = torch.tensor([[SOS_token]], device=DEVICE)
    
    # Completed hypotheses (nodes that reached EOS)
    endnodes = []
    number_required = min((topk + 1), topk - len(endnodes))

    # starting node -  hidden vector, previous node, word id, logp, length
    node = BeamSearchNode(decoder_hidden, None, decoder_input, 0, 1)
    nodes = PriorityQueue()

    # start the queue
    nodes.put((-node.eval(), node))
    qsize = 1

    # start beam search
    for _ in range(2000):
        # give up when decoding takes too long
        if qsize > 1000: break

        # fetch the best node
        score, n = nodes.get()
        decoder_input = n.wordid
        decoder_hidden = n.h

        if n.wordid.item() == EOS_token and n.prevNode != None:
            endnodes.append((score, n))
            # if we reached maximum # of sentences required
            if len(endnodes) >= number_required:
                break
            else:
                continue

        # decode for one step using decoder
        if decoder.uses_attention:
            decoder_output, decoder_hidden, _ = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
        else:
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)

        # do actual beam search
        log_prob, indexes = torch.topk(decoder_output, beam_width)
        nextnodes = []

        for new_k in range(beam_width):
            decoded_t = indexes[0][new_k].view(1, -1)
            log_p = log_prob[0][new_k].item()
            
            node = BeamSearchNode(decoder_hidden, n, decoded_t, n.logp + log_p, n.leng + 1)
            score = -node.eval()
            nextnodes.append((score, node))

        # put them into queue
        for i in range(len(nextnodes)):
            score, next_node = nextnodes[i]
            nodes.put((score, next_node))
            # increase qsize
        qsize += len(nextnodes) - 1

    # choose nbest paths, back trace them
    if len(endnodes) == 0:
        endnodes = [nodes.get() for _ in range(topk)]

    utterances = []
    for score, n in sorted(endnodes, key=operator.itemgetter(0)):
        utterance = []
        utterance.append(n.wordid)
        # back trace
        while n.prevNode != None:
            n = n.prevNode
            utterance.append(n.wordid)

        utterance = utterance[::-1]
        utterances.append(utterance)
    
    output_sentences = []
    for sentence in utterances:
        output_words = [vocab_index.index2word[word_idx.item()] for word_idx in sentence]
        output_sentences.append(' '.join(output_words[1:-1]))
    
    return output_sentences
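
beam_decode relies on a BeamSearchNode helper that is not shown in this snippet. Based on the attributes it accesses (h, prevNode, wordid, logp, leng, eval()) and the linked decode_beam.py notebook, a node would look roughly like the sketch below; the length normalization in eval() should be treated as an assumption rather than the exact original.

class BeamSearchNode(object):
    """Sketch of the node type assumed by beam_decode above (hidden state, back-pointer,
    chosen word id, cumulative log-probability, hypothesis length)."""

    def __init__(self, hiddenstate, previousNode, wordId, logProb, length):
        self.h = hiddenstate          # decoder hidden state at this step
        self.prevNode = previousNode  # back-pointer used when reconstructing the sentence
        self.wordid = wordId          # token chosen at this step
        self.logp = logProb           # cumulative log-probability of the hypothesis
        self.leng = length            # number of decoding steps taken so far

    def eval(self, alpha=1.0):
        reward = 0  # optional shaping term, unused here
        # Length-normalized score; beam_decode pushes -eval() into its PriorityQueue.
        return self.logp / float(self.leng - 1 + 1e-6) + alpha * reward
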