Beispiel #1
0
 def test_dropout_WITH_PROB_ZERO(self):
     """With dropout_p=0 the decoder must be deterministic: two forward
     passes over the same batch produce identical outputs."""
     rnn = DecoderRNN(self.dataset.output_vocab, 50, 16, dropout_p=0)
     # Randomize weights so the comparison is not trivially all-zeros.
     for param in rnn.parameters():
         param.data.uniform_(-1, 1)
     batch = [[1, 2, 3], [1, 2], [1]]
     output1, _, _ = rnn(batch)
     output2, _, _ = rnn(batch)
     # assertEqual on tensor outputs is unreliable (tensor comparison is
     # element-wise); compare each decoding step exactly with torch.equal,
     # matching the pattern used by the other zero-dropout test.
     self.assertEqual(len(output1), len(output2))
     for prob1, prob2 in zip(output1, output2):
         self.assertTrue(torch.equal(prob1.data, prob2.data))
 def test_dropout_WITH_PROB_ZERO(self):
     """Decoder outputs must match exactly across runs when dropout is off."""
     rnn = DecoderRNN(self.vocab_size, 50, 16, 0, 1, dropout_p=0)
     # Give the weights non-trivial random values before comparing runs.
     for weight in rnn.parameters():
         weight.data.uniform_(-1, 1)
     first_run, _, _ = rnn()
     second_run, _, _ = rnn()
     # Step-by-step exact equality: with dropout_p=0 nothing is stochastic.
     for step_a, step_b in zip(first_run, second_run):
         self.assertTrue(torch.equal(step_a.data, step_b.data))
    def test_input_dropout_WITH_NON_ZERO_PROB(self):
        """With input_dropout_p=0.5, repeated forward passes should differ
        at least once across many trials."""
        rnn = DecoderRNN(self.vocab_size, 50, 16, 0, 1, input_dropout_p=0.5)
        for weight in rnn.parameters():
            weight.data.uniform_(-1, 1)

        # Input dropout is random, so over 50 trial pairs at least one pair
        # of first-step outputs is expected to differ.
        all_identical = True
        for _ in range(50):
            first_run, _, _ = rnn()
            second_run, _, _ = rnn()
            if not torch.equal(first_run[0].data, second_run[0].data):
                all_identical = False
                break
        self.assertFalse(all_identical)
Beispiel #4
0
    def test_dropout_WITH_NON_ZERO_PROB(self):
        """With dropout_p=0.5, repeated forward passes over the same batch
        should produce different outputs at least once over many trials."""
        rnn = DecoderRNN(self.dataset.output_vocab, 50, 16, dropout_p=0.5)
        for param in rnn.parameters():
            param.data.uniform_(-1, 1)
        batch = [[1, 2, 3], [1, 2], [1]]

        equal = True
        for _ in range(50):
            output1, _, _ = rnn(batch)
            output2, _, _ = rnn(batch)
            # `output1[0] != output2[0]` on tensors is element-wise and its
            # truth value is ambiguous; use torch.equal for an exact check,
            # matching the input-dropout test above.
            if not torch.equal(output1[0].data, output2[0].data):
                equal = False
                break
        self.assertFalse(equal)
Beispiel #5
0
    def test_k_1(self):
        """ When k=1, the output of topk decoder should be the same as a normal decoder. """
        batch_size = 1
        eos = 1

        for _ in range(10):
            # Repeat the randomized test multiple times
            decoder = DecoderRNN(self.vocab_size, 50, 16, 0, eos)
            for param in decoder.parameters():
                param.data.uniform_(-1, 1)
            topk_decoder = TopKDecoder(decoder, 1)

            # Run both decoders unconditioned (no inputs, no encoder state).
            output, _, other = decoder(None)
            output_topk, _, other_topk = topk_decoder(None)

            # Same number of decoding steps for greedy and k=1 beam search.
            self.assertEqual(len(output), len(output_topk))

            # Per-example bookkeeping: whether EOS was emitted, and the
            # running sum of per-step top-1 log scores.
            finished = [False] * batch_size
            seq_scores = [0] * batch_size

            for t_step in range(len(output)):
                # Greedy (top-1) score at this step.
                score, _ = output[t_step].topk(1)
                symbols = other['sequence'][t_step]
                for b in range(batch_size):
                    seq_scores[b] += score[b].data[0]
                    symbol = symbols[b].data[0]
                    if not finished[b] and symbol == eos:
                        finished[b] = True
                        # On EOS the beam's reported length and accumulated
                        # score must match the greedy trace.
                        self.assertEqual(other_topk['length'][b], t_step + 1)
                        self.assertTrue(
                            np.isclose(seq_scores[b],
                                       other_topk['score'][b][0]))
                    if not finished[b]:
                        # Before EOS, step symbols and step distributions
                        # must agree exactly between the two decoders.
                        symbol_topk = other_topk['topk_sequence'][t_step][
                            b].data[0][0]
                        self.assertEqual(symbol, symbol_topk)
                        self.assertTrue(
                            torch.equal(output[t_step].data,
                                        output_topk[t_step].data))
                # Stop once every example in the batch has finished.
                if sum(finished) == batch_size:
                    break
Beispiel #6
0
    def test_k_greater_than_1(self):
        """ Implement beam search manually and compare results from topk decoder. """
        max_len = 50
        beam_size = 3
        batch_size = 1
        hidden_size = 8
        sos = 0
        eos = 1

        # Repeat with fresh random weights and a fresh random encoder state.
        for _ in range(10):
            decoder = DecoderRNN(self.vocab_size, max_len, hidden_size, sos,
                                 eos)
            for param in decoder.parameters():
                param.data.uniform_(-1, 1)
            topk_decoder = TopKDecoder(decoder, beam_size)

            encoder_hidden = torch.autograd.Variable(
                torch.randn(1, batch_size, hidden_size))
            _, hidden_topk, other_topk = topk_decoder(
                None, encoder_hidden=encoder_hidden)

            # Queue state:
            #   1. time step
            #   2. symbol
            #   3. hidden state
            #   4. accumulated log likelihood
            #   5. beam number
            # Seed each example's queue with a pseudo step -1 holding SOS and
            # that example's slice of the encoder hidden state.
            batch_queue = [[(-1, sos, encoder_hidden[:, b, :].unsqueeze(1), 0,
                             None)] for b in range(batch_size)]
            time_batch_queue = [batch_queue]
            batch_finished_seqs = [list() for _ in range(batch_size)]
            # Manual beam search: expand every live hypothesis one step at a
            # time, keeping the beam_size best per example.
            for t in range(max_len):
                new_batch_queue = []
                for b in range(batch_size):
                    new_queue = []
                    for k in range(min(len(time_batch_queue[t][b]),
                                       beam_size)):
                        _, inputs, hidden, seq_score, _ = time_batch_queue[t][
                            b][k]
                        # Hypotheses ending in EOS are finished; move them out
                        # of the active queue.
                        if inputs == eos:
                            batch_finished_seqs[b].append(
                                time_batch_queue[t][b][k])
                            continue
                        inputs = torch.autograd.Variable(
                            torch.LongTensor([[inputs]]))
                        context, hidden, attn = decoder.forward_step(
                            inputs, hidden, None)
                        decoder_outputs, symbols = decoder.decoder(
                            context, attn, None, None)
                        # Log-probabilities so scores add along a hypothesis.
                        decoder_outputs = decoder_outputs.log()
                        topk_score, topk = decoder_outputs[0].data.topk(
                            beam_size)
                        for score, sym in zip(topk_score.tolist()[0],
                                              topk.tolist()[0]):
                            new_queue.append(
                                (t, sym, hidden, score + seq_score, k))
                    # Keep only the beam_size highest-scoring expansions.
                    new_queue = sorted(new_queue,
                                       key=lambda x: x[3],
                                       reverse=True)[:beam_size]
                    new_batch_queue.append(new_queue)
                time_batch_queue.append(new_batch_queue)

            # finished beams
            finalist = [l[:beam_size] for l in batch_finished_seqs]
            # unfinished beams
            for b in range(batch_size):
                if len(finalist[b]) < beam_size:
                    # Pad with the best still-active hypotheses from the
                    # final time step.
                    last_step = sorted(time_batch_queue[-1][b],
                                       key=lambda x: x[3],
                                       reverse=True)
                    finalist[b] += last_step[:beam_size - len(finalist[b])]

            # back track
            # Follow each finalist's parent-beam pointers (field 5) back to
            # the SOS seed, then reverse to get the full sequence.
            topk = []
            for b in range(batch_size):
                batch_topk = []
                for k in range(beam_size):
                    seq = [finalist[b][k]]
                    prev_k = seq[-1][4]
                    prev_t = seq[-1][0]
                    while prev_k is not None:
                        seq.append(time_batch_queue[prev_t][b][prev_k])
                        prev_k = seq[-1][4]
                        prev_t = seq[-1][0]
                    batch_topk.append([s for s in reversed(seq)])
                topk.append(batch_topk)

            # Rank the manual beams by final accumulated score, best first.
            for b in range(batch_size):
                topk[b] = sorted(topk[b], key=lambda s: s[-1][3], reverse=True)

            topk_scores = other_topk['score']
            topk_lengths = other_topk['topk_length']
            topk_pred_symbols = other_topk['topk_sequence']
            for b in range(batch_size):
                # If two beams have (nearly) equal scores the ordering is
                # ambiguous; skip the comparison for this trial.
                precision_error = False
                for k in range(beam_size - 1):
                    if np.isclose(topk_scores[b][k], topk_scores[b][k + 1]):
                        precision_error = True
                        break
                if precision_error:
                    break
                for k in range(beam_size):
                    # Lengths, scores, and per-step symbols must all agree
                    # with the TopKDecoder's reported beams.
                    self.assertEqual(topk_lengths[b][k], len(topk[b][k]) - 1)
                    self.assertTrue(
                        np.isclose(topk_scores[b][k], topk[b][k][-1][3]))
                    total_steps = topk_lengths[b][k]
                    for t in range(total_steps):
                        self.assertEqual(topk_pred_symbols[t][b, k].data[0],
                                         topk[b][k][t +
                                                    1][1])  # topk includes SOS
Beispiel #7
0
def main(option):
    """Train the NTN-encoder / RNN-decoder intent model and print sample predictions."""
    # Seed both RNGs up front for reproducible runs.
    random.seed(option.random_seed)
    torch.manual_seed(option.random_seed)

    log_format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    logging.basicConfig(format=log_format, level='INFO', stream=sys.stdout)

    glove = Glove(option.emb_file)
    logging.info('loaded embeddings from ' + option.emb_file)

    # Source vocabulary comes from the embeddings; targets from the intent file.
    src_vocab = Vocab.build_from_glove(glove)
    tgt_vocab = Vocab.load(option.intent_vocab)

    train_set = load_intent_prediction_dataset(option.train_dataset,
                                               src_vocab,
                                               tgt_vocab,
                                               device=option.device)
    dev_set = load_intent_prediction_dataset(option.dev_dataset,
                                             src_vocab,
                                             tgt_vocab,
                                             device=option.device)

    train_loader = DataLoader(train_set,
                              batch_size=option.batch_size,
                              shuffle=True)
    # The dev set is evaluated as one un-shuffled batch.
    dev_loader = DataLoader(dev_set, batch_size=len(dev_set), shuffle=False)

    # Prepare loss: perplexity with padding positions masked out.
    pad_id = tgt_vocab.stoi[tgt_vocab.pad_token]
    loss = Perplexity(torch.ones(len(tgt_vocab)), pad_id)
    loss.criterion.to(option.device)

    # Initialize model: NTN event encoder plus an attention-free RNN decoder.
    encoder = NeuralTensorNetwork(
        nn.Embedding(len(src_vocab), option.emb_dim), option.em_k)
    decoder = DecoderRNN(len(tgt_vocab),
                         option.im_max_len,
                         option.im_hidden_size,
                         use_attention=False,
                         bidirectional=False,
                         eos_id=tgt_vocab.stoi[tgt_vocab.eos_token],
                         sos_id=tgt_vocab.stoi[tgt_vocab.bos_token])
    encoder.to(option.device)
    decoder.to(option.device)

    init_model(encoder)
    init_model(decoder)

    # Replace the randomly-initialized embeddings with pretrained GloVe vectors.
    encoder.embeddings.weight.data.copy_(torch.from_numpy(glove.embd).float())

    # One Adam optimizer over both modules, with gradient-norm clipping.
    param_groups = [{'params': encoder.parameters()},
                    {'params': decoder.parameters()}]
    optimizer = Optimizer(optim.Adam(param_groups, lr=option.lr),
                          max_grad_norm=5)
    trainer = NTNTrainer(loss,
                         print_every=option.report_every,
                         device=option.device)
    encoder, decoder = trainer.train(
        encoder,
        decoder,
        optimizer,
        train_loader,
        num_epochs=option.epochs,
        dev_data_loader=dev_loader,
        teacher_forcing_ratio=option.im_teacher_forcing_ratio)

    # Smoke-test the trained model on a few hand-picked (subj, verb, obj) events.
    predictor = NTNPredictor(encoder, decoder, src_vocab, tgt_vocab,
                             option.device)
    samples = [
        ("PersonX", "eventually told", "___"),
        ("PersonX", "tells", "PersonY 's tale"),
        ("PersonX", "always played", " ___"),
        ("PersonX", "would teach", "PersonY"),
        ("PersonX", "gets", "a ride"),
    ]
    for sample in samples:
        subj, verb, obj = sample
        fields = [field.lower().split(' ') for field in (subj, verb, obj)]
        print(sample, predictor.predict(*fields))
Beispiel #8
0
    # Build a fresh decoder unless resuming from a checkpoint.
    if not opt.resume:
        # Initialize model
        # NOTE(review): the leading positional args appear to be the label
        # vocabulary and per-channel normalization stats — confirm against
        # this DecoderRNN variant's signature.
        decoder = DecoderRNN(train_label_lang.word2index,
                             x_mean_std[0],
                             y_mean_std[0],
                             w_mean_std[0],
                             r_mean_std[0],
                             opt.batch_size,
                             opt.max_len,
                             hidden_size,
                             opt.gmm_comp_num,
                             dropout_p=0.2,
                             use_attention=False,
                             bidirectional=bidirectional)

        # Small uniform weight initialization.
        for param in decoder.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Move the model to GPU when one is available.
        if torch.cuda.is_available():
            decoder.cuda()

    # train
    t = SupervisedTrainer(lloss=lloss,
                          bloss=bloss,
                          batch_size=opt.batch_size,
                          checkpoint_every=100,
                          print_every=50,
                          expt_dir=opt.expt_dir,
                          train_cap_lang=train_cap_lang,
                          train_label_lang=train_label_lang,
                          x_mean_std=x_mean_std,
class RNNDecoder:
    """RNN decoder class. Wraps the IBM seq2seq decoder (using GRU or LSTM units)."""

    def __init__(self,
                 vocab_size: int,
                 embedding_size: int,
                 n_hidden: int,
                 sos_token: int = 0,
                 eos_token: int = 1,
                 mask_token: int = 2,
                 max_output_length: int = 100,
                 rnn_cell: str = 'lstm') -> None:
        """Build the wrapped DecoderRNN and the masked NLL loss.

        Args:
            vocab_size: number of output tokens.
            embedding_size: embedding / hidden unit size of the decoder.
            n_hidden: number of stacked RNN layers.
            sos_token: start-of-sequence token id.
            eos_token: end-of-sequence token id.
            mask_token: padding token id, excluded from the loss.
            max_output_length: maximum decoded sequence length.
            rnn_cell: 'lstm' or 'gru'.
        """
        self.decoder = DecoderRNN(vocab_size,
                                  max_output_length,
                                  embedding_size,
                                  n_layers=n_hidden,
                                  rnn_cell=rnn_cell,
                                  use_attention=False,
                                  bidirectional=False,
                                  eos_id=eos_token,
                                  sos_id=sos_token)
        if torch.cuda.is_available(): self.decoder.cuda()

        self.rnn_cell = rnn_cell
        self.n_hidden = n_hidden
        self.embedding_size = embedding_size
        self.SOS_token = sos_token
        self.EOS_token = eos_token
        self.mask_token = mask_token
        self.max_output_length = max_output_length
        # Uniform token weights; the mask (pad) token contributes no loss.
        token_weights = torch.ones(vocab_size)
        if torch.cuda.is_available(): token_weights = token_weights.cuda()
        self.loss = NLLLoss(weight=token_weights, mask=mask_token)
        # Created lazily on the first call to train_iters().
        self.optimizer = None

    def _create_init_hidden(self, embedding):
        """Tile the embedding across all RNN layers as the initial hidden state.

        Returns a (num_layers x batch_size x embedding_size) tensor.
        """
        # All hidden states start as the embedding.
        layers = [embedding for _ in range(self.n_hidden)]
        return torch.cat(layers, 0)

    def train(self, input_tensor, target_tensor, teacher_forcing_ratio=0.5):
        """Train for one batch; returns the batch loss value.

        `input_tensor` is used as the decoder's initial (encoder) hidden
        state; `target_tensor` is (batch x max_output_length+1) including
        the leading SOS token.
        """
        decoder_outputs, decoder_hidden, ret_dict = self.decoder(
            inputs=target_tensor,
            encoder_hidden=input_tensor,
            teacher_forcing_ratio=teacher_forcing_ratio)
        # Nothing was generated. This number (10) was arbitrarily chosen.
        if len(decoder_outputs) == 0:
            return 10

        loss = self.loss
        loss.reset()
        batch_size = target_tensor.size(0)
        for step, step_output in enumerate(decoder_outputs):
            # Step t predicts target position t+1 (targets start with SOS).
            loss.eval_batch(step_output.contiguous().view(batch_size, -1),
                            target_tensor[:, step + 1])
        self.decoder.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.get_loss()

    def train_iters(self,
                    pairs,
                    n_iters,
                    batch_size=64,
                    print_every=1000,
                    learning_rate=0.0002,
                    teacher_forcing_ratio=0.5):
        """Train for some number of iterations choosing randomly from the list of tensor pairs."""
        print("Initializing training.")
        # `is None` instead of `== None`: identity check for the sentinel.
        if self.optimizer is None:
            adam = optim.Adam(self.decoder.parameters(), lr=learning_rate)
            self.optimizer = Optimizer(adam, max_grad_norm=5)
        else:
            print("Using existing optimizer.")
        random.shuffle(pairs)
        if len(pairs) < batch_size:
            print("Not enough examples for one batch.")
            return

        # Turn the pairs into big tensors.
        # TODO: instead of saving pairs, save tensors directly. Otherwise this operation takes too much space.
        # Input: num_layers x num_examples x embedding_size
        # Target: num_examples x max_output_length+1
        input_tensors = [torch.reshape(i, (1, 1, -1)) for i, j in pairs]
        input_tensor = torch.cat(input_tensors, 1)
        input_tensor = self._create_init_hidden(input_tensor)
        target_tensors = [j for i, j in pairs]
        targets = []
        for target in target_tensors:
            target_tensor = torch.reshape(target, (1, -1))
            if target_tensor.size(1) >= self.max_output_length:
                # Truncate over-long targets to the maximum length.
                target_tensor = target_tensor[0][0:self.max_output_length]
                target_tensor = torch.reshape(target_tensor, (1, -1))
            else:
                # Pad short targets with the mask token.
                pad = torch.zeros(
                    1, self.max_output_length - target_tensor.size(1)).long()
                for i in range(self.max_output_length - target_tensor.size(1)):
                    pad[0][i] = self.mask_token
                target_tensor = torch.cat((target_tensor, pad), 1)
            # Add the start token.
            start_tensor = torch.zeros(1, 1).long()
            start_tensor[0][0] = self.SOS_token
            target_tensor = torch.cat((start_tensor, target_tensor), 1)
            targets.append(target_tensor)
        target_tensor = torch.cat(targets, 0)

        if torch.cuda.is_available(): target_tensor = target_tensor.cuda()
        if torch.cuda.is_available(): input_tensor = input_tensor.cuda()

        print("Starting training.")
        print_loss_total = 0  # Reset every print_every.
        batch = 0
        # `step` rather than `iter` to avoid shadowing the builtin.
        for step in range(n_iters):
            # Create the batch; wrap around when the data is exhausted.
            if (batch + 1) * batch_size > len(pairs):
                print("Finished an epoch!")
                batch = 0
            batch_input = input_tensor[:, batch * batch_size:(batch + 1) *
                                       batch_size, :].contiguous()
            batch_target = target_tensor[batch * batch_size:(batch + 1) *
                                         batch_size, :].contiguous()

            # LSTM cells take a (hidden, cell) state pair.
            if self.rnn_cell == 'lstm':
                batch_input = (batch_input, batch_input)

            loss = self.train(batch_input,
                              batch_target,
                              teacher_forcing_ratio=teacher_forcing_ratio)
            print_loss_total += loss

            if step % print_every == print_every - 1:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('Steps: {0}\nAverage loss: {1}'.format(
                    step, print_loss_avg))
            batch += 1

    def predict(self, input_tensor, beam_size: int):
        """Decode one example; returns the predicted token-id sequence.

        Uses beam search when beam_size > 1, greedy decoding otherwise.
        """
        if beam_size > 1:
            beam_decoder = TopKDecoder(self.decoder, beam_size)
        else:
            beam_decoder = self.decoder
        with torch.no_grad():
            decoder_hidden = self._create_init_hidden(
                torch.reshape(input_tensor, (1, 1, -1)))
            if torch.cuda.is_available():
                decoder_hidden = decoder_hidden.cuda()
            if self.rnn_cell == 'lstm':
                decoder_hidden = (decoder_hidden, decoder_hidden)
            decoder_outputs, decoder_hidden, ret_dict = beam_decoder(
                inputs=None,
                encoder_hidden=decoder_hidden,
                teacher_forcing_ratio=0)
        # Collect the (batch-size-1) predicted symbols step by step.
        output_sequence = []
        for item in ret_dict['sequence']:
            output_sequence.append(item[0].item())
        return output_sequence