Beispiel #1
0
def train_step(batch, model, config, loss_compute):
	"""Run one training step and return the loss from loss_compute.

	Args:
		batch: raw batch consumed by prepare_src_batch / prepare_tgt_batch.
		model: seq2seq module whose forward returns (decoder_out, encoder_out).
		config: configuration object forwarded to prepare_src_batch.
		loss_compute: callable(out, target, norm, enc_op, enc_mask,
			enc_extend_vocab, extra_zeros) -> loss.

	Returns:
		Whatever loss_compute returns (the per-batch loss).
	"""
	model.train()
	enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros = prepare_src_batch(batch, config)
	dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = prepare_tgt_batch(batch)
	# Call the module itself rather than .forward() so that any registered
	# forward/backward hooks (and other __call__ machinery) actually run.
	out, enc_op = model(enc_batch, dec_batch, enc_padding_mask, dec_padding_mask)
	# Normalize by the total number of target tokens in the batch.
	norm = int(torch.sum(dec_lens_var))
	loss = loss_compute(out, target_batch, norm, enc_op, enc_padding_mask, enc_batch_extend_vocab, extra_zeros)  # sending actual lengths instead of norm
	return loss
Beispiel #2
0
def train_step_bert(batch, model, config, loss_compute):
    """Run one training step with the (frozen) BERT encoder and return the loss.

    The encoder is put in eval mode and run under no_grad, so only the
    decoder receives gradient updates.
    """
    model.train()

    (src, src_mask, src_lens,
     src_extend_vocab, extra_zeros) = prepare_src_batch(batch)
    (tgt, tgt_mask, tgt_max_len,
     tgt_lens, gold) = prepare_tgt_batch(batch)

    # Freeze the encoder for this step: eval mode + no gradient tracking.
    model.encoder.eval()
    with torch.no_grad():
        memory, _ = model.encoder.forward(src, src_mask.squeeze(1))

    decoder_out, _ = model.decode(memory, tgt, src_mask, tgt_mask)

    # Normalize by the total number of target tokens in the batch.
    token_count = int(torch.sum(tgt_lens))
    return loss_compute(decoder_out, gold, token_count, memory, src_mask,
                        src_extend_vocab, extra_zeros)
Beispiel #3
0
    def beam_search(self, batch, conf):
        """Beam-search decode for a single-example batch.

        At each step the full token prefix is re-decoded with a causal mask,
        the top 2*beam_size next tokens extend each live hypothesis, and
        hypotheses that emit STOP_DECODING are collected until beam_size
        results exist or config.max_dec_steps is reached.

        NOTE(review): the ``conf`` argument is never read; the module-level
        ``config`` is used throughout — confirm which one is intended.

        Returns:
            The highest-scoring Hypothesis (by average log-probability).
        """
        # batch should have only one example
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros = helper.prepare_src_batch(
            batch)
        # Encode the source once; the output is reused at every decode step.
        encoder_output, _ = self.model.encoder.forward(
            enc_batch, enc_padding_mask.squeeze(1))

        # Every beam starts from the START_DECODING token with log-prob 0.
        hyps_list = [
            Hypothesis(tokens=[self.vocab.word2id(data.START_DECODING)],
                       log_probs=[0.0]) for _ in range(config.beam_size)
        ]
        results = []  # completed hypotheses (ended with STOP_DECODING)
        steps = 0
        # yt accumulates decoder input tokens; column 0 is a dummy zero that
        # is sliced off (yt[:, 1:]) before every decode call.
        yt = torch.zeros(config.beam_size, 1).long().to(device)
        while steps < config.max_dec_steps and len(results) < config.beam_size:
            latest_tokens = [h.latest_token for h in hyps_list]
            # Ids beyond the fixed vocab come from the copy mechanism's
            # extended vocabulary; map them to UNK for the embedding lookup.
            latest_tokens = [
                t if t < self.vocab.size() else self.vocab.word2id(
                    data.UNKNOWN_TOKEN) for t in latest_tokens
            ]

            curr_yt = torch.LongTensor(latest_tokens).unsqueeze(1).to(
                device)  # [Bx1]
            yt = torch.cat((yt, curr_yt), dim=1)

            # Re-decode the whole prefix with a causal (subsequent) mask.
            out, _ = self.model.decode(
                encoder_output, yt[:, 1:], enc_padding_mask,
                helper.subsequent_mask(yt[:, 1:].size(-1)))
            extra_zeros_ip = None
            if extra_zeros is not None:
                # Truncate the OOV zero-padding to the current prefix length.
                extra_zeros_ip = extra_zeros[:, 0:steps + 1, :]

            # The generator returns (dist, coverage) when coverage is on.
            if config.coverage:
                op_dist, _ = self.model.generator(out, encoder_output,
                                                  enc_padding_mask,
                                                  enc_batch_extend_vocab,
                                                  extra_zeros_ip)
            else:
                op_dist = self.model.generator(out, encoder_output,
                                               enc_padding_mask,
                                               enc_batch_extend_vocab,
                                               extra_zeros_ip)

            # Only the distribution for the last decoded position matters.
            log_probs = op_dist[:, -1, :]
            # Take 2*beam_size candidates so enough survive STOP filtering.
            topk_log_probs, topk_ids = torch.topk(log_probs,
                                                  config.beam_size * 2)

            all_hyps = []
            # At step 0 every beam is identical, so expand only the first.
            num_orig_hyps = 1 if steps == 0 else len(hyps_list)

            for i in range(num_orig_hyps):
                h = hyps_list[i]

                for j in range(config.beam_size *
                               2):  # for each of the top beam_size hyps:
                    hyp = h.extend(token=topk_ids[i, j].item(),
                                   log_prob=topk_log_probs[i, j].item())
                    all_hyps.append(hyp)

            # Keep the best beam_size live hypotheses; move finished ones
            # (STOP emitted at/after min_dec_steps) into results.
            hyps_list = []
            sorted_hyps = sorted(all_hyps,
                                 key=lambda h: h.avg_log_prob,
                                 reverse=True)
            for h in sorted_hyps:
                if h.latest_token == self.vocab.word2id(data.STOP_DECODING):
                    if steps >= config.min_dec_steps:
                        results.append(h)
                else:
                    hyps_list.append(h)
                if len(hyps_list) == config.beam_size or len(
                        results) == config.beam_size:
                    break

            steps += 1

        # If nothing finished in time, fall back to the live beams.
        if len(results) == 0:
            results = hyps_list

        results_sorted = sorted(results,
                                key=lambda h: h.avg_log_prob,
                                reverse=True)
        return results_sorted[0]
Beispiel #4
0
    def beam_search(self, batch, conf):
        """Beam-search decode for a single-example batch.

        Each step re-decodes every live hypothesis's full token history (the
        single encoder output is tiled across hypotheses), takes the top
        2*beam_size next tokens, and collects finished hypotheses until
        beam_size results exist or config.max_dec_len steps have run.

        NOTE(review): ``conf`` and the module-level ``config`` are mixed here
        (conf.coverage vs config.beam_size/max_dec_len/min_dec_steps) —
        confirm they refer to the same settings. Also note the sibling
        implementation bounds the loop with config.max_dec_steps instead of
        config.max_dec_len.

        Returns:
            The highest-scoring Hypothesis (by average log-probability).
        """
        # batch should have only one example
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros = helper.prepare_src_batch(
            batch, conf)
        encoder_output = self.model.encode(enc_batch, enc_padding_mask)
        # A single START hypothesis; the beam widens after the first step.
        hyps_list = [
            Hypothesis(tokens=[self.vocab.word2id(data.START_DECODING)],
                       log_probs=[0.0]) for _ in range(1)
        ]
        results = []  # completed hypotheses (ended with STOP_DECODING)
        steps = 0

        while steps < config.max_dec_len and len(results) < config.beam_size:
            # Build the decoder input from each hypothesis's full history,
            # mapping extended-vocab (copy-mechanism) ids back to UNK.
            hyp_tokens = [h.tokens for h in hyps_list]
            np_hyp_tokens = np.asarray(hyp_tokens)
            np_hyp_tokens[
                np_hyp_tokens >= self.vocab.size()] = self.vocab.word2id(
                    data.UNKNOWN_TOKEN)
            yt = torch.LongTensor(np_hyp_tokens).to(device)
            # Tile the single encoder output across the live hypotheses.
            temp_enc_out = encoder_output.repeat(yt.size(0), 1, 1)
            out, _ = self.model.decode(temp_enc_out, yt, enc_padding_mask,
                                       helper.subsequent_mask(yt.size(-1)))
            extra_zeros_ip = None
            if extra_zeros is not None:
                # Truncate OOV zero-padding to the prefix length and tile it
                # to match the number of live hypotheses.
                extra_zeros_ip = extra_zeros[:, 0:steps + 1, :].repeat(
                    yt.size(0), 1, 1)
            # The generator returns (dist, coverage) when coverage is on.
            if conf.coverage:
                log_probs, _ = self.model.generator(out, temp_enc_out,
                                                    enc_padding_mask,
                                                    enc_batch_extend_vocab,
                                                    extra_zeros_ip)
            else:
                log_probs = self.model.generator(out, temp_enc_out,
                                                 enc_padding_mask,
                                                 enc_batch_extend_vocab,
                                                 extra_zeros_ip)
            log_probs = log_probs.squeeze(1)
            # Take 2*beam_size candidates so enough survive STOP filtering.
            topk_log_probs, topk_ids = torch.topk(log_probs,
                                                  config.beam_size * 2)
            # If the distribution kept a sequence-length dimension, keep only
            # the scores for the last decoded position.
            if len(topk_log_probs.size()) == 3:
                topk_log_probs = topk_log_probs[:, -1, :].squeeze(1)
                topk_ids = topk_ids[:, -1, :].squeeze(1)
            all_hyps = []
            # At step 0 there is only the single START hypothesis to expand.
            num_orig_hyps = 1 if steps == 0 else len(hyps_list)
            for i in range(num_orig_hyps):
                h = hyps_list[i]
                for j in range(config.beam_size *
                               2):  # for each of the top beam_size hyps:
                    hyp = h.extend(token=topk_ids[i, j].item(),
                                   log_prob=topk_log_probs[i, j].item())
                    all_hyps.append(hyp)
            # Keep the best beam_size live hypotheses; move finished ones
            # (STOP emitted at/after min_dec_steps) into results.
            hyps_list = []
            sorted_hyps = sorted(all_hyps,
                                 key=lambda h: h.avg_log_prob,
                                 reverse=True)
            for h in sorted_hyps:
                if h.latest_token == self.vocab.word2id(data.STOP_DECODING):
                    if steps >= config.min_dec_steps:
                        results.append(h)
                else:
                    hyps_list.append(h)
                if len(hyps_list) == config.beam_size or len(
                        results) == config.beam_size:
                    break

            steps += 1

        # If nothing finished in time, fall back to the live beams.
        if len(results) == 0:
            results = hyps_list

        results_sorted = sorted(results,
                                key=lambda h: h.avg_log_prob,
                                reverse=True)
        return results_sorted[0]