Code example #1
File: train.py  Project: didivassi/seq2seq-summarizer
def train_batch(batch: Batch,
                model: Seq2Seq,
                criterion,
                optimizer,
                *,
                pack_seq=True,
                forcing_ratio=0.5,
                partial_forcing=True,
                rl_ratio: float = 0,
                vocab=None,
                grad_norm: float = 0):
    if not pack_seq:
        input_lengths = None
    else:
        input_lengths = batch.input_lengths

    optimizer.zero_grad()
    input_tensor = batch.input_tensor.to(DEVICE)
    target_tensor = batch.target_tensor.to(DEVICE)
    ext_vocab_size = batch.ext_vocab_size

    out = model(input_tensor,
                target_tensor,
                input_lengths,
                criterion,
                forcing_ratio=forcing_ratio,
                partial_forcing=partial_forcing,
                ext_vocab_size=ext_vocab_size)

    if rl_ratio > 0:
        assert vocab is not None
        # sampled decode (reuses results cached in `out`)
        sample_out = model(input_tensor,
                           saved_out=out,
                           criterion=criterion,
                           sample=True,
                           ext_vocab_size=ext_vocab_size)
        # greedy baseline decode
        baseline_out = model(input_tensor,
                             saved_out=out,
                             visualize=False,
                             ext_vocab_size=ext_vocab_size)
        scores = eval_batch_output([ex.tgt for ex in batch.examples], vocab,
                                   batch.oov_dict, sample_out.decoded_tokens,
                                   baseline_out.decoded_tokens)
        greedy_rouge = scores[1]['l_f']
        neg_reward = greedy_rouge - scores[0]['l_f']
        # if sample > baseline, the reward is positive (i.e. good exploration), rl_loss is negative
        rl_loss = neg_reward * sample_out.loss
        loss = (1 - rl_ratio) * out.loss + rl_ratio * rl_loss
    else:
        loss = out.loss
        greedy_rouge = None

    loss.backward()
    if grad_norm > 0:
        clip_grad_norm_(model.parameters(), grad_norm)
    optimizer.step()

    target_length = target_tensor.size(0)
    return loss.item() / target_length, greedy_rouge
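
As a point of reference, a minimal driver loop for this function might look like the sketch below. The `train_epoch` name, the `batches` iterable, and the optimizer/criterion choices are assumptions made for illustration; only `train_batch` and its keyword arguments come from the snippet above.

# Hypothetical driver loop; `batches`, `model`, and `vocab` are assumed to be
# built elsewhere in the project, and the Adam/NLLLoss choices are illustrative.
import torch.nn as nn
import torch.optim as optim

def train_epoch(batches, model, vocab, *, lr=1e-3, rl_ratio=0.0, grad_norm=2.0):
    criterion = nn.NLLLoss(ignore_index=0)  # assumes index 0 is the PAD id
    optimizer = optim.Adam(model.parameters(), lr=lr)
    total_loss, n_batches = 0.0, 0
    for batch in batches:  # each element is a Batch, as in the snippets
        loss, greedy_rouge = train_batch(batch, model, criterion, optimizer,
                                         rl_ratio=rl_ratio, vocab=vocab,
                                         grad_norm=grad_norm)
        total_loss += loss
        n_batches += 1
    return total_loss / max(n_batches, 1)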
Code example #2
File: train.py  Project: whcisci/seq2seq-summarizer
def train_batch(batch: Batch,
                model: Seq2Seq,
                criterion,
                optimizer,
                *,
                pack_seq=True,
                forcing_ratio=0.5,
                partial_forcing=True,
                sample=False,
                rl_ratio: float = 0,
                vocab=None,
                grad_norm: float = 0,
                show_cover_loss=False):
    if not pack_seq:
        input_lengths = None
        mask = None
    else:
        # build a mask over the padded positions from the input lengths
        input_lengths = batch.input_lengths
        mask = create_mask(input_lengths)

    optimizer.zero_grad()
    input_tensor = batch.input_tensor.to(DEVICE)
    target_tensor = batch.target_tensor.to(DEVICE)
    if mask is not None:
        mask = mask.to(DEVICE)
    ext_vocab_size = batch.ext_vocab_size

    out = model(input_tensor,
                target_tensor,
                input_lengths,
                criterion,
                forcing_ratio=forcing_ratio,
                partial_forcing=partial_forcing,
                sample=sample,
                ext_vocab_size=ext_vocab_size,
                include_cover_loss=show_cover_loss,
                mask=mask)

    if rl_ratio > 0:
        assert vocab is not None
        # sample
        sample_out = model(input_tensor,
                           saved_out=out,
                           criterion=criterion,
                           sample=True,
                           ext_vocab_size=ext_vocab_size,
                           mask=mask)
        # greedy
        baseline_out = model(input_tensor,
                             saved_out=out,
                             visualize=False,
                             ext_vocab_size=ext_vocab_size,
                             mask=mask)
        scores = eval_batch_output([ex.tgt for ex in batch.examples], vocab,
                                   batch.oov_dict, sample_out.decoded_tokens,
                                   baseline_out.decoded_tokens)
        greedy_rouge = scores[1]['l_f']
        neg_reward = greedy_rouge - scores[0]['l_f']  # greedy_rouge - sample_rouge
        # if sample > baseline, the reward is positive (i.e. good exploration), rl_loss is negative
        # TODO: there are some issues with the RL loss computation
        rl_loss = neg_reward * sample_out.loss
        rl_loss_value = neg_reward * sample_out.loss_value
        # mix the RL loss with the cross-entropy loss
        loss = (1 - rl_ratio) * out.loss + rl_ratio * rl_loss
        loss_value = (1 - rl_ratio) * out.loss_value + rl_ratio * rl_loss_value
    else:
        loss = out.loss
        loss_value = out.loss_value
        greedy_rouge = None

    loss.backward()  # backpropagation
    # gradient clipping
    if grad_norm > 0:
        clip_grad_norm_(model.parameters(), grad_norm)
    # update the parameters
    optimizer.step()

    target_length = target_tensor.size(0)
    # loss for the whole batch divided by the number of decoding steps, i.e. an average per step
    return loss_value / target_length, greedy_rouge
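
Both variants share the same self-critical reinforcement-learning branch: a sampled decode is scored against a greedy baseline, and the negated reward scales the sample's loss before it is mixed with the teacher-forced loss. The stripped-down sketch below isolates that mixing step; the ROUGE-L scores and loss tensors are made-up placeholders, not values from either project.

import torch

# Placeholder values standing in for the eval_batch_output scores and model losses.
sample_rouge = 0.42   # ROUGE-L F1 of the sampled sequence (scores[0]['l_f'])
greedy_rouge = 0.38   # ROUGE-L F1 of the greedy baseline (scores[1]['l_f'])
sample_loss = torch.tensor(5.0, requires_grad=True)  # loss of the sampled decode
ce_loss = torch.tensor(4.0, requires_grad=True)      # teacher-forced loss (out.loss)
rl_ratio = 0.5

# If the sample beats the baseline, neg_reward is negative, so minimizing
# rl_loss increases the likelihood of the sampled sequence.
neg_reward = greedy_rouge - sample_rouge
rl_loss = neg_reward * sample_loss
loss = (1 - rl_ratio) * ce_loss + rl_ratio * rl_loss
loss.backward()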