Python Transformer 예제들, espnet.nets.pytorch_backend.e2e_tts_transformer.Transformer Python 예제들

예제 #1

0

파일 보기

파일: test_e2e_tts_fastspeech.py 프로젝트: zouwei02/espnet

def test_fastspeech_gpu_trainable_and_decodable(teacher_type, model_dict):
    # make args
    idim, odim = 10, 25
    model_args = make_feedforward_transformer_args(**model_dict)

    # setup batch
    ilens = [10, 5]
    olens = [20, 15]
    device = torch.device('cuda')
    batch = prepare_inputs(idim,
                           odim,
                           ilens,
                           olens,
                           model_args["spk_embed_dim"],
                           device=device)

    # define teacher model and save it
    if teacher_type == "transformer":
        teacher_model_args = make_transformer_args(**model_dict)
        teacher_model = Transformer(idim, odim,
                                    Namespace(**teacher_model_args))
    elif teacher_type == "tacotron2":
        teacher_model_args = make_taco2_args(**model_dict)
        teacher_model = Tacotron2(idim, odim, Namespace(**teacher_model_args))
    else:
        raise ValueError()
    tmpdir = tempfile.mkdtemp(prefix="tmp_", dir="/tmp")
    torch.save(teacher_model.state_dict(), tmpdir + "/model.dummy.best")
    with open(tmpdir + "/model.json", 'wb') as f:
        f.write(
            json.dumps((idim, odim, teacher_model_args),
                       indent=4,
                       ensure_ascii=False,
                       sort_keys=True).encode('utf_8'))

    # define model
    model_args["teacher_model"] = tmpdir + "/model.dummy.best"
    model = FeedForwardTransformer(idim, odim, Namespace(**model_args))
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters())

    # trainable
    loss = model(**batch).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # decodable
    model.eval()
    with torch.no_grad():
        if model_args["spk_embed_dim"] is None:
            spemb = None
        else:
            spemb = batch["spembs"][0]
        model.inference(batch["xs"][0][:batch["ilens"][0]], None, spemb=spemb)
        model.calculate_all_attentions(**batch)

    # remove tmpdir
    if os.path.exists(tmpdir):
        shutil.rmtree(tmpdir)

예제 #2

0

파일 보기

파일: test_e2e_tts_transformer.py 프로젝트: zy1022/espnet

def test_transformer_multi_gpu_trainable(model_dict):
    # make args
    model_args = make_transformer_args(**model_dict)

    # setup batch
    idim = 5
    odim = 10
    ilens = [10, 5]
    olens = [20, 15]
    device = torch.device("cuda")
    batch = prepare_inputs(idim,
                           odim,
                           ilens,
                           olens,
                           model_args["spk_embed_dim"],
                           device=device)

    # define model
    ngpu = 2
    device_ids = list(range(ngpu))
    model = Transformer(idim, odim, Namespace(**model_args))
    model = torch.nn.DataParallel(model, device_ids)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters())

    # trainable
    loss = model(**batch).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # check gradient of ScaledPositionalEncoding
    if model.module.use_scaled_pos_enc:
        assert model.module.encoder.embed[1].alpha.grad is not None
        assert model.module.decoder.embed[1].alpha.grad is not None

예제 #3

0

파일 보기

파일: test_e2e_tts_fastspeech.py 프로젝트: zhuanaa/espnet

def test_initialization(model_dict):
    # make args
    idim, odim = 10, 25
    teacher_model_args = make_transformer_args(**model_dict)
    model_args = make_feedforward_transformer_args(**model_dict)

    # define teacher model and save it
    teacher_model = Transformer(idim, odim, Namespace(**teacher_model_args))
    tmpdir = tempfile.mkdtemp(prefix="tmp_", dir="/tmp")
    torch.save(teacher_model.state_dict(), tmpdir + "/model.dummy.best")
    with open(tmpdir + "/model.json", 'wb') as f:
        f.write(
            json.dumps((idim, odim, teacher_model_args),
                       indent=4,
                       ensure_ascii=False,
                       sort_keys=True).encode('utf_8'))

    # define model
    model_args["teacher_model"] = tmpdir + "/model.dummy.best"
    model = FeedForwardTransformer(idim, odim, Namespace(**model_args))

    # check initialization
    if model_args["transferred_encoder_module"] == "all":
        for p1, p2 in zip(model.encoder.parameters(),
                          model.teacher.encoder.parameters()):
            np.testing.assert_array_equal(p1.data.cpu().numpy(),
                                          p2.data.cpu().numpy())
    else:
        np.testing.assert_array_equal(
            model.encoder.embed[0].weight.data.cpu().numpy(),
            model.teacher.encoder.embed[0].weight.data.cpu().numpy())

    # remove tmpdir
    if os.path.exists(tmpdir):
        shutil.rmtree(tmpdir)

예제 #4

0

파일 보기

파일: test_e2e_tts_transformer.py 프로젝트: yntcyjb/espnet

def test_transformer_gpu_trainable(model_dict):
    # make args
    model_args = make_transformer_args(**model_dict)

    idim = 5
    odim = 10
    ilens = [10, 5]
    olens = [20, 15]
    device = torch.device('cuda')
    batch = prepare_inputs(idim, odim, ilens, olens, device=device)

    # define model
    model = Transformer(idim, odim, Namespace(**model_args))
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters())

    # trainable
    loss = model(**batch).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # check gradient of ScaledPositionalEncoding
    if model.use_scaled_pos_enc:
        assert model.encoder.embed[1].alpha.grad is not None
        assert model.decoder.embed[1].alpha.grad is not None

예제 #5

0

파일 보기

파일: test_e2e_tts_fastspeech.py 프로젝트: yunigma/espnet

def test_fastspeech_multi_gpu_trainable(teacher_type, model_dict):
    # make args
    idim, odim = 10, 25
    model_args = make_feedforward_transformer_args(**model_dict)

    # setup batch
    ilens = [10, 5]
    olens = [20, 15]
    device = torch.device("cuda")
    batch = prepare_inputs(idim,
                           odim,
                           ilens,
                           olens,
                           model_args["spk_embed_dim"],
                           device=device)

    # define teacher model and save it
    if teacher_type == "transformer":
        teacher_model_args = make_transformer_args(**model_dict)
        teacher_model = Transformer(idim, odim,
                                    Namespace(**teacher_model_args))
    elif teacher_type == "tacotron2":
        teacher_model_args = make_taco2_args(**model_dict)
        teacher_model = Tacotron2(idim, odim, Namespace(**teacher_model_args))
    else:
        raise ValueError()
    tmpdir = tempfile.mkdtemp(prefix="tmp_", dir="/tmp")
    torch.save(teacher_model.state_dict(), tmpdir + "/model.dummy.best")
    with open(tmpdir + "/model.json", "wb") as f:
        f.write(
            json.dumps(
                (idim, odim, teacher_model_args),
                indent=4,
                ensure_ascii=False,
                sort_keys=True,
            ).encode("utf_8"))

    # define model
    ngpu = 2
    device_ids = list(range(ngpu))
    model_args["teacher_model"] = tmpdir + "/model.dummy.best"
    model = FeedForwardTransformer(idim, odim, Namespace(**model_args))
    model = torch.nn.DataParallel(model, device_ids)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters())

    # trainable
    loss = model(**batch).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # remove tmpdir
    if os.path.exists(tmpdir):
        shutil.rmtree(tmpdir)

예제 #6

0

파일 보기

파일: test_e2e_tts_transformer.py 프로젝트: MostafaMessi789/espnet

def test_attention_masking(model_dict):
    # make args
    model_args = make_transformer_args(**model_dict)

    # setup batch
    idim = 5
    odim = 10
    ilens = [10, 5]
    olens = [20, 15]
    batch = prepare_inputs(idim, odim, ilens, olens)

    # define model
    model = Transformer(idim, odim, Namespace(**model_args))

    # test encoder self-attention
    xs = model.encoder.embed(batch["xs"])
    xs[1, ilens[1]:] = float("nan")
    x_masks = model._source_mask(batch["ilens"])
    a = model.encoder.encoders[0].self_attn
    a(xs, xs, xs, x_masks)
    aws = a.attn.detach().numpy()
    assert not np.isnan(aws).any()
    for aw, ilen in zip(aws, batch["ilens"]):
        np.testing.assert_almost_equal(aw[:, :ilen, :ilen].sum(),
                                       float(aw.shape[0] * ilen),
                                       decimal=4)
        assert aw[:, ilen:, ilen:].sum() == 0.0

    # test encoder-decoder attention
    ys = model.decoder.embed(batch["ys"])
    ys[1, olens[1]:] = float("nan")
    xy_masks = model._source_to_target_mask(batch["ilens"], batch["olens"])
    a = model.decoder.decoders[0].src_attn
    a(ys, xs, xs, xy_masks)
    aws = a.attn.detach().numpy()
    assert not np.isnan(aws).any()
    for aw, ilen, olen in zip(aws, batch["ilens"], batch["olens"]):
        np.testing.assert_almost_equal(aw[:, :olen, :ilen].sum(),
                                       float(aw.shape[0] * olen),
                                       decimal=4)
        assert aw[:, olen:, ilen:].sum() == 0.0

    # test decoder self-attention
    ys = model.decoder.embed(batch["ys"])
    ys[1, olens[1]:] = float("nan")
    y_masks = model._target_mask(batch["olens"])
    a = model.decoder.decoders[0].self_attn
    a(ys, ys, ys, y_masks)
    aws = a.attn.detach().numpy()
    assert not np.isnan(aws).any()
    for aw, olen in zip(aws, batch["olens"]):
        np.testing.assert_almost_equal(aw[:, :olen, :olen].sum(),
                                       float(aw.shape[0] * olen),
                                       decimal=4)
        assert aw[:, olen:, olen:].sum() == 0.0

예제 #7

0

파일 보기

파일: test_e2e_tts_transformer.py 프로젝트: yntcyjb/espnet

def test_transformer_trainable_and_decodable(model_dict):
    # make args
    model_args = make_transformer_args(**model_dict)
    inference_args = make_inference_args()

    # setup batch
    idim = 5
    odim = 10
    ilens = [10, 5]
    olens = [20, 15]
    batch = prepare_inputs(idim, odim, ilens, olens)

    # define model
    model = Transformer(idim, odim, Namespace(**model_args))
    optimizer = torch.optim.Adam(model.parameters())

    # trainable
    loss = model(**batch).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # check gradient of ScaledPositionalEncoding
    if model.use_scaled_pos_enc:
        assert model.encoder.embed[1].alpha.grad is not None
        assert model.decoder.embed[1].alpha.grad is not None

    # decodable
    model.eval()
    with torch.no_grad():
        model.inference(batch["xs"][0][:batch["ilens"][0]], Namespace(**inference_args))
        model.calculate_all_attentions(**batch)

예제 #8

0

파일 보기

파일: e2e_tts_fastspeech.py 프로젝트: 107368019/espnet

    def _load_teacher_model(self, model_path):
        # get teacher model config
        idim, odim, args = get_model_conf(model_path)

        # assert dimension is the same between teacher and studnet
        assert idim == self.idim
        assert odim == self.odim
        assert args.reduction_factor == self.reduction_factor

        # load teacher model
        model = Transformer(idim, odim, args)
        torch_load(model_path, model)

        # freeze teacher model parameters
        for p in model.parameters():
            p.requires_grad = False

        return model

예제 #9

0

파일 보기

def test_duration_calculator():
    # define duration calculator
    idim, odim = 10, 25
    teacher_model_args = make_transformer_args()
    teacher = Transformer(idim, odim, Namespace(**teacher_model_args))
    duration_calculator = DurationCalculator(teacher)

    # setup batch
    ilens = [10, 5, 3]
    olens = [20, 15, 10]
    batch = prepare_inputs(idim, odim, ilens, olens)

    # calculate durations
    ds = duration_calculator(batch["xs"], batch["ilens"], batch["ys"], batch["olens"])
    np.testing.assert_array_equal(ds.sum(dim=-1).cpu().numpy(), batch["olens"].cpu().numpy())

예제 #10

0

파일 보기

파일: test_e2e_tts_transformer.py 프로젝트: zy1022/espnet

def test_forward_and_inference_are_equal(model_dict):
    # make args
    model_args = make_transformer_args(dprenet_dropout_rate=0.0, **model_dict)

    # setup batch
    idim = 5
    odim = 10
    ilens = [10]
    olens = [20]
    batch = prepare_inputs(idim, odim, ilens, olens)
    xs = batch["xs"]
    ilens = batch["ilens"]
    ys = batch["ys"]
    olens = batch["olens"]

    # define model
    model = Transformer(idim, odim, Namespace(**model_args))
    model.eval()

    # TODO(kan-bayashi): update following ugly part
    with torch.no_grad():
        # --------- forward calculation ---------
        x_masks = model._source_mask(ilens)
        hs_fp, h_masks = model.encoder(xs, x_masks)
        if model.reduction_factor > 1:
            ys_in = ys[:, model.reduction_factor - 1::model.reduction_factor]
            olens_in = olens.new(
                [olen // model.reduction_factor for olen in olens])
        else:
            ys_in, olens_in = ys, olens
        ys_in = model._add_first_frame_and_remove_last_frame(ys_in)
        y_masks = model._target_mask(olens_in)
        zs, _ = model.decoder(ys_in, y_masks, hs_fp, h_masks)
        before_outs = model.feat_out(zs).view(zs.size(0), -1, model.odim)
        logits = model.prob_out(zs).view(zs.size(0), -1)
        after_outs = before_outs + model.postnet(before_outs.transpose(
            1, 2)).transpose(1, 2)
        # --------- forward calculation ---------

        # --------- inference calculation ---------
        hs_ir, _ = model.encoder(xs, None)
        maxlen = ys_in.shape[1]
        minlen = ys_in.shape[1]
        idx = 0
        # this is the inferene calculation but we use groundtruth to check the behavior
        ys_in_ = ys_in[0, idx].view(1, 1, model.odim)
        np.testing.assert_array_equal(
            ys_in_.new_zeros(1, 1, model.odim).detach().cpu().numpy(),
            ys_in_.detach().cpu().numpy(),
        )
        outs, probs = [], []
        while True:
            idx += 1
            y_masks = subsequent_mask(idx).unsqueeze(0)
            z = model.decoder.forward_one_step(ys_in_, y_masks,
                                               hs_ir)[0]  # (B, idx, adim)
            outs += [model.feat_out(z).view(1, -1,
                                            model.odim)]  # [(1, r, odim), ...]
            probs += [torch.sigmoid(model.prob_out(z))[0]]  # [(r), ...]
            if idx >= maxlen:
                if idx < minlen:
                    continue
                outs = torch.cat(outs, dim=1).transpose(
                    1, 2)  # (1, L, odim) -> (1, odim, L)
                if model.postnet is not None:
                    outs = outs + model.postnet(outs)  # (1, odim, L)
                outs = outs.transpose(2, 1).squeeze(0)  # (L, odim)
                probs = torch.cat(probs, dim=0)
                break
            ys_in_ = torch.cat((ys_in_, ys_in[0, idx].view(1, 1, model.odim)),
                               dim=1)  # (1, idx + 1, odim)
        # --------- inference calculation ---------

        # check both are equal
        np.testing.assert_array_almost_equal(
            hs_fp.detach().cpu().numpy(),
            hs_ir.detach().cpu().numpy(),
        )
        np.testing.assert_array_almost_equal(
            after_outs.squeeze(0).detach().cpu().numpy(),
            outs.detach().cpu().numpy(),
        )
        np.testing.assert_array_almost_equal(
            torch.sigmoid(logits.squeeze(0)).detach().cpu().numpy(),
            probs.detach().cpu().numpy(),
        )