Example #1
def prepare_inputs(bs,
                   idim,
                   odim,
                   maxin_len,
                   maxout_len,
                   spk_embed_dim=None,
                   spc_dim=None):
    ilens = np.sort(np.random.randint(1, maxin_len, bs))[::-1].tolist()
    olens = np.sort(np.random.randint(1, maxout_len, bs))[::-1].tolist()
    ilens = torch.LongTensor(ilens)
    olens = torch.LongTensor(olens)
    xs = [np.random.randint(0, idim, l) for l in ilens]
    ys = [np.random.randn(l, odim) for l in olens]
    xs = pad_list([torch.from_numpy(x).long() for x in xs], 0)
    ys = pad_list([torch.from_numpy(y).float() for y in ys], 0)
    labels = ys.new_zeros(ys.size(0), ys.size(1))
    for i, l in enumerate(olens):
        labels[i, l - 1:] = 1
    if spk_embed_dim is not None:
        spembs = torch.from_numpy(np.random.randn(bs, spk_embed_dim)).float()
    else:
        spembs = None
    if spc_dim is not None:
        spcs = [np.random.randn(l, spc_dim) for l in olens]
        spcs = pad_list([torch.from_numpy(spc).float() for spc in spcs], 0)
    else:
        spcs = None

    return xs, ilens, ys, labels, olens, spembs, spcs
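
A minimal usage sketch for the helper above; every size below is a made-up illustration value, not something the original specifies:

# Hypothetical call with illustrative sizes.
xs, ilens, ys, labels, olens, spembs, spcs = prepare_inputs(
    bs=2, idim=10, odim=5, maxin_len=20, maxout_len=15,
    spk_embed_dim=64)
# xs: (2, max(ilens)) LongTensor, ys: (2, max(olens), 5) FloatTensor,
# labels: (2, max(olens)) stop-token targets, spembs: (2, 64), spcs: None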
Example #2
def prepare_inputs(mode, ilens=(150, 100), olens=(4, 3), is_cuda=False):
    np.random.seed(1)
    assert len(ilens) == len(olens)
    xs = [np.random.randn(ilen, 40).astype(np.float32) for ilen in ilens]
    ys = [np.random.randint(1, 5, olen).astype(np.int32) for olen in olens]
    ilens = np.array([x.shape[0] for x in xs], dtype=np.int32)

    if mode == "chainer":
        if is_cuda:
            xp = importlib.import_module('cupy')
            xs = [chainer.Variable(xp.array(x)) for x in xs]
            ys = [chainer.Variable(xp.array(y)) for y in ys]
            ilens = xp.array(ilens)
        else:
            xs = [chainer.Variable(x) for x in xs]
            ys = [chainer.Variable(y) for y in ys]
        return xs, ilens, ys

    elif mode == "pytorch":
        ilens = torch.from_numpy(ilens).long()
        xs_pad = pad_list([torch.from_numpy(x).float() for x in xs], 0)
        ys_pad = pad_list([torch.from_numpy(y).long() for y in ys], -1)
        if is_cuda:
            xs_pad = xs_pad.cuda()
            ilens = ilens.cuda()
            ys_pad = ys_pad.cuda()

        return xs_pad, ilens, ys_pad
    else:
        raise ValueError("Invalid mode")
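
For instance, the PyTorch branch can be exercised as follows (a sketch; it assumes `pad_list` is importable in scope):

xs_pad, ilens, ys_pad = prepare_inputs("pytorch")
# xs_pad: (2, 150, 40) float tensor padded with 0,
# ys_pad: (2, 4) long tensor padded with -1,
# ilens:  tensor([150, 100])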
Example #3
def test_ctc_loss():
    pytest.importorskip("torch")
    pytest.importorskip("warpctc_pytorch")
    import torch
    import warpctc_pytorch

    from espnet.nets.e2e_asr_th import pad_list

    n_out = 7
    input_length = numpy.array([11, 17, 15], dtype=numpy.int32)
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)
    np_pred = [
        numpy.random.rand(il, n_out).astype(numpy.float32)
        for il in input_length
    ]
    np_target = [
        numpy.random.randint(0, n_out, size=ol, dtype=numpy.int32)
        for ol in label_length
    ]

    # NOTE: np_pred[i] appears to be transposed and used with axis=-1 in e2e_asr.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_loss = F.connectionist_temporal_classification(ch_pred, ch_target, 0,
                                                      input_length,
                                                      label_length).data

    th_pred = pad_list([torch.from_numpy(x) for x in np_pred],
                       0.0).transpose(0, 1)
    th_target = torch.from_numpy(numpy.concatenate(np_target))
    th_ilen = torch.from_numpy(input_length)
    th_olen = torch.from_numpy(label_length)
    th_loss = warpctc_pytorch.CTCLoss(size_average=True)(
        th_pred, th_target, th_ilen, th_olen).data.numpy()[0]
    numpy.testing.assert_allclose(th_loss, ch_loss, 0.05)
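
Note the transpose after pad_list: warpctc_pytorch.CTCLoss expects activations of shape (Tmax, batch, n_out), whereas pad_list returns (batch, Tmax, n_out). An illustrative shape check:

# pad_list output is (batch, Tmax, n_out); the transpose(0, 1) above
# yields the (Tmax, batch, n_out) layout that warpctc_pytorch expects.
assert th_pred.shape == (int(input_length.max()), len(np_pred), n_out)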
Example #4
def test_pad_list():
    xs = [[1, 2, 3], [1, 2], [1, 2, 3, 4]]
    xs = [torch.LongTensor(x) for x in xs]
    xpad = pad_list(xs, -1)

    es = [[1, 2, 3, -1], [1, 2, -1, -1], [1, 2, 3, 4]]
    assert xpad.data.tolist() == es
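
For reference, the behavior this test pins down can be reproduced with a minimal re-implementation of pad_list. This is a sketch inferred from the test above, not necessarily ESPnet's exact code:

import torch

def pad_list(xs, pad_value):
    # Stack variable-length tensors into one (batch, Tmax, ...) tensor,
    # filling the tail of shorter sequences with pad_value.
    n_batch = len(xs)
    max_len = max(x.size(0) for x in xs)
    pad = xs[0].new_full((n_batch, max_len) + xs[0].size()[1:], pad_value)
    for i, x in enumerate(xs):
        pad[i, :x.size(0)] = x
    return pad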
Example #5
    def __call__(self, batch, device):
        # batch should be a list containing a single batch
        assert len(batch) == 1
        xs, ys = batch[0]

        # perform subsampling
        if self.subsampling_factor > 1:
            xs = [x[::self.subsampling_factor, :] for x in xs]

        # get batch of lengths of input sequences
        ilens = np.array([x.shape[0] for x in xs])

        # perform padding and convert to tensor
        xs_pad = pad_list([torch.from_numpy(x).float() for x in xs],
                          0).to(device)
        ilens = torch.from_numpy(ilens).to(device)
        ys_pad = pad_list([torch.from_numpy(y).long() for y in ys],
                          self.ignore_id).to(device)

        return xs_pad, ilens, ys_pad
Example #6
    def __call__(self, batch, device):
        # batch should be a list containing a single batch
        assert len(batch) == 1
        inputs_and_targets = batch[0]

        # parse inputs and targets
        xs, ys, spembs, spcs = inputs_and_targets

        # get list of lengths (must be tensor for DataParallel)
        ilens = torch.from_numpy(np.array([x.shape[0]
                                           for x in xs])).long().to(device)
        olens = torch.from_numpy(np.array([y.shape[0]
                                           for y in ys])).long().to(device)

        # perform padding and conversion to tensor
        xs = pad_list([torch.from_numpy(x).long() for x in xs], 0).to(device)
        ys = pad_list([torch.from_numpy(y).float() for y in ys], 0).to(device)

        # make labels for stop prediction
        labels = ys.new_zeros(ys.size(0), ys.size(1))
        for i, l in enumerate(olens):
            labels[i, l - 1:] = 1.0

        # load second target
        if spcs is not None:
            spcs = pad_list([torch.from_numpy(spc).float() for spc in spcs],
                            0).to(device)

        # load speaker embedding
        if spembs is not None:
            spembs = torch.from_numpy(np.array(spembs)).float().to(device)

        if self.return_targets:
            return xs, ilens, ys, labels, olens, spembs, spcs
        else:
            return xs, ilens, ys, spembs
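
The stop-prediction labels built above mark the final frame of each target and everything after it. A tiny worked example:

import torch

olens = torch.LongTensor([3, 2])
labels = torch.zeros(2, 3)
for i, l in enumerate(olens):
    labels[i, l - 1:] = 1.0
# labels == tensor([[0., 0., 1.],
#                   [0., 1., 1.]])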
Example #7
def test_attn_loss():
    pytest.importorskip("torch")
    import torch

    from espnet.nets.e2e_asr_th import pad_list

    n_out = 7
    _eos = n_out - 1
    n_batch = 3
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)
    np_pred = numpy.random.rand(n_batch,
                                max(label_length) + 1,
                                n_out).astype(numpy.float32)
    # NOTE: 0 is only used for CTC, never appeared in attn target
    np_target = [
        numpy.random.randint(1, n_out - 1, size=ol, dtype=numpy.int32)
        for ol in label_length
    ]

    eos = numpy.array([_eos], 'i')
    ys_out = [F.concat([y, eos], axis=0) for y in np_target]

    # padding for ys with -1
    # pys: utt x olen
    # NOTE: -1 is default ignore index for chainer
    pad_ys_out = F.pad_sequence(ys_out, padding=-1)
    y_all = F.reshape(np_pred, (n_batch * (max(label_length) + 1), n_out))
    ch_loss = F.softmax_cross_entropy(y_all, F.concat(pad_ys_out, axis=0))

    # NOTE: this index 0 is only for CTC not attn. so it can be ignored
    # unfortunately, torch cross_entropy does not accept out-of-bound ids
    th_ignore = 0
    th_pred = torch.from_numpy(y_all.data)
    th_target = pad_list([torch.from_numpy(t.data).long() for t in ys_out],
                         th_ignore)
    th_loss = torch.nn.functional.cross_entropy(th_pred,
                                                th_target.view(-1),
                                                ignore_index=th_ignore,
                                                size_average=True)
    print(ch_loss)
    print(th_loss)

    # NOTE: warpctc_pytorch.CTCLoss does not normalize by batch size, while chainer's default setting does
    loss_data = float(th_loss)
    numpy.testing.assert_allclose(loss_data, ch_loss.data, 0.05)
Example #8
def test_train_acc():
    pytest.importorskip("torch")
    import torch

    from espnet.nets.e2e_asr_th import pad_list
    from espnet.nets.e2e_asr_th import th_accuracy

    n_out = 7
    _eos = n_out - 1
    n_batch = 3
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)
    np_pred = numpy.random.rand(n_batch,
                                max(label_length) + 1,
                                n_out).astype(numpy.float32)
    # NOTE: 0 is only used for CTC, never appeared in attn target
    np_target = [
        numpy.random.randint(1, n_out - 1, size=ol, dtype=numpy.int32)
        for ol in label_length
    ]

    eos = numpy.array([_eos], 'i')
    ys_out = [F.concat([y, eos], axis=0) for y in np_target]

    # padding for ys with -1
    # pys: utt x olen
    # NOTE: -1 is default ignore index for chainer
    pad_ys_out = F.pad_sequence(ys_out, padding=-1)
    y_all = F.reshape(np_pred, (n_batch * (max(label_length) + 1), n_out))
    ch_acc = F.accuracy(y_all, F.concat(pad_ys_out, axis=0), ignore_label=-1)

    # NOTE: this index 0 is only for CTC not attn. so it can be ignored
    # unfortunately, torch cross_entropy does not accept out-of-bound ids
    th_ignore = 0
    th_pred = torch.from_numpy(y_all.data)
    th_ys = [torch.from_numpy(numpy.append(t, eos)).long() for t in np_target]
    th_target = pad_list(th_ys, th_ignore)
    th_acc = th_accuracy(th_pred, th_target, th_ignore)

    numpy.testing.assert_allclose(ch_acc.data, th_acc)
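
th_accuracy itself is not shown here; a plausible sketch, inferred only from how it is called (flat predictions, padded targets, an ignore index) and not necessarily ESPnet's actual implementation, reshapes the predictions back to (batch, length, classes) and counts matches outside the ignored positions:

import torch

def th_accuracy(pad_outputs, pad_targets, ignore_label):
    # pad_outputs: (batch * Lmax, n_out), pad_targets: (batch, Lmax)
    pad_pred = pad_outputs.view(pad_targets.size(0), pad_targets.size(1),
                                pad_outputs.size(1)).argmax(2)
    mask = pad_targets != ignore_label
    numerator = torch.sum(pad_pred[mask] == pad_targets[mask])
    denominator = torch.sum(mask)
    return float(numerator) / float(denominator)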