# Code example #1 (score: 0)
def test_collate_custom_numbers():
    """Collating a scalar custom field yields a 1-D float32 tensor, one entry per cut."""
    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    # Attach a random SNR value to every cut and remember it for verification.
    expected_snrs = []
    for cut in cuts:
        value = random.random() * 20
        cut.snr = value
        expected_snrs.append(value)

    snrs = collate_custom_field(cuts, "snr")
    assert isinstance(snrs, torch.Tensor)
    assert snrs.dtype == torch.float32
    assert snrs.shape == (len(cuts), )
    # Each collated entry must match the value we attached, up to float32 precision.
    for collated, expected in zip(snrs, expected_snrs):
        assert isclose(collated, expected, abs_tol=1e-5)
# Code example #2 (score: 0)
def test_collate_custom_temporal_array_ints(pad_value):
    """Temporal int16 arrays are right-padded to a common length with ``pad_value``."""
    # NOTE(review): a test with the same name is defined later in this file; if
    # both live in one module, the later definition shadows this one -- confirm
    # and rename one of them.
    CODEBOOK_SIZE = 512
    FRAME_SHIFT = 0.04

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    num_frames = [seconds_to_frames(c.duration, FRAME_SHIFT) for c in cuts]
    max_num_frames = max(num_frames)

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:
        # Attach a per-frame int16 codebook-index array to every cut.
        expected_codebook_indices = []
        for cut, n_frames in zip(cuts, num_frames):
            indices = np.random.randint(
                CODEBOOK_SIZE, size=(n_frames, )).astype(np.int16)
            expected_codebook_indices.append(indices)
            cut.codebook_indices = writer.store_array(
                cut.id,
                indices,
                frame_shift=FRAME_SHIFT,
                temporal_dim=0,
            )

        codebook_indices, codebook_indices_lens = collate_custom_field(
            cuts, "codebook_indices", pad_value=pad_value)

        # Lengths: one int32 entry per cut, equal to the unpadded frame count.
        assert isinstance(codebook_indices_lens, torch.Tensor)
        assert codebook_indices_lens.dtype == torch.int32
        assert codebook_indices_lens.shape == (len(cuts), )
        assert codebook_indices_lens.tolist() == num_frames

        # Values: int16 is preserved, rows padded to the longest sequence.
        assert isinstance(codebook_indices, torch.Tensor)
        assert codebook_indices.dtype == torch.int16
        assert codebook_indices.shape == (len(cuts), max_num_frames)
        expected_pad_value = 0 if pad_value is None else pad_value
        for row, expected in zip(codebook_indices, expected_codebook_indices):
            exp_len = expected.shape[0]
            # PyTorch < 1.9.0 doesn't have an assert_equal function.
            np.testing.assert_equal(row[:exp_len].numpy(), expected)
            np.testing.assert_equal(row[exp_len:].numpy(), expected_pad_value)
# Code example #3 (score: 0)
def test_collate_custom_array():
    """Fixed-size (non-temporal) arrays collate into a (batch, dim) tensor with no lengths."""
    EMBEDDING_SIZE = 300

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:
        # Attach a random float32 embedding of fixed size to every cut.
        expected_xvectors = []
        for cut in cuts:
            xvec = np.random.randn(EMBEDDING_SIZE).astype(np.float32)
            expected_xvectors.append(xvec)
            cut.xvector = writer.store_array(cut.id, xvec)

        xvectors = collate_custom_field(cuts, "xvector")
        assert isinstance(xvectors, torch.Tensor)
        assert xvectors.dtype == torch.float32
        assert xvectors.shape == (len(cuts), EMBEDDING_SIZE)
        for row, expected in zip(xvectors, expected_xvectors):
            torch.testing.assert_allclose(row, expected)
# Code example #4 (score: 0)
def test_collate_custom_temporal_array_floats(pad_value):
    """Temporal 2-D float arrays collate to (batch, frames, vocab) with ``pad_value`` fill."""
    VOCAB_SIZE = 500

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    max_num_frames = max(cut.num_frames for cut in cuts)

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:
        # Attach a random (num_frames, vocab) posterior matrix to every cut.
        expected_posteriors = []
        for cut in cuts:
            post = np.random.randn(cut.num_frames,
                                   VOCAB_SIZE).astype(np.float32)
            expected_posteriors.append(post)
            cut.posterior = writer.store_array(
                cut.id,
                post,
                frame_shift=cut.frame_shift,
                temporal_dim=0,
            )

        posteriors, posterior_lens = collate_custom_field(
            cuts, "posterior", pad_value=pad_value)

        # Lengths: one int32 entry per cut, equal to the unpadded frame count.
        assert isinstance(posterior_lens, torch.Tensor)
        assert posterior_lens.dtype == torch.int32
        assert posterior_lens.shape == (len(cuts), )
        assert posterior_lens.tolist() == [c.num_frames for c in cuts]

        # Values: float32, padded along the temporal axis to the longest cut.
        assert isinstance(posteriors, torch.Tensor)
        assert posteriors.dtype == torch.float32
        assert posteriors.shape == (len(cuts), max_num_frames, VOCAB_SIZE)
        expected_pad_value = 0 if pad_value is None else pad_value
        for row, expected in zip(posteriors, expected_posteriors):
            exp_len = expected.shape[0]
            torch.testing.assert_allclose(row[:exp_len], expected)
            padding = row[exp_len:]
            torch.testing.assert_allclose(
                padding,
                expected_pad_value * torch.ones_like(padding),
            )
# Code example #5 (score: 0)
def test_padding_issue_478():
    """
    Regression test: cuts with nearly-equal durations that map to the same
    number of alignment frames must collate without padding errors.
    https://github.com/lhotse-speech/lhotse/issues/478
    """
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:

        def make_cut(cut_id, duration, rec_idx):
            # Both cuts produce 121 alignment frames despite slightly
            # different durations -- the scenario from issue #478.
            cut = MonoCut(cut_id,
                          start=0,
                          duration=duration,
                          channel=0,
                          recording=dummy_recording(rec_idx))
            ali = np.random.randint(500, size=(121, ))
            cut.label_alignment = writer.store_array(cut_id,
                                                     ali,
                                                     frame_shift=0.04,
                                                     temporal_dim=0)
            return cut, ali

        cut1, ali1 = make_cut("c1", 4.9, 1)
        cut2, ali2 = make_cut("c2", 4.895, 2)

        # Collation over both cuts must reproduce each alignment exactly.
        cuts = CutSet.from_cuts([cut1, cut2])
        label_alignments, label_alignment_lens = collate_custom_field(
            cuts, "label_alignment")

        np.testing.assert_equal(label_alignments[0].numpy(), ali1)
        np.testing.assert_equal(label_alignments[1].numpy(), ali2)
# Code example #6 (score: 0)
def test_collate_custom_attribute_missing():
    """Requesting a custom field that no cut defines raises AttributeError."""
    cut_set = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    with pytest.raises(AttributeError):
        collate_custom_field(cut_set, "nonexistent_attribute")
# Code example #7 (score: 0)
def test_collate_custom_temporal_array_ints(pad_direction):
    """Padding may be applied on the right, the left, or split on both sides."""
    # NOTE(review): a test with the same name appears earlier in this file; if
    # both live in one module, this later definition shadows the earlier one --
    # confirm and rename one of them.
    CODEBOOK_SIZE = 512
    FRAME_SHIFT = 0.04
    EXPECTED_PAD_VALUE = 0

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    num_frames = [seconds_to_frames(c.duration, FRAME_SHIFT) for c in cuts]
    max_num_frames = max(num_frames)

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:
        # Attach a per-frame int16 codebook-index array to every cut.
        expected_codebook_indices = []
        for cut, n_frames in zip(cuts, num_frames):
            indices = np.random.randint(
                CODEBOOK_SIZE, size=(n_frames, )).astype(np.int16)
            expected_codebook_indices.append(indices)
            cut.codebook_indices = writer.store_array(
                cut.id,
                indices,
                frame_shift=FRAME_SHIFT,
                temporal_dim=0,
            )

        codebook_indices, codebook_indices_lens = collate_custom_field(
            cuts, "codebook_indices", pad_direction=pad_direction)

        # Lengths: one int32 entry per cut, equal to the unpadded frame count.
        assert isinstance(codebook_indices_lens, torch.Tensor)
        assert codebook_indices_lens.dtype == torch.int32
        assert codebook_indices_lens.shape == (len(cuts), )
        assert codebook_indices_lens.tolist() == num_frames

        assert isinstance(codebook_indices, torch.Tensor)
        # The dtype got promoted to int64 by default (no explicit pad_value).
        assert codebook_indices.dtype == torch.int64
        assert codebook_indices.shape == (len(cuts), max_num_frames)
        for row, expected in zip(codebook_indices, expected_codebook_indices):
            exp_len = expected.shape[0]
            # PyTorch < 1.9.0 doesn't have an assert_equal function.
            if pad_direction == "right":
                np.testing.assert_equal(row[:exp_len].numpy(), expected)
                np.testing.assert_equal(row[exp_len:].numpy(),
                                        EXPECTED_PAD_VALUE)
            elif pad_direction == "left":
                np.testing.assert_equal(row[-exp_len:].numpy(), expected)
                np.testing.assert_equal(row[:-exp_len].numpy(),
                                        EXPECTED_PAD_VALUE)
            elif pad_direction == "both":
                left_pad = (max_num_frames - exp_len) // 2
                np.testing.assert_equal(row[:left_pad].numpy(),
                                        EXPECTED_PAD_VALUE)
                np.testing.assert_equal(
                    row[left_pad:left_pad + exp_len].numpy(), expected)
                if left_pad > 0:
                    # Indexing like [-0:] would return the whole row rather
                    # than an empty slice, so guard against left_pad == 0.
                    np.testing.assert_equal(row[-left_pad:].numpy(),
                                            EXPECTED_PAD_VALUE)