# Imports assumed by the examples below (exact module paths may vary across
# lhotse versions; check the lhotse source for the canonical locations).
import logging
from tempfile import NamedTemporaryFile, TemporaryDirectory

import numpy as np
import torch

from lhotse import CutSet, MonoCut, NumpyHdf5Writer, validate
from lhotse.array import seconds_to_frames
from lhotse.dataset.collation import collate_custom_field
from lhotse.testing.dummies import dummy_recording
from lhotse.utils import Seconds, compute_num_frames


def test_cut_with_temporal_array_move_to_memory_large_offset():
    path = "test/fixtures/libri/cuts.json"
    cut = CutSet.from_file(path)[0]
    cut.start = 10.0
    cut.duration = 1.5

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as w:
        arr = np.array(
            np.arange(
                compute_num_frames(cut.duration,
                                   frame_shift=0.01,
                                   sampling_rate=16000)))
        cut.custom_array = w.store_array(
            key="dummy-key",
            value=arr,
            frame_shift=0.01,
            temporal_dim=0,
            start=cut.start,
        )

        cut_mem = cut.move_to_memory()
        arr_mem = cut_mem.load_custom_array()

        assert arr.dtype == arr_mem.dtype
        np.testing.assert_equal(arr, arr_mem)

        arr_trunc = cut.truncate(duration=0.5).load_custom_array()
        arr_mem_trunc = cut_mem.truncate(duration=0.5).load_custom_array()

        assert arr_trunc.dtype == arr_mem_trunc.dtype
        np.testing.assert_equal(arr_trunc, arr_mem_trunc)
Example #2
def test_temporal_array_partial_read():
    array = np.arange(30).astype(np.int8)

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:
        manifest = writer.store_array(
            key="utt1",
            value=array,
            temporal_dim=0,
            frame_shift=0.5,
            start=0.0,
        )

        # Read all
        restored = manifest.load()
        np.testing.assert_equal(array, restored)

        # Read first 10 frames (0 - 5 seconds)
        first_10 = manifest.load(duration=5)
        np.testing.assert_equal(array[:10], first_10)

        # Read last 10 frames (10 - 15 seconds)
        last_10 = manifest.load(start=10)
        np.testing.assert_equal(array[-10:], last_10)
        last_10 = manifest.load(start=10, duration=5)
        np.testing.assert_equal(array[-10:], last_10)

        # Read middle 10 frames (5 - 10 seconds)
        mid_10 = manifest.load(start=5, duration=5)
        np.testing.assert_equal(array[10:20], mid_10)
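
The partial reads above come down to converting second offsets into frame indices at the 0.5 s frame shift. A minimal sketch of that arithmetic (illustrative only; the real conversion happens inside lhotse's TemporalArray loading code):

def to_frame_index(seconds, frame_shift=0.5):
    # Hypothetical helper, not part of lhotse: frame index for a time offset.
    return round(seconds / frame_shift)

assert to_frame_index(5.0) == 10    # duration=5  -> the first 10 frames
assert to_frame_index(10.0) == 20   # start=10    -> frames 20..29 of the 30-frame array
assert to_frame_index(15.0) == 30   # 30 frames x 0.5 s == 15 s total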
Example #3
    def _with_custom_temporal_array(self, cut: MonoCut, frame_shift: Seconds) -> None:
        # Helper method from a test-fixture class: the TemporaryDirectory is kept
        # in self.dirs so it is not cleaned up while the cut is still in use.
        d = TemporaryDirectory()
        self.dirs.append(d)
        num_frames = seconds_to_frames(cut.duration, frame_shift=frame_shift)
        array = np.random.randint(256, size=(num_frames,))
        with NumpyHdf5Writer(d.name) as storage:
            cut.codebook_indices = storage.store_array(
                key="ali1", value=array, frame_shift=frame_shift, temporal_dim=0
            )

def test_cut_with_array_move_to_memory():
    path = "test/fixtures/libri/cuts.json"
    cut = CutSet.from_file(path)[0]
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as w:
        arr = np.array([0, 1, 2, 3])
        cut.custom_array = w.store_array(key="dummy-key", value=arr)

        cut_mem = cut.move_to_memory()
        arr_mem = cut_mem.load_custom_array()

        assert arr.dtype == arr_mem.dtype
        np.testing.assert_equal(arr, arr_mem)
Example #5
def test_cut_load_temporal_array():
    """Check that we can read a TemporalArray from a cut when their durations match."""
    alignment = np.random.randint(500, size=131)
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        manifest = writer.store_array(
            key="utt1", value=alignment, frame_shift=0.4, temporal_dim=0
        )
        expected_duration = 52.4  # 131 frames x 0.4s frame shift == 52.4s
        cut = MonoCut(id="x", start=0, duration=expected_duration, channel=0)
        # Note: MonoCut doesn't normally have an "alignment" attribute
        #       or a "load_alignment()" method; we are dynamically extending it here.
        cut.alignment = manifest
        restored_alignment = cut.load_alignment()
        np.testing.assert_equal(alignment, restored_alignment)
Example #6
def test_cut_load_temporal_array_truncate():
    """Check the array loaded via TemporalArray is truncated along with the cut."""
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        expected_duration = 52.4  # 131 frames x 0.4s frame shift == 52.4s
        cut = MonoCut(id="x", start=0, duration=expected_duration, channel=0)

        alignment = np.random.randint(500, size=131)
        cut.alignment = writer.store_array(
            key="utt1", value=alignment, frame_shift=0.4, temporal_dim=0
        )
        cut_trunc = cut.truncate(duration=5.0)

        alignment_piece = cut_trunc.load_alignment()
        assert alignment_piece.shape == (13,)  # 5.0 / 0.4 == 12.5, rounded up to 13 frames
        np.testing.assert_equal(alignment[:13], alignment_piece)
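
For reference, the 13-frame shape asserted above comes from the partial trailing frame: 5.0 s at a 0.4 s frame shift is 12.5 frames, and the half frame rounds up, as the assertion implies (a sketch of the arithmetic, not lhotse's internal code; the exact rounding rule is lhotse's):

import math

frame_shift = 0.4
truncated_duration = 5.0
assert truncated_duration / frame_shift == 12.5
assert math.floor(truncated_duration / frame_shift + 0.5) == 13  # matches alignment_piece.shape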
Example #7
def test_collate_custom_temporal_array_ints(pad_value):
    CODEBOOK_SIZE = 512
    FRAME_SHIFT = 0.04

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    max_num_frames = max(
        seconds_to_frames(cut.duration, FRAME_SHIFT) for cut in cuts)

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:
        expected_codebook_indices = []
        for cut in cuts:
            expected_codebook_indices.append(
                np.random.randint(CODEBOOK_SIZE,
                                  size=(seconds_to_frames(
                                      cut.duration,
                                      FRAME_SHIFT), )).astype(np.int16))
            cut.codebook_indices = writer.store_array(
                cut.id,
                expected_codebook_indices[-1],
                frame_shift=FRAME_SHIFT,
                temporal_dim=0,
            )

        codebook_indices, codebook_indices_lens = collate_custom_field(
            cuts, "codebook_indices", pad_value=pad_value)

        assert isinstance(codebook_indices_lens, torch.Tensor)
        assert codebook_indices_lens.dtype == torch.int32
        assert codebook_indices_lens.shape == (len(cuts), )
        assert codebook_indices_lens.tolist() == [
            seconds_to_frames(c.duration, FRAME_SHIFT) for c in cuts
        ]

        assert isinstance(codebook_indices, torch.Tensor)
        assert codebook_indices.dtype == torch.int16
        assert codebook_indices.shape == (len(cuts), max_num_frames)
        for idx, cbidxs in enumerate(expected_codebook_indices):
            exp_len = cbidxs.shape[0]
            # PyTorch < 1.9.0 doesn't have an assert_equal function.
            np.testing.assert_equal(codebook_indices[idx, :exp_len].numpy(),
                                    cbidxs)
            expected_pad_value = 0 if pad_value is None else pad_value
            np.testing.assert_equal(codebook_indices[idx, exp_len:].numpy(),
                                    expected_pad_value)
Example #8
def test_collate_custom_array():
    EMBEDDING_SIZE = 300

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:
        expected_xvectors = []
        for cut in cuts:
            expected_xvectors.append(
                np.random.randn(EMBEDDING_SIZE).astype(np.float32))
            cut.xvector = writer.store_array(cut.id, expected_xvectors[-1])

        xvectors = collate_custom_field(cuts, "xvector")
        assert isinstance(xvectors, torch.Tensor)
        assert xvectors.dtype == torch.float32
        assert xvectors.shape == (len(cuts), EMBEDDING_SIZE)
        for idx, xvec in enumerate(expected_xvectors):
            torch.testing.assert_allclose(xvectors[idx], xvec)
Example #9
def test_cut_load_temporal_array_pad(pad_value):
    """Check the array loaded via TemporalArray is padded along with the cut."""
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        cut = MonoCut(
            id="x",
            start=0,
            duration=52.4,  # 131 frames x 0.4s frame shift == 52.4s
            channel=0,
            recording=dummy_recording(1),
        )

        alignment = np.random.randint(500, size=131)
        cut.alignment = writer.store_array(
            key="utt1", value=alignment, frame_shift=0.4, temporal_dim=0
        )
        cut_pad = cut.pad(duration=60.0, pad_value_dict={"alignment": pad_value})

        alignment_pad = cut_pad.load_alignment()
        assert alignment_pad.shape == (150,)  # 60.0 / 0.4 == 150
        np.testing.assert_equal(alignment_pad[:131], alignment)
        np.testing.assert_equal(alignment_pad[131:], pad_value)
Example #10
def test_collate_custom_temporal_array_floats(pad_value):
    VOCAB_SIZE = 500

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    max_num_frames = max(cut.num_frames for cut in cuts)

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:
        expected_posteriors = []
        for cut in cuts:
            expected_posteriors.append(
                np.random.randn(cut.num_frames, VOCAB_SIZE).astype(np.float32))
            cut.posterior = writer.store_array(
                cut.id,
                expected_posteriors[-1],
                frame_shift=cut.frame_shift,
                temporal_dim=0,
            )

        posteriors, posterior_lens = collate_custom_field(cuts,
                                                          "posterior",
                                                          pad_value=pad_value)

        assert isinstance(posterior_lens, torch.Tensor)
        assert posterior_lens.dtype == torch.int32
        assert posterior_lens.shape == (len(cuts), )
        assert posterior_lens.tolist() == [c.num_frames for c in cuts]

        assert isinstance(posteriors, torch.Tensor)
        assert posteriors.dtype == torch.float32
        assert posteriors.shape == (len(cuts), max_num_frames, VOCAB_SIZE)
        for idx, post in enumerate(expected_posteriors):
            exp_len = post.shape[0]
            torch.testing.assert_allclose(posteriors[idx, :exp_len], post)
            expected_pad_value = 0 if pad_value is None else pad_value
            torch.testing.assert_allclose(
                posteriors[idx, exp_len:],
                expected_pad_value *
                torch.ones_like(posteriors[idx, exp_len:]),
            )
Example #11
def test_validate_cut_with_temporal_array(caplog):
    # Note: "caplog" is a special variable in pytest that captures logs.
    caplog.set_level(logging.WARNING)
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:
        cut = MonoCut(
            id="cut1",
            start=0,
            duration=4.9,
            channel=0,
            recording=dummy_recording(1),
        )
        alignment = np.random.randint(500, size=131)
        cut.alignment = writer.store_array(key="utt1",
                                           value=alignment,
                                           frame_shift=0.4,
                                           temporal_dim=0)
        validate(cut)

    assert ("MonoCut cut1: possibly mismatched duration between cut (4.9s) "
            "and temporal array in custom field 'alignment' (num_frames=131 "
            "* frame_shift=0.4 == duration=52.400000000000006)" in caplog.text)
Example #12
def test_padding_issue_478():
    """
    https://github.com/lhotse-speech/lhotse/issues/478
    """
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:

        # Prepare data for cut 1.
        cut1 = MonoCut("c1",
                       start=0,
                       duration=4.9,
                       channel=0,
                       recording=dummy_recording(1))
        ali1 = np.random.randint(500, size=(121, ))
        cut1.label_alignment = writer.store_array("c1",
                                                  ali1,
                                                  frame_shift=0.04,
                                                  temporal_dim=0)

        # Prepare data for cut 2.
        cut2 = MonoCut("c2",
                       start=0,
                       duration=4.895,
                       channel=0,
                       recording=dummy_recording(2))
        ali2 = np.random.randint(500, size=(121, ))
        cut2.label_alignment = writer.store_array("c2",
                                                  ali2,
                                                  frame_shift=0.04,
                                                  temporal_dim=0)

        # Test collation behavior on this cutset.
        cuts = CutSet.from_cuts([cut1, cut2])
        label_alignments, label_alignment_lens = collate_custom_field(
            cuts, "label_alignment")

        np.testing.assert_equal(label_alignments[0].numpy(), ali1)
        np.testing.assert_equal(label_alignments[1].numpy(), ali2)
Example #13
def test_collate_custom_temporal_array_ints(pad_direction):
    CODEBOOK_SIZE = 512
    FRAME_SHIFT = 0.04
    EXPECTED_PAD_VALUE = 0

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    max_num_frames = max(
        seconds_to_frames(cut.duration, FRAME_SHIFT) for cut in cuts)

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(
            f.name) as writer:
        expected_codebook_indices = []
        for cut in cuts:
            expected_codebook_indices.append(
                np.random.randint(CODEBOOK_SIZE,
                                  size=(seconds_to_frames(
                                      cut.duration,
                                      FRAME_SHIFT), )).astype(np.int16))
            cut.codebook_indices = writer.store_array(
                cut.id,
                expected_codebook_indices[-1],
                frame_shift=FRAME_SHIFT,
                temporal_dim=0,
            )

        codebook_indices, codebook_indices_lens = collate_custom_field(
            cuts, "codebook_indices", pad_direction=pad_direction)

        assert isinstance(codebook_indices_lens, torch.Tensor)
        assert codebook_indices_lens.dtype == torch.int32
        assert codebook_indices_lens.shape == (len(cuts), )
        assert codebook_indices_lens.tolist() == [
            seconds_to_frames(c.duration, FRAME_SHIFT) for c in cuts
        ]

        assert isinstance(codebook_indices, torch.Tensor)
        assert codebook_indices.dtype == torch.int64  # the dtype got promoted by default
        assert codebook_indices.shape == (len(cuts), max_num_frames)
        for idx, cbidxs in enumerate(expected_codebook_indices):
            exp_len = cbidxs.shape[0]
            # PyTorch < 1.9.0 doesn't have an assert_equal function.
            if pad_direction == "right":
                np.testing.assert_equal(
                    codebook_indices[idx, :exp_len].numpy(), cbidxs)
                np.testing.assert_equal(
                    codebook_indices[idx, exp_len:].numpy(),
                    EXPECTED_PAD_VALUE)
            if pad_direction == "left":
                np.testing.assert_equal(
                    codebook_indices[idx, -exp_len:].numpy(), cbidxs)
                np.testing.assert_equal(
                    codebook_indices[idx, :-exp_len].numpy(),
                    EXPECTED_PAD_VALUE)
            if pad_direction == "both":
                half = (max_num_frames - exp_len) // 2
                np.testing.assert_equal(codebook_indices[idx, :half].numpy(),
                                        EXPECTED_PAD_VALUE)
                np.testing.assert_equal(
                    codebook_indices[idx, half:half + exp_len].numpy(), cbidxs)
                if half > 0:
                    # indexing like [idx, -0:] would return the whole array rather
                    # than an empty slice.
                    np.testing.assert_equal(
                        codebook_indices[idx, -half:].numpy(),
                        EXPECTED_PAD_VALUE)