Esempio n. 1
0
 def test_invariants_pad(self, sampling_rate: int, window_hop: int,
                         pad_direction: str, rand_gen):
     """Check that padding a cut keeps its ``codebook_indices`` consistent.

     A cut carrying a custom field is created, padded to a randomly drawn
     longer duration with ``-1`` as the fill value for ``codebook_indices``,
     and the array loaded from the padded cut is checked against the padded
     cut's metadata.

     Args:
         sampling_rate: Audio sampling rate, used to size the cut and to
             derive the frame shift.
         window_hop: Hop size in samples; ``window_hop / sampling_rate`` is
             used as the frame shift.
         pad_direction: Forwarded unchanged to ``cut.pad(direction=...)``.
         rand_gen: Object whose ``draw(strategy, label=...)`` supplies the
             random values (presumably a Hypothesis data object -- confirm
             against the decorators, which are not visible here).
     """
     # Cut length in samples: between 0.46x and 1.9x the sampling rate.
     num_samples = rand_gen.draw(
         st.integers(round(sampling_rate * 0.46),
                     round(sampling_rate * 1.9)),
         label="Number of audio samples in a cut.",
     )
     frame_shift = window_hop / sampling_rate
     try:
         # Build the cut with a custom field but without features.
         cut = self.with_cut(
             sampling_rate=sampling_rate,
             num_samples=num_samples,
             frame_shift=frame_shift,
             features=False,
             custom_field=True,
         )
         # Target duration: from 3% longer than the cut up to twice as long.
         duration = rand_gen.draw(
             st.floats(min_value=cut.duration + 0.03 * cut.duration,
                       max_value=cut.duration * 2),
             label="Padded cut duration",
         )
         padded = cut.pad(
             duration=duration,
             direction=pad_direction,
             pad_value_dict={"codebook_indices": -1},
         )
         # Invariants: the loaded array has the rank advertised by the
         # manifest, and its first axis matches the frame count implied by
         # the padded duration.
         array = padded.load_codebook_indices()
         assert array.ndim == padded.codebook_indices.ndim
         expected_num_frames = seconds_to_frames(
             padded.duration, padded.codebook_indices.frame_shift)
         assert array.shape[0] == expected_num_frames
     finally:
         # Run cleanup even when an assertion or a helper fails, so that a
         # failing example does not leave its fixtures behind.
         self.cleanup()
Esempio n. 2
0
def test_collate_custom_temporal_array_ints(pad_value):
    """Collate per-cut int16 codebook indices and verify the padded batch.

    Random int16 arrays are stored for every cut of the LJSpeech fixture,
    attached as ``codebook_indices``, and collated with
    ``collate_custom_field``. The test then checks the returned lengths
    tensor, the batch dtype and shape, the per-cut content, and the padding.

    Args:
        pad_value: Passed to ``collate_custom_field``; ``None`` is expected
            to result in zero padding, any other value in that value.
    """
    CODEBOOK_SIZE = 512
    FRAME_SHIFT = 0.04

    def frames_in(cut):
        # Number of frames a cut spans at the fixed frame shift.
        return seconds_to_frames(cut.duration, FRAME_SHIFT)

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    max_num_frames = max(frames_in(cut) for cut in cuts)

    with NamedTemporaryFile(suffix=".h5") as tmp_file, NumpyHdf5Writer(
            tmp_file.name) as writer:
        expected_codebook_indices = []
        for cut in cuts:
            indices = np.random.randint(
                CODEBOOK_SIZE, size=(frames_in(cut), )).astype(np.int16)
            expected_codebook_indices.append(indices)
            cut.codebook_indices = writer.store_array(
                cut.id,
                indices,
                frame_shift=FRAME_SHIFT,
                temporal_dim=0,
            )

        batch, batch_lens = collate_custom_field(
            cuts, "codebook_indices", pad_value=pad_value)

        # Lengths: one int32 entry per cut, equal to its frame count.
        assert isinstance(batch_lens, torch.Tensor)
        assert batch_lens.dtype == torch.int32
        assert batch_lens.shape == (len(cuts), )
        assert batch_lens.tolist() == [frames_in(c) for c in cuts]

        # Batch: int16, one row per cut, padded to the longest cut.
        assert isinstance(batch, torch.Tensor)
        assert batch.dtype == torch.int16
        assert batch.shape == (len(cuts), max_num_frames)

        expected_pad_value = 0 if pad_value is None else pad_value
        for row, expected in zip(batch, expected_codebook_indices):
            exp_len = expected.shape[0]
            # Compare through NumPy: PyTorch < 1.9.0 has no assert_equal.
            np.testing.assert_equal(row[:exp_len].numpy(), expected)
            np.testing.assert_equal(row[exp_len:].numpy(),
                                    expected_pad_value)
Esempio n. 3
0
 def _with_custom_temporal_array(self, cut: MonoCut, frame_shift: Seconds) -> None:
     """Attach a random integer temporal array to ``cut``.

     The array has one value in ``[0, 256)`` per frame of the cut, is
     written through a ``NumpyHdf5Writer`` under the key ``"ali1"``, and
     the object returned by ``store_array`` is assigned to
     ``cut.codebook_indices``.

     Args:
         cut: Cut that receives the ``codebook_indices`` attribute; it is
             modified in place.
         frame_shift: Frame shift used both to compute the number of frames
             and as metadata for the stored array.
     """
     tmp_dir = TemporaryDirectory()
     # Keep a reference on the instance (presumably so the directory stays
     # alive until the test's cleanup step -- confirm against the class).
     self.dirs.append(tmp_dir)
     frame_count = seconds_to_frames(cut.duration, frame_shift=frame_shift)
     values = np.random.randint(256, size=(frame_count,))
     with NumpyHdf5Writer(tmp_dir.name) as writer:
         stored = writer.store_array(
             key="ali1",
             value=values,
             frame_shift=frame_shift,
             temporal_dim=0,
         )
         cut.codebook_indices = stored
Esempio n. 4
0
def test_collate_custom_temporal_array_ints(pad_direction):
    """Collate per-cut codebook indices and verify where the padding lands.

    Random int16 arrays are stored for every cut of the LJSpeech fixture,
    attached as ``codebook_indices``, and collated with
    ``collate_custom_field`` using the given ``pad_direction``. The test
    checks the lengths tensor, the batch dtype and shape, and that content
    and zero padding sit where the direction says they should.

    Args:
        pad_direction: Passed to ``collate_custom_field``. Content and
            padding positions are only checked for ``"right"``, ``"left"``
            and ``"both"``.
    """
    CODEBOOK_SIZE = 512
    FRAME_SHIFT = 0.04
    EXPECTED_PAD_VALUE = 0

    def frames_in(cut):
        # Number of frames a cut spans at the fixed frame shift.
        return seconds_to_frames(cut.duration, FRAME_SHIFT)

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    max_num_frames = max(frames_in(cut) for cut in cuts)

    with NamedTemporaryFile(suffix=".h5") as tmp_file, NumpyHdf5Writer(
            tmp_file.name) as writer:
        expected_codebook_indices = []
        for cut in cuts:
            indices = np.random.randint(
                CODEBOOK_SIZE, size=(frames_in(cut), )).astype(np.int16)
            expected_codebook_indices.append(indices)
            cut.codebook_indices = writer.store_array(
                cut.id,
                indices,
                frame_shift=FRAME_SHIFT,
                temporal_dim=0,
            )

        batch, batch_lens = collate_custom_field(
            cuts, "codebook_indices", pad_direction=pad_direction)

        # Lengths: one int32 entry per cut, equal to its frame count.
        assert isinstance(batch_lens, torch.Tensor)
        assert batch_lens.dtype == torch.int32
        assert batch_lens.shape == (len(cuts), )
        assert batch_lens.tolist() == [frames_in(c) for c in cuts]

        assert isinstance(batch, torch.Tensor)
        # The data was stored as int16; the dtype got promoted by default.
        assert batch.dtype == torch.int64
        assert batch.shape == (len(cuts), max_num_frames)

        for row, expected in zip(batch, expected_codebook_indices):
            exp_len = expected.shape[0]
            # Compare through NumPy: PyTorch < 1.9.0 has no assert_equal.
            if pad_direction == "right":
                # Content first, padding after it.
                np.testing.assert_equal(row[:exp_len].numpy(), expected)
                np.testing.assert_equal(row[exp_len:].numpy(),
                                        EXPECTED_PAD_VALUE)
            elif pad_direction == "left":
                # Padding first, content at the end.
                np.testing.assert_equal(row[-exp_len:].numpy(), expected)
                np.testing.assert_equal(row[:-exp_len].numpy(),
                                        EXPECTED_PAD_VALUE)
            elif pad_direction == "both":
                # Content starts after half of the total padding.
                half = (max_num_frames - exp_len) // 2
                np.testing.assert_equal(row[:half].numpy(),
                                        EXPECTED_PAD_VALUE)
                np.testing.assert_equal(
                    row[half:half + exp_len].numpy(), expected)
                if half > 0:
                    # A slice like [-0:] would select the whole row rather
                    # than an empty tail, so skip it when there is no
                    # padding.
                    np.testing.assert_equal(row[-half:].numpy(),
                                            EXPECTED_PAD_VALUE)