def test_invariants_pad(self, sampling_rate: int, window_hop: int, pad_direction: str, rand_gen):
    """Property-based check: padding a cut keeps its custom temporal array consistent.

    Draws a random cut length (in samples) and a random padded duration,
    pads the cut in the given direction (filling the custom
    ``codebook_indices`` array with -1), and asserts that the loaded
    array has the same dimensionality as the manifest declares and the
    number of frames expected for the padded duration.

    :param sampling_rate: audio sampling rate used to build the cut.
    :param window_hop: hop size in samples; converted to a frame shift in seconds.
    :param pad_direction: where padding is added ("left"/"right"/"both" — as accepted by ``cut.pad``).
    :param rand_gen: hypothesis data object used to draw random values.
    """
    # Generate cut duration in numbers of samples.
    num_samples = rand_gen.draw(
        st.integers(round(sampling_rate * 0.46), round(sampling_rate * 1.9)),
        label="Number of audio samples in a cut.",
    )
    # Generate a random cut carrying a custom temporal array field.
    frame_shift = window_hop / sampling_rate
    cut = self.with_cut(
        sampling_rate=sampling_rate,
        num_samples=num_samples,
        frame_shift=frame_shift,
        features=False,
        custom_field=True,
    )
    # Pad to a random duration that is at least 3% longer than the cut
    # (guarantees some actual padding happens).
    duration = rand_gen.draw(
        st.floats(min_value=cut.duration + 0.03 * cut.duration, max_value=cut.duration * 2),
        # Fixed: was an f-string with no placeholders (F541).
        label="Padded cut duration",
    )
    padded = cut.pad(
        duration=duration,
        direction=pad_direction,
        pad_value_dict={"codebook_indices": -1},
    )
    # Test the invariants.
    array = padded.load_codebook_indices()
    assert array.ndim == padded.codebook_indices.ndim
    expected_num_frames = seconds_to_frames(
        padded.duration, padded.codebook_indices.frame_shift
    )
    assert array.shape[0] == expected_num_frames
    self.cleanup()
def test_collate_custom_temporal_array_ints(pad_value):
    """Collate an int16 custom temporal array across a CutSet and verify
    the per-cut lengths, the preserved dtype, the batch shape, the copied
    contents, and that padding uses ``pad_value`` (0 when it is None).
    """
    codebook_size = 512
    frame_shift = 0.04
    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    frames_per_cut = [seconds_to_frames(c.duration, frame_shift) for c in cuts]
    longest = max(frames_per_cut)
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        # Attach a random int16 codebook-index array to every cut and
        # remember the expected values for later comparison.
        expected = []
        for cut, n_frames in zip(cuts, frames_per_cut):
            indices = np.random.randint(codebook_size, size=(n_frames,)).astype(np.int16)
            expected.append(indices)
            cut.codebook_indices = writer.store_array(
                cut.id,
                indices,
                frame_shift=frame_shift,
                temporal_dim=0,
            )
        collated, lens = collate_custom_field(
            cuts, "codebook_indices", pad_value=pad_value
        )
        assert isinstance(lens, torch.Tensor)
        assert lens.dtype == torch.int32
        assert lens.shape == (len(cuts),)
        assert lens.tolist() == frames_per_cut
        assert isinstance(collated, torch.Tensor)
        assert collated.dtype == torch.int16
        assert collated.shape == (len(cuts), longest)
        fill = 0 if pad_value is None else pad_value
        for row, values in zip(collated, expected):
            n = values.shape[0]
            # PyTorch < 1.9.0 doesn't have an assert_equal function.
            np.testing.assert_equal(row[:n].numpy(), values)
            np.testing.assert_equal(row[n:].numpy(), fill)
def _with_custom_temporal_array(self, cut: MonoCut, frame_shift: Seconds) -> None:
    """Attach a random ``codebook_indices`` temporal array to ``cut``.

    The array is stored in an HDF5 file inside a fresh temporary
    directory, which is appended to ``self.dirs`` so the caller's
    cleanup can remove it later.
    """
    tmpdir = TemporaryDirectory()
    self.dirs.append(tmpdir)
    n_frames = seconds_to_frames(cut.duration, frame_shift=frame_shift)
    values = np.random.randint(256, size=(n_frames,))
    with NumpyHdf5Writer(tmpdir.name) as writer:
        cut.codebook_indices = writer.store_array(
            key="ali1",
            value=values,
            frame_shift=frame_shift,
            temporal_dim=0,
        )
def test_collate_custom_temporal_array_ints(pad_direction):
    """Collate an int16 custom temporal array under the given padding
    direction and verify the lengths, the (promoted) dtype, the batch
    shape, and exactly where the data and the pad value end up.
    """
    codebook_size = 512
    frame_shift = 0.04
    pad_fill = 0
    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    frames_per_cut = [seconds_to_frames(c.duration, frame_shift) for c in cuts]
    longest = max(frames_per_cut)
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        # Attach a random int16 codebook-index array to every cut and
        # keep the expected values for later comparison.
        expected = []
        for cut, n_frames in zip(cuts, frames_per_cut):
            indices = np.random.randint(codebook_size, size=(n_frames,)).astype(np.int16)
            expected.append(indices)
            cut.codebook_indices = writer.store_array(
                cut.id,
                indices,
                frame_shift=frame_shift,
                temporal_dim=0,
            )
        collated, lens = collate_custom_field(
            cuts, "codebook_indices", pad_direction=pad_direction
        )
        assert isinstance(lens, torch.Tensor)
        assert lens.dtype == torch.int32
        assert lens.shape == (len(cuts),)
        assert lens.tolist() == frames_per_cut
        assert isinstance(collated, torch.Tensor)
        # The dtype got promoted by default.
        assert collated.dtype == torch.int64
        assert collated.shape == (len(cuts), longest)
        for row, values in zip(collated, expected):
            n = values.shape[0]
            # PyTorch < 1.9.0 doesn't have an assert_equal function.
            if pad_direction == "right":
                np.testing.assert_equal(row[:n].numpy(), values)
                np.testing.assert_equal(row[n:].numpy(), pad_fill)
            elif pad_direction == "left":
                np.testing.assert_equal(row[-n:].numpy(), values)
                np.testing.assert_equal(row[:-n].numpy(), pad_fill)
            elif pad_direction == "both":
                left = (longest - n) // 2
                np.testing.assert_equal(row[:left].numpy(), pad_fill)
                np.testing.assert_equal(row[left:left + n].numpy(), values)
                if left > 0:
                    # Indexing like row[-0:] would return the whole row
                    # rather than an empty slice, so guard against left == 0.
                    np.testing.assert_equal(row[-left:].numpy(), pad_fill)