def test_cut_with_temporal_array_move_to_memory_large_offset():
    """A custom TemporalArray stored with a large ``start`` offset must survive
    ``move_to_memory`` intact, both for full and for truncated reads."""
    cut = CutSet.from_file("test/fixtures/libri/cuts.json")[0]
    cut.start = 10.0
    cut.duration = 1.5
    with NamedTemporaryFile(suffix=".h5") as tmpf, NumpyHdf5Writer(tmpf.name) as writer:
        num_frames = compute_num_frames(
            cut.duration, frame_shift=0.01, sampling_rate=16000
        )
        data = np.array(np.arange(num_frames))
        cut.custom_array = writer.store_array(
            key="dummy-key",
            value=data,
            frame_shift=0.01,
            temporal_dim=0,
            start=cut.start,
        )
        memory_cut = cut.move_to_memory()

        # Full reads from memory must agree in dtype and contents.
        loaded = memory_cut.load_custom_array()
        assert data.dtype == loaded.dtype
        np.testing.assert_equal(data, loaded)

        # Truncated reads from disk and from memory must also agree.
        disk_piece = cut.truncate(duration=0.5).load_custom_array()
        mem_piece = memory_cut.truncate(duration=0.5).load_custom_array()
        assert disk_piece.dtype == mem_piece.dtype
        np.testing.assert_equal(disk_piece, mem_piece)
def test_temporal_array_partial_read():
    """A stored TemporalArray supports partial reads via start/duration."""
    values = np.arange(30).astype(np.int8)
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as w:
        manifest = w.store_array(
            key="utt1",
            value=values,
            temporal_dim=0,
            frame_shift=0.5,
            start=0.0,
        )
        # Whole array.
        np.testing.assert_equal(values, manifest.load())
        # First 10 frames (0 - 5 seconds).
        np.testing.assert_equal(values[:10], manifest.load(duration=5))
        # Last 10 frames (10 - 15 seconds), with and without explicit duration.
        np.testing.assert_equal(values[-10:], manifest.load(start=10))
        np.testing.assert_equal(values[-10:], manifest.load(start=10, duration=5))
        # Middle 10 frames (5 - 10 seconds).
        np.testing.assert_equal(values[10:20], manifest.load(start=5, duration=5))
def _with_custom_temporal_array(self, cut: MonoCut, frame_shift: Seconds) -> None:
    """Attach a random integer TemporalArray as ``cut.codebook_indices``.

    The backing TemporaryDirectory is appended to ``self.dirs`` so it stays
    alive for as long as the fixture needs it.
    """
    tmpdir = TemporaryDirectory()
    self.dirs.append(tmpdir)
    n = seconds_to_frames(cut.duration, frame_shift=frame_shift)
    values = np.random.randint(256, size=(n,))
    with NumpyHdf5Writer(tmpdir.name) as storage:
        cut.codebook_indices = storage.store_array(
            key="ali1", value=values, frame_shift=frame_shift, temporal_dim=0
        )
def test_cut_with_array_move_to_memory():
    """A non-temporal custom array survives ``move_to_memory`` unchanged."""
    cut = CutSet.from_file("test/fixtures/libri/cuts.json")[0]
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        data = np.array([0, 1, 2, 3])
        cut.custom_array = writer.store_array(key="dummy-key", value=data)
        loaded = cut.move_to_memory().load_custom_array()
        assert data.dtype == loaded.dtype
        np.testing.assert_equal(data, loaded)
def test_cut_load_temporal_array():
    """Check that we can read a TemporalArray from a cut when their durations match."""
    alignment = np.random.randint(500, size=131)
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        manifest = writer.store_array(
            key="utt1", value=alignment, frame_shift=0.4, temporal_dim=0
        )
        # 131 frames x 0.4s frame shift == 52.4s
        cut = MonoCut(id="x", start=0, duration=52.4, channel=0)
        # MonoCut doesn't normally have an "alignment" attribute or a
        # "load_alignment()" method -- we are dynamically extending it.
        cut.alignment = manifest
        np.testing.assert_equal(alignment, cut.load_alignment())
def test_cut_load_temporal_array_truncate():
    """Check the array loaded via TemporalArray is truncated along with the cut."""
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        # 131 frames x 0.4s frame shift == 52.4s
        cut = MonoCut(id="x", start=0, duration=52.4, channel=0)
        alignment = np.random.randint(500, size=131)
        cut.alignment = writer.store_array(
            key="utt1", value=alignment, frame_shift=0.4, temporal_dim=0
        )
        piece = cut.truncate(duration=5.0).load_alignment()
        # 5.0 / 0.4 == 12.5 ~= 13 frames survive the truncation.
        assert piece.shape == (13,)
        np.testing.assert_equal(alignment[:13], piece)
def test_collate_custom_temporal_array_ints(pad_value):
    """Collating int16 codebook indices pads to the max length with ``pad_value``."""
    CODEBOOK_SIZE = 512
    FRAME_SHIFT = 0.04
    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    max_num_frames = max(
        seconds_to_frames(cut.duration, FRAME_SHIFT) for cut in cuts
    )
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        expected = []
        for cut in cuts:
            num_frames = seconds_to_frames(cut.duration, FRAME_SHIFT)
            codes = np.random.randint(CODEBOOK_SIZE, size=(num_frames,)).astype(np.int16)
            expected.append(codes)
            cut.codebook_indices = writer.store_array(
                cut.id,
                codes,
                frame_shift=FRAME_SHIFT,
                temporal_dim=0,
            )
        codebook_indices, codebook_indices_lens = collate_custom_field(
            cuts, "codebook_indices", pad_value=pad_value
        )
        # Lengths: one int32 entry per cut, matching the stored frame counts.
        assert isinstance(codebook_indices_lens, torch.Tensor)
        assert codebook_indices_lens.dtype == torch.int32
        assert codebook_indices_lens.shape == (len(cuts),)
        assert codebook_indices_lens.tolist() == [
            seconds_to_frames(c.duration, FRAME_SHIFT) for c in cuts
        ]
        # Values: int16 dtype is preserved, shape padded to the longest cut.
        assert isinstance(codebook_indices, torch.Tensor)
        assert codebook_indices.dtype == torch.int16
        assert codebook_indices.shape == (len(cuts), max_num_frames)
        expected_pad_value = 0 if pad_value is None else pad_value
        for idx, codes in enumerate(expected):
            exp_len = codes.shape[0]
            # PyTorch < 1.9.0 doesn't have an assert_equal function.
            np.testing.assert_equal(codebook_indices[idx, :exp_len].numpy(), codes)
            np.testing.assert_equal(
                codebook_indices[idx, exp_len:].numpy(), expected_pad_value
            )
def test_collate_custom_array():
    """Collating fixed-size (non-temporal) custom arrays stacks them directly."""
    EMBEDDING_SIZE = 300
    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        expected = []
        for cut in cuts:
            vec = np.random.randn(EMBEDDING_SIZE).astype(np.float32)
            expected.append(vec)
            cut.xvector = writer.store_array(cut.id, vec)
        xvectors = collate_custom_field(cuts, "xvector")
        assert isinstance(xvectors, torch.Tensor)
        assert xvectors.dtype == torch.float32
        assert xvectors.shape == (len(cuts), EMBEDDING_SIZE)
        for idx, vec in enumerate(expected):
            torch.testing.assert_allclose(xvectors[idx], vec)
def test_cut_load_temporal_array_pad(pad_value):
    """Check the array loaded via TemporalArray is padded along with the cut."""
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        cut = MonoCut(
            id="x",
            start=0,
            duration=52.4,  # 131 frames x 0.4s frame shift == 52.4s
            channel=0,
            recording=dummy_recording(1),
        )
        alignment = np.random.randint(500, size=131)
        cut.alignment = writer.store_array(
            key="utt1", value=alignment, frame_shift=0.4, temporal_dim=0
        )
        padded_cut = cut.pad(duration=60.0, pad_value_dict={"alignment": pad_value})
        padded = padded_cut.load_alignment()
        # 60.0 / 0.4 == 150 frames after padding.
        assert padded.shape == (150,)
        np.testing.assert_equal(padded[:131], alignment)
        np.testing.assert_equal(padded[131:], pad_value)
def test_collate_custom_temporal_array_floats(pad_value):
    """Collating float32 posteriors pads the temporal dim with ``pad_value``."""
    VOCAB_SIZE = 500
    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    max_num_frames = max(c.num_frames for c in cuts)
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        expected = []
        for cut in cuts:
            post = np.random.randn(cut.num_frames, VOCAB_SIZE).astype(np.float32)
            expected.append(post)
            cut.posterior = writer.store_array(
                cut.id,
                post,
                frame_shift=cut.frame_shift,
                temporal_dim=0,
            )
        posteriors, posterior_lens = collate_custom_field(
            cuts, "posterior", pad_value=pad_value
        )
        # Lengths: int32, one entry per cut.
        assert isinstance(posterior_lens, torch.Tensor)
        assert posterior_lens.dtype == torch.int32
        assert posterior_lens.shape == (len(cuts),)
        assert posterior_lens.tolist() == [c.num_frames for c in cuts]
        # Values: float32, padded along the frame axis only.
        assert isinstance(posteriors, torch.Tensor)
        assert posteriors.dtype == torch.float32
        assert posteriors.shape == (len(cuts), max_num_frames, VOCAB_SIZE)
        expected_pad_value = 0 if pad_value is None else pad_value
        for idx, post in enumerate(expected):
            exp_len = post.shape[0]
            torch.testing.assert_allclose(posteriors[idx, :exp_len], post)
            torch.testing.assert_allclose(
                posteriors[idx, exp_len:],
                expected_pad_value * torch.ones_like(posteriors[idx, exp_len:]),
            )
def test_validate_cut_with_temporal_array(caplog):
    """``validate`` warns when a cut's duration disagrees with its TemporalArray."""
    # Note: "caplog" is a special variable in pytest that captures logs.
    caplog.set_level(logging.WARNING)
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        cut = MonoCut(
            id="cut1",
            start=0,
            duration=4.9,
            channel=0,
            recording=dummy_recording(1),
        )
        # 131 frames x 0.4s frame shift is far longer than the cut's 4.9s.
        cut.alignment = writer.store_array(
            key="utt1",
            value=np.random.randint(500, size=131),
            frame_shift=0.4,
            temporal_dim=0,
        )
        validate(cut)
        expected_warning = (
            "MonoCut cut1: possibly mismatched duration between cut (4.9s) "
            "and temporal array in custom field 'alignment' (num_frames=131 "
            "* frame_shift=0.4 == duration=52.400000000000006)"
        )
        assert expected_warning in caplog.text
def test_padding_issue_478():
    """
    Regression test for collation of near-equal-duration cuts:
    https://github.com/lhotse-speech/lhotse/issues/478
    """
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        # Prepare data for cut 1.
        first_cut = MonoCut(
            "c1", start=0, duration=4.9, channel=0, recording=dummy_recording(1)
        )
        expected1 = np.random.randint(500, size=(121,))
        first_cut.label_alignment = writer.store_array(
            "c1", expected1, frame_shift=0.04, temporal_dim=0
        )
        # Prepare data for cut 2 (slightly shorter, same number of frames).
        second_cut = MonoCut(
            "c2", start=0, duration=4.895, channel=0, recording=dummy_recording(2)
        )
        expected2 = np.random.randint(500, size=(121,))
        second_cut.label_alignment = writer.store_array(
            "c2", expected2, frame_shift=0.04, temporal_dim=0
        )
        # Test collation behavior on this cutset.
        cuts = CutSet.from_cuts([first_cut, second_cut])
        label_alignments, label_alignment_lens = collate_custom_field(
            cuts, "label_alignment"
        )
        np.testing.assert_equal(label_alignments[0].numpy(), expected1)
        np.testing.assert_equal(label_alignments[1].numpy(), expected2)
def test_collate_custom_temporal_array_ints_pad_direction(pad_direction):
    """Collating int16 codebook indices with left/right/both ``pad_direction``.

    Renamed from ``test_collate_custom_temporal_array_ints``: a function with
    that exact name is already defined earlier in this module (the
    ``pad_value`` variant), and the duplicate definition shadowed it, so
    pytest silently never collected or ran the earlier test.
    """
    CODEBOOK_SIZE = 512
    FRAME_SHIFT = 0.04
    EXPECTED_PAD_VALUE = 0
    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    max_num_frames = max(
        seconds_to_frames(cut.duration, FRAME_SHIFT) for cut in cuts
    )
    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        expected_codebook_indices = []
        for cut in cuts:
            num_frames = seconds_to_frames(cut.duration, FRAME_SHIFT)
            expected_codebook_indices.append(
                np.random.randint(CODEBOOK_SIZE, size=(num_frames,)).astype(np.int16)
            )
            cut.codebook_indices = writer.store_array(
                cut.id,
                expected_codebook_indices[-1],
                frame_shift=FRAME_SHIFT,
                temporal_dim=0,
            )
        codebook_indices, codebook_indices_lens = collate_custom_field(
            cuts, "codebook_indices", pad_direction=pad_direction
        )
        # Lengths: int32, one entry per cut, matching the stored frame counts.
        assert isinstance(codebook_indices_lens, torch.Tensor)
        assert codebook_indices_lens.dtype == torch.int32
        assert codebook_indices_lens.shape == (len(cuts),)
        assert codebook_indices_lens.tolist() == [
            seconds_to_frames(c.duration, FRAME_SHIFT) for c in cuts
        ]
        assert isinstance(codebook_indices, torch.Tensor)
        # The dtype got promoted to int64 by default (no pad_value given here).
        assert codebook_indices.dtype == torch.int64
        assert codebook_indices.shape == (len(cuts), max_num_frames)
        for idx, cbidxs in enumerate(expected_codebook_indices):
            exp_len = cbidxs.shape[0]
            # PyTorch < 1.9.0 doesn't have an assert_equal function.
            if pad_direction == "right":
                np.testing.assert_equal(
                    codebook_indices[idx, :exp_len].numpy(), cbidxs
                )
                np.testing.assert_equal(
                    codebook_indices[idx, exp_len:].numpy(), EXPECTED_PAD_VALUE
                )
            if pad_direction == "left":
                np.testing.assert_equal(
                    codebook_indices[idx, -exp_len:].numpy(), cbidxs
                )
                np.testing.assert_equal(
                    codebook_indices[idx, :-exp_len].numpy(), EXPECTED_PAD_VALUE
                )
            if pad_direction == "both":
                half = (max_num_frames - exp_len) // 2
                np.testing.assert_equal(
                    codebook_indices[idx, :half].numpy(), EXPECTED_PAD_VALUE
                )
                np.testing.assert_equal(
                    codebook_indices[idx, half:half + exp_len].numpy(), cbidxs
                )
                if half > 0:
                    # Indexing like [idx, -0:] would return the whole array
                    # rather than an empty slice.
                    np.testing.assert_equal(
                        codebook_indices[idx, -half:].numpy(), EXPECTED_PAD_VALUE
                    )