def test_extend_by_cut_with_supervision(
    cut_start,
    cut_duration,
    extend_duration,
    extend_direction,
    supervision_start,
    supervision_duration,
    expected_start,
    expected_end,
):
    """Check a supervision's boundaries after the owning cut is extended.

    A cut holding a single supervision is extended with ``extend_by`` using
    ``extend_duration`` and ``extend_direction``; the first supervision of the
    result must start at ``expected_start`` and end at ``expected_end``
    (compared with ``isclose``).

    All arguments are supplied from outside this function (presumably by a
    pytest parametrization that is not part of this block).
    """
    rec = dummy_recording(int(uuid4()), duration=1.0)
    segment = SupervisionSegment(
        id=int(uuid4()),
        recording_id=rec.id,
        start=supervision_start,
        duration=supervision_duration,
    )
    base_cut = dummy_cut(
        int(uuid4()),
        start=cut_start,
        duration=cut_duration,
        supervisions=SupervisionSet.from_segments([segment]),
    )

    result = base_cut.extend_by(
        duration=extend_duration, direction=extend_direction
    )

    assert isclose(result.supervisions[0].start, expected_start)
    assert isclose(result.supervisions[0].end, expected_end)
def test_cut_load_custom_recording_pad_left():
    """Check that a custom Recording attribute is zero-padded on the left.

    Random audio is saved to a temporary WAV file, attached to a cut as the
    custom ``my_favorite_song`` attribute, and the cut is padded on the left
    to 60 seconds. The loaded audio must begin with zeros and end with the
    original samples.
    """
    sampling_rate = 16000
    duration = 52.4
    num_samples = compute_num_samples(duration, sampling_rate)
    audio = np.random.randn(1, num_samples).astype(np.float32)
    audio /= np.abs(audio).max()  # normalize to [-1, 1]

    with NamedTemporaryFile(suffix=".wav") as f:
        torchaudio.save(f.name, torch.from_numpy(audio), sampling_rate)
        # Push the written data to disk before reading the file back.
        f.flush()
        os.fsync(f)
        recording = Recording.from_file(f.name)

        # Note: MonoCut doesn't normally have a "my_favorite_song" attribute,
        #       nor a "load_my_favorite_song()" method.
        #       We are dynamically extending it.
        cut = MonoCut(
            id="x",
            start=0,
            duration=duration,
            channel=0,
            recording=dummy_recording(0, duration=duration),
        )
        cut.my_favorite_song = recording

        padded_cut = cut.pad(duration=60.0, direction="left")
        restored_audio = padded_cut.load_my_favorite_song()

        assert restored_audio.shape == (1, 960000)  # 16000 * 60

        # Everything before the original samples must be the padding (zeros).
        original_len = audio.shape[1]
        np.testing.assert_almost_equal(0, restored_audio[:, :-original_len])
        np.testing.assert_almost_equal(audio, restored_audio[:, -original_len:])
def test_trim_to_unsupervised_segments():
    """Check that only the supervision-free gaps of each cut are returned."""
    # Yields 3 unsupervised cuts - before the first supervision,
    # between sup2 and sup3, and after sup3.
    cut_with_gaps = MonoCut(
        "cut1",
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment("sup1", "rec1", start=1.5, duration=8.5),
            SupervisionSegment("sup2", "rec1", start=10, duration=5),
            SupervisionSegment("sup3", "rec1", start=20, duration=8),
        ],
        recording=dummy_recording(1, duration=30),
    )
    # Does not yield any "unsupervised" cut: one supervision spans all of it.
    cut_without_gaps = MonoCut(
        "cut2",
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment("sup4", "rec1", start=0, duration=30),
        ],
        recording=dummy_recording(2, duration=30),
    )
    cut_set = CutSet.from_cuts([cut_with_gaps, cut_without_gaps])

    unsupervised_cuts = cut_set.trim_to_unsupervised_segments()

    assert len(unsupervised_cuts) == 3

    # (start, duration) of each expected gap, in order.
    expected_gaps = [(0, 1.5), (15, 5), (28, 2)]
    for index, (gap_start, gap_duration) in enumerate(expected_gaps):
        assert unsupervised_cuts[index].start == gap_start
        assert unsupervised_cuts[index].duration == gap_duration
        assert unsupervised_cuts[index].supervisions == []
def test_cut_load_array_pad():
    """Check that a custom array stored on a cut loads unchanged after padding."""
    expected_ivector = np.arange(20).astype(np.float32)

    with NamedTemporaryFile(suffix=".h5") as f, LilcomHdf5Writer(f.name) as writer:
        original_cut = MonoCut(
            id="x",
            start=0,
            duration=5,
            channel=0,
            recording=dummy_recording(1),
        )
        # "ivector" is a custom attribute attached dynamically to the cut.
        original_cut.ivector = writer.store_array(key="utt1", value=expected_ivector)

        padded_cut = original_cut.pad(duration=7.6)

        loaded_ivector = padded_cut.load_ivector()
        np.testing.assert_equal(expected_ivector, loaded_ivector)
def test_known_issue_with_overlap():
    """Check trimming to supervisions when one supervision lies inside another.

    With ``keep_overlapping=False`` each resulting cut must carry exactly one
    supervision, re-based to start at 0 within its cut.
    """
    recording = dummy_recording(0)
    recordings = RecordingSet.from_recordings([recording])

    # Make two segments. The first segment is 1s long. The second segment
    # is 0.5 seconds long and lies entirely within the first. Both have the
    # same recording_id as the single entry in the recording set.
    outer_segment = SupervisionSegment(
        id="utt1",
        recording_id=recording.id,
        start=0.0,
        duration=1.0,
        channel=0,
        text="Hello",
    )
    inner_segment = SupervisionSegment(
        id="utt2",
        recording_id=recording.id,
        start=0.2,
        duration=0.5,
        channel=0,
        text="World",
    )
    supervisions = SupervisionSet.from_segments([outer_segment, inner_segment])

    cuts = CutSet.from_manifests(recordings=recordings, supervisions=supervisions)
    assert len(cuts) == 1

    cuts_trim = cuts.trim_to_supervisions(keep_overlapping=False)
    assert len(cuts_trim) == 2

    # Cut trimmed to the outer ("Hello") supervision.
    outer_cut = cuts_trim[0]
    assert outer_cut.start == 0
    assert outer_cut.duration == 1
    assert len(outer_cut.supervisions) == 1
    outer_sup = outer_cut.supervisions[0]
    assert outer_sup.start == 0
    assert outer_sup.duration == 1
    assert outer_sup.text == "Hello"

    # Cut trimmed to the inner ("World") supervision.
    inner_cut = cuts_trim[1]
    assert inner_cut.start == 0.2
    assert inner_cut.duration == 0.5
    assert len(inner_cut.supervisions) == 1
    inner_sup = inner_cut.supervisions[0]
    assert inner_sup.start == 0
    assert inner_sup.duration == 0.5
    assert inner_sup.text == "World"
def test_padding_issue_478():
    """
    Regression test for https://github.com/lhotse-speech/lhotse/issues/478

    Two cuts with slightly different durations (4.9s and 4.895s) each carry a
    121-frame alignment stored with a 0.04s frame shift; collating the custom
    field must give back both alignments unchanged.
    """
    # (cut id / storage key, cut duration, dummy recording index)
    cut_specs = (
        ("c1", 4.9, 1),
        ("c2", 4.895, 2),
    )

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        prepared_cuts = []
        expected_alignments = []
        for cut_id, cut_duration, recording_index in cut_specs:
            # Prepare the cut and its alignment data.
            cut = MonoCut(
                cut_id,
                start=0,
                duration=cut_duration,
                channel=0,
                recording=dummy_recording(recording_index),
            )
            alignment = np.random.randint(500, size=(121,))
            cut.label_alignment = writer.store_array(
                cut_id, alignment, frame_shift=0.04, temporal_dim=0
            )
            prepared_cuts.append(cut)
            expected_alignments.append(alignment)

        # Test collation behavior on this cutset.
        cuts = CutSet.from_cuts(prepared_cuts)
        collated, _collated_lens = collate_custom_field(cuts, "label_alignment")

        for row, expected in enumerate(expected_alignments):
            np.testing.assert_equal(collated[row].numpy(), expected)
def test_cut_load_temporal_array_pad(pad_value):
    """Check that a temporal array on a cut is padded together with the cut.

    The frames added by padding must all equal ``pad_value``, which is passed
    to ``pad`` through ``pad_value_dict`` for the "alignment" field.
    """
    num_frames = 131

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        original_cut = MonoCut(
            id="x",
            start=0,
            duration=52.4,  # 131 frames x 0.4s frame shift == 52.4s
            channel=0,
            recording=dummy_recording(1),
        )
        alignment = np.random.randint(500, size=num_frames)
        original_cut.alignment = writer.store_array(
            key="utt1",
            value=alignment,
            frame_shift=0.4,
            temporal_dim=0,
        )

        padded_cut = original_cut.pad(
            duration=60.0,
            pad_value_dict={"alignment": pad_value},
        )
        padded_alignment = padded_cut.load_alignment()

        assert padded_alignment.shape == (150,)  # 60.0 / 0.4 == 150

        # Original frames come first, followed by the padding frames.
        np.testing.assert_equal(padded_alignment[:num_frames], alignment)
        np.testing.assert_equal(padded_alignment[num_frames:], pad_value)
def test_validate_cut_with_temporal_array(caplog):
    """Check that ``validate`` logs a warning about a duration mismatch.

    The cut lasts 4.9s, while its "alignment" temporal array holds 131 frames
    stored with a 0.4s frame shift.
    """
    # "caplog" is a pytest-provided variable that captures log records.
    caplog.set_level(logging.WARNING)

    expected_warning = (
        "MonoCut cut1: possibly mismatched duration between cut (4.9s) "
        "and temporal array in custom field 'alignment' (num_frames=131 "
        "* frame_shift=0.4 == duration=52.400000000000006)"
    )

    with NamedTemporaryFile(suffix=".h5") as f, NumpyHdf5Writer(f.name) as writer:
        cut = MonoCut(
            id="cut1",
            start=0,
            duration=4.9,
            channel=0,
            recording=dummy_recording(1),
        )
        frames = np.random.randint(500, size=131)
        cut.alignment = writer.store_array(
            key="utt1",
            value=frames,
            frame_shift=0.4,
            temporal_dim=0,
        )

        validate(cut)

        assert expected_warning in caplog.text
def test_extend_by_cut_with_temporal_array(
    cut_start,
    cut_duration,
    array_start,
    extend_duration,
    extend_direction,
    expected,
):
    """Check how ``extend_by`` treats a temporal array attached to a cut.

    When ``expected`` is truthy, the extended cut must still hold a
    ``temporal_array`` equal to the original one; otherwise loading the custom
    field from the extended cut must raise ``ValueError``.

    All arguments are supplied from outside this function (presumably by a
    pytest parametrization that is not part of this block).
    """
    cut_id = int(uuid4())
    recording = dummy_recording(int(uuid4()), duration=1.5)
    cut = dummy_cut(
        cut_id,
        start=cut_start,
        duration=cut_duration,
        features=None,
        recording=recording,
    )
    cut.temporal_array = dummy_temporal_array(start=array_start)

    extended_cut = cut.extend_by(
        duration=extend_duration, direction=extend_direction
    )

    if not expected:
        with pytest.raises(ValueError):
            _ = extended_cut.load_custom("temporal_array")
        return

    assert extended_cut.temporal_array == cut.temporal_array
def test_trim_to_supervisions_simple_cuts(keep_overlapping, num_jobs):
    """Check ``trim_to_supervisions`` on two simple cuts.

    The first cut has three supervisions, the first two of which overlap; the
    second cut has one supervision spanning all of it. Four cuts are expected.
    ``keep_overlapping`` decides whether a trimmed cut also keeps the
    supervision that overlaps its own one.
    """
    multi_supervision_cut = MonoCut(
        "cut1",
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment("sup1", "rec1", start=1.5, duration=10.5),
            SupervisionSegment("sup2", "rec1", start=10, duration=5),
            SupervisionSegment("sup3", "rec1", start=20, duration=8),
        ],
        recording=dummy_recording(1, duration=30),
    )
    single_supervision_cut = MonoCut(
        "cut2",
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment("sup4", "rec1", start=0, duration=30),
        ],
        recording=dummy_recording(2, duration=30),
    )
    cut_set = CutSet.from_cuts([multi_supervision_cut, single_supervision_cut])

    cuts = cut_set.trim_to_supervisions(
        keep_overlapping=keep_overlapping, num_jobs=num_jobs
    )
    assert len(cuts) == 4

    # Note: expected results for the first two cuts diverge depending on the
    # value of the keep_overlapping flag.
    # Each entry: (cut start, cut duration, [(sup id, sup start, sup duration)]).
    if keep_overlapping:
        overlapping_region = [
            (1.5, 10.5, [("sup1", 0, 10.5), ("sup2", 8.5, 5)]),
            (10, 5, [("sup1", -8.5, 10.5), ("sup2", 0, 5)]),
        ]
    else:
        overlapping_region = [
            (1.5, 10.5, [("sup1", 0, 10.5)]),
            (10, 5, [("sup2", 0, 5)]),
        ]

    for index, (cut_start, cut_duration, expected_sups) in enumerate(
        overlapping_region
    ):
        trimmed = cuts[index]
        assert trimmed.start == cut_start
        assert trimmed.duration == cut_duration
        assert len(trimmed.supervisions) == len(expected_sups)
        for position, (sup_id, sup_start, sup_duration) in enumerate(expected_sups):
            sup = trimmed.supervisions[position]
            assert sup.id == sup_id
            assert sup.start == sup_start
            assert sup.duration == sup_duration

    # Note: both test cases have the same results for the last two cuts.
    # Each entry: (index in cuts, cut start, cut duration, sup id).
    for index, cut_start, cut_duration, sup_id in (
        (2, 20, 8, "sup3"),
        (3, 0, 30, "sup4"),
    ):
        trimmed = cuts[index]
        assert len(trimmed.supervisions) == 1
        assert trimmed.start == cut_start
        assert trimmed.duration == cut_duration
        assert trimmed.supervisions[0].id == sup_id