Beispiel #1
0
def test_extend_by_cut_with_supervision(
    cut_start,
    cut_duration,
    extend_duration,
    extend_direction,
    supervision_start,
    supervision_duration,
    expected_start,
    expected_end,
):
    """Check the first supervision's start/end after ``cut.extend_by()``.

    A cut holding a single supervision is built on a 1.0s dummy recording,
    extended by ``extend_duration`` in ``extend_direction``, and the start
    and end of the resulting cut's first supervision are compared (with
    ``isclose``) against ``expected_start`` and ``expected_end``.
    """
    recording = dummy_recording(int(uuid4()), duration=1.0)
    segment = SupervisionSegment(
        id=int(uuid4()),
        recording_id=recording.id,
        start=supervision_start,
        duration=supervision_duration,
    )
    cut = dummy_cut(
        int(uuid4()),
        start=cut_start,
        duration=cut_duration,
        supervisions=SupervisionSet.from_segments([segment]),
    )

    extended = cut.extend_by(
        duration=extend_duration, direction=extend_direction
    )

    first_supervision = extended.supervisions[0]
    assert isclose(first_supervision.start, expected_start)
    assert isclose(first_supervision.end, expected_end)
Beispiel #2
0
def test_cut_load_custom_recording_pad_left():
    """Check that a custom Recording field is zero-padded on the left.

    Random audio is written to a temporary WAV file, attached to a cut as
    the custom field ``my_favorite_song``, and the cut is padded on the left
    to 60s. The loaded custom audio must have the padded shape, be
    (almost) zero in the padded region, and match the source audio at the end.
    """
    sampling_rate = 16000
    duration = 52.4
    num_samples = compute_num_samples(duration, sampling_rate)
    audio = np.random.randn(1, num_samples).astype(np.float32)
    audio /= np.abs(audio).max()  # normalize to [-1, 1]

    with NamedTemporaryFile(suffix=".wav") as f:
        torchaudio.save(f.name, torch.from_numpy(audio), sampling_rate)
        # Flush and fsync before the file is re-opened by name below.
        f.flush()
        os.fsync(f)
        recording = Recording.from_file(f.name)

        # Note: "my_favorite_song" and "load_my_favorite_song()" are not
        #       declared here; the attribute is attached to the cut
        #       dynamically as a custom field.
        cut = MonoCut(
            id="x",
            start=0,
            duration=duration,
            channel=0,
            recording=dummy_recording(0, duration=duration),
        )
        cut.my_favorite_song = recording

        padded_cut = cut.pad(duration=60.0, direction="left")
        restored_audio = padded_cut.load_my_favorite_song()

        assert restored_audio.shape == (1, 960000)  # 16000 * 60

        # Everything before the last `source_len` samples is padding.
        source_len = audio.shape[1]
        padding_part = restored_audio[:, :-source_len]
        signal_part = restored_audio[:, -source_len:]
        np.testing.assert_almost_equal(0, padding_part)
        np.testing.assert_almost_equal(audio, signal_part)
Beispiel #3
0
def test_trim_to_unsupervised_segments():
    """Check which cuts ``CutSet.trim_to_unsupervised_segments()`` returns.

    The first cut has three gaps without supervision; the second one is
    covered entirely by a single supervision and contributes nothing.
    """
    # Yields 3 unsupervised cuts - before first supervision,
    # between sup2 and sup3, and after sup3.
    cut_with_gaps = MonoCut(
        "cut1",
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment("sup1", "rec1", start=1.5, duration=8.5),
            SupervisionSegment("sup2", "rec1", start=10, duration=5),
            SupervisionSegment("sup3", "rec1", start=20, duration=8),
        ],
        recording=dummy_recording(1, duration=30),
    )
    # Does not yield any "unsupervised" cut.
    cut_fully_covered = MonoCut(
        "cut2",
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment("sup4", "rec1", start=0, duration=30),
        ],
        recording=dummy_recording(2, duration=30),
    )
    cut_set = CutSet.from_cuts([cut_with_gaps, cut_fully_covered])

    unsupervised_cuts = cut_set.trim_to_unsupervised_segments()

    # (start, duration) of each expected unsupervised region, in order.
    expected_regions = [(0, 1.5), (15, 5), (28, 2)]
    assert len(unsupervised_cuts) == 3

    for idx, (region_start, region_duration) in enumerate(expected_regions):
        assert unsupervised_cuts[idx].start == region_start
        assert unsupervised_cuts[idx].duration == region_duration
        assert unsupervised_cuts[idx].supervisions == []
Beispiel #4
0
def test_cut_load_array_pad():
    """Check that loading a custom Array works after padding."""
    expected_ivector = np.arange(20).astype(np.float32)
    with NamedTemporaryFile(suffix=".h5") as f:
        with LilcomHdf5Writer(f.name) as writer:
            original_cut = MonoCut(
                id="x",
                start=0,
                duration=5,
                channel=0,
                recording=dummy_recording(1),
            )
            original_cut.ivector = writer.store_array(
                key="utt1", value=expected_ivector
            )

            padded_cut = original_cut.pad(duration=7.6)

            # The array loaded from the padded cut must equal the stored one.
            loaded_ivector = padded_cut.load_ivector()
            np.testing.assert_equal(expected_ivector, loaded_ivector)
Beispiel #5
0
def test_known_issue_with_overlap():
    """Check ``trim_to_supervisions(keep_overlapping=False)`` on nested segments.

    Each resulting cut must span exactly one supervision and carry only that
    supervision, re-based to start at 0 within the cut.
    """
    recording = dummy_recording(0)
    recordings = RecordingSet.from_recordings([recording])

    # Make two segments. The first segment is 1s long. The second segment
    # is 0.5 seconds long and lies entirely within the first. Both have the
    # same recording_id as the single entry in `recordings`.
    outer_segment = SupervisionSegment(
        id="utt1",
        recording_id=recording.id,
        start=0.0,
        duration=1.0,
        channel=0,
        text="Hello",
    )
    inner_segment = SupervisionSegment(
        id="utt2",
        recording_id=recording.id,
        start=0.2,
        duration=0.5,
        channel=0,
        text="World",
    )
    supervisions = SupervisionSet.from_segments([outer_segment, inner_segment])

    cuts = CutSet.from_manifests(recordings=recordings, supervisions=supervisions)
    assert len(cuts) == 1

    cuts_trim = cuts.trim_to_supervisions(keep_overlapping=False)
    assert len(cuts_trim) == 2

    # (cut start, cut duration, supervision text) for each trimmed cut; the
    # supervision inside each cut starts at 0 and lasts as long as the cut.
    expectations = [
        (0, 1, "Hello"),
        (0.2, 0.5, "World"),
    ]
    for position, (want_start, want_duration, want_text) in enumerate(expectations):
        trimmed = cuts_trim[position]
        assert trimmed.start == want_start
        assert trimmed.duration == want_duration
        assert len(trimmed.supervisions) == 1
        segment = trimmed.supervisions[0]
        assert segment.start == 0
        assert segment.duration == want_duration
        assert segment.text == want_text
Beispiel #6
0
def test_padding_issue_478():
    """
    Collating a custom temporal array over cuts of slightly different length.

    Two cuts (4.9s and 4.895s) each carry a 121-frame ``label_alignment``;
    after ``collate_custom_field`` both rows must equal the stored arrays.

    https://github.com/lhotse-speech/lhotse/issues/478
    """
    with NamedTemporaryFile(suffix=".h5") as f:
        with NumpyHdf5Writer(f.name) as writer:

            def make_cut_with_alignment(cut_id, duration, recording_idx):
                # Build the cut, draw a random alignment, store it in the
                # HDF5 file and attach it as the "label_alignment" field.
                cut = MonoCut(
                    cut_id,
                    start=0,
                    duration=duration,
                    channel=0,
                    recording=dummy_recording(recording_idx),
                )
                alignment = np.random.randint(500, size=(121,))
                cut.label_alignment = writer.store_array(
                    cut_id, alignment, frame_shift=0.04, temporal_dim=0
                )
                return cut, alignment

            # Prepare data for cut 1 and cut 2.
            cut1, ali1 = make_cut_with_alignment("c1", 4.9, 1)
            cut2, ali2 = make_cut_with_alignment("c2", 4.895, 2)

            # Test collation behavior on this cutset.
            cuts = CutSet.from_cuts([cut1, cut2])
            collated, _lens = collate_custom_field(cuts, "label_alignment")

            for row, expected_alignment in enumerate((ali1, ali2)):
                np.testing.assert_equal(
                    collated[row].numpy(), expected_alignment
                )
Beispiel #7
0
def test_cut_load_temporal_array_pad(pad_value):
    """Check the array loaded via TemporalArray is padded along with the cut."""
    num_frames = 131
    num_padded_frames = 150  # 60.0 / 0.4 == 150
    with NamedTemporaryFile(suffix=".h5") as f:
        with NumpyHdf5Writer(f.name) as writer:
            source_cut = MonoCut(
                id="x",
                start=0,
                duration=52.4,  # 131 frames x 0.4s frame shift == 52.4s
                channel=0,
                recording=dummy_recording(1),
            )

            alignment = np.random.randint(500, size=num_frames)
            source_cut.alignment = writer.store_array(
                key="utt1", value=alignment, frame_shift=0.4, temporal_dim=0
            )

            padded_cut = source_cut.pad(
                duration=60.0, pad_value_dict={"alignment": pad_value}
            )
            loaded = padded_cut.load_alignment()

            assert loaded.shape == (num_padded_frames,)
            # The leading frames are the stored data, the rest is padding.
            np.testing.assert_equal(loaded[:num_frames], alignment)
            np.testing.assert_equal(loaded[num_frames:], pad_value)
Beispiel #8
0
def test_validate_cut_with_temporal_array(caplog):
    """Check that ``validate`` logs a duration-mismatch message.

    The cut lasts 4.9s while its ``alignment`` array has 131 frames with a
    0.4s frame shift; the captured log text must contain the message below.
    """
    # Note: "caplog" is a special variable in pytest that captures logs.
    caplog.set_level(logging.WARNING)

    expected_message = (
        "MonoCut cut1: possibly mismatched duration between cut (4.9s) "
        "and temporal array in custom field 'alignment' (num_frames=131 "
        "* frame_shift=0.4 == duration=52.400000000000006)"
    )

    with NamedTemporaryFile(suffix=".h5") as f:
        with NumpyHdf5Writer(f.name) as writer:
            cut = MonoCut(
                id="cut1",
                start=0,
                duration=4.9,
                channel=0,
                recording=dummy_recording(1),
            )
            alignment = np.random.randint(500, size=131)
            cut.alignment = writer.store_array(
                key="utt1", value=alignment, frame_shift=0.4, temporal_dim=0
            )
            validate(cut)

    assert expected_message in caplog.text
def test_extend_by_cut_with_temporal_array(
    cut_start,
    cut_duration,
    array_start,
    extend_duration,
    extend_direction,
    expected,
):
    """Check a custom temporal array on a cut after ``cut.extend_by()``.

    When ``expected`` is truthy, the extended cut must carry a temporal
    array equal to the original one; otherwise loading the custom field
    from the extended cut must raise ``ValueError``.
    """
    cut = dummy_cut(
        int(uuid4()),
        start=cut_start,
        duration=cut_duration,
        features=None,
        recording=dummy_recording(int(uuid4()), duration=1.5),
    )
    cut.temporal_array = dummy_temporal_array(start=array_start)

    extended_cut = cut.extend_by(
        duration=extend_duration, direction=extend_direction
    )

    if not expected:
        with pytest.raises(ValueError):
            _ = extended_cut.load_custom("temporal_array")
        return

    assert extended_cut.temporal_array == cut.temporal_array
Beispiel #10
0
def test_trim_to_supervisions_simple_cuts(keep_overlapping, num_jobs):
    """Check ``CutSet.trim_to_supervisions()`` on two simple cuts.

    Four cuts are expected, one per supervision. For the first two
    (overlapping) supervisions, the set of supervisions kept in each
    trimmed cut depends on ``keep_overlapping``.
    """

    def check_supervisions(cut, expected):
        # `expected` is a list of (id, start, duration), one per supervision.
        assert len(cut.supervisions) == len(expected)
        for segment, (seg_id, seg_start, seg_duration) in zip(
            cut.supervisions, expected
        ):
            assert segment.id == seg_id
            assert segment.start == seg_start
            assert segment.duration == seg_duration

    multi_supervision_cut = MonoCut(
        "cut1",
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment("sup1", "rec1", start=1.5, duration=10.5),
            SupervisionSegment("sup2", "rec1", start=10, duration=5),
            SupervisionSegment("sup3", "rec1", start=20, duration=8),
        ],
        recording=dummy_recording(1, duration=30),
    )
    single_supervision_cut = MonoCut(
        "cut2",
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment("sup4", "rec1", start=0, duration=30),
        ],
        recording=dummy_recording(2, duration=30),
    )
    cut_set = CutSet.from_cuts([multi_supervision_cut, single_supervision_cut])

    cuts = cut_set.trim_to_supervisions(
        keep_overlapping=keep_overlapping, num_jobs=num_jobs
    )
    assert len(cuts) == 4

    # Note: expected results diverge here depending on the value of keep_overlapping flag
    trimmed = cuts[0]
    assert trimmed.start == 1.5
    assert trimmed.duration == 10.5
    if keep_overlapping:
        check_supervisions(trimmed, [("sup1", 0, 10.5), ("sup2", 8.5, 5)])
    else:
        check_supervisions(trimmed, [("sup1", 0, 10.5)])

    # Note: expected results diverge here depending on the value of keep_overlapping flag
    trimmed = cuts[1]
    assert trimmed.start == 10
    assert trimmed.duration == 5
    if keep_overlapping:
        check_supervisions(trimmed, [("sup1", -8.5, 10.5), ("sup2", 0, 5)])
    else:
        check_supervisions(trimmed, [("sup2", 0, 5)])

    # Note: both test cases have same results
    trimmed = cuts[2]
    assert len(trimmed.supervisions) == 1
    assert trimmed.start == 20
    assert trimmed.duration == 8
    assert trimmed.supervisions[0].id == "sup3"

    # Note: both test cases have same results
    trimmed = cuts[3]
    assert len(trimmed.supervisions) == 1
    assert trimmed.start == 0
    assert trimmed.duration == 30
    assert trimmed.supervisions[0].id == "sup4"