Beispiel #1
0
def test_validate_cut_with_temporal_array(caplog):
    """Check that validating a cut with a mismatched temporal array logs a warning.

    The cut lasts 4.9s, while the ``alignment`` custom field holds 131 frames
    stored with a 0.4s frame shift. The test expects ``validate`` to report
    that mismatch in the captured log text.

    :param caplog: pytest's built-in fixture that captures log records.
    """
    caplog.set_level(logging.WARNING)

    expected_warning = (
        "MonoCut cut1: possibly mismatched duration between cut (4.9s) "
        "and temporal array in custom field 'alignment' (num_frames=131 "
        "* frame_shift=0.4 == duration=52.400000000000006)"
    )

    with NamedTemporaryFile(suffix=".h5") as tmp_file:
        with NumpyHdf5Writer(tmp_file.name) as array_writer:
            mono_cut = MonoCut(
                id="cut1",
                start=0,
                duration=4.9,
                channel=0,
                recording=dummy_recording(1),
            )
            frames = np.random.randint(500, size=131)
            mono_cut.alignment = array_writer.store_array(
                key="utt1",
                value=frames,
                frame_shift=0.4,
                temporal_dim=0,
            )
            validate(mono_cut)

    # The warning is checked after the temporary storage has been closed.
    assert expected_warning in caplog.text
Beispiel #2
0
def test_padding_issue_478():
    """
    Regression test for https://github.com/lhotse-speech/lhotse/issues/478

    Two cuts with slightly different durations (4.9s and 4.895s) each carry a
    121-frame ``label_alignment`` array stored with a 0.04s frame shift.
    After collating that custom field, every row of the batch must be equal
    to the array that was stored for the corresponding cut.
    """
    num_frames = 121
    frame_shift = 0.04

    with NamedTemporaryFile(suffix=".h5") as tmp_file:
        with NumpyHdf5Writer(tmp_file.name) as array_writer:

            def make_cut_with_alignment(cut_id, duration, recording_arg):
                # Build the cut first, then draw and store its alignment, so
                # the random draws happen in a fixed, per-cut order.
                new_cut = MonoCut(
                    cut_id,
                    start=0,
                    duration=duration,
                    channel=0,
                    recording=dummy_recording(recording_arg),
                )
                alignment = np.random.randint(500, size=(num_frames,))
                new_cut.label_alignment = array_writer.store_array(
                    cut_id,
                    alignment,
                    frame_shift=frame_shift,
                    temporal_dim=0,
                )
                return new_cut, alignment

            first_cut, first_ali = make_cut_with_alignment("c1", 4.9, 1)
            second_cut, second_ali = make_cut_with_alignment("c2", 4.895, 2)

            # Collate the custom field over both cuts; the lengths are not
            # inspected by this test.
            batch = CutSet.from_cuts([first_cut, second_cut])
            collated, _ = collate_custom_field(batch, "label_alignment")

            for row, reference in ((0, first_ali), (1, second_ali)):
                np.testing.assert_equal(collated[row].numpy(), reference)
Beispiel #3
0
def test_cut_load_temporal_array_pad(pad_value):
    """Check that padding a cut also pads the temporal array attached to it.

    A 52.4s cut gets a 131-frame ``alignment`` array (0.4s frame shift). The
    cut is padded to 60.0s with ``pad_value`` registered for the
    ``alignment`` field. The loaded array is then expected to hold 150
    frames: the original 131 values followed by ``pad_value``.

    :param pad_value: the padding value for the ``alignment`` field; supplied
        from outside this function (presumably a pytest parametrization that
        is not visible here).
    """
    num_frames = 131
    padded_num_frames = 150  # 60.0 / 0.4 == 150

    with NamedTemporaryFile(suffix=".h5") as tmp_file:
        with NumpyHdf5Writer(tmp_file.name) as array_writer:
            source_cut = MonoCut(
                id="x",
                start=0,
                # 131 frames x 0.4s frame shift == 52.4s
                duration=52.4,
                channel=0,
                recording=dummy_recording(1),
            )

            original = np.random.randint(500, size=num_frames)
            source_cut.alignment = array_writer.store_array(
                key="utt1",
                value=original,
                frame_shift=0.4,
                temporal_dim=0,
            )

            padded_cut = source_cut.pad(
                duration=60.0,
                pad_value_dict={"alignment": pad_value},
            )
            loaded = padded_cut.load_alignment()

            assert loaded.shape == (padded_num_frames,)
            # The leading frames are the stored data, the tail is padding.
            np.testing.assert_equal(loaded[:num_frames], original)
            np.testing.assert_equal(loaded[num_frames:], pad_value)
Beispiel #4
0
def test_collate_custom_temporal_array_ints(pad_direction):
    """Check collation of an integer temporal array stored as a custom field.

    Every cut from the LJSpeech fixture gets a random int16 ``codebook_indices``
    array whose length matches the cut duration at a 0.04s frame shift. The
    test then asserts that ``collate_custom_field`` returns:

    * an int32 lengths tensor holding the per-cut frame counts, and
    * an int64 data tensor of shape ``(num_cuts, max_num_frames)`` in which
      the stored values sit where ``pad_direction`` dictates and the remaining
      positions hold the expected pad value (0).

    :param pad_direction: one of ``"right"``, ``"left"`` or ``"both"``;
        supplied from outside this function (presumably a pytest
        parametrization that is not visible here).
    """
    CODEBOOK_SIZE = 512
    FRAME_SHIFT = 0.04
    EXPECTED_PAD_VALUE = 0

    cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    max_num_frames = max(
        seconds_to_frames(cut.duration, FRAME_SHIFT) for cut in cuts
    )

    with NamedTemporaryFile(suffix=".h5") as tmp_file:
        with NumpyHdf5Writer(tmp_file.name) as array_writer:
            references = []
            for cut in cuts:
                frame_count = seconds_to_frames(cut.duration, FRAME_SHIFT)
                indices = np.random.randint(
                    CODEBOOK_SIZE, size=(frame_count,)
                ).astype(np.int16)
                references.append(indices)
                cut.codebook_indices = array_writer.store_array(
                    cut.id,
                    indices,
                    frame_shift=FRAME_SHIFT,
                    temporal_dim=0,
                )

            collated, collated_lens = collate_custom_field(
                cuts, "codebook_indices", pad_direction=pad_direction
            )

            # Lengths: one int32 entry per cut, equal to its frame count.
            assert isinstance(collated_lens, torch.Tensor)
            assert collated_lens.dtype == torch.int32
            assert collated_lens.shape == (len(cuts),)
            assert collated_lens.tolist() == [
                seconds_to_frames(c.duration, FRAME_SHIFT) for c in cuts
            ]

            # Data: the dtype got promoted by default (int16 -> int64).
            assert isinstance(collated, torch.Tensor)
            assert collated.dtype == torch.int64
            assert collated.shape == (len(cuts), max_num_frames)

            # PyTorch < 1.9.0 doesn't have an assert_equal function, so the
            # comparisons go through numpy.
            for row_idx, reference in enumerate(references):
                row = collated[row_idx].numpy()
                length = reference.shape[0]

                if pad_direction == "right":
                    np.testing.assert_equal(row[:length], reference)
                    np.testing.assert_equal(row[length:], EXPECTED_PAD_VALUE)
                elif pad_direction == "left":
                    np.testing.assert_equal(row[-length:], reference)
                    np.testing.assert_equal(row[:-length], EXPECTED_PAD_VALUE)
                elif pad_direction == "both":
                    left_pad = (max_num_frames - length) // 2
                    np.testing.assert_equal(row[:left_pad], EXPECTED_PAD_VALUE)
                    np.testing.assert_equal(
                        row[left_pad:left_pad + length], reference
                    )
                    if left_pad > 0:
                        # Slicing with [-0:] would return the whole row
                        # rather than an empty slice, so skip that case.
                        np.testing.assert_equal(
                            row[-left_pad:], EXPECTED_PAD_VALUE
                        )