コード例 #1
0
def cut_set():
    """Build a ``CutSet`` holding one reference cut and seven variants of it.

    The reference ``MonoCut`` ("cut-1") carries features, a recording and two
    supervisions.  The variants, in order, are: copies with the supervisions
    emptied, the recording removed, or the features removed; the cut padded
    with ``duration=30.0`` in each direction ("left", "right", "both"); and
    the cut mixed with itself.
    """
    fbank = Features(
        type="fbank",
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type="lilcom",
        storage_path="irrelevant",
        storage_key="irrelevant",
    )
    audio_source = AudioSource(type="file", channels=[0], source="irrelevant")
    rec = Recording(
        id="rec-1",
        sampling_rate=16000,
        num_samples=160000,
        duration=10.0,
        sources=[audio_source],
    )
    segments = [
        SupervisionSegment(
            id="sup-1", recording_id="irrelevant", start=0.5, duration=6.0
        ),
        SupervisionSegment(
            id="sup-2", recording_id="irrelevant", start=7.0, duration=2.0
        ),
    ]
    base = MonoCut(
        id="cut-1",
        start=0.0,
        duration=10.0,
        channel=0,
        features=fbank,
        recording=rec,
        supervisions=segments,
    )

    # Copies of the reference cut, each missing exactly one component.
    stripped = [
        fastcopy(base, id="cut-nosup", supervisions=[]),
        fastcopy(base, id="cut-norec", recording=None),
        fastcopy(base, id="cut-nofeat", features=None),
    ]
    # One padded variant per padding direction.
    padded = [
        base.pad(duration=30.0, direction=side)
        for side in ("left", "right", "both")
    ]
    # The reference cut mixed with itself.
    mixed = base.mix(base, offset_other_by=5.0, snr=8)

    return CutSet.from_cuts([base, *stripped, *padded, mixed])
コード例 #2
0
def test_cut_load_custom_recording_pad_left():
    """Check that a custom Recording attribute is zero-padded on the left.

    A random waveform is saved to a temporary WAV file and attached to a cut
    as the custom ``my_favorite_song`` attribute.  After padding the cut on
    the left with ``duration=60.0``, the loaded audio is expected to start
    with zeros and end with the original samples.
    """
    sampling_rate = 16000
    duration = 52.4
    num_samples = compute_num_samples(duration, sampling_rate)
    audio = np.random.randn(1, num_samples).astype(np.float32)
    audio /= np.abs(audio).max()  # scale into [-1, 1]
    with NamedTemporaryFile(suffix=".wav") as wav_file:
        wav_path = wav_file.name
        torchaudio.save(wav_path, torch.from_numpy(audio), sampling_rate)
        # Flush and fsync the temp file handle before reading the file back.
        wav_file.flush()
        os.fsync(wav_file)
        song = Recording.from_file(wav_path)

        # Note: "my_favorite_song" is not a regular MonoCut field, and
        #       neither is the "load_my_favorite_song()" method used below.
        #       We are dynamically extending the cut.
        cut = MonoCut(
            id="x",
            start=0,
            duration=duration,
            channel=0,
            recording=dummy_recording(0, duration=duration),
        )
        cut.my_favorite_song = song

        padded_cut = cut.pad(duration=60.0, direction="left")

        loaded = padded_cut.load_my_favorite_song()
        assert loaded.shape == (1, 960000)  # 16000 * 60

        # Everything before the last `orig_len` samples is padding.
        orig_len = audio.shape[1]
        left_padding = loaded[:, :-orig_len]
        original_part = loaded[:, -orig_len:]
        np.testing.assert_almost_equal(0, left_padding)
        np.testing.assert_almost_equal(audio, original_part)
コード例 #3
0
def cut_set():
    """Return a ``CutSet`` with a reference cut plus seven derived cuts.

    The reference ``MonoCut`` ('cut-1') has features, a recording and two
    supervisions.  The derived cuts are, in order: copies with empty
    supervisions, no recording, and no features; the cut padded with
    ``duration=30.0`` on the 'left', on the 'right' and on 'both' sides; and
    the cut mixed with itself.
    """
    features = Features(
        type='fbank',
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type='lilcom',
        storage_path='irrelevant',
        storage_key='irrelevant',
    )
    recording = Recording(
        id='rec-1',
        sampling_rate=16000,
        num_samples=160000,
        duration=10.0,
        sources=[
            AudioSource(type='file', channels=[0], source='irrelevant'),
        ],
    )
    # (id, start, duration) of each supervision attached to the cut.
    supervision_specs = (('sup-1', 0.5, 6.0), ('sup-2', 7.0, 2.0))
    supervisions = [
        SupervisionSegment(id=sup_id,
                           recording_id='irrelevant',
                           start=sup_start,
                           duration=sup_duration)
        for sup_id, sup_start, sup_duration in supervision_specs
    ]
    reference = MonoCut(id='cut-1',
                        start=0.0,
                        duration=10.0,
                        channel=0,
                        features=features,
                        recording=recording,
                        supervisions=supervisions)

    cuts = [reference]
    # Copies that each lack one component of the reference cut.
    cuts.append(fastcopy(reference, id='cut-nosup', supervisions=[]))
    cuts.append(fastcopy(reference, id='cut-norec', recording=None))
    cuts.append(fastcopy(reference, id='cut-nofeat', features=None))
    # One padded cut per padding direction.
    for direction in ('left', 'right', 'both'):
        cuts.append(reference.pad(duration=30.0, direction=direction))
    # The reference cut mixed with itself.
    cuts.append(reference.mix(reference, offset_other_by=5.0, snr=8))
    return CutSet.from_cuts(cuts)
コード例 #4
0
def test_cut_load_array_pad():
    """Check that a custom array stored on a cut still loads after padding.

    The array loaded from the padded cut is expected to equal the array that
    was stored on the original cut.
    """
    expected = np.arange(20).astype(np.float32)
    with NamedTemporaryFile(suffix=".h5") as tmp:
        with LilcomHdf5Writer(tmp.name) as array_writer:
            original_cut = MonoCut(
                id="x",
                start=0,
                duration=5,
                channel=0,
                recording=dummy_recording(1),
            )
            # Attach the stored array to the cut as the custom "ivector" field.
            original_cut.ivector = array_writer.store_array(
                key="utt1", value=expected
            )

            padded_cut = original_cut.pad(duration=7.6)

            loaded = padded_cut.load_ivector()
            np.testing.assert_equal(expected, loaded)
コード例 #5
0
def test_cut_load_temporal_array_pad(pad_value):
    """Check that a temporal array on a cut is padded together with the cut.

    Args:
        pad_value: value passed to ``pad()`` via ``pad_value_dict`` for the
            "alignment" field; the frames added by padding are expected to
            equal it.
    """
    # Kept as literals: 131 * 0.4 is not exactly 52.4 in floating point.
    num_frames = 131
    num_padded_frames = 150  # 60.0 / 0.4 == 150
    with NamedTemporaryFile(suffix=".h5") as tmp:
        with NumpyHdf5Writer(tmp.name) as array_writer:
            cut = MonoCut(
                id="x",
                start=0,
                duration=52.4,  # 131 frames x 0.4s frame shift == 52.4s
                channel=0,
                recording=dummy_recording(1),
            )

            expected = np.random.randint(500, size=num_frames)
            cut.alignment = array_writer.store_array(
                key="utt1", value=expected, frame_shift=0.4, temporal_dim=0
            )
            padded_cut = cut.pad(
                duration=60.0, pad_value_dict={"alignment": pad_value}
            )

            loaded = padded_cut.load_alignment()
            assert loaded.shape == (num_padded_frames,)
            # Original frames come first, followed by the padding frames.
            np.testing.assert_equal(loaded[:num_frames], expected)
            np.testing.assert_equal(loaded[num_frames:], pad_value)