def cut_set():
    """Build a CutSet of cut variants derived from a single base cut.

    The base cut ("cut-1") carries features, a recording and two
    supervisions. The returned set contains, in order: the base cut,
    copies with supervisions / recording / features removed, the base cut
    padded to 30.0 on the left, right and both sides, and the base cut
    mixed with itself.
    """
    fbank = Features(
        type="fbank",
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type="lilcom",
        storage_path="irrelevant",
        storage_key="irrelevant",
    )
    audio = Recording(
        id="rec-1",
        sampling_rate=16000,
        num_samples=160000,
        duration=10.0,
        sources=[AudioSource(type="file", channels=[0], source="irrelevant")],
    )
    segments = [
        SupervisionSegment(
            id="sup-1", recording_id="irrelevant", start=0.5, duration=6.0
        ),
        SupervisionSegment(
            id="sup-2", recording_id="irrelevant", start=7.0, duration=2.0
        ),
    ]
    base = MonoCut(
        id="cut-1",
        start=0.0,
        duration=10.0,
        channel=0,
        features=fbank,
        recording=audio,
        supervisions=segments,
    )

    # Copies of the base cut, each with exactly one component stripped.
    stripped = [
        fastcopy(base, id="cut-nosup", supervisions=[]),
        fastcopy(base, id="cut-norec", recording=None),
        fastcopy(base, id="cut-nofeat", features=None),
    ]
    # One padded variant per padding direction.
    padded = [
        base.pad(duration=30.0, direction=side)
        for side in ("left", "right", "both")
    ]
    # The base cut overlaid on itself with an offset.
    mixed = base.mix(base, offset_other_by=5.0, snr=8)

    return CutSet.from_cuts([base, *stripped, *padded, mixed])
def test_cut_load_custom_recording_pad_left():
    """Check that a custom Recording attribute is left-padded with the cut.

    A random waveform is written to a temporary WAV file, attached to a
    cut as the custom attribute ``my_favorite_song``, and the cut is padded
    on the left to 60.0. The loaded audio must start with zeros and end
    with the original samples.
    """
    sampling_rate = 16000
    duration = 52.4
    num_samples = compute_num_samples(duration, sampling_rate)
    audio = np.random.randn(1, num_samples).astype(np.float32)
    audio /= np.abs(audio).max()  # normalize to [-1, 1]

    with NamedTemporaryFile(suffix=".wav") as wav_file:
        torchaudio.save(wav_file.name, torch.from_numpy(audio), sampling_rate)
        # Push the written data to disk before it is read back by path.
        wav_file.flush()
        os.fsync(wav_file)
        song = Recording.from_file(wav_file.name)

        # Note: "my_favorite_song" is not a declared MonoCut field here;
        # it is attached dynamically, and the matching
        # "load_my_favorite_song()" call below relies on the library
        # resolving it (presumably via dynamic attribute lookup).
        base_cut = MonoCut(
            id="x",
            start=0,
            duration=duration,
            channel=0,
            recording=dummy_recording(0, duration=duration),
        )
        base_cut.my_favorite_song = song

        padded_cut = base_cut.pad(duration=60.0, direction="left")
        restored_audio = padded_cut.load_my_favorite_song()

        assert restored_audio.shape == (1, 960000)  # 16000 * 60

        # Split the loaded audio into the prepended padding and the tail
        # that should hold the original signal.
        original_len = audio.shape[1]
        leading_padding = restored_audio[:, :-original_len]
        trailing_signal = restored_audio[:, -original_len:]
        np.testing.assert_almost_equal(0, leading_padding)
        np.testing.assert_almost_equal(audio, trailing_signal)
def cut_set():
    """Return a CutSet with a fully-populated cut and its derived variants.

    Order of the cuts: the base cut, three copies each missing one of
    supervisions / recording / features, three padded versions (left,
    right, both) and finally the base cut mixed with itself.
    """
    features = Features(
        type='fbank',
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type='lilcom',
        storage_path='irrelevant',
        storage_key='irrelevant',
    )
    recording = Recording(
        id='rec-1',
        sampling_rate=16000,
        num_samples=160000,
        duration=10.0,
        sources=[AudioSource(type='file', channels=[0], source='irrelevant')],
    )
    supervisions = [
        SupervisionSegment(
            id='sup-1', recording_id='irrelevant', start=0.5, duration=6.0
        ),
        SupervisionSegment(
            id='sup-2', recording_id='irrelevant', start=7.0, duration=2.0
        ),
    ]
    full_cut = MonoCut(
        id='cut-1',
        start=0.0,
        duration=10.0,
        channel=0,
        features=features,
        recording=recording,
        supervisions=supervisions,
    )

    cuts = [full_cut]
    # Variants with a single component removed.
    cuts.append(fastcopy(full_cut, id='cut-nosup', supervisions=[]))
    cuts.append(fastcopy(full_cut, id='cut-norec', recording=None))
    cuts.append(fastcopy(full_cut, id='cut-nofeat', features=None))
    # Variants padded to 30.0 in each supported direction.
    for direction in ('left', 'right', 'both'):
        cuts.append(full_cut.pad(duration=30.0, direction=direction))
    # Variant produced by mixing the cut with itself.
    cuts.append(full_cut.mix(full_cut, offset_other_by=5.0, snr=8))
    return CutSet.from_cuts(cuts)
def test_cut_load_array_pad():
    """Check that loading a custom Array works after padding."""
    expected = np.arange(20).astype(np.float32)

    with NamedTemporaryFile(suffix=".h5") as h5_file, LilcomHdf5Writer(
        h5_file.name
    ) as storage:
        original_cut = MonoCut(
            id="x",
            start=0,
            duration=5,
            channel=0,
            recording=dummy_recording(1),
        )
        # Attach the stored array as a custom "ivector" attribute.
        original_cut.ivector = storage.store_array(key="utt1", value=expected)

        padded_cut = original_cut.pad(duration=7.6)

        # The array loaded from the padded cut must equal what was stored.
        loaded = padded_cut.load_ivector()
        np.testing.assert_equal(expected, loaded)
def test_cut_load_temporal_array_pad(pad_value):
    """Check the array loaded via TemporalArray is padded along with the cut."""
    num_frames = 131
    frame_shift = 0.4

    with NamedTemporaryFile(suffix=".h5") as h5_file, NumpyHdf5Writer(
        h5_file.name
    ) as storage:
        source_cut = MonoCut(
            id="x",
            start=0,
            duration=52.4,  # 131 frames x 0.4s frame shift == 52.4s
            channel=0,
            recording=dummy_recording(1),
        )
        expected = np.random.randint(500, size=num_frames)
        source_cut.alignment = storage.store_array(
            key="utt1",
            value=expected,
            frame_shift=frame_shift,
            temporal_dim=0,
        )

        padded_cut = source_cut.pad(
            duration=60.0, pad_value_dict={"alignment": pad_value}
        )
        loaded = padded_cut.load_alignment()

        assert loaded.shape == (150,)  # 60.0 / 0.4 == 150

        # The leading frames hold the stored alignment; the remainder must
        # be filled with the requested pad value.
        stored_part = loaded[:num_frames]
        padding_part = loaded[num_frames:]
        np.testing.assert_equal(stored_part, expected)
        np.testing.assert_equal(padding_part, pad_value)