Ejemplo n.º 1
0
 def test_cut_features_mask(self, supervisions):
     """Check that the supervision feature mask is 1 for frames 0-49 and 0 afterwards."""
     # NOTE(review): num_frames=2000 differs from duration / frame_shift = 200 -- confirm it is intended.
     mocked_features = Mock(sampling_rate=16000, frame_shift=0.01, num_frames=2000)
     cut = Cut(
         'cut',
         start=0,
         duration=2,
         channel=0,
         features=mocked_features,
         supervisions=supervisions,
     )
     mask = cut.supervisions_feature_mask()
     supervised_part, remaining_part = mask[:50], mask[50:]
     assert (supervised_part == 1).all()
     assert (remaining_part == 0).all()
Ejemplo n.º 2
0
 def test_cut_speakers_audio_mask(self, supervisions, alignment):
     """Check rows 0 and 1 of the speaker sample mask of a 2 s cut.

     The expected active/inactive sample indices of each mask row depend on
     whether ``alignment`` equals ``"word"``.
     """
     cut = Cut(
         'cut',
         start=0,
         duration=2,
         channel=0,
         recording=Mock(sampling_rate=16000),
         supervisions=supervisions,
     )
     mask = cut.speakers_audio_mask(use_alignment_if_exists=alignment)

     if alignment == "word":
         row0_on = list(chain(range(0, 1600), range(3200, 6400)))
         row0_off = list(chain(range(1600, 3200), range(6400, 32000)))
         row1_on = list(chain(range(9600, 12800)))
     else:
         row0_on = range(0, 8000)
         row0_off = list(chain(range(8000, 32000)))
         row1_on = range(9600, 12800)
     # Row 1 is expected to be inactive at the same samples in both cases.
     row1_off = list(chain(range(0, 9600), range(12800, 32000)))

     ones = [np.index_exp[row0_on], np.index_exp[row1_on]]
     zeros = [np.index_exp[row0_off], np.index_exp[row1_off]]
     # Active regions are verified first, then the inactive ones.
     for row, active in enumerate(ones):
         assert (mask[row, active] == 1).all()
     for row, inactive in enumerate(zeros):
         assert (mask[row, inactive] == 0).all()
Ejemplo n.º 3
0
 def test_cut_speakers_features_mask(self, supervisions, alignment):
     """Check rows 0 and 1 of the speaker feature mask of a 2 s cut.

     Only the expectations for row 0 depend on whether ``alignment`` equals
     ``"word"``; row 1 is expected to be active in frames 60-79 either way.
     """
     # NOTE(review): num_frames=2000 differs from duration / frame_shift = 200 -- confirm it is intended.
     mocked_features = Mock(sampling_rate=16000, frame_shift=0.01, num_frames=2000)
     cut = Cut(
         'cut',
         start=0,
         duration=2,
         channel=0,
         features=mocked_features,
         supervisions=supervisions,
     )
     mask = cut.speakers_feature_mask(use_alignment_if_exists=alignment)

     if alignment == "word":
         row0_on = list(chain(range(0, 10), range(20, 40)))
         row0_off = list(chain(range(10, 20), range(40, 200)))
     else:
         row0_on = list(chain(range(0, 50)))
         row0_off = list(chain(range(50, 200)))
     row1_on = list(chain(range(60, 80)))
     row1_off = list(chain(range(0, 60), range(80, 200)))

     ones = [np.index_exp[row0_on], np.index_exp[row1_on]]
     zeros = [np.index_exp[row0_off], np.index_exp[row1_off]]
     # Active regions are verified first, then the inactive ones.
     for row, active in enumerate(ones):
         assert (mask[row, active] == 1).all()
     for row, inactive in enumerate(zeros):
         assert (mask[row, inactive] == 0).all()
Ejemplo n.º 4
0
 def test_cut_audio_mask(self):
     """A cut built without supervisions must give an audio mask that sums to zero."""
     mocked_recording = Mock(sampling_rate=16000)
     unsupervised_cut = Cut(
         'cut',
         start=0,
         duration=2,
         channel=0,
         recording=mocked_recording,
     )
     audio_mask = unsupervised_cut.supervisions_audio_mask()
     assert audio_mask.sum() == 0
Ejemplo n.º 5
0
 def test_cut_features_mask(self):
     """A cut built without supervisions must give a feature mask that sums to zero."""
     mocked_features = Mock(sampling_rate=16000, frame_shift=0.01)
     unsupervised_cut = Cut(
         'cut',
         start=0,
         duration=2,
         channel=0,
         features=mocked_features,
     )
     feature_mask = unsupervised_cut.supervisions_feature_mask()
     assert feature_mask.sum() == 0
Ejemplo n.º 6
0
 def test_mixed_cut_audio_mask(self, supervisions):
     """Check the audio mask of a 2 s cut appended to itself.

     The expected pattern of the single cut (8000 active samples, then
     inactive up to sample 32000) must repeat from sample 32000 onwards.
     """
     mocked_recording = Mock(sampling_rate=16000)
     cut = Cut(
         'cut',
         start=0,
         duration=2,
         channel=0,
         recording=mocked_recording,
         supervisions=supervisions,
     )
     mask = cut.append(cut).supervisions_audio_mask()
     # (sample region, expected mask value), checked in order.
     expectations = (
         (slice(None, 8000), 1),
         (slice(8000, 32000), 0),
         (slice(32000, 40000), 1),
         (slice(40000, None), 0),
     )
     for region, expected_value in expectations:
         assert (mask[region] == expected_value).all()
Ejemplo n.º 7
0
 def test_mixed_cut_features_mask(self, supervisions):
     """Check the feature mask of a 2 s cut appended to itself.

     The expected pattern of the single cut (50 active frames, then inactive
     up to frame 200) must repeat from frame 200 onwards.
     """
     mocked_features = Mock(sampling_rate=16000, frame_shift=0.01)
     cut = Cut(
         'cut',
         start=0,
         duration=2,
         channel=0,
         features=mocked_features,
         supervisions=supervisions,
     )
     mask = cut.append(cut).supervisions_feature_mask()
     # (frame region, expected mask value), checked in order.
     expectations = (
         (slice(None, 50), 1),
         (slice(50, 200), 0),
         (slice(200, 250), 1),
         (slice(250, None), 0),
     )
     for region, expected_value in expectations:
         assert (mask[region] == expected_value).all()
Ejemplo n.º 8
0
 def with_cut(
         self,
         sampling_rate: int,
         num_samples: int,
         features: bool = True,
         supervision: bool = False
 ) -> Cut:
     """Create a cut with a random id that starts at 0 on channel 0.

     The cut's recording comes from ``self.with_recording`` and its duration
     is ``num_samples / sampling_rate``.

     :param sampling_rate: sampling rate passed to ``self.with_recording``.
     :param num_samples: number of samples passed to ``self.with_recording``.
     :param features: when true, the cut is passed through ``self._with_features``.
     :param supervision: when true, a supervision starting at 0 and lasting
         the cut's duration is appended to the cut's supervisions.
     :return: the resulting cut.
     """
     duration = num_samples / sampling_rate
     recording = self.with_recording(sampling_rate=sampling_rate, num_samples=num_samples)
     cut = Cut(id=str(uuid4()), start=0, duration=duration, channel=0, recording=recording)
     if features:
         cut = self._with_features(cut)
     if not supervision:
         return cut
     full_length_supervision = SupervisionSegment(
         id=f'sup-{cut.id}',
         recording_id=cut.recording_id,
         start=0,
         duration=cut.duration,
         text='irrelevant',
     )
     cut.supervisions.append(full_length_supervision)
     return cut
Ejemplo n.º 9
0
 def test_mixed_cut_audio_mask(self, supervisions):
     """Check the audio mask of a 2 s cut appended to itself.

     Two active sample regions are expected in each half of the 64000-sample
     mask; every other checked sample must be 0.
     """
     mocked_recording = Mock(sampling_rate=16000)
     cut = Cut(
         'cut',
         start=0,
         duration=2,
         channel=0,
         recording=mocked_recording,
         supervisions=supervisions,
     )
     mask = cut.append(cut).supervisions_audio_mask()

     # Half-open [begin, end) sample regions.
     active_regions = [(0, 8000), (9600, 12800), (32000, 40000), (41600, 44800)]
     inactive_regions = [(8000, 9600), (12800, 32000), (40000, 41600), (44800, 64000)]
     ones = np.index_exp[list(
         chain.from_iterable(range(begin, end) for begin, end in active_regions))]
     zeros = np.index_exp[list(
         chain.from_iterable(range(begin, end) for begin, end in inactive_regions))]
     assert (mask[ones] == 1).all()
     assert (mask[zeros] == 0).all()
Ejemplo n.º 10
0
 def test_mixed_cut_features_mask(self, supervisions):
     """Check the feature mask of a 2 s cut appended to itself.

     Two active frame regions are expected in each half of the 400-frame
     mask; every other checked frame must be 0.
     """
     mocked_features = Mock(sampling_rate=16000, frame_shift=0.01)
     cut = Cut(
         'cut',
         start=0,
         duration=2,
         channel=0,
         features=mocked_features,
         supervisions=supervisions,
     )
     mask = cut.append(cut).supervisions_feature_mask()

     # Half-open [begin, end) frame regions.
     active_regions = [(0, 50), (60, 80), (200, 250), (260, 280)]
     inactive_regions = [(50, 60), (80, 200), (250, 260), (280, 400)]
     ones = np.index_exp[list(
         chain.from_iterable(range(begin, end) for begin, end in active_regions))]
     zeros = np.index_exp[list(
         chain.from_iterable(range(begin, end) for begin, end in inactive_regions))]
     assert (mask[ones] == 1).all()
     assert (mask[zeros] == 0).all()
Ejemplo n.º 11
0
def make_cut(sampling_rate: int, num_samples: int) -> Cut:
    """Yield one cut backed by the recording from ``make_recording``.

    This is a generator function: the cut is yielded while the
    ``make_recording`` context is still open. The cut starts at 0 on channel
    0 and lasts ``num_samples / sampling_rate`` seconds.

    NOTE(review): presumably wrapped by a context-manager decorator that is
    not visible here -- confirm against the full file.
    """
    with make_recording(sampling_rate, num_samples) as recording:
        duration = num_samples / sampling_rate
        cut = Cut(
            id=f'cut-{sampling_rate}-{duration}',
            start=0,
            duration=duration,
            channel=0,
            recording=recording,
        )
        yield cut
Ejemplo n.º 12
0
def random_cut_set(n_cuts=100) -> CutSet:
    """Build a CutSet of ``n_cuts`` cuts with randomized start and duration.

    Each cut starts at a random offset between 0 and 5 and has a random
    duration between 3 and 10 (both rounded to 8 decimal places). Every cut
    refers to its own recording with no audio sources, 1600000 samples at
    16000 Hz and a duration of 100.0.

    Cut and recording identifiers are generated with ``str(uuid4())`` so
    that they are plain strings rather than ``UUID`` objects.

    :param n_cuts: number of cuts to generate.
    :return: a CutSet built from the generated cuts.
    """
    def _random_cut() -> Cut:
        # Start is drawn before duration, matching the keyword order of the call.
        return Cut(
            id=str(uuid4()),
            start=round(random.uniform(0, 5), ndigits=8),
            duration=round(random.uniform(3, 10), ndigits=8),
            channel=0,
            recording=Recording(
                id=str(uuid4()),
                sources=[],
                sampling_rate=16000,
                num_samples=1600000,
                duration=100.0,
            ),
        )

    return CutSet.from_cuts(_random_cut() for _ in range(n_cuts))
Ejemplo n.º 13
0
def cut_with_supervision_start01(recording):
    """Return a cut starting at 0.1 that carries one supervision starting at 0.1.

    The cut lasts 0.4 on channel 0 of ``recording``; its only supervision
    lasts 0.3.
    """
    supervision = SupervisionSegment(
        id='sup',
        recording_id='rec',
        start=0.1,
        duration=0.3,
    )
    return Cut(
        id='cut',
        start=0.1,
        duration=0.4,
        channel=0,
        supervisions=[supervision],
        recording=recording,
    )
Ejemplo n.º 14
0
def test_augmentation_chain_randomized(target_sampling_rate: int,
                                       sp_factor: float, resample_first: bool,
                                       cut_duration: Seconds):
    """Check sample counts after chaining resampling and speed perturbation.

    The two augmentations are applied to a fixture recording in the order
    selected by ``resample_first``. The loaded audio must then have as many
    samples as the augmented recording declares, and the same must hold for
    a cut of ``cut_duration`` taken from it.
    """
    recording = Recording.from_file(
        'test/fixtures/libri/libri-1088-134315-0000.wav')

    if resample_first:
        resampled = recording.resample(target_sampling_rate)
        recording_aug = resampled.perturb_speed(sp_factor)
    else:
        perturbed = recording.perturb_speed(sp_factor)
        recording_aug = perturbed.resample(target_sampling_rate)

    recording_samples = recording_aug.load_audio()
    assert recording_samples.shape[1] == recording_aug.num_samples

    cut_aug = Cut(
        id='dummy',
        start=0.5125,
        duration=cut_duration,
        channel=0,
        recording=recording_aug,
    )
    cut_samples = cut_aug.load_audio()
    assert cut_samples.shape[1] == cut_aug.num_samples
Ejemplo n.º 15
0
 def with_cut(self,
              sampling_rate: int,
              num_samples: int,
              features: bool = True) -> Cut:
     """Create a cut with a random id that starts at 0 on channel 0.

     The cut's recording comes from ``self.with_recording`` and its duration
     is ``num_samples / sampling_rate``.

     :param sampling_rate: sampling rate passed to ``self.with_recording``.
     :param num_samples: number of samples passed to ``self.with_recording``.
     :param features: when true, the cut is passed through ``self._with_features``.
     :return: the resulting cut.
     """
     duration = num_samples / sampling_rate
     recording = self.with_recording(sampling_rate=sampling_rate,
                                     num_samples=num_samples)
     plain_cut = Cut(
         id=str(uuid4()),
         start=0,
         duration=duration,
         channel=0,
         recording=recording,
     )
     if not features:
         return plain_cut
     return self._with_features(plain_cut)
Ejemplo n.º 16
0
def deserialize_item(data: dict) -> Any:
    """Turn a decoded manifest dict into the matching Lhotse manifest object.

    The manifest type is guessed with simple heuristics, checked in order:
    a ``'sources'`` key means a Recording, a ``'num_features'`` key means
    Features, and a dict without a ``'type'`` key is a SupervisionSegment.
    Otherwise the ``'type'`` entry is removed from the dict and selects
    between Cut and MixedCut.

    :param data: the raw dict; it is first passed through ``arr2list_recursive``.
    :return: the object created by the selected class's ``from_dict``.
    :raises ValueError: when ``'type'`` is neither ``'Cut'`` nor ``'MixedCut'``.
    """
    # Imported inside the function, as in the original code.
    from lhotse import Cut, Features, Recording, SupervisionSegment
    from lhotse.cut import MixedCut

    data = arr2list_recursive(data)

    # Manifests recognizable by the presence of a characteristic key.
    for marker_key, manifest_class in (('sources', Recording), ('num_features', Features)):
        if marker_key in data:
            return manifest_class.from_dict(data)
    if 'type' not in data:
        return SupervisionSegment.from_dict(data)

    cut_type = data.pop('type')
    if cut_type == 'Cut':
        cut_class = Cut
    elif cut_type == 'MixedCut':
        cut_class = MixedCut
    else:
        raise ValueError(f"Unexpected cut type during deserialization: '{cut_type}'")
    return cut_class.from_dict(data)
Ejemplo n.º 17
0
def cut_set():
    """Build a CutSet holding one fully-populated cut and seven variants of it.

    The variants are: copies without supervisions, without a recording and
    without features; the cut padded to 30.0 on the left, on the right and
    on both sides; and the cut mixed with itself at an offset of 5.0.
    """
    features = Features(
        type='fbank',
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type='lilcom',
        storage_path='irrelevant',
        storage_key='irrelevant',
    )
    recording = Recording(
        id='rec-1',
        sampling_rate=16000,
        num_samples=160000,
        duration=10.0,
        sources=[AudioSource(type='file', channels=[0], source='irrelevant')],
    )
    supervisions = [
        SupervisionSegment(id='sup-1', recording_id='irrelevant', start=0.5, duration=6.0),
        SupervisionSegment(id='sup-2', recording_id='irrelevant', start=7.0, duration=2.0),
    ]
    cut = Cut(
        id='cut-1',
        start=0.0,
        duration=10.0,
        channel=0,
        features=features,
        recording=recording,
        supervisions=supervisions,
    )

    # Copies of the cut, each with one component removed.
    stripped_variants = [
        fastcopy(cut, id='cut-nosup', supervisions=[]),
        fastcopy(cut, id='cut-norec', recording=None),
        fastcopy(cut, id='cut-nofeat', features=None),
    ]
    padded_variants = [
        cut.pad(duration=30.0, direction=side)
        for side in ('left', 'right', 'both')
    ]
    self_mix = cut.mix(cut, offset_other_by=5.0, snr=8)
    return CutSet.from_cuts([cut, *stripped_variants, *padded_variants, self_mix])
Ejemplo n.º 18
0
 def _with_features(self, cut: Cut) -> Cut:
     """Compute Fbank features for ``cut`` and store them in a new temporary directory.

     The ``TemporaryDirectory`` object is appended to ``self.dirs``.

     :param cut: the cut to extract features for.
     :return: the result of ``cut.compute_and_store_features``.
     """
     tmp_dir = TemporaryDirectory()
     self.dirs.append(tmp_dir)
     with LilcomFilesWriter(tmp_dir.name) as writer:
         extractor = Fbank()
         return cut.compute_and_store_features(extractor, storage=writer)
Ejemplo n.º 19
0
def cut(recording):
    """Return a cut with id ``'cut'`` covering 0 to 1.0 on channel 0 of ``recording``."""
    cut_kwargs = dict(
        id='cut',
        start=0,
        duration=1.0,
        channel=0,
        recording=recording,
    )
    return Cut(**cut_kwargs)
Ejemplo n.º 20
0
 def _with_features(self, cut: Cut, frame_shift: Seconds) -> Cut:
     """Compute Fbank features for ``cut`` and store them in a new temporary directory.

     The ``TemporaryDirectory`` object is appended to ``self.dirs``.

     :param cut: the cut to extract features for.
     :param frame_shift: frame shift used to configure the Fbank extractor.
     :return: the result of ``cut.compute_and_store_features``.
     """
     tmp_dir = TemporaryDirectory()
     self.dirs.append(tmp_dir)
     fbank_config = FbankConfig(frame_shift=frame_shift)
     extractor = Fbank(config=fbank_config)
     with LilcomFilesWriter(tmp_dir.name) as writer:
         return cut.compute_and_store_features(extractor, storage=writer)
Ejemplo n.º 21
0
def random_cut_set(n_cuts=100) -> CutSet:
    """Build a CutSet of ``n_cuts`` cuts with randomized start and duration.

    Each cut is on channel 0, starts at a random offset between 0 and 5 and
    has a random duration between 3 and 10 (both rounded to 8 decimal
    places). No recording, features or supervisions are attached.

    Cut identifiers are generated with ``str(uuid4())`` so that they are
    plain strings rather than ``UUID`` objects.

    :param n_cuts: number of cuts to generate.
    :return: a CutSet built from the generated cuts.
    """
    def _random_cut() -> Cut:
        # Start is drawn before duration, matching the keyword order of the call.
        return Cut(
            id=str(uuid4()),
            start=round(random.uniform(0, 5), ndigits=8),
            duration=round(random.uniform(3, 10), ndigits=8),
            channel=0,
        )

    return CutSet.from_cuts(_random_cut() for _ in range(n_cuts))