Example #1
0
def mixed_overlapping_cut_set():
    """
    Input mixed cut::
        |---------------mixedcut--------------------|
        |--------rec1 0-30s--------|
                     |-------rec2 15-45s--------|
         |---sup1--|         |-----sup3-----|
                 |sup2|
    """
    # First track: rec1 covering 0-30s with two supervisions.
    first = MonoCut(
        'cut1',
        start=0,
        duration=30,
        channel=0,
        recording=Recording(
            id='rec1', sources=[], sampling_rate=16000,
            num_samples=160000, duration=60.0,
        ),
        supervisions=[
            SupervisionSegment('sup1', 'rec1', start=1.5, duration=10.5),
            SupervisionSegment('sup2', 'rec1', start=10, duration=6),
        ],
    )
    # Second track: rec2, mixed in with a 15s offset so it overlaps the first.
    second = MonoCut(
        'cut2',
        start=15,
        duration=30,
        channel=0,
        recording=Recording(
            id='rec2', sources=[], sampling_rate=16000,
            num_samples=160000, duration=60.0,
        ),
        supervisions=[
            SupervisionSegment('sup3', 'rec2', start=8, duration=18),
        ],
    )
    cut_set = CutSet.from_cuts([first.mix(second, offset_other_by=15.0)])
    assert isinstance(cut_set[0], MixedCut)
    return cut_set
Example #2
0
def test_trim_to_supervisions_mixed_cuts():
    """Trimming a MixedCut with non-overlapping tracks yields plain, unmixed cuts."""
    left = Cut(
        'cut1',
        start=0,
        duration=30,
        channel=0,
        recording=Recording(id='rec1',
                            sources=[],
                            sampling_rate=16000,
                            num_samples=160000,
                            duration=10.0),
        supervisions=[
            SupervisionSegment('sup1', 'rec1', start=1.5, duration=8.5),
            SupervisionSegment('sup2', 'rec1', start=10, duration=5),
            SupervisionSegment('sup3', 'rec1', start=20, duration=8),
        ],
    )
    right = Cut(
        'cut2',
        start=0,
        duration=30,
        channel=0,
        recording=Recording(id='rec1',
                            sources=[],
                            sampling_rate=16000,
                            num_samples=160000,
                            duration=10.0),
        supervisions=[
            SupervisionSegment('sup4', 'rec1', start=0, duration=30),
        ],
    )
    cut_set = CutSet.from_cuts([left.append(right)])
    assert isinstance(cut_set[0], MixedCut)

    cuts = cut_set.trim_to_supervisions()
    assert len(cuts) == 4
    # After "trimming", the MixedCut "decayed" into simple, unmixed cuts, as they did not overlap
    for c in cuts:
        assert isinstance(c, Cut)
        assert len(c.supervisions) == 1
        assert c.supervisions[0].start == 0

    # Check that the cuts preserved their start/duration/supervisions after trimming
    expected = [
        ('sup1', 1.5, 8.5),
        ('sup2', 10, 5),
        ('sup3', 20, 8),
        ('sup4', 0, 30),
    ]
    for c, (sup_id, start, duration) in zip(cuts, expected):
        assert c.start == start
        assert c.duration == duration
        assert c.supervisions[0].id == sup_id
Example #3
0
def test_mix_same_recording_channels():
    """Cuts over different channels of one recording should mix into one MixedCut."""
    sources = [
        AudioSource('file', channels=[0], source='irrelevant1.wav'),
        AudioSource('file', channels=[1], source='irrelevant2.wav'),
    ]
    recording = Recording('rec',
                          sampling_rate=8000,
                          num_samples=30 * 8000,
                          duration=30,
                          sources=sources)
    cut_set = CutSet.from_cuts([
        Cut('cut1', start=0, duration=30, channel=0, recording=recording),
        Cut('cut2', start=0, duration=30, channel=1, recording=recording),
    ])

    mixed = cut_set.mix_same_recording_channels()
    assert len(mixed) == 1

    result = mixed[0]
    assert isinstance(result, MixedCut)
    assert len(result.tracks) == 2
    # Each track should wrap the corresponding original single-channel cut.
    for idx in range(2):
        assert result.tracks[idx].cut == cut_set[idx]
def recording():
    """One-second, 8 kHz stereo Recording backed by the test fixture WAV."""
    stereo = AudioSource(type='file', channels=[0, 1], source='test/fixtures/stereo.wav')
    return Recording(id='rec',
                     sources=[stereo],
                     sampling_rate=8000,
                     num_samples=8000,
                     duration=1.0)
Example #5
0
def recording(file_source):
    """Half-second, 8 kHz Recording wrapping the provided audio source."""
    return Recording(
        id="rec",
        sampling_rate=8000,
        num_samples=4000,
        duration=0.5,
        sources=[file_source],
    )
Example #6
0
def cut_with_relative_paths():
    """A Cut whose features and recording reference relative storage paths."""
    feats = Features(type='fbank', num_frames=1000, num_features=40,
                     sampling_rate=8000, storage_type='lilcom_files',
                     storage_path='storage_dir', storage_key='feats.llc',
                     start=0, duration=10)
    rec = Recording('rec', [AudioSource('file', [0], 'audio.wav')], 8000, 80000, 10.0)
    return Cut('cut', 0, 10, 0, features=feats, recording=rec)
Example #7
0
def random_cut_set(n_cuts=100) -> CutSet:
    """
    Build a CutSet of ``n_cuts`` randomly-placed cuts.

    Each cut gets a random start in [0, 5]s and a random duration in [3, 10]s
    (rounded to 8 decimal places), backed by a dummy 100 s, 16 kHz recording
    with no audio sources.

    :param n_cuts: number of cuts to generate (default 100).
    :return: a ``CutSet`` with ``n_cuts`` entries.
    """
    return CutSet.from_cuts(
        MonoCut(
            # Stringify the UUID: cut/recording IDs are strings in every
            # other fixture here (cf. str(uuid4()) in with_recording), and
            # a raw UUID object would not survive manifest serialization.
            id=str(uuid4()),
            start=round(random.uniform(0, 5), ndigits=8),
            duration=round(random.uniform(3, 10), ndigits=8),
            channel=0,
            recording=Recording(
                id=str(uuid4()),
                sources=[],
                sampling_rate=16000,
                num_samples=1600000,
                duration=100.0,
            ),
        )
        for _ in range(n_cuts)
    )
def test_cut_trim_to_supervisions_extend_handles_end_of_recording(mono_cut):
    """
    Scenario::

        |----------Recording---------|
        |---Sup1----|       |--Sup2--|
        |------------Cut-------------|

    Into::

        |----------Recording---------|
        |---Cut1----|     |---Cut2---|
        |---Sup1----|       |--Sup2--|
    """
    recording = Recording(id="X",
                          sources=[],
                          sampling_rate=8000,
                          num_samples=80000,
                          duration=10.0)
    supervisions = [
        SupervisionSegment(id="X", recording_id="X", start=0.0, duration=4.0),
        SupervisionSegment(id="X", recording_id="X", start=7.0, duration=3.0),
    ]
    cut = MonoCut(
        id="X",
        start=0.0,
        duration=10.0,
        channel=0,
        supervisions=supervisions,
        recording=recording,
    )

    cuts = cut.trim_to_supervisions(min_duration=4.0)
    assert len(cuts) == 2
    first, second = cuts

    # First cut already satisfies min_duration, so it matches sup1 exactly.
    assert first.start == 0
    assert first.duration == 4.0
    assert len(first.supervisions) == 1
    (sup,) = first.supervisions
    assert sup.start == 0.0
    assert sup.duration == 4.0

    # Second supervision sits at the recording end: the cut is extended
    # leftwards only, so it starts at 6.5 and stops at the 10 s boundary.
    assert second.start == 6.5
    assert second.duration == 3.5
    assert len(second.supervisions) == 1
    (sup,) = second.supervisions
    assert sup.start == 0.5
    assert sup.duration == 3.0
Example #9
0
def recording():
    """One-second, 8 kHz stereo Recording pointing at the test fixture WAV."""
    stereo_source = AudioSource(
        type="file",
        channels=[0, 1],
        source="test/fixtures/stereo.wav",
    )
    return Recording(
        id="rec",
        sources=[stereo_source],
        sampling_rate=8000,
        num_samples=8000,
        duration=1.0,
    )
Example #10
0
 def with_recording(self, sampling_rate: int,
                    num_samples: int) -> Recording:
     """Write random samples to a temp WAV file and return a Recording for it."""
     tmp = NamedTemporaryFile('wb', suffix='.wav')
     # Keep a reference so the temp file outlives this call.
     self.files.append(tmp)
     soundfile.write(tmp.name, np.random.rand(num_samples), samplerate=sampling_rate)
     return Recording(
         id=str(uuid4()),
         sources=[AudioSource(type='file', channels=[0], source=tmp.name)],
         sampling_rate=sampling_rate,
         num_samples=num_samples,
         duration=num_samples / sampling_rate)
Example #11
0
def make_recording(sampling_rate: int, num_samples: int) -> Recording:
    # The idea is that we're going to write to a temporary file with a sine wave recording
    # of specified duration and sampling rate, and clean up only after the test is executed.
    # NOTE(review): despite the `-> Recording` annotation this is a generator
    # (yield-fixture style); the temp WAV exists only while the consumer is
    # suspended at the yield, and is deleted when the with-block exits.
    with NamedTemporaryFile('wb', suffix='.wav') as f:
        duration = num_samples / sampling_rate
        # A 1 Hz sine wave: sin(2*pi*n/sampling_rate) for n = 0..num_samples-1.
        samples: np.ndarray = np.sin(2 * np.pi * np.arange(0, num_samples) /
                                     sampling_rate)
        soundfile.write(f, samples, samplerate=sampling_rate)
        yield Recording(
            id=f'recording-{sampling_rate}-{duration}',
            sources=[AudioSource(type='file', channels=[0], source=f.name)],
            sampling_rate=sampling_rate,
            num_samples=num_samples,
            duration=duration)
Example #12
0
def recording_set():
    """RecordingSet with one two-channel recording mixing file and command sources."""
    channel0 = AudioSource(type='file',
                           channels=[0],
                           source='text/fixtures/mono_c0.wav')
    channel1 = AudioSource(type='command',
                           channels=[1],
                           source='cat text/fixtures/mono_c1.wav')
    rec = Recording(id='x',
                    sources=[channel0, channel1],
                    sampling_rate=8000,
                    num_samples=4000,
                    duration=0.5)
    return RecordingSet.from_recordings([rec])
Example #13
0
def cut_set():
    """CutSet covering a full cut plus no-sup/no-rec/no-feat, padded, and mixed variants."""
    feats = Features(
        type="fbank",
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type="lilcom",
        storage_path="irrelevant",
        storage_key="irrelevant",
    )
    rec = Recording(
        id="rec-1",
        sampling_rate=16000,
        num_samples=160000,
        duration=10.0,
        sources=[AudioSource(type="file", channels=[0], source="irrelevant")],
    )
    sups = [
        SupervisionSegment(id="sup-1",
                           recording_id="irrelevant",
                           start=0.5,
                           duration=6.0),
        SupervisionSegment(id="sup-2",
                           recording_id="irrelevant",
                           start=7.0,
                           duration=2.0),
    ]
    base = MonoCut(
        id="cut-1",
        start=0.0,
        duration=10.0,
        channel=0,
        features=feats,
        recording=rec,
        supervisions=sups,
    )
    variants = [
        base,
        # Variants with individual manifests removed.
        fastcopy(base, id="cut-nosup", supervisions=[]),
        fastcopy(base, id="cut-norec", recording=None),
        fastcopy(base, id="cut-nofeat", features=None),
        # Padded variants in each direction.
        base.pad(duration=30.0, direction="left"),
        base.pad(duration=30.0, direction="right"),
        base.pad(duration=30.0, direction="both"),
        # A self-mix with an offset and SNR.
        base.mix(base, offset_other_by=5.0, snr=8),
    ]
    return CutSet.from_cuts(variants)
Example #14
0
def cut_set():
    """CutSet (legacy Cut API) with full, stripped, padded, and mixed variants."""
    feats = Features(
        type='fbank',
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type='lilcom',
        storage_path='irrelevant',
        storage_key='irrelevant',
    )
    rec = Recording(id='rec-1',
                    sampling_rate=16000,
                    num_samples=160000,
                    duration=10.0,
                    sources=[
                        AudioSource(type='file',
                                    channels=[0],
                                    source='irrelevant')
                    ])
    sups = [
        SupervisionSegment(id='sup-1',
                           recording_id='irrelevant',
                           start=0.5,
                           duration=6.0),
        SupervisionSegment(id='sup-2',
                           recording_id='irrelevant',
                           start=7.0,
                           duration=2.0),
    ]
    base = Cut(id='cut-1',
               start=0.0,
               duration=10.0,
               channel=0,
               features=feats,
               recording=rec,
               supervisions=sups)
    variants = [
        base,
        fastcopy(base, id='cut-nosup', supervisions=[]),
        fastcopy(base, id='cut-norec', recording=None),
        fastcopy(base, id='cut-nofeat', features=None),
        base.pad(duration=30.0, direction='left'),
        base.pad(duration=30.0, direction='right'),
        base.pad(duration=30.0, direction='both'),
        base.mix(base, offset_other_by=5.0, snr=8),
    ]
    return CutSet.from_cuts(variants)
Example #15
0
def random_cut_set(n_cuts=100) -> CutSet:
    """
    Build a CutSet of ``n_cuts`` randomly-placed cuts.

    Starts are random sample-aligned offsets in [0, 5]s and durations in
    [3, 10]s, backed by a dummy 100 s, 16 kHz recording with no sources.

    :param n_cuts: number of cuts to generate (default 100).
    :return: a ``CutSet`` with ``n_cuts`` entries.
    """
    sr = 16000
    return CutSet.from_cuts(
        MonoCut(
            # Stringify the UUID: IDs are strings in the other fixtures
            # (cf. str(uuid4()) in with_recording), and raw UUID objects
            # would not survive manifest serialization.
            id=str(uuid4()),
            start=random.randint(0, 5 * sr) / sr,
            duration=random.randint(3 * sr, 10 * sr) / sr,
            channel=0,
            recording=Recording(
                id=str(uuid4()),
                sources=[],
                sampling_rate=16000,
                num_samples=1600000,
                duration=100.0,
            ),
        )
        for _ in range(n_cuts)
    )
Example #16
0
def recording_set():
    """RecordingSet with one two-channel recording mixing file and command sources."""
    channel0 = AudioSource(type="file",
                           channels=[0],
                           source="text/fixtures/mono_c0.wav")
    channel1 = AudioSource(
        type="command",
        channels=[1],
        source="cat text/fixtures/mono_c1.wav",
    )
    rec = Recording(
        id="x",
        sources=[channel0, channel1],
        sampling_rate=8000,
        num_samples=4000,
        duration=0.5,
    )
    return RecordingSet.from_recordings([rec])
Example #17
0
def cut_with_relative_paths():
    """A MonoCut whose features and recording reference relative storage paths."""
    feats = Features(
        type="fbank",
        num_frames=1000,
        num_features=40,
        sampling_rate=8000,
        storage_type="lilcom_files",
        storage_path="storage_dir",
        storage_key="feats.llc",
        start=0,
        duration=10,
        frame_shift=0.01,
    )
    rec = Recording("rec", [AudioSource("file", [0], "audio.wav")],
                    8000, 80000, 10.0)
    return MonoCut("cut", 0, 10, 0, features=feats, recording=rec)
def mono_cut():
    """
    Scenario::

        |-----------------Recording-----------------|
           "Hey, Matt!"  "Yes?"
        |--------------| |-----|  "Oh, nothing"
                             |------------------|
        |-------------------Cut1--------------------|
    """
    recording = Recording(id="rec1",
                          duration=10.0,
                          sampling_rate=8000,
                          num_samples=80000,
                          sources=[...])
    supervisions = [
        SupervisionSegment(id="sup1", recording_id="rec1",
                           start=0.0, duration=3.37, text="Hey, Matt!"),
        SupervisionSegment(id="sup2", recording_id="rec1",
                           start=4.5, duration=0.9, text="Yes?"),
        SupervisionSegment(id="sup3", recording_id="rec1",
                           start=4.9, duration=4.3, text="Oh, nothing"),
    ]
    return MonoCut(
        id="rec1-cut1",
        start=0.0,
        duration=10.0,
        channel=0,
        recording=recording,
        supervisions=supervisions,
    )
Example #19
0
    def with_recording(
        self, sampling_rate: int, num_samples: int, use_zeros: bool = False
    ) -> Recording:
        """Write random (or all-zero) samples to a temp WAV and return its Recording."""
        import torchaudio  # torchaudio does not have issues on M1 macs unlike soundfile

        tmp = NamedTemporaryFile("wb", suffix=".wav")
        # Keep a reference so the temp file outlives this call.
        self.files.append(tmp)
        shape = (1, num_samples)
        samples = torch.zeros(shape) if use_zeros else torch.rand(shape)
        torchaudio.save(tmp.name, samples, sample_rate=sampling_rate)
        # Make sure the samples hit the disk before anyone reads them back.
        tmp.flush()
        os.fsync(tmp)
        return Recording(
            id=str(uuid4()),
            sources=[AudioSource(type="file", channels=[0], source=tmp.name)],
            sampling_rate=sampling_rate,
            num_samples=num_samples,
            duration=num_samples / sampling_rate,
        )
Example #20
0
def make_recording_callhome(
        sph_path: Pathlike,
        recording_id: Optional[str] = None,
        relative_path_depth: Optional[int] = None,
        sph2pipe_path: Optional[Pathlike] = None
) -> Recording:
    """
    This function creates manifests for CallHome recordings that are compressed
    with shorten, a rare and mostly unsupported codec. You will need to install
    sph2pipe (e.g. using Kaldi) in order to read these files.

    :param sph_path: Path to the ``.sph`` file.
    :param recording_id: Optional ID for the resulting ``Recording``;
        defaults to the SPH file's stem.
    :param relative_path_depth: When given and > 0, only the last that many
        components of ``sph_path`` are embedded in the sph2pipe command,
        producing a relative-path manifest.
    :param sph2pipe_path: Optional path to the ``sph2pipe`` binary; defaults
        to resolving ``sph2pipe`` on ``PATH``.
    """
    try:
        from sphfile import SPHFile
    except ImportError:
        raise ImportError("Please install sphfile (pip install sphfile) instead and "
                          "try preparing CallHome English again.")
    if sph2pipe_path is None:
        sph2pipe_path = 'sph2pipe'
    else:
        sph2pipe_path = str(sph2pipe_path).strip()
    sph_path = Path(sph_path)
    # Parse the SPH header to obtain sample rate, sample count and channel count.
    sphf = SPHFile(sph_path)
    return Recording(
        id=recording_id if recording_id is not None else sph_path.stem,
        sampling_rate=sphf.format['sample_rate'],
        num_samples=sphf.format['sample_count'],
        duration=sphf.format['sample_count'] / sphf.format['sample_rate'],
        sources=[
            AudioSource(
                # A 'command' source: the manifest stores a shell command that
                # converts the shorten-compressed SPH to WAV via sph2pipe.
                type='command',
                channels=list(range(sphf.format['channel_count'])),
                source=f'{sph2pipe_path} -f wav -p ' + (
                    '/'.join(sph_path.parts[-relative_path_depth:])
                    if relative_path_depth is not None and relative_path_depth > 0
                    else str(sph_path)
                )
            )
        ]
    )
Example #21
0
def test_mix_same_recording_channels():
    """MonoCuts over different channels of one recording mix into a single MixedCut."""
    sources = [
        AudioSource("file", channels=[0], source="irrelevant1.wav"),
        AudioSource("file", channels=[1], source="irrelevant2.wav"),
    ]
    recording = Recording(
        "rec",
        sampling_rate=8000,
        num_samples=30 * 8000,
        duration=30,
        sources=sources,
    )
    cut_set = CutSet.from_cuts([
        MonoCut("cut1", start=0, duration=30, channel=0, recording=recording),
        MonoCut("cut2", start=0, duration=30, channel=1, recording=recording),
    ])

    mixed = cut_set.mix_same_recording_channels()
    assert len(mixed) == 1

    result = mixed[0]
    assert isinstance(result, MixedCut)
    assert len(result.tracks) == 2
    # Each track wraps the corresponding original single-channel cut.
    for idx in range(2):
        assert result.tracks[idx].cut == cut_set[idx]