Beispiel #1
0
def recording_set():
    """Build a ``RecordingSet`` holding one two-source recording.

    The recording has id ``x`` and declares channel 0 through a ``file``
    source and channel 1 through a ``command`` source, with
    ``sampling_rate=8000``, ``num_samples=4000`` and ``duration=0.5``.
    """
    file_channel = AudioSource(
        type='file',
        channels=[0],
        source='text/fixtures/mono_c0.wav',
    )
    command_channel = AudioSource(
        type='command',
        channels=[1],
        source='cat text/fixtures/mono_c1.wav',
    )
    recording = Recording(
        id='x',
        sources=[file_channel, command_channel],
        sampling_rate=8000,
        num_samples=4000,
        duration=0.5,
    )
    return RecordingSet.from_recordings([recording])
Beispiel #2
0
def recording_set():
    """Return a one-element ``RecordingSet`` used as a fixture.

    Its only recording (id ``x``) combines a ``file`` source for channel 0
    with a ``command`` source for channel 1, and is described by
    ``sampling_rate=8000``, ``num_samples=4000``, ``duration=0.5``.
    """
    sources = [
        AudioSource(
            type="file",
            channels=[0],
            source="text/fixtures/mono_c0.wav",
        ),
        AudioSource(
            type="command",
            channels=[1],
            source="cat text/fixtures/mono_c1.wav",
        ),
    ]
    only_recording = Recording(
        id="x",
        sources=sources,
        sampling_rate=8000,
        num_samples=4000,
        duration=0.5,
    )
    return RecordingSet.from_recordings([only_recording])
Beispiel #3
0
def dummy_recording(unique_id: int) -> Recording:
    """Create a placeholder ``Recording`` whose id embeds ``unique_id``.

    Args:
        unique_id: Number appended to the ``dummy-recording-`` prefix,
            zero-padded to at least four digits.

    Returns:
        A ``Recording`` with a single ``command`` source on channel 0 and
        ``sampling_rate=16000``, ``num_samples=16000``, ``duration=1.0``.
    """
    placeholder_source = AudioSource(
        type='command',
        channels=[0],
        source='echo "dummy waveform"',
    )
    return Recording(
        id=f'dummy-recording-{unique_id:04d}',
        sources=[placeholder_source],
        sampling_rate=16000,
        num_samples=16000,
        duration=1.0,
    )
Beispiel #4
0
 def with_recording(self, sampling_rate: int,
                    num_samples: int) -> Recording:
     """Write random samples to a temporary WAV file and describe it.

     Args:
         sampling_rate: Sample rate passed to ``soundfile.write`` and
             stored in the returned ``Recording``.
         num_samples: Number of random samples to generate and write.

     Returns:
         A ``Recording`` with a random UUID id and a single ``file``
         source on channel 0 pointing at the temporary file.
     """
     tmp_wav = NamedTemporaryFile('wb', suffix='.wav')
     # The open handle is stored on the instance; presumably this keeps the
     # temporary file alive until the owner releases it -- confirm in the class.
     self.files.append(tmp_wav)
     duration = num_samples / sampling_rate
     waveform = np.random.rand(num_samples)
     soundfile.write(tmp_wav.name, waveform, samplerate=sampling_rate)
     wav_source = AudioSource(type='file', channels=[0], source=tmp_wav.name)
     return Recording(
         id=str(uuid4()),
         sources=[wav_source],
         sampling_rate=sampling_rate,
         num_samples=num_samples,
         duration=duration,
     )
Beispiel #5
0
def make_recording(sampling_rate: int, num_samples: int) -> Recording:
    """Yield a ``Recording`` backed by a temporary sine-wave WAV file.

    A sine wave of ``num_samples`` samples is written to a
    ``NamedTemporaryFile`` and a ``Recording`` pointing at that file is
    yielded while the file is still open. The file is removed when the
    ``with`` block exits, i.e. only after the consumer resumes or closes
    this generator.

    NOTE(review): because the body contains ``yield``, calling this
    function produces a generator, not a ``Recording``. Presumably it is
    wrapped by a decorator (context manager or fixture) outside this
    view -- confirm.

    Args:
        sampling_rate: Sample rate used for the sine wave and the WAV file.
        num_samples: Number of samples to synthesize.

    Yields:
        A ``Recording`` with id ``recording-{sampling_rate}-{duration}``
        and one ``file`` source on channel 0 pointing at the temp file.
    """
    with NamedTemporaryFile('wb', suffix='.wav') as f:
        duration = num_samples / sampling_rate
        samples: np.ndarray = np.sin(2 * np.pi * np.arange(0, num_samples) /
                                     sampling_rate)
        soundfile.write(f, samples, samplerate=sampling_rate)
        # The data was written through this buffered handle, but consumers
        # reopen the file by name; flush so they never see a partial file.
        f.flush()
        yield Recording(
            id=f'recording-{sampling_rate}-{duration}',
            sources=[AudioSource(type='file', channels=[0], source=f.name)],
            sampling_rate=sampling_rate,
            num_samples=num_samples,
            duration=duration)
Beispiel #6
0
def cut_set():
    """Build a ``CutSet`` covering several cut variants.

    A base ``MonoCut`` (``cut-1``) with features, a recording and two
    supervisions is created first. The returned set contains that cut,
    copies of it without supervisions / recording / features, the cut
    padded to 30.0 on the left, right and both sides, and the cut mixed
    with itself at an offset of 5.0 with ``snr=8``.
    """
    fbank_features = Features(
        type="fbank",
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type="lilcom",
        storage_path="irrelevant",
        storage_key="irrelevant",
    )
    source_recording = Recording(
        id="rec-1",
        sampling_rate=16000,
        num_samples=160000,
        duration=10.0,
        sources=[AudioSource(type="file", channels=[0], source="irrelevant")],
    )
    segments = [
        SupervisionSegment(
            id="sup-1", recording_id="irrelevant", start=0.5, duration=6.0
        ),
        SupervisionSegment(
            id="sup-2", recording_id="irrelevant", start=7.0, duration=2.0
        ),
    ]
    cut = MonoCut(
        id="cut-1",
        start=0.0,
        duration=10.0,
        channel=0,
        features=fbank_features,
        recording=source_recording,
        supervisions=segments,
    )

    # Variants are derived from the base cut in the same order as before.
    variants = [cut]
    variants.append(fastcopy(cut, id="cut-nosup", supervisions=[]))
    variants.append(fastcopy(cut, id="cut-norec", recording=None))
    variants.append(fastcopy(cut, id="cut-nofeat", features=None))
    for side in ("left", "right", "both"):
        variants.append(cut.pad(duration=30.0, direction=side))
    variants.append(cut.mix(cut, offset_other_by=5.0, snr=8))
    return CutSet.from_cuts(variants)
Beispiel #7
0
def cut_set():
    """Assemble a ``CutSet`` with a base ``Cut`` and derived variants.

    The base cut ``cut-1`` carries features, a recording and two
    supervisions. The set holds it together with copies lacking
    supervisions, recording or features, three padded versions
    (left / right / both, ``duration=30.0``) and a self-mix offset by
    5.0 with ``snr=8``.
    """
    features = Features(
        type='fbank',
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type='lilcom',
        storage_path='irrelevant',
        storage_key='irrelevant',
    )
    recording = Recording(
        id='rec-1',
        sampling_rate=16000,
        num_samples=160000,
        duration=10.0,
        sources=[AudioSource(type='file', channels=[0], source='irrelevant')],
    )
    first_sup = SupervisionSegment(
        id='sup-1', recording_id='irrelevant', start=0.5, duration=6.0
    )
    second_sup = SupervisionSegment(
        id='sup-2', recording_id='irrelevant', start=7.0, duration=2.0
    )
    cut = Cut(
        id='cut-1',
        start=0.0,
        duration=10.0,
        channel=0,
        features=features,
        recording=recording,
        supervisions=[first_sup, second_sup],
    )

    stripped = [
        fastcopy(cut, id='cut-nosup', supervisions=[]),
        fastcopy(cut, id='cut-norec', recording=None),
        fastcopy(cut, id='cut-nofeat', features=None),
    ]
    padded = [
        cut.pad(duration=30.0, direction=side)
        for side in ('left', 'right', 'both')
    ]
    mixed = cut.mix(cut, offset_other_by=5.0, snr=8)
    return CutSet.from_cuts([cut, *stripped, *padded, mixed])
Beispiel #8
0
    def with_recording(
        self, sampling_rate: int, num_samples: int, use_zeros: bool = False
    ) -> Recording:
        """Save a one-channel tensor to a temporary WAV and describe it.

        Args:
            sampling_rate: Sample rate passed to ``torchaudio.save`` and
                stored in the returned ``Recording``.
            num_samples: Length of the generated ``(1, num_samples)`` tensor.
            use_zeros: When true the tensor is all zeros; otherwise it is
                filled by ``torch.rand``.

        Returns:
            A ``Recording`` with a random UUID id and a single ``file``
            source on channel 0 pointing at the temporary file.
        """
        import torchaudio  # torchaudio does not have issues on M1 macs unlike soundfile

        tmp_wav = NamedTemporaryFile("wb", suffix=".wav")
        # The open handle is stored on the instance; presumably this keeps the
        # temporary file alive until the owner releases it -- confirm in the class.
        self.files.append(tmp_wav)
        duration = num_samples / sampling_rate
        make_samples = torch.zeros if use_zeros else torch.rand
        waveform = make_samples((1, num_samples))
        torchaudio.save(tmp_wav.name, waveform, sample_rate=sampling_rate)
        # Flush and fsync the handle before the path is handed out.
        tmp_wav.flush()
        os.fsync(tmp_wav)
        wav_source = AudioSource(type="file", channels=[0], source=tmp_wav.name)
        return Recording(
            id=str(uuid4()),
            sources=[wav_source],
            sampling_rate=sampling_rate,
            num_samples=num_samples,
            duration=duration,
        )
Beispiel #9
0
def make_recording_callhome(
        sph_path: Pathlike,
        recording_id: Optional[str] = None,
        relative_path_depth: Optional[int] = None,
        sph2pipe_path: Optional[Pathlike] = None
) -> Recording:
    """
    Create a ``Recording`` manifest for a CallHome SPHERE file.

    CallHome recordings are compressed with shorten, a rare and mostly
    unsupported codec, so the manifest uses a ``command`` source that runs
    sph2pipe (installable e.g. via Kaldi) to decode the file to WAV.

    :param sph_path: Path to the SPHERE file; its header is read with ``sphfile``.
    :param recording_id: Id for the recording; defaults to the file stem.
    :param relative_path_depth: When positive, only the last N components of
        ``sph_path`` (joined with ``/``) are put in the command; otherwise the
        full ``sph_path`` is used.
    :param sph2pipe_path: sph2pipe executable to call; defaults to ``sph2pipe``.
        Surrounding whitespace is stripped.
    :return: A ``Recording`` whose sample rate, sample count and channel count
        come from the SPHERE header.
    :raises ImportError: If ``sphfile`` is not installed.
    """
    try:
        from sphfile import SPHFile
    except ImportError:
        raise ImportError("Please install sphfile (pip install sphfile) instead and "
                          "try preparing CallHome English again.")

    sph2pipe_path = 'sph2pipe' if sph2pipe_path is None else str(sph2pipe_path).strip()
    sph_path = Path(sph_path)
    sphf = SPHFile(sph_path)
    header = sphf.format

    if recording_id is None:
        recording_id = sph_path.stem

    # Header fields are read in the same order as the original keyword arguments.
    sample_rate = header['sample_rate']
    sample_count = header['sample_count']
    duration = sample_count / sample_rate
    channel_ids = list(range(header['channel_count']))

    if relative_path_depth is not None and relative_path_depth > 0:
        audio_path = '/'.join(sph_path.parts[-relative_path_depth:])
    else:
        audio_path = str(sph_path)

    decode_source = AudioSource(
        type='command',
        channels=channel_ids,
        source=f'{sph2pipe_path} -f wav -p {audio_path}',
    )
    return Recording(
        id=recording_id,
        sampling_rate=sample_rate,
        num_samples=sample_count,
        duration=duration,
        sources=[decode_source],
    )
Beispiel #10
0
def file_source():
    """Return a ``file``-type ``AudioSource`` for channel 0 of the mono fixture WAV."""
    source_spec = {
        'type': 'file',
        'channels': [0],
        'source': 'test/fixtures/mono_c0.wav',
    }
    return AudioSource(**source_spec)
Beispiel #11
0
def file_source():
    """Build the ``AudioSource`` fixture: a ``file`` source on channel 0."""
    fixture_path = "test/fixtures/mono_c0.wav"
    return AudioSource(
        type="file",
        channels=[0],
        source=fixture_path,
    )