Esempio n. 1
0
def split_with_vad_wav(
    wav_path: Path,
    out_dir: Path,
    min_dur: float,
    max_dur: float,
    max_silence: float,
    strict_min_dur: bool,
    shift: float = 0,
) -> "list[Path]":
    """Split a WAV file into detected audio regions and save each as FLAC.

    The file is loaded as an ``AudioRegion`` and split with the given
    duration/silence constraints. For every detected region the matching
    samples are written to ``out_dir`` as ``<out_dir.stem>_<index>.flac``
    (16-bit PCM), and the region boundaries, offset by ``shift``, are handed
    to ``save_timestamp`` together with the path returned by
    ``get_path_timestamp(path_seg, ".vad.timestamp")``.

    Args:
        wav_path: Path of the input file; its suffix must be ``.wav``.
        out_dir: Directory receiving the segments. Its stem is also used as
            the file-name prefix of every segment.
        min_dur: Forwarded to ``AudioRegion.split`` as ``min_dur``.
        max_dur: Forwarded to ``AudioRegion.split`` as ``max_dur``.
        max_silence: Forwarded to ``AudioRegion.split`` as ``max_silence``.
        strict_min_dur: Forwarded to ``AudioRegion.split`` as
            ``strict_min_dur``.
        shift: Offset added to both region boundaries before they are
            passed to ``save_timestamp``. It does not affect which samples
            are written.

    Returns:
        The paths of the FLAC segments, in detection order.

    Raises:
        AssertionError: If ``wav_path`` does not end in ``.wav``.
    """
    # The return annotation is a string so no extra typing import is needed.
    wav_path = Path(wav_path)
    assert wav_path.suffix == ".wav", f"expected a .wav file, got {wav_path}"
    out_dir = Path(out_dir)

    audio_region = AudioRegion.load(str(wav_path))
    regions = audio_region.split(
        min_dur=min_dur,
        max_dur=max_dur,
        max_silence=max_silence,
        strict_min_dur=strict_min_dur,
    )

    waveform, sr = sf.read(str(wav_path), dtype="float32")
    out = []
    for i, r in enumerate(regions):
        # Use the public ``meta`` accessor rather than the private ``_meta``.
        meta = r.meta
        # Region boundaries are scaled by the sample rate to index the
        # waveform read above.
        start = int(meta.start * sr)
        end = int(meta.end * sr)
        path_seg = out_dir / f"{out_dir.stem}_{i}.flac"
        path_timestamp = get_path_timestamp(path_seg, ".vad.timestamp")
        save_timestamp(path_timestamp, meta.start + shift, meta.end + shift)
        sf.write(
            str(path_seg), waveform[start:end], sr, subtype="PCM_16", format="FLAC"
        )
        out.append(path_seg)

    return out
Esempio n. 2
0
    def test_StreamSaverWorker_wav(self):
        """Saving the stream to a ``.wav`` target yields the reference audio.

        The file written by ``save_stream`` must be the requested path, must
        load back equal to the reference raw recording, and its bytes must
        match the data accumulated by the saver.
        """
        with TemporaryDirectory() as workdir:
            wav_target = os.path.join(workdir, "output.wav")
            saver = StreamSaverWorker(self.reader, wav_target)
            saver.start()

            # Run the tokenizer chain to completion before saving.
            tokenizer = TokenizerWorker(saver)
            tokenizer.start_all()
            for worker in (tokenizer, saver):
                worker.join()

            written_path = saver.save_stream()

            reference = AudioRegion.load(
                "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
            )
            reloaded = AudioRegion.load(written_path)

            self.assertEqual(written_path, wav_target)
            self.assertEqual(reference, reloaded)
            self.assertEqual(saver.data, bytes(reloaded))
Esempio n. 3
0
 def test_StreamSaverWorker_encode_audio(self):
     """A failed conversion to ``.ogg`` raises ``AudioEncodingWarning``.

     ``_run_subprocess`` is patched to return ``(1, None, None)`` for every
     call (presumably a failing exit status). The test then checks the
     warning text, that ffmpeg, avconv and sox were each invoked once in
     that order, and that the saver still holds the reference audio data.
     """
     with TemporaryDirectory() as workdir:
         ogg_target = os.path.join(workdir, "output.ogg")
         # Name of the intermediate WAV file the converters are asked to read.
         fallback_wav = ogg_target + ".wav"

         with patch("auditok.workers._run_subprocess") as run_mock:
             run_mock.return_value = (1, None, None)
             saver = StreamSaverWorker(self.reader, ogg_target)
             saver.start()
             tokenizer = TokenizerWorker(saver)
             tokenizer.start_all()
             for worker in (tokenizer, saver):
                 worker.join()
             with self.assertRaises(AudioEncodingWarning) as raised:
                 saver.save_stream()

         expected_message = (
             "Couldn't save audio data in the desired format "
             "'ogg'. Either none of 'ffmpeg', 'avconv' or 'sox' "
             "is installed or this format is not recognized.\n"
             "Audio file was saved as '{}'"
         ).format(fallback_wav)
         self.assertEqual(expected_message, str(raised.exception))

         # ffmpeg and avconv share the same argument list.
         converter_args = ["-y", "-f", "wav", "-i", fallback_wav]
         converter_args += ["-f", "ogg", ogg_target]
         expected_calls = [
             call([tool] + converter_args) for tool in ("ffmpeg", "avconv")
         ]
         expected_calls.append(
             call(["sox", "-t", "wav", fallback_wav, ogg_target])
         )
         self.assertEqual(run_mock.mock_calls, expected_calls)

         reference = AudioRegion.load(
             "tests/data/test_split_10HZ_mono.raw", sr=10, sw=2, ch=1
         )
         self.assertTrue(saver._exported)
         self.assertEqual(saver.data, bytes(reference))