def _get_track_metadata(path):
    # Use mono at 44 kHz as reference. For any other settings the data won't be
    # perfectly normalized, but it should be good enough.
    audio = AudioFile(path)
    mix = audio.read(streams=0, channels=1, samplerate=44100)
    return {
        "duration": audio.duration,
        "std": mix.std().item(),
        "mean": mix.mean().item(),
    }
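
# The per-track statistics above are typically gathered once for a whole
# collection and reused later for normalization (see the second __getitem__
# below, which reads meta["path"], meta["mean"] and meta["std"]). A minimal
# sketch of that gathering step, assuming a plain list of file paths;
# `build_metadata` is an illustrative helper name, not taken from the source.
def build_metadata(track_paths):
    metadata = []
    for path in track_paths:
        meta = _get_track_metadata(path)
        meta["path"] = path  # keep the path so a dataset can reopen the file later
        metadata.append(meta)
    return metadata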
def test_recognize(key, filename):
    """Test helper that runs speech recognition on a file through the Microsoft Bing Speech API."""
    from audio import AudioData, AudioFile

    with AudioFile(filename) as af:
        with open(filename, 'rb') as f:
            ad = AudioData(f.read(), af.SAMPLE_RATE, af.SAMPLE_WIDTH)
        bs = Bing()
        r = bs.recognize(ad, key=key, show_all=True)
        print(r)
def test_recognize(key, filename):
    """Test helper that runs speech recognition on a file through the Google Cloud Speech API."""
    from audio import AudioData, AudioFile

    with AudioFile(filename) as af:
        with open(filename, 'rb') as f:
            ad = AudioData(f.read(), af.SAMPLE_RATE, af.SAMPLE_WIDTH)
        gs = Google()
        r = gs.recognize(ad, key=key, show_all=True)
        print(r)
def __getitem__(self, index):
    track = self.mus.tracks[index]
    return (track.name,
            AudioFile(track.path).read(channels=self.channels,
                                       seek_time=0,
                                       streams=self.streams,
                                       samplerate=self.samplerate))
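
# A dataset indexed this way usually also reports its length; a minimal sketch,
# assuming self.mus.tracks is the complete track list (this __len__ is an
# assumed companion, not shown in the source).
def __len__(self):
    return len(self.mus.tracks)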
def censor(self):
    """ Creates a clean/new version of a file by removing explicits """
    audio_file = AudioFile(self.file_path)
    # Define the CLI progress bar
    p_bar, p_bar_step = self.__progress_bar(audio_file.normal_chunks)
    async_iter = zip(repeat(p_bar),
                     repeat(p_bar_step),
                     audio_file.normal_chunks)
    # Censor each audio chunk file asynchronously
    censored_chunks = ThreadPool(6).map(self.__censor_chunk, async_iter)
    clean_file = self.__create_clean_segment(censored_chunks)
    p_bar.close()
    self.create_clean_file(clean_file)
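
# ThreadPool.map passes each zipped tuple as a single argument, so the worker is
# expected to unpack (p_bar, p_bar_step, chunk) itself. A rough sketch of that
# contract; __censor_chunk is not shown in the source, and the muting step below
# (self.__mute_explicits) is a hypothetical placeholder.
def __censor_chunk(self, args):
    p_bar, p_bar_step, chunk = args
    censored = self.__mute_explicits(chunk)  # hypothetical censoring step
    p_bar.update(p_bar_step)                 # advance the shared CLI progress bar
    return censored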
def __getitem__(self, index):
    for meta in self.metadata:
        examples = self._examples_count(meta)
        if index >= examples:
            index -= examples
            continue
        streams = AudioFile(meta["path"]).read(seek_time=index * self.stride,
                                               duration=self.duration,
                                               channels=self.channels,
                                               samplerate=self.samplerate)
        return (streams - meta["mean"]) / meta["std"]
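
# _examples_count is not shown; given that seek_time is index * self.stride and
# each read spans self.duration seconds, it presumably counts how many such
# windows fit into the track. A sketch under that assumption (the formula is a
# guess, not taken from the source):
def _examples_count(self, meta):
    if self.duration is None:
        return 1  # whole track as a single example
    return int((meta["duration"] - self.duration) // self.stride) + 1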
def transcribe_file(audio_path):
    # split_audio_file parameter settings
    batchsize = 128
    # Aggressiveness has to be within 0-3 for the VAD split to work.
    aggressive = 1
    outlierdurms = 20
    outlierbatchsize = 64
    # from .... import DS model
    # Make sure there are CPU cores available on the system to be used.
    try:
        num_processes = cpu_count()
    except NotImplementedError:
        num_processes = 1
    with AudioFile(audio_path, as_path=True) as wav_path:
        dataset = split_audio_file(wav_path,
                                   batch_size=batchsize,
                                   aggressiveness=aggressive,
                                   outlier_duration_ms=outlierdurms,
                                   outlier_batch_size=outlierbatchsize)
        print("dataset:", dataset)