def generate_wavs(data_dir):
    """Convert the mp3 files in ``data_dir`` to resampled wav files.

    The work is done in four passes, each with its own progress bar:

    1. every ``*.mp3`` directly inside ``data_dir`` is exported as a wav
       into ``<data_dir>/wavs``;
    2. every wav in that directory is resampled to ``sample_rate`` and
       written next to the source with a ``k16.wav`` suffix;
    3. the wavs that were not resampled are deleted;
    4. the ``k16`` marker is stripped from the remaining file names.

    ``sample_rate`` is not a parameter; it is resolved from the enclosing
    (module) scope.

    Args:
        data_dir: Directory containing the mp3 files.
    """
    print("hello")

    # One output directory shared by every pass.  The export pass used to
    # strip a would-be "extension" from data_dir while the later passes did
    # not, so a data_dir containing a dot sent the passes to different places.
    wav_dir = path.join(data_dir, 'wavs')
    if not path.isdir(wav_dir):
        os.makedirs(wav_dir)

    pbar = ProgressBar()
    for mp3_file in pbar(glob(path.join(data_dir, '*.mp3'))):
        sound = pydub.AudioSegment.from_mp3(mp3_file)
        # Keeps only the six characters in front of ".mp3".
        # NOTE(review): presumably the clips carry fixed-width ids; confirm
        # before replacing this with the full file stem.
        filename = mp3_file[-10:-4]
        new_file = path.join(wav_dir, filename + ".wav")
        sound.export(new_file, format="wav")

    resampled_suffix = "k16.wav"

    pbar = ProgressBar()
    # change audio file to 16k sample rate
    for wav_file in pbar(glob(path.join(wav_dir, '*.wav'))):
        new_file = path.splitext(wav_file)[0] + resampled_suffix
        transformer = Transformer()
        transformer.convert(samplerate=sample_rate)
        transformer.build(wav_file, new_file)

    pbar = ProgressBar()
    # remove old files
    for item in pbar(glob(path.join(wav_dir, '*.wav'))):
        if not item.endswith(resampled_suffix):
            os.remove(item)

    pbar = ProgressBar()
    # rename files to remove k16
    for item in pbar(glob(path.join(wav_dir, '*.wav'))):
        # Strip only the trailing marker; a plain str.replace would also
        # rewrite any "k16" that happens to appear in the directory names.
        if item.endswith(resampled_suffix):
            os.rename(item, item[:-len(resampled_suffix)] + ".wav")

    print("end")
def convert(self):
    """Convert the mp3 files of this instance to wav files.

    Every ``*.mp3`` found recursively below ``self.mp3_directory`` is
    converted into the directory returned by ``self._pre_convert()``, using
    the mp3's base name with a ``.wav`` extension.  Files whose wav already
    exists are skipped.

    Return:
        wav_directory: The directory the wav files are written to, exactly
            as returned by ``self._pre_convert()``.
    """
    wav_directory = self._pre_convert()

    for source in self.mp3_directory.glob('**/*.mp3'):
        stem = os.path.splitext(os.path.basename(source))[0]
        target = path.join(wav_directory, stem + ".wav")

        # Nothing to do when a previous run already produced this wav.
        if path.exists(target):
            _logger.debug("Already converted mp3 file %s to wav file %s" % (source, target))
            continue

        _logger.debug("Converting mp3 file %s to wav file %s" % (source, target))
        sox_transformer = Transformer()
        sox_transformer.convert(samplerate=SAMPLE_RATE, n_channels=N_CHANNELS, bitdepth=BITDEPTH)
        sox_transformer.build(str(source), str(target))

    return wav_directory
def read(self, audio_metadata):
    """Read an audio file.

    A file whose path does not end in ``.wav`` is first converted with sox into a temporary wav file using
    this reader's sample rate, channel count and bit depth. The temporary file is removed again before
    returning, including when the conversion or the read raises.

    The returned duration is that of the audio as stored on disk; it does not include the silence that is
    appended for keyword files.

    :param audio_metadata: metadata info of an audio
    :return: raw audio data as float32 array and duration in seconds.
    """

    temp_path = None

    try:
        # Convert it to a wav file.
        if not audio_metadata.path.endswith('.wav'):
            original_sample_rate = file_info.sample_rate(audio_metadata.path)
            assert self._sample_rate <= original_sample_rate
            transformer = Transformer()
            transformer.convert(
                samplerate=self._sample_rate,
                n_channels=self._channels,
                bitdepth=self._bits_per_sample)

            fd, temp_path = tempfile.mkstemp(suffix='.wav')
            # Only the reserved name is needed; sox writes to the path itself, so release the descriptor
            # right away instead of keeping it open for the whole read.
            os.close(fd)
            transformer.build(audio_metadata.path, temp_path)

        wav_path = temp_path if temp_path else audio_metadata.path

        # Read the audio file.
        with SoundFile(wav_path) as soundfile:
            # make sure the audio properties are as expected.
            assert soundfile.samplerate == self._sample_rate
            assert soundfile.channels == self._channels
            duration_sec = len(soundfile) / self._sample_rate
            pcm = soundfile.read(dtype='float32')
    finally:
        # Runs on success and on failure, so a failed assert or sox/soundfile error no longer leaks the
        # temporary wav.
        if temp_path:
            os.remove(temp_path)

    # Add 0.5 second silence to the end of files containing keyword as in occasionally the user stopped
    # recording right after uttering the keyword. If the detector needs some time after seeing the keyword to
    # make a decision (e.g. endpointing) this is going to artificially increase the miss rates.
    if audio_metadata.is_keyword:
        # Build the padding in pcm's own dtype; default float64 zeros would promote the result to float64.
        pcm = np.append(pcm, np.zeros(self._sample_rate // 2, dtype=pcm.dtype))

    return pcm, duration_sec
def convert(self):
    """Produce a wav file for each mp3 belonging to this instance.

    ``self._pre_convert()`` supplies the output directory.  The mp3 files
    are discovered with a recursive ``**/*.mp3`` glob on
    ``self.mp3_directory``; each one maps to ``<base name>.wav`` directly
    inside the output directory and is only converted when that wav is not
    there yet.

    Return:
        wav_directory: The value obtained from ``self._pre_convert()``,
            i.e. the directory holding the wav files.
    """
    wav_directory = self._pre_convert()
    mp3_files = self.mp3_directory.glob('**/*.mp3')

    for mp3_filename in mp3_files:
        base_name = os.path.basename(mp3_filename)
        wav_name = os.path.splitext(base_name)[0] + ".wav"
        wav_filename = path.join(wav_directory, wav_name)
        file_pair = (mp3_filename, wav_filename)

        needs_conversion = not path.exists(wav_filename)
        if needs_conversion:
            _logger.debug("Converting mp3 file %s to wav file %s" % file_pair)
            converter = Transformer()
            converter.convert(
                samplerate=SAMPLE_RATE,
                n_channels=N_CHANNELS,
                bitdepth=BITDEPTH,
            )
            converter.build(str(mp3_filename), str(wav_filename))
        else:
            _logger.debug("Already converted mp3 file %s to wav file %s" % file_pair)

    return wav_directory
def _processSamples(sample_list): for sample in sample_list: sample_new_name = _renameSample(sample) _out = join(out_path, sample_new_name) processed_samples.append(_out) _in = sample # Sox processing using Transform instance tfm = Transformer() tfm.convert(samplerate=44100, n_channels=2, bitdepth=16) if NORMALIZE: tfm.norm(db_level=-3) if SILENCE: tfm.silence(location=-1, silence_threshold=0.05, min_silence_duration=0.1) if PADDING: tfm.pad(0, PADDING) tfm.build(_in, _out)
def _maybe_convert_wav(mp3_filename, wav_filename): if not path.exists(wav_filename): transformer = Transformer() transformer.convert(samplerate=SAMPLE_RATE) transformer.build(mp3_filename, wav_filename)
def convert_sr_channel(audio):
    """Convert an audio file to 16000 Hz, single channel, using sox.

    The input and output paths are taken from ``original_audio_file`` and
    ``wav_file``, which are not defined in this function and must be
    provided by the enclosing scope.

    NOTE(review): the ``audio`` argument is never referenced here; confirm
    whether it was meant to supply the input path.

    Args:
        audio: Currently unused.
    """
    target_format = {"samplerate": 16000, "n_channels": 1}

    tfm = Transformer()
    tfm.convert(**target_format)
    tfm.build(original_audio_file, wav_file)