def test_should_be_audible(self):
    """Check that nearly all of furelise.wav registers as human-audible sound."""
    segment = audiosegment.from_file("furelise.wav")
    audible_s = segment.human_audible()
    minimum_s = segment.duration_seconds * 0.85
    self.assertGreaterEqual(
        audible_s,
        minimum_s,
        "This file should contain at least 85 percent audible sound but does not."
    )
def _run_test_at_given_hz(self, hz):
    """ Test basic functionality at given sampling frequency. """
    def _merge(segments):
        # Collapse a list of segments into a single one; None for an empty list.
        if not segments:
            return None
        if len(segments) == 1:
            return segments[0]
        return segments[0].reduce(segments[1:])

    seg = audiosegment.from_file("furelise.wav").resample(sample_rate_Hz=hz)
    labeled = seg.detect_voice(prob_detect_voice=0.5)
    voiced_segment = _merge([s for label, s in labeled if label == 'v'])
    unvoiced_segment = _merge([s for label, s in labeled if label == 'u'])
    self.assertTrue(unvoiced_segment is not None, "Furelise should be mostly unvoiced.")
    if voiced_segment is not None:
        self.assertGreater(unvoiced_segment.duration_seconds,
                           voiced_segment.duration_seconds)
def test_downmixing_playable(self):
    """ Test that downmixing results in playable audio. """
    stereo = audiosegment.from_file("stereo_furelise.wav")
    downmixed = stereo.resample(channels=1)
    self.assertTrue(common.is_playable(downmixed))
def test_upmix_then_downmix_stereo(self):
    """
    Test that upmixing a stereo file to 8 channels and then downmixing back
    to 2 channels does not change the underlying audio data and still
    yields playable audio.
    """
    seg = audiosegment.from_file("stereo_furelise.wav")
    remixed = seg.resample(channels=8)
    unmixed = remixed.resample(channels=2)
    self._check_underlying_data(seg, unmixed)
    self.assertTrue(common.is_playable(unmixed))
def test_upmixing_does_not_change(self):
    """ Test that upmixing just results in two identical channels. """
    mono = audiosegment.from_file("furelise.wav")
    stereo = mono.resample(channels=2)
    mono_data = mono.to_numpy_array()
    stereo_data = stereo.to_numpy_array()
    # Each stereo channel should match the original mono samples.
    for channel in range(2):
        self.assertTrue(np.allclose(mono_data, stereo_data[:, channel]))
def read_songs(dir, recursive=True, extensions=("m4a", "mp3")):
    """
    Recursively collect music files under *dir* and return a lazy reader.

    :param dir: directory to search (note: shadows the builtin `dir`; kept
                for backward compatibility with existing callers)
    :param recursive: whether to descend into subdirectories
    :param extensions: file extensions (without the dot) to look for;
                       default is now an immutable tuple to avoid the
                       shared-mutable-default pitfall
    :return: a generator yielding one audiosegment per found file
    """
    # Load paths to music files (recursively when requested).
    paths = [p for ext in extensions
             for p in glob("%s/**/*.%s" % (dir, ext), recursive=recursive)]
    print("Found: %d Files" % len(paths))
    # Return a generator so files are only decoded on demand.
    return (audiosegment.from_file(f) for f in paths)
def test_resample_channels(self):
    """ Test that upmixing and downmixing does what we expect. """
    segmono = audiosegment.from_file("furelise.wav")
    # NOTE(review): this also loads the mono file; given the name "segster",
    # it may have been intended to load "stereo_furelise.wav" -- confirm.
    segster = audiosegment.from_file("furelise.wav")
    monos = [segmono for _ in range(16)]
    seg16 = audiosegment.from_mono_audiosegments(*monos)
    for ch in (1, 2, 3, 4, 8, 16):
        with self.subTest(ch):
            for source in (segmono, segster, seg16):
                resampled = source.resample(channels=ch)
                self._compare(resampled, segmono.frame_rate, ch,
                              segmono.sample_width)
def test_upmix_then_downmix_mono(self):
    """ Test that upmixing and then downmixing does not change the audio. """
    original = audiosegment.from_file("furelise.wav")
    eight_channel = original.resample(channels=8)
    back_to_mono = eight_channel.resample(channels=1)
    self._check_underlying_data(original, back_to_mono)
    self.assertTrue(common.is_playable(back_to_mono))
def test_dice(self):
    """Dice the file into 30 ms slices and check every slice's duration."""
    seg = audiosegment.from_file("furelise.wav")
    slice_duration_s = 0.03
    pieces = seg.dice(seconds=slice_duration_s, zero_pad=True)
    total = len(pieces)
    for index, piece in enumerate(pieces):
        msg = "Slice {} out of {} is of duration {}s, but should be of duration {}s".format(
            index, total, piece.duration_seconds, slice_duration_s)
        self.assertTrue(util.isclose(piece.duration_seconds, slice_duration_s), msg)
def test_resample_hz(self):
    """ Test that resampling does what we expect. """
    seg = audiosegment.from_file("furelise.wav")
    target_rates = (8000, 16000, 32000, 44100, 48000, 23411, 96000)
    for rate in target_rates:
        with self.subTest(rate):
            result = seg.resample(sample_rate_Hz=rate)
            self._compare(result, rate, seg.channels, seg.sample_width)
def test_resample_sample_width(self):
    """ Test that changing the sample width does what we expect. """
    seg = audiosegment.from_file("furelise.wav")
    for target_width in (1, 2, 4):
        with self.subTest(target_width):
            result = seg.resample(sample_width=target_width)
            self._compare(result, seg.frame_rate, seg.channels, target_width)
def file_conversion_to_wav(FORMAT_FROM='.m4a', FILE_NAME='data/', BIT_RATE='192k'):
    '''
    Convert an audio file to WAV.

    FORMAT_FROM --> extension (with leading dot) of the input file
    FILE_NAME   --> path of the input file, without its extension
    BIT_RATE    --> output bit rate (e.g. '192k')

    Unrecognized formats are silently ignored.

    # Mix down to two channels and set hard output volume
    #awesome.export("mashup.mp3", format="mp3", parameters=["-ac", "2", "-vol", "150"])
    '''
    import audiosegment

    # Map each supported extension to its loader. Note: '.aac' previously
    # called from_flv, which was a bug; the generic from_file handles it.
    loaders = {
        '.m4a': audiosegment.from_file,
        '.mp3': audiosegment.from_mp3,
        '.ogg': audiosegment.from_ogg,
        '.flv': audiosegment.from_flv,
        '.wma': audiosegment.from_file,
        '.aac': audiosegment.from_file,
    }
    loader = loaders.get(FORMAT_FROM)
    if loader is None:
        return
    song = loader(FILE_NAME + FORMAT_FROM)
    # BIT_RATE was previously ignored (hard-coded "192k"); honor it now.
    song.export(FILE_NAME + ".wav", format="wav", bitrate=BIT_RATE)
    return
def test_pack_and_unpack(self):
    """Round-trip a stereo segment through serialize/deserialize."""
    original = audiosegment.from_file("stereo_furelise.wav")
    restored = audiosegment.deserialize(original.serialize())
    for attr in ("channels", "frame_rate", "duration_seconds"):
        self.assertEqual(getattr(original, attr), getattr(restored, attr))
    self.assertTrue(
        np.allclose(original.to_numpy_array(), restored.to_numpy_array()))
def split_chunk_into_phonemes(self, filepath):
    """Split the audio at *filepath* into phoneme-length chunks, write each
    to disk, and return the list of written file names."""
    sound = audiosegment.from_file(filepath).filter_silence()
    # generate_frames_as_segments yields (segment, timestamp) tuples.
    phoneme_segments = sound.generate_frames_as_segments(self.phoneme_length,
                                                         zero_pad=True)
    return [self.write_phoneme_wav(phoneme)
            for phoneme, _timestamp in phoneme_segments]
def audio2img_sample(file_name):
    """Render a spectrogram of *file_name* and save it as sample_spec.png."""
    segment = audiosegment.from_file(file_name)
    freqs, times, amplitudes = segment.spectrogram(window_length_s=0.03,
                                                   overlap=0.5)
    # Convert power to decibels; the epsilon avoids log10(0).
    db = 10 * np.log10(amplitudes + 1e-9)
    plt.pcolormesh(times, freqs, db)
    plt.axis('off')
    plt.savefig("sample_spec.png", bbox_inches='tight', pad_inches=0)
    plt.close()
def test_visualize(self):
    """Plot the FFT of the first three seconds of the file for human review."""
    seg = audiosegment.from_file("furelise.wav")
    vis_ms = 3000
    bins, vals = seg[1:vis_ms].fft()
    normalized = np.abs(vals) / len(vals)
    # Now plot for human consumption
    plt.plot(bins / 1000, normalized)
    plt.xlabel("kHz")
    plt.ylabel("dB")
    plt.show()
def test_create_file_from_two_monos(self):
    """
    Tests that we can create a playable wav file from copying a single
    mono wave file into stereo.
    """
    mono = audiosegment.from_file("furelise.wav")
    stereo = self._test_create_file_from_n_segments(mono, 2)
    self.assertTrue(common.is_playable(stereo))
    # Both channels should carry identical samples.
    samples = stereo.to_numpy_array()
    self.assertTrue(np.allclose(samples[:, 0], samples[:, 1]))
def test_create_file_from_four_monos(self):
    """
    Tests that we can create a playable four-channel file by copying a
    single mono wave file into each channel.
    """
    mono = audiosegment.from_file("furelise.wav")
    multi = self._test_create_file_from_n_segments(mono, 4)
    self.assertTrue(common.is_playable(multi))
    # All four channels should be identical to one another.
    arr = multi.to_numpy_array()
    for ch in range(3):
        self.assertTrue(np.allclose(arr[:, ch], arr[:, ch + 1]))
def test_stereo_to_numpy_array(self):
    """ Test that the numpy representation of a stereo file is what we expect. """
    seg = audiosegment.from_file("stereo_furelise.wav")
    samples = seg.to_numpy_array()
    expected_len = int(round(seg.frame_rate * seg.duration_seconds))
    self.assertEqual(seg.sample_width,
                     self._look_up_sample_width(samples.dtype))
    self.assertEqual(samples.shape, (expected_len, 2))
    self.assertTrue(np.allclose(samples[:, 0], samples[:, 1]))
def _getAudioAttributes(self, f):
    """Build one feature row (loudness/frequency derivatives plus scalar
    stats) for the wav file *f* and return it wrapped in a list."""
    # TODO error handling for reading single files
    pydub_seg = AudioSegment.from_file(f, format="wav")
    audio_seg = audiosegment.from_file(f)
    freqChange, freqAvg, freqMax = self._getFrequency(audio_seg)
    loudPoly, dBFS, maxDBFS = self._getLoudness(pydub_seg)
    # Loudness derivatives first, then frequency derivatives, then scalars.
    derivatives = np.concatenate((self._getDerivative(loudPoly),
                                  self._getDerivative(freqChange)))
    scalars = [dBFS, maxDBFS, freqAvg, freqMax]
    return [np.append(derivatives, scalars)]
def voice_activity_detection(fname):
    """
    Strip silence from *fname* and write the result under a sibling
    '<dir>aftervad' directory, returning the new file path.

    :param fname: path to the source audio file
    :return: path of the exported, silence-filtered wav file
    """
    signal = audiosegment.from_file(fname)
    signal = signal.resample(sample_rate_Hz=64000, sample_width=2, channels=1)
    signal = signal.filter_silence(duration_s=0.5, threshold_percentage=1.0)
    src_dir = os.path.dirname(fname)
    # NOTE(review): plain substring replace -- assumes src_dir occurs only
    # once within fname; verify against callers.
    new_fname = fname.replace(src_dir, src_dir + 'aftervad')
    new_dir = os.path.dirname(new_fname)
    # makedirs creates any missing parents and is race-free with exist_ok,
    # unlike the previous exists()/mkdir() pairs (which also handled only
    # one level of missing parent).
    os.makedirs(new_dir, exist_ok=True)
    signal.export(new_fname, format="wav")
    return new_fname
def view_spectrogram_audiosegment(self, wavfile):
    """Show two spectrogram plots of *wavfile*: one on real time/frequency
    axes and one on raw bin indices."""
    seg = audiosegment.from_file(wavfile)
    freqs, times, amplitudes = seg.spectrogram(window_length_s=0.03,
                                               overlap=0.5)
    db = 10 * np.log10(amplitudes + 1e-9)  # epsilon guards log10(0)
    # First plot: amplitudes against real time/frequency axes.
    plt.pcolormesh(times, freqs, db)
    plt.xlabel("Time in Seconds")
    plt.ylabel("Frequency in Hz")
    plt.show()
    # Second plot: same data, raw bin indices on both axes.
    plt.pcolormesh(db)
    plt.xlabel("Time in Seconds")
    plt.ylabel("Frequency in Hz")
    plt.show()
def reset_framerate(dsp_wav, new_fr):
    """
    Re-write *dsp_wav* at frame rate *new_fr* by round-tripping it through
    a temporary wav file and pydub's set_frame_rate.

    :param dsp_wav: wave object exposing .write(path); re-read with read_wave
    :param new_fr: target frame rate in Hz
    :return: the re-read wave object at the new frame rate
    """
    import os

    # The previous code joined tempfile.tempdir (which may be None until
    # gettempdir() is called) with TemporaryFile().name (a file-descriptor
    # number on POSIX), producing a bogus path and leaking the on-disk file.
    # mkstemp gives a real named path we can hand to external writers.
    fd, temp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # the libraries below open the file by path themselves
    try:
        dsp_wav.write(temp_path)
        resampled = AudioSegment.from_file(temp_path)
        resampled = resampled.set_frame_rate(new_fr)
        resampled.export(temp_path, format="wav")
        dsp_wav = read_wave(temp_path)
    finally:
        os.remove(temp_path)
    return dsp_wav
def test_stereo_to_and_from_numpy_array(self):
    """
    Tests that we can convert a stereo file to a numpy array and then back
    again without any changes.
    """
    before = audiosegment.from_file("stereo_furelise.wav")
    after = audiosegment.from_numpy_array(before.to_numpy_array(),
                                          before.frame_rate)
    for attr in ("sample_width", "duration_seconds", "channels"):
        self.assertEqual(getattr(before, attr), getattr(after, attr))
    self.assertSequenceEqual(before.raw_data, after.raw_data)
    self.assertTrue(common.is_playable(after))
def test_silence_removal(self):
    """ Basic test for exceptions. """
    original = audiosegment.from_file("furelise.wav")
    filtered = original.filter_silence()
    self.assertEqual(original.channels, filtered.channels)
    self.assertEqual(original.frame_rate, filtered.frame_rate)
    self.assertLess(filtered.duration_seconds, original.duration_seconds)
    # Now try again, but with massive threshold for silence removal.
    # This will strip almost every sample in the file, leaving a practically
    # empty WAV file, which Pydub chokes on.
    _ = original.filter_silence(threshold_percentage=99.9)
def test_create_file_from_eight_monos(self):
    """
    Tests that we can create an eight-channel file by copying a single
    mono wave file into each channel.
    """
    mono = audiosegment.from_file("furelise.wav")
    multi = self._test_create_file_from_n_segments(mono, 8)
    # All eight channels should be identical to one another.
    arr = multi.to_numpy_array()
    for ch in range(7):
        self.assertTrue(np.allclose(arr[:, ch], arr[:, ch + 1]))
def read_wav_np(wavpath, sample_rate):
    """Load a wav file resampled to *sample_rate* and return it as a
    float32 numpy array scaled into [-1, 1)."""
    segment = audiosegment.from_file(wavpath).resample(
        sample_rate_Hz=sample_rate)
    wav = segment.to_numpy_array()
    if wav.ndim == 2:
        # Multichannel: lay the channels out end-to-end (channel-major).
        wav = wav.T.flatten()
    # Normalize integer PCM; already-float data falls through unchanged.
    if wav.dtype == np.int16:
        wav = wav / 32768.0
    elif wav.dtype == np.int32:
        wav = wav / 2147483648.0
    elif wav.dtype == np.uint8:
        wav = (wav - 128) / 128.0
    return wav.astype(np.float32)
def test_mono_file_to_nparray(self):
    """
    Test that a mono file converts to a numpy array with the right data
    type, length, and underlying data.
    """
    seg = audiosegment.from_file("furelise.wav")
    for width in (1, 2, 4):
        with self.subTest(width):
            seg = seg.resample(sample_width=width)
            samples = seg.to_numpy_array()
            expected_len = int(round(seg.frame_rate * seg.duration_seconds))
            self.assertEqual(seg.sample_width,
                             self._look_up_sample_width(samples.dtype))
            self.assertEqual(samples.shape, (expected_len, ))
            self._check_underlying_data(seg, samples)
def dice(path_to_file: str, size: float = 0.1):
    """
    Split a wav file into fixed-length chunks.

    :param path_to_file: path to wave file
    :param size: size of one chunk in seconds (annotation fixed: the
                 default 0.1 is a float, not an int)
    :return: list of numpy arrays, one per chunk
    """
    segment = audiosegment.from_file(path_to_file)
    chunks = segment.dice(size)
    data = [chunk.to_numpy_array() for chunk in chunks]
    print("Diced %s to %d chunks each %0.1fs" % (path_to_file, len(chunks), size))
    return data
def test_mono_to_and_from(self):
    """
    Test that a mono file converts to a numpy array and back again without
    any change.
    """
    seg = audiosegment.from_file("furelise.wav")
    for width in (1, 2, 4):
        with self.subTest(width):
            seg = seg.resample(sample_width=width)
            arr = seg.to_numpy_array()
            seg = audiosegment.from_numpy_array(arr, seg.frame_rate)
            expected_len = int(round(seg.frame_rate * seg.duration_seconds))
            self.assertEqual(seg.sample_width,
                             self._look_up_sample_width(arr.dtype))
            self.assertEqual(arr.shape, (expected_len, ))
            self._check_underlying_data(seg, arr)
            self.assertTrue(common.is_playable(seg))