def main():
    """Run ``audio_decomp`` on every test item and print a separation score.

    For each entry name returned by ``os.listdir('testimage')`` the two
    ground-truth stems and the mixture are loaded from ``gt_audio``, the
    mixture is handed to ``audio_decomp`` and the two ``*_seg*.wav`` files are
    read back from the output directory (presumably written there by
    ``audio_decomp`` -- confirm).  Reference and estimate are then compared
    with ``separation.bss_eval_sources`` and the first element of its result
    is printed.
    """
    OutputPath = 'result_audio'
    gt_dir = 'gt_audio'

    for name in os.listdir('testimage'):
        signal1 = nussl.AudioSignal(
            path_to_input_file=os.path.join(gt_dir, name + '_gt1.wav'))
        signal2 = nussl.AudioSignal(
            path_to_input_file=os.path.join(gt_dir, name + '_gt2.wav'))
        signal = nussl.AudioSignal(
            path_to_input_file=os.path.join(gt_dir, name + '.wav'))
        audio = signal.audio_data

        # The return value was never used; only the call itself matters here.
        audio_decomp(audio=audio, OutputPath=OutputPath, file=name)

        # Read the estimates from the same directory that was passed to
        # audio_decomp instead of repeating the directory name as a literal.
        signal3 = nussl.AudioSignal(
            path_to_input_file=os.path.join(OutputPath, name + '_seg1.wav'))
        signal4 = nussl.AudioSignal(
            path_to_input_file=os.path.join(OutputPath, name + '_seg2.wav'))

        n_samples = len(audio[0, :])
        ref_sources = np.zeros([2, n_samples])
        est_sources = np.zeros([2, n_samples])

        # Built-in sum() iterates over the first axis, i.e. it adds the
        # channels of each signal together into a single row.
        ref_sources[0, :] = sum(signal1.audio_data)
        ref_sources[1, :] = sum(signal2.audio_data)
        est_sources[0, :] = sum(signal3.audio_data)
        est_sources[1, :] = sum(signal4.audio_data)

        result = separation.bss_eval_sources(ref_sources, est_sources,
                                             compute_permutation=True)
        # Function-call form prints identically on Python 2 and Python 3.
        print(result[0])
def test_write_to_file_path1(self):
    """Round-trip a file-backed signal through ``write_audio_to_file``.

    The signal read back from ``self.out_path1`` must match the one loaded
    from ``self.input_path1`` in both sample rate and audio data.
    """
    original = nussl.AudioSignal(self.input_path1)
    original.write_audio_to_file(self.out_path1)

    reloaded = nussl.AudioSignal(self.out_path1)

    assert original.sample_rate == reloaded.sample_rate
    assert np.allclose(original.audio_data, reloaded.audio_data)
def main():
    """Plot STFTs of a sine wave, an FM signal and an audio file."""
    window = nussl.constants.WINDOW_HANN
    rate = nussl.DEFAULT_SAMPLE_RATE

    # Time axis shared by both synthetic signals.
    step = 1.0 / float(rate)
    duration = 10.0  # sec
    times = np.arange(0.0, duration, step)

    # Plot a simple sine wave at 5kHz
    sine_freq = 5000  # Hz
    sine = np.sin(2 * np.pi * sine_freq * times)
    sine_signal = nussl.AudioSignal(audio_data_array=sine)
    nussl.plot_stft(sine_signal.get_channel(1),
                    '../Output/sine_wave5000Hz.png',
                    window_type=window)

    # Make a FM signal (carrier modulated by a slow LFO), add the sine wave
    # on top of it, and plot the result.
    lfo_freq = 3
    lfo_amp = 800
    carrier_freq = 15000
    modulator = lfo_amp * np.sin(2 * np.pi * lfo_freq * times)
    fm = np.sin(2 * np.pi * carrier_freq * times + modulator)
    fm += sine
    fm_signal = nussl.AudioSignal(audio_data_array=fm)
    nussl.plot_stft(fm_signal.get_channel(1),
                    '../Output/fm_wave.png',
                    window_type=window)

    # Plot from a file
    from_file = nussl.AudioSignal('../Input/police_noisy.wav')
    nussl.plot_stft(from_file.get_channel(1),
                    '../Output/police_noisy.png',
                    freq_max=18000)

    # Plot through audio_signal
    from_file.plot_spectrogram()
def test_load(benchmark_audio):
    """Check that loading from file and from array yield the same signal."""
    wav_path = benchmark_audio['K0140.wav']

    # Load from file: constructor vs. explicit load_audio_from_file()
    from_ctor = nussl.AudioSignal(wav_path)
    from_method = nussl.AudioSignal()
    from_method.load_audio_from_file(wav_path)

    assert np.array_equal(from_ctor.audio_data, from_method.audio_data)
    assert from_ctor.sample_rate == from_method.sample_rate

    # Load from array
    ref_sr, ref_data = wav.read(wav_path)
    array_ctor = nussl.AudioSignal(audio_data_array=ref_data,
                                   sample_rate=ref_sr)

    # Passing a file path together with an array must raise, both
    # positionally and by keyword.
    with pytest.raises(Exception):
        nussl.AudioSignal(wav_path, ref_data)
    with pytest.raises(Exception):
        nussl.AudioSignal(path_to_input_file=wav_path,
                          audio_data_array=ref_data)

    array_method = nussl.AudioSignal()
    array_method.load_audio_from_array(ref_data, ref_sr)

    assert np.array_equal(array_ctor.audio_data, array_method.audio_data)
    assert array_ctor.sample_rate == array_method.sample_rate

    # File-based and array-based loading must agree with each other.
    assert from_method.sample_rate == array_ctor.sample_rate
    assert np.array_equal(from_method.audio_data, array_ctor.audio_data)
def test_stft_istft_defaults(benchmark_audio, atol=stft_tol):
    """Check that STFT followed by ISTFT reconstructs the audio.

    Args:
        benchmark_audio: mapping of name -> audio file path; every file is
            round-tripped through ``stft()`` / ``istft()``.
        atol: absolute tolerance used for every reconstruction comparison.
            Defaults to the module-level ``stft_tol``.
    """
    # Transforms on a signal with no data (or no STFT data) must raise.
    dummy = nussl.AudioSignal()
    pytest.raises(AudioSignalException, dummy.stft)

    a = nussl.AudioSignal(audio_data_array=sine_wave)
    pytest.raises(AudioSignalException, a.istft)
    a.stft()
    a.istft()

    a = nussl.AudioSignal(audio_data_array=sine_wave)
    a.stft()
    calc_sine = a.istft(overwrite=False)
    # Honour the `atol` argument rather than the global it defaults to.
    assert np.allclose(a.audio_data, calc_sine, atol=atol)

    # also load another object with stft_data
    b = nussl.AudioSignal(stft=a.stft(), sample_rate=a.sample_rate)
    b.istft()
    # Compare only the overlapping samples of the two signals.
    min_length = min(b.audio_data.shape[1], a.audio_data.shape[1])
    assert np.allclose(a.audio_data[:, :min_length],
                       b.audio_data[:, :min_length],
                       atol=atol)

    for key, path in benchmark_audio.items():
        a = nussl.AudioSignal(path)
        a.stft()
        recon = a.istft(overwrite=False)
        assert np.allclose(a.audio_data, recon, atol=atol)
def test_to_mono(self):
    """Exercise ``AudioSignal.to_mono()`` on mono and stereo input.

    A mono signal must stay mono.  A stereo signal must only collapse to
    one channel when ``overwrite=True`` is passed; since its two channels
    are exact negatives of each other, the mono mix must be all zeros.
    """
    n_samples = nussl.DEFAULT_SAMPLE_RATE  # 1 second
    tone = np.sin(np.linspace(0, 100 * 2 * np.pi, n_samples))  # Freq = 100 Hz

    mono = nussl.AudioSignal(audio_data_array=tone)
    assert mono.num_channels == 1
    mono.to_mono(overwrite=True)
    assert mono.num_channels == 1

    inverted = -1 * tone
    stereo = nussl.AudioSignal(audio_data_array=np.vstack((tone, inverted)))
    assert stereo.num_channels == 2

    # Without overwrite (implicit or explicit) the signal is left untouched.
    stereo.to_mono()
    assert stereo.num_channels == 2
    stereo.to_mono(overwrite=False)
    assert stereo.num_channels == 2

    stereo.to_mono(overwrite=True)
    assert stereo.num_channels == 1
    silence = [0.0] * len(stereo)
    assert np.allclose(silence, stereo.audio_data)
def test_write_sample_rate(self):
    """Write a signal at half its sample rate and verify the stored rate."""
    source = nussl.AudioSignal(self.audio_input1)
    halved_rate = source.sample_rate // 2

    source.write_audio_to_file(self.audio_output, sample_rate=halved_rate)

    written = nussl.AudioSignal(self.audio_output)
    assert written.sample_rate == halved_rate
def test_properties(benchmark_audio):
    """Check AudioSignal's derived properties before and after loading data.

    Args:
        benchmark_audio: mapping of file name -> path; only ``'K0140.wav'``
            is used.
    """
    # A freshly constructed signal exposes no data-dependent properties.
    a = nussl.AudioSignal()
    assert a.signal_duration is None
    assert a.signal_length is None
    assert a._signal_length is None
    assert a.num_channels is None
    assert a.time_vector is None
    assert a.file_name is None
    assert not a.has_data

    a = nussl.AudioSignal(benchmark_audio['K0140.wav'])
    assert a.has_data
    assert a.file_name == 'K0140.wav'
    assert len(a.time_vector) == a.audio_data.shape[-1]

    # STFT-derived properties must raise until stft() has been called.
    pytest.raises(AudioSignalException, lambda x: x.stft_length, a)
    pytest.raises(AudioSignalException, lambda x: x.time_bins_vector, a)
    pytest.raises(AudioSignalException, lambda x: x.freq_vector, a)

    a.stft()
    assert a.stft_length == a.stft_data.shape[1]
    assert len(a.time_bins_vector) == a.stft_length
    # This check appeared twice verbatim; once is enough.
    assert len(a.freq_vector) == a.stft_data.shape[0]

    # NOTE(review): has_data is expected to stay truthy after audio_data is
    # cleared -- presumably because stft_data is still populated; confirm
    # against AudioSignal.has_data.
    a.audio_data = None
    assert a.has_data
def test_write_sample_rate(self):
    """Write a signal at half its sample rate and verify the stored rate."""
    source = nussl.AudioSignal(self.input_path1)
    halved_rate = source.sample_rate // 2

    source.write_audio_to_file(self.out_path1, sample_rate=halved_rate)

    written = nussl.AudioSignal(self.out_path1)
    assert written.sample_rate == halved_rate
def test_simple(self):
    """Run RepetSim and Repet on drum/flute mixtures at several drum gains.

    The flute is truncated to the drum signal's length, the drums are scaled
    by each gain, and every scaled drum track is mixed with the flute.  Both
    algorithms are then evaluated with ``nussl.run_and_eval_prf``.
    """
    drums = nussl.AudioSignal(os.path.join('Input', 'src1.wav'))
    flute = nussl.AudioSignal(os.path.join('Input', 'src2.wav'))
    flute.truncate_samples(drums.signal_length)

    gains = [1.0, 0.75, 0.5, 0.25, 0.0]  # gain settings

    # drums with different gains
    drum_sigs = [
        drums.make_copy_with_audio_data(drums.audio_data * g) for g in gains
    ]
    mixtures = [d + flute for d in drum_sigs]  # mix everything together
    true_sources = [[flute, d] for d in drum_sigs]

    repet_kwargs = {}

    # NOTE(review): neither score object is asserted on, so this test only
    # verifies that both evaluation runs complete without raising.
    scores_sim = nussl.run_and_eval_prf(nussl.RepetSim, repet_kwargs,
                                        mixtures, true_sources)
    scores_repet = nussl.run_and_eval_prf(nussl.Repet, repet_kwargs,
                                          mixtures, true_sources)
def test_utils_audio_signals_to_musdb_track(musdb_tracks):
    """Convert AudioSignal sources back into a musdb track and compare stems.

    True sources must reproduce the original stems; randomly masked sources
    only need to produce stems of the same shape.
    """
    to_track = nussl.utils.audio_signals_to_musdb_track
    targets = nussl.constants.STEM_TARGET_DICT

    track = musdb_tracks[0]
    mixture = nussl.AudioSignal(audio_data_array=track.audio,
                                sample_rate=track.rate)
    mixture.stft()
    stems = track.stems

    true_sources = {}
    fake_sources = {}

    ordered = sorted(track.sources.items(), key=lambda kv: kv[1].stem_id)
    for name, source in ordered:
        true_sources[name] = nussl.AudioSignal(
            audio_data_array=stems[source.stem_id], sample_rate=track.rate)

        # Build a fake estimate by applying a random soft mask to the mixture.
        random_mask = SoftMask(np.random.rand(*mixture.stft_data.shape))
        estimate = mixture.apply_mask(random_mask)
        estimate.istft(truncate_to_length=mixture.signal_length)
        fake_sources[name] = estimate

    separated_track = to_track(mixture, fake_sources, targets)
    reconstructed_track = to_track(mixture, true_sources, targets)

    assert np.allclose(track.stems, reconstructed_track.stems)
    assert track.stems.shape == separated_track.stems.shape
def test_sr_on_load_from_array(self):
    """Verify ``load_audio_from_array`` stores the sample rate it is given."""
    from_file = nussl.AudioSignal(self.audio_input1)

    rate, samples = wav.read(self.audio_input1)
    from_array = nussl.AudioSignal()
    from_array.load_audio_from_array(samples, sample_rate=rate)

    assert from_file.sample_rate == from_array.sample_rate
    assert np.allclose(from_file.audio_data, from_array.audio_data)
def test_resample_on_load_from_file(self):
    """Compare resampling while loading against resampling after loading."""
    target_rate = 48000

    resampled_after = nussl.AudioSignal(self.audio_input1)
    resampled_after.resample(target_rate)

    resampled_on_load = nussl.AudioSignal()
    resampled_on_load.load_audio_from_file(self.audio_input1,
                                           new_sample_rate=target_rate)

    assert resampled_after.sample_rate == resampled_on_load.sample_rate
    assert np.allclose(resampled_after.audio_data,
                       resampled_on_load.audio_data)
def test_write_to_file_array1(self):
    """Round-trip an array-constructed signal through a written file."""
    rate, samples = wav.read(self.input_path1)
    original = nussl.AudioSignal(audio_data_array=samples, sample_rate=rate)
    original.write_audio_to_file(self.out_path1)

    reloaded = nussl.AudioSignal(self.out_path1)

    assert original.sample_rate == reloaded.sample_rate
    assert np.allclose(original.audio_data, reloaded.audio_data)
def test_write_to_file_path2(self):
    """Round-trip a signal loaded via ``load_audio_from_file`` through disk."""
    original = nussl.AudioSignal()
    original.load_audio_from_file(self.audio_input1)
    original.write_audio_to_file(self.audio_output)

    reloaded = nussl.AudioSignal(self.audio_output)

    assert original.sample_rate == reloaded.sample_rate
    assert np.allclose(original.audio_data, reloaded.audio_data)
def main():
    """Build a time-vs-bin count matrix from deep clustering output and plot it.

    The mixture, vocal and backing tracks are loaded and reduced to mono.
    Depending on the module-level ``RUN_DC`` flag, the deep clustering masks
    and binned data are either computed or loaded from ``pickles/*.npy``.
    Every item in ``binned`` is then mapped to a time index and counted in
    ``tdc_space``, which is handed to ``plot_tdc``.
    """
    model_path = "/Users/ethanmanilow/Documents/School/Research/audio_representations/website/backend/models/data/models/deep_clustering_vocals_44k_long.model"
    cutoff = -40
    output_dir = os.path.join("output", "tdc_test")
    n_bins = 100

    def _load_mono(path):
        # Load one file and collapse it to a single channel in place.
        loaded = nussl.AudioSignal(path)
        loaded.to_mono(overwrite=True)
        return loaded

    # Load 'em all into memory
    song_path = "/Users/ethanmanilow/Documents/School/Research/audio_representations/website/backend/scripts/test_files"
    mix = _load_mono(os.path.join(song_path, "mix.wav"))
    vox = _load_mono(os.path.join(song_path, "vox.wav"))
    bak = _load_mono(os.path.join(song_path, "gtr.wav"))

    if RUN_DC:
        # Ground-truth masks are computed but not used further below.
        _gt_vox_mask, _gt_bak_mask, _silence = ct.mel_mask(
            mix,
            vox,
            bak,
            mix.sample_rate,
            bg_mask_inverse=False,
            silence_mask_cutoff=cutoff,
        )
        separator = nussl.DeepClustering(
            mix,
            model_path=model_path,
            mask_type="binary",
            do_mono=True,
            return_mel_masks=True,
            pca_before_clustering=False,
            cutoff=cutoff,
        )
        vox_mask_obj, bk_mask_obj, binned, _mel, scaled = (
            ct.deep_clustering_mask(separator))
        dc_vox_mask = vox_mask_obj.get_channel(0)
        dc_bk_mask = bk_mask_obj.get_channel(0)
    else:
        # Reuse results stored by an earlier run.
        pickle_dir = "pickles"
        dc_vox_mask = np.load(os.path.join(pickle_dir, "dc_vox_mask.npy"))
        dc_bk_mask = np.load(os.path.join(pickle_dir, "dc_bk_mask.npy"))
        binned = np.load(os.path.join(pickle_dir, "binned.npy"))
        scaled = np.load(os.path.join(pickle_dir, "scaled.npy"))

    # One row per bin, one column per entry along the mask's first axis.
    tdc_space = np.zeros((n_bins, dc_vox_mask.shape[0]))
    for (_row, col), tf_items in np.ndenumerate(binned):
        for tf_index in tf_items:
            time_idx, _freq_idx = get_coordinate_from_TF_index(
                tf_index, dc_vox_mask.shape[1])
            tdc_space[col, time_idx] += 1

    plot_tdc(tdc_space, mix.time_vector[-1], output_dir, "tdc_test3.png")
def test_write_to_file_array2(self):
    """Round-trip a signal loaded via ``load_audio_from_array`` through disk."""
    rate, samples = wav.read(self.audio_input1)

    original = nussl.AudioSignal()
    original.load_audio_from_array(samples, rate)
    original.write_audio_to_file(self.audio_output)

    reloaded = nussl.AudioSignal(self.audio_output)

    assert original.sample_rate == reloaded.sample_rate
    assert np.allclose(original.audio_data, reloaded.audio_data)
def test_resample(benchmark_audio):
    """Check that ``resample`` updates the sample rate property.

    Resampling to half the rate must be reflected in ``sample_rate``;
    resampling to the current rate must emit a ``UserWarning``.
    """
    for audio_path in benchmark_audio.values():
        reference = nussl.AudioSignal(audio_path)
        halved = nussl.AudioSignal(audio_path)

        half_rate = reference.sample_rate / 2
        halved.resample(half_rate)
        assert halved.sample_rate == reference.sample_rate / 2

        with pytest.warns(UserWarning):
            reference.resample(reference.sample_rate)
def test_write_sample_rate(benchmark_audio):
    """Write each benchmark file at half its rate and verify the stored rate."""
    for audio_path in benchmark_audio.values():
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as tmp:
            source = nussl.AudioSignal(audio_path)
            halved_rate = source.sample_rate // 2

            source.write_audio_to_file(tmp.name, sample_rate=halved_rate)

            written = nussl.AudioSignal(tmp.name)
            assert written.sample_rate == halved_rate
def test_multiple_duet(self):
    """Reuse one Duet instance across several signals.

    After running on two other signals, the masks produced for
    ``self.dev1_female3`` must still equal the stored benchmark masks.
    """
    expected_masks = self.benchmark_dict['benchmark_masks']

    duet = nussl.Duet(self.signal, 3)
    duet.run()

    duet.audio_signal = nussl.AudioSignal(self.dev1_wdrums)
    duet.run()

    duet.audio_signal = nussl.AudioSignal(self.dev1_female3)
    produced_masks = duet.run()

    for idx, produced in enumerate(produced_masks):
        assert np.array_equal(expected_masks[idx].mask, produced.mask)
def test_write_array_to_file(benchmark_audio):
    """Round-trip array-constructed signals through a temporary wav file."""
    for audio_path in benchmark_audio.values():
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as tmp:
            rate, samples = wav.read(audio_path)
            original = nussl.AudioSignal(audio_data_array=samples,
                                         sample_rate=rate)
            original.write_audio_to_file(tmp.name)

            reloaded = nussl.AudioSignal(tmp.name)

            assert original.sample_rate == reloaded.sample_rate
            assert np.allclose(original.audio_data, reloaded.audio_data)
def test_resample_on_load_from_file(benchmark_audio):
    """Compare resampling while loading against resampling after loading.

    Also checks that constructing a signal from a file with
    ``sample_rate=16000`` emits a ``UserWarning``.
    """
    target_rate = 48000
    # Only the first benchmark file is used.
    path = list(benchmark_audio.values())[0]

    resampled_after = nussl.AudioSignal(path)
    resampled_after.resample(target_rate)

    resampled_on_load = nussl.AudioSignal()
    resampled_on_load.load_audio_from_file(path, new_sample_rate=target_rate)

    assert resampled_after.sample_rate == resampled_on_load.sample_rate
    assert np.allclose(resampled_after.audio_data,
                       resampled_on_load.audio_data)

    with pytest.warns(UserWarning):
        nussl.AudioSignal(path, sample_rate=16000)
def setUp(self):
    """Load a 30-second excerpt of the premade mixture and two of its stems.

    ``self.new_mixture`` is the sum of the vocals and drums excerpts.
    """
    mixture_dir = os.path.join('..', 'input', 'mixture')
    # Every file is read with the same excerpt window.
    excerpt = {'duration': 30, 'offset': 60}

    self.premade_mixture = nussl.AudioSignal(
        os.path.join(mixture_dir, 'mixture.wav'), **excerpt)
    self.vocals = nussl.AudioSignal(
        os.path.join(mixture_dir, 'vocals.wav'), **excerpt)
    self.drums = nussl.AudioSignal(
        os.path.join(mixture_dir, 'drums.wav'), **excerpt)

    self.new_mixture = self.vocals + self.drums
def test_write_to_file(benchmark_audio):
    """Write each benchmark file to a temp file and check the sample rate.

    Writing a signal that holds no data must raise ``AudioSignalException``.
    """
    for audio_path in benchmark_audio.values():
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as tmp:
            empty = nussl.AudioSignal()
            with pytest.raises(AudioSignalException):
                empty.write_audio_to_file(tmp.name)

            original = nussl.AudioSignal(audio_path)
            original.write_audio_to_file(tmp.name)

            reloaded = nussl.AudioSignal(tmp.name)
            assert original.sample_rate == reloaded.sample_rate
def setUp(self):
    """Load the mono and stereo piano/synth fixtures.

    When the tests are started from inside the ``tests`` directory, the
    working directory is first moved up one level so the relative ``input``
    paths resolve.
    """
    cwd_name = os.path.basename(os.path.normpath(os.getcwd()))
    if cwd_name == 'tests':
        os.chdir('..')

    mono_path = os.path.join('input', 'piano_and_synth_arp_chord_mono.wav')
    stereo_path = os.path.join('input',
                               'piano_and_synth_arp_chord_stereo.wav')

    self.signal_mono = nussl.AudioSignal(mono_path)
    self.signal_stereo = nussl.AudioSignal(stereo_path)

    # number of sources in both of these files
    self.n_src = 2
def test_stft_istft_simple2(self):
    """Round-trip the sine wave through STFT/ISTFT with and without librosa.

    The librosa path is compared with ``atol=1e-3``; the non-librosa path
    uses ``np.allclose``'s default tolerance.
    """
    cases = (
        (True, {'atol': 1e-3}),
        (False, {}),
    )
    for use_librosa, tolerance in cases:
        signal = nussl.AudioSignal(audio_data_array=self.sine_wave)
        signal.stft(use_librosa=use_librosa)
        reconstructed = signal.istft(overwrite=False,
                                     use_librosa=use_librosa)
        assert np.allclose(signal.audio_data, reconstructed, **tolerance)
def _get_signal(with_stems=False, track='Track00001'):
    """Load a babyslakh track's mix and, optionally, its stems.

    Args:
        with_stems: when true, also load every ``.wav`` file found in the
            track's ``stems`` directory.
        track: name of the track directory inside the babyslakh folder.

    Returns:
        The mix ``AudioSignal`` alone when ``with_stems`` is false; otherwise
        a ``(mix, stems)`` tuple where ``stems`` maps each stem file name
        (without extension) to its ``AudioSignal``.
    """
    babyslakh = os.path.expanduser('~/Documents/School/Research/babyslakh_16k')
    track_dir = os.path.join(babyslakh, track)

    mix = nussl.AudioSignal(os.path.join(track_dir, 'mix.wav'))
    if not with_stems:
        return mix

    # Build the stems path from the track directory directly.  Joining the
    # dataset root onto the already-joined track path only worked because an
    # absolute second component makes os.path.join discard the first.
    stems_dir = os.path.join(track_dir, 'stems')

    stems = {}
    for file_name in os.listdir(stems_dir):
        stem_name, extension = os.path.splitext(file_name)
        if extension == '.wav':
            stems[stem_name] = nussl.AudioSignal(
                os.path.join(stems_dir, file_name))

    return mix, stems
def estimated_and_true_sources(musdb_tracks):
    """Yield true, randomly masked and oracle-masked sources for one track.

    A track is picked at random.  For each of its sources (ordered by
    ``stem_id``) the true signal is collected, along with an estimate made
    with a random soft mask and one made with a mask derived from the source
    and mixture STFT magnitudes.

    Yields:
        dict with keys ``'oracle'``, ``'random'``, ``'true'``, ``'keys'``,
        ``'oracle_masks'`` and ``'random_masks'``, each a list with one entry
        per source.
    """
    eps = 1e-8

    track = musdb_tracks[np.random.randint(len(musdb_tracks))]
    mixture = nussl.AudioSignal(audio_data_array=track.audio,
                                sample_rate=track.rate)
    mixture.stft()
    stems = track.stems

    collected = {
        'oracle': [],
        'random': [],
        'true': [],
        'keys': [],
        'oracle_masks': [],
        'random_masks': [],
    }

    ordered = sorted(track.sources.items(), key=lambda kv: kv[1].stem_id)
    for name, source in ordered:
        true_source = nussl.AudioSignal(
            audio_data_array=stems[source.stem_id], sample_rate=track.rate)
        collected['true'].append(true_source)
        collected['keys'].append(name)

        # Estimate obtained with a uniformly random soft mask.
        random_mask = SoftMask(np.random.rand(*mixture.stft_data.shape))
        random_source = mixture.apply_mask(random_mask)
        random_source.istft(truncate_to_length=mixture.signal_length)
        collected['random'].append(random_source)
        collected['random_masks'].append(random_mask)

        # Oracle mask: source magnitude over the larger of the mixture and
        # source magnitudes, with eps guarding against division by zero.
        source_mag = np.abs(true_source.stft())
        mixture_mag = np.abs(mixture.stft_data)
        oracle_values = ((source_mag + eps) /
                         (np.maximum(mixture_mag, source_mag) + eps))
        oracle_mask = SoftMask(oracle_values)
        oracle_source = mixture.apply_mask(oracle_mask)
        oracle_source.istft(truncate_to_length=mixture.signal_length)
        collected['oracle'].append(oracle_source)
        collected['oracle_masks'].append(oracle_mask)

    yield collected
def mix_source_folder(toy_datasets):
    """Create a folder of random two-source mixtures and yield its path.

    Ten mixtures are built from randomly chosen wav files of the babywsj
    dataset.  The sources of each mixture are truncated to their shortest
    length and written to ``s0/`` and ``s1/``; their sum is written to
    ``mix/``.  Output goes under ``fix_dir`` when that module-level name is
    set, otherwise under a temporary directory.

    Yields:
        Path of the ``mix_source_folder`` directory.
    """
    wsj_sources = toy_datasets['babywsj_oW0F0H9.zip']
    audio_files = glob.glob(f"{wsj_sources}/**/*.wav", recursive=True)
    n_sources = 2
    n_mixtures = 10

    def _write(signal, destination):
        # Make sure the target directory exists before writing the file.
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        signal.write_audio_to_file(destination)

    with tempfile.TemporaryDirectory() as tmp_dir:
        root = fix_dir if fix_dir is not None else tmp_dir
        _dir = os.path.join(root, 'mix_source_folder')

        for i in range(n_mixtures):
            sources = [
                nussl.AudioSignal(random.choice(audio_files))
                for _ in range(n_sources)
            ]
            min_length = min(s.signal_length for s in sources)

            for n, source in enumerate(sources):
                output_path = os.path.join(_dir, f's{n}', f'{i}.wav')
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                source.truncate_samples(min_length)
                source.write_audio_to_file(output_path)

            mix = sum(sources)
            _write(mix, os.path.join(_dir, 'mix', f'{i}.wav'))

        yield _dir
def _get_channel_helper(signal, n_channels):
    """Verify channel counting and channel access for an array-backed signal.

    Args:
        signal: array used to build the ``AudioSignal``; iterating over it
            must yield one entry per channel.
        n_channels: number of channels the resulting signal should report.
    """
    audio = nussl.AudioSignal(audio_data_array=signal)

    # Check that we are counting our channels correctly
    assert audio.num_channels == n_channels

    # Check that we can get every channel with AudioSignal.get_channel()
    for index, expected in enumerate(signal):
        assert np.array_equal(audio.get_channel(index), expected)

    # Check that attempting to get higher channels raises exception
    for index in range(n_channels, n_channels + 10):
        with pytest.raises(AudioSignalException):
            audio.get_channel(index)

    # Check that attempting to get lower channels raises exception
    for index in range(-1, -11, -1):
        with pytest.raises(AudioSignalException):
            audio.get_channel(index)

    # Check that AudioSignal.get_channels() generator works
    visited = 0
    for index, channel in enumerate(audio.get_channels()):
        assert np.array_equal(channel, signal[index, :])
        single = audio.make_audio_signal_from_channel(index)
        assert np.array_equal(channel, single.audio_data[0])
        visited = index + 1

    assert visited == audio.num_channels