def load_wav_as_mfcc_delta(fname, target_size=None, noise_files=None, augment_with_noise=False, class_dir=None):
    """Load a wave file as a 4-channel stack of MFCCs and their deltas.

    Args:
        fname: Path of the wave file, read with ``utils.read_wave_file``.
        target_size: Optional ``(rows, cols)`` pair. ``target_size[0]`` is
            used as the number of MFCC coefficients and every feature map
            is resized to ``target_size``. When falsy, 20 coefficients are
            computed and no resizing is done.
        noise_files: Forwarded to ``da.noise_augmentation`` when
            ``augment_with_noise`` is true.
        augment_with_noise: Whether to apply ``da.noise_augmentation``.
        class_dir: When truthy, the signal is first passed through
            ``da.same_class_augmentation`` with this directory.

    Returns:
        A 3-D array whose last axis has length 4, holding (in order) the
        MFCCs and their first-order deltas of width 3, 11 and 19.
    """
    (fs, signal) = utils.read_wave_file(fname)

    if class_dir:
        signal = da.same_class_augmentation(signal, class_dir)

    if augment_with_noise:
        signal = da.noise_augmentation(signal, noise_files)

    # target_size defaults to None, so indexing it unconditionally would
    # raise TypeError; fall back to 20, librosa's own n_mfcc default.
    n_mfcc = target_size[0] if target_size else 20

    # Keyword arguments work on every librosa release, whereas recent
    # releases reject a positional signal / sample rate.
    mfcc = librosa.feature.mfcc(y=signal, sr=fs, n_mfcc=n_mfcc)

    features = [mfcc]
    features.extend(
        librosa.feature.delta(mfcc, width=width, order=1)
        for width in (3, 11, 19)
    )

    if target_size:
        # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3; it is
        # kept here because a replacement would change the output scaling.
        features = [scipy.misc.imresize(feature, target_size) for feature in features]

    # Give each 2-D map a trailing channel axis, then stack along it.
    channels = [
        feature.reshape(feature.shape[0], feature.shape[1], 1)
        for feature in features
    ]
    return np.concatenate(channels, axis=2)
def noise_augmentation_from_dirs(noise_dir, class_dir):
    """Plot one random class recording before and after noise augmentation.

    A ``*.wav`` file is drawn at random from ``class_dir`` and augmented
    with ``da.noise_augmentation`` using ``noise_dir``. The spectrograms of
    the original and the augmented signal are drawn one above the other and
    saved to a PNG whose name is the recording's basename followed by
    ``_noise_augmentation.png``. The figure is cleared and closed afterwards.

    Args:
        noise_dir: Passed as the second argument to ``da.noise_augmentation``.
        class_dir: Directory searched for ``*.wav`` files.
    """
    candidates = glob.glob(os.path.join(class_dir, "*.wav"))
    chosen_path = np.random.choice(candidates, 1, replace=False)[0]

    (sample_rate, clean) = utils.read_wave_file(chosen_path)
    noisy = da.noise_augmentation(clean, noise_dir)

    # One (spectrogram, title) pair per subplot row, top to bottom.
    panels = (
        (sp.wave_to_sample_spectrogram(clean, sample_rate), "Original Signal"),
        (sp.wave_to_sample_spectrogram(noisy, sample_rate), "Noise Augmented signal"),
    )

    figure = plt.figure(1)
    colormap = plt.cm.get_cmap('jet')
    layout = gridspec.GridSpec(2, 1)

    for row, (spectrogram, title) in enumerate(panels):
        axis = figure.add_subplot(layout[row, 0])
        axis.pcolormesh(spectrogram, cmap=colormap)
        axis.set_title(title)

    layout.update(wspace=0.5, hspace=0.5)

    output_name = utils.get_basename_without_ext(chosen_path) + "_noise_augmentation.png"
    figure.savefig(output_name)

    # Release the figure so repeated calls do not accumulate state.
    figure.clf()
    plt.close(figure)
def load_wav_as_tempogram(fname, target_size=None, noise_files=None, augment_with_noise=False, class_dir=None):
    """Load a wave file as a single-channel tempogram.

    Args:
        fname: Path of the wave file, read with ``utils.read_wave_file``.
        target_size: When truthy, the tempogram is resized to this size
            with ``scipy.misc.imresize``.
        noise_files: Forwarded to ``da.noise_augmentation`` when
            ``augment_with_noise`` is true.
        augment_with_noise: Whether to apply ``da.noise_augmentation``.
        class_dir: When truthy, the signal is first passed through
            ``da.same_class_augmentation`` with this directory.

    Returns:
        The tempogram reshaped to ``(rows, cols, 1)``.
    """
    sample_rate, waveform = utils.read_wave_file(fname)

    if class_dir:
        waveform = da.same_class_augmentation(waveform, class_dir)
    if augment_with_noise:
        waveform = da.noise_augmentation(waveform, noise_files)

    image = sp.wave_to_tempogram(waveform, sample_rate)
    if target_size:
        image = scipy.misc.imresize(image, target_size)

    # Append a trailing channel axis of length one.
    n_rows, n_cols = image.shape[0], image.shape[1]
    return image.reshape((n_rows, n_cols, 1))
def load_wav_as_mfcc(fname, target_size=None, noise_files=None, augment_with_noise=False, class_dir=None):
    """Load a wave file as a single-channel MFCC map.

    Args:
        fname: Path of the wave file, read with ``utils.read_wave_file``.
        target_size: Optional ``(rows, cols)`` pair. ``target_size[0]`` is
            used as the number of MFCC coefficients and the result is
            resized to ``target_size``. When falsy, 20 coefficients are
            computed and no resizing is done.
        noise_files: Forwarded to ``da.noise_augmentation`` when
            ``augment_with_noise`` is true.
        augment_with_noise: Whether to apply ``da.noise_augmentation``.
        class_dir: When truthy, the signal is first passed through
            ``da.same_class_augmentation`` with this directory.

    Returns:
        The MFCC map reshaped to ``(rows, cols, 1)``.
    """
    (fs, signal) = utils.read_wave_file(fname)

    if class_dir:
        signal = da.same_class_augmentation(signal, class_dir)

    if augment_with_noise:
        signal = da.noise_augmentation(signal, noise_files)

    # target_size defaults to None, so indexing it unconditionally would
    # raise TypeError; fall back to 20, librosa's own n_mfcc default.
    n_mfcc = target_size[0] if target_size else 20

    # Keyword arguments work on every librosa release, whereas recent
    # releases reject a positional signal / sample rate.
    mfcc = librosa.feature.mfcc(y=signal, sr=fs, n_mfcc=n_mfcc)

    if target_size:
        # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3; it is
        # kept here because a replacement would change the output scaling.
        mfcc = scipy.misc.imresize(mfcc, target_size)

    # Append a trailing channel axis of length one.
    return mfcc.reshape(mfcc.shape[0], mfcc.shape[1], 1)
def noise_augmentation():
    """Run ``da.noise_augmentation`` once on ``x`` and ``noise_files_small``.

    Both names are resolved outside this function, and the augmented
    result is discarded, so the call returns ``None``.

    NOTE(review): looks like a zero-argument wrapper for timing the
    augmentation -- confirm against the caller.
    """
    _unused = da.noise_augmentation(x, noise_files_small)  # result intentionally dropped