def load_wav(wav_file):
    """Read one audio file and run onset detection on its samples.

    Args:
        wav_file: Value handed straight to ``wav.read_wav`` (presumably a
            path to a WAV file -- confirm against ``wav.read_wav``).

    Returns:
        The 4-tuple ``(pcm_data, onset_times, samplerate, sums)``, where
        ``pcm_data`` and ``samplerate`` come from ``wav.read_wav`` and
        ``sums`` and ``onset_times`` come from
        ``onset_detection.detect_onset``.
    """
    # NOTE(review): the meaning of the second argument (400) is defined by
    # wav.read_wav and is not visible here.
    samples, rate = wav.read_wav(wav_file, 400)

    # NOTE(review): the two positional flags (False, True) are defined by
    # detect_onset; with them the call is expected to yield exactly two values.
    detection = onset_detection.detect_onset(samples, rate, False, True)
    sums, onset_times = detection

    return samples, onset_times, rate, sums
def load_wavs(self):
    """Load the raw training files and record their samples and onsets.

    For every name returned by ``wav.get_training_filenames()`` the file is
    read, ``self.samplerate`` is overwritten with that file's sample rate,
    and the samples / detected onsets are appended to ``self.source_pcms``
    and ``self.onsetsall`` respectively.

    Returns:
        A list of consecutive integer indices ``[0, 1, ..., n - 1]``, one
        per training file.
    """
    filenames = wav.get_training_filenames()
    print(filenames)

    for filename in filenames:
        samples, self.samplerate = wav.read_wav(filename, 400)
        # Detect onsets before storing anything, so a failure here leaves
        # both lists untouched for this file.
        file_onsets = onset_detection.detect_onset(samples, self.samplerate)
        self.source_pcms.append(samples)
        self.onsetsall.append(file_onsets)

    return list(range(len(filenames)))
# Script fragment: read the dataset paths from the command line, load the
# four WAV files, check that paired files have equal length and, in
# 'explore' mode, hand everything to an Explorer.
# NOTE(review): `mode` is defined outside this fragment (presumably from
# sys.argv[1] -- confirm).
x_training_path = sys.argv[2]
y_training_path = sys.argv[3]
x_validation_path = sys.argv[4]
y_validation_path = sys.argv[5]
generic_path = sys.argv[6]

# Hard-coded switch; when set, CUDA_VISIBLE_DEVICES is forced to "-1"
# (presumably so that no GPU is visible and work runs on the CPU -- confirm
# for the framework in use).
use_cpu = True
if use_cpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = "-1"

# The profiler is created before loading and stopped right after, so the
# message below is reported once all four files have been read.
profiler = Profiler()
x_training_wav = read_wav(x_training_path)
y_training_wav = read_wav(y_training_path)
x_validation_wav = read_wav(x_validation_path)
y_validation_wav = read_wav(y_validation_path)
profiler.stop('Done loading WAV files.')

# Each x/y pair must have the same length (the error message calls them the
# "dry" and "wet" signals).
if len(x_training_wav) != len(y_training_wav) or len(x_validation_wav) != len(
        y_validation_wav):
    raise Exception('Dry and wet WAVs must be same length.')

if mode == 'explore':
    explorer = Explorer(x_training_wav, y_training_wav, x_validation_wav,
                        y_validation_wav, generic_path)
    explorer.explore()
# Script fragment: validate the command line, load the four WAV files and
# trim the leading zero samples from the training pair.

# Exactly five arguments are required after the script name.
if len(sys.argv) != 6:
    print('Usage:')
    print(
        f'python {sys.argv[0]} <dry training WAV file> <wet training WAV file> <dry validation WAV file> <wet validation WAV file> <output WAV file>'
    )
    sys.exit(1)

# The length check above guarantees that sys.argv[1:] holds five items.
(dry_training_wav_path,
 wet_training_wav_path,
 dry_validation_wav_path,
 wet_validation_wav_path,
 output_wav_path) = sys.argv[1:]

dry_training_wav = read_wav(dry_training_wav_path)
wet_training_wav = read_wav(wet_training_wav_path)
dry_validation_wav = read_wav(dry_validation_wav_path)
wet_validation_wav = read_wav(wet_validation_wav_path)

# Index of the first non-zero sample in the dry training signal; stays 0
# when every sample is zero.
training_offset = 0
for i, sample in enumerate(dry_training_wav):
    if sample != 0:
        training_offset = i
        break

# Drop the same number of leading samples from both signals so that the
# dry/wet pair stays aligned.
dry_training_wav = dry_training_wav[training_offset:]
wet_training_wav = wet_training_wav[training_offset:]

# NOTE(review): the consumers of these two constants are outside this
# fragment; their meaning cannot be confirmed from here.
block_size = 128
offset = 21651
else: clean_file_list = [clean_file_path] noisy_file_list = [noisy_file_path] cf.clear_csv_file(csv_file_path) cf.write_csv_file(csv_file_path, "file name", "SNR,SSNR,PESQ") for i in range(len(clean_file_list)): if clean_path_isdir: file_name = clean_file_list[i].replace(clean_file_path, "") else: file_name = os.path.basename(clean_file_path) print("({}/{}){} /".format(i + 1, len(clean_file_list), file_name), end="") # read train data file clean_signal, clean_sample_rate = wav.read_wav(clean_file_list[i]) noisy_signal, noisy_sample_rate = wav.read_wav(noisy_file_list[i]) # different sample rate detect if clean_sample_rate != noisy_sample_rate: raise Exception( "E: Different sample rate detected. clean({})/noisy({})".format( clean_sample_rate, noisy_sample_rate)) SNR = cs.SNR(noisy_signal, clean_signal) SSNR = cs.SSNR(noisy_signal, clean_signal, ssnr_frame_size) PESQ = pesq(clean_signal, noisy_signal, clean_sample_rate) print(" SNR:{} | SSNR:{} | PESQ:{}".format(SNR, SSNR, PESQ)) cf.write_csv_file(csv_file_path, file_name, "{},{},{}".format(SNR, SSNR, PESQ))
raise Exception("ERROR: Target and source path is incorrect") if target_path_isdir: if not cf.compare_path_list(training_target_path, training_source_path, 'wav'): raise Exception("ERROR: Target and source file list is not same") training_target_file_list = cf.read_path_list(training_target_path, "wav") training_source_file_list = cf.read_path_list(training_source_path, "wav") else: training_target_file_list = [training_target_path] training_source_file_list = [training_source_path] x_signal, y_signal = [], [] num_of_total_frame = 0 for i in range(len(training_target_file_list)): # read train data file target_signal, target_sample_rate = wav.read_wav(training_target_file_list[i]) source_signal, source_sample_rate = wav.read_wav(training_source_file_list[i]) target_signal = np.array(target_signal) source_signal = np.array(source_signal) size_of_target = target_signal.size size_of_source = source_signal.size # source & target file incorrect if size_of_source != size_of_target: raise Exception("ERROR: Input, output size mismatch") if size_of_source < current_size: raise Exception("ERROR: Input file length is too small") if shift_size <= 0: raise Exception("ERROR: Shift size is smaller or same with 0")
raise Exception("ERROR: Target and source path is incorrect") if target_path_isdir: if not cf.compare_path_list(training_target_path, training_source_path, 'wav'): raise Exception("ERROR: Target and source file list is not same") training_target_file_list = cf.read_path_list(training_target_path, "wav") training_source_file_list = cf.read_path_list(training_source_path, "wav") else: training_target_file_list = [training_target_path] training_source_file_list = [training_source_path] x_signal, y_signal = [], [] num_of_total_frame = 0 for i in range(len(training_target_file_list)): # read train data file target_signal, target_sample_rate = wav.read_wav( training_target_file_list[i]) source_signal, source_sample_rate = wav.read_wav( training_source_file_list[i]) target_signal = np.array(target_signal) source_signal = np.array(source_signal) size_of_target = target_signal.size size_of_source = source_signal.size # source & target file incorrect if size_of_source != size_of_target: raise Exception("ERROR: Input, output size mismatch") if size_of_source < current_size: raise Exception("ERROR: Input file length is too small") if shift_size <= 0: raise Exception("ERROR: Shift size is smaller or same with 0")
return sign * normalized * MAX def plot_onsets(sums, samplerate, onset_times): # create a visual representation of the onsets # debug function print("plot onsets") sums = np.asarray(sums) time = np.arange(len(sums)) / samplerate # draw a plot of the compressed audio samples plt.figure(1) plt.subplot(211) plt.plot(time, sums) # draw a vertical line at each detected onset for i in onset_times: xpos = i / samplerate plt.plot([xpos, xpos], [0, 1], 'k-', lw=1) plt.show() if __name__ == '__main__': if (len(sys.argv) > 1): pcm_data, samplerate = wav.read_wav(sys.argv[1], 400) detect_onset(pcm_data, samplerate, True)
def read_wav(f):
    """Read the WAV data behind entry ``f`` of the global ``files`` container.

    Args:
        f: Key or index used to look up an entry in ``files``; the entry's
            ``.file`` attribute is what gets passed to ``wav.read_wav``.

    Returns:
        Whatever ``wav.read_wav`` returns for that file.
    """
    # Imported at call time, so the `wav` module is only needed once this
    # function is actually used.
    import wav

    source = files[f].file
    return wav.read_wav(source)
# make test data if source_path_isdir: test_source_file_list = cf.read_path_list(test_source_path, "wav") if test_target_path_exist: test_target_file_list = cf.read_path_list(test_target_path, "wav") else: test_target_file_list = test_source_file_list else: test_source_file_list = [test_source_path] if test_target_path_exist: test_target_file_list = [test_target_path] else: test_target_file_list = test_source_file_list for i in range(len(test_source_file_list)): test_source_signal, test_source_sample_rate = wav.read_wav( test_source_file_list[i]) test_target_signal, test_target_sample_rate = wav.read_wav( test_target_file_list[i]) test_target_signal = np.array(test_target_signal) test_source_signal = np.array(test_source_signal) test_size_of_source = test_source_signal.size test_size_of_target = test_target_signal.size if test_size_of_source != test_size_of_target: raise Exception("ERROR: Test input, output size mismatch") test_mod = (current_size - (test_size_of_source % current_size)) % current_size test_target_signal_padded = np.concatenate([ np.zeros(previous_size), test_target_signal, np.zeros(future_size + test_mod)
def read_wav(f):
    """Read the WAV data behind entry ``f`` of the global ``files`` container.

    Args:
        f: Key or index used to look up an entry in ``files``; the entry's
            ``.file`` attribute is what gets passed to ``wav.read_wav``.

    Returns:
        Whatever ``wav.read_wav`` returns for that file.
    """
    # Imported at call time, so the `wav` module is only needed once this
    # function is actually used.
    import wav

    source = files[f].file
    return wav.read_wav(source)