def create_tf_record(self, *, prefix, subset_size, parallel=True):
    counter = 0
    p = multiprocessing.Pool(multiprocessing.cpu_count())
    for i in range(0, len(self.clean_filenames), subset_size):
        tfrecord_filename = 'D:/downloads/dataset/' + prefix + '_' + str(counter) + '.tfrecords'

        # Skip shards already written by a previous run
        if os.path.isfile(tfrecord_filename):
            print(f"Skipping {tfrecord_filename}")
            counter += 1
            continue

        writer = tf.io.TFRecordWriter(tfrecord_filename)
        clean_filenames_sublist = self.clean_filenames[i:i + subset_size]

        print(f"Processing files from: {i} to {i + subset_size}")
        if parallel:
            out = p.map(self.parallel_audio_processing, clean_filenames_sublist)
        else:
            out = [self.parallel_audio_processing(filename)
                   for filename in clean_filenames_sublist]

        for o in out:
            noise_stft_magnitude = o[0]
            clean_stft_magnitude = o[1]
            noise_stft_phase = o[2]

            # Slice the noisy magnitudes into windows of numSegments x numFeatures
            noise_stft_mag_features = prepare_input_features(
                noise_stft_magnitude, numSegments=8, numFeatures=129)

            noise_stft_mag_features = np.transpose(noise_stft_mag_features, (2, 0, 1))
            clean_stft_magnitude = np.transpose(clean_stft_magnitude, (1, 0))
            noise_stft_phase = np.transpose(noise_stft_phase, (1, 0))

            noise_stft_mag_features = np.expand_dims(noise_stft_mag_features, axis=3)
            clean_stft_magnitude = np.expand_dims(clean_stft_magnitude, axis=2)

            for x_, y_, p_ in zip(noise_stft_mag_features, clean_stft_magnitude,
                                  noise_stft_phase):
                y_ = np.expand_dims(y_, 2)
                example = get_tf_feature(x_, y_, p_)
                writer.write(example.SerializeToString())

        counter += 1
        writer.close()
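# get_tf_feature is called above but not shown. A minimal sketch of what such a
# serialization helper typically looks like, assuming each array is flattened into a
# float list; the feature keys used here are assumptions, not taken from the original:
import numpy as np
import tensorflow as tf

def get_tf_feature(x_, y_, p_):
    # x_: (129, 8, 1) noisy input window, y_: (129, 1, 1) clean target,
    # p_: (129,) noisy phase kept for waveform reconstruction after inference
    feature = {
        'noise_stft_mag_features': tf.train.Feature(
            float_list=tf.train.FloatList(value=x_.flatten())),
        'clean_stft_magnitude': tf.train.Feature(
            float_list=tf.train.FloatList(value=y_.flatten())),
        'noise_stft_phase': tf.train.Feature(
            float_list=tf.train.FloatList(value=p_.flatten())),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))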
def __init__(self, data_dir, data_type, transform=None):
    """
    Args:
        data_dir (string): Path to data directory of audio files.
        data_type (string): 'training', 'test' or 'validation'
        transform (callable, optional): Optional transform to be applied
            on a sample.
    """
    self.data_dir = data_dir
    self.transform = transform

    clean_data = os.path.join(self.data_dir, "clean")
    noisy_data = os.path.join(self.data_dir, "noisy")
    clean_files = os.listdir(os.path.join(clean_data, data_type))
    noisy_files = os.listdir(os.path.join(noisy_data, data_type))

    # check if the # of noisy files = # of clean files
    assert len(clean_files) == len(noisy_files)

    clean_files_full_path = [
        os.path.join(clean_data, data_type, filename) for filename in clean_files
    ]
    noisy_files_full_path = [
        os.path.join(noisy_data, data_type, filename) for filename in noisy_files
    ]

    print("Start processing", data_type, "data")
    for idx in range(len(clean_files)):
        clean_new_array = np.load(clean_files_full_path[idx])
        noisy_new_array = np.load(noisy_files_full_path[idx])

        # Do some of the preprocessing here
        noisy_new_array, clean_new_array = prepare_input_features(
            noisy_new_array, clean_new_array, 8, 129)

        if idx == 0:
            noisy_final = np.copy(noisy_new_array)
            clean_final = np.copy(clean_new_array)
        else:
            noisy_final = np.concatenate((noisy_final, noisy_new_array))
            clean_final = np.concatenate((clean_final, clean_new_array))

    self.noisy_data = torch.from_numpy(noisy_final).float()
    self.clean_data = torch.from_numpy(clean_final).float()
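# The __init__ above belongs to a torch.utils.data.Dataset subclass. A minimal sketch
# of the rest of the Dataset protocol it needs, plus DataLoader usage. The class name
# SpectrogramPairDataset is hypothetical, and construction from pre-built tensors
# stands in for the file-loading __init__ shown above:
import torch
from torch.utils.data import Dataset, DataLoader

class SpectrogramPairDataset(Dataset):  # hypothetical name
    def __init__(self, noisy_data, clean_data, transform=None):
        self.noisy_data = noisy_data    # float tensor of windowed noisy spectrograms
        self.clean_data = clean_data    # float tensor of matching clean targets
        self.transform = transform

    def __len__(self):
        # One sample per windowed spectrogram
        return self.noisy_data.shape[0]

    def __getitem__(self, idx):
        sample = (self.noisy_data[idx], self.clean_data[idx])
        if self.transform:
            sample = self.transform(sample)
        return sample

# Usage sketch with dummy tensors shaped like the (8, 129) windows built above:
noisy = torch.randn(100, 8, 129)
clean = torch.randn(100, 129)
loader = DataLoader(SpectrogramPairDataset(noisy, clean), batch_size=32, shuffle=True)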
def create_tf_record(self, *, prefix, subset_size, parallel=True):
    counter = 0
    # Multiprocessing variant: process on several cores at once for speed
    # p = multiprocessing.Pool(multiprocessing.cpu_count())

    # E.g., with 10,000 utterances and subset_size=2000, files 0-1999 go into the
    # first training tfrecords shard, giving 5 training shards in total
    print(len(self.clean_filenames))
    print(subset_size)

    # Walk over the file list with a stride of subset_size
    # (2000 for the training set, 1000 for the test set)
    for i in range(0, len(self.clean_filenames), subset_size):
        # Name of this shard
        tfrecord_filename = './records/' + prefix + '_' + str(counter) + '.tfrecords'

        # If the shard already exists, skip it and bump the counter
        if os.path.isfile(tfrecord_filename):
            print(f"Skipping {tfrecord_filename}")
            counter += 1
            continue

        # Create the tfrecord file and open a writer on it
        writer = tf.io.TFRecordWriter(tfrecord_filename)

        # Files i through i + subset_size form one batch of utterances
        clean_filenames_sublist = self.clean_filenames[i:i + subset_size]
        print(f"Processing files from: {i} to {i + subset_size}")

        # if parallel:
        #     out = p.map(self.parallel_audio_processing, clean_filenames_sublist)
        # else:

        # out is the list of processed results for this batch (the key part).
        # parallel_audio_processing(filename) mixes noise into the utterance and
        # returns its Fourier-transformed data (the core step).
        out = [
            self.parallel_audio_processing(filename)
            for filename in clean_filenames_sublist
        ]
        print(len(out))

        for o in out:
            noise_stft_magnitude = o[0]
            clean_stft_magnitude = o[1]
            noise_stft_phase = o[2]

            noise_stft_mag_features = prepare_input_features(
                noise_stft_magnitude, numSegments=8, numFeatures=129)

            noise_stft_mag_features = np.transpose(noise_stft_mag_features, (2, 0, 1))
            clean_stft_magnitude = np.transpose(clean_stft_magnitude, (1, 0))
            noise_stft_phase = np.transpose(noise_stft_phase, (1, 0))

            # Add a trailing channel dim: shape = (201, 129, 8, 1)
            noise_stft_mag_features = np.expand_dims(noise_stft_mag_features, axis=3)
            # Add a trailing channel dim: shape = (201, 129, 1)
            clean_stft_magnitude = np.expand_dims(clean_stft_magnitude, axis=2)

            for x_, y_, p_ in zip(noise_stft_mag_features, clean_stft_magnitude,
                                  noise_stft_phase):
                y_ = np.expand_dims(y_, 2)
                example = get_tf_feature(x_, y_, p_)
                # Write the serialized example
                writer.write(example.SerializeToString())

        # Bump the counter and close the shard
        counter += 1
        writer.close()
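# A sketch of how the shards written above could be read back for training, assuming
# get_tf_feature stored the three arrays as float lists under the keys used in the
# get_tf_feature sketch earlier, with the fixed per-example shapes noted in the
# comments above (the noisy phase is parsed but dropped here):
import tensorflow as tf

def parse_example(serialized):
    feature_description = {
        'noise_stft_mag_features': tf.io.FixedLenFeature([129 * 8], tf.float32),
        'clean_stft_magnitude': tf.io.FixedLenFeature([129], tf.float32),
        'noise_stft_phase': tf.io.FixedLenFeature([129], tf.float32),
    }
    parsed = tf.io.parse_single_example(serialized, feature_description)
    x = tf.reshape(parsed['noise_stft_mag_features'], (129, 8, 1))
    y = tf.reshape(parsed['clean_stft_magnitude'], (129, 1, 1))
    return x, y

# Usage sketch: stream, shuffle and batch the training shards
# dataset = tf.data.TFRecordDataset(tf.io.gfile.glob('./records/train_*.tfrecords'))
# dataset = dataset.map(parse_example).shuffle(8192).batch(512)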
def revert_features_to_audio2(features, phase, mean=None, std=None):
    # Undo the normalization applied to the noisy magnitudes before inference
    if mean is not None and std is not None:
        features = std * features + mean
    features = np.squeeze(features)
    phase = np.transpose(phase, (1, 0))
    # Re-attach the noisy phase to the predicted magnitude;
    # that fixes the abs() op previously done
    features = features * np.exp(1j * phase)
    features = np.transpose(features, (1, 0))
    return noiseAudioFeatureExtractor.get_audio_from_stft_spectrogram(features)
    # return noiseAudioFeatureExtractor.get_audio_from_stft_spectrogram_GL(np.abs(features))

noisyPhase = np.angle(noise_stft_features)
print(noisyPhase.shape)
noise_stft_features = np.abs(noise_stft_features)

# Normalize the noisy magnitudes to zero mean and unit variance
mean = np.mean(noise_stft_features)
std = np.std(noise_stft_features)
noise_stft_features = (noise_stft_features - mean) / std

predictors = prepare_input_features(noise_stft_features, numSegments, numFeatures)
predictors = np.reshape(
    predictors, (predictors.shape[0], predictors.shape[1], 1, predictors.shape[2]))
predictors = np.transpose(predictors, (3, 0, 1, 2)).astype(np.float32)
print('predictors.shape:', predictors.shape)

STFTFullyConvolutional = model.predict(predictors)
print(STFTFullyConvolutional.shape)

denoisedAudioFullyConvolutional = revert_features_to_audio2(
    STFTFullyConvolutional, noisyPhase, mean, std)
print("Min:", np.min(denoisedAudioFullyConvolutional),
      "Max:", np.max(denoisedAudioFullyConvolutional))
# ipd.Audio(data=denoisedAudioFullyConvolutional, rate=fs)

# load a local WAV file
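# A sketch of the front end feeding the inference script above: load a wav and take
# its STFT, assuming librosa and the window size implied by numFeatures=129
# (n_fft=256 gives 129 frequency bins). The sample rate, hop length, and file name
# here are illustrative assumptions, not from the original:
import librosa
import numpy as np

fs = 16000
noisy_audio, _ = librosa.load('noisy_sample.wav', sr=fs)  # hypothetical file
noise_stft_features = librosa.stft(noisy_audio, n_fft=256, hop_length=64,
                                   win_length=256, center=True)
print(noise_stft_features.shape)  # (129, n_frames), complex-valued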