コード例 #1
0
    def create_tf_record(self, *, prefix, subset_size, parallel=True,
                         output_dir='D:/downloads/dataset/'):
        """Write the clean-audio file list out as a series of TFRecord shards.

        ``self.clean_filenames`` is consumed in consecutive slices of
        ``subset_size`` files. Slice number ``k`` is written to
        ``<output_dir><prefix>_<k>.tfrecords``. A shard whose file already
        exists is skipped, so only the missing shards are (re)generated.

        Args:
            prefix: File-name prefix shared by every shard.
            subset_size: Number of audio files that go into one shard.
            parallel: When true, the files of a shard are processed with a
                ``multiprocessing`` pool sized to the CPU count; otherwise
                they are processed one after another in this process.
            output_dir: Directory that receives the shards. Defaults to the
                location that used to be hard-coded.
        """
        # The pool is created lazily (only if a shard really has to be built
        # in parallel mode) and is always shut down in the ``finally`` below.
        pool = None
        try:
            shard_starts = range(0, len(self.clean_filenames), subset_size)
            for counter, i in enumerate(shard_starts):
                tfrecord_filename = os.path.join(
                    output_dir, prefix + '_' + str(counter) + '.tfrecords')

                if os.path.isfile(tfrecord_filename):
                    print(f"Skipping {tfrecord_filename}")
                    continue

                clean_filenames_sublist = self.clean_filenames[i:i +
                                                               subset_size]

                print(f"Processing files from: {i} to {i + subset_size}")
                # Process the audio BEFORE the shard file is created: if the
                # processing fails, no empty shard is left behind that a
                # later run would mistake for a finished one and skip.
                if parallel:
                    if pool is None:
                        pool = multiprocessing.Pool(
                            multiprocessing.cpu_count())
                    out = pool.map(self.parallel_audio_processing,
                                   clean_filenames_sublist)
                else:
                    out = [
                        self.parallel_audio_processing(filename)
                        for filename in clean_filenames_sublist
                    ]

                # The context manager closes the writer even when
                # serialization raises part-way through.
                with tf.io.TFRecordWriter(tfrecord_filename) as writer:
                    for o in out:
                        noise_stft_magnitude = o[0]
                        clean_stft_magnitude = o[1]
                        noise_stft_phase = o[2]

                        noise_stft_mag_features = prepare_input_features(
                            noise_stft_magnitude,
                            numSegments=8,
                            numFeatures=129)

                        # Move the last axis to the front so that iterating
                        # the three arrays below walks them in lockstep.
                        noise_stft_mag_features = np.transpose(
                            noise_stft_mag_features, (2, 0, 1))
                        clean_stft_magnitude = np.transpose(
                            clean_stft_magnitude, (1, 0))
                        noise_stft_phase = np.transpose(
                            noise_stft_phase, (1, 0))

                        # Append a trailing singleton axis to input/target.
                        noise_stft_mag_features = np.expand_dims(
                            noise_stft_mag_features, axis=3)
                        clean_stft_magnitude = np.expand_dims(
                            clean_stft_magnitude, axis=2)

                        # One serialized example per leading-axis entry.
                        for x_, y_, p_ in zip(noise_stft_mag_features,
                                              clean_stft_magnitude,
                                              noise_stft_phase):
                            y_ = np.expand_dims(y_, 2)
                            example = get_tf_feature(x_, y_, p_)
                            writer.write(example.SerializeToString())
        finally:
            if pool is not None:
                pool.close()
                pool.join()
コード例 #2
0
    def __init__(self, data_dir, data_type, transform=None):
        """
        Load every clean/noisy array pair of one data split into memory.

        The files found in ``<data_dir>/clean/<data_type>`` and
        ``<data_dir>/noisy/<data_type>`` are loaded with ``np.load``, passed
        pairwise through ``prepare_input_features`` and concatenated into the
        float tensors ``self.clean_data`` and ``self.noisy_data``.

        Args:
            data_dir (string): Path to data directory of audio files.
            data_type (string): 'training', 'test' or 'validation'
            transform (callable, optional): Optional transform to be applied on a sample.
                It is only stored here; this constructor does not call it.

        Raises:
            ValueError: If the two directories hold a different number of
                files, or if they are empty.
        """
        self.data_dir = data_dir
        self.transform = transform
        clean_dir = os.path.join(self.data_dir, "clean", data_type)
        noisy_dir = os.path.join(self.data_dir, "noisy", data_type)

        # os.listdir() returns entries in arbitrary order. The files are
        # paired by index below, so both listings are sorted to make the
        # pairing deterministic.
        # NOTE(review): assumes sorting puts corresponding clean/noisy files
        # at the same index (e.g. identical file names) - confirm.
        clean_files = sorted(os.listdir(clean_dir))
        noisy_files = sorted(os.listdir(noisy_dir))

        # Explicit checks instead of `assert`, which is stripped under -O.
        if len(clean_files) != len(noisy_files):
            raise ValueError(
                f"Found {len(clean_files)} clean files but "
                f"{len(noisy_files)} noisy files for '{data_type}'")
        if not clean_files:
            raise ValueError(f"No data files found for '{data_type}'")

        print("Start processing", data_type, "data")
        noisy_chunks = []
        clean_chunks = []
        for clean_name, noisy_name in zip(clean_files, noisy_files):
            clean_new_array = np.load(os.path.join(clean_dir, clean_name))
            noisy_new_array = np.load(os.path.join(noisy_dir, noisy_name))

            # Do some of the preprocessing here
            noisy_new_array, clean_new_array = prepare_input_features(
                noisy_new_array, clean_new_array, 8, 129)
            noisy_chunks.append(noisy_new_array)
            clean_chunks.append(clean_new_array)

        # Concatenate once at the end; growing the array inside the loop
        # re-copies everything accumulated so far on every iteration.
        self.noisy_data = torch.from_numpy(
            np.concatenate(noisy_chunks)).float()
        self.clean_data = torch.from_numpy(
            np.concatenate(clean_chunks)).float()
コード例 #3
0
ファイル: dataset.py プロジェクト: ctyhm/cnn-audio-denoiser
    def create_tf_record(self, *, prefix, subset_size, parallel=True,
                         output_dir='./records/'):
        """Write the clean-audio file list out as a series of TFRecord shards.

        ``self.clean_filenames`` is consumed in consecutive slices of
        ``subset_size`` files. Slice number ``k`` is written to
        ``<output_dir><prefix>_<k>.tfrecords``. A shard whose file already
        exists is skipped, so only the missing shards are (re)generated.

        Args:
            prefix: File-name prefix shared by every shard.
            subset_size: Number of audio files that go into one shard. For
                example, 10000 files with ``subset_size=2000`` yield five
                shards, the first one covering files 0..1999.
            parallel: Accepted for interface compatibility but currently
                ignored: the multiprocessing path was disabled, so files are
                always processed sequentially in this process.
            output_dir: Directory that receives the shards; it is created
                when missing. Defaults to the location that used to be
                hard-coded.
        """
        print(len(self.clean_filenames))
        print(subset_size)
        # Walk the file list in strides of `subset_size`; every stride
        # becomes one shard, numbered by `counter`.
        shard_starts = range(0, len(self.clean_filenames), subset_size)
        for counter, i in enumerate(shard_starts):
            # Name of the shard for this slice.
            tfrecord_filename = os.path.join(
                output_dir, prefix + '_' + str(counter) + '.tfrecords')
            # An existing shard is left untouched.
            if os.path.isfile(tfrecord_filename):
                print(f"Skipping {tfrecord_filename}")
                continue
            # Files i .. i + subset_size - 1 form this shard's batch.
            clean_filenames_sublist = self.clean_filenames[i:i + subset_size]

            print(f"Processing files from: {i} to {i + subset_size}")
            # Process the audio BEFORE the shard file is created: if the
            # processing fails, no empty shard is left behind that a later
            # run would mistake for a finished one and skip.
            # Per the original author's note, parallel_audio_processing
            # returns the STFT data of the noise-augmented audio file.
            out = [
                self.parallel_audio_processing(filename)
                for filename in clean_filenames_sublist
            ]
            print(len(out))

            # The writer cannot create missing parent directories itself.
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            # The context manager closes the writer even when serialization
            # raises part-way through.
            with tf.io.TFRecordWriter(tfrecord_filename) as writer:
                for o in out:
                    noise_stft_magnitude = o[0]
                    clean_stft_magnitude = o[1]
                    noise_stft_phase = o[2]

                    noise_stft_mag_features = prepare_input_features(
                        noise_stft_magnitude, numSegments=8, numFeatures=129)

                    # Move the last axis to the front so that iterating the
                    # three arrays below walks them in lockstep.
                    noise_stft_mag_features = np.transpose(
                        noise_stft_mag_features, (2, 0, 1))
                    clean_stft_magnitude = np.transpose(
                        clean_stft_magnitude, (1, 0))
                    noise_stft_phase = np.transpose(noise_stft_phase, (1, 0))

                    # Append a trailing singleton axis. The original notes
                    # give example shapes of (201, 129, 8, 1) for the input
                    # and (201, 129, 1) for the target - TODO confirm.
                    noise_stft_mag_features = np.expand_dims(
                        noise_stft_mag_features, axis=3)
                    clean_stft_magnitude = np.expand_dims(
                        clean_stft_magnitude, axis=2)

                    # One serialized example per leading-axis entry.
                    for x_, y_, p_ in zip(noise_stft_mag_features,
                                          clean_stft_magnitude,
                                          noise_stft_phase):
                        y_ = np.expand_dims(y_, 2)
                        example = get_tf_feature(x_, y_, p_)
                        writer.write(example.SerializeToString())
コード例 #4
0
            1j * phase)  # that fixes the abs() ope previously done

        features = np.transpose(features, (1, 0))
        return noiseAudioFeatureExtractor.get_audio_from_stft_spectrogram(
            features)
        #return noiseAudioFeatureExtractor.get_audio_from_stft_spectrogram_GL(np.abs(features))

    noisyPhase = np.angle(noise_stft_features)
    print(noisyPhase.shape)
    noise_stft_features = np.abs(noise_stft_features)

    mean = np.mean(noise_stft_features)
    std = np.std(noise_stft_features)
    noise_stft_features = (noise_stft_features - mean) / std

    predictors = prepare_input_features(noise_stft_features, numSegments,
                                        numFeatures)

    predictors = np.reshape(
        predictors,
        (predictors.shape[0], predictors.shape[1], 1, predictors.shape[2]))
    predictors = np.transpose(predictors, (3, 0, 1, 2)).astype(np.float32)
    print('predictors.shape:', predictors.shape)

    STFTFullyConvolutional = model.predict(predictors)
    print(STFTFullyConvolutional.shape)

    denoisedAudioFullyConvolutional = revert_features_to_audio2(
        STFTFullyConvolutional, noisyPhase, mean, std)
    print("Min:", np.min(denoisedAudioFullyConvolutional), "Max:",
          np.max(denoisedAudioFullyConvolutional))
    #  ipd.Audio(data=denoisedAudioFullyConvolutional, rate=fs) # load a local WAV file