Code example #1
    def split_tr_tt_demand(self):
        path_DEMAND = '/media/jeonghwan/HDD2/Dataset/DEMAND/'
        save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/demand_noise/tr/'

        # Channel 1 was selected and used to make the diffuse noise
        noise_list = glob(path_DEMAND + '**/*ch01.wav')
        for i, noi in enumerate(noise_list):
            # load ch01, then stack ch02-ch04 into a (channels, samples) array
            aud, fs = librosa.core.load(noi, sr=None, mono=False)
            aud = aud[np.newaxis, :]
            for j in range(1, 4):
                aud_temp, _ = librosa.core.load(noi.replace(
                    'ch01', 'ch0{}'.format(j + 1)),
                                                sr=None,
                                                mono=False)
                aud = np.concatenate((aud, aud_temp[np.newaxis, :]), axis=0)

            # output name: '<parent folder>_<filename>'
            fn = noi.split('/')[-2] + '_' + noi.split('/')[-1]

            # 80/20 split along time into train (tr) and test (tt)
            len_tr = int(aud.shape[1] * 4 / 5)
            noi_tr = aud[:, :len_tr]
            noi_tt = aud[:, len_tr:]

            # write as (samples, channels)
            audiowrite(save_path + fn, noi_tr.T, fs)
            audiowrite(save_path.replace('/tr/', '/tt/') + fn, noi_tt.T, fs)
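
These snippets rely on a shared set of imports plus `audioread`/`audiowrite` I/O helpers that are not shown in the excerpts. A minimal sketch of what they might look like, assuming the `soundfile` package for file I/O (the actual helpers may differ):

    # imports assumed throughout the examples in this section
    import os
    import csv
    from glob import glob
    from pathlib import Path

    import librosa
    import matplotlib.pyplot as plt
    import numpy as np
    import parmap
    import scipy.signal as ss
    import soundfile as sf
    from tqdm import tqdm

    def audioread(path):
        # returns (samples, fs); samples are (n,) for mono, (n, channels) otherwise
        aud, fs = sf.read(path)
        return aud, fs

    def audiowrite(path, data, fs):
        # data shaped (n,) or (n, channels)
        sf.write(path, data, fs)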
Code example #2
    def mix_rir_and_sound_source_210301(self):
        ### --------- single source dataset----------- ###
        save_path = '/media/jeonghwan/HDD2/IS2021/dataset/SSL/tt/'
        rir_path = '/media/jeonghwan/HDD2/IS2021/dataset/simulated_RIR/tr/anechoic/'
        spc_path = '/media/jeonghwan/HDD1/Dataset/MS-SNSD/clean_test/'

        rir_list = glob(rir_path + '*.npz')
        spc_list = glob(spc_path + '*.wav')

        # iterate sources in sorted order, pairing each with a random RIR
        spc_list.sort()
        for i, _spc in enumerate(tqdm(spc_list)):

            # read audio file; librosa with mono=False returns (channels, samples)
            aud, fs = librosa.core.load(_spc, sr=None, mono=False)

            if len(aud.shape) != 1:
                aud = aud[0]  # keep the first channel

            # pick a random RIR
            idx_s = np.random.randint(0, len(rir_list))
            npz = np.load(rir_list[idx_s], allow_pickle=True)

            # convolve and save; split on '.n' so dots in the name (e.g. r2.0) survive
            rir = npz['rir']
            Y = ss.convolve(rir, aud[:, np.newaxis])
            audiowrite(
                save_path + rir_list[idx_s].split('/')[-1].split('.n')[0] +
                '_' + _spc.split('/')[-1], Y, fs)
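
Every RIR archive is read back as `np.load(...)['rir']`, so each `.npz` presumably stores the impulse responses under a 'rir' key as a (taps, channels) array. A round-trip sketch; the array below is a synthetic stand-in, with lengths chosen to match the 8191-sample trimming used in the later examples:

    import numpy as np

    # save a (taps, channels) array under the key the loaders expect
    taps, channels = 8192, 4
    rir = np.random.randn(taps, channels) * np.exp(-np.linspace(0, 8, taps))[:, None]
    np.savez('az150_el0_r2.0.npz', rir=rir)

    # load it back exactly as the methods above do
    rir = np.load('az150_el0_r2.0.npz', allow_pickle=True)['rir']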
Code example #3
    def convolve_and_save_rir(self, fn):
        save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/multi_channel_speech/tt/'
        rir_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/simulated_RIR/anechoic/tr/'
        mix_all = False
        rir_list = glob(rir_path + '*.npz')

        # read audio file
        aud, fs = librosa.core.load(fn, sr=None, mono=False)

        if len(aud.shape) != 1:
            aud = aud[0]  # librosa with mono=False returns (channels, samples)
        if mix_all:
            for i, _rir in enumerate(rir_list):
                npz = np.load(_rir, allow_pickle=True)
                rir = npz['rir']
                Y = ss.convolve(rir, aud[:, np.newaxis])
                audiowrite(
                    save_path + _rir.split('/')[-1].split('.n')[0] + '_' +
                    fn.split('/')[-1], Y, fs)
        else:
            # pick one RIR at random
            idx_s = np.random.randint(0, len(rir_list))
            npz = np.load(rir_list[idx_s], allow_pickle=True)

            # convolve
            rir = npz['rir']
            Y = ss.convolve(rir, aud[:, np.newaxis])
            audiowrite(
                save_path + rir_list[idx_s].split('/')[-1].split('.n')[0] +
                '_' + fn.split('/')[-1], Y, fs)
Code example #4
    def convolve_and_save_rir_tt(self, fn):

        # path set
        mode = 'tt'
        save_path = os.getcwd() + '/multi_channel_speech/' + mode + '/clean'
        Path(save_path).mkdir(parents=True, exist_ok=True)
        rir_path = os.getcwd() + '/rir/' + mode

        mix_all = False
        rir_list = glob(rir_path + '/*/*.npz')
        # aud, fs = librosa.core.load(fn, sr=None, mono=False)
        aud, fs = audioread(fn)

        if len(aud.shape) != 1:
            aud = aud[:, 0]  # keep the first channel if multi-channel

        if mix_all:
            for i, _rir in enumerate(rir_list):
                npz = np.load(_rir, allow_pickle=True)
                rir = npz['rir']
                Y = ss.convolve(rir, aud[:, np.newaxis])
                audiowrite(
                    save_path + '/' + _rir.split('/')[-1].split('.n')[0] +
                    '_' + fn.split('/')[-1], Y, fs)
        else:
            idx_s = np.random.randint(0, len(rir_list))
            npz = np.load(rir_list[idx_s], allow_pickle=True)

            # convolve
            rir = npz['rir']
            Y = ss.convolve(rir, aud[:, np.newaxis])
            audiowrite(
                save_path + '/' + rir_list[idx_s].split('/')[-2] + '_' +
                rir_list[idx_s].split('/')[-1].split('.n')[0] + '_' +
                fn.split('/')[-1], Y, fs)
Code example #5
    def mix_rir_and_sound_source(self, mode):
        """
        convolve speech and speech_rir (random selected)
        :param mode: tr/cv/tt
        :return: save multi-channel speech
        """
        # path set
        save_path = os.getcwd() + '/multi_channel_speech/' + mode
        rir_path = os.getcwd() + '/rir/' + mode
        if mode == 'cv':
            rir_path = os.getcwd() + '/rir/tr'
        spc_path = '/home/dail/PycharmProjects/DCCRN/data/tr/clean'

        # rir list and sound source list
        rir_list = glob(rir_path + '/*/*.npz')
        spc_list = glob(spc_path + '/*.wav')

        # generate random rir index
        spc_list.sort()
        _use_par = False

        if _use_par:
            if mode == 'tr':
                _ = parmap.map(self.convolve_and_save_rir_tr,
                               spc_list,
                               pm_pbar=True,
                               pm_processes=28)
            if mode == 'cv':
                _ = parmap.map(self.convolve_and_save_rir_cv,
                               spc_list,
                               pm_pbar=True,
                               pm_processes=28)
            if mode == 'tt':
                _ = parmap.map(self.convolve_and_save_rir_tt,
                               spc_list,
                               pm_pbar=True,
                               pm_processes=28)

        else:
            for i, _spc in enumerate(tqdm(spc_list)):

                # read audio file
                # aud, fs = librosa.core.load(_spc, sr=None, mono=False)
                aud, fs = audioread(_spc)

                if len(aud.shape) != 1:
                    aud = aud[:, 0]  # keep the first channel if multi-channel

                # pick a random RIR
                idx_s = np.random.randint(0, len(rir_list))
                npz = np.load(rir_list[idx_s], allow_pickle=True)

                # convolve
                rir = npz['rir']
                Y = ss.convolve(rir, aud[:, np.newaxis])
                audiowrite(
                    save_path + '/' + rir_list[idx_s].split('/')[-2] + '_' +
                    rir_list[idx_s].split('/')[-1].split('.n')[0] + '_' +
                    _spc.split('/')[-1], Y, fs)
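
A possible driver for the method above, looping over the three splits; the class that holds these methods is not shown in the excerpts, so the `DatasetGenerator` name is hypothetical:

    # build the train / validation / test mixtures in turn
    gen = DatasetGenerator()  # hypothetical class name
    for mode in ['tr', 'cv', 'tt']:
        gen.mix_rir_and_sound_source(mode)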
Code example #6
    def generate_whitenoise(self):
        # uniform noise in [-0.5, 0.5), then normalized to a 0.5 peak
        wn = np.random.rand(self.fs * 5, 1) - 0.5
        wn = wn / np.max(np.abs(wn)) * 0.5
        plt.figure()
        plt.plot(wn)
        plt.show()
        audiowrite('wn.wav', wn, self.fs)
        return wn
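
One quick sanity check on the generated signal is that its spectrum is roughly flat. A standalone check using `scipy.signal.welch` (not part of the original code; the 16 kHz rate is an assumption):

    import numpy as np
    import scipy.signal as ss

    fs = 16000                                   # assumed sample rate
    wn = np.random.rand(fs * 5) - 0.5            # same construction as above
    wn = wn / np.max(np.abs(wn)) * 0.5
    f, pxx = ss.welch(wn, fs=fs, nperseg=1024)
    print(pxx.std() / pxx.mean())                # small ratio -> near-flat spectrum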
Code example #7
    def mix_spatially_white_noise(self, fn, SNR):
        """
        :param fn: audio filename (multi-channel)
        :param SNR: target SNR in dB
        :return: noisy signal
        """
        # read the multi-channel signal
        Y, fs = audioread(fn)

        # add spatially uncorrelated white noise: one independent stream per
        # channel, scaled so that 10*log10(P_signal / P_noise) equals SNR
        swn = np.random.rand(Y.shape[0], Y.shape[1]) - 0.5
        gain = np.sqrt(np.mean(Y ** 2) / (np.mean(swn ** 2) * 10 ** (SNR / 10)))
        Y = Y + swn * gain

        audiowrite('mixed_{}dB.wav'.format(SNR), Y, fs)
        return Y
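
The noise gain above follows from SNR_dB = 10 * log10(P_signal / P_noise): scaling the noise by g scales its power by g^2, so g = sqrt(P_signal / (P_noise * 10^(SNR_dB / 10))). A short numeric check (the power values are made up):

    import numpy as np

    Ps, Pn, snr = 0.01, 1.0, 10.0                 # signal power, noise power, target SNR (dB)
    g = np.sqrt(Ps / (Pn * 10 ** (snr / 10)))
    print(10 * np.log10(Ps / (g ** 2 * Pn)))      # -> 10.0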
Code example #8
    def convolve_and_save_rir_mp(self, fn):
        # fn is a pair: (audio path, RIR path)
        # save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/multi_channel_speech/tr/'
        save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/multi_channel_speech/tt/'
        #save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/multi_channel_noisy_direct/tr/'
        #save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/multi_channel_noisy_direct/tt/'

        # read audio file
        aud, fs = librosa.core.load(fn[0], sr=None, mono=False)
        if len(aud.shape) != 1:
            aud = aud[0]  # librosa with mono=False returns (channels, samples)
        room_num = fn[1].split('/')[-2]

        # convolve
        npz = np.load(fn[1], allow_pickle=True)
        rir = npz['rir']
        Y = ss.convolve(rir, aud[:, np.newaxis])
        audiowrite(
            save_path + fn[1].split('/')[-1].split('.n')[0] + '_' + room_num +
            '_' + fn[0].split('/')[-1], Y, fs)
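
Unlike the earlier variants, this one takes `fn` as an (audio path, RIR path) pair so the work can be fanned out with `parmap`. A usage sketch; the one-random-RIR-per-utterance pairing and the `gen` instance are assumptions:

    import numpy as np
    import parmap
    from glob import glob

    spc_list = sorted(glob('/media/jeonghwan/HDD1/Dataset/MS-SNSD/clean_test/*.wav'))
    rir_list = sorted(glob('/media/jeonghwan/HDD2/IS2021/dataset/simulated_RIR/tr/anechoic/*.npz'))
    pairs = [(s, np.random.choice(rir_list)) for s in spc_list]  # one random RIR per utterance
    # `gen` is the (hypothetical) instance that holds the method above
    _ = parmap.map(gen.convolve_and_save_rir_mp, pairs, pm_pbar=True, pm_processes=28)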
Code example #9
    def split_tr_tt_diffuse(self):
        path_DEMAND = '/media/jeonghwan/HDD2/Dataset/DEMAND/'
        save_path = '/media/jeonghwan/HDD2/IS2021/dataset/Simul_DB_ULA4/diffuse_noise/tr/'

        # Channel 1 was selected and used to make the diffuse noise
        noise_list = glob(path_DEMAND + '**/*ch01.wav')
        for i, noi in enumerate(noise_list):
            aud, fs = librosa.core.load(noi, sr=None, mono=False)
            fn = noi.split('/')[-2] + '_' + noi.split('/')[-1]

            # 80/20 split into train (tr) and test (tt)
            len_tr = int(aud.shape[0] * 4 / 5)
            noi_tr = aud[:len_tr]
            noi_tt = aud[len_tr:]

            audiowrite(save_path + fn, noi_tr, fs)
            audiowrite(save_path.replace('/tr/', '/tt/') + fn, noi_tt, fs)
Code example #10
    def generate_single_source(self):

        # read audio file
        aud, fs = audioread('wn.wav')

        # load rir file
        npz_path = './'
        fn = 'az150_el0_r2.0'
        npz_list = glob(npz_path + '{}.npz'.format(fn))

        npz = np.load(npz_list[0], allow_pickle=True)
        rir = npz['rir']

        Y = self.convolve_rir_signal(rir, aud)

        # save at the source sample rate
        audiowrite('{}.wav'.format(fn), Y, fs)
        return Y
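
`convolve_rir_signal` is another helper that does not appear in these excerpts; presumably it wraps the same `scipy.signal.convolve` pattern used by the other methods. A minimal sketch under that assumption:

    import numpy as np
    import scipy.signal as ss

    def convolve_rir_signal(rir, aud):
        # rir: (taps, channels); aud: (samples,) mono signal
        # full 2-D convolution -> (samples + taps - 1, channels)
        return ss.convolve(rir, aud[:, np.newaxis])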
Code example #11
    def mix_spc_noi_tt(self):
        """
        (1) load single channel speech / single channel speech
        (2) select speech rir and noise rir
        (3) convolve speech(noise) and speech(noise) rir
        (4) mix multi-channel speech and multi-channel noise
        (5) room(2) * noise(4) * SNR(5)
        :return: save noisy(mix), clean(s1), noise(s2) files / save 'output.csv' file
        """

        # path set
        spc_path = '/home/dail/PycharmProjects/DCCRN/datasets/tr/clean'
        noi_path = '/home/dail/PycharmProjects/DCCRN/datasets/tr/noise'
        snr_list = [-5, 0, 5, 10, 15]
        save_path = os.getcwd() + '/output/tt'
        Path(save_path + '/mix').mkdir(parents=True, exist_ok=True)
        Path(save_path + '/s1').mkdir(parents=True, exist_ok=True)
        Path(save_path + '/s2').mkdir(parents=True, exist_ok=True)

        # multi-channel speech list
        s_list = glob(spc_path + '/*.wav')
        # single-channel noise list
        n_list = glob(noi_path + '/*.wav')

        # make 'output.csv'
        f = open('output/tt/output.csv', 'w', newline='')
        wr = csv.writer(f)
        wr.writerow([
            'order', 'speech', 'room', 'speech_rir', 'noise', 'noise_rir',
            'snr'
        ])
        cnt = 0

        for i, s in enumerate(s_list):
            # multi_ch_aud, fs = librosa.core.load(s, sr=None, mono=False)
            # multi_ch_aud_na = os.path.splitext(os.path.basename(s))[0]
            spc, fs = audioread(s)
            spc_na = os.path.splitext(os.path.basename(s))[0]

            # speech azimuth from the frontal sector (0-30 or 330-359 deg),
            # noise azimuth from the rear sector (180-270 deg)
            # np.random.seed(1)
            rand_azi_s = np.random.choice(
                np.concatenate((np.arange(31), np.arange(330, 360)), axis=0))
            rand_azi_n = np.random.choice(np.arange(180, 271))
            # source distance drawn from {1.0, 1.3, 1.6, 1.9, 2.2} m
            rand_r = np.round(np.random.choice(np.linspace(1, 2.2, 5)), 1)
            spc_rir_na = f'az{rand_azi_s}_el0_r{rand_r}'
            noi_rir_na = f'az{rand_azi_n}_el0_r{rand_r}'

            room = ['R4', 'R5']

            # room
            for n in range(2):
                spc_rir = os.getcwd() + f'/rir/tt/{room[n]}/{spc_rir_na}.npz'
                npz_s = np.load(spc_rir, allow_pickle=True)
                rir_s = npz_s['rir']
                multi_ch_spc = ss.convolve(rir_s, spc[:, np.newaxis])
                multi_ch_spc = multi_ch_spc.transpose()

                noi_rir = os.getcwd() + f'/rir/tt/{room[n]}/{noi_rir_na}.npz'
                npz_n = np.load(noi_rir, allow_pickle=True)
                rir_n = npz_n['rir']

                # noise
                for idx_n in range(len(n_list)):

                    noi, fs2 = librosa.core.load(n_list[idx_n], sr=None)
                    noi_na = os.path.splitext(os.path.basename(
                        n_list[idx_n]))[0]
                    assert fs == fs2

                    # pad the noise segment by 8191 samples (RIR length 8192)
                    # so the convolution edges can be trimmed off afterwards
                    rand_start = np.random.randint(
                        0, noi.shape[0] - multi_ch_spc.shape[1] - 8191)
                    multi_ch_noi_tmp = ss.convolve(
                        rir_n, noi[rand_start:rand_start +
                                   multi_ch_spc.shape[1] + 8191, np.newaxis])
                    multi_ch_noi = multi_ch_noi_tmp[8191:-8191, :].transpose()

                    # mix speech and noise with SNR
                    # idx_snr = np.random.randint(0, len(snr_list))

                    for l in range(len(snr_list)):
                        cnt = cnt + 1
                        snr = snr_list[l]

                        noisy, clean, noise = self.snr_mix(
                            multi_ch_spc, multi_ch_noi, snr)

                        audiowrite(
                            save_path +
                            f'/mix/noisy_{cnt:05d}_{noi_na}_{snr}.wav',
                            noisy.transpose(), fs)
                        audiowrite(save_path + f'/s1/clean_{cnt:05d}.wav',
                                   clean.transpose(), fs)
                        audiowrite(save_path + f'/s2/noise_{cnt:05d}.wav',
                                   noise.transpose(), fs)

                        wr.writerow([
                            cnt, spc_na, room[n], spc_rir_na, noi_na,
                            noi_rir_na, snr
                        ])

        f.close()
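
`snr_mix` is called here and in the next example but is not among these excerpts. A minimal sketch of what it plausibly does, scaling the noise so the speech-to-noise power ratio hits the requested SNR (the real implementation may differ, e.g. in normalization or clipping handling):

    import numpy as np

    def snr_mix(clean, noise, snr_db):
        # clean, noise: (channels, samples) arrays of equal length
        p_clean = np.mean(clean ** 2)
        p_noise = np.mean(noise ** 2) + 1e-12     # guard against silent noise
        gain = np.sqrt(p_clean / (p_noise * 10 ** (snr_db / 10)))
        noise = noise * gain
        noisy = clean + noise
        return noisy, clean, noise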
Code example #12
    def mix_spc_noi_0401(self, mode):
        """
        (1) load multi-channel speech
        (2) calculate noise rir (=speech rir + 90/180/270 degree)
        (3) convolve noise (random selected) and noise rir
        (4) mix multi-channel speech and multi-channel noise with SNR (random selected)
        :param mode: tr/cv/tt
        :return: save noisy(mix), clean(s1), noise(s2) files / save 'output.csv' file
        """

        # path set
        spc_path = os.getcwd() + '/multi_channel_speech/' + mode
        noi_path = os.getcwd() + '/Data/' + mode + '/noise'
        snr_list = [-5, 0, 5, 10, 15, 20]
        save_path = os.getcwd() + '/output/' + mode
        Path(save_path + '/mix').mkdir(parents=True, exist_ok=True)
        Path(save_path + '/s1').mkdir(parents=True, exist_ok=True)
        Path(save_path + '/s2').mkdir(parents=True, exist_ok=True)

        # multi-channel speech list
        s_list = glob(spc_path + '/*.wav')
        # single-channel noise list
        n_list = glob(noi_path + '/*.wav')

        # make 'output.csv'
        f = open(f'output/{mode}/output.csv', 'w', newline='')
        wr = csv.writer(f)
        wr.writerow([
            'order', 'speech', 'room', 'speech_rir', 'noise', 'noise_rir',
            'azimuth', 'snr'
        ])

        for i, s in enumerate(s_list):
            multi_ch_aud, fs = librosa.core.load(s, sr=None, mono=False)
            multi_ch_aud_na = os.path.splitext(os.path.basename(s))[0]

            # parse the speech name and RIR name from the filename
            # (format: <room>_az<az>_el<el>_r<r>_<speech name>.wav)
            split = multi_ch_aud_na.split('_')
            spc_na = f'{split[-2]}_{split[-1]}'
            spc_rir_na = f'{split[1]}_{split[2]}_{split[3]}'

            # noise rir = speech rir azimuth + 90/180/270 degrees
            az = int(split[1][2:])
            room = split[0]
            n = np.random.randint(1, 4)
            noi_az = (az + 90 * n) % 360  # +90 / +180 / +270 degrees
            noi_rir_na = f'az{noi_az}_{split[2]}_{split[3]}'
            noi_rir = os.getcwd() + f'/rir/{mode}/{split[0]}/{noi_rir_na}.npz'
            if mode == 'cv':
                noi_rir = os.getcwd() + f'/rir/tr/{split[0]}/{noi_rir_na}.npz'

            # select and load random noise
            idx_n = np.random.randint(0, len(n_list))
            noi, fs2 = librosa.core.load(n_list[idx_n], sr=None)
            noi_na = os.path.splitext(os.path.basename(n_list[idx_n]))[0]
            assert fs == fs2

            # convolve noise with RIR
            npz = np.load(noi_rir, allow_pickle=True)
            rir = npz['rir']
            # pad by 8191 samples (RIR length 8192) so the edges can be trimmed
            rand_start = np.random.randint(
                0, noi.shape[0] - multi_ch_aud.shape[1] - 8191)
            multi_ch_noi_tmp = ss.convolve(
                rir, noi[rand_start:rand_start + multi_ch_aud.shape[1] + 8191,
                         np.newaxis])

            multi_ch_noi = multi_ch_noi_tmp[8191:-8191, :].transpose()

            # mix speech and noise with SNR
            idx_snr = np.random.randint(0, len(snr_list))
            snr = snr_list[idx_snr]
            noisy, clean, noise = self.snr_mix(multi_ch_aud, multi_ch_noi, snr)

            audiowrite(
                save_path + f'/mix/noisy_{i + 1:05d}_{noi_na}_{snr}.wav',
                noisy.transpose(), fs)
            audiowrite(save_path + f'/s1/clean_{i + 1:05d}.wav',
                       clean.transpose(), fs)
            audiowrite(save_path + f'/s2/noise_{i + 1:05d}.wav',
                       noise.transpose(), fs)

            wr.writerow([
                i + 1, spc_na, room, spc_rir_na, noi_na, noi_rir_na, 90 * n,
                snr
            ])

        f.close()
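
The noise direction is derived from the speech azimuth by adding a random multiple of 90 degrees modulo 360, so the two sources are always separated by 90, 180, or 270 degrees. A standalone check of the mapping:

    az = 30                            # example speech azimuth
    for n in range(1, 4):
        print((az + 90 * n) % 360)     # -> 120, 210, 300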