def mix_and_separate2(sourceX, sourceY, noises):
    # simulate the room, mix the sources, run BSS, and return 16-bit outputs
    mix, y_hat, sir, sdr = createroom(sourceX, sourceY, noises, mic_p, mic_d,
                                      sour_p, sour_d, callback_mix, roomdim,
                                      absorption, max_order, n_mics, angle)
    sep1 = pra.normalize(y_hat.T[0], bits=16).astype(np.int16).T
    sep2 = pra.normalize(y_hat.T[1], bits=16).astype(np.int16).T
    return mix, sep1, sep2, sir, sdr
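A note on the pattern above: pra.normalize rescales a float signal to span a given bit depth, after which the cast to np.int16 makes it suitable for 16-bit WAV output. A minimal, self-contained sketch of that idiom (the sine tone is just a stand-in input, not from the original code):

import numpy as np
import pyroomacoustics as pra
from scipy.io import wavfile

fs = 16000
t = np.arange(fs) / fs
sig = 0.3 * np.sin(2 * np.pi * 440 * t)  # any float-valued signal

# scale to the full 16-bit range, then cast for WAV output
sig16 = pra.normalize(sig, bits=16).astype(np.int16)
wavfile.write("tone.wav", fs, sig16)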
def perceptual_quality_evaluation(room_dim, mics, good_pos, good_index,
                                  bad_pos, bad_index, rir_location):
    print('start')

    import numpy as np
    from scipy.io import wavfile
    from os import getpid

    import pyroomacoustics as pra

    # number of sources to consider
    n_sources = np.arange(1, 8)
    S = n_sources.shape[0]

    # number of mics
    n_mic = mics.shape[1]

    # Set the speed of sound to match that of the measured RIR
    pra.constants.set('c', 345.5)

    Fs = 8000  # Hz (integer, as required by wavfile.write)
    N = 1024
    Lg = int(0.03 * Fs)  # 30 ms long filter
    delay_bf = 0.02
    sigma2_n = 1e-6

    # reflection coefficients from the walls (hand-waving)
    reflection = {
        'ground': 0.8,
        'south': 0.8,
        'west': 0.8,
        'north': 0.8,
        'east': 0.8,
        'ceilling': 0.5
    }

    speech_sample1 = 'samples/fq_sample1_8000.wav'
    speech_sample2 = 'samples/fq_sample2_8000.wav'

    # Create the room
    room = pra.ShoeBox3D(np.zeros(3),
                         room_dim,
                         Fs,
                         max_order=1,
                         absorption=reflection,
                         sigma2_awgn=sigma2_n)

    # Create the beamformer
    bf = pra.Beamformer(mics, Fs, N=N, Lg=Lg)
    room.addMicrophoneArray(bf)

    # data receptacles
    beamformer_names = ['Rake Perceptual', 'Rake MVDR']
    bf_weights_fun = [bf.rakePerceptualFilters, bf.rakeMVDRFilters]
    bf_fnames = ['1', '2']
    NBF = len(beamformer_names)

    # receptacle arrays
    pesq_input = np.zeros(2)
    pesq_bf = np.zeros((2, NBF, S))

    # create a single reference mic at position of microphone 4
    ref_mic_n = 4
    ref_mic = pra.MicrophoneArray(bf.R[:, ref_mic_n, np.newaxis], Fs)

    # since we run multiple processes, filenames must be made unique with the PID
    pid = str(getpid())

    file_ref = 'output_samples/fqref' + pid + '.wav'
    file_suffix = '-' + pid + '.wav'
    files_bf = [
        'output_samples/fq' + str(i + 1) + file_suffix for i in range(NBF)
    ]
    file_raw = 'output_samples/fqraw' + pid + '.wav'

    # index of good and bad sources
    good = good_index
    bad = bad_index

    # Read the two speech samples used
    rate, good_signal = wavfile.read(speech_sample1)
    good_signal = np.array(good_signal, dtype='float64')
    good_signal = pra.normalize(good_signal)
    good_signal = pra.highpass(good_signal, rate)
    good_len = good_signal.shape[0] / float(Fs)

    rate, bad_signal = wavfile.read(speech_sample2)
    bad_signal = np.array(bad_signal, dtype='float64')
    bad_signal = pra.normalize(bad_signal)
    bad_signal = pra.highpass(bad_signal, rate)
    bad_len = bad_signal.shape[0] / float(Fs)

    # variance of good signal
    good_sigma2 = np.mean(good_signal**2)

    # scale the interference signal to have the same power as the desired signal
    bad_signal *= np.sqrt(good_sigma2 / np.mean(bad_signal**2))

    # distance from the array center to the good source
    good_distance = np.linalg.norm(bf.center[:, 0] - good_pos)

    # distance from the array center to the bad source
    bad_distance = np.linalg.norm(bf.center[:, 0] - bad_pos)

    if good_len > bad_len:
        good_delay = 0
        bad_delay = (good_len - bad_len) / 2.
    else:
        bad_delay = 0
        good_delay = (bad_len - good_len) / 2.

    # create the reference room for a free-space, noiseless, interference-free simulation
    ref_room = pra.ShoeBox3D([0, 0, 0], room_dim, Fs, max_order=0)
    ref_room.addSource(good_pos, signal=good_signal, delay=good_delay)
    ref_room.addMicrophoneArray(ref_mic)
    ref_room.compute_RIR()
    ref_room.simulate()
    reference = pra.highpass(ref_mic.signals[0], Fs)
    reference_n = pra.normalize(reference)

    # save the reference desired signal
    #wavfile.write(file_ref, Fs, pra.to_16b(reference_n))

    new_ref = good_signal.copy()
    new_ref = pra.normalize(pra.highpass(new_ref, Fs))
    wavfile.write(file_ref, Fs, pra.to_16b(new_ref))

    # add the sources to the 'real' room
    room.addSource(good_pos, signal=good_signal, delay=good_delay)
    room.addSource(bad_pos, signal=bad_signal, delay=bad_delay)

    # read in the RIR from file
    for r in range(n_mic):
        room.rir.append([])
        for s in [good_index, bad_index]:

            # read wav file
            fname_rir = rir_location % (r + 1, s + 1)
            rir_fs, rir = wavfile.read(fname_rir)
            rir = np.array(rir, dtype='float64')

            if rir_fs != Fs:
                raise ValueError(
                    'The RIR and the signals do not have the same sampling rate.'
                )
                '''
                import scikits.samplerate as sr
                rir = sr.resample(rir, Fs/float(rir_fs), 'sinc_best')

                # the factor 2 was empirically determined to be necessary to get
                # amplitude of RIR in the correct ballpark.
                rir *= 2.
                '''

            room.rir[r].append(rir)

    # compute the input signal to the microphones
    room.simulate()

    # save degraded signal at reference microphone
    raw = bf.signals[ref_mic_n]
    raw_n = pra.normalize(pra.highpass(raw, Fs))
    wavfile.write(file_raw, Fs, pra.to_16b(raw_n))

    pesq_input = pra.pesq(file_ref, file_raw, Fs=Fs)

    for src in room.sources:
        src.setOrdering('strongest', ref_point=bf.center)

    for k, s in enumerate(n_sources):

        good_img = room.sources[0][:s]
        bad_img = room.sources[1][:s]

        for i, bfr in enumerate(beamformer_names):

            bf_weights_fun[i](good_img,
                              bad_img,
                              sigma2_n * np.eye(n_mic * Lg),
                              delay=delay_bf)

            # run beamformer
            output = bf.process()
            output = pra.normalize(pra.highpass(output, Fs))
            output = pra.time_align(reference_n, output)

            # save files for PESQ evaluation
            wavfile.write(files_bf[i], Fs, pra.to_16b(output))

            # compute PESQ once and store it for this beamformer and source count
            pesq_bf[:, i, k] = pra.pesq(file_ref, files_bf[i], Fs=Fs).T
    ''' This is how you can compare the true RIRs with the image src model generated one
    plt.figure()
    for m in range(n_mic):

        rir_sim = room.sources[0].getRIR(mics[:,m], Fs)
        plt.subplot(3,3,m+1)
        plt.plot(room.rir[m][0][:rir_sim.shape[0]])
        plt.plot(rir_sim)

    plt.show()
    '''

    print('Finished')

    return pesq_input, pesq_bf
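A hedged sketch of how this routine might be driven; the room geometry, microphone layout, source positions, and the RIR filename template below are hypothetical placeholders, not values from the original experiment:

import numpy as np

room_dim = [4.0, 6.0, 3.0]                 # hypothetical room size (m)
mics = np.random.rand(3, 8) * 0.2 + 1.0    # hypothetical 3 x n_mic positions
good_pos = np.array([2.5, 4.0, 1.5])       # hypothetical desired source
bad_pos = np.array([1.0, 2.0, 1.5])        # hypothetical interferer

pesq_input, pesq_bf = perceptual_quality_evaluation(
    room_dim, mics, good_pos, 0, bad_pos, 1,
    rir_location='rir/mic%d_src%d.wav')    # '%d' slots: mic index, source index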
Example #3
    def play(self, src):
        sd.play(pra.normalize(src) * 0.75,
                samplerate=self.fs,
                blocking=False)
        speech_file_location, noise_file_location, room_dim, max_order,
        snr_vals, R, pos_source, pos_noise, N)
    '''
    Write to WAV + labelling of our processed noisy signals
    '''
    # keep only the first microphone channel, dropping the mic dimension
    noisy_signal_flatten = noisy_signal[:, 0, :]

    # label the beamformed signals and compare their classification scores
    # with those of the original noisy signals
    score_processing = np.zeros(len(snr_vals))
    score_original = np.zeros(len(snr_vals))

    for i, snr in enumerate(snr_vals):
        print("SNR / %f dB" % snr)
        dest = os.path.join(dest_dir, "beamformed_signal_snr_db_%d.wav" % snr)
        signal = pra.normalize(noisy_signal_beamformed[i],
                               bits=16).astype(np.int16)
        wavfile.write(dest, 16000, signal)
        score_processing[i] = label_wav(dest, labels_file, graph_file,
                                        speech.meta.as_dict()['word'])

        dest = os.path.join(dest_dir, "original_signal_snr_db_%d.wav" % (snr))
        signal = pra.normalize(noisy_signal_flatten[i],
                               bits=16).astype(np.int16)
        wavfile.write(dest, 16000, signal)
        score_original[i] = label_wav(dest, labels_file, graph_file,
                                      speech.meta.as_dict()['word'])
        print()

    # plot the results
    plt.plot(snr_vals, score_processing, label="beamformed signal")
    plt.plot(snr_vals, score_original, label="original")
    # back to time domain
    processed_audio[n:n + hop, ] = stft.synthesis(X)

    # update step
    n += hop

proc_time = time.time() - start_time
print("Processing time: {} minutes".format(proc_time / 60))
"""
Save and plot spectrogram
"""
wavfile.write(
    os.path.join(os.path.dirname(__file__), 'output_samples',
                 'denoise_output_IterativeWiener.wav'), fs,
    pra.normalize(processed_audio).astype(np.float32))
print("Noisy and denoised file written to: '%s'" %
      os.path.join(os.path.dirname(__file__), 'output_samples'))

signal_norm = signal / np.abs(signal).max()
processed_audio_norm = processed_audio / np.abs(processed_audio).max()

if plot_spec:
    min_val = -80
    max_val = -40
    plt.figure()
    plt.subplot(3, 1, 1)
    plt.specgram(noisy_signal[:n - hop],
                 NFFT=256,
                 Fs=fs,
                 vmin=min_val,
Example #6
                 label=f"SIR {s+1}",
                 marker="o")
    plt.title(args.algo)
    plt.legend()
    plt.tight_layout(pad=0.5)

    if not args.gui:
        plt.show()
    else:
        plt.show(block=False)

    if args.save:
        wavfile.write(
            "bss_iva_mix.wav",
            room.fs,
            pra.normalize(mix[0, :], bits=16).astype(np.int16),
        )
        for i, sig in enumerate(y_hat):
            wavfile.write(
                "bss_iva_source{}.wav".format(i + 1),
                room.fs,
                pra.normalize(sig, bits=16).astype(np.int16),
            )

    if args.gui:

        from tkinter import Tk

        # Make a simple GUI to listen to the separated samples
        root = Tk()
        my_gui = PlaySoundGUI(root,
Example #7
        ## Read target speech audio
        while True:
            spe_id = random.randint(start_spe_id, end_spe_id)
            utt_key = sp_utts_scp[spe_id][0]
            spe_path = sp_utts_scp[spe_id][1]
            spe_name = file_name(pathName=spe_path)
            sample_rate, spe_wav = wavfile.read(spe_path)
            if len(spe_wav.shape) > 1:
                spe_wav = np.mean(spe_wav, 1)
            spe_wav = spe_wav.astype(np.float64)
            if np.mean(np.abs(spe_wav)) > 0:
                break

        spe_length = spe_wav.shape[0]
        spe_wav = pra.normalize(spe_wav)
        spe_wav = pra.highpass(spe_wav, Fs, 50)

        room_mix.add_source(target_source, signal=spe_wav, delay=delay)
        room_ref.add_source(target_source, signal=spe_wav, delay=delay)
        #room_dir.add_source(target_source, signal = spe_wav, delay = delay)

        ## Read interfere speech audio
        for it in range(0, interf_num):
            while True:
                while True:
                    inf_id = random.randint(start_spe_id, end_spe_id)
                    if np.abs(spe_id - inf_id) > 500:
                        break
                inf_path = sp_utts_scp[inf_id][1]
                sample_rate, inf_wav = wavfile.read(
def modify_input_wav_beamforming(wav, noise, room_dim, max_order, snr_vals,
                                 mic_array, pos_source, pos_noise, N):

    fs_s, audio_anechoic = wavfile.read(wav)
    fs_n, noise_anechoic = wavfile.read(noise)

    #Create a room for the signal
    room_signal = pra.ShoeBox(room_dim,
                              absorption=0.2,
                              fs=fs_s,
                              max_order=max_order)

    #Create a room for the noise
    room_noise = pra.ShoeBox(room_dim,
                             absorption=0.2,
                             fs=fs_n,
                             max_order=max_order)

    # place the signal source and the noise source in their respective rooms
    room_signal.add_source(pos_source, signal=audio_anechoic)
    room_noise.add_source(pos_noise, signal=noise_anechoic)

    #add the microphone array
    mics_signal = pra.Beamformer(mic_array, room_signal.fs, N)
    mics_noisy = pra.Beamformer(mic_array, room_noise.fs, N)
    room_signal.add_microphone_array(mics_signal)
    room_noise.add_microphone_array(mics_noisy)

    #simulate both rooms
    room_signal.simulate()
    room_noise.simulate()

    #take the mic_array.signals from each room
    audio_reverb = room_signal.mic_array.signals
    noise_reverb = room_noise.mic_array.signals

    # design beamforming filters; both beamformers point at the speech source
    # so that the same spatial filter is applied to signal and noise
    mics_signal.rake_delay_and_sum_weights(room_signal.sources[0][:1])
    mics_noisy.rake_delay_and_sum_weights(room_signal.sources[0][:1])

    output_signal = mics_signal.process()
    output_noise = mics_noisy.process()

    # we are going to normalize the noise
    size = np.shape(audio_reverb)
    noise_normalized = np.zeros(size)

    # the noise recording must be at least as long as the audio recording
    if len(noise_reverb[0]) < len(audio_reverb[0]):
        raise ValueError(
            'The noise signal is shorter than the audio signal.')
    output_noise = output_noise[:len(output_signal)]

    norm_fact = np.linalg.norm(noise_reverb[-1])
    noise_normalized = output_noise / norm_fact

    # initialize the array of noisy signals
    noisy_signal = np.zeros([len(snr_vals), np.shape(output_signal)[0]])

    for i, snr in enumerate(snr_vals):
        noise_std = np.linalg.norm(audio_reverb[-1]) / (10**(snr / 20.))
        final_noise = noise_normalized * noise_std
        noisy_signal[i] = pra.normalize(
            pra.highpass(output_signal + final_noise, fs_s))

    return noisy_signal
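A possible invocation, hedged: the WAV paths, geometry, and SNR grid are made-up placeholders. pra.circular_2D_array builds a planar array, which is then lifted to a fixed height to match the 3-D room:

import numpy as np
import pyroomacoustics as pra

snr_vals = np.arange(-10, 21, 5)               # target SNRs in dB
room_dim = [5.0, 4.0, 3.0]                     # hypothetical room (m)
R2d = pra.circular_2D_array(center=[2.5, 2.0], M=8, phi0=0, radius=0.1)
mic_array = np.vstack([R2d, 1.5 * np.ones(8)])  # lift to z = 1.5 m -> shape (3, 8)

noisy = modify_input_wav_beamforming(
    'speech.wav', 'noise.wav', room_dim, max_order=17,
    snr_vals=snr_vals, mic_array=mic_array,
    pos_source=[1.0, 1.0, 1.5], pos_noise=[4.0, 3.0, 1.5], N=1024)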
Example #9
        processed_signal[i] = sp.process.denoise(noisy_signal[i], fft_len,
                                                 lpc_order, iterations)
        processed_signal_VAD[i], _, _ = sp.process.denoise_with_vad(
            noisy_signal[i], sr, fft_len, lpc_order, iterations, alpha)
    '''
    Write to WAV + labelling of our processed noisy signals
    '''
    # label the denoised signals and compare their classification scores
    # with those of the original noisy signals
    score_processing = np.zeros(len(snr_vals))
    score_processing_VAD = np.zeros(len(snr_vals))
    score_original = np.zeros(len(snr_vals))

    for i, snr in enumerate(snr_vals):
        print("SNR : %f dB" % snr)
        dest = os.path.join(dest_dir, "denoised_snr_db_%d.wav" % (snr))
        signal = pra.normalize(processed_signal[i], bits=16).astype(np.int16)
        wavfile.write(dest, 16000, signal)
        score_processing[i] = label_wav(dest, labels_file, graph_file,
                                        speech.meta.as_dict()['word'])

        dest = os.path.join(dest_dir,
                            "denoised_with_VAD_snr_db_%d.wav" % (snr))
        signal = pra.normalize(processed_signal_VAD[i],
                               bits=16).astype(np.int16)
        wavfile.write(dest, 16000, signal)
        score_processing_VAD[i] = label_wav(dest, labels_file, graph_file,
                                            speech.meta.as_dict()['word'])

        dest = os.path.join(dest_dir, "noisy_snr_db_%d.wav" % (snr))
        signal = pra.normalize(noisy_signal[i], bits=16).astype(np.int16)
        wavfile.write(dest, 16000, signal)
        score_map_original[w] = np.zeros([sub, len(snr_vals)])
        score_map_processing[w] = np.zeros([sub, len(snr_vals)])

    # now compute the labels
    idx = 0
    for s in speech_samps:
        for i, snr in enumerate(snr_vals):
            word = s.meta.as_dict()['word']
            # destination of the processed signal
            dest_pro = os.path.join(
                dest_dir, "processed_signal%d%s_snr_db_%d" % (idx, word, snr))
            # destination of the original signal
            dest_ori = os.path.join(
                dest_dir, "original_signal%d%s_snr_db_%d" % (idx, word, snr))
            # noisy processed signal
            noisy_pro = pra.normalize(processed_audio_map[s][i],
                                      bits=16).astype(np.int16)
            wavfile.write(dest_pro, 16000, noisy_pro)
            # noisy original signal
            noisy_ori = pra.normalize(noisy_signal[s][i],
                                      bits=16).astype(np.int16)
            wavfile.write(dest_ori, 16000, noisy_ori)
            # update the score maps
            print("score for processed signal: ")
            score_map_processing[word][idx][i] = label_wav(
                dest_pro, labels_file, graph_file, word)
            print()
            print("score for original signal: ")
            score_map_original[word][idx][i] = label_wav(
                dest_ori, labels_file, graph_file, word)
            print()
            idx += 1
Example #11
def modify_input_wav_multiple_mics(wav, noise, room_dim, max_order, snr_vals,
                                   mic_array, pos_source, pos_noise):

    fs_s, audio_anechoic = wavfile.read(wav)
    fs_n, noise_anechoic = wavfile.read(noise)

    #Create a room for the signal
    room_signal = pra.ShoeBox(room_dim,
                              absorption=0.2,
                              fs=fs_s,
                              max_order=max_order)

    #Create a room for the noise
    room_noise = pra.ShoeBox(room_dim,
                             absorption=0.2,
                             fs=fs_n,
                             max_order=max_order)

    # place the signal source and the noise source in their respective rooms
    room_signal.add_source(pos_source, signal=audio_anechoic)
    room_noise.add_source(pos_noise, signal=noise_anechoic)

    # add the microphone array to both rooms
    room_signal.add_microphone_array(
        pra.MicrophoneArray(mic_array.T, room_signal.fs))
    room_noise.add_microphone_array(
        pra.MicrophoneArray(mic_array.T, room_noise.fs))

    #simulate both rooms
    room_signal.simulate()
    room_noise.simulate()

    #take the mic_array.signals from each room
    audio_reverb = room_signal.mic_array.signals
    noise_reverb = room_noise.mic_array.signals

    shape = np.shape(audio_reverb)

    noise_normalized = np.zeros(shape)

    # the noise recording must be at least as long as the audio recording
    if len(noise_reverb[0]) < len(audio_reverb[0]):
        raise ValueError(
            'The noise signal is shorter than the audio signal.')
    noise_reverb = noise_reverb[:, :len(audio_reverb[0])]

    norm_fact = np.linalg.norm(noise_reverb[0])
    noise_normalized = noise_reverb / norm_fact

    # initialize the array of noisy signals
    noisy_signal = np.zeros([len(snr_vals), shape[0], shape[1]])

    for i, snr in enumerate(snr_vals):
        noise_std = np.linalg.norm(audio_reverb[0]) / (10**(snr / 20.))
        for m in range(shape[0]):

            final_noise = noise_normalized[m] * noise_std
            noisy_signal[i][m] = pra.normalize(audio_reverb[m] + final_noise)

    return noisy_signal
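The per-SNR scaling above follows from 20 * log10(||s|| / ||n||) = SNR: dividing the reference signal norm by 10**(snr / 20) gives the noise norm that realizes the target SNR. A quick standalone check with made-up vectors:

import numpy as np

s = np.random.randn(16000)           # stand-in clean signal
n = np.random.randn(16000)           # stand-in noise
n_unit = n / np.linalg.norm(n)       # unit-norm noise, as in the function

snr_target = 10.0                    # dB
noise_std = np.linalg.norm(s) / 10 ** (snr_target / 20.0)
scaled = n_unit * noise_std

snr_actual = 20 * np.log10(np.linalg.norm(s) / np.linalg.norm(scaled))
print(snr_actual)                    # ~10.0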
Example #12
    under different SNRs.
    """
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    # truncate beamformed noise
    noise_bf = noise_bf[:len(speech_bf)]

    # compute score for different SNR vals
    print()
    score_beamformed = np.empty(len(snr_vals))
    score_single = np.empty(len(snr_vals))
    for idx, snr in enumerate(snr_vals):

        noisy_signal = speech_bf + snr_facts[idx] * noise_bf
        noisy_signal = pra.normalize(pra.highpass(noisy_signal, fs_s),
                                     bits=16).astype(np.int16)
        dest = os.path.join(dest_dir, "das_bf_snr_db_%d.wav" % (snr))
        wavfile.write(dest, fs_s, noisy_signal)
        score_beamformed[idx] = label_wav(dest, labels_file, graph_file,
                                          speech_samp.meta.word)

        # compute score for single mic for reference
        single_mic = ref_mic_sig + snr_facts[idx] * ref_mic_noise
        single_mic = pra.normalize(pra.highpass(single_mic, fs_s),
                                   bits=16).astype(np.int16)
        dest = os.path.join(dest_dir, "single_mic_snr_db_%d.wav" % (snr))
        wavfile.write(dest, fs_s, single_mic)
        score_single[idx] = label_wav(dest, labels_file, graph_file,
                                      speech_samp.meta.word)

    plt.figure()
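The snippet stops right at plt.figure(); a plausible continuation, mirroring the plotting done in the earlier examples (a hedged sketch, not part of the original source):

plt.plot(snr_vals, score_beamformed, label="DAS beamformed")
plt.plot(snr_vals, score_single, label="single microphone")
plt.xlabel("SNR [dB]")
plt.ylabel("classification score")
plt.legend()
plt.show()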
Example #14
    idx = 0
    for s in speech_samps:
        for i, snr in enumerate(snr_vals):
            word = s.meta.as_dict()['word']
            # destination of the processed signal
            dest_pro = os.path.join(
                dest_dir, "processed_signal%d%s_snr_db_%d" % (idx, word, snr))
            # destination of the processed with VAD signal
            dest_pro_vad = os.path.join(
                dest_dir,
                "processed_signal_VAD%d%s_snr_db_%d" % (idx, word, snr))
            # destination of the original signal
            dest_ori = os.path.join(
                dest_dir, "original_signal%d%s_snr_db_%d" % (idx, word, snr))
            # noisy processed signal
            noisy_pro = pra.normalize(processed_signal[s][i],
                                      bits=16).astype(np.int16)
            wavfile.write(dest_pro, 16000, noisy_pro)
            # noisy VAD+processed signal
            noisy_pro_vad = pra.normalize(processed_signal_VAD[s][i],
                                          bits=16).astype(np.int16)
            wavfile.write(dest_pro_vad, 16000, noisy_pro_vad)
            # noisy original signal
            noisy_ori = pra.normalize(noisy_signal[s][i],
                                      bits=16).astype(np.int16)
            wavfile.write(dest_ori, 16000, noisy_ori)
            # update the score maps
            print("score for original signal: ")
            score_map_original[word][idx][i] = label_wav(
                dest_ori, labels_file, graph_file, word)
            print()
            print("score for denoised signal: ")
Example #15
    def line_createroom(Bird1, Bird2, Bird3, callback_mix):

        roomdim = np.array([20, 20, 10])
        max_order = 17
        absorption = 0.9

        mic_p = [13, 10, 3.5]  # mic center point
        mic_d = 0.015          # mic distance
        sour_p = [7, 10, 6]    # source position
        sour_d = 5             # source distance

        n_mics = 4             # number of mics
        n_sources = 3

        mic_rot = np.pi / 2
        bird_rot = np.pi / 2

        ### params setting ###
        np.random.seed(10)
        # STFT parameters
        framesize = 4096
        win_a = pra.hann(framesize)
        win_s = pra.transform.compute_synthesis_window(
            win_a, framesize // 2)
        ogive_mu = 0.1
        ogive_update = "switching"
        ogive_iter = 2000
        SIR = 10  # dB
        SNR = 60  # dB
        algo = algo_choices[0]
        no_cb = True
        save = True
        n_iter = 60  
        dist = "gauss"  # laplace
        fs = 44100
        n_sources_target = 3
        assert n_sources_target <= n_mics, "More sources than microphones is not supported"

   
        source_std = np.ones(n_sources_target)
        # room size
        room_dim = roomdim

        # microphone positions
        mic_locs = semi_line_layout(mic_p, mic_rot, mic_d, n_mics)

        # target positions
        source_locs = semi_line_layout(sour_p, bird_rot, sour_d, n_sources)

        # nudge the first source position (offsets currently zero)
        source_locs[0][0], source_locs[0][2] = source_locs[0][0] + 0, source_locs[0][2] + 0

        # target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])

        # audio loaded
        wav_files = [Bird1,Bird2,Bird3]
     
        signals = wav_read_center(wav_files, seed=123)

        #create room
        room = pra.ShoeBox(room_dim, fs=44100, absorption=absorption,
                            max_order=max_order, air_absorption=True, humidity=50)

        # add source
        for sig, loc in zip(signals, source_locs.T):
            room.add_source(loc, signal=sig)

        # add micro
        room.add_microphone_array(
            pra.MicrophoneArray(mic_locs, fs=room.fs))

        callback_mix_kwargs = {
            "snr": SNR,
            "sir": SIR,
            "n_src": n_sources,
            "n_tgt": n_sources_target,
            "src_std": source_std,
            "ref_mic": 0,
        }
        # # draw
        # x = mic_locs[:2][0]
        # y = mic_locs[:2][1]
        # import matplotlib.pyplot as plt
        # plt.scatter(x,y)
        # plt.axis('equal')
        # plt.xlim([0,20])
        # plt.ylim([0,20])
        # x1 = source_locs[:2][0]
        # y1 = source_locs[:2][1]
        # plt.scatter(x1,y1)
        # plt.xlim([0,20])
        # plt.ylim([0,20])
        # plt.axis('equal')
        # plt.show()

        # Run the simulation
        separate_recordings = room.simulate(
            callback_mix=callback_mix,
            callback_mix_kwargs=callback_mix_kwargs,
            return_premix=True,
        )
        mics_signals = room.mic_array.signals
        print("line Simulation done.")

   

        # Monitor Convergence
        ref = np.moveaxis(separate_recordings, 1, 2)
        if ref.shape[0] < n_mics:
            ref = np.concatenate(
                (ref, np.random.randn(n_mics -
                                        ref.shape[0], ref.shape[1], ref.shape[2])),
                axis=0,
            )

        convergence_callback = None

        X_all = pra.transform.analysis(
            mics_signals.T, framesize, framesize // 2, win=win_a
        ).astype(np.complex128)
        X_mics = X_all[:, :, :n_mics]

        tic = time.perf_counter()

        if algo == "auxiva":
            # Run AuxIVA
            Y = overiva(
                X_mics,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                callback=convergence_callback,
            )
        elif algo == "auxiva_pca":
            # Run AuxIVA with a PCA front-end
            Y = auxiva_pca(
                X_mics,
                n_src=n_sources_target,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                callback=convergence_callback,
            )
        elif algo == "overiva":
            # Run OverIVA
            Y = overiva(
                X_mics,
                n_src=n_sources_target,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
           
        elif algo == "ilrma":
            # Run ILRMA
            Y = pra.bss.ilrma(
                X_mics,
                n_iter=n_iter,
                n_components=2,
                proj_back=True,
                callback=convergence_callback,
            )
        elif algo == "ogive":
            # Run OGIVE
            Y = ogive(
                X_mics,
                n_iter=ogive_iter,
                step_size=ogive_mu,
                update=ogive_update,
                proj_back=True,
                model=dist,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        elif algo == "ogive_matlab":
            # Run OGIVE
            Y = ogive_matlab_wrapper(
                X_mics,
                n_iter=ogive_iter,
                step_size=ogive_mu,
                update=ogive_update,
                proj_back=True,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        else:
            raise ValueError("No such algorithm {}".format(algo))

        # Run iSTFT
        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0], framesize, framesize // 2, win=win_s)[
                :, None
            ]
            y = y.astype(np.float64)
        else:
            y = pra.transform.synthesis(Y, framesize, framesize // 2, win=win_s).astype(
                np.float64
            )

        # If some of the output are uniformly zero, just add a bit of noise to compare
        for k in range(y.shape[1]):
            if np.sum(np.abs(y[:, k])) < 1e-10:
                y[:, k] = np.random.randn(y.shape[0]) * 1e-10

        # For conventional methods of BSS, reorder the signals by decreasing power
        if algo != "blinkiva":
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        # Compare SIR
        m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
        sdr, sir, sar, perm = bss_eval_sources(
            ref[:n_sources_target, :m, 0],
            y[framesize // 2: m + framesize // 2, :n_sources_target].T,
        )

        # reorder the vector of reconstructed signals
        y_hat = y[:, perm]

        #return 
        mixdata = pra.normalize(mics_signals, bits=16).astype(np.int16).T
        separationdata = []
        for sig in y_hat.T:
            separationdata.append(pra.normalize(sig, bits=16).astype(np.int16).T)
        print("sdr",sdr)        
        return sdr,sir,mixdata,separationdata  #wavefile(mixdata) wavefile(separationdata[0]) wavefile(separationdata[1])
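A hedged sketch of consuming the return values, assuming line_createroom is in scope; the bird WAV paths are hypothetical and callback_mix is assumed to be the mixing callback defined alongside these helpers:

from scipy.io import wavfile

sdr, sir, mixdata, separationdata = line_createroom(
    'bird1.wav', 'bird2.wav', 'bird3.wav', callback_mix)

# mixdata has shape (n_samples, n_mics); write the first microphone channel
wavfile.write('line_mix.wav', 44100, mixdata[:, 0])
for i, sep in enumerate(separationdata):
    wavfile.write('line_sep{}.wav'.format(i + 1), 44100, sep)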
Lg = int(np.ceil(Lg_t * Fs))
Lgp = np.floor(0.4 * Lg)
Lgm = Lg - Lgp
print('Lg =', Lg)

# create a microphone array
if shape == 'Circular':
    R = pra.circular2DArray(mic1, M, phi, d * M / (2 * np.pi))
else:
    R = pra.linear2DArray(mic1, M, phi, d)
mics = pra.Beamformer(R, Fs, N, Lg=Lg, hop=hop, zpf=zp, zpb=zp)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read('samples/singing_' + str(Fs) + '.wav')
signal1 = np.array(signal1, dtype=float)
signal1 = pra.normalize(signal1)
signal1 = pra.highpass(signal1, Fs)
delay1 = 0.

# the second signal (interferer) is some german speech
rate2, signal2 = wavfile.read('samples/german_speech_' + str(Fs) + '.wav')
signal2 = np.array(signal2, dtype=float)
signal2 = pra.normalize(signal2)
signal2 = pra.highpass(signal2, Fs)
delay2 = 1.

# create the room with sources and mics
room1 = pra.Room.shoeBox2D([0, 0],
                           room_dim,
                           Fs,
                           t0=t0,
Example #17
    mics = pra.Beamformer(echo, Fs, N=fft_len, Lg=Lg)
    roomPoly.add_microphone_array(mics)
    roomPoly.add_source(source, delay=0, signal=xtone)
    roomPoly.add_source(interferer, delay=0, signal=silence)
    roomPoly.image_source_model(use_libroom=True)
    roomPoly.compute_rir()
    roomPoly.simulate()

    # Rake MVDR simulation
    BeamformerType = 'RakeMVDR'
    good_sources = roomPoly.sources[0][:max_order_design + 1]
    bad_sources = roomPoly.sources[1][:max_order_design + 1]
    mics.rake_mvdr_filters(good_sources, bad_sources,
                           sigma2_n * np.eye(mics.Lg * mics.M))
    output = mics.process()
    out = pra.normalize(pra.highpass(output, Fs))
    out = normalize(out)

    # Rake Perceptual simulation
    # BeamformerType = 'RakePerceptual'
    # good_sources = room1.sources[0][:max_order_design+1]
    # bad_sources = room1.sources[1][:max_order_design+1]
    # mics.rake_perceptual_filters(good_sources,
    #                     bad_sources,
    #                     sigma2_n*np.eye(mics.Lg*mics.M))
    # output          =   mics.process()
    # out             =   pra.normalize(pra.highpass(output, Fs))

    # input_mic       =   pra.normalize(pra.highpass(mics.signals[mics.M//2], Fs))
    # input_mic       =   normalize(input_mic)
# compute the MaxSINR beamformer
w = [la.eigh(rs, b=rn, eigvals=(M-1,M-1))[1] for rs,rn in zip(Rs[1:], Rn[1:])]
w = np.squeeze(np.array(w))
w /= la.norm(w, axis=1)[:,None]
w = np.concatenate([np.ones((1,M))/np.sqrt(M), w], axis=0)

# Compute the gain
ref = X[vad_x,:,0]

#z = compute_gain(w, X[vad_x,:,:], ref, clip_up=1.0, clip_down=0.1)
z = compute_gain(w, X[vad_x,:,:], ref, clip_up=2.0)
#z = compute_gain(w, X[vad_x,:,:], ref)

sig_in = pra.normalize(mics.signals[0])

mics.weights = w.T

room.plot(img_order=1, freq=[800,1000,1200, 1400, 1600, 1800, 2000])
plt.title('No matching')
plt.figure()
mics.plot_beam_response()
plt.title('No matching')

sig_out_flat = mics.process()
sig_out_flat = pra.normalize(sig_out_flat)

mics.weights = (z[:,None] * w).T
sig_out_ref0 = mics.process()
sig_out_ref0 = pra.normalize(sig_out_ref0)
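The list comprehension near the top of this snippet solves, for each frequency band, the generalized eigenvalue problem Rs w = lambda Rn w and keeps the eigenvector of the largest eigenvalue, i.e. the MaxSINR direction. A standalone illustration with synthetic covariance matrices (all names here are made up for the demo):

import numpy as np
from scipy import linalg as la

M = 4                                     # number of microphones
A = np.random.randn(M, M) + 1j * np.random.randn(M, M)
Rs = A @ A.conj().T                       # synthetic signal covariance (PSD)
B = np.random.randn(M, M) + 1j * np.random.randn(M, M)
Rn = B @ B.conj().T + np.eye(M)           # synthetic noise covariance, well conditioned

# scipy returns eigenvalues in ascending order; the last eigenvector maximizes
# the SINR quotient (w^H Rs w) / (w^H Rn w)
evals, evecs = la.eigh(Rs, Rn)
w = evecs[:, -1]
w /= np.linalg.norm(w)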
Example #19
    def createroom(mic_p, mic_d, sour_p, sour_d, callback_mix, roomdim,
                   absorption, max_order, n_mics, angle):
        np.random.seed(10)
        # STFT parameters
        framesize = 4096
        win_a = pra.hann(framesize)
        win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)
        # algorithm parameters
        # param ogive
        ogive_mu = 0.1
        ogive_update = "switching"
        ogive_iter = 2000
        SIR = 10  # dB
        SNR = 60  # dB, SNR with respect to a single target source and microphone self-noise

        ########separation params#############
        algo = algo_choices[0]
        no_cb = True
        save = True
        n_iter = 60
        dist = "gauss"  #guass or laplace
        ########paramas set##################
        fs = 44100
        n_sources = 2
        n_mics = n_mics
        n_sources_target = 2
        assert n_sources_target <= n_mics, "More sources than microphones is not supported"

        # set the source powers, the first one is half
        source_std = np.ones(n_sources_target)
        # room size
        room_dim = roomdim
        # micro position
        rot = angle
        offset = np.pi - rot / 2
        mic_locs = semi_circle_layout(mic_p, rot, mic_d, n_mics,
                                      rot=offset)  ###micro2

        # target position
        target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])
        #interference position
        interferer_locs = random_layout([14, 0, 6],
                                        n_sources - n_sources_target,
                                        offset=[5, 20, 3],
                                        seed=1)
        source_locs = target_locs
        # audio loaded
        wav_files = [amBird, saBird]
        signals = wav_read_center(wav_files, seed=123)

        #create room
        room = pra.ShoeBox(room_dim,
                           fs=44100,
                           absorption=absorption,
                           max_order=max_order,
                           air_absorption=True,
                           humidity=50)

        # add source
        for sig, loc in zip(signals, source_locs.T):
            room.add_source(loc, signal=sig)

        # add micro
        room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

        callback_mix_kwargs = {
            "snr": SNR,
            "sir": SIR,
            "n_src": n_sources,
            "n_tgt": n_sources_target,
            "src_std": source_std,
            "ref_mic": 0,
        }

        # Run the simulation
        separate_recordings = room.simulate(
            callback_mix=callback_mix,
            callback_mix_kwargs=callback_mix_kwargs,
            return_premix=True,
        )
        mics_signals = room.mic_array.signals
        print("Simulation done.")

        # rt60 = room.measure_rt60()
        # print(rt60)

        # Monitor Convergence
        ref = np.moveaxis(separate_recordings, 1, 2)
        if ref.shape[0] < n_mics:
            ref = np.concatenate(
                (ref,
                 np.random.randn(n_mics - ref.shape[0], ref.shape[1],
                                 ref.shape[2])),
                axis=0,
            )

        SDR, SIR, cost_func = [], [], []
        convergence_callback = None

        # START BSS

        # shape: (n_frames, n_freq, n_mics)
        X_all = pra.transform.analysis(mics_signals.T,
                                       framesize,
                                       framesize // 2,
                                       win=win_a).astype(np.complex128)
        X_mics = X_all[:, :, :n_mics]

        tic = time.perf_counter()

        # Run BSS
        if algo == "auxiva":
            # Run AuxIVA
            Y = overiva(
                X_mics,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                callback=convergence_callback,
            )
        elif algo == "auxiva_pca":
            # Run AuxIVA with a PCA front-end
            Y = auxiva_pca(
                X_mics,
                n_src=n_sources_target,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                callback=convergence_callback,
            )
        elif algo == "overiva":
            # Run OverIVA
            Y = overiva(
                X_mics,
                n_src=n_sources_target,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        elif algo == "ilrma":
            # Run ILRMA
            Y = pra.bss.ilrma(
                X_mics,
                n_iter=n_iter,
                n_components=2,
                proj_back=True,
                callback=convergence_callback,
            )
        elif algo == "ogive":
            # Run OGIVE
            Y = ogive(
                X_mics,
                n_iter=ogive_iter,
                step_size=ogive_mu,
                update=ogive_update,
                proj_back=True,
                model=dist,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        elif algo == "ogive_matlab":
            # Run OGIVE
            Y = ogive_matlab_wrapper(
                X_mics,
                n_iter=ogive_iter,
                step_size=ogive_mu,
                update=ogive_update,
                proj_back=True,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        else:
            raise ValueError("No such algorithm {}".format(algo))

        toc = time.perf_counter()

        # Run iSTFT
        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0],
                                        framesize,
                                        framesize // 2,
                                        win=win_s)[:, None]
            y = y.astype(np.float64)
        else:
            y = pra.transform.synthesis(Y,
                                        framesize,
                                        framesize // 2,
                                        win=win_s).astype(np.float64)

        # If some of the output are uniformly zero, just add a bit of noise to compare
        for k in range(y.shape[1]):
            if np.sum(np.abs(y[:, k])) < 1e-10:
                y[:, k] = np.random.randn(y.shape[0]) * 1e-10

        # For conventional methods of BSS, reorder the signals by decreasing power
        if algo != "blinkiva":
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        # Compare SIR
        m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
        sdr, sir, sar, perm = bss_eval_sources(
            ref[:n_sources_target, :m, 0],
            y[framesize // 2:m + framesize // 2, :n_sources_target].T,
        )

        # reorder the vector of reconstructed signals
        y_hat = y[:, perm]
        print("SDR:", sdr)
        print("SIR:", sir)

        ####save mix and separation #######
        if save:
            from scipy.io import wavfile
            wavfile.write(
                "birdmix.wav",
                room.fs,
                (pra.normalize(mics_signals, bits=16).astype(np.int16).T)[:,
                                                                          0],
            )
            for i, sig in enumerate(y_hat.T):
                wavfile.write(
                    "birdsep{}.wav".format(i + 1),
                    room.fs,
                    pra.normalize(sig, bits=16).astype(np.int16).T,
                )
        score_map_original[w] = np.zeros([sub, len(snr_vals)])
        score_map_processing[w] = np.zeros([sub, len(snr_vals)])

    # now compute the labels
    idx = 0
    for s in speech_samps:
        for i, snr in enumerate(snr_vals):
            word = s.meta.as_dict()['word']
            # destination of the processed signal
            dest_pro = os.path.join(
                dest_dir, "processed_signal%d%s_snr_db_%d" % (idx, word, snr))
            # destination of the original signal
            dest_ori = os.path.join(
                dest_dir, "original_signal%d%s_snr_db_%d" % (idx, word, snr))
            # noisy processed signal
            noisy_pro = pra.normalize(beamformed_signal[s][i],
                                      bits=16).astype(np.int16)
            wavfile.write(dest_pro, 16000, noisy_pro)
            # noisy original signal
            noisy_ori = pra.normalize(noisy_signal[s][i],
                                      bits=16).astype(np.int16)
            wavfile.write(dest_ori, 16000, noisy_ori)
            # update the score maps
            print("score for processed signal: ")
            score_map_processing[word][idx][i] = label_wav(
                dest_pro, labels_file, graph_file, word)
            print()
            print("score for original signal: ")
            score_map_original[word][idx][i] = label_wav(
                dest_ori, labels_file, graph_file, word)
            print()
            idx += 1
delay = 0.02

# define the FFT length
N = 1024

# create a microphone array
if shape == 'Circular':
    R = pra.circular2DArray(mic1, M, phi, d*M/(2*np.pi)) 
else:
    R = pra.linear2DArray(mic1, M, phi, d) 
mics = pra.Beamformer(R, Fs, N=N, Lg=Lg)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read('samples/singing_'+str(Fs)+'.wav')
signal1 = np.array(signal1, dtype=float)
signal1 = pra.normalize(signal1)
signal1 = pra.highpass(signal1, Fs)
delay1 = 0.

# the second signal (interferer) is some german speech
rate2, signal2 = wavfile.read('samples/german_speech_'+str(Fs)+'.wav')
signal2 = np.array(signal2, dtype=float)
signal2 = pra.normalize(signal2)
signal2 = pra.highpass(signal2, Fs)
delay2 = 1.

# create the room with sources and mics
room1 = pra.Room.shoeBox2D(
    [0,0],
    room_dim,
    Fs,
Example #22
def make_noisy(args, thread_id, num_make_utts):
    
    spe_utt_ids, noise_utt_ids, diffuse_utt_ids, text_dict, utt2spk_dict, utt2data_dict = load_data(args)

    audio_parser      = AudioParser()

    spe_utt_size     = len(spe_utt_ids) if spe_utt_ids is not None else 0
    noise_utt_size   = len(noise_utt_ids) if noise_utt_ids is not None else 0
    diffuse_utt_size = len(diffuse_utt_ids) if diffuse_utt_ids is not None else 0
    
    noisy_scp_list   = []
    noisy_utt2spk    = []
    noisy_text_dict  = []
    mix2info         = []
    num_utts         = 0

    all_angle           = 360.0
    Targ_Ang_Num        = args.num_targ_ang
    Targ_Ang_Resolution = all_angle / Targ_Ang_Num if Targ_Ang_Num > 0 else 0.0

    save_mix    = args.save_mix
    save_reverb = args.save_reverb
    save_clean  = args.save_clean
    while True:
        ## Random a room
        room_x   = random.uniform(args.min_room_length, args.max_room_length)
        room_y   = random.uniform(args.min_room_weidth, args.max_room_weidth)
        room_z   = random.uniform(args.min_room_height, args.max_room_height)
        room_dim = [room_x, room_y, room_z]

        ## Create the room
        T60                   = random.uniform(args.min_T60, args.max_T60)
        absorption, max_order = pra.inverse_sabine(T60, room_dim)
        if save_mix:
            room_mix   = pra.ShoeBox(room_dim, fs = args.sample_rate, materials=pra.Material(absorption), max_order=max_order, sigma2_awgn = None)
        else:
            room_mix   = None
        if save_reverb:
            room_ref   = pra.ShoeBox(room_dim, fs = args.sample_rate, materials=pra.Material(absorption), max_order=max_order, sigma2_awgn = None)
        else:
            room_ref   = None
        if save_clean:
            room_dir   = pra.ShoeBox(room_dim, fs = args.sample_rate, materials=pra.Material(0.99999), max_order=max_order, sigma2_awgn = None)
        else:
            room_dir = None
        
        ## Random the position of microphone array
        mic_x  = random.uniform(args.min_mic_x, room_x - args.min_mic_x)
        mic_y  = random.uniform(args.min_mic_y, room_y - args.min_mic_y)
        mic_z  = random.uniform(args.min_mic_z, max(min(room_z - args.min_mic_z, 2.0), args.min_mic_z + 0.5))

        ## Compute The position of microphones
        mic_xyz = []
        for m in range(args.num_mic):
            mic_pos   = args.mic_pos[m]
            x         = mic_x + mic_pos[0]
            y         = mic_y + mic_pos[1]
            z         = mic_z
            mic_xyz.append([x, y, z])
        mic_xyz = np.array(mic_xyz)  # (num_mic, 3)
        mic_xyz = mic_xyz.T          # (3, num_mic)

        ## Add microphone array
        mic_array = pra.MicrophoneArray(mic_xyz, args.sample_rate)
        if room_mix is not None:
            room_mix  = room_mix.add_microphone_array(mic_array)
        if room_ref is not None:
            room_ref  = room_ref.add_microphone_array(mic_array)
        if room_dir is not None:
            room_dir  = room_dir.add_microphone_array(mic_array)

        ##print("room = [%.2f %.2f %.2f], micro = [%.2f %.2f %.2f]" % (room_x, room_y, room_z, mic_x, mic_y, mic_z))
        
        ## Add target sources to room_mix and room_ref
        target_source = None
        while True:
            if args.num_targ_ang <= 0.0:
                targ_ang = random.randint( 0, int(all_angle) )
            else:
                targ_ang = int(random.randint(0, Targ_Ang_Num - 1) * Targ_Ang_Resolution)

            targ_theta  = np.pi * targ_ang / 180.0
            targ_dist   = random.uniform(args.min_targ_distance, args.max_targ_distance)
            
            targ_x      = mic_x + np.cos(targ_theta) * targ_dist
            targ_y      = mic_y + np.sin(targ_theta) * targ_dist
            targ_z      = mic_z

            target_source = [targ_x, targ_y, targ_z]

            if (targ_x < (room_x - 0.5) and targ_x > 0.5) and (targ_y < (room_y - 0.5) and targ_y > 0.5):
                break
            
        if target_source is None or not room_mix.is_inside(target_source):
            continue
        
        ##print("room = [%.2f %.2f %.2f], target_source = [%.2f %.2f %.2f]" % (room_x, room_y, room_z, target_source[0], target_source[1], target_source[2]))
        ##print("targ_ang = %d, targ_dist %.2f" % (targ_ang, targ_dist))
        targ_tdoa = targ_ang
        if args.is_linear_mic and targ_tdoa > 180:
            targ_tdoa = 360.0 - targ_tdoa
        
        ## Add interference sources to room_mix (capped at a single interferer here)
        num_interf    = min(random.randint(1, args.max_num_interf), 1)
        interf_angs   = []
        interf_dists  = []
        interf_source = []
        
        while True:
            interf_ang  = random.randint(0, int(all_angle))
            interf_tdoa = interf_ang
            if args.is_linear_mic and interf_tdoa > 180:
                interf_tdoa = 360.0 - interf_tdoa
            if np.abs(targ_tdoa - interf_tdoa) < args.minAD:
                continue
            interf_theta = np.pi * interf_ang / 180.0
            interf_dist  = random.uniform(args.min_interf_distance, args.max_interf_distance)

            interf_x      = mic_x + np.cos(interf_theta) * interf_dist
            interf_y      = mic_y + np.sin(interf_theta) * interf_dist
            interf_z      = mic_z

            ainterf_source = [interf_x, interf_y, interf_z]
            if (interf_x < (room_x - 0.5) and interf_x > 0.5) and (interf_y < (room_y - 0.5) and interf_y > 0.5):
                interf_angs.append(interf_ang)
                interf_dists.append(interf_dist)
                interf_source.append(ainterf_source)
            
            if len(interf_source) >= num_interf:
                break
                
        ##print("interf_ang = %d, interf_dist %.2f, num_interf = %d" % (interf_ang, interf_dist, len(interf_source)))

        for sim in range(args.nutt_per_room):
            if room_mix is not None:
                room_mix.sources = []
            if room_ref is not None:
                room_ref.sources = []
            if room_dir is not None:
                room_dir.sources = []
            
            ## Add Speech to microphone array
            while True:
                spe_idx = random.randint(0, spe_utt_size - 1)
                spe_key, spe_path = spe_utt_ids[spe_idx]

                spe_wav = audio_parser.WaveData(spe_path, sample_rate = args.sample_rate)
                if spe_wav is None or spe_wav.shape[0] < args.sample_rate:
                    continue
                spe_wav = np.squeeze(spe_wav)
                if np.mean(np.abs(spe_wav)) > 0:
                    break
            
            spe_length 	   = spe_wav.shape[0]
            spe_wav        = pra.normalize(spe_wav)
            spe_wav        = pra.highpass(spe_wav, args.sample_rate, 50)
            
            if room_mix is not None and room_mix.is_inside(target_source):
                room_mix = room_mix.add_source(target_source, signal = spe_wav, delay = 0)
            else:
                print("target_source not in room_mix")
                continue
            if room_ref is not None and room_ref.is_inside(target_source):
                room_ref = room_ref.add_source(target_source, signal = spe_wav, delay = 0)
            else:
                print("target_source not in room_ref")
            if room_dir is not None and room_dir.is_inside(target_source):
                room_dir = room_dir.add_source(target_source, signal = spe_wav, delay = 0)
            else:
                print("target_source not in room_dir")
                        
            if room_mix is not None and len(room_mix.sources) < 1:
                print("target_source not in room_mix")
                break
            if room_ref is not None and len(room_ref.sources) < 1:
                print("target_source not in room_ref")
                break
            if room_dir is not None and len(room_dir.sources) < 1:
                print("target_source not in room_dir")
                break
            
            ## Add Interference to microphone array
            for it in range(0, num_interf):
                while True:
                    inf_idx = random.randint(0, noise_utt_size - 1)
                    inf_path = noise_utt_ids[inf_idx]

                    inf_wav = audio_parser.WaveData(inf_path, sample_rate = args.sample_rate)
                    if inf_wav is None or inf_wav.shape[0] < args.sample_rate:
                        continue
                    inf_wav = np.squeeze(inf_wav)
                    if np.mean(np.abs(inf_wav)) > 0:
                        break
                
                inf_length = inf_wav.shape[0]
                inf_wav = pra.normalize(inf_wav)
                inf_wav = pra.highpass(inf_wav, args.sample_rate, 50)

                while inf_length < spe_length:
                    inf_wav    = np.concatenate((inf_wav, inf_wav), axis = 0)
                    inf_length = inf_wav.shape[0]
                inf_wav = inf_wav[:spe_length]
                
                if room_mix is not None and room_mix.is_inside(interf_source[it]):
                    room_mix = room_mix.add_source(interf_source[it], signal = inf_wav, delay = 0)
                else:
                    print("interf_source not in room_mix")
                    continue

            if room_mix is not None and len(room_mix.sources) < 1:
                break

            ## Make the far-field mixture audio
            iSIR  = random.uniform(args.lowSIR, args.upSIR)
            room_mix.simulate(callback_mix = callback_mix, callback_mix_kwargs = {'snr': 30, 'sir': iSIR, 'n_src': num_interf + 1, 'n_tgt': 1, 'ref_mic': 0})
            
            mix_wav                 = room_mix.mic_array.signals.T  # (nsample, nchannel)
            mix_length, num_channel = mix_wav.shape
            
            ## Read diffuse noise
            iSNR = 100.0  # effectively noiseless default, used when no diffuse noise is mixed
            if diffuse_utt_ids is not None:
                while True:
                    diff_idx = random.randint(0, diffuse_utt_size - 1)
                    diff_path = diffuse_utt_ids[diff_idx]

                    diff_wav = audio_parser.WaveData(diff_path, sample_rate = args.sample_rate, id_channel = list(range(0, num_channel)))
                    if diff_wav is None or diff_wav.shape[0] < args.sample_rate:
                        continue
                    if np.mean(np.abs(diff_wav)) > 0:
                        break
                
                dif_length, num_channel = diff_wav.shape
                ## (disabled) optionally reverse the channel order of the diffuse noise:
                # for i in range(int(num_channel / 2)):
                #     ch_wav = diff_wav[:, i]
                #     diff_wav[:, i] = diff_wav[:, num_channel - i - 1]
                #     diff_wav[:, num_channel - i - 1] = ch_wav
                
                ## Add diffuse noise into mix
                while dif_length < mix_length:
                    diff_wav    = np.concatenate((diff_wav, diff_wav), axis = 0)
                    dif_length = diff_wav.shape[0]
                diff_wav = diff_wav[0:mix_length, :]
                
                iSNR    = random.uniform(args.lowSNR, args.upSNR)
                mix_wav = audio_parser.MixWave(mix_wav, diff_wav, snr = iSNR)

            ## Scale the mixture to a randomly drawn peak gain
            gain    = random.uniform(args.lowGain, args.upGain)
            scale   = gain / np.max(np.abs(mix_wav))
            mix_wav = mix_wav * scale
            mix_wav = mix_wav * 32767.0
            mix_wav = mix_wav.astype(np.int16)
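            ## mix_wav now peaks at `gain` full scale and is quantized to
            ## 16-bit PCM; the same `scale` is reused below so that dir_wav
            ## and ref_wav stay level-aligned with the mixture.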

            if room_dir is not None:
                ## Simulate directional signals
                room_dir.simulate()
                dir_wav = room_dir.mic_array.signals[0,:].T # (spe_length)
                dir_wav = dir_wav * scale
                dir_wav = dir_wav * 32767.0
                dir_wav = dir_wav.astype(np.int16)
            else:
                dir_wav = None

            if room_ref is not None:
                ## Simulate the clean far-field signal used as the reference for metric computation
                room_ref.simulate()
                ref_wav = room_ref.mic_array.signals          # (num_channel, spe_length)
                ref_wav = ref_wav * scale                     # (num_channel, spe_length)
            else:
                ref_wav = None
            
            if ref_wav is not None:
                if args.targ_bf is not None:
                    num_block = 1
                    ref_wav   = ref_wav[np.newaxis, :, :]    	 			 # [ num_block, num_channel, spe_length ]
                    ref_wav   = torch.FloatTensor(ref_wav)   	 		     # [ num_block, num_channel, spe_length ]
                    ref_wav   = ref_wav.view(num_block * num_channel, 1, -1) # [ num_block * num_channel, 1, spe_length ]

                    input_audio  = ref_wav.to(args.device)     		 # (num_block * num_channel, 1, spe_length)

                    mFFT  = args.convstft(input_audio)                # (num_block * num_channel, num_bin * 2, num_frame)

                    num_frame = mFFT.size(2)
                    mFFT   = mFFT.view(num_block, num_channel, num_bin * 2, -1) #( num_block, num_channel, num_bin * 2, num_frame)
                    mFFT_r = mFFT[:, :, :num_bin, :] 							#( num_block, num_channel, num_bin, num_frame)
                    mFFT_i = mFFT[:, :, num_bin:, :] 							#( num_block, num_channel, num_bin, num_frame)

                    mFFT_r = mFFT_r.permute([0, 3, 2, 1]).contiguous() 		    #( num_block, num_frame, num_bin, num_channel)
                    mFFT_i = mFFT_i.permute([0, 3, 2, 1]).contiguous()          #( num_block, num_frame, num_bin, num_channel)

                    mFFT_r = mFFT_r.view(num_block * num_frame, num_bin, num_channel) # ( num_block * num_frame, num_bin, num_channel)
                    mFFT_i = mFFT_i.view(num_block * num_frame, num_bin, num_channel) # ( num_block * num_frame, num_bin, num_channel)

                    mFFT = torch.cat([torch.unsqueeze(mFFT_r, 1), torch.unsqueeze(mFFT_i, 1)], dim = 1) # ( num_block * num_frame, 2, num_bin, num_channel )

                    # Compute the beam index from the target DOA and the beamformer's direction resolution
                    targ_tdoa = targ_ang
                    if num_channel == 2 or args.is_linear_mic:
                        if targ_tdoa > 180:
                            targ_tdoa = 360.0 - targ_tdoa
                    bf_beam = targ_tdoa / args.bf_direction_resolution + 0.5
                    bf_beam = int(bf_beam) % args.num_beam
                    print("tdoa = %d, beam = %d" % (targ_ang, bf_beam))

                    rFFT = args.targ_bf(mFFT, bf_beam) 				            # (num_block * num_frame, 2, num_bin, 1)
                    rFFT = rFFT[:, :, :, 0].view([num_block, -1, 2, num_bin])   # (num_block, num_frame, 2, num_bin)

                    rFFT    = rFFT.permute([0, 2, 3, 1]).contiguous()    # ( num_block, 2, num_bin, num_frame )
                    est_fft = torch.cat([rFFT[:,0], rFFT[:,1]], 1) 	     # ( num_block, num_bin * 2, num_frame )
                    ref_wav = args.convistft(est_fft)                    # ( num_block, 1, num_sample)
                    ref_wav = torch.squeeze(ref_wav, 1)                  # ( num_block, num_sample)
                    ref_wav = ref_wav[0, :]								 # ( num_sample)
                    ref_wav = ref_wav.data.cpu().numpy() 				 # ( num_sample)
                else:
                    ref_wav = ref_wav[0, :]								 # ( num_sample)
                
                ref_wav = ref_wav * 32767.0
                ref_wav = ref_wav.astype(np.int16)
            
            ## Align mix_wav, ref_wav and dir_wav to the shortest signal
            lengths = [w.shape[0] for w in (mix_wav, ref_wav, dir_wav) if w is not None]
            nsample = min(lengths)
            mix_wav = mix_wav[:nsample]
            if ref_wav is not None:
                ref_wav = ref_wav[:nsample]
            if dir_wav is not None:
                dir_wav = dir_wav[:nsample]

            num_utts += 1

            _, spe_name, _ = file_parse.getFileInfo(spe_path)

            out_path = os.path.join(args.out_path, 'wav')
            if not os.path.exists(out_path):
                os.makedirs(out_path)
            
            if utt2data_dict is not None:
                data_key, data_id = utt2data_dict[spe_idx]
                out_path = os.path.join(out_path, data_id)
                if not os.path.exists(out_path):
                    os.makedirs(out_path)
            else:
                data_id = 'data01'

            if utt2spk_dict is not None:
                spk_key, spk_id = utt2spk_dict[spe_idx]
                out_path = os.path.join(out_path, spk_id)
                if not os.path.exists(out_path):
                    os.makedirs(out_path)
            else:
                spk_id = 'spk01'
                out_path = os.path.join(out_path, 'wav')
                if not os.path.exists(out_path):
                    os.makedirs(out_path)
            
            spe_key = spe_key.replace('_', '').replace('-', '').replace('.', '')
            spk_id  = spk_id.replace('_', '').replace('-', '').replace('.', '')
            #utt_id = spk_id + "_" + spe_key + "%02d%07d" % (thread_id, num_utts)
            utt_id = spk_id + "_" + "%02d%07d" % (thread_id, num_utts)
            
            if mix_wav is not None:
                ## Write the mixture audio
                filename = "%s_id%02d%07d_Doa%d_SIR%.1f_SNR%.1f" % (spe_key, thread_id, num_utts, targ_ang, iSIR, iSNR)
                mix_path = os.path.join(out_path, '%s.wav' % (filename) )
                audio_parser.WriteWave(mix_path, mix_wav, args.sample_rate)
            else:
                mix_path = None

            if dir_wav is not None:
                filename = "%s_id%02d%07d_Doa%d_DS" % (spe_key, thread_id, num_utts, targ_ang)
                ds_path = os.path.join(out_path, '%s.wav' % (filename) )
                audio_parser.WriteWave(ds_path, dir_wav, args.sample_rate)
            else:
                ds_path = None
            
            if ref_wav is not None:
                filename = "%s_id%02d%07d_Doa%d_Ref" % (spe_key, thread_id, num_utts, targ_ang)
                ref_path = os.path.join(out_path, '%s.wav' % (filename) )
                audio_parser.WriteWave(ref_path, ref_wav, args.sample_rate)
            else:
                ref_path = None

            if text_dict is not None:
                text_key, text_value = text_dict[spe_idx]
            else:
                text_value = ' '
            
            noisy_scp_list.append((utt_id, mix_path, ds_path, ref_path, targ_ang, targ_dist, iSIR, iSNR, scale))
            noisy_utt2spk.append(spk_id)
            noisy_text_dict.append(text_value)

            info = (utt_id, spe_key, mix_path, ds_path, ref_path, targ_ang, targ_dist, interf_angs, interf_dists, iSIR, iSNR, scale)

            mix2info.append(info)
            
            print("%d / %d: %s" % (num_utts, num_make_utts, mix_path))

            if num_utts >= num_make_utts:
                return noisy_scp_list, noisy_utt2spk, noisy_text_dict, mix2info
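
## The `callback_mix` passed to room_mix.simulate() above is not shown in this
## excerpt. A minimal sketch of what such a mixing callback might look like
## (hypothetical, modeled on the pyroomacoustics BSS examples): it rescales
## the interferers to reach the requested SIR, sums all sources, and adds
## white noise at the requested SNR at a reference microphone.
import numpy as np

def callback_mix_sketch(premix, snr=0, sir=0, ref_mic=0, n_src=None, n_tgt=None):
    # premix has shape (n_src, n_mics, n_samples)
    # normalize every source image to unit power at the reference microphone
    p_mic_ref = np.std(premix[:, ref_mic, :], axis=1)
    premix /= p_mic_ref[:, None, None]

    # scale the interferers so the target-to-interference ratio equals `sir`
    sigma_i = np.sqrt(10 ** (-sir / 10) / (n_src - n_tgt))
    premix[n_tgt:n_src, :, :] *= sigma_i

    # mix down and add white noise at the requested `snr`
    sigma_n = np.sqrt(10 ** (-snr / 10))
    mix = np.sum(premix[:n_src, :, :], axis=0)
    mix += sigma_n * np.random.randn(*mix.shape)
    return mix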
Example #23
recording = room.mic_array.signals.T

##########################
# Prepare the beamformer #

output_signal = np.zeros(recording.shape[0], dtype=recording.dtype)

# look direction
look_dir = np.array(source_locations[0]) - np.mean(array, axis=1)
look_dir /= np.linalg.norm(look_dir)

# the matched response beamformer
mrbf = MatchResponse(array, look_dir, 40, 32, nfft, fs, c)
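
## `stft_input` and `stft_output` are not defined in this excerpt. A minimal
## sketch of how they might be created with pyroomacoustics, assuming `nfft`
## and `shift` (the frame hop) are defined earlier in the script:
stft_input = pra.transform.STFT(nfft, hop=shift,
                                analysis_window=pra.hann(nfft),
                                channels=recording.shape[1], streaming=True)
stft_output = pra.transform.STFT(nfft, hop=shift,
                                 synthesis_window=pra.hann(nfft),
                                 streaming=True)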

# processing loop
n = 0
while n + shift < recording.shape[0]:

    newframe = recording[n:n + shift, :]
    X = stft_input.analysis(newframe)

    out_frame = mrbf.process(X)

    # synthesize the output signal
    output_signal[n:n + shift] = stft_output.synthesis(out_frame)

    n += shift

wavfile.write('output_mic1.wav', fs, pra.normalize(recording[:, 0]) * 0.85)
wavfile.write('output_mf.wav', fs, pra.normalize(output_signal) * 0.85)
def play(ch):
    sd.play(pra.normalize(y[ch]) * 0.75, samplerate=room.fs, blocking=True)
Example #25
             marker='o')
    plt.legend()

    plt.tight_layout(pad=0.5)

    if not args.gui:
        plt.show()
    else:
        plt.show(block=False)

    if args.save:
        from scipy.io import wavfile

        wavfile.write(
            'bss_iva_mix.wav', room.fs,
            pra.normalize(mics_signals[0, :], bits=16).astype(np.int16))
        for i, sig in enumerate(y):
            wavfile.write('bss_iva_source{}.wav'.format(i + 1), room.fs,
                          pra.normalize(sig, bits=16).astype(np.int16))

    if args.gui:

        # Make a simple GUI to listen to the separated samples
        from tkinter import Tk, Button, Label
        import sounddevice as sd

        # Now come the GUI part
        class PlaySoundGUI(object):
            def __init__(self, master, fs, mix, sources):
                self.master = master
                self.fs = fs
delay = 0.050               # Beamformer delay in seconds

# define the FFT length
N = 1024

# create a microphone array
if shape == 'Circular':
    R = pra.circular2DArray(mic1, M, phi, d*M/(2*np.pi))
else:
    R = pra.linear2DArray(mic1, M, phi, d)
mics = pra.Beamformer(R, Fs, N=N, Lg=Lg)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read('samples/singing_'+str(Fs)+'.wav')
signal1 = np.array(signal1, dtype=float)
signal1 = pra.normalize(signal1)
signal1 = pra.highpass(signal1, Fs)
delay1 = 0.

# the second signal (interferer) is some german speech
rate2, signal2 = wavfile.read('samples/german_speech_'+str(Fs)+'.wav')
signal2 = np.array(signal2, dtype=float)
signal2 = pra.normalize(signal2)
signal2 = pra.highpass(signal2, Fs)
delay2 = 1.

# create the room with sources and mics
room1 = pra.Room.shoeBox2D(
    [0,0],
    room_dim,
    Fs,
# define the FFT length
N = 1024

# create a microphone array
if shape == 'Circular':
    R = pra.circular2DArray(mic1, M, phi, d * M / (2 * np.pi))
elif shape == 'Poisson':
    R = pra.poisson2DArray(mic1, M, d)
else:
    R = pra.linear2DArray(mic1, M, phi, d)
mics = pra.Beamformer(R, Fs, N=N, Lg=Lg)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read('samples/singing_' + str(Fs) + '.wav')
signal1 = np.array(signal1, dtype=float)
signal1 = pra.normalize(signal1)
signal1 = pra.highpass(signal1, Fs)
delay1 = 0.

# the second signal (interferer) is some german speech
rate2, signal2 = wavfile.read('samples/german_speech_' + str(Fs) + '.wav')
signal2 = np.array(signal2, dtype=float)
signal2 = pra.normalize(signal2)
signal2 = pra.highpass(signal2, Fs)
delay2 = 1.

# create the room with sources and mics
room1 = pra.Room.shoeBox2D([0, 0],
                           room_dim,
                           Fs,
                           t0=t0,
Example #28
# Define the FFT length
N = 1024

# Create a microphone array
if shape == "Circular":
    R = pra.circular_2D_array(mic1, M, phi, d * M / (2 * np.pi))
else:
    R = pra.linear_2D_array(mic1, M, phi, d)

# path to samples
path = os.path.dirname(__file__)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read(path + "/input_samples/singing_" + str(Fs) + ".wav")
signal1 = np.array(signal1, dtype=float)
signal1 = pra.normalize(signal1)
signal1 = pra.highpass(signal1, Fs)
delay1 = 0.0

# The second signal (interferer) is some german speech
rate2, signal2 = wavfile.read(path + "/input_samples/german_speech_" + str(Fs) + ".wav")
signal2 = np.array(signal2, dtype=float)
signal2 = pra.normalize(signal2)
signal2 = pra.highpass(signal2, Fs)
delay2 = 1.0

# Create the room
room_dim = [4, 6]
room1 = pra.ShoeBox(
    room_dim,
    absorption=absorption,
def createroom(amBird, saBird, noises, mic_p, mic_d, sour_p, sour_d,
               callback_mix, roomdim, absorption, max_order, n_mics, angle):
    np.random.seed(10)
    # STFT parameters
    framesize = 4096
    win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)
    # algorithm parameters
    # OGIVE parameters
    ogive_mu = 0.1
    ogive_update = "switching"
    ogive_iter = 2000

    ######## separation parameters ########
    algo = algo_choices[0]
    no_cb = True
    save = True
    n_iter = 60
    dist = "gauss"  # "gauss" or "laplace"
    ######## simulation parameters ########
    fs = 44100
    snr = 60
    sinr = 10
    # absorption, max_order = 0.45, 12  # RT60 == 0.2
    # absorption,max_order=0.9,17
    n_sources = 2 + 3  # 2 targets + 3 interferers
    n_sources_target = 2
    assert n_sources_target <= n_mics, "More sources than microphones is not supported"

    # set the source powers (all equal here)
    source_std = np.ones(n_sources_target)
    # positions
    # room size
    room_dim = roomdim
    # microphone positions
    rot = angle
    offset = np.pi - rot / 2
    mic_locs = semi_circle_layout(mic_p, rot, mic_d, n_mics,
                                  rot=offset)  # micro2
    # mic_locs = np.transpose([[13, 9.99, 3.5],[13, 10, 3.5],[13, 10.01, 3.5]])###micro3

    # target positions
    target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])
    # interferer positions
    interferer_locs = random_layout([16, 2, 6],
                                    n_sources - n_sources_target,
                                    offset=[5, 18, 3],
                                    seed=1)
    source_locs = np.concatenate((target_locs, interferer_locs), axis=1)

    # load the audio files
    wav_files = [amBird, saBird, noises[0], noises[1], noises[2]]
    signals = wav_read_center(wav_files, seed=123)

    # create room
    room = pra.ShoeBox(room_dim,
                       fs=44100,
                       absorption=absorption,
                       max_order=max_order,
                       air_absorption=True,
                       humidity=50)

    # add source
    for sig, loc in zip(signals, source_locs.T):
        room.add_source(loc, signal=sig)

    # add the microphone array
    room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

    # simulate and set the source powers
    premix = room.simulate(return_premix=True)
    n_samples = premix.shape[2]
    # Normalize the signals so that they all have unit variance at the reference microphone
    ref_mic = 0
    p_mic_ref = np.std(premix[:, ref_mic, :], axis=1)
    premix /= p_mic_ref[:, None, None]
    sources_var = np.ones(n_sources_target)
    # scale to pre-defined variance
    premix[:n_sources_target, :, :] *= np.sqrt(sources_var[:, None, None])

    # compute noise variance
    sigma_n = np.sqrt(10**(-snr / 10) * np.sum(sources_var))

    # now compute the power of interference signal needed to achieve desired SINR
    sigma_i = np.sqrt(
        np.maximum(0, 10**(-sinr / 10) * np.sum(sources_var) - sigma_n**2) /
        (n_sources - n_sources_target))
    premix[n_sources_target:, :, :] *= sigma_i
    background = (np.sum(premix[n_sources_target:, :, :], axis=0))

    # Mix down the recorded signals
    mix = np.sum(premix, axis=0)
    mics_signals = room.mic_array.signals

    print("Simulation done.")

    # rt60 = room.measure_rt60()
    # print(rt60)

    # Monitor Convergence
    ref = np.zeros((n_sources_target + 1, premix.shape[2], premix.shape[1]),
                   dtype=premix.dtype)
    ref[:n_sources_target, :, :] = premix[:n_sources_target, :, :].swapaxes(
        1, 2)
    ref[n_sources_target, :, :] = background.T
    convergence_callback = None

    # START BSS

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mics_signals.T,
                                   framesize,
                                   framesize // 2,
                                   win=win_a).astype(np.complex128)
    X_mics = X_all[:, :, :n_mics]

    # Run BSS
    if algo == "auxiva":
        # Run AuxIVA
        Y = overiva(
            X_mics,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=convergence_callback,
        )
    elif algo == "auxiva_pca":
        # Run AuxIVA with PCA dimensionality reduction
        Y = auxiva_pca(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=convergence_callback,
        )
    elif algo == "overiva":
        # Run OverIVA
        Y = overiva(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    elif algo == "ilrma":
        # Run ILRMA
        Y = pra.bss.ilrma(
            X_mics,
            n_iter=n_iter,
            n_components=2,
            proj_back=True,
            callback=convergence_callback,
        )
    elif algo == "ogive":
        # Run OGIVE
        Y = ogive(
            X_mics,
            n_iter=ogive_iter,
            step_size=ogive_mu,
            update=ogive_update,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    elif algo == "ogive_matlab":
        # Run OGIVE
        Y = ogive_matlab_wrapper(
            X_mics,
            n_iter=ogive_iter,
            step_size=ogive_mu,
            update=ogive_update,
            proj_back=True,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    else:
        raise ValueError("No such algorithm {}".format(algo))

    # Run iSTFT
    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0],
                                    framesize,
                                    framesize // 2,
                                    win=win_s)[:, None]
        y = y.astype(np.float64)
    else:
        y = pra.transform.synthesis(Y, framesize, framesize // 2,
                                    win=win_s).astype(np.float64)

    # If some outputs are uniformly zero, add a tiny bit of noise so they can be compared
    for k in range(y.shape[1]):
        if np.sum(np.abs(y[:, k])) < 1e-10:
            y[:, k] = np.random.randn(y.shape[0]) * 1e-10

    # For conventional methods of BSS, reorder the signals by decreasing power
    if algo != "blinkiva":
        new_ord = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, new_ord]

    # Compare SIR
    m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
    sdr, sir, sar, perm = bss_eval_sources(
        ref[:n_sources_target, :m, 0],
        y[framesize // 2:m + framesize // 2, :n_sources_target].T,
    )

    # reorder the vector of reconstructed signals
    y_hat = y[:, perm]

    return pra.normalize(mics_signals,
                         bits=16).astype(np.int16).T, y_hat, sir, sdr
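
## Sanity check (illustrative, standalone): with the scaling used in
## createroom() above, the interference-plus-noise power matches the
## requested SINR by construction.
import numpy as np

snr, sinr = 60, 10
n_sources, n_sources_target = 5, 2
sources_var = np.ones(n_sources_target)
sigma_n = np.sqrt(10 ** (-snr / 10) * np.sum(sources_var))
sigma_i = np.sqrt(
    np.maximum(0, 10 ** (-sinr / 10) * np.sum(sources_var) - sigma_n ** 2)
    / (n_sources - n_sources_target))
noise_plus_interf = (n_sources - n_sources_target) * sigma_i ** 2 + sigma_n ** 2
print(10 * np.log10(np.sum(sources_var) / noise_plus_interf))  # ~ 10.0 dB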
Lg = int(np.ceil(Lg_t*Fs))
Lgp = np.floor(0.4*Lg)
Lgm = Lg - Lgp
print('Lg =', Lg)

# create a microphone array
if shape == 'Circular':
    R = pra.circular2DArray(mic1, M, phi, d*M/(2*np.pi))
else:
    R = pra.linear2DArray(mic1, M, phi, d)
mics = pra.Beamformer(R, Fs, N, Lg=Lg, hop=hop, zpf=zp, zpb=zp)

# The first signal (of interest) is singing
rate1, signal1 = wavfile.read('samples/singing_'+str(Fs)+'.wav')
signal1 = np.array(signal1, dtype=float)
signal1 = pra.normalize(signal1)
signal1 = pra.highpass(signal1, Fs)
delay1 = 0.

# the second signal (interferer) is some german speech
rate2, signal2 = wavfile.read('samples/german_speech_'+str(Fs)+'.wav')
signal2 = np.array(signal2, dtype=float)
signal2 = pra.normalize(signal2)
signal2 = pra.highpass(signal2, Fs)
delay2 = 1.

# create the room with sources and mics
room1 = pra.Room.shoeBox2D(
    [0,0],
    room_dim,
    Fs,
            P_prev = np.roll(P_prev, -1, axis=1)
            n += hop
    # we reset the STFT object
    stft.reset()
    '''
    Write to WAV + labelling of our processed noisy signals
    '''
    # label the single-noise-channel-removed signals and compare their
    # classification against that of the original noisy signals
    score_processing = np.zeros(len(snr_vals))
    score_original = np.zeros(len(snr_vals))

    for i, snr in enumerate(snr_vals):
        print("SNR : %f dB" % snr)
        dest = os.path.join(
            dest_dir, "single_noise_channel_signal_snr_db_%d.wav" % (snr))
        signal = pra.normalize(processed_audio_array[i],
                               bits=16).astype(np.int16)
        wavfile.write(dest, 16000, signal)
        score_processing[i] = label_wav(dest, labels_file, graph_file,
                                        speech.meta.as_dict()['word'])

        dest = os.path.join(dest_dir, "original_signal_snr_db_%d.wav" % (snr))
        signal = pra.normalize(noisy_single_mic[i], bits=16).astype(np.int16)
        wavfile.write(dest, 16000, signal)
        score_original[i] = label_wav(dest, labels_file, graph_file,
                                      speech.meta.as_dict()['word'])
        print()

    # plotting the result
    plt.plot(snr_vals,
             score_processing,
             label="single noise channel removal signal")
Example #32
    mics = pra.Beamformer(echo, Fs, N=fft_len, Lg=Lg)
    room1.add_microphone_array(mics)
    room1.add_source(source, delay=0, signal=xtone)
    room1.add_source(interferer, delay=0, signal=silence)
    room1.image_source_model(use_libroom=True)
    room1.compute_rir()
    room1.simulate()

    # Rake MVDR simulation
    BeamformerType = 'RakeMVDR'
    good_sources = room1.sources[0][:max_order_design + 1]
    bad_sources = room1.sources[1][:max_order_design + 1]
    mics.rake_mvdr_filters(good_sources, bad_sources,
                           sigma2_n * np.eye(mics.Lg * mics.M))
    output = mics.process()
    out = pra.normalize(pra.highpass(output, Fs))
    out = normalize(out)  # `normalize` here is a separate helper defined elsewhere in the script

    # Rake Perceptual simulation
    # BeamformerType = 'RakePerceptual'
    # good_sources = room1.sources[0][:max_order_design+1]
    # bad_sources = room1.sources[1][:max_order_design+1]
    # mics.rake_perceptual_filters(good_sources,
    #                     bad_sources,
    #                     sigma2_n*np.eye(mics.Lg*mics.M))
    # output          =   mics.process()
    # out             =   pra.normalize(pra.highpass(output, Fs))

    input_mic = pra.normalize(pra.highpass(mics.signals[mics.M // 2], Fs))
    input_mic = normalize(input_mic)  # same separate helper as above
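
    ## A follow-up sketch (illustrative): write the raw center-microphone
    ## input and the Rake MVDR output side by side for listening comparison,
    ## assuming `from scipy.io import wavfile` earlier in the script and
    ## signals within [-1, 1] after normalization.
    wavfile.write('input_mic.wav', int(Fs), (input_mic * 0.85).astype(np.float32))
    wavfile.write('output_rake_mvdr.wav', int(Fs), (out * 0.85).astype(np.float32))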