def build_audio3(is_clean, params, spk_index, audio_samples_length=-1):
    '''Split one primary-speaker source file into fixed-length segments.

    The file at ``params['cleanfilenames'][spk_index]`` is read, resampled to
    ``params['fs']`` when its sample rate differs, and cut into consecutive,
    non-overlapping segments of ``audio_samples_length`` samples. Trailing
    samples that do not fill a whole segment are dropped.

    Args:
        is_clean: must be truthy; this function only knows how to pick
            sources from ``params['cleanfilenames']``.
        params: configuration dict; reads 'fs', 'cleanfilenames' and, when
            ``audio_samples_length`` is -1, 'audio_length'.
        spk_index: index into ``params['cleanfilenames']``.
        audio_samples_length: segment length in samples; -1 selects
            ``int(params['audio_length'] * params['fs'])``.

    Returns:
        A tuple ``(audios_all, files_used, clipped_files)``: the segments that
        passed ``is_clipped``, the source path repeated once per kept segment,
        and the source path repeated once per rejected segment.

    Raises:
        ValueError: if ``is_clean`` is falsy or the segment length is not
            positive.
    '''
    if not is_clean:
        # Previously this fell through to an UnboundLocalError on source_files.
        raise ValueError("build_audio3 only supports clean sources (is_clean must be true)")

    fs_output = params['fs']
    if audio_samples_length == -1:
        audio_samples_length = int(params['audio_length'] * fs_output)
    if audio_samples_length <= 0:
        raise ValueError("audio_samples_length must be positive, got %r"
                         % (audio_samples_length,))

    source_file = params['cleanfilenames'][spk_index]
    input_audio, fs_input = audioread(source_file)

    if fs_input != fs_output:
        # Keyword arguments work on every librosa release; positional sample
        # rates are rejected by librosa >= 0.10.
        input_audio = librosa.resample(input_audio, orig_sr=fs_input,
                                       target_sr=fs_output)

    # Count segments with the same length that is used for slicing, so every
    # emitted segment is complete even when the caller overrides the length.
    num_segs = input_audio.shape[0] // audio_samples_length

    audios_all = []
    files_used = []
    clipped_files = []

    for seg in range(num_segs):
        start = seg * audio_samples_length
        audio_temp = input_audio[start:start + audio_samples_length]

        if is_clipped(audio_temp):
            clipped_files.append(source_file)
        else:
            audios_all.append(audio_temp)
            files_used.append(source_file)

    return audios_all, files_used, clipped_files
# Example no. 2
# 0
def build_audio(is_clean, params, index, audio_samples_length=-1):
    '''Construct an audio signal by concatenating source files.

    Source files are visited cyclically, starting with the one *after* the
    starting index. Each file is resampled to ``params['fs']`` if needed and,
    when it is longer than what is still required, randomly cropped (always
    for noise; for clean speech only when ``params['is_test_set']`` is falsy).
    Files flagged by ``is_clipped`` are skipped. Up to
    ``params['silence_length']`` seconds of zeros are appended after each
    accepted clip while more audio is still needed.

    Args:
        is_clean: truthy to draw from ``params['cleanfilenames']``; otherwise
            from ``params['noisefilenames']`` if present, else from a randomly
            chosen directory in ``params['noisedirs']`` (globbed with
            ``params['audioformat']``).
        params: configuration dict (keys as described above, plus
            'audio_length' when ``audio_samples_length`` is -1).
        index: starting index into the source list (ignored when a random
            noise directory is used).
        audio_samples_length: desired length in samples; -1 selects
            ``int(params['audio_length'] * params['fs'])``.

    Returns:
        ``(output_audio, files_used, clipped_files, idx)`` where ``idx`` is the
        index of the last file visited. If the tries run out while sampling
        from a noise directory, ``([], [], clipped_files, idx)`` is returned.
        In the other cases a shorter-than-requested signal may be returned
        when the tries run out.
    '''

    fs_output = params['fs']
    silence_length = params['silence_length']
    if audio_samples_length == -1:
        audio_samples_length = int(params['audio_length']*params['fs'])

    output_audio = np.zeros(0)
    remaining_length = audio_samples_length
    files_used = []
    clipped_files = []
    # Set only when the sources come from a randomly picked noise directory.
    noise_dir = None

    if is_clean:
        source_files = params['cleanfilenames']
        idx = index
    elif 'noisefilenames' in params:
        source_files = params['noisefilenames']
        idx = index
    else:
        # noise files are organized into subdirectories: pick one at random
        noisedirs = params['noisedirs']
        noise_dir = noisedirs[np.random.randint(0, np.size(noisedirs))]
        source_files = glob.glob(os.path.join(noise_dir,
                                              params['audioformat']))
        shuffle(source_files)
        # pick a noise source file index randomly
        idx = np.random.randint(0, np.size(source_files))

    # initialize silence
    silence = np.zeros(int(fs_output*silence_length))

    # iterate through multiple clips until we have a long enough signal
    tries_left = MAXTRIES
    while remaining_length > 0 and tries_left > 0:

        # read next audio file and resample if necessary
        idx = (idx + 1) % np.size(source_files)
        input_audio, fs_input = audioread(source_files[idx])
        if input_audio is None:
            sys.stderr.write("WARNING: Cannot read file: %s\n" % source_files[idx])
            # Consume a try so a list of unreadable files cannot loop forever.
            tries_left -= 1
            continue
        if fs_input != fs_output:
            # Keyword form is accepted by all librosa versions; librosa >= 0.10
            # rejects positional sample rates.
            input_audio = librosa.resample(input_audio, orig_sr=fs_input,
                                           target_sr=fs_output)

        # if current file is longer than remaining desired length, and this is
        # noise generation or this is training set, subsample it randomly
        if len(input_audio) > remaining_length and (not is_clean or not params['is_test_set']):
            idx_seg = np.random.randint(0, len(input_audio)-remaining_length)
            input_audio = input_audio[idx_seg:idx_seg+remaining_length]

        # check for clipping, and if found move onto next file
        if is_clipped(input_audio):
            clipped_files.append(source_files[idx])
            tries_left -= 1
            continue

        # concatenate current input audio to output audio stream
        files_used.append(source_files[idx])
        output_audio = np.append(output_audio, input_audio)
        remaining_length -= len(input_audio)

        # add some silence if we have not reached desired audio length
        if remaining_length > 0:
            silence_len = min(remaining_length, len(silence))
            output_audio = np.append(output_audio, silence[:silence_len])
            remaining_length -= silence_len

    # noise_dir is only bound on the directory path, so the message can no
    # longer hit an unbound name when params carries both noise keys.
    if tries_left == 0 and noise_dir is not None:
        print("There are not enough non-clipped files in the " + noise_dir + \
              " directory to complete the audio build")
        return [], [], clipped_files, idx

    return output_audio, files_used, clipped_files, idx
# Example no. 3
# 0
def main_gen(params):
    '''Generate reverberant noisy-speech files and write them to storage.

    For every file id from ``params['fileindex_start']`` to
    ``params['fileindex_end']`` (inclusive) this calls gen_audio() for clean
    speech, convolves it with a randomly chosen room impulse response from
    ``params['myrir']``, calls gen_audio() for noise of matching length, mixes
    the two with segmental_snr_mixer() and writes the noisy, clean and noise
    signals as wav files. A mix with unexpected clipping is discarded and the
    same file id is generated again.

    Returns:
        ``(clean_source_files, clean_clipped_files, clean_low_activity_files,
        noise_source_files, noise_clipped_files, noise_low_activity_files)``
        accumulated over all generated files.
    '''

    clean_source_files = []
    clean_clipped_files = []
    clean_low_activity_files = []
    noise_source_files = []
    noise_clipped_files = []
    noise_low_activity_files = []

    clean_index = 0
    noise_index = 0
    file_num = params['fileindex_start']

    while file_num <= params['fileindex_end']:
        # generate clean speech
        clean, clean_sf, clean_cf, clean_laf, clean_index = \
            gen_audio(True, params, clean_index)

        # add reverb with a randomly selected RIR
        rir_index = random.randint(0, len(params['myrir']) - 1)
        my_rir = os.path.normpath(os.path.join('datasets', 'impulse_responses',
                                               params['myrir'][rir_index]))
        _, samples_rir = wavfile.read(my_rir)

        # params['mychannel'] is used as a 1-based channel number; a value of 0
        # indexes column -1, i.e. the last channel.
        my_channel = int(params['mychannel'][rir_index])

        if samples_rir.ndim == 1:
            samples_rir_ch = np.array(samples_rir)
        else:
            samples_rir_ch = samples_rir[:, my_channel - 1]

        clean = add_pyreverb(clean, samples_rir_ch)

        # generate noise of the same length as the reverberant clean speech
        noise, noise_sf, noise_cf, noise_laf, noise_index = \
            gen_audio(False, params, noise_index, len(clean))

        clean_clipped_files += clean_cf
        clean_low_activity_files += clean_laf
        noise_clipped_files += noise_cf
        noise_low_activity_files += noise_laf

        # mix clean speech and noise
        # if specified, use specified SNR value
        if not params['randomize_snr']:
            snr = params['snr']
        # use a randomly sampled SNR value between the specified bounds
        # (np.random.randint excludes the upper bound)
        else:
            snr = np.random.randint(params['snr_lower'], params['snr_upper'])

        clean_snr, noise_snr, noisy_snr, target_level = segmental_snr_mixer(
            params=params, clean=clean, noise=noise, snr=snr)

        # unexpected clipping: file_num is not advanced, so this id is retried
        if is_clipped(clean_snr) or is_clipped(noise_snr) or is_clipped(noisy_snr):
            print("Warning: File #" + str(file_num) + " has unexpected clipping, " + \
                  "returning without writing audio to disk")
            continue

        clean_source_files += clean_sf
        noise_source_files += noise_sf

        # build output names from the extension-less source base names;
        # splitext handles extensions of any length (e.g. '.flac')
        hyphen = '-'
        clean_source_filenamesonly = [
            os.path.splitext(os.path.basename(path))[0] for path in clean_sf]
        clean_files_joined = hyphen.join(clean_source_filenamesonly)[:MAXFILELEN]
        noise_source_filenamesonly = [
            os.path.splitext(os.path.basename(path))[0] for path in noise_sf]
        noise_files_joined = hyphen.join(noise_source_filenamesonly)[:MAXFILELEN]

        noisyfilename = clean_files_joined + '_' + noise_files_joined + '_snr' + \
                        str(snr) + '_tl' + str(target_level) + '_fileid_' + str(file_num) + '.wav'
        cleanfilename = 'clean_fileid_'+str(file_num)+'.wav'
        noisefilename = 'noise_fileid_'+str(file_num)+'.wav'

        noisypath = os.path.join(params['noisyspeech_dir'], noisyfilename)
        cleanpath = os.path.join(params['clean_proc_dir'], cleanfilename)
        noisepath = os.path.join(params['noise_proc_dir'], noisefilename)

        audio_signals = [noisy_snr, clean_snr, noise_snr]
        file_paths = [noisypath, cleanpath, noisepath]

        file_num += 1
        # best-effort writes: a failure is reported and the run continues
        for file_path, audio_signal in zip(file_paths, audio_signals):
            try:
                audiowrite(file_path, audio_signal, params['fs'])
            except Exception as e:
                print(str(e))

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
           noise_source_files, noise_clipped_files, noise_low_activity_files
def build_audio(is_clean, params, filenum, audio_samples_length=-1):
    '''Construct an audio signal from source files using a shared file counter.

    The next source file is chosen by incrementing the module-level
    ``clean_counter`` (clean speech) or ``noise_counter`` (noise) under its
    lock and taking the value modulo the number of source files. Each file is
    resampled to ``params['fs']`` if needed and, when longer than what is
    still required, randomly cropped (always for noise; for clean speech only
    when ``params['is_test_set']`` is falsy). Files flagged by ``is_clipped``
    are skipped. Up to ``params['silence_length']`` seconds of zeros follow
    each accepted clip while more audio is still needed.

    Args:
        is_clean: truthy to read ``params['cleanfilenames']``, otherwise
            ``params['noisefilenames']``.
        params: configuration dict (keys as described above, plus
            'audio_length' when ``audio_samples_length`` is -1).
        filenum: id of the file being generated; only used in the failure
            message.
        audio_samples_length: desired length in samples; -1 selects
            ``int(params['audio_length'] * params['fs'])``.

    Returns:
        ``(output_audio, files_used, clipped_files)``, or
        ``([], [], clipped_files)`` when the MAXTRIES budget is exhausted.
    '''

    fs_output = params['fs']
    silence_length = params['silence_length']
    if audio_samples_length == -1:
        audio_samples_length = int(params['audio_length']*params['fs'])

    output_audio = np.zeros(0)
    remaining_length = audio_samples_length
    files_used = []
    clipped_files = []

    # The counters are only read here (their .value is mutated, the names are
    # never rebound), so no `global` declaration is required.
    if is_clean:
        source_files = params['cleanfilenames']
        idx_counter = clean_counter
    else:
        source_files = params['noisefilenames']
        idx_counter = noise_counter

    # initialize silence
    silence = np.zeros(int(fs_output*silence_length))

    # iterate through multiple clips until we have a long enough signal
    tries_left = MAXTRIES
    while remaining_length > 0 and tries_left > 0:

        # claim the next file index atomically with respect to the counter lock
        with idx_counter.get_lock():
            idx_counter.value += 1
            idx = idx_counter.value % np.size(source_files)

        input_audio, fs_input = audioread(source_files[idx])
        if fs_input != fs_output:
            # Keyword form is accepted by all librosa versions; librosa >= 0.10
            # rejects positional sample rates.
            input_audio = librosa.resample(input_audio, orig_sr=fs_input,
                                           target_sr=fs_output)

        # if current file is longer than remaining desired length, and this is
        # noise generation or this is training set, subsample it randomly
        if len(input_audio) > remaining_length and (not is_clean or not params['is_test_set']):
            idx_seg = np.random.randint(0, len(input_audio)-remaining_length)
            input_audio = input_audio[idx_seg:idx_seg+remaining_length]

        # check for clipping, and if found move onto next file
        if is_clipped(input_audio):
            clipped_files.append(source_files[idx])
            tries_left -= 1
            continue

        # concatenate current input audio to output audio stream
        files_used.append(source_files[idx])
        output_audio = np.append(output_audio, input_audio)
        remaining_length -= len(input_audio)

        # add some silence if we have not reached desired audio length
        if remaining_length > 0:
            silence_len = min(remaining_length, len(silence))
            output_audio = np.append(output_audio, silence[:silence_len])
            remaining_length -= silence_len

    if tries_left == 0:
        print("Audio generation failed for filenum " + str(filenum))
        return [], [], clipped_files

    return output_audio, files_used, clipped_files
def main_gen(params, filenum):
    '''Generate one noisy-speech example for `filenum` and write it to storage.

    gen_audio() is called for clean speech and then for noise of the same
    length; the pair is mixed with snr_mixer(). The whole generation is
    repeated until none of the three resulting signals is clipped. The noisy,
    clean and noise signals are then written as wav files; write errors are
    printed and otherwise ignored.

    Returns the source files of the accepted attempt together with the
    clipped / low-activity file lists accumulated over all attempts:
    (clean_source_files, clean_clipped_files, clean_low_activity_files,
     noise_source_files, noise_clipped_files, noise_low_activity_files)
    '''

    print("Generating file #" + str(filenum))

    clean_clipped_files, clean_low_activity_files = [], []
    noise_clipped_files, noise_low_activity_files = [], []

    accepted = False
    while not accepted:
        # clean speech first, then noise cut to the same number of samples
        clean, clean_source_files, clean_cf, clean_laf = \
            gen_audio(True, params, filenum)
        noise, noise_source_files, noise_cf, noise_laf = \
            gen_audio(False, params, filenum, len(clean))

        clean_clipped_files += clean_cf
        clean_low_activity_files += clean_laf
        noise_clipped_files += noise_cf
        noise_low_activity_files += noise_laf

        # fixed SNR unless a random one between the configured bounds is requested
        if params['randomize_snr']:
            snr = np.random.randint(params['snr_lower'], params['snr_upper'])
        else:
            snr = params['snr']

        clean_snr, noise_snr, noisy_snr, target_level = snr_mixer(
            params=params, clean=clean, noise=noise, snr=snr)

        # any unexpected clipping sends us round the loop again
        accepted = not (is_clipped(clean_snr) or is_clipped(noise_snr)
                        or is_clipped(noisy_snr))

    # derive output names from the source file names (last 4 chars dropped)
    separator = '-'
    clean_files_joined = separator.join(
        [src[:-4].split(os.path.sep)[-1] for src in clean_source_files])[:MAXFILELEN]
    noise_files_joined = separator.join(
        [src[:-4].split(os.path.sep)[-1] for src in noise_source_files])[:MAXFILELEN]

    file_id = str(filenum)
    noisyfilename = ''.join([clean_files_joined, '_', noise_files_joined,
                             '_snr', str(snr), '_fileid_', file_id, '.wav'])
    cleanfilename = 'clean_fileid_' + file_id + '.wav'
    noisefilename = 'noise_fileid_' + file_id + '.wav'

    noisypath = os.path.join(params['noisyspeech_dir'], noisyfilename)
    cleanpath = os.path.join(params['clean_proc_dir'], cleanfilename)
    noisepath = os.path.join(params['noise_proc_dir'], noisefilename)

    outputs = ((noisypath, noisy_snr), (cleanpath, clean_snr), (noisepath, noise_snr))

    # best-effort writes: report a failure and carry on with the next file
    for out_path, signal in outputs:
        try:
            audiowrite(out_path, signal, params['fs'])
        except Exception as e:
            print(str(e))

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
           noise_source_files, noise_clipped_files, noise_low_activity_files
def _joined_source_stems(paths):
    '''Join the extension-less base names of `paths` with '-', capped at MAXFILELEN characters.'''
    stems = [os.path.splitext(os.path.basename(path))[0] for path in paths]
    return '-'.join(stems)[:MAXFILELEN]


def main_gen(params):
    '''Generate primary/secondary-speaker mixtures and write them to storage.

    Until the file id exceeds ``params['fileindex_end']``, a random primary
    speaker is chosen and gen_audio3() supplies its clips. Each clip is
    convolved with a random room impulse response from ``params['myrir']``
    and then combined three ways with segmental_snr_mixer():

      1. primary + noise                      ('primary_noisy_' / 'clean_' / 'noise_' files)
      2. primary + secondary speaker          ('ps_*' files)
      3. (primary + secondary) + noise        ('psn_*' files)

    A clip for which any checked signal is clipped is skipped without
    consuming a file id. Write errors are printed and otherwise ignored.

    Returns:
        ``(clean_source_files, clean_clipped_files, clean_low_activity_files,
        noise_source_files, noise_clipped_files, noise_low_activity_files)``.
        The "clean" lists come from gen_audio2(), i.e. the secondary speaker.
    '''

    clean_source_files = []
    clean_clipped_files = []
    clean_low_activity_files = []

    noise_source_files = []
    noise_clipped_files = []
    noise_low_activity_files = []

    file_num = params['fileindex_start']
    rirfilenames = params['myrir']

    # NOTE: file_num only advances when a clip is written, so this loop relies
    # on gen_audio3() eventually yielding clips that mix without clipping.
    while file_num <= params['fileindex_end']:
        spk_index = random.randint(0, len(params['cleanfilenames']) - 1)

        # NOTE(review): the result is iterated directly as a sequence of audio
        # clips - confirm gen_audio3() returns exactly that.
        chosen_clean = gen_audio3(True, params, spk_index)

        # add reverb to every primary clip with a randomly selected RIR
        chosen_clean_reverb = []
        for clean in chosen_clean:
            rir_path = random.sample(rirfilenames, 1)[0]
            _, samples_rir = wavfile.read(rir_path)

            if samples_rir.ndim > 1:
                # pick among the actual channels (second axis), not among the
                # number of array dimensions
                channel = random.randint(0, samples_rir.shape[1] - 1)
                samples_rir_ch = samples_rir[:, channel]
            else:
                samples_rir_ch = samples_rir

            chosen_clean_reverb.append(add_pyreverb(clean, samples_rir_ch))

        # add secondary speech and/or noise for each chunk of primary speech
        for chose_primary in chosen_clean_reverb:
            num_samples = chose_primary.shape[0]

            index2 = random.randint(0, len(params['cleanfilenames2']) - 1)
            clean2, clean_sf, clean_cf, clean_laf, _ = \
                gen_audio2(True, params, index2, num_samples)

            noise_index = random.randint(0, len(params['noisefilenames']) - 1)
            noise, noise_sf, noise_cf, noise_laf, _ = \
                gen_audio(False, params, noise_index, num_samples)

            # keep the rejected-file bookkeeping that is part of the return value
            clean_clipped_files += clean_cf
            clean_low_activity_files += clean_laf
            noise_clipped_files += noise_cf
            noise_low_activity_files += noise_laf

            if not params['randomize_snr']:
                # the fixed SNR applies to all three mixes
                snr = snr2 = snr3 = params['snr']
            else:
                # independent random SNRs (upper bound excluded by randint)
                snr = np.random.randint(params['snr_lower'],
                                        params['snr_upper'])
                snr2 = np.random.randint(params['snr_lower'],
                                         params['snr_upper'])
                snr3 = np.random.randint(params['snr_lower'],
                                         params['snr_upper'])

            # 1. Primary (clean) + Noise
            clean_snr, noise_snr, noisy_snr, target_level = segmental_snr_mixer(
                params=params, clean=chose_primary, noise=noise, snr=snr)
            # 2. Primary + Secondary
            clean_snr2, noise_snr2, noisy_snr2, target_level2 = segmental_snr_mixer(
                params=params, clean=chose_primary, noise=clean2, snr=snr2)
            # 3. Primary + Secondary (clean2) + Noise
            clean_snr3, noise_snr3, noisy_snr3, target_level3 = segmental_snr_mixer(
                params=params, clean=noisy_snr2, noise=noise, snr=snr3)

            # unexpected clipping in any mix: skip this clip entirely
            checked = (clean_snr, noisy_snr,
                       clean_snr2, noisy_snr2,
                       clean_snr3, noisy_snr3)
            if any(is_clipped(signal) for signal in checked):
                print("Warning: File #" + str(file_num) + " has unexpected clipping, " + \
                    "returning without writing audio to disk")
                continue

            clean_source_files += clean_sf
            noise_source_files += noise_sf

            clean_files_joined = _joined_source_stems(clean_sf)
            noise_files_joined = _joined_source_stems(noise_sf)

            # (noisy-file prefix, clean/noise-file prefix, noisy signal,
            #  noise signal, SNR and target level of that particular mix)
            mixes = (
                ('primary_', '', noisy_snr, noise_snr, snr, target_level),
                ('ps_', 'ps_', noisy_snr2, noise_snr2, snr2, target_level2),
                ('psn_', 'psn_', noisy_snr3, noise_snr3, snr3, target_level3),
            )

            file_id = str(file_num)
            file_num += 1

            for noisy_prefix, prefix, noisy_sig, noise_sig, mix_snr, mix_level in mixes:
                noisyfilename = noisy_prefix + 'noisy_fileid_' + file_id + '_' + \
                    clean_files_joined + '_' + noise_files_joined + '_snr' + \
                    str(mix_snr) + '_tl' + str(mix_level) + '.wav'
                cleanfilename = prefix + 'clean_fileid_' + file_id + '.wav'
                noisefilename = prefix + 'noise_fileid_' + file_id + '.wav'

                # NOTE(review): every mix stores clean_snr (the primary speech
                # scaled for mix 1) as its clean reference, as before - confirm
                # this is intended rather than clean_snr2 / clean_snr3.
                outputs = (
                    (os.path.join(params['noisyspeech_dir'], noisyfilename), noisy_sig),
                    (os.path.join(params['clean_proc_dir'], cleanfilename), clean_snr),
                    (os.path.join(params['noise_proc_dir'], noisefilename), noise_sig),
                )

                # one try per file so a single failed write does not skip the rest
                for out_path, signal in outputs:
                    try:
                        audiowrite(out_path, signal, params['fs'])
                    except Exception as e:
                        print(str(e))

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
                noise_source_files, noise_clipped_files, noise_low_activity_files