def single_mix(combo_idx, split_list, datapath):
    """Blend one clip from each split into a single mix and store it.

    For every split in ``split_list`` the entry selected by the matching
    element of ``combo_idx`` is loaded at 16 kHz.  The clips are summed with
    equal weights (``1 / len(split_list)``) and the result is stored as:

    * ``<datapath>/mix_wav/mix-<id>-<id>...wav`` -- the mixed waveform,
    * ``<datapath>/mix/mix-<id>-<id>...npy`` -- ``utils.fast_stft`` of the mix,
    * one appended line in ``<datapath>/mix_log.txt`` naming the ``.npy`` file.

    :param combo_idx: one index per split, choosing the entry to use
    :param split_list: sequence of splits; each entry is an ``(idx, path)`` pair
    :param datapath: root directory that receives the output files
    """
    assert len(combo_idx) == len(split_list)
    mix_rate = 1.0 / float(len(split_list))

    tracks = []
    id_tags = []
    for candidates, choice in zip(split_list, combo_idx):
        clip_id, clip_path = candidates[choice]
        samples, _ = librosa.load(clip_path, sr=16000)
        tracks.append(samples)
        id_tags.append('-%05d' % clip_id)

    # Accumulate in place so the mix keeps the dtype of the first clip.
    mix_wav = np.zeros_like(tracks[0])
    for samples in tracks:
        mix_wav += samples * mix_rate

    stem = 'mix' + ''.join(id_tags)
    wavfile.write('%s/mix_wav/%s' % (datapath, stem + '.wav'), 16000, mix_wav)

    npy_name = stem + '.npy'
    np.save('%s/mix/%s' % (datapath, npy_name), utils.fast_stft(mix_wav))

    with open('%s/mix_log.txt' % datapath, 'a') as log:
        log.write(npy_name)
        log.write('\n')
Beispiel #2
0
def single_mix(combo_idx, split_list, database_repo):
    """Create and store an equal-weight mix of one clip per split.

    ``combo_idx[k]`` selects an ``(idx, path)`` entry from ``split_list[k]``.
    Each selected clip is loaded at 16 kHz, scaled by ``1 / len(split_list)``
    and summed.  The function then writes the mixed waveform to
    ``<database_repo>/mix_wav/``, the ``utils.fast_stft`` output of the mix to
    ``<database_repo>/mix/`` and appends the ``.npy`` file name to
    ``<database_repo>/mix_log.txt``.

    :param combo_idx: per-split index of the entry to mix
    :param split_list: sequence of splits holding ``(idx, path)`` entries
    :param database_repo: root directory for all outputs
    """
    assert len(combo_idx) == len(split_list)
    mix_rate = 1.0 / float(len(split_list))

    # Load every selected clip together with the id tag used in file names.
    loaded = []
    for part_idx, part in enumerate(split_list):
        idx, path = part[combo_idx[part_idx]]
        wav, _ = librosa.load(path, sr=16000)
        loaded.append((wav, '-%05d' % idx))

    base_name = "mix" + "".join(tag for _, tag in loaded)

    # Weighted sum, accumulated in place on a buffer shaped like the first clip.
    mix_wav = np.zeros_like(loaded[0][0])
    for wav, _ in loaded:
        mix_wav += wav * mix_rate

    # Waveform of the mix.
    wav_target = '%s/mix_wav/%s' % (database_repo, base_name + '.wav')
    wavfile.write(wav_target, 16000, mix_wav)

    # Time-frequency representation of the mix.
    name = base_name + ".npy"
    F_mix = utils.fast_stft(mix_wav)
    np.save('%s/mix/%s' % (database_repo, name), F_mix)

    # Record the stored file in the mix log.
    with open('%s/mix_log.txt' % database_repo, 'a') as log_file:
        log_file.write(name)
        log_file.write("\n")
def preprocess_audio(video_name=VIDEO_NAME, sr=16000):
    """Convert ``<video_name>.wav`` to its ``utils.fast_stft`` form on disk.

    The audio is loaded with librosa at sample rate ``sr``, passed through
    ``utils.fast_stft`` and saved as ``preprocessed-<video_name>.npy`` (path
    relative to the current working directory unless ``video_name`` itself
    carries a directory).

    :param video_name: file stem of the input ``.wav``
    :param sr: sample rate handed to ``librosa.load``
    """
    wav_path = "%s.wav" % video_name
    samples, _ = librosa.load(wav_path, sr=sr)
    spectrum = utils.fast_stft(samples)

    out_stem = 'preprocessed-%s' % video_name
    np.save('%s.npy' % out_stem, spectrum)
Beispiel #4
0
def single_audio_to_npy(audio_path_list,
                        database_repo=DATABASE_REPO_PATH,
                        fix_sr=16000):
    """Store the ``utils.fast_stft`` output of every listed clip as ``.npy``.

    For each ``(idx, path)`` pair the clip is loaded at ``fix_sr``, transformed,
    logged as ``single-<idx>.npy`` in ``<database_repo>/single_TF.txt`` and
    then saved to ``<database_repo>/single/single-<idx>.npy`` (``idx`` is
    zero-padded to five digits).

    :param audio_path_list: iterable of ``(idx, path)`` pairs
    :param database_repo: root directory for the log and the ``single`` folder
    :param fix_sr: sample rate handed to ``librosa.load``
    """
    for clip_id, wav_path in audio_path_list:
        samples, _ = librosa.load(wav_path, sr=fix_sr)
        spectrum = utils.fast_stft(samples)
        stem = 'single-%05d' % clip_id

        # The log entry is appended before the array itself is written.
        with open('%s/single_TF.txt' % database_repo, 'a') as log:
            log.write('%s.npy' % stem)
            log.write('\n')

        target = '%s/single/%s.npy' % (database_repo, stem)
        np.save(target, spectrum)
def single_audio_to_npy(audio_path_list, database_repo=DATABASE_REPO_PATH, fix_sr=16000):
    """Store the power ``utils.fast_stft`` output of every listed clip.

    For each ``(idx, path)`` pair the clip is loaded at ``fix_sr``, transformed
    with ``utils.fast_stft(..., power=True)``, logged as ``single-<idx>.npy`` in
    ``<database_repo>/single_TF.txt`` and saved to
    ``<database_repo>/single/single-<idx>.npy`` (``idx`` is zero-padded to six
    digits).  A one-line progress percentage is printed while running.

    :param audio_path_list: sized sequence of ``(idx, path)`` pairs
    :param database_repo: root directory for the log and the ``single`` folder
    :param fix_sr: sample rate handed to ``librosa.load``
    """
    total = len(audio_path_list)
    for position, (idx, path) in enumerate(audio_path_list):
        # Progress follows the position in the list, not the clip id: ids need
        # not start at 0 or be contiguous, which made the percentage wrong.
        print('\rsingle npy generating... %d' % ((position / total) * 100), end='')
        data, _ = librosa.load(path, sr=fix_sr)
        data = utils.fast_stft(data, power=True)
        name = 'single-%06d' % idx
        with open('%s/single_TF.txt' % database_repo, 'a') as f:
            f.write('%s.npy' % name)
            f.write('\n')
        np.save(('%s/single/%s.npy' % (database_repo, name)), data)
    # Finish the carriage-return progress line.
    print()
Beispiel #6
0
def audio_to_numpy(audio_path_list, data_path=database_path, fix_sr=16000):
    """Store the ``utils.fast_stft`` output of every listed clip as ``.npy``.

    For each ``(idx, path)`` pair the clip is loaded at ``fix_sr``, transformed,
    logged as ``single-<idx>.npy`` in ``<data_path>/single_TF.txt`` and saved to
    ``<data_path>/single/single-<idx>.npy`` (``idx`` is zero-padded to five
    digits).  A one-line progress percentage is printed while running.

    :param audio_path_list: sized sequence of ``(idx, path)`` pairs
    :param data_path: root directory for the log and the ``single`` folder
    :param fix_sr: sample rate handed to ``librosa.load``
    """
    total = len(audio_path_list)
    for position, (idx, path) in enumerate(audio_path_list):
        # Progress follows the position in the list, not the clip id: ids need
        # not start at 0 or be contiguous, which made the percentage wrong.
        print('\r aduio numpy generating... %d' %
              ((position / total) * 100),
              end='')
        data, _ = librosa.load(path, sr=fix_sr)
        data = utils.fast_stft(data)
        name = 'single-%05d' % idx
        with open('%s/single_TF.txt' % data_path, 'a') as f:
            f.write('%s.npy' % name)
            f.write('\n')

        np.save(('%s/single/%s.npy' % (data_path, name)), data)
    # Finish the carriage-return progress line.
    print()
# Options: set a flag to a truthy value to run the matching check below.
# NOTE(review): MODEL_CHECK2 is not referenced in the visible part of this
# script; presumably it guards a check further down -- confirm.
SHAPE_CHECK = 0
MODEL_CHECK1 = 0
MODEL_CHECK2 = 1

# Load two training clips at 16 kHz (sr1 / sr2 are not used below) and build
# an equal-weight mix of them.
data1, sr1 = librosa.load('../../data/audio/audio_train/trim_audio_train0.wav',
                          sr=16000)
data2, sr2 = librosa.load('../../data/audio/audio_train/trim_audio_train1.wav',
                          sr=16000)
mix = data1 * 0.5 + data2 * 0.5

if SHAPE_CHECK:
    # Print the shape of the raw waveform, of avp.fast_stft applied to it and
    # of avp.fast_istft applied to that result.
    # NOTE(review): presumably an STFT -> inverse-STFT round trip; confirm
    # against the avp module.
    print(data1.shape)
    D1 = avp.fast_stft(data1)
    print(D1.shape)
    T1 = avp.fast_istft(D1)
    print(T1.shape)

# Model check 1: transform both clips and the mix, prepending an axis of
# length 1 to each result (presumably a batch axis -- confirm).
if MODEL_CHECK1:
    F1 = avp.fast_stft(data1)
    F1 = np.expand_dims(F1, axis=0)
    F2 = avp.fast_stft(data2)
    F2 = np.expand_dims(F2, axis=0)
    FM = avp.fast_stft(mix)
    FM = np.expand_dims(FM, axis=0)

    # Element-wise ratio of absolute values of clip 1 to the mix; entries that
    # are not finite (inf / nan, e.g. from a zero denominator) are set to 0.
    cRM1 = np.abs(F1) / np.abs(FM)
    cRM1[~np.isfinite(cRM1)] = 0
Beispiel #8
0
def generate_mix_sample(audio_path_list, num_speaker, fix_sr=16000, verbose=0):
    '''
    Build one mixed sample and its per-speaker cRMs and store them on disk.

    The first ``num_speaker`` entries of ``audio_path_list`` are loaded at
    ``fix_sr`` and summed with equal weights ``1 / num_speaker``.  Every clip
    and the mix are transformed with ``utils.fast_stft(..., power=False)``,
    and ``utils.fast_cRM(F_speaker, F_mix)`` is computed per speaker.

    Outputs (paths relative to the working directory):

    * one appended line in ``audio_database/dataset.txt`` listing the mix file
      followed by each cRM file, separated by spaces
    * ``audio_database/mix/mix-<ids>.npy``
    * ``audio_database/crm/crm-<ids>-<id>.npy`` for every speaker

    Nothing is returned.

    :param audio_path_list: indexable sequence of ``(idx, path)`` pairs
    :param num_speaker: number of leading entries to mix
    :param fix_sr: sample rate handed to ``librosa.load``
    :param verbose: when equal to 1, print the shapes of the mix and the cRMs
    '''
    # NOTE(review): the original author states F_mix and each cRM have shape
    # (298, 257, 2); that depends on utils and the clip length -- confirm.

    # Load the clips, remembering the id tag of each for the file names.
    id_tags = []
    waveforms = []
    for speaker in range(num_speaker):
        idx, path = audio_path_list[speaker]
        id_tags.append("-%05d" % idx)
        wave, _ = librosa.load(path, sr=fix_sr)
        waveforms.append(wave)

    # Equal-weight mix, accumulated in place on a zero buffer.
    mix_rate = 1.0 / float(num_speaker)
    mix = np.zeros(shape=waveforms[0].shape)
    for wave in waveforms:
        mix += wave * mix_rate

    # Transform every speaker first, then the mix, then derive the cRMs.
    F_list = [utils.fast_stft(wave, power=False) for wave in waveforms]
    F_mix = utils.fast_stft(mix, power=False)
    cRM_list = [utils.fast_cRM(F, F_mix) for F in F_list]

    if verbose == 1:
        print('shape of X: ', F_mix.shape)
        for i, crm in enumerate(cRM_list):
            print('shape of cRM%s :' % i, crm.shape)

    # File stems: "mix"/"crm" plus all ids, cRMs additionally get their own id.
    post_name = "".join(id_tags)
    mix_name = "mix" + post_name
    crm_name = "crm" + post_name
    crm_stems = [crm_name + tag for tag, _ in zip(id_tags, cRM_list)]

    # Append the record line to the dataset index.
    with open('audio_database/dataset.txt', 'a') as f:
        f.write(mix_name + ".npy")
        for stem in crm_stems:
            f.write(" " + stem + ".npy")
        f.write("\n")

    # Store the arrays themselves.
    np.save(('audio_database/mix/%s.npy' % mix_name), F_mix)
    for stem, crm in zip(crm_stems, cRM_list):
        np.save(('audio_database/crm/%s.npy' % stem), crm)
Beispiel #9
0
def single_audio_to_npy(audio_path_list, fix_sr=16000):
    """Save the ``utils.fast_stft`` output of every listed clip.

    Each ``(idx, path)`` pair is loaded at ``fix_sr``, transformed and written
    to ``audio_database/single/single-<idx>.npy`` (``idx`` zero-padded to five
    digits, path relative to the working directory).

    :param audio_path_list: iterable of ``(idx, path)`` pairs
    :param fix_sr: sample rate handed to ``librosa.load``
    """
    for clip_id, wav_path in audio_path_list:
        samples, _ = librosa.load(wav_path, sr=fix_sr)
        spectrum = utils.fast_stft(samples)
        stem = 'single-%05d' % clip_id
        target = 'audio_database/single/%s.npy' % stem
        np.save(target, spectrum)