def preprocess_for_training(train_A_dir, train_B_dir, cache_folder):
    """Extract, normalise and cache the training features of data sets A and B.

    For each of the two wav directories the audio is loaded, passed through
    ``preprocess.world_encode_data``, and its log-F0 statistics are computed.
    The coded spectral features are transposed and normalised with
    ``preprocess.coded_sps_normalization_fit_transform``.  The following files
    are then written to ``cache_folder``:

    * ``logf0s_normalization.npz`` -- ``mean_A``, ``std_A``, ``mean_B``, ``std_B``
    * ``mcep_normalization.npz`` -- ``mean_A``, ``std_A``, ``mean_B``, ``std_B``
    * ``coded_sps_A_norm.pickle`` and ``coded_sps_B_norm.pickle``

    Progress, the log-pitch statistics and the elapsed time are printed.

    Args:
        train_A_dir: Directory containing the wav files of data set A.
        train_B_dir: Directory containing the wav files of data set B.
        cache_folder: Directory the cache files are written to.  It is
            created (including parents) when it does not exist.

    Returns:
        None.
    """
    num_mcep = 24
    sampling_rate = 16000
    frame_period = 5.0

    print("Starting to prepocess data.......")
    start_time = time.time()

    wavs_A = preprocess.load_wavs(wav_dir=train_A_dir, sr=sampling_rate)
    wavs_B = preprocess.load_wavs(wav_dir=train_B_dir, sr=sampling_rate)

    # Only the F0 contours and the coded spectral features are needed below;
    # the remaining three return values are discarded.
    f0s_A, _, _, _, coded_sps_A = preprocess.world_encode_data(
        wave=wavs_A, fs=sampling_rate, frame_period=frame_period,
        coded_dim=num_mcep)
    f0s_B, _, _, _, coded_sps_B = preprocess.world_encode_data(
        wave=wavs_B, fs=sampling_rate, frame_period=frame_period,
        coded_dim=num_mcep)

    log_f0s_mean_A, log_f0s_std_A = preprocess.logf0_statistics(f0s=f0s_A)
    log_f0s_mean_B, log_f0s_std_B = preprocess.logf0_statistics(f0s=f0s_B)

    print("Log Pitch A")
    print("Mean: {:.4f}, Std: {:.4f}".format(log_f0s_mean_A, log_f0s_std_A))
    print("Log Pitch B")
    print("Mean: {:.4f}, Std: {:.4f}".format(log_f0s_mean_B, log_f0s_std_B))

    coded_sps_A_transposed = preprocess.transpose_in_list(lst=coded_sps_A)
    coded_sps_B_transposed = preprocess.transpose_in_list(lst=coded_sps_B)

    coded_sps_A_norm, coded_sps_A_mean, coded_sps_A_std = (
        preprocess.coded_sps_normalization_fit_transform(
            coded_sps=coded_sps_A_transposed))
    coded_sps_B_norm, coded_sps_B_mean, coded_sps_B_std = (
        preprocess.coded_sps_normalization_fit_transform(
            coded_sps=coded_sps_B_transposed))

    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(cache_folder, exist_ok=True)

    np.savez(os.path.join(cache_folder, 'logf0s_normalization.npz'),
             mean_A=log_f0s_mean_A,
             std_A=log_f0s_std_A,
             mean_B=log_f0s_mean_B,
             std_B=log_f0s_std_B)

    np.savez(os.path.join(cache_folder, 'mcep_normalization.npz'),
             mean_A=coded_sps_A_mean,
             std_A=coded_sps_A_std,
             mean_B=coded_sps_B_mean,
             std_B=coded_sps_B_std)

    save_pickle(variable=coded_sps_A_norm,
                fileName=os.path.join(cache_folder, "coded_sps_A_norm.pickle"))
    save_pickle(variable=coded_sps_B_norm,
                fileName=os.path.join(cache_folder, "coded_sps_B_norm.pickle"))

    end_time = time.time()
    # Report the folder that was actually written to instead of a fixed path.
    print("Preprocessing finished!! see your directory {} for cached "
          "preprocessed data".format(cache_folder))

    print("Time taken for preprocessing {:.4f} seconds".format(
        end_time - start_time))
def train(random_seed=0):
    """Train an SVSRNN model on the wav files listed under ``data/MIR1K``.

    The file names are read from ``data/MIR1K/train.txt`` and
    ``data/MIR1K/valid.txt`` (one name per line).  The audio is loaded,
    converted to spectrograms with ``wavs_to_specs`` and the model is trained
    for a fixed number of iterations on batches drawn by
    ``sample_data_batch``:

    * every 10 steps the training loss is printed;
    * every 200 steps a validation batch is evaluated, the validation loss is
      printed and a ``step,train_loss,validation_loss`` row is appended to
      ``./log/train_log.csv`` (the file is emptied at start-up);
    * every 1000 steps, and once more after the final step, the model is
      saved to ``./model/svsrnn.ckpt``.

    Args:
        random_seed: Seed passed to ``np.random.seed`` before training.

    Returns:
        None.
    """
    np.random.seed(random_seed)

    # The file lists are read from an existing directory; nothing is
    # downloaded by this function.
    mir1k_dir = 'data/MIR1K'
    train_path = os.path.join(mir1k_dir, 'train.txt')
    valid_path = os.path.join(mir1k_dir, 'valid.txt')

    with open(train_path, 'r') as text_file:
        wav_filenames_train = [line.strip() for line in text_file]

    with open(valid_path, 'r') as text_file:
        wav_filenames_valid = [line.strip() for line in text_file]

    # Preprocess parameters
    mir1k_sr = 16000
    n_fft = 1024
    hop_length = n_fft // 4
    # Model parameters
    learning_rate = 0.0001
    num_rnn_layer = 3
    num_hidden_units = [256, 256, 256]
    batch_size = 64
    sample_frames = 10
    iterations = 50000
    tensorboard_directory = './graphs/svsrnn'
    log_directory = './log'
    train_log_filename = 'train_log.csv'
    clear_tensorboard = False
    model_directory = './model'
    model_filename = 'svsrnn.ckpt'
    # Reporting / checkpoint cadence, in training steps
    print_interval = 10
    validation_interval = 200
    save_interval = 1000

    log_filepath = os.path.join(log_directory, train_log_filename)
    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(log_directory, exist_ok=True)
    # Start from an empty log so rows of a previous run are not mixed in.
    with open(log_filepath, 'w'):
        pass

    # Load train wavs
    wavs_mono_train, wavs_src1_train, wavs_src2_train = load_wavs(
        filenames=wav_filenames_train, sr=mir1k_sr)

    # Turn waves to spectrums
    stfts_mono_train, stfts_src1_train, stfts_src2_train = wavs_to_specs(
        wavs_mono=wavs_mono_train,
        wavs_src1=wavs_src1_train,
        wavs_src2=wavs_src2_train,
        n_fft=n_fft,
        hop_length=hop_length)

    # Same preparation for the validation files
    wavs_mono_valid, wavs_src1_valid, wavs_src2_valid = load_wavs(
        filenames=wav_filenames_valid, sr=mir1k_sr)
    stfts_mono_valid, stfts_src1_valid, stfts_src2_valid = wavs_to_specs(
        wavs_mono=wavs_mono_valid,
        wavs_src1=wavs_src1_valid,
        wavs_src2=wavs_src2_valid,
        n_fft=n_fft,
        hop_length=hop_length)

    # Initialize model
    model = SVSRNN(num_features=n_fft // 2 + 1,
                   num_rnn_layer=num_rnn_layer,
                   num_hidden_units=num_hidden_units,
                   tensorboard_directory=tensorboard_directory,
                   clear_tensorboard=clear_tensorboard)

    # Start training
    for i in range(iterations):

        data_mono_batch, data_src1_batch, data_src2_batch = sample_data_batch(
            stfts_mono=stfts_mono_train,
            stfts_src1=stfts_src1_train,
            stfts_src2=stfts_src2_train,
            batch_size=batch_size,
            sample_frames=sample_frames)
        # Only the magnitudes are fed to the model; the phases are dropped.
        x_mixed, _ = sperate_magnitude_phase(data=data_mono_batch)
        y1, _ = sperate_magnitude_phase(data=data_src1_batch)
        y2, _ = sperate_magnitude_phase(data=data_src2_batch)

        train_loss = model.train(x=x_mixed,
                                 y1=y1,
                                 y2=y2,
                                 learning_rate=learning_rate)

        if i % print_interval == 0:
            print('Step: %d Train Loss: %f' % (i, train_loss))

        if i % validation_interval == 0:
            print('==============================================')
            data_mono_batch, data_src1_batch, data_src2_batch = sample_data_batch(
                stfts_mono=stfts_mono_valid,
                stfts_src1=stfts_src1_valid,
                stfts_src2=stfts_src2_valid,
                batch_size=batch_size,
                sample_frames=sample_frames)
            x_mixed, _ = sperate_magnitude_phase(data=data_mono_batch)
            y1, _ = sperate_magnitude_phase(data=data_src1_batch)
            y2, _ = sperate_magnitude_phase(data=data_src2_batch)

            _, _, validation_loss = model.validate(x=x_mixed, y1=y1, y2=y2)
            print('Step: %d Validation Loss: %f' % (i, validation_loss))
            print('==============================================')

            with open(log_filepath, 'a') as log_file:
                log_file.write('{},{},{}\n'.format(i, train_loss,
                                                   validation_loss))

        if i % save_interval == 0:
            model.save(directory=model_directory, filename=model_filename)

    # The in-loop checkpoints only fire on multiples of save_interval, so
    # without this the updates made after the last multiple would be lost.
    # Skipped when the final step was itself a checkpoint step.
    if iterations > 0 and (iterations - 1) % save_interval != 0:
        model.save(directory=model_directory, filename=model_filename)
def evaluate():
    """Evaluate the saved SVSRNN checkpoint on the files in ``test.txt``.

    Reads the wav file names from ``data/MIR1K/test.txt``, loads the audio,
    restores the model from ``model/svsrnn.ckpt`` and predicts two source
    signals for every mono mixture.  The predictions are turned back into
    waveforms with ``librosa.istft`` using the phase of the mono input, and
    the three values returned by ``bss_eval_global`` are printed as GNSDR,
    GSIR and GSAR.

    The ``demo`` directory is created if it is missing, although nothing is
    written into it here.

    Returns:
        None.
    """
    sample_rate = 16000
    fft_size = 1024
    hop = fft_size // 4
    rnn_layers = 3
    hidden_units = [256, 256, 256]
    checkpoint_path = os.path.join('model', 'svsrnn.ckpt')

    list_path = os.path.join('data/MIR1K', 'test.txt')
    with open(list_path, 'r') as list_file:
        wav_filenames = [entry.strip() for entry in list_file.readlines()]

    demo_dir = 'demo'
    if not os.path.exists(demo_dir):
        os.makedirs(demo_dir)

    wavs_mono, wavs_src1, wavs_src2 = load_wavs(
        filenames=wav_filenames, sr=sample_rate)

    stfts_mono, stfts_src1, stfts_src2 = wavs_to_specs(
        wavs_mono=wavs_mono,
        wavs_src1=wavs_src1,
        wavs_src2=wavs_src2,
        n_fft=fft_size,
        hop_length=hop)

    # Only the mono spectrograms are consumed below.
    stfts_mono_full, _, _ = prepare_data_full(
        stfts_mono=stfts_mono,
        stfts_src1=stfts_src1,
        stfts_src2=stfts_src2)

    model = SVSRNN(
        num_features=fft_size // 2 + 1,
        num_rnn_layer=rnn_layers,
        num_hidden_units=hidden_units,
        tensorboard_directory='graphs/svsrnn',
        clear_tensorboard=False)
    model.load(filepath=checkpoint_path)

    predicted_src1 = []
    predicted_src2 = []

    # Zipping all three sequences keeps the iteration bounded by the
    # shortest of them; only the spectrogram is used inside the loop.
    for _, _, mono_spec in zip(wav_filenames, wavs_mono, stfts_mono_full):
        magnitude, phase = sperate_magnitude_phase(data=mono_spec)
        # The model is called with a batch holding this single item.
        src1_mag, src2_mag = model.test(x=np.array([magnitude]))

        # ISTFT with the phase from mono
        src1_spec = combine_magnitdue_phase(
            magnitudes=src1_mag[0], phases=phase)
        src2_spec = combine_magnitdue_phase(
            magnitudes=src2_mag[0], phases=phase)

        predicted_src1.append(
            librosa.istft(src1_spec.transpose(), hop_length=hop))
        predicted_src2.append(
            librosa.istft(src2_spec.transpose(), hop_length=hop))

    gnsdr, gsir, gsar = bss_eval_global(
        wavs_mono=wavs_mono,
        wavs_src1=wavs_src1,
        wavs_src2=wavs_src2,
        wavs_src1_pred=predicted_src1,
        wavs_src2_pred=predicted_src2)

    print('GNSDR:', gnsdr)
    print('GSIR:', gsir)
    print('GSAR:', gsar)
def generate_demo():
    """Write the separated sources of every test file to the ``demo`` folder.

    Reads the wav file names from ``data/MIR1K/test.txt``, loads the audio,
    restores the model from ``model/svsrnn.ckpt`` and predicts two source
    signals for every mono mixture.  For an input named ``<stem>.<ext>`` three
    files are written to ``demo``: ``<stem>_mono.wav`` (the mono input),
    ``<stem>_src1.wav`` and ``<stem>_src2.wav`` (the two predictions,
    reconstructed with the phase of the mono input).

    Returns:
        None.
    """
    mir1k_sr = 16000
    n_fft = 1024
    hop_length = n_fft // 4
    num_rnn_layer = 3
    num_hidden_units = [256, 256, 256]
    tensorboard_directory = 'graphs/svsrnn'
    clear_tensorboard = False
    model_directory = 'model'
    model_filename = 'svsrnn.ckpt'
    model_filepath = os.path.join(model_directory, model_filename)

    mir1k_dir = 'data/MIR1K'
    test_path = os.path.join(mir1k_dir, 'test.txt')

    with open(test_path, 'r') as text_file:
        wav_filenames = [line.strip() for line in text_file]

    output_directory = 'demo'
    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(output_directory, exist_ok=True)

    wavs_mono, wavs_src1, wavs_src2 = load_wavs(
        filenames=wav_filenames, sr=mir1k_sr)

    stfts_mono, stfts_src1, stfts_src2 = wavs_to_specs(
        wavs_mono=wavs_mono,
        wavs_src1=wavs_src1,
        wavs_src2=wavs_src2,
        n_fft=n_fft,
        hop_length=hop_length)

    # Only the mono spectrograms are consumed below.
    stfts_mono_full, _, _ = prepare_data_full(
        stfts_mono=stfts_mono, stfts_src1=stfts_src1, stfts_src2=stfts_src2)

    model = SVSRNN(num_features=n_fft // 2 + 1,
                   num_rnn_layer=num_rnn_layer,
                   num_hidden_units=num_hidden_units,
                   tensorboard_directory=tensorboard_directory,
                   clear_tensorboard=clear_tensorboard)
    model.load(filepath=model_filepath)

    for wav_filename, wav_mono, stft_mono_full in zip(
            wav_filenames, wavs_mono, stfts_mono_full):

        wav_filename_base = os.path.basename(wav_filename)
        # splitext removes only the final extension.  Cutting at the first
        # dot would map "a.b.wav" and "a.c.wav" to the same output names.
        wav_stem = os.path.splitext(wav_filename_base)[0]
        wav_mono_filepath = os.path.join(
            output_directory, wav_stem + '_mono.wav')
        wav_src1_hat_filepath = os.path.join(
            output_directory, wav_stem + '_src1.wav')
        wav_src2_hat_filepath = os.path.join(
            output_directory, wav_stem + '_src2.wav')

        print('Processing %s ...' % wav_filename_base)

        stft_mono_magnitude, stft_mono_phase = sperate_magnitude_phase(
            data=stft_mono_full)
        # The model is called with a batch holding this single item.
        stft_mono_magnitude = np.array([stft_mono_magnitude])

        y1_pred, y2_pred = model.test(x=stft_mono_magnitude)

        # ISTFT with the phase from mono
        y1_stft_hat = combine_magnitdue_phase(
            magnitudes=y1_pred[0], phases=stft_mono_phase)
        y2_stft_hat = combine_magnitdue_phase(
            magnitudes=y2_pred[0], phases=stft_mono_phase)

        y1_stft_hat = y1_stft_hat.transpose()
        y2_stft_hat = y2_stft_hat.transpose()

        y1_hat = librosa.istft(y1_stft_hat, hop_length=hop_length)
        y2_hat = librosa.istft(y2_stft_hat, hop_length=hop_length)

        # NOTE(review): librosa.output.write_wav was removed in librosa 0.8.0;
        # this requires an older librosa.  Migrating needs another audio
        # writer (e.g. soundfile.write), which this file does not import.
        librosa.output.write_wav(wav_mono_filepath, wav_mono, mir1k_sr)
        librosa.output.write_wav(wav_src1_hat_filepath, y1_hat, mir1k_sr)
        librosa.output.write_wav(wav_src2_hat_filepath, y2_hat, mir1k_sr)
# Example no. 5
# 0
def train(random_seed=0):
    """Train an SVSRNN model on the wav files listed under ``data/MIR1K``.

    The file names are read from ``data/MIR1K/train.txt`` and
    ``data/MIR1K/valid.txt`` (one name per line).  The audio is loaded,
    converted to spectrograms with ``wavs_to_specs`` and the model is trained
    for a fixed number of iterations on batches drawn by
    ``sample_data_batch``:

    * every 10 steps the training loss is printed;
    * every 200 steps a validation batch is evaluated, the validation loss is
      printed and a ``step,train_loss,validation_loss`` row is appended to
      ``./log/train_log.csv`` (the file is emptied at start-up);
    * every 1000 steps, and once more after the final step, the model is
      saved to ``./model/svsrnn.ckpt``.

    Args:
        random_seed: Seed passed to ``np.random.seed`` before training.

    Returns:
        None.
    """
    np.random.seed(random_seed)

    # The file lists are read from an existing directory; nothing is
    # downloaded by this function.
    mir1k_dir = 'data/MIR1K'
    train_path = os.path.join(mir1k_dir, 'train.txt')
    valid_path = os.path.join(mir1k_dir, 'valid.txt')

    with open(train_path, 'r') as text_file:
        wav_filenames_train = [line.strip() for line in text_file]

    with open(valid_path, 'r') as text_file:
        wav_filenames_valid = [line.strip() for line in text_file]

    # Preprocess parameters
    mir1k_sr = 16000
    n_fft = 1024
    hop_length = n_fft // 4
    # Model parameters
    learning_rate = 0.0001
    num_rnn_layer = 3
    num_hidden_units = [256, 256, 256]
    batch_size = 64
    sample_frames = 10
    iterations = 50000
    tensorboard_directory = './graphs/svsrnn'
    log_directory = './log'
    train_log_filename = 'train_log.csv'
    clear_tensorboard = False
    model_directory = './model'
    model_filename = 'svsrnn.ckpt'
    # Reporting / checkpoint cadence, in training steps
    print_interval = 10
    validation_interval = 200
    save_interval = 1000

    log_filepath = os.path.join(log_directory, train_log_filename)
    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(log_directory, exist_ok=True)
    # Start from an empty log so rows of a previous run are not mixed in.
    with open(log_filepath, 'w'):
        pass

    # Load train wavs
    wavs_mono_train, wavs_src1_train, wavs_src2_train = load_wavs(
        filenames=wav_filenames_train, sr=mir1k_sr)

    # Turn waves to spectrums
    stfts_mono_train, stfts_src1_train, stfts_src2_train = wavs_to_specs(
        wavs_mono=wavs_mono_train,
        wavs_src1=wavs_src1_train,
        wavs_src2=wavs_src2_train,
        n_fft=n_fft,
        hop_length=hop_length)

    # Same preparation for the validation files
    wavs_mono_valid, wavs_src1_valid, wavs_src2_valid = load_wavs(
        filenames=wav_filenames_valid, sr=mir1k_sr)
    stfts_mono_valid, stfts_src1_valid, stfts_src2_valid = wavs_to_specs(
        wavs_mono=wavs_mono_valid,
        wavs_src1=wavs_src1_valid,
        wavs_src2=wavs_src2_valid,
        n_fft=n_fft,
        hop_length=hop_length)

    # Initialize model
    model = SVSRNN(num_features=n_fft // 2 + 1,
                   num_rnn_layer=num_rnn_layer,
                   num_hidden_units=num_hidden_units,
                   tensorboard_directory=tensorboard_directory,
                   clear_tensorboard=clear_tensorboard)

    # Start training
    for i in range(iterations):

        data_mono_batch, data_src1_batch, data_src2_batch = sample_data_batch(
            stfts_mono=stfts_mono_train,
            stfts_src1=stfts_src1_train,
            stfts_src2=stfts_src2_train,
            batch_size=batch_size,
            sample_frames=sample_frames)
        # Only the magnitudes are fed to the model; the phases are dropped.
        x_mixed, _ = sperate_magnitude_phase(data=data_mono_batch)
        y1, _ = sperate_magnitude_phase(data=data_src1_batch)
        y2, _ = sperate_magnitude_phase(data=data_src2_batch)

        train_loss = model.train(x=x_mixed,
                                 y1=y1,
                                 y2=y2,
                                 learning_rate=learning_rate)

        if i % print_interval == 0:
            print('Step: %d Train Loss: %f' % (i, train_loss))

        if i % validation_interval == 0:
            print('==============================================')
            data_mono_batch, data_src1_batch, data_src2_batch = sample_data_batch(
                stfts_mono=stfts_mono_valid,
                stfts_src1=stfts_src1_valid,
                stfts_src2=stfts_src2_valid,
                batch_size=batch_size,
                sample_frames=sample_frames)
            x_mixed, _ = sperate_magnitude_phase(data=data_mono_batch)
            y1, _ = sperate_magnitude_phase(data=data_src1_batch)
            y2, _ = sperate_magnitude_phase(data=data_src2_batch)

            _, _, validation_loss = model.validate(x=x_mixed, y1=y1, y2=y2)
            print('Step: %d Validation Loss: %f' % (i, validation_loss))
            print('==============================================')

            with open(log_filepath, 'a') as log_file:
                log_file.write('{},{},{}\n'.format(i, train_loss,
                                                   validation_loss))

        if i % save_interval == 0:
            model.save(directory=model_directory, filename=model_filename)

    # The in-loop checkpoints only fire on multiples of save_interval, so
    # without this the updates made after the last multiple would be lost.
    # Skipped when the final step was itself a checkpoint step.
    if iterations > 0 and (iterations - 1) % save_interval != 0:
        model.save(directory=model_directory, filename=model_filename)