Esempio n. 1
0
def bss_eval_global(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav):
    """Return length-weighted NSDR, SIR and SAR for one two-source separation.

    The two reference signals and the mixture are cropped (along their first
    axis) to the size of the last axis of ``pred_src1_wav`` and scored with
    ``bss_eval_sources``.  NSDR is the SDR of the predictions minus the SDR
    obtained when the mixture itself stands in as the estimate of both
    sources.

    Args:
        mixed_wav: mixture signal.
        src1_wav: reference signal of the first source.
        src2_wav: reference signal of the second source.
        pred_src1_wav: estimate of the first source.
        pred_src2_wav: estimate of the second source.

    Returns:
        Tuple ``(gnsdr, gsir, gsar)``; each entry is a 2-element array with
        one value per source.
    """
    n_samples = pred_src1_wav.shape[-1]
    ref_first = src1_wav[:n_samples]
    ref_second = src2_wav[:n_samples]
    mixture = mixed_wav[:n_samples]

    def _pair(first, second):
        # bss_eval_sources is fed one stacked array per side.
        return np.array([first, second])

    sdr, sir, sar, _ = bss_eval_sources(
        _pair(ref_first, ref_second),
        _pair(pred_src1_wav, pred_src2_wav),
        False)
    # Baseline: how well the unprocessed mixture already matches each source.
    baseline_sdr, _, _, _ = bss_eval_sources(
        _pair(ref_first, ref_second),
        _pair(mixture, mixture),
        False)
    nsdr = sdr - baseline_sdr

    # Only a single clip is accumulated, so the total weight is its length.
    total_len = 0 + n_samples

    def _weighted_mean(metric):
        # Weight the metric by the clip length, then normalise by the total.
        accumulator = np.zeros(2)
        accumulator += n_samples * metric
        return accumulator / total_len

    gnsdr = _weighted_mean(nsdr)
    gsir = _weighted_mean(sir)
    gsar = _weighted_mean(sar)
    return gnsdr, gsir, gsar
Esempio n. 2
0
def model_batch_test():
    """Score the checkpointed separation model on the first stored test batch.

    Loads the ``data`` dataset from ``<DIR>test_batch`` and the per-example
    STFT settings from ``<DIR>test_settings.pkl``, runs the model once
    (``train=False``) on the first batch returned by ``create_batch``, turns
    the two halves of the model output into masks with ``create_mask``,
    applies them to the input mixture and compares the resynthesised
    waveforms against the references with ``bss_eval_sources``.

    One CSV row ``sdr_clean,sdr_noise,sir_clean,sir_noise,sar_clean,sar_noise``
    is written per successfully scored example to
    ``data/results/results_<num_layers>_<lr>.csv``.  Examples for which
    ``bss_eval_sources`` raises ``ValueError`` are reported on stdout and
    skipped.
    """
    # Open read-only and close the file as soon as the dataset is in memory;
    # ``[()]`` reads the whole dataset into a NumPy array.
    with h5py.File('%stest_batch' % (DIR), 'r') as test_batch:
        data = test_batch['data'][()]

    # NOTE: pickle.load can execute arbitrary code; only point DIR at
    # settings files produced by this project.
    with open('%stest_settings.pkl' % (DIR), 'rb') as f:
        settings = pickle.load(f)

    # The first axis of the dataset holds exactly three entries.
    combined, clean, noise = data
    target = np.concatenate((clean, noise), axis=2)

    combined_batch, target_batch = create_batch(combined, target, 50)

    # Keep an untouched copy of the evaluated batch so the masks are applied
    # to the mixture exactly as it was loaded.
    mixture_batch = copy.deepcopy(combined_batch[0])

    with tf.Graph().as_default():
        model = SeparationModel()
        saver = tf.train.Saver(tf.trainable_variables())

        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('checkpoints/')
            if ckpt:
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(session, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")
                session.run(tf.initialize_all_variables())

            output, _, _ = model.train_on_batch(session,
                                                combined_batch[0],
                                                target_batch[0],
                                                train=False)

            # Floor division keeps the bin count an int so it stays a valid
            # slice bound whatever the division semantics in effect.
            num_freq_bin = output.shape[2] // 2
            clean_outputs = output[:, :, :num_freq_bin]
            noise_outputs = output[:, :, num_freq_bin:]

            results = []

            for i in range(len(clean_outputs)):
                # The reference spectrograms are built from the settings
                # without the 'orig_length' entry; work on a copy so
                # settings[i] itself keeps it.
                stft_settings = copy.deepcopy(settings[i])
                orig_length = stft_settings.pop('orig_length')

                # Only the last orig_length entries along the first axis are
                # evaluated.
                clean_output = clean_outputs[i][-orig_length:]
                noise_output = noise_outputs[i][-orig_length:]
                clean_mask, noise_mask = create_mask(
                    clean_output, noise_output)

                mixture = mixture_batch[i][-orig_length:].transpose()
                clean_spec = createSpectrogram(
                    np.multiply(clean_mask.transpose(), mixture),
                    settings[i])
                noise_spec = createSpectrogram(
                    np.multiply(noise_mask.transpose(), mixture),
                    settings[i])

                estimated_clean_wav = stft.ispectrogram(clean_spec)
                estimated_noise_wav = stft.ispectrogram(noise_spec)

                reference_clean_wav = stft.ispectrogram(
                    SpectrogramArray(clean[i][-orig_length:],
                                     stft_settings).transpose())
                reference_noise_wav = stft.ispectrogram(
                    SpectrogramArray(noise[i][-orig_length:],
                                     stft_settings).transpose())

                try:
                    sdr, sir, sar, _ = bss_eval_sources(
                        np.array(
                            [reference_clean_wav, reference_noise_wav]),
                        np.array(
                            [estimated_clean_wav, estimated_noise_wav]),
                        False)
                except ValueError:
                    # Skip examples the metric rejects; keep scoring the rest.
                    print('error')
                    continue
                results.append(
                    (sdr[0], sdr[1], sir[0], sir[1], sar[0], sar[1]))

            results_filename = '%sresults_%d_%f' % (
                'data/results/', Config.num_layers, Config.lr)

            with open(results_filename + '.csv', 'w') as f:
                for sdr_1, sdr_2, sir_1, sir_2, sar_1, sar_2 in results:
                    f.write('%f,%f,%f,%f,%f,%f\n' %
                            (sdr_1, sdr_2, sir_1, sir_2, sar_1, sar_2))
Esempio n. 3
0
def model_test(test_input):
    """Separate one mixture WAV with the checkpointed model and score it.

    Reads the mixture from ``test_input`` and the reference signals from
    ``CLEAN_FILE`` and ``NOISE_FILE``, runs the model once (``train=False``)
    on the mixture spectrogram, masks the mixture with the two masks derived
    from the model output, prints the SDR/SIR/SAR returned by
    ``bss_eval_sources`` and writes the two separated signals to
    ``data/test_combined/output_clean.wav`` and
    ``data/test_combined/output_noise.wav``.

    Args:
        test_input: path of the mixture WAV file to separate.
    """
    test_rate, test_audio = wavfile.read(test_input)
    _, clean_audio = wavfile.read(CLEAN_FILE)
    _, noise_audio = wavfile.read(NOISE_FILE)

    # Trim the noise reference so it is no longer than the clean reference.
    noise_audio = noise_audio[:len(clean_audio)]

    reference_clean_spec = stft.spectrogram(clean_audio)
    reference_noise_spec = stft.spectrogram(noise_audio)
    test_spec = stft.spectrogram(test_audio)

    # Round-trip the references through the same transform as the estimates
    # before they are compared.
    reverted_clean = stft.ispectrogram(reference_clean_spec)
    reverted_noise = stft.ispectrogram(reference_noise_spec)

    # Wrap the single spectrogram in a batch of one.
    # NOTE(review): the 100000 divisor presumably mirrors the input scaling
    # used at training time - confirm.
    test_data = np.array([test_spec.transpose() / 100000])

    with tf.Graph().as_default():
        model = SeparationModel()
        saver = tf.train.Saver(tf.trainable_variables())

        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('checkpoints/')
            if ckpt:
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(session, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")
                session.run(tf.initialize_all_variables())

            # train_on_batch needs a target; give it zeros with twice the
            # input's last dimension.
            test_data_shape = np.shape(test_data)
            dummy_target = np.zeros((test_data_shape[0], test_data_shape[1],
                                     2 * test_data_shape[2]))

            output, _, _ = model.train_on_batch(session,
                                                test_data,
                                                dummy_target,
                                                train=False)

            # Floor division keeps the bin count an int so it stays a valid
            # slice bound whatever the division semantics in effect.
            num_freq_bin = output.shape[2] // 2
            clean_output = output[0, :, :num_freq_bin]
            noise_output = output[0, :, num_freq_bin:]

            clean_mask, noise_mask = create_mask(clean_output, noise_output)

            estimated_clean_spec = createSpectrogram(
                np.multiply(clean_mask.transpose(), test_spec),
                test_spec.stft_settings)
            estimated_noise_spec = createSpectrogram(
                np.multiply(noise_mask.transpose(), test_spec),
                test_spec.stft_settings)

            clean_wav = stft.ispectrogram(estimated_clean_spec)
            noise_wav = stft.ispectrogram(estimated_noise_spec)

            sdr, sir, sar, _ = bss_eval_sources(
                np.array([reverted_clean, reverted_noise]),
                np.array([clean_wav, noise_wav]), False)
            print(sdr, sir, sar)

            # The separated signals come from the input mixture, so write
            # them at the sample rate read from that file.
            writeWav('data/test_combined/output_clean.wav', test_rate,
                     clean_wav)
            writeWav('data/test_combined/output_noise.wav', test_rate,
                     noise_wav)