コード例 #1
0
def createSpectrogram(arr, orig):
    """Wrap ``arr`` in a ``SpectrogramArray`` reusing ``orig``'s STFT settings.

    Args:
        arr: Data handed to ``SpectrogramArray`` as its first argument.
        orig: Object exposing an ``stft_settings`` mapping that contains
            every key listed in ``setting_names`` below.

    Returns:
        The newly constructed ``SpectrogramArray``.

    Raises:
        KeyError: If ``orig.stft_settings`` lacks one of the copied keys.
    """
    # Only these entries are forwarded; any other key present on
    # ``orig.stft_settings`` is deliberately left out.
    setting_names = (
        'framelength',
        'hopsize',
        'overlap',
        'centered',
        'window',
        'halved',
        'transform',
        'padding',
        'outlength',
    )
    source_settings = orig.stft_settings
    copied_settings = {name: source_settings[name] for name in setting_names}
    return SpectrogramArray(arr, stft_settings=copied_settings)
コード例 #2
0
ファイル: run.py プロジェクト: daisp/speech_separation
def model_batch_test():
    """Score the separation model on the first test batch and save a CSV.

    Steps:
      1. Load the ``data`` dataset from ``<DIR>test_batch`` (HDF5) and the
         per-sample settings list from ``<DIR>test_settings.pkl``.
      2. Restore the model from ``checkpoints/`` (or initialise fresh
         variables when no checkpoint state is found) and run it once on the
         first batch with ``train=False``.
      3. Split the output in half along axis 2 into clean / noise estimates,
         turn them into masks with ``create_mask``, apply the masks to the
         mixture and invert the results with ``stft.ispectrogram``.
      4. Compare the estimates with the references rebuilt from the clean /
         noise data using ``bss_eval_sources`` and collect SDR / SIR / SAR
         for both sources.
      5. Write one ``sdr,sdr,sir,sir,sar,sar`` row per scored sample to
         ``data/results/results_<num_layers>_<lr>.csv``.

    Samples for which ``bss_eval_sources`` raises ``ValueError`` are reported
    on stdout and left out of the CSV.  The function returns ``None``.
    """
    # Read the dataset fully into memory while the file is open so the HDF5
    # handle is released immediately.  ``dataset[()]`` is used instead of the
    # ``.value`` accessor, which h5py deprecated and later removed.  The file
    # is opened read-only so a missing path fails here instead of creating an
    # empty file.
    with h5py.File('%stest_batch' % (DIR), 'r') as test_batch:
        data = test_batch['data'][()]

    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at a settings file produced by this project.
    with open('%stest_settings.pkl' % (DIR), 'rb') as f:
        settings = pickle.load(f)

    # The first axis of ``data`` must hold exactly three entries, in this
    # order; unpacking raises ValueError otherwise.
    combined, clean, noise = data
    target = np.concatenate((clean, noise), axis=2)

    # The third argument (50) is presumably the batch size -- confirm against
    # create_batch.
    combined_batch, target_batch = create_batch(combined, target, 50)

    # Only the first batch is evaluated.  Keep a private copy of it so the
    # masks are applied to the mixture exactly as it was before the model ran.
    original_combined = copy.deepcopy(combined_batch[0])

    with tf.Graph().as_default():
        model = SeparationModel()
        saver = tf.train.Saver(tf.trainable_variables())

        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('checkpoints/')
            if ckpt:
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(session, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")
                session.run(tf.initialize_all_variables())

            output, _, _ = model.train_on_batch(session,
                                                combined_batch[0],
                                                target_batch[0],
                                                train=False)

            # Floor division keeps the split index an integer regardless of
            # the interpreter's division semantics.
            num_freq_bin = output.shape[2] // 2
            clean_outputs = output[:, :, :num_freq_bin]
            noise_outputs = output[:, :, num_freq_bin:]

            results = []

            for i, (full_clean_output, full_noise_output) in enumerate(
                    zip(clean_outputs, noise_outputs)):
                # ``settings[i]`` carries an extra 'orig_length' entry; the
                # copy without it is what SpectrogramArray receives below.
                stft_settings = copy.deepcopy(settings[i])
                orig_length = stft_settings.pop('orig_length')

                # Keep only the trailing ``orig_length`` entries along the
                # first axis of each per-sample array.
                clean_output = full_clean_output[-orig_length:]
                noise_output = full_noise_output[-orig_length:]

                clean_mask, noise_mask = create_mask(
                    clean_output, noise_output)

                mixture = original_combined[i][-orig_length:].transpose()

                # NOTE(review): createSpectrogram receives the raw
                # ``settings[i]`` entry (still containing 'orig_length'),
                # not the stripped copy -- confirm that is what it expects.
                clean_spec = createSpectrogram(
                    np.multiply(clean_mask.transpose(), mixture),
                    settings[i])
                noise_spec = createSpectrogram(
                    np.multiply(noise_mask.transpose(), mixture),
                    settings[i])

                estimated_clean_wav = stft.ispectrogram(clean_spec)
                estimated_noise_wav = stft.ispectrogram(noise_spec)

                reference_clean_wav = stft.ispectrogram(
                    SpectrogramArray(clean[i][-orig_length:],
                                     stft_settings).transpose())
                reference_noise_wav = stft.ispectrogram(
                    SpectrogramArray(noise[i][-orig_length:],
                                     stft_settings).transpose())

                try:
                    sdr, sir, sar, _ = bss_eval_sources(
                        np.array(
                            [reference_clean_wav, reference_noise_wav]),
                        np.array(
                            [estimated_clean_wav, estimated_noise_wav]),
                        False)
                except ValueError as err:
                    # Best effort: report which sample failed and why, then
                    # move on to the next one.
                    print('error: sample %d skipped (%s)' % (i, err))
                else:
                    results.append(
                        (sdr[0], sdr[1], sir[0], sir[1], sar[0], sar[1]))

            results_filename = '%sresults_%d_%f' % (
                'data/results/', Config.num_layers, Config.lr)

            with open(results_filename + '.csv', 'w+') as f:
                for sdr_1, sdr_2, sir_1, sir_2, sar_1, sar_2 in results:
                    f.write('%f,%f,%f,%f,%f,%f\n' %
                            (sdr_1, sdr_2, sir_1, sir_2, sar_1, sar_2))