def test_output_metrics():
    """Compare every metric reported by ``OutputMetrics`` with reference values.

    The prediction is the speech image plus the noise image (index 0 on the
    second-to-last axis of both), i.e. a perfect cross talker suppression.
    Any metric name that has no reference value below makes the test fail
    with a ``KeyError``.
    """
    example = scenario()

    # Index 0 on the second-to-last axis of the speech and noise images.
    speech_ch0 = example['speech_image'][..., 0, :]
    noise_ch0 = example['noise_image'][..., 0, :]

    # Take speech image + noise as prediction, i.e. perfect cross talker
    # suppression.
    speech_prediction = speech_ch0 + noise_ch0

    # Each speaker only contributes to its own output; the other entry of
    # the row is filled with zeros.
    first_speaker, second_speaker = speech_ch0
    speech_contribution = np.array([
        [first_speaker, np.zeros_like(second_speaker)],
        [np.zeros_like(first_speaker), second_speaker],
    ])
    # The same noise signal is used for both outputs.
    noise_contribution = np.array([noise_ch0, noise_ch0])

    # The contributions are handed over directly; observation, speech_image,
    # noise_image and channel_score_reduce are intentionally not passed.
    metrics = OutputMetrics(
        speech_prediction=speech_prediction,
        speech_source=example['speech_source'],
        speech_contribution=speech_contribution,
        noise_contribution=noise_contribution,
        sample_rate=8000,
    )

    assert metrics.K_source == 2

    # Metric name -> (reference value, extra kwargs for assert_allclose).
    expected = {
        'invasive_sdr': ([49.137625, 44.503376], {}),
        'invasive_sir': (np.inf, {}),
        'invasive_snr': ([49.137625, 44.503376], {}),
        'mir_eval_sdr': ([17.071665, 24.711722], {}),
        'mir_eval_sir': ([29.423133, 37.060289], {}),
        'mir_eval_sar': ([17.336992, 24.973125], {}),
        'pesq': ([4.37408, 4.405752], {}),
        'stoi': ([0.968833, 0.976151], {'rtol': 1e-6}),
        'srmr': ([0.5504078, 0.50442512], {}),
    }

    for name, value in metrics.as_dict().items():
        if name == 'mir_eval_selection':
            # The selection is compared exactly, not with a tolerance.
            assert all(value == [0, 1])
        elif name in expected:
            reference, tolerance = expected[name]
            np.testing.assert_allclose(value, reference, **tolerance)
        else:
            # An unknown metric must get a reference value before the test
            # may pass.
            raise KeyError(name, value)
def get_scores(
        ex,
        mask,
        Observation='Observation',
        beamformer='mvdr_souden',
        postfilter=None,
):
    """
    Score an estimate against the source (desired) signal.

    This is an oracle experiment to find out which metrics can be used with
    the source signal as reference. SI-SDR does not work when the desired
    signal is the signal before the room impulse response, and it gives
    strange results when the channel is changed; it is therefore disabled in
    the branch that builds ``OutputMetrics`` directly.

    Args:
        ex: Example; ``ex['audio_data']`` is read for the signals.
        mask: Mask that is rearranged from ``'k t f'`` to ``'t k f'`` when
            ``Observation == 'Observation'``. Must be ``None`` otherwise.
        Observation: Key inside ``ex['audio_data']``. With the default
            ``'Observation'`` the estimate is obtained through
            ``get_multi_speaker_metrics``; with any other key, index 0 on
            the second axis of ``ex['audio_data'][Observation]`` is used as
            the prediction.
        beamformer: Forwarded as ``bf_algorithm``. Must be ``'ch0'`` when
            ``Observation != 'Observation'``.
        postfilter: Forwarded to ``get_multi_speaker_metrics``
            (``None`` or ``'mask_mul'``). Must be ``None`` when
            ``Observation != 'Observation'``.

    Returns:
        Tuple ``(metric, result)``, where ``result`` is ``metric.as_dict()``
        without the ``mir_eval_sxr`` selection/SAR/SIR entries and, if
        present, without the ``invasive_sxr`` SIR/SNR entries.

    Raises:
        AssertionError: If ``mask``, ``beamformer`` or ``postfilter`` do not
            fit a non-default ``Observation``.
        KeyError: If one of the entries that is removed unconditionally is
            missing from ``metric.as_dict()``.

    Example:
        >>> ex = get_dataset('cv_dev93')[0]
        >>> mask = get_mask_from_oracle(ex, 'IBM')
        >>> metric, result = get_scores(ex, mask)
        >>> pprint(result)
        {'pesq': array([2.014, 1.78 ]),
         'stoi': array([0.68236465, 0.61319396]),
         'mir_eval_sxr_sdr': array([10.23933413, 10.01566298]),
         'invasive_sxr_sdr': array([15.76439393, 13.86230425])}

    """
    if Observation == 'Observation':
        # The mask is rearranged before the example is touched.
        mask_tkf = rearrange(mask, 'k t f -> t k f')  # T Ktarget F
        audio = ex['audio_data']
        metric = get_multi_speaker_metrics(
            mask=mask_tkf,
            Observation=audio[Observation],  # D T F (stft signal)
            speech_source=audio['speech_source'],  # Ksource N (time signal)
            Speech_image=audio['Speech_image'],  # Ksource D T F (stft signal)
            Noise_image=audio['Noise_image'],  # D T F (stft signal)
            istft=istft,  # callable(signal, num_samples=num_samples)
            bf_algorithm=beamformer,
            postfilter=postfilter,  # [None, 'mask_mul']
        )
    else:
        # Without the multi speaker pipeline nothing is enhanced, hence only
        # the trivial configuration is accepted.
        assert mask is None, mask
        assert beamformer == 'ch0', beamformer
        assert postfilter is None, postfilter

        audio = ex['audio_data']
        metric = OutputMetrics(
            speech_prediction=audio[Observation][:, 0],
            speech_source=audio['speech_source'],
            sample_rate=8000,
            enable_si_sdr=False,
        )

    result = metric.as_dict()

    # These entries are always expected; a missing one raises a KeyError.
    for key in (
            'mir_eval_sxr_selection',
            'mir_eval_sxr_sar',
            'mir_eval_sxr_sir',
    ):
        del result[key]

    # The invasive SIR/SNR entries are only removed when the SIR is present.
    if 'invasive_sxr_sir' in result:
        for key in ('invasive_sxr_sir', 'invasive_sxr_snr'):
            del result[key]

    return metric, result
def get_scores(ex, prediction, source):
    """
    Score one signal of an example against another signal of that example.

    This is an oracle experiment to find out which metrics can be used with
    the source signal as reference.

    Args:
        ex: Example dict; ``ex['audio_data']`` must be a dict as well.
        prediction: Name of the signal that is used as estimate.
        source: Name of the signal that is used as reference.

        Supported names are ``'source'``, ``'early_0'``, ``'early_1'``,
        ``'image_0'``, ``'image_1'``, ``'image_0_noise'`` and
        ``'image_1_noise'``.

    Returns:
        ``OutputMetrics(...).as_dict()`` without the ``mir_eval_sxr``
        selection, SAR and SIR entries.

    Raises:
        AssertionError: If ``ex`` is not a dict with a dict under
            ``'audio_data'``.
        ValueError: If ``prediction`` or ``source`` is not a supported name.
        KeyError: If one of the removed entries is missing from the metrics.

    Example:
        SI-SDR does not work when the desired signal is the signal before
        the room impulse response, and it gives strange results when the
        channel is changed.

        >>> pprint(get_scores(get_dataset('cv_dev93')[0], 'image_0', 'early_0'))
        {'pesq': array([2.861]),
         'stoi': array([0.97151566]),
         'mir_eval_sxr_sdr': array([13.39136665]),
         'si_sdr': array([10.81039897])}
        >>> pprint(get_scores(get_dataset('cv_dev93')[0], 'image_0', 'source'))
        {'pesq': array([2.234]),
         'stoi': array([0.8005423]),
         'mir_eval_sxr_sdr': array([12.11446204]),
         'si_sdr': array([-20.05244551])}
        >>> pprint(get_scores(get_dataset('cv_dev93')[0], 'image_0', 'image_1'))
        {'pesq': array([3.608]),
         'stoi': array([0.92216845]),
         'mir_eval_sxr_sdr': array([9.55425598]),
         'si_sdr': array([-0.16858895])}

    """
    # Signal name -> how to read that signal from ``ex['audio_data']``.
    # NOTE(review): 'image_1_noise' adds noise_image[0], exactly like
    # 'image_0_noise' -- confirm that index 0 (and not 1) is intended.
    extractors = (
        ('source', lambda audio: audio['speech_source'][:]),
        ('early_0', lambda audio: audio['speech_reverberation_early'][:, 0]),
        ('early_1', lambda audio: audio['speech_reverberation_early'][:, 1]),
        ('image_0', lambda audio: audio['speech_image'][:, 0]),
        ('image_1', lambda audio: audio['speech_image'][:, 1]),
        (
            'image_0_noise',
            lambda audio: (
                audio['speech_image'][:, 0] + audio['noise_image'][0]
            ),
        ),
        (
            'image_1_noise',
            lambda audio: (
                audio['speech_image'][:, 1] + audio['noise_image'][0]
            ),
        ),
    )

    def get_signal(ex, name):
        # Validate the example layout before any signal is read.
        assert isinstance(ex, dict), ex
        assert 'audio_data' in ex, ex
        assert isinstance(ex['audio_data'], dict), ex

        for known_name, extract in extractors:
            if name == known_name:
                return extract(ex['audio_data'])
        raise ValueError(name)

    metric = OutputMetrics(
        speech_prediction=get_signal(ex, prediction),
        speech_source=get_signal(ex, source),
        sample_rate=8000,
        enable_si_sdr=True,
    )

    result = metric.as_dict()
    # Drop the entries that are not of interest for this experiment; a
    # missing entry raises a KeyError.
    for key in (
            'mir_eval_sxr_selection',
            'mir_eval_sxr_sar',
            'mir_eval_sxr_sir',
    ):
        del result[key]
    return result