예제 #1
0
def test_output_metrics():
    """
    Check the metric values `OutputMetrics` reports for the `scenario()` data.

    The prediction is built as speech image plus noise image, i.e. it models
    perfect cross-talker suppression. Every key returned by
    `metrics.as_dict()` must be one of the known metrics below; an unknown
    key raises `KeyError` so that new metrics cannot slip in untested.
    """
    example = scenario()

    # Index 0 on the second-to-last axis is used throughout
    # (presumably the first channel -- confirm against `scenario()`).
    speech_ch0 = example['speech_image'][..., 0, :]
    noise_ch0 = example['noise_image'][..., 0, :]

    # Speech image + noise as prediction: perfect cross-talker suppression.
    speech_prediction = speech_ch0 + noise_ch0

    # Unpacking enforces that there are exactly two entries on the first axis.
    first_speaker, second_speaker = speech_ch0

    # Entry [i][j]: each output contains only its own speaker (zeros elsewhere).
    speech_contribution = np.array([
        [first_speaker, np.zeros_like(second_speaker)],
        [np.zeros_like(first_speaker), second_speaker],
    ])
    # The same noise signal is present in both outputs.
    noise_contribution = np.array([noise_ch0, noise_ch0])

    # `observation`, `speech_image`, `noise_image` and `channel_score_reduce`
    # are not passed; the contributions are handed over explicitly instead.
    metrics = OutputMetrics(
        speech_prediction=speech_prediction,
        speech_source=example['speech_source'],
        speech_contribution=speech_contribution,
        noise_contribution=noise_contribution,
        sample_rate=8000,
    )

    assert metrics.K_source == 2

    # metric name -> (reference value, extra kwargs for assert_allclose)
    expected = {
        'invasive_sdr': ([49.137625, 44.503376], {}),
        'invasive_sir': (np.inf, {}),
        'invasive_snr': ([49.137625, 44.503376], {}),
        'mir_eval_sdr': ([17.071665, 24.711722], {}),
        'mir_eval_sir': ([29.423133, 37.060289], {}),
        'mir_eval_sar': ([17.336992, 24.973125], {}),
        'pesq': ([4.37408, 4.405752], {}),
        'stoi': ([0.968833, 0.976151], {'rtol': 1e-6}),
        'srmr': ([0.5504078, 0.50442512], {}),
    }

    for name, value in metrics.as_dict().items():
        if name == 'mir_eval_selection':
            # Compared exactly (element-wise equality), not with a tolerance.
            assert all(value == [0, 1])
            continue
        if name not in expected:
            raise KeyError(name, value)
        reference, tolerance = expected[name]
        np.testing.assert_allclose(value, reference, **tolerance)
예제 #2
0
def get_scores(
    ex,
    mask,
    Observation='Observation',
    beamformer='mvdr_souden',
    postfilter=None,
):
    """
    Compute the scores of an estimated signal against the source signal.

    This is meant for oracle tests that investigate which metrics can work
    with the source signal as the reference.

    Note: SI-SDR does not work when the desired signal is the signal before
    the room impulse response, and it gives strange results when the channel
    is changed. It is therefore disabled in the single-channel branch.

    Args:
        ex: Example with an `'audio_data'` mapping that holds the signals.
        mask: Mask that is rearranged from `'k t f'` to `'t k f'` for the
            beamformer branch. Must be `None` in the single-channel branch.
        Observation: Key in `ex['audio_data']` that selects the signal.
            With the default `'Observation'` the signal is enhanced via
            `get_multi_speaker_metrics`; with any other key, index 0 on the
            second axis of that signal is scored directly.
        beamformer: Forwarded as `bf_algorithm`. Must be `'ch0'` in the
            single-channel branch.
        postfilter: Forwarded as `postfilter` (the original code lists
            `None` and `'mask_mul'`). Must be `None` in the single-channel
            branch.

    Returns:
        Tuple `(metric, result)`: the metric object and its `as_dict()`
        output with the selection/SAR/SIR (and, if present, invasive
        SIR/SNR) entries removed.

    Example:

        >>> ex = get_dataset('cv_dev93')[0]
        >>> mask = get_mask_from_oracle(ex, 'IBM')
        >>> metric, result = get_scores(ex, mask)
        >>> pprint(result)
        {'pesq': array([2.014, 1.78 ]),
         'stoi': array([0.68236465, 0.61319396]),
         'mir_eval_sxr_sdr': array([10.23933413, 10.01566298]),
         'invasive_sxr_sdr': array([15.76439393, 13.86230425])}
    """
    use_beamformer = Observation == 'Observation'

    if not use_beamformer:
        # Single-channel branch: nothing to beamform, so the enhancement
        # related arguments must be in their "disabled" state.
        assert mask is None, mask
        assert beamformer == 'ch0', beamformer
        assert postfilter is None, postfilter

        audio_data = ex['audio_data']
        metric = OutputMetrics(
            speech_prediction=audio_data[Observation][:, 0],
            speech_source=audio_data['speech_source'],
            sample_rate=8000,
            enable_si_sdr=False,
        )
    else:
        mask_tkf = rearrange(mask, 'k t f -> t k f')  # T Ktarget F
        audio_data = ex['audio_data']
        metric = get_multi_speaker_metrics(
            mask=mask_tkf,
            # D T F (stft signal)
            Observation=audio_data[Observation],
            # Ksource N (time signal)
            speech_source=audio_data['speech_source'],
            # Ksource D T F (stft signal)
            Speech_image=audio_data['Speech_image'],
            # D T F (stft signal)
            Noise_image=audio_data['Noise_image'],
            # callable(signal, num_samples=num_samples)
            istft=istft,
            bf_algorithm=beamformer,
            # [None, 'mask_mul']
            postfilter=postfilter,
        )

    result = metric.as_dict()

    # These entries are always expected to exist; a missing one is an error.
    for key in (
            'mir_eval_sxr_selection',
            'mir_eval_sxr_sar',
            'mir_eval_sxr_sir',
    ):
        del result[key]

    # The invasive entries are only checked for via 'invasive_sxr_sir'.
    if 'invasive_sxr_sir' in result:
        for key in ('invasive_sxr_sir', 'invasive_sxr_snr'):
            del result[key]

    return metric, result
def get_scores(ex, prediction, source):
    """
    Compute the scores of a named prediction against a named reference.

    This is meant for oracle tests that investigate which metrics can work
    with the source signal as the reference.

    Note: SI-SDR does not work when the desired signal is the signal before
    the room impulse response, and it gives strange results when the channel
    is changed (see the last two examples).

    Args:
        ex: Example dict with an `'audio_data'` dict that holds the signals.
        prediction: Name of the signal used as the estimate. One of
            `'source'`, `'early_0'`, `'early_1'`, `'image_0'`, `'image_1'`,
            `'image_0_noise'`, `'image_1_noise'`. The numeric suffix is the
            index that is selected from the signal.
        source: Name of the signal used as the reference (same choices).

    Returns:
        The `as_dict()` output of the metric object without the
        `'mir_eval_sxr_selection'`, `'mir_eval_sxr_sar'` and
        `'mir_eval_sxr_sir'` entries.

    Raises:
        ValueError: If `prediction` or `source` is not a known signal name.

    Example:

    >>> pprint(get_scores(get_dataset('cv_dev93')[0], 'image_0', 'early_0'))
    {'pesq': array([2.861]),
     'stoi': array([0.97151566]),
     'mir_eval_sxr_sdr': array([13.39136665]),
     'si_sdr': array([10.81039897])}
    >>> pprint(get_scores(get_dataset('cv_dev93')[0], 'image_0', 'source'))
    {'pesq': array([2.234]),
     'stoi': array([0.8005423]),
     'mir_eval_sxr_sdr': array([12.11446204]),
     'si_sdr': array([-20.05244551])}
    >>> pprint(get_scores(get_dataset('cv_dev93')[0], 'image_0', 'image_1'))
    {'pesq': array([3.608]),
     'stoi': array([0.92216845]),
     'mir_eval_sxr_sdr': array([9.55425598]),
     'si_sdr': array([-0.16858895])}
    """
    def get_signal(ex, name):
        # Resolve a signal name to the corresponding array in `audio_data`.
        assert isinstance(ex, dict), ex
        assert 'audio_data' in ex, ex
        assert isinstance(ex['audio_data'], dict), ex
        audio_data = ex['audio_data']

        if name == 'source':
            return audio_data['speech_source'][:]
        elif name == 'early_0':
            return audio_data['speech_reverberation_early'][:, 0]
        elif name == 'early_1':
            return audio_data['speech_reverberation_early'][:, 1]
        elif name == 'image_0':
            return audio_data['speech_image'][:, 0]
        elif name == 'image_1':
            return audio_data['speech_image'][:, 1]
        elif name == 'image_0_noise':
            return audio_data['speech_image'][:, 0] + \
                   audio_data['noise_image'][0]
        elif name == 'image_1_noise':
            # Use the noise with the same index as the speech image. The
            # previous code added `noise_image[0]` here, which combined the
            # index-1 speech image with the index-0 noise.
            return audio_data['speech_image'][:, 1] + \
                   audio_data['noise_image'][1]
        else:
            raise ValueError(name)

    speech_prediction = get_signal(ex, prediction)
    speech_source = get_signal(ex, source)

    metric = OutputMetrics(
        speech_prediction=speech_prediction,
        speech_source=speech_source,
        sample_rate=8000,
        enable_si_sdr=True,
    )

    result = metric.as_dict()
    # Only the SDR-like scores are of interest for this comparison.
    del result['mir_eval_sxr_selection']
    del result['mir_eval_sxr_sar']
    del result['mir_eval_sxr_sir']

    return result