Beispiel #1
0
    def test_compute_loudness_at_sample_rate_2d(self, sample_rate,
                                                audio_len_sec):
        """Check batched loudness: shape, finiteness, agreement with 1-D.

        Runs `spectral_ops.compute_loudness` on a batch of sinusoids with
        `use_th=False` and `use_th=True`, then compares every row of the
        result against the loudness of a single sinusoid generated from the
        same frequency, amplitude, sample rate and length.

        Args:
            sample_rate: Sample rate handed to the signal generators and to
                `compute_loudness`.
            audio_len_sec: Length of the generated audio (seconds, going by
                the parameter name).
        """
        batch_size = 8
        audio_sin_batch = gen_np_batched_sinusoids(self.frequency, self.amp,
                                                   sample_rate, audio_len_sec,
                                                   batch_size)
        # The single-signal reference input does not depend on `use_th`,
        # so it is generated once instead of once per loop iteration.
        audio_sin = gen_np_sinusoid(self.frequency, self.amp, sample_rate,
                                    audio_len_sec)
        expected_loudness_len = int(self.frame_rate * audio_len_sec)

        for use_th in [False, True]:
            loudness_batch = spectral_ops.compute_loudness(audio_sin_batch,
                                                           sample_rate,
                                                           self.frame_rate,
                                                           use_th=use_th)
            if use_th:
                # The `use_th=True` result is converted to a NumPy array so
                # the shape/finiteness checks below work on one type.
                loudness_batch = loudness_batch.numpy()

            assert loudness_batch.shape[0] == batch_size
            assert loudness_batch.shape[1] == expected_loudness_len
            assert np.all(np.isfinite(loudness_batch))

            # Check if batched loudness is equal to equivalent single
            # computations.
            loudness_target = spectral_ops.compute_loudness(audio_sin,
                                                            sample_rate,
                                                            self.frame_rate,
                                                            use_th=use_th)
            loudness_batch_target = np.tile(loudness_target, (batch_size, 1))
            # Allow tolerance within 1dB. np.allclose accepts
            # |a - b| <= atol + rtol * |b|, so any non-zero rtol would widen
            # the bound by a multiple of the target's magnitude; keep rtol at
            # zero so the check is a pure absolute tolerance.
            assert np.allclose(loudness_batch,
                               loudness_batch_target,
                               atol=1,
                               rtol=0)
Beispiel #2
0
    def test_compute_loudness_indivisible_rates_raises_error(
            self, sample_rate, audio_len_sec):
        """Expect `compute_loudness` to raise ValueError for both backends.

        A sinusoid is generated for the given sample rate and length, and
        `spectral_ops.compute_loudness` is called with `use_th=False` and
        `use_th=True`; each call must raise `ValueError`.

        NOTE(review): the parametrization is not visible here; going by the
        test name it presumably supplies a sample rate that is not divisible
        by `self.frame_rate` -- confirm against the decorator.

        Args:
            sample_rate: Sample rate used for generation and loudness.
            audio_len_sec: Length of the generated audio.
        """
        signal = gen_np_sinusoid(self.frequency, self.amp, sample_rate,
                                 audio_len_sec)

        for th_flag in (False, True):
            with pytest.raises(ValueError):
                spectral_ops.compute_loudness(
                    signal, sample_rate, self.frame_rate, use_th=th_flag)
Beispiel #3
0
    def test_th_and_np_are_consistent(self):
        """Check that `use_th=True` and `use_th=False` give matching loudness.

        Low-amplitude uniform noise is fed through
        `spectral_ops.compute_loudness` once per `use_th` setting, and the
        absolute values of the two results are compared with `np.allclose`.
        """
        amp = 1e-2
        # A seeded, local generator keeps the noise identical on every run
        # (so a failure can be reproduced) and leaves NumPy's global random
        # state untouched for other tests.
        rng = np.random.RandomState(0)
        audio = amp * (rng.rand(64000).astype(np.float32) * 2.0 - 1.0)
        frame_size = 2048
        frame_rate = 250

        ld_th = spectral_ops.compute_loudness(audio,
                                              n_fft=frame_size,
                                              frame_rate=frame_rate,
                                              use_th=True)

        ld_np = spectral_ops.compute_loudness(audio,
                                              n_fft=frame_size,
                                              frame_rate=frame_rate,
                                              use_th=False)

        # Convert the `use_th=True` result explicitly, as the other loudness
        # tests do, rather than relying on implicit array conversion inside
        # np.allclose.
        assert np.allclose(np.abs(ld_np),
                           ld_th.abs().numpy(),
                           rtol=1e-3,
                           atol=1e-3)
Beispiel #4
0
 def test_th_compute_loudness_at_sample_rate(self, sample_rate,
                                             audio_len_sec):
     """Check loudness length and finiteness for a single sinusoid.

     NOTE(review): the test name mentions "th", but `use_th` is not passed
     here, so whichever default `compute_loudness` declares is what gets
     exercised -- confirm that this is the intended path.

     Args:
         sample_rate: Sample rate used for generation and loudness.
         audio_len_sec: Length of the generated audio.
     """
     n_frames_expected = int(self.frame_rate * audio_len_sec)
     signal = gen_np_sinusoid(self.frequency, self.amp, sample_rate,
                              audio_len_sec)
     result = spectral_ops.compute_loudness(signal, sample_rate,
                                            self.frame_rate)
     # One loudness value per frame, and none of them NaN/inf.
     assert len(result) == n_frames_expected
     assert np.all(np.isfinite(result))
Beispiel #5
0
    def test_compute_loudness_at_sample_rate_1d(self, sample_rate,
                                                audio_len_sec):
        """Check 1-D loudness length and finiteness for both `use_th` values.

        Args:
            sample_rate: Sample rate used for generation and loudness.
            audio_len_sec: Length of the generated audio.
        """
        signal = gen_np_sinusoid(self.frequency, self.amp, sample_rate,
                                 audio_len_sec)
        n_frames_expected = int(self.frame_rate * audio_len_sec)

        for th_flag in (False, True):
            result = spectral_ops.compute_loudness(signal,
                                                   sample_rate,
                                                   self.frame_rate,
                                                   use_th=th_flag)
            # The `use_th=True` result is converted to NumPy before checking.
            values = result.numpy() if th_flag else result
            assert len(values) == n_frames_expected
            assert np.all(np.isfinite(values))
Beispiel #6
0
    def forward(self, audio, target_audio):
        """Return the weighted multi-resolution spectral loss.

        For every size in `self.fft_sizes`, `spectral_ops.compute_mag` is
        evaluated on both inputs. Each term whose weight is positive adds
        `weight * mean_difference(target, value, self.loss_type)` to the
        total, where target/value are the magnitudes after that term's
        transform. An optional loudness term is added once at the end.

        Args:
            audio: Signal being scored.
            target_audio: Reference signal it is compared against.

        Returns:
            The accumulated loss; stays the Python float `0.0` when no term
            has a positive weight.
        """
        diff = spectral_ops.diff

        # (weight, transform) pairs, listed in the order in which the terms
        # are accumulated. Going by the weight names, axis 2 is treated as
        # time and axis 1 as frequency -- the layout itself is defined by
        # `compute_mag`, not here.
        terms = (
            (self.mag_weight, lambda mag: mag),
            (self.delta_time_weight, lambda mag: diff(mag, axis=2)),
            (self.delta_delta_time_weight,
             lambda mag: diff(diff(mag, axis=2), axis=2)),
            (self.delta_freq_weight, lambda mag: diff(mag, axis=1)),
            (self.delta_delta_freq_weight,
             lambda mag: diff(diff(mag, axis=1), axis=1)),
            # Log-magnitude term reuses the same spectrogram.
            (self.logmag_weight, lambda mag: spectral_ops.safe_log(mag)),
        )

        total = 0.0
        for fft_size in self.fft_sizes:
            ref_mag = spectral_ops.compute_mag(target_audio, size=fft_size)
            est_mag = spectral_ops.compute_mag(audio, size=fft_size)

            for weight, transform in terms:
                if weight > 0:
                    total += weight * mean_difference(
                        transform(ref_mag), transform(est_mag),
                        self.loss_type)

        if self.loudness_weight > 0:
            # Loudness is evaluated once, at the largest FFT size.
            n_fft = max(self.fft_sizes)
            ref_loudness = spectral_ops.compute_loudness(target_audio,
                                                         n_fft=n_fft,
                                                         use_th=True)
            est_loudness = spectral_ops.compute_loudness(audio,
                                                         n_fft=n_fft,
                                                         use_th=True)
            total += self.loudness_weight * mean_difference(
                ref_loudness, est_loudness, self.loss_type)

        return total
Beispiel #7
0
    def forward(self, audio, target_audio):
        """Return the weighted multi-resolution MFCC loss.

        For every size in `self.fft_sizes`, `spectral_ops.compute_mfcc` is
        evaluated on both inputs (20 Hz up to half of `self.sample_rate`,
        30 MFCC bins, 0.5 overlap). Each term whose weight is positive adds
        `weight * mean_difference(target, value, self.loss_type)` to the
        total. An optional loudness term is added once at the end.

        Args:
            audio: Signal being scored.
            target_audio: Reference signal it is compared against.

        Returns:
            The accumulated loss; stays the Python float `0.0` when no term
            has a positive weight.
        """
        diff = spectral_ops.diff

        # (weight, transform) pairs, listed in the order in which the terms
        # are accumulated. Going by the weight names, axis 2 is treated as
        # time and axis 1 as frequency -- the layout itself is defined by
        # `compute_mfcc`, not here.
        terms = (
            (self.mfcc_weight, lambda feat: feat),
            (self.delta_time_weight, lambda feat: diff(feat, axis=2)),
            (self.delta_delta_time_weight,
             lambda feat: diff(diff(feat, axis=2), axis=2)),
            (self.delta_freq_weight, lambda feat: diff(feat, axis=1)),
            (self.delta_delta_freq_weight,
             lambda feat: diff(diff(feat, axis=1), axis=1)),
        )

        total = 0.0
        for fft_size in self.fft_sizes:
            mfcc_kwargs = dict(
                fft_size=fft_size,
                sample_rate=self.sample_rate,
                lo_hz=20.0,
                hi_hz=self.sample_rate / 2,
                mfcc_bins=30,
                overlap=0.5,
            )
            ref_mfcc = spectral_ops.compute_mfcc(target_audio, **mfcc_kwargs)
            est_mfcc = spectral_ops.compute_mfcc(audio, **mfcc_kwargs)

            for weight, transform in terms:
                if weight > 0:
                    total += weight * mean_difference(
                        transform(ref_mfcc), transform(est_mfcc),
                        self.loss_type)

        if self.loudness_weight > 0:
            # Loudness is evaluated once, at the largest FFT size.
            n_fft = max(self.fft_sizes)
            ref_loudness = spectral_ops.compute_loudness(target_audio,
                                                         n_fft=n_fft,
                                                         use_th=True)
            est_loudness = spectral_ops.compute_loudness(audio,
                                                         n_fft=n_fft,
                                                         use_th=True)
            total += self.loudness_weight * mean_difference(
                ref_loudness, est_loudness, self.loss_type)

        return total