Esempio n. 1
0
    def test_tf_np_mel_tables(self):
        """Check that the numpy and TF mel weight matrices agree.

        Builds the matrix once with mel_table.SpectrogramToMelMatrix and once
        with tf.signal.linear_to_mel_weight_matrix from the same settings, then
        asserts that the two results are close.
        """
        frame_size = 400
        sample_rate = 16000.0

        # exponent of the power of two used as the FFT size for this frame
        fft_exponent = int(math.ceil(math.log(frame_size) / math.log(2.0)))
        fft_size = 2**fft_exponent

        # settings that both implementations take under the same keyword names
        mel_config = dict(
            num_mel_bins=80,
            num_spectrogram_bins=fft_size // 2 + 1,
            lower_edge_hertz=125.0,
            upper_edge_hertz=7600.0,
        )

        # numpy implementation (sample rate keyword is `audio_sample_rate`)
        mel_np = mel_table.SpectrogramToMelMatrix(
            audio_sample_rate=sample_rate, **mel_config)

        # TF implementation (sample rate keyword is `sample_rate`)
        mel_tf = tf.signal.linear_to_mel_weight_matrix(
            sample_rate=sample_rate, dtype=tf.float32, **mel_config)

        # both matrices have to match within the given tolerances
        self.assertAllClose(mel_np, mel_tf, rtol=1e-5, atol=1e-4)
Esempio n. 2
0
    def build(self, input_shape):
        """Precompute the mel weight matrix and build the parent layer.

        The DFT output size is derived from the last dimension of
        `input_shape` and stored in `self.feature_size`. When `use_tf_fft` is
        false, the mel matrix is computed with the numpy implementation,
        optionally cut down to its first `fft_mel_size` rows
        (`mel_non_zero_only`), and stored as a float32 TF constant.

        Args:
          input_shape: shape of the layer input; only the last entry is read
            here, the whole shape is forwarded to the parent `build`.
        """
        # number of bins produced by the DFT
        fft_size = self._compute_fft_size(int(input_shape[-1]))
        feature_size = fft_size // 2 + 1
        self.feature_size = feature_size

        # stays None unless the mel matrix gets truncated below
        fft_mel_size = None

        if not self.use_tf_fft:
            # mel matrix from the numpy implementation
            mel_kwargs = dict(
                num_mel_bins=self.num_mel_bins,
                num_spectrogram_bins=feature_size,
                audio_sample_rate=self.sample_rate,
                lower_edge_hertz=self.lower_edge_hertz,
                upper_edge_hertz=self.upper_edge_hertz,
            )
            # stored on self before _get_non_zero_mel_size() is called, since
            # that helper may read the attribute (not visible here - confirm)
            self.mel_weight_matrix = mel_table.SpectrogramToMelMatrix(
                **mel_kwargs)

            if self.mel_non_zero_only:
                fft_mel_size = self._get_non_zero_mel_size()
                truncated = self.mel_weight_matrix[:fft_mel_size, :]
                self.mel_weight_matrix = truncated

            self.mel_weight_matrix = tf.constant(
                self.mel_weight_matrix, dtype=tf.float32)

        super(MagnitudeRDFTmel, self).build(input_shape, fft_mel_size)
Esempio n. 3
0
    def build(self, input_shape):
        """Build the parent layer and precompute the mel weight matrix.

        The number of spectrogram bins is taken from the last dimension of
        `input_shape`. The matrix comes from
        tf.signal.linear_to_mel_weight_matrix when `use_tf` is set, otherwise
        from mel_table.SpectrogramToMelMatrix wrapped in a float32 TF constant.

        Args:
          input_shape: shape of the layer input; its last entry is used as
            the number of spectrogram bins.
        """
        super(MelSpectrogram, self).build(input_shape)

        # keyword arguments that both implementations name identically
        shared_kwargs = dict(
            num_mel_bins=self.num_mel_bins,
            num_spectrogram_bins=int(input_shape[-1]),
            lower_edge_hertz=self.lower_edge_hertz,
            upper_edge_hertz=self.upper_edge_hertz,
        )

        if not self.use_tf:
            # numpy implementation, converted to a TF constant afterwards
            mel_np = mel_table.SpectrogramToMelMatrix(
                audio_sample_rate=self.sample_rate, **shared_kwargs)
            self.mel_weight_matrix = tf.constant(mel_np, dtype=tf.float32)
        else:
            # TF implementation
            self.mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
                sample_rate=self.sample_rate,
                dtype=tf.float32,
                **shared_kwargs)