def test_tf_np_mel_tables(self):
    """Check that the numpy mel table agrees with the TF mel weight matrix.

    Both matrices are built with identical settings (80 mel bins,
    125-7600 Hz, 16 kHz sample rate) and compared with `assertAllClose`.
    """
    mel_bins = 80
    hz_low = 125.0
    hz_high = 7600.0
    rate = 16000.0

    # FFT length is 2 raised to ceil(log2(frame length)).
    frame_len = 400
    log2_frame = math.log(frame_len) / math.log(2.0)
    fft_len = 2**int(math.ceil(log2_frame))

    # Both implementations are given the same number of spectrogram bins.
    spectrum_bins = fft_len // 2 + 1

    # numpy reference table
    np_matrix = mel_table.SpectrogramToMelMatrix(
        num_mel_bins=mel_bins,
        num_spectrogram_bins=spectrum_bins,
        audio_sample_rate=rate,
        lower_edge_hertz=hz_low,
        upper_edge_hertz=hz_high)

    # TensorFlow table
    tf_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=mel_bins,
        num_spectrogram_bins=spectrum_bins,
        sample_rate=rate,
        lower_edge_hertz=hz_low,
        upper_edge_hertz=hz_high,
        dtype=tf.float32)

    # The two tables must agree within the given tolerances.
    self.assertAllClose(np_matrix, tf_matrix, rtol=1e-5, atol=1e-4)
def build(self, input_shape):
    """Prepare the mel weight matrix (numpy FFT path) and build the parent.

    Args:
      input_shape: shape of the layer input; only its last dimension is
        used, to derive the number of DFT output bins.

    When `use_tf_fft` is false the mel table is precomputed with numpy,
    optionally cropped along its first axis (`mel_non_zero_only`), and
    stored as a float32 TF constant. When `use_tf_fft` is true nothing is
    precomputed here and the parent receives `None` as the mel size.
    """
    # Number of bins at the output of the DFT.
    dft_bins = self._compute_fft_size(int(input_shape[-1])) // 2 + 1
    self.feature_size = dft_bins

    # Stays None unless the mel table is cropped below.
    cropped_size = None

    if not self.use_tf_fft:
        mel_args = dict(
            num_mel_bins=self.num_mel_bins,
            num_spectrogram_bins=dft_bins,
            audio_sample_rate=self.sample_rate,
            lower_edge_hertz=self.lower_edge_hertz,
            upper_edge_hertz=self.upper_edge_hertz)

        # The numpy table is stored on self before any cropping.
        # NOTE(review): _get_non_zero_mel_size() presumably reads
        # self.mel_weight_matrix, so keep this assignment first - confirm.
        self.mel_weight_matrix = mel_table.SpectrogramToMelMatrix(**mel_args)

        if self.mel_non_zero_only:
            cropped_size = self._get_non_zero_mel_size()
            # Keep only the first `cropped_size` rows of the table.
            self.mel_weight_matrix = self.mel_weight_matrix[:cropped_size, :]

        # Replace the numpy table with a float32 TF constant.
        self.mel_weight_matrix = tf.constant(
            self.mel_weight_matrix, dtype=tf.float32)

    super(MagnitudeRDFTmel, self).build(input_shape, cropped_size)
def build(self, input_shape):
    """Build the parent layer, then precompute the mel weight matrix.

    Args:
      input_shape: shape of the layer input; its last dimension is used as
        the number of spectrogram bins of the mel matrix.

    The matrix comes from `tf.signal` when `use_tf` is true; otherwise it
    is computed by the numpy `mel_table` and wrapped in a float32 TF
    constant. Either way it is stored in `self.mel_weight_matrix`.
    """
    super(MelSpectrogram, self).build(input_shape)

    # Arguments that both implementations name identically.
    shared_args = dict(
        num_mel_bins=self.num_mel_bins,
        num_spectrogram_bins=int(input_shape[-1]),
        lower_edge_hertz=self.lower_edge_hertz,
        upper_edge_hertz=self.upper_edge_hertz)

    if self.use_tf:
        # TF implementation (takes the rate as `sample_rate`).
        self.mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
            sample_rate=self.sample_rate, dtype=tf.float32, **shared_args)
        return

    # numpy implementation (takes the rate as `audio_sample_rate`).
    np_matrix = mel_table.SpectrogramToMelMatrix(
        audio_sample_rate=self.sample_rate, **shared_args)
    self.mel_weight_matrix = tf.constant(np_matrix, dtype=tf.float32)