예제 #1
0
class AudioLoader(object):
    """Load audio files and turn them into rescaled log spectrograms.

    The time-frequency analysis is delegated to a ``GaussTruncTF`` system
    built from ``hop_size`` and ``window_length`` (the latter is passed as
    the number of STFT channels).
    """

    def __init__(self, sampling_rate, window_length, hop_size, dynamic_range_dB=50, normalize=True):
        """Remember the analysis settings and build the STFT system.

        Args:
            sampling_rate: Forwarded to ``librosa.load`` as ``sr``.
            window_length: Passed to ``GaussTruncTF`` as ``stft_channels``;
                also used to trim signals in ``computeSpectrogram``.
            hop_size: Passed to ``GaussTruncTF`` as ``hop_size``; also used
                to trim signals in ``computeSpectrogram``.
            dynamic_range_dB: Forwarded to ``log_spectrogram`` and used to
                rescale its output.
            normalize: Stored on the instance. NOTE(review): no method of
                this class reads it.
        """
        super(AudioLoader, self).__init__()

        self._sampling_rate = sampling_rate
        self._dynamic_range_dB = dynamic_range_dB
        self._normalize = normalize
        self._window_length = window_length
        self._hop_size = hop_size
        self._anStft = GaussTruncTF(hop_size=hop_size, stft_channels=window_length)

    def hopSize(self):
        """Return the hop size given at construction."""
        return self._hop_size

    def windowLength(self):
        """Return the window length given at construction."""
        return self._window_length

    def loadSound(self, file_name):
        """Load ``file_name`` as float64 audio and run ``preprocess_signal`` on it."""
        # librosa also returns the sampling rate; it is not needed here.
        samples, _ = librosa.load(file_name, sr=self._sampling_rate, dtype=np.float64)
        return preprocess_signal(samples)

    def computeSpectrogram(self, audio):
        """Compute the rescaled log spectrogram of ``audio``.

        The signal is first cut to a multiple of the window length and then
        to a multiple of the hop size. The log spectrogram is divided by half
        of the dynamic range and shifted by one (presumably mapping
        ``[-dynamic_range_dB, 0]`` onto ``[-1, 1]``; this depends on the
        output range of ``log_spectrogram`` -- TODO confirm).
        """
        # Drop trailing samples that do not fill a whole window ...
        usable = len(audio) - np.mod(len(audio), self._window_length)
        audio = audio[:usable]
        # ... and then any that do not fill a whole hop.
        usable = len(audio) - np.mod(len(audio), self._hop_size)
        audio = audio[:usable]

        magnitude = self._anStft.spectrogram(audio)
        log_magnitude = log_spectrogram(magnitude, dynamic_range_dB=self._dynamic_range_dB)

        half_range = self._dynamic_range_dB / 2
        return log_magnitude / half_range + 1

    def loadAsSpectrogram(self, file_name):
        """Load ``file_name`` and return its rescaled log spectrogram."""
        return self.computeSpectrogram(self.loadSound(file_name))
예제 #2
0
class SpectrogramInverter(object):
    """Invert spectrograms to audio and measure projection losses.

    Analysis and inversion are delegated to a ``GaussTruncTF`` system built
    from ``fft_hop_size`` and ``fft_size``.
    """

    def __init__(self, fft_size, fft_hop_size):
        """Build the STFT system.

        Args:
            fft_size: Passed to ``GaussTruncTF`` as ``stft_channels``.
            fft_hop_size: Passed to ``GaussTruncTF`` as ``hop_size``; also
                used to size the reconstructed signals.
        """
        super().__init__()
        self._hop_size = fft_hop_size
        self._anStft = GaussTruncTF(hop_size=fft_hop_size, stft_channels=fft_size)

    def _magnitudeErr(self, targetSpectrogram, originalSpectrogram):
        """Return the relative Frobenius-norm error between two magnitudes.

        Computes ``|| |target| - |original| ||_F / || |target| ||_F``.
        """
        target_mag = np.abs(targetSpectrogram)
        original_mag = np.abs(originalSpectrogram)
        error_norm = np.linalg.norm(target_mag - original_mag, 'fro')
        return error_norm / np.linalg.norm(target_mag, 'fro')

    def invertSpectrograms(self, unprocessed_spectrograms):
        """Invert every spectrogram of a batch.

        Returns an array with one row per input spectrogram and
        ``hop_size * shape[2]`` samples per row.
        """
        batch_shape = unprocessed_spectrograms.shape
        signal_count = batch_shape[0]
        signal_length = self._hop_size * batch_shape[2]
        reconstructed_audio_signals = np.zeros([signal_count, signal_length])

        for row, spectrogram in enumerate(unprocessed_spectrograms):
            reconstructed_audio_signals[row] = self._invertSpectrogram(spectrogram)
        return reconstructed_audio_signals

    def projectionLoss(self, unprocessed_spectrograms):
        """Return one projection loss per spectrogram of the batch.

        Each spectrogram is inverted to audio, re-analysed, and compared with
        the input through ``projection_loss``.
        """
        signals = self.invertSpectrograms(unprocessed_spectrograms)
        losses = np.zeros([unprocessed_spectrograms.shape[0]])

        for row, (signal, spectrogram) in enumerate(zip(signals, unprocessed_spectrograms)):
            reanalysed = self._anStft.spectrogram(signal, normalize=False)
            # The last row along axis 0 is dropped before comparing; the
            # inputs lack the extra row that _invertSpectrogram appends.
            losses[row] = projection_loss(reanalysed[:-1], spectrogram)
        return losses

    def projectionLossBetween(self, unprocessed_spectrograms, audio_signals):
        """Return projection losses between spectrograms and given signals.

        The result has ``unprocessed_spectrograms.shape[0]`` entries; entry
        ``i`` compares the analysis of ``audio_signals[i]`` with
        ``unprocessed_spectrograms[i]``.
        """
        losses = np.zeros([unprocessed_spectrograms.shape[0]])

        for row, signal in enumerate(audio_signals):
            analysed = self._anStft.spectrogram(signal, normalize=False)
            # Drop the last row along axis 0 before comparing.
            losses[row] = projection_loss(analysed[:-1], unprocessed_spectrograms[row])
        return losses

    def _invertSpectrogram(self, unprocessed_spectrogram):
        """Invert one spectrogram after appending a row along axis 0.

        The appended row is filled with the spectrogram's minimum value
        (not with zeros).
        """
        floor_row = np.full_like(unprocessed_spectrogram[0:1, :],
                                 unprocessed_spectrogram.min())
        padded = np.concatenate([unprocessed_spectrogram, floor_row], axis=0)
        return self._anStft.invert_spectrogram(padded)
예제 #3
0
 def pghi_stft(x):
     """Return the log spectrogram of ``x`` with a leading axis of size 1.

     ``self`` is not a parameter: it is resolved from the enclosing scope,
     which is outside this excerpt. Its ``hop_size`` and ``stft_channels``
     attributes are used when present, otherwise 256 and 512.
     """
     # Hard-coded switch between the truncated and full Gaussian systems.
     use_truncated_window = True

     hop_size = getattr(self, 'hop_size', 256)
     stft_channels = getattr(self, 'stft_channels', 512)
     tf_class = GaussTruncTF if use_truncated_window else GaussTF
     stft_system = tf_class(hop_size=hop_size, stft_channels=stft_channels)

     log_Y = log_spectrogram(stft_system.spectrogram(x))
     return np.expand_dims(log_Y, axis=0)
예제 #4
0
def compute_mag_mel(y):
    '''Compute log-magnitude and log-mel spectrograms from a signal.

    All settings are read from the module-level parameter object ``p``.

    Args:
      y  : signal
    Returns:
      mel: float32 array; the log-mel spectrogram divided by
           ``p.mel_dynamic_range_dB`` and shifted by one. It is
           time-downsampled first when ``p.reduction_rate > 1``.
      mag: float32 array; the log-magnitude spectrogram divided by
           ``p.stft_dynamic_range_dB`` and shifted by one, asserted to lie
           in [0, 1].
      NOTE(review): the earlier documentation gave the shapes as
      (T, n_mels) and (T, 1+stft_channels/2); this is not verifiable from
      this function alone.
    '''
    # Pick the truncated or the full Gaussian time-frequency system.
    tf_class = GaussTruncTF if p.use_truncated else GaussTF
    tfsystem = tf_class(hop_size=p.hop_size, stft_channels=p.stft_channels)

    # Linear magnitude spectrogram; the mel spectrogram is derived from it
    # before any conversion to decibels.
    linear_mag = tfsystem.spectrogram(y, normalize=p.normalize)
    mel = mel_spectrogram(
        linear_mag,
        stft_channels=p.stft_channels,
        n_mels=p.n_mels,
        fmin=p.fmin,
        fmax=p.fmax,
        sr=p.sr,
    )

    # Magnitude to decibels, rescaled by the dynamic range and shifted by one.
    stft_range = p.stft_dynamic_range_dB
    mag = log_spectrogram(linear_mag, dynamic_range_dB=stft_range) / stft_range + 1
    assert (np.max(mag) <= 1)
    assert (np.min(mag) >= 0)

    # Optional reduction along time, applied to the linear mel spectrogram.
    if p.reduction_rate > 1:
        mel = downsample_tf_time(mel, p.reduction_rate)

    mel_range = p.mel_dynamic_range_dB
    mel = log_spectrogram(mel, dynamic_range_dB=mel_range) / mel_range + 1

    return mel.astype(np.float32), mag.astype(np.float32)