Beispiel #1
0
	def __init__(self, sampling_rate, window_length, hop_size, dynamic_range_dB=50, normalize=True):
		"""Remember the loader settings and build the analysis STFT system.

		Args:
			sampling_rate: kept on the instance as ``_sampling_rate``.
			window_length: kept as ``_window_length`` and handed to
				``GaussTruncTF`` as ``stft_channels``.
			hop_size: kept as ``_hop_size`` and handed to ``GaussTruncTF``
				as ``hop_size``.
			dynamic_range_dB: kept as ``_dynamic_range_dB`` (default 50).
			normalize: kept as ``_normalize`` (default True); this
				constructor does not otherwise use it.
		"""
		super(AudioLoader, self).__init__()

		# Plain configuration values, stored unchanged.
		self._sampling_rate = sampling_rate
		self._window_length = window_length
		self._hop_size = hop_size
		self._dynamic_range_dB = dynamic_range_dB
		self._normalize = normalize

		# The STFT system is created once and kept on the instance.
		analysis_system = GaussTruncTF(
			hop_size=hop_size,
			stft_channels=window_length,
		)
		self._anStft = analysis_system
Beispiel #2
0
 def pghi_stft(x):
     """Return the log spectrogram of ``x`` with a new leading axis.

     ``self`` is not a parameter here; it is resolved from the enclosing
     scope. ``hop_size`` and ``stft_channels`` are read from it when
     present, otherwise 256 and 512 are used.
     """
     # Hard-wired switch: the truncated Gaussian system is always chosen.
     use_truncated_window = True
     tf_class = GaussTruncTF if use_truncated_window else GaussTF

     hop = getattr(self, 'hop_size', 256)
     channels = getattr(self, 'stft_channels', 512)
     stft_system = tf_class(hop_size=hop, stft_channels=channels)

     log_magnitude = log_spectrogram(stft_system.spectrogram(x))
     # Insert a new axis at position 0 in front of the spectrogram.
     return np.expand_dims(log_magnitude, axis=0)
Beispiel #3
0
        def pghi_istft(x):
            """Invert a log spectrogram ``x`` back to a signal.

            ``x`` must expose ``.numpy()``; the resulting array has its
            axis 0 squeezed away before inversion. ``self`` is resolved
            from the enclosing scope and supplies ``hop_size`` /
            ``stft_channels`` when present (defaults 256 / 512).
            """
            # Hard-wired switch: the truncated Gaussian system is always chosen.
            use_truncated_window = True
            tf_class = GaussTruncTF if use_truncated_window else GaussTF

            hop = getattr(self, 'hop_size', 256)
            channels = getattr(self, 'stft_channels', 512)
            stft_system = tf_class(hop_size=hop, stft_channels=channels)

            # Drop the leading singleton axis, undo the log scaling, invert.
            log_magnitude = np.squeeze(x.numpy(), axis=0)
            magnitude = inv_log_spectrogram(log_magnitude)
            return stft_system.invert_spectrogram(magnitude)
Beispiel #4
0
class AudioLoader(object):
	"""Load audio files and turn them into rescaled log spectrograms."""

	def __init__(self, sampling_rate, window_length, hop_size, dynamic_range_dB=50, normalize=True):
		"""Store the settings and build the analysis STFT system.

		``window_length`` is passed to ``GaussTruncTF`` as ``stft_channels``
		and ``hop_size`` as ``hop_size``. ``normalize`` is stored but not
		read by any method of this class.
		"""
		super(AudioLoader, self).__init__()

		# Plain configuration values, stored unchanged.
		self._sampling_rate = sampling_rate
		self._window_length = window_length
		self._hop_size = hop_size
		self._dynamic_range_dB = dynamic_range_dB
		self._normalize = normalize

		analysis_system = GaussTruncTF(
			hop_size=hop_size,
			stft_channels=window_length,
		)
		self._anStft = analysis_system

	def hopSize(self):
		"""Return the hop size given at construction."""
		return self._hop_size

	def windowLength(self):
		"""Return the window length given at construction."""
		return self._window_length

	def loadSound(self, file_name):
		"""Load ``file_name`` with librosa and run ``preprocess_signal`` on it.

		The file is loaded at the stored sampling rate as float64; the
		sampling rate returned by librosa is discarded.
		"""
		audio, _ = librosa.load(
			file_name,
			sr=self._sampling_rate,
			dtype=np.float64,
		)
		return preprocess_signal(audio)

	def computeSpectrogram(self, audio):
		"""Return the rescaled log spectrogram of ``audio``.

		The signal is first cut to a multiple of the window length and then
		to a multiple of the hop size. The log spectrogram is divided by
		half the dynamic range and shifted by one.
		"""
		# Trim in two passes, in this order: window length, then hop size.
		for step in (self._window_length, self._hop_size):
			usable = len(audio) - np.mod(len(audio), step)
			audio = audio[:usable]

		magnitude = self._anStft.spectrogram(audio)
		log_magnitude = log_spectrogram(magnitude, dynamic_range_dB=self._dynamic_range_dB)

		half_range = self._dynamic_range_dB / 2
		return log_magnitude / half_range + 1

	def loadAsSpectrogram(self, file_name):
		"""Load ``file_name`` and return its rescaled log spectrogram."""
		return self.computeSpectrogram(self.loadSound(file_name))
Beispiel #5
0
def compute_mag_mel(y):
    '''Compute the scaled log-magnitude and log-mel spectrograms of a signal.

    All settings are read from the module-level ``p`` object.

    Args:
      y  : signal
    Returns:
      mel: log-mel spectrogram divided by ``p.mel_dynamic_range_dB`` and
           shifted by one, as float32. It is time-downsampled first when
           ``p.reduction_rate > 1``.
      mag: log-magnitude spectrogram divided by ``p.stft_dynamic_range_dB``
           and shifted by one, as float32. Asserted to lie in [0, 1].
    '''
    # Choose the time-frequency system once, then build it.
    tf_class = GaussTruncTF if p.use_truncated else GaussTF
    tfsystem = tf_class(hop_size=p.hop_size, stft_channels=p.stft_channels)

    # Linear magnitude spectrogram; the mel spectrogram is derived from this
    # linear version, before any log scaling is applied.
    linear_mag = tfsystem.spectrogram(y, normalize=p.normalize)
    mel = mel_spectrogram(
        linear_mag,
        stft_channels=p.stft_channels,
        n_mels=p.n_mels,
        fmin=p.fmin,
        fmax=p.fmax,
        sr=p.sr,
    )

    # Magnitude: to decibel, then rescale and shift.
    mag = log_spectrogram(linear_mag, dynamic_range_dB=p.stft_dynamic_range_dB)
    mag = mag / p.stft_dynamic_range_dB + 1
    assert np.max(mag) <= 1
    assert np.min(mag) >= 0

    # Optional time reduction happens on the linear mel spectrogram.
    if p.reduction_rate > 1:
        mel = downsample_tf_time(mel, p.reduction_rate)

    # Mel: to decibel, then rescale and shift.
    mel = log_spectrogram(mel, dynamic_range_dB=p.mel_dynamic_range_dB)
    mel = mel / p.mel_dynamic_range_dB + 1

    return mel.astype(np.float32), mag.astype(np.float32)
Beispiel #6
0
def test_stft_different_hop_size(a=128, M=1024, trunc=False):
    """Check dgt/idgt consistency when the hop size is overridden per call.

    The hop-256 transform must match every second entry along the second
    axis of the hop-128 transform, and both must reconstruct the signal.
    The reconstruction tolerance is 1e-10 for the truncated system and
    1e-12 otherwise.
    """
    tfsystem = GaussTruncTF(a, M) if trunc else GaussTF(a, M)
    tolerance = 1e-10 if trunc else 1e-12

    # Random signal in [-1, 1), scaled to unit norm.
    signal_len = 128 * 1024
    signal = np.random.rand(signal_len) * 2 - 1
    signal = signal / np.linalg.norm(signal)

    coeffs_128 = tfsystem.dgt(signal, hop_size=128)
    coeffs_256 = tfsystem.dgt(signal, hop_size=256)
    mismatch = np.sum(np.abs(coeffs_256 - coeffs_128[:, ::2]))
    assert mismatch < 1e-12

    rebuilt_256 = tfsystem.idgt(coeffs_256, hop_size=256)
    rebuilt_128 = tfsystem.idgt(coeffs_128, hop_size=128)
    assert np.linalg.norm(rebuilt_128 - signal) < tolerance
    assert np.linalg.norm(rebuilt_256 - signal) < tolerance
Beispiel #7
0
class SpectrogramInverter(object):
	"""Invert spectrograms with a ``GaussTruncTF`` system and score the result."""

	def __init__(self, fft_size, fft_hop_size):
		"""Store the hop size and build the STFT system.

		``fft_size`` is passed to ``GaussTruncTF`` as ``stft_channels`` and
		``fft_hop_size`` as ``hop_size``.
		"""
		super().__init__()
		self._hop_size = fft_hop_size
		stft_system = GaussTruncTF(hop_size=fft_hop_size, stft_channels=fft_size)
		self._anStft = stft_system

	def _magnitudeErr(self, targetSpectrogram, originalSpectrogram):
		"""Return the Frobenius norm of the magnitude difference, relative to the target's."""
		target_mag = np.abs(targetSpectrogram)
		difference = target_mag - np.abs(originalSpectrogram)
		numerator = np.linalg.norm(difference, 'fro')
		denominator = np.linalg.norm(np.abs(targetSpectrogram), 'fro')
		return numerator / denominator

	def invertSpectrograms(self, unprocessed_spectrograms):
		"""Invert each spectrogram of a batch into one row of the returned array.

		The output has ``shape[0]`` rows and ``hop_size * shape[2]`` columns,
		taken from the shape of ``unprocessed_spectrograms``.
		"""
		batch_size = unprocessed_spectrograms.shape[0]
		signal_length = self._hop_size*unprocessed_spectrograms.shape[2]
		signals = np.zeros([batch_size, signal_length])

		for row, item in enumerate(unprocessed_spectrograms):
			signals[row] = self._invertSpectrogram(item)
		return signals

	def projectionLoss(self, unprocessed_spectrograms):
		"""Invert the batch, re-analyse each signal and return the per-item projection loss.

		The last row of each re-analysed spectrogram is dropped before it is
		compared with the input spectrogram.
		"""
		signals = self.invertSpectrograms(unprocessed_spectrograms)
		losses = np.zeros([unprocessed_spectrograms.shape[0]])

		for row, target in enumerate(unprocessed_spectrograms):
			reanalysed = self._anStft.spectrogram(signals[row], normalize=False)
			losses[row] = projection_loss(reanalysed[:-1], target)
		return losses

	def projectionLossBetween(self, unprocessed_spectrograms, audio_signals):
		"""Return the projection loss between each given signal and its spectrogram.

		Each signal is analysed without normalisation; the last row of the
		result is dropped before the comparison with the spectrogram at the
		same index.
		"""
		losses = np.zeros([unprocessed_spectrograms.shape[0]])

		for row, signal in enumerate(audio_signals):
			reanalysed = self._anStft.spectrogram(signal, normalize=False)
			target = unprocessed_spectrograms[row]
			losses[row] = projection_loss(reanalysed[:-1], target)
		return losses

	def _invertSpectrogram(self, unprocessed_spectrogram):
		"""Append one row filled with the spectrogram's minimum, then invert.

		The extra row is added along axis 0 and holds the minimum value of
		the input, not zeros.
		"""
		floor_value = unprocessed_spectrogram.min()
		filler_row = np.ones_like(unprocessed_spectrogram[0:1, :])*floor_value
		padded = np.concatenate([unprocessed_spectrogram, filler_row], axis=0)

		return self._anStft.invert_spectrogram(padded)
Beispiel #8
0
def test_stft_different_channels(a=128, M=1024, trunc=False):
    """Check dgt/idgt consistency when the channel count is overridden per call.

    The 512-channel transform must match every second entry along the first
    axis of the 1024-channel transform, and both must reconstruct the
    signal. The reconstruction tolerance is 1e-5 for the truncated system
    and 1e-12 otherwise.
    """
    tfsystem = GaussTruncTF(a, M) if trunc else GaussTF(a, M)
    tolerance = 1e-5 if trunc else 1e-12

    # Random signal in [-1, 1), scaled to unit norm.
    signal_len = 128 * 1024
    signal = np.random.rand(signal_len) * 2 - 1
    signal = signal / np.linalg.norm(signal)

    coeffs_1024 = tfsystem.dgt(signal, stft_channels=1024)
    coeffs_512 = tfsystem.dgt(signal, stft_channels=512)
    mismatch = np.sum(np.abs(coeffs_512 - coeffs_1024[::2, :]))
    assert mismatch < 1e-12

    rebuilt_1024 = tfsystem.idgt(coeffs_1024, stft_channels=1024)
    rebuilt_512 = tfsystem.idgt(coeffs_512, stft_channels=512)
    assert np.linalg.norm(rebuilt_1024 - signal) < tolerance
    assert np.linalg.norm(rebuilt_512 - signal) < tolerance
Beispiel #9
0
def test_stft_different_length(a=128, M=1024, trunc=False):
    """Check dgt/idgt on a signal and on the same signal extended with zeros.

    Both signals must be reconstructed (tolerance 1e-10 for the truncated
    system, 1e-12 otherwise), and the leading part of the longer transform
    must match the shorter transform to within 1e-6.
    """
    signal_len = 128 * 1024
    tfsystem = GaussTruncTF(a, M) if trunc else GaussTF(a, M)
    tolerance = 1e-10 if trunc else 1e-12

    # Random unit-norm signal with the first and last 8*M samples zeroed.
    signal = np.random.rand(signal_len) * 2 - 1
    signal = signal / np.linalg.norm(signal)
    signal[:8 * M] = 0
    signal[-8 * M:] = 0

    # Pad on both sides, then drop the front padding: only the trailing
    # zeros remain, so the original samples keep their positions.
    padded = np.pad(signal.copy(), signal_len)
    extended = padded[signal_len:]

    coeffs = tfsystem.dgt(signal)
    rebuilt = tfsystem.idgt(coeffs)
    coeffs_ext = tfsystem.dgt(extended)
    rebuilt_ext = tfsystem.idgt(coeffs_ext)

    assert np.linalg.norm(rebuilt - signal) < tolerance
    assert np.linalg.norm(rebuilt_ext - extended) < tolerance
    overlap = coeffs_ext[:, :coeffs.shape[1]]
    assert np.sum(np.abs(overlap - coeffs)) < 1e-6
Beispiel #10
0
	def __init__(self, fft_size, fft_hop_size):
		"""Store the hop size and build the STFT system.

		Args:
			fft_size: passed to ``GaussTruncTF`` as ``stft_channels``.
			fft_hop_size: kept as ``_hop_size`` and passed to
				``GaussTruncTF`` as ``hop_size``.
		"""
		super().__init__()
		self._hop_size = fft_hop_size
		stft_system = GaussTruncTF(
			hop_size=fft_hop_size,
			stft_channels=fft_size,
		)
		self._anStft = stft_system