Beispiel #1
0
def beat_tracking_example():
	"""Estimate and print the tempo of librosa's bundled 'nutcracker' clip.

	Loads the example recording, runs librosa's default beat tracker, prints
	the estimated tempo in beats per minute and converts the detected beat
	frames into timestamps.

	Returns:
		None. The tempo is printed; the beat timestamps are only computed.
	"""
	# Imported locally so the function does not rely on a module-level import.
	import numpy as np

	# Get the file path to an included audio example.
	filename = librosa.example('nutcracker')

	# Load the audio as a waveform 'y' and store the sampling rate as 'sr'.
	y, sr = librosa.load(filename)

	# Run the default beat tracker.
	tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)

	# NOTE(review): beat_track may hand the tempo back as a NumPy array rather
	# than a plain float (seen with recent librosa releases -- confirm against
	# the installed version). '{:.2f}' rejects arrays, so reduce to a scalar.
	tempo_bpm = float(np.atleast_1d(tempo)[0])
	print('Estimated tempo: {:.2f} beats per minute'.format(tempo_bpm))

	# Convert the frame indices of beat events into timestamps.
	beat_times = librosa.frames_to_time(beat_frames, sr=sr)
Beispiel #2
0
def tensorflow_example():
	"""Apply the TensorFlow SpecAugment implementation to a mel spectrogram.

	Builds a 256-band mel spectrogram of librosa's 'nutcracker' clip, prints
	its shape and dtype before and after augmentation, and shows both
	spectrograms with the package's visualization helper.
	"""
	import SpecAugment.spec_augment_tensorflow as spec_augment_tf

	# Other inputs that can be substituted here: librosa.example('trumpet')
	# or a local file such as './stereo.ogg'.
	audio_path = librosa.example('nutcracker')
	waveform, rate = librosa.load(audio_path, sr=None, mono=True)

	original = librosa.feature.melspectrogram(y=waveform, sr=rate, n_mels=256, hop_length=128, fmax=8000)
	print('Mel spectrogram: shape = {}, dtype = {}.'.format(original.shape, original.dtype))

	augmented = spec_augment_tf.spec_augment(mel_spectrogram=original)
	print('Mel spectrogram (augmented): shape = {}, dtype = {}.'.format(augmented.shape, augmented.dtype))

	# Show the spectrogram before and after augmentation, in that order.
	for spectrogram, title in ((original, 'Before augmentation'), (augmented, 'After augmentation')):
		spec_augment_tf.visualization_spectrogram(spectrogram, title)
Beispiel #3
0
    def __init__(self, scale=0.25, sample_rate=22050, examples=None):
        """Store the configuration and prepare the example waveforms.

        Args:
            scale: Stored on the instance unchanged; how it is used is not
                visible in this method.
            sample_rate: Sample rate the bundled examples are brought to.
            examples: Optional pre-built collection of examples. When given it
                is stored as-is and nothing is loaded. When None, a fixed set
                of librosa example recordings is loaded and converted to
                torch tensors.
        """
        self.scale = scale
        self.sample_rate = sample_rate

        if examples is not None:
            self.examples = examples
            return

        example_names = [
            'brahms', 'choice', 'fishin', 'nutcracker', 'trumpet',
            'vibeace'
        ]
        self.examples = []
        for name in example_names:
            # Use a distinct name for the loaded rate so it cannot be confused
            # with the `sample_rate` argument.
            waveform, loaded_rate = librosa.load(librosa.example(name))
            if loaded_rate != self.sample_rate:
                # The sample rates are passed by keyword: they are
                # keyword-only in librosa 0.10+, and the keyword form is
                # also accepted by older releases.
                waveform = librosa.core.resample(waveform,
                                                 orig_sr=loaded_rate,
                                                 target_sr=self.sample_rate)
            self.examples.append(torch.from_numpy(waveform))
Beispiel #4
0
def pytorch_example():
	"""Apply the PyTorch SpecAugment implementation to a mel spectrogram.

	Builds a 256-band mel spectrogram of librosa's 'nutcracker' clip, turns
	it into a tensor with a leading axis of size one, prints its shape and
	dtype before and after augmentation, and shows both spectrograms.
	"""
	import torch
	import SpecAugment.spec_augment_pytorch as spec_augment_pt

	# Other inputs that can be substituted here: librosa.example('trumpet')
	# or a local file such as './stereo.ogg'.
	audio_path = librosa.example('nutcracker')
	waveform, rate = librosa.load(audio_path, sr=None, mono=True)

	mel_array = librosa.feature.melspectrogram(y=waveform, sr=rate, n_mels=256, hop_length=128, fmax=8000)

	# Convert to a tensor and add a leading axis in front of the spectrogram.
	original = torch.unsqueeze(torch.tensor(mel_array), dim=0)
	print('Mel spectrogram: shape = {}, dtype = {}.'.format(original.shape, original.dtype))

	augmented = spec_augment_pt.spec_augment(
		mel_spectrogram=original,
		time_warping_para=50,
		frequency_masking_para=50,
		time_masking_para=1000,
		frequency_mask_num=2,
		time_mask_num=2,
	)
	print('Mel spectrogram (augmented): shape = {}, dtype = {}.'.format(augmented.shape, augmented.dtype))

	# Show the spectrogram before and after augmentation, in that order.
	for spectrogram, title in ((original, 'Before augmentation'), (augmented, 'After augmentation')):
		spec_augment_pt.visualization_spectrogram(spectrogram, title)
Beispiel #5
0
def test_example_fail():
    """Request a librosa example using a key that presumably matches no track.

    NOTE(review): no expected-failure marker (e.g. an xfail/raises decorator)
    is visible on this test here -- confirm how the failure is asserted.
    """
    bogus_key = "no such track"
    librosa.example(bogus_key)
Beispiel #6
0
def test_example(key, hq):
    """Check that the path librosa returns for an example exists on disk.

    Args:
        key: Example name passed to ``librosa.example``.
        hq: Forwarded to ``librosa.example`` as its ``hq`` argument.
    """
    example_path = librosa.example(key, hq=hq)
    assert os.path.exists(example_path)
Beispiel #7
0
# Resample the signal from 44.1 kHz to 22.05 kHz and plot it over `t_`.
y_ = librosa.resample(y, orig_sr=44100, target_sr=22050)
plt.plot(t_, y_)

# Magnitude of the short-time Fourier transform of the resampled signal.
S_ = np.abs(librosa.stft(y_))

# Also plot the spectrogram of the resampled signal.
librosa.display.specshow(S_)

# Q: What was different this time? Why is this method better?
# A: librosa.resample low-pass filters the signal before lowering the sample
#    rate, so the high-frequency components of the signal no longer alias

#############################################################################
import math as mt

# Load the bundled 'brahms' recording and its sample rate.
y_brahms, sr_brahms = librosa.load(librosa.example('brahms'))

# Duration of the recording in seconds.
dur_brahms = y_brahms.shape[0] / sr_brahms

# Time axis with one entry per sample: build it for the duration rounded up
# to whole seconds, then trim it to the actual number of samples.
t_brahms = np.linspace(0,
                       mt.ceil(dur_brahms),
                       mt.ceil(dur_brahms) * sr_brahms,
                       endpoint=False)
t_brahms = t_brahms[0:y_brahms.shape[0]]

plt.plot(t_brahms, y_brahms)

# Downsample to half the original rate.
sr_brahms_down = sr_brahms / 2
y_brahms_down = librosa.resample(y_brahms,
                                 orig_sr=sr_brahms,
                                 target_sr=sr_brahms_down)

# The downsampled signal has half as many samples spaced twice as far apart,
# so it needs its own time axis. Reusing the start of `t_brahms` would squeeze
# the waveform into the first half of the plot.
t_brahms_down = np.arange(y_brahms_down.shape[0]) / sr_brahms_down
plt.plot(t_brahms_down, y_brahms_down)
Beispiel #8
0
def fp():
    """Build a ``Fingerprint`` from librosa's bundled 'nutcracker' recording.

    Returns:
        The ``Fingerprint`` constructed from the loaded samples and the
        sample rate reported by ``librosa.load``.
    """
    samples, sample_rate = librosa.load(librosa.example('nutcracker'))
    return Fingerprint(samples, sample_rate)
    def getPoints(self):
        """Return the two end points of a horizontal segment.

        Both points share the y coordinate ``y + max_height - height``; the
        first sits at ``x`` and the second at ``x + width``.

        Returns:
            A list with the left and right ``(x, y)`` tuples, in that order.
        """
        edge_y = self.y + self.max_height - self.height
        left_point = (self.x, edge_y)
        right_point = (self.x + self.width, edge_y)
        return [left_point, right_point]

    def clamp(self, min_value, max_value, value):
        """Limit ``value`` to the range ``[min_value, max_value]``.

        The lower bound is checked first, so it wins if the bounds are
        given in the wrong order and ``value`` is below ``min_value``.

        Returns:
            ``min_value`` if ``value`` is below it, ``max_value`` if ``value``
            is above it, otherwise ``value`` itself.
        """
        if value < min_value:
            return min_value
        return max_value if value > max_value else value


if __name__ == "__main__":

    # Audio source: the 'nutcracker' sample shipped with librosa.
    # To analyse your own recording, point `filename` at it instead,
    # e.g. filename = "Song.wav"
    filename = librosa.example('nutcracker')

    # timeSeries: 1-dimensional numpy.ndarray of floating-point values
    # sampleRate: number of samples recorded per second
    timeSeries, sampleRate = librosa.load(filename)

    # Short-time Fourier transform (a matrix over frequency and time).
    # hop_length: number of audio samples between adjacent frames
    # n_fft: number of samples in each frame
    complex_spectrum = librosa.stft(timeSeries, hop_length=512, n_fft=2048 * 8)
    stft = np.abs(complex_spectrum)

    # Convert amplitude to decibels, relative to the largest magnitude.
    D = librosa.amplitude_to_db(stft, ref=np.max)
Beispiel #10
0
import librosa
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.path as path
import matplotlib.animation as animation

# Load the bundled 'brahms' recording and take the magnitude of its STFT,
# using a window one tenth of the signal length.
# n_fft is passed by keyword: it is keyword-only in librosa 0.10+, and the
# keyword form is also accepted by older releases.
y, sr = librosa.load(librosa.example('brahms'))
S = np.abs(librosa.stft(y, n_fft=len(y) // 10))

# NOTE(review): librosa.stft documents its output as (frequency bins, frames),
# so `frames` receives the number of frequency rows and `bins` the number of
# time frames -- confirm this naming/ordering is what the animation expects.
frames, bins = S.shape
n = np.zeros(bins) + 50  # initial height of every bar
bins = np.arange(bins + 1)  # bar edges: one more edge than there are bars

# Corner coordinates of each bar.
left = np.array(bins[:-1])
right = np.array(bins[1:])
bottom = np.zeros(len(left))
top = bottom + n
nrects = len(left)

# Each bar is drawn as one closed path of five vertices:
# one MOVETO, three LINETO and one CLOSEPOLY.
nverts = nrects * (1 + 3 + 1)
verts = np.zeros((nverts, 2))
codes = np.ones(nverts, int) * path.Path.LINETO
codes[0::5] = path.Path.MOVETO
codes[4::5] = path.Path.CLOSEPOLY
verts[0::5, 0] = left
verts[0::5, 1] = bottom
verts[1::5, 0] = left
verts[1::5, 1] = top
verts[2::5, 0] = right
Beispiel #11
0
def data_augmentation_example():
	"""Plot four simple waveform augmentations of a librosa example clip.

	Loads the bundled 'nutcracker' recording and shows, each in its own
	figure, the original waveform followed by versions with injected noise,
	a random time shift, a pitch shift and a time stretch. Blocks in
	``plt.show()`` at the end.
	"""
	y, sr = librosa.load(librosa.example('nutcracker'))
	# Other bundled clips that can be substituted here:
	# 'trumpet', 'brahms', or 'vibeace' (with hq=True).

	def plot_waveform(signal, title):
		"""Draw `signal` in a new 10x4 figure with the given title."""
		plt.figure(figsize=(10, 4))
		librosa.display.waveshow(signal, sr=sr, x_axis='time')
		plt.title(title)
		plt.tight_layout()

	plot_waveform(y, 'Original')

	#--------------------
	# Inject noise.
	def inject_noise(y, noise_factor):
		"""Add standard-normal noise scaled by `noise_factor` to `y`."""
		noise = np.random.randn(len(y))
		augmented = y + noise_factor * noise
		# Cast back to the input's data type.
		return augmented.astype(y.dtype)

	noise_factor = 0.02
	plot_waveform(inject_noise(y, noise_factor), 'Noise Injection')

	#--------------------
	# Shift time.
	def shift_time(y, sr, shift_max, shift_direction):
		"""Roll `y` by a random number of samples and silence the wrapped part.

		The shift magnitude is drawn from [0, sr * shift_max). The sign is
		flipped for 'right', and flipped at random for 'both'.
		"""
		# int() keeps randint's bound integral when shift_max is fractional.
		shift = np.random.randint(int(sr * shift_max))
		if shift_direction == 'right':
			shift = -shift
		elif shift_direction == 'both':
			if np.random.randint(0, 2) == 1:
				shift = -shift

		augmented = np.roll(y, shift)
		# Silence the samples that wrapped around. A shift of 0 wraps
		# nothing, so the signal must be left untouched; slicing with
		# [0:] would otherwise zero the whole signal.
		if shift > 0:
			augmented[:shift] = 0
		elif shift < 0:
			augmented[shift:] = 0
		return augmented

	shift_max = 10
	shift_direction = 'right'
	plot_waveform(shift_time(y, sr, shift_max, shift_direction), 'Time Shift')

	#--------------------
	# Change pitch.
	pitch_factor = 0.2
	y_augmented = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor)
	plot_waveform(y_augmented, 'Pitch Shift')

	#--------------------
	# Change speed.
	#	Stretch the time series by a fixed rate.
	#	rate < 1 slows the signal down (e.g. 0.8); rate > 1 speeds it up (e.g. 1.2).
	stretch_factor = 0.8
	y_augmented = librosa.effects.time_stretch(y, rate=stretch_factor)
	plot_waveform(y_augmented, 'Time Stretch')

	plt.show()