def plot_spec(signal, sr=16000, win_length=None, hop_length=None, return_spec=False):
    if win_length is None:
        win_length = int(sr * 0.025)
        
    if hop_length is None:
        hop_length = int(sr * 0.010)
        
    Sxx = librosa.core.stft(
        signal,
        win_length=win_length,
        hop_length=hop_length,
        n_fft=4096
    )

    spec = librosa.amplitude_to_db(np.abs(Sxx), ref=np.max)
    specshow(
        spec,
        sr=sr,
        x_axis='time',
        y_axis='hz',
        cmap='gray_r'
    )
    plt.colorbar(format='%+2.0f dB')
    
    if return_spec:
        return spec
Example #2
def get_spectrograms(sound_file):
    '''Returns normalized log(melspectrogram) and log(magnitude) from `sound_file`.
    Args:
      sound_file: A string. The full path of a sound file.

    Returns:
      mel: A 2d array of shape (T, n_mels) <- Transposed
      done: A 1d int32 array of ones with length T
      mag: A 2d array of shape (T, 1+n_fft/2) <- Transposed
    '''
    # Loading sound file
    y, sr = librosa.load(sound_file, sr=hp.sr)

    # Trimming
    y, _ = librosa.effects.trim(y)

    # Preemphasis
    y = np.append(y[0], y[1:] - hp.preemphasis * y[:-1])

    # stft
    linear = librosa.stft(y=y,
                          n_fft=hp.n_fft,
                          hop_length=hp.hop_length,
                          win_length=hp.win_length)

    # magnitude spectrogram
    mag = np.abs(linear)  # (1+n_fft//2, T)

    # mel spectrogram
    mel_basis = librosa.filters.mel(hp.sr, hp.n_fft, hp.n_mels)  # (n_mels, 1+n_fft//2)
    mel = np.dot(mel_basis, mag)  # (n_mels, T)

    # "Done" flags: a vector of ones, one per frame
    done = np.ones_like(mel[0, :]).astype(np.int32)

    # to decibel
    mel = librosa.amplitude_to_db(mel)
    mag = librosa.amplitude_to_db(mag)

    # normalize
    mel = np.clip((mel - hp.ref_db + hp.max_db) / hp.max_db, 0, 1)
    mag = np.clip((mag - hp.ref_db + hp.max_db) / hp.max_db, 0, 1)

    # Transpose
    mel = mel.T.astype(np.float32)  # (T, n_mels)
    mag = mag.T.astype(np.float32)  # (T, 1+n_fft//2)

    return mel, done, mag
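get_spectrograms depends on a hyperparameter module hp that is not shown here. A minimal stand-in with typical Tacotron-style values; the exact numbers are assumptions, not taken from the source:

class hp:  # hypothetical stand-in for the project's hyperparameter module
    sr = 22050          # sampling rate
    n_fft = 2048        # FFT size
    win_length = 1102   # ~50 ms window (assumed)
    hop_length = 276    # ~12.5 ms hop (assumed)
    n_mels = 80         # number of mel bands
    preemphasis = 0.97  # pre-emphasis coefficient
    ref_db = 20         # reference level in dB (assumed)
    max_db = 100        # normalization dynamic range in dB (assumed)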
Example #3
    def process(self, y, sample_rate):

        X = librosa.feature.melspectrogram(
            y, sr=sample_rate, n_mels=self.n_mels,
            n_fft=self.n_fft_, hop_length=self.hop_length_,
            power=2.0)

        return librosa.amplitude_to_db(X, ref=1.0, amin=1e-5, top_db=80.0)
Example #4
def test_sharex_waveplot_ms():

    # Correct time range ~= 4.6 s or 4600ms
    # Due to shared x_axis, both plots are plotted in 'ms'.
    plt.figure(figsize=(8, 8))
    ax = plt.subplot(2, 1, 1)
    librosa.display.waveplot(y, sr)
    plt.subplot(2, 1, 2, sharex=ax)
    librosa.display.specshow(librosa.amplitude_to_db(S_abs, ref=np.max), x_axis='ms')
Example #5
def plot_log_power_specgram(sound_names, raw_sounds):
    i = 1
    for n, f in zip(sound_names, raw_sounds):
        plt.subplot(10, 1, i)
        D = librosa.amplitude_to_db(np.abs(librosa.stft(f))**2, ref=np.max)
        librosa.display.specshow(D, x_axis='time', y_axis='log')
        plt.title(n.title())
        i += 1
    plt.suptitle("Figure 3: Log power spectrogram",
                 x=0.5, y=0.915, fontsize=18)
    plt.show()
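One detail worth noting above: np.abs(librosa.stft(f))**2 is already a power spectrogram, so amplitude_to_db applies 20*log10 to squared values, i.e. 40*log10 of the magnitude. If conventional power-dB values are wanted, the matching converter would be:

D = librosa.power_to_db(np.abs(librosa.stft(f))**2, ref=np.max)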
Example #6
File: test_core.py Project: dpwe/librosa
def test_amplitude_to_db():

    srand()

    NOISE_FLOOR = 1e-6

    # Make some noise
    x = np.abs(np.random.randn(1000)) + NOISE_FLOOR

    db1 = librosa.amplitude_to_db(x, top_db=None)
    db2 = librosa.logamplitude(x**2, top_db=None)

    assert np.allclose(db1, db2)
Example #7
File: test_core.py Project: dpwe/librosa
def test_db_to_amplitude():

    srand()

    NOISE_FLOOR = 1e-6

    # Make some noise
    x = np.abs(np.random.randn(1000)) + NOISE_FLOOR

    db = librosa.amplitude_to_db(x, top_db=None)
    x2 = librosa.db_to_amplitude(db)

    assert np.allclose(x, x2)
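With top_db=None and the default ref=1.0, amplitude_to_db computes 20*log10(x) (flooring its input at amin=1e-5), and db_to_amplitude inverts it as 10**(db/20). A quick standalone check along the same lines:

import numpy as np
import librosa

x = np.array([1e-3, 0.5, 2.0])
db = librosa.amplitude_to_db(x, top_db=None)
assert np.allclose(db, 20 * np.log10(x))            # ref defaults to 1.0
assert np.allclose(librosa.db_to_amplitude(db), x)  # exact inverse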
Example #8
def dynamic_spectrogram(data, filename, block_nb=0, ref=np.max, display=False):
    """ Compute the spectrogram of a time serie of samples.

    The dynamic spectrogram is obtained by computing the the signal in the
    frequency domain and display the spectrogram.

    Args:
        data (array): 1D array of audio data.
        display (bool): Boolean to plot or save the current spectrogram.

    Returns:
        None

    Todo:
        - remove the padding/margin around the plot
        - Add a path and a name where to save the plots

    """
    data_freq = librosa.stft(data)
    data_freq_db = librosa.amplitude_to_db(data_freq, ref=ref)
    librosa.display.specshow(data_freq_db)

    if display:
        plt.ylabel('Frequency [Hz]')
        plt.xlabel('Time [samples]')
        plt.show()
    else:
        spec_path = utils.read_config('path', 'spectrograms')
        fname = os.path.splitext(os.path.basename(filename))
        fig_path = utils.create_filename(
                spec_path,
                'png',
                fname[0],
                'dynamic',
                block_nb)
        plt.savefig(fig_path)
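A quick interactive call, assuming a loaded mono clip; display=True shows the plot and avoids the utils-based save path ('clip.wav' is a hypothetical file):

data, sr = librosa.load('clip.wav')
dynamic_spectrogram(data, 'clip.wav', display=True)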
Example #9
header += "label"
header_list = header.split(",")

# Empty dataframe
features_set = pd.DataFrame(np.nan,
                            index=range(len(onlyfiles)),
                            columns=header_list)

# Extract the features and fill the dataframe
id = 0
for genre in clases:
    for file in os.listdir(audio_path + genre):
        song = audio_path + genre + "/" + file
        y, sr = librosa.load(song, mono=True, duration=30)
        stft = librosa.stft(y)
        stft_db = librosa.amplitude_to_db(abs(stft))
        spectogram = np.abs(librosa.stft(y))
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
        melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
        rms = librosa.feature.rms(S=spectogram)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        spectral_flatness = librosa.feature.spectral_flatness(y=y)
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        poly_features = librosa.feature.poly_features(y=y, sr=sr)
        tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y=y)
Example #10
def processMusic(socketID, melBins, core):

	mp3Path = "./Website/uploads/" + socketID + ".mp3"

	# script setup and housekeeping for BEATS PROCESSING
	#
	melBins = int(melBins)
	frameRate = 24
	sampleRate = 1000 * frameRate
	sout("")
	sout("")
	sout("Loading song into pyBlender.....")

	# load music into librosa
	#
	y, sr = librosa.load(mp3Path, sr=sampleRate)
	sout("")
	sout("**Loading Finished**")
	sout("")
	time.sleep(1)

	sout("")
	sout("")
	sout("Tempo/Beats Processing Engine")
	sout("--------------")
	sout("")

	# setup librosa functions/processing
	#
	tempo, beats = librosa.beat.beat_track(y=y, sr=sampleRate)
	beatArray = librosa.frames_to_time(beats, sr=sr)

	# sout out some information about what we're working with
	#
	sout("Beats: " + str(len(beats)))
	sout("Tempo: " + str(int(tempo)))

	# setup, fill, and save time and frame values for beat info
	#
	frameArray = []
	for i in beatArray:
		i = i * 100
		i = int(i)
		i = float(i)
		i = i / 100
		frameArray.append(int(i*24))

	np.savetxt("./Website/downloads/" +  socketID + ".bts", frameArray)

	# confirm script execution
	#
	sout("")
	sout("**Frame Array Construction Finished**")
	sout("")
	time.sleep(2)

	# script setup and housekeeping for VOLUME PROCESSING
	#
	samplesPerFrame = 2
	frameRate = 24
	sampleRate = int(1000 * frameRate)
	sampleHop = int((sampleRate/frameRate)/samplesPerFrame)
	scaleFactor = 100/80

	sout("")
	sout("")
	sout("Volume Processing Engine")
	sout("--------------")
	sout("")

	# setup librosa functions/processing
	#
	S = librosa.feature.melspectrogram(y=y, sr=sampleRate, n_mels=1, fmax=8000, hop_length = sampleHop)
	librosaMel = librosa.amplitude_to_db(S, ref=np.max)

	# sout out some information about what we're working with
	samples = len(librosaMel[0])
	sout("Samples: " + str(samples))

	frames = samples/samplesPerFrame
	sout("Anim. Frames: " + str(frames))

	sout("Samples/Frame: " + str(samplesPerFrame))

	sout("Seconds: " + str(frames/frameRate))

	# convert the bin based Mel spectrogram array to a time based array
	#
	timeArray = []
	for q in range(samples):
		tmpArry = []
		for r in range(1):
			tmpValue = ((librosaMel[r])[q])+80
			tmpArry.append(int(tmpValue*scaleFactor))
		timeArray.append(tmpArry)

	# downconvert the time based array into an animation frame array
	#
	frameArray = []
	for q in range(int(frames)):
		tmpArry = []
		for r in range(1):
			tmpValue = 0
			for s in range(samplesPerFrame):
				tmpValue = tmpValue + ((timeArray[(2*q)+s])[r])
			tmpValue = tmpValue / samplesPerFrame
			tmpArry.append(int(tmpValue))
		frameArray.append(tmpArry)

	np.savetxt("./Website/downloads/" +  socketID + ".vol", frameArray)

	# confirm script execution
	#
	sout("")
	sout("**Frame Array Construction Finished**")
	sout("")
	time.sleep(2)

	# script setup and housekeeping for MEL BINS PROCESSING
	#
	samples = 0
	frames = 0
	samplesPerFrame = 2
	frameRate = 24
	sampleRate = 1000 * frameRate
	sampleHop = int((sampleRate/frameRate)/samplesPerFrame)
	scaleFactor = 100/80
	sout("")
	sout("")
	sout("Mel Spectrogram Processing Engine")
	sout("--------------")

	# setup librosa functions/processing
	#
	Q = librosa.feature.melspectrogram(y=y, sr=sampleRate, n_mels=melBins, fmax=8000, hop_length = sampleHop)
	librosaMel = librosa.amplitude_to_db(Q, ref=np.max)

	# sout out some information about what we're working with
	#
	samples = len(librosaMel[0])
	sout("Samples: " + str(samples))
	frames = samples/samplesPerFrame
	sout("Anim. Frames: " + str(frames))
	sout("Samples/Frame: " + str(samplesPerFrame))
	sout("Seconds: " + str(frames/frameRate))
	sout("Mel Bins: " + str(melBins))
	# convert the bin based Mel spectrogram array to a time based array
	#
	timeArray = []
	for q in range(samples):
		tmpArry = []
		for r in range(melBins):
			tmpValue = ((librosaMel[r])[q])+80
			tmpArry.append(int(tmpValue*scaleFactor))
		timeArray.append(tmpArry)

	# downconvert the time based array into an animation frame array
	#
	frameArray = []
	for q in range(int(frames)):
		tmpArry = []
		for r in range(melBins):
			tmpValue = 0
			for s in range(samplesPerFrame):
				tmpValue = tmpValue + ((timeArray[(2*q)+s])[r])
			tmpValue = tmpValue / samplesPerFrame
			tmpArry.append(int(tmpValue))
		frameArray.append(tmpArry)

	np.savetxt("./Website/downloads/" +  socketID + ".mel", frameArray)

	# confirm script execution
	#
	sout("")
	sout("**Frame Array Construction Finished**")
	sout("")
	time.sleep(2)

	# setup and housekeeping for CORE PROCESSING
	#
	sout("")
	sout("")
	sout("Scene\\Render Core Fusion Engine")
	sout("--------------")
	sout("")

	scenePath  =  "./PyBlender/Scripts/SceneSetup.txt"
	corePath   =  "./PyBlender/Scripts/RenderingCores/" + core + ".txt"
	tailPath  =  "./PyBlender/Scripts/SceneTail.txt"
	#TODO - Fix this missing file from project
	scriptPath =  "./Website/downloads/" + socketID + ".brs"

	sout("Scene will be rendered with core:")
	sout(corePath)

	filenames = [scenePath, corePath, tailPath]
	with open(scriptPath, 'w') as outputFile:
		for file in filenames:
			with open(file) as inputFile:
				outputFile.write(inputFile.read().replace(socketID, "%SID%"))

	# confirm script execution
	#
	sout("")
	sout("**Render Script Construction Finished**")
	sout("")
	time.sleep(2)
Example #11
     energy.append(np.mean(e))
     ent = 0.0
     m = np.mean(e)
     for j in range(0,len(e[0])):
          q = np.absolute(e[0][j] - m)
          ent = ent + (q * np.log10(q))
     entropy_of_energy.append(ent)
f_list_1 = []
f_list_1.append(zero_crossings)
f_list_1.append(energy)
f_list_1.append(entropy_of_energy)
f_np_1 = np.array(f_list_1)
f_np_1 = np.transpose(f_np_1)[:-1]
kmeans = KMeans(n_clusters=2, random_state=0).fit(f_np_1)
result=kmeans.predict(f_np_1)
D = li.amplitude_to_db(np.abs(li.stft(y)), ref=np.max)
plt.subplot(3,1,1)
plt.title("Audio Analog Signal")
plt.plot(y[1950:2000])
plt.subplot(3,1,2)
plt.title("Spectogram")
librosa.display.specshow(D, y_axis='linear')
plt.colorbar(format='%+2.0f dB')
plt.subplot(3,1,3)
plt.title("Audio Digital Signal")
plt.plot(result, marker='d', color='blue', drawstyle='steps')
plt.show()
stream.stop_stream()
stream.close()
audio.terminate()
Example #12
File: test_core.py Project: yjt/librosa
    def __test(ref):

        db = librosa.amplitude_to_db(xp, ref=ref, top_db=None)
        xp2 = librosa.db_to_amplitude(db, ref=ref)

        assert np.allclose(xp, xp2)
Example #13

#######################################
# First, let's plot the original chroma
chroma_orig = librosa.feature.chroma_cqt(y=y, sr=sr)

# For display purposes, let's zoom in on a 15-second chunk from the middle of the song
idx = [slice(None), slice(*list(librosa.time_to_frames([45, 60])))]

# And for comparison, we'll show the CQT matrix as well.
C = np.abs(librosa.cqt(y=y, sr=sr, bins_per_octave=12*3, n_bins=7*12*3))


plt.figure(figsize=(12, 4))
plt.subplot(2, 1, 1)
librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max)[idx],
                         y_axis='cqt_note', bins_per_octave=12*3)
plt.colorbar()
plt.subplot(2, 1, 2)
librosa.display.specshow(chroma_orig[idx], y_axis='chroma')
plt.colorbar()
plt.ylabel('Original')
plt.tight_layout()


###########################################################
# We can correct for minor tuning deviations by using 3 CQT
# bins per semi-tone, instead of one
chroma_os = librosa.feature.chroma_cqt(y=y, sr=sr, bins_per_octave=12*3)

Example #14
def display_sample_info(file_path, label=''):
    """Generate various representations a given audio file.
    E.g. Mel, MFCC and power spectrogram's.

    Args:
        file_path (str): Path to the audio file.
        label (str): Optional label to display for the given audio file.

    Returns:
        Nothing.
    """

    if not os.path.isfile(file_path):
        raise ValueError('{} does not exist.'.format(file_path))

    # By default, all audio is mixed to mono and resampled to 22050 Hz at load time.
    y, sr = librosa.load(file_path, sr=None, mono=True)

    # At 16000 Hz: 512 samples ~= 32 ms; 200 samples = 12.5 ms; 16 samples = 1 ms.
    hop_length = 200  # Number of samples between successive frames, i.e. columns of a spectrogram.
    f_max = sr / 2.  # Maximum frequency (Nyquist frequency).
    f_min = 64.  # Minimum frequency.
    n_fft = 1024  # Number of samples in a frame.
    n_mels = 80  # Number of Mel bins to generate.
    n_mfcc = 13  # Number of Mel cepstral coefficients to extract.
    win_length = 333  # Window length.

    # Create info string.
    num_samples = y.shape[0]
    duration = librosa.get_duration(y=y, sr=sr)
    info_str_format = 'Label: {}\nPath: {}\nDuration={:.3f}s with {:,d} Samples\n' \
                      'Sampling Rate={:,d} Hz\nMin, Max=[{:.2f}, {:.2f}]'
    info_str = info_str_format.format(label, file_path, duration, num_samples,
                                      sr, np.min(y), np.max(y))
    print(info_str)
    # Escape some LaTeX special characters
    info_str_tex = info_str.replace('_', '\\_')

    plt.figure(figsize=(10, 7))
    plt.subplot(3, 1, 1)
    display.waveplot(y, sr=sr)
    plt.title('Monophonic')

    # Plot waveforms.
    y_harm, y_perc = librosa.effects.hpss(y)
    plt.subplot(3, 1, 2)
    display.waveplot(y_harm, sr=sr, alpha=0.33)
    display.waveplot(y_perc, sr=sr, color='r', alpha=0.40)
    plt.title('Harmonic and Percussive')

    # Add file information.
    plt.subplot(3, 1, 3)
    plt.axis('off')
    plt.text(0.0, 1.0, info_str_tex, color='black', verticalalignment='top')
    plt.tight_layout()

    # Calculating MEL spectrogram and MFCC.
    db_pow = np.abs(
        librosa.stft(y=y,
                     n_fft=n_fft,
                     hop_length=hop_length,
                     win_length=win_length))**2

    s_mel = librosa.feature.melspectrogram(S=db_pow,
                                           sr=sr,
                                           hop_length=hop_length,
                                           fmax=f_max,
                                           fmin=f_min,
                                           n_mels=n_mels)

    s_mel = librosa.power_to_db(s_mel, ref=np.max)
    s_mfcc = librosa.feature.mfcc(S=s_mel, sr=sr, n_mfcc=n_mfcc)

    # STFT (Short-time Fourier Transform)
    # https://librosa.github.io/librosa/generated/librosa.core.stft.html
    plt.figure(figsize=(12, 10))
    db = librosa.amplitude_to_db(librosa.magphase(librosa.stft(y))[0],
                                 ref=np.max)
    plt.subplot(3, 2, 1)
    display.specshow(db,
                     sr=sr,
                     x_axis='time',
                     y_axis='linear',
                     hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Linear-frequency power spectrogram')

    plt.subplot(3, 2, 2)
    display.specshow(db,
                     sr=sr,
                     x_axis='time',
                     y_axis='log',
                     hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Log-frequency power spectrogram')

    plt.subplot(3, 2, 3)
    display.specshow(s_mfcc,
                     sr=sr,
                     x_axis='time',
                     y_axis='linear',
                     hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('MFCC spectrogram')

    # # CQT (Constant-Q Transform)
    # # https://librosa.github.io/librosa/generated/librosa.core.cqt.html
    cqt = librosa.amplitude_to_db(librosa.magphase(librosa.cqt(y, sr=sr))[0],
                                  ref=np.max)
    # plt.subplot(3, 2, 3)
    # display.specshow(cqt, sr=sr, x_axis='time', y_axis='cqt_note', hop_length=hop_length)
    # plt.colorbar(format='%+2.0f dB')
    # plt.title('Constant-Q power spectrogram (note)')

    plt.subplot(3, 2, 4)
    display.specshow(cqt,
                     sr=sr,
                     x_axis='time',
                     y_axis='cqt_hz',
                     hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Constant-Q power spectrogram (Hz)')

    plt.subplot(3, 2, 5)
    display.specshow(db,
                     sr=sr,
                     x_axis='time',
                     y_axis='log',
                     hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Log power spectrogram')

    plt.subplot(3, 2, 6)
    display.specshow(s_mel, x_axis='time', y_axis='mel', hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel spectrogram')

    # TODO Import project used features (python_speech_features).
    # norm_features = 'none'
    # mfcc = load_sample(file_path, feature_type='mfcc', feature_normalization=norm_features)[0]
    # mfcc = np.swapaxes(mfcc, 0, 1)
    #
    # mel = load_sample(file_path, feature_type='mel', feature_normalization=norm_features)[0]
    # mel = np.swapaxes(mel, 0, 1)

    (__sr, __y) = wavfile.read(file_path)

    num_features = 26
    win_len = WIN_LENGTH
    win_step = WIN_STEP
    __mel = psf.logfbank(signal=__y,
                         samplerate=__sr,
                         winlen=win_len,
                         winstep=win_step,
                         nfilt=num_features,
                         nfft=n_fft,
                         lowfreq=f_min,
                         highfreq=f_max,
                         preemph=0.97)

    __mfcc = psf.mfcc(signal=__y,
                      samplerate=__sr,
                      winlen=win_len,
                      winstep=win_step,
                      numcep=num_features // 2,
                      nfilt=num_features,
                      nfft=n_fft,
                      lowfreq=f_min,
                      highfreq=f_max,
                      preemph=0.97,
                      ceplifter=22,
                      appendEnergy=False)

    __mfcc = __mfcc.astype(np.float32)
    __mel = __mel.astype(np.float32)
    __mfcc = np.swapaxes(__mfcc, 0, 1)
    __mel = np.swapaxes(__mel, 0, 1)

    plt.figure(figsize=(5.2, 1.6))
    display.waveplot(y, sr=sr)

    fig = plt.figure(figsize=(10, 4))
    plt.subplot(2, 1, 2)
    display.specshow(__mfcc,
                     sr=__sr,
                     x_axis='time',
                     y_axis='mel',
                     hop_length=win_step * __sr)
    # plt.set_cmap('magma')
    # plt.xticks(rotation=295)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.xlim(xmin=0)
    plt.ylim(0, 8000)
    plt.colorbar(format='%+2.0f')
    plt.title('MFCC', visible=False)

    plt.subplot(2, 1, 1)
    display.specshow(__mel,
                     sr=__sr,
                     x_axis='time',
                     y_axis='mel',
                     hop_length=win_step * __sr)
    # plt.set_cmap('magma')
    # plt.xticks(rotation=295)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.xlim(xmin=0)
    plt.ylim(0, 8000)
    plt.colorbar(format='%+2.0f', label='Power (dB)')
    plt.title('Mel Spectrogram', visible=False)

    plt.tight_layout()
    fig.savefig('/tmp/mel-mfcc-plot-we-did-it.pdf', bbox_inches='tight')
    plt.show()
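display_sample_info assumes several module-level names: psf (python_speech_features), wavfile (scipy.io), display (librosa.display), and the constants WIN_LENGTH and WIN_STEP in seconds. A call sketch with assumed values:

WIN_LENGTH = 0.025  # assumed: 25 ms analysis window
WIN_STEP = 0.010    # assumed: 10 ms step
display_sample_info('sample.wav', label='demo')  # hypothetical file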
Example #15
def main():
    main_name = "chAngE.wav"
    inst_name = "chAngE_inst.wav"
    plt.figure(figsize=(10, 15))

    main_wav, sr = librosa.load(main_name)
    print("file_name:{}, sr:{}".format(main_name, sr))
    print(main_wav.shape)

    inst_wav, sr = librosa.load(inst_name)
    print("file_name:{}, sr:{}".format(inst_name, sr))
    print(inst_wav.shape)

    main_power_spec = np.abs(librosa.stft(main_wav))
    print("power_spec.shape:", main_power_spec.shape)

    inst_power_spec = np.abs(librosa.stft(inst_wav))
    print("power_spec.shape:", inst_power_spec.shape)

    plt.subplot(3, 1, 1)  # (row, column, num)
    librosa.display.specshow(librosa.amplitude_to_db(main_power_spec,
                                                     ref=np.max),
                             y_axis='log',
                             x_axis='time')
    wav_title = "main_Power_spectrogram"
    plt.title(wav_title)
    plt.colorbar(format='%+2.0f dB')

    plt.subplot(3, 1, 2)  # (row, column, num)
    librosa.display.specshow(librosa.amplitude_to_db(inst_power_spec,
                                                     ref=np.max),
                             y_axis='log',
                             x_axis='time')
    wav_title = "inst_Power_spectrogram"
    plt.title(wav_title)
    plt.colorbar(format='%+2.0f dB')

    main_len = main_power_spec.shape[1]
    inst_len = inst_power_spec.shape[1]

    if main_len > inst_len:
        diff_len = inst_len
    else:
        diff_len = main_len

    diff_power_spec = []
    for i in range(diff_len):
        diff = main_power_spec.T[i] - inst_power_spec.T[i]
        # print("diff.shape:", diff.shape)

        diff_power_spec.append(diff)

    diff_power_spec = np.array(diff_power_spec).T
    print("diff_power_spec.shape:", diff_power_spec.shape)

    plt.subplot(3, 1, 3)  # (row, column, num)
    librosa.display.specshow(librosa.amplitude_to_db(diff_power_spec,
                                                     ref=np.max),
                             y_axis='log',
                             x_axis='time')
    wav_title = "diff_Power_spectrogram"
    plt.title(wav_title)
    plt.colorbar(format='%+2.0f dB')

    plt.tight_layout()
    plt.savefig(wav_title + ".jpg")
    plt.clf()

    inv_wav = librosa.core.istft(main_power_spec)
    diff_wav = librosa.core.istft(diff_power_spec)
    librosa.output.write_wav('inv.wav', inv_wav, sr)
    librosa.output.write_wav('diff.wav', diff_wav, sr)
Example #16
"""
Created on Fri Mar  8 08:41:08 2019

@author: MR toad
"""

import librosa
import os
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
#from PIL import Image

file_path = 'E:/speech/'
mfcc_path = 'E:/mfcc/'
pic_path = 'E:/pic'

file_name_list = os.listdir(file_path)
for file_name in file_name_list:
    y, sr = librosa.load(file_path + file_name)
    mfcc_feature = librosa.feature.mfcc(y=y, sr=sr)
    np.save(mfcc_path + file_name.split('.')[0] + ".npy", mfcc_feature)

    plt.figure(figsize=(12, 8))
    D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    plt.subplot(4, 2, 1)
    librosa.display.specshow(D, y_axis='linear')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Linear-frequency power spectrogram')
    plt.savefig(file_name.split('.')[0] + ".png", dpi=300)
Example #17
    img = np.zeros(chromagram.shape, dtype=np.float32)
    w, h = chromagram.shape
    for x in range(h):
        # img.item(x, c_max[x], 0)
        img.itemset((c_max[x], x), 1)
    return img


#y, sr = load_and_trim('F:/项目/花城音乐项目/样式数据/ALL/旋律/1.31MP3/旋律1.100分.wav')
y, sr = load_and_trim(filename)
# silence_threshold = 0.2
# need_vocal_separation = check_need_vocal_separation(y, silence_threshold)
# if need_vocal_separation:
#     y, sr = get_foreground(y, sr)  # separate the foreground (vocals)

CQT = librosa.amplitude_to_db(np.abs(librosa.cqt(y, sr=16000)), ref=np.max)
w, h = CQT.shape
CQT[50:w, :] = -100
CQT[0:20, :] = -100

# reference beat time points
type_index = get_onsets_index_by_filename(filename)
total_frames_number = get_total_frames_number(filename)
# base_frames = onsets_base_frames_rhythm(type_index,total_frames_number)
base_frames = onsets_base_frames(codes[type_index], total_frames_number)
base_onsets = librosa.frames_to_time(base_frames, sr=sr)

first_frame = base_frames[1] - base_frames[0]
rms = librosa.feature.rmse(y=y)[0]
rms = [x / np.std(rms) for x in rms]
min_waterline = find_min_waterline(rms, 8)
Example #18
# perform stft
stft = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)

# calculate abs values on complex numbers to get magnitude
spectrogram = np.abs(stft)

# display spectrogram
plt.figure(figsize=FIG_SIZE)
librosa.display.specshow(spectrogram, sr=sample_rate, hop_length=hop_length)
plt.xlabel("Time")
plt.ylabel("Frequency")
plt.colorbar()
plt.title("Spectrogram")

# apply logarithm to cast amplitude to Decibels
log_spectrogram = librosa.amplitude_to_db(spectrogram)
plt.figure(figsize=FIG_SIZE)
librosa.display.specshow(log_spectrogram,
                         sr=sample_rate,
                         hop_length=hop_length)
plt.xlabel("Time")
plt.ylabel("Frequency")
plt.colorbar(format="%+2.0f dB")
plt.title("Spectrogram (dB)")

# MFCCs
# extract 13 MFCCs
MFCCs = librosa.feature.mfcc(signal,
                             sample_rate,
                             n_fft=n_fft,
                             hop_length=hop_length,
Example #19
def extract_features(dataset='train',
                     n_fft=512,
                     hop_length=128,
                     n_mels=40,
                     dct_type=3):
    f = open(data_path + dataset + '_list.txt', 'r')

    i = 0
    for file_name in f:
        # progress check
        i = i + 1
        if not (i % 10):
            print(i)

        # load audio file
        file_name = file_name.rstrip('\n')
        file_path = data_path + file_name
        #print file_path
        y, sr = librosa.load(file_path, sr=SAMPLING_RATE)

        # mel-scaled spectrogram
        mel_S = librosa.feature.melspectrogram(y,
                                               sr=SAMPLING_RATE,
                                               n_fft=n_fft,
                                               hop_length=hop_length,
                                               n_mels=n_mels,
                                               fmin=0.0,
                                               fmax=8000)

        #log compression
        log_mel_S = librosa.power_to_db(mel_S)

        # mfcc (DCT)
        mfcc = librosa.feature.mfcc(S=log_mel_S,
                                    dct_type=dct_type,
                                    n_mfcc=MFCC_DIM)
        mfcc = mfcc.astype(np.float32)  # to save the memory (64 to 32 bits)

        # constant-q transform
        C = np.abs(librosa.core.cqt(y, sr=sr))
        log_cqt = librosa.amplitude_to_db(C, ref=np.max)

        mfcc_cqt = librosa.feature.mfcc(S=log_cqt,
                                        dct_type=dct_type,
                                        n_mfcc=MFCC_DIM)
        mfcc_cqt = mfcc_cqt.astype(np.float32)

        # Zero crossing rate
        zcr = librosa.feature.zero_crossing_rate(y,
                                                 frame_length=n_fft,
                                                 hop_length=hop_length)

        # STFT
        S, phase = librosa.magphase(
            librosa.core.stft(y,
                              n_fft=n_fft,
                              hop_length=hop_length,
                              win_length=n_fft))

        # Spectral centroid
        s_centroid = librosa.feature.spectral_centroid(S=S)

        # Spectral rolloff
        s_rolloff = librosa.feature.spectral_rolloff(S=S,
                                                     sr=sr,
                                                     roll_percent=0.95)

        # Spectral flatness
        s_flatness = librosa.feature.spectral_flatness(S=S)

        # Spectral bandwidth
        s_bandwidth = librosa.feature.spectral_bandwidth(S=S)

        # Spectral contrast
        s_contrast = librosa.feature.spectral_contrast(S=S)

        # Chromagram
        crm_stft = librosa.feature.chroma_stft(S=S, sr=sr)
        crm_cqt = librosa.feature.chroma_cqt(C=C, sr=sr)
        crm_cens = librosa.feature.chroma_cens(C=C, sr=sr)

        rms = librosa.feature.rms(S=S)
        tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)

        features = [
            mfcc, mfcc_cqt, zcr, s_centroid, s_rolloff, s_flatness,
            s_bandwidth, s_contrast, crm_stft, crm_cqt, crm_cens, rms, tonnetz
        ]

        # save mfcc as a file
        file_name = file_name.replace('.wav', '.npy')

        for feature in range(len(features)):
            save_file = './rms_tonnetz' + str(n_fft) + '/' + str(
                hop_length) + '/' + str(n_mels) + '/' + str(
                    dct_type) + '/' + feature_paths[feature] + file_name
            if not os.path.exists(os.path.dirname(save_file)):
                os.makedirs(os.path.dirname(save_file))
            np.save(save_file, features[feature])
    f.close()
Example #20
# harmonic / percussive components (named harmony, perceptr here)
harmony, perceptr = librosa.effects.hpss(y)
harmony_mean = np.mean(harmony)
harmony_var = np.var(harmony)
perceptr_mean = np.mean(perceptr)
perceptr_var = np.var(perceptr)
# print(harmony_mean,harmony_var,perceptr_mean,perceptr_var)

# tempo
tempo, _ = librosa.beat.beat_track(y, sr=sr)
# print(tempo)

# MFCCs
S = librosa.feature.melspectrogram(y, sr=sr)
S_DB = librosa.amplitude_to_db(S, ref=np.max)
D = np.abs(librosa.stft(y, n_fft=2048, win_length=2048, hop_length=512))
mfcc = librosa.feature.mfcc(y, sr=sr, S=librosa.power_to_db(D), n_mfcc=20)
print(np.mean(mfcc[0]))  # -11.3112

mfcc1_mean = np.mean(mfcc[0])
mfcc1_var = np.var(mfcc[0])
mfcc2_mean = np.mean(mfcc[1])
mfcc2_var = np.var(mfcc[1])
mfcc3_mean = np.mean(mfcc[2])
mfcc3_var = np.var(mfcc[2])
mfcc4_mean = np.mean(mfcc[3])
mfcc4_var = np.var(mfcc[3])
mfcc5_mean = np.mean(mfcc[4])
mfcc5_var = np.var(mfcc[4])
mfcc6_mean = np.mean(mfcc[5])
Example #21
    device="default", channels=1, samplerate=samplingrate, callback=audio_callback
)

sound_name = input("name of sound (only use lowercase chars and underscore): ")


with stream:
    print("listening...")
    while True:
        if len(v) >= recording_size:
            # trim v size to be exactly recording_size
            v = v[-recording_size:]
            y = np.asarray(v, dtype=np.float32)

            mel_spec = librosa.feature.melspectrogram(y=y, sr=samplingrate)
            mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)

            # normalize data to make each value on a scale between -1 and 1
            normalized = librosa.util.normalize(mel_spec_db)
            # between 0 and 255
            normalized = [[(v + 1) * 255 / 2 for v in row] for row in normalized]
            # show preview
            # plt.imshow(normalized)
            # plt.show()
            import os
            import binascii

            fname = binascii.hexlify(os.urandom(8))
            f = f"./datasets/sounds-{int(recording_len*100)}ms/{sound_name}/{fname.decode('utf-8')}.jpg"
            res = cv2.imwrite(f, np.array(normalized))
            print(f"saved {f}")
Example #22
my_dpi = 120

for index, row in speakers_filtered.iterrows():
    dir_ = root + '/' + row['SUBSET'] + '/' + str(row['ID']) + '/'
    print('working on df row {}, speaker {}'.format(index, row['CODE']))
    if not os.path.exists(dir_):
        print('dir {} does not exist, skipping'.format(dir_))
        continue

    files_iter = Path(dir_).glob('**/*.flac')
    files_ = [str(f) for f in files_iter]

    for f in files_:
        ay, sr = librosa.load(f)
        duration = ay.shape[0] / sr
        start = 0
        while start + 5 < duration:
            slice_ = ay[start * sr:(start + 5) * sr]
            start = start + 5 - 1
            x = librosa.stft(slice_)
            xdb = librosa.amplitude_to_db(abs(x))
            plt.figure(figsize=(227 / my_dpi, 227 / my_dpi), dpi=my_dpi)
            plt.axis('off')
            librosa.display.specshow(xdb, sr=sr, x_axis='time', y_axis='log')
            plt.savefig(root + '/train-gram/' + str(row['CODE']) + '/' +
                        uuid.uuid4().hex + '.png',
                        dpi=my_dpi)
            plt.close()

    print('work done on index {}, speaker {}'.format(index, row['CODE']))
Example #23
    S = S.cuda()
    net = NMFD(S.shape, T, n_components=R).cuda()
    #net = NMF(S.shape, n_components=R, max_iter=max_iter, verbose=True, beta_loss=2).cuda()

    niter, V = net.fit_transform(S,
                                 verbose=True,
                                 beta_loss=1.5,
                                 max_iter=max_iter,
                                 alpha=0.5,
                                 l1_ratio=0.2)
    net.sort()
    W = net.W
    H = net.H

    plt.subplot(3, 1, 1)
    display.specshow(librosa.amplitude_to_db(W.detach().cpu().numpy().mean(2),
                                             ref=np.max),
                     y_axis='log',
                     sr=sr)
    plt.title('Template ')
    plt.subplot(3, 1, 2)
    display.specshow(H.detach().cpu().numpy(),
                     x_axis='time',
                     hop_length=1024,
                     sr=sr)
    plt.colorbar()
    plt.title('Activations')
    plt.subplot(3, 1, 3)
    display.specshow(librosa.amplitude_to_db(V.detach().cpu().numpy(),
                                             ref=np.max),
                     y_axis='log',
                     x_axis='time',
Example #24
 def compute_cqt(self):
     c = librosa.cqt(self.audio_sample, sr=self.sr, hop_length=self.hop_length,
                     fmin=None, n_bins=self.n_bins, res_type='fft')
     c_mag = librosa.magphase(c)[0] ** self.mag_exp
     cdb = librosa.amplitude_to_db(c_mag, ref=np.max)
     return cdb
Example #25
File: test_core.py Project: dpwe/librosa
    def __test(ref):

        db = librosa.amplitude_to_db(xp, ref=ref, top_db=None)
        xp2 = librosa.db_to_amplitude(db, ref=ref)

        assert np.allclose(xp, xp2)
Example #26
plt.subplot(1, 2, 1)
plt.plot(x1, y1, label="PESQ ")
plt.xlabel('Number of neurons')
plt.ylabel('PESQ')

plt.subplot(1, 2, 2)
plt.plot(x1, y2, label="STOI")
plt.xlabel('Number of neurons')
plt.ylabel('STOI')

y, sr = librosa.load(
    "D:/UB_MS/Thesis/Seprating speech signals/DataSet/one_noise_dataset/noisy_test/noisy462.wav",
    sr=16000)
plt.figure(figsize=(12, 8))
D1 = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
D1 = librosa.amplitude_to_db(D1, ref=np.max)

y, sr = librosa.load(
    "D:/UB_MS/Thesis/Seprating speech signals/DataSet/set/test_clean/clean462.wav",
    sr=16000)
plt.figure(figsize=(12, 8))
D2 = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
D2 = librosa.amplitude_to_db(D2, ref=np.max)

y, sr = librosa.load(
    "D:/UB_MS/Thesis/Seprating speech signals/DataSet/Results/one_noise_data/seq2seq/256/enhanced_462.wav",
    sr=16000)
plt.figure(figsize=(12, 8))
D3 = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
D3 = librosa.amplitude_to_db(D3, ref=np.max)
Example #27
f1 = f1_score(y_test, y_pred)

print("accuracy : \t", accuracy)
print("recall : \t", recall)
print("precision : \t", precision)
print("f1 : \t", f1)

# prediction data
pred_pathAudio = 'C:/nmb/nmb_data/pred_voice/'
files = librosa.util.find_files(pred_pathAudio, ext=['wav'])
files = np.asarray(files)
for file in files:
    y, sr = librosa.load(file, sr=22050)
    pred_mels = librosa.feature.melspectrogram(y,
                                               sr=sr,
                                               n_fft=512,
                                               hop_length=128,
                                               n_mels=128)
    pred_mels = librosa.amplitude_to_db(pred_mels, ref=np.max)
    pred_mels = pred_mels.reshape(1, pred_mels.shape[0] * pred_mels.shape[1])
    # print(pred_mels.shape)  # (1, 110336)
    y_pred = model.predict(pred_mels)
    # print(y_pred)
    if y_pred == 0:  # label 0
        print(file, 'is a female voice.')
    else:  # label 1
        print(file, 'is a male voice.')

end_now = datetime.datetime.now()
time = end_now - start_now
print("time >> ", time)  # time >
Example #28
def calcu(data, sr, n_fft):
    data = data.astype(float) / 32767  # scale 16-bit PCM to [-1, 1]
    data = librosa.stft(data[sr // 4:sr // 4 * 2], n_fft=n_fft)
    data = np.mean(librosa.amplitude_to_db(np.abs(data)), axis=1)
    return data
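calcu divides by 32767, so it expects 16-bit integer PCM, and it analyzes the quarter second starting at 0.25 s. A usage sketch, assuming scipy for WAV reading ('tone.wav' is a hypothetical mono int16 file):

from scipy.io import wavfile

sr, pcm = wavfile.read('tone.wav')
profile = calcu(pcm, sr, n_fft=2048)  # mean dB spectrum, shape (1 + n_fft//2,)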
Example #29
sig, sr = librosa.load(audio_data, sr = 44100)


# 3_Extacting Datas From Audio ====================================================================

# 3-1.Onset Envelope | 3-2.Beats | 3-3.Onsets -----------------------------------------------------
onset_frames = librosa.onset.onset_detect(sig, sr = sr)
onsets = librosa.frames_to_time(onset_frames, sr = sr)
onset_env = librosa.onset.onset_strength(sig, sr = sr, aggregate = np.median)
tempo = librosa.beat.tempo(onset_envelope = onset_env, sr = sr)
tempo, beats = librosa.beat.beat_track(onset_envelope = onset_env, sr = sr, units = 'time')

# 3-4.Frequency & Magnitude -----------------------------------------------------------------------
fft = np.fft.fft(sig)
magnitude = np.abs(fft)
magnitude_dB = librosa.amplitude_to_db(magnitude)
frequency = np.linspace(0, sr, len(magnitude_dB))

left_magnitude_dB = magnitude_dB[:len(magnitude_dB)//2]   # positive-frequency half (dB)
left_frequency = frequency[:len(magnitude_dB)//2]         # matching frequency axis


# 4_Preprocessing Datas for Visualization =========================================================

# 4-1.Onset Envelope(propotional to audio length) -------------------------------------------------
E = len(onset_env)
x1 = np.random.rand(E) * E
y1 = np.random.rand(E) * E
n1 = 50
radii_1 = np.random.rand(E) * E / n1
colors_1 = ["#%02x%02x%02x" % (int(r), int(g), 180) for r, g in zip(x1, y1)]   # 255,100,37
Example #30
import librosa
import os
import matplotlib.pyplot as plt
import librosa.display
import numpy as np

y, sr = librosa.load("sample/UltraCat_-_01_-_Orbiting_the_Earth.mp3")
Y = librosa.stft(y)
Ydb = librosa.amplitude_to_db(abs(Y))
plt.figure(figsize=(6, 2))
librosa.display.specshow(Ydb, sr=sr, cmap='magma')
img_path = "sample/UltraCat_-_01_-_Orbiting_the_Earth.png"
plt.savefig(img_path)  # save the figure to file
plt.close("all")
Example #31
hop_length = 80
n_sample = 200

amps = []
log_amps = []
dbs = []
for filepath in glob.glob('{}/*.wav'.format(src_path))[:n_sample]:
    wav = read(filepath, sr, mono=True)
    spec = librosa.stft(wav, n_fft=n_fft, win_length=win_length, hop_length=hop_length)  # (n_fft/2+1, t)
    amp = np.abs(spec)
    amps.extend(amp.flatten())

    log_amp = np.log(amp)
    log_amps.extend(log_amp.flatten())

    db = librosa.amplitude_to_db(amp)
    dbs.extend(db.flatten())

amps = np.array(amps)
log_amps = np.array(log_amps)
dbs = np.array(dbs)


mean = np.mean(amps)
std = np.std(amps)

max = np.max(amps)
min = np.min(amps)

# mean = np.mean(dbs)
# std = np.std(dbs)
Example #32
def spectrogram_librosa(y, fs, hparams):
    D = np.abs(_stft(preemphasis(y, hparams), fs, hparams))
    S = librosa.amplitude_to_db(D) - hparams.ref_level_db
    S_norm = _normalize(S, hparams)
    return S_norm
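spectrogram_librosa calls three helpers (_stft, preemphasis, _normalize) defined elsewhere in its project. One plausible set of definitions, modeled on common Tacotron-style preprocessing; the hparams field names (preemphasis, n_fft, hop_length, win_length, min_level_db) are assumptions:

import numpy as np
import librosa

def preemphasis(y, hparams):
    # first-order pre-emphasis filter
    return np.append(y[0], y[1:] - hparams.preemphasis * y[:-1])

def _stft(y, fs, hparams):
    # fs is unused in this sketch; frame parameters come from hparams
    return librosa.stft(y, n_fft=hparams.n_fft,
                        hop_length=hparams.hop_length,
                        win_length=hparams.win_length)

def _normalize(S, hparams):
    # squash dB values into [0, 1] against an assumed floor, e.g. min_level_db = -100
    return np.clip((S - hparams.min_level_db) / -hparams.min_level_db, 0, 1)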
Example #33
            Note.append(data[i - 1])
            count = 1
            N = data[i]
    return Note, Count


def Normlize(data, num):
    if type(data) != list or type(num) != list:
        return False
    Data = []
    Num = []
    for i in range(len(data)):
        num[i] = num[i] // 10
        if num[i] >= 1:
            Data.append(data[i])
            Num.append(num[i])
    return Data, Num


if __name__ == '__main__':

    y, rate = loadFile('1.wav', 44100)
    Time = librosa.get_duration(y, sr=rate)
    fft = librosa.stft(y, n_fft=1024 * 2)
    D = librosa.amplitude_to_db(abs(fft), ref=np.max)
    D = D + 80
    data = music2note(D, rate / 2)

    data, num = getNoteAndNum(data)
    data, num = Normlize(data, num)
Example #34
# Decompose D into harmonic and percussive components
#
# :math:`D = D_\text{harmonic} + D_\text{percussive}`
D_harmonic, D_percussive = librosa.decompose.hpss(D)


####################################################################
# We can plot the two components along with the original spectrogram

# Pre-compute a global reference power from the input spectrum
rp = np.max(np.abs(D))

plt.figure(figsize=(12, 8))

plt.subplot(3, 1, 1)
librosa.display.specshow(librosa.amplitude_to_db(np.abs(D), ref=rp), y_axis='log')
plt.colorbar()
plt.title('Full spectrogram')

plt.subplot(3, 1, 2)
librosa.display.specshow(librosa.amplitude_to_db(np.abs(D_harmonic), ref=rp), y_axis='log')
plt.colorbar()
plt.title('Harmonic spectrogram')

plt.subplot(3, 1, 3)
librosa.display.specshow(librosa.amplitude_to_db(np.abs(D_percussive), ref=rp), y_axis='log', x_axis='time')
plt.colorbar()
plt.title('Percussive spectrogram')
plt.tight_layout()

Example #35
import sklearn.cluster

import librosa
import librosa.display

#############################
# First, we'll load in a song
y, sr = librosa.load('audio/Karissa_Hobbs_-_09_-_Lets_Go_Fishin.mp3')


##############################################
# Next, we'll compute and plot a log-power CQT
BINS_PER_OCTAVE = 12 * 3
N_OCTAVES = 7
C = librosa.amplitude_to_db(np.abs(librosa.cqt(y=y, sr=sr,
                                               bins_per_octave=BINS_PER_OCTAVE,
                                               n_bins=N_OCTAVES * BINS_PER_OCTAVE)),
                            ref=np.max)

plt.figure(figsize=(12, 4))
librosa.display.specshow(C, y_axis='cqt_hz', sr=sr,
                         bins_per_octave=BINS_PER_OCTAVE,
                         x_axis='time')
plt.tight_layout()


##########################################################
# To reduce dimensionality, we'll beat-synchronize the CQT
tempo, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)
Csync = librosa.util.sync(C, beats, aggregate=np.median)

# For plotting purposes, we'll need the timing of the beats
Example #36
def audio_preprocessing():
    import librosa
    import librosa.display
    # import IPython.display
    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    import matplotlib.font_manager as fm
    import glob
    from IPython import get_ipython
    import os, errno

    # get_ipython().run_line_magic('matplotlib', 'inline')

    FIG_SIZE = (15, 10)
    FOLDER = "mfcc/"
    try:
        if not (os.path.isdir(FOLDER)):
            os.makedirs(os.path.join(FOLDER))
    except OSError as e:
        if e.errno != errno.EEXIST:
            print("Failed to create directory!!!!!")
            raise

    folder_list = glob.glob("audio_data")
    print(folder_list)
    print(len(folder_list))
    file_list = []
    for i in range(len(folder_list)):
        file = glob.glob(folder_list[i] + "/*")
        file_list.append(file)
        sig = [None] * len(file_list[i])
        sr = [None] * len(file_list[i])

        index = 0
        for j in file_list[i]:
            sig[index], sr[index] = librosa.load(j, sr=16000)
            fft = np.fft.fft(sig[index])

            # take the absolute value of the complex spectrum to get the magnitude
            magnitude = np.abs(fft)

            # build the frequency axis
            f = np.linspace(0, sr[index], len(magnitude))

            # the FFT spectrum is symmetric, so keep only the first half and drop the mirrored high-frequency half
            left_spectrum = magnitude[:int(len(magnitude) / 2)]
            left_f = f[:int(len(magnitude) / 2)]
            # STFT -> spectrogram
            hop_length = 512  # number of samples between successive frames
            n_fft = 2048  # number of samples per frame

            # calculate duration hop length and window in seconds
            hop_length_duration = float(hop_length) / sr[index]
            n_fft_duration = float(n_fft) / sr[index]

            # STFT
            stft = librosa.stft(sig[index], n_fft=n_fft, hop_length=hop_length)

            # take the magnitude of the complex STFT
            magnitude = np.abs(stft)

            # magnitude -> decibels
            log_spectrogram = librosa.amplitude_to_db(magnitude)
            # MFCCs
            # extract 40 MFCCs
            MFCCs = librosa.feature.mfcc(sig[index], sr[index], n_fft=n_fft, hop_length=hop_length, n_mfcc=40)
            # display MFCCs
            plt.figure(figsize=FIG_SIZE)
            librosa.display.specshow(MFCCs, sr=sr[index], hop_length=hop_length)
            plt.xlabel("Time")
            plt.ylabel("MFCC coefficients")
            plt.colorbar()
            plt.title("MFCCs")

            # save image
            fig = plt.gcf()
            fig.savefig(FOLDER + str(index) + '.png')
            index += 1
    print("complete!")
Example #37
def transform(y, sr, hop_length):
    """Convert raw audio into a normalized log-mel spectrogram."""
    M = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length)
    log_M = librosa.amplitude_to_db(M, ref=np.max)
    M_std = (log_M+80)/80
    return M_std
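Since ref=np.max with the default top_db=80 maps the values into [-80, 0] dB, the (log_M + 80) / 80 step rescales them to [0, 1]. A usage sketch ('clip.wav' is a hypothetical file):

y, sr = librosa.load('clip.wav')
features = transform(y, sr, hop_length=512)  # shape (n_mels, frames), values in [0, 1]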
Example #38
        mse = mse + RMSE(X[i], Y[i])
    return mse / lenx


esperado_dir = '../avaliacao-subjetiva/Esperado/'
exp_dir = '../avaliacao-subjetiva/Preditos/'

exp_list = list(os.listdir(exp_dir))
esperado_list = list(os.listdir(esperado_dir))
esp_mag = []
esp_db = []
for i in esperado_list:
    if i[-4:] == '.wav':
        file_id = i[:-4]
        _, _, mag = load_spectrograms(os.path.join(esperado_dir, i))
        db = librosa.amplitude_to_db(mag, ref=np.max)
        display.specshow(db, y_axis='log', x_axis='time')
        save_img_dir = os.path.join(esperado_dir, i.replace('.wav', '.png'))
        esp_mag.append([file_id, mag])
        esp_db.append([file_id, db])

        plt.title('STFT Spectrogram')
        plt.colorbar(format='%+2.0f dB')
        plt.tight_layout()

        plt.savefig(save_img_dir)

        plt.cla()  # Clear axis
        plt.clf()

results_list = []
Example #39
import librosa.display

#############################################
# Load an example with vocals.
y, sr = librosa.load('audio/Cheese_N_Pot-C_-_16_-_The_Raps_Well_Clean_Album_Version.mp3', duration=120)


# And compute the spectrogram magnitude and phase
S_full, phase = librosa.magphase(librosa.stft(y))


#######################################
# Plot a 5-second slice of the spectrum
idx = slice(*librosa.time_to_frames([30, 35], sr=sr))
plt.figure(figsize=(12, 4))
librosa.display.specshow(librosa.amplitude_to_db(S_full[:, idx], ref=np.max),
                         y_axis='log', x_axis='time', sr=sr)
plt.colorbar()
plt.tight_layout()

###########################################################
# The wiggly lines above are due to the vocal component.
# Our goal is to separate them from the accompanying
# instrumentation.
#

# We'll compare frames using cosine similarity, and aggregate similar frames
# by taking their (per-frequency) median value.
#
# To avoid being biased by local continuity, we constrain similar frames to be
# separated by at least 2 seconds.
Example #40
def analyse_audio(audio_file, midi_file):
    x, _ = librosa.load(audio_file, sr=sr)
    print("Music file length=%s, sampling_rate=%s" % (x.shape[0], sr))
    plt.figure(figsize=(14, 5))
    plt.title('Music Sample Waveplot')
    librosa.display.waveplot(x, sr=sr)
    x_stft_spectrum = lb.stft(x,
                              n_fft=1024,
                              hop_length=512,
                              center=True,
                              dtype=np.complex64)
    x_stft = librosa.amplitude_to_db(abs(x_stft_spectrum), ref=np.max)
    plt.figure(figsize=(14, 5))
    librosa.display.specshow(x_stft,
                             sr=sr,
                             fmin=lb.note_to_hz('A0'),
                             x_axis='time',
                             y_axis='linear',
                             cmap='coolwarm')
    plt.title('Power spectrogram')
    plt.colorbar(format='%+2.0f dB')
    plt.tight_layout()
    plt.figure(figsize=(14, 5))
    x_cqt = np.abs(
        librosa.cqt(x,
                    sr=sr,
                    bins_per_octave=bins_per_octave,
                    n_bins=n_bins,
                    fmin=lb.note_to_hz('A0')))
    librosa.display.specshow(librosa.amplitude_to_db(x_cqt, ref=np.max),
                             sr=sr,
                             x_axis='time',
                             y_axis='cqt_note',
                             cmap='coolwarm')
    print("CQT Matrix shape", x_cqt.shape)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Constant-Q power spectrum')
    plt.tight_layout()
    n_frames = x_cqt.shape[1]

    midi_data = pretty_midi.PrettyMIDI(midi_file)
    plt.figure(figsize=(12, 4))
    plot_piano_roll(midi_data, 24, 84)
    print('There are {} time signature changes'.format(
        len(midi_data.time_signature_changes)))
    print('There are {} instruments'.format(len(midi_data.instruments)))
    print('Instrument 1 has {} notes'.format(
        len(midi_data.instruments[0].notes)))
    pianoRoll = midi_data.instruments[0].get_piano_roll(fs=n_frames * 44100. /
                                                        len(x))
    midi_mat = (pianoRoll[MIDInotes[0]:MIDInotes[1] + 1, :n_frames] > 0)
    print("MIDI Matrix shape", midi_mat.shape)
    plt.figure()

    librosa.display.specshow(midi_mat,
                             sr=sr,
                             bins_per_octave=12,
                             fmin=lb.note_to_hz('A0'),
                             x_axis='time',
                             y_axis='cqt_note')
    n_pitch_frame = np.sum(midi_mat, axis=1)
    print(n_pitch_frame)
    plt.bar(range(MIDInotes[0], MIDInotes[1] + 1),
            n_pitch_frame / np.sum(n_pitch_frame).astype(np.float))
    plt.xticks(range(MIDInotes[0], MIDInotes[1] + 1, 12),
               lb.midi_to_note(range(MIDInotes[0], MIDInotes[1] + 1, 12)))
    plt.xlabel('Midi note')
    plt.ylabel('Note probability')
Example #41
def main():
    # load audio file
    # get current working directory
    dir = os.path.dirname(__file__) + "/"
    # dir = "C:/Users/yamam/Desktop/lab/2021/B4Lecture-2021/ex_1/t_yamamoto/"
    audio_path = dir + "recording_b4lec_ex1.wav"
    wav, sr = librosa.load(audio_path, mono=True)

    fig, ax = plt.subplots(nrows=3, ncols=1, sharex=True)
    plt.subplots_adjust(hspace=0.6)

    # draw original signal
    librosa.display.waveplot(wav, sr=sr, color="g", ax=ax[0])
    ax[0].set(title="Original signal", xlabel=None, ylabel="Magnitude")

    # parameter
    hop = 0.5
    win_length = 1024
    hop_length = int(win_length * hop)

    # STFT
    amp = stft(wav, hop=hop, win_length=win_length)
    # convert an amplitude spectrogram to dB-scaled spectrogram
    db = librosa.amplitude_to_db(np.abs(amp))
    # db = librosa.amplitude_to_db(np.abs(librosa.stft(wav)), ref=np.max)
    # draw spectrogram (log scale)
    img = librosa.display.specshow(
        db,
        sr=sr,
        hop_length=hop_length,
        x_axis="time",
        y_axis="log",
        ax=ax[1],
        cmap="plasma",
    )
    ax[1].set(title="Spectrogram", xlabel=None, ylabel="Frequency [Hz]")
    ax[1].set_yticks([0, 128, 512, 2048, 8192])
    fig.colorbar(img,
                 aspect=10,
                 pad=0.01,
                 extend="both",
                 ax=ax[1],
                 format="%+2.f dB")

    # inverse-STFT
    inv_wav = istft(amp, hop=hop, win_length=win_length)
    # draw re-synthesized signal
    librosa.display.waveplot(inv_wav, sr=sr, color="g", ax=ax[2])
    ax[2].set(title="Re-synthesized signal",
              xlabel="Time [s]",
              ylabel="Magnitude")

    # graph adjustment
    ax_pos_0 = ax[0].get_position()
    ax_pos_1 = ax[1].get_position()
    ax_pos_2 = ax[2].get_position()
    ax[0].set_position(
        [ax_pos_0.x0, ax_pos_0.y0, ax_pos_1.width, ax_pos_1.height])
    ax[2].set_position(
        [ax_pos_2.x0, ax_pos_2.y0, ax_pos_1.width, ax_pos_1.height])
    # fig.tight_layout()
    fig.align_labels()

    # save and show figure of result
    plt.savefig(dir + "ex1_result.png")
    plt.show()
Example #42
# Visualize an STFT power spectrum
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

y, sr = librosa.load(librosa.util.example_audio_file())
plt.figure(figsize=(12, 8))

D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
plt.subplot(4, 2, 1)
librosa.display.specshow(D, y_axis='linear')
plt.colorbar(format='%+2.0f dB')
plt.title('Linear-frequency power spectrogram')

# Or on a logarithmic scale

plt.subplot(4, 2, 2)
librosa.display.specshow(D, y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Log-frequency power spectrogram')

# Or use a CQT scale

CQT = librosa.amplitude_to_db(np.abs(librosa.cqt(y, sr=sr)), ref=np.max)
plt.subplot(4, 2, 3)
librosa.display.specshow(CQT, y_axis='cqt_note')
plt.colorbar(format='%+2.0f dB')
plt.title('Constant-Q power spectrogram (note)')

plt.subplot(4, 2, 4)
librosa.display.specshow(CQT, y_axis='cqt_hz')
plt.colorbar(format='%+2.0f dB')
Example #43
amountOfSegments = times.size

file = open(fileJustName + '_times.txt', 'w')
for cont in range(amountOfSegments - 1):
    posFrameInit = times[cont]
    file.write(str(posFrameInit) + "\n")
file.close()

# Plot
timess = librosa.frames_to_time(np.arange(len(onset_env)),
                                sr=sr,
                                hop_length=512)
plt.figure()
ax = plt.subplot(2, 1, 2)
D = librosa.stft(y)
librosa.display.specshow(librosa.amplitude_to_db(np.abs(D), ref=np.max),
                         y_axis='log',
                         x_axis='time')
plt.subplot(2, 1, 1, sharex=ax)
plt.plot(timess, onset_env, alpha=0.8, label='Onset strength')
plt.vlines(timess[peaks],
           0,
           onset_env.max(),
           color='r',
           alpha=0.8,
           label='Selected peaks')
plt.legend(frameon=True, framealpha=0.8)
plt.axis('tight')
plt.tight_layout()
plt.show()
Example #44
t=np.linspace(0,N/fe,N);
s = 0.2*np.cos(2*np.pi*200*t) + 2*np.cos(2*np.pi*400*t);
tf=np.linspace(0,fe/N,N);
plt.subplot(1,2,1);
plt.plot(t[:200],s[:200]);
plt.title('280Hz and 500Hz, fe=8000Hz')
plt.subplot(1,2,2);
plt.plot(np.abs(np.fft.fft(s)));
plt.title('280Hz and 500Hz, fe=8000Hz')
"""
#x, fe = librosa.load('ressources/mesange-tete-noire.wav')
x, fe = librosa.load('ressources/PIANO.wav')
plt.figure(figsize=(14, 5))
librosa.display.waveplot(x, sr=fe)
plt.title('')
plt.show()
fe /= 2
n = len(x)
t = np.linspace(0, n / fe, n, endpoint=False)
s = 0.75 * np.cos(2 * np.pi * 440 * t)
plt.plot(t, x)
plt.plot(np.abs(np.fft.fft(s)))
S = np.abs(librosa.stft(s))
Sdb = librosa.amplitude_to_db(S)
#librosa.display.specshow(Sdb, sr=fe, x_axis='time', y_axis='hz')
#librosa.display.specshow(Sdb, sr=fe, x_axis='time', y_axis='hz')

sd.play(x, fe)
status = sd.wait()
Example #45
import librosa.display

#############################################
# Load an example signal
y, sr = librosa.load('audio/sir_duke_slow.mp3')


# And compute the spectrogram magnitude and phase
S_full, phase = librosa.magphase(librosa.stft(y))


###################
# Plot the spectrum
plt.figure(figsize=(12, 4))
librosa.display.specshow(librosa.amplitude_to_db(S_full, ref=np.max),
                         y_axis='log', x_axis='time', sr=sr)
plt.colorbar()
plt.tight_layout()

###########################################################
# As you can see, there are periods of silence and
# non-silence throughout this recording.
#

# As a first step, we can plot the root-mean-square (RMS) curve
rms = librosa.feature.rms(y=y)[0]

times = librosa.frames_to_time(np.arange(len(rms)))

plt.figure(figsize=(12, 4))
Example #46
def main(args):
    """
    fname = "aiueo.wav"
    """

    # get current working directory
    path = os.path.dirname(os.path.abspath(__file__))

    # load audio file
    fname = os.path.join(path, "data", args.fname)
    wav, sr = librosa.load(fname, mono=True)

    # plot signal
    plt.figure()
    ax = plt.subplot(111)
    librosa.display.waveplot(wav, sr=sr, color="g", ax=ax)
    ax.set(title="Original signal", xlabel="Time [s]", ylabel="Magnitude")
    save_fname = os.path.join(path, "result", "signal.png")
    plt.savefig(save_fname, transparent=True)
    plt.show()

    # parameter
    hop = 0.5
    win_length = 1024
    hop_length = int(win_length * hop)

    # make mel filter bank
    n_channels = 20  # the number of mel filter bank channels
    df = sr / win_length  # frequency resolution (Hz width per frequency index 1)
    filterbank, _ = melFilterBank(sr, win_length, n_channels)

    # plot mel filter bank
    for c in range(n_channels):
        plt.plot(np.arange(0, win_length / 2) * df, filterbank[c])

    plt.title("Mel filter bank")
    plt.xlabel("Frequency [Hz]")
    save_fname = os.path.join(path, "result", "MelFilterBank.png")
    plt.savefig(save_fname, transparent=True)
    plt.show()

    # spectrogram (ex1)
    fig, ax = plt.subplots(nrows=1, ncols=1)
    amp = utils.stft(wav, hop=hop, win_length=win_length)
    db = librosa.amplitude_to_db(np.abs(amp))
    img = librosa.display.specshow(
        db,
        sr=sr,
        hop_length=hop_length,
        x_axis="time",
        y_axis="linear",
        ax=ax,
        cmap="rainbow",
    )
    ax.set(title="Spectrogram", xlabel=None, ylabel="Frequency [Hz]")
    fig.colorbar(img, aspect=10, pad=0.01, ax=ax, format="%+2.f dB")
    save_fname = os.path.join(path, "result", "spectrogram.png")
    plt.savefig(save_fname, transparent=True)
    plt.show()

    fig, ax = plt.subplots(nrows=4, ncols=1, sharex=True, figsize=(10, 6))
    plt.subplots_adjust(hspace=0.6)

    # calculate mel spectrogram and mfcc
    mel_spec, mfcc = calc_mfcc(wav, hop, win_length, filterbank)

    # mel spectrogram
    wav_time = wav.shape[0] // sr
    f_nyq = sr // 2
    extent = [0, wav_time, 0, f_nyq]

    img = ax[0].imshow(
        librosa.amplitude_to_db(mel_spec),
        aspect="auto",
        extent=extent,
        cmap="rainbow",
    )
    ax[0].set(
        title="Mel spectrogram",
        xlabel=None,
        ylabel="Mel frequency [mel]",
        ylim=[0, 8000],
        yticks=range(0, 10000, 2000),
    )
    fig.colorbar(img, aspect=10, pad=0.01, ax=ax[0], format="%+2.f dB")

    # mfcc
    n_mfcc = 12
    extent = [0, wav_time, 0, n_mfcc]
    img = ax[1].imshow(np.flipud(mfcc[:n_mfcc]),
                       aspect="auto",
                       extent=extent,
                       cmap="rainbow")
    ax[1].set(
        title="MFCC sequence",
        xlabel=None,
        ylabel="MFCC",
        yticks=range(0, 13, 4),
    )
    fig.colorbar(img, aspect=10, pad=0.01, ax=ax[1], format="%+2.f dB")

    # d-mfcc
    d_mfcc = delta_mfcc(mfcc, k=2)

    img = ax[2].imshow(np.flipud(d_mfcc[:n_mfcc]),
                       aspect="auto",
                       extent=extent,
                       cmap="rainbow")
    ax[2].set(
        title="ΔMFCC sequence",
        xlabel=None,
        ylabel="ΔMFCC",
        yticks=range(0, 13, 4),
    )
    fig.colorbar(img, aspect=10, pad=0.01, ax=ax[2], format="%+2.f dB")

    # dd-mfcc
    dd_mfcc = delta_mfcc(d_mfcc, k=2)
    img = ax[3].imshow(np.flipud(dd_mfcc[:n_mfcc]),
                       aspect="auto",
                       extent=extent,
                       cmap="rainbow")
    ax[3].set(
        title="ΔΔMFCC sequence",
        xlabel="Time [s]",
        ylabel="ΔΔMFCC",
        yticks=range(0, 13, 4),
    )
    fig.colorbar(img, aspect=10, pad=0.01, ax=ax[3], format="%+2.f dB")

    save_fname = os.path.join(path, "result", "mfcc_result.png")
    plt.savefig(save_fname, transparent=True)
    plt.show()