# Example #1
def get_hpeaks_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """
    Get harmonic peaks for every frame of an audio signal.

    :param audio: Audio either mono or stereo. Stereo is downmixed to mono.
    :param sr: Samplerate of the audio (forwarded to the pitch tracker).
    :param onlyfrecuencies: If True, return only the frequency matrix.
    :param nsines: Maximum number of sinusoidal tracks for SineModelAnal.
    :return: (freqs, mags), each of shape (nframes, npeaks) — or only the
             freqs array when ``onlyfrecuencies`` is True.  npeaks is the
             number of harmonic peaks HarmonicPeaks emits per frame
             (NOT fixed at 100 as the old docstring claimed).
    """
    frame_size, hop_size = 4096, 2048

    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])

    fft_algo = std.FFT()
    # BUG FIX: `sr` was previously ignored — PitchYin ran with its default
    # samplerate.  frameSize is configured to match the frames we feed it.
    pyin = std.PitchYin(frameSize=frame_size, sampleRate=sr)
    hpeaks = std.HarmonicPeaks()
    sine_anal = std.SineModelAnal(maxnSines=nsines,
                                  orderBy='frequency',
                                  minFrequency=1)
    sines = []
    for frame in std.FrameGenerator(audio, frameSize=frame_size,
                                    hopSize=hop_size):
        pitch, _ = pyin(frame)
        freqs, mags, _ = sine_anal(fft_algo(frame))
        # HarmonicPeaks requires frequencies sorted ascending with the
        # zero-frequency (empty) tracks removed.
        order = np.argsort(freqs)
        freqs = freqs[order]
        mags = mags[order]
        voiced = np.where(freqs != 0)
        freqs, mags = hpeaks(freqs[voiced], mags[voiced], pitch)
        sines.append([freqs, mags])
    sines = np.array(sines)
    if onlyfrecuencies:
        return sines[:, 0, :]
    return sines[:, 0, :], sines[:, 1, :]
    def get_Yin_Pitch(self):
        """
        Run PitchYin frame-by-frame over ``self.audio``.

        :return: (pitches ndarray, confidence list, frame-time-in-seconds
                  list), or (None, None, None) when no audio is loaded.
        """
        # BUG FIX: the old guard was `self.audio != []`, which compares a
        # (presumably numpy) signal against a list — elementwise/ambiguous.
        # Test emptiness explicitly instead; also tolerate audio=None.
        if self.audio is not None and len(self.audio) > 0:
            pitch_detect = es.PitchYin(frameSize=self.frameSize,
                                       sampleRate=self.sampleRate)
            est_pitch = []
            confidences = []
            frame_times = []
            for i, frame in enumerate(
                    es.FrameGenerator(self.audio,
                                      frameSize=self.frameSize,
                                      hopSize=self.hopSize)):
                f, conf = pitch_detect(frame)
                est_pitch.append(f)
                confidences.append(conf)
                # frame start time in seconds
                frame_times.append(i * self.hopSize / self.sampleRate)

            return np.array(est_pitch), confidences, frame_times
        else:
            return None, None, None
# Example #3
# Module-level Essentia algorithm instances, grouped by descriptor family.
# Instantiated once here and (presumably) reused per-frame by callers.

# Temporal descriptors
power = es.InstantPower()
log_attack_time = es.LogAttackTime()
effective_duration = es.EffectiveDuration()
auto_correlation = es.AutoCorrelation()
zero_crossing_rate = es.ZeroCrossingRate()

# Spectral descriptors
peak_freq = es.MaxMagFreq()
roll_off = es.RollOff()
flux = es.Flux()
flatness = es.Flatness()

# Harmonic descriptors
pitch = es.PitchYin(frameSize=1024)
spectral_peaks = es.SpectralPeaks(minFrequency=1e-5)  # tiny floor excludes 0-Hz peaks
harmonic_peaks = es.HarmonicPeaks()
inharmonicity = es.Inharmonicity()
oer = es.OddToEvenHarmonicEnergyRatio()
tristimulus = es.Tristimulus()

# MFCC
mfcc = es.MFCC(inputSize=513)  # 513 = 1024/2 + 1 bins, matching frameSize=1024 above


class Audio:
    """Container pairing a mono-loaded signal with its file name and an Essentia Pool."""
    def __init__(self, path):
        # NOTE(review): path is treated as pathlib.Path-like (str() + .name) — confirm callers.
        self.audio = es.MonoLoader(filename=str(path))()  # downmixed mono signal
        self.name = path.name
        self.pool = essentia.Pool()  # empty pool for descriptors computed later
def pitchCalculation(audio, start_end_samples, frameSize, sampleRate, maxFrequency):
    """Run PitchYin on a slice of *audio* and return (pitch, confidence).

    *start_end_samples* is a (start, end) pair of sample indices bounding
    the region to analyse.
    """
    start, end = start_end_samples[0], start_end_samples[1]
    yin = ess.PitchYin(frameSize=frameSize,
                       sampleRate=sampleRate,
                       maxFrequency=maxFrequency)
    return yin(audio[start:end])
# Example #5
import os
import matplotlib.pyplot as plt

mbid = 'ead85d20-ce7d-4ed0-a00d-0ae199b94d12'  # recording id; also the folder name

hopSize = 128  # small hop -> fine-grained pitch/loudness curves

# Expects <mbid>/<mbid>-voice.mp3 relative to the working directory.
loader = es.MonoLoader(filename=os.path.join(mbid, mbid + '-voice.mp3'))
track = loader()

# track[track<0.000001] = 0

# Track duration in seconds (assumes MonoLoader's default 44100 Hz samplerate).
print(len(track) / 44100.)

# pY = es.PitchYin(minFrequency=55, maxFrequency=900, tolerance=0.06)
pY = es.PitchYin(minFrequency=55, maxFrequency=600, tolerance=0.03)
rms = es.RMS()

pitch = []
loudness = []

print('Computing pitch and loudness')
for frame in es.FrameGenerator(track,
                               frameSize=2048,
                               hopSize=hopSize,
                               startFromZero=True):
    f = pY(frame)
    if f[1] >= 0.8:
        pitch.append(f[0])
        loudness.append(rms(frame))
    else:
# Example #6
import essentia.standard as ess
import numpy as np

# --- Analysis parameters ---
M = 1024     # window / frame size (samples)
N = 1024     # FFT size
H = 512      # hop size (samples)
fs = 44100   # samplerate (Hz)

x = ess.MonoLoader(filename='output3.wav', sampleRate=fs)()
# BUG FIX: removed a stray `ess.AudioLoader()` here — it instantiated an
# algorithm and immediately discarded it (dead code; the signal is already
# loaded by MonoLoader above).

spectrum = ess.Spectrum(size=N)
window = ess.Windowing(size=M, type='hann')

pitchYin = ess.PitchYin()

hpcp = ess.HPCP()
hpcps = []

spectralPeaks = ess.SpectralPeaks()
pitches = []
pitchConfidences = []
for frame in ess.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True):
    pitch, pitchConfidence = pitchYin(frame)
    if pitchConfidence < 0.9:
        pitches.append(None)
def algorithm_pitch_note_essentia(sound):
    """
    Estimate the note of a given audio file with two Essentia pitch trackers.

    :param sound: sound dictionary from dataset
    :return: dictionary with results per different methods
             ('EssentiaPitchYin', 'EssentiaPitchYinFFT'), each holding
             'note', 'midi_note' and 'pitch'.
    """
    results = dict()

    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=44100)
    frame_size = 1024
    hop_size = frame_size  # non-overlapping frames

    # --- PitchYin: time-domain, consumes raw frames ---
    pitch_detect = estd.PitchYin(frameSize=frame_size, sampleRate=44100)
    pitches = []
    for frame in estd.FrameGenerator(audio, frameSize=frame_size,
                                     hopSize=hop_size):
        f, _conf = pitch_detect(frame)
        pitches.append(f)
    results['EssentiaPitchYin'] = _pitch_summary(pitches)

    # --- PitchYinFFT: spectral, consumes windowed magnitude spectra ---
    pitch_detect = estd.PitchYinFFT(frameSize=frame_size, sampleRate=44100)
    win = estd.Windowing(type='hann')
    spectrum = estd.Spectrum()  # hoisted: was re-instantiated every frame
    pitches = []
    for frame in estd.FrameGenerator(audio, frameSize=frame_size,
                                     hopSize=hop_size):
        f, _conf = pitch_detect(spectrum(win(frame)))
        pitches.append(f)
    results['EssentiaPitchYinFFT'] = _pitch_summary(pitches)

    return results


def _pitch_summary(pitches):
    """Reduce per-frame pitch estimates to a {'note','midi_note','pitch'} dict.

    Unvoiced frames (pitch == 0) are discarded and the median of the voiced
    frames is taken; 0.1 Hz is the original code's sentinel when no voiced
    frame exists (keeps the midi conversion well-defined).
    """
    voiced = [p for p in pitches if p > 0]
    pitch_median = median(voiced) if voiced else 0.1
    midi_note = frequency_to_midi_note(pitch_median)
    return {
        'note': midi_note_to_note(midi_note),
        'midi_note': midi_note,
        'pitch': pitch_median,
    }