def audio_to_midi_melody(infile, outfile):
    """Extract a melody from an audio file, save it as MIDI and plot the analysis.

    The audio is loaded as mono, analysed frame by frame (YinFFT pitch,
    energy, frequency bands) and also as a whole with
    ``PredominantPitchMelodia``. Note onsets are obtained by segmenting the
    YinFFT pitch curve, a tempo is estimated from the frequency-band novelty
    curve, and the result is handed to ``saveMIDI``. Finally three diagnostic
    subplots are shown (pitch curves with note onsets, normalised log-energy
    with energy peaks, and the quantised note curve).

    Args:
        infile: Path of the audio file, passed to ``es.MonoLoader``.
        outfile: Destination passed as first argument to ``saveMIDI``.

    Returns:
        None. The function writes through ``saveMIDI`` and opens a
        matplotlib window via ``plt.show()``.
    """
    # Load audio and define the analysis parameters.
    audio = es.MonoLoader(filename=infile)()
    winSize = 2048
    hopSize = 128
    Fs = 44100  # NOTE(review): assumed to match the loader's output rate -- confirm
    numSamples = np.size(audio)
    totalTime = numSamples / float(Fs)

    # Instantiate the Essentia algorithms once, outside the frame loop.
    w = es.Windowing(type='hann', size=winSize)
    yin = es.PitchYinFFT(frameSize=winSize, minFrequency=20, maxFrequency=2000)
    spectrum = es.Spectrum()  # per-frame spectrum
    calcEnergy = es.Energy()
    freqBandsAlgo = es.FrequencyBands()
    predMel = es.PredominantPitchMelodia(
        numberHarmonics=15,
        filterIterations=3,
        frameSize=winSize,
        hopSize=hopSize,
        minFrequency=20,
        maxFrequency=2000,
        minDuration=50,
    )

    # Number of hops covering the signal. The ceil has to wrap the division:
    # applied to the (integer) sample count alone it is a no-op.
    vectSize = np.ceil(numSamples / float(hopSize))
    # Peak detector for the energy curve; its input has to be normalised.
    peakDet = es.PeakDetection(threshold=0.15, maxPosition=vectSize, range=vectSize, maxPeaks=300)

    # Per-frame accumulators.
    melodyV = []
    confV = []
    energy = []
    freqBands = []

    for frame in es.FrameGenerator(audio, winSize, hopSize):
        spec = spectrum(w(frame))
        melody, conf = yin(spec)  # YinFFT pitch estimate and its confidence
        confV.append(conf)
        # Low-confidence frames are stored as 0.
        melodyV.append(0 if conf < 0.5 else melody)
        energy.append(calcEnergy(frame))
        freqBands.append(freqBandsAlgo(spec))

    # Convert to float32 arrays compatible with Essentia.
    melodyV = arrayCast(melodyV)
    confV = arrayCast(confV)
    energy = arrayCast(energy)

    # Second melody estimate, computed on the whole signal.
    predMelody, confid = predMel(audio)

    melodyV = freq2cent(melodyV)
    predMelody = freq2cent(predMelody)

    # Log-energy; the small offset avoids log10(0) on silent frames.
    energy = np.log10(energy + .0000001)
    # NOTE(review): if every log-energy value is negative, dividing by the
    # maximum flips the sign of the curve -- confirm this is intended.
    energy = energy / float(max(energy))
    # Median filter to smooth the curve (original note: value in ms).
    smoothEnergy = medfilt(arrayCast(energy), .7)
    onsets, amplitudes = peakDet(smoothEnergy)  # energy peaks

    noteOnsets = arrayCast(segmentByFreq(melodyV.copy(), totalTime))

    bpm, bpmAmpl = es.NoveltyCurveFixedBpmEstimator()(es.NoveltyCurve()(arrayCast(freqBands)))

    saveMIDI(outfile, noteOnsets, predMelody, bpm[0], Fs, hopSize)

    # Plot: pitch curves with the note onsets.
    plt.subplot(3, 1, 1)
    plt.plot(np.linspace(0, totalTime, np.size(melodyV)), melodyV, 'b')
    plt.plot(np.linspace(0, totalTime, np.size(predMelody)), predMelody, 'r')
    plt.vlines(noteOnsets*hopSize/Fs, min(melodyV), max(melodyV))

    # Plot: normalised log-energy with the detected peaks.
    plt.subplot(3, 1, 2)
    plt.plot(np.linspace(0, totalTime, np.size(energy)), energy)
    plt.vlines(onsets*hopSize/Fs, min(energy), max(energy))

    # Plot: quantised note curve (computed once and reused for both axes).
    plt.subplot(3, 1, 3)
    quantized = quantizeNote(cambioNota(melodyV))
    plt.plot(np.linspace(0, totalTime, np.size(quantized)), quantized)
    plt.show()
# Example #2
# 0
# Script fragment: frame-wise analysis of `audio`.
# NOTE(review): `audio`, `winSize`, `hopSize`, `melodyV`, the Essentia
# algorithm instances (`spectrum`, `w`, `yin`, `calcEnergy`, `predMel`,
# `peakDet`) and the helper functions are expected to be defined before this
# point -- they are not visible in this part of the file.
confV = []
energy = []
freqBands = []

# Build the frequency-band extractor once instead of once per frame.
freqBandsAlgo = es.FrequencyBands()

for frame in es.FrameGenerator(audio, winSize, hopSize):
    spec = spectrum(w(frame))   # per-frame spectrum
    melody, conf = yin(spec)    # YinFFT pitch estimate and its confidence
    confV.append(conf)          # keep the confidence; it was previously dropped
    melody = 0 if conf < 0.5 else melody  # low-confidence frames are stored as 0
    melodyV.append(melody)
    ener = calcEnergy(frame)    # per-frame energy
    energy.append(ener)
    freqBands.append(freqBandsAlgo(spec))

# Convert to float32 arrays compatible with Essentia
melodyV = arrayCast(melodyV)
confV = arrayCast(confV)
energy = arrayCast(energy)

predMelody, confid = predMel(audio)  # second melody estimate on the whole signal

melodyV = freq2cent(melodyV)
predMelody = freq2cent(predMelody)

# Log-energy; the small offset avoids log10(0) on silent frames.
energy = np.log10(energy+.0000001)
# NOTE(review): if every log-energy value is negative, dividing by the maximum
# flips the sign of the curve -- confirm this is intended.
energy = energy/float(max(energy))  # normalize energy
smoothEnergy = medfilt(arrayCast(energy), .7)   # median filter to smooth the curve (original note: value in ms)
onsets, amplitudes = peakDet(smoothEnergy)  # detect peaks

noteOnsets = arrayCast(segmentByFreq(melodyV.copy()))