Exemple #1
0
def estimate_main_band(infile):
    """
    Estimate if this is a low, mid, or high track.

    Not _really_ sure if this does what I need it to,
    but some quick tests looked right.
    """
    # Streaming chain: mono audio -> frames -> window -> spectrum ->
    # three energy bands (0-250, 250-750, 750-4000 Hz).
    loader = streaming.MonoLoader(filename=infile)
    cutter = streaming.FrameCutter()
    window = streaming.Windowing(type="blackmanharris62")
    spec = streaming.Spectrum()
    bands = streaming.FrequencyBands(frequencyBands=[0, 250, 750, 4000])
    accum = Pool()

    loader.audio >> cutter.signal
    cutter.frame >> window.frame >> spec.frame
    spec.spectrum >> bands.spectrum
    bands.bands >> (accum, 'bands')

    run(loader)

    # Sum each band's energy across all frames; the biggest total wins.
    totals = np.sum(accum['bands'], axis=0)
    labels = {0: 'low', 1: 'mid', 2: 'high'}
    return labels.get(int(np.argmax(totals)))
def analysisSynthesisStreaming(params, signal):
    """
    FFT/IFFT analysis-synthesis round trip in essentia streaming mode.

    Parameters
    ----------
    params : dict
        Must provide integer 'frameSize' and 'hopSize'.
    signal : numpy array
        Mono audio samples.

    Returns
    -------
    numpy array with the resynthesised audio, trimmed so it lines up
    with the input (the first half window of frames is dropped).
    """
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="hann")
    fft = es.FFT(size=params['frameSize'])
    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'],
                          hopSize=params['hopSize'])

    # Add half a window of zeros to the input signal to reach the same
    # output length.  Integer division matters: '/' yields a float under
    # Python 3 and numpy.zeros() rejects float sizes (sibling functions
    # here already use '//').
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')

    essentia.run(insignal)

    # Drop the first half window of frames introduced by the padding.
    outaudio = pool['audio']
    outaudio = outaudio[2 * params['hopSize']:]
    return outaudio
Exemple #3
0
def get_key(file_in):
    """
    Estimates the key and scale for an audio file.
    """
    # Streaming chain: audio -> frames -> window -> spectrum ->
    # spectral peaks -> HPCP chroma -> key estimation.
    loader = streaming.MonoLoader(filename=file_in)
    cutter = streaming.FrameCutter()
    window = streaming.Windowing(type="blackmanharris62")
    spec = streaming.Spectrum()
    peaks = streaming.SpectralPeaks(orderBy="magnitude",
                                    magnitudeThreshold=1e-05,
                                    minFrequency=40,
                                    maxFrequency=5000,
                                    maxPeaks=10000)
    chroma = streaming.HPCP()
    keyalg = streaming.Key()
    results = Pool()

    loader.audio >> cutter.signal
    cutter.frame >> window.frame >> spec.frame
    spec.spectrum >> peaks.spectrum
    peaks.magnitudes >> chroma.magnitudes
    peaks.frequencies >> chroma.frequencies
    chroma.hpcp >> keyalg.pcp
    keyalg.key >> (results, 'tonal.key_key')
    keyalg.scale >> (results, 'tonal.key_scale')
    keyalg.strength >> (results, 'tonal.key_strength')

    run(loader)

    return Key(results['tonal.key_key'], results['tonal.key_scale'])
Exemple #4
0
def analHpsModelStreaming(params, signal):
    """
    Harmonic-plus-stochastic (HPS) model analysis in essentia streaming mode.

    Parameters
    ----------
    params : dict
        Analysis settings: 'frameSize', 'hopSize', 'sampleRate',
        'maxnSines', 'magnitudeThreshold', 'freqDevOffset',
        'freqDevSlope', 'minFrequency', 'maxFrequency', 'stocf',
        'minSineDur'.
    signal : numpy array
        Mono audio samples.

    Returns
    -------
    tuple
        (magnitudes, cleaned frequencies, phases) per-frame arrays;
        harmonic tracks shorter than 'minSineDur' are removed from the
        frequencies.
    """

    #out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params['frameSize'],
                                 sampleRate=params['sampleRate'])

    smanal = es.HpsModelAnal(sampleRate=params['sampleRate'],
                             hopSize=params['hopSize'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'],
                             stocf=params['stocf'])

    # add half window of zeros to input signal to reach same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput (signal)


    # analysis network: raw frames feed both the windowed spectrum
    # (for pitch detection) and the HPS analysis directly
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    spec.spectrum >> pitchDetect.spectrum

    fcut.frame >> smanal.frame
    pitchDetect.pitch >> smanal.pitch
    pitchDetect.pitch >> (pool, 'pitch')
    pitchDetect.pitchConfidence >> (pool, 'pitchConfidence')
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    smanal.stocenv >> (pool, 'stocenv')


    essentia.run(insignal)

    # collect the per-frame analysis results
    mags = pool['magnitudes']
    freqs = pool['frequencies']
    phases = pool['phases']
    pitchConf = pool['pitchConfidence']

    # remove short tracks: minSineDur (seconds) converted to a frame count
    minFrames = int(params['minSineDur'] * params['sampleRate'] / params['hopSize'])
    freqsClean = cleaningHarmonicTracks(freqs, minFrames, pitchConf)
    pool['frequencies'].data = freqsClean

    return mags, freqsClean, phases
Exemple #5
0
def analsynthSineModelStreaming(params, signal):
    """
    Sine-model analysis/synthesis round trip in essentia streaming mode.

    Parameters
    ----------
    params : dict
        'frameSize', 'hopSize', 'sampleRate', 'maxnSines',
        'magnitudeThreshold', 'freqDevOffset', 'freqDevSlope',
        'minSineDur'.
    signal : numpy array
        Mono audio samples.

    Returns
    -------
    tuple
        (outaudio, pool): the resynthesised audio (first half window of
        frames trimmed) and the pool holding 'magnitudes',
        'frequencies' (cleaned of short tracks), 'phases' and 'audio'.
    """
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    smanal = es.SineModelAnal(sampleRate=params['sampleRate'],
                              maxnSines=params['maxnSines'],
                              magnitudeThreshold=params['magnitudeThreshold'],
                              freqDevOffset=params['freqDevOffset'],
                              freqDevSlope=params['freqDevSlope'])
    smsyn = es.SineModelSynth(sampleRate=params['sampleRate'],
                              fftSize=params['frameSize'],
                              hopSize=params['hopSize'])
    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          gain=1. / params['frameSize'])

    # Add half a window of zeros to the input signal to reach the same
    # output length.  '//' (not '/') so numpy.zeros gets an int size
    # under Python 3, consistent with the other functions in this file.
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)
    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smsyn.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')

    essentia.run(insignal)

    # remove sine tracks shorter than minSineDur (converted to frames)
    freqs = pool['frequencies']
    minFrames = int(params['minSineDur'] * params['sampleRate'] /
                    params['hopSize'])
    freqsClean = cleaningSineTracks(freqs, minFrames)
    pool['frequencies'].data = freqsClean

    # drop the first half window of frames introduced by the padding
    outaudio = pool['audio']
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
Exemple #6
0
    def cutAudioFile(self, filename, frameSize, hopSize, startFromZero, expectedNumFrames):
        # Load one of the short synthesised test files, cut it into
        # frames, and collect every frame into a pool.
        path = join(testdata.audio_dir, 'generated', 'synthesised',
                    'shortfiles', filename)
        reader = es.MonoLoader(filename=path)
        cutter = es.FrameCutter(frameSize=frameSize,
                                hopSize=hopSize,
                                startFromZero=startFromZero)
        collected = Pool()

        reader.audio >> cutter.signal
        cutter.frame >> (collected, 'audio.frames')
        run(reader)

        # The number of produced frames must match the expectation.
        self.assertEqual(len(collected['audio.frames']), expectedNumFrames)
def analsynthSpsModelStreaming(params, signal):
    """
    Sinusoidal-plus-stochastic (SPS) model analysis/synthesis round trip
    in essentia streaming mode.

    Parameters
    ----------
    params : dict
        'frameSize', 'hopSize', 'sampleRate', 'maxnSines',
        'magnitudeThreshold', 'freqDevOffset', 'freqDevSlope',
        'minFrequency', 'maxFrequency', 'stocf'.
    signal : numpy array
        Mono audio samples.

    Returns
    -------
    tuple
        (outaudio, pool): resynthesised audio (first half window of
        frames trimmed) and the pool with 'frames', 'sineframes' and
        'stocframes'.
    """

    out = array([0.])  # NOTE(review): unused, kept as-is

    pool = essentia.Pool()
    # windowing and FFT
    # NOTE(review): w and spec are created but never connected below --
    # SpsModelAnal takes raw frames directly; confirm they can be removed
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    smanal = es.SpsModelAnal(sampleRate=params['sampleRate'],
                             hopSize=params['hopSize'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'],
                             stocf=params['stocf'])
    synFFTSize = min(
        int(params['frameSize'] / 4),
        4 * params['hopSize'])  # make sure the FFT size is appropriate
    smsyn = es.SpsModelSynth(sampleRate=params['sampleRate'],
                             fftSize=synFFTSize,
                             hopSize=params['hopSize'],
                             stocf=params['stocf'])

    # add half window of zeros to input signal to reach same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> smanal.frame

    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smanal.stocenv >> smsyn.stocenv

    smsyn.frame >> (pool, 'frames')
    smsyn.sineframe >> (pool, 'sineframes')
    smsyn.stocframe >> (pool, 'stocframes')

    essentia.run(insignal)

    # stitch the synthesised frames back into audio and drop the first
    # half window of frames introduced by the padding
    outaudio = framesToAudio(pool['frames'])
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
def analsynthHarmonicMaskStreaming(params, signal):
    """
    Apply a harmonic mask to a signal in essentia streaming mode.

    The pitch is tracked with PitchYinFFT; HarmonicMask then attenuates
    (or boosts, depending on 'attenuation_dB') the bins around the
    harmonics of that pitch before the IFFT/overlap-add resynthesis.

    Parameters
    ----------
    params : dict
        'frameSize', 'hopSize', 'sampleRate', 'binWidth',
        'attenuation_dB'.
    signal : numpy array
        Mono audio samples.

    Returns
    -------
    tuple
        (outaudio, pool): the masked, resynthesised audio (first half
        window of frames trimmed) and the pool with 'pitchConfidence'
        and 'audio'.
    """

    out = array([0.])  # NOTE(review): unused, kept as-is

    pool = essentia.Pool()
    # windowing and FFT
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    spec = es.Spectrum(size=params['frameSize'])

    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params['frameSize'],
                                 sampleRate=params['sampleRate'])

    hmask = es.HarmonicMask(sampleRate=params['sampleRate'],
                            binWidth=params['binWidth'],
                            attenuation=params['attenuation_dB'])

    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'],
                          hopSize=params['hopSize'])

    # add half window of zeros to input signal to reach same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis: the windowed frame feeds both the magnitude spectrum
    # (for pitch) and the complex FFT (for masking)
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    w.frame >> fft.frame
    spec.spectrum >> pitchDetect.spectrum

    fft.fft >> hmask.fft
    pitchDetect.pitch >> hmask.pitch
    pitchDetect.pitchConfidence >> (pool, 'pitchConfidence')

    hmask.fft >> ifft.fft

    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')

    essentia.run(insignal)

    # remove the first half window of frames introduced by the padding
    outaudio = pool['audio']
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
    def estimate_chroma(self, uid):
        """
        Compute an HPCP chroma matrix for the audio identified by `uid`.

        The file path is resolved through self.audio_path_extractor, the
        audio is cut into frames (self.frame_size / self.hop_size),
        spectral peaks are folded into a 12-bin HPCP tuned to
        self.tuning_freq, and the result is rolled so the first bin is
        'C' instead of 'A'.

        Returns a (num_frames x 12) numpy array.
        """
        loader = esstr.MonoLoader(
            filename=self.audio_path_extractor.audio_path_name(uid))
        framecutter = esstr.FrameCutter(hopSize=self.hop_size,
                                        frameSize=self.frame_size)
        windowing = esstr.Windowing(type="blackmanharris62")
        spectrum = esstr.Spectrum()
        spectralpeaks = esstr.SpectralPeaks(orderBy="magnitude",
                                            magnitudeThreshold=1e-05,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            maxPeaks=10000)
        hpcp = esstr.HPCP(size=12,
                          referenceFrequency=self.tuning_freq,
                          harmonics=8,
                          bandPreset=True,
                          minFrequency=float(40),
                          maxFrequency=float(5000),
                          bandSplitFrequency=500.0,
                          weightType="cosine",
                          nonLinear=True,
                          windowSize=1.0)
        # the string below is an alternative HPCP configuration kept
        # for reference
        """
        hpcp = esstr.HPCP(
            size=12,
            referenceFrequency = tuningFreq,
            harmonics = 8,
            bandPreset = True,
            minFrequency = 40.0,
            maxFrequency = 5000.0,
            bandSplitFrequency = 250.0,
            weightType = "cosine",
            nonLinear = False,
            windowSize = 1.0)
        """
        pool = essentia.Pool()
        # connect algorithms together
        loader.audio >> framecutter.signal
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectrum.spectrum >> (pool, 'spectrum.magnitude')
        spectralpeaks.magnitudes >> hpcp.magnitudes
        spectralpeaks.frequencies >> hpcp.frequencies
        hpcp.hpcp >> (pool, 'chroma.hpcp')

        essentia.run(loader)
        # roll from 'A' based to 'C' based
        chroma = pool['chroma.hpcp']
        chroma = np.roll(chroma, shift=-3, axis=1)
        return chroma
def analsynthHprModelStreaming(params, signal):
    """
    Harmonic-plus-residual (HPR) model analysis/synthesis round trip in
    essentia streaming mode.

    Parameters
    ----------
    params : dict
        'frameSize', 'hopSize', 'sampleRate', 'maxnSines',
        'magnitudeThreshold', 'freqDevOffset', 'freqDevSlope',
        'minFrequency', 'maxFrequency'.
    signal : numpy array
        Mono audio samples.

    Returns
    -------
    tuple
        (outaudio, pool): the resynthesised audio (first half window of
        frames trimmed) and the pool with 'pitch', 'pitchConfidence',
        'frames', 'sineframes' and 'resframes'.
    """
    pool = essentia.Pool()
    # windowing and FFT
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params['frameSize'],
                                 sampleRate=params['sampleRate'])

    smanal = es.HprModelAnal(sampleRate=params['sampleRate'],
                             hopSize=params['hopSize'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'])
    # make sure the synthesis FFT size is appropriate; integer division
    # so fftSize stays an int under Python 3
    synFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])
    smsyn = es.SprModelSynth(sampleRate=params['sampleRate'],
                             fftSize=synFFTSize,
                             hopSize=params['hopSize'])

    # add half a window of zeros to the input signal to reach the same
    # output length ('//' keeps the size an int)
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis: raw frames feed both the windowed spectrum (for pitch)
    # and the HPR analysis directly
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    spec.spectrum >> pitchDetect.spectrum

    fcut.frame >> smanal.frame
    pitchDetect.pitch >> smanal.pitch
    pitchDetect.pitchConfidence >> (pool, 'pitchConfidence')
    pitchDetect.pitch >> (pool, 'pitch')
    # (a stray "print freqsClean" was removed here: 'freqsClean' was
    # never defined in this function and the Python 2 print statement
    # is a syntax error under Python 3)

    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smanal.res >> smsyn.res

    smsyn.frame >> (pool, 'frames')
    smsyn.sineframe >> (pool, 'sineframes')
    smsyn.resframe >> (pool, 'resframes')

    essentia.run(insignal)

    # stitch frames into audio, then drop the first half window of
    # frames introduced by the padding
    outaudio = framesToAudio(pool['frames'])
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
    def testSilentFrames(self):
        """
        Exercise FrameCutter's three silentFrames policies on an
        all-zero input: 'noise' (replace with noise), 'keep' (pass
        through) and 'drop' (discard).
        """
        input = [0] * 1024 * 3  # 3 frames of 1024
        gen = VectorInput(input)
        pool = Pool()
        # with frameSize=1024, hopSize=512 and startFromZero=True the
        # 3072 samples yield 5 (overlapping) frames
        expectedFrames = 5

        # adding noise: frames stay "silent" per isSilent() but carry
        # non-zero energy
        frameCutter = es.FrameCutter(frameSize=1024,
                                     hopSize=512,
                                     startFromZero=True,
                                     silentFrames="noise")

        gen.data >> frameCutter.signal
        frameCutter.frame >> (pool, 'frames')
        run(gen)
        self.assertEqual(len(pool['frames']), expectedFrames)
        energy = std.Energy()
        for f in pool['frames']:
            self.assertTrue(essentia._essentia.isSilent(f))
            self.assertTrue(energy(f) != 0)

        # clear collected frames and rewind the generator before
        # reconfiguring the same network
        pool.remove('frames')
        reset(gen)

        # keep silent frames: same frame count, but all-zero energy
        frameCutter.configure(frameSize=1024,
                              hopSize=512,
                              startFromZero=True,
                              silentFrames="keep")

        run(gen)

        self.assertEqual(len(pool['frames']), expectedFrames)
        energy = std.Energy()
        for f in pool['frames']:
            self.assertTrue(essentia._essentia.isSilent(f))
            self.assertTrue(energy(f) == 0)

        pool.remove('frames')
        reset(gen)

        # drop silent frames: nothing reaches the pool at all
        frameCutter.configure(frameSize=1024,
                              hopSize=512,
                              startFromZero=True,
                              silentFrames="drop")

        run(gen)
        self.assertTrue(len(pool.descriptorNames()) == 0)
Exemple #12
0
    def cutFrames(self, options, input=range(100)):
        """
        Cut `input` into frames with a streaming FrameCutter configured
        from `options` and return the collected frames.

        `options` must provide 'frameSize', 'hopSize' and
        'startFromZero'; 'validFrameThresholdRatio' defaults to 0.
        NOTE: as before, the default is written back into the caller's
        dict.  Returns the frames from the pool, or [] when every frame
        was dropped.
        """
        input = [float(x) for x in input]
        gen = VectorInput(input)
        pool = Pool()
        # setdefault keeps the original "assign if missing" behaviour
        # (idiomatic replacement for "if not 'x' in options")
        options.setdefault('validFrameThresholdRatio', 0)

        frameCutter = es.FrameCutter(
            frameSize=options['frameSize'],
            hopSize=options['hopSize'],
            startFromZero=options['startFromZero'],
            validFrameThresholdRatio=options['validFrameThresholdRatio'])

        gen.data >> frameCutter.signal
        frameCutter.frame >> (pool, 'frame')
        run(gen)
        # an empty descriptor list means no frame was ever produced
        if pool.descriptorNames():
            return pool['frame']
        return []
Exemple #13
0
def analsynthSineSubtractionStreaming(params, signal):
    """
    Sine-model analysis followed by sine subtraction in essentia
    streaming mode.

    Parameters
    ----------
    params : dict
        'frameSize', 'hopSize', 'sampleRate', 'maxnSines',
        'magnitudeThreshold', 'freqDevOffset', 'freqDevSlope'.
    signal : numpy array
        Mono audio samples.

    Returns
    -------
    tuple
        (outaudio, pool): the residual audio after subtracting the
        detected sinusoids (first half window of frames trimmed) and
        the pool with 'magnitudes', 'frequencies', 'phases', 'frames'.
    """
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    smanal = es.SineModelAnal(sampleRate=params['sampleRate'],
                              maxnSines=params['maxnSines'],
                              magnitudeThreshold=params['magnitudeThreshold'],
                              freqDevOffset=params['freqDevOffset'],
                              freqDevSlope=params['freqDevSlope'])

    # keep the subtraction FFT size appropriate; '//' (not '/') so the
    # size stays an int under Python 3
    subtrFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])
    smsub = es.SineSubtraction(sampleRate=params['sampleRate'],
                               fftSize=subtrFFTSize,
                               hopSize=params['hopSize'])

    # add half a window of zeros to the input signal to reach the same
    # output length ('//' keeps the size an int)
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))

    insignal = VectorInput(signal)
    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    # subtraction works on the raw (unwindowed) frames
    fcut.frame >> smsub.frame
    smanal.magnitudes >> smsub.magnitudes
    smanal.frequencies >> smsub.frequencies
    smanal.phases >> smsub.phases
    smsub.frame >> (pool, 'frames')

    essentia.run(insignal)

    # print() function instead of the Python 2 print statement
    print(pool['frames'].shape)
    outaudio = framesToAudio(pool['frames'])
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
def analSprModelStreaming(params, signal):
    """
    Sinusoidal-plus-residual (SPR) model analysis in essentia streaming
    mode.

    Parameters
    ----------
    params : dict
        'frameSize', 'hopSize', 'sampleRate', 'maxnSines',
        'magnitudeThreshold', 'freqDevOffset', 'freqDevSlope',
        'minFrequency', 'maxFrequency', 'minSineDur'.
    signal : numpy array
        Mono audio samples.

    Returns
    -------
    tuple
        (magnitudes, cleaned frequencies, phases) per-frame arrays;
        sine tracks shorter than 'minSineDur' are removed from the
        frequencies.
    """
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    smanal = es.SprModelAnal(sampleRate=params['sampleRate'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'])

    # Add half a window of zeros to the input signal to reach the same
    # output length.  '//' (not '/') so numpy.zeros gets an int size
    # under Python 3, consistent with the other functions in this file.
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis: SprModelAnal consumes the raw frames directly
    insignal.data >> fcut.signal

    fcut.frame >> smanal.frame
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    smanal.res >> (pool, 'res')

    essentia.run(insignal)

    # collect the per-frame analysis results
    mags = pool['magnitudes']
    freqs = pool['frequencies']
    phases = pool['phases']

    # remove sine tracks shorter than minSineDur (converted to frames)
    minFrames = int(params['minSineDur'] * params['sampleRate'] /
                    params['hopSize'])
    freqsClean = cleaningSineTracks(freqs, minFrames)
    pool['frequencies'].data = freqsClean

    return mags, freqsClean, phases
Exemple #15
0
    def tonalAnalysis(self, signal):
        """
        Compute per-frame tonal descriptors for `signal`.

        Builds a streaming network (frames -> window -> spectrum ->
        spectral peaks) feeding Dissonance, TuningFrequency and
        Inharmonicity.

        Returns a 3-tuple of per-frame arrays:
        (dissonance, inharmonicity, tuning frequency).
        """
        # essentia streaming expects single-precision input
        vectorinput = ess.VectorInput(np.single(signal))
        framecutter = ess.FrameCutter(frameSize=4096,
                                      hopSize=2048,
                                      silentFrames='noise')
        windowing = ess.Windowing(type='blackmanharris62')
        spectrum = ess.Spectrum()
        spectralpeaks = ess.SpectralPeaks(orderBy='frequency',
                                          magnitudeThreshold=1e-5,
                                          minFrequency=20,
                                          maxFrequency=3500,
                                          maxPeaks=60)

        dissonance = ess.Dissonance()
        tuning_frequency = ess.TuningFrequency()
        inharmonicity = ess.Inharmonicity()

        # Use pool to store data
        pool = essentia.Pool()

        # Connect streaming algorithms: the same peaks feed all three
        # tonal descriptors
        vectorinput.data >> framecutter.signal
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectralpeaks.magnitudes >> dissonance.magnitudes
        spectralpeaks.frequencies >> dissonance.frequencies
        spectralpeaks.magnitudes >> tuning_frequency.magnitudes
        spectralpeaks.frequencies >> tuning_frequency.frequencies
        spectralpeaks.magnitudes >> inharmonicity.magnitudes
        spectralpeaks.frequencies >> inharmonicity.frequencies

        dissonance.dissonance >> (pool, 'tonal.dissonance')
        inharmonicity.inharmonicity >> (pool, 'tonal.inharmonicity')
        tuning_frequency.tuningFrequency >> (pool, 'tonal.tuningFrequency')
        tuning_frequency.tuningCents >> (pool, 'tonal.tuningCents')

        # Run streaming network
        essentia.run(vectorinput)

        # tuningCents is collected in the pool but not returned
        return pool['tonal.dissonance'], pool['tonal.inharmonicity'], pool[
            'tonal.tuningFrequency']
    def computeNoveltyCurve(self,
                            filename,
                            frameSize=1024,
                            hopSize=512,
                            windowType='hann',
                            weightCurveType='hybrid',
                            sampleRate=44100.0,
                            startTime=0,
                            endTime=2000):
        """
        Compute the novelty curve of an audio file.

        Loads `filename` (left channel, between startTime and endTime
        seconds), extracts per-frame frequency-band energies, and feeds
        them to NoveltyCurve with the given weight curve type.

        Returns the novelty curve produced by NoveltyCurve.
        """
        loader = ess.EasyLoader(filename=filename,
                                startTime=startTime,
                                endTime=endTime,
                                sampleRate=sampleRate,
                                downmix='left')
        fc = ess.FrameCutter(frameSize=frameSize,
                             hopSize=hopSize,
                             silentFrames="keep",
                             startFromZero=False,
                             lastFrameToEndOfFile=True)
        # zero-pad every frame up to 1024 samples so the FFT size is
        # constant regardless of frameSize
        window = ess.Windowing(type=windowType,
                               zeroPhase=True,
                               zeroPadding=1024 - frameSize)
        freqBands = ess.FrequencyBands(
            sampleRate=sampleRate)  # using barkbands by default
        spec = ess.Spectrum()

        pool = Pool()
        loader.audio >> fc.signal
        fc.frame >> window.frame >> spec.frame
        spec.spectrum >> freqBands.spectrum
        freqBands.bands >> (pool, 'frequency_bands')
        essentia.run(loader)

        # frameRate = frames per second of the band-energy sequence
        noveltyCurve = NoveltyCurve(frameRate=sampleRate / float(hopSize),
                                    weightCurveType=weightCurveType)(
                                        pool['frequency_bands'])

        return noveltyCurve
Exemple #17
0
# loop over all frames
audioout = np.array(0)
counter = 0

# input and output files (resolved relative to this script's directory)
import os.path
tutorial_dir = os.path.dirname(os.path.realpath(__file__))
inputFilename = os.path.join(tutorial_dir, 'singing-female.wav')
outputFilename = os.path.join(tutorial_dir, 'singing-female-out-sinesubtraction.wav')


out = np.array(0)
# NOTE(review): 'params' is assumed to be defined earlier in this
# script (frameSize, hopSize, sampleRate, sine-model settings) -- it is
# not visible in this excerpt
loader = es.MonoLoader(filename = inputFilename, sampleRate =  params['sampleRate'])
pool = essentia.Pool()
fcut = es.FrameCutter(frameSize = params['frameSize'], hopSize = params['hopSize'], startFromZero =  False);
w = es.Windowing(type = "blackmanharris92");
fft = es.FFT(size = params['frameSize']);
smanal = es.SineModelAnal(sampleRate = params['sampleRate'], maxnSines = params['maxnSines'], magnitudeThreshold = params['magnitudeThreshold'], freqDevOffset = params['freqDevOffset'], freqDevSlope = params['freqDevSlope'])
# NOTE(review): under Python 3 '/' makes subtrFFTSize a float -- confirm
# an int fftSize is intended (other functions in this file use '//')
subtrFFTSize = min(params['frameSize']/4, 4* params['hopSize'])
smsub = es.SineSubtraction(sampleRate = params['sampleRate'], fftSize = subtrFFTSize, hopSize = params['hopSize'])


# analysis network: audio -> frames -> window -> FFT -> sine-model
# analysis, with the per-frame results collected in the pool
loader.audio >> fcut.signal
fcut.frame >> w.frame
w.frame >> fft.frame
fft.fft >> smanal.fft
smanal.magnitudes >> (pool, 'magnitudes')
smanal.frequencies >> (pool, 'frequencies')
smanal.phases >> (pool, 'phases')
Exemple #18
0
# make temporary directory and unique time identifier
uniqueTime = str(int(tiempo()))
temp_folder = os.getcwd()+'/tmp'
os.mkdir(temp_folder)

# retrieve filenames from folder:
soundfiles = os.listdir(audio_folder)
if '.DS_Store' in soundfiles:
    soundfiles.remove('.DS_Store')

print "\nANALYSIS..."
for item in soundfiles:
    loader = estr.MonoLoader(filename=audio_folder+'/'+item, 
                             sampleRate=sample_rate)
    framecutter = estr.FrameCutter(frameSize=window_size, 
                                   hopSize=hop_size)
    windowing = estr.Windowing(size=window_size, 
                               type=window_type)
    spectrum = estr.Spectrum(size=window_size)
    spectralpeaks = estr.SpectralPeaks(magnitudeThreshold=magnitude_threshold, 
                                       minFrequency=min_frequency, 
                                       maxFrequency=max_frequency, 
                                       maxPeaks=max_peaks,
                                       sampleRate=sample_rate)      
    hpcp = estr.HPCP(bandPreset=band_preset,
                     harmonics = harmonics,
                     minFrequency=min_frequency, 
                     maxFrequency=max_frequency,
                     nonLinear=non_linear,
                     normalized=normalize,
                     referenceFrequency=reference_frequency,
Exemple #19
0
db_conn = sqlite3.connect("data.db")
db_cursor = db_conn.cursor()

names = [
    'blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop',
    'reggae', 'rock'
]

for name in names:

    for i in range(10):
        song_name = name + '.' + '0000' + str(i) + '.au'
        print(song_name)
        loader = ess.MonoLoader(filename="genres/" + name + "/" + song_name)
        framecutter = ess.FrameCutter(frameSize=4096,
                                      hopSize=2048,
                                      silentFrames='noise')
        windowing = ess.Windowing(type='blackmanharris62')
        spectrum = ess.Spectrum()
        spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                          magnitudeThreshold=0.00001,
                                          minFrequency=20,
                                          maxFrequency=3500,
                                          maxPeaks=60)

        # Use default HPCP parameters
        hpcp = ess.HPCP()

        # Use pool to store data
        pool = essentia.Pool()
 def testInvalidoptions(self):
     # FrameCutter must reject non-positive frame and hop sizes.
     self.assertConfigureFails(es.FrameCutter(), {'frameSize': 0})
     self.assertConfigureFails(es.FrameCutter(), {'frameSize': -23})
     self.assertConfigureFails(es.FrameCutter(), {'hopSize': 0})
     self.assertConfigureFails(es.FrameCutter(), {'hopSize': -23})
 def testIncompatibleParams(self):
     # Configuring a non-zero validFrameThresholdRatio together with
     # startFromZero=False is expected to fail.
     self.assertConfigureFails(es.FrameCutter(), {
         'startFromZero': False,
         'validFrameThresholdRatio': .6
     })
    out = overl(ifftframe)    

    if counter >= (framesize/(2*hopsize)):
      audioout = np.append(audioout, out)
    counter += 1

  # write audio output
  print audioout.shape
  awrite(audioout.astype(np.float32))


if mode == 'streaming':
  out = np.array(0)
  loader = es.MonoLoader(filename = inputFilename, sampleRate = 44100)
  pool = essentia.Pool()
  fcut = es.FrameCutter(frameSize = framesize, hopSize = hopsize, startFromZero =  False);
  w = es.Windowing(type = "hann");
  fft = es.FFT(size = framesize);
  ifft = es.IFFT(size = framesize);
  overl = es.OverlapAdd (frameSize = framesize, hopSize = hopsize);
  awrite = es.MonoWriter (filename = outputFilename, sampleRate = 44100);
  
  #gen = audio #VectorInput(audio)
  loader.audio >> fcut.signal
  fcut.frame >> w.frame
  w.frame >> fft.frame
  fft.fft >> ifft.fft
  ifft.frame >> overl.frame
  overl.signal >> awrite.audio
  overl.signal >> (pool, 'audio')
  
Exemple #23
0
def compute_features(path, f_mfcc_kl, f_mfcc_euclid, f_notes, f_chroma, f_bh):
    """Extract a configurable set of audio features from the file at *path*.

    Each ``f_*`` flag (0/1) switches one feature group on:

      f_mfcc_kl / f_mfcc_euclid -- MFCC statistics (mean, var, covariance)
      f_bh                      -- BPM histogram descriptors
      f_chroma                  -- beat-aligned, L1-normalised HPCP matrix
      f_notes                   -- one dominant pitch class per beat

    Returns the 10-tuple
        (bpm, histogram, key, scale, notes, chroma_matrix,
         mean, cov, var, cov_kl)
    where groups that were not requested keep their 0/empty placeholders.
    On a decode failure the all-placeholder tuple
    (0, [], 0, 0, [], [], [], [], [], []) is returned.

    NOTE(review): relies on module-level names ``es`` (essentia.standard),
    ``ess`` (essentia.streaming), ``essentia`` and ``fs`` (sample rate);
    the streaming loader is fixed at 44100 Hz, so ``fs`` should equal 44100
    for the beat-to-frame conversion below to line up -- confirm.
    """
    gc.enable()
    # Decode with essentia *standard* (resamples to ``fs`` if necessary).
    try:
        audio = es.MonoLoader(filename=path, sampleRate=fs)()
    except Exception:
        print("Erroneous File detected by essentia standard: skipping!")
        return 0, [], 0, 0, [], [], [], [], [], []
    # Separate *streaming* loader feeding the chroma/key network below.
    try:
        loader = ess.MonoLoader(filename=path, sampleRate=44100)
    except Exception:
        print("Erroneous File detected by essentia streaming: skipping!")
        return 0, [], 0, 0, [], [], [], [], [], []
    # Analysis frame geometry for the streaming (chroma) network.
    frameSize = 4096  #512
    hopSize = 2048  #256
    # Band-limit only the streaming chain (128 Hz .. 4096 Hz); the MFCCs
    # below are computed on the unfiltered standard-mode audio.
    HP = ess.HighPass(cutoffFrequency=128)
    LP = ess.LowPass(cutoffFrequency=4096)
    # Placeholders for every feature group (returned as-is if not computed).
    bpm = 0
    histogram = 0
    key = 0
    scale = 0
    notes = 0
    chroma_matrix = 0
    mean = 0
    cov = 0
    var = 0
    cov_kl = 0
    #####################################
    # MFCC statistics
    #####################################
    if f_mfcc_kl == 1 or f_mfcc_euclid == 1:
        hamming_window = es.Windowing(type='hamming')
        spectrum = es.Spectrum()  # we just want the magnitude spectrum
        mfcc = es.MFCC(numberCoefficients=13)
        frame_sz = 2048  #512
        hop_sz = 1024  #256
        # MFCC() returns (bands, coefficients); [1] keeps the coefficients.
        mfccs = np.array([
            mfcc(spectrum(hamming_window(frame)))[1] for frame in
            es.FrameGenerator(audio, frameSize=frame_sz, hopSize=hop_sz)
        ])
        mean = np.mean(mfccs.T, axis=1)
        var = np.var(mfccs.T, axis=1)
        cov = np.cov(mfccs.T)
        cov_kl = cov  # full 13x13 matrix (used for KL-style distances)
        # Covariance is symmetric: keep only the upper triangle in `cov`
        # to shorten the feature vector.
        iu1 = np.triu_indices(13)
        cov = cov[iu1]
    #####################################
    # Beat positions, BPM and histogram
    #####################################
    if f_bh == 1 or f_chroma == 1 or f_notes == 1:
        rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
        bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(
            audio)
        if f_bh == 1:
            peak1_bpm, peak1_weight, peak1_spread, peak2_bpm, peak2_weight, peak2_spread, histogram = es.BpmHistogramDescriptors(
            )(beats_intervals)
        # Beat times (seconds) -> frame indices of the streaming analysis.
        beats_frames = (beats * fs) / hopSize
        beats_frames = beats_frames.astype(int)

    #####################################
    # Chroma + key via a streaming network:
    # loader -> HP -> LP -> framecutter -> windowing -> spectrum
    #        -> spectralpeaks -> (hpcp, hpcp_key) -> pool / key
    #####################################
    framecutter = ess.FrameCutter(frameSize=frameSize,
                                  hopSize=hopSize,
                                  silentFrames='noise')
    windowing = ess.Windowing(type='blackmanharris62')
    spectrum = ess.Spectrum()
    spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                      magnitudeThreshold=0.00001,
                                      minFrequency=20,
                                      maxFrequency=3500,
                                      maxPeaks=60)
    # Default 12-bin HPCP for the chroma matrix; a higher-resolution
    # 36-bin HPCP feeds the key estimator.
    hpcp = ess.HPCP()
    hpcp_key = ess.HPCP(
        size=36,  # higher resolution for key estimation
        referenceFrequency=440,  # assumed tuning frequency (Hz)
        bandPreset=False,
        minFrequency=20,
        maxFrequency=3500,
        weightType='cosine',
        nonLinear=False,
        windowSize=1.)
    key = ess.Key(
        profileType='edma',  # use profile for electronic music
        numHarmonics=4,
        pcpSize=36,
        slope=0.6,
        usePolyphony=True,
        useThreeChords=True)
    # Pool collects per-frame HPCPs and the single key/scale/strength.
    pool = essentia.Pool()
    # Filtered path (to skip the filters, connect loader.audio directly
    # to framecutter.signal instead of the two lines through HP/LP).
    loader.audio >> HP.signal
    HP.signal >> LP.signal
    LP.signal >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    spectralpeaks.magnitudes >> hpcp_key.magnitudes
    spectralpeaks.frequencies >> hpcp_key.frequencies
    hpcp_key.hpcp >> key.pcp
    hpcp.hpcp >> (pool, 'tonal.hpcp')
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')
    essentia.run(loader)
    chroma = pool['tonal.hpcp'].T  # shape (12, n_frames)
    key = pool['tonal.key_key']
    scale = pool['tonal.key_scale']
    #####################################
    # Beat-aligned chroma matrix
    #####################################
    if f_chroma == 1:
        # Average the chroma over each beat interval and L1-normalise the
        # per-beat vector.
        chroma_matrix = np.zeros((beats_frames.shape[0], 12))
        prev_beat = 0
        act_beat = 0
        # NOTE: chroma_align is a transposed *view* of chroma, so the
        # in-place averaging below also mutates chroma (as the original
        # code did) -- the f_notes branch sees the averaged data.
        chroma_align = chroma
        chroma_align = chroma_align.transpose()  # (n_frames, 12)
        mat_index = 0
        for i in beats_frames:
            act_beat = i
            # NOTE(review): assumes beats_frames is strictly increasing;
            # a repeated frame index would divide by zero here.
            value = sum(
                chroma_align[prev_beat:act_beat]) / (act_beat - prev_beat)
            chroma_align[prev_beat:act_beat] = value
            prev_beat = i
            if np.linalg.norm(value, ord=1) != 0:
                value = value / np.linalg.norm(value, ord=1)
            chroma_matrix[mat_index] = value
            mat_index = mat_index + 1
    #####################################
    # Per-beat dominant pitch classes
    #####################################
    if f_notes == 1:
        chroma = chroma.transpose()  # (n_frames, 12)
        m, n = chroma.shape
        # Frame-energy threshold: half of the mean per-frame chroma sum.
        avg = 0
        for j in chroma:
            avg = avg + np.sum(j)
        avg = avg / m
        threshold = avg / 2
        # Binarise each frame in place: dominant bin -> 1, bins below 0.8
        # -> 0; frames weaker than the threshold are silenced entirely.
        for i in chroma:
            if np.sum(i) > threshold:
                ind = np.where(i == np.max(i))
                i[ind] = 1
                low_values_flags = i < 0.8
                i[low_values_flags] = 0
            else:
                i.fill(0)
        # Collapse each beat interval to its dominant pitch class.
        prev_beat = 0
        act_beat = 0
        sum_key = np.zeros(12)
        for i in beats_frames:
            act_beat = i
            sum_key = sum(chroma[prev_beat:act_beat])
            ind = np.where(sum_key == np.max(sum_key))
            ind = ind[0]
            # 12 pitch classes (was np.zeros(len(j)): a leftover loop
            # variable from the averaging loop above, always 12).
            fill = np.zeros(12)
            if (np.all(chroma[prev_beat:act_beat] == 0)):
                fill[ind] = 0  # silent interval stays silent
            else:
                fill[ind] = 1
            chroma[prev_beat:act_beat] = fill
            prev_beat = i
        # Read one pitch-class index back per beat.
        notes = []
        prev_beat = 0
        act_beat = 0
        for i in beats_frames:
            act_beat = i
            sum_key = sum(chroma[prev_beat:act_beat])
            ind = np.where(sum_key == np.max(sum_key))
            notes.append(ind[0][0])
            prev_beat = i
    gc.collect()
    return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl