Example #1
    def handle(self, audio: np.array) -> np.array:
        left, right = self.to_stereo(audio)

        left = array(self.apply_delay(left))
        right = array(self.apply_delay(right))

        return self.to_mono(left, right)
Example #2
def extractHPCP(audiosignal, frameSize, hopSize, w, speaks, hpcp, signalname):
    # w is the preconfigured windowing algorithm
    # hpcp is the preconfigured HPCP algorithm 

    audio = essentia.array(audiosignal)
    # TODO: not sure if this is necessary: 
    if len(audio)%2:
        audio = audio[:-1] 

    spectrum = Spectrum()
    speaks.configure(maxFrequency=hpcp.paramValue('maxFrequency'))  # plain attribute assignment would not reconfigure the algorithm
    chromagram = []
    spectrogram = []

    signal_spectrum = spectrum(audio)

    for frame in FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
        frame_spectrum = spectrum(w(frame))
        spectrogram.append(frame_spectrum)
        pfreq, pmagn = speaks(frame_spectrum)
        chromagram.append(hpcp(pfreq, pmagn))

    spectrogram = essentia.array(spectrogram).T
    chromagram = essentia.array(chromagram).T

    hpcp_mean = np.mean(chromagram, axis=1)
    hpcp_median = np.median(chromagram, axis=1)

    return chromagram, spectrogram, signal_spectrum, hpcp_mean, hpcp_median
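For context, a minimal sketch of how the preconfigured algorithms passed into extractHPCP above might be created and the function called; the parameter values below are illustrative assumptions, not values taken from the example.

import essentia
from essentia.standard import MonoLoader, Windowing, SpectralPeaks, HPCP

audio = MonoLoader(filename='song.wav', sampleRate=44100)()  # hypothetical input file
w = Windowing(type='blackmanharris62')
speaks = SpectralPeaks(orderBy='magnitude')
hpcp = HPCP(size=12, maxFrequency=5000)

chromagram, spectrogram, sig_spec, hpcp_mean, hpcp_median = extractHPCP(
    audio, frameSize=4096, hopSize=2048, w=w, speaks=speaks, hpcp=hpcp,
    signalname='song')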
Example #3
def extract_features(x,
                     M=WINDOW_SIZE,
                     N=FFT_SIZE,
                     H=HOP_SIZE,
                     fs=SR,
                     window_type=WINDOW_TYPE):
    '''
    extract magnitude spectra from the input vector and apply power-law compression
    '''
    #init functions and vectors
    x = essentia.array(x)
    spectrum = ess.Spectrum(size=N)
    window = ess.Windowing(size=M, type=window_type)
    SP = []

    #compute STFT
    for frame in ess.FrameGenerator(x,
                                    frameSize=M,
                                    hopSize=H,
                                    startFromZero=True):  #generate frames
        wX = window(frame)  #window frame
        mX = spectrum(wX)  #compute fft
        SP.append(mX)

    SP = essentia.array(SP)
    SP = np.power(SP, 2. / 3.)  #power law compression

    return SP
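A minimal usage sketch, assuming WINDOW_SIZE, FFT_SIZE, HOP_SIZE, SR, and WINDOW_TYPE are module-level constants (the values below are illustrative):

import essentia
import essentia.standard as ess
import numpy as np

WINDOW_SIZE, FFT_SIZE, HOP_SIZE, SR, WINDOW_TYPE = 1024, 1024, 512, 44100, 'hann'

audio = ess.MonoLoader(filename='input.wav', sampleRate=SR)()  # hypothetical file
SP = extract_features(audio)
print(SP.shape)  # (n_frames, FFT_SIZE // 2 + 1)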
Example #4
class TestCrossSimilarityMatrix(TestCase):

    # hpcp matrix of a short query song segment (2 frames) computed using essentia hpcp algorithm
    query_feature = array([[0.3218126, 0.00541916, 0.26444072, 0.36874822, 1., 0.10472599, 0.05123469, 0.03934194, 0.07354275, 0.646091, 0.55201685, 0.03270169],
                    [0.07695414, 0.04679213, 0.56867135, 1., 0.10247268, 0.03653419, 0.03635696, 0.2443251, 0.2396715, 0.1190474, 0.8045795, 0.41822678]])
    
    # hpcp matrix of a short reference song segment (3 frames) computed using essentia hpcp algorithm
    reference_feature = array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
                    [0.36084786, 0.37151814, 0.40913638, 0.15566002, 0.40571737, 1., 0.6263613, 0.65415925, 0.53127843, 0.7900088, 0.50427467, 0.51956046],
                    [0.42861825, 0.36887613, 0.05665652, 0.20978431, 0.1992704, 0.14884946, 1., 0.24148795, 0.43031794, 0.14265466, 0.17224492, 0.36498153]]) 
    
    # expected euclidean pairwise similarity matrix without binary thresholding (pre-computed using a python script adopted from https://github.com/albincorreya/ChromaCoverId/blob/master/cover_similarity_measures.py)
    expected_sim_matrix = [[1.432924 , 1.5921365, 1.5593135],
                           [1.5159905, 1.7596511, 1.5824637]]
    # expected euclidean pairwise similarity matrix with binary thresholding where binarizePercentile=0.095, frameStackStride=1 and frameStackSize=1 (pre-computed using a python script adopted from https://github.com/albincorreya/ChromaCoverId/blob/master/cover_similarity_measures.py)
    expected_sim_matrix_binary = [[1., 0., 0.],
                                  [0., 0., 0.]]

    def testEmpty(self):
        self.assertComputeFails(CrossSimilarityMatrix(), [], [])

    def testRegressionStandard(self):
        csm = CrossSimilarityMatrix(binarize=False, frameStackStride=1, frameStackSize=1)
        result = csm(self.query_feature, self.reference_feature)
        self.assertAlmostEqualMatrix(self.expected_sim_matrix, result)

    def testRegressionBinary(self):
        csm = CrossSimilarityMatrix(binarize=True, binarizePercentile=0.095, frameStackStride=1, frameStackSize=1)
        result = csm(self.query_feature, self.reference_feature)
        self.assertAlmostEqualMatrix(self.expected_sim_matrix_binary, result)
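As a sanity check, the unbinarized expected matrix above can be reproduced with plain numpy, assuming the algorithm computes pairwise Euclidean distances between query and reference frames (a sketch):

import numpy as np

def pairwise_euclidean(query, reference):
    # result[i, j] = Euclidean distance between query frame i and reference frame j
    diff = query[:, None, :] - reference[None, :, :]
    return np.sqrt((diff ** 2).sum(axis=2))

# pairwise_euclidean(query_feature, reference_feature) matches
# expected_sim_matrix to within float precision.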
Example #5
    def save_separated_audiofiles(self):
        # check and see if directory exists
        if self.directory != "" and ("/" in self.directory):
            directoryLevels = self.directory.split("/")
            for ixLevel, directoryLevel in enumerate(directoryLevels):
                if ixLevel == 0:
                    LevelPath = directoryLevel
                else:
                    LevelPath += "/" + directoryLevel
                if not os.path.isdir(LevelPath):
                    os.mkdir(LevelPath)

        # Create audio writer object
        if self.fsIsSpecified == "False":  # Notify user if sampling rate not specified
            print(
                "Sample Rate not specified for writing the audio files. Assumed Fs (Hz) is "
                + str(self.fs))

        if self.formatIsSpecified == "False":  # Notify user if format not specified
            print(
                "File format not specified for writing the audio files. Assumed format is "
                + str(self.format))

        MonoWriter = es.MonoWriter(sampleRate=self.fs, format=self.format)
        MonoWriter.configure(filename=self.directory + self.filename +
                             "_percussive." + self.format)
        MonoWriter(array(self.x_p))

        MonoWriter = es.MonoWriter(sampleRate=self.fs, format=self.format)
        MonoWriter.configure(filename=self.directory + self.filename +
                             "_harmonic." + self.format)
        MonoWriter(array(self.x_h))
Example #6
def extract_features(x,
                     M=Config.WINDOW_SIZE,
                     N=Config.FFT_SIZE,
                     H=Config.HOP_SIZE,
                     fs=Config.FS,
                     window_type=Config.WINDOW_TYPE):
    '''
    Function that extracts spectrogram from an audio signal
    -----------------------
    Input: Samples, window size (int), FFT size (int), Hop size (int),
    Sampling rate, Window type (e.g. Hanning)

    Output: Spectrogram
    -----------------------
    '''
    # init functions and vectors
    x = essentia.array(x)
    spectrum = ess.Spectrum(size=N)
    window = ess.Windowing(size=M, type=window_type)
    SP = []
    # compute STFT
    for frame in ess.FrameGenerator(x,
                                    frameSize=M,
                                    hopSize=H,
                                    startFromZero=True):  # generate frames
        wX = window(frame)  # window frame
        mX = spectrum(wX)  # compute fft

        SP.append(mX)
    SP = essentia.array(SP)
    SP = np.power(SP, 2. / 3.)  # power law compression
    SP = SP[:, :int(Config.FFT_SIZE / 4 + 1)]

    return SP
Example #7
def callback(in_data, frame_count, time_info, status):
    global abuffer, recording, identifying, resampling, \
            ncalls, act_status, collect, nturnoff, buffer_size
    global wf,ratei,odir,model,keys,filename
    # If there is nothing to do
    if not identifying:
        return (in_data,pyaudio.paContinue)
    # If there is something to do
    in_data = np.fromstring(in_data, dtype='Int16')
    in_data = in_data/32767.0
    if resampling != 1.0:
        in_data = resample(in_data, int(len(in_data)*resampling))
    abuffer.append(in_data)

    if len(abuffer)>buffer_size:
        in_data = np.concatenate(abuffer[-buffer_size:])
        if identifying:
            # Here goes the code to:
            # extract the MFCCs
            # extract the statistics
            # predict
            mfccs = []
        audio = essentia.array(in_data)
        for frame in FrameGenerator(audio, frameSize = 2048 , hopSize = 1024):
            mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
            mfccs.append(mfcc_coeffs)
        mfccs = essentia.array(mfccs).T
        stats = Stats_Es(mfccs)
        print stats.shape
        print model.predict(stats)       
        abuffer.pop(len(abuffer)-buffer_size)
    if not stop:
        return (in_data,pyaudio.paContinue)
    else:
        return (in_data,pyaudio.paComplete)
Example #8
def extract_features(x,
                     M=WINDOW_SIZE,
                     N=FFT_SIZE,
                     H=HOP_SIZE,
                     fs=SR,
                     window_type=WINDOW_TYPE):
    '''
    extract magnitude spectra from the input vector,
    apply power-law compression,
    and cut the upper spectrum
    '''
    #init functions and vectors
    x = essentia.array(x)
    spectrum = ess.Spectrum(size=N)
    window = ess.Windowing(size=M, type=window_type)
    SP = []

    #compute STFT
    for frame in ess.FrameGenerator(x,
                                    frameSize=M,
                                    hopSize=H,
                                    startFromZero=True):  #generate frames
        wX = window(frame)  #window frame
        mX = spectrum(wX)  #compute fft
        SP.append(mX)

    SP = essentia.array(SP)
    SP = np.power(SP, 2. / 3.)  #power law compression
    #SP = SP[:,:int(FFT_SIZE/2+1)]  #cut upper spectrum (above 4 khz)

    return SP
Example #9
def extractHPCP(audiosignal, frameSize, hopSize, w, speaks, hpcp, signalname):
    # w is the preconfigured windowing algorithm
    # hpcp is the preconfigured HPCP algorithm

    audio = essentia.array(audiosignal)
    # TODO: not sure if this is necessary:
    if len(audio) % 2:
        audio = audio[:-1]

    spectrum = Spectrum()
    speaks.configure(maxFrequency=hpcp.paramValue('maxFrequency'))  # plain attribute assignment would not reconfigure the algorithm
    chromagram = []
    spectrogram = []

    signal_spectrum = spectrum(audio)

    for frame in FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
        frame_spectrum = spectrum(w(frame))
        spectrogram.append(frame_spectrum)
        pfreq, pmagn = speaks(frame_spectrum)
        chromagram.append(hpcp(pfreq, pmagn))

    spectrogram = essentia.array(spectrogram).T
    chromagram = essentia.array(chromagram).T

    hpcp_mean = np.mean(chromagram, axis=1)
    hpcp_median = np.median(chromagram, axis=1)

    return chromagram, spectrogram, signal_spectrum, hpcp_mean, hpcp_median
Example #10
    def calc_chromagram(self):

        # save the results in the stft_pool
        self.chromagram = []
        hpcp = es.HPCP(
            size=12,  # 12 pitch classes; use a higher resolution (e.g. 36) for key estimation
            referenceFrequency=440,  # assume the tuning frequency is 440 Hz
            bandPreset=False,
            weightType='cosine',
            nonLinear=False,
            windowSize=1.,
            sampleRate=self.sample_rate)

        spectrum = es.Spectrum(size=self.fft_size)
        spectral_peaks = es.SpectralPeaks(sampleRate=self.sample_rate)

        for frame in es.FrameGenerator(self.audio,
                                       frameSize=self.frame_size,
                                       hopSize=self.hop_size,
                                       startFromZero=True):
            frame = array(frame * self.window)
            freqs, mags = spectral_peaks(spectrum(frame))
            chroma = hpcp(freqs, mags)
            self.chromagram.append(chroma)

        self.chromagram = array(self.chromagram)

        self.timeAxSec = np.arange(len(
            self.chromagram)) * self.hop_size / float(self.sample_rate)
Example #11
def save_features(key, pool, mfcc, hpcp, tonnetz):
    """Saves the features into the specified pool under the given key."""
    for mfcc_coeff in mfcc:
        pool.add(key + ".mfcc", essentia.array(mfcc_coeff))
    for hpcp_coeff in hpcp:
        pool.add(key + ".hpcp", essentia.array(hpcp_coeff))
    for tonnetz_coeff in tonnetz:
        pool.add(key + ".tonnetz", essentia.array(tonnetz_coeff))
Example #12
def main():
    args = parse_args()
    metadata = get_metadata(args.filename)
    audio = load_partial_audio(args.filename, args.start_time, args.end_time)
    sample_seconds = get_duration(audio, metadata.sampleRate)
    audio = resample_audio(audio, metadata.sampleRate, PLOT_SAMPLE_RATE)

    if args.energy_buckets:
        _, audio = energy_buckets(audio, sample_seconds)
        print "Created %d energy points" % len(audio)
        audio = essentia.array(audio)

    if args.amplitude:
        print 'Amplitude'
        audio = essentia.array(numpy.absolute(audio))

    write_raw(audio, 'output/raw_audio.csv')
    x = essentia.array(linspace(0, sample_seconds, len(audio)))

    if args.spline:
        print 'Spline'
        f = get_spline_function(x, audio)
        audio = essentia.array(numpy.vectorize(lambda x: f(x)[0])(audio))

    if args.moving_max:
        print 'Moving Max'
        audio = essentia.array(moving_max(audio, window_size=50))

    if args.moving_average:
        print 'Moving Average'
        audio = moving_average(audio, size=6)

    plot(x, audio)

    if args.find_peaks:
        # print beat ticks
        print 'Find Peaks'
        ticks = find_peaks(audio) * max(x)
        for tick in ticks:
            axvline(tick, ymin=0, ymax=0.1, color='red')

    if args.gradient:
        print 'Gradient'
        gradient = get_gradient(audio)
        write_raw(gradient, 'output/gradient.csv')
        plot(x, gradient, color="yellow")

    savefig('output/waveform')
    close()

    if args.spectrum:
        print 'Spectrum'
        spec = spectrum(audio)
        write_raw(spec, 'output/raw_spec.csv')
        plot(arange(len(spec)), spec)
        savefig('output/spectrum')
        close()
Example #13
def compute(audio):

    """
    filters out maxs values corresponding to harmonic part
    """
    audio = essentia.array(audio)
    sampleRate  = int(conf.opts['sampleRate'])
    frameSize   = int(conf.opts['frameSize'])
    hopSize     = int(conf.opts['hopSize'])
    zeroPadding = int(conf.opts['zeroPadding'])
    windowType  = conf.opts['windowType']
 
    frameRate = float(sampleRate)/float(hopSize)
    whitenf = Whitener(sampleRate=sampleRate, peaksNumber=opts["numPeaks"], hopSize=hopSize, frameSize=frameSize)

    audio = whitenf(audio)
# 
#     frames  = FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
#     window  = Windowing(size = frameSize, zeroPadding = zeroPadding, type = windowType)
#     fft = FFT()
#     ifft = IFFT()
#     cartesian2polar = CartesianToPolar()
#     polar2cartesian = PolarToCartesian()
#     whitef = SpectralWhitening(sampleRate = sampleRate)
#     peaksf  = SpectralPeaks(sampleRate = sampleRate,maxPeaks=5)
#     
#     audioout=np.zeros(len(audio))
#     
#     
#     
#     total_frames = frames.num_frames()
#     n_frames = 0
#     start_of_frame =0
#     
#     for frame in frames:
# 
#         windowed_frame = window(frame)
#         complex_fft = fft(windowed_frame)
#         (spectrum,phase) = cartesian2polar(complex_fft)
#         peaks,mags =peaksf(spectrum)
#         whited = whitef(spectrum,peaks,mags) 
#         i=0
#         for p in peaks: 
#             spectrum[int(p*frameSize/sampleRate)]=whited[i]
#             i+=1
# 
# 
#         complex_fft=polar2cartesian(spectrum,phase)
#         outf = ifft(complex_fft)*.5*hopSize
#         if start_of_frame+frameSize < len(audio) and start_of_frame>0:
#             audioout[start_of_frame:start_of_frame+frameSize]+=window(outf)
#     
#         n_frames += 1
#         start_of_frame += hopSize
#         
    return essentia.array(audio)
Example #14
def extractEnvelopeSegments(audio):
    pd = PeakDetection(orderBy='amplitude')
    duration = Duration()
    midpoint, _ = pd(audio)
    slicer = Slicer(startTimes=essentia.array([0, midpoint[0]*duration(audio)]),
                    endTimes=essentia.array([midpoint[0]*duration(audio), duration(audio)]))
    slices = slicer(audio)

    # XXX: ugly
    return ensureEven(slices[0]), ensureEven(slices[1])
Example #15
def compute(audio, pool, options):

    INFO('Computing Inter Onsets Intervals...')

    sampleRate = options['sampleRate']
    bpm = pool.value('rhythm.bpm')
    onsets = pool.value('rhythm.onset_times')

    # special case
    if bpm < 0 or len(onsets) < 2:
        pool.add(namespace + '.' + 'relative_ioi_peaks', [float()])  # , pool.GlobalScope)
        pool.add(namespace + '.' + 'relative_ioi', [float()])  # , pool.GlobalScope)

        INFO('100% done...')

        return

    # 32th note interval
    interp = 32.
    interval = (60./bpm) / interp
    riois = []
    old = onsets[0]
    for i in range(1,len(onsets)): riois += [ round( (onsets[i] - onsets[i-1]) / interval ) ]
    for i in range(2,len(onsets)): riois += [ round( (onsets[i] - onsets[i-2]) / interval ) ]
    for i in range(3,len(onsets)): riois += [ round( (onsets[i] - onsets[i-3]) / interval ) ]
    for i in range(4,len(onsets)): riois += [ round( (onsets[i] - onsets[i-4]) / interval ) ]
    ioidist = essentia.array(bincount([int(r) for r in riois]))  # bincount needs integer bins
    fullioidist = essentia.array(zip([p/interp for p in range(len(ioidist))], [ioi/sum(ioidist) for ioi in ioidist]))
    fullioidist = fullioidist[0:int(interp*5)]  # slice indices must be integers
    peak_detection = essentia.PeakDetection(minPosition = 0., maxPosition = len(ioidist),
                                            maxPeaks = 5, range = len(ioidist) - 1.,
                                            interpolate = True, orderBy = 'amplitude')
    pos, mags = peak_detection(ioidist)

    # scale back to 1 beat
    pos = [ p/interp for p in pos ]

    # ratio across whole distribution surface
    mags = [ mag/sum(ioidist) for mag in mags ]

    # add to pool
    pool.add(namespace + '.' + 'relative_ioi_peaks', essentia.array(zip(pos,mags)))#, pool.GlobalScope)
    pool.add(namespace + '.' + 'relative_ioi', fullioidist)#, pool.GlobalScope)

    # debug plot
    if 0:
        from pylab import plot, show, hold
        plot([i/interp for i in range(len(ioidist))], [ioi/sum(ioidist) for ioi in ioidist],'b+-')
        hold(True)
        for i,j in zip(pos,mags):
            plot([i]*2,[0.,j],'+-')
        hold(False)
        show()

    INFO('100% done...')
Example #16
    def handle(self, audio: np.array):
        left, right = self.to_stereo(audio)
        diff = (left == right)
        mul_array = (np.array([self.mul] * len(audio)) * (diff - 1)) + 1
        left = left * mul_array
        right = right * mul_array

        left = essentia.array(left)
        right = essentia.array(right)

        return self.to_mono(left, right)
Example #17
def extract_mfcc(audio):
    w = Windowing(type = 'blackmanharris62')
    spectrum = Spectrum()
    mfcc = essentia.standard.MFCC()
    mfccs =[]
    audio = essentia.array(audio)
    for frame in FrameGenerator(audio, frameSize = 2048 , hopSize = 1024):
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
        mfccs.append(mfcc_coeffs)
    mfccs = essentia.array(mfccs).T
    return mfccs
Example #18
def extract_mfcc(audio):
    w = Windowing(type='blackmanharris62')
    spectrum = Spectrum()
    mfcc = essentia.standard.MFCC()
    mfccs = []
    audio = essentia.array(audio)
    for frame in FrameGenerator(audio, frameSize=2048, hopSize=1024):
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
        mfccs.append(mfcc_coeffs)
    mfccs = essentia.array(mfccs).T
    return mfccs
Example #19
def compute(features, opt):
    frameRate = opt["sampleRate"] / opt["hopSize"]

    delay = int(conf.opts["doubleOnsetT"] / 2.0 * frameRate)
    onsets = Onsets(frameRate=frameRate, alpha=opts["alpha"], delay=delay, silenceThreshold=opts["silenceThresh"])
    if isinstance(features[0], list) or isinstance(features[0], np.ndarray):
        weights = essentia.array([1 for x in range(len(features))])
        time_onsets = list(onsets(essentia.array(features), weights))
    else:
        time_onsets = list(onsets(essentia.array([features]), essentia.array([1])))

    return time_onsets
Example #20
def compute(audio):
    """
    compress/expand
    """
    f = waveshaper(xPoints=essentia.array(opts["LUTx"]),
                   yPoints=essentia.array(opts["LUTy"]),
                   normalize = True if opts["normalize"] else False,
                   spline = True if opts["spline"] else False,
                       )
    
    audio = f(essentia.array(audio))
    return audio
Example #21
def extractOnsets(audio):
        od1 = OnsetDetection(method = 'hfc')
        od2 = OnsetDetection(method = 'complex')

        # let's also get the other algorithms we will need, and a pool to store the results

        w = Windowing(type = 'hann')
        fft = FFT() # this gives us a complex FFT
        c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)

        pool = essentia.Pool()

        # let's get down to business
        for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):
                mag, phase, = c2p(fft(w(frame)))
                pool.add('features.hfc', od1(mag, phase))
                pool.add('features.complex', od2(mag, phase))


        # Phase 2: compute the actual onsets locations
        onsets = Onsets()

        onsets_hfc = onsets(# this algo expects a matrix, not a vector
                array([ pool['features.hfc'] ]),

                # you need to specify weights, but as there is only a single
                # function, it doesn't actually matter which weight you give it
                [ 1 ])
#        np.savetxt(outFile, onsets_hfc, fmt='%f')

        #Let's just take the complex as an example
        onsets_complex = onsets(array([ pool['features.complex'] ]), [ 1 ])

        startTimes = onsets_hfc
        endTimes = onsets_hfc[1:]
        duration = Duration()
        endTimes = np.append(endTimes, duration(audio))

        slicer = Slicer(startTimes = array(startTimes), endTimes = array(endTimes))
        
        frames = slicer(audio)        

        lengthInFrames = 0
        for i in range(len(frames)):
                lengthInFrames = lengthInFrames + len(frames[i])

        format = Format('wav')
        global counter
        f = Sndfile('out'+ str(counter) + '.wav' , 'w', format, 1, 44100)
        counter = counter + 1
        f.write_frames(np.asarray(frames[0]))

        return frames
Example #22
def compute(audio):
    """
    filters out maxs values corresponding to harmonic part
    """
    audio = essentia.array(audio)
    if opts['ratio'] == 0:
        return audio
    else:
        sampleRate = int(conf.opts['sampleRate'])
        frameSize = int(conf.opts['frameSize'])
        hopSize = frameSize / 4
        zeroPadding = int(conf.opts['zeroPadding'])
        windowType = conf.opts['windowType']

        frameRate = float(sampleRate) / float(hopSize)

        frames = FrameGenerator(audio=audio,
                                frameSize=frameSize,
                                hopSize=hopSize)
        window = Windowing(size=frameSize,
                           zeroPadding=zeroPadding,
                           type=windowType)
        fft = FFT()
        ifft = IFFT()
        cartesian2polar = CartesianToPolar()
        polar2cartesian = PolarToCartesian()
        audioout = np.zeros(len(audio))

        total_frames = frames.num_frames()
        n_frames = 0
        start_of_frame = 0

        for frame in frames:

            windowed_frame = window(frame)
            complex_fft = fft(windowed_frame)
            (spectrum, phase) = cartesian2polar(complex_fft)
            sortedS = np.sort(spectrum)
            minFloor = sortedS[int(len(sortedS) * (1. - opts["ratio"]))]

            #spectrum = essentia.array([0 if x>minFloor else x for x in spectrum ])

            complex_fft = polar2cartesian(spectrum, phase)
            outf = ifft(complex_fft) * .5 * hopSize
            if start_of_frame + frameSize < len(audio) and start_of_frame > 0:
                audioout[start_of_frame:start_of_frame +
                         frameSize] += window(outf)

            n_frames += 1
            start_of_frame += hopSize

        return essentia.array(audioout)
Example #23
def extractEnvelopeSegments(audio):
    pd = PeakDetection(orderBy='amplitude')
    duration = Duration()
    midpoint, _ = pd(audio)
    slicer = Slicer(startTimes=essentia.array(
        [0, midpoint[0] * duration(audio)]),
                    endTimes=essentia.array(
                        [midpoint[0] * duration(audio),
                         duration(audio)]))
    slices = slicer(audio)

    # XXX: ugly
    return ensureEven(slices[0]), ensureEven(slices[1])
Example #24
def compute(audio):
    """
    compress/expand
    """
    f = waveshaper(
        xPoints=essentia.array(opts["LUTx"]),
        yPoints=essentia.array(opts["LUTy"]),
        normalize=True if opts["normalize"] else False,
        spline=True if opts["spline"] else False,
    )

    audio = f(essentia.array(audio))
    return audio
Example #25
def compute(audio):
    """
    filters out maxs values corresponding to harmonic part
    """
    audio = essentia.array(audio)
    if opts['ratio']==0:
        return audio
    else:
        sampleRate  = int(conf.opts['sampleRate'])
        frameSize   = int(conf.opts['frameSize'])
        hopSize     = frameSize/4
        zeroPadding = int(conf.opts['zeroPadding'])
        windowType  = conf.opts['windowType']
    
        frameRate = float(sampleRate)/float(hopSize)
    
    
        frames  = FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
        window  = Windowing(size = frameSize, zeroPadding = zeroPadding, type = windowType)
        fft = FFT()
        ifft = IFFT()
        cartesian2polar = CartesianToPolar()
        polar2cartesian = PolarToCartesian()
        audioout=np.zeros(len(audio))
        
        
        
        total_frames = frames.num_frames()
        n_frames = 0
        start_of_frame =0
        
        for frame in frames:
    
            windowed_frame = window(frame)
            complex_fft = fft(windowed_frame)
            (spectrum,phase) = cartesian2polar(complex_fft)
            sortedS = np.sort(spectrum)
            minFloor = sortedS[int(len(sortedS)*(1.-opts["ratio"]))]
     
            #spectrum = essentia.array([0 if x>minFloor else x for x in spectrum ])
    
    
            complex_fft=polar2cartesian(spectrum,phase)
            outf = ifft(complex_fft)*.5*hopSize
            if start_of_frame+frameSize < len(audio) and start_of_frame>0:
                audioout[start_of_frame:start_of_frame+frameSize]+=window(outf)
        
            n_frames += 1
            start_of_frame += hopSize

        return essentia.array(audioout)
Example #26
def getOnsetFunctions(fname):
    logger = log.get_logger("rhythm")
    zeropadLen = params.Nfft - params.frmSize
    zz = np.zeros((zeropadLen, ), dtype='float32')
    frameCounter = 0
    bufferFrame = np.zeros((params.Nfft // 2 + 1, ))  # integer division for the shape
    logger.info('Reading audio file...')
    audio = ess.MonoLoader(filename=fname)()
    fft = ess.FFT(size=params.Nfft)  # this gives us a complex FFT
    c2p = ess.CartesianToPolar(
    )  # and this turns it into a pair (magnitude, phase)
    pool = es.Pool()
    w = ess.Windowing(type="hamming")
    fTicks = params.fTicks
    poolName = 'features.flux'
    logger.info('Extracting Onset functions...')
    for frame in ess.FrameGenerator(audio,
                                    frameSize=params.frmSize,
                                    hopSize=params.hop):
        frmTime = params.hop / params.Fs * frameCounter + params.frmSize / (
            2.0 * params.Fs)
        zpFrame = np.hstack((frame, zz))
        mag, phase, = c2p(fft(w(zpFrame)))
        magFlux = mag - bufferFrame
        bufferFrame = np.copy(
            mag)  # Copying for the next iteration to compute flux
        for bands in range(params.numBands):
            chosenInd = (fTicks >= params.fBands[bands, 0]) & (
                fTicks <= params.fBands[bands, 1])
            magFluxBand = magFlux[chosenInd]
            magFluxBand = (magFluxBand + abs(magFluxBand)) / 2
            oFn = magFluxBand.sum()
            if math.isnan(oFn):
                print("NaN found here")
            pool.add(poolName + str(bands), oFn)
        pool.add('features.time', frmTime)
        frameCounter += 1
        if not np.mod(frameCounter, 10000):
            logger.info(
                str(frameCounter) + '/' + str(audio.size / params.hop) + '...')
    logger.info('Total frames processed = ' + str(frameCounter))
    timeStamps = es.array([pool['features.time']])
    all_feat = timeStamps
    for bands in range(params.numBands):
        feat_flux = es.array([pool[poolName + str(bands)]])
        all_feat = np.vstack((all_feat, feat_flux))
    return np.transpose(all_feat)
Example #27
def main():
    args = parse_args()
    audios = []
    for filename in args.filename:
        metadata = get_metadata(filename)
        audio = load_partial_audio(filename, args.start_time, args.end_time)
        sample_seconds = get_duration(audio, metadata.sampleRate)
        audio = resample_audio(audio, metadata.sampleRate, PLOT_SAMPLE_RATE)
        _, audio = energy_buckets(audio, sample_seconds)
        x = essentia.array(linspace(0, sample_seconds, len(audio)))
        print "Created %d energy points" % len(audio)
        audio = essentia.array(audio)
        audios.append((x, audio))

    plotting.plot_lines(audios, args.filename, args.output)
Example #28
def algorithm_durations(sound):
    """
    Returns the duration of a file according to its length in number of samples and according to an envelope
    computation (See FFont ismir paper TODO: cite correctly).
    :param sound: sound dictionary from dataset
    :return: dictionary with results per different methods
    """
    results = dict()
    sample_rate = 44100
    n_channels = 1
    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=sample_rate)
    length_samples = len(audio)
    duration = float(len(audio))/(sample_rate * n_channels)
    # NOTE: load_audio_file will resample to the given sample_rate and downmix to mono

    # Effective duration
    env = estd.Envelope(attackTime=10, releaseTime=10)
    envelope = env(essentia.array(audio))
    threshold = envelope.max() * 0.05
    envelope_above_threshold = np.where(envelope >= threshold)
    start_effective_duration = envelope_above_threshold[0][0]
    end_effective_duration = envelope_above_threshold[0][-1]
    length_samples_effective_duration = end_effective_duration - start_effective_duration

    results['durations'] = {
        'duration': duration,
        'length_samples': length_samples,
        'length_samples_effective_duration': length_samples_effective_duration,
        'start_effective_duration': start_effective_duration,
        'end_effective_duration': end_effective_duration
    }
    return results
Example #29
def serra_cover_similarity_measures(input_crp, disOnset=0.5, disExtension=0.5, simType='qmax'):
    """
    Computes the cover song similarity distance using Smith-Waterman local alignment over the
    cross recurrent plots, as described in [1] (qmax) and [2] (dmax)

    [1]. Serra, J., Serra, X., & Andrzejak, R. G. (2009). Cross recurrence quantification for cover
        song identification. New Journal of Physics, 11.

    [2]. Chen, N., Li, W., & Xiao, H. (2017). Fusing similarity functions for cover song identification.
         Multimedia Tools and Applications.

    Input:
        input_crp: 2-d binary matrix of cross recurrent plot (x-axis query song and y-axis for reference song)

      Params:
             disOnset: penalty for a disruption onset
             disExtension: penalty for a disruption extension
             simType: ['qmax', 'dmax']

    Return: cover similarity distance

    NOTE: CoverSongSimilarity algo will be available soon in the new essentia release
    """
    coversim = CoverSongSimilarity(disOnset=disOnset, disExtension=disExtension, simType=simType)
    score_matrix = coversim.compute(array(input_crp))
    return np.divide(np.sqrt(input_crp.shape[1]), np.max(score_matrix))
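A hedged usage sketch: the input cross recurrent plot could come from a binarized cross-similarity matrix like the one exercised in Example #4 (the names and parameter values here are assumptions):

import numpy as np
from essentia import array
from essentia.standard import CrossSimilarityMatrix

# Hypothetical HPCP frame matrices for a query and a reference song.
query_hpcp = np.random.rand(200, 12).astype(np.float32)
reference_hpcp = np.random.rand(300, 12).astype(np.float32)

csm = CrossSimilarityMatrix(binarize=True, binarizePercentile=0.095)
crp = csm(array(query_hpcp), array(reference_hpcp))
distance = serra_cover_similarity_measures(crp, simType='qmax')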
Example #30
def compute_essentia_descriptors(audio_segment, actual_bar_beg,
                                 actual_bar_end):
    """
	Computes the values of selected descriptors in the given audio segment.
	"""
    frames = FrameGenerator(audio_segment,
                            frameSize=frameSize,
                            hopSize=hopSize)
    mfccs_bar = []
    bark_vector = [0] * 27
    pool = essentia.Pool()
    total_frames = frames.num_frames()

    for frame in frames:
        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)
        (frame_frequencies, frame_magnitudes) = spectralPeaks(frame_spectrum)
        mag, phase, = c2p(fft(frame_windowed))
        pool.add('onsets.hfc', od(mag, phase))
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add('dissonance', frame_dissonance)
        # pool.add('zerocrossingrate', zerocrossingrate(frame))
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(window(frame)))
        mfccs_bar.append(mfcc_coeffs)
        frame_barkbands = barkbands(frame_spectrum)
        for i in range(27):
            bark_vector[i] += frame_barkbands[i] / total_frames

    onsets_hfc = onsets(essentia.array([pool['onsets.hfc']]), [1])
    onset_rate = float(len(onsets_hfc)) / (actual_bar_end - actual_bar_beg)
    bar_dissonance = mean(pool["dissonance"])

    return mfccs_bar, bark_vector, onset_rate, bar_dissonance
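The function above relies on several module-level objects; a sketch of plausible definitions follows, all of which are assumptions about the surrounding module:

import essentia
from essentia.standard import (Windowing, Spectrum, SpectralPeaks, FFT,
                               CartesianToPolar, OnsetDetection, Dissonance,
                               MFCC, BarkBands, Onsets, FrameGenerator)
from numpy import mean

frameSize, hopSize = 2048, 1024
window = Windowing(type='hann')
spectrum = Spectrum()
spectralPeaks = SpectralPeaks(orderBy='frequency')  # Dissonance expects peaks ordered by frequency
fft = FFT()
c2p = CartesianToPolar()
od = OnsetDetection(method='hfc')
dissonance = Dissonance()
mfcc = MFCC()
barkbands = BarkBands(numberBands=27)
onsets = Onsets()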
Example #31
def tuningSystemFeatures(pool, namespace=''):
    # expects tonal descriptors and tuning features to be in pool
    tonalspace = 'tonal.'
    if namespace: tonalspace = namespace + '.tonal.'

    # 1-diatonic strength
    hpcp_highres = normalize(numpy.mean(pool[tonalspace + 'hpcp_highres'], 0))
    key, scale, strength, _ = standard.Key(
        profileType='diatonic')(hpcp_highres)
    pool.set(tonalspace + 'tuning_diatonic_strength', strength)

    # 2- high resolution features
    eqTempDeviation, ntEnergy, _ = standard.HighResolutionFeatures()(
        hpcp_highres)
    pool.set(tonalspace + 'tuning_equal_tempered_deviation', eqTempDeviation)
    pool.set(tonalspace + 'tuning_nontempered_energy_ratio', ntEnergy)

    # 3- THPCP: rotate the averaged HPCP so that the maximum bin comes first
    hpcp = normalize(numpy.mean(pool[tonalspace + 'hpcp'], 0))
    hpcp_copy = numpy.copy(hpcp)  # hpcp[:] would only be a view of a numpy array
    idx = numpy.argmax(hpcp)
    offset = len(hpcp) - idx
    hpcp[:offset] = hpcp_copy[idx:offset + idx]
    hpcp[offset:offset + idx] = hpcp_copy[0:idx]
    pool.set(tonalspace + 'thpcp', essentia.array(hpcp))
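The THPCP rotation above is equivalent to a negative numpy roll by the index of the maximum; a quick sketch of the equivalence:

import numpy as np

hpcp = np.array([0.2, 0.1, 1.0, 0.4])
idx = np.argmax(hpcp)
thpcp = np.roll(hpcp, -idx)  # array([1.0, 0.4, 0.2, 0.1])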
Example #32
    def process(self, frames, eod=False):
        if not eod:
            w_frame = self.windower(essentia.array(frames.squeeze()))
            spectrum = self.spec_alg(w_frame)
            spec, mags = self.spec_peaks_alg(spectrum)
            self.dissonance.append(self.dissonance_alg(spec, mags))
        return frames, eod
Example #33
def callback(data):
    # update audio buffer
    buffer[:] = array(unpack('f' * bufferSize, data))

    # generate predictions
    reset(vimp)
    run(vimp)
Example #34
    def extractBeats(self, fileName):
        """Use a beattracker to return beat locations
        
        :param fileName: the file to load and extract beats from
         
        :return:
            ticks: the times in the file
        
            slices: the segmented audio units
        
            fileName: pass out the filename again
        """

        slices = None
        ticks = None

        beatTracker = essentia.standard.BeatTrackerDegara()
        duration = essentia.standard.Duration()

        if fileName:
            audio = self.loadAudio(fileName)

            ticks = beatTracker(audio)

            endTimes = ticks[1:]
            d = duration(audio)
            endTimes = np.append(endTimes, d)
            endTimes = essentia.array(endTimes)

            # slicer = essentia.standard.Slicer(startTimes=onsetTimes, endTimes=endTimes)
            # slices = slicer(audio)

            slices = self.slice(ticks, audio)

        return ticks, slices, fileName
Example #35
    def loadAudio(self, filename):
        """Load audio from a filename and return the audio vector
        
        :param filename: input filename
        
        :return: audio signal
        """

        audio = None

        if filename:
            # loader = essentia.standard.MonoLoader(filename=filename)
            #
            # # and then we actually perform the loading:
            # audio = loader()

            #Essentia's loader (above)  has a bug that doesn't close files
            #It causes problems processing large number of files, use madmom instead
            # audio, sample_rate = madmom.audio.signal.load_wave_file(filename, num_channels=1)

            y, sr = librosa.load(filename, sr=None)

            audio = essentia.array(y)

        return audio
Example #36
    def mean_scope(self, scopeFrom, scopeTo):
        descriptors_mean = {}

        for key in self.descriptors.keys():
            descriptor = self.descriptors[self.__currentNamespace][key]
            values_in_scope = []

            # Global descriptor (should also check that scope spans the entire file)
            if len(descriptor['values']) == 1:
                descriptors_mean[key] = descriptor['values'][0]
                continue

            for scope, value in zip(descriptor['scopes'],
                                    descriptor['values']):
                if scope[0] >= scopeFrom and scope[1] <= scopeTo:
                    values_in_scope.append(value)

            if len(values_in_scope) > 0:
                try:
                    descriptors_mean[key] = essentia.array(
                        numpy.mean(values_in_scope, axis=0))
                except TypeError:  # values are not numeric
                    descriptors_mean[key] = values_in_scope[0]

        return descriptors_mean
Example #37
	def get_onsets(self, in_filename):

		# print in_filename
		# Load the audio (in mono)
		audio, sampleRate, numChan = AudioLoader(filename=in_filename)()
		audio = MonoLoader(filename=in_filename)()

		self.sampleRate = sampleRate

		# 1) Compute onset detection functions
		od = OnsetDetection(method='rms')

		w = Windowing(type='hann')
		fft = FFT()
		c2p = CartesianToPolar()

		pool_features = Pool()

		# print 'Computing onset detection functions'
		for frame in FrameGenerator(audio, frameSize=self.frame_size, hopSize=self.hop_size):
			mag, phase = c2p(fft(w(frame)))
			pool_features.add('features.rms', od(mag, phase))

		# 2) Compute the onset locations
		onsets = Onsets(silenceThreshold=0.14, delay=10)

		# print 'Computing onset locations'
		onsets_rms = onsets(
							array([ pool_features['features.rms'] ]),
							[ 1 ])

		print "Num onsets: " + str(len(onsets_rms))

		return onsets_rms
Example #38
def extractor(filename):    

    PREEMPH = 0.97
    fs = 44100
    audio = ess.MonoLoader(filename = filename, 
                                          sampleRate = fs)()
    # dynamic range expansion as done in HTK implementation
    audio = audio*2**15

    frameSize = 1102 # corresponds to htk default WINDOWSIZE = 250000.0 
    hopSize = 441 # corresponds to htk default TARGETRATE = 100000.0
    fftSize = 2048
    spectrumSize= fftSize//2+1
    zeroPadding = fftSize - frameSize

    w = ess.Windowing(type = 'hamming', #  corresponds to htk default  USEHAMMING = T
                        size = frameSize, 
                        zeroPadding = zeroPadding,
                        normalized = False,
                        zeroPhase = False)

    spectrum = ess.Spectrum(size = fftSize)

    mfcc_htk = ess.MFCC(inputSize = spectrumSize,
                        type = 'magnitude', # htk uses mel filterbank magnitudes
                        warpingFormula = 'htkMel', # htk's mel warping formula
                        weighting = 'linear', # computation of filter weights done in Hz domain
                        highFrequencyBound = 8000, # corresponds to htk default
                        lowFrequencyBound = 0, # corresponds to htk default
                        numberBands = 26, # corresponds to htk default NUMCHANS = 26
                        numberCoefficients = 13,
                        normalize = 'unit_max', # htk filter normalization: constant height = 1
                        dctType = 3, # htk uses DCT type III
                        logType = 'log',
                        liftering = 22) # corresponds to htk default CEPLIFTER = 22

    preemph_filter = ess.IIR(numerator=[1., -PREEMPH])  # pre-emphasis: y[n] = x[n] - 0.97*x[n-1]
    mfccs = []
    # startFromZero = True, validFrameThresholdRatio = 1 : the way htk computes windows
    for frame in ess.FrameGenerator(audio, frameSize = frameSize, hopSize = hopSize , startFromZero = True, validFrameThresholdRatio = 1):
        frame = frame - np.mean(frame)    # if ENORMALISE = T
        
        frame_doubled_first = np.insert(frame,0,frame[0])  ##### if PREEMPHASIS needed
        preemph_frame = preemph_filter(frame_doubled_first)
        frame = preemph_frame[1:]
        
        spect = spectrum(w(frame))
        mel_bands, mfcc_coeffs = mfcc_htk(spect)
        mfccs.append(mfcc_coeffs)


    # transpose to have it in a better shape
    # we need to convert the list to an essentia.array first (== numpy.array of floats)
    # mfccs = essentia.array(pool['MFCC']).T
    mfccs = essentia.array(mfccs).T

    # and plot
    plt.imshow(mfccs[1:,:], aspect = 'auto', interpolation='none') # ignore energy
    # plt.imshow(mfccs, aspect = 'auto', interpolation='none')
    plt.show() # unnecessary if you started "ipython --pylab"
Example #39
def file_to_hpcp(loop):
    loop = e.array(loop)

    windowing = es.Windowing(type='blackmanharris62')
    spectrum = es.Spectrum()
    spectral_peaks = es.SpectralPeaks(orderBy='magnitude',
                                      magnitudeThreshold=0.001,
                                      maxPeaks=20,
                                      minFrequency=20,
                                      maxFrequency=8000)
    hpcp = es.HPCP(maxFrequency=8000)
    spec_group = []
    hpcp_group = []
    for frame in es.FrameGenerator(loop, frameSize=1024, hopSize=512):
        windowed = windowing(frame)
        fft = spectrum(windowed)
        frequencies, magnitudes = spectral_peaks(fft)
        final_hpcp = hpcp(frequencies, magnitudes)
        spec_group.append(fft)
        hpcp_group.append(final_hpcp)

    mean_hpcp = np.mean(np.array(hpcp_group).T, axis=1)
    #normalize to 1
    mean_hpcp = mean_hpcp / mean_hpcp.max()

    return mean_hpcp
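A possible call site, assuming e and es alias essentia and essentia.standard as the function body suggests:

import essentia as e
import essentia.standard as es
import numpy as np

loop = es.MonoLoader(filename='loop.wav', sampleRate=44100)()  # hypothetical file
mean_hpcp = file_to_hpcp(loop)
print(mean_hpcp.shape)  # (12,) with the default HPCP size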
Example #40
def algorithm_durations(sound):
    """
    Returns the duration of a file according to its length in number of samples and according to an envelope
    computation (See FFont ismir paper TODO: cite correctly).
    :param sound: sound dictionary from dataset
    :return: dictionary with results per different methods
    """
    results = dict()
    sample_rate = 44100
    n_channels = 1
    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=sample_rate)
    length_samples = len(audio)
    duration = float(len(audio))/(sample_rate * n_channels)
    # NOTE: load_audio_file will resample to the given sample_rate and downmix to mono

    # Effective duration
    env = estd.Envelope(attackTime=10, releaseTime=10)
    envelope = env(essentia.array(audio))
    threshold = envelope.max() * 0.05
    envelope_above_threshold = np.where(envelope >= threshold)
    start_effective_duration = envelope_above_threshold[0][0]
    end_effective_duration = envelope_above_threshold[0][-1]
    length_samples_effective_duration = end_effective_duration - start_effective_duration

    results['durations'] = {
        'duration': duration,
        'length_samples': length_samples,
        'length_samples_effective_duration': length_samples_effective_duration,
        'start_effective_duration': start_effective_duration,
        'end_effective_duration': end_effective_duration
    }
    return results
Example #41
def clicke(length):
    short = np.zeros(int(44100 * (length / 1000.)))
    short[0] = 1.0
    decay = length / 6
    env = Envelope()
    env.configure(attackTime=0, releaseTime=decay)
    return env(essentia.array(short))
Example #42
    def detect_peaks(self, min_peak_ratio=0.15):
        """--------------------------------------------------------------------
        Finds the peak indices of the distribution. These are treated as tonic
        candidates in higher order functions.
        min_peak_ratio: The minimum ratio between the max peak value and the
                        value of a detected peak
        --------------------------------------------------------------------"""
        assert 1 >= min_peak_ratio >= 0, \
            'min_peak_ratio should be between 0 (keep all peaks) and ' \
            '1 (keep only the highest peak)'

        # Peak detection is handled by Essentia
        detector = std.PeakDetection()
        peak_bins, peak_vals = detector(essentia.array(self.vals))

        # Essentia normalizes the positions to 1, they are converted here
        # to actual index values to be used in bins.
        peak_inds = np.array([int(round(bn * (len(self.bins) - 1)))
                              for bn in peak_bins])

        # if the object is pcd and there is a peak at zeroth index,
        # there will be another in the last index. Since a pcd is circular
        # remove the lower value
        if self.is_pcd() and peak_inds[0] == 0:
            if peak_vals[0] >= peak_vals[-1]:
                peak_inds = peak_inds[:-1]
                peak_vals = peak_vals[:-1]
            else:
                peak_inds = peak_inds[1:]
                peak_vals = peak_vals[1:]

        # remove peaks lower than the min_peak_ratio
        peak_bool = peak_vals / max(peak_vals) >= min_peak_ratio

        return peak_inds[peak_bool], peak_vals[peak_bool]
Example #43
def extractPredominantMelody(audio_URI, frameSize=None, hopSize=None):
    '''
    extract predominant melody with melodia
    to reduce dependency copied from https://github.com/georgid/AlignmentDuration/blob/noteOnsets/src/align/FeatureExtractor.py

    audio_URI: string
        full file URI with extension
    '''
    from essentia.standard import PredominantPitchMelodia

    fs = 44100
    vTol = 1.4
    
    loader = essentia.standard.MonoLoader(filename=audio_URI)
    audioSamples = loader()

    # extract f0 using ESSENTIA
    input = essentia.array(audioSamples)
    pitchTracker = PredominantPitchMelodia(frameSize = frameSize, hopSize = hopSize, sampleRate = fs,
        voicingTolerance = vTol, voiceVibrato = False, filterIterations=10, 
        peakDistributionThreshold=0.9, guessUnvoiced=True)
    
#     pitchTracker = PredominantMelody(frameSize = wSize, hopSize = hSize, sampleRate = fs,
#         voicingTolerance = vTol, voiceVibrato = False, filterIterations=10, 
#         peakDistributionThreshold=0.9, guessUnvoiced=True)
    
    f0, pitchConf = pitchTracker(input)
    
    timestamps = calc_TimeStamps(audioSamples, f0, frameSize, fs)
    est_freq_and_ts = np.array(list(zip(timestamps, f0)))  # list() keeps this Python 3 compatible
    
    return est_freq_and_ts
Example #44
def compute(features, opt):
    frameRate = opt['sampleRate'] / opt['hopSize']

    delay = int(conf.opts["doubleOnsetT"] / 2. * frameRate)
    onsets = Onsets(frameRate=frameRate,
                    alpha=opts["alpha"],
                    delay=delay,
                    silenceThreshold=opts["silenceThresh"])
    if isinstance(features[0], list) or isinstance(features[0], np.ndarray):
        weights = essentia.array([1 for x in range(len(features))])
        time_onsets = list(onsets(essentia.array(features), weights))
    else:
        time_onsets = list(
            onsets(essentia.array([features]), essentia.array([1])))

    return time_onsets
Example #45
	def detect_peaks(self):
		detector = std.PeakDetection()
		peak_bins, peak_vals = detector(essentia.array(self.vals))
		# Essentia normalizes the positions to 1
		peak_idxs = [round(bn * (len(self.bins) - 1)) for bn in peak_bins]
		if peak_idxs[0] == 0:
			peak_idxs = np.delete(peak_idxs, [len(peak_idxs) - 1])
			peak_vals = np.delete(peak_vals, [len(peak_vals) - 1])
		return peak_idxs, peak_vals
Example #46
def compute_all_features(audio_file, audio_beats=False):
    """Computes all the features for a specific audio file and its respective
        human annotations.

    Returns
    -------
    features : dict
        Dictionary with the following features:
            mfcc : np.array
                Mel Frequency Cepstral Coefficients representation
            hpcp : np.array
                Harmonic Pitch Class Profiles
            tonnetz : np.array
                Tonal Centroids (or Tonnetz)
    """

    # Makes sure the output features folder exists
    utils.ensure_dir(OUTPUT_FEATURES)
    features_file = os.path.join(OUTPUT_FEATURES,
                                 os.path.basename(audio_file) + ".json")

    # If already precomputed, read and return
    if os.path.exists(features_file):
        with open(features_file, "r") as f:
            features = json.load(f)
        return list_to_array(features)

    # Load Audio
    logging.info("Loading audio file %s" % os.path.basename(audio_file))
    audio = ES.MonoLoader(filename=audio_file, sampleRate=SAMPLE_RATE)()
    duration = len(audio) / float(SAMPLE_RATE)

    # Estimate Beats
    features = {}
    ticks, conf = compute_beats(audio)
    ticks = np.concatenate(([0], ticks, [duration]))  # Add first and last time
    ticks = essentia.array(np.unique(ticks))
    features["beats"] = ticks.tolist()

    # Compute Beat-sync features
    features["mfcc"], features["hpcp"], features["tonnetz"] = \
        compute_beatsync_features(ticks, audio)

    # Save output as audio file
    if audio_beats:
        logging.info("Saving Beats as an audio file")
        marker = ES.AudioOnsetsMarker(onsets=ticks, type='beep',
                                      sampleRate=SAMPLE_RATE)
        marked_audio = marker(audio)
        ES.MonoWriter(filename='beats.wav',
                      sampleRate=SAMPLE_RATE)(marked_audio)

    # Save features
    with open(features_file, "w") as f:
        json.dump(features, f)

    return list_to_array(features)
Example #47
def getOnsetFunctions(fname):
    logger = log.get_logger("rhythm")
    zeropadLen = params.Nfft - params.frmSize
    zz = np.zeros((zeropadLen,),dtype = 'float32')
    frameCounter = 0
    bufferFrame = np.zeros((params.Nfft/2+1,))
    logger.info('Reading audio file...')
    audio = ess.MonoLoader(filename = fname)()
    fft = ess.FFT(size = params.Nfft) # this gives us a complex FFT
    c2p = ess.CartesianToPolar() # and this turns it into a pair (magnitude, phase)
    pool = es.Pool()
    w = ess.Windowing(type = "hamming")
    fTicks = params.fTicks
    poolName ='features.flux' 
    logger.info('Extracting Onset functions...')
    for frame in ess.FrameGenerator(audio, frameSize = params.frmSize, hopSize = params.hop):
        frmTime = params.hop/params.Fs*frameCounter + params.frmSize/(2.0*params.Fs)
        zpFrame = np.hstack((frame,zz))
        mag, phase, = c2p(fft(w(zpFrame)))
        magFlux = mag - bufferFrame
        bufferFrame = np.copy(mag)      # Copying for the next iteration to compute flux 
        for bands in range(params.numBands):
            chosenInd = (fTicks >= params.fBands[bands,0]) & (fTicks <= params.fBands[bands,1])
            magFluxBand = magFlux[chosenInd]
            magFluxBand = (magFluxBand + abs(magFluxBand))/2
            oFn = magFluxBand.sum()
            if math.isnan(oFn):
                print "NaN found here"
            pool.add(poolName + str(bands), oFn)
        pool.add('features.time', frmTime)
        frameCounter += 1
        if not np.mod(frameCounter,10000):
            logger.info(str(frameCounter) + '/' + str(audio.size/params.hop) + '...')
    logger.info('Total frames processed = ' + str(frameCounter))
    timeStamps = es.array([ pool['features.time'] ])
    all_feat = timeStamps
    for bands in range(params.numBands):
        feat_flux = es.array([ pool[poolName + str(bands)] ])
        all_feat = np.vstack((all_feat,feat_flux))
    return np.transpose(all_feat)
Example #48
def getLPF(signalNeeded):
    
    midi_note = int(float(signalNeeded[-2:]))
    
    fc = 440.*np.power(semitone, midi_note)
    LPF = LowPass(sampleRate=fs, cutoffFrequency=fc)
    signal = essentia.array(LPF(getWhiteNoise()))
    # Normalization
    signal = signal / max(signal)
    
    return signal
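getLPF depends on module-level names not shown here; below is a sketch of plausible definitions for semitone, fs, and getWhiteNoise, all of them assumptions for illustration:

import numpy as np
import essentia
from essentia.standard import LowPass

fs = 44100
semitone = 2.0 ** (1.0 / 12.0)  # equal-tempered semitone ratio

def getWhiteNoise(duration_s=1.0):
    # hypothetical helper: uniform white noise of the given duration
    return essentia.array(np.random.uniform(-1.0, 1.0, int(fs * duration_s)))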
Example #49
	def detect_essentia(arquivo_audio,selected): #ODF using essentia library
		# 
		try:
		    filename = arquivo_audio
		except:
		    print "usage:", sys.argv[0], "<audiofile>"
		    sys.exit()

		# don't forget, we can actually instantiate and call an algorithm on the same line!
		global audio

		# Phase 1: compute the onset detection function
		# The OnsetDetection algorithm tells us that there are several methods available in Essentia,
		# let's do two of them
		if selected==3:
			od = OnsetDetection(method = 'hfc')
		elif selected==4:
			od = OnsetDetection(method = 'complex')
		elif selected==5:
			od = OnsetDetection(method = 'melflux')
		elif selected==6:
			od = OnsetDetection(method = 'complex_phase')
		elif selected==7:
			od = OnsetDetection(method = 'rms')


		# let's also get the other algorithms we will need, and a pool to store the results
		w = Windowing(type = 'hann')
		fft = FFT() # this gives us a complex FFT
		c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)

		pool = Pool()

		# let's get down to business
		print 'Computing onset detection functions...'
		for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):
		    mag, phase, = c2p(fft(w(frame)))
		    pool.add('features.method', od(mag, phase))

		# Phase 2: compute the actual onsets locations
		onsets = Onsets()
		print 'Computing onset times...'
		onsets_method = onsets(array([ pool['features.method'] ]), [ 1 ])

		# and mark them on the audio, which we'll write back to disk
		# we use beeps instead of white noise to mark them, as it's more distinctive

		# convert to a list
		listadet = onsets_method.tolist()

		# convert the seconds to frames
		listadet = [int(SecToFrames(x)) for x in listadet if x >= 0]
		 
		return listadet
Example #50
def SMDetect(l_deriv, alph, sTh):
    if len(l_deriv) > 1:
        ons = ess.Onsets(alpha=alph, silenceThreshold=sTh, frameRate=172)
        det = ons(essentia.array(l_deriv).reshape(1, len(l_deriv)), [1])

        if len(det) > 1:
            return 1
        else:
            return 0

    else:
        return 0
Example #51
    def extract_mfcc(self, fname):
        w = Windowing(type='blackmanharris62')
        spectrum = Spectrum()
        mfcc = essentia.standard.MFCC()
        mfccs = []
        loader = EasyLoader(filename=fname)
        audio = loader.compute()
        for frame in FrameGenerator(audio, frameSize=2048, hopSize=1024):
            mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
            mfccs.append(mfcc_coeffs)
        mfccs = essentia.array(mfccs).T
        return mfccs
Example #52
def frequenciesComp(bands, fmin, fmax,ffts,sampleRate=44100,a=440):
        """
        Returns a list of frequencies aligned on a logarithmic scale.

        :param bands: number of filter bands per octave
        :param fmin:  the minimum frequency [Hz]
        :param fmax:  the maximum frequency [Hz]
        :param a:     tuning reference frequency (A4) [Hz]
        :returns:     a list of frequencies

        Using 12 bands per octave and a=440 corresponding to the MIDI notes.

        """
        
        if fmax > sampleRate / 2:
            fmax = sampleRate / 2

        # factor 2 frequencies are apart
        factor = 2.0 ** (1.0 / bands)
        # start with A0
        freq = a
        frequencies = [freq]
        # go upwards till fmax
        while freq <= fmax:
            # multiply once more, since the included frequency is a frequency
            # which is only used as the right corner of a (triangular) filter
            freq *= factor
            frequencies.append(freq)
        # restart with a and go downwards till fmin
        freq = a
        while freq >= fmin:

            freq /= factor
            frequencies.append(freq)
        # sort frequencies
        frequencies.sort()
        # return the list
        
        # conversion factor for mapping of frequencies to spectrogram bins
        factor = (sampleRate / 2.0) / ffts
        # map the frequencies to the spectrogram bins
        frequencies = np.round(np.asarray(frequencies) / factor).astype(int)
        # only keep unique bins
        frequencies = np.unique(frequencies)
        # filter out all frequencies outside the valid range
        frequencies = [f*factor for f in frequencies if f < ffts]

        return essentia.array(frequencies)
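An illustrative call, assuming a 2048-point FFT (ffts = 1024 bins up to Nyquist): 12 bands per octave between 27.5 Hz and 16 kHz, snapped to bin centres.

import numpy as np
import essentia

freqs = frequenciesComp(bands=12, fmin=27.5, fmax=16000.0, ffts=1024)
print(len(freqs), freqs[:5])  # low notes collapse onto shared bins, hence fewer entries than notes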
Example No. 53
0
def compute(audio):
    audio = essentia.array(audio)
    sampleRate  = int(conf.opts['sampleRate'])
    frameSize   = int(conf.opts['frameSize'])
    hopSize     = int(conf.opts['hopSize'])
    zeroPadding = int(conf.opts['zeroPadding'])
    windowType  = conf.opts['windowType']

    frameRate = float(sampleRate)/float(hopSize)

    INFO('Computing Onset Detection...')

    frames  = FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
    window  = Windowing(size = frameSize, zeroPadding = zeroPadding, type = windowType)
    nsdff = Nsdf()
    fftf = Spectrum()
    crestf = Crest()
    instPowf = InstantPower()

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize*0.5

    progress = Progress(total = total_frames)
    cr = []
    for frame in frames:

        windowed_frame = window(frame)
        nsdf = nsdff(frame)
        fftn = fftf(nsdf)
        cr += [crestf(fftn)]
        power = instPowf(frame)  # renamed from `pow` to avoid shadowing the built-in

        # hold the previous crest value on near-silent frames
        if len(cr) > 2 and power < conf.opts['minthresh']:
            cr[-1] = cr[-2]

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    cr = np.array(cr)
#     w = signal.gaussian(3,1)
#     area = np.sum(w)
#     cr =np.convolve(cr,w , 'same')
#     cr = cr/(100.*area)
    return cr
Example No. 54
0
def compute(audio):
    audio = essentia.array(audio)
    sampleRate = int(conf.opts["sampleRate"])
    frameSize = int(conf.opts["frameSize"])
    hopSize = int(conf.opts["hopSize"])
    zeroPadding = int(conf.opts["zeroPadding"])
    windowType = conf.opts["windowType"]

    frameRate = float(sampleRate) / float(hopSize)

    INFO("Computing Ess Detection...")

    frames = FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = Windowing(size=frameSize, zeroPadding=zeroPadding, type=windowType)
    fft = FFT()
    cartesian2polar = CartesianToPolar()
    onsetdetectionHFC = OnsetDetection(method="hfc", sampleRate=sampleRate)
    onsetdetectionComplex = OnsetDetection(method="complex", sampleRate=sampleRate)

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    hfc = []
    complexdet = []  # renamed from `complex` to avoid shadowing the built-in

    progress = Progress(total=total_frames)
    maxhfc = 0

    for frame in frames:

        windowed_frame = window(frame)
        complex_fft = fft(windowed_frame)
        (spectrum, phase) = cartesian2polar(complex_fft)
        hfc.append(onsetdetectionHFC(spectrum, phase))
        maxhfc = max(hfc[-1], maxhfc)
        complexdet.append(onsetdetectionComplex(spectrum, phase))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # stack the max-normalized HFC curve and the complex-domain curve
    res = [[x / (maxhfc or 1.0) for x in hfc]]  # guard against an all-zero HFC curve
    res += [complexdet]

    return np.array(res)
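A hedged sketch of driving this compute(): `conf`, `INFO` and `Progress` are module-level names in the original project, so the stand-ins below are assumptions, as is the input file name.

import types
from essentia.standard import MonoLoader

conf = types.SimpleNamespace(opts={'sampleRate': 44100, 'frameSize': 1024,
                                   'hopSize': 512, 'zeroPadding': 0,
                                   'windowType': 'hann'})
INFO = print  # stand-in logger

class Progress:  # no-op stand-in for the project's progress reporter
    def __init__(self, total): self.total = total
    def update(self, n): pass

audio = MonoLoader(filename='input.wav')()
odfs = compute(audio)  # odfs[0]: max-normalized HFC curve, odfs[1]: complex-domain curve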
Example No. 55
0
def getBPF(signalNeeded):

    midi_note = int(float(signalNeeded[-2:]))

    fc0 = 440. * np.power(semitone, midi_note)
    fc1 = 440. * np.power(semitone, midi_note + 2*12 + 1)  # 2 octaves + 1 semitone above fc0
    bandwidth = fc1 - fc0
    cutoffFrequency = (fc0 + fc1) / 2.
    BPF = BandPass(bandwidth=bandwidth, cutoffFrequency=cutoffFrequency, sampleRate=fs)
    signal = essentia.array(BPF(getWhiteNoise()))
    # normalize to peak amplitude 1
    signal = signal / np.max(np.abs(signal))

    return signal
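getBPF leans on three globals defined elsewhere in the original script; plausible stand-ins are sketched below (the names and values are assumptions).

import numpy as np
import essentia
from essentia.standard import BandPass

semitone = 2.0 ** (1.0 / 12.0)  # frequency ratio of one equal-tempered semitone
fs = 44100                      # sample rate in Hz

def getWhiteNoise(duration_s=1.0):
    # uniform white noise in [-1, 1]
    return essentia.array(np.random.uniform(-1.0, 1.0, int(duration_s * fs)))

band_noise = getBPF('note_60')  # the last two characters are parsed as the MIDI note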
Example No. 56
0
	def detect_peaks(self):
		"""-------------------------------------------------------------------------
		Finds the peak indices of the distribution. These are treated as tonic
		candidates in higher order functions.
		-------------------------------------------------------------------------"""
		# Peak detection is handled by Essentia
		detector = std.PeakDetection()
		peak_bins, peak_vals = detector(essentia.array(self.vals))

		# Essentia normalizes the positions to 1; they are converted here
		# to actual index values to be used in bins.
		peak_idxs = [round(bn * (len(self.bins) - 1)) for bn in peak_bins]

		# discard a spurious peak at the very first bin
		if peak_idxs[0] == 0:
			peak_idxs = np.delete(peak_idxs, 0)
			peak_vals = np.delete(peak_vals, 0)
		return peak_idxs, peak_vals
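For reference, a self-contained illustration of the Essentia call this method wraps; the toy distribution is made up.

import numpy as np
import essentia
import essentia.standard as std

vals = np.exp(-0.5 * ((np.arange(100) - 40.0) / 5.0) ** 2)  # a single Gaussian bump
positions, amplitudes = std.PeakDetection()(essentia.array(vals))
print(positions * (len(vals) - 1))  # undo Essentia's normalization of positions to [0, 1]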
Example No. 57
0
def compute(inputFilename, audio, pool, options):

    INFO('Doing segmentation...')

    seg_type = options[namespace]['type']  # renamed from `type` to avoid shadowing the built-in
    minimumLength = options[namespace]['minimumSegmentsLength']
    thumbnail = options[namespace]['thumbnailing']

    if pool.value('metadata.duration_processed') < minimumLength:
        segments = []
        INFO('No segments found!')
    else:
        segments = doSegmentation(inputFilename, audio, pool, options)

    #pool.setCurrentNamespace(namespace)
    pool.add(namespace + '.timestamps', essentia.array(segments))  #, pool.GlobalScope)

    return segments
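A small self-contained sketch of the Pool call above: timestamps are accumulated under a namespaced key and can be read back as an array (the key name is illustrative).

import essentia
from essentia import Pool

pool = Pool()
pool.add('segmentation.timestamps', essentia.array([0.0, 12.5, 31.2]))
print(pool['segmentation.timestamps'])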
Example No. 58
0
def extractor(filename):
	frameSize = 1024
	hopSize = 512
	fs = 44100
	audio = ess.MonoLoader(filename=filename, sampleRate=fs)()
	w = ess.Windowing(type='hamming', normalized=False)
	# make sure these are the same for the MFCC and IDCT computation
	NUM_BANDS = 26
	DCT_TYPE = 2
	LIFTERING = 0
	NUM_MFCCs = 13

	spectrum = ess.Spectrum()
	mfcc = ess.MFCC(numberBands=NUM_BANDS,
	                numberCoefficients=NUM_MFCCs,  # the fewer coefficients, the more lossy (blurry) the smoothed mel spectrum
	                weighting='linear',  # filter weights computed in the Hz domain (optional)
	                normalize='unit_max',  # htk filter normalization: constant height = 1 (optional)
	                dctType=DCT_TYPE,
	                logType='log',
	                liftering=LIFTERING)  # the htk default is CEPLIFTER = 22

	idct = ess.IDCT(inputSize=NUM_MFCCs,
	                outputSize=NUM_BANDS,
	                dctType=DCT_TYPE,
	                liftering=LIFTERING)
	all_melbands_smoothed = []
	for frame in ess.FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
		spect = spectrum(w(frame))
		melbands, mfcc_coeffs = mfcc(spect)
		melbands_smoothed = np.exp(idct(mfcc_coeffs))  # invert the log taken in the MFCC computation
		all_melbands_smoothed.append(melbands_smoothed)

	# transpose to have it in a better shape;
	# the list must be converted to an essentia.array first (== numpy.array of floats)
	all_melbands_smoothed = essentia.array(all_melbands_smoothed).T

	# and plot
	plt.imshow(all_melbands_smoothed, aspect='auto', interpolation='none')
	plt.show()  # unnecessary if you started "ipython --pylab"
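As a sanity check on the MFCC -> IDCT -> exp round trip used above, one can smooth a single synthetic frame and compare it against the raw mel bands; the parameters mirror the extractor, and the sine input is made up.

import numpy as np
import essentia
import essentia.standard as ess

fs = 44100
t = np.arange(1024) / float(fs)
frame = essentia.array(0.5 * np.sin(2 * np.pi * 440 * t))

spec = ess.Spectrum()(ess.Windowing(type='hamming', normalized=False)(frame))
melbands, coeffs = ess.MFCC(numberBands=26, numberCoefficients=13,
                            weighting='linear', normalize='unit_max',
                            dctType=2, logType='log', liftering=0)(spec)
smoothed = np.exp(ess.IDCT(inputSize=13, outputSize=26, dctType=2,
                           liftering=0)(coeffs))
print(np.max(np.abs(smoothed - melbands)))  # residual smoothing error across bands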