コード例 #1
0
def extractDefaultFeatures(audio, outputDir):
    """Extract the full Essentia feature set from ``audio`` and save it as JSON.

    Runs ``ess.Extractor`` with every descriptor group enabled, aggregates the
    resulting pool with ``ess.PoolAggregator`` (default statistics) and writes
    the aggregate to ``features.json`` inside ``outputDir``.

    Args:
        audio: audio signal handed unchanged to the extractor (presumably mono
            samples at 44100 Hz to match ``sampleRate`` -- confirm with caller).
        outputDir: directory that receives ``features.json``; it is created
            when it does not exist yet.
    """
    # compute all features for all sounds
    extractor = ess.Extractor(dynamics=True,
                              dynamicsFrameSize=88200,
                              dynamicsHopSize=44100,
                              highLevel=True,
                              lowLevel=True,
                              lowLevelFrameSize=2048,
                              lowLevelHopSize=1024,
                              midLevel=True,
                              namespace="",
                              relativeIoi=False,
                              rhythm=True,
                              sampleRate=44100,
                              tonalFrameSize=4096,
                              tonalHopSize=2048,
                              tuning=True)

    pool = extractor(audio)
    aggPool = ess.PoolAggregator()(pool)

    if not path.exists(outputDir):
        makedirs(outputDir)

    # Join rather than concatenate: a directory passed without a trailing
    # separator must still get the file written inside it, not next to it.
    ess.YamlOutput(filename=path.join(outputDir, "features.json"),
                   format="json",
                   doubleCheck=True)(aggPool)
コード例 #2
0
def featureExtraction(soundfiles):
    """Extract Essentia features for every sound file and export them as JSON.

    Each file is loaded with ``esst.MonoLoader``, analysed by an
    ``esst.Extractor`` that has the lowLevel, midLevel and tuning groups
    enabled, aggregated with ``esst.PoolAggregator`` and written to
    ``<download dir>/features/<name>_features.json``.

    Args:
        soundfiles: iterable of audio file paths. The leading characters of
            each path, as many as the download directory is long, are dropped
            to build the output name (presumably every file lives below that
            directory -- verify against caller).
    """
    import os  # local import: only needed for the extension handling below

    # extractor = esst.LowLevelSpectralExtractor()
    extractor = esst.Extractor(dynamics=False,
                               dynamicsFrameSize=88200,
                               dynamicsHopSize=44100,
                               highLevel=False,
                               lowLevel=True,
                               lowLevelFrameSize=2048,
                               lowLevelHopSize=1024,
                               midLevel=True,
                               namespace="",
                               relativeIoi=False,
                               rhythm=False,
                               sampleRate=44100,
                               tonalFrameSize=4096,
                               tonalHopSize=2048,
                               tuning=True)

    path1 = '/Users/helena/Desktop/SMC/ASP/sms-tools/workspace/A10/code/downloaded/'

    for soundfile in soundfiles:
        # Strip the download-directory prefix by its real length (formerly a
        # hard-coded 70) and the extension whatever its length (formerly
        # always the last 4 characters).
        relative = soundfile[len(path1):]
        name = os.path.splitext(relative)[0] + '_features.json'
        outPath = path1 + 'features/' + name

        # Parenthesised so the statement is valid on both Python 2 and 3.
        print(soundfile)

        audio = esst.MonoLoader(filename=soundfile)()
        pool = extractor(audio)
        aggPool = esst.PoolAggregator()(pool)
        output = esst.YamlOutput(filename=outPath, format='json')
        output(aggPool)
        print(outPath + ' exported')
コード例 #3
0
def featureExtraction(soundfiles):
    """Extract the full Essentia feature set for each sound and export it as JSON.

    Args:
        soundfiles: iterable of ``(audio_path, out_prefix)`` pairs. The
            aggregated features of ``audio_path`` are written to
            ``out_prefix + 'features.json'``; the prefix is concatenated as-is,
            so it must already end with a path separator if it names a
            directory.
    """
    extractor = esst.Extractor(dynamics=True,
                               dynamicsFrameSize=88200,
                               dynamicsHopSize=44100,
                               highLevel=True,
                               lowLevel=True,
                               lowLevelFrameSize=2048,
                               lowLevelHopSize=1024,
                               midLevel=True,
                               namespace="",
                               relativeIoi=False,
                               rhythm=True,
                               sampleRate=44100,
                               tonalFrameSize=4096,
                               tonalHopSize=2048,
                               tuning=True)

    for soundfile, outPath in soundfiles:
        audio = esst.MonoLoader(filename=soundfile)()
        # The extractor returns its own pool, so none is allocated beforehand.
        pool = extractor(audio)
        aggPool = esst.PoolAggregator()(pool)
        esst.YamlOutput(filename=outPath + 'features.json',
                        format='json')(aggPool)
        print(soundfile + ' exported')
コード例 #4
0
def computeAggregation(pool, segments_namespace=''):
    """Aggregate ``pool`` with the default statistics and MFCC-specific exceptions.

    Every descriptor is aggregated with mean, var, min, max and the first and
    second derivative mean/var. ``lowlevel.mfcc`` (globally and under each
    segment namespace) uses mean, cov and icov instead. When at least one
    segment namespace is given, ``segmentation.timestamps`` is copied
    unchanged.

    Args:
        pool: pool handed to ``standard.PoolAggregator``.
        segments_namespace: iterable of segment namespace names, or a single
            namespace given as a string. Empty means no segments.

    Returns:
        The pool returned by ``standard.PoolAggregator``.
    """
    # A bare string used to be iterated character by character, registering a
    # bogus exception per letter; treat it as one namespace instead.
    if isinstance(segments_namespace, str):
        namespaces = [segments_namespace] if segments_namespace else []
    else:
        namespaces = list(segments_namespace)

    stats = ['mean', 'var', 'min', 'max', 'dmean', 'dmean2', 'dvar', 'dvar2']
    mfcc_stats = ['mean', 'cov', 'icov']

    exceptions = {'lowlevel.mfcc': list(mfcc_stats)}
    for namespace in namespaces:
        exceptions[namespace + '.lowlevel.mfcc'] = list(mfcc_stats)

    if namespaces:
        exceptions['segmentation.timestamps'] = ['copy']

    return standard.PoolAggregator(defaultStats=stats,
                                   exceptions=exceptions)(pool)
コード例 #5
0
def main_danceability(args):
    """Compute the danceability feature over the input waveform and plot it.

    The audio returned by ``loadaudio(args)`` is cut into frames of
    ``10 * args.samplerate`` samples with a hop of ``5 * args.samplerate``
    samples. Each windowed frame is passed to ``estd.Danceability``; the
    first returned value is printed, stored in a pool under
    ``rhythm.danceability`` and finally plotted on the first axis of a 2x2
    figure.

    Args:
        args: object providing ``samplerate``; also forwarded to ``loadaudio``.
    """
    audio = loadaudio(args)

    # NOTE: an earlier spectral-centroid pass over the audio was removed here;
    # its aggregated result was never read, so it only cost time.

    # create the pool and the necessary algorithms
    pool = e.Pool()
    w = estd.Windowing()
    danceability = estd.Danceability(maxTau=10000, minTau=300,
                                     sampleRate=args.samplerate)

    frame_size = 10 * args.samplerate
    hop_size = 5 * args.samplerate

    # compute the danceability of every frame and add it to the pool
    for frame in estd.FrameGenerator(audio, frameSize=frame_size, hopSize=hop_size):
        # second output is not used by this script
        dreal, _dfa = danceability(w(frame))
        print(("d", dreal))
        pool.add('rhythm.danceability', dreal)

    print((type(pool['rhythm.danceability'])))

    fig, gs = makefig(rows=2, cols=2)
    ax = fig.axes

    ax[0].plot(pool['rhythm.danceability'])

    plt.show()
コード例 #6
0
def aggregatePoolArraysToNumbers(input_pool):
    """Return a copy of ``input_pool`` extended with aggregated statistics.

    The descriptors of the copy listed by ``descriptorNames(pk_array)`` are
    gathered in a temporary pool, ``es_mode.PoolAggregator()`` computes their
    statistics, and every statistic is inserted into the copy under its
    aggregated name with "array" replaced by "number".
    """
    result = duplicatePool(input_pool)

    # Collect only the array descriptors so the aggregator sees nothing else.
    array_pool = es.Pool()
    for name in result.descriptorNames(pk_array):
        array_pool.merge(name, result[name])

    stats_pool = es_mode.PoolAggregator()(array_pool)

    for stat_name in stats_pool.descriptorNames():
        result.mergeSingle(stat_name.replace("array", "number"),
                           stats_pool[stat_name])

    return result
コード例 #7
0
def reComputeDescriptors(inputAudioFile, outputJsonFile):
    """Recompute the descriptors of an audio file and save their means as JSON.

    The file is loaded as mono at 44100 Hz, ``sfx.compute`` and
    ``esx.compute`` fill one pool using the same analysis options, the pool is
    aggregated with the mean only, converted by ``makeFeatures`` and dumped
    to ``outputJsonFile``.

    :param inputAudioFile: path of the audio file to analyse
    :param outputJsonFile: path of the JSON file to write
    :return: None
    """
    fs = 44100

    # analysis parameters shared by both compute() calls
    options = {
        'sampleRate': fs,
        'frameSize': 2048,
        'hopSize': 1024,
        'windowType': 'blackmanharris62',
        'skipSilence': True,
    }

    audio = ess.MonoLoader(filename=inputAudioFile, sampleRate=fs)()

    pool = es.Pool()
    sfx.compute(audio, pool, options)
    esx.compute(audio, pool, options)

    aggrPool = ess.PoolAggregator(defaultStats=['mean'])(pool)

    features = makeFeatures(aggrPool)
    # Context manager so the file is flushed and closed even if dumping fails.
    with open(outputJsonFile, 'w') as json_file:
        json.dump(features, json_file)
コード例 #8
0
ファイル: joe_Opt2_Extract.py プロジェクト: hoinx/sms-tools
def reComputeDescriptors(inputAudioFile, outputJsonFile):
    """Recompute frame descriptors per energy threshold and save their means as JSON.

    A first pass measures the energy of every frame to find the maximum. In
    the second pass a frame contributes to the pool of each threshold in
    ``dscr.threshold`` that its max-normalised energy reaches. Per frame the
    MFCCs, dissonance, inharmonicity (only when more than one harmonic peak is
    found), spectral contrast, spectral centroid, log attack time, HFC and
    spectral complexity are added. Each pool is aggregated with the mean and
    appended to the output under the label ``"ethc" + str(dscr.thresholdSelect[i])``.

    :param inputAudioFile: path of the audio file to analyse
    :param outputJsonFile: path of the JSON file to write
    :return: None
    """
    M = 2048  # frame size
    H = 1024  # hop size
    fs = 44100
    W = 'blackmanharris62'

    spectrum = ess.Spectrum()
    window = ess.Windowing(type=W)
    mfcc = ess.MFCC()
    spectral_peaks = ess.SpectralPeaks(minFrequency=1,
                                       maxFrequency=20000,
                                       maxPeaks=100,
                                       sampleRate=fs,
                                       magnitudeThreshold=0,
                                       orderBy="magnitude")
    dissonance = ess.Dissonance()
    pitch_detection = ess.PitchYinFFT()
    harmonic_peaks = ess.HarmonicPeaks()
    inharmonicity = ess.Inharmonicity()
    spectral_contrast = ess.SpectralContrast()
    centroid = ess.Centroid()
    log_attack_time = ess.LogAttackTime()
    hfc = ess.HFC()
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame, see lowlevel.py
    spectral_complexity = ess.SpectralComplexity(magnitudeThreshold=0.005)
    energy = ess.Energy()

    def _add_to_pools(target_pools, name, value):
        # Add one descriptor value to every pool the current frame belongs to.
        for target in target_pools:
            target.add(name, value)

    x = ess.MonoLoader(filename=inputAudioFile, sampleRate=fs)()

    # First pass: frame energies, needed to normalise against the maximum.
    E = [energy(frame)
         for frame in ess.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True)]
    E_max = np.max(E)

    # Second pass: one pool per energy threshold.
    pools = [(t, es.Pool()) for t in dscr.threshold]
    for frame in ess.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True):

        eNorm = energy(frame) / E_max
        threshPools = [pool for t, pool in pools if eNorm >= t]

        mX = spectrum(window(frame))
        mfcc_bands, mfcc_coeffs = mfcc(mX)
        _add_to_pools(threshPools, 'lowlevel.mfcc', mfcc_coeffs)

        pfreq, pmag = spectral_peaks(mX)

        # Peaks come back ordered by magnitude; re-order them by frequency
        # before passing them on.
        inds = pfreq.argsort()
        pfreq_sorted = pfreq[inds]
        pmag_sorted = pmag[inds]

        diss = dissonance(pfreq_sorted, pmag_sorted)
        _add_to_pools(threshPools, 'lowlevel.dissonance', diss)

        pitch, pitch_confidence = pitch_detection(mX)

        phfreq, phmag = harmonic_peaks(pfreq_sorted, pmag_sorted, pitch)
        if len(phfreq) > 1:
            inharm = inharmonicity(phfreq, phmag)
            _add_to_pools(threshPools, 'sfx.inharmonicity', inharm)

        sc_coeffs, sc_valleys = spectral_contrast(mX)
        _add_to_pools(threshPools, 'lowlevel.spectral_contrast', sc_coeffs)

        _add_to_pools(threshPools, 'lowlevel.spectral_centroid', centroid(mX))
        _add_to_pools(threshPools, 'sfx.logattacktime', log_attack_time(frame))
        _add_to_pools(threshPools, 'lowlevel.hfc', hfc(mX))
        _add_to_pools(threshPools, 'lowlevel.spectral_complexity',
                      spectral_complexity(mX))

    calc_Mean_Var = ess.PoolAggregator(defaultStats=['mean'])
    aggrPools = [calc_Mean_Var(pool) for _, pool in pools]

    features = {}
    # NOTE(review): labels come from dscr.thresholdSelect while the pools are
    # built from dscr.threshold -- presumably parallel sequences; confirm.
    for i, aggrPool in enumerate(aggrPools):
        appendFeatures(features, aggrPool, "ethc" + str(dscr.thresholdSelect[i]))

    # Context manager so the file is flushed and closed even if dumping fails.
    with open(outputJsonFile, 'w') as json_file:
        json.dump(features, json_file)
コード例 #9
0
def reComputeDescriptors(inputAudioFile, outputJsonFile):
    """Recompute frame descriptors of an audio file and save mean/variance as JSON.

    For every frame the MFCC coefficients and bands, dissonance,
    inharmonicity (only when more than one harmonic peak is found), spectral
    contrast, spectral centroid, log attack time and HFC are added to one
    pool. The pool is aggregated with mean and variance, converted by
    ``makeFeatures`` and dumped to ``outputJsonFile``.

    :param inputAudioFile: path of the audio file to analyse
    :param outputJsonFile: path of the JSON file to write
    :return: None
    """
    # Analysis parameters follow the Freesound extractor settings noted by the
    # original author (sampleRate 44100, frameSize 2048, hopSize 1024,
    # zeroPadding 0, window "blackmanharris62"). An earlier variant used
    # M = N = 1024, H = 512 and a 'hann' window. The Freesound reference also
    # lists silence-rate thresholds of -20, -30 and -60 dB; they are not used here.
    M = 2048  # frame size
    H = 1024  # hop size
    fs = 44100
    W = 'blackmanharris62'

    spectrum = ess.Spectrum()
    window = ess.Windowing(type=W)
    mfcc = ess.MFCC()
    spectral_peaks = ess.SpectralPeaks(minFrequency=1,
                                       maxFrequency=20000,
                                       maxPeaks=100,
                                       sampleRate=fs,
                                       magnitudeThreshold=0,
                                       orderBy="magnitude")
    dissonance = ess.Dissonance()
    pitch_detection = ess.PitchYinFFT()
    harmonic_peaks = ess.HarmonicPeaks()
    inharmonicity = ess.Inharmonicity()
    spectral_contrast = ess.SpectralContrast()
    centroid = ess.Centroid()
    log_attack_time = ess.LogAttackTime()
    hfc = ess.HFC()

    x = ess.MonoLoader(filename=inputAudioFile, sampleRate=fs)()
    pool = es.Pool()
    for frame in ess.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True):
        mX = spectrum(window(frame))

        mfcc_bands, mfcc_coeffs = mfcc(mX)
        pool.add('lowlevel.mfcc', mfcc_coeffs)
        pool.add('lowlevel.mfcc_bands', mfcc_bands)

        pfreq, pmag = spectral_peaks(mX)

        # Peaks come back ordered by magnitude; re-order them by frequency
        # before passing them on.
        inds = pfreq.argsort()
        pfreq_sorted = pfreq[inds]
        pmag_sorted = pmag[inds]

        pool.add('lowlevel.dissonance', dissonance(pfreq_sorted, pmag_sorted))

        pitch, pitch_confidence = pitch_detection(mX)

        phfreq, phmag = harmonic_peaks(pfreq_sorted, pmag_sorted, pitch)
        if len(phfreq) > 1:
            pool.add('sfx.inharmonicity', inharmonicity(phfreq, phmag))

        sc_coeffs, sc_valleys = spectral_contrast(mX)
        pool.add('lowlevel.spectral_contrast', sc_coeffs)

        pool.add('lowlevel.spectral_centroid', centroid(mX))
        pool.add('sfx.logattacktime', log_attack_time(frame))
        pool.add('lowlevel.hfc', hfc(mX))

    calc_Mean_Var = ess.PoolAggregator(defaultStats=['mean', 'var'])
    aggrPool = calc_Mean_Var(pool)

    features = makeFeatures(aggrPool)
    # Context manager so the file is flushed and closed even if dumping fails.
    with open(outputJsonFile, 'w') as json_file:
        json.dump(features, json_file)
コード例 #10
0
# So let's redo the previous using a Pool: every frame's MFCC coefficients
# and bands are appended under their descriptor names.
pool = es.Pool()
for frame in ess.FrameGenerator(audio, frameSize=1024, hopSize=512):
    mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
    pool.add('lowlevel.mfcc', mfcc_coeffs)
    pool.add('lowlevel.mfcc_bands', mfcc_bands)

# Optional plot of the MFCCs (first coefficient dropped), kept disabled:
# plotMfcc = pool['lowlevel.mfcc'].T[1:, :]
# plt.pcolormesh(plotMfcc)
# plt.show()

# Write the per-frame pool to disk.
output = ess.YamlOutput(filename='joeTestOut/mfcc.json', format='json')
output(pool)

# Say we're not interested in all the MFCC frames, but just their mean & variance.
# To this end, we have the PoolAggregator algorithm, that can do all sorts of
# aggregation: mean, variance, min, max, etc...
aggrPool = ess.PoolAggregator(defaultStats=['mean', 'var'])(pool)

# Single-argument print calls produce the same output on Python 2 and 3.
print('Original pool descriptor names:')
print(pool.descriptorNames())
print('')
print('Aggregated pool descriptor names:')
print(aggrPool.descriptorNames())

output = ess.YamlOutput(filename='joeTestOut/mfccaggr.json', format='json')
output(aggrPool)
コード例 #11
0
def compute_features(complete_path):
    """Compute mean frame features and metadata for every ``.wav`` file in a directory.

    For each file the audio is loaded as mono at 44100 Hz and cut into frames
    of 1024 samples with a hop of 512. Spectral centroid, crest, roll-off,
    strong peak, RMS, energy and flux are computed on the windowed frame's
    spectrum, the zero-crossing rate on the time-domain frame. The per-frame
    values are averaged with ``es.PoolAggregator`` and converted with
    ``pool_to_array``.

    Args:
        complete_path: directory containing the ``.wav`` files; a trailing
            path separator is optional.

    Returns:
        ``(features_df, meta_df)``: one row per file. ``features_df`` has the
        columns centroid, crest, roll off, strong peak, rms, energy, flux and
        zcr; ``meta_df`` has duration and filename.
    """
    result = []
    meta_result = []

    for filename in os.listdir(complete_path):
        if not filename.endswith(".wav"):
            continue

        # Join rather than concatenate so the directory may be given with or
        # without a trailing separator.
        audio_path = os.path.join(complete_path, filename)

        # load our audio into an array
        audio = es.MonoLoader(filename=audio_path, sampleRate=44100)()

        # Create the pool and the algorithms. They are re-created for every
        # file, as before, so nothing an algorithm may keep between calls is
        # carried over from one file to the next.
        pool = essentia.Pool()
        window = es.Windowing()
        energy = es.Energy()
        spectrum = es.Spectrum()
        centroid = es.Centroid(range=22050)
        rolloff = es.RollOff()
        crest = es.Crest()
        speak = es.StrongPeak()
        rmse = es.RMS()
        flux = es.Flux()
        zerocrossingrate = es.ZeroCrossingRate()

        meta = es.MetadataReader(filename=audio_path, failOnError=True)()
        # Only the tag pool and the duration are used; unpacking all five
        # trailing outputs keeps the check that exactly five are returned.
        pool_meta, duration, _bitrate, _samplerate, _channels = meta[7:]

        # compute the descriptors for all frames in our audio and add them to the pool
        for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
            frame_spectrum = spectrum(window(frame))

            pool.add('spectral.centroid', centroid(frame_spectrum))
            pool.add('spectral crest', crest(frame_spectrum))
            pool.add('spectral rolloff', rolloff(frame_spectrum))
            pool.add('strong peak', speak(frame_spectrum))

            # NOTE(review): RMS is still taken over the spectrum, as before;
            # confirm whether a time-domain RMS was intended.
            pool.add('RMS', rmse(frame_spectrum))

            pool.add('spectral_energy', energy(frame_spectrum))
            pool.add('spectral flux', flux(frame_spectrum))

            # Zero crossings are counted on the time-domain frame. The
            # magnitude spectrum used previously is non-negative, so the
            # feature was always zero.
            pool.add('zero crossing rate', zerocrossingrate(frame))

        # aggregate the results (mean over all frames)
        aggrpool = es.PoolAggregator(defaultStats=['mean'])(pool)

        pool_meta.set("duration", duration)
        pool_meta.set("filename", os.path.relpath(filename))

        # write pools to lists
        result.append(pool_to_array(aggrpool))
        meta_result.append(pool_to_array(pool_meta))

    features_df = pd.DataFrame.from_records(result)
    features_df.columns = ['centroid', 'crest', 'roll off', 'strong peak',
                           'rms', 'energy', 'flux', 'zcr']

    meta_df = pd.DataFrame.from_records(meta_result)
    meta_df.columns = ['duration', 'filename', 'metadata.tags.comment']
    del meta_df['metadata.tags.comment']

    return features_df, meta_df