Example #1
def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize  = options['frameSize']
    hopSize    = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
    window = essentia.Windowing(size = frameSize, zeroPadding = 0, type = windowType)
    spectrum = essentia.Spectrum(size = frameSize)

    # spectral algorithms
    energy = essentia.Energy()
    mfcc = essentia.MFCC(highFrequencyBound = 8000)

    INFO('Computing Low-Level descriptors necessary for segmentation...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize*0.5

    progress = Progress(total = total_frames)

    for frame in frames:

        frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)
        pool.add(namespace + '.' + 'scope', frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
          total_frames -= 1
          start_of_frame += hopSize
          continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # need the energy for getting the thumbnail
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'spectral_mfcc', frame_mfcc)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    progress.finish()
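These extractor excerpts target the legacy essentia Python bindings, where algorithms such as FrameGenerator and Windowing live directly on the essentia module, and they rely on module-level helpers (namespace, INFO, Progress) defined elsewhere in the original scripts. Purely as a hedged sketch of how such a function might be driven (every name, value, and stand-in below is an assumption, not taken from the source):

# Hypothetical driver for the compute() above; a sketch, not the original framework.
import essentia
import essentia.standard

namespace = 'lowlevel'            # assumed descriptor prefix
INFO = print                      # stand-in for the original logger

class Progress:                   # stand-in for the original progress reporter
    def __init__(self, total):
        self.total = total
    def update(self, n):
        pass
    def finish(self):
        pass

audio = essentia.standard.MonoLoader(filename='input.wav', sampleRate=44100)()
pool = essentia.Pool()
options = {'sampleRate': 44100, 'frameSize': 2048, 'hopSize': 1024,
           'windowType': 'blackmanharris62', 'skipSilence': True}
compute(audio, pool, options)
print(pool.descriptorNames())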
Example #2
def compute(audio):
    audio = essentia.array(audio)
    sampleRate = int(conf.opts['sampleRate'])
    frameSize = int(conf.opts['frameSize'])
    hopSize = int(conf.opts['hopSize'])
    zeroPadding = int(conf.opts['zeroPadding'])
    windowType = conf.opts['windowType']

    frameRate = float(sampleRate) / float(hopSize)

    INFO('Computing Ess Detection...')

    frames = FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = Windowing(size=frameSize,
                       zeroPadding=zeroPadding,
                       type=windowType)
    fft = FFT()
    cartesian2polar = CartesianToPolar()
    onsetdetectionHFC = OnsetDetection(method="hfc", sampleRate=sampleRate)
    onsetdetectionComplex = OnsetDetection(method="complex",
                                           sampleRate=sampleRate)

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    hfc = []
    complex = []

    progress = Progress(total=total_frames)
    maxhfc = 0

    for frame in frames:

        windowed_frame = window(frame)
        complex_fft = fft(windowed_frame)
        (spectrum, phase) = cartesian2polar(complex_fft)
        hfc.append(onsetdetectionHFC(spectrum, phase))
        maxhfc = max(hfc[-1], maxhfc)
        complex.append(onsetdetectionComplex(spectrum, phase))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # normalize the HFC detection function to its maximum and stack it with the complex-domain one
    res = [[x / maxhfc for x in hfc]]
    res += [complex]

    return np.array(res)
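The array returned above holds two raw detection functions (normalized HFC and complex-domain), not onset times. As a hedged follow-up sketch, they could be passed to essentia's Onsets algorithm the same way the later onset-detection examples in this collection do (the loader, file name, and frame rate below are assumptions):

# Hypothetical follow-up: convert the detection functions into onset times.
import essentia
from essentia.standard import MonoLoader, Onsets

audio = MonoLoader(filename='input.wav', sampleRate=44100)()   # assumed input
detections = compute(audio)                  # rows: [normalized hfc, complex]
frame_rate = 44100.0 / 512                   # assumed sampleRate / hopSize
onset_times = Onsets(frameRate=frame_rate)(essentia.array(detections),
                                            essentia.array([1, 1]))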
Example #3
def compute(audio):
    audio = essentia.array(audio)
    sampleRate = int(conf.opts["sampleRate"])
    frameSize = int(conf.opts["frameSize"])
    hopSize = int(conf.opts["hopSize"])
    zeroPadding = int(conf.opts["zeroPadding"])
    windowType = conf.opts["windowType"]

    frameRate = float(sampleRate) / float(hopSize)

    INFO("Computing Ess Detection...")

    frames = FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = Windowing(size=frameSize, zeroPadding=zeroPadding, type=windowType)
    fft = FFT()
    cartesian2polar = CartesianToPolar()
    onsetdetectionHFC = OnsetDetection(method="hfc", sampleRate=sampleRate)
    onsetdetectionComplex = OnsetDetection(method="complex", sampleRate=sampleRate)

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    hfc = []
    complex = []

    progress = Progress(total=total_frames)
    maxhfc = 0

    for frame in frames:

        windowed_frame = window(frame)
        complex_fft = fft(windowed_frame)
        (spectrum, phase) = cartesian2polar(complex_fft)
        hfc.append(onsetdetectionHFC(spectrum, phase))
        maxhfc = max(hfc[-1], maxhfc)
        complex.append(onsetdetectionComplex(spectrum, phase))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # normalize the HFC detection function to its maximum and stack it with the complex-domain one
    res = [[x / maxhfc for x in hfc]]
    res += [complex]

    return np.array(res)
Example #4
def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # spectral algorithms
    energy = essentia.Energy()
    mfcc = essentia.MFCC(highFrequencyBound=8000)

    INFO('Computing Low-Level descriptors necessary for segmentation...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    progress = Progress(total=total_frames)

    for frame in frames:

        frameScope = [
            start_of_frame / sampleRate,
            (start_of_frame + frameSize) / sampleRate
        ]
        #pool.setCurrentScope(frameScope)
        pool.add(namespace + '.' + 'scope', frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # need the energy for getting the thumbnail
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'spectral_mfcc', frame_mfcc)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    progress.finish()
Example #5
def compute(audio):
    audio = essentia.array(audio)
    sampleRate = int(conf.opts['sampleRate'])
    frameSize = int(conf.opts['frameSize'])
    hopSize = int(conf.opts['hopSize'])
    zeroPadding = int(conf.opts['zeroPadding'])
    windowType = conf.opts['windowType']

    frameRate = float(sampleRate) / float(hopSize)

    INFO('Computing Onset Detection...')

    frames = FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = Windowing(size=frameSize,
                       zeroPadding=zeroPadding,
                       type=windowType)
    nsdff = Nsdf()
    fftf = Spectrum()
    crestf = Crest()
    instPowf = InstantPower()

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    progress = Progress(total=total_frames)
    cr = []
    env = []
    for frame in frames:

        windowed_frame = window(frame)
        nsdf = nsdff(frame)
        fftn = fftf(nsdf)
        cr += [crestf(fftn)]
        pow = instPowf(frame)

        if len(cr) > 2 and pow < opts["minthresh"]:

            cr[-1] = cr[-2]

        n_frames += 1
        start_of_frame += hopSize

    cr = np.array(cr)
    #     w = signal.gaussian(3,1)
    #     area = np.sum(w)
    #     cr =np.convolve(cr,w , 'same')
    #     cr = cr/(100.*area)
    return cr
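The commented-out lines above hint at smoothing the crest curve with a short Gaussian window before returning it. A hedged version of that idea, keeping the 3-point window, unit standard deviation, and area normalization from the commented code (the scipy import location is an assumption):

# Hypothetical smoothing step based on the commented-out block above.
import numpy as np
from scipy.signal.windows import gaussian

w = gaussian(3, 1)                       # 3-point Gaussian window, std = 1
area = np.sum(w)
cr_smooth = np.convolve(cr, w, 'same') / (100.0 * area)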
Example #6
def compute(audio, pool, options):
    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # temporal descriptors
    lpc = ess.LPC(order=10, type='warped', sampleRate=sampleRate)
    zerocrossingrate = ess.ZeroCrossingRate()

    # frame algorithms
    frames = ess.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = ess.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = ess.Spectrum(size=frameSize)

    # spectral algorithms
    barkbands = ess.BarkBands(sampleRate=sampleRate)
    centralmoments = ess.CentralMoments()
    crest = ess.Crest()
    centroid = ess.Centroid()
    decrease = ess.Decrease()
    spectral_contrast = ess.SpectralContrast(frameSize=frameSize,
                                             sampleRate=sampleRate,
                                             numberBands=6,
                                             lowFrequencyBound=20,
                                             highFrequencyBound=11000,
                                             neighbourRatio=0.4,
                                             staticDistribution=0.15)
    distributionshape = ess.DistributionShape()
    energy = ess.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come from "standard" hi-fi equalizers
    energyband_bass = ess.EnergyBand(startCutoffFrequency=20.0, stopCutoffFrequency=150.0, sampleRate=sampleRate)
    energyband_middle_low = ess.EnergyBand(startCutoffFrequency=150.0, stopCutoffFrequency=800.0, sampleRate=sampleRate)
    energyband_middle_high = ess.EnergyBand(startCutoffFrequency=800.0, stopCutoffFrequency=4000.0,
                                            sampleRate=sampleRate)
    energyband_high = ess.EnergyBand(startCutoffFrequency=4000.0, stopCutoffFrequency=20000.0, sampleRate=sampleRate)
    flatnessdb = ess.FlatnessDB()
    flux = ess.Flux()
    harmonic_peaks = ess.HarmonicPeaks()
    hfc = ess.HFC()
    mfcc = ess.MFCC()
    rolloff = ess.RollOff()
    rms = ess.RMS()
    strongpeak = ess.StrongPeak()

    # pitch algorithms
    pitch_detection = ess.PitchYinFFT(frameSize=frameSize, sampleRate=sampleRate)
    pitch_salience = ess.PitchSalience()

    # dissonance
    spectral_peaks = ess.SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
    dissonance = ess.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = ess.SpectralComplexity(magnitudeThreshold=0.005)

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    pitches, pitch_confidences = [], []

    progress = Progress(total=total_frames)

    #scPool = es.Pool()  # pool for spectral contrast

    for frame in frames:

        frameScope = [start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate]
        # pool.setCurrentScope(frameScope)

        # silence rate
        # pool.add(namespace + '.' + 'silence_rate_60dB', es.isSilent(frame))
        pool.add(namespace + '.' + 'silence_rate_60dB', is_silent_threshold(frame, -60))
        pool.add(namespace + '.' + 'silence_rate_30dB', is_silent_threshold(frame, -30))
        pool.add(namespace + '.' + 'silence_rate_20dB', is_silent_threshold(frame, -20))

        if options['skipSilence'] and es.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        # temporal descriptors
        pool.add(namespace + '.' + 'zerocrossingrate', zerocrossingrate(frame))
        (frame_lpc, frame_lpc_reflection) = lpc(frame)
        pool.add(namespace + '.' + 'temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum ** 2
        pool.add(namespace + '.' + 'spectral_centroid', centroid(power_spectrum))
        pool.add(namespace + '.' + 'spectral_decrease', decrease(power_spectrum))
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_low', energyband_bass(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_low', energyband_middle_low(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_high', energyband_middle_high(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_high', energyband_high(frame_spectrum))
        pool.add(namespace + '.' + 'hfc', hfc(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rms', rms(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_flux', flux(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rolloff', rolloff(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_strongpeak', strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness, frame_kurtosis) = distributionshape(frame_centralmoments)
        pool.add(namespace + '.' + 'spectral_kurtosis', frame_kurtosis)
        pool.add(namespace + '.' + 'spectral_spread', frame_spread)
        pool.add(namespace + '.' + 'spectral_skewness', frame_skewness)

        # dissonance
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add(namespace + '.' + 'dissonance', frame_dissonance)

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'mfcc', frame_mfcc)

        # spectral contrast
        (sc_coeffs, sc_valleys) = spectral_contrast(frame_spectrum)
        #scPool.add(namespace + '.' + 'sccoeffs', sc_coeffs)
        #scPool.add(namespace + '.' + 'scvalleys', sc_valleys)
        pool.add(namespace + '.' + 'spectral_contrast', sc_coeffs)


        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        pool.add(namespace + '.' + 'barkbands', frame_barkbands)
        pool.add(namespace + '.' + 'spectral_crest', crest(frame_barkbands))
        pool.add(namespace + '.' + 'spectral_flatness_db', flatnessdb(frame_barkbands))
        barkbands_centralmoments = ess.CentralMoments(range=len(frame_barkbands) - 1)
        (barkbands_spread, barkbands_skewness, barkbands_kurtosis) = distributionshape(
            barkbands_centralmoments(frame_barkbands))
        pool.add(namespace + '.' + 'barkbands_spread', barkbands_spread)
        pool.add(namespace + '.' + 'barkbands_skewness', barkbands_skewness)
        pool.add(namespace + '.' + 'barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            pool.add(namespace + '.' + 'pitch', frame_pitch)
        pitches.append(frame_pitch)
        pitch_confidences.append(frame_pitch_confidence)
        pool.add(namespace + '.' + 'pitch_instantaneous_confidence', frame_pitch_confidence)

        frame_pitch_salience = pitch_salience(frame_spectrum[:-1])
        pool.add(namespace + '.' + 'pitch_salience', frame_pitch_salience)

        # spectral complexity
        pool.add(namespace + '.' + 'spectral_complexity', spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # if there is no 'zerocrossingrate' descriptor in the pool, the file was entirely silent
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(), namespace):
        raise ess.EssentiaError('This is a silent file!')

    #spectralContrastPCA(scPool, pool)

    # build pitch value histogram
    from math import log
    from numpy import bincount
    # convert from Hz to midi notes
    midipitches = []
    unknown = 0
    for freq in pitches:
        if freq > 0. and freq <= 12600:
            midipitches.append(int(round(12 * (log(freq / 6.875) / 0.69314718055995) - 3.)))  # bincount needs integer bins
        else:
            unknown += 1

    if len(midipitches) > 0:
        # compute histogram
        midipitchhist = bincount(midipitches)
        # set midi pitch 0 to the number of pruned values
        midipitchhist[0] = unknown
        # normalise
        midipitchhist = [val / float(sum(midipitchhist)) for val in midipitchhist]
        # zero pad
        for i in range(128 - len(midipitchhist)): midipitchhist.append(0.0)
    else:
        midipitchhist = [0.] * 128
        midipitchhist[0] = 1.

    # pitchhist = ess.array(zip(range(len(midipitchhist)), midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram', midipitchhist)  # , pool.GlobalScope)

    # the code below is the same as the one above:
    # for note in midipitchhist:
    #    pool.add(namespace + '.' + 'spectral_pitch_histogram_values', note)
    #    print "midi note:", note

    pitch_centralmoments = ess.CentralMoments(range=len(midipitchhist) - 1)
    (pitch_histogram_spread, pitch_histogram_skewness, pitch_histogram_kurtosis) = distributionshape(
        pitch_centralmoments(midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram_spread', pitch_histogram_spread)  # , pool.GlobalScope)

    progress.finish()
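Every descriptor above is added to the pool frame by frame under namespace + '.' + <name>. As a hedged post-processing sketch, the per-frame values could be collapsed into per-track statistics with essentia's PoolAggregator (the statistics list and the 'lowlevel' namespace are assumptions):

# Hypothetical aggregation step, run after compute() has filled the pool.
from essentia.standard import PoolAggregator

aggregated = PoolAggregator(defaultStats=['mean', 'var'])(pool)
print(aggregated.descriptorNames())      # e.g. 'lowlevel.mfcc.mean', 'lowlevel.mfcc.var'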
Example #7
def compute(audio, pool, options):
    INFO("Computing SFX descriptors...")

    # analysis parameters
    sampleRate = options["sampleRate"]
    frameSize = options["frameSize"]
    hopSize = options["hopSize"]
    windowType = options["windowType"]

    # frame algorithms
    frames = ess.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = ess.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = ess.Spectrum(size=frameSize)

    # pitch algorithm
    pitch_detection = ess.PitchYinFFT(frameSize=2048, sampleRate=sampleRate)

    # sfx descriptors
    spectral_peaks = ess.SpectralPeaks(sampleRate=sampleRate, orderBy="frequency")
    harmonic_peaks = ess.HarmonicPeaks()
    inharmonicity = ess.Inharmonicity()
    odd2evenharmonicenergyratio = ess.OddToEvenHarmonicEnergyRatio()
    tristimulus = ess.Tristimulus()

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5
    progress = Progress(total=total_frames)

    for frame in frames:

        frameScope = [start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate]
        # pool.setCurrentScope(frameScope)

        if options["skipSilence"] and es.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)

        # spectral peaks based descriptors
        frame_frequencies, frame_magnitudes = spectral_peaks(frame_spectrum)

        # ERROR CORRECTION - hoinx 2015-12
        errIdx = np.where(frame_frequencies < 1)
        frame_frequencies = np.delete(frame_frequencies, errIdx)
        frame_magnitudes = np.delete(frame_magnitudes, errIdx)

        (frame_harmonic_frequencies, frame_harmonic_magnitudes) = harmonic_peaks(
            frame_frequencies, frame_magnitudes, frame_pitch
        )
        if len(frame_harmonic_frequencies) > 1:
            frame_inharmonicity = inharmonicity(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + "." + "inharmonicity", frame_inharmonicity)
            frame_tristimulus = tristimulus(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + "." + "tristimulus", frame_tristimulus)
            frame_odd2evenharmonicenergyratio = odd2evenharmonicenergyratio(
                frame_harmonic_frequencies, frame_harmonic_magnitudes
            )
            pool.add(namespace + "." + "odd2evenharmonicenergyratio", frame_odd2evenharmonicenergyratio)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    envelope = ess.Envelope()
    file_envelope = envelope(audio)

    # temporal statistics
    decrease = ess.Decrease()
    pool.add(namespace + "." + "temporal_decrease", decrease(file_envelope))  # , pool.GlobalScope)

    centralmoments = ess.CentralMoments()
    file_centralmoments = centralmoments(file_envelope)

    distributionshape = ess.DistributionShape()
    (file_spread, file_skewness, file_kurtosis) = distributionshape(file_centralmoments)
    pool.add(namespace + "." + "temporal_spread", file_spread)  # , pool.GlobalScope)
    pool.add(namespace + "." + "temporal_skewness", file_skewness)  # , pool.GlobalScope)
    pool.add(namespace + "." + "temporal_kurtosis", file_kurtosis)  # , pool.GlobalScope)

    centroid = ess.Centroid()
    pool.add(namespace + "." + "temporal_centroid", centroid(file_envelope))  # , pool.GlobalScope)

    # effective duration
    effectiveduration = ess.EffectiveDuration()
    pool.add(namespace + "." + "effective_duration", effectiveduration(file_envelope))  # , pool.GlobalScope)

    # log attack time
    logattacktime = ess.LogAttackTime()
    pool.add(namespace + "." + "logattacktime", logattacktime(audio))  # , pool.GlobalScope)

    # strong decay
    strongdecay = ess.StrongDecay()
    pool.add(namespace + "." + "strongdecay", strongdecay(file_envelope))  # , pool.GlobalScope)

    # dynamic profile
    flatness = ess.FlatnessSFX()
    pool.add(namespace + "." + "flatness", flatness(file_envelope))  # , pool.GlobalScope)

    """
    # onsets number
    onsets_number = len(pool['rhythm.onset_times'][0])
    pool.add(namespace + '.' + 'onsets_number', onsets_number)  # , pool.GlobalScope)
    """

    # morphological descriptors
    max_to_total = ess.MaxToTotal()
    pool.add(namespace + "." + "max_to_total", max_to_total(file_envelope))  # , pool.GlobalScope)

    tc_to_total = ess.TCToTotal()
    pool.add(namespace + "." + "tc_to_total", tc_to_total(file_envelope))  # , pool.GlobalScope)

    derivativeSFX = ess.DerivativeSFX()
    (der_av_after_max, max_der_before_max) = derivativeSFX(file_envelope)
    pool.add(namespace + "." + "der_av_after_max", der_av_after_max)  # , pool.GlobalScope)
    pool.add(namespace + "." + "max_der_before_max", max_der_before_max)  # , pool.GlobalScope)

    # pitch profile
    """
    pitch = pool['lowlevel.pitch']

    if len(pitch) > 1:
        pool.add(namespace + '.' + 'pitch_max_to_total', max_to_total(pitch))  # , pool.GlobalScope)

        min_to_total = ess.MinToTotal()
        pool.add(namespace + '.' + 'pitch_min_to_total', min_to_total(pitch))  # , pool.GlobalScope)

        pitch_centroid = ess.Centroid(range=len(pitch) - 1)
        pool.add(namespace + '.' + 'pitch_centroid', pitch_centroid(pitch))  # , pool.GlobalScope)

        pitch_after_max_to_before_max_energy_ratio = ess.AfterMaxToBeforeMaxEnergyRatio()
        pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio',
                 pitch_after_max_to_before_max_energy_ratio(pitch))  # , pool.GlobalScope)

    else:
        pool.add(namespace + '.' + 'pitch_max_to_total', 0.0)  # , pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_min_to_total', 0.0)  # , pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_centroid', 0.0)  # , pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio', 0.0)  # , pool.GlobalScope)
    """

    progress.finish()
Example #8
def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize  = options['frameSize']
    hopSize    = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize, startFromZero = True)
    loudness = essentia.Loudness()

    INFO('Computing Dynamic descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0

    level_array = []

    progress = Progress(total = total_frames)

    for frame in frames:

        frame_level = loudness(frame)
        level_array.append(frame_level)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1

    # Maximum dynamic
    EPSILON = 10e-6
    max_value = max(level_array)
    if max_value <= EPSILON:
       max_value = EPSILON

    # Normalization to the maximum
    THRESHOLD = 0.0001 # this corresponds to -80dB
    for i in range(len(level_array)):
      level_array[i] /= max_value
      if level_array[i] <= THRESHOLD:
         level_array[i] = THRESHOLD

    # Dynamic Average
    mean = essentia.Mean()
    average_loudness = 10.0*log10(mean(level_array))

    # re-scaling and range-control
    # This yields numbers between:
    #   0 for signals with a large dynamic variance and
    #     thus a low dynamic average
    #   1 for signals with little dynamic range and thus
    #     a dynamic average close to the maximum

    # TO DO: [0, 0] should be pool.GlobalScope
    average_loudness_within_zero_to_one = squeezeInto([-5, 0], [-2, 1], average_loudness)
    pool.add(namespace + "." + "average_loudness", average_loudness_within_zero_to_one)#, pool.GlobalScope)

    # Dynamic Fluctuation
    '''
    variance = essentia.Variance()
    level_variance = variance(level_array)
    if level_variance <= THRESHOLD:
       level_variance = THRESHOLD
    level_fluctuation = 10*log10(level_variance)
    # TO DO: [0, 0] should be pool.GlobalScope
    pool.add("level_fluctuation", level_fluctuation, pool.GlobalScope)
    '''

    INFO('\r100% done...')
Example #9
def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize,
                                     startFromZero=True)
    loudness = essentia.Loudness()

    INFO('Computing Dynamic descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0

    level_array = []

    progress = Progress(total=total_frames)

    for frame in frames:

        frame_level = loudness(frame)
        level_array.append(frame_level)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1

    # Maximum dynamic
    EPSILON = 10e-6
    max_value = max(level_array)
    if max_value <= EPSILON:
        max_value = EPSILON

    # Normalization to the maximum
    THRESHOLD = 0.0001  # this corresponds to -80dB
    for i in range(len(level_array)):
        level_array[i] /= max_value
        if level_array[i] <= THRESHOLD:
            level_array[i] = THRESHOLD

    # Dynamic Average
    mean = essentia.Mean()
    average_loudness = 10.0 * log10(mean(level_array))

    # re-scaling and range-control
    # This yields numbers between:
    #   0 for signals with a large dynamic variance and
    #     thus a low dynamic average
    #   1 for signals with little dynamic range and thus
    #     a dynamic average close to the maximum

    # TO DO: [0, 0] should be pool.GlobalScope
    average_loudness_within_zero_to_one = squeezeInto([-5, 0], [-2, 1],
                                                      average_loudness)
    pool.add(namespace + "." + "average_loudness",
             average_loudness_within_zero_to_one)  #, pool.GlobalScope)

    # Dynamic Fluctuation
    '''
    variance = essentia.Variance()
    level_variance = variance(level_array)
    if level_variance <= THRESHOLD:
       level_variance = THRESHOLD
    level_fluctuation = 10*log10(level_variance)
    # TO DO: [0, 0] should be pool.GlobalScope
    pool.add("level_fluctuation", level_fluctuation, pool.GlobalScope)
    '''

    INFO('\r100% done...')
Example #10
def compute(audio, pool, options):

    INFO('Computing Tonal descriptors...')

    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    zeroPadding = options['zeroPadding']
    windowType = options['windowType']

    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize,
                                zeroPadding=zeroPadding,
                                type=windowType)
    spectrum = essentia.Spectrum(size=(frameSize + zeroPadding) / 2)
    spectral_peaks = essentia.SpectralPeaks(maxPeaks=10000,
                                            magnitudeThreshold=0.00001,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            orderBy="frequency")
    tuning = essentia.TuningFrequency()

    # computing the tuning frequency
    tuning_frequency = 440.0

    for frame in frames:

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)

        #if len(frame_frequencies) > 0:
        (tuning_frequency, tuning_cents) = tuning(frame_frequencies,
                                                  frame_magnitudes)

    pool.add(namespace + '.' + 'tuning_frequency',
             tuning_frequency)  #, pool.GlobalScope)

    # computing the HPCPs
    spectral_whitening = essentia.SpectralWhitening()

    hpcp_key_size = 36
    hpcp_chord_size = 36
    hpcp_tuning_size = 120

    hpcp_key = essentia.HPCP(size=hpcp_key_size,
                             referenceFrequency=tuning_frequency,
                             bandPreset=False,
                             minFrequency=40.0,
                             maxFrequency=5000.0,
                             weightType='squaredCosine',
                             nonLinear=False,
                             windowSize=4.0 / 3.0,
                             sampleRate=sampleRate)

    hpcp_chord = essentia.HPCP(size=hpcp_chord_size,
                               referenceFrequency=tuning_frequency,
                               harmonics=8,
                               bandPreset=True,
                               minFrequency=40.0,
                               maxFrequency=5000.0,
                               splitFrequency=500.0,
                               weightType='cosine',
                               nonLinear=True,
                               windowSize=0.5,
                               sampleRate=sampleRate)

    hpcp_tuning = essentia.HPCP(size=hpcp_tuning_size,
                                referenceFrequency=tuning_frequency,
                                harmonics=8,
                                bandPreset=True,
                                minFrequency=40.0,
                                maxFrequency=5000.0,
                                splitFrequency=500.0,
                                weightType='cosine',
                                nonLinear=True,
                                windowSize=0.5,
                                sampleRate=sampleRate)

    # initializing the HPCP arrays
    hpcps_key = []
    hpcps_chord = []
    hpcps_tuning = []

    # computing HPCP loop
    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    progress = Progress(total=total_frames)

    for frame in frames:

        #frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectral peaks
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)

        if (len(frame_frequencies) > 0):
            # spectral_whitening
            frame_magnitudes_white = spectral_whitening(
                frame_spectrum, frame_frequencies, frame_magnitudes)
            frame_hpcp_key = hpcp_key(frame_frequencies,
                                      frame_magnitudes_white)
            frame_hpcp_chord = hpcp_chord(frame_frequencies,
                                          frame_magnitudes_white)
            frame_hpcp_tuning = hpcp_tuning(frame_frequencies,
                                            frame_magnitudes_white)
        else:
            frame_hpcp_key = essentia.array([0] * hpcp_key_size)
            frame_hpcp_chord = essentia.array([0] * hpcp_chord_size)
            frame_hpcp_tuning = essentia.array([0] * hpcp_tuning_size)

        # key HPCP
        hpcps_key.append(frame_hpcp_key)

        # add HPCP to the pool
        pool.add(namespace + '.' + 'hpcp', frame_hpcp_key)

        # chords HPCP
        hpcps_chord.append(frame_hpcp_chord)

        # tuning system HPCP
        hpcps_tuning.append(frame_hpcp_tuning)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    progress.finish()

    # check if silent file
    if len(hpcps_key) == 0:
        raise EssentiaError('This is a silent file!')

    # key detection
    key_detector = essentia.Key(profileType='temperley')
    average_hpcps_key = numpy.average(essentia.array(hpcps_key), axis=0)
    average_hpcps_key = normalize(average_hpcps_key)

    # thpcps
    max_arg = numpy.argmax(average_hpcps_key)
    thpcp = []
    for i in range(max_arg, len(average_hpcps_key)):
        thpcp.append(float(average_hpcps_key[i]))
    for i in range(max_arg):
        thpcp.append(float(average_hpcps_key[i]))
    pool.add(namespace + '.' + 'thpcp', thpcp)  #, pool.GlobalScope  )

    (key, scale, key_strength,
     first_to_second_relative_strength) = key_detector(
         essentia.array(average_hpcps_key))
    pool.add(namespace + '.' + 'key_key', key)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'key_scale', scale)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'key_strength',
             key_strength)  #, pool.GlobalScope)

    # chord detection
    chord_detector = essentia.Key(profileType='tonictriad', usePolyphony=False)
    hpcp_frameSize = 2.0  # 2 seconds
    hpcp_number = int(hpcp_frameSize * (sampleRate / hopSize - 1))

    for hpcp_index in range(len(hpcps_chord)):

        hpcp_index_begin = max(0, hpcp_index - hpcp_number)
        hpcp_index_end = min(hpcp_index + hpcp_number, len(hpcps_chord))
        average_hpcps_chord = numpy.average(essentia.array(
            hpcps_chord[hpcp_index_begin:hpcp_index_end]),
                                            axis=0)
        average_hpcps_chord = normalize(average_hpcps_chord)
        (key, scale, strength,
         first_to_second_relative_strength) = chord_detector(
             essentia.array(average_hpcps_chord))

        if scale == 'minor':
            chord = key + 'm'
        else:
            chord = key

        frame_second_scope = [
            hpcp_index_begin * hopSize / sampleRate,
            hpcp_index_end * hopSize / sampleRate
        ]
        pool.add(namespace + '.' + 'chords_progression',
                 chord)  #, frame_second_scope)
        pool.add(namespace + '.' + 'chords_strength',
                 strength)  #, frame_second_scope)

    # tuning system features
    keydetector = essentia.Key(profileType='diatonic')
    average_hpcps_tuning = numpy.average(essentia.array(hpcps_tuning), axis=0)
    average_hpcps_tuning = normalize(average_hpcps_tuning)
    (key, scale, diatonic_strength,
     first_to_second_relative_strength) = keydetector(
         essentia.array(average_hpcps_tuning))

    pool.add(namespace + '.' + 'tuning_diatonic_strength',
             diatonic_strength)  #, pool.GlobalScope)

    (equal_tempered_deviation, nontempered_energy_ratio,
     nontempered_peaks_energy_ratio
     ) = essentia.HighResolutionFeatures()(average_hpcps_tuning)

    pool.add(namespace + '.' + 'tuning_equal_tempered_deviation',
             equal_tempered_deviation)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'tuning_nontempered_energy_ratio',
             nontempered_energy_ratio)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'tuning_nontempered_peaks_energy_ratio',
             nontempered_peaks_energy_ratio)  #, pool.GlobalScope)
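The thpcp loop above simply rotates the averaged HPCP so that its strongest bin comes first, giving a transposition-independent profile. A hedged numpy equivalent of that rotation:

# Hypothetical one-line equivalent of the thpcp rotation loop above.
thpcp = numpy.roll(average_hpcps_key, -numpy.argmax(average_hpcps_key)).tolist()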
Example #11
def compute(audio, pool, options):

    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    zeroPadding = options['zeroPadding']
    windowType = options['windowType']

    frameRate = float(sampleRate) / float(frameSize - hopSize)

    INFO('Computing Onset Detection...')

    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize,
                                zeroPadding=zeroPadding,
                                type=windowType)
    fft = essentia.FFT()
    cartesian2polar = essentia.CartesianToPolar()
    onsetdetectionHFC = essentia.OnsetDetection(method="hfc",
                                                sampleRate=sampleRate)
    onsetdetectionComplex = essentia.OnsetDetection(method="complex",
                                                    sampleRate=sampleRate)
    onsets = essentia.Onsets(frameRate=frameRate)

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    hfc = []
    complex = []

    progress = Progress(total=total_frames)

    for frame in frames:

        if essentia.instantPower(frame) < 1.e-4:
            total_frames -= 1
            start_of_frame += hopSize
            hfc.append(0.)
            complex.append(0.)
            continue

        windowed_frame = window(frame)
        complex_fft = fft(windowed_frame)
        (spectrum, phase) = cartesian2polar(complex_fft)
        hfc.append(onsetdetectionHFC(spectrum, phase))
        complex.append(onsetdetectionComplex(spectrum, phase))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # The onset rate is defined as the number of onsets per second
    detections = numpy.concatenate(
        [essentia.array([hfc]),
         essentia.array([complex])])

    # prune all 'doubled' detections
    time_onsets = list(onsets(detections, essentia.array([1, 1])))
    t = 1
    while t < len(time_onsets):
        if time_onsets[t] - time_onsets[t - 1] < 0.080: time_onsets.pop(t)
        else: t += 1

    onsetrate = len(time_onsets) / (len(audio) / sampleRate)

    pool.add(namespace + '.' + "onset_times",
             essentia.array(time_onsets))  #, pool.GlobalScope)
    pool.add(namespace + '.' + "onset_rate", onsetrate)  #, pool.GlobalScope)

    progress.finish()
Example #12
def compute(audio, pool, options):

    INFO("Computing Beats descriptors...")

    sampleRate = options['sampleRate']
    windowType = options['windowType']

    beat_window_duration = 0.1 # 100ms
    beat_duration = 0.05 # 50ms, estimated after checking the duration of some drum kicks on Freesound


    beats = pool.value('rhythm.beats_position')[0]

    # special case
    if len(beats) == 0:

        # we add them twice so aggregation yields 'mean/var' stats rather than a single 'value',
        # and not on the full scope, so it is not global
        # FIXME: should use "undefined"
        pool.add("beats_loudness", 0.0,      [0., 0.])
        pool.add("beats_loudness", 0.0,      [0., 0.])
        pool.add("beats_loudness_bass", 0.0, [0., 0.])
        pool.add("beats_loudness_bass", 0.0, [0., 0.])

        INFO('100% done...')

        return

    duration = pool.value('metadata.duration_processed')[0]

    # FIXME: durations are converted to samples to control the window size more precisely,
    # because the FFT cannot be computed on odd-sized arrays. Revisit when FFT
    # accepts arrays of any size.
    beat_window_duration = int(beat_window_duration*float(sampleRate) + 0.5)
    beat_duration = int(beat_duration*float(sampleRate) + 0.5)
    duration *= float(sampleRate)
    if beat_duration%2 == 1:
        beat_duration += 1
        beat_window_duration = beat_duration*2

    energy = essentia.Energy()
    energybandratio = essentia.EnergyBandRatio(startFrequency = 20.0, stopFrequency = 150.0, sampleRate = sampleRate)

    total_beats = len(beats)
    n_beats = 1

    progress = Progress(total = total_beats)

    between_beats_start = [0.0]
    between_beats_end = []

    beats_spectral_energy = 0.0

    # love on the beats
    for beat in beats:
        # convert beat to samples in order to ensure an even size
        beat = beat*float(sampleRate)

        beat_window_start = (beat - beat_window_duration / 2.0) # in samples
        beat_window_end = (beat + beat_window_duration / 2.0) # in samples

        if beat_window_start > 0.0 and beat_window_end < duration: # in samples
            #print "duration: ", duration, "start:", beat_window_start, "end:", beat_window_end

            # slice indices must be integers
            beat_window = audio[int(beat_window_start) : int(beat_window_end)]

            beat_start = beat_window_start + max_energy_index(beat_window)
            beat_end = beat_start + beat_duration
            beat_audio = audio[int(beat_start) : int(beat_end)]

            beat_scope = [beat_start / float(sampleRate), beat_end/float(sampleRate)] # in seconds
            #print "beat audio size: ", len(beat_audio)

            window = essentia.Windowing(size = len(beat_audio), zeroPadding = 0, type = windowType)
            spectrum = essentia.Spectrum(size = len(beat_audio))
            beat_spectrum = spectrum(window(beat_audio))

            beat_spectral_energy = energy(beat_spectrum)
            pool.add(namespace + '.' + 'beats_loudness', beat_spectral_energy)#, beat_scope)
            beats_spectral_energy += beat_spectral_energy

            beat_spectral_energybandratio = energybandratio(beat_spectrum)
            pool.add(namespace + '.' + 'beats_loudness_bass', beat_spectral_energybandratio)#, beat_scope)

            # filling between-beats arrays
            between_beats_end.append(beat_start/float(sampleRate))
            between_beats_start.append(beat_end/float(sampleRate))

        # display of progress report
        progress.update(n_beats)

        n_beats += 1

    between_beats_end.append(duration)

    between_beats_spectral_energy = 0.0

    # love in between beats
    '''
    for between_beat_start, between_beat_end in zip(between_beats_start, between_beats_end):

        between_beat_audio = audio[between_beat_start * sampleRate : between_beat_end * sampleRate]

        between_beat_scope = [between_beat_start, between_beat_end]

        window = essentia.Windowing(windowSize = len(between_beat_audio), zeroPadding = 0, type = "blackmanharris62")
        spectrum = essentia.Spectrum(size = len(between_beat_audio))
        between_beat_spectrum = spectrum(window(between_beat_audio))

        between_beat_spectral_energy = energy(between_beat_spectrum)
        between_beats_spectral_energy += between_beat_spectral_energy
    '''

    progress.finish()
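For a concrete sense of the even-size adjustment above: at a 44100 Hz sample rate the 50 ms beat slice becomes int(0.05 * 44100 + 0.5) = 2205 samples, which is odd, so it is bumped to 2206 and the search window is set to 2 * 2206 = 4412 samples, keeping both analysis sizes even for the FFT.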
Example #13
def compute(audio, pool, options):

    sampleRate  = options['sampleRate']
    frameSize   = options['frameSize']
    hopSize     = options['hopSize']
    zeroPadding = options['zeroPadding']
    windowType  = options['windowType']

    frameRate = float(sampleRate)/float(frameSize - hopSize)

    INFO('Computing Onset Detection...')

    frames  = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
    window  = essentia.Windowing(size = frameSize, zeroPadding = zeroPadding, type = windowType)
    fft = essentia.FFT()
    cartesian2polar = essentia.CartesianToPolar()
    onsetdetectionHFC = essentia.OnsetDetection(method = "hfc", sampleRate = sampleRate)
    onsetdetectionComplex = essentia.OnsetDetection(method = "complex", sampleRate = sampleRate)
    onsets = essentia.Onsets(frameRate = frameRate)

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize*0.5

    hfc = []
    complex = []

    progress = Progress(total = total_frames)

    for frame in frames:

        if essentia.instantPower(frame) < 1.e-4 :
           total_frames -= 1
           start_of_frame += hopSize
           hfc.append(0.)
           complex.append(0.)
           continue

        windowed_frame = window(frame)
        complex_fft = fft(windowed_frame)
        (spectrum,phase) = cartesian2polar(complex_fft)
        hfc.append(onsetdetectionHFC(spectrum,phase))
        complex.append(onsetdetectionComplex(spectrum,phase))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # The onset rate is defined as the number of onsets per second
    detections = numpy.concatenate([essentia.array([hfc]), essentia.array([complex]) ])

    # prune all 'doubled' detections
    time_onsets = list(onsets(detections, essentia.array([1, 1])))
    t = 1
    while t < len(time_onsets):
      if time_onsets[t] - time_onsets[t-1] < 0.080: time_onsets.pop(t)
      else: t += 1

    onsetrate = len(time_onsets) / ( len(audio) / sampleRate )

    pool.add(namespace + '.' + "onset_times", essentia.array(time_onsets))#, pool.GlobalScope)
    pool.add(namespace + '.' + "onset_rate", onsetrate)#, pool.GlobalScope)

    progress.finish()
Example #14
def compute(audio, pool, options):

    INFO('Computing Tonal descriptors...')

    sampleRate  = options['sampleRate']
    frameSize   = options['frameSize']
    hopSize     = options['hopSize']
    zeroPadding = options['zeroPadding']
    windowType  = options['windowType']

    frames = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
    window = essentia.Windowing(size = frameSize, zeroPadding = zeroPadding, type = windowType)
    spectrum = essentia.Spectrum(size = (frameSize + zeroPadding) / 2)
    spectral_peaks = essentia.SpectralPeaks(maxPeaks = 10000, magnitudeThreshold = 0.00001, minFrequency = 40, maxFrequency = 5000, orderBy = "frequency")
    tuning = essentia.TuningFrequency()

    # computing the tuning frequency
    tuning_frequency = 440.0

    for frame in frames:

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)

        #if len(frame_frequencies) > 0:
        (tuning_frequency, tuning_cents) = tuning(frame_frequencies, frame_magnitudes)

    pool.add(namespace + '.' + 'tuning_frequency', tuning_frequency)#, pool.GlobalScope)

    # computing the HPCPs
    spectral_whitening = essentia.SpectralWhitening()

    hpcp_key_size = 36
    hpcp_chord_size = 36
    hpcp_tuning_size = 120

    hpcp_key = essentia.HPCP(size = hpcp_key_size,
                             referenceFrequency = tuning_frequency,
                             bandPreset = False,
                             minFrequency = 40.0,
                             maxFrequency = 5000.0,
                             weightType = 'squaredCosine',
                             nonLinear = False,
                             windowSize = 4.0/3.0,
                             sampleRate = sampleRate)

    hpcp_chord = essentia.HPCP(size = hpcp_chord_size,
                               referenceFrequency = tuning_frequency,
                               harmonics = 8,
                               bandPreset = True,
                               minFrequency = 40.0,
                               maxFrequency = 5000.0,
                               splitFrequency = 500.0,
                               weightType = 'cosine',
                               nonLinear = True,
                               windowSize = 0.5,
                               sampleRate = sampleRate)

    hpcp_tuning = essentia.HPCP(size = hpcp_tuning_size,
                                referenceFrequency = tuning_frequency,
                                harmonics = 8,
                                bandPreset = True,
                                minFrequency = 40.0,
                                maxFrequency = 5000.0,
                                splitFrequency = 500.0,
                                weightType = 'cosine',
                                nonLinear = True,
                                windowSize = 0.5,
                                sampleRate = sampleRate)

    # initializing the HPCP arrays
    hpcps_key = []
    hpcps_chord = []
    hpcps_tuning = []

    # computing HPCP loop
    frames = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    progress = Progress(total = total_frames)


    for frame in frames:

        #frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
          total_frames -= 1
          start_of_frame += hopSize
          continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectral peaks
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)

        if (len(frame_frequencies) > 0):
           # spectral_whitening
           frame_magnitudes_white = spectral_whitening(frame_spectrum, frame_frequencies, frame_magnitudes)
           frame_hpcp_key = hpcp_key(frame_frequencies, frame_magnitudes_white)
           frame_hpcp_chord = hpcp_chord(frame_frequencies, frame_magnitudes_white)
           frame_hpcp_tuning = hpcp_tuning(frame_frequencies, frame_magnitudes_white)
        else:
           frame_hpcp_key = essentia.array([0] * hpcp_key_size)
           frame_hpcp_chord = essentia.array([0] * hpcp_chord_size)
           frame_hpcp_tuning = essentia.array([0] * hpcp_tuning_size)

        # key HPCP
        hpcps_key.append(frame_hpcp_key)

        # add HPCP to the pool
        pool.add(namespace + '.' +'hpcp', frame_hpcp_key)

        # chords HPCP
        hpcps_chord.append(frame_hpcp_chord)

        # tuning system HPCP
        hpcps_tuning.append(frame_hpcp_tuning)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    progress.finish()

    # check if silent file
    if len(hpcps_key) == 0:
       raise EssentiaError('This is a silent file!')

    # key detection
    key_detector = essentia.Key(profileType = 'temperley')
    average_hpcps_key = numpy.average(essentia.array(hpcps_key), axis=0)
    average_hpcps_key = normalize(average_hpcps_key)

    # thpcps
    max_arg = numpy.argmax( average_hpcps_key )
    thpcp=[]
    for i in range( max_arg, len(average_hpcps_key) ):
        thpcp.append( float(average_hpcps_key[i]) )
    for i in range( max_arg ):
        thpcp.append( float(average_hpcps_key[i]) )
    pool.add(namespace + '.' +'thpcp', thpcp)#, pool.GlobalScope  )

    (key, scale, key_strength, first_to_second_relative_strength) = key_detector(essentia.array(average_hpcps_key))
    pool.add(namespace + '.' +'key_key', key)#, pool.GlobalScope)
    pool.add(namespace + '.' +'key_scale', scale)#, pool.GlobalScope)
    pool.add(namespace + '.' +'key_strength', key_strength)#, pool.GlobalScope)

    # chord detection
    chord_detector = essentia.Key(profileType = 'tonictriad', usePolyphony = False)
    hpcp_frameSize = 2.0 # 2 seconds
    hpcp_number = int(hpcp_frameSize * (sampleRate / hopSize - 1))

    for hpcp_index in range(len(hpcps_chord)):

        hpcp_index_begin = max(0, hpcp_index - hpcp_number)
        hpcp_index_end = min(hpcp_index + hpcp_number, len(hpcps_chord))
        average_hpcps_chord = numpy.average(essentia.array(hpcps_chord[hpcp_index_begin : hpcp_index_end]), axis=0)
        average_hpcps_chord = normalize(average_hpcps_chord)
        (key, scale, strength, first_to_second_relative_strength) = chord_detector(essentia.array(average_hpcps_chord))

        if scale == 'minor':
           chord = key + 'm'
        else:
           chord = key

        frame_second_scope = [hpcp_index_begin * hopSize / sampleRate, hpcp_index_end * hopSize / sampleRate]
        pool.add(namespace + '.' +'chords_progression', chord)#, frame_second_scope)
        pool.add(namespace + '.' +'chords_strength', strength)#, frame_second_scope)

    # tuning system features
    keydetector = essentia.Key(profileType = 'diatonic')
    average_hpcps_tuning = numpy.average(essentia.array(hpcps_tuning), axis=0)
    average_hpcps_tuning = normalize(average_hpcps_tuning)
    (key, scale, diatonic_strength, first_to_second_relative_strength) = keydetector(essentia.array(average_hpcps_tuning))

    pool.add(namespace + '.' +'tuning_diatonic_strength', diatonic_strength)#, pool.GlobalScope)

    (equal_tempered_deviation,
     nontempered_energy_ratio,
     nontempered_peaks_energy_ratio) = essentia.HighResolutionFeatures()(average_hpcps_tuning)

    pool.add(namespace + '.' +'tuning_equal_tempered_deviation', equal_tempered_deviation)#, pool.GlobalScope)
    pool.add(namespace + '.' +'tuning_nontempered_energy_ratio', nontempered_energy_ratio)#, pool.GlobalScope)
    pool.add(namespace + '.' +'tuning_nontempered_peaks_energy_ratio', nontempered_peaks_energy_ratio)#, pool.GlobalScope)
Example #15
def compute(audio, pool, options):

    INFO("Computing Beats descriptors...")

    sampleRate = options['sampleRate']
    windowType = options['windowType']

    beat_window_duration = 0.1  # 100ms
    beat_duration = 0.05  # 50ms, estimated after checking the duration of some drum kicks on Freesound

    beats = pool.value('rhythm.beats_position')[0]

    # special case
    if len(beats) == 0:

        # we add them twice so aggregation yields 'mean/var' stats rather than a single 'value',
        # and not on the full scope, so it is not global
        # FIXME: should use "undefined"
        pool.add("beats_loudness", 0.0, [0., 0.])
        pool.add("beats_loudness", 0.0, [0., 0.])
        pool.add("beats_loudness_bass", 0.0, [0., 0.])
        pool.add("beats_loudness_bass", 0.0, [0., 0.])

        INFO('100% done...')

        return

    duration = pool.value('metadata.duration_processed')[0]

    # FIXME: durations are converted to samples to control the window size more precisely,
    # because the FFT cannot be computed on odd-sized arrays. Revisit when FFT
    # accepts arrays of any size.
    beat_window_duration = int(beat_window_duration * float(sampleRate) + 0.5)
    beat_duration = int(beat_duration * float(sampleRate) + 0.5)
    duration *= float(sampleRate)
    if beat_duration % 2 == 1:
        beat_duration += 1
        beat_window_duration = beat_duration * 2

    energy = essentia.Energy()
    energybandratio = essentia.EnergyBandRatio(startFrequency=20.0,
                                               stopFrequency=150.0,
                                               sampleRate=sampleRate)

    total_beats = len(beats)
    n_beats = 1

    progress = Progress(total=total_beats)

    between_beats_start = [0.0]
    between_beats_end = []

    beats_spectral_energy = 0.0

    # love on the beats
    for beat in beats:
        # convert beat to samples in order to ensure an even size
        beat = beat * float(sampleRate)

        beat_window_start = (beat - beat_window_duration / 2.0)  # in samples
        beat_window_end = (beat + beat_window_duration / 2.0)  # in samples

        if beat_window_start > 0.0 and beat_window_end < duration:  # in samples
            #print "duration: ", duration, "start:", beat_window_start, "end:", beat_window_end

            beat_window = audio[beat_window_start:beat_window_end]

            beat_start = beat_window_start + max_energy_index(beat_window)
            beat_end = beat_start + beat_duration
            beat_audio = audio[int(beat_start):int(beat_end)]  # slice indices must be integers

            beat_scope = [
                beat_start / float(sampleRate), beat_end / float(sampleRate)
            ]  # in seconds
            #print "beat audio size: ", len(beat_audio)

            window = essentia.Windowing(size=len(beat_audio),
                                        zeroPadding=0,
                                        type=windowType)
            spectrum = essentia.Spectrum(size=len(beat_audio))
            beat_spectrum = spectrum(window(beat_audio))

            beat_spectral_energy = energy(beat_spectrum)
            pool.add(namespace + '.' + 'beats_loudness',
                     beat_spectral_energy)  #, beat_scope)
            beats_spectral_energy += beat_spectral_energy

            beat_spectral_energybandratio = energybandratio(beat_spectrum)
            pool.add(namespace + '.' + 'beats_loudness_bass',
                     beat_spectral_energybandratio)  #, beat_scope)

            # filling between-beats arrays
            between_beats_end.append(beat_start / float(sampleRate))
            between_beats_start.append(beat_end / float(sampleRate))

        # display of progress report
        progress.update(n_beats)

        n_beats += 1

    between_beats_end.append(duration)

    between_beats_spectral_energy = 0.0

    # love in between beats
    '''
    for between_beat_start, between_beat_end in zip(between_beats_start, between_beats_end):

        between_beat_audio = audio[between_beat_start * sampleRate : between_beat_end * sampleRate]

        between_beat_scope = [between_beat_start, between_beat_end]

        window = essentia.Windowing(windowSize = len(between_beat_audio), zeroPadding = 0, type = "blackmanharris62")
        spectrum = essentia.Spectrum(size = len(between_beat_audio))
        between_beat_spectrum = spectrum(window(between_beat_audio))

        between_beat_spectral_energy = energy(between_beat_spectrum)
        between_beats_spectral_energy += between_beat_spectral_energy
    '''
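    # note: with the loop above commented out, between_beats_spectral_energy is never
    # accumulated and remains 0.0 in this version of the extractor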

    progress.finish()
Example #16
0
def compute(audio, pool, options):

    INFO('Computing SFX descriptors...')

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize  = options['frameSize']
    hopSize    = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
    window = essentia.Windowing(size = frameSize, zeroPadding = 0, type = windowType)
    spectrum = essentia.Spectrum(size = frameSize)

    # pitch algorithm
    pitch_detection = essentia.PitchDetection(frameSize = 2048, sampleRate = sampleRate)

    # sfx descriptors
    spectral_peaks = essentia.SpectralPeaks(sampleRate = sampleRate, orderBy = 'frequency')
    harmonic_peaks = essentia.HarmonicPeaks()
    inharmonicity = essentia.Inharmonicity()
    odd2evenharmonicenergyratio = essentia.OddToEvenHarmonicEnergyRatio()
    tristimulus = essentia.Tristimulus()

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize*0.5
    progress = Progress(total = total_frames)

    for frame in frames:

        frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
          total_frames -= 1
          start_of_frame += hopSize
          continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)

        # spectral peaks based descriptors
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        (frame_harmonic_frequencies, frame_harmonic_magnitudes) = harmonic_peaks(frame_frequencies, frame_magnitudes, frame_pitch)
        if len(frame_harmonic_frequencies) > 1:
            frame_inharmonicity = inharmonicity(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'inharmonicity', frame_inharmonicity)
            frame_tristimulus = tristimulus(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'tristimulus', frame_tristimulus)
            frame_odd2evenharmonicenergyratio = odd2evenharmonicenergyratio(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'odd2evenharmonicenergyratio', frame_odd2evenharmonicenergyratio)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize


    envelope = essentia.Envelope()
    file_envelope = envelope(audio)

    # temporal statistics
    decrease = essentia.AudioDecrease(blockSize = len(audio))
    pool.add(namespace + '.' + 'temporal_decrease', decrease(file_envelope))#, pool.GlobalScope)

    centralmoments = essentia.AudioCentralMoments(blockSize = len(audio))
    file_centralmoments = centralmoments(file_envelope)

    distributionshape = essentia.DistributionShape()
    (file_spread, file_skewness, file_kurtosis) = distributionshape(file_centralmoments)
    pool.add(namespace + '.' + 'temporal_spread', file_spread)#, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_skewness', file_skewness)#, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_kurtosis', file_kurtosis)#, pool.GlobalScope)

    centroid = essentia.AudioCentroid(blockSize = len(audio))
    pool.add(namespace + '.' + 'temporal_centroid', centroid(file_envelope))#, pool.GlobalScope)

    # effective duration
    effectiveduration = essentia.EffectiveDuration()
    pool.add(namespace + '.' + 'effective_duration', effectiveduration(file_envelope))#, pool.GlobalScope)

    # log attack time
    logattacktime = essentia.LogAttackTime()
    pool.add(namespace + '.' + 'logattacktime', logattacktime(audio))#, pool.GlobalScope)

    # strong decay
    strongdecay = essentia.StrongDecay()
    pool.add(namespace + '.' + 'strongdecay', strongdecay(file_envelope))#, pool.GlobalScope)

    # dynamic profile
    flatness = essentia.FlatnessSFX()
    pool.add(namespace + '.' + 'flatness', flatness(file_envelope))#, pool.GlobalScope)

    # onsets number
    onsets_number = len(pool.value('rhythm.onset_times')[0])
    pool.add(namespace + '.' + 'onsets_number', onsets_number)#, pool.GlobalScope)

    # morphological descriptors
    max_to_total = essentia.MaxToTotal()
    pool.add(namespace + '.' + 'max_to_total', max_to_total(file_envelope))#, pool.GlobalScope)

    tc_to_total = essentia.TCToTotal(sampleRate = sampleRate)
    pool.add(namespace + '.' + 'tc_to_total', tc_to_total(file_envelope))#, pool.GlobalScope)

    derivativeSFX = essentia.DerivativeSFX(sampleRate = sampleRate)
    (der_av_after_max, max_der_before_max) = derivativeSFX(file_envelope)
    pool.add(namespace + '.' + 'der_av_after_max', der_av_after_max)#, pool.GlobalScope)
    pool.add(namespace + '.' + 'max_der_before_max', max_der_before_max)#, pool.GlobalScope)

    # pitch profile
    pitch = pool.value('lowlevel.pitch')

    if len(pitch) > 1:
       pool.add(namespace + '.' + 'pitch_max_to_total', max_to_total(pitch))#, pool.GlobalScope)

       min_to_total = essentia.MinToTotal()
       pool.add(namespace + '.' + 'pitch_min_to_total', min_to_total(pitch))#, pool.GlobalScope)

       pitch_centroid = essentia.Centroid(range = len(pitch)-1)
       pool.add(namespace + '.' + 'pitch_centroid', pitch_centroid(pitch))#, pool.GlobalScope)

       pitch_after_max_to_before_max_energy_ratio = essentia.AfterMaxToBeforeMaxEnergyRatio()
       pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio', pitch_after_max_to_before_max_energy_ratio(pitch))#, pool.GlobalScope)

    else:
       pool.add(namespace + '.' + 'pitch_max_to_total', 0.0)#, pool.GlobalScope)
       pool.add(namespace + '.' + 'pitch_min_to_total', 0.0)#, pool.GlobalScope)
       pool.add(namespace + '.' + 'pitch_centroid', 0.0)#, pool.GlobalScope)
       pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio', 0.0)#, pool.GlobalScope)

    progress.finish()
Example #17
0
def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize  = options['frameSize']
    hopSize    = options['hopSize']
    windowType = options['windowType']

    # temporal descriptors
    lpc = essentia.LPC(order = 10, type = 'warped', sampleRate = sampleRate)
    zerocrossingrate = essentia.ZeroCrossingRate()

    # frame algorithms
    frames = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
    window = essentia.Windowing(size = frameSize, zeroPadding = 0, type = windowType)
    spectrum = essentia.Spectrum(size = frameSize)

    # spectral algorithms
    barkbands = essentia.BarkBands(sampleRate = sampleRate)
    centralmoments = essentia.SpectralCentralMoments()
    crest = essentia.Crest()
    centroid = essentia.SpectralCentroid()
    decrease = essentia.SpectralDecrease()
    spectral_contrast = essentia.SpectralContrast(frameSize = frameSize,
                                                  sampleRate = sampleRate,
                                                  numberBands = 6,
                                                  lowFrequencyBound = 20,
                                                  highFrequencyBound = 11000,
                                                  neighbourRatio = 0.4,
                                                  staticDistribution = 0.15)
    distributionshape = essentia.DistributionShape()
    energy = essentia.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come from "standard" hi-fi equalizers
    energyband_bass = essentia.EnergyBand(startCutoffFrequency = 20.0, stopCutoffFrequency = 150.0, sampleRate = sampleRate)
    energyband_middle_low = essentia.EnergyBand(startCutoffFrequency = 150.0, stopCutoffFrequency = 800.0, sampleRate = sampleRate)
    energyband_middle_high = essentia.EnergyBand(startCutoffFrequency = 800.0, stopCutoffFrequency = 4000.0, sampleRate = sampleRate)
    energyband_high = essentia.EnergyBand(startCutoffFrequency = 4000.0, stopCutoffFrequency = 20000.0, sampleRate = sampleRate)
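    # band layout implied by the cutoffs above: bass 20-150 Hz, middle-low 150-800 Hz,
    # middle-high 800-4000 Hz, high 4000-20000 Hz; the same 20-150 Hz bass band is reused
    # by the beats descriptor's EnergyBandRatio in Example #15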
    flatnessdb = essentia.FlatnessDB()
    flux = essentia.Flux()
    harmonic_peaks = essentia.HarmonicPeaks()
    hfc = essentia.HFC()
    mfcc = essentia.MFCC()
    rolloff = essentia.RollOff()
    rms = essentia.RMS()
    strongpeak = essentia.StrongPeak()

    # pitch algorithms
    pitch_detection = essentia.PitchDetection(frameSize = frameSize, sampleRate = sampleRate)
    pitch_salience = essentia.PitchSalience()

    # dissonance
    spectral_peaks = essentia.SpectralPeaks(sampleRate = sampleRate, orderBy='frequency')
    dissonance = essentia.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = essentia.SpectralComplexity(magnitudeThreshold = 0.005)

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize*0.5

    pitches, pitch_confidences =  [],[]

    progress = Progress(total = total_frames)

    scPool = essentia.Pool() # pool for spectral contrast

    for frame in frames:

        frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)

        # silence rate
        pool.add(namespace + '.' + 'silence_rate_60dB', essentia.isSilent(frame))
        pool.add(namespace + '.' + 'silence_rate_30dB', is_silent_threshold(frame, -30))
        pool.add(namespace + '.' + 'silence_rate_20dB', is_silent_threshold(frame, -20))

        if options['skipSilence'] and essentia.isSilent(frame):
          total_frames -= 1
          start_of_frame += hopSize
          continue

        # temporal descriptors
        pool.add(namespace + '.' + 'zerocrossingrate', zerocrossingrate(frame))
        (frame_lpc, frame_lpc_reflection) = lpc(frame)
        pool.add(namespace + '.' + 'temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum ** 2
        pool.add(namespace + '.' + 'spectral_centroid', centroid(power_spectrum))
        pool.add(namespace + '.' + 'spectral_decrease', decrease(power_spectrum))
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_low', energyband_bass(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_low', energyband_middle_low(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_high', energyband_middle_high(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_high', energyband_high(frame_spectrum))
        pool.add(namespace + '.' + 'hfc', hfc(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rms', rms(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_flux', flux(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rolloff', rolloff(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_strongpeak', strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness, frame_kurtosis) = distributionshape(frame_centralmoments)
        pool.add(namespace + '.' + 'spectral_kurtosis', frame_kurtosis)
        pool.add(namespace + '.' + 'spectral_spread', frame_spread)
        pool.add(namespace + '.' + 'spectral_skewness', frame_skewness)

        # dissonance
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add(namespace + '.' + 'dissonance', frame_dissonance)

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'mfcc', frame_mfcc)

        # spectral contrast
        (sc_coeffs, sc_valleys) = spectral_contrast(frame_spectrum)
        scPool.add(namespace + '.' + 'sccoeffs', sc_coeffs)
        scPool.add(namespace + '.' + 'scvalleys', sc_valleys)

        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        pool.add(namespace + '.' + 'barkbands', frame_barkbands)
        pool.add(namespace + '.' + 'spectral_crest', crest(frame_barkbands))
        pool.add(namespace + '.' + 'spectral_flatness_db', flatnessdb(frame_barkbands))
        barkbands_centralmoments = essentia.CentralMoments(range = len(frame_barkbands) - 1)
        (barkbands_spread, barkbands_skewness, barkbands_kurtosis) = distributionshape(barkbands_centralmoments(frame_barkbands))
        pool.add(namespace + '.' + 'barkbands_spread', barkbands_spread)
        pool.add(namespace + '.' + 'barkbands_skewness', barkbands_skewness)
        pool.add(namespace + '.' + 'barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            pool.add(namespace + '.' + 'pitch', frame_pitch)
        pitches.append(frame_pitch)
        pitch_confidences.append(frame_pitch_confidence)
        pool.add(namespace + '.' + 'pitch_instantaneous_confidence', frame_pitch_confidence)

        frame_pitch_salience = pitch_salience(frame_spectrum[:-1])
        pool.add(namespace + '.' + 'pitch_salience', frame_pitch_salience)

        # spectral complexity
        pool.add(namespace + '.' + 'spectral_complexity', spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # if there is no 'zerocrossingrate' descriptor, this is a silent file
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(), namespace):
        raise essentia.EssentiaError('This is a silent file!')

    spectralContrastPCA(scPool, pool)

    # build pitch value histogram
    from math import log
    from numpy import bincount
    # convert from Hz to midi notes
    midipitches = []
    unknown = 0
    for freq in pitches:
        if freq > 0. and freq <= 12600:
            midipitches.append(12*(log(freq/6.875)/0.69314718055995)-3.)
        else:
            unknown += 1
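    # informal sanity check of the conversion above: 0.69314718055995 is ln(2), so the
    # expression is 12 * log2(freq / 6.875) - 3; for freq = 440 Hz this gives
    # 12 * log2(64) - 3 = 72 - 3 = 69, i.e. MIDI note 69 (A4)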

    if len(midipitches) > 0:
      # compute the histogram (numpy.bincount requires non-negative integer bins)
      midipitchhist = bincount([int(round(p)) for p in midipitches])
      # set 0 midi pitch to be the number of pruned value
      midipitchhist[0] = unknown
      # normalise
      midipitchhist = [val/float(sum(midipitchhist)) for val in midipitchhist]
      # zero pad
      for i in range(128 - len(midipitchhist)): midipitchhist.append(0.0)
    else:
      midipitchhist = [0.]*128
      midipitchhist[0] = 1.

    # pitchhist = essentia.array(zip(range(len(midipitchhist)), midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram', midipitchhist)#, pool.GlobalScope)

    # the code below is the same as the one above:
    #for note in midipitchhist:
    #    pool.add(namespace + '.' + 'spectral_pitch_histogram_values', note)
    #    print "midi note:", note

    pitch_centralmoments = essentia.CentralMoments(range = len(midipitchhist) - 1)
    (pitch_histogram_spread, pitch_histogram_skewness, pitch_histogram_kurtosis) = distributionshape(pitch_centralmoments(midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram_spread', pitch_histogram_spread)#, pool.GlobalScope)

    progress.finish()
Example #18
0
def compute(audio, pool, options):

    INFO('Computing SFX descriptors...')

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # pitch algorithm
    pitch_detection = essentia.PitchDetection(frameSize=2048,
                                              sampleRate=sampleRate)

    # sfx descriptors
    spectral_peaks = essentia.SpectralPeaks(sampleRate=sampleRate,
                                            orderBy='frequency')
    harmonic_peaks = essentia.HarmonicPeaks()
    inharmonicity = essentia.Inharmonicity()
    odd2evenharmonicenergyratio = essentia.OddToEvenHarmonicEnergyRatio()
    tristimulus = essentia.Tristimulus()

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5
    progress = Progress(total=total_frames)

    for frame in frames:

        frameScope = [
            start_of_frame / sampleRate,
            (start_of_frame + frameSize) / sampleRate
        ]
        #pool.setCurrentScope(frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)

        # spectral peaks based descriptors
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        (frame_harmonic_frequencies,
         frame_harmonic_magnitudes) = harmonic_peaks(frame_frequencies,
                                                     frame_magnitudes,
                                                     frame_pitch)
        if len(frame_harmonic_frequencies) > 1:
            frame_inharmonicity = inharmonicity(frame_harmonic_frequencies,
                                                frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'inharmonicity', frame_inharmonicity)
            frame_tristimulus = tristimulus(frame_harmonic_frequencies,
                                            frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'tristimulus', frame_tristimulus)
            frame_odd2evenharmonicenergyratio = odd2evenharmonicenergyratio(
                frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'odd2evenharmonicenergyratio',
                     frame_odd2evenharmonicenergyratio)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    envelope = essentia.Envelope()
    file_envelope = envelope(audio)

    # temporal statistics
    decrease = essentia.AudioDecrease(blockSize=len(audio))
    pool.add(namespace + '.' + 'temporal_decrease',
             decrease(file_envelope))  #, pool.GlobalScope)

    centralmoments = essentia.AudioCentralMoments(blockSize=len(audio))
    file_centralmoments = centralmoments(file_envelope)

    distributionshape = essentia.DistributionShape()
    (file_spread, file_skewness,
     file_kurtosis) = distributionshape(file_centralmoments)
    pool.add(namespace + '.' + 'temporal_spread',
             file_spread)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_skewness',
             file_skewness)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_kurtosis',
             file_kurtosis)  #, pool.GlobalScope)

    centroid = essentia.AudioCentroid(blockSize=len(audio))
    pool.add(namespace + '.' + 'temporal_centroid',
             centroid(file_envelope))  #, pool.GlobalScope)

    # effective duration
    effectiveduration = essentia.EffectiveDuration()
    pool.add(namespace + '.' + 'effective_duration',
             effectiveduration(file_envelope))  #, pool.GlobalScope)

    # log attack time
    logattacktime = essentia.LogAttackTime()
    pool.add(namespace + '.' + 'logattacktime',
             logattacktime(audio))  #, pool.GlobalScope)

    # strong decay
    strongdecay = essentia.StrongDecay()
    pool.add(namespace + '.' + 'strongdecay',
             strongdecay(file_envelope))  #, pool.GlobalScope)

    # dynamic profile
    flatness = essentia.FlatnessSFX()
    pool.add(namespace + '.' + 'flatness',
             flatness(file_envelope))  #, pool.GlobalScope)

    # onsets number
    onsets_number = len(pool.value('rhythm.onset_times')[0])
    pool.add(namespace + '.' + 'onsets_number',
             onsets_number)  #, pool.GlobalScope)

    # morphological descriptors
    max_to_total = essentia.MaxToTotal()
    pool.add(namespace + '.' + 'max_to_total',
             max_to_total(file_envelope))  #, pool.GlobalScope)

    tc_to_total = essentia.TCToTotal(sampleRate=sampleRate)
    pool.add(namespace + '.' + 'tc_to_total',
             tc_to_total(file_envelope))  #, pool.GlobalScope)

    derivativeSFX = essentia.DerivativeSFX(sampleRate=sampleRate)
    (der_av_after_max, max_der_before_max) = derivativeSFX(file_envelope)
    pool.add(namespace + '.' + 'der_av_after_max',
             der_av_after_max)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'max_der_before_max',
             max_der_before_max)  #, pool.GlobalScope)

    # pitch profile
    pitch = pool.value('lowlevel.pitch')

    if len(pitch) > 1:
        pool.add(namespace + '.' + 'pitch_max_to_total',
                 max_to_total(pitch))  #, pool.GlobalScope)

        min_to_total = essentia.MinToTotal()
        pool.add(namespace + '.' + 'pitch_min_to_total',
                 min_to_total(pitch))  #, pool.GlobalScope)

        pitch_centroid = essentia.Centroid(range=len(pitch) - 1)
        pool.add(namespace + '.' + 'pitch_centroid',
                 pitch_centroid(pitch))  #, pool.GlobalScope)

        pitch_after_max_to_before_max_energy_ratio = essentia.AfterMaxToBeforeMaxEnergyRatio()
        pool.add(namespace + '.' +
                 'pitch_after_max_to_before_max_energy_ratio',
                 pitch_after_max_to_before_max_energy_ratio(
                     pitch))  #, pool.GlobalScope)

    else:
        pool.add(namespace + '.' + 'pitch_max_to_total',
                 0.0)  #, pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_min_to_total',
                 0.0)  #, pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_centroid', 0.0)  #, pool.GlobalScope)
        pool.add(namespace + '.' +
                 'pitch_after_max_to_before_max_energy_ratio',
                 0.0)  #, pool.GlobalScope)

    progress.finish()
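A minimal driver sketch for the SFX extractor above, included for illustration only: it assumes the same legacy Essentia Python bindings and module-level helpers (namespace, INFO, Progress) used throughout these examples, and that earlier extractors have already filled the pool with 'lowlevel.pitch' and 'rhythm.onset_times'. The function name, option values, and 'samples' argument are assumptions, not part of the original module.

def run_sfx_example(samples, pool):
    # 'samples' is assumed to be a mono float sequence; the option keys mirror those read by
    # compute() above, and 'blackmanharris62' is the window type used elsewhere in these examples
    options = {
        'sampleRate': 44100,
        'frameSize': 2048,
        'hopSize': 1024,
        'windowType': 'blackmanharris62',
        'skipSilence': True,
    }
    audio = essentia.array(samples)
    compute(audio, pool, options)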