Example #1
def feature_allframes(audio, beats, frame_indexer=None):

    # Initialise the algorithms
    w = Windowing(type='hann')
    spectrum = Spectrum(
    )  # FFT would return complex FFT, we only want magnitude
    melbands = MelBands(numberBands=NUMBER_BANDS)
    pool = Pool()

    if frame_indexer is None:
        # Skip the first frames, which lack enough predecessors to
        # calculate differences with, and stop before the last beat
        frame_indexer = list(range(4, len(beats) - 1))

    # Mel band energies of every beat-aligned frame
    mfcc_bands = np.zeros((len(beats), NUMBER_BANDS))
    # Four difference vectors per frame: the mel band energies of frames
    # i+1, i+2 and i+3 minus those of frame i, and frame i minus frame i-1
    mfcc_bands_diff = np.zeros((len(beats), NUMBER_BANDS * 4))

    # Step 1: Calculate the mel band energies framewise.
    # This is needed for every frame in the frame_indexer itself, plus its
    # predecessor and its first three successors (all used in step 2).
    for i in [i for i in range(len(beats))
              if (i in frame_indexer) or (i + 1 in frame_indexer)
              or (i - 1 in frame_indexer) or (i - 2 in frame_indexer)
              or (i - 3 in frame_indexer)]:
        SAMPLE_RATE = 44100
        start_sample = int(beats[i] * SAMPLE_RATE)
        end_sample = int(beats[i + 1] * SAMPLE_RATE)
        # Trim to an even number of samples, as Spectrum expects an even frame size
        if (end_sample - start_sample) % 2 != 0:
            end_sample -= 1
        frame = audio[start_sample:end_sample]
        bands = melbands(spectrum(w(frame)))
        mfcc_bands[i] = bands

    # Step 2: Calculate the differences between mel band vectors of neighbouring frames
    for i in frame_indexer:
        # The norm of difference is usually very high around downbeat, because of melodic changes there!
        mfcc_bands_diff[i][0 * NUMBER_BANDS:1 * NUMBER_BANDS] = mfcc_bands[i + 1] - mfcc_bands[i]
        mfcc_bands_diff[i][1 * NUMBER_BANDS:2 * NUMBER_BANDS] = mfcc_bands[i + 2] - mfcc_bands[i]
        mfcc_bands_diff[i][2 * NUMBER_BANDS:3 * NUMBER_BANDS] = mfcc_bands[i + 3] - mfcc_bands[i]
        mfcc_bands_diff[i][3 * NUMBER_BANDS:4 * NUMBER_BANDS] = mfcc_bands[i] - mfcc_bands[i - 1]

    result = mfcc_bands_diff[frame_indexer]
    return preprocessing.scale(result)
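
A minimal usage sketch for this function. Everything below is hypothetical: the filename, beat times and safety margin are placeholders, and the module is assumed to import numpy as np, sklearn's preprocessing, the essentia.standard algorithms and a NUMBER_BANDS constant. Depending on the Essentia version, MelBands may also require its inputSize to match the beat-length frames:

# Hypothetical usage of feature_allframes(); all values are placeholders.
import numpy as np
from essentia.standard import MonoLoader

audio = MonoLoader(filename='track.wav', sampleRate=44100)()  # >= 10 s of audio
beats = list(np.arange(0.5, 10.0, 0.5))   # hypothetical beat times in seconds
indexer = list(range(4, len(beats) - 4))  # margin for the i+3 lookups in step 2
features = feature_allframes(audio, beats, indexer)
print(features.shape)  # (len(indexer), NUMBER_BANDS * 4)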
Example #2
def rms(audio, params):
    """Framewise RMS of the magnitude spectrum. params = (hopSize, frameSize, windowType)."""
    hopSize, frameSize, wtype = params
    w = Windowing(type=wtype)
    spec = Spectrum()
    result = []
    RMS = ess.RMS()
    for frame in ess.FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
        sf = spec(w(frame))
        result.append(RMS(sf))
    return np.asarray(result), hopSize
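
A hypothetical call of rms(); the filename and parameter values are placeholders, and ess is assumed to be essentia.standard, as in the snippet:

# Hypothetical usage of rms(); params = (hopSize, frameSize, windowType).
from essentia.standard import MonoLoader

audio = MonoLoader(filename='track.wav', sampleRate=44100)()
rms_curve, hop = rms(audio, (512, 1024, 'hann'))
print(rms_curve.shape, hop)  # one RMS value per frame, plus the hop size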
Example #3
def spectralCentroid(audio, params):
    """Framewise spectral centroid. params = (hopSize, frameSize, windowType)."""
    hopSize, frameSize, wtype = params
    w = Windowing(type=wtype)
    spec = Spectrum()
    result = []
    centroid = ess.Centroid(range=int(44100 / 2))  # range in Hz: up to Nyquist
    for frame in ess.FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
        sf = spec(w(frame))
        result.append(centroid(sf))
    return np.asarray(result), hopSize
Example #4
    def setup(self,
              channels=None,
              samplerate=None,
              blocksize=None,
              totalframes=None):
        super(Essentia_Dissonance, self).setup(channels, samplerate, blocksize,
                                               totalframes)
        self.spec_alg = Spectrum(size=self.input_blocksize)
        self.spec_peaks_alg = SpectralPeaks(
            sampleRate=self.input_samplerate,
            maxFrequency=self.input_samplerate / 2,
            minFrequency=0,
            orderBy='frequency')
Example #5
    def calculateDownbeats(self, audio, bpm, phase):
        # Step 0: calculate the CSD (Complex Spectral Difference) features
        # and the associated onset detection function ON LOWPASSED SIGNAL
        spec = Spectrum(size=self.FRAME_SIZE)
        w = Windowing(type='hann')
        fft = FFT()
        c2p = CartesianToPolar()
        od_csd = OnsetDetection(method='complex')
        lowpass = LowPass(cutoffFrequency=1500)

        pool = Pool()

        # TODO test faster (numpy) way
        #audio = lowpass(audio)
        for frame in FrameGenerator(audio,
                                    frameSize=self.FRAME_SIZE,
                                    hopSize=self.HOP_SIZE):
            mag, ph = c2p(fft(w(frame)))
            pool.add('onsets.complex', od_csd(mag, ph))

        # Step 1: normalise the data using an adaptive mean threshold
        novelty_mean = self.adaptive_mean(pool['onsets.complex'], 16.0)

        # Step 2: half-wave rectify the result
        novelty_hwr = (pool['onsets.complex'] - novelty_mean).clip(min=0)

        # Step 7 (experimental): Determine downbeat locations as subsequence with highest complex spectral difference
        for i in range(4):
            phase_frames = (phase * 44100.0) / (512.0)
            # Discard the last index; rounding can push it past the end of the array
            frames = np.round(
                np.arange(phase_frames + i * self.numFramesPerBeat(bpm),
                          np.size(novelty_hwr),
                          4 * self.numFramesPerBeat(bpm))).astype('int')[:-1]
            pool.add('output.downbeat',
                     np.sum(novelty_hwr[frames]) / np.size(frames))

            plt.subplot(4, 1, i + 1)
            plt.plot(novelty_hwr)
            for f in frames:
                plt.axvline(x=f)
        print(pool['output.downbeat'])
        downbeatIndex = np.argmax(pool['output.downbeat'])
        plt.show()

        # experimental
        return 1.0 * self.beats[downbeatIndex::4]
Example #6
def create_analyzers(fs=44100.0,
                     nhop=512,
                     nffts=[1024, 2048, 4096],
                     mel_nband=80,
                     mel_freqlo=27.5,
                     mel_freqhi=16000.0):
    analyzers = []
    for nfft in nffts:
        window = Windowing(size=nfft, type='blackmanharris62')
        spectrum = Spectrum(size=nfft)
        mel = MelBands(inputSize=(nfft // 2) + 1,
                       numberBands=mel_nband,
                       lowFrequencyBound=mel_freqlo,
                       highFrequencyBound=mel_freqhi,
                       sampleRate=fs)
        analyzers.append((window, spectrum, mel))
    return analyzers
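
A sketch of how these analyzer tuples might be driven. The hop size of 512 matches the nhop default; the filename and the frame loop itself are assumptions:

# Hypothetical usage of create_analyzers(); one pass per FFT size.
import numpy as np
from essentia.standard import MonoLoader, FrameGenerator

audio = MonoLoader(filename='track.wav', sampleRate=44100)()
for (window, spectrum, mel), nfft in zip(create_analyzers(), [1024, 2048, 4096]):
    feats = [mel(spectrum(window(frame)))
             for frame in FrameGenerator(audio, frameSize=nfft, hopSize=512)]
    print(nfft, np.array(feats).shape)  # (numFrames, mel_nband)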
Example #7
def rms_centroids(filename, frameSize=1024, hopSize=512, sampleRate=44100):
    # load our audio into an array
    audio = MonoLoader(filename=filename, sampleRate=sampleRate)()

    # create the necessary algorithms
    w = Windowing()
    spec = Spectrum()
    rms = RMS()
    centroid = Centroid(range=int(sampleRate / 2))
    cs = []
    rmss = []
    # compute the centroid and RMS for every frame in our audio
    for frame in FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
        sf = spec(w(frame))
        cs.append(centroid(sf))
        rmss.append(rms(sf))
    return np.array(rmss), np.array(cs)
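
A hypothetical call; the filename is a placeholder:

# Hypothetical usage of rms_centroids().
rmss, cs = rms_centroids('track.wav')
print(rmss.shape, cs.shape)  # one RMS and one centroid value per frame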
Example #8
def mel40_analyzer():
    window = Windowing(size=256, type='blackmanharris62')
    spectrum = Spectrum(size=256)
    mel = MelBands(
            inputSize=129,
            numberBands=40,
            lowFrequencyBound=27.5,
            highFrequencyBound=8000.0,
            sampleRate=16000.0)
    def analyzer(samples):
        feats = []
        for frame in FrameGenerator(samples, 256, 160):
            frame_feats = mel(spectrum(window(frame)))
            frame_feats = np.log(frame_feats + 1e-16)
            feats.append(frame_feats)
        return np.array(feats)
    return analyzer
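
The analyzer is configured for 16 kHz mono input; a hypothetical call (the filename is a placeholder):

# Hypothetical usage of mel40_analyzer(); assumes 16 kHz mono audio.
from essentia.standard import MonoLoader

samples = MonoLoader(filename='speech.wav', sampleRate=16000)()
analyze = mel40_analyzer()
log_mel = analyze(samples)
print(log_mel.shape)  # (numFrames, 40) log-mel features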
Example #9
def shared_main(source, dest, display_result):
    source_audio = _loader(source)
    destination_audio = _loader(dest)

    source_frame = FrameGenerator(source_audio, frameSize=2048, hopSize=512)
    destination_frame = FrameGenerator(destination_audio,
                                       frameSize=2048,
                                       hopSize=512)

    window = Windowing(type='hann')  # window function
    spectrum = Spectrum()  # spectrum function
    pitch_yin_fft = PitchYinFFT()  # pitch extractor
    pitch_salience = PitchSalience()
    loudness = Loudness()

    # draw_plot(source_frame, window, spectrum, pitch_yin_fft)
    min_cost, match_result = compare(source_frame, destination_frame, window,
                                     spectrum, pitch_yin_fft, 5, 1, 1,
                                     display_result, loudness)

    return min_cost, match_result
Example #10
def feature_allframes(audio, beats, frame_indexer=None):
	
	# Initialise the algorithms
	w = Windowing(type = 'blackmanharris92')
	spectrum = Spectrum()
	specPeaks = SpectralPeaks()
	hpcp = HPCP()
	
	if frame_indexer is None:
		frame_indexer = range(1, len(beats) - 1)  # Exclude the first frame, which has no predecessor to calculate a difference with
		
	# 12 chromagram values by default
	chroma_values = np.zeros((len(beats), 12))
	# Difference between chroma vectors
	chroma_differences = np.zeros((len(beats), 3))
	
	# Step 1: Calculate framewise for all output frames
	# Calculate this for all frames where this frame, its predecessor or its successor is in the frame_indexer
	for i in [i for i in range(len(beats) - 1) if (i in frame_indexer) or (i+1 in frame_indexer) or (i-1 in frame_indexer)]:
		
		SAMPLE_RATE = 44100
		start_sample = int(beats[i] * SAMPLE_RATE)
		end_sample = int(beats[i+1] * SAMPLE_RATE) 
		# print(start_sample, end_sample)
		frame = audio[start_sample : (end_sample if (start_sample - end_sample) % 2 == 0 else end_sample - 1)]
		freq, mag = specPeaks(spectrum(w(frame)))
		chroma_values[i] = hpcp(freq, mag)
	
	# Step 2: Calculate the Euclidean distances between the chroma vectors of neighbouring frames
	for i in frame_indexer:
		chroma_differences[i][0] = np.linalg.norm(chroma_values[i] - chroma_values[i-1])
		chroma_differences[i][1] = np.linalg.norm(chroma_values[i] - chroma_values[i+1])
		chroma_differences[i][2] = np.linalg.norm(chroma_values[i-1] - chroma_values[i+1])
		
	# Include the raw values as absolute features
	result = np.append(chroma_values[frame_indexer], chroma_differences[frame_indexer], axis=1)
	
	# print(np.shape(result), np.shape(chroma_values), np.shape(chroma_differences))
	return preprocessing.scale(result)
Example #11
beatTracker.run(audio)
beats = beatTracker.getBeats()
bpm = beatTracker.getBpm()
phase = beatTracker.getPhase()
beats = beats - phase
print('Bpm: ', bpm)
print('Frame size in samples: ', 44100 * (60.0 / bpm))

# Followed approach from Foote

# Adjust the frame size to the length of a beat, to extract beat-aligned information (own approach)
FRAME_SIZE = int(44100 * (60.0 / bpm))
HOP_SIZE = FRAME_SIZE // 2  # integer division: FrameGenerator expects an integer hop size
frames_per_second = (44100.0 / FRAME_SIZE) * (FRAME_SIZE / HOP_SIZE)
beats = beats * frames_per_second
spec = Spectrum(size=FRAME_SIZE - FRAME_SIZE % 2)
w = Windowing(type='hann')
spectrum = Spectrum()  # FFT would return complex FFT, we only want magnitude
mfcc = MFCC()
pool = Pool()

# Step 0: align audio with phase

beats = beats - 0.5

start_sample = int((phase) * (44100.0 * 60 / bpm))

# Step 1: Calculate framewise MFCC
for frame in FrameGenerator(audio[start_sample:],
                            frameSize=FRAME_SIZE,
                            hopSize=HOP_SIZE):
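    # The original snippet is truncated here. A plausible, hypothetical body for
    # the "framewise MFCC" step (not from the source): compute the MFCCs of each
    # beat-length frame and store them in the pool. Depending on the Essentia
    # version, MFCC may need inputSize=FRAME_SIZE // 2 + 1 to match the spectrum.
    mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
    pool.add('lowlevel.mfcc', mfcc_coeffs)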
Example #12
    def run(self, audio):
        def numFramesPerBeat(bpm):
            return (60.0 * self.SAMPLE_RATE) / (self.HOP_SIZE * bpm)

        def autocorr(x):
            result = np.correlate(x, x, mode='full')
            return result[result.size // 2:]  # keep only the non-negative lags

        def adaptive_mean(x, N):
            return np.convolve(x, [1.0] * int(N), mode='same') / N

        # Step 0: calculate the spectral difference features
        # and the associated onset detection function (here method 'melflux')
        spec = Spectrum(size=self.FRAME_SIZE)
        w = Windowing(type='hann')
        fft = np.fft.fft
        c2p = CartesianToPolar()
        od_csd = OnsetDetection(method='melflux')

        pool = Pool()

        for frame in FrameGenerator(audio,
                                    frameSize=self.FRAME_SIZE,
                                    hopSize=self.HOP_SIZE):
            pool.add('audio.windowed_frames', w(frame))

        fft_result = fft(pool['audio.windowed_frames']).astype('complex64')
        fft_result_mag = np.absolute(fft_result)
        fft_result_ang = np.angle(fft_result)

        for mag, phase in zip(fft_result_mag, fft_result_ang):
            pool.add('onsets.complex', od_csd(mag, phase))

        # Step 1: normalise the data using an adaptive mean threshold
        novelty_mean = adaptive_mean(pool['onsets.complex'], 16.0)

        # Step 2: half-wave rectify the result
        novelty_hwr = (pool['onsets.complex'] - novelty_mean).clip(min=0)

        # Step 3: then calculate the autocorrelation of this signal
        novelty_autocorr = autocorr(novelty_hwr)

        # Step 4: Sum over constant intervals to detect most likely BPM
        valid_bpms = np.arange(self.minBpm, self.maxBpm, self.stepBpm)
        for bpm in valid_bpms:
            # Discard the last index; rounding can push it past the end of the array
            frames = np.round(
                np.arange(0, np.size(novelty_autocorr),
                          numFramesPerBeat(bpm))).astype('int')[:-1]
            pool.add('output.bpm',
                     np.sum(novelty_autocorr[frames]) / np.size(frames))
        bpm = valid_bpms[np.argmax(pool['output.bpm'])]

        # Step 5: Calculate phase information
        valid_phases = np.arange(0.0, 60.0 / bpm,
                                 0.001)  # Valid phases in SECONDS
        for phase in valid_phases:
            # Convert phase from seconds to frames
            phase_frames = (phase * 44100.0) / (512.0)
            # Discard the last index; rounding can push it past the end of the array
            frames = np.round(
                np.arange(phase_frames, np.size(novelty_hwr),
                          numFramesPerBeat(bpm))).astype('int')[:-1]
            pool.add('output.phase',
                     np.sum(novelty_hwr[frames]) / np.size(frames))
        phase = valid_phases[np.argmax(pool['output.phase'])]

        # Step 6: Determine the beat locations
        spb = 60. / bpm  # seconds per beat
        beats = (np.arange(phase, (np.size(audio) / 44100) - spb + phase,
                           spb).astype('single'))

        # Store all the results
        self.bpm = bpm
        self.phase = phase
        self.beats = beats
Example #13
def feature_allframes(audio, beats, frame_indexer=None):
	
	# Initialise the algorithms	
	FRAME_SIZE = 1024
	HOP_SIZE = 512
	spec = Spectrum(size = FRAME_SIZE)
	w = Windowing(type = 'hann')
	fft = np.fft.fft

	od_csd = OnsetDetection(method = 'complex')
	od_hfc = OnsetDetection(method = 'flux')

	pool = Pool()
	
	# Calculate onset detection curve on audio
	for frame in FrameGenerator(audio, frameSize = FRAME_SIZE, hopSize = HOP_SIZE):
		pool.add('windowed_frames', w(frame))
		
	fft_result = fft(pool['windowed_frames']).astype('complex64')
	fft_result_mag = np.absolute(fft_result)
	fft_result_ang = np.angle(fft_result)

	for mag,phase in zip(fft_result_mag, fft_result_ang):
		pool.add('onsets.flux', od_hfc(mag, phase))
	
	# Normalize and half-rectify onset detection curve
	def adaptive_mean(x, N):
		return np.convolve(x, [1.0]*int(N), mode='same')/N
		
	novelty_mean = adaptive_mean(pool['onsets.flux'], 16.0)
	novelty_hwr = (pool['onsets.flux'] - novelty_mean).clip(min=0)
	novelty_hwr = novelty_hwr / np.average(novelty_hwr)
	
	# Default frame indexer: skip the first frames, which lack enough predecessors to calculate differences with
	if frame_indexer is None:
		frame_indexer = list(range(4, len(beats) - 1))
		
	# Feature: correlation between current frame onset detection f and of previous frame
	# Feature: correlation between current frame onset detection f and of next frame
	# Feature: diff between correlation between current frame onset detection f and corr cur and next
	onset_integrals = np.zeros((2 * len(beats), 1))
	frame_i = (np.array(beats) * 44100.0/ HOP_SIZE).astype('int')
	onset_correlations = np.zeros((len(beats), 21))
	
	for i in [i for i in range(len(beats)) if (i in frame_indexer) or (i+1 in frame_indexer)
		or (i-1 in frame_indexer) or (i-2 in frame_indexer) or (i-3 in frame_indexer)
		or (i-4 in frame_indexer) or (i-5 in frame_indexer) or (i-6 in frame_indexer) or (i-7 in frame_indexer)]:
		
		half_i = int((frame_i[i] + frame_i[i+1]) / 2)
		cur_frame_1st_half = novelty_hwr[frame_i[i] : half_i]
		cur_frame_2nd_half = novelty_hwr[half_i : frame_i[i+1]]
		onset_integrals[2*i] = np.sum(cur_frame_1st_half)
		onset_integrals[2*i + 1] = np.sum(cur_frame_2nd_half)
	
	# Step 2: Calculate correlations of the onset detection curve between neighbouring frames, plus differences of its integrals
	for i in frame_indexer:
		
		onset_correlations[i][0] = max(np.correlate(novelty_hwr[frame_i[i-1] : frame_i[i]], novelty_hwr[frame_i[i] : frame_i[i+1]], mode='valid')) # Only 1 value
		onset_correlations[i][1] = max(np.correlate(novelty_hwr[frame_i[i] : frame_i[i+1]], novelty_hwr[frame_i[i+1] : frame_i[i+2]], mode='valid')) # Only 1 value
		onset_correlations[i][2] = max(np.correlate(novelty_hwr[frame_i[i] : frame_i[i+1]], novelty_hwr[frame_i[i+2] : frame_i[i+3]], mode='valid')) # Only 1 value
		onset_correlations[i][3] = max(np.correlate(novelty_hwr[frame_i[i] : frame_i[i+1]], novelty_hwr[frame_i[i+3] : frame_i[i+4]], mode='valid')) # Only 1 value
		
		# Difference in integrals of novelty curve between frames
		# Quantifies the difference in number and prominence of onsets in this frame
		onset_correlations[i][4] = onset_integrals[2*i] - onset_integrals[2*i-1]
		onset_correlations[i][5] = onset_integrals[2*i+2] + onset_integrals[2*i+3] - onset_integrals[2*i-1] - onset_integrals[2*i-2]
		for j in range(1,16):
			onset_correlations[i][5 + j] = onset_integrals[2*i + j] - onset_integrals[2*i]
		
			
	# Return the onset correlation features of the indexed frames
	result = onset_correlations[frame_indexer]
	return preprocessing.scale(result)
Example #14
import errno
import time
import essentia
from essentia.standard import (
    Extractor, MonoLoader, Trimmer, Mean, FrameGenerator, Spectrum,
    SpectralPeaks, Dissonance, BarkBands, Windowing, ZeroCrossingRate,
    OddToEvenHarmonicEnergyRatio, EnergyBand, MetadataReader, OnsetDetection,
    Onsets, CartesianToPolar, FFT, MFCC, SingleGaussian)
from build_map import build_map

sampleRate = 44100
frameSize = 2048
hopSize = 1024
windowType = "hann"

mean = Mean()

keyDetector = essentia.standard.Key(pcpSize=12)
spectrum = Spectrum()
window = Windowing(size=frameSize, zeroPadding=0, type=windowType)
mfcc = MFCC()
gaussian = SingleGaussian()
od = OnsetDetection(method='hfc')
fft = FFT()  # this gives us a complex FFT
c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)
onsets = Onsets(alpha=1)

# dissonance
spectralPeaks = SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
dissonance = Dissonance()

# barkbands
barkbands = BarkBands(sampleRate=sampleRate)
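
These algorithms are typically chained per frame; a hedged sketch of such a loop follows (the filename and pool keys are assumptions, not from the source):

# Hypothetical frame loop wiring up the algorithms configured above.
pool = essentia.Pool()
audio = MonoLoader(filename='track.wav', sampleRate=sampleRate)()
for frame in FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
    spec = spectrum(window(frame))
    mag, phase = c2p(fft(window(frame)))
    pool.add('onsets.hfc', od(mag, phase))
    freqs, mags = spectralPeaks(spec)  # ordered by frequency, as Dissonance requires
    pool.add('lowlevel.dissonance', dissonance(freqs, mags))
    pool.add('lowlevel.barkbands', barkbands(spec))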
Example #15
    def run(self, audio):

        # TODO put this in some util class

        # Step 0: calculate the CSD (Complex Spectral Difference) features
        # and the associated onset detection function
        spec = Spectrum(size=self.FRAME_SIZE)
        w = Windowing(type='hann')
        fft = FFT()
        c2p = CartesianToPolar()
        od_csd = OnsetDetection(method='complex')

        pool = Pool()

        # TODO test faster (numpy) way
        for frame in FrameGenerator(audio,
                                    frameSize=self.FRAME_SIZE,
                                    hopSize=self.HOP_SIZE):
            mag, phase = c2p(fft(w(frame)))
            pool.add('onsets.complex', od_csd(mag, phase))

        # Step 1: normalise the data using an adaptive mean threshold
        novelty_mean = self.adaptive_mean(pool['onsets.complex'], 16.0)

        # Step 2: half-wave rectify the result
        novelty_hwr = (pool['onsets.complex'] - novelty_mean).clip(min=0)

        # Step 3: then calculate the autocorrelation of this signal
        novelty_autocorr = self.autocorr(novelty_hwr)

        # Step 4: Sum over constant intervals to detect most likely BPM
        valid_bpms = np.arange(self.minBpm, self.maxBpm, self.stepBpm)
        for bpm in valid_bpms:
            # Discard the last index; rounding can push it past the end of the array
            frames = np.round(
                np.arange(0, np.size(novelty_autocorr),
                          self.numFramesPerBeat(bpm))).astype('int')[:-1]
            pool.add('output.bpm',
                     np.sum(novelty_autocorr[frames]) / np.size(frames))
        bpm = valid_bpms[np.argmax(pool['output.bpm'])]

        # Step 5: Calculate phase information
        valid_phases = np.arange(0.0, 60.0 / bpm,
                                 0.001)  # Valid phases in SECONDS
        for phase in valid_phases:
            # Convert phase from seconds to frames
            phase_frames = (phase * 44100.0) / (512.0)
            # Discard the last index; rounding can push it past the end of the array
            frames = np.round(
                np.arange(phase_frames, np.size(novelty_hwr),
                          self.numFramesPerBeat(bpm))).astype('int')[:-1]
            pool.add('output.phase',
                     np.sum(novelty_hwr[frames]) / np.size(frames))
        phase = valid_phases[np.argmax(pool['output.phase'])]
        print('PHASE', phase)
        # Step 6: Determine the beat locations
        spb = 60. / bpm  # seconds per beat
        beats = (np.arange(phase, (np.size(audio) / 44100) - spb + phase,
                           spb).astype('single'))

        # Store all the results
        self.bpm = bpm
        self.phase = phase
        self.beats = beats

        self.downbeats = self.calculateDownbeats(audio, bpm, phase)