Ejemplo n.º 1
0
def get_bpm(file_in):
    """Estimate the tempo of an audio file with a streaming network.

    The file is loaded through ``MonoLoader`` and analysed by
    ``RhythmExtractor2013``. The beat intervals it emits are passed to
    ``BpmHistogramDescriptors`` and the resulting histogram to ``Centroid``.
    Several descriptors are collected in a local pool, but only the value
    stored under ``'bpm'`` is returned.
    """
    results = Pool()

    source = streaming.MonoLoader(filename=file_in)
    rhythm = streaming.RhythmExtractor2013()
    histogram_stats = streaming.BpmHistogramDescriptors()
    # BPM histogram output size is 250
    histogram_centroid = streaming.Centroid(range=250)

    # Rhythm extractor: keep bpm and confidence, discard ticks and estimates.
    source.audio >> rhythm.signal
    rhythm.bpm >> (results, 'bpm')
    rhythm.ticks >> None
    rhythm.confidence >> (results, 'confidence')
    rhythm.estimates >> None
    rhythm.bpmIntervals >> histogram_stats.bpmIntervals

    # Histogram descriptors: keep the peak BPMs and the histogram itself.
    histogram_stats.firstPeakBPM >> (results, 'bpm_first_peak')
    histogram_stats.firstPeakWeight >> None
    histogram_stats.firstPeakSpread >> None
    histogram_stats.secondPeakBPM >> (results, 'bpm_second_peak')
    histogram_stats.secondPeakWeight >> None
    histogram_stats.secondPeakSpread >> None
    histogram_stats.histogram >> (results, 'bpm_histogram')
    histogram_stats.histogram >> histogram_centroid.array
    histogram_centroid.centroid >> (results, 'bpm_centroid')

    run(source)

    tempo = results['bpm']
    return tempo
Ejemplo n.º 2
0
def get_key(file_in):
    """
    Estimate the key and scale of an audio file.

    The audio is framed, windowed and transformed to a spectrum; spectral
    peaks feed an HPCP, which feeds the ``Key`` algorithm. The pooled key and
    scale are wrapped in a ``Key`` object (a name provided by the enclosing
    module) and returned.
    """
    source = streaming.MonoLoader(filename=file_in)
    cutter = streaming.FrameCutter()
    window = streaming.Windowing(type="blackmanharris62")
    spec = streaming.Spectrum()
    peaks = streaming.SpectralPeaks(
        orderBy="magnitude",
        magnitudeThreshold=1e-05,
        minFrequency=40,
        maxFrequency=5000,
        maxPeaks=10000,
    )
    results = Pool()
    chroma = streaming.HPCP()
    key_estimator = streaming.Key()

    # audio -> frames -> window -> spectrum -> peaks -> HPCP -> key
    source.audio >> cutter.signal
    cutter.frame >> window.frame
    window.frame >> spec.frame
    spec.spectrum >> peaks.spectrum
    peaks.magnitudes >> chroma.magnitudes
    peaks.frequencies >> chroma.frequencies
    chroma.hpcp >> key_estimator.pcp
    key_estimator.key >> (results, 'tonal.key_key')
    key_estimator.scale >> (results, 'tonal.key_scale')
    key_estimator.strength >> (results, 'tonal.key_strength')

    run(source)

    estimated_key = results['tonal.key_key']
    estimated_scale = results['tonal.key_scale']
    return Key(estimated_key, estimated_scale)
Ejemplo n.º 3
0
def estimate_main_band(infile):
    """
    Classify a track as 'low', 'mid' or 'high'.

    The spectrum of every frame is reduced to the bands delimited by
    0, 250, 750 and 4000; the band with the largest energy summed over all
    frames decides the label.

    The original author was not entirely sure this measures the intended
    thing, although quick tests looked right.
    """
    labels = {0: 'low', 1: 'mid', 2: 'high'}

    source = streaming.MonoLoader(filename=infile)
    cutter = streaming.FrameCutter()
    window = streaming.Windowing(type="blackmanharris62")
    spec = streaming.Spectrum()
    bands = streaming.FrequencyBands(frequencyBands=[0, 250, 750, 4000])
    results = Pool()

    source.audio >> cutter.signal
    cutter.frame >> window.frame
    window.frame >> spec.frame
    spec.spectrum >> bands.spectrum
    bands.bands >> (results, 'bands')

    run(source)

    # Sum each band over all frames and pick the strongest one.
    per_band_energy = np.sum(results['bands'], axis=0)
    strongest = int(np.argmax(per_band_energy))
    # Any other index yields None, as in the plain if/elif formulation.
    return labels.get(strongest)
Ejemplo n.º 4
0
    def cutAudioFile(self, filename, frameSize, hopSize, startFromZero, expectedNumFrames):
        """Cut a short test file into frames and check how many were produced.

        ``filename`` is resolved inside the ``generated/synthesised/shortfiles``
        folder below ``testdata.audio_dir``.
        """
        audio_path = join(testdata.audio_dir, 'generated', 'synthesised',
                          'shortfiles', filename)
        source = es.MonoLoader(filename=audio_path)
        cutter = es.FrameCutter(frameSize=frameSize,
                                hopSize=hopSize,
                                startFromZero=startFromZero)
        frames_pool = Pool()

        source.audio >> cutter.signal
        cutter.frame >> (frames_pool, 'audio.frames')
        run(source)

        produced = len(frames_pool['audio.frames'])
        self.assertEqual(produced, expectedNumFrames)
Ejemplo n.º 5
0
def estimate_danceability(infile):
    """Return the danceability value computed for the audio file ``infile``.

    The file is loaded as mono audio and passed to the streaming
    ``Danceability`` algorithm; the value collected in the pool under
    ``'danceability'`` is returned.
    """
    results = Pool()
    source = streaming.MonoLoader(filename=infile)
    estimator = streaming.Danceability()

    source.audio >> estimator.signal
    estimator.danceability >> (results, 'danceability')

    run(source)

    score = results['danceability']
    return score
Ejemplo n.º 6
0
def load_audio_from_project(audios_path="../audio/preliminares/",
                            audio_name=None,
                            print_output=True):
    """Load one audio file, or every audio file, found in ``audios_path``.

    Args:
        audios_path: Directory that contains the audio files.
        audio_name: Name of a single file inside ``audios_path``. When it is
            falsy, every non-directory entry of ``audios_path`` is loaded.
        print_output: When true, print one line per loaded file.

    Returns:
        A dict mapping each file name to a dict with the keys ``"data"``
        (the result of calling the ``MonoLoader``), ``"path"``,
        ``"sample_rate"`` and ``"metadata"`` (the result of calling the
        ``MetadataReader``).
    """
    if audio_name:
        names = [audio_name]
    else:
        names = [
            f for f in listdir(audios_path)
            if not path.isdir(path.join(audios_path, f))
        ]

    output = {}
    for name in names:
        # Build the path the same way the directory filter above does, so a
        # directory given without a trailing separator still resolves.
        file_path = path.join(audios_path, name)
        metadata = es_mode.MetadataReader(filename=file_path)()
        # Element 10 of the metadata result is what checkSampleRate receives.
        # It is evaluated once per file; this assumes checkSampleRate has no
        # side effects -- TODO confirm.
        sample_rate = checkSampleRate(metadata[10])
        loader = es_mode.MonoLoader(filename=file_path, sampleRate=sample_rate)
        if print_output:
            print("Loaded", name, "with sampleRate:", sample_rate)
        output[name] = {
            "data": loader(),
            "path": file_path,
            "sample_rate": sample_rate,
            "metadata": metadata,
        }

    return output
Ejemplo n.º 7
0
    def testRegressionStreaming(self):
        """Compare the streaming Chromaprinter fingerprint with ``self.expected``."""
        # In streaming mode the algorithm does not know the duration of the
        # file, so it works by concatenating fragments. About 30 seconds of
        # processing are required to show results consistent with the
        # standard mode.
        audio_path = join(testdata.audio_dir, 'recorded', 'mozart_c_major_30sec.wav')
        source = es.MonoLoader(filename=audio_path)
        printer = es.Chromaprinter(analysisTime=30, concatenate=True)
        results = Pool()

        source.audio >> printer.signal
        printer.fingerprint >> (results, 'chromaprint')

        es.essentia.run(source)

        fingerprint = results['chromaprint'][0]
        self.assertEqualVector(self.expected, fingerprint)
Ejemplo n.º 8
0
    def estimate_chroma(self, uid):
        """Compute a 12-bin HPCP chromagram for the track identified by ``uid``.

        The audio path comes from ``self.audio_path_extractor``; framing uses
        ``self.frame_size`` / ``self.hop_size`` and the HPCP reference
        frequency is ``self.tuning_freq``.

        Returns:
            The pooled HPCP frames, rolled by -3 along axis 1 so that the
            bins start at 'C' instead of 'A'.
        """
        loader = esstr.MonoLoader(
            filename=self.audio_path_extractor.audio_path_name(uid))
        framecutter = esstr.FrameCutter(hopSize=self.hop_size,
                                        frameSize=self.frame_size)
        windowing = esstr.Windowing(type="blackmanharris62")
        spectrum = esstr.Spectrum()
        spectralpeaks = esstr.SpectralPeaks(orderBy="magnitude",
                                            magnitudeThreshold=1e-05,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            maxPeaks=10000)
        hpcp = esstr.HPCP(size=12,
                          referenceFrequency=self.tuning_freq,
                          harmonics=8,
                          bandPreset=True,
                          minFrequency=40.0,
                          maxFrequency=5000.0,
                          bandSplitFrequency=500.0,
                          weightType="cosine",
                          nonLinear=True,
                          windowSize=1.0)
        pool = essentia.Pool()

        # Connect the algorithms. Only the HPCP frames are collected: the raw
        # spectrum is consumed by the peak detector and is not needed
        # afterwards, so it is no longer accumulated in the pool.
        loader.audio >> framecutter.signal
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectralpeaks.magnitudes >> hpcp.magnitudes
        spectralpeaks.frequencies >> hpcp.frequencies
        hpcp.hpcp >> (pool, 'chroma.hpcp')

        essentia.run(loader)

        # roll from 'A' based to 'C' based
        chroma = pool['chroma.hpcp']
        return np.roll(chroma, shift=-3, axis=1)
Ejemplo n.º 9
0
    def testStreamingRegression(self):
        """Run StartStopCut in streaming mode on a recorded file.

        Streaming mode is exercised as well to make sure the algorithm works
        with the real accumulator in front of it.
        """
        import essentia.streaming as estr

        audio_path = join(testdata.audio_dir, 'recorded/mozart_c_major_30sec.wav')
        source = estr.MonoLoader(filename=audio_path)
        accumulator = estr.RealAccumulator()
        cutter = estr.StartStopCut()
        results = Pool()

        # The accumulator gathers the whole signal before the cut detection.
        source.audio >> accumulator.data
        accumulator.array >> cutter.audio
        cutter.startCut >> (results, 'start')
        cutter.stopCut >> (results, 'stop')

        essentia.run(source)

        for descriptor, expected in (('start', 0), ('stop', 1)):
            self.assertEqual(results[descriptor], expected)
Ejemplo n.º 10
0
# Analysis parameters shared by the algorithms configured below.
params = {
    'frameSize': 2048,
    'hopSize': 128,
    'startFromZero': False,
    'sampleRate': 44100,
    'maxnSines': 100,
    'magnitudeThreshold': -74,
    'minSineDur': 0.02,
    'freqDevOffset': 10,
    'freqDevSlope': 0.001,
}


# Output accumulator and frame counter (not used by the setup shown here).
audioout = np.array(0)
counter = 0

# input and output files
import os.path
tutorial_dir = os.path.dirname(os.path.realpath(__file__))
inputFilename = os.path.join(tutorial_dir, 'singing-female.wav')
outputFilename = os.path.join(tutorial_dir, 'singing-female-out-sinesubtraction.wav')


out = np.array(0)
loader = es.MonoLoader(filename=inputFilename, sampleRate=params['sampleRate'])
pool = essentia.Pool()
fcut = es.FrameCutter(frameSize=params['frameSize'],
                      hopSize=params['hopSize'],
                      startFromZero=params['startFromZero'])
w = es.Windowing(type="blackmanharris92")
fft = es.FFT(size=params['frameSize'])
smanal = es.SineModelAnal(sampleRate=params['sampleRate'],
                          maxnSines=params['maxnSines'],
                          magnitudeThreshold=params['magnitudeThreshold'],
                          freqDevOffset=params['freqDevOffset'],
                          freqDevSlope=params['freqDevSlope'])
# Floor division keeps the FFT size an integer on Python 3 as well; plain
# "/" would produce a float there.
subtrFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])
smsub = es.SineSubtraction(sampleRate=params['sampleRate'],
                           fftSize=subtrFFTSize,
                           hopSize=params['hopSize'])


# analysis
loader.audio >> fcut.signal
fcut.frame >> w.frame
w.frame >> fft.frame
fft.fft >> smanal.fft
smanal.magnitudes >> (pool, 'magnitudes')
Ejemplo n.º 11
0
import essentia

db_conn = sqlite3.connect("data.db")
db_cursor = db_conn.cursor()

names = [
    'blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop',
    'reggae', 'rock'
]

for name in names:

    for i in range(10):
        song_name = name + '.' + '0000' + str(i) + '.au'
        print(song_name)
        loader = ess.MonoLoader(filename="genres/" + name + "/" + song_name)
        framecutter = ess.FrameCutter(frameSize=4096,
                                      hopSize=2048,
                                      silentFrames='noise')
        windowing = ess.Windowing(type='blackmanharris62')
        spectrum = ess.Spectrum()
        spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                          magnitudeThreshold=0.00001,
                                          minFrequency=20,
                                          maxFrequency=3500,
                                          maxPeaks=60)

        # Use default HPCP parameters
        hpcp = ess.HPCP()

        # Use pool to store data
    # STFT synthesis
    ifftframe = ifft(outfft)
    out = overl(ifftframe)    

    if counter >= (framesize/(2*hopsize)):
      audioout = np.append(audioout, out)
    counter += 1

  # write audio output
  print audioout.shape
  awrite(audioout.astype(np.float32))


# Streaming variant: load -> frame -> window -> FFT -> IFFT -> overlap-add -> write.
# NOTE(review): mode, framesize, hopsize, inputFilename and outputFilename are
# not defined in this part of the file; presumably set earlier -- confirm.
if mode == 'streaming':
  out = np.array(0)
  loader = es.MonoLoader(filename = inputFilename, sampleRate = 44100)
  pool = essentia.Pool()
  fcut = es.FrameCutter(frameSize = framesize, hopSize = hopsize, startFromZero =  False);
  w = es.Windowing(type = "hann");
  fft = es.FFT(size = framesize);
  ifft = es.IFFT(size = framesize);
  overl = es.OverlapAdd (frameSize = framesize, hopSize = hopsize);
  # The writer uses the same 44100 sample rate as the loader above.
  awrite = es.MonoWriter (filename = outputFilename, sampleRate = 44100);
  
  #gen = audio #VectorInput(audio)
  # Connect the algorithms; the FFT output goes straight into the IFFT.
  loader.audio >> fcut.signal
  fcut.frame >> w.frame
  w.frame >> fft.frame
  fft.fft >> ifft.fft
  ifft.frame >> overl.frame
  overl.signal >> awrite.audio
Ejemplo n.º 13
0
import essentia.streaming as ess
import essentia
import numpy as np
import librosa

# Initialize algorithms we will use
loader = ess.MonoLoader(filename='genres/blues/blues.00000.au')
framecutter = ess.FrameCutter(frameSize=4096, hopSize=2048, silentFrames='noise')
windowing = ess.Windowing(type='blackmanharris62')
spectrum = ess.Spectrum()
# Keep at most 60 spectral peaks between 20 and 3500, ordered by magnitude.
spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                  magnitudeThreshold=0.00001,
                                  minFrequency=20,
                                  maxFrequency=3500,
                                  maxPeaks=60)

# Use default HPCP parameters for plots; a second HPCP with higher resolution
# and custom parameters is configured for better key estimation.

hpcp = ess.HPCP()
hpcp_key = ess.HPCP(size=36, # we will need higher resolution for Key estimation
                    referenceFrequency=440, # assume the tuning frequency is 440
                    bandPreset=False,
                    minFrequency=20,
                    maxFrequency=3500,
                    weightType='cosine',
                    nonLinear=False,
                    windowSize=1.)

key = ess.Key(profileType='edma', # Use profile for electronic music
              numHarmonics=4,
Ejemplo n.º 14
0
import essentia.standard as es
import essentia.streaming as ess



import matplotlib.pyplot as plt


# Compute a Chromaprint fingerprint for one file with a streaming network.
loader = ess.MonoLoader(filename = '/home/lib/audio/work/arglaaa-mini/22.wav')
fps = ess.Chromaprinter(analysisTime=20, concatenate=True)
pool = ess.essentia.Pool()

# Connecting the algorithms
loader.audio >> fps.signal
fps.fingerprint >> (pool, 'chromaprint')

ess.essentia.run(loader)

# Only the first pooled fingerprint is used.
fp = pool['chromaprint'][0]

print(('fp = {0}'.format(fp)))

import acoustid as ai
# import acoustid.chromaprint
import codecs
import numpy as np

print('type(fp) = ', type(fp))
# Encode the fingerprint as UTF-8 bytes before handing it to the decoder.
fpbytes = bytes(fp,  'utf-8')
print('fpbytes = {0}'.format(fpbytes))
# NOTE(review): presumably decode_fingerprint returns a sequence whose first
# element holds the decoded integer values -- confirm against acoustid docs.
fp_int = ai.chromaprint.decode_fingerprint(fpbytes)[0]
Ejemplo n.º 15
0
import os
import essentia as e
import essentia.streaming as estr

# CONFIGURATION
# ================================================================================

# Default parameters
sample_rate = 44100
window_size = 16384
hop_size = 8192
tuning_frequency = 440

# retrieve filenames from folder:
# NOTE(review): audio_folder is not defined in this part of the file; it is
# assumed to be set before this point -- confirm.
soundfiles = os.listdir(audio_folder)
if '.DS_Store' in soundfiles:
    soundfiles.remove('.DS_Store')

# ANALYSIS
# ================================================================================
print("\nANALYSIS...")
for item in soundfiles:
    # One independent network per file: loader -> tuning frequency -> pool.
    loader = estr.MonoLoader(filename=os.path.join(audio_folder, item),
                             sampleRate=sample_rate)
    tuningExtractor = estr.TuningFrequencyExtractor(frameSize=window_size,
                                                    hopSize=hop_size)
    pool = e.Pool()
    loader.audio >> tuningExtractor.signal
    tuningExtractor.tuningFrequency >> (pool, 'tuning_reference')
    # run and print the results.
    e.run(loader)
    result = pool['tuning_reference']
    # Single formatted string so the line prints the same on Python 2 and 3.
    print('{0} {1}'.format(item[:20] + '...     ', result))
Ejemplo n.º 16
0
def compute_features(path, f_mfcc_kl, f_mfcc_euclid, f_notes, f_chroma, f_bh):
    """Extract rhythm, tonal and timbre features from one audio file.

    Args:
        path: Audio file to analyse.
        f_mfcc_kl: When 1, MFCC statistics are computed.
        f_mfcc_euclid: When 1, MFCC statistics are computed.
        f_notes: When 1, the dominant chroma bin per beat is computed.
        f_chroma: When 1, a beat-aligned, L1-normalised chroma matrix is
            computed.
        f_bh: When 1, the BPM histogram is computed.

    Returns:
        The tuple ``(bpm, histogram, key, scale, notes, chroma_matrix, mean,
        cov, var, cov_kl)``. ``key`` and ``scale`` are always estimated;
        every other feature keeps the placeholder ``0`` when its flag is
        off. ``cov`` holds the upper triangle of the MFCC covariance and
        ``cov_kl`` the full matrix. When the file cannot be loaded the result
        is ``(0, [], 0, 0, [], [], [], [], [], [])``.
    """
    gc.enable()

    # Load the audio twice: once decoded in standard mode (MFCC and rhythm)
    # and once as a streaming source (chroma and key). Both loaders resample
    # if the file's sample rate differs from the requested one.
    try:
        audio = es.MonoLoader(filename=path, sampleRate=fs)()
    except Exception:
        print("Erroneos File detected by essentia standard: skipping!")
        # bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl
        return 0, [], 0, 0, [], [], [], [], [], []
    # NOTE(review): this loader uses 44100 while the beat-to-frame conversion
    # below uses the module-level fs; they only line up when fs == 44100.
    try:
        loader = ess.MonoLoader(filename=path, sampleRate=44100)
    except Exception:
        print("Erroneos File detected by essentia streaming: skipping!")
        return 0, [], 0, 0, [], [], [], [], [], []

    # Framing used by the streaming chroma network.
    frameSize = 4096
    hopSize = 2048
    # Band-limit the signal before the chroma analysis.
    HP = ess.HighPass(cutoffFrequency=128)
    LP = ess.LowPass(cutoffFrequency=4096)

    # Placeholders for the features that are switched off.
    bpm = 0
    histogram = 0
    notes = 0
    chroma_matrix = 0
    mean = 0
    cov = 0
    var = 0
    cov_kl = 0

    #####################################
    # extract mfcc
    #####################################
    if f_mfcc_kl == 1 or f_mfcc_euclid == 1:
        n_mfcc = 13
        hamming_window = es.Windowing(type='hamming')
        std_spectrum = es.Spectrum()  # we just want the magnitude spectrum
        mfcc = es.MFCC(numberCoefficients=n_mfcc)
        frame_sz = 2048
        hop_sz = 1024
        # MFCC returns a pair; element [1] holds the coefficients kept here.
        mfccs = np.array([
            mfcc(std_spectrum(hamming_window(frame)))[1] for frame in
            es.FrameGenerator(audio, frameSize=frame_sz, hopSize=hop_sz)
        ])
        mean = np.mean(mfccs.T, axis=1)
        var = np.var(mfccs.T, axis=1)
        cov = np.cov(mfccs.T)
        cov_kl = cov
        # get only upper triangular matrix values to shorten length
        cov = cov[np.triu_indices(n_mfcc)]

    #####################################
    # extract beat features and histogram
    #####################################
    if f_bh == 1 or f_chroma == 1 or f_notes == 1:
        # Compute beat positions and BPM
        rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
        bpm, beats, _, _, beats_intervals = rhythm_extractor(audio)
        if f_bh == 1:
            # The histogram is the last output of BpmHistogramDescriptors.
            histogram = es.BpmHistogramDescriptors()(beats_intervals)[-1]
        # Convert beat times to indices of the chroma frames.
        beats_frames = (beats * fs) / hopSize
        beats_frames = beats_frames.astype(int)

    #####################################
    # extract chroma and key (streaming network)
    #####################################
    framecutter = ess.FrameCutter(frameSize=frameSize,
                                  hopSize=hopSize,
                                  silentFrames='noise')
    windowing = ess.Windowing(type='blackmanharris62')
    spectrum = ess.Spectrum()
    spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                      magnitudeThreshold=0.00001,
                                      minFrequency=20,
                                      maxFrequency=3500,
                                      maxPeaks=60)
    # Default HPCP parameters for the chroma features; a second HPCP with
    # higher resolution and custom parameters feeds the key estimation.
    hpcp = ess.HPCP()
    hpcp_key = ess.HPCP(
        size=36,  # we will need higher resolution for Key estimation
        referenceFrequency=440,  # assume the tuning frequency is 440
        bandPreset=False,
        minFrequency=20,
        maxFrequency=3500,
        weightType='cosine',
        nonLinear=False,
        windowSize=1.)
    key_estimator = ess.Key(
        profileType='edma',  # Use profile for electronic music
        numHarmonics=4,
        pcpSize=36,
        slope=0.6,
        usePolyphony=True,
        useThreeChords=True)
    # Use pool to store data
    pool = essentia.Pool()

    # Connect streaming algorithms. The audio is filtered first; to analyse
    # the unfiltered signal connect loader.audio to framecutter.signal
    # directly instead of the three filter connections.
    loader.audio >> HP.signal
    HP.signal >> LP.signal
    LP.signal >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    spectralpeaks.magnitudes >> hpcp_key.magnitudes
    spectralpeaks.frequencies >> hpcp_key.frequencies
    hpcp_key.hpcp >> key_estimator.pcp
    hpcp.hpcp >> (pool, 'tonal.hpcp')
    key_estimator.key >> (pool, 'tonal.key_key')
    key_estimator.scale >> (pool, 'tonal.key_scale')
    key_estimator.strength >> (pool, 'tonal.key_strength')
    # Run streaming network
    essentia.run(loader)

    chroma = pool['tonal.hpcp'].T
    key = pool['tonal.key_key']
    scale = pool['tonal.key_scale']

    #####################################
    # extract full beat aligned chroma
    #####################################
    if f_chroma == 1:
        chroma_matrix = np.zeros((beats_frames.shape[0], 12))
        # Transposed view of chroma (one row per frame). The beat averages
        # written into it below are therefore also what the note extraction
        # further down operates on.
        chroma_align = chroma.transpose()
        prev_beat = 0
        for mat_index, act_beat in enumerate(beats_frames):
            segment = chroma_align[prev_beat:act_beat]
            if len(segment) == 0:
                # No frames between these beats: there is nothing to
                # average, so use a zero vector instead of dividing 0 by 0.
                value = np.zeros(chroma_align.shape[1])
            else:
                value = sum(segment) / (act_beat - prev_beat)
                chroma_align[prev_beat:act_beat] = value
            prev_beat = act_beat
            norm = np.linalg.norm(value, ord=1)
            if norm != 0:
                value = value / norm
            chroma_matrix[mat_index] = value

    #####################################
    # extract full chroma text
    #####################################
    if f_notes == 1:
        chroma = chroma.transpose()  # one row per frame from here on
        n_frames, n_bins = chroma.shape
        avg = 0
        for frame in chroma:
            avg = avg + np.sum(frame)
        avg = avg / n_frames
        threshold = avg / 2
        # Per frame: keep only the strongest bins of frames whose total
        # energy is above half the average, silence all other frames.
        for frame in chroma:
            if np.sum(frame) > threshold:
                ind = np.where(frame == np.max(frame))
                frame[ind] = 1
                frame[frame < 0.8] = 0
            else:
                frame.fill(0)

        # Per beat: overwrite every frame of the segment with a one-hot
        # vector marking the bin(s) with the largest summed value.
        prev_beat = 0
        for act_beat in beats_frames:
            sum_key = sum(chroma[prev_beat:act_beat])
            ind = np.where(sum_key == np.max(sum_key))[0]
            fill = np.zeros(n_bins)
            if not np.all(chroma[prev_beat:act_beat] == 0):
                fill[ind] = 1
            chroma[prev_beat:act_beat] = fill
            prev_beat = act_beat

        # Per beat: record the index of the first dominant bin.
        notes = []
        prev_beat = 0
        for act_beat in beats_frames:
            sum_key = sum(chroma[prev_beat:act_beat])
            ind = np.where(sum_key == np.max(sum_key))
            notes.append(ind[0][0])
            prev_beat = act_beat

    gc.collect()
    return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl