Example #1
import csv
import os

import essentia.standard as es
import numpy as np


def melody_extraction_melodia(audioPath, exportPath):

    # Predominant pitch extraction using MELODIA [1] (implementation from Essentia [2]).
    # audioPath is the directory containing the .wav files to extract the pitch from;
    # exportPath is the directory where the per-file CSV predictions are written.

    # [1] J. Salamon and E. Gomez, "Melody Extraction from Polyphonic Music Signals using Pitch Contour Characteristics",
    # IEEE Transactions on Audio, Speech and Language Processing, 20(6):1759-1770, Aug. 2012.
    # [2] Bogdanov, D., Wack N., Gomez E., Gulati S., Herrera P., Mayor O., et al. (2013). ESSENTIA: an Audio Analysis Library for
    # Music Information Retrieval. International Society for Music Information Retrieval Conference (ISMIR'13). 493-498.

    listOfFiles = os.listdir(audioPath)

    for file in listOfFiles:
        if not file.endswith('.wav'): continue
        if file.startswith('._'): continue  # skip macOS metadata files

        # Load audio with the equal-loudness filter applied
        audioLoader = es.EqloudLoader(filename=os.path.join(audioPath, file))
        audio = audioLoader()

        if '_MIX' in file:
            file = file[:-8] + '.wav'  # strip the '_MIX' suffix for MedleyDB files

        L = len(audio) / 44100.0  # duration in seconds (not used below)
        H = 441  # hop size: 10 ms at 44.1 kHz, for evaluation

        # MELODIA algorithm
        melodia = es.PredominantPitchMelodia(hopSize=H)
        pitch, confidence = melodia(audio)
        pitch = np.array(pitch)

        N = pitch.shape[0]

        # Time vector for the output format: one timestamp every 10 ms
        time = np.round(np.arange(N) * 0.01, 2)

        pitchExp = np.column_stack((time, pitch))

        # Export predictions as <name>.csv under exportPath
        exportFilename = file[:-3] + 'csv'

        with open(os.path.join(exportPath, exportFilename), 'w', newline='') as f:
            writer = csv.writer(f)
            for line in pitchExp:
                writer.writerow(line)

        print(file + ' computed and exported!')
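A minimal usage sketch; the directory names below are hypothetical placeholders, not from the original project:

melody_extraction_melodia('dataset/audio/', 'dataset/predictions/')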
Example #2
import csv
import os

import essentia.standard as es
import numpy as np


def melody_extraction_melodia(audioPath, exportPath):

    listOfFiles = os.listdir(audioPath)

    for file in listOfFiles:
        if not file.endswith('.wav'): continue
        if file.startswith('._'): continue  # skip macOS metadata files

        # Load audio with the equal-loudness filter applied
        audioLoader = es.EqloudLoader(filename=os.path.join(audioPath, file))
        audio = audioLoader()

        # High-pass filter the audio to attenuate low-frequency content
        # (HPFilter is a project helper; see the sketch after this example)
        cutoff = 700  # Hz
        audio = HPFilter(audio, cutoff)

        if '_MIX' in file:
            file = file[:-8] + '.wav'  # strip the '_MIX' suffix for MedleyDB files

        L = len(audio) / 44100.0  # duration in seconds (not used below)
        H = 441  # hop size: 10 ms at 44.1 kHz, for evaluation

        # MELODIA algorithm
        melodia = es.PredominantPitchMelodia(hopSize=H)
        pitch, confidence = melodia(audio)
        pitch = np.array(pitch)

        N = pitch.shape[0]

        # Time vector for the output format: one timestamp every 10 ms
        time = np.round(np.arange(N) * 0.01, 2)

        pitchExp = np.column_stack((time, pitch))

        # Export predictions as <name>.csv under exportPath
        exportFilename = file[:-3] + 'csv'

        with open(os.path.join(exportPath, exportFilename), 'w', newline='') as f:
            writer = csv.writer(f)
            for line in pitchExp:
                writer.writerow(line)

        print(file + ' computed and exported!')
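HPFilter is not defined in this example. A minimal sketch of what such a helper might look like, assuming Essentia's standard HighPass algorithm (a first-order IIR high-pass) and 44.1 kHz audio; the original project's implementation may differ:

def HPFilter(audio, cutoff, sampleRate=44100):
    # Hypothetical helper: first-order IIR high-pass at `cutoff` Hz.
    highpass = es.HighPass(cutoffFrequency=cutoff, sampleRate=sampleRate)
    return highpass(audio)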
Example #3
import pickle
from os import makedirs
from os.path import dirname, exists, isfile, join

import essentia.standard as ess
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np

# textGrid2WordList, wordListsParseByLines, generatePinyin, featureExtraction,
# getOnsetFunction, viterbiSegmental2, peakPicking, late_fusion_calc,
# boundaryLabWriter and the module-level globals (varin, fs, hopsize, hopsize_t,
# aCapella_root, score_path, audio_path, the CNN models and their paths) come
# from the surrounding project.


def onsetFunctionAllRecordings(recordings,
                               textgrid_path,
                               dict_recording_name_mapping,
                               dataset_path,
                               feature_type='mfcc',
                               dmfcc=False,
                               nbf=True,
                               mth='jordi',
                               late_fusion=True):
    """
    ODF computation and Viterbi decoding
    :param recordings: list of recording names to process
    :param textgrid_path: directory containing the ground-truth .TextGrid files
    :param dict_recording_name_mapping: mapping from "fem_01" to standard format, see filePath.py
    :param dataset_path: dataset subdirectory under aCapella_root
    :param feature_type: 'mfcc', 'mfccBands1D' or 'mfccBands2D'
    :param dmfcc: delta for 'mfcc'
    :param nbf: context frames
    :param mth: jordi, jordi_horizontal_timbral, jan, jan_chan3
    :param late_fusion: Bool
    :return:
    """

    scaler = pickle.load(open(full_path_mfccBands_2D_scaler_onset, 'rb'))

    # kerasModel = _LRHMM.kerasModel(full_path_keras_cnn_am)

    for i_recording, recording_name in enumerate(recordings):

        groundtruth_textgrid_file   = join(textgrid_path, dict_recording_name_mapping[recording_name]+'.TextGrid')
        score_file                  = join(aCapella_root, dataset_path, score_path,      recording_name+'.csv')
        wav_file                    = join(aCapella_root, dataset_path, audio_path,      recording_name+'.wav')

        if not isfile(score_file):
            print('Score not found: ' + score_file)
            continue

        lineList        = textGrid2WordList(groundtruth_textgrid_file, whichTier='line')
        utteranceList   = textGrid2WordList(groundtruth_textgrid_file, whichTier='dianSilence')

        # parse lines of groundtruth
        nestedUtteranceLists, numLines, numUtterances = wordListsParseByLines(lineList, utteranceList)

        # parse score
        syllables, pinyins, syllable_durations, bpm = generatePinyin(score_file)

        # print(pinyins)
        # print(syllable_durations)

        if varin['obs'] == 'tocal':
            # load audio
            audio_monoloader = ess.MonoLoader(downmix='left', filename=wav_file, sampleRate=fs)()
            audio_eqloudloader = ess.EqloudLoader(filename=wav_file, sampleRate=fs)()

            if mth in ('jordi', 'jordi_horizontal_timbral', 'jan'):
                mfcc, mfcc_reshaped = featureExtraction(audio_monoloader,
                                                        scaler,
                                                        int(round(0.025 * fs)),
                                                        dmfcc=dmfcc,
                                                        nbf=nbf,
                                                        feature_type='mfccBands2D')

        for i_obs, lineList in enumerate(nestedUtteranceLists):
            if int(bpm[i_obs]):
                sample_start    = int(round(lineList[0][0] * fs))
                sample_end      = int(round(lineList[0][1] * fs))
                frame_start     = int(round(lineList[0][0] * fs / hopsize))
                frame_end       = int(round(lineList[0][1] * fs / hopsize))
                # print(feature.shape)

                obs_path = join('./obs', cnnModel_name, dataset_path)
                obs_filename = recording_name + '_' + str(i_obs + 1) + '.pkl'
                full_obs_name = join(obs_path, obs_filename)

                if varin['obs'] == 'tocal':
                    if mth in ('jordi', 'jordi_horizontal_timbral', 'jan'):
                        audio_eqloudloader_line = audio_eqloudloader[sample_start:sample_end]
                        mfcc_line = mfcc[frame_start:frame_end]
                        mfcc_reshaped_line = mfcc_reshaped[frame_start:frame_end]

                    mfcc_reshaped_line = np.expand_dims(mfcc_reshaped_line, axis=1)
                    obs     = getOnsetFunction(observations=mfcc_reshaped_line,
                                               model=model_keras_cnn_0,
                                               method=mth)
                    # obs_i   = obs[:,1]
                    obs_i = obs[:, 0]

                    hann = np.hanning(5)
                    hann /= np.sum(hann)

                    obs_i = np.convolve(hann, obs_i, mode='same')

                    # save onset curve
                    print('save onset curve ... ...')
                    obs_dirpath = dirname(full_obs_name)
                    if not exists(obs_dirpath):
                        makedirs(obs_dirpath)
                    with open(full_obs_name, 'wb') as f_obs:
                        pickle.dump(obs_i, f_obs)
                else:
                    with open(full_obs_name, 'rb') as f_obs:
                        obs_i = pickle.load(f_obs)

                if late_fusion:
                    if varin['obs'] == 'viterbi':
                        obs_2 = getOnsetFunction(observations=mfcc_reshaped_line,
                                                 path_keras_cnn=full_path_keras_cnn_1,
                                                 method=mth)
                        obs_2_i = obs_2[:, 1]
                        obs_2_i = np.convolve(hann, obs_2_i, mode='same')
                    else:
                        obs_path_1 = join('./obs', cnnModel_name_1, dataset_path)
                        full_obs_name_1 = join(obs_path_1, obs_filename)
                        with open(full_obs_name_1, 'rb') as f_obs:
                            obs_2_i = pickle.load(f_obs)

                    obs_i = late_fusion_calc(obs_i, obs_2_i, mth=2)

                # organize score
                print('Calculating: '+recording_name+' phrase '+str(i_obs))
                print('ODF method: ' + mth + ', late fusion: ' + str(late_fusion))

                time_line      = lineList[0][1] - lineList[0][0]

                lyrics_line    = [ll[2] for ll in lineList[1]]
                groundtruth_syllable = [ll[0]-lineList[0][0] for ll in lineList[1]]

                print('Syllable:')
                print(lyrics_line)

                print('Length of syllables, length of ground truth syllables:')
                print(len(lyrics_line), len(groundtruth_syllable))

                pinyin_score   = pinyins[i_obs]
                pinyin_score   = [ps for ps in pinyin_score if len(ps)]
                duration_score = syllable_durations[i_obs]
                duration_score = np.array([float(ds) for ds in duration_score if len(ds)])
                duration_score = duration_score * (time_line/np.sum(duration_score))

                if varin['decoding'] == 'viterbi':
                    # segmental decoding
                    obs_i[0] = 1.0
                    obs_i[-1] = 1.0
                    i_boundary = viterbiSegmental2(obs_i, duration_score, varin)
                    # write boundaries to a .syll.lab file
                    filename_syll_lab = join(eval_results_path, dataset_path, recording_name+'_'+str(i_obs+1)+'.syll.lab')
                    label = True

                else:
                    i_boundary = peakPicking(1.0-obs_i)

                    # arg_pp = {'threshold': 0.54, 'smooth': 0, 'fps': 1. / hopsize_t, 'pre_max': hopsize_t,
                    #           'post_max': hopsize_t}
                    # # peak_picking = OnsetPeakPickingProcessor(threshold=threshold,smooth=smooth,fps=fps,pre_max=pre_max,post_max=post_max)
                    # peak_picking = OnsetPeakPickingProcessor(**arg_pp)
                    # i_boundary = peak_picking.process(obs_i)
                    # i_boundary = np.append(i_boundary, (len(obs_i) - 1) * hopsize_t)
                    # i_boundary /= hopsize_t
                    filename_syll_lab = join(eval_results_path + '_peakPicking', dataset_path,
                                             recording_name + '_' + str(i_obs + 1) + '.syll.lab')
                    label = False

                time_boundary_start = np.array(i_boundary[:-1]) * hopsize_t
                time_boundary_end = np.array(i_boundary[1:]) * hopsize_t

                eval_results_data_path = dirname(filename_syll_lab)

                if not exists(eval_results_data_path):
                    makedirs(eval_results_data_path)

                if varin['decoding'] == 'viterbi':
                    boundaryList = zip(time_boundary_start.tolist(), time_boundary_end.tolist(), lyrics_line)
                else:
                    boundaryList = zip(time_boundary_start.tolist(), time_boundary_end.tolist())

                # write boundary lab file
                boundaryLabWriter(boundaryList=boundaryList,
                                  outputFilename=filename_syll_lab,
                                  label=label)

                # print(i_boundary)
                # print(len(obs_i))
                # print(np.array(groundtruth_syllable)*fs/hopsize)

                if varin['plot']:
                    # plot Error analysis figures
                    plt.figure(figsize=(16, 6))
                    # plt.figure(figsize=(8, 4))
                    # class weight
                    ax1 = plt.subplot(3, 1, 1)
                    y = np.arange(0, 80)
                    x = np.arange(0, mfcc_line.shape[0])*(hopsize/float(fs))
                    cax = plt.pcolormesh(x, y, np.transpose(mfcc_line[:, 80 * 11:80 * 12]))
                    for gs in groundtruth_syllable:
                        plt.axvline(gs, color='r', linewidth=2)
                    # cbar = fig.colorbar(cax)
                    ax1.set_ylabel('Mel bands', fontsize=12)
                    ax1.get_xaxis().set_visible(False)
                    ax1.axis('tight')
                    plt.title('Calculating: '+recording_name+' phrase '+str(i_obs))

                    ax2 = plt.subplot(3, 1, 2, sharex=ax1)
                    plt.plot(np.arange(0, len(obs_i)) * (hopsize / float(fs)), obs_i)
                    for ib in i_boundary:
                        plt.axvline(ib * (hopsize / float(fs)), color='r', linewidth=2)

                    ax2.set_ylabel('ODF', fontsize=12)
                    ax2.axis('tight')


                    ax3 = plt.subplot(3, 1, 3, sharex=ax1)
                    print(duration_score)
                    time_start = 0
                    for ii_ds, ds in enumerate(duration_score):
                        ax3.add_patch(
                            patches.Rectangle(
                                (time_start, ii_ds),  # (x,y)
                                ds,  # width
                                1,  # height
                            ))
                        time_start += ds
                    ax3.set_ylim((0,len(duration_score)))
                    # plt.xlabel('Time (s)')
                    # plt.tight_layout()

                    plt.show()
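A hedged invocation sketch; the recording names, the mapping, and the paths below are placeholders, and the module-level globals listed at the top of this example must already be configured:

recordings = ['fem_01', 'fem_02']  # hypothetical recording names
name_mapping = {'fem_01': 'FemaleSinger01', 'fem_02': 'FemaleSinger02'}  # hypothetical
onsetFunctionAllRecordings(recordings,
                           textgrid_path='annotation/textgrids',
                           dict_recording_name_mapping=name_mapping,
                           dataset_path='part1',
                           feature_type='mfccBands2D',
                           mth='jordi',
                           late_fusion=True)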
Example #4
    # Method of a Django management command: Sample and Feature are Django models,
    # es is essentia.standard and np is numpy.
    def runAnalysis(self):

        # Get all samples referenced in DB, except for those
        # that have been marked as samples to exclude
        # TODO: Need to clarify whether or not sample packs
        # should be excluded if we can't find enough info on them,
        # for now including all samplepacks
        samples = Sample.objects.all().filter(exclude=False,
                                              #kit__sample_pack__exclude=False,
                                              )

        numSamples = len(samples)
        self.stdout.write("Running low-level extractors on %s samples. " %
                          (numSamples))
        i = 0.0

        for sample in samples:
            # Get audio and run loudness analysis
            try:
                loader = es.MonoLoader(filename=sample.path)
                neqAudio = loader()

                eqLoader = es.EqloudLoader(filename=sample.path)
                eqAudio = eqLoader()

                # Trim the audio clip
                trimmer = es.Trimmer(startTime=sample.start_time,
                                     endTime=sample.stop_time)
                neqAudio = trimmer(neqAudio)
                eqAudio = trimmer(eqAudio)

            except RuntimeError as esExcept:
                self.stderr.write("%s\n" % esExcept)
                self.stderr.write(
                    "%s failed to load. Excluding sample from further analysis"
                    % sample.path)
                sample.exclude = True
                sample.save()
                i = i + 1
                continue

            # Frame size & hop size
            frameSize = 2048
            hopSize = 256

            # Amplitude envelope of sample
            envelope = es.Envelope()
            audioEnv = envelope(eqAudio)

            # Find attack phase and LAT
            latFunc = es.LogAttackTime()
            lat, attackStart, attackEnd = latFunc(audioEnv)

            # Temporal Centroid on entire sample length
            tc = self.temporal_centroid(eqAudio)

            # Time segmentation starting point
            windowFunc = es.LogAttackTime(startAttackThreshold=float(
                self.windowStart if self.windowStart < 90 else 90) / 100)
            _, windowStart, windowEnd = windowFunc(audioEnv)
            windowStart = windowStart if self.windowStart < 90 else windowEnd

            if self.windowLength > 0:
                # Window from onset
                trimmer = es.Trimmer(startTime=windowStart,
                                     endTime=windowStart +
                                     (float(self.windowLength) / 1000))
                eqAudio = trimmer(eqAudio)
                neqAudio = trimmer(neqAudio)

            # Get analysis object for this sample
            try:
                analysisObject = Feature.objects.get(
                    sample=sample,
                    window_length=self.windowLength,
                    window_start=self.windowStart)
            except Feature.DoesNotExist:
                analysisObject = Feature(sample=sample,
                                         window_length=self.windowLength,
                                         window_start=self.windowStart)

            analysisObject.lat = lat
            analysisObject.rms = self.rms(eqAudio)
            analysisObject.temporal_centroid = tc

            # Spectral extractor without equal loudness filter
            neqSpectralExtractor = es.LowLevelSpectralExtractor(
                frameSize=frameSize, hopSize=hopSize)
            neqSpectralResults = neqSpectralExtractor(neqAudio)

            # Bark band means (27 bands)
            bark_mean = np.mean(neqSpectralResults[0], axis=0)
            for band in range(27):
                setattr(analysisObject, 'bark_%d_mean' % (band + 1), bark_mean[band])

            # Bark band standard deviations
            bark_dev = np.std(neqSpectralResults[0], axis=0)
            for band in range(27):
                setattr(analysisObject, 'bark_%d_dev' % (band + 1), bark_dev[band])

            analysisObject.bark_kurtosis = np.mean(neqSpectralResults[1])
            analysisObject.bark_skewness = np.mean(neqSpectralResults[2])
            analysisObject.bark_spread = np.mean(neqSpectralResults[3])

            analysisObject.bark_kurtosis_dev = np.std(neqSpectralResults[1])
            analysisObject.bark_skewness_dev = np.std(neqSpectralResults[2])
            analysisObject.bark_spread_dev = np.std(neqSpectralResults[3])

            analysisObject.hfc = np.mean(neqSpectralResults[4])
            analysisObject.hfc_dev = np.std(neqSpectralResults[4])

            # MFCC means (13 coefficients)
            mfcc_mean = np.mean(neqSpectralResults[5], axis=0)
            for coeff in range(13):
                setattr(analysisObject, 'mfcc_%d_mean' % (coeff + 1), mfcc_mean[coeff])

            # MFCC standard deviations
            mfcc_dev = np.std(neqSpectralResults[5], axis=0)
            for coeff in range(13):
                setattr(analysisObject, 'mfcc_%d_dev' % (coeff + 1), mfcc_dev[coeff])

            analysisObject.pitch_salience = np.mean(neqSpectralResults[8])
            analysisObject.spectral_complexity = np.mean(
                neqSpectralResults[12])
            analysisObject.spectral_crest = np.mean(neqSpectralResults[13])
            analysisObject.spectral_decrease = np.mean(neqSpectralResults[14])
            analysisObject.spectral_energy = np.mean(neqSpectralResults[15])
            analysisObject.spectral_energyband_low = np.mean(
                neqSpectralResults[16])
            analysisObject.spectral_energyband_middle_low = np.mean(
                neqSpectralResults[17])
            analysisObject.spectral_energyband_middle_high = np.mean(
                neqSpectralResults[18])
            analysisObject.spectral_energyband_high = np.mean(
                neqSpectralResults[19])
            analysisObject.spectral_flatness_db = np.mean(
                neqSpectralResults[20])
            analysisObject.spectral_flux = np.mean(neqSpectralResults[21])
            analysisObject.spectral_rms = np.mean(neqSpectralResults[22])
            analysisObject.spectral_rolloff = np.mean(neqSpectralResults[23])
            analysisObject.spectral_strongpeak = np.mean(
                neqSpectralResults[24])
            analysisObject.zero_crossing_rate = np.mean(neqSpectralResults[25])
            analysisObject.inharmonicity = np.mean(neqSpectralResults[26])

            analysisObject.pitch_salience_dev = np.std(neqSpectralResults[8])
            analysisObject.spectral_complexity_dev = np.std(
                neqSpectralResults[12])
            analysisObject.spectral_crest_dev = np.std(neqSpectralResults[13])
            analysisObject.spectral_decrease_dev = np.std(
                neqSpectralResults[14])
            analysisObject.spectral_energy_dev = np.std(neqSpectralResults[15])
            analysisObject.spectral_energyband_low_dev = np.std(
                neqSpectralResults[16])
            analysisObject.spectral_energyband_middle_low_dev = np.std(
                neqSpectralResults[17])
            analysisObject.spectral_energyband_middle_high_dev = np.std(
                neqSpectralResults[18])
            analysisObject.spectral_energyband_high_dev = np.std(
                neqSpectralResults[19])
            analysisObject.spectral_flatness_db_dev = np.std(
                neqSpectralResults[20])
            analysisObject.spectral_flux_dev = np.std(neqSpectralResults[21])
            analysisObject.spectral_rms_dev = np.std(neqSpectralResults[22])
            analysisObject.spectral_rolloff_dev = np.std(
                neqSpectralResults[23])
            analysisObject.spectral_strongpeak_dev = np.std(
                neqSpectralResults[24])
            analysisObject.zero_crossing_rate_dev = np.std(
                neqSpectralResults[25])
            analysisObject.inharmonicity_dev = np.std(neqSpectralResults[26])

            tristimulus = np.mean(neqSpectralResults[27], axis=0)
            analysisObject.tristimulus_1 = tristimulus[0]
            analysisObject.tristimulus_2 = tristimulus[1]
            analysisObject.tristimulus_3 = tristimulus[2]

            tristimulus_dev = np.std(neqSpectralResults[27], axis=0)
            analysisObject.tristimulus_1_dev = tristimulus_dev[0]
            analysisObject.tristimulus_2_dev = tristimulus_dev[1]
            analysisObject.tristimulus_3_dev = tristimulus_dev[2]

            # Spectral extractor with equal loudness filter
            eqSpectralExtractor = es.LowLevelSpectralEqloudExtractor(
                frameSize=frameSize, hopSize=hopSize)
            eqSpectralResults = eqSpectralExtractor(eqAudio)

            analysisObject.spectral_centroid = np.mean(eqSpectralResults[3])
            analysisObject.spectral_kurtosis = np.mean(eqSpectralResults[4])
            analysisObject.spectral_skewness = np.mean(eqSpectralResults[5])
            analysisObject.spectral_spread = np.mean(eqSpectralResults[6])

            analysisObject.spectral_centroid_dev = np.std(eqSpectralResults[3])
            analysisObject.spectral_kurtosis_dev = np.std(eqSpectralResults[4])
            analysisObject.spectral_skewness_dev = np.std(eqSpectralResults[5])
            analysisObject.spectral_spread_dev = np.std(eqSpectralResults[6])

            analysisObject.save()

            i = i + 1
            self.stdout.write("\t\t%2.2f%%" % (100.0 *
                                               (i / float(numSamples))),
                              ending='\r')
            self.stdout.flush()

        self.stdout.write("\r", ending='\r')
        self.stdout.flush()
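self.temporal_centroid and self.rms are helper methods not shown in this example. A minimal sketch of what they might compute, assuming the textbook definitions (amplitude-weighted mean time and root-mean-square level); the project's own helpers may differ:

    def temporal_centroid(self, audio, sampleRate=44100.0):
        # Amplitude-weighted mean time of the signal, in seconds (a sketch).
        env = np.abs(audio)
        t = np.arange(len(env)) / sampleRate
        return float(np.sum(t * env) / np.sum(env))

    def rms(self, audio):
        # Root-mean-square level of the signal (a sketch).
        return float(np.sqrt(np.mean(np.square(audio))))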
Example #5
import os

import essentia.standard as estd
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# path, category, stroke, rate, band_decay and sustain_durn are defined
# elsewhere in the project.
sets = os.listdir(path)

rows = []  # collect one dict per file (DataFrame.append was removed in pandas 2.0)

for seti in sets:
    if not os.path.isdir(os.path.join(path, seti)):
        continue
    for categ in category:
        categ_dir = os.path.join(path, seti, stroke, categ)
        if not os.path.exists(categ_dir):
            continue
        for wave in os.listdir(categ_dir):
            fileName = os.path.join(categ_dir, wave)
            audio = estd.EqloudLoader(filename=fileName)()
            rows.append({
                'Filename': wave,
                'Set': seti,
                'Category': categ,
                'Decay Rate': band_decay(audio, rate, 1)[0],
                'Sustain': sustain_durn(audio, rate),
            })

data_df = pd.DataFrame(rows)

plt.title(stroke)
sns.swarmplot(x="Set",
              y="Centroid1",
              hue="Category",
              data=data_df,
              palette="Set2",
              dodge=True)
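The swarmplot above reads a "Centroid1" column that the loop never fills in. A hedged sketch of how such a per-file value might be computed with Essentia's Windowing, Spectrum and Centroid algorithms; the column name, frame sizes and averaging are assumptions, not the original project's code:

import numpy as np

windowing = estd.Windowing(type='hann')
spectrum = estd.Spectrum()
centroid = estd.Centroid(range=rate / 2.0)  # map normalized bin position to Hz

def mean_spectral_centroid(audio):
    # Average frame-wise spectral centroid over the whole clip.
    values = [centroid(spectrum(windowing(frame)))
              for frame in estd.FrameGenerator(audio, frameSize=2048, hopSize=1024, startFromZero=True)]
    return float(np.mean(values)) if values else 0.0

# e.g. add 'Centroid1': mean_spectral_centroid(audio) to each row dict in the loop above.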
Example #6
import essentia.standard as estd

loader = estd.EqloudLoader(filename='')  # set to the audio file to analyse
audio = loader()
energy = estd.Energy()

# Frame-wise energy over the whole signal
en = []
for frame in estd.FrameGenerator(audio, frameSize=1024, hopSize=512, startFromZero=True):
    en.append(energy(frame))
emax = max(en)
ep = en.index(emax)  # index of the peak-energy frame
th = emax / 2        # half-maximum threshold (not used below)

# Frame-wise energy over the first 0.6 s only
e = []
for frame in estd.FrameGenerator(audio[:int(0.6 * 44100)], frameSize=1024, hopSize=512, startFromZero=True):
    e.append(energy(frame))
# Find the first sharp energy rise; if it occurs well before the global peak,
# splice out the frames between the rise and the peak.
for i in range(len(e) - 1):
    d = e[i + 1] - e[i]
    if d > 0.01:
        p = i + 1  # index of the rising frame (e.index() would misfire on duplicate values)
        if (ep - p) > 2:
            en = e[:p] + en[ep:]
            break
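A small follow-up sketch, converting the detected frame index to seconds using the hop size from above (an assumption about how the snippet is meant to be used; p is only set if a rise was found):

onset_time = p * 512 / 44100.0  # frame index * hopSize / sampleRate
print('energy rise at %.3f s' % onset_time)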