def melody_extraction_melodia(audioPath, exportPath):
    """Extract the predominant pitch of every WAV file in a folder with MELODIA.

    Predominant pitch extraction using MELODIA [1] (implementation from
    Essentia [2]). Every ``*.wav`` entry of ``audioPath`` is loaded with
    Essentia's ``EqloudLoader``, analysed with ``PredominantPitchMelodia``
    and exported to ``exportPath`` as a two-column CSV file: the frame time
    in seconds and the pitch value returned by the algorithm.

    [1] J. Salamon and E. Gomez, "Melody Extraction from Polyphonic Music
        Signals using Pitch Contour Characteristics", IEEE Transactions on
        Audio, Speech and Language Processing, 20(6):1759-1770, Aug. 2012.
    [2] Bogdanov, D., Wack N., Gomez E., Gulati S., Herrera P., Mayor O.,
        et al. (2013). ESSENTIA: an Audio Analysis Library for Music
        Information Retrieval. International Society for Music Information
        Retrieval Conference (ISMIR'13). 493-498.

    :param audioPath: folder containing the audio files we want to extract
        the pitch of (with or without a trailing path separator)
    :param exportPath: folder that receives one ``<name>.csv`` per audio file
    :return: None
    """
    import sys  # local import: only needed to pick the csv file mode below

    sample_rate = 44100.0  # rate assumed by the original time axis
    hop_size = 441  # 10 ms for evaluation

    for file_name in os.listdir(audioPath):
        # Only real WAV files; '._*' entries are skipped as in the original
        # (presumably macOS side files -- confirm).
        if not file_name.endswith('.wav') or file_name.startswith('._'):
            continue

        # Loading audio
        audio = es.EqloudLoader(filename=os.path.join(audioPath, file_name))()

        if '_MIX' in file_name:
            # handle MedleyDB with original names: drop the '_MIX.wav' tail
            file_name = file_name[:-8] + '.wav'

        # MELODIA algorithm
        melodia = es.PredominantPitchMelodia(hopSize=hop_size)
        pitch, _confidence = melodia(audio)
        pitch = np.array(pitch)

        # One time stamp per pitch frame: 0.00, 0.01, 0.02, ...
        times = np.arange(pitch.shape[0]) * hop_size / sample_rate
        pitch_track = np.column_stack((times, pitch))

        # Export predictions
        export_file = os.path.join(exportPath, file_name[:-3] + 'csv')
        # csv.writer needs a binary file on Python 2 and a text file opened
        # with newline='' on Python 3.
        if sys.version_info[0] < 3:
            out = open(export_file, 'wb')
        else:
            out = open(export_file, 'w', newline='')
        with out:
            csv.writer(out).writerows(pitch_track)

        print(file_name + ' computed and exported!')
def melody_extraction_melodia(audioPath, exportPath):
    """Extract the predominant pitch of high-pass filtered WAV files with MELODIA.

    Every ``*.wav`` entry of ``audioPath`` is loaded with Essentia's
    ``EqloudLoader``, passed through ``HPFilter`` with a cutoff value of 700,
    analysed with ``PredominantPitchMelodia`` and exported to ``exportPath``
    as a two-column CSV file: the frame time in seconds and the pitch value
    returned by the algorithm.

    :param audioPath: folder containing the audio files we want to extract
        the pitch of (with or without a trailing path separator)
    :param exportPath: folder that receives one ``<name>.csv`` per audio file
    :return: None
    """
    import sys  # local import: only needed to pick the csv file mode below

    sample_rate = 44100.0  # rate assumed by the original time axis
    hop_size = 441  # 10 ms for evaluation
    cutoff = 700  # cutoff handed to HPFilter

    for file_name in os.listdir(audioPath):
        # Only real WAV files; '._*' entries are skipped as in the original
        # (presumably macOS side files -- confirm).
        if not file_name.endswith('.wav') or file_name.startswith('._'):
            continue

        # Loading audio
        audio = es.EqloudLoader(filename=os.path.join(audioPath, file_name))()

        # Filter audio; rebinding releases the unfiltered signal
        audio = HPFilter(audio, cutoff)

        if '_MIX' in file_name:
            # handle MedleyDB with original names: drop the '_MIX.wav' tail
            file_name = file_name[:-8] + '.wav'

        # MELODIA algorithm
        melodia = es.PredominantPitchMelodia(hopSize=hop_size)
        pitch, _confidence = melodia(audio)
        pitch = np.array(pitch)

        # One time stamp per pitch frame: 0.00, 0.01, 0.02, ...
        times = np.arange(pitch.shape[0]) * hop_size / sample_rate
        pitch_track = np.column_stack((times, pitch))

        # Export predictions
        export_file = os.path.join(exportPath, file_name[:-3] + 'csv')
        # csv.writer needs a binary file on Python 2 and a text file opened
        # with newline='' on Python 3.
        if sys.version_info[0] < 3:
            out = open(export_file, 'wb')
        else:
            out = open(export_file, 'w', newline='')
        with out:
            csv.writer(out).writerows(pitch_track)

        print(file_name + ' computed and exported!')
def onsetFunctionAllRecordings(recordings,
                               textgrid_path,
                               dict_recording_name_mapping,
                               dataset_path,
                               feature_type='mfcc',
                               dmfcc=False,
                               nbf=True,
                               mth='jordi',
                               late_fusion=True):
    """
    Compute the onset detection function (ODF) of every annotated line and
    decode its syllable boundaries.

    For each recording the ground-truth TextGrid and the score are parsed.
    For every line whose score bpm is non-zero:

    * the ODF is either computed from the audio with the CNN model and
      cached as a pickle under ``./obs/<cnnModel_name>/<dataset_path>/``
      (``varin['obs'] == 'tocal'``), or loaded from that cache;
    * with ``late_fusion`` it is combined through ``late_fusion_calc`` with
      the curve of a second model;
    * boundaries are decoded with ``viterbiSegmental2``
      (``varin['decoding'] == 'viterbi'``) or with ``peakPicking`` and
      written to a ``.syll.lab`` file;
    * with ``varin['plot']`` an error-analysis figure is shown.

    :param recordings: names of the recordings to process
    :param textgrid_path: folder of the ground-truth ``.TextGrid`` files
    :param dict_recording_name_mapping: mapping from "fem_01" to standard
        format, see filePath.py
    :param dataset_path: dataset sub-folder used for input and output paths
    :param feature_type: 'mfcc', 'mfccBands1D' or 'mfccBands2D'.
        NOTE(review): not read by this function -- features are always
        extracted with 'mfccBands2D'
    :param dmfcc: delta for 'mfcc'
    :param nbf: context frames
    :param mth: jordi, jordi_horizontal_timbral, jan, jan_chan3
    :param late_fusion: Bool, fuse the ODF with the one of a second model
    :return: None
    """
    # The scaler and the cached curves are trusted, locally produced pickles.
    with open(full_path_mfccBands_2D_scaler_onset, 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)

    # Methods for which the ODF is computed with the CNN on log-mel features.
    cnn_methods = ('jordi', 'jordi_horizontal_timbral', 'jan')

    # Normalised 5-point Hann window used to smooth every onset curve.
    hann = np.hanning(5)
    hann /= np.sum(hann)

    for recording_name in recordings:
        groundtruth_textgrid_file = join(textgrid_path, dict_recording_name_mapping[recording_name]+'.TextGrid')
        score_file = join(aCapella_root, dataset_path, score_path, recording_name+'.csv')
        wav_file = join(aCapella_root, dataset_path, audio_path, recording_name+'.wav')

        if not isfile(score_file):
            print('Score not found: ' + score_file)
            continue

        lineList = textGrid2WordList(groundtruth_textgrid_file, whichTier='line')
        utteranceList = textGrid2WordList(groundtruth_textgrid_file, whichTier='dianSilence')

        # parse lines of groundtruth
        nestedUtteranceLists, numLines, numUtterances = wordListsParseByLines(lineList, utteranceList)

        # parse score
        syllables, pinyins, syllable_durations, bpm = generatePinyin(score_file)

        if varin['obs'] == 'tocal':
            # load audio and extract the features of the whole recording once
            audio_monoloader = ess.MonoLoader(downmix='left', filename=wav_file, sampleRate=fs)()
            if mth in cnn_methods:
                mfcc, mfcc_reshaped = featureExtraction(audio_monoloader,
                                                        scaler,
                                                        int(round(0.025 * fs)),
                                                        dmfcc=dmfcc,
                                                        nbf=nbf,
                                                        feature_type='mfccBands2D')

        for i_obs, line_utterances in enumerate(nestedUtteranceLists):
            # Lines with a zero bpm in the score are skipped.
            if not int(bpm[i_obs]):
                continue

            # line_utterances[0] is the line itself, line_utterances[1] its
            # syllables; both start with their time stamps.
            line_start = line_utterances[0][0]
            line_end = line_utterances[0][1]
            frame_start = int(round(line_start * fs / hopsize))
            frame_end = int(round(line_end * fs / hopsize))

            obs_filename = recording_name + '_' + str(i_obs + 1) + '.pkl'
            full_obs_name = join('./obs', cnnModel_name, dataset_path, obs_filename)

            if varin['obs'] == 'tocal':
                if mth in cnn_methods:
                    mfcc_line = mfcc[frame_start:frame_end]
                    mfcc_reshaped_line = mfcc_reshaped[frame_start:frame_end]
                    mfcc_reshaped_line = np.expand_dims(mfcc_reshaped_line, axis=1)

                    obs = getOnsetFunction(observations=mfcc_reshaped_line,
                                           model=model_keras_cnn_0,
                                           method=mth)
                    obs_i = np.convolve(hann, obs[:, 0], mode='same')

                    # save onset curve
                    print('save onset curve ... ...')
                    obs_dirpath = dirname(full_obs_name)
                    if not exists(obs_dirpath):
                        makedirs(obs_dirpath)
                    with open(full_obs_name, 'wb') as obs_file:
                        pickle.dump(obs_i, obs_file)
            else:
                with open(full_obs_name, 'rb') as obs_file:
                    obs_i = pickle.load(obs_file)

            if late_fusion:
                if varin['obs'] == 'viterbi':
                    # NOTE(review): mfcc_reshaped_line is only assigned when
                    # varin['obs'] == 'tocal', so this branch cannot run as
                    # written; it also passes path_keras_cnn= where the call
                    # above passes model=. Confirm whether it is still needed.
                    obs_2 = getOnsetFunction(observations=mfcc_reshaped_line,
                                             path_keras_cnn=full_path_keras_cnn_1,
                                             method=mth)
                    obs_2_i = np.convolve(hann, obs_2[:, 1], mode='same')
                else:
                    full_obs_name_1 = join('./obs', cnnModel_name_1, dataset_path, obs_filename)
                    with open(full_obs_name_1, 'rb') as obs_file:
                        obs_2_i = pickle.load(obs_file)

                obs_i = late_fusion_calc(obs_i, obs_2_i, mth=2)

            # organize score
            print('Calculating: '+recording_name+' phrase '+str(i_obs))
            # NOTE(review): mth_ODF and fusion are not defined in this
            # function; presumably module-level settings -- confirm they are
            # not meant to be the mth / late_fusion arguments.
            print('ODF Methods: '+mth_ODF+' Late fusion: '+str(fusion))

            time_line = line_end - line_start

            lyrics_line = [ll[2] for ll in line_utterances[1]]
            # Syllable onsets relative to the start of the line.
            groundtruth_syllable = [ll[0]-line_start for ll in line_utterances[1]]

            print('Syllable:')
            print(lyrics_line)

            print('Length of syllables, length of ground truth syllables:')
            print(len(lyrics_line), len(groundtruth_syllable))

            # Score durations, rescaled so that they sum to the line duration.
            duration_score = np.array([float(ds) for ds in syllable_durations[i_obs] if len(ds)])
            duration_score = duration_score * (time_line/np.sum(duration_score))

            lab_basename = recording_name+'_'+str(i_obs+1)+'.syll.lab'
            viterbi_decoding = varin['decoding'] == 'viterbi'
            if viterbi_decoding:
                # segmental decoding; both ends of the curve are set to 1.0
                # (presumably to force a boundary at the line edges)
                obs_i[0] = 1.0
                obs_i[-1] = 1.0
                i_boundary = viterbiSegmental2(obs_i, duration_score, varin)
                filename_syll_lab = join(eval_results_path, dataset_path, lab_basename)
            else:
                i_boundary = peakPicking(1.0-obs_i)
                filename_syll_lab = join(eval_results_path + '_peakPicking', dataset_path, lab_basename)

            # Consecutive boundaries give the start / end time of each segment.
            time_boundray_start = np.array(i_boundary[:-1]) * hopsize_t
            time_boundray_end = np.array(i_boundary[1:]) * hopsize_t

            eval_results_data_path = dirname(filename_syll_lab)
            if not exists(eval_results_data_path):
                makedirs(eval_results_data_path)

            # Only the viterbi result carries a syllable label per segment.
            # list(...) keeps this a real list on Python 3 as well.
            if viterbi_decoding:
                boundaryList = list(zip(time_boundray_start.tolist(),
                                        time_boundray_end.tolist(),
                                        lyrics_line))
            else:
                boundaryList = list(zip(time_boundray_start.tolist(),
                                        time_boundray_end.tolist()))

            # write boundary lab file
            boundaryLabWriter(boundaryList=boundaryList,
                              outputFilename=filename_syll_lab,
                              label=viterbi_decoding)

            if varin['plot']:
                # mfcc_line only exists when the ODF was computed from audio
                # in this run (varin['obs'] == 'tocal').
                _plot_line_analysis(recording_name, i_obs, mfcc_line,
                                    groundtruth_syllable, obs_i, i_boundary,
                                    duration_score)


def _plot_line_analysis(recording_name, i_obs, mfcc_line, groundtruth_syllable,
                        obs_i, i_boundary, duration_score):
    """Show the error-analysis figure of one line: features with ground-truth
    onsets, ODF with decoded boundaries, and the score durations."""
    frame_duration = hopsize / float(fs)

    plt.figure(figsize=(16, 6))

    # Top: 80 feature columns of the line with the ground-truth onsets.
    ax1 = plt.subplot(3, 1, 1)
    y = np.arange(0, 80)
    x = np.arange(0, mfcc_line.shape[0]) * frame_duration
    # Columns 80*11 .. 80*12 are shown (presumably one context frame of the
    # stacked log-mel bands -- confirm).
    plt.pcolormesh(x, y, np.transpose(mfcc_line[:, 80 * 11:80 * 12]))
    for gs in groundtruth_syllable:
        plt.axvline(gs, color='r', linewidth=2)
    ax1.set_ylabel('Mel bands', fontsize=12)
    ax1.get_xaxis().set_visible(False)
    ax1.axis('tight')
    plt.title('Calculating: '+recording_name+' phrase '+str(i_obs))

    # Middle: onset detection function with the decoded boundaries.
    ax2 = plt.subplot(312, sharex=ax1)
    plt.plot(np.arange(0, len(obs_i)) * frame_duration, obs_i)
    for ib in i_boundary:
        plt.axvline(ib * frame_duration, color='r', linewidth=2)
    ax2.set_ylabel('ODF', fontsize=12)
    ax2.axis('tight')

    # Bottom: one rectangle per score syllable, laid out as a staircase.
    ax3 = plt.subplot(313, sharex=ax1)
    print(duration_score)
    time_start = 0
    for ii_ds, ds in enumerate(duration_score):
        ax3.add_patch(
            patches.Rectangle(
                (time_start, ii_ds),  # (x,y)
                ds,  # width
                1,  # height
            ))
        time_start += ds
    ax3.set_ylim((0, len(duration_score)))

    plt.show()
def runAnalysis(self):
    """Run the low-level extractors on every sample that is not excluded.

    For each sample the audio is loaded twice (plain and equal-loudness
    filtered), trimmed to the sample's start/stop times and optionally
    windowed from the attack. Temporal and spectral descriptors are then
    written to the ``Feature`` row matching the sample and the current
    window settings (created when missing). Samples that fail to load are
    marked as excluded and skipped.
    """
    # (attribute prefix, index in the LowLevelSpectralExtractor output) of
    # the descriptors stored as a mean and as a '<prefix>_dev' deviation.
    neq_scalar_features = (
        ('pitch_salience', 8),
        ('spectral_complexity', 12),
        ('spectral_crest', 13),
        ('spectral_decrease', 14),
        ('spectral_energy', 15),
        ('spectral_energyband_low', 16),
        ('spectral_energyband_middle_low', 17),
        ('spectral_energyband_middle_high', 18),
        ('spectral_energyband_high', 19),
        ('spectral_flatness_db', 20),
        ('spectral_flux', 21),
        ('spectral_rms', 22),
        ('spectral_rolloff', 23),
        ('spectral_strongpeak', 24),
        ('zero_crossing_rate', 25),
        ('inharmonicity', 26),
    )
    bark_shape_features = (
        ('bark_kurtosis', 1),
        ('bark_skewness', 2),
        ('bark_spread', 3),
    )
    # Same layout for the LowLevelSpectralEqloudExtractor output.
    eq_scalar_features = (
        ('spectral_centroid', 3),
        ('spectral_kurtosis', 4),
        ('spectral_skewness', 5),
        ('spectral_spread', 6),
    )
    num_bark_bands = 27
    num_mfcc = 13
    num_tristimulus = 3

    # Frame size & hop size
    frameSize = 2048
    hopSize = 256

    # Get all samples referenced in DB, except for those that have been
    # marked as samples to exclude.
    # TODO: Need to clarify whether or not sample packs should be excluded
    # if we can't find enough info on them, for now including all
    # samplepacks.
    samples = Sample.objects.all().filter(exclude=False)
    numSamples = len(samples)
    self.stdout.write("Running low-level extractors on %s samples. " % (numSamples))

    done = 0.0
    for sample in samples:
        # Get audio (plain and equal-loudness filtered) and trim the clip
        try:
            neqAudio = es.MonoLoader(filename=sample.path)()
            eqAudio = es.EqloudLoader(filename=sample.path)()
            clip = es.Trimmer(startTime=sample.start_time,
                              endTime=sample.stop_time)
            neqAudio = clip(neqAudio)
            eqAudio = clip(eqAudio)
        except RuntimeError as esExcept:
            self.stderr.write("%s\n" % esExcept)
            self.stderr.write(
                "%s failed to load. Excluding sample from further analysis" %
                sample.path)
            sample.exclude = True
            sample.save()
            done = done + 1
            continue

        # Amplitude envelope of sample
        audioEnv = es.Envelope()(eqAudio)

        # Find attack phase and LAT
        lat, attackStart, attackEnd = es.LogAttackTime()(audioEnv)

        # Temporal Centroid on entire sample length
        tc = self.temporal_centroid(eqAudio)

        # Time segmentation starting point: the attack threshold is capped
        # at 90%; from 90 upwards the window starts at the end of the attack.
        start_in_attack = self.windowStart < 90
        if start_in_attack:
            threshold_percent = self.windowStart
        else:
            threshold_percent = 90
        windowFunc = es.LogAttackTime(
            startAttackThreshold=float(threshold_percent) / 100)
        _, windowStart, windowEnd = windowFunc(audioEnv)
        if not start_in_attack:
            windowStart = windowEnd

        if self.windowLength > 0:
            # Window from onset (windowLength is divided by 1000 here)
            window = es.Trimmer(
                startTime=windowStart,
                endTime=windowStart + (float(self.windowLength) / 1000))
            eqAudio = window(eqAudio)
            neqAudio = window(neqAudio)

        # Get analysis object for this sample
        try:
            analysisObject = Feature.objects.get(
                sample=sample,
                window_length=self.windowLength,
                window_start=self.windowStart)
        except Feature.DoesNotExist:
            analysisObject = Feature(sample=sample,
                                     window_length=self.windowLength,
                                     window_start=self.windowStart)

        analysisObject.lat = lat
        analysisObject.rms = self.rms(eqAudio)
        analysisObject.temporal_centroid = tc

        # Spectral extractor without equal loudness filter
        neq = es.LowLevelSpectralExtractor(
            frameSize=frameSize, hopSize=hopSize)(neqAudio)

        # Bark bands: bark_<k>_mean / bark_<k>_dev
        bark_mean = np.mean(neq[0], axis=0)
        for band in range(num_bark_bands):
            setattr(analysisObject, 'bark_%d_mean' % (band + 1),
                    bark_mean[band])
        bark_dev = np.std(neq[0], axis=0)
        for band in range(num_bark_bands):
            setattr(analysisObject, 'bark_%d_dev' % (band + 1),
                    bark_dev[band])

        for prefix, index in bark_shape_features:
            setattr(analysisObject, prefix, np.mean(neq[index]))
        for prefix, index in bark_shape_features:
            setattr(analysisObject, prefix + '_dev', np.std(neq[index]))

        analysisObject.hfc = np.mean(neq[4])
        analysisObject.hfc_dev = np.std(neq[4])

        # MFCCs: mfcc_<k>_mean / mfcc_<k>_dev
        mfcc_mean = np.mean(neq[5], axis=0)
        for coeff in range(num_mfcc):
            setattr(analysisObject, 'mfcc_%d_mean' % (coeff + 1),
                    mfcc_mean[coeff])
        mfcc_dev = np.std(neq[5], axis=0)
        for coeff in range(num_mfcc):
            setattr(analysisObject, 'mfcc_%d_dev' % (coeff + 1),
                    mfcc_dev[coeff])

        # Remaining descriptors: all means first, then all deviations
        for prefix, index in neq_scalar_features:
            setattr(analysisObject, prefix, np.mean(neq[index]))
        for prefix, index in neq_scalar_features:
            setattr(analysisObject, prefix + '_dev', np.std(neq[index]))

        # Tristimulus: tristimulus_<k> / tristimulus_<k>_dev
        tristimulus = np.mean(neq[27], axis=0)
        for k in range(num_tristimulus):
            setattr(analysisObject, 'tristimulus_%d' % (k + 1),
                    tristimulus[k])
        tristimulus_dev = np.std(neq[27], axis=0)
        for k in range(num_tristimulus):
            setattr(analysisObject, 'tristimulus_%d_dev' % (k + 1),
                    tristimulus_dev[k])

        # Spectral extractor with equal loudness filter
        eq = es.LowLevelSpectralEqloudExtractor(
            frameSize=frameSize, hopSize=hopSize)(eqAudio)
        for prefix, index in eq_scalar_features:
            setattr(analysisObject, prefix, np.mean(eq[index]))
        for prefix, index in eq_scalar_features:
            setattr(analysisObject, prefix + '_dev', np.std(eq[index]))

        analysisObject.save()

        # Progress, rewritten in place on the same terminal line
        done = done + 1
        self.stdout.write("\t\t%2.2f%%" % (100.0 * (done / float(numSamples))),
                          ending='\r')
        self.stdout.flush()

    self.stdout.write("\r", ending='\r')
    self.stdout.flush()
# Build one table row per recording of `stroke` (file name, set, category,
# decay rate and sustain) and plot the table per set and category.
sets = os.listdir(path)
rows = []
for seti in sets:
    set_dir = os.path.join(path, seti)
    if not os.path.isdir(set_dir):
        continue
    for categ in category:
        categ_dir = os.path.join(set_dir, stroke, categ)
        if not os.path.exists(categ_dir):
            continue
        files_categ = os.listdir(categ_dir)
        for wave in files_categ:
            fileName = os.path.join(categ_dir, wave)
            audio = estd.EqloudLoader(filename=fileName)()
            dict_temp = {
                'Filename': wave,
                'Set': seti,
                'Category': categ,
                # first value returned by band_decay for this recording
                'Decay Rate': band_decay(audio, rate, 1)[0],
                'Sustain': sustain_durn(audio, rate),
            }
            rows.append(dict_temp)

# Build the frame once: DataFrame.append copied the whole table on every call
# and no longer exists in pandas 2.x.
data_df = pd.DataFrame(rows)

plt.title(stroke)
# NOTE(review): no "Centroid1" column is created above (only Filename, Set,
# Category, Decay Rate and Sustain) -- confirm which column should be plotted.
sns.swarmplot(x="Set", y="Centroid1", hue="Category", data=data_df, palette="Set2", dodge=True)
import essentia.standard as estd

# NOTE(review): the filename is empty in the source -- fill in the recording
# to analyse.
loader = estd.EqloudLoader(filename='')
audio = loader()
energy = estd.Energy()

# Frame-wise energy of the whole signal.
en = []
for frame in estd.FrameGenerator(audio, frameSize=1024, hopSize=512, startFromZero=True):
    en.append(energy(frame))

emax = max(en)
ep = en.index(emax)  # frame index of the energy peak
th = emax / 2

# Frame-wise energy of the first 0.6 s. The slice bound must be an integer
# sample count (a float bound raises TypeError on current NumPy); 44100 is
# the sample rate assumed by the original expression.
head_samples = int(round(0.6 * 44100))
e = []
for frame in estd.FrameGenerator(audio[:head_samples], frameSize=1024, hopSize=512, startFromZero=True):
    e.append(energy(frame))

# Look for a frame-to-frame energy rise above 0.01. When the rise lies more
# than 2 frames before the peak, the envelope between the two is cut out.
# NOTE(review): the source layout was flattened; `break` is kept with the
# splice -- confirm against the original indentation.
for i in range(0, len(e) - 1):
    d = e[i + 1] - e[i]
    if d > 0.01:
        p = e.index(e[i + 1])
        if (ep - p) > 2:
            en = e[:p] + en[ep:]
            break