import numpy as np
import sys

sys.path.append('../')
from datagrabber import extractAndSave, extractAndSaveYoutubeData

IEMOCAP_LOCATION = "../../../../local"
YOUTUBE_LOCATION = "../../../../local/wild_dataset/10_to_20_seconds"

# Column labels, listed in the same order as the values amplitude() returns.
FEATURE_LABELS = ("max", "mean", "var")


def amplitude(frame, filename):
    '''
    Summarise the sample values of a single frame.

    frame:    the samples of the frame (anything NumPy can reduce over).
    filename: not referenced in this function; presumably present because
              the datagrabber helpers expect a two-argument callback
              (TODO confirm against datagrabber).

    Returns a three-element list: [maximum, mean, variance].
    '''
    peak = np.amax(frame)
    mean = np.average(frame)
    variance = np.var(frame)
    return [peak, mean, variance]


# Extract amplitude from IEMOCAP and YouTube datasets.
# Each call receives its own fresh copy of the label list.
extractAndSave(amplitude, list(FEATURE_LABELS), IEMOCAP_LOCATION, 2, True, True)
extractAndSaveYoutubeData(amplitude, list(FEATURE_LABELS), YOUTUBE_LOCATION, 2, True, True)
greatest_amp = (spectrum.peaks()[0])[0] #a counter will be used to iterate over the positions of the list list_pos = 0 #the pairs with amplitudes which are within the threshold will be copied to a new list selected_pairs = [] while True: #select the next pair from the list pair = (spectrum.peaks()[list_pos]) #test whether the pair's amplitude is within the threshold within_threshold = (pair[0] > (greatest_amp * threshold)) #if not within the threshold the loop can break as all followings pairs will have lower amplitudes if within_threshold == False: break #if within the threshold add the pair to the list selected_pairs.append(pair) #increment the counter for the loop list_pos = list_pos + 1 # selected_pairs is now a list of the pairs which have a prominent amplitude sorted_by_Hz = sorted(selected_pairs, key=lambda tup: tup[1]) # the lowest Hz should now be F0 if len(sorted_by_Hz) == 0: return [0] else: return [sorted_by_Hz[0][1]] # Extract f0 from IEMOCAP and YouTube datasets extractAndSave(f0, ['f0'], IEMOCAP_LOCATION, 2, True, True) extractAndSaveYoutubeData(f0, ["f0"], YOUTUBE_LOCATION, 2, True, True)
Created on Fri Nov 11 12:18:26 2016 @author: Olly Styles """ import sys sys.path.append('../') from datagrabber import extractAndSave from datagrabber import extractAndSave, extractAndSaveYoutubeData IEMOCAP_LOCATION = "../../../../local" YOUTUBE_LOCATION = "../../../../local/wild_dataset/10_to_20_seconds" def zerocrossing(frame, audiofile): ''' Returns the number of times the signal crosses the y-axis for a given frame. ''' n = 0 for i in range(0, len(frame) - 1): if (frame[i] > 0 != frame[i + 1] > 0): # != is xor operator in python n += 1 return [n] # Extract energy from IEMOCAP and YouTube datasets extractAndSave(zerocrossing, ['zerocrossing'], IEMOCAP_LOCATION, 2, True, True) extractAndSaveYoutubeData(zerocrossing, ["zerocrossing"], YOUTUBE_LOCATION, 2, True, True)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 12 12:04:22 2016

@author: Tom
"""

import sys

sys.path.append('../')
from datagrabber import extractAndSave, extractAndSaveYoutubeData
import numpy as np

IEMOCAP_LOCATION = "../../../../local"
YOUTUBE_LOCATION = "../../../../local/wild_dataset/10_to_20_seconds"


def energy(frame, audiofile):
    '''
    Return energy of a frame, defined as the sum of squared amplitudes.

    frame:     real-valued samples of the frame. The reduction runs over the
               first axis, so a 1-D frame gives a scalar and a frame with
               more dimensions gives one sum per remaining index.
    audiofile: not referenced in this function.

    Returns a single-element list [energy].
    '''
    # Promote to float64 before squaring: if the frame arrives as a
    # fixed-width integer array (e.g. int16 PCM), squaring in that dtype
    # wraps around and yields a meaningless energy.
    samples = np.asarray(frame, dtype=np.float64)
    return [np.sum(samples * samples, axis=0)]


# Extract energy from IEMOCAP and YouTube datasets
extractAndSave(energy, ['energy'], IEMOCAP_LOCATION, 2, True, True)
extractAndSaveYoutubeData(energy, ["energy"], YOUTUBE_LOCATION, 2, True, True)
m = aub.mfcc(fftsize, 40, coefficientsCount, sampleRate) #first we need to convert this frame to the power spectrum using a DFT p = aub.pvoc(fftsize, int(frame_size)) #in order to compute DFT the frame must be of a length which is a power of 2, so expand to fftsize using zero padding if len(frame) != 16000: frame = np.pad(frame, (0, frame_size - len(frame)), 'constant', constant_values=0) #compute the power spectrum spec = p(frame.astype(np.float32)) #compute the MFCC, which returns the coefficents of each of the 12 coefficents mfcc_out = m(spec) return mfcc_out # 1. Frame the signal into short frames. # 2. For each frame calculate the periodogram estimate of the power spectrum. # 3. Apply the mel filterbank to the power spectra, sum the energy in each filter. # 4. Take the logarithm of all filterbank energies. # 5. Take the DCT of the log filterbank energies. # 6. Keep DCT coefficients 2-13, discard the rest #http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/#computing-the-mel-filterbank # Extract MFCC from IEMOCAP and YouTube datasets extractAndSave(mfcc, labels, IEMOCAP_LOCATION, 2, True, True) extractAndSaveYoutubeData(mfcc, labels, YOUTUBE_LOCATION, 2, True, True)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 12 12:04:22 2016

@author: Tom
"""

import sys

sys.path.append('../sourceFiles/')
import thinkplot as tp
import thinkdsp as td

sys.path.append('../')
from datagrabber import extractAndSave, extractAndSaveYoutubeData

IEMOCAP_LOCATION = "../../../../local"
YOUTUBE_LOCATION = "../../../../local/wild_dataset/10_to_20_seconds"


def pitch(frame, audiofile):
    '''
    Return the second element of the first entry in the frame's spectrum
    peak list, wrapped in a single-element list.

    frame:     samples of the frame, wrapped in a thinkdsp Wave.
    audiofile: not referenced in this function.
    '''
    # Assumes peaks() yields (amplitude, frequency) pairs ordered by
    # decreasing amplitude, so entry 0 is the dominant component -- TODO
    # confirm against thinkdsp.
    # NOTE(review): the Wave is built without an explicit sample rate, so the
    # frequency scale depends on thinkdsp's default -- confirm it matches the
    # audio being processed.
    peak_list = td.Wave(frame).make_spectrum().peaks()
    strongest = peak_list[0]
    return [strongest[1]]


#extractAndSave(pitch,['pitch'],IEMOCAP_LOCATION,2,False)
extractAndSaveYoutubeData(pitch, ["pitch"], YOUTUBE_LOCATION, 2)
sys.path.append('../')
from datagrabber import extractAndSave, extractAndSaveYoutubeData

IEMOCAP_LOCATION = "../../../../local"
YOUTUBE_LOCATION = "../../../../local/wild_dataset/10_to_20_seconds"


def cepstrum(frame, filename):
    '''
    Computes the cepstrum of the frame: the real part of the inverse DFT of
    the base-2 logarithm of the power spectrum (|DFT|^2) of the frame.
    The max, average and variance of the resulting sequence are returned as
    features.

    frame:    samples of the frame (must be non-empty for the FFT).
    filename: not referenced in this function.

    Returns a three-element list: [maximum, mean, variance].
    '''
    # DFT of the frame
    spectrum = np.fft.fft(frame)
    # |DFT|^2, i.e. the power spectrum
    power = np.abs(spectrum) ** 2
    # Clamp to the smallest positive normal float of this dtype before the
    # logarithm: a zero bin (silent or zero-padded frame) would otherwise
    # give log2(0) = -inf and turn every returned feature into NaN.
    # Non-zero bins are left untouched.
    floor = np.finfo(power.dtype).tiny
    # the log approximates the non-linearity of human hearing
    log_power = np.log2(np.maximum(power, floor))
    # take the real part of the inverse DFT as the feature sequence
    cepstral = np.fft.ifft(log_power).real
    return [np.amax(cepstral), np.average(cepstral), np.var(cepstral)]


# Extract cepstrum from IEMOCAP and YouTube datasets
extractAndSave(cepstrum, ["max", "mean", "var"], IEMOCAP_LOCATION, 2, True, True)
extractAndSaveYoutubeData(cepstrum, ["max", "mean", "var"], YOUTUBE_LOCATION, 2, True, True)