def get_RMS_over_time(audio_file, window_size = 1024, in_db = True):
    """
    Compute an RMS time series over consecutive, non-overlapping windows.

    parameters:
        audio_file : file to analyse (aiff, falling back to wav)
        window_size : number of samples per RMS window
        in_db : if True, values are in dB (forwarded to get_rms_from_data)

    returns : time series with the RMS and the time (time_tags, values)

    warning : this function only works for mono files
    """
    # bug fix: removed unused `import glob` and `from scipy import signal`
    from scikits.audiolab import wavread, aiffread
    import numpy as np

    # try aiff first; audiolab raises ValueError on a format mismatch
    try:
        sound_in, fs, enc = aiffread(audio_file)
    except ValueError:
        sound_in, fs, enc = wavread(audio_file)

    begin = 0
    values = []
    time_tags = []
    # non-overlapping windows; a final partial window is dropped
    while (begin + window_size) < len(sound_in):
        data = sound_in[begin : begin + window_size]
        # tag each value with the center of its window, in seconds
        # (plain float() instead of the removed np.float alias)
        time_tag = (begin + (window_size / 2)) / float(fs)
        values.append(get_rms_from_data(data, in_db = in_db))
        time_tags.append(time_tag)
        begin = begin + window_size
    return time_tags, values
def basefreq(audiofile): """ This function reads in the audio file and does the hann windowed fft of the right input. It then smooths the output using a gaussian filter and then finds the peaks. It returns the peaks in the right audio channel since testing showed there was no significant difference in the two. """ #read the data into an ndarray using scikits-audiolab data, rate, enc = al.aiffread(audiofile) #split the left and right channel datar = data[:,1] datal = data[:,0] #take the fft of both of the channels with the hann window applied #the hann window reduces spectral leakage in the FFT dftr = abs(fft.fft(datar*signal.hann(len(datar)))) dftl = abs(fft.fft(datal*signal.hann(len(datal)))) #compute the frequencies in the FFT freq = float(rate)/float(len(datar)) freqs = np.arange(len(dftr)/2+99)*freq dftr = dftr[0:np.size(dftr)/2] dftl = dftl[0:np.size(dftr)/2] #smooth the fft with a gaussian c = signal.gaussian(100,20) dftr = signal.convolve(dftr,c) dftl = signal.convolve(dftl,c) #find the significant peaks in each channel peaksr = findpeaks(dftr,freqs) peaksl = findpeaks(dftl,freqs) #plot the output fft for testing #plt.plot(freqs,dftr) #plt.show() #print peaksr return peaksr
def aif_to_wav(source, target): """ source : source audio file target : target audio file """ try: x, fs, enc = aiffread(str(source)) WavFileName = target wavwrite(x, WavFileName, fs, enc='pcm24') except: print "File is not aif" pass
def find_silence(audio_file, threshold = -65, wnd_size = 16384):
    """
    Find segments of silence (RMS < threshold dB) in the sound file.

    parameters:
        audio_file : path of the sound file (aiff, falling back to wav)
        threshold : RMS level in dB below which a window counts as silent
        wnd_size : number of samples per analysis window

    returns : silence_tags, a list of [begining, end] pairs in seconds
    """
    # try aiff first; fall back to wav on any read error
    try:
        x, fs, enc = aiffread(str(audio_file))
    except:
        x, fs, enc = wavread(str(audio_file))
    index = 0
    NbofWrittendFiles = 1  # NOTE(review): never used in this function
    silence_tags = []
    while index + wnd_size < len(x):
        DataArray = x[index: index + wnd_size]
        # window RMS, converted to dB via lin2db (defined elsewhere)
        rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
        rms = lin2db(rms)
        index = wnd_size + index
        if rms < threshold:
            # entered a silent region: scan forward window by window until
            # the level rises above the threshold or the file ends
            end = 0
            begining = index
            index = wnd_size + index
            while rms < threshold:
                if index + wnd_size < len(x):
                    index = wnd_size + index
                    DataArray = x[index: index + wnd_size]
                    rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
                    rms = lin2db(rms)
                    end = index
                else:
                    break
            # keep only sufficiently long silences
            # NOTE(review): fs / 8 is 125 ms, but the original comment said
            # "over 250 ms" — confirm which duration was intended
            if (end - begining) > (fs / 8) :
                # widen the segment by one window each side, clamped to file
                begining = begining - wnd_size
                if begining < 0:
                    begining = 0
                end = end + wnd_size
                if end > len(x):
                    end = len(x)
                #samples to seconds, minutes, hours
                begining_s = begining/float(fs)
                end_s = end/float(fs)
                silence_tags.append([begining_s, end_s])
    return silence_tags
def IndexFileInFolder(FolderName): for file in glob.glob(FolderName + "/*.wav"): # Wav Files x, fs, enc = aiffread(str(file)) WndSize = 16384 rmsTreshhold = -50 index = 0 NbofWrittendFiles = 1 while index + WndSize < len(x): DataArray = x[index:index + WndSize] rms = np.sqrt(np.mean(np.absolute(DataArray)**2)) rms = Lin2db(rms) index = WndSize + index if rms > -55: end = 0 begining = index index = WndSize + index while rms > -55: if index + WndSize < len(x): index = WndSize + index DataArray = x[index:index + WndSize] rms = np.sqrt(np.mean(np.absolute(DataArray)**2)) rms = Lin2db(rms) end = index else: break #if file is over 500 ms long, write it if (end - begining) > (fs / 2): duree = (end - begining) / float(fs) print "duree : " + str(duree) begining = begining - WndSize if begining < 0: begining = 0 end = end + WndSize if end > len(x): end = len(x) name = os.path.splitext(str(file))[0] name = os.path.basename(name) wavwrite(x[begining:end], "Indexed/" + "/" + FolderName + "/" + name + "_" + str(NbofWrittendFiles) + ".wav", fs, enc='pcm24') NbofWrittendFiles = NbofWrittendFiles + 1
def get_spectral_centroid(audio_file, window_size = 256, noverlap = 0, plot_specgram = False):
    """
    Compute the spectral centroid over time.

    parameters:
        audio_file : file to analyse (aiff, falling back to wav)
        window_size : window size (nperseg) for FFT computing
        noverlap : number of overlapping samples between windows
        plot_specgram : if True, plot the spectrogram of the analysis

    returns : time series with the spectral centroid and the time (t, centroids)

    warning : this function only works for mono files
    """
    # bug fix: removed unused `import glob`
    from scikits.audiolab import wavread, aiffread
    from scipy import signal

    try:
        sound_in, fs, enc = aiffread(audio_file)
    except ValueError:
        sound_in, fs, enc = wavread(audio_file)

    # magnitude spectrogram: one spectrum per analysis frame
    f, t, Sxx = signal.spectrogram(sound_in
                                   , fs
                                   , nperseg = window_size
                                   , noverlap = noverlap
                                   , scaling ='spectrum'
                                   , mode = 'magnitude'
                                   )

    if plot_specgram:
        # bug fix: the original called plt.figure() immediately before
        # plt.subplots(), which opened an extra empty figure window
        fig, ax = plt.subplots()
        plt.pcolormesh(t, f, Sxx, cmap = 'nipy_spectral')
        ax.axis('tight')
        plt.ylabel('Frequency [Hz]')
        plt.xlabel('Time [sec]')
        plt.title("Normal FFT of audio signal")
        plt.show()

    # one centroid per frame; Sxx columns are frames, rows are freq bins
    centroid_list = [centroid(spectrum, f) for spectrum in np.transpose(Sxx)]
    return t, centroid_list
# close the generated C++ header and write it to disk
header += "}; // end namespace"
text_file = open("test-headers/" + variableName + ".h", "w")
text_file.write(header)
text_file.close()
# get all wav files
for fileName in os.listdir("test-audio"):
    if fileName.endswith(".wav") or fileName.endswith(".aif"):
        # pick the matching audiolab reader for the extension
        if fileName.endswith(".wav"):
            audioSignal, fs, enc = wavread("test-audio/" + fileName)
            fileFormat = "wav"
        elif fileName.endswith(".aif"):
            audioSignal, fs, enc = aiffread("test-audio/" + fileName)
            fileFormat = "aif"
        else:
            assert (False)
        # mono data comes back 1-D; multichannel as (samples, channels)
        if len(audioSignal.shape) == 1:
            numChannels = 1
        elif len(audioSignal.shape) == 2:
            numChannels = audioSignal.shape[1]
        else:
            assert (False)
        #print fileName, enc
        # only 8-bit encodings are turned into headers here
        if enc == "pcmu8" or enc == "pcms8":
            makeHeader(fileName, audioSignal, numChannels, 8, fs, fileFormat)
def index_wav_file(source, rms_threshold=-50, WndSize=16384, target_folder="Indexed"): """ input: source : fsource audio file rms_threshold : this is the threshold WndSize : window size to compue the RMS on target_folder : folder to save the extracted sounds in This function separates all the sentences inside an audiofile. It takes each sentence and put it into one audio file inside target_folder with the name target_nb The default parameters were tested with notmal speech. Only works if file is at least 500 ms long, which can be tuned You can change the rms threshold to tune the algorithm """ try: x, fs, enc = aiffread(str(source)) except: x, fs, enc = wavread(str(source)) index = 0 NbofWrittendFiles = 1 while index + WndSize < len(x): DataArray = x[index:index + WndSize] rms = np.sqrt(np.mean(np.absolute(DataArray)**2)) rms = lin2db(rms) index = WndSize + index if rms > rms_threshold: end = 0 begining = index index = WndSize + index while rms > rms_threshold: if index + WndSize < len(x): index = WndSize + index DataArray = x[index:index + WndSize] rms = np.sqrt(np.mean(np.absolute(DataArray)**2)) rms = lin2db(rms) end = index else: break #if file is over 500 ms long, write it if (end - begining) > (fs / 2): duree = (end - begining) / float(fs) print "duree : " + str(duree) begining = begining - WndSize if begining < 0: begining = 0 end = end + WndSize if end > len(x): end = len(x) print file sound_tag = os.path.basename(source) sound_tag = os.path.splitext(sound_tag)[0] try: os.mkdir(target_folder) except: pass #write(filename = target_folder+"/"+ sound_tag + "_" + str(NbofWrittendFiles)+".wav",rate = fs, data= x[begining:end]) wavwrite(x[begining:end], target_folder + "/" + sound_tag + "_" + str(NbofWrittendFiles) + ".wav", fs, enc='pcm24') NbofWrittendFiles = NbofWrittendFiles + 1
def extract_sentences_tags(source, rms_threshold=-50, WndSize=16384, overlap=8192):
    """
    Find the sentences inside an audio file and return their time tags.

    Segments whose RMS stays above rms_threshold for at least 500 ms are
    reported. The default parameters were tested with normal speech; tune
    rms_threshold to adjust the detection.

    input:
        source : source audio file (aiff, falling back to wav)
        rms_threshold : detection threshold in dB
        WndSize : window size to compute the RMS on
        overlap : nb of overlap samples (currently unused — the overlap
                  stepping is commented out below)

    returns:
        tags : pairs of [begining, end] as "h:m:s.micro" strings
        vid_lengths : matching segment lengths in the same format
    """
    try:
        x, fs, enc = aiffread(str(source))
    except:
        x, fs, enc = wavread(str(source))
    index = 0
    NbofWrittendFiles = 1
    tags = []
    vid_lengths = []
    while index + WndSize < len(x):
        DataArray = x[index:index + WndSize]
        # window RMS in dB (lin2db defined elsewhere)
        rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
        rms = lin2db(rms)
        index = WndSize + index
        #index += overlap
        if rms > rms_threshold:
            # entered a loud region: scan forward until it goes quiet
            end = 0
            begining = index
            index = WndSize + index
            #index += overlap
            while rms > rms_threshold:
                if index + WndSize < len(x):
                    index = WndSize + index
                    #index += overlap
                    DataArray = x[index:index + WndSize]
                    rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
                    rms = lin2db(rms)
                    end = index
                else:
                    break
            #if file is over 500 ms long, write it
            if (end - begining) > (fs / 2):
                # widen by one window each side, clamped to the file
                begining = begining - WndSize
                if begining < 0:
                    begining = 0
                end = end + WndSize
                if end > len(x):
                    end = len(x)
                #samples to seconds, minutes, hours
                begining_s = begining / float(fs)
                end_s = end / float(fs)
                len_s = (end - begining) / float(fs)
                print "duree : " + str(len_s)
                # format via datetime arithmetic from a dummy epoch
                from datetime import timedelta, datetime
                begining_s = datetime(1, 1, 1) + timedelta(seconds=begining_s)
                end_s = datetime(1, 1, 1) + timedelta(seconds=end_s)
                len_s = datetime(1, 1, 1) + timedelta(seconds=len_s)
                # NOTE(review): "%3d" is a minimum width, not zero-padding,
                # and .microsecond can be up to 999999 — "%03d" of
                # milliseconds may have been intended; confirm consumers
                begining_s = "%d:%d:%d.%3d" % (
                    begining_s.hour, begining_s.minute, begining_s.second,
                    begining_s.microsecond)
                end_s = "%d:%d:%d.%3d" % (end_s.hour, end_s.minute, end_s.second,
                                          end_s.microsecond)
                len_s = "%d:%d:%d.%3d" % (len_s.hour, len_s.minute, len_s.second,
                                          len_s.microsecond)
                print "la longueur est"
                print len_s
                tags.append([begining_s, end_s])
                vid_lengths.append(len_s)
                NbofWrittendFiles = NbofWrittendFiles + 1
    return tags, vid_lengths
# close the generated C++ header and write it to disk
header += "\n\n"
header += "}; // end namespace"
text_file = open ("test-headers/" + variableName + ".h", "w")
text_file.write (header)
text_file.close()
# get all wav files
for fileName in os.listdir("test-audio"):
    if fileName.endswith(".wav") or fileName.endswith(".aif"):
        # pick the matching audiolab reader for the extension
        if fileName.endswith(".wav"):
            audioSignal, fs, enc = wavread ("test-audio/" + fileName)
            fileFormat = "wav"
        elif fileName.endswith(".aif"):
            audioSignal, fs, enc = aiffread ("test-audio/" + fileName)
            fileFormat = "aif"
        else:
            assert (False)
        # mono data comes back 1-D; 2-D is assumed stereo here
        # NOTE(review): hardcodes 2 channels for any 2-D signal, unlike the
        # sibling script which reads audioSignal.shape[1] — confirm intent
        if len (audioSignal.shape) == 1:
            numChannels = 1
        elif len (audioSignal.shape) == 2:
            numChannels = 2
        else:
            assert (False)
        #print fileName, enc
        # only 8-bit encodings are turned into headers here
        if enc == "pcmu8" or enc == "pcms8":
            makeHeader (fileName, audioSignal, numChannels, 8, fs, fileFormat)