def cut_silence_in_sound(source, target, rmsTreshhold=-40, WndSize=128):
    """
    Trim the leading and trailing silence of a wav file.

    The signal is scanned with half-overlapping windows of ``WndSize``
    samples, once from the start and once from the end. Each scan stops at
    the first window whose RMS level (as converted by ``lin2db``) is above
    ``rmsTreshhold``. The samples between the two stop points are written
    to ``target`` with ``enc='pcm24'``.

    It is useful for normalizing the length of the audio stimuli in an
    experiment. The default parameters were tested with normal speech.

    source       : path of the wav file to read
    target       : path of the wav file to write
    rmsTreshhold : level a window must exceed to count as sound
    WndSize      : analysis window length, in samples

    The start and end times of the kept region (in seconds) are printed.

    Raises ValueError when no analysed window exceeds the threshold.
    """
    x, fs, enc = wavread(str(source))
    nb_samples = len(x)
    wnd = int(WndSize)
    # Integer hop so that every offset can be used directly as a slice index.
    hop = max(1, wnd // 2)

    def first_loud_offset(samples):
        """Return the scan position reached when a loud window is found, else None."""
        index = 0
        while index + wnd < nb_samples:
            window = samples[index:index + wnd]
            rms = lin2db(np.sqrt(np.mean(np.absolute(window) ** 2)))
            # The cut point is one hop past the start of the loud window,
            # as in the historical implementation.
            index += hop
            if rms > rmsTreshhold:
                return index
        return None

    beginning = first_loud_offset(x)
    # Scanning the time-reversed signal gives the distance from the end.
    final = first_loud_offset(x[::-1])
    if beginning is None or final is None:
        raise ValueError(
            "no window above %s found in %s" % (rmsTreshhold, source))

    end = nb_samples - final
    print(beginning / float(fs))
    print(end / float(fs))

    # Write the sound source without the silences.
    wavwrite(x[beginning:end], target, fs, enc='pcm24')
def find_silence(audio_file, threshold=-65, wnd_size=16384):
    """
    Find the segments of silence (level < threshold) in a sound file.

    The file is read with ``aiffread`` and, if that fails, with
    ``wavread``. It is then analysed in consecutive windows of
    ``wnd_size`` samples whose RMS level is converted with ``lin2db``.

    audio_file : path of the sound file
    threshold  : level under which a window counts as silent
    wnd_size   : analysis window length, in samples

    Returns a list of ``[begin, end]`` pairs expressed in seconds. A run is
    reported only when the distance between the window following the
    detection and the last analysed window exceeds ``fs / 8`` samples; the
    reported bounds are then widened by one window on each side and clipped
    to the file.
    """
    try:
        x, fs, enc = aiffread(str(audio_file))
    except Exception:
        # Not readable as AIFF: fall back to the wav reader.
        x, fs, enc = wavread(str(audio_file))

    nb_samples = len(x)

    def window_level(start):
        """Level of the window starting at ``start``."""
        window = x[start:start + wnd_size]
        return lin2db(np.sqrt(np.mean(np.absolute(window) ** 2)))

    silence_tags = []
    index = 0
    while index + wnd_size < nb_samples:
        rms = window_level(index)
        index += wnd_size
        if rms < threshold:
            begining = index
            end = 0
            # NOTE(review): together with the increment inside the loop
            # below, this leaves the two windows that follow the detection
            # window un-analysed. Kept as is because the defaults were tuned
            # with this behaviour.
            index += wnd_size
            while rms < threshold:
                if index + wnd_size >= nb_samples:
                    # No further window available; ``end`` keeps its last value.
                    break
                index += wnd_size
                rms = window_level(index)
                end = index

            # Keep the run only when it is long enough.
            if (end - begining) > (fs / 8):
                begining = max(begining - wnd_size, 0)
                end = min(end + wnd_size, nb_samples)
                # Samples to seconds.
                silence_tags.append([begining / float(fs), end / float(fs)])

    return silence_tags
def get_sound_without_silence(source, rmsTreshhold=-40, WndSize=128):
    """
    Return the time tags of the beginning and the end of the audio in a file.

    The wav file is scanned with half-overlapping windows of ``WndSize``
    samples, once from the start and once from the end. Each scan stops at
    the first window whose RMS level (as converted by ``lin2db``) is above
    ``rmsTreshhold``.

    source       : path of the wav file to analyse
    rmsTreshhold : level a window must exceed to count as sound
    WndSize      : analysis window length, in samples

    Returns ``(beginning, end)`` in seconds.

    Raises ValueError when no analysed window exceeds the threshold.
    """
    x, fs, enc = wavread(str(source))
    nb_samples = len(x)
    wnd = int(WndSize)
    # Integer hop so that every offset is a valid slice index.
    hop = max(1, wnd // 2)

    def first_loud_offset(samples):
        """Return the scan position reached when a loud window is found, else None."""
        index = 0
        while index + wnd < nb_samples:
            window = samples[index:index + wnd]
            rms = lin2db(np.sqrt(np.mean(np.absolute(window) ** 2)))
            # The tag is one hop past the start of the loud window, as in
            # the historical implementation.
            index += hop
            if rms > rmsTreshhold:
                return index
        return None

    beginning = first_loud_offset(x)
    # Scanning the time-reversed signal gives the distance from the end.
    final = first_loud_offset(x[::-1])
    if beginning is None or final is None:
        raise ValueError(
            "no window above %s found in %s" % (rmsTreshhold, source))

    end = nb_samples - final
    # float() keeps true division on Python 2 now that offsets are integers.
    return beginning / float(fs), end / float(fs)
def get_rms_from_data(data, in_db=True):
    """
    Return the root-mean-square (power) of the audio buffer.

    data  : array or sequence of samples
    in_db : when True (default), the linear RMS is converted with
            ``conversions.lin2db`` before being returned

    Samples that are not already floating point (e.g. integer PCM) are
    promoted to float64 before squaring, so that the square cannot wrap
    around in a narrow integer type.
    """
    samples = np.asarray(data)
    if not np.issubdtype(samples.dtype, np.inexact):
        samples = samples.astype(np.float64)

    rms = np.sqrt(np.mean(samples ** 2))
    if in_db:
        # Imported only when needed so the linear path has no dependency
        # on the project conversion module.
        from conversions import lin2db
        rms = lin2db(rms)
    return rms
def get_matrix_values(sdif):
    """
    Load time-tagged matrix data from an SDIF file.

    This can be used to extract data from lpc or true env .sdif files.

    sdif : path of the SDIF file

    Returns ``(tlist, matrix_data)`` where ``tlist`` holds the time tag of
    every frame that carries data and ``matrix_data`` is a 2-D array with
    one row per such frame and ``fftsize + 1`` columns, passed through
    ``lin2db``.

    Raises RuntimeError when the file cannot be opened or when it contains
    no "IGBG" matrix (from which the FFT size is read).
    """
    inent = eaSDIF.Entity()
    # NOTE(review): the entity is never explicitly closed here; confirm
    # whether the eaSDIF binding needs an explicit close.
    if not inent.OpenRead(sdif):
        raise RuntimeError("get_lpc:: " + sdif + " is no sdif file or does not exist")

    vec = eaSDIF.Vector()

    def carries_data(mat):
        """Same predicate for the time pass and the data pass keeps rows aligned."""
        return mat.GetNbRows() > 1 and mat.GetNbCols() > 0

    # FFT size: read from row 0 of the "IGBG" matrix; half of its 4th value.
    fftsize = None
    for frame in inent:
        if frame.MatrixExists("IGBG"):
            mat = frame.GetMatrixWithSig("IGBG")
            mat.GetRow(vec, 0)
            fftsize = int(vec[3] / 2)
    if fftsize is None:
        raise RuntimeError("get_lpc:: " + sdif + " contains no IGBG matrix")
    nb_bins = fftsize + 1

    # Extract time tag values.
    tlist = []
    for frame in inent:
        if carries_data(frame.GetMatrix(1)):
            tlist.append(frame.GetTime())

    # Extract matrix data values: first column of each of the nb_bins rows.
    dlist = []
    for frame in inent:
        mat = frame.GetMatrix(1)
        if carries_data(mat):
            for i in range(nb_bins):
                mat.GetRow(vec, i)
                dlist.append(float(vec[0]))

    # One row per time tag, one column per bin.
    matrix_data = np.array(dlist, dtype=float).reshape(len(tlist), nb_bins)

    # When using the flag -OS1 in super vp the amplitude values are linear;
    # convert them so the returned amplitudes are in dB.
    from conversions import lin2db
    matrix_data = lin2db(matrix_data)

    return tlist, matrix_data
def index_wav_file(source, rms_threshold=-50, WndSize=16384, target_folder="Indexed"):
    """
    Split an audio file into one file per sentence.

    The file is read with ``aiffread`` and, if that fails, with
    ``wavread``. It is analysed in consecutive windows of ``WndSize``
    samples; a run of windows whose RMS level (as converted by ``lin2db``)
    stays above ``rms_threshold`` is treated as a sentence. Each sentence
    is widened by one window on each side, clipped to the file, and written
    to ``target_folder`` as ``<source name>_<n>.wav`` with ``enc='pcm24'``.

    input:
        source        : path of the source audio file
        rms_threshold : level a window must exceed to count as sound
        WndSize       : window size to compute the RMS on, in samples
        target_folder : folder to save the extracted sounds in (created
                        when the first sentence is written)

    The default parameters were tested with normal speech. A run is only
    written when it spans more than ``fs / 2`` samples (500 ms), which can
    be tuned. You can change the rms threshold to tune the algorithm.

    The duration and the path of every written file are printed.
    """
    try:
        x, fs, enc = aiffread(str(source))
    except Exception:
        # Not readable as AIFF: fall back to the wav reader.
        x, fs, enc = wavread(str(source))

    nb_samples = len(x)
    sound_tag = os.path.splitext(os.path.basename(str(source)))[0]

    def window_level(start):
        """Level of the window starting at ``start``."""
        window = x[start:start + WndSize]
        return lin2db(np.sqrt(np.mean(np.absolute(window) ** 2)))

    index = 0
    NbofWrittendFiles = 1
    while index + WndSize < nb_samples:
        rms = window_level(index)
        index += WndSize
        if rms > rms_threshold:
            begining = index
            end = 0
            # NOTE(review): together with the increment inside the loop
            # below, this leaves the two windows that follow the detection
            # window un-analysed. Kept as is because the defaults were tuned
            # with this behaviour.
            index += WndSize
            while rms > rms_threshold:
                if index + WndSize >= nb_samples:
                    # No further window available; ``end`` keeps its last value.
                    break
                index += WndSize
                rms = window_level(index)
                end = index

            # If the run is over 500 ms long, write it.
            if (end - begining) > (fs / 2):
                duree = (end - begining) / float(fs)
                print("duree : " + str(duree))

                begining = max(begining - WndSize, 0)
                end = min(end + WndSize, nb_samples)

                # Create the folder on demand; only "already exists" is ignored.
                try:
                    os.makedirs(target_folder)
                except OSError:
                    if not os.path.isdir(target_folder):
                        raise

                out_path = os.path.join(
                    target_folder,
                    sound_tag + "_" + str(NbofWrittendFiles) + ".wav")
                print(out_path)
                wavwrite(x[begining:end], out_path, fs, enc='pcm24')
                NbofWrittendFiles += 1
def extract_sentences_tags(source, rms_threshold=-50, WndSize=16384, overlap=8192):
    """
    Locate all the sentences inside an audio file and return their time tags.

    The file is read with ``aiffread`` and, if that fails, with
    ``wavread``. It is analysed in consecutive windows of ``WndSize``
    samples; a run of windows whose RMS level (as converted by ``lin2db``)
    stays above ``rms_threshold`` is treated as a sentence. Each sentence
    is widened by one window on each side and clipped to the file.

    The default parameters were tested with normal speech. A run is only
    kept when it spans more than ``fs / 2`` samples (500 ms), which can be
    tuned. You can change the rms threshold to tune the algorithm.

    input:
        source        : path of the source audio file
        rms_threshold : level a window must exceed to count as sound
        WndSize       : window size to compute the RMS on, in samples
        overlap       : nb of overlap samples (currently unused: windows
                        do not overlap)

    returns:
        tags        : list of ``[begining, end]`` pairs
        vid_lengths : list with the length of each sentence

    All returned values are strings formatted as ``H:M:S.ffffff`` with a
    zero-padded six-digit microsecond field. The duration of every sentence
    is printed.
    """
    from datetime import datetime, timedelta

    try:
        x, fs, enc = aiffread(str(source))
    except Exception:
        # Not readable as AIFF: fall back to the wav reader.
        x, fs, enc = wavread(str(source))

    nb_samples = len(x)

    def window_level(start):
        """Level of the window starting at ``start``."""
        window = x[start:start + WndSize]
        return lin2db(np.sqrt(np.mean(np.absolute(window) ** 2)))

    def as_timestamp(seconds):
        """Format a duration in seconds as H:M:S.ffffff."""
        moment = datetime(1, 1, 1) + timedelta(seconds=seconds)
        # The microseconds must be zero-padded to six digits, otherwise the
        # fractional part reads as a different value (50000 us is .050000).
        return "%d:%d:%d.%06d" % (
            moment.hour, moment.minute, moment.second, moment.microsecond)

    tags = []
    vid_lengths = []
    index = 0
    while index + WndSize < nb_samples:
        rms = window_level(index)
        index += WndSize
        if rms > rms_threshold:
            begining = index
            end = 0
            # NOTE(review): together with the increment inside the loop
            # below, this leaves the two windows that follow the detection
            # window un-analysed. Kept as is because the defaults were tuned
            # with this behaviour.
            index += WndSize
            while rms > rms_threshold:
                if index + WndSize >= nb_samples:
                    # No further window available; ``end`` keeps its last value.
                    break
                index += WndSize
                rms = window_level(index)
                end = index

            # If the run is over 500 ms long, keep it.
            if (end - begining) > (fs / 2):
                begining = max(begining - WndSize, 0)
                end = min(end + WndSize, nb_samples)

                # Samples to seconds.
                begining_s = begining / float(fs)
                end_s = end / float(fs)
                len_s = (end - begining) / float(fs)
                print("duree : " + str(len_s))

                # Seconds to H:M:S.ffffff strings.
                begining_s = as_timestamp(begining_s)
                end_s = as_timestamp(end_s)
                len_s = as_timestamp(len_s)
                print("la longueur est")
                print(len_s)

                tags.append([begining_s, end_s])
                vid_lengths.append(len_s)

    return tags, vid_lengths