Example No. 1
def get_RMS_over_time(audio_file, window_size = 1024, in_db = True):
	"""
 	parameters:
		audio_file 	: file to anlayse
		window_size : window size for FFT computing

		returns : time series with the RMS and the time
	
	warning : 
		this function only works for mono files
	"""	
	from scikits.audiolab import wavread, aiffread
	import numpy as np

	try:
		sound_in, fs, enc = aiffread(audio_file)
	except ValueError:
		sound_in, fs, enc = wavread(audio_file)

	begin = 0
	values = []
	time_tags = []
	while (begin + window_size) < len(sound_in):
		data = sound_in[begin : begin + window_size]
		time_tag = (begin + (window_size / 2)) / float(fs)
		
		values.append(get_rms_from_data(data, in_db = in_db))
		time_tags.append(time_tag)
		begin = begin + window_size

	return time_tags, values
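The function above relies on a get_rms_from_data helper that is not shown in this example. A minimal sketch of what such a helper presumably does (the RMS of one block of samples, optionally converted to dB; the 1e-12 floor is an added guard, not part of the original):

import numpy as np

def get_rms_from_data(data, in_db=True):
    # root-mean-square of one block of samples
    rms = np.sqrt(np.mean(np.asarray(data, dtype=float) ** 2))
    if in_db:
        # convert the linear amplitude to decibels, guarding against log(0)
        rms = 20 * np.log10(max(rms, 1e-12))
    return rms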
Example No. 2
def basefreq(audiofile):
    """
    This function reads in the audio file and does the hann windowed fft of 
    the right input. It then smooths the output using a gaussian filter and
    then finds the peaks. It returns the peaks in the right audio channel since
    testing showed there was no significant difference in the two.
    """
    #read the data into an ndarray using scikits-audiolab        
    data, rate, enc = al.aiffread(audiofile)
    #split the left and right channel
    datar = data[:,1]
    datal = data[:,0]
    #take the fft of both of the channels with the hann window applied
    #the hann window reduces spectral leakage in the FFT     
    dftr = abs(fft.fft(datar*signal.hann(len(datar))))
    dftl = abs(fft.fft(datal*signal.hann(len(datal))))
    #frequency resolution (bin spacing in Hz) of the FFT
    freq = float(rate)/float(len(datar))
    #frequency axis; the extra 99 bins match the length added by the convolution below
    freqs = np.arange(len(dftr)/2+99)*freq
    #keep only the positive-frequency half of each spectrum
    dftr = dftr[0:np.size(dftr)/2]
    dftl = dftl[0:np.size(dftl)/2]
    #smooth the fft with a gaussian
    c = signal.gaussian(100,20)
    dftr = signal.convolve(dftr,c)
    dftl = signal.convolve(dftl,c)
    #find the significant peaks in each channel
    peaksr = findpeaks(dftr,freqs)
    peaksl = findpeaks(dftl,freqs)
    #plot the output fft for testing
    #plt.plot(freqs,dftr)
    #plt.show()
    #print peaksr
    return peaksr
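findpeaks is defined elsewhere in the source module. A hedged sketch of a peak picker with the same call signature, returning the frequencies of the significant local maxima (the rel_threshold and order parameters are illustrative assumptions):

import numpy as np
from scipy import signal

def findpeaks(dft, freqs, rel_threshold=0.1, order=50):
    # indices of local maxima in the smoothed magnitude spectrum
    peak_idx = signal.argrelmax(dft, order=order)[0]
    # keep only the peaks above a fraction of the strongest peak
    peak_idx = peak_idx[dft[peak_idx] > rel_threshold * dft.max()]
    # return the corresponding frequencies in Hz
    return freqs[peak_idx]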
Example No. 3
def aif_to_wav(source, target):
    """
	source : source audio file
	target : target audio file
	"""
    try:
        x, fs, enc = aiffread(str(source))
        WavFileName = target
        wavwrite(x, WavFileName, fs, enc='pcm24')
    except:
        print "File is not aif"
        pass
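A possible way to call it, batch-converting a folder of .aif files (the folder name "sounds" is just an example):

import glob
import os

for source in glob.glob("sounds/*.aif"):
    target = os.path.splitext(source)[0] + ".wav"
    aif_to_wav(source, target)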
Example No. 4
def find_silence(audio_file, threshold = -65, wnd_size = 16384):
	"""
	find a segment of silence (<threshold dB)in the sound file
	return tag in seconds
	"""	
	try:
		x, fs, enc 		= aiffread(str(audio_file))
	except:
		x, fs, enc 		= wavread(str(audio_file))

	index = 0
	silence_tags = []
	while index + wnd_size < len(x):
		DataArray = x[index: index + wnd_size]
		rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
		rms = lin2db(rms)
		index = wnd_size + index
		if rms < threshold:
			end = 0
			begining = index
			index = wnd_size + index
			while rms < threshold:
				if index + wnd_size < len(x):
					index = wnd_size + index
					DataArray = x[index: index + wnd_size]
					rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
					rms = lin2db(rms)
					end = index
				else:
					break

			#if the silent segment is longer than 125 ms (fs / 8 samples), keep its tags
			if (end - begining) > (fs / 8):
				begining = begining - wnd_size
				if begining < 0:
					begining = 0

				end = end + wnd_size
				if end > len(x):
					end = len(x)

				#samples to seconds
				begining_s = begining / float(fs)
				end_s = end / float(fs)
				silence_tags.append([begining_s, end_s])

	return silence_tags
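lin2db is not part of this snippet; it presumably converts a linear amplitude to decibels, along these lines (the 1e-12 floor is an added guard against digital silence):

import numpy as np

def lin2db(value):
    # linear amplitude to dB, with a floor to avoid log(0)
    return 20 * np.log10(max(value, 1e-12))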
Example No. 5
def IndexFileInFolder(FolderName):
    for file in glob.glob(FolderName + "/*.wav"):  # Wav Files
        try:
            x, fs, enc = aiffread(str(file))
        except ValueError:
            x, fs, enc = wavread(str(file))
        WndSize = 16384
        rmsThreshold = -55

        index = 0
        NbofWrittendFiles = 1
        while index + WndSize < len(x):
            DataArray = x[index:index + WndSize]
            rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
            rms = Lin2db(rms)
            index = WndSize + index
            if rms > rmsThreshold:
                end = 0
                begining = index
                index = WndSize + index
                while rms > rmsThreshold:
                    if index + WndSize < len(x):
                        index = WndSize + index
                        DataArray = x[index:index + WndSize]
                        rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
                        rms = Lin2db(rms)
                        end = index
                    else:
                        break

                #if the segment is over 500 ms long, write it out
                if (end - begining) > (fs / 2):
                    duree = (end - begining) / float(fs)
                    print "duree  :  " + str(duree)

                    begining = begining - WndSize
                    if begining < 0:
                        begining = 0

                    end = end + WndSize
                    if end > len(x):
                        end = len(x)

                    name = os.path.splitext(str(file))[0]
                    name = os.path.basename(name)
                    wavwrite(x[begining:end],
                             "Indexed/" + "/" + FolderName + "/" + name + "_" +
                             str(NbofWrittendFiles) + ".wav",
                             fs,
                             enc='pcm24')
                    NbofWrittendFiles = NbofWrittendFiles + 1
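The function writes into "Indexed/<FolderName>" but does not create that directory itself, so a caller has to make it first; for example (the folder name is hypothetical):

import os

folder = "recordings"
out_dir = os.path.join("Indexed", folder)
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)
IndexFileInFolder(folder)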
Example No. 6
def get_spectral_centroid(audio_file, window_size = 256, noverlap = 0, plot_specgram = False):
	"""
 	parameters:
		audio_file 	: file to anlayse
		window_size : window size for FFT computing
		plot_specgram : Do youw ant to plot specgram of analysis?

		returns : time series with the spectral centroid and the time
	
	warning : 
		this function only works for mono files
	"""	
	from scikits.audiolab import wavread, aiffread
	from scipy import signal
	import numpy as np
	import matplotlib.pyplot as plt

	try:
		sound_in, fs, enc = aiffread(audio_file)
	except ValueError:
		sound_in, fs, enc = wavread(audio_file)

	#compute the magnitude spectrogram
	f, t, Sxx = signal.spectrogram(sound_in, fs, nperseg=window_size,
	                               noverlap=noverlap, scaling='spectrum',
	                               mode='magnitude')


	#plot the spectrogram
	if plot_specgram:
		fig, ax = plt.subplots()
		plt.pcolormesh(t, f, Sxx, cmap='nipy_spectral')
		ax.axis('tight')
		plt.ylabel('Frequency [Hz]')
		plt.xlabel('Time [sec]')
		plt.title("Spectrogram of the audio signal")
		plt.show()

	centroid_list = []
	for spectrum in np.transpose(Sxx):
	    centroid_list.append(centroid(spectrum, f))
	    
	return t, centroid_list
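centroid is assumed to compute the spectral centroid of a single spectrum slice, i.e. the magnitude-weighted mean frequency; a minimal version consistent with how it is called above:

import numpy as np

def centroid(magnitudes, frequencies):
    # magnitude-weighted mean frequency of one spectrum slice
    total = np.sum(magnitudes)
    if total == 0:
        return 0.0
    return np.sum(frequencies * magnitudes) / total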
Example No. 7
    header += "}; // end namespace"

    text_file = open("test-headers/" + variableName + ".h", "w")
    text_file.write(header)
    text_file.close()


# get all wav files
for fileName in os.listdir("test-audio"):
    if fileName.endswith(".wav") or fileName.endswith(".aif"):

        if fileName.endswith(".wav"):
            audioSignal, fs, enc = wavread("test-audio/" + fileName)
            fileFormat = "wav"
        elif fileName.endswith(".aif"):
            audioSignal, fs, enc = aiffread("test-audio/" + fileName)
            fileFormat = "aif"
        else:
            assert (False)

        if len(audioSignal.shape) == 1:
            numChannels = 1
        elif len(audioSignal.shape) == 2:
            numChannels = audioSignal.shape[1]
        else:
            assert (False)

        #print fileName, enc

        if enc == "pcmu8" or enc == "pcms8":
            makeHeader(fileName, audioSignal, numChannels, 8, fs, fileFormat)
Example No. 8
def index_wav_file(source,
                   rms_threshold=-50,
                   WndSize=16384,
                   target_folder="Indexed"):
    """
	input:
		source : fsource audio file
		rms_threshold : this is the threshold
		WndSize : window size to compue the RMS on
		target_folder : folder to save the extracted sounds in

	This function separates all the sentences inside an audiofile.
	It takes each sentence and put it into one audio file inside target_folder with the name target_nb
	The default parameters were tested with notmal speech.
	Only works if file is at least 500 ms long, which can be tuned
	You can change the rms threshold to tune the algorithm
	"""
    try:
        x, fs, enc = aiffread(str(source))
    except ValueError:
        x, fs, enc = wavread(str(source))

    index = 0
    NbofWrittendFiles = 1
    while index + WndSize < len(x):
        DataArray = x[index:index + WndSize]
        rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
        rms = lin2db(rms)
        index = WndSize + index
        if rms > rms_threshold:
            end = 0
            begining = index
            index = WndSize + index
            while rms > rms_threshold:
                if index + WndSize < len(x):
                    index = WndSize + index
                    DataArray = x[index:index + WndSize]
                    rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
                    rms = lin2db(rms)
                    end = index
                else:
                    break

            #if the sentence is over 500 ms long, write it
            if (end - begining) > (fs / 2):
                duree = (end - begining) / float(fs)
                print "duree  :  " + str(duree)

                begining = begining - WndSize
                if begining < 0:
                    begining = 0

                end = end + WndSize
                if end > len(x):
                    end = len(x)
                print source
                sound_tag = os.path.basename(source)
                sound_tag = os.path.splitext(sound_tag)[0]
                try:
                    os.mkdir(target_folder)
                except OSError:
                    pass

                #write(filename = target_folder+"/"+ sound_tag + "_" + str(NbofWrittendFiles)+".wav",rate = fs, data= x[begining:end])
                wavwrite(x[begining:end],
                         target_folder + "/" + sound_tag + "_" +
                         str(NbofWrittendFiles) + ".wav",
                         fs,
                         enc='pcm24')
                NbofWrittendFiles = NbofWrittendFiles + 1
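A possible call, splitting one long recording into sentence files (the file and folder names are hypothetical):

index_wav_file("interview.wav", rms_threshold=-45, target_folder="sentences")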
Example No. 9
def extract_sentences_tags(source,
                           rms_threshold=-50,
                           WndSize=16384,
                           overlap=8192):
    """
	This function separates all the sentences inside an audiofile.
	It takes each sentence and put it into one audio file inside target_folder with the name target_nb
	The default parameters were tested with notmal speech.
	Only works if file is at least 500 ms long, which can be tuned
	You can change the rms threshold to tune the algorithm

	input:
		source : fsource audio file
		rms_threshold : this is the threshold
		WndSize : window size to compue the RMS on
		overlap : nb of overlap samples

	returns:
		tags in pairs of [begining end]
	"""
    try:
        x, fs, enc = aiffread(str(source))
    except ValueError:
        x, fs, enc = wavread(str(source))

    index = 0
    NbofWrittendFiles = 1
    tags = []
    vid_lengths = []
    while index + WndSize < len(x):
        DataArray = x[index:index + WndSize]
        rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
        rms = lin2db(rms)
        index = WndSize + index
        #index       += overlap
        if rms > rms_threshold:
            end = 0
            begining = index
            index = WndSize + index
            #index       += overlap
            while rms > rms_threshold:
                if index + WndSize < len(x):
                    index = WndSize + index
                    #index       += overlap
                    DataArray = x[index:index + WndSize]
                    rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
                    rms = lin2db(rms)
                    end = index
                else:
                    break

            #if the sentence is over 500 ms long, keep its tags
            if (end - begining) > (fs / 2):
                begining = begining - WndSize
                if begining < 0:
                    begining = 0

                end = end + WndSize
                if end > len(x):
                    end = len(x)

                #samples to seconds, minutes, hours
                begining_s = begining / float(fs)
                end_s = end / float(fs)
                len_s = (end - begining) / float(fs)
                print "duree  :  " + str(len_s)

                from datetime import timedelta, datetime
                begining_s = datetime(1, 1, 1) + timedelta(seconds=begining_s)
                end_s = datetime(1, 1, 1) + timedelta(seconds=end_s)
                len_s = datetime(1, 1, 1) + timedelta(seconds=len_s)
                #format as hours:minutes:seconds.milliseconds
                begining_s = "%d:%d:%d.%03d" % (
                    begining_s.hour, begining_s.minute, begining_s.second,
                    begining_s.microsecond / 1000)
                end_s = "%d:%d:%d.%03d" % (end_s.hour, end_s.minute, end_s.second,
                                           end_s.microsecond / 1000)
                len_s = "%d:%d:%d.%03d" % (len_s.hour, len_s.minute, len_s.second,
                                           len_s.microsecond / 1000)

                print "la longueur est"
                print len_s

                tags.append([begining_s, end_s])
                vid_lengths.append(len_s)

                NbofWrittendFiles = NbofWrittendFiles + 1

    return tags, vid_lengths
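A possible call, printing the detected sentence boundaries (the file name is hypothetical):

tags, lengths = extract_sentences_tags("lecture.aif", rms_threshold=-48)
for (start, stop), length in zip(tags, lengths):
    print start, "->", stop, "(", length, ")"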
Example No. 10
	header += "\n\n"
	header += "}; // end namespace"

	text_file = open ("test-headers/" + variableName + ".h", "w")
	text_file.write (header)
	text_file.close()

# get all wav files
for fileName in os.listdir("test-audio"):
    if fileName.endswith(".wav") or fileName.endswith(".aif"):

        if fileName.endswith(".wav"):
            audioSignal, fs, enc = wavread("test-audio/" + fileName)
            fileFormat = "wav"
        elif fileName.endswith(".aif"):
            audioSignal, fs, enc = aiffread("test-audio/" + fileName)
            fileFormat = "aif"
        else:
            assert (False)

        if len(audioSignal.shape) == 1:
            numChannels = 1
        elif len(audioSignal.shape) == 2:
            numChannels = 2
        else:
            assert (False)

        #print fileName, enc

        if enc == "pcmu8" or enc == "pcms8":
            makeHeader(fileName, audioSignal, numChannels, 8, fs, fileFormat)