Ejemplo n.º 1
0
def cut_silence_in_sound(source, target, rmsTreshhold=-40, WndSize=128):
    """
    Trim the silence at the beginning and at the end of an audio file and
    write the remaining samples to ``target``.

    It's useful for normalizing the length of the audio stimuli in an
    experiment. The default parameters were tested with normal speech.

    source : source audio file (read with wavread)
    target : output sound (written with wavwrite, enc='pcm24')
    rmsTreshhold : a window counts as sound when lin2db(RMS of the window)
        is greater than this value
    WndSize : analysis window length in samples; consecutive windows
        advance by half a window

    Raises ValueError when no analysed window exceeds rmsTreshhold, i.e.
    there is nothing to keep.
    """
    x, fs, enc = wavread(str(source))
    nb_samples = len(x)
    window = int(WndSize)
    # Integer hop: slice bounds must be integers, a float hop breaks slicing.
    hop = max(1, window // 2)

    # Scan forward for the first window louder than the threshold.
    beginning = None
    index = 0
    while index + window < nb_samples:
        frame = x[index:index + window]
        level = lin2db(np.sqrt(np.mean(np.absolute(frame)**2)))
        index += hop
        if level > rmsTreshhold:
            # The cut point is the already-advanced index, i.e. half a
            # window after the start of the first loud window.
            beginning = index
            break

    if beginning is None:
        raise ValueError("no sound above %s dB found in %s" %
                         (rmsTreshhold, source))
    print(beginning / float(fs))

    # Scan the reversed signal the same way to locate the end of the sound.
    backwards = x[::-1]
    final = None
    index = 0
    while index + window < nb_samples:
        frame = backwards[index:index + window]
        level = lin2db(np.sqrt(np.mean(np.absolute(frame)**2)))
        index += hop
        if level > rmsTreshhold:
            final = index
            break

    if final is None:
        raise ValueError("no sound above %s dB found in %s" %
                         (rmsTreshhold, source))
    print((nb_samples - final) / float(fs))

    # Write the sound source without the leading and trailing silences.
    wavwrite(x[beginning:nb_samples - final], target, fs, enc='pcm24')
Ejemplo n.º 2
0
def find_silence(audio_file, threshold = -65, wnd_size = 16384):
    """
    Find the segments of silence (window level < threshold) in a sound file.

    audio_file : path of the sound file; it is read with aiffread first and
        with wavread if that fails
    threshold : a window counts as silent when lin2db(RMS of the window) is
        lower than this value
    wnd_size : analysis window length in samples (windows do not overlap)

    Returns a list of [begin, end] pairs expressed in seconds.

    NOTE: once a silent window is found the scan jumps ahead before it
    measures again, so the two windows that directly follow the first
    silent one are not measured. This stepping is kept unchanged.
    """
    try:
        x, fs, enc = aiffread(str(audio_file))
    except Exception:
        # Could not be read as AIFF: fall back to the WAV reader.
        x, fs, enc = wavread(str(audio_file))

    nb_samples = len(x)
    silence_tags = []
    index = 0
    while index + wnd_size < nb_samples:
        frame = x[index:index + wnd_size]
        rms = lin2db(np.sqrt(np.mean(np.absolute(frame)**2)))
        index += wnd_size
        if rms < threshold:
            end = 0
            begining = index
            index += wnd_size

            # Keep stepping until a window is no longer silent or the end of
            # the signal is reached; `end` is the start of the last window
            # that was measured.
            while rms < threshold and index + wnd_size < nb_samples:
                index += wnd_size
                frame = x[index:index + wnd_size]
                rms = lin2db(np.sqrt(np.mean(np.absolute(frame)**2)))
                end = index

            # Only keep segments longer than fs / 8 samples (125 ms).
            if (end - begining) > (fs / 8):
                # Widen the segment by one window on each side, clamped to
                # the bounds of the signal.
                begining = max(begining - wnd_size, 0)
                end = min(end + wnd_size, nb_samples)

                # Samples to seconds.
                silence_tags.append([begining / float(fs), end / float(fs)])

    return silence_tags
Ejemplo n.º 3
0
def get_sound_without_silence(source, rmsTreshhold=-40, WndSize=128):
    """
    Return the time tags of the beginning and of the end of the audio
    content of a file, ignoring leading and trailing silence.

    source : source audio file (read with wavread)
    rmsTreshhold : a window counts as sound when lin2db(RMS of the window)
        is greater than this value
    WndSize : analysis window length in samples; consecutive windows
        advance by half a window

    Returns (beginning, end) in seconds.

    Raises ValueError when no analysed window exceeds rmsTreshhold.
    """
    x, fs, enc = wavread(str(source))
    nb_samples = len(x)
    window = int(WndSize)
    # Integer hop: slice bounds must be integers, a float hop breaks slicing.
    hop = max(1, window // 2)

    # Scan forward for the first window louder than the threshold.
    beginning = None
    index = 0
    while index + window < nb_samples:
        frame = x[index:index + window]
        level = lin2db(np.sqrt(np.mean(np.absolute(frame)**2)))
        index += hop
        if level > rmsTreshhold:
            # The tag is the already-advanced index, i.e. half a window
            # after the start of the first loud window.
            beginning = index
            break

    # Scan the reversed signal the same way to locate the end of the sound.
    backwards = x[::-1]
    final = None
    index = 0
    while index + window < nb_samples:
        frame = backwards[index:index + window]
        level = lin2db(np.sqrt(np.mean(np.absolute(frame)**2)))
        index += hop
        if level > rmsTreshhold:
            final = index
            break

    if beginning is None or final is None:
        raise ValueError("no sound above %s dB found in %s" %
                         (rmsTreshhold, source))

    # `final` is counted from the end of the file; convert both tags from
    # samples to seconds.
    end = nb_samples - final
    return beginning / float(fs), end / float(fs)
Ejemplo n.º 4
0
def get_rms_from_data(data, in_db = True):
    """
    Return the root-mean-square (power) of the audio buffer.

    data : sequence or numpy array of samples
    in_db : when True (default) the RMS is passed through
        conversions.lin2db before being returned; when False the linear
        RMS is returned and the conversions module is not needed
    """
    samples = np.asarray(data)
    # Integer buffers (e.g. int16 PCM) wrap around when squared in their own
    # dtype, so promote them to float first.
    if samples.dtype.kind in "iub":
        samples = samples.astype(np.float64)
    rms = np.sqrt(np.mean(samples**2))
    if in_db:
        from conversions import lin2db
        rms = lin2db(rms)
    return rms
Ejemplo n.º 5
0
def get_matrix_values(sdif):
    """
    Load data from an SDIF file and return time-tagged values and matrix data.

    This can be used to extract data from lpc or true env .sdif files.

    sdif : path of the file, opened with eaSDIF.Entity.OpenRead

    Returns (tlist, matrix_data):
        tlist : time tag of every frame whose matrix 1 has more than one
            row and at least one column
        matrix_data : array with one row per entry of tlist and
            fftsize / 2 + 1 columns, holding the first value of each matrix
            row after conversion with lin2db

    Raises RuntimeError when the file cannot be opened or when it holds no
    "IGBG" matrix (the FFT size is read from that matrix).
    """
    from conversions import lin2db

    inent = eaSDIF.Entity()
    res = inent.OpenRead(sdif)
    # Kept as an equality test so only a False-like status is an error.
    if res == False:
        raise RuntimeError("get_matrix_values:: " + sdif +
                           " is no sdif file or does not exist")
    vec = eaSDIF.Vector()

    # FFT size: element 3 of the first row of the "IGBG" matrix. When several
    # frames carry one, the last one wins.
    fftsize = None
    for frame in inent:
        if frame.MatrixExists("IGBG"):
            mat = frame.GetMatrixWithSig("IGBG")
            mat.GetRow(vec, 0)
            fftsize = vec[3]
    if fftsize is None:
        raise RuntimeError("get_matrix_values:: " + sdif +
                           " holds no IGBG matrix, fft size is unknown")
    nb_bins = int(fftsize / 2) + 1

    # Extract the time tag and the matrix values of every data frame in a
    # single pass, so both lists always describe the same frames.
    tlist = []
    dlist = []
    for frame in inent:
        mat = frame.GetMatrix(1)
        if mat.GetNbRows() > 1 and mat.GetNbCols() > 0:
            tlist.append(frame.GetTime())
            for i in range(nb_bins):
                mat.GetRow(vec, i)
                dlist.append(float(vec[0]))

    # One row per time tag, one column per bin.
    matrix_data = np.array(dlist, dtype=float).reshape(len(tlist), nb_bins)

    # When using the flag -OS1 in super vp the amplitude values are linear;
    # convert them so the returned amplitudes are in dB.
    matrix_data = lin2db(matrix_data)

    return tlist, matrix_data
Ejemplo n.º 6
0
def index_wav_file(source,
                   rms_threshold=-50,
                   WndSize=16384,
                   target_folder="Indexed"):
    """
    Separate the sentences found inside an audio file and write each one
    to its own file inside target_folder.

    input:
        source : source audio file; it is read with aiffread first and with
            wavread if that fails
        rms_threshold : a window counts as sound when lin2db(RMS of the
            window) is greater than this value
        WndSize : window size (in samples) to compute the RMS on
        target_folder : folder to save the extracted sounds in; it is
            created when the first sentence is written

    Output files are named <source name without extension>_<n>.wav, with n
    starting at 1, and are written with enc='pcm24'.
    The default parameters were tested with normal speech.
    Only segments longer than 500 ms (fs / 2 samples) are written.
    You can change the rms threshold to tune the algorithm.

    NOTE: once a loud window is found the scan jumps ahead before it
    measures again, so the two windows that directly follow the first loud
    one are not measured. This stepping is kept unchanged.
    """
    try:
        x, fs, enc = aiffread(str(source))
    except Exception:
        # Could not be read as AIFF: fall back to the WAV reader.
        x, fs, enc = wavread(str(source))

    nb_samples = len(x)
    sound_tag = os.path.splitext(os.path.basename(source))[0]

    index = 0
    NbofWrittendFiles = 1
    while index + WndSize < nb_samples:
        frame = x[index:index + WndSize]
        rms = lin2db(np.sqrt(np.mean(np.absolute(frame)**2)))
        index += WndSize
        if rms > rms_threshold:
            end = 0
            begining = index
            index += WndSize

            # Keep stepping until a window is no longer loud or the end of
            # the signal is reached; `end` is the start of the last window
            # that was measured.
            while rms > rms_threshold and index + WndSize < nb_samples:
                index += WndSize
                frame = x[index:index + WndSize]
                rms = lin2db(np.sqrt(np.mean(np.absolute(frame)**2)))
                end = index

            # If the segment is over 500 ms long, write it.
            if (end - begining) > (fs / 2):
                duree = (end - begining) / float(fs)
                print("duree  :  " + str(duree))

                # Widen the segment by one window on each side, clamped to
                # the bounds of the signal.
                begining = max(begining - WndSize, 0)
                end = min(end + WndSize, nb_samples)

                # Create the output folder on demand; a real failure (e.g.
                # permissions) is reported instead of being swallowed.
                if not os.path.isdir(target_folder):
                    os.makedirs(target_folder)

                target_name = sound_tag + "_" + str(NbofWrittendFiles) + ".wav"
                wavwrite(x[begining:end],
                         os.path.join(target_folder, target_name),
                         fs,
                         enc='pcm24')
                NbofWrittendFiles = NbofWrittendFiles + 1
Ejemplo n.º 7
0
def extract_sentences_tags(source,
                           rms_threshold=-50,
                           WndSize=16384,
                           overlap=8192):
    """
    Locate the sentences inside an audio file and return their time tags.

    Nothing is written to disk; only the tags are computed.
    The default parameters were tested with normal speech.
    Only segments longer than 500 ms (fs / 2 samples) are reported.
    You can change the rms threshold to tune the algorithm.

    input:
        source : source audio file; it is read with aiffread first and with
            wavread if that fails
        rms_threshold : a window counts as sound when lin2db(RMS of the
            window) is greater than this value
        WndSize : window size (in samples) to compute the RMS on
        overlap : nb of overlap samples; accepted for compatibility but
            currently unused, windows do not overlap

    returns:
        tags : list of [begining, end] pairs formatted as "H:M:S.ffffff"
        vid_lengths : list with the length of each segment, same format

    NOTE: once a loud window is found the scan jumps ahead before it
    measures again, so the two windows that directly follow the first loud
    one are not measured. This stepping is kept unchanged.
    """
    try:
        x, fs, enc = aiffread(str(source))
    except Exception:
        # Could not be read as AIFF: fall back to the WAV reader.
        x, fs, enc = wavread(str(source))

    nb_samples = len(x)
    tags = []
    vid_lengths = []
    index = 0
    while index + WndSize < nb_samples:
        frame = x[index:index + WndSize]
        rms = lin2db(np.sqrt(np.mean(np.absolute(frame)**2)))
        index += WndSize
        if rms > rms_threshold:
            end = 0
            begining = index
            index += WndSize

            # Keep stepping until a window is no longer loud or the end of
            # the signal is reached; `end` is the start of the last window
            # that was measured.
            while rms > rms_threshold and index + WndSize < nb_samples:
                index += WndSize
                frame = x[index:index + WndSize]
                rms = lin2db(np.sqrt(np.mean(np.absolute(frame)**2)))
                end = index

            # If the segment is over 500 ms long, report it.
            if (end - begining) > (fs / 2):
                # Widen the segment by one window on each side, clamped to
                # the bounds of the signal.
                begining = max(begining - WndSize, 0)
                end = min(end + WndSize, nb_samples)

                # Samples to seconds.
                len_s = (end - begining) / float(fs)
                print("duree  :  " + str(len_s))

                length_tag = _format_timestamp(len_s)
                print("la longueur est")
                print(length_tag)

                tags.append([_format_timestamp(begining / float(fs)),
                             _format_timestamp(end / float(fs))])
                vid_lengths.append(length_tag)

    return tags, vid_lengths


def _format_timestamp(seconds):
    """Format a time in seconds as "H:M:S.ffffff"."""
    from datetime import timedelta

    delta = timedelta(seconds=seconds)
    whole_seconds = delta.days * 86400 + delta.seconds
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    # The fraction must be zero-padded to six digits: without the padding
    # 1.005 s would be rendered as "0:0:1.5000".
    return "%d:%d:%d.%06d" % (hours, minutes, secs, delta.microseconds)