def song(self, path):
    """Split the track at ``self.path`` into accompaniment and vocals.

    Loads at most the first 120 seconds of audio, filters the magnitude
    spectrogram with ``librosa.decompose.nn_filter`` (median aggregate,
    cosine metric) to estimate the background, derives soft masks for the
    background and the remainder, and reconstructs both signals with
    Griffin-Lim. The two signals are written to ``sound_results/song/``,
    a plot is saved under ``plot_results/song/`` and added to
    ``self.ui1.widget_song``, and ``self.alarm`` is called at the end.

    Args:
        path: Not used by this method; the input is read from ``self.path``.
    """
    audio, sample_rate = librosa.load(self.path, duration=120)
    magnitude, _ = librosa.magphase(librosa.stft(audio))

    # The filter width is the number of frames that corresponds to 2 seconds.
    frame_width = int(librosa.time_to_frames(2, sr=sample_rate))
    background_estimate = librosa.decompose.nn_filter(
        magnitude,
        aggregate=np.median,
        metric='cosine',
        width=frame_width,
    )
    # Keep the estimate element-wise no larger than the mixture itself.
    background_estimate = np.minimum(magnitude, background_estimate)
    residual = magnitude - background_estimate

    background_margin = 2
    vocal_margin = 10
    exponent = 2
    background_mask = librosa.util.softmask(
        background_estimate, background_margin * residual, power=exponent
    )
    vocal_mask = librosa.util.softmask(
        residual, vocal_margin * background_estimate, power=exponent
    )

    # Reconstruct the accompaniment first, then the vocals.
    music = librosa.griffinlim(background_mask * magnitude)
    vocal = librosa.griffinlim(vocal_mask * magnitude)

    outputs = (
        ('sound_results/song/music.wav', music),
        ('sound_results/song/vocal.wav', vocal),
    )
    for wav_path, signal in outputs:
        scipy.io.wavfile.write(wav_path, sample_rate, signal)

    utl.plotSounds(
        [music, vocal],
        ["music", "vocal"],
        sample_rate,
        "plot_results/song/song_separation_plot.png",
    )

    # Show the saved plot image inside the song widget.
    pixmap = pg.QtGui.QPixmap('plot_results/song/song_separation_plot.png')
    plot_item = pg.QtGui.QGraphicsPixmapItem(pixmap)
    song_widget = self.ui1.widget_song
    song_widget.addItem(plot_item)
    song_widget.invertY(True)

    self.alarm("Check Plot & Sound Results Files")
def cocktail(self):
    """Separate the two mono mixtures at ``self.path1`` / ``self.path2``.

    Both WAV files are read, centred and divided by their maximum value,
    stacked into a matrix, whitened with ``utl.whitenMatrix`` and unmixed
    one component at a time with ``FA``. The separated signals are plotted,
    written to ``sound_results/cocktail_party/`` and the plot is added to
    ``self.ui2.widget_party``. ``self.alarm`` is called with a status
    message; if either input is not one-dimensional it is called with an
    error message and nothing else happens.

    Only the sample rate of the first file is used for the outputs.
    """
    eps = 0.00000001
    rate1, data1 = wavfile.read(self.path1)
    _, data2 = wavfile.read(self.path2)

    if data1.ndim != 1 or data2.ndim != 1:
        self.alarm("Please Chose another file with 1D data")
        return

    # Centre each mixture and scale it by its maximum sample.
    # np.max gives the same value as the builtin max() but without
    # iterating over the samples in Python.
    data1 = data1 - np.mean(data1)
    data1 = data1 / np.max(data1)
    data2 = data2 - np.mean(data2)
    data2 = data2 / np.max(data2)

    matrix = np.vstack([data1, data2])
    whiteMatrix = utl.whitenMatrix(matrix)
    X = whiteMatrix

    # FA receives the components found so far, so this must stay a loop.
    vectors = []
    for _ in range(X.shape[0]):
        vectors.append(FA(X, vectors, eps))

    W = np.vstack(vectors)
    S = np.dot(W, whiteMatrix)

    utl.plotSounds(
        [S[0], S[1]],
        ["source_1", "source_2"],
        rate1,
        "plot_results/cocktail_party/song_separation_plot.png",
    )

    int16_limits = np.iinfo(np.int16)

    def to_int16(signal):
        # Scale BEFORE casting: casting first truncates the float signal to
        # whole numbers and discards almost all of its detail. Clipping
        # prevents wrap-around when a scaled sample exceeds the int16 range.
        scaled = np.clip(5000 * signal, int16_limits.min, int16_limits.max)
        return scaled.astype(np.int16)

    wavfile.write("sound_results/cocktail_party/source1.wav", rate1, to_int16(S[0]))
    wavfile.write("sound_results/cocktail_party/source2.wav", rate1, to_int16(S[1]))

    img = pg.QtGui.QGraphicsPixmapItem(
        pg.QtGui.QPixmap('plot_results/cocktail_party/song_separation_plot.png')
    )
    self.ui2.widget_party.addItem(img)
    self.ui2.widget_party.invertY(True)
    self.alarm("Check Plot & Sound Results Files")
# Stack the two mixed signals into one matrix, one signal per row
signals = [data1, data2]
matrix = np.vstack(signals)

# Whitening the matrix as a pre-processing step
whiteMatrix = utils.whitenMatrix(matrix)
X = whiteMatrix

# Find the individual components one by one. FastICA is given the
# components found so far, so they have to be collected incrementally.
vectors = []
for i in range(0, X.shape[0]):
    vector = FastICA(X, vectors, eps)
    vectors.append(vector)

# Stack the vectors to form the unmixing matrix
W = np.vstack(vectors)

# Apply the unmixing matrix to the whitened data
S = np.dot(W, whiteMatrix)

# Plot the separated sound signals
utils.plotSounds([S[0], S[1]], ["SignalA", "SignalB"], rate1, "Ring_StarWars_separated")

# Write the separated sound signals; 5000 is multiplied so that the signal is
# audible. The scaling is done BEFORE the int16 cast: casting first truncates
# the float signal to whole numbers and discards almost all of its detail.
# Clipping prevents wrap-around when a scaled sample exceeds the int16 range.
int16_limits = np.iinfo(np.int16)
wavfile.write(
    "./ICAseparate" + name[0] + ".wav",
    rate1,
    np.clip(5000 * S[0], int16_limits.min, int16_limits.max).astype(np.int16),
)
wavfile.write(
    "./ICAseparate" + name[1] + ".wav",
    rate1,
    np.clip(5000 * S[1], int16_limits.min, int16_limits.max).astype(np.int16),
)
import numpy as np
from scipy.io import wavfile

import utilities as utils

# Load both source recordings as numpy arrays
rate1, data1 = wavfile.read("sourceA.wav")
rate2, data2 = wavfile.read("sourceB.wav")

# Build two different weighted mixes with the mixSounds helper from
# utilities.py and store them as 16-bit integers
mixedX, mixedY = (
    utils.mixSounds([data1, data2], list(weights)).astype(np.int16)
    for weights in ((0.3, 0.7), (0.6, 0.4))
)

# Plot the mixed signals
utils.plotSounds(
    [mixedX, mixedY],
    ["mixedA", "mixedB"],
    rate1,
    "../plots/sounds/Ring_StarWars_mixed",
    False,
)

# Save each mix as a wav file, using the sample rate of the first source
for wav_name, mixed in (("mixedA.wav", mixedX), ("mixedB.wav", mixedY)):
    wavfile.write(wav_name, rate1, mixed)
"""The script makes the sources to have same length, as well as have the same sampling rate""" from scipy.io import wavfile import utilities as utl # Read the .wav files as numpy arrays rate1, data1 = wavfile.read("sourceX.wav") rate2, data2 = wavfile.read("sourceY.wav") # Plot the sounds as time series data utl.plotSounds([data1, data2], ["PhoneRing", "StarWars"], rate1, "../plots/sounds/Ring_StarWars_original") # Make both of the files to have same length as well as same sampling rate minimum = min(data1.shape[0], data2.shape[0]) # Slicing the array for both the sources data1 = data1[0:minimum] data2 = data2[0:minimum] # writing the array into to the wav file with sampling rate which is average of the two wavfile.write("sourceX.wav", (rate1 + rate2) / 2, data1) wavfile.write("sourceY.wav", (rate1 + rate2) / 2, data2)
vectors.append(vector) # Stack the vectors to form the unmixing matrix W = np.vstack(vectors) # Get the original matrix S = np.dot(W, whiteMatrix) # Unmixing matrix through FOBI fobiW = FOBI(X) # Get the original matrix using fobiW fobiS = np.dot(fobiW.T, whiteMatrix) # Plot the separated sound signals utl.plotSounds([S[0], S[1]], ["1", "2"], rate1, "Ring_StarWars_separated") # Write the separated sound signals, 5000 is multiplied so that signal is audible wavfile.write("./sounds/FOBIseparate" + name[0] + ".wav", rate1, 5000 * S[0].astype(np.int16)) wavfile.write("./sounds/FOBIseparate" + name[1] + ".wav", rate1, 5000 * S[1].astype(np.int16)) # Plot the separated sound signals utl.plotSounds([fobiS[1], fobiS[0]], ["1", "2"], rate1, "Ring_StarWars_separated_FOBI") # Write the separated sound signals, 5000 is multiplied so that signal is audible wavfile.write("./sounds/FOBIseparate" + name[0] + ".wav", rate1, 5000 * fobiS[1].astype(np.int16)) wavfile.write("./sounds/FOBIseparate" + name[1] + ".wav", rate1,
def _scale_to_int16(signal, gain=5000):
    """Return ``gain * signal`` clipped to the int16 range and cast to int16."""
    limits = np.iinfo(np.int16)
    # Scale first, cast last: casting the float signal first would truncate it
    # to whole numbers before the gain is applied.
    return np.clip(gain * signal, limits.min, limits.max).astype(np.int16)


def separate_sound3(b, name):
    """Separate three mixed recordings with FastICA and with FOBI.

    The three inputs are read, centred and divided by 32768 (presumably
    because the input is 16-bit PCM -- TODO confirm), stacked, whitened
    with ``utl.whitenMatrix`` and unmixed twice: component by component
    with ``FastICA`` and in one step with ``FOBI``. Both results are
    plotted and written as WAV files using the sample rate of the first
    input.

    Args:
        b: Indexable holding three inputs accepted by ``wavfile.read``
            (``b[0]``, ``b[1]``, ``b[2]``).
        name: Ignored. It is overwritten with ``["1", "2", "3"]`` below,
            which is kept so the output file names do not change.

    Returns:
        None. The results are the plots and WAV files written to disk.
    """
    name = ["1", "2", "3"]
    eps = 0.00000001
    out_prefix = "C:/SPEECH_SEPARATION--FinalYear/Separated/separate-"

    # Read the mixed signals
    loaded = [wavfile.read(b[index]) for index in range(3)]
    rate1 = loaded[0][0]

    # Centre the mixed signals and scale the values
    signals = [(data - np.mean(data)) / 32768 for _, data in loaded]

    # Create a matrix out of the signals, one signal per row
    matrix = np.vstack(signals)

    # Whitening the matrix as a pre-processing step
    whiteMatrix = utl.whitenMatrix(matrix)
    X = whiteMatrix

    # Find the individual components one by one. FastICA is given the
    # components found so far, so this has to stay an incremental loop.
    vectors = []
    for _ in range(X.shape[0]):
        vectors.append(FastICA(X, vectors, eps))

    # Stack the vectors to form the unmixing matrix and apply it
    W = np.vstack(vectors)
    S = np.dot(W, whiteMatrix)

    # Unmixing matrix through FOBI, applied to the same whitened data
    fobiW = FOBI(X)
    fobiS = np.dot(fobiW.T, whiteMatrix)

    # Plot and write the FastICA result
    utl.plotSounds([S[0], S[1], S[2]], ["1", "2", "3"], rate1, "separated")
    for index in range(3):
        wavfile.write(
            out_prefix + name[index] + ".wav",
            rate1,
            _scale_to_int16(S[index]),
        )

    # Plot and write the FOBI result; the first two components are swapped.
    # NOTE: these files reuse the FastICA file names above, so the FastICA
    # audio is overwritten and only the FOBI audio remains on disk.
    fobi_order = (1, 0, 2)
    utl.plotSounds(
        [fobiS[row] for row in fobi_order], ["1", "2", "3"], rate1, "separated_FOBI"
    )
    for index, row in enumerate(fobi_order):
        wavfile.write(
            out_prefix + name[index] + ".wav",
            rate1,
            _scale_to_int16(fobiS[row]),
        )
import numpy as np
from scipy.io import wavfile

import utilities as utl

# Load both source recordings as numpy arrays
rate1, data1 = wavfile.read("13-84-1.wav")
rate2, data2 = wavfile.read("13-172-1.wav")

# Build two different weighted mixes with the mixSounds helper from
# utilities.py and store them as 16-bit integers
mixedX, mixedY = (
    utl.mixSounds([data1, data2], list(weights)).astype(np.int16)
    for weights in ((0.3, 0.7), (0.6, 0.4))
)

# Plot the mixed signals
utl.plotSounds(
    [mixedX, mixedY],
    ["mixed-1", "mixed-3"],
    rate1,
    "../plots/sounds/Ring_StarWars_mixed",
    False,
)

# Save each mix as a wav file, using the sample rate of the first source
for wav_name, mixed in (("mixed-1.wav", mixedX), ("mixed-3.wav", mixedY)):
    wavfile.write(wav_name, rate1, mixed)