def wav_read(filename, normalize=True, verbose=True, auto_resample=True):
    '''Read a WAV file, optionally resampling and normalizing it.

    :param filename: input filename to read from
    :param normalize: normalize the read values (usually signed integers) to range (-1,1)
    :param verbose: output some information during reading
    :param auto_resample: auto-resampling: if the sample rate is different from
        11025, 22050 or 44100 Hz the file is resampled first, to 22050 Hz when
        the original rate is below 22050 Hz and to 44100 Hz otherwise
    :return: tuple of 3 elements: samplerate (e.g. 44100), samplewidth (e.g. 2 for 16 bit)
        and wavedata (the array returned by wavio.readwav, passed through
        normalize_wav when normalize is set)
    :raises NameError: if filename does not exist
    '''
    # check if file exists
    if not os.path.exists(filename):
        raise NameError("File does not exist:" + filename)

    samplerate, samplewidth, wavedata = wavio.readwav(filename)

    if auto_resample and samplerate not in (11025, 22050, 44100):
        # print original file info; a single formatted string prints the same
        # text under both the Python 2 statement and the Python 3 function
        if verbose:
            print("%s Hz, %s channel(s), %s samples"
                  % (samplerate, wavedata.shape[1], wavedata.shape[0]))

        to_samplerate = 22050 if samplerate < 22050 else 44100
        filename2 = resample(filename, to_samplerate, normalize=True, verbose=verbose)
        samplerate, samplewidth, wavedata = wavio.readwav(filename2)
        # NOTE(review): the resampled file filename2 is left on disk; the
        # original code had its removal commented out.

    if normalize:
        wavedata = normalize_wav(wavedata, samplewidth)

    return (samplerate, samplewidth, wavedata)
def wav_read(filename,normalize=True,verbose=True):
    """Load a WAV file through wavio and optionally normalize its samples.

    filename  -- path of the WAV file to read
    normalize -- when true, the sample data is passed through normalize_wav
    verbose   -- accepted but not used by this function

    Returns the tuple (samplerate, samplewidth, wavedata).
    Raises NameError when filename does not exist.
    """
    file_missing = not os.path.exists(filename)
    if file_missing:
        raise NameError("File does not exist:" + filename)

    rate, width, samples = wavio.readwav(filename)

    if not normalize:
        return (rate, width, samples)
    return (rate, width, normalize_wav(samples, width))
def read(filename, start=None, limit=None):
    """
    Reads any file supported by pydub (ffmpeg) and returns the data contained
    within. If file reading fails due to input being a 24-bit wav file,
    wavio is used as a backup.

    Can be optionally restricted to a window of the file. `limit` is the end
    of the window and `start` (optional, defaults to the beginning of the
    file) is where it begins; both are in seconds measured from the start of
    the file. Without `limit` the whole file is returned.

    returns: (channels, samplerate, unique_hash(filename))
    """
    begin = 0 if start is None else start

    # pydub does not support 24-bit wav files, use wavio when this occurs
    try:
        audiofile = AudioSegment.from_file(filename)

        if limit:
            if start is not None:
                print("start" + str(start))
                print("limit" + str(limit))
            # pydub segments are sliced in milliseconds
            audiofile = audiofile[begin * 1000:limit * 1000]

        data = np.fromstring(audiofile._data, np.int16)

        # samples are interleaved, so channel k is every n-th sample from k
        channels = [data[chn::audiofile.channels]
                    for chn in range(audiofile.channels)]

        fs = audiofile.frame_rate
    except audioop.error:
        fs, _, audiofile = wavio.readwav(filename)

        if limit:
            # this array is indexed by sample along its first axis, so
            # seconds must be scaled by the sample rate, not by 1000
            audiofile = audiofile[int(begin * fs):int(limit * fs)]

        audiofile = audiofile.T
        # NOTE(review): a plain cast of 24-bit samples to int16 can wrap
        # around - confirm whether a rescale is wanted here.
        audiofile = audiofile.astype(np.int16)

        channels = list(audiofile)

    # fs is set by both branches; in the fallback branch `audiofile` is a
    # numpy array and has no frame_rate attribute
    return channels, fs, unique_hash(filename)
def getdecibels(filename, chunks=None, chunk_factor=1):
    '''
    getdecibels function takes a filename and produces the decibel readings from it.

    TODO : It returns lists right now, it would be nice if it could be converted into a generator

    Arguments :
    chunks: default None. The number of chunks to divide the audio file into.
    chunk_factor : If chunks is not provided, the number of chunks is
                   max(int(duration / 60), 2) * chunk_factor, i.e. roughly
                   chunk_factor chunks per minute, with a floor of
                   2 * chunk_factor for recordings shorter than three minutes.

    Returns :
    (dbs, dbs_a) : two lists with one entry per chunk - the level of the raw
                   chunk and the level of the chunk after the filter built by
                   A_weighting(samprate), each computed as 20 * log10(rms).
    '''
    # get data from the wavio.
    # NOTE : The original wavio module does not return the duration.
    duration, samprate, sampwidth, wavdata = wavio.readwav(filename)

    # figure out exactly how many chunks we need to split it.
    if chunks is None:
        chunks = max(int(duration / 60), 2) * chunk_factor

    # we need to convert the wavdata into 64 bits because calculating the
    # decibels requires squaring it. If it's stored as a 32 bit int, the
    # integer will overflow.
    wavdata = wavdata.astype('int64')

    # Split the dataset into chunks
    pieces = np.array_split(wavdata, chunks)

    # drop our reference to the full array; the original author notes this
    # saved a lot of trouble while dealing with large files.
    del wavdata

    # dB SPL is basically dB = 20 * log10(amplitude)
    b, a = A_weighting(samprate)
    dbs = []
    dbs_a = []
    for piece in pieces:
        dbs.append(20 * np.log10(rms_flat(piece)))
        weighted = lfilter(b, a, piece)
        dbs_a.append(20 * np.log10(rms_flat(weighted)))

    return dbs, dbs_a
def test1(self):
    """Write data1 with writewav24, then verify the file header via the
    wave module and the round-tripped samples via readwav."""
    path = tempfile.mkdtemp()
    filename = os.path.join(path, "test1data.wav")
    wavio.writewav24(filename, 44100, data1)
    try:
        f = wave.open(filename, 'r')
        try:
            self.assertEqual(f.getnchannels(), 1)
            self.assertEqual(f.getsampwidth(), 3)
            self.assertEqual(f.getframerate(), 44100)
        finally:
            # close even when an assertion fails, so the handle is not
            # leaked and the file can be removed below
            f.close()

        rate, sampwidth, data = wavio.readwav(filename)
        self.assertEqual(rate, 44100)
        self.assertEqual(sampwidth, 3)
        self.assertEqual(data.dtype, np.int32)
        self.assertEqual(data.shape, (len(data1), 1))
        np.testing.assert_equal(data, data1)
    finally:
        os.remove(filename)
        os.removedirs(path)
def read(filename, limit=None):
    """
    Reads any file supported by pydub (ffmpeg) and returns the data contained
    within. If file reading fails due to input being a 24-bit wav file,
    wavio is used as a backup.

    Can be optionally limited to a certain amount of seconds from the start
    of the file by specifying the `limit` parameter. This is the amount of
    seconds from the start of the file.

    returns: (channels, samplerate, unique_hash(filename), length_in_seconds)
    where length_in_seconds is the duration of the (possibly limited) audio.
    """
    # pydub does not support 24-bit wav files, use wavio when this occurs
    try:
        audiofile = AudioSegment.from_file(filename)

        if limit:
            # pydub segments are sliced in milliseconds
            audiofile = audiofile[:limit * 1000]

        data = np.fromstring(audiofile._data, np.int16)

        # samples are interleaved, so channel k is every n-th sample from k
        channels = [data[chn::audiofile.channels]
                    for chn in range(audiofile.channels)]

        fs = audiofile.frame_rate
        # len() of a pydub segment is its duration in milliseconds
        length_in_seconds = len(audiofile) / 1000.0
    except audioop.error:
        fs, _, audiofile = wavio.readwav(filename)

        if limit:
            # this array is indexed by sample along its first axis, so
            # seconds must be scaled by the sample rate, not by 1000
            audiofile = audiofile[:int(limit * fs)]

        # take the duration before transposing: afterwards the first axis
        # is the channel axis and len() would count channels
        length_in_seconds = audiofile.shape[0] / float(fs)

        audiofile = audiofile.T
        audiofile = audiofile.astype(np.int16)

        channels = list(audiofile)

    # fs is set by both branches; in the fallback branch `audiofile` is a
    # numpy array and has no frame_rate attribute
    return channels, fs, unique_hash(filename), length_in_seconds
def read(filename, limit=None):
    """
    Reads any file supported by pydub (ffmpeg) and returns the data contained
    within. If file reading fails due to input being a 24-bit wav file,
    wavio is used as a backup.

    Can be optionally limited to a certain amount of seconds from the start
    of the file by specifying the `limit` parameter. This is the amount of
    seconds from the start of the file.

    returns: (channels, samplerate, unique_hash(filename))
    """
    # pydub does not support 24-bit wav files, use wavio when this occurs
    try:
        segment = pd.AudioSegment.from_file(filename)

        if limit:
            # pydub segments are sliced in milliseconds
            segment = segment[:limit * 1000]

        interleaved = np.fromstring(segment._data, np.int16)
        n_channels = segment.channels
        # de-interleave: channel k is every n_channels-th sample from k
        channels = [interleaved[chn::n_channels] for chn in range(n_channels)]

        fs = segment.frame_rate
    except pd.utils.audioop.error:
        fs, _, frames = wavio.readwav(filename)

        if limit:
            # frames are indexed by sample, so convert seconds with the
            # sample rate rather than the millisecond factor used above
            frames = frames[:int(limit * fs)]

        # one row per channel after the transpose
        channels = list(frames.T.astype(np.int16))

    # the fallback branch has no pydub segment, so the rate must come from
    # fs, which both branches set
    return channels, fs, unique_hash(filename)
def extract_drums(attack, decay, onset_array, wavefile):
    """
    extracts the individual drum hits from a wav file and writes them as
    separate wav files. attack and decay are in samples instead of
    milliseconds. So 44 samples is about 1 millisecond for a second recorded
    at 44.1Khz samplerate.

    The conditionals here need to be refined since the length of the
    subsequent wav files appear to be slightly random. It was found that an
    attack time of 1500 samples works well for drum transients. If the
    transient is too short (2200 samples =~50 ms not including the attack
    time), the file for that drum hit is not written as it will not be useful.

    Each hit runs from `attack` samples before its onset (clamped to the
    start of the file) up to the next onset, so the last entry of onset_array
    only serves as an end marker. Hit files and a CSV of
    [hit filename, samples from hit start to onset] rows are moved into the
    'separated_input' directory, which is created if it does not exist.
    """
    read_data = wavio.readwav(wavefile)
    samplerate = read_data[0]
    read_array = read_data[2]  # sample values in sequential order for the wavefile
    filename = os.path.splitext(wavefile)[0]
    output_csv_filename = '{0}_samples_to_onset.csv'.format(filename)
    output_dir = 'separated_input'

    # without an existing directory shutil.move would rename the first hit
    # to a plain file called 'separated_input'
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    samples_to_onset_array = []
    for i in range(len(onset_array) - 1):
        # slicing clamps an end index past the array, so one expression
        # covers the "next onset beyond the end of the data" cases too
        start = max(onset_array[i] - attack, 0)
        write_array = read_array[start:onset_array[i + 1]]

        # only keep the hit if it is long enough once the attack is discounted
        if len(write_array) - attack >= decay:
            output_filename = '{0}_{1}.wav'.format(filename, "%05d" % i)
            wavio.writewav24(output_filename, samplerate, write_array)
            shutil.move(output_filename, output_dir)
            samples_to_onset_array.append(
                [output_filename, onset_array[i] - start])

    with open(output_csv_filename, 'w') as csvfile:
        csv.writer(csvfile).writerows(samples_to_onset_array)
    shutil.move(output_csv_filename, output_dir)
def get_spectrogram_feature(filepath):
    """Return the mean-subtracted magnitude spectrogram of a wav file.

    The samples read by wavio are flattened, transformed with torch.stft
    (N_FFT bins, 10 ms hop and 30 ms Hamming window expressed in samples of
    SAMPLE_RATE), reduced to magnitudes, transposed, and shifted by their
    global mean.
    """
    _, _, samples = wavio.readwav(filepath)
    waveform = torch.FloatTensor(samples.ravel())  # [length]

    hop = int(0.01 * SAMPLE_RATE)
    win = int(0.030 * SAMPLE_RATE)
    spec = torch.stft(waveform,
                      N_FFT,
                      hop_length=hop,
                      win_length=win,
                      window=torch.hamming_window(win),
                      center=False,
                      normalized=False,
                      onesided=True)

    # last axis holds the two components whose squares are summed below
    first, second = spec[:, :, 0], spec[:, :, 1]
    magnitude = (first.pow(2) + second.pow(2)).pow(0.5)  # (N_FFT / 2 + 1 * T)

    feat = torch.FloatTensor(magnitude.numpy())
    feat = torch.FloatTensor(feat).transpose(0, 1)
    feat -= feat.mean()  # T * (N_FFT / 2 + 1)
    return feat
def get_spectrogram_feature(filepath):
    """Return the magnitude spectrogram of an audio file as a FloatTensor.

    Files whose second '/'-separated path component is 'TIMIT' are read as
    raw int16 data with the first 512 values skipped; everything else is read
    with wavio. The signal is transformed with torch.stft (N_FFT bins, 10 ms
    hop and 30 ms Hamming window expressed in samples of SAMPLE_RATE),
    reduced to magnitudes and transposed.
    """
    path_parts = filepath.split('/')
    # a path without any '/' has no second component; treat it as non-TIMIT
    # instead of raising IndexError
    if len(path_parts) > 1 and path_parts[1] == 'TIMIT':
        # presumably the skipped 512 int16 values are a file header - confirm
        sig = np.fromfile(filepath, dtype=np.int16)[512:].reshape((-1, 1))
    else:
        (_rate, _width, sig) = wavio.readwav(filepath)

    # np.float was only an alias of the builtin float (float64) and has been
    # removed from NumPy
    sig = sig.ravel().astype(np.float64) / 32767
    # NOTE(review): casting the scaled signal back to int16 truncates almost
    # every sample to 0. Kept as-is because changing it alters the features;
    # confirm whether the scaling or the cast is the intended step.
    sig = sig.astype(np.int16)

    win = int(0.03*SAMPLE_RATE)
    stft = torch.stft(torch.FloatTensor(sig),
                      N_FFT,
                      hop_length=int(0.01*SAMPLE_RATE),
                      win_length=win,
                      window=torch.hamming_window(win),
                      center=False,
                      normalized=False,
                      onesided=True)

    # magnitude from the two components stored on the last axis
    stft = (stft[:, :, 0].pow(2) + stft[:, :, 1].pow(2)).pow(0.5)
    amag = stft.numpy()
    feat = torch.FloatTensor(amag)
    feat = torch.FloatTensor(feat).transpose(0, 1)

    return feat
#!/usr/bin/python3 # Copyright © 2019 Bart Massey # [This program is licensed under the "MIT License"] # Please see the file LICENSE in the source # distribution of this software for license terms. import numpy as np import resamp import wavio # Combine a sample with a copy shifted up a third and a copy # shifted down two octaves for a harmonizing effect. # Play it for 5 seconds. # Get some samples. samples = wavio.readwav("loop.wav") nsamples = len(samples) # Minimum and maximum expected fundamental frequency of # samples in Hz. f_min = 110 f_max = 1720 # Minimum and maximum periods in samples. s_max = 48000 // f_min s_min = 48000 // f_max # Do an FFT to try to find the period of the signal. nfft = 2**14 nwin = 4 * s_max windowed = np.hamming(nwin) * np.array(samples[:nwin])
n_mfcc=40) print(feat.shape) feat[0] = librosa.feature.rmse(sig, hop_length=input_stride, frame_length=int(frame_length * SAMPLE_RATE)) print(feat[0].shape) feat = [feat] feat.append(librosa.feature.delta(feat[0])) feat.append(librosa.feature.delta(feat[0], order=2)) return feat path = "./sample_dataset/train/train_data/wav_001.wav" (rate, width, sig) = wavio.readwav(path) sig = sig.ravel() sig = sig.astype(np.float32) #print(librosa.core.stft(sig, 512)) y, sr = librosa.load(path, sr=16000) #print(librosa.feature.mfcc(sig, sr = 16000)) x = get_spectrogram_feature(path) #print(y, type(y), y.shape) #print(sig,type(sig), sig.shape) mel_x = Mel_S(path) mfcc_x = MFCC(path) print(x.shape) print(mel_x.shape)
def main():
    """
    writes the new file which will show up in the current directory as
    {filename}replaced.wav

    at the moment the replacement method is a simple prototype without using
    any ML yet. Once we are able to find the most similar-sounding drumhit
    this will obviously have to change.

    Reads the corpus CSV and 'input_mfcc.csv' (rows of filename followed by
    float coefficients), picks a replacement file for every input hit via
    vSpace.k_closest, and overwrites the samples of the wav file named by
    sys.argv[1] starting at each hit with the replacement's samples.
    Relies on the module-level names corpus, onsets, attack and filename.
    """
    # the vectSpace object to find the closest-sounding drumhit.
    vSpace = vectSpace.vectSpace([], [])

    # create a drumVect from each corpus row and add it to vSpace.
    # A list comprehension is used instead of map() so the coefficients
    # become a numeric array on Python 3 as well.
    with open(corpus, 'rb') as csvfile:
        for row in csv.reader(csvfile):
            coeffs = np.array([float(e) for e in row[1:]])
            vSpace.add_vect(drumVect.drumVect('{0}'.format(row[0]), coeffs))

    # drumVect objects from the input .wav
    inVectArray = []
    with open('input_mfcc.csv', 'rb') as csvfile:
        for row in csv.reader(csvfile):
            coeffs = np.array([float(e) for e in row[1:]])
            inVectArray.append(drumVect.drumVect('{0}'.format(row[0]), coeffs))

    w = wavio.readwav(sys.argv[1])
    # the sample data of the supplied file (not a copy); modified in place.
    writeArray = w[2]

    for hit, inVect in enumerate(inVectArray):
        onset = onsets[hit]
        # back up by the attack length when there is room for it. The same
        # value is used for the check and the offset so start can never
        # become negative (a negative index would address the end of the
        # array).
        start = onset if onset - attack < 0 else onset - attack

        nextHit = None if hit == len(onsets) - 1 else onsets[hit + 1]

        # our replacement hit.
        replacedHit = vSpace.k_closest(1, inVect)[1].get_filename()
        replacedHitArray = wavio.readwav('./{0}'.format(replacedHit))[2]

        # the replacement may fill the span up to and including the next
        # onset, or up to the end of the data for the last hit; copy as
        # much of it as fits into that span.
        end = len(writeArray) if nextHit is None else nextHit + 1
        room = len(writeArray[start:end])
        count = min(room, len(replacedHitArray))
        writeArray[start:start + count] = replacedHitArray[0:count]

    # save the replaced drum file as a new file.
    wavio.writewav24(
        '{0}replaced.wav'.format(filename[:len(filename) - 4]),
        w[0],
        writeArray)
wav_files = filter(lambda f: '.wav' in f, separated_files) # individual hits from the input file csv_file = filter(lambda f: '.csv' in f, separated_files)[0] # samples to onset csv samples_to_onset_dict = {} with open('separated_input/{0}'.format(csv_file), 'r') as c: read = csv.reader(c) for row in read: samples_to_onset_dict[os.path.basename(row[0])] = row[1] hop_s = win_s / 4 temp_files = [] for wav in wav_files: wav_file_path = "separated_input/{0}".format(wav) samples_to_onset = int(samples_to_onset_dict[wav]) readwav = wavio.readwav(wav_file_path) readwav_array = readwav[2] mfccwav_array = readwav_array[samples_to_onset:samples_to_onset + samples_for_mfcc] temp_filename = 'separated_input/{0}_temp.wav'.format( os.path.splitext(wav)[0]) wavio.writewav24(temp_filename, readwav[0], mfccwav_array) temp_files.append(temp_filename) csv_output = [] for temp_file in temp_files: samplerate = 0 s = a.source(temp_file, samplerate, hop_s) samplerate = s.samplerate p = a.pvoc(win_s, hop_s)
import wavio
from scipy.signal import decimate

# Read the source recording: sample rate, sample width and the sample array.
rate, sampwidth, data = wavio.readwav('/home/eudocio/Escritorio/borrar/EHB/Cuestionarios/121 (AUDIO A).wav')

decimationFactor = 2
nframes = len(data)

# Downsample along the frame axis with an order-1 FIR filter.
subSamplingData = decimate(data, decimationFactor, 1, 'fir', axis=0)

# Mono variants: per-frame average of the channels, and the first two
# channels on their own.
monoAverage = data.mean(axis=1)
monoLeft = data[:, 0]
monoRight = data[:, 1]

# Output rates are derived from the rate that was actually read instead of
# assuming a 48 kHz input: the decimated signal has rate / decimationFactor,
# the mono variants keep the original rate.
wavio.writewav24('/home/eudocio/Escritorio/borrar/EHB/Cuestionarios/121_AUDIO_A_decimado.wav', rate // decimationFactor, subSamplingData)
wavio.writewav24('/home/eudocio/Escritorio/borrar/EHB/Cuestionarios/121_AUDIO_A_nuevo.wav', rate, monoAverage)
wavio.writewav24('/home/eudocio/Escritorio/borrar/EHB/Cuestionarios/121_AUDIO_A_izquierdo.wav', rate, monoLeft)
wavio.writewav24('/home/eudocio/Escritorio/borrar/EHB/Cuestionarios/121_AUDIO_A_derecho.wav', rate, monoRight)

print(subSamplingData)
def _load_drum_vects(csv_path):
    """Build one drumVect per CSV row: first column is the filename, the
    remaining columns are parsed as floats."""
    vects = []
    with open(csv_path, 'rb') as csvfile:
        for row in csv.reader(csvfile):
            # list comprehension rather than map() so that the result is a
            # numeric array on Python 3 too
            values = np.array([float(e) for e in row[1:]])
            vects.append(drumVect.drumVect('{0}'.format(row[0]), values))
    return vects


def main():
    """
    writes the new file which will show up in the current directory as
    {filename}replaced.wav

    at the moment the replacement method is a simple prototype without using
    any ML yet. Once we are able to find the most similar-sounding drumhit
    this will obviously have to change.

    Uses the module-level names corpus (CSV path of the drum corpus), onsets,
    attack and filename, and takes the wav file to process from sys.argv[1].
    """
    # the vectSpace object to find the closest-sounding drumhit, filled
    # with one drumVect per corpus row.
    vSpace = vectSpace.vectSpace([], [])
    for dVect in _load_drum_vects(corpus):
        vSpace.add_vect(dVect)

    # drumVect objects from the input .wav
    inVectArray = _load_drum_vects('input_mfcc.csv')

    w = wavio.readwav(sys.argv[1])
    writeArray = w[2]  # the data of the supplied file (not a copy); modified in place.

    last_index = len(onsets) - 1
    for hit in range(len(inVectArray)):
        # start `attack` samples before the onset when that stays inside
        # the data. The offset uses the same value as the check, so start
        # cannot go negative and wrap around to the end of the array.
        if onsets[hit] - attack < 0:
            start = onsets[hit]
        else:
            start = onsets[hit] - attack

        # our replacement hit.
        replacedHit = vSpace.k_closest(1, inVectArray[hit])[1].get_filename()
        repl = wavio.readwav('./{0}'.format(replacedHit))  # file of replacement hit
        replacedHitArray = repl[2]  # sample array

        # span available for this hit: through the next onset, or to the
        # end of the data for the last hit
        if hit == last_index:
            available = writeArray[start:]
        else:
            available = writeArray[start:onsets[hit + 1] + 1]

        if len(available) < len(replacedHitArray):
            # replacement is longer than the span: truncate it to fit
            stop = start + len(available)
            writeArray[start:stop] = replacedHitArray[0:len(available)]
        else:
            # replacement fits: write all of it, leaving the rest untouched
            stop = start + len(replacedHitArray)
            writeArray[start:stop] = replacedHitArray

    # save the replaced drum file as a new file.
    wavio.writewav24('{0}replaced.wav'.format(filename[:len(filename) - 4]), w[0], writeArray)
# compute MFCC for input file separated_files = os.listdir('separated_input') wav_files = filter(lambda f : '.wav' in f, separated_files) # individual hits from the input file csv_file = filter(lambda f : '.csv' in f, separated_files)[0] # samples to onset csv samples_to_onset_dict = {} with open('separated_input/{0}'.format(csv_file), 'r') as c: read = csv.reader(c) for row in read: samples_to_onset_dict[os.path.basename(row[0])] = row[1] hop_s = win_s / 4 temp_files = [] for wav in wav_files: wav_file_path = "separated_input/{0}".format(wav) samples_to_onset = int(samples_to_onset_dict[wav]) readwav = wavio.readwav(wav_file_path) readwav_array = readwav[2] mfccwav_array = readwav_array[samples_to_onset : samples_to_onset + samples_for_mfcc] temp_filename = 'separated_input/{0}_temp.wav'.format(os.path.splitext(wav)[0]) wavio.writewav24(temp_filename, readwav[0],mfccwav_array) temp_files.append(temp_filename) csv_output = [] for temp_file in temp_files: samplerate = 0 s = a.source(temp_file, samplerate, hop_s) samplerate = s.samplerate p = a.pvoc(win_s, hop_s) m = a.mfcc(win_s, n_filters, n_coeffs, samplerate) mfccs = np.zeros([n_coeffs,])