def _test_write(self, func, format, filext):
    """Check a matapi ``*write`` function against a direct Sndfile write.

    The same noise buffer is written twice -- once through ``Sndfile`` using
    ``format`` and once through ``func`` -- and both files are then compared
    for audio data, format, sample rate and channel count.

    ``func`` is called as ``func(data, filename, fs)``; ``filext`` is the
    extension given to both temporary files.
    """
    rfd1, fd1, cfilename1 = open_tmp_file('matapi_test.' + filext)
    try:
        rfd2, fd2, cfilename2 = open_tmp_file('matapi_test.' + filext)
        try:
            nbuff = 22050
            fs = nbuff
            noise = 0.1 * N.random.randn(nbuff)

            # Reference file, written directly with Sndfile.
            b = Sndfile(cfilename1, 'w', format, 1, fs)
            try:
                b.write_frames(noise)
            finally:
                b.close()

            # Same data written through the function under test.
            func(noise, cfilename2, fs)

            # Both files must carry the same audio data and meta-data.
            # The handles are closed even when an assertion fails so the
            # temporary files can always be removed afterwards.
            f1 = Sndfile(cfilename1)
            try:
                f2 = Sndfile(cfilename2)
                try:
                    assert_array_equal(f1.read_frames(f1.nframes),
                                       f2.read_frames(f2.nframes))
                    assert_equal(f1.format, f2.format)
                    assert_equal(f1.samplerate, f2.samplerate)
                    assert_equal(f1.channels, f2.channels)
                finally:
                    f2.close()
            finally:
                f1.close()
        finally:
            close_tmp_file(rfd2, cfilename2)
    finally:
        close_tmp_file(rfd1, cfilename1)
def test_basic_io(self):
    """Check open, close and basic read/write.

    Copies the reference ``test.wav`` into a temporary pcm16 WAV file in
    blocks of ``nbuff`` frames, asserting that frames read with the default
    dtype come back as 64-bit floats.
    """
    ofilename = join(TEST_DATA_DIR, 'test.wav')
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        nbuff = 22050

        # Open the test file for reading
        a = Sndfile(ofilename, 'r')
        try:
            nframes = a.nframes

            # Open the copy file for writing
            fmt = Format('wav', 'pcm16')
            b = Sndfile(fd, 'w', fmt, a.channels, a.samplerate)
            try:
                # Floor division: range() needs an int under true division.
                for _ in range(nframes // nbuff):
                    tmpa = a.read_frames(nbuff)
                    # np.float was only an alias of the builtin float
                    # (float64) and has been removed from NumPy.
                    assert tmpa.dtype == np.float64
                    b.write_frames(tmpa)

                # Copy whatever is left after the last full block.
                nrem = nframes % nbuff
                tmpa = a.read_frames(nrem)
                assert tmpa.dtype == np.float64
                b.write_frames(tmpa)
            finally:
                b.close()
        finally:
            a.close()
    finally:
        close_tmp_file(rfd, cfilename)
def load_soundfile(inwavpath, startpossecs, maxdursecs=None):
    """Load mono audio, optionally limited to a start position and duration.

    The file must be single-channel and its sample rate must equal the
    module-level ``srate * wavdownsample``; every ``wavdownsample``-th sample
    is kept.

    Parameters:
        inwavpath: path of the sound file to read.
        startpossecs: start offset in seconds (seeks only when > 0).
        maxdursecs: maximum duration in seconds; ``None`` means 9999.

    Returns:
        1-D ``float32`` array of the decimated samples.

    Raises:
        ValueError: on incompatible frame settings, a multi-channel file or
            a sample-rate mismatch.
    """
    framelen = 4096
    hopspls = framelen
    unhopspls = framelen - hopspls
    if (framelen % wavdownsample) != 0:
        raise ValueError(
            "framelen needs to be a multiple of wavdownsample: %i, %i" % (
                framelen, wavdownsample))
    if (hopspls % wavdownsample) != 0:
        raise ValueError(
            "hopspls needs to be a multiple of wavdownsample: %i, %i" % (
                hopspls, wavdownsample))

    if maxdursecs is None:
        maxdursecs = 9999

    sf = Sndfile(inwavpath, "r")
    try:
        if sf.channels != 1:
            raise ValueError(
                "Sound file %s has multiple channels (%i) - mono required."
                % (inwavpath, sf.channels))
        timemax_spls = int(maxdursecs * sf.samplerate)
        expected_rate = srate * wavdownsample
        if sf.samplerate != expected_rate:
            # Report the rate that is actually compared against.
            raise ValueError(
                "Sample rate mismatch: we expect %g, file has %g"
                % (expected_rate, sf.samplerate))

        if startpossecs > 0:
            # note: the original comment says seek fails with IOError when
            # the position is beyond the end of the file
            sf.seek(int(startpossecs * sf.samplerate))

        # Collect chunks and join once, instead of re-copying the whole
        # result on every iteration.
        chunks = []
        splsread = 0
        framesread = 0
        chunk = None
        while True:
            try:
                if splsread == 0:
                    chunk = sf.read_frames(framelen)[::wavdownsample]
                    splsread += framelen
                else:
                    chunk = np.hstack((
                        chunk[:unhopspls],
                        sf.read_frames(hopspls)[::wavdownsample]))
                    splsread += hopspls
            except RuntimeError:
                # A RuntimeError from read_frames is treated as end of data.
                break

            framesread += 1
            if framesread % 25000 == 0:
                print("Read %i frames" % framesread)
            if len(chunk) != (framelen // wavdownsample):
                print("Not read sufficient samples - returning")
                break
            chunk = np.array(chunk, dtype=np.float32)
            chunks.append(chunk)
            if splsread >= timemax_spls:
                break
    finally:
        sf.close()

    if not chunks:
        return np.array([], dtype=np.float32)
    return np.concatenate(chunks)
def test_simple(self):
    """Check that seeking back re-reads the same frames.

    Exercises ``seek`` with the default whence, with whence == 1 (SEEK_CUR)
    and with whence == 2 (SEEK_END) on the reference ``test.wav``.
    """
    ofilename = join(TEST_DATA_DIR, 'test.wav')

    # First, read some frames, go back, and compare buffers
    a = Sndfile(ofilename, 'r')
    try:
        nframes = a.nframes
        buffsize = min(nframes, 1024)

        buff = a.read_frames(buffsize)
        a.seek(0)
        buff2 = a.read_frames(buffsize)
        assert_array_equal(buff, buff2)
    finally:
        a.close()

    # Now, read some frames, go back, and compare buffers
    # (check whence == 1 == SEEK_CUR)
    a = Sndfile(ofilename, 'r')
    try:
        a.read_frames(buffsize)
        buff = a.read_frames(buffsize)
        a.seek(-buffsize, 1)
        buff2 = a.read_frames(buffsize)
        assert_array_equal(buff, buff2)
    finally:
        a.close()

    # Now, read some frames, go back, and compare buffers
    # (check whence == 2 == SEEK_END)
    a = Sndfile(ofilename, 'r')
    try:
        buff = a.read_frames(nframes)
        a.seek(-buffsize, 2)
        buff2 = a.read_frames(buffsize)
        assert_array_equal(buff[-buffsize:], buff2)
    finally:
        # This handle was previously never closed.
        a.close()
def read_sound(fp):
    """Create a normalized float array and sample rate from an audio file.

    The loader is chosen from the file-name suffix:

    * ``mp3``: decoded to ``temp.wav`` with the ``lame`` command line tool;
      if that fails, ``moviepy`` is tried instead.
    * ``aif``: read with ``Sndfile``.
    * ``wav``: read with ``wav.read``.

    Only the first channel of multi-channel data is kept. The samples are
    converted to ``float64`` and divided by their maximum value.

    Returns:
        ``(data, samplerate)``.

    Raises:
        ValueError: if no loader produced any data (unsupported suffix, or
            every mp3 decoder failed).
    """
    data = None
    samplerate = None

    if fp.endswith('mp3'):
        try:
            oname = 'temp.wav'
            result = subprocess.call(['lame', '--decode', fp, oname])
            # An explicit check: ``assert`` is stripped under -O and
            # ``is 0`` compares identity rather than value.
            if result != 0:
                raise RuntimeError("lame exited with status %r" % (result,))
            samplerate, data = wav.read(oname)
        except Exception:
            print("couldn't run lame")
            try:
                import moviepy.editor as mpy
                aud_clip = mpy.AudioFileClip(fp)
                samplerate = aud_clip.fps
                data = aud_clip.to_soundarray()
            except Exception:
                print("moviepy not installed?")

    if fp.endswith('aif'):
        sf = Sndfile(fp, 'r')
        try:
            sf.seek(0)
            data = sf.read_frames(sf.nframes)
            samplerate = sf.samplerate
        finally:
            sf.close()

    if fp.endswith('wav'):
        samplerate, data = wav.read(fp)

    if data is None:
        # Previously this surfaced as an unbound-variable error below.
        raise ValueError("could not load audio data from %r" % (fp,))

    if len(data.shape) > 1:
        data = data[:, 0]
    data = data.astype('float64')
    peak = data.max()
    if peak != 0:
        # Skip the division for a zero peak to avoid filling the array
        # with NaN/inf.
        data /= peak
    return data, samplerate
def test_bad_wavread(self):
    """Check that wavread rejects a file that is not in WAV format."""
    # An AIFF file is written under a ``.wav`` name, read back once through
    # Sndfile, and then handed to wavread, which is expected to raise
    # ValueError.
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        nbuff = 22050
        noise = 0.1 * N.random.randn(nbuff)

        aiff_format = audio_format('aiff', 'pcm16')
        writer = Sndfile(cfilename, 'w', aiff_format, 1, nbuff)
        writer.write_frames(noise)
        writer.close()

        reader = Sndfile(cfilename, 'r')
        rcnoise = reader.read_frames(nbuff)
        reader.close()

        try:
            rnoise = wavread(cfilename)[0]
        except ValueError:
            # Expected outcome.
            pass
        else:
            raise Exception("wavread on non wav file succeded, expected to fail")
    finally:
        close_tmp_file(rfd, cfilename)
def load_pcm(path):
    """Read a whole sound file and return ``(pcm, samplerate)``.

    For files with a channel count other than 1, only the first column of
    the frame array (``pcm[:, 0]``) is returned.
    """
    wave = Sndfile(path, "r")
    try:
        pcm = wave.read_frames(wave.nframes)
        # Read the meta-data before closing the handle.
        channels = wave.channels
        samplerate = wave.samplerate
    finally:
        wave.close()
    # Value comparison: ``is not 1`` tested object identity.
    if channels != 1:
        pcm = pcm[:, 0]
    return (pcm, samplerate)
def file_to_specgram(path, specgrammode=None):
    """Load a mono sound file and return its log-magnitude spectrogram.

    Only the default mode (``specgrammode=None``) is implemented: the file
    is read in chunks, transformed with ``stft`` and cropped to the
    module-level ``specfreqbinrange``.

    Returns:
        ``(mags, phasifiers)`` where ``mags`` is the natural log of the
        magnitudes and ``phasifiers`` is ``spec / abs(spec)``.

    Raises:
        ValueError: for an unknown mode, a missing file, or when ``fftsize``
            differs from ``framelen``.
        Error: for a multi-channel file or a sample rate other than ``fs``.
    """
    if specgrammode is None:
        # default is to do a "normal" spectrogram right here
        if fftsize != framelen:
            raise ValueError("this mode requires normal fftsize")
        if not os.path.isfile(path):
            raise ValueError("path %s not found" % path)
        sf = Sndfile(path, "r")
        try:
            # NOTE(review): ``Error`` is not defined in the visible source;
            # confirm it is provided elsewhere in the module.
            if sf.channels != 1:
                raise Error("ERROR in spemptk: sound file has multiple channels (%i) - mono audio required." % sf.channels)
            if sf.samplerate != fs:
                raise Error("ERROR in spemptk: wanted srate %g - got %g." % (fs, sf.samplerate))

            chunksize = 4096
            # Seeded with an empty float64 array so the joined result has
            # the same dtype as the original incremental hstack.
            chunks = [np.array([])]
            while True:
                try:
                    chunks.append(sf.read_frames(chunksize, dtype=np.float32))
                except RuntimeError:
                    # A RuntimeError from read_frames ends the read loop.
                    break
        finally:
            sf.close()
        pcm = np.hstack(chunks)
        spec = stft(pcm).T
    else:
        raise ValueError("specgrammode not recognised: %s" % specgrammode)

    spec = spec[specfreqbinrange[0]:specfreqbinrange[1], :]
    mags = abs(spec)
    phasifiers = spec / mags
    if specgrammode is None:
        mags = np.log(mags)
    return (mags, phasifiers)
def file_to_specgram(path, specgrammode=None):
    """Return ``(log-magnitudes, phasifiers)`` of a mono file's spectrogram.

    With ``specgrammode=None`` (the only supported value) the file is read
    chunk by chunk, passed through ``stft``, transposed and cropped to the
    rows ``specfreqbinrange[0]:specfreqbinrange[1]``.

    Raises:
        ValueError: unknown ``specgrammode``, missing ``path``, or
            ``fftsize != framelen``.
        Error: multi-channel input or a sample rate different from ``fs``.
    """
    if specgrammode is not None:
        raise ValueError("specgrammode not recognised: %s" % specgrammode)

    # default is to do a "normal" spectrogram right here
    if fftsize != framelen:
        raise ValueError("this mode requires normal fftsize")
    if not os.path.isfile(path):
        raise ValueError("path %s not found" % path)

    sf = Sndfile(path, "r")
    try:
        # NOTE(review): ``Error`` is not defined in the visible source;
        # confirm it exists elsewhere in the module.
        if sf.channels != 1:
            raise Error(
                "ERROR in spemptk: sound file has multiple channels (%i) - mono audio required."
                % sf.channels)
        if sf.samplerate != fs:
            raise Error("ERROR in spemptk: wanted srate %g - got %g." %
                        (fs, sf.samplerate))

        chunksize = 4096
        # The leading empty float64 array keeps the result dtype identical
        # to growing ``np.array([])`` with hstack one chunk at a time.
        pieces = [np.array([])]
        while True:
            try:
                pieces.append(sf.read_frames(chunksize, dtype=np.float32))
            except RuntimeError:
                # read_frames raising RuntimeError marks the end of input.
                break
    finally:
        # The handle used to stay open for the life of the process.
        sf.close()

    spec = stft(np.hstack(pieces)).T
    spec = spec[specfreqbinrange[0]:specfreqbinrange[1], :]
    mags = abs(spec)
    phasifiers = spec / mags
    mags = np.log(mags)
    return (mags, phasifiers)
def _test_int_io(self, dt):
    """Round-trip random integer samples of dtype ``dt`` through a WAV file.

    Values are drawn from ``[-nb, nb]`` with ``nb = 2**(bits - 3)``, written
    with the encoding mapped from ``dt`` in ``_DTYPE_TO_ENC``, read back with
    the same dtype and compared for exact equality.
    """
    # TODO: check if neg or pos value is the highest in abs
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        nb = 2 ** (8 * np.dtype(dt).itemsize - 3)
        fs = 22050
        nbuff = fs
        # randint excludes its upper bound, hence nb + 1; it replaces the
        # deprecated random_integers(-nb, nb, nbuff).
        a = np.random.randint(-nb, nb + 1, nbuff)
        a = a.astype(dt)

        # Open the file for writing
        fmt = Format('wav', _DTYPE_TO_ENC[dt])
        b = Sndfile(fd, 'w', fmt, 1, fs)
        try:
            b.write_frames(a)
        finally:
            b.close()

        b = Sndfile(cfilename, 'r')
        try:
            read_a = b.read_frames(nbuff, dtype=dt)
        finally:
            b.close()

        assert_array_equal(a, read_a)
    finally:
        close_tmp_file(rfd, cfilename)
def file_to_features(self, wavpath):
    """Read a sound file frame by frame and return its MFCC features.

    Each block of ``framelen`` frames is mixed down to mono if needed,
    Hamming-windowed, transformed with an FFT and converted by
    ``self.mfccMaker``; the zeroth coefficient is dropped and at most 13 of
    the remaining coefficients are kept.

    Returns:
        A 2D array with one row of features per complete frame.

    Raises:
        ValueError: if the file is missing or its sample rate is not ``fs``.
    """
    if verbose:
        print("Reading %s" % wavpath)
    if not os.path.isfile(wavpath):
        raise ValueError("path %s not found" % wavpath)

    sf = Sndfile(wavpath, "r")
    try:
        if sf.samplerate != fs:
            raise ValueError("wanted sample rate %g - got %g." % (fs, sf.samplerate))
        window = np.hamming(framelen)
        features = []
        while True:
            try:
                chunk = sf.read_frames(framelen, dtype=np.float32)
            except RuntimeError:
                # A RuntimeError from read_frames ends the loop.
                break
            if len(chunk) != framelen:
                print("Not read sufficient samples - returning")
                break
            if sf.channels != 1:
                chunk = np.mean(chunk, 1)  # mixdown
            framespectrum = np.fft.fft(window * chunk)
            # Floor division: a slice bound must be an integer.
            magspec = abs(framespectrum[:framelen // 2])

            # do the frequency warping and MFCC computation
            melSpectrum = self.mfccMaker.warpSpectrum(magspec)
            melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
            melCepstrum = melCepstrum[1:]   # exclude zeroth coefficient
            melCepstrum = melCepstrum[:13]  # limit to lower MFCCs

            # todo: include deltas? that can be your homework.
            features.append(melCepstrum)
    finally:
        sf.close()
    return np.array(features)
def test_bad_wavread(self):
    """Check wavread on a file whose content is not WAV."""
    # Write random AIFF data under a ``.wav`` file name, confirm Sndfile can
    # read it back, then verify that wavread raises ValueError on it.
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        nbuff = 22050
        noise = 0.1 * N.random.randn(nbuff)

        # Open the copy file for writing
        non_wav = audio_format('aiff', 'pcm16')
        out = Sndfile(cfilename, 'w', non_wav, 1, nbuff)
        out.write_frames(noise)
        out.close()

        inp = Sndfile(cfilename, 'r')
        rcnoise = inp.read_frames(nbuff)
        inp.close()

        rejected = False
        try:
            rnoise = wavread(cfilename)[0]
        except ValueError:
            rejected = True
        if not rejected:
            raise Exception(
                "wavread on non wav file succeded, expected to fail")
    finally:
        close_tmp_file(rfd, cfilename)
def _test_int_io(self, dt):
    """Write random integers of dtype ``dt`` to a WAV file and read them back.

    Samples are uniform over ``[-nb, nb]`` where ``nb = 2**(bits - 3)``; the
    file encoding comes from ``_DTYPE_TO_ENC[dt]``. The data read back with
    ``dtype=dt`` must equal what was written.
    """
    # TODO: check if neg or pos value is the highest in abs
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        nb = 2 ** (8 * np.dtype(dt).itemsize - 3)
        fs = 22050
        nbuff = fs
        # random_integers is deprecated; randint's upper bound is exclusive,
        # so nb + 1 keeps the inclusive [-nb, nb] range.
        a = np.random.randint(-nb, nb + 1, nbuff).astype(dt)

        # Open the file for writing
        fmt = Format('wav', _DTYPE_TO_ENC[dt])
        writer = Sndfile(fd, 'w', fmt, 1, fs)
        try:
            writer.write_frames(a)
        finally:
            writer.close()

        reader = Sndfile(cfilename, 'r')
        try:
            read_a = reader.read_frames(nbuff, dtype=dt)
        finally:
            reader.close()

        assert_array_equal(a, read_a)
    finally:
        close_tmp_file(rfd, cfilename)
def timeStretchAudio(inputAudio, outputAudio, outputDuration, writeOutput=1):
    """Time-stretch a sound file with sinusoidal-model analysis/synthesis.

    The input is analysed with ``SM.sineModelAnal``, its sinusoidal tracks
    are rescaled with ``trans.sineTimeScaling`` using the mapping
    ``[0, 0, .4, outputDuration]``, and the result is synthesised with
    ``SM.sineModelSynth``.

    Parameters:
        inputAudio: path of the file to read.
        outputAudio: path of the file to write when ``writeOutput == 1``.
        outputDuration: last value of the time-scaling array.
        writeOutput: 1 to write ``outputAudio`` (returns ``None``); any other
            value to return ``(y, fs, nChannel)`` instead.
    """
    originalWav = Sndfile(inputAudio, 'r')
    try:
        x = originalWav.read_frames(originalWav.nframes)
        fs = originalWav.samplerate
        nChannel = originalWav.channels
        out_format = originalWav.format
    finally:
        originalWav.close()
    print(fs)

    if nChannel > 1:
        # Keep the first channel. ``x[0]`` selected the first *frame*;
        # other code in this file indexes channels as ``[:, ch]``.
        x = x[:, 0]

    w = np.hamming(801)
    N = 2048
    t = -90
    minSineDur = .005
    maxnSines = 150
    freqDevOffset = 20
    freqDevSlope = 0.02
    Ns = 512
    H = Ns // 4  # hop size; kept an integer under true division

    tfreq, tmag, tphase = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset,
        freqDevSlope)

    timeScale = np.array([0, 0, .4, outputDuration])
    ytfreq, ytmag = trans.sineTimeScaling(tfreq, tmag, timeScale)
    y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)

    if writeOutput == 1:
        # Only one channel is analysed above, so the output is written as a
        # single-channel file. NOTE(review): assumes sineModelSynth returns
        # a 1-D signal -- confirm.
        outputWav = Sndfile(outputAudio, 'w', out_format, 1, fs)
        try:
            outputWav.write_frames(y)
        finally:
            outputWav.close()
    else:
        return y, fs, nChannel
def downsample(fs, sig):
    """Resample ``sig`` from ``fs`` to ``SAMPLE_RATE`` using sox.

    The signal is zero-padded to a whole number of ``fs * WINDOW_SIZE``
    sample frames, written to a temporary pcm16 WAV file, run through a
    pysox ``rate`` effect into a second temporary file, and read back.

    Returns:
        The resampled frames as read from the sox output file.
    """
    in_file = random_string() + ".wav"
    out_file = random_string() + ".wav"
    try:
        frame_len = fs * WINDOW_SIZE
        pad = len(sig) % frame_len
        if pad > 0:
            sig = np.append(sig, np.zeros(frame_len - pad))

        f = Sndfile(in_file, 'w',
                    Format(type="wav", encoding='pcm16', endianness="file"),
                    1, fs)
        try:
            f.write_frames(sig)
        finally:
            f.close()

        sox_in = pysox.CSoxStream(in_file)
        sox_out = pysox.CSoxStream(out_file, 'w',
                                   pysox.CSignalInfo(SAMPLE_RATE, 1, 8),
                                   fileType='wav')
        sox_chain = pysox.CEffectsChain(sox_in, sox_out)
        sox_chain.add_effect(pysox.CEffect("rate", [str(SAMPLE_RATE)]))
        sox_chain.flow_effects()
        sox_out.close()

        f = Sndfile(out_file, 'r')
        try:
            sig = f.read_frames(f.nframes)
        finally:
            f.close()
        return sig
    finally:
        # Remove the temporary files even when a step above failed; a file
        # that was never created is simply skipped.
        for tmp_path in (in_file, out_file):
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
def get_fft_points(sound_filename, fps, fft_pixels, rate=1, fourierwidth=0.3):
    """Return one FFT-derived spectrum per block of audio frames.

    TODO will generate rate points per frame

    Based on the script from
    http://classicalconvert.com/2008/04/
    how-to-visualize-music-using-animated-spectrograms-with
    -open-source-everything/

    The file is read in blocks of ``samplerate // (rate * fps)`` frames.
    Multi-channel blocks are averaged across channels, transformed with
    ``fft``, and the real parts of the first ``fft_pixels`` bins are kept
    (padded with ``0.0`` when the block is shorter).

    Returns:
        A list of spectra, each a list of ``fft_pixels`` values.

    Raises:
        ValueError: if ``rate * fps`` exceeds the sample rate, which would
            give a zero-length block and a loop that never advances.
    """
    f = Sndfile(sound_filename, 'r')
    try:
        divisor = int(f.samplerate // (rate * fps))
        if divisor < 1:
            raise ValueError(
                "rate * fps (%r) must not exceed the sample rate (%r)"
                % (rate * fps, f.samplerate))

        nframes = f.nframes
        points = []
        framepos = 0
        while framepos < nframes:
            read_len = min(divisor, nframes - framepos)
            frames = f.read_frames(read_len)
            if len(frames) == 0:
                # Nothing was read; stop instead of looping forever.
                break

            # Average the channels of multi-channel data (sum / count, as
            # the per-frame loop did); mono data is used as is.
            if frames.ndim > 1:
                buff = frames.sum(axis=1) / frames.shape[1]
            else:
                buff = frames

            # TODO: trim to 1024 or so?
            outfft = fft(buff)
            spectrum = [
                (outfft[y].real if y < len(outfft) else 0.0)
                for y in range(fft_pixels)]
            points.append(spectrum)
            framepos += len(frames)
    finally:
        f.close()
    # maximise
    return points
def __create_feature(self, input_path, speaker_name, feature_filename, mode):
    """Extract cepstral features from a sound file and store them as HDF5.

    The output goes to ``<self.features_rootpath>/<speaker_name>/
    <feature_filename>``; the speaker directory is created if missing.
    Samples are scaled by ``2**15`` before being passed to the bob.bio.spear
    energy-threshold preprocessor and cepstral extractor.

    ``mode`` is accepted but not used by this method.
    """
    speaker_featurepath = os.path.join(self.features_rootpath, speaker_name)
    if not os.path.exists(speaker_featurepath):
        os.mkdir(speaker_featurepath)
    output_path = os.path.join(speaker_featurepath, feature_filename)

    f = Sndfile(input_path)
    try:
        rate = f.samplerate
        data = f.read_frames(f.nframes)
    finally:
        # The input handle was previously left open.
        f.close()

    original_data = data * pow(2, 15)
    extractor = bob.bio.spear.extractor.Cepstral(
        win_length_ms=25, n_filters=27, n_ceps=13, with_energy=False,
        mel_scale=True, features_mask=np.arange(0, 39))
    preprocessor = bob.bio.spear.preprocessor.Energy_Thr()
    # Only the labels (third item) of the preprocessor output are needed.
    _, _, labels = preprocessor((rate, original_data))
    feature = extractor([rate, original_data, labels])

    out_file = bob.io.base.HDF5File(output_path, 'w')
    try:
        extractor.write_feature(feature, out_file)
    finally:
        out_file.close()
def downsample(fs, sig):
    """Convert ``sig`` from sample rate ``fs`` to ``SAMPLE_RATE`` via sox.

    Steps: zero-pad to a multiple of ``fs * WINDOW_SIZE`` samples, write a
    temporary mono pcm16 WAV file, apply the pysox ``rate`` effect into a
    second temporary file, and return the frames read from it.
    """
    in_file = random_string() + ".wav"
    out_file = random_string() + ".wav"
    try:
        frame_len = fs * WINDOW_SIZE
        pad = len(sig) % frame_len
        if pad > 0:
            sig = np.append(sig, np.zeros(frame_len - pad))

        wav_format = Format(type="wav", encoding='pcm16', endianness="file")
        writer = Sndfile(in_file, 'w', wav_format, 1, fs)
        try:
            writer.write_frames(sig)
        finally:
            writer.close()

        sox_in = pysox.CSoxStream(in_file)
        sox_out = pysox.CSoxStream(
            out_file, 'w', pysox.CSignalInfo(SAMPLE_RATE, 1, 8),
            fileType='wav')
        sox_chain = pysox.CEffectsChain(sox_in, sox_out)
        sox_chain.add_effect(pysox.CEffect("rate", [str(SAMPLE_RATE)]))
        sox_chain.flow_effects()
        sox_out.close()

        reader = Sndfile(out_file, 'r')
        try:
            return reader.read_frames(reader.nframes)
        finally:
            reader.close()
    finally:
        # Always clean up; skip files that were never created so that the
        # cleanup does not mask the original error.
        if os.path.exists(in_file):
            os.unlink(in_file)
        if os.path.exists(out_file):
            os.unlink(out_file)
def extractData(file_names):
    """Build MFCC inputs and character-index targets from TIMIT files.

    ``file_names`` maps a directory to the audio file names inside it. For
    each audio file the matching ``.TXT`` transcript (same stem) is read;
    both paths are rebased onto ``./TIMIT`` by replacing the first character
    of the directory key.

    Returns:
        ``(data, targets)``: a list of scaled MFCC matrices and a list of
        integer arrays where ``0`` stands for ``<space>`` and letters map to
        ``ord(ch) - (ord('a') - 1)``.
    """
    data = []
    targets = []
    for k, v in file_names.items():
        for f_name in v:
            source_fname = k + "/" + f_name
            target_fname = k + "/" + f_name.split(".")[0] + ".TXT"
            source_fname = "./TIMIT" + source_fname[1:]
            target_fname = "./TIMIT" + target_fname[1:]

            audio_file = Sndfile(source_fname, "r")
            try:
                sr = audio_file.samplerate
                audio = audio_file.read_frames(audio_file.nframes)
            finally:
                audio_file.close()
            datum = mfcc(audio, samplerate=sr, nfilt=64, numcep=40)
            datum = preprocessing.scale(datum)
            data.append(datum)

            with open(target_fname, "r") as text_file:
                # Lower-case, drop periods and skip the first two
                # whitespace-separated fields of the transcript.
                target_txt = ' '.join(
                    text_file.read().lower().strip().replace(
                        ".", "").split()[2:])
            # join() keeps this a string on Python 3, where filter() would
            # return an iterator without a .replace method.
            target_txt = ''.join(
                ch for ch in target_txt if ch not in special_chars)
            # NOTE(review): as written this replace is a no-op (space ->
            # space); the literals may have lost whitespace -- confirm
            # against the original source.
            target_txt = target_txt.replace(' ', ' ').split(' ')
            target = np.hstack(
                ['<space>' if x == '' else list(x) for x in target_txt])
            target = np.asarray(
                [0 if x == '<space>' else ord(x) - (ord('a') - 1)
                 for x in target])
            targets.append(target)
    return data, targets
def read_wav(self, sample_path):
    """Load a mono or stereo sample and store it on ``self`` via ``update``.

    The file ``cwd + sample_path`` is read as ``float32`` frames. Depending
    on ``OPTIONS`` a summary line is printed and the sample is played.

    Stored keys: ``sample_data`` (the frames), ``sample_type`` (``0`` for
    mono, ``0b01100100`` for stereo), ``channels`` (always ``2``) and
    ``sample_bittype`` (``4``).

    Raises:
        ValueError: for files with more than two channels, for which no
            ``sample_type`` is defined.
    """
    sample = Sndfile(cwd + sample_path, 'r')
    try:
        sampling_rate = sample.samplerate
        channels = sample.channels
        encoding = sample.encoding
        frames_count = sample.nframes
        frames = sample.read_frames(frames_count, dtype=np.float32)
    finally:
        sample.close()

    if channels == 1:
        text_type = 'mono'
        sample_type = 0
    elif channels == 2:
        text_type = 'stereo'
        sample_type = 0b01100100
    else:
        # Previously fell through with ``sample_type`` unbound and failed
        # later with a NameError.
        raise ValueError(
            'unsupported channel count {0} in sample "{1}"'.format(
                channels, sample_path))

    if OPTIONS['verbose'] > 1:
        # Space-separated, matching the former print statement's output.
        print(" ".join(str(item) for item in (
            "*", encoding, text_type, 'sample "', sample_path, '"',
            4 * frames_count, 'kB')))

    if OPTIONS['play_sound']:
        play(frames.astype(np.float64).T, sampling_rate)

    self.update({
        'sample_data': frames,
        'sample_type': sample_type,
        'channels': 2,
        'sample_bittype': 4
    })
def file_to_features(self, wavpath):
    """Convert a sound file into a 2D array of per-frame MFCC features.

    Frames of ``framelen`` samples are read until ``read_frames`` raises
    ``RuntimeError`` or returns a short block. Multi-channel frames are
    averaged to mono before the Hamming-windowed FFT.
    """
    sf = Sndfile(wavpath, "r")
    try:
        window = np.hamming(framelen)
        features = []
        while True:
            try:
                chunk = sf.read_frames(framelen, dtype=np.float32)
            except RuntimeError:
                break
            if len(chunk) != framelen:
                print("Not read sufficient samples - returning")
                break
            if sf.channels != 1:
                chunk = np.mean(chunk, 1)  # mixdown
            framespectrum = np.fft.fft(window * chunk)
            # Integer half-length: a float slice bound raises TypeError.
            magspec = abs(framespectrum[:framelen // 2])

            # do the frequency warping and MFCC computation
            melSpectrum = self.mfccMaker.warpSpectrum(magspec)
            melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
            melCepstrum = melCepstrum[1:]   # exclude zeroth coefficient
            melCepstrum = melCepstrum[:13]  # limit to lower MFCCs
            features.append(melCepstrum)
    finally:
        # Close the file even if feature extraction fails part-way.
        sf.close()
    return np.array(features)
def file_to_features(self, wavpath):
    """Return the MFCC feature rows computed from ``wavpath``.

    The file is consumed in blocks of ``framelen`` frames; each block is
    mixed to mono when needed, windowed with a Hamming window, and passed
    through ``self.mfccMaker``. The zeroth cepstral coefficient is dropped
    and at most the next 13 are kept. Reading stops on a short block or
    when ``read_frames`` raises ``RuntimeError``.
    """
    sf = Sndfile(wavpath, "r")
    try:
        window = np.hamming(framelen)
        half = framelen // 2  # integer slice bound under true division
        features = []
        while True:
            try:
                chunk = sf.read_frames(framelen, dtype=np.float32)
            except RuntimeError:
                break
            if len(chunk) != framelen:
                print("Not read sufficient samples - returning")
                break
            if sf.channels != 1:
                chunk = np.mean(chunk, 1)  # mixdown
            magspec = abs(np.fft.fft(window * chunk)[:half])

            # do the frequency warping and MFCC computation
            melSpectrum = self.mfccMaker.warpSpectrum(magspec)
            melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
            # exclude zeroth coefficient, then limit to lower MFCCs
            features.append(melCepstrum[1:][:13])
    finally:
        sf.close()
    return np.array(features)
def test_read_wave():
    """Read 46797 frames from ``../fcjf0/sa1.wav`` and plot a spectrogram."""
    f = Sndfile("../fcjf0/sa1.wav", 'r')
    try:
        data = f.read_frames(46797)
    finally:
        # The handle was previously left open.
        f.close()
    data_arr = np.array(data)
    pyplot.figure()
    pyplot.specgram(data_arr)
    pyplot.show()
def test_rw(self):
    """Test read/write pointers for seek.

    A file opened in ``'rw'`` mode is written and read back while seeking
    both pointers together, only the read pointer (``'r'``) and only the
    write pointer (``'w'``), checking each time that the other pointer did
    not move.
    """
    ofilename = join(TEST_DATA_DIR, 'test.wav')
    rfd, fd, cfilename = open_tmp_file('rwseektest.wav')
    try:
        ref = Sndfile(ofilename, 'r')
        try:
            test = Sndfile(fd, 'rw', format=ref.format,
                           channels=ref.channels,
                           samplerate=ref.samplerate)
            try:
                n = 1024

                rbuff = ref.read_frames(n, dtype=np.int16)
                test.write_frames(rbuff)
                tbuff = test.read_frames(n, dtype=np.int16)
                assert_array_equal(rbuff, tbuff)

                # Test seeking both read and write pointers
                test.seek(0, 0)
                test.write_frames(rbuff)
                tbuff = test.read_frames(n, dtype=np.int16)
                assert_array_equal(rbuff, tbuff)

                # Test seeking only read pointer
                rbuff1 = rbuff.copy()
                # clip() returns a new array; the result used to be
                # discarded, so the clipping never took effect.
                rbuff2 = (rbuff1 * 2 + 1).clip(-30000, 30000)
                test.seek(0, 0, 'r')
                test.write_frames(rbuff2)
                tbuff1 = test.read_frames(n, dtype=np.int16)
                try:
                    tbuff2 = test.read_frames(n, dtype=np.int16)
                except IOError as e:
                    msg = "write pointer was updated in read seek !"
                    msg += "\n(msg is %s)" % e
                    raise AssertionError(msg)

                assert_array_equal(rbuff1, tbuff1)
                assert_array_equal(rbuff2, tbuff2)
                if np.all(rbuff2 == tbuff1):
                    raise AssertionError("write pointer was updated"
                                         " in read seek !")

                # Test seeking only write pointer
                rbuff3 = (rbuff1 * 2 - 1).clip(-30000, 30000)
                test.seek(0, 0, 'rw')
                test.seek(n, 0, 'w')
                test.write_frames(rbuff3)
                tbuff1 = test.read_frames(n, np.int16)
                try:
                    assert_array_equal(tbuff1, rbuff1)
                except AssertionError:
                    raise AssertionError(
                        "read pointer was updated in write seek !")

                try:
                    tbuff3 = test.read_frames(n, np.int16)
                except IOError as e:
                    msg = "read pointer was updated in write seek !"
                    msg += "\n(msg is %s)" % e
                    raise AssertionError(msg)
            finally:
                test.close()
        finally:
            ref.close()
    finally:
        # Same temp-file cleanup as the other tests in this file; the outer
        # ``try`` had no matching clause.
        close_tmp_file(rfd, cfilename)
class AudioFile:
    """Sound file loaded fully into memory together with its meta-data.

    Attributes set by the constructor:
        sf: the open ``Sndfile`` handle (left open).
        file_format, nchans, sr, length: format, channel count, sample rate
            and frame count reported by the handle.
        audio: all frames of the file, as returned by ``read_frames``.
    """

    def __init__(self, file_name):
        handle = Sndfile(file_name)
        self.sf = handle
        self.file_format = handle.format
        self.nchans = handle.channels
        self.sr = handle.samplerate
        frame_count = handle.nframes
        self.length = frame_count
        self.audio = handle.read_frames(frame_count)
def load_sound(filename):
    """Load a sound file and return its samples and sample rate.

    INFO: The values are normalized between -1 and 1

    :param filename: path of the sound file to read
    :return: ``(data, samplerate)`` where ``data`` is a float64 numpy array
        with (sound_length, channels) shape
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes, dtype=np.float64)
        samplerate = f.samplerate
    finally:
        # The handle was previously never closed.
        f.close()
    return data, samplerate
def load(filename):
    """Load an audio file and average over channels.

    Returns the data as a numpy array and the sampling rate. Two-dimensional
    frame data is reduced with a mean over its last axis; one-dimensional
    data is returned unchanged.
    """
    fh = Sndfile(filename, "r")
    try:
        data = fh.read_frames(fh.nframes)
        rate = fh.samplerate
    finally:
        # The handle was previously never closed.
        fh.close()
    if data.ndim == 2:
        data = np.mean(data, axis=-1)
    return data, rate
def CQT(filename, fmin=None, n_bins=84, hop_length=512, nfreqs=None):
    """Compute constant-Q features with deltas and energy for a sound file.

    The returned matrix stacks, per frame: the ``librosa.cqt`` output
    (optionally truncated to its first ``nfreqs`` rows), its first- and
    second-order deltas, and the ``librosa.feature.rmse`` energy.

    Returns:
        The stacked features, transposed.
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        samplerate = f.samplerate
    finally:
        # The handle was previously never closed.
        f.close()

    cqt = librosa.cqt(data, sr=samplerate, fmin=fmin, n_bins=n_bins,
                      hop_length=hop_length)
    if nfreqs is not None:
        cqt = cqt[:nfreqs, :]
    delta1 = librosa.feature.delta(cqt, order=1)
    delta2 = librosa.feature.delta(cqt, order=2)
    energy = librosa.feature.rmse(y=data)
    features = np.vstack((cqt, delta1, delta2, energy))
    return features.T
def load(filename):
    """Load a wave file and return the signal, sample rate and number of channels.

    Can be any format that libsndfile supports, like .wav, .flac, etc.
    """
    wave_file = Sndfile(filename, 'r')
    try:
        signal = wave_file.read_frames(wave_file.nframes)
        channels = wave_file.channels
        sample_rate = wave_file.samplerate
    finally:
        # The handle was previously never closed.
        wave_file.close()
    return signal, sample_rate, channels
def load(filename):
    """
    Load a wave file and return the signal, sample rate and number of channels.

    Can be any format that libsndfile supports, like .wav, .flac, etc.
    """
    wave_file = Sndfile(filename, 'r')
    try:
        sample_rate = wave_file.samplerate
        channels = wave_file.channels
        signal = wave_file.read_frames(wave_file.nframes)
    finally:
        # Release the file handle once everything has been read.
        wave_file.close()
    return signal, sample_rate, channels
def hodorifyIt(inputFile, outputFile, karaokeExt='.txt'):
    """Replace the sung syllables of a track with 'ho' / 'dor' samples.

    The karaoke annotation is read from the input path with its extension
    replaced by ``karaokeExt``. Syllables are labelled alternately 'ho' and
    'dor' (a ``'-'`` syllable resets the alternation), the centre channel is
    cut from the audio, and a generated hodor track is added to columns 0
    and 1 of the audio before it is written to ``outputFile`` with the
    input's format, channel count and sample rate.

    Also writes the annotation file ``tryHODOR.txt``.
    """
    # reading input wave file
    inputAudio = Sndfile(inputFile, 'r')
    try:
        audio = inputAudio.read_frames(inputAudio.nframes)
        fs = inputAudio.samplerate
        out_format = inputAudio.format
        out_channels = inputAudio.channels
    finally:
        inputAudio.close()

    fname, ext = os.path.splitext(inputFile)
    karaokeFile = fname + karaokeExt

    # parse the karaoke file
    karaokeData = KP.parseKaraokeFile(karaokeFile)

    # assign which syllable to use ho and which ones to use dor
    toogle = 0
    sylType = ['ho', 'dor']
    for elem in karaokeData['data']:
        if elem['syl'] == '-':
            toogle = 0
            continue
        elem['sylType'] = sylType[toogle]
        toogle = (toogle + 1) % 2

    dumpSonicVisualizerAnnotFile("tryHODOR.txt", karaokeData['data'])

    # initialize all the hodor locations with not processed flag
    # (later to be for exploiting repetitive hodors)
    for elem in karaokeData['data']:
        elem['processed'] = 0

    # creating mapping between file names and tones
    toneMapp = createToneMappFiles(toneMappFile)

    # processHere the logic for Hodor input file for each word
    karaokeData = hodorFileSelection(karaokeData, toneMapp)

    # do center channel cut
    audio = cutCenterChannel(audio, fs, karaokeData)

    # estimate the possible repetitions in the karaoke data,
    # i.e. output with same note and duration
    print(len(karaokeData['data']))
    repMTX = estimateRepetitiveHodors(karaokeData)

    emptyTrack = np.zeros(len(audio))
    emptyTrack = generateHodorTrack(emptyTrack, fs, karaokeData, repMTX)

    audio[:, 1] = audio[:, 1] + emptyTrack
    audio[:, 0] = audio[:, 0] + emptyTrack

    outputWav = Sndfile(outputFile, 'w', out_format, out_channels, fs)
    try:
        outputWav.write_frames(audio)
    finally:
        outputWav.close()
def _test_read_write(self, dtype):
    """Copy ``test.wav`` using ``dtype`` frames and verify the copy.

    The reference file is copied into a temporary WAV file whose encoding is
    ``_DTYPE_TO_ENC[dtype]``; both files are then re-read block by block with
    ``dtype`` and compared.
    """
    ofilename = join(TEST_DATA_DIR, 'test.wav')
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        nbuff = 22050

        # Open the test file for reading
        a = Sndfile(ofilename, 'r')
        try:
            nframes = a.nframes
            # Floor division: range() needs an int under true division.
            nblocks = nframes // nbuff

            # Open the copy file for writing
            fmt = Format('wav', _DTYPE_TO_ENC[dtype])
            b = Sndfile(fd, 'w', fmt, a.channels, a.samplerate)
            try:
                # Copy the data in the wav file
                for _ in range(nblocks):
                    tmpa = a.read_frames(nbuff, dtype=dtype)
                    assert tmpa.dtype == dtype
                    b.write_frames(tmpa)
                # NOTE(review): the remainder is read with the default
                # dtype rather than ``dtype`` -- kept as in the original;
                # confirm this is intended.
                nrem = nframes % nbuff
                tmpa = a.read_frames(nrem)
                b.write_frames(tmpa)
            finally:
                b.close()
        finally:
            a.close()

        # Now, reopen both files in for reading, and check data are
        # the same
        a = Sndfile(ofilename, 'r')
        try:
            b = Sndfile(cfilename, 'r')
            try:
                for _ in range(nblocks):
                    tmpa = a.read_frames(nbuff, dtype=dtype)
                    tmpb = b.read_frames(nbuff, dtype=dtype)
                    assert_array_equal(tmpa, tmpb)
            finally:
                b.close()
        finally:
            a.close()
    finally:
        close_tmp_file(rfd, cfilename)
def offs(self, track1, track2):
    """Calculate the head offset between two audio files.

    offs(audiofile track1, audiofile track2)

    The offset is the difference between the positions of the largest
    absolute sample in the head of each (mono-mixed) file. The head is the
    first twentieth of the file; when that is shorter than five seconds of
    audio, the whole file is used instead.

    Returns:
        ``argmax(head1) - argmax(head2)``, or ``None`` (after printing a
        message) when either file cannot be opened.
    """
    # opens files for reading
    try:
        track_one_file_obj = Sndfile(track1.encode('utf-8'), 'r')
    except Exception:
        print('Corrupted File 1 : ' + track1)
        return
    try:
        try:
            track_two_file_obj = Sndfile(track2, 'r')
        except Exception:
            print('Corrupted File 2 : ' + track2)
            return
        try:
            # Head length in frames; int() because floor may return a float.
            track_one_file_obj_head = int(floor(.05 * track_one_file_obj.nframes))
            if track_one_file_obj_head < (track_one_file_obj.samplerate * 5):
                track_one_file_obj_head = track_one_file_obj.nframes
            track_two_file_obj_head = int(floor(.05 * track_two_file_obj.nframes))
            if track_two_file_obj_head < (track_two_file_obj.samplerate * 5):
                track_two_file_obj_head = track_two_file_obj.nframes

            # reads the head of each file (as absolute values, accounting
            # for reversed waveforms) into a 1-dimensional numpy matrix
            # (via mono function)
            numpy_matrix_of_track1 = self.mono(np.absolute(
                track_one_file_obj.read_frames(track_one_file_obj_head)))
            numpy_matrix_of_track2 = self.mono(np.absolute(
                track_two_file_obj.read_frames(track_two_file_obj_head)))

            # returns the difference between the peak of each list
            return (np.argmax(numpy_matrix_of_track1)
                    - np.argmax(numpy_matrix_of_track2))
        finally:
            track_two_file_obj.close()
    finally:
        track_one_file_obj.close()
def analysefile(path, hopsize=0.5, mode='ch', numtop=1, framesize=1024, chrm_kwargs=None, maxdursecs=None):
    """Analyse a mono audio file in overlapping frames.

    The file is read ``hopsize * framesize`` samples at a time into a
    sliding window of ``framesize`` samples, and every window is passed to
    ``Chirpletringmod.analyseframeplusfeatures``. ``mode`` selects chirplet
    (``'ch'``) or plain FFT (``'fft'``) analysis.

    Parameters:
        path: sound file to analyse (must be mono).
        hopsize: hop as a fraction of the frame size.
        mode: ``'ch'`` or ``'fft'``.
        numtop: forwarded to ``analyseframeplusfeatures``.
        framesize: analysis frame size in samples.
        chrm_kwargs: extra ``Chirpletringmod`` keyword arguments; copied,
            never modified in place.
        maxdursecs: stop after this many seconds of audio have been read;
            ``None`` reads the whole file.

    Returns:
        dict with keys ``'ch'``, ``'frames'``, ``'srate'``, ``'hopsize'``
        and ``'framesize'``.

    Raises:
        ValueError: unknown ``mode`` or missing ``path``.
        Error: multi-channel input.
    """
    if mode not in ('ch', 'fft'):
        raise ValueError('Mode %s not recognised' % mode)
    if not os.path.isfile(path):
        raise ValueError("path %s not found" % path)

    sf = Sndfile(path, "r")
    try:
        # NOTE(review): ``Error`` is not defined in the visible source;
        # confirm it is provided elsewhere in the module.
        if sf.channels != 1:
            raise Error("ERROR in chirpletringmod: sound file has multiple channels (%i) - mono audio required." % sf.channels)
        samplerate = sf.samplerate

        if maxdursecs is not None:
            maxdurspls = maxdursecs * samplerate
        else:
            maxdurspls = sf.nframes

        if chrm_kwargs is not None:
            chrm_kwargs = deepcopy(chrm_kwargs)
            chrm_kwargs['samplerate'] = samplerate
            chrm_kwargs['framesize'] = framesize
        else:
            chrm_kwargs = {'samplerate': samplerate, 'framesize': framesize}
        ch = chirpletringmod.Chirpletringmod(**chrm_kwargs)

        ihop = int(hopsize * ch.framesize)
        numspecframes = sf.nframes // ihop
        print("File contains %i spectral frames" % numspecframes)
        storeraw = numspecframes < 500

        frames = []
        data = zeros(ch.framesize, float32)
        splsread = 0
        moretocome = True
        while moretocome:
            try:
                nextdata = sf.read_frames(ihop, dtype=float32)
            except RuntimeError:
                # Treated as end of file. Stop here: carrying on would
                # re-use the previous hop's samples (or an unbound name on
                # the first pass) and append a bogus frame.
                break
            if len(nextdata) != ihop:
                print("data truncated, detected EOF")
                moretocome = False
                nextdata = hstack((nextdata, zeros(ihop - len(nextdata))))
            splsread += ihop
            data = hstack((data[ihop:], nextdata))
            frames.append(ch.analyseframeplusfeatures(
                data, hopsize, mode, numtop, storeraw))
            # Compare the samples consumed so far: ``len(data)`` is the
            # constant window length and never reflected the duration.
            if splsread >= maxdurspls:
                break
    finally:
        sf.close()

    # the ch knows srate and framesize, why are we duplicating?
    return {'ch': ch, 'frames': frames, 'srate': samplerate,
            'hopsize': hopsize, 'framesize': ch.framesize}
def offs(track1, track2):
    """Return the peak-position offset between the heads of two audio files.

    The head of a file is its first twentieth; when that is shorter than
    five seconds of audio the whole file is used instead. Each head is
    rectified with ``np.absolute``, mixed down with ``mono`` and searched
    for its maximum; the difference of the two peak indices is returned.
    """
    # opens files for reading
    s1 = Sndfile(track1, 'r')
    try:
        s2 = Sndfile(track2, 'r')
        try:
            # Head length in frames; int() because floor may return a float.
            s1head = int(floor(.05 * s1.nframes))
            if s1head < (s1.samplerate * 5):
                s1head = s1.nframes
            s2head = int(floor(.05 * s2.nframes))
            if s2head < (s2.samplerate * 5):
                s2head = s2.nframes

            # reads the head of each file (as absolute values, accounting
            # for reversed waveforms) into a 1-dimensional numpy matrix
            # (via mono function)
            t1 = mono(np.absolute(s1.read_frames(s1head)))
            t2 = mono(np.absolute(s2.read_frames(s2head)))
        finally:
            s2.close()
    finally:
        s1.close()

    # returns the difference between the peak of each list
    return np.argmax(t1) - np.argmax(t2)
def plotSpectrogram(f, mode, channel):
    """Plot and save the spectrogram of one channel of a sound file.

    Up to 59.8 seconds are read from ``f``; column ``channel`` of the frames
    is plotted with ``plt.specgram`` and saved as ``<name>CH<channel>.png``,
    where ``name`` is ``f`` without its last four characters. On success the
    image path is appended to the module-level ``listN``. Failures are
    reported with a printed message instead of an exception.

    ``mode`` is accepted but not used.
    """
    plt.close('all')

    # Extracting the name from the '.wav' file
    name = f[:len(f) - 4]
    print("Processing: %s CH %s" % (name, channel))
    plt.figure(figsize=(10.5, 3), dpi=100)

    fig = None
    plotted = False
    try:
        r = Sndfile(f)
        try:
            Fs = r.samplerate
            begin = 0 * Fs
            stop = 59.8 * Fs
            # Frame counts must be integers; round the float product.
            sample = r.read_frames(int(round(stop - begin)))
        finally:
            r.close()

        # Setting out of some spectrogram variables
        NFFT = int(Fs * 0.05)        # window of 0.05 s
        noverlap = int(Fs * 0.0025)

        # Plotting
        fig = plt.specgram(sample[:, channel], Fs=Fs, NFFT=NFFT,
                           noverlap=noverlap, cmap=plt.get_cmap('jet'))
        plotted = True
    except Exception:
        print("Could not process %s" % name)
        plt.figtext(0.5, 0.5, "ERROR")

    # Colourmap values that work well are: 'binary','bone' and 'jet'
    plt.title("CH" + str(channel) + " : " + f)
    plt.xticks([], [])  # gets rid of the x ticks and numbers
    plt.yticks([], [])  # gets rid of the y ticks and numbers

    if not plotted:
        # Nothing is saved or recorded for a failed spectrogram; this used
        # to happen implicitly through an unbound ``fig``.
        print("ERROR: %s" % name)
        return

    try:
        # NOTE(review): ``fig`` here is the value returned by specgram;
        # confirm savefig accepts this keyword in the matplotlib in use.
        plt.savefig(name + "CH" + str(channel) + ".png", fig=fig,
                    bbox_inches='tight')
        # if spectrogram has been successfully generated add it to the
        # list of images
        namePNG = "./" + name + "CH" + str(channel) + ".png"
        listN.append(namePNG)
    except Exception:
        print("ERROR: %s" % name)
def decompose(inpath, outdir='output', niters=3, framesize=1024, hopsize=0.5, writefiles=True, wintype='hann'):
    """Run a pursuit-style decomposition of a mono sound file into atoms.

    Each iteration calls ``decompose_oneiter`` on the current residual, which
    returns a dict with keys ``'peaks'``, ``'resynth'`` and ``'residual'``.
    When ``writefiles`` is true, three WAV files are written per iteration:
    the partial resynthesis, the running total resynthesis and the residual.

    Args:
        inpath: Path of the input sound file; must exist and be mono.
        outdir: Directory prefix for the written files.
        niters: Number of decomposition iterations.
        framesize: Frame size passed to ``Chirpletringmod``.
        hopsize: Hop size passed to ``decompose_oneiter``.
        writefiles: Whether to write the per-iteration WAV files.
        wintype: Window type passed to ``Chirpletringmod``.

    Returns:
        Dict with ``'ch'`` (the ``Chirpletringmod`` instance), ``'peaks'``
        (peaks aggregated over all iterations) and ``'residual'`` (the
        residual after the last iteration).

    Raises:
        ValueError: If ``inpath`` is not an existing file.
    """
    if not os.path.isfile(inpath):
        raise ValueError("path %s not found" % inpath)

    sf = Sndfile(inpath, "r")
    try:
        if sf.channels != 1:
            # NOTE(review): `Error` is not defined in this block; confirm it
            # is available at module level.
            raise Error("ERROR in chirpletringmod: sound file has multiple channels (%i) - mono audio required." % sf.channels)
        ch = chirpletringmod.Chirpletringmod(samplerate=sf.samplerate, framesize=framesize, wintype=wintype)
        signal = sf.read_frames(sf.nframes, dtype=float32)
    finally:
        # Close the input even when the mono check or the read fails.
        sf.close()

    outnamestem = "%s/%s" % (outdir, os.path.splitext(os.path.basename(inpath))[0])

    resynthtot = zeros(len(signal))
    aggpeaks = []
    residual = signal
    print("chf.decompose: original signal energy %g" % sum(signal ** 2))

    for whichiter in range(niters):
        print("----------------------------------------")
        print("iteration %i" % whichiter)

        # One round of decomposition; yields 'peaks', 'resynth', 'residual'.
        iterdata = ch.decompose_oneiter(residual, hopsize=hopsize)
        resynthtot += iterdata['resynth']
        aggpeaks.extend(iterdata['peaks'])

        if writefiles:
            outputs = (
                ("resynth", iterdata['resynth']),
                ("resynthtot", resynthtot),
                ("residual", iterdata['residual']),
            )
            for label, frames in outputs:
                out = Sndfile("%s_%i_%s.wav" % (outnamestem, whichiter, label), "w", Format(), 1, ch.sr)
                try:
                    out.write_frames(frames)
                finally:
                    out.close()

        residual = iterdata['residual']  # input for the next iteration

        print("resynth    signal energy %g" % sum(iterdata['resynth'] ** 2))
        print("resynthtot signal energy %g" % sum(resynthtot ** 2))
        print("residual   signal energy %g" % sum(residual ** 2))

    return {'ch': ch, 'peaks': aggpeaks, 'residual': residual}
def main():
    """Plot the waveform and spectrogram slices of every file in testSamples/.

    For each file matched by ``testSamples/*`` a spectrogram is computed with
    the module-level ``frameSize`` and 50% overlap, the scaled waveform is
    drawn in the upper subplot, the output of ``getSlices`` in the lower one,
    and the figure is shown.
    """
    for audioFile in glob.glob("testSamples/*"):
        snd = Sndfile(audioFile, "r")
        try:
            nframes = snd.nframes
            fs = snd.samplerate
            data = snd.read_frames(nframes)
        finally:
            # Release the handle before the (blocking) plotting starts.
            snd.close()

        # Floor division keeps the overlap an integer on Python 3 as well.
        (frames, freqs, bins, ax) = mp.specgram(data, frameSize, noverlap=(frameSize // 2), Fs=fs)

        mp.subplot(211)
        mp.plot(np.linspace(0, float(nframes) / fs, nframes), data * 10000 + 10000, alpha=0.4)
        mp.subplot(212)
        mp.plot(bins[0:-1], getSlices(frames, bins, 20))
        mp.show()
def CQT_stacked(filename, fmin=None, n_bins=84, hop_length=512, nfreqs=None):
    """Compute a constant-Q transform and its deltas, stacked as 3 planes.

    Args:
        filename: Path of the sound file to analyse.
        fmin: Passed to ``librosa.cqt``.
        n_bins: Passed to ``librosa.cqt``.
        hop_length: Passed to ``librosa.cqt``.
        nfreqs: If given, keep only the first ``nfreqs`` rows of the CQT.

    Returns:
        Array of shape ``(3, L, d)`` where ``(d, L)`` is the shape of the
        (possibly truncated) CQT matrix; the planes are the CQT, its
        first-order delta and its second-order delta, each transposed.
    """
    f = Sndfile(filename, 'r')
    try:
        sr = f.samplerate
        data = f.read_frames(f.nframes)
    finally:
        f.close()

    cqt = librosa.cqt(data, sr=sr, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
    if nfreqs is not None:
        cqt = cqt[:nfreqs, :]

    delta1 = librosa.feature.delta(cqt, order=1)
    delta2 = librosa.feature.delta(cqt, order=2)

    d, L = cqt.shape
    # Transpose each matrix and give it a leading axis so vstack yields
    # three stacked planes.
    planes = [m.T.reshape(1, L, d) for m in (cqt, delta1, delta2)]
    features = np.vstack(planes)
    return features
def readwavefile(inputwav):
    """Read a mono 44.1 kHz sound file and return its frames as float32.

    The process exits with status 1 (after printing a message) when the file
    has a different sample rate or more than one channel.

    Args:
        inputwav: Path of the sound file.

    Returns:
        All frames of the file, read with ``dtype=np.float32``.
    """
    f = Sndfile(inputwav, 'r')
    try:
        if f.samplerate != 44100:
            print('only 44.1kHz filess are supported at present')
            exit(1)
        if f.channels != 1:
            print('only 1 channel supported at present')
            exit(1)
        return f.read_frames(f.nframes, dtype=np.float32)
    finally:
        # Runs on the normal return and on the SystemExit raised by exit().
        f.close()
def logmel(filename, n_fft=2048, hop_length=512, nfreqs=None):
    """Compute log-mel + energy features with first and second deltas.

    Args:
        filename: Path of the sound file to analyse.
        n_fft: Passed to ``librosa.feature.melspectrogram``.
        hop_length: Passed to ``librosa.feature.melspectrogram``.
        nfreqs: If given, keep only the first ``nfreqs`` log-mel rows.

    Returns:
        Transposed vertical stack of the base rows (log-mel rows plus the
        ``rmse`` energy row), their first-order deltas and their
        second-order deltas.
    """
    f = Sndfile(filename, 'r')
    try:
        sr = f.samplerate
        data = f.read_frames(f.nframes)
    finally:
        f.close()

    melspectrogram = librosa.feature.melspectrogram(y=data, sr=sr, n_fft=n_fft, hop_length=hop_length)
    # Named log_mel so the local does not shadow this function.
    log_mel = librosa.core.logamplitude(melspectrogram)
    if nfreqs is not None:
        log_mel = log_mel[:nfreqs, :]

    energy = librosa.feature.rmse(y=data)
    spectr = np.vstack((log_mel, energy))
    delta1 = librosa.feature.delta(spectr, order=1)
    delta2 = librosa.feature.delta(spectr, order=2)

    features = np.vstack((spectr, delta1, delta2))
    return features.T
def logmel_stacked(filename, n_fft=2048, hop_length=512, nfreqs=None):
    """Compute a log-mel spectrogram and its deltas, stacked as 3 planes.

    Args:
        filename: Path of the sound file to analyse.
        n_fft: Passed to ``librosa.feature.melspectrogram``.
        hop_length: Passed to ``librosa.feature.melspectrogram``.
        nfreqs: If given, keep only the first ``nfreqs`` log-mel rows.

    Returns:
        Array of shape ``(3, L, d)`` where ``(d, L)`` is the shape of the
        (possibly truncated) log-mel matrix; the planes are the log-mel
        matrix, its first-order delta and its second-order delta, each
        transposed.
    """
    f = Sndfile(filename, 'r')
    try:
        sr = f.samplerate
        data = f.read_frames(f.nframes)
    finally:
        f.close()

    melspectrogram = librosa.feature.melspectrogram(y=data, sr=sr, n_fft=n_fft, hop_length=hop_length)
    logmel = librosa.core.logamplitude(melspectrogram)
    if nfreqs is not None:
        logmel = logmel[:nfreqs, :]

    delta1 = librosa.feature.delta(logmel, order=1)
    delta2 = librosa.feature.delta(logmel, order=2)

    d, L = logmel.shape
    # Transpose each matrix and give it a leading axis so vstack yields
    # three stacked planes.
    planes = [m.T.reshape(1, L, d) for m in (logmel, delta1, delta2)]
    features = np.vstack(planes)
    return features
def convert(filename, name):
    """Re-write a sound file as WAV at a fixed output location.

    The frames, channel count and sample rate of ``filename`` are copied
    into a new file created with ``Format('wav')``.

    Args:
        filename: Path of the sound file to read.
        name: Unused by this function.

    Returns:
        The (hard-coded) path of the file that was written.
    """
    src = Sndfile(filename, 'r')
    try:
        fs = src.samplerate
        nc = src.channels
        data = src.read_frames(src.nframes)
    finally:
        # The original re-bound the handle without closing the input file.
        src.close()

    new_name = '/home/bitnami/apps/django/django_projects/Project/sonic_bar_code/static/newfile.wav'

    dst = Sndfile(new_name, 'w', Format('wav'), nc, fs)
    try:
        dst.write_frames(data)
    finally:
        dst.close()

    return new_name
def process_recording(filename, window_width=.03, window_spacing=.02, num_coeffs=40, mel_encode=True):
    """Compute a magnitude spectrogram and triangular filter-bank coefficients.

    The recording is cut into windows of ``window_width`` seconds centred
    every ``window_spacing`` seconds. Each window is transformed with
    ``np.fft.rfft`` and the upper part of the magnitude spectrum is kept.
    The spectra are then projected onto ``num_coeffs`` triangular filters
    whose edges are spaced linearly between 0 and ``fs / 2`` and, when
    ``mel_encode`` is true, mapped through ``mel_transform``.

    Args:
        filename: Path of the sound file to analyse.
        window_width: Window length in seconds.
        window_spacing: Distance between window centres in seconds.
        num_coeffs: Number of triangular filters.
        mel_encode: Whether to pass the filter edges through ``mel_transform``.

    Returns:
        Tuple ``(coeffs, spectrogram)``, both transposed so that the window
        index is the last axis.
    """
    f = Sndfile(filename, 'r')
    try:
        fs = f.samplerate
        data = f.read_frames(f.nframes)
    finally:
        f.close()

    samples = int(fs * window_width)
    num_windows = int((len(data) / (fs * window_spacing))) - 1

    freqs = np.fft.rfftfreq(samples, d=1. / fs)
    # First kept bin: len // 2 for an odd bin count, len // 2 - 1 for an even
    # one. Floor division keeps it an integer on Python 3.
    idx = (len(freqs) - 1) // 2
    pos_freqs = freqs[idx:]

    # Split the window around its centre so it is exactly `samples` long,
    # matching the length that `freqs` was computed for.
    half_before = samples // 2
    half_after = samples - half_before

    spectragram = np.empty((num_windows, len(pos_freqs)))
    for i in range(num_windows):
        # Slice bounds must be integers; truncate the (float) centre position.
        centre = int((i + 1) * fs * window_spacing)
        window = data[centre - half_before:centre + half_after]
        spectragram[i] = np.abs(np.fft.rfft(window)[idx:])

    edges = np.linspace(0, fs / 2., num=(num_coeffs + 2))
    if mel_encode:
        edges = mel_transform(edges)

    # Start from zeros: cells outside a filter's support must contribute
    # nothing (np.empty would leave them as arbitrary memory contents).
    filter_bank = np.zeros((num_coeffs, len(pos_freqs)))
    for i in range(num_coeffs):
        for j, freq in enumerate(pos_freqs):
            if edges[i] <= freq <= edges[i + 2]:
                filter_bank[i, j] = triangle(edges[i], edges[i + 1], edges[i + 2], freq)

    # One matrix product applies the filter bank to every window at once.
    coeffs = np.dot(spectragram, filter_bank.T)

    return np.transpose(coeffs), np.transpose(spectragram)
def test_float_frames(self):
    """ Check nframes can be a float"""
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        # Open the temporary file for reading and writing
        fmt = Format('wav', 'pcm16')
        a = Sndfile(fd, 'rw', fmt, channels=1, samplerate=22050)
        try:
            # randint's upper bound is exclusive, so 101 keeps the closed
            # range [-100, 100] that the deprecated random_integers produced.
            tmp = np.random.randint(-100, 101, 1000).astype(np.short)
            a.write_frames(tmp)
            a.seek(0)
            a.sync()
            # The point of the test: a float frame count must be accepted.
            a.read_frames(1e2, dtype=np.short)
        finally:
            a.close()
    finally:
        close_tmp_file(rfd, cfilename)
def file_to_features(self, wavpath):
    """Read a sound file frame by frame and convert each frame to MFCCs.

    Frames are ``framelen // 2`` samples long; after each read the file
    position is moved back by a quarter of ``framelen`` so that consecutive
    frames overlap. Multi-channel frames are averaged to mono. For every
    frame, coefficients 1 to 13 of the cepstrum returned by
    ``self.mfccMaker`` are kept.

    Args:
        wavpath: Path of the sound file; its sample rate must equal the
            module-level ``fs``.

    Returns:
        2D array with one row of features per frame.

    Raises:
        ValueError: If the path does not exist or the sample rate differs
            from ``fs``.
    """
    if verbose:
        self.count = self.count + 1
        print("Reading %s :" % wavpath)
    if not os.path.isfile(wavpath):
        raise ValueError("path %s not found" % wavpath)

    sf = Sndfile(wavpath, "r")
    try:
        if sf.samplerate != fs:
            raise ValueError("wanted sample rate %g - got %g." % (fs, sf.samplerate))

        # Floor division keeps sizes and offsets integral on Python 3.
        winlen = framelen // 2
        window = np.hamming(winlen)
        features = []
        while True:
            try:
                chunk = sf.read_frames(winlen, dtype=np.float32)
                sf.seek(-framelen // 4, 1)  # step back so frames overlap
                if len(chunk) != winlen:
                    print("Not read sufficient samples - returning")
                    break
                if sf.channels != 1:
                    chunk = np.mean(chunk, 1)  # mixdown
                framespectrum = np.fft.fft(window * chunk, winlen)
                magspec = abs(framespectrum[:winlen])

                # Frequency warping and MFCC computation
                melSpectrum = self.mfccMaker.warpSpectrum(magspec)
                melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
                melCepstrum = melCepstrum[1:]   # exclude zeroth coefficient
                melCepstrum = melCepstrum[:13]  # limit to lower MFCCs

                features.append(melCepstrum)
            except RuntimeError:
                # A RuntimeError from the read/seek ends the loop.
                break
    finally:
        # Close the file on every path, including the sample-rate error.
        sf.close()
    return np.array(features)
def analyze(filename):
    """Print a property report for a sound file.

    The report starts with general file information (format, channel count,
    sampling rate, sample count, length) followed by the output of
    ``properties`` for each channel. Identical stereo channels are reported
    once. The report is handed to ``display`` together with a header line.

    Args:
        filename: Path of the sound file to analyse.
    """
    snd = Sndfile(filename, 'r')
    samples = snd.read_frames(snd.nframes)
    n_channels = snd.channels
    rate = snd.samplerate

    heading = 'dBFS values are relative to a full-scale square wave'

    report = []
    report.append('Properties for "' + filename + '"')
    report.append(str(snd.format))
    report.append('Channels:\t%d' % n_channels)
    report.append('Sampling rate:\t%d Hz' % rate)
    report.append('Samples:\t%d' % snd.nframes)
    report.append('Length: \t' + str(snd.nframes / rate) + ' seconds')
    report.append('-----------------')

    snd.close()

    if n_channels == 1:
        # Monaural
        report.extend(properties(samples, rate))
    elif n_channels == 2:
        # Stereo
        left = samples[:, 0]
        right = samples[:, 1]
        if array_equal(left, right):
            report.append('Left and Right channels are identical:')
            report.extend(properties(samples[:, 0], rate))
        else:
            report.append('Left channel:')
            report.extend(properties(samples[:, 0], rate))
            report.append('Right channel:')
            report.extend(properties(samples[:, 1], rate))
    else:
        # Multi-channel: one section per column of the frame array
        for number, column in enumerate(samples.transpose(), 1):
            report.append('Channel %d:' % number)
            report.extend(properties(column, rate))

    display(heading, report)

    # Histogram plotting is switched off by this local flag.
    plot_histogram = False
    if plot_histogram:
        histogram(samples)
def load_wav(fname, rate=None):
    """Load a sound file as a 2D array, optionally resampling it.

    Args:
        fname: Path of the sound file.
        rate: Target sample rate. When ``None`` or equal to the file's own
            rate, the signal is returned as read.

    Returns:
        Tuple ``(signal, rate)``. ``signal`` has shape
        ``(n_frames, n_channels)``; ``rate`` is the sample rate of the
        returned signal.
    """
    fp = Sndfile(fname, 'r')
    try:
        native_rate = fp.samplerate
        # Force a (frames, channels) layout, including for mono files. The
        # original's follow-up `ndim == 1` branch could never run after this
        # reshape (and discarded its result), so it has been dropped.
        samples = fp.read_frames(fp.nframes).reshape((-1, fp.channels))
    finally:
        fp.close()

    if rate is None or rate == native_rate:
        return samples, native_rate

    resampled = resampy.resample(samples, native_rate, rate, axis=0, filter='kaiser_best')
    return resampled, rate
def convertAndRemoveVoice(inputfile):
    """Convert a file to stereo WAV with ffmpeg and write a vocal-removed copy.

    ``inputfile`` (relative to the current working directory) is converted
    to a 2-channel 44100 Hz WAV next to it. Half the difference of the two
    channels is then written as a mono file whose name replaces ``.wav``
    with ``_VocalRemoved.wav``.

    Args:
        inputfile: File name relative to the current working directory; its
            last four characters are treated as the extension.

    Returns:
        1 on success, 0 if launching the ffmpeg conversion raised.
    """
    song_folder = os.getcwd() + '/'
    print(inputfile[-3:])
    mp3_file = song_folder + inputfile
    wav_file = song_folder + inputfile[:-4] + '.wav'

    # Human-readable form of the command, kept only for the log line.
    command = "ffmpeg " + "-i " + '"' + mp3_file + '"' + " -y " + " -ac 2 " + " -ar 44100 " + '"' + wav_file + '"'
    print(command)

    # Run ffmpeg from an argument list, without a shell, so that quotes or
    # shell metacharacters in the file name cannot alter the command.
    argv = ["ffmpeg", "-i", mp3_file, "-y", "-ac", "2", "-ar", "44100", wav_file]
    try:
        p = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
        p.communicate()
    except Exception:
        print('wav conversion problem')
        return 0

    original_wav = Sndfile(wav_file, 'r')
    try:
        audio = original_wav.read_frames(original_wav.nframes)
        wav_format = original_wav.format
        wav_rate = original_wav.samplerate
    finally:
        original_wav.close()

    # Half the difference of the two channels.
    outputAudio = (audio[:, 0] - audio[:, 1]) / 2
    print(2)

    new_filename = wav_file.replace('.wav', '_VocalRemoved.wav')
    print(new_filename)

    output_wav = Sndfile(new_filename, 'w', wav_format, 1, wav_rate)
    try:
        output_wav.write_frames(outputAudio)
    finally:
        output_wav.close()
    return 1
def getWordSegmentation(path):
    """Segment every ``.WAV`` file under ``path`` by its word transcription.

    For each ``<stem>.WAV`` found while walking ``path``, the matching
    ``<stem>.WRD`` file is read. Every non-empty line of it holds
    ``start end word`` separated by single spaces; the audio frames
    ``[start:end]`` are cut out for each word.

    Two files are written to the current directory: ``frame_data.npy`` (the
    per-file lists of audio segments, via ``joblib.dump``) and
    ``word_seg.p`` (the per-file lists of ``(word, (start, end))`` tuples,
    pickled; start and end are kept as the strings read from the file).

    Args:
        path: Root directory to search.
    """
    stems = []
    frame_data = []
    word_seg = []

    # Collect the extension-less paths of all .WAV files. splitext copes
    # with names that contain no dot or more than one dot.
    for root, dirs, files in os.walk(path):
        for entry in files:
            stem, ext = os.path.splitext(entry)
            if ext == ".WAV":
                stems.append(os.path.join(root, stem))

    for stem in stems:
        audio_file = Sndfile(stem + ".WAV", "r")
        try:
            audio = audio_file.read_frames(audio_file.nframes)
        finally:
            audio_file.close()

        with open(stem + ".WRD", "r") as transcription:
            word_segmentation = transcription.read().strip().split("\n")

        temp_seg = []
        temp_frames = []
        for line in word_segmentation:
            if not line:
                # An empty transcription yields a single empty line; skip it.
                continue
            start_time, end_time, word = line.split(" ")
            temp_seg.append((word, (start_time, end_time)))
            temp_frames.append(audio[int(start_time):int(end_time)])

        frame_data.append(temp_frames)
        word_seg.append(temp_seg)

    # Store all the data
    print("Taking a dump..")
    joblib.dump(np.asarray(frame_data), "frame_data.npy")
    # Binary mode: required for pickle on Python 3, harmless on Python 2.
    with open("word_seg.p", "wb") as out:
        pickle.dump(word_seg, out)