def file_to_specgram(path, specgrammode=None):
    """Read a mono sound file and return its log-magnitude spectrogram and phases.

    Parameters
    ----------
    path : str
        Sound file to read. It must be mono and sampled at the module-level
        rate ``fs``.
    specgrammode : None
        Only ``None`` (a plain STFT spectrogram) is recognised.

    Returns
    -------
    tuple
        ``(mags, phasifiers)``: the natural log of the magnitudes of the
        frequency bins selected by ``specfreqbinrange``, and the complex
        spectrum divided by its magnitude.

    Raises
    ------
    ValueError
        If ``fftsize != framelen``, the path is not a file, or the mode is
        not recognised.
    Error
        If the file is not mono or has the wrong sample rate.
    """
    if specgrammode is None:
        # default is to do a "normal" spectrogram right here
        if fftsize != framelen:
            raise ValueError("this mode requires normal fftsize")
        if not os.path.isfile(path):
            raise ValueError("path %s not found" % path)
        sf = Sndfile(path, "r")
        try:
            if sf.channels != 1:
                raise Error("ERROR in spemptk: sound file has multiple channels (%i) - mono audio required." % sf.channels)
            if sf.samplerate != fs:
                raise Error("ERROR in spemptk: wanted srate %g - got %g." % (fs, sf.samplerate))
            chunksize = 4096
            # Seed with an empty float64 array so the stacked result has the
            # same dtype the former hstack-per-chunk loop produced.
            chunks = [np.array([])]
            while True:
                try:
                    chunks.append(sf.read_frames(chunksize, dtype=np.float32))
                except RuntimeError:
                    # NOTE(review): read_frames signals end-of-file by raising;
                    # if it also raises on a short final read, the trailing
                    # partial chunk is dropped - confirm against audiolab.
                    break
        finally:
            # Release the handle even when validation or reading fails.
            sf.close()
        # Stack once instead of re-copying the whole signal per chunk.
        pcm = np.hstack(chunks)
        spec = stft(pcm).T
    else:
        raise ValueError("specgrammode not recognised: %s" % specgrammode)
    spec = spec[specfreqbinrange[0]:specfreqbinrange[1], :]
    mags = abs(spec)
    phasifiers = spec / mags
    if specgrammode is None:
        mags = np.log(mags)
    return (mags, phasifiers)
def file_to_specgram(path, specgrammode=None):
    """Read a mono sound file and return its log-magnitude spectrogram and phases.

    Parameters
    ----------
    path : str
        Sound file to read. It must be mono and sampled at the module-level
        rate ``fs``.
    specgrammode : None
        Only ``None`` (a plain STFT spectrogram) is recognised.

    Returns
    -------
    tuple
        ``(mags, phasifiers)``: the natural log of the magnitudes of the
        frequency bins selected by ``specfreqbinrange``, and the complex
        spectrum divided by its magnitude.

    Raises
    ------
    ValueError
        If ``fftsize != framelen``, the path is not a file, or the mode is
        not recognised.
    Error
        If the file is not mono or has the wrong sample rate.
    """
    if specgrammode is None:
        # default is to do a "normal" spectrogram right here
        if fftsize != framelen:
            raise ValueError("this mode requires normal fftsize")
        if not os.path.isfile(path):
            raise ValueError("path %s not found" % path)
        sf = Sndfile(path, "r")
        try:
            if sf.channels != 1:
                raise Error(
                    "ERROR in spemptk: sound file has multiple channels (%i) - mono audio required."
                    % sf.channels)
            if sf.samplerate != fs:
                raise Error("ERROR in spemptk: wanted srate %g - got %g." %
                            (fs, sf.samplerate))
            chunksize = 4096
            # Seed with an empty float64 array so the stacked result has the
            # same dtype the former hstack-per-chunk loop produced.
            chunks = [np.array([])]
            while True:
                try:
                    chunks.append(sf.read_frames(chunksize, dtype=np.float32))
                except RuntimeError:
                    # NOTE(review): read_frames signals end-of-file by raising;
                    # if it also raises on a short final read, the trailing
                    # partial chunk is dropped - confirm against audiolab.
                    break
        finally:
            # Release the handle even when validation or reading fails.
            sf.close()
        # Stack once instead of re-copying the whole signal per chunk.
        pcm = np.hstack(chunks)
        spec = stft(pcm).T
    else:
        raise ValueError("specgrammode not recognised: %s" % specgrammode)
    spec = spec[specfreqbinrange[0]:specfreqbinrange[1], :]
    mags = abs(spec)
    phasifiers = spec / mags
    if specgrammode is None:
        mags = np.log(mags)
    return (mags, phasifiers)
def read_sound(fp):
    """Read an mp3, aif or wav file as a float64 signal scaled by its maximum.

    Parameters
    ----------
    fp : str
        Path to the audio file; the reader is chosen from the file-name
        suffix (``mp3``, ``aif`` or ``wav``).

    Returns
    -------
    tuple
        ``(data, samplerate)`` where ``data`` is the first channel as
        float64, divided by its largest sample value.

    Raises
    ------
    ValueError
        If the suffix is not supported or no decoder could read the file.
    """
    data = None
    samplerate = None
    if fp.endswith('mp3'):
        try:
            # Decode through the external ``lame`` binary into a scratch file.
            oname = 'temp.wav'
            result = subprocess.call(['lame', '--decode', fp, oname])
            if result != 0:
                raise RuntimeError("lame exited with status %r" % result)
            samplerate, data = wav.read(oname)
        except Exception:
            print("couldn't run lame")
            # Fall back to moviepy when lame is missing or failed.
            try:
                import moviepy.editor as mpy
                aud_clip = mpy.AudioFileClip(fp)
                samplerate = aud_clip.fps
                data = aud_clip.to_soundarray()
            except Exception:
                print("moviepy not installed?")
    elif fp.endswith('aif'):
        sf = Sndfile(fp, 'r')
        try:
            sf.seek(0)
            data = sf.read_frames(sf.nframes)
            samplerate = sf.samplerate
        finally:
            sf.close()
    elif fp.endswith('wav'):
        samplerate, data = wav.read(fp)
    if data is None:
        # Previously this surfaced as an unbound-local error further down.
        raise ValueError("could not read audio from %s" % fp)
    if len(data.shape) > 1:
        # Keep only the first channel of multi-channel audio.
        data = data[:, 0]
    data = data.astype('float64')
    # NOTE(review): scales by the maximum, not the maximum absolute value.
    data /= data.max()
    return data, samplerate
def __create_feature(self, input_path, speaker_name, feature_filename, mode):
    """Extract cepstral features from one audio file and store them as HDF5.

    The output goes to ``<features_rootpath>/<speaker_name>/<feature_filename>``;
    the speaker directory is created when missing.

    Parameters
    ----------
    input_path : str
        Audio file to read.
    speaker_name : str
        Name of the per-speaker sub-directory.
    feature_filename : str
        File name of the HDF5 output.
    mode
        Not used by this method.
    """
    speaker_featurepath = os.path.join(self.features_rootpath, speaker_name)
    if not os.path.exists(speaker_featurepath):
        os.mkdir(speaker_featurepath)
    output_path = os.path.join(speaker_featurepath, feature_filename)

    f = Sndfile(input_path)
    try:
        rate = f.samplerate
        data = f.read_frames(f.nframes)
    finally:
        # The handle was previously never released.
        f.close()
    # Scale the samples by 2**15 before feature extraction.
    original_data = data * pow(2, 15)

    extractor = bob.bio.spear.extractor.Cepstral(win_length_ms=25,
                                                 n_filters=27,
                                                 n_ceps=13,
                                                 with_energy=False,
                                                 mel_scale=True,
                                                 features_mask=np.arange(0, 39))
    preprocessor = bob.bio.spear.preprocessor.Energy_Thr()
    _unused_rate, _unused_data, labels = preprocessor((rate, original_data))
    feature = extractor([rate, original_data, labels])

    out_file = bob.io.base.HDF5File(output_path, 'w')
    try:
        extractor.write_feature(feature, out_file)
    finally:
        out_file.close()
def file_to_features(self, wavpath):
    """Convert a sound file to one row of MFCC features per frame.

    Frames of ``framelen`` samples are read back to back, Hamming-windowed
    and passed through ``self.mfccMaker``. Multi-channel frames are averaged
    to mono first.

    Parameters
    ----------
    wavpath : str
        Path of the sound file.

    Returns
    -------
    numpy.ndarray
        One row per frame, holding coefficients 1..13 of the mel cepstrum.
    """
    sf = Sndfile(wavpath, "r")
    window = np.hamming(framelen)
    features = []
    try:
        while True:
            try:
                chunk = sf.read_frames(framelen, dtype=np.float32)
                if len(chunk) != framelen:
                    print("Not read sufficient samples - returning")
                    break
                if sf.channels != 1:
                    chunk = np.mean(chunk, 1)  # mixdown
                framespectrum = np.fft.fft(window * chunk)
                # Floor division keeps the slice bound an integer.
                magspec = abs(framespectrum[:framelen // 2])
                # do the frequency warping and MFCC computation
                melSpectrum = self.mfccMaker.warpSpectrum(magspec)
                melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
                melCepstrum = melCepstrum[1:]  # exclude zeroth coefficient
                melCepstrum = melCepstrum[:13]  # limit to lower MFCCs
                features.append(melCepstrum)
            except RuntimeError:
                # A RuntimeError from this block ends the loop (treated as EOF).
                break
    finally:
        # Close the file even if an unexpected error escapes the loop.
        sf.close()
    return np.array(features)
def read_wav(self, sample_path): sample = Sndfile(cwd + sample_path, 'r') sampling_rate = sample.samplerate channels = sample.channels encoding = sample.encoding frames_count = sample.nframes frames = sample.read_frames(frames_count, dtype=np.float32) sample.close() del sample if channels == 1: text_type = 'mono' sample_type = 0 elif channels == 2: text_type = 'stereo' sample_type = 0b01100100 else: text_type = '{0}-channels'.format(channels) if OPTIONS['verbose'] > 1: print "*", encoding, text_type, 'sample "', sample_path, '"', 4 * frames_count, 'kB' if OPTIONS['play_sound']: play(frames.astype(np.float64).T, sampling_rate) self.update({ 'sample_data': frames, 'sample_type': sample_type, 'channels': 2, 'sample_bittype': 4 })
def file_to_features(self, wavpath):
    """Read through a sound file, converting each frame to MFCC features.

    Frames of ``framelen`` samples are read back to back, Hamming-windowed
    and passed through ``self.mfccMaker``. Multi-channel frames are averaged
    to mono first.

    Parameters
    ----------
    wavpath : str
        Path of the sound file; its sample rate must equal the module-level
        ``fs``.

    Returns
    -------
    numpy.ndarray
        2D array with one row per frame (coefficients 1..13 of the mel
        cepstrum).

    Raises
    ------
    ValueError
        If the path is not a file or the sample rate differs from ``fs``.
    """
    if verbose:
        print("Reading %s" % wavpath)
    if not os.path.isfile(wavpath):
        raise ValueError("path %s not found" % wavpath)
    sf = Sndfile(wavpath, "r")
    features = []
    try:
        if sf.samplerate != fs:
            raise ValueError("wanted sample rate %g - got %g." % (fs, sf.samplerate))
        window = np.hamming(framelen)
        while True:
            try:
                chunk = sf.read_frames(framelen, dtype=np.float32)
                if len(chunk) != framelen:
                    print("Not read sufficient samples - returning")
                    break
                if sf.channels != 1:
                    chunk = np.mean(chunk, 1)  # mixdown
                framespectrum = np.fft.fft(window * chunk)
                # Floor division keeps the slice bound an integer.
                magspec = abs(framespectrum[:framelen // 2])
                # do the frequency warping and MFCC computation
                melSpectrum = self.mfccMaker.warpSpectrum(magspec)
                melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
                melCepstrum = melCepstrum[1:]  # exclude zeroth coefficient
                melCepstrum = melCepstrum[:13]  # limit to lower MFCCs
                framefeatures = melCepstrum  # todo: include deltas? that can be your homework.
                features.append(framefeatures)
            except RuntimeError:
                # A RuntimeError from this block ends the loop (treated as EOF).
                break
    finally:
        # Close the file on every exit path, including the rate error above.
        sf.close()
    return np.array(features)
def load_pcm(path):
    """Read a whole sound file and return its first channel and sample rate.

    Parameters
    ----------
    path : str
        Sound file to read.

    Returns
    -------
    tuple
        ``(pcm, samplerate)``; for multi-channel files ``pcm`` is column 0.
    """
    wave = Sndfile(path, "r")
    try:
        pcm = wave.read_frames(wave.nframes)
        # Capture the metadata while the handle is still open.
        channels = wave.channels
        samplerate = wave.samplerate
    finally:
        wave.close()
    # Compare by value; ``is not 1`` relied on small-int identity.
    if channels != 1:
        pcm = pcm[:, 0]
    return (pcm, samplerate)
def file_to_features(self,wavpath):
    """Convert a sound file to one row of MFCC features per frame.

    Frames of ``framelen`` samples are read back to back, Hamming-windowed
    and passed through ``self.mfccMaker``. Multi-channel frames are averaged
    to mono first.

    Parameters
    ----------
    wavpath : str
        Path of the sound file.

    Returns
    -------
    numpy.ndarray
        One row per frame, holding coefficients 1..13 of the mel cepstrum.
    """
    sf = Sndfile(wavpath, "r")
    window = np.hamming(framelen)
    features = []
    try:
        while True:
            try:
                chunk = sf.read_frames(framelen, dtype=np.float32)
                if len(chunk) != framelen:
                    print("Not read sufficient samples - returning")
                    break
                if sf.channels != 1:
                    chunk = np.mean(chunk, 1)  # mixdown
                framespectrum = np.fft.fft(window * chunk)
                # Floor division keeps the slice bound an integer.
                magspec = abs(framespectrum[:framelen // 2])
                # do the frequency warping and MFCC computation
                melSpectrum = self.mfccMaker.warpSpectrum(magspec)
                melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
                melCepstrum = melCepstrum[1:]  # exclude zeroth coefficient
                melCepstrum = melCepstrum[:13]  # limit to lower MFCCs
                features.append(melCepstrum)
            except RuntimeError:
                # A RuntimeError from this block ends the loop (treated as EOF).
                break
    finally:
        # Close the file even if an unexpected error escapes the loop.
        sf.close()
    return np.array(features)
def extractData(file_names):
    """Build MFCC inputs and integer-encoded transcripts for TIMIT files.

    Parameters
    ----------
    file_names : dict
        Maps a directory path to a list of audio file names in it. The first
        character of each directory path is replaced by ``./TIMIT``.

    Returns
    -------
    tuple
        ``(data, targets)``: a list of scaled MFCC matrices and a list of
        integer arrays (0 for ``<space>``, otherwise ``ord(ch) - ord('a') + 1``).
    """
    data = []
    targets = []
    for k, v in file_names.items():
        for f_name in v:
            source_fname = k + "/" + f_name
            target_fname = k + "/" + f_name.split(".")[0] + ".TXT"
            source_fname = "./TIMIT" + source_fname[1:]
            target_fname = "./TIMIT" + target_fname[1:]

            audio_file = Sndfile(source_fname, "r")
            try:
                sr = audio_file.samplerate
                audio = audio_file.read_frames(audio_file.nframes)
            finally:
                # Close even when reading fails.
                audio_file.close()
            datum = mfcc(audio, samplerate=sr, nfilt=64, numcep=40)
            datum = preprocessing.scale(datum)
            data.append(datum)

            with open(target_fname, "r") as text_file:
                # Lower-case, drop full stops and skip the first two tokens.
                target_txt = ' '.join(text_file.read().lower().strip().replace(
                    ".", "").split()[2:])
                # A join-based filter returns a string on Python 2 and 3 alike.
                target_txt = ''.join(
                    ch for ch in target_txt if ch not in special_chars)
                # NOTE(review): as written this replace() is a no-op; the
                # original literal may have lost whitespace - confirm.
                target_txt = target_txt.replace(' ', ' ').split(' ')
                target = np.hstack(
                    ['<space>' if x == '' else list(x) for x in target_txt])
                target = np.asarray(
                    [0 if x == '<space>' else ord(x) - (ord('a') - 1)
                     for x in target])
                targets.append(target)
    return data, targets
class AudioWriter:
    """Write runs of samples into numbered mono FLAC files.

    ``parseData`` splits a sample sequence wherever two consecutive samples
    equal the module-level ``zero_val`` and writes each non-empty run to
    ``<baseFilename>.<syllableIndex>.flac``.
    """

    # Index used in the name of the next file to open.
    syllableIndex = 0
    # Prefix of every output file name.
    baseFilename = "syllable"
    # True while ``f`` refers to an open output file.
    fileOpen = False
    # Container/encoding used for every output file.
    format = Format('flac', 'pcm24')
    # Currently open Sndfile, if any.
    f = None
    # Number of runs written so far.
    filecount = 0

    def open(self):
        """Open the output file for the current syllable index."""
        target = self.baseFilename + "." + str(self.syllableIndex) + '.flac'
        self.f = Sndfile(target, 'w', self.format, 1, 44100)
        self.fileOpen = True

    def close(self):
        """Close the current file, if open, and advance the syllable index."""
        if not self.fileOpen:
            return
        self.f.close()
        self.syllableIndex += 1
        self.fileOpen = False

    def write(self, data):
        """Write ``data`` to the current file, opening one when needed."""
        if not self.fileOpen:
            self.open()
        self.f.write_frames(data)

    def parseData(self, data):
        """Split ``data`` at double-``zero_val`` boundaries and write each run."""
        pending = []
        last_index = len(data) - 2
        for i in range(len(data) - 1):
            at_boundary = i == last_index or (
                data[i] == zero_val and data[i + 1] == zero_val)
            if not at_boundary:
                pending.append(data[i])
                continue
            if pending:
                self.write(np.array(pending))
                self.filecount += 1
                pending = []
            # close() is called at every boundary, whether or not a run was written.
            self.close()
def get_fft_points(sound_filename, fps, fft_pixels, rate = 1, fourierwidth = 0.3):
    """TODO will generate rate points per frame

    Based on the script from http://classicalconvert.com/2008/04/
    how-to-visualize-music-using-animated-spectrograms-with
    -open-source-everything/

    The file is read in blocks of ``samplerate // (rate * fps)`` frames; each
    block is averaged across channels, transformed with ``fft`` and reduced
    to the real parts of its first ``fft_pixels`` bins (zero-padded).

    Parameters
    ----------
    sound_filename : str
        Sound file to analyse.
    fps : number
        Video frames per second.
    fft_pixels : int
        Number of spectrum values kept per block.
    rate : number
        Blocks per video frame.
    fourierwidth : float
        Not used by this function.

    Returns
    -------
    list
        One list of ``fft_pixels`` floats per block.

    Raises
    ------
    ValueError
        If ``rate * fps`` exceeds the sample rate, so a block would be empty.
    """
    f = Sndfile(sound_filename, 'r')
    try:
        divisor = int(f.samplerate // (rate * fps))
        if divisor < 1:
            # A zero-length block would never advance the read position.
            raise ValueError(
                "rate * fps (%r) must not exceed the sample rate (%r)"
                % (rate * fps, f.samplerate))
        total = f.nframes
        points = []
        framepos = 0
        while framepos < total:
            read_len = min(divisor, total - framepos)
            frames = f.read_frames(read_len)
            buff = []
            for frame in frames:
                # is frame iterable or just one chan?
                if getattr(frame, '__iter__', False):
                    fval = sum(frame) / len(frame)
                else:
                    fval = frame
                buff.append(fval)
            # TODO: trim to 1024 or so?
            outfft = fft(buff)
            spectrum = [(outfft[y].real if y < len(outfft) else 0.0)
                        for y in range(fft_pixels)]
            points.append(spectrum)
            framepos += len(frames)
    finally:
        f.close()
    # maximise
    return points
def __init__(self, fn, rate=None, pad_start=0, seek=None, duration=None, rotation=None):
    """Open an audio file for reading, converting it to WAV first when needed.

    Parameters
    ----------
    fn : str
        Input file. Anything not ending in ``.wav``, or a WAV whose sample
        rate differs from ``rate``, is converted with ``convert2wav`` into a
        temporary file under ``/tmp/``.
    rate : int, optional
        Required sample rate; ``None`` accepts the file's own rate.
    pad_start : int
        Stored as ``self.pad``.
    seek : float, optional
        Seconds to skip from the start of the file.
    duration : float, optional
        Upper bound, in seconds, on the duration exposed by the reader.
    rotation : float, optional
        Angle in ``[-pi, pi)`` used to build ``self.rot_mtx``; requires more
        than two channels.
    """
    fp = Sndfile(fn, 'r') if fn.endswith('.wav') else None
    needs_conversion = fp is None or (rate is not None and fp.samplerate != rate)
    if fp is not None:
        # The probe handle was only needed for its sample rate.
        fp.close()

    if needs_conversion:
        # Convert to wav file
        if not os.path.isdir('/tmp/'):
            os.makedirs('/tmp/')
        snd_file = tempfile.NamedTemporaryFile('w', prefix='/tmp/', suffix='.wav', delete=False)
        snd_file.close()
        convert2wav(fn, snd_file.name, rate)
        self.snd_fn = snd_file.name
        self.rm_flag = True
    else:
        self.snd_fn = fn
        self.rm_flag = False

    self.fp = Sndfile(self.snd_fn, 'r')
    self.num_channels = self.fp.channels
    self.rate = self.fp.samplerate
    self.num_frames = self.fp.nframes
    self.duration = self.num_frames / float(self.rate)
    self.k = 0
    self.pad = pad_start

    if seek is not None and seek > 0:
        # Skip ahead by reading and discarding the leading frames.
        num_frames = int(seek * self.rate)
        self.fp.read_frames(num_frames)
    else:
        seek = 0

    if duration is not None:
        self.duration = min(duration, self.duration - seek)
        self.num_frames = int(self.duration * self.rate)

    if rotation is not None:
        assert self.num_channels > 2  # Spatial audio
        assert -np.pi <= rotation < np.pi
        c = np.cos(rotation)
        s = np.sin(rotation)
        rot_mtx = np.array([
            [1, 0, 0, 0],   # W' = W
            [0, c, 0, s],   # Y' = X sin + Y cos
            [0, 0, 1, 0],   # Z' = Z
            [0, -s, 0, c],  # X' = X cos - Y sin
        ])
        self.rot_mtx = rot_mtx
    else:
        self.rot_mtx = None
def test_read_wave():
    """Read a fixed number of frames from a sample file and show its spectrogram."""
    f = Sndfile("../fcjf0/sa1.wav", 'r')
    try:
        data = f.read_frames(46797)
    finally:
        # The handle was previously left open.
        f.close()
    data_arr = np.array(data)
    pyplot.figure()
    pyplot.specgram(data_arr)
    pyplot.show()
def __init__(self, file_name):
    """Open ``file_name`` and cache its metadata and complete audio content.

    The Sndfile handle stays open and is kept as ``self.sf``.
    """
    handle = Sndfile(file_name)
    self.sf = handle
    self.file_format = handle.format
    self.nchans = handle.channels
    self.sr = handle.samplerate
    self.length = handle.nframes
    # Read every frame up front.
    self.audio = handle.read_frames(self.length)
def writeWAV(self, data, filename):
    """Write ``data`` to ``filename`` as a WAV file at ``self.samplingRate``.

    Parameters
    ----------
    data : numpy.ndarray
        A 2-D array is written with one channel per column; any other shape
        is written as a single channel.
    filename : str
        Output path.
    """
    # Take the channel count from the data instead of assuming stereo.
    channels = data.shape[1] if len(data.shape) == 2 else 1
    f = Sndfile(filename, 'w', Format('wav'), channels, self.samplingRate)
    try:
        f.write_frames(data)
    finally:
        f.close()
def downsample(fs, sig):
    """Resample ``sig`` from ``fs`` to ``SAMPLE_RATE`` through sox.

    The signal is zero-padded to a whole number of ``fs * WINDOW_SIZE``
    frames, written to a temporary WAV, converted with the sox ``rate``
    effect and read back.

    Parameters
    ----------
    fs : int
        Sample rate of ``sig``.
    sig : array-like
        Mono signal.

    Returns
    -------
    numpy.ndarray
        The resampled signal.
    """
    in_file = random_string() + ".wav"
    out_file = random_string() + ".wav"

    frame_len = fs * WINDOW_SIZE
    pad = len(sig) % frame_len
    if pad > 0:
        # int() keeps the size valid when WINDOW_SIZE is not an integer.
        sig = np.append(sig, np.zeros(int(frame_len - pad)))

    try:
        f = Sndfile(in_file, 'w',
                    Format(type="wav", encoding='pcm16', endianness="file"),
                    1, fs)
        try:
            f.write_frames(sig)
        finally:
            f.close()

        sox_in = pysox.CSoxStream(in_file)
        sox_out = pysox.CSoxStream(out_file, 'w',
                                   pysox.CSignalInfo(SAMPLE_RATE, 1, 8),
                                   fileType='wav')
        sox_chain = pysox.CEffectsChain(sox_in, sox_out)
        sox_chain.add_effect(pysox.CEffect("rate", [str(SAMPLE_RATE)]))
        sox_chain.flow_effects()
        sox_out.close()

        f = Sndfile(out_file, 'r')
        try:
            sig = f.read_frames(f.nframes)
        finally:
            f.close()
    finally:
        # Remove the scratch files even when a step above fails.
        for tmp_path in (in_file, out_file):
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
    return sig
def _test_int_io(self, dt):
    """Round-trip random integers of dtype ``dt`` through a WAV file."""
    # TODO: check if neg or pos value is the highest in abs
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        # Use almost full possible range possible for the given data-type
        limit = 2 ** (8 * np.dtype(dt).itemsize - 3)
        samplerate = 22050
        count = samplerate
        written = np.random.random_integers(-limit, limit, count).astype(dt)

        # Open the file for writing
        wav_format = Format('wav', _DTYPE_TO_ENC[dt])
        writer = Sndfile(fd, 'w', wav_format, 1, samplerate)
        writer.write_frames(written)
        writer.close()

        # Read the data back through the file name.
        reader = Sndfile(cfilename, 'r')
        read_back = reader.read_frames(count, dtype=dt)
        reader.close()

        assert_array_equal(written, read_back)
    finally:
        close_tmp_file(rfd, cfilename)
def test_bad_wavread(self):
    """Check that wavread rejects a file that is not in WAV format."""
    # Create a tmp audio file with non wav format, write some random data
    # into it, and check it can not be opened by wavread
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        count = 22050
        noise = 0.1 * N.random.randn(count)

        # Open the copy file for writing
        aiff_format = audio_format('aiff', 'pcm16')
        writer = Sndfile(cfilename, 'w', aiff_format, 1, count)
        writer.write_frames(noise)
        writer.close()

        # Read the frames back through Sndfile first.
        reader = Sndfile(cfilename, 'r')
        rcnoise = reader.read_frames(count)
        reader.close()

        try:
            rnoise = wavread(cfilename)[0]
        except ValueError:
            # Expected: wavread refuses the non-wav file.
            pass
        else:
            raise Exception("wavread on non wav file succeded, expected to fail")
    finally:
        close_tmp_file(rfd, cfilename)
def load_sound(filename):
    """
    load a sound file and return a numpy array

    INFO: The values are normalized between -1 and 1

    :param filename: path of the sound file
    :return: tuple of a numpy array with (sound_length, channels) shape
             and the sample rate
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes, dtype=np.float64)
        samplerate = f.samplerate
    finally:
        # The handle was previously never closed.
        f.close()
    return data, samplerate
def load(filename):
    """Load an audio file and average over channels.

    Returns the data as a numpy array and the sampling rate.
    """
    fh = Sndfile(filename, "r")
    try:
        data = fh.read_frames(fh.nframes)
        rate = fh.samplerate
    finally:
        # The handle was previously never closed.
        fh.close()
    if data.ndim == 2:
        # Average across the channel axis.
        data = np.mean(data, axis=-1)
    return data, rate
def save_record():
    """Write the global ``data`` buffer to a temporary FLAC and export it as MP3.

    The temporary file name ends with ``TmpSpeechFile_<caller id>.flac``;
    the MP3 is written to ``PATH + FILE_NAME``.
    """
    global data
    import os

    n_channels, fmt = 1, Format('flac', 'pcm16')
    caller_id = agi.get_variable("CALLERID(num)")
    file_name = 'TmpSpeechFile_' + caller_id + '.flac'
    # mkstemp takes the suffix first and returns an open descriptor.
    fd, temp_sound_file = mkstemp(file_name)
    os.close(fd)

    flac_file = Sndfile(temp_sound_file, 'w', fmt, n_channels, SAMPLE_RATE)
    try:
        flac_file.write_frames(data)
    finally:
        # Close before the file is read back so all frames are on disk.
        flac_file.close()

    flac_audio = AudioSegment.from_file(temp_sound_file, "flac")
    flac_audio.export(PATH + FILE_NAME, format="mp3")
def load(filename):
    """
    Load a wave file and return the signal, sample rate and number of channels.

    Can be any format that libsndfile supports, like .wav, .flac, etc.
    """
    wave_file = Sndfile(filename, 'r')
    try:
        signal = wave_file.read_frames(wave_file.nframes)
        channels = wave_file.channels
        sample_rate = wave_file.samplerate
    finally:
        # The handle was previously never closed.
        wave_file.close()
    return signal, sample_rate, channels
def save_wav(sound, action_label, object_label): wav_path = '/tmp/new_wav' filename = os.path.join(wav_path, action_label + '-' + object_label + '-' + str(time.time()) + '.wav') format = Format('wav') print 'writing', filename, '...', f = Sndfile(filename, 'w', format, 1, 44100) f.write_frames(sound) f.close() print 'DONE'
def load(filename):
    """Load a wave file and return the signal, sample rate and number of channels.

    Can be any format that libsndfile supports, like .wav, .flac, etc.
    """
    wave_file = Sndfile(filename, 'r')
    try:
        signal = wave_file.read_frames(wave_file.nframes)
        channels = wave_file.channels
        sample_rate = wave_file.samplerate
    finally:
        # The handle was previously never closed.
        wave_file.close()
    return signal, sample_rate, channels
def CQT(filename, fmin=None, n_bins=84, hop_length=512,nfreqs=None):
    """Compute constant-Q features with deltas and energy for a sound file.

    Parameters
    ----------
    filename : str
        Sound file to analyse.
    fmin, n_bins, hop_length
        Passed straight to ``librosa.cqt``.
    nfreqs : int, optional
        When given, only the first ``nfreqs`` CQT rows are kept.

    Returns
    -------
    numpy.ndarray
        The CQT, its first and second deltas and the RMS energy, stacked
        row-wise and then transposed.
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        samplerate = f.samplerate
    finally:
        # The handle was previously never closed.
        f.close()
    cqt = librosa.cqt(data, sr=samplerate, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
    if nfreqs is not None:
        cqt = cqt[:nfreqs, :]
    delta1 = librosa.feature.delta(cqt, order=1)
    delta2 = librosa.feature.delta(cqt, order=2)
    energy = librosa.feature.rmse(y=data)
    features = np.vstack((cqt, delta1, delta2, energy))
    return features.T
def writeAudioOutput(output, fs, f, f2, outputTitle):
    """Write ``output`` as a mono float64 WAV and close all involved files.

    ``f`` and ``f2`` are already-open file objects owned by the caller; they
    are closed here together with the newly written output file.
    """
    # Define an output audio format and write the result.
    out_file = Sndfile(outputTitle, 'w', Format('wav', 'float64'), 1, fs)
    out_file.write_frames(output)

    # Clean up, in the same order as before: f, f2, then the output file.
    for handle in (f, f2, out_file):
        handle.close()
def extractOnsets(audio):
    """Slice ``audio`` at its HFC onsets and write the first slice to a WAV.

    The output file is ``out<counter>.wav`` (mono, 44100 Hz); the global
    ``counter`` is incremented on every call.

    Parameters
    ----------
    audio : array
        Mono signal to analyse.

    Returns
    -------
    The slices produced by ``Slicer``, one per onset interval.
    """
    global counter

    od1 = OnsetDetection(method='hfc')
    od2 = OnsetDetection(method='complex')

    # let's also get the other algorithms we will need, and a pool to store the results
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)
    pool = essentia.Pool()

    # let's get down to business
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase, = c2p(fft(w(frame)))
        pool.add('features.hfc', od1(mag, phase))
        pool.add('features.complex', od2(mag, phase))

    # Phase 2: compute the actual onsets locations
    onsets = Onsets()
    onsets_hfc = onsets(
        # this algo expects a matrix, not a vector
        array([pool['features.hfc']]),
        # you need to specify weights, but as there is only a single
        # function, it doesn't actually matter which weight you give it
        [1])
    # Computed as an example only; the slicing below uses the HFC onsets.
    onsets_complex = onsets(array([pool['features.complex']]), [1])

    # Each slice runs from one onset to the next; the last ends at the
    # total duration of the signal.
    startTimes = onsets_hfc
    endTimes = onsets_hfc[1:]
    duration = Duration()
    endTimes = np.append(endTimes, duration(audio))

    slicer = Slicer(startTimes=array(startTimes), endTimes=array(endTimes))
    frames = slicer(audio)

    f = Sndfile('out' + str(counter) + '.wav', 'w', Format('wav'), 1, 44100)
    counter = counter + 1
    try:
        f.write_frames(np.asarray(frames[0]))
    finally:
        # The output file was previously left open.
        f.close()
    return frames
def specgram_to_file(path, mags, phasifiers, normalise=True, specgrammode=None):
    """Invert a magnitude/phase spectrogram and write it as a mono WAV file.

    Parameters
    ----------
    path : str
        Output file.
    mags : numpy.ndarray
        Magnitudes; treated as log-magnitudes when ``specgrammode`` is None.
    phasifiers : numpy.ndarray
        Complex phase factors multiplied onto the magnitudes.
    normalise : bool
        Subtract the minimum magnitude before inversion and divide the
        waveform by its maximum afterwards.
    specgrammode : None
        ``None`` means ``mags`` holds natural-log magnitudes.
    """
    if specgrammode is None:
        mags = np.exp(mags)
    if normalise:
        # Build a new array so the caller's ``mags`` is never modified.
        mags = mags - np.min(mags)
    cplx = mags * phasifiers
    pcm = istft(cplx.T)
    if normalise:
        pcm /= np.max(pcm)
    outsf = Sndfile(path, "w", Format('wav'), 1, fs)
    try:
        outsf.write_frames(pcm)
    finally:
        outsf.close()
def wav_to_flac(wav_name):
    """Convert a WAV file to a temporary mono 16-bit FLAC file.

    Parameters
    ----------
    wav_name : str
        Input WAV; its sample rate must equal ``RATE``.

    Returns
    -------
    str
        Path of the temporary FLAC file.
    """
    import os

    # mkstemp takes the suffix first and returns an open descriptor.
    fd, tmp_name = mkstemp('tmp.flac')
    os.close(fd)
    Signal, fs = wavread(wav_name)[:2]
    assert(fs == RATE)

    fmt = Format('flac', 'pcm16')
    nchannels = 1
    flac_file = Sndfile(tmp_name, 'w', fmt, nchannels, RATE)
    try:
        flac_file.write_frames(Signal)
    finally:
        # Close so the file is complete before the caller opens it.
        flac_file.close()
    return tmp_name
def create_flac_from(sound_samples):
    """Write ``sound_samples`` to a temporary mono 16-bit FLAC file.

    Parameters
    ----------
    sound_samples : sequence
        Samples to write; converted with ``np.array`` first.

    Returns
    -------
    str
        Path of the temporary FLAC file.
    """
    import os

    __console.log('prepare flac format for writing to file')
    n_channels, fmt = 1, Format('flac', 'pcm16')
    caller_id = get_stdn_var(stdin.CALLER_ID)
    __console.log('write to temp file')
    # mkstemp takes the suffix first and returns an open descriptor.
    fd, temp_sound_file = mkstemp('TmpSpeechFile_' + caller_id + '.flac')
    os.close(fd)
    __console.log('prepare sound file')
    flac_file = Sndfile(temp_sound_file, 'w', fmt, n_channels, constants.RAW_RATE)
    try:
        flac_file.write_frames(np.array(sound_samples))
    finally:
        # Close so the file is complete before the caller opens it.
        flac_file.close()
    __console.log('sound file saved')
    return temp_sound_file
def __init__(self, filename, write=False, format='wav', rate=None, channels=None):
    """
    Open audiofile for writing or reading

    Parameters
    ----------
    filename : mixed
        Input wav file. String if a real file, `sys.stdin` for standard in.
    write: boolean
        Set true for writing to a file
    format : str
        Container format name passed to ``Format``. Only used for writing.
    rate : int
        Sample rate. Only required for writing
    channels : int
        Number of Channels. Only required for writing

    Raises
    ------
    RuntimeError
        If scikits.audiolab cannot be imported.
    ValueError
        If writing is requested without ``rate`` and ``channels``.

    Notes
    -----
    * The data is assumed to be a numpy array of floats, normalized between
      -1 and 1.
    """
    try:
        from scikits.audiolab import Format, Sndfile
    except ImportError:
        raise RuntimeError('You must have scikits.audiolab installed')

    if filename is sys.stdin:
        filename = '-'

    # Truthiness instead of identity, so 0/1 flags take the right branch.
    if write and (rate is None or channels is None):
        raise ValueError('You must provide sampling rate and '
                         'number of channels for writing file.')

    if not write:
        self.f = Sndfile(filename, 'r')
        self.channels = self.f.channels
        self.rate = self.f.samplerate
    else:
        format = Format(format)
        self.f = Sndfile(filename, 'w', format, channels, rate)
        self.channels = channels
        self.rate = rate
def analysefile(path, hopsize=0.5, mode='ch', numtop=1, framesize = 1024, chrm_kwargs=None, maxdursecs=None):
    """Analyses an audio file from disk, dividing into lapped frames and returning
    an array holding [raw, peaks, slopecent] for each frame.
    Can also do plain FFT-type analysis as an alternative.

    Parameters
    ----------
    path : str
        Mono sound file to analyse.
    hopsize : float
        Hop length as a fraction of the analyser's frame size.
    mode : str
        ``'ch'`` or ``'fft'``.
    numtop : int
        Passed to ``analyseframeplusfeatures``.
    framesize : int
        Frame size handed to the ``Chirpletringmod`` constructor.
    chrm_kwargs : dict, optional
        Extra constructor arguments; copied before being modified.
    maxdursecs : float, optional
        Duration limit in seconds (see the review note in the loop).

    Returns
    -------
    dict
        Keys ``'ch'``, ``'frames'``, ``'srate'``, ``'hopsize'``, ``'framesize'``.

    Raises
    ------
    ValueError
        If the mode is unknown or the path is not a file.
    Error
        If the file is not mono.
    """
    if (mode != 'ch') and (mode != 'fft'):
        raise ValueError('Mode %s not recognised' % mode)
    if not os.path.isfile(path):
        raise ValueError("path %s not found" % path)
    sf = Sndfile(path, "r")
    try:
        if sf.channels != 1:
            raise Error("ERROR in chirpletringmod: sound file has multiple channels (%i) - mono audio required." % sf.channels)
        srate = sf.samplerate
        if maxdursecs is not None:
            maxdurspls = maxdursecs * srate
        else:
            maxdurspls = sf.nframes
        if chrm_kwargs is not None:
            chrm_kwargs = deepcopy(chrm_kwargs)
            chrm_kwargs['samplerate'] = srate
            chrm_kwargs['framesize'] = framesize
        else:
            chrm_kwargs = {'samplerate': srate, 'framesize': framesize}
        ch = chirpletringmod.Chirpletringmod(**chrm_kwargs)
        ihop = int(hopsize * ch.framesize)
        numspecframes = sf.nframes // ihop
        print("File contains %i spectral frames" % numspecframes)
        storeraw = numspecframes < 500
        frames = []
        moretocome = True
        data = zeros(ch.framesize, float32)
        while moretocome:
            try:
                nextdata = sf.read_frames(ihop, dtype=float32)
            except RuntimeError:
                # Treat the error as EOF and flush with one silent hop; before,
                # the previous hop was re-used here (or the name was unbound).
                moretocome = False
                nextdata = zeros(ihop, float32)
            if len(nextdata) != ihop:
                print("data truncated, detected EOF")
                moretocome = False
                nextdata = hstack((nextdata, zeros(ihop - len(nextdata))))
            # Slide the analysis window forward by one hop.
            data = hstack((data[ihop:], nextdata))
            frames.append(ch.analyseframeplusfeatures(data, hopsize, mode, numtop, storeraw))
            # NOTE(review): len(data) is always the frame size, so this only
            # triggers when maxdurspls <= framesize - confirm the intent.
            if len(data) >= maxdurspls:
                break
    finally:
        sf.close()
    # the ch knows srate and framesize, why are we duplicating?
    return {'ch': ch, 'frames': frames, 'srate': srate, 'hopsize': hopsize, 'framesize': ch.framesize}
def __init__(self, fn, samplerate, filefmt='wav', datafmt='pcm16', channels=1):
    """Open ``fn`` for writing with the given container and sample encoding."""
    Sndfile.__init__(
        self,
        fn,
        mode='w',
        format=Format(filefmt, datafmt),
        channels=channels,
        samplerate=samplerate,
    )
def test_bigframes(self):
    """Seeking absurdly far into the file must raise IOError."""
    snd = Sndfile(join(TEST_DATA_DIR, 'test.wav'), 'r')
    try:
        try:
            snd.seek(2 ** 60)
        except IOError:
            # Expected outcome.
            pass
        else:
            raise Exception("Seek really succeded ! This should not happen")
    finally:
        snd.close()
def signal(self):
    """Return the audio signal, reading and normalising it on first access.

    The result is cached in ``self._signal``. With an offset configured,
    ``nframes_extended`` frames are read and only the last ``nframes`` kept.
    """
    # NOTE(review): the nesting was reconstructed from a collapsed source
    # line; normalize() is assumed to run once, right after loading.
    if hasattr(self, '_signal'):
        return self._signal

    sample_type = numpy.dtype(theanoconfig.floatX).type
    if self.offset_seconds is None:
        self._signal = Sndfile(self.path, mode='r').read_frames(
            self.nframes, dtype=sample_type)
    else:
        self._signal = Sndfile(self.path, mode='r').read_frames(
            self.nframes_extended, dtype=sample_type)
        # Drop the leading extension frames.
        self._signal = self._signal[self.nframes_extended - self.nframes:]
    self.normalize()
    return self._signal
def plotSpectrogram(f,mode,channel):
    """Plot the spectrogram of one channel of a sound file and save it as PNG.

    The image is saved as ``<name>CH<channel>.png`` and its path appended to
    the module-level ``listN``. Failures are reported with ``print`` and do
    not raise.

    Parameters
    ----------
    f : str
        Path of the sound file; the last four characters are stripped to
        form the image name.
    mode
        Not used by this function.
    channel : int
        Index of the channel to plot.
    """
    plt.close('all')

    # Extracting the name from the '.wav' file
    name = f[:len(f) - 4]
    print("Processing: %s CH %s" % (name, channel))

    plt.figure(figsize=(10.5, 3), dpi=100)
    try:
        r = Sndfile(f)
        try:
            # Read at most the first 59.8 seconds.
            begin = 0 * r.samplerate
            stop = 59.8 * r.samplerate
            sample = r.read_frames(int(stop - begin))
            Fs = r.samplerate
        finally:
            r.close()

        # Setting out of some spectrogram variables
        NFFT = int(Fs * 0.05)
        noverlap = int(Fs * 0.0025)

        # Colourmap values that work well are: 'binary','bone' and 'jet'
        fig = plt.specgram(sample[:, channel], Fs=Fs, NFFT=NFFT,
                           noverlap=noverlap, cmap=plt.get_cmap('jet'))
    except Exception:
        print("Could not process %s" % name)
        plt.figtext(0.5, 0.5, "ERROR")

    plt.title("CH" + str(channel) + " : " + f)
    plt.xticks([], [])  # gets rid of the x ticks and numbers
    plt.yticks([], [])  # gets rid of the y ticks and numbers

    try:
        # ``fig`` is unbound when plotting failed above; the resulting error
        # is caught here, so no image is recorded for a failed plot.
        plt.savefig(name + "CH" + str(channel) + ".png", fig=fig, bbox_inches='tight')
        # Record the image only after it has been saved successfully.
        namePNG = "./" + name + "CH" + str(channel) + ".png"
        listN.append(namePNG)
    except Exception:
        print("ERROR: %s" % name)
def main(RAW_DATA_DIR, chunk_length, slide_length):
    """Cut every WAV in ``RAW_DATA_DIR`` into overlapping, zero-padded chunks.

    Chunks are written as mono WAV files ``p<N>.wav`` into
    ``RAW_DATA_DIR/parts``. If that directory exists, the user is asked
    whether to rebuild it; any answer other than ``y`` exits the program.

    Parameters
    ----------
    RAW_DATA_DIR : str
        Directory holding the source ``*.wav`` files.
    chunk_length : number
        Chunk length in seconds.
    slide_length : number
        Step between chunk starts in seconds.
    """
    OUTPUT_DIR = os.path.join(RAW_DATA_DIR, "parts")
    if os.path.isdir(OUTPUT_DIR):
        answer = raw_input("Parts already exist.\nType y to rebuild from scratch : ")
        if answer != 'y':
            print("NOT REBUILT !")
            sys.exit()
        else:
            shutil.rmtree(OUTPUT_DIR)
    os.makedirs(OUTPUT_DIR)

    print(RAW_DATA_DIR)
    print(OUTPUT_DIR)

    chunk_counter = 0
    wav_format = Format('wav')
    wav_files = glob.glob(RAW_DATA_DIR + '/*.wav')
    for wav_file_path in wav_files:
        # Read the wav file
        wave_info = scikits.audiolab.wavread(wav_file_path)
        data_wave = wave_info[0]
        samplerate = wave_info[1]
        len_wave = len(data_wave)

        # Cut them in chunk_length seconds chunks.
        # Sliding of slide_length seconds
        # Zero padding at the end
        time_counter = 0
        start_index = 0
        while start_index < len_wave:
            end_index = (time_counter + chunk_length) * samplerate
            if end_index > len_wave:
                chunk_wave = np.concatenate(
                    (data_wave[start_index:], np.zeros((end_index - len_wave))))
            else:
                chunk_wave = data_wave[start_index:end_index]
            time_counter += slide_length
            start_index = time_counter * samplerate

            # Write the chunks
            outwave_name = OUTPUT_DIR + '/p' + str(chunk_counter) + '.wav'
            # 1 stands for the number of channels
            f = Sndfile(outwave_name, 'w', wav_format, 1, samplerate)
            try:
                f.write_frames(chunk_wave)
            finally:
                # One handle per chunk: close it before opening the next.
                f.close()
            chunk_counter += 1
    return
def readwavefile(inputwav):
    """Read a mono 44.1 kHz sound file as float32 samples.

    Exits the program with status 1 when the file has another sample rate
    or more than one channel.
    """
    snd = Sndfile(inputwav, 'r')

    if snd.samplerate != 44100:
        print('only 44.1kHz filess are supported at present')
        exit(1)

    if snd.channels != 1:
        print('only 1 channel supported at present')
        exit(1)

    samples = snd.read_frames(snd.nframes, dtype=np.float32)
    snd.close()
    return samples
def logmel(filename,n_fft=2048,hop_length=512,nfreqs=None):
    """Compute log-mel features with energy and deltas for a sound file.

    Parameters
    ----------
    filename : str
        Sound file to analyse.
    n_fft, hop_length
        Passed straight to ``librosa.feature.melspectrogram``.
    nfreqs : int, optional
        When given, only the first ``nfreqs`` mel rows are kept.

    Returns
    -------
    numpy.ndarray
        Log-mel spectrum plus RMS energy, followed by the first and second
        deltas of that stack; transposed before returning.
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        samplerate = f.samplerate
    finally:
        # The handle was previously never closed.
        f.close()
    melspectrogram = librosa.feature.melspectrogram(y=data, sr=samplerate, n_fft=n_fft, hop_length=hop_length)
    logmel = librosa.core.logamplitude(melspectrogram)
    if nfreqs is not None:
        logmel = logmel[:nfreqs, :]
    energy = librosa.feature.rmse(y=data)
    spectr = np.vstack((logmel, energy))
    delta1 = librosa.feature.delta(spectr, order=1)
    delta2 = librosa.feature.delta(spectr, order=2)
    features = np.vstack((spectr, delta1, delta2))
    return features.T
def _createDiff(self, expected, result):
    """Write ``expected - result`` to ``self.diffPath``, stopping at the first mismatch.

    Both inputs are read in blocks of up to 1024 frames. The block containing
    the first difference is still written; nothing after it is processed.

    Parameters
    ----------
    expected, result
        Open sound files; ``result`` supplies the format, channel count,
        sample rate and length used for the diff file.
    """
    diff = Sndfile(self.diffPath, mode='w', format=result.format,
                   channels=result.channels, samplerate=result.samplerate)
    try:
        index = 0
        frameSize = 1024
        while index < result.nframes:
            # Shrink the last block to whatever is left.
            frameSize = min(result.nframes - index, frameSize)
            resultFrame = result.read_frames(frameSize)
            expectedFrame = expected.read_frames(frameSize)
            diffFrame = expectedFrame - resultFrame
            diff.write_frames(diffFrame)
            if not allclose(resultFrame, expectedFrame):
                return
            index += frameSize
    finally:
        # Close on every exit path, including the early return above.
        diff.close()
def main():
    """Plot the waveform and spectrogram slices of every file in ``testSamples/``."""
    audioFiles = glob.glob("testSamples/*")
    for audioFile in audioFiles:
        snd = Sndfile(audioFile, "r")
        try:
            nframes = snd.nframes
            data = snd.read_frames(nframes)
            fs = snd.samplerate
        finally:
            # Close each file before moving on to the next one.
            snd.close()

        # Floor division keeps noverlap an integer.
        (frames, freqs, bins, ax) = mp.specgram(data, frameSize, noverlap=(frameSize // 2), Fs=fs)

        # Top panel: the waveform, scaled and offset.
        mp.subplot(211)
        mp.plot(np.linspace(0, float(nframes) / fs, nframes), data * 10000 + 10000, alpha=0.4)
        # Bottom panel: slices derived from the spectrogram.
        mp.subplot(212)
        mp.plot(bins[0:-1], getSlices(frames, bins, 20))
        # NOTE(review): nesting reconstructed from a collapsed source line;
        # show() is assumed to run once per file.
        mp.show()
def read_audio_file(self, file_name):
    """Read an audio file and return its samples, time axis and sample rate.

    Returns ``None`` for an empty file name, otherwise
    ``(y, x, samplerate)`` with ``y`` the float64 samples (left channel for
    multi-channel files) and ``x`` the matching times in seconds.
    """
    if file_name == '':
        return

    snd = Sndfile(unicode(file_name), 'r')
    wav_data = np.array(snd.read_frames(snd.nframes), dtype=np.float64)
    samplerate = snd.samplerate
    snd.close()

    nsamples = wav_data.shape[0]
    if wav_data.ndim > 1:
        # take left channel of stereo track
        wav_data = wav_data[:, 0]

    y = 1.0 * wav_data
    x = np.arange(nsamples) * 1.0 / samplerate  # time values
    return y, x, samplerate
def CQT_stacked(filename, fmin=None, n_bins=84, hop_length=512,nfreqs=None):
    """Compute constant-Q features and their deltas as three stacked planes.

    Parameters
    ----------
    filename : str
        Sound file to analyse.
    fmin, n_bins, hop_length
        Passed straight to ``librosa.cqt``.
    nfreqs : int, optional
        When given, only the first ``nfreqs`` CQT rows are kept.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(3, L, d)`` holding the CQT, its first delta and
        its second delta, where ``(d, L)`` is the shape of the CQT.
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        samplerate = f.samplerate
    finally:
        # The handle was previously never closed.
        f.close()
    cqt = librosa.cqt(data, sr=samplerate, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
    if nfreqs is not None:
        cqt = cqt[:nfreqs, :]
    delta1 = librosa.feature.delta(cqt, order=1)
    delta2 = librosa.feature.delta(cqt, order=2)
    d, L = cqt.shape
    # Transpose each plane and add a leading axis so they can be stacked.
    cqt = cqt.T.reshape(1, L, d)
    delta1 = delta1.T.reshape(1, L, d)
    delta2 = delta2.T.reshape(1, L, d)
    features = np.vstack((cqt, delta1, delta2))
    return features
def logmel_stacked(filename,n_fft=2048,hop_length=512,nfreqs=None):
    """Compute log-mel features and their deltas as three stacked planes.

    Parameters
    ----------
    filename : str
        Sound file to analyse.
    n_fft, hop_length
        Passed straight to ``librosa.feature.melspectrogram``.
    nfreqs : int, optional
        When given, only the first ``nfreqs`` mel rows are kept.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(3, L, d)`` holding the log-mel spectrum, its first
        delta and its second delta, where ``(d, L)`` is the spectrum's shape.
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        samplerate = f.samplerate
    finally:
        # The handle was previously never closed.
        f.close()
    melspectrogram = librosa.feature.melspectrogram(y=data, sr=samplerate, n_fft=n_fft, hop_length=hop_length)
    logmel = librosa.core.logamplitude(melspectrogram)
    if nfreqs is not None:
        logmel = logmel[:nfreqs, :]
    delta1 = librosa.feature.delta(logmel, order=1)
    delta2 = librosa.feature.delta(logmel, order=2)
    d, L = logmel.shape
    # Transpose each plane and add a leading axis so they can be stacked.
    logmel = logmel.T.reshape(1, L, d)
    delta1 = delta1.T.reshape(1, L, d)
    delta2 = delta2.T.reshape(1, L, d)
    features = np.vstack((logmel, delta1, delta2))
    return features
def readAudioFromFile(self):
    """Load ``self.filename`` into memory as int16 samples.

    Sets ``channels``, ``sample_count``, ``sample_rate`` and ``samples`` on
    the instance and returns ``True``.
    """
    snd = Sndfile(self.filename)
    try:
        self.channels = snd.channels
        self.sample_count = snd.nframes
        self.sample_rate = snd.samplerate
        self.samples = snd.read_frames(snd.nframes, dtype=numpy.dtype('int16'))
    finally:
        # Always release the handle, as contextlib.closing did before.
        snd.close()
    return True
def load_dict(filename):
    """
    Load a wave file and return a dict describing it.

    The dict holds the keys 'signal', 'channels', 'fs', 'samples' and
    'format'. The backend is chosen by the module-level ``wav_loader``
    (libsndfile via pysoundfile or scikits.audiolab, or SciPy); an
    unrecognised value yields an empty dict.
    """
    if wav_loader == 'pysoundfile':
        handle = SoundFile(filename)
        soundfile = {
            'signal': handle.read(),
            'channels': handle.channels,
            'fs': handle.samplerate,
            'samples': len(handle),
            'format': handle.format_info + ' ' + handle.subtype_info,
        }
        handle.close()
        return soundfile

    if wav_loader == 'scikits.audiolab':
        handle = Sndfile(filename, 'r')
        soundfile = {
            'signal': handle.read_frames(handle.nframes),
            'channels': handle.channels,
            'fs': handle.samplerate,
            'samples': handle.nframes,
            'format': handle.format,
        }
        handle.close()
        return soundfile

    if wav_loader == 'scipy.io.wavfile':
        fs, signal = read(filename)
        soundfile = {'fs': fs, 'signal': signal}
        try:
            soundfile['channels'] = signal.shape[1]
        except IndexError:
            # A one-dimensional signal is mono.
            soundfile['channels'] = 1
        soundfile['samples'] = signal.shape[0]
        soundfile['format'] = str(signal.dtype)
        return soundfile

    return {}
def bin_calculate(sent_file, bpm, stepsize):
    """Derive bin and shunt-window sizes, in samples, from tempo and step size.

    Parameters
    ----------
    sent_file
        Sound file whose sample rate is used; formatted to a string first.
    bpm : number
        Tempo in beats per minute.
    stepsize : number
        Step resolution; used as ``stepsize/4`` and ``(stepsize*2)/4``.

    Returns
    -------
    tuple
        ``(bin_size, shunt_window)``: the bin length in samples (float) and
        the length of a bin at twice the step resolution, truncated to int.
    """
    sound_file = Sndfile('{0}'.format(sent_file), 'r')
    try:
        samplerate = sound_file.samplerate
    finally:
        # Only the sample rate is needed; the handle used to stay open.
        sound_file.close()
    sec_bin_size = ((60000/float((bpm*(stepsize/4)))))*0.001
    sixfour_sec_bin_size = ((60000/float((bpm*((stepsize*2)/4)))))*0.001
    bin_size = sec_bin_size*(samplerate)
    shunt_window = int(sixfour_sec_bin_size*(samplerate))
    return bin_size, shunt_window
def load(filename):
    """
    Load a wave file and return the signal, sample rate and number of channels.

    Can be any format supported by the underlying library (libsndfile or SciPy)

    Raises
    ------
    ValueError
        If the module-level ``wav_loader`` names an unknown backend.
    """
    if wav_loader == 'pysoundfile':
        sf = SoundFile(filename)
        try:
            signal = sf.read()
            channels = sf.channels
            sample_rate = sf.samplerate
        finally:
            sf.close()
    elif wav_loader == 'scikits.audiolab':
        sf = Sndfile(filename, 'r')
        try:
            signal = sf.read_frames(sf.nframes)
            channels = sf.channels
            sample_rate = sf.samplerate
        finally:
            sf.close()
    elif wav_loader == 'scipy.io.wavfile':
        sample_rate, signal = read(filename)
        try:
            channels = signal.shape[1]
        except IndexError:
            # A one-dimensional signal is mono.
            channels = 1
    else:
        # Previously this fell through to an unbound-local error.
        raise ValueError("unsupported wav_loader: %r" % (wav_loader,))
    return signal, sample_rate, channels
def process_recording(filename, window_width=.03, window_spacing=.02, num_coeffs=40, mel_encode=True):
    """Compute filter-bank coefficients and a magnitude spectrogram of a recording.

    Parameters
    ----------
    filename : str
        Sound file to analyse.
    window_width : float
        Analysis window length in seconds.
    window_spacing : float
        Distance between window centres in seconds.
    num_coeffs : int
        Number of triangular filters.
    mel_encode : bool
        Pass the filter edges through ``mel_transform`` when true.

    Returns
    -------
    tuple
        ``(coeffs.T, spectragram.T)``: filter-bank outputs and magnitude
        spectra, each with one column per window.
    """
    f = Sndfile(filename, 'r')
    try:
        fs = f.samplerate
        data = f.read_frames(f.nframes)
    finally:
        # The handle was previously never closed.
        f.close()

    samples = int(fs * window_width)
    num_windows = int((len(data) / (fs * window_spacing))) - 1

    freqs = np.fft.rfftfreq(samples, d=1. / fs)
    # Floor division keeps the slice start an integer.
    if len(freqs) % 2 == 1:
        idx = len(freqs) // 2
    else:
        idx = len(freqs) // 2 - 1
    pos_freqs = freqs[idx:]

    spectragram = np.empty((num_windows, len(pos_freqs)))
    half_before = int(samples / 2)
    half_after = int(math.ceil(samples / 2))
    for i in range(num_windows):
        centre = (i + 1) * fs * window_spacing
        # Slice bounds must be integers; truncate the float positions.
        left = int(centre - half_before)
        right = int(centre + half_after)
        window = data[left:right]
        spectragram[i] = np.abs(np.fft.rfft(window)[idx:])

    edges = np.linspace(0, fs / 2., num=(num_coeffs + 2))
    if mel_encode:
        edges = mel_transform(edges)

    # Start from zeros: cells outside every triangle are never assigned and
    # would otherwise hold whatever np.empty left in memory.
    filter_bank = np.matrix(np.zeros((num_coeffs, len(pos_freqs))))
    for i in range(num_coeffs):
        for j in range(len(pos_freqs)):
            if edges[i] <= pos_freqs[j] <= edges[i + 2]:
                filter_bank[i, j] = triangle(edges[i], edges[i + 1], edges[i + 2], pos_freqs[j])

    coeffs = np.empty((num_windows, num_coeffs))
    for i in range(num_windows):
        coeffs[i] = np.transpose(filter_bank * np.transpose(np.matrix(spectragram[i])))

    return np.transpose(coeffs), np.transpose(spectragram)
def duration(filename):
    """
    return duration of wav file in second

    :param filename: path of the sound file
    :return: number of frames divided by the sample rate, as a float
    """
    f = Sndfile(filename, 'r')
    try:
        return f.nframes / float(f.samplerate)
    finally:
        # The handle was previously never closed.
        f.close()
def export_audio_vidId(fn, samplerate, vid, time, cols):
    """Resample columns onto a uniform grid and write them as an audio file.

    The output name is ``fn`` with ``,<vid>`` appended to the part before the
    extension; the container type is taken from the extension. Data is
    written one second at a time, with a progress percentage on stdout.

    Parameters
    ----------
    fn : str
        Base file name; must contain at least one dot.
    samplerate : int
        Output sample rate.
    vid : int
        Identifier inserted into the output name.
    time : sequence
        Sample times of ``cols``.
    cols : numpy.ndarray
        2-D array with one column per output channel.
    """
    f = [interp1d(time, cols[:, i]) for i in range(cols.shape[1])]
    parts = fn.split('.')
    fmt = Format(type=parts[-1])
    newfn = '.'.join(parts[:-2] + [parts[-2] + ',%d' % vid] + [parts[-1]])
    print('Writing %s' % newfn)
    wav = Sndfile(newfn, 'w', fmt, cols.shape[1], samplerate)
    try:
        # float() guards against integer division collapsing the grid to 0.
        a = arange(samplerate) / float(samplerate) + time[0]
        span = time[-1] - time[0]
        for i in range(int(span)):
            # Evaluate every channel on this second's sample times.
            y = array([g(a + i) for g in f]).T
            wav.write_frames(y)
            # '\r' rewinds the line so the next update overwrites this one.
            sys.stdout.write('%0.2f%% \r' % (i / float(span) * 100))
            sys.stdout.flush()
    finally:
        # Close so buffered frames reach the file; it was left open before.
        wav.close()
    print('100% ')
def __init__(
        self,
        filename,
        samplerate=44100,
        channels=1,
        format=Format('wav', 'float32'),
):
    """Open ``filename`` for writing and record the stream parameters.

    Raises ``NameError`` when the resulting Sndfile object is falsy.
    """
    self._info = dict(
        filename=filename,
        samplerate=samplerate,
        channels=channels,
        format=format,
        frames=0,
    )
    # TODO: metadata not implemented
    handle = Sndfile(filename, 'w', format, channels, samplerate)
    self._sndfile = handle
    if not handle:
        raise NameError('Sndfile error loading file %s' % filename)
def load_wav(fname, rate=None):
    """Load a sound file as a ``(frames, channels)`` array, resampling on request.

    Parameters
    ----------
    fname : str
        Sound file to read.
    rate : int, optional
        Target sample rate; ``None`` keeps the file's own rate.

    Returns
    -------
    tuple
        ``(signal, rate)`` where ``rate`` is the rate of the returned signal.
    """
    fp = Sndfile(fname, 'r')
    try:
        _signal = fp.read_frames(fp.nframes)
        # Always two-dimensional: one column per channel, mono included.
        _signal = _signal.reshape((-1, fp.channels))
        _rate = fp.samplerate
    finally:
        # The handle was previously never closed.
        fp.close()

    if rate is not None and rate != _rate:
        signal = resampy.resample(_signal, _rate, rate, axis=0, filter='kaiser_best')
    else:
        signal = _signal
        rate = _rate
    return signal, rate